Code Example #1
def generate_meta_experiment(exp_id,
                             agent_paths,
                             path,
                             unique=True,
                             expdomain=None):
    opt = {}
    if unique:
        opt["path"] = os.path.join(path, get_time_str())
    else:
        opt["path"] = path

    opt["exp_id"] = exp_id
    opt["max_steps"] = 10000
    opt["num_policy_checks"] = 40
    opt["checks_per_policy"] = 20
    # start_at = np.array([4, 6])

    agents = load_all_agents(agent_paths,
                             pretrained=True,
                             load_confidence=True)
    for a in agents:
        a.policy.epsilon = 0
    # NOTE: `mapname` is not a parameter of this function; it is assumed to be
    # defined at module scope (cf. the default in Code Example #2).
    if expdomain:
        actual_domain = expdomain(mapname=mapname,
                                  terrain_augmentation=False,
                                  noise=0.1)
    else:
        actual_domain = GridWorldMixed(mapname=mapname,
                                       terrain_augmentation=False,
                                       noise=0.1)
    domain = PolicyMixer(actual_domain, agents)
    representation = Tabular(domain)
    policy = eGreedy(representation)  # , tau=.1)
    opt['agent'] = Q_Learning(policy,
                              representation,
                              discount_factor=0.9,
                              initial_learn_rate=0.8,
                              lambda_=0.5,
                              learn_rate_decay_mode='boyan',
                              boyan_N0=2380)
    opt['domain'] = domain
    experiment = Experiment(**opt)

    path_join = lambda s: os.path.join(opt["path"], s)
    if not os.path.exists(opt["path"]):
        os.makedirs(opt["path"])

    shutil.copy(inspect.getsourcefile(inspect.currentframe()),
                path_join("experiment.py"))

    return experiment
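
A minimal driver for the factory above might look like the sketch below. The agent directories are hypothetical, `mapname` is assumed to be set at module scope, and run()/save() assume the RLPy-style Experiment API used throughout these examples.

# Sketch only: hypothetical paths, assuming an RLPy-style Experiment with run()/save().
agent_paths = ["./Results/agent0", "./Results/agent1"]  # pretrained sub-agent folders (assumed)
experiment = generate_meta_experiment(exp_id=1,
                                      agent_paths=agent_paths,
                                      path="./Results/Meta/",
                                      unique=True)
experiment.run()
experiment.save()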
Code Example #2
def make_experiment(exp_id=2,
                    agent_paths=None,
                    mapname="12x12-Bridge.txt",
                    path="./Results/MetaRLSarsa",
                    boyan_N0=1000,
                    discount_factor=0.8286376417073243,
                    initial_learn_rate=0.5,
                    lambda_=0.2):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 10000
    opt["num_policy_checks"] = 5
    opt["checks_per_policy"] = 10
    # start_at = np.array([4, 5])

    # Logging

    # Domain:
    # MAZE                = '/Domains/GridWorldMaps/1x3.txt'
    maze = os.path.join(map_dir, mapname)
    actual_domain = GridWorldModified(
        maze,
        random_start=True,
        noise=0.1,
        # start_at=np.array([4,6])
    )

    # agent_1 = loadagent("QL") # most likely preloaded
    # agent_2 = loadagent("SARSA")
    # agent_3 = loadagent("NAC")
    # agents = [agent_1, agent_2, agent_3]
    agents = load_all_agents(agent_paths)

    domain = PolicyMixer(actual_domain, agents, seed=exp_id)
    representation = Tabular(domain)
    meta_policy = eGreedy(representation, epsilon=0.3)
    opt['agent'] = Q_Learning(representation=representation,
                              policy=meta_policy,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              discount_factor=discount_factor)
    opt['domain'] = domain
    experiment = Experiment(**opt)

    return experiment
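
Because make_experiment exposes its tuned hyperparameters as keyword arguments, a common pattern is to hold them fixed and sweep the seed (exp_id). The loop below is only a sketch with hypothetical agent directories, again assuming RLPy-style run()/save().

# Sketch only: run the same configuration under several seeds (paths are hypothetical).
for seed in range(1, 6):
    experiment = make_experiment(exp_id=seed,
                                 agent_paths=["./Results/agent0", "./Results/agent1"])
    experiment.run()
    experiment.save()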
Code Example #3
def generate_multinomial_experiment(exp_id,
                                    agent_paths,
                                    path,
                                    unique=True,
                                    expdomain=None):
    opt = {}
    if unique:
        opt["path"] = os.path.join(path, get_time_str())
    else:
        opt["path"] = path

    opt["exp_id"] = exp_id
    opt["max_steps"] = 8000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 1

    agents = load_all_agents(agent_paths,
                             pretrained=True,
                             load_confidence=True)

    for a in agents:
        assert type(a.policy).__name__ == "DynamicsConfidenceGreedy"
        a.policy.epsilon = 0

    if expdomain:
        domain = expdomain(mapname=mapname, noise=0.1)
    else:
        domain = GridWorldMixed(mapname=mapname, noise=0.1)
    representation = IncrementalTabular(domain)
    policy = MAgentMultinomial(representation, agents)  # , tau=.1)

    print "$" * 10
    print "You are currently running {}".format(policy.__class__)
    opt['agent'] = NoopAgent(representation=representation, policy=policy)
    opt['domain'] = domain
    experiment = Experiment(**opt)

    path_join = lambda s: os.path.join(opt["path"], s)
    if not os.path.exists(opt["path"]):
        os.makedirs(opt["path"])

    shutil.copy(inspect.getsourcefile(inspect.currentframe()),
                path_join("experiment.py"))

    return experiment
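
In this example the meta-level agent does no learning of its own; action selection is delegated entirely to the MAgentMultinomial policy over the pretrained sub-agents. The class below is a sketch of that idea only (it is not the original NoopAgent), using an RLPy-style learn() signature.

class NoopAgentSketch(object):
    """Illustrative stand-in for NoopAgent (assumption, not the original class)."""

    def __init__(self, representation, policy):
        self.representation = representation
        self.policy = policy

    def learn(self, s, p_actions, a, r, ns, np_actions, na, terminal):
        # No value-function update: the multinomial policy alone decides
        # which sub-agent's action to execute.
        pass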
Code Example #4
def make_experiment(agent_paths=["./"],
                    sublearning=False,
                    exp_id=3,
                    path="./Results/Confidence2/",
                    temp=0.10517212721767522,
                    discount_factor=0.7,
                    lambda_=0.0,
                    init_state=None):
    opt = {}
    opt["path"] = os.path.join(path, get_time_str())
    opt["exp_id"] = exp_id
    opt["max_steps"] = 40000
    opt["num_policy_checks"] = 10
    opt["checks_per_policy"] = 20
    # start_at = np.array([4, 6])

    agents = load_all_agents(agent_paths,
                             pretrained=True,
                             load_confidence=True)

    for i, a in enumerate(agents):
        # import ipdb; ipdb.set_trace()
        a.policy.epsilon = i * 0.02 + 0.1
        # a.learn_rate_decay_mode = 'boyan'
        # a.learn_rate = a.initial_learn_rate = 0.9
        # a.boyan_N0 = 3000
        a.learn_rate_decay_mode = 'dabney'

    domain = RCCarModified(noise=0.1, init_state=(-2, 0.8, 0, 2.5))
    representation = IncrementalTabular(domain)
    policy = MultiAgentConfidence(representation, agents, tau=.1)
    print "$" * 10
    print "You are currently running {}".format(policy.__class__)
    opt['agent'] = MetaAgent(representation=representation,
                             policy=policy,
                             learn_rate_decay_mode="const")
    opt['domain'] = domain
    experiment = Experiment(**opt)
    return experiment
Code Example #5
def make_experiment(exp_id=1,
                    path="./Results/Temp/MetaHP/",
                    unique=False,
                    max_episode=300,
                    num_rbfs=4000,
                    initial_learn_rate=0.9,
                    lambda_=0.7,
                    resolution=20):
    opt = {}
    if unique:
        opt["path"] = os.path.join(path, get_time_str())
    else:
        opt["path"] = path

    opt["exp_id"] = exp_id
    opt["max_steps"] = 30000
    # opt["max_episode"] = max_episode
    opt["num_policy_checks"] = 10
    opt["checks_per_policy"] = 1
    # start_at = np.array([4, 6])
    from Domains import RCCarSlideTurn

    expdomain = RCCarSlideTurn
    agent_paths = [
        '/home/jarvis/work/clipper/models/rl/Results/Mixed_ActionsB/agent0/Aug21_11-38-389943',
        '/home/jarvis/work/clipper/models/rl/Results/Mixed_ActionsB/agent1/Aug21_11-43-003799'
    ]

    agents = load_all_agents(agent_paths,
                             pretrained=True,
                             load_confidence=False)
    for a in agents:
        a.policy.epsilon = 0
        # a.policy.turnOffExploration()
    if expdomain:
        actual_domain = expdomain(noise=0.)
    else:
        actual_domain = RCCarModified(noise=0.1)
    domain = RCPolicyMixer(actual_domain, agents)
    representation = MahaRBF(
        domain,
        num_rbfs=int(num_rbfs),
        # state_dimensions=np.array([0,1,3]),
        const_feature=False,
        resolution_min=resolution,
        resolution_max=resolution,
        include_border=True,
        normalize=True,
        seed=exp_id)
    policy = eGreedy(representation)  #, epsilon=0.1) # , tau=.1)
    opt['agent'] = Q_Learning(policy,
                              representation,
                              discount_factor=0.8,
                              initial_learn_rate=initial_learn_rate,
                              lambda_=lambda_,
                              learn_rate_decay_mode='const')
    opt['domain'] = domain
    experiment = Experiment(**opt)

    # path_join = lambda s: os.path.join(opt["path"], s)
    # if not os.path.exists(opt["path"]):
    # 	os.makedirs(opt["path"])

    # shutil.copy(inspect.getsourcefile(inspect.currentframe()), path_join("experiment.py"))

    return experiment
Code Example #6
def generate_meta_experiment(exp_id,
                             agent_paths,
                             path,
                             unique=True,
                             expdomain=None,
                             max_episode=5000):
    opt = {}
    if unique:
        opt["path"] = os.path.join(path, get_time_str())
    else:
        opt["path"] = path

    opt["exp_id"] = exp_id
    # opt["max_steps"] = 50000
    opt["max_episode"] = max_episode
    opt["num_policy_checks"] = 50
    opt["checks_per_policy"] = 1
    # start_at = np.array([4, 6])

    agents = load_all_agents(agent_paths,
                             pretrained=True,
                             load_confidence=False)
    for a in agents:
        a.policy.epsilon = 0
        a.policy.turnOffExploration()
    if expdomain:
        actual_domain = expdomain(noise=0.1)
    else:
        actual_domain = RCCarModified(noise=0.1)
    domain = RCPolicyMixer(actual_domain, agents)
    # representation = MahaRBF(domain,
    # 					num_rbfs=3000,
    # 					# state_dimensions=np.array([0,1,3]),
    # 					const_feature=False,
    # 					resolution_min=21,
    # 					resolution_max=21,
    # 					include_border=True,
    # 					normalize=True,
    # 					seed=exp_id)
    # representation = RandomLocalBases(domain, gaussian_kernel,
    # 					 				num=100,
    # 									normalization=True,
    # 									resolution_max=20,
    # 									seed=exp_id)
    representation = NonparametricLocalBases(domain,
                                             gaussian_kernel,
                                             normalization=True)
    policy = eGreedy(representation, 0.05)  #, epsilon=0.1) # , tau=.1)
    # policy = GibbsPolicy(representation)
    opt['agent'] = Q_Learning(policy,
                              representation,
                              discount_factor=0.8,
                              initial_learn_rate=.8,
                              lambda_=0.1,
                              learn_rate_decay_mode='boyan',
                              boyan_N0=500)
    opt['domain'] = domain
    experiment = ExperimentMod(**opt)

    path_join = lambda s: os.path.join(opt["path"], s)
    if not os.path.exists(opt["path"]):
        os.makedirs(opt["path"])

    shutil.copy(inspect.getsourcefile(inspect.currentframe()),
                path_join("experiment.py"))

    return experiment
Code Example #7
def make_experiment(agent_paths="./",
                    pretrained=False,
                    sublearning=False,
                    yaml_file=None,
                    exp_id=3,
                    path="./Results/TestVoting/",
                    init_state=None):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 80000
    opt["num_policy_checks"] = 40
    opt["checks_per_policy"] = 10
    # start_at = np.array([4, 6])

    # Logging

    # Domain:
    # MAZE                = '/Domains/GridWorldMaps/1x3.txt'
    domain = RCCarModified(noise=0.1,
                           # random_start=True,
                           # init_state=init_state
                           )

    # agent_1 = loadagent("QL") # most likely preloaded
    # agent_2 = loadagent("SARSA")
    # agent_3 = loadagent("NAC")
    # agents = [agent_1, agent_2, agent_3]
    agents = load_all_agents(agent_paths,
                             pretrained=pretrained,
                             yaml_file=yaml_file)

    for i, a in enumerate(agents):
        # import ipdb; ipdb.set_trace()
        a.policy.epsilon = i * 0.03 + 0.1

    # import ipdb; ipdb.set_trace()
    representation = IncrementalTabular(domain)  #This doesn't matter
    policy = MultiAgentVoting(representation, agents, tau=.7)
    opt['agent'] = MetaAgent(
        representation=representation,
        policy=policy,
    )
    opt['domain'] = domain
    experiment = Experiment(**opt)

    #Seeding to match standard experiment
    for agent in agents:
        agent.random_state = np.random.RandomState(
            experiment.randomSeeds[experiment.exp_id - 1])
        agent.init_randomization()
        # agent.representation.random_state = np.random.RandomState(
        #     experiment.randomSeeds[experiment.exp_id - 1])
        # agent.representation.init_randomization() #init_randomization is called on instantiation
        agent.policy.random_state = np.random.RandomState(
            experiment.randomSeeds[experiment.exp_id - 1])
        agent.policy.init_randomization()

    for a in agents:
        print(a.policy.epsilon)

    path_join = lambda s: os.path.join(opt["path"], s)
    if not os.path.exists(opt["path"]):
        os.makedirs(opt["path"])

    # Copy the sub-agents' parameter file next to the experiment source
    # (assumes `yaml_file` is given and `agent_paths` is a list of directories).
    param_path = os.path.join(agent_paths[0], yaml_file)
    shutil.copy(param_path, path_join("params.yml"))
    shutil.copy(inspect.getsourcefile(inspect.currentframe()),
                path_join("experiment.py"))

    # import ipdb; ipdb.set_trace()

    return experiment
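
The re-seeding block above reappears, with the representation lines active, in Code Example #8. A small helper like the sketch below could factor out that duplication; it reuses exactly the calls from the original code but is not itself part of the source.

def reseed_agents(agents, experiment, seed_representation=False):
    # Re-seed each sub-agent from the Experiment's per-run seed (sketch of the shared logic).
    seed = experiment.randomSeeds[experiment.exp_id - 1]
    for agent in agents:
        agent.random_state = np.random.RandomState(seed)
        agent.init_randomization()
        if seed_representation:
            agent.representation.random_state = np.random.RandomState(seed)
            agent.representation.init_randomization()
        agent.policy.random_state = np.random.RandomState(seed)
        agent.policy.init_randomization()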
Code Example #8
def make_experiment(agent_paths="./",
                    pretrained=False,
                    sublearning=False,
                    yaml_file=None,
                    exp_id=3,
                    path="./Results/TestVoting/",
                    temp=0.90517212721767522,
                    boyan_N0=100,
                    discount_factor=0.7,
                    initial_learn_rate=0.1,
                    lambda_=0.0,
                    init_state=None):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 50000
    opt["num_policy_checks"] = 50
    opt["checks_per_policy"] = 10
    # start_at = np.array([4, 6])

    # Logging

    # Domain:
    # MAZE                = '/Domains/GridWorldMaps/1x3.txt'
    actual_domain = RCCarModified(
        noise=0.1,
        # random_start=True,
        init_state=init_state)

    # agent_1 = loadagent("QL") # most likely preloaded
    # agent_2 = loadagent("SARSA")
    # agent_3 = loadagent("NAC")
    # agents = [agent_1, agent_2, agent_3]
    agents = load_all_agents(agent_paths,
                             eps=0.0,
                             pretrained=pretrained,
                             yaml_file=yaml_file)

    for i, a in enumerate(agents):
        # import ipdb; ipdb.set_trace()
        a.policy.epsilon = i * 0.05 + 0.1

    # assert agents[1].policy.eps == 0.25

    assert agents[0].policy.representation.random_state.get_state()[2] == 432
    assert agents[0].policy.representation.random_state.get_state()[1][0] == 2308721491
    assert agents[0].policy.random_state.get_state()[1][0] == 1
    assert agents[0].policy.random_state.get_state()[2] == 624

    print "ASSERTIONS PASSED"

    # import ipdb; ipdb.set_trace()
    domain = PolicyMixer(actual_domain,
                         agents,
                         voting=True,
                         seed=exp_id,
                         temp=temp,
                         sublearning=sublearning)

    # import ipdb; ipdb.set_trace()
    representation = Tabular(domain)
    meta_policy = eGreedy(representation, epsilon=0.3)
    opt['agent'] = Q_Learning(representation=representation,
                              policy=meta_policy,
                              learn_rate_decay_mode="const",
                              boyan_N0=0,
                              lambda_=0,
                              initial_learn_rate=0.0,
                              discount_factor=discount_factor)
    opt['domain'] = domain
    experiment = Experiment(**opt)

    #Seeding to match standard experiment
    for agent in agents:
        agent.random_state = np.random.RandomState(
            experiment.randomSeeds[experiment.exp_id - 1])
        agent.init_randomization()
        agent.representation.random_state = np.random.RandomState(
            experiment.randomSeeds[experiment.exp_id - 1])
        agent.representation.init_randomization()
        agent.policy.random_state = np.random.RandomState(
            experiment.randomSeeds[experiment.exp_id - 1])
        agent.policy.init_randomization()

    return experiment