def generate_meta_experiment(exp_id, agent_paths, path, unique=True, expdomain=None):
    # Meta-experiment: a tabular Q-Learning meta-agent chooses among pretrained
    # sub-agents through the PolicyMixer domain wrapper.
    # NOTE: `mapname` is expected to be defined at module level.
    opt = {}
    if unique:
        opt["path"] = os.path.join(path, get_time_str())
    else:
        opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 10000
    opt["num_policy_checks"] = 40
    opt["checks_per_policy"] = 20

    # Load pretrained sub-agents (with confidence models) and make them greedy.
    agents = load_all_agents(agent_paths, pretrained=True, load_confidence=True)
    for a in agents:
        a.policy.epsilon = 0

    if expdomain:
        actual_domain = expdomain(mapname=mapname, terrain_augmentation=False, noise=0.1)
    else:
        actual_domain = GridWorldMixed(mapname=mapname, terrain_augmentation=False, noise=0.1)
    domain = PolicyMixer(actual_domain, agents)
    representation = Tabular(domain)
    policy = eGreedy(representation)
    opt['agent'] = Q_Learning(policy, representation,
                              discount_factor=0.9,
                              initial_learn_rate=0.8,
                              lambda_=0.5,
                              learn_rate_decay_mode='boyan',
                              boyan_N0=2380)
    opt['domain'] = domain
    experiment = Experiment(**opt)

    # Snapshot this source file next to the results for reproducibility.
    path_join = lambda s: os.path.join(opt["path"], s)
    if not os.path.exists(opt["path"]):
        os.makedirs(opt["path"])
    shutil.copy(inspect.getsourcefile(inspect.currentframe()),
                path_join("experiment.py"))
    return experiment
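# Usage sketch (hedged): the agent paths below are placeholders, and run()/save()
# assume the standard RLPy Experiment interface.
#
#   experiment = generate_meta_experiment(
#       exp_id=1,
#       agent_paths=["./Results/agent0", "./Results/agent1"],  # hypothetical
#       path="./Results/Meta")
#   experiment.run(visualize_performance=False)
#   experiment.save()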
def make_experiment(exp_id=2, agent_paths=None, mapname="12x12-Bridge.txt",
                    path="./Results/MetaRLSarsa",
                    boyan_N0=1000,
                    discount_factor=0.8286376417073243,
                    initial_learn_rate=0.5,
                    lambda_=0.2):
    # Grid-world meta-experiment with tuned hyperparameters exposed as keyword
    # arguments (the precise defaults suggest they come from a parameter search).
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 10000
    opt["num_policy_checks"] = 5
    opt["checks_per_policy"] = 10

    # Domain: sub-agents (e.g. QL/SARSA/NAC) are loaded from disk rather than
    # constructed here.
    maze = os.path.join(map_dir, mapname)
    actual_domain = GridWorldModified(maze,
                                      random_start=True,
                                      noise=0.1)
    agents = load_all_agents(agent_paths)
    domain = PolicyMixer(actual_domain, agents, seed=exp_id)
    representation = Tabular(domain)
    meta_policy = eGreedy(representation, epsilon=0.3)
    opt['agent'] = Q_Learning(representation=representation,
                              policy=meta_policy,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              discount_factor=discount_factor)
    opt['domain'] = domain
    experiment = Experiment(**opt)
    return experiment
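# Usage sketch (hedged; run() assumes the standard RLPy Experiment interface):
#
#   exp = make_experiment(exp_id=1,
#                         agent_paths=["./Results/agent0"],  # hypothetical path
#                         boyan_N0=1000, lambda_=0.2)
#   exp.run()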
def generate_multinomial_experiment(exp_id, agent_paths, path, unique=True, expdomain=None):
    # Multinomial meta-experiment: a MAgentMultinomial policy arbitrates among
    # confidence-aware sub-agents; the NoopAgent wrapper does no meta-level learning.
    # NOTE: `mapname` is expected to be defined at module level.
    opt = {}
    if unique:
        opt["path"] = os.path.join(path, get_time_str())
    else:
        opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 8000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 1

    agents = load_all_agents(agent_paths, pretrained=True, load_confidence=True)
    for a in agents:
        assert type(a.policy).__name__ == "DynamicsConfidenceGreedy"
        a.policy.epsilon = 0

    if expdomain:
        domain = expdomain(mapname=mapname, noise=0.1)
    else:
        domain = GridWorldMixed(mapname=mapname, noise=0.1)
    representation = IncrementalTabular(domain)
    policy = MAgentMultinomial(representation, agents)
    print "$" * 10
    print "You are currently running {}".format(policy.__class__)

    opt['agent'] = NoopAgent(representation=representation, policy=policy)
    opt['domain'] = domain
    experiment = Experiment(**opt)

    # Snapshot this source file next to the results for reproducibility.
    path_join = lambda s: os.path.join(opt["path"], s)
    if not os.path.exists(opt["path"]):
        os.makedirs(opt["path"])
    shutil.copy(inspect.getsourcefile(inspect.currentframe()),
                path_join("experiment.py"))
    return experiment
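# Illustrative sketch (hypothetical -- MAgentMultinomial's internals are not
# shown in this file): a multinomial arbitration policy samples which sub-agent
# acts at each step, in proportion to per-agent confidence weights.
import numpy as np

def multinomial_arbitration_sketch(confidences, random_state):
    """Sample a sub-agent index with probability proportional to its confidence."""
    weights = np.asarray(confidences, dtype=float)
    probs = weights / weights.sum()
    return random_state.choice(len(probs), p=probs)

# e.g. multinomial_arbitration_sketch([0.5, 0.3, 0.2], np.random.RandomState(0))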
def make_experiment(agent_paths=["./"], sublearning=False, exp_id=3,
                    path="./Results/Confidence2/",
                    temp=0.10517212721767522,
                    discount_factor=0.7, lambda_=0.0, init_state=None):
    # Confidence-based mixing on the RC car domain.
    # NOTE: temp, discount_factor, lambda_, sublearning and init_state are
    # accepted (e.g. for sweep compatibility) but unused here; the domain's
    # init_state and the policy's tau are hardcoded below.
    opt = {}
    opt["path"] = os.path.join(path, get_time_str())
    opt["exp_id"] = exp_id
    opt["max_steps"] = 40000
    opt["num_policy_checks"] = 10
    opt["checks_per_policy"] = 20

    agents = load_all_agents(agent_paths, pretrained=True, load_confidence=True)
    for i, a in enumerate(agents):
        # Stagger exploration across sub-agents: 0.10, 0.12, 0.14, ...
        a.policy.epsilon = i * 0.02 + 0.1
        a.learn_rate_decay_mode = 'dabney'

    domain = RCCarModified(noise=0.1, init_state=(-2, 0.8, 0, 2.5))
    representation = IncrementalTabular(domain)
    policy = MultiAgentConfidence(representation, agents, tau=.1)
    print "$" * 10
    print "You are currently running {}".format(policy.__class__)

    opt['agent'] = MetaAgent(representation=representation,
                             policy=policy,
                             learn_rate_decay_mode="const")
    opt['domain'] = domain
    experiment = Experiment(**opt)
    return experiment
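# Illustrative sketch (hypothetical -- not MultiAgentConfidence's actual
# internals): tau can be read as a softmax temperature over per-agent
# confidences, with smaller tau concentrating weight on the most confident agent.
def softmax_confidence_sketch(confidences, tau=0.1):
    """Turn raw confidence scores into mixing probabilities."""
    z = np.asarray(confidences, dtype=float) / tau
    z -= z.max()  # subtract max for numerical stability
    e = np.exp(z)
    return e / e.sum()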
def make_experiment(exp_id=1, path="./Results/Temp/MetaHP/", unique=False,
                    max_episode=300, num_rbfs=4000,
                    initial_learn_rate=0.9, lambda_=0.7, resolution=20):
    # Meta-hyperparameter run on RCCarSlideTurn with an RBF representation.
    # NOTE: max_episode is accepted but unused; opt["max_episode"] is left
    # disabled below and max_steps bounds the run instead.
    opt = {}
    if unique:
        opt["path"] = os.path.join(path, get_time_str())
    else:
        opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 30000
    # opt["max_episode"] = max_episode
    opt["num_policy_checks"] = 10
    opt["checks_per_policy"] = 1

    from Domains import RCCarSlideTurn
    expdomain = RCCarSlideTurn
    agent_paths = [
        '/home/jarvis/work/clipper/models/rl/Results/Mixed_ActionsB/agent0/Aug21_11-38-389943',
        '/home/jarvis/work/clipper/models/rl/Results/Mixed_ActionsB/agent1/Aug21_11-43-003799',
    ]
    agents = load_all_agents(agent_paths, pretrained=True, load_confidence=False)
    for a in agents:
        a.policy.epsilon = 0

    if expdomain:
        actual_domain = expdomain(noise=0.)
    else:
        actual_domain = RCCarModified(noise=0.1)
    domain = RCPolicyMixer(actual_domain, agents)
    representation = MahaRBF(domain,
                             num_rbfs=int(num_rbfs),
                             const_feature=False,
                             resolution_min=resolution,
                             resolution_max=resolution,
                             include_border=True,
                             normalize=True,
                             seed=exp_id)
    policy = eGreedy(representation)
    opt['agent'] = Q_Learning(policy, representation,
                              discount_factor=0.8,
                              initial_learn_rate=initial_learn_rate,
                              lambda_=lambda_,
                              learn_rate_decay_mode='const')
    opt['domain'] = domain
    experiment = Experiment(**opt)
    # The source-snapshot block used elsewhere is intentionally disabled here.
    return experiment
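# Illustrative sketch of what an RBF representation computes (a plain Gaussian
# RBF feature map; the actual MahaRBF class also supports a Mahalanobis metric
# and border handling, which are not reproduced here):
def gaussian_rbf_features_sketch(state, centers, widths):
    """One Gaussian bump activation per center, normalized to sum to 1."""
    diffs = (np.asarray(centers) - np.asarray(state)) / np.asarray(widths)
    phi = np.exp(-0.5 * np.sum(diffs ** 2, axis=1))
    return phi / phi.sum()  # analogous to normalize=True above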
def generate_meta_experiment(exp_id, agent_paths, path, unique=True,
                             expdomain=None, max_episode=5000):
    # Episode-bounded meta-experiment on the RC car domain with a nonparametric
    # local-basis representation (fixed-size alternatives are noted below).
    opt = {}
    if unique:
        opt["path"] = os.path.join(path, get_time_str())
    else:
        opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_episode"] = max_episode
    opt["num_policy_checks"] = 50
    opt["checks_per_policy"] = 1

    agents = load_all_agents(agent_paths, pretrained=True, load_confidence=False)
    for a in agents:
        a.policy.epsilon = 0
        a.policy.turnOffExploration()

    if expdomain:
        actual_domain = expdomain(noise=0.1)
    else:
        actual_domain = RCCarModified(noise=0.1)
    domain = RCPolicyMixer(actual_domain, agents)
    # Alternatives tried here: MahaRBF (num_rbfs=3000, resolution 21, normalized)
    # and RandomLocalBases(gaussian_kernel, num=100, resolution_max=20).
    representation = NonparametricLocalBases(domain, gaussian_kernel,
                                             normalization=True)
    policy = eGreedy(representation, 0.05)  # a GibbsPolicy was also tried
    opt['agent'] = Q_Learning(policy, representation,
                              discount_factor=0.8,
                              initial_learn_rate=.8,
                              lambda_=0.1,
                              learn_rate_decay_mode='boyan',
                              boyan_N0=500)
    opt['domain'] = domain
    experiment = ExperimentMod(**opt)

    # Snapshot this source file next to the results for reproducibility.
    path_join = lambda s: os.path.join(opt["path"], s)
    if not os.path.exists(opt["path"]):
        os.makedirs(opt["path"])
    shutil.copy(inspect.getsourcefile(inspect.currentframe()),
                path_join("experiment.py"))
    return experiment
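# Illustrative sketch (a hypothetical simplification of what a nonparametric
# local-basis representation does): grow the feature set online by adding a new
# kernel center whenever no existing center fires strongly on the current state.
def maybe_add_center_sketch(state, centers, width=1.0, threshold=0.5):
    """Append `state` as a new kernel center if all existing activations are weak."""
    state = np.asarray(state, dtype=float)
    activations = [np.exp(-0.5 * np.sum(((state - c) / width) ** 2))
                   for c in centers]
    if not activations or max(activations) < threshold:
        centers.append(state)
    return centers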
def make_experiment(agent_paths="./", pretrained=False, sublearning=False,
                    yaml_file=None, exp_id=3, path="./Results/TestVoting/",
                    init_state=None):
    # Voting meta-experiment on the RC car domain. `agent_paths` is expected to
    # be a list of directories (agent_paths[0] is used to locate yaml_file).
    # NOTE: sublearning and init_state are accepted but unused here.
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 80000
    opt["num_policy_checks"] = 40
    opt["checks_per_policy"] = 10

    domain = RCCarModified(noise=0.1)
    agents = load_all_agents(agent_paths, pretrained=pretrained, yaml_file=yaml_file)
    for i, a in enumerate(agents):
        # Stagger exploration across sub-agents: 0.10, 0.13, 0.16, ...
        a.policy.epsilon = i * 0.03 + 0.1

    representation = IncrementalTabular(domain)  # placeholder; this choice doesn't matter
    policy = MultiAgentVoting(representation, agents, tau=.7)
    opt['agent'] = MetaAgent(representation=representation, policy=policy)
    opt['domain'] = domain
    experiment = Experiment(**opt)

    # Seeding to match the standard experiment. The representation is not
    # reseeded because init_randomization is already called on instantiation.
    for agent in agents:
        agent.random_state = np.random.RandomState(
            experiment.randomSeeds[experiment.exp_id - 1])
        agent.init_randomization()
        agent.policy.random_state = np.random.RandomState(
            experiment.randomSeeds[experiment.exp_id - 1])
        agent.policy.init_randomization()

    for a in agents:
        print a.policy.epsilon

    # Snapshot the parameter file and this source file for reproducibility.
    path_join = lambda s: os.path.join(opt["path"], s)
    if not os.path.exists(opt["path"]):
        os.makedirs(opt["path"])
    param_path = os.path.join(agent_paths[0], yaml_file)
    shutil.copy(param_path, path_join("params.yml"))
    shutil.copy(inspect.getsourcefile(inspect.currentframe()),
                path_join("experiment.py"))
    return experiment
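# The per-agent seeding above repeats one pattern; a small helper (hypothetical,
# not part of the codebase) makes the intent explicit:
def reseed_agent_sketch(agent, seed):
    """Give an agent and its policy fresh RandomStates derived from `seed`."""
    agent.random_state = np.random.RandomState(seed)
    agent.init_randomization()
    agent.policy.random_state = np.random.RandomState(seed)
    agent.policy.init_randomization()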
def make_experiment(agent_paths="./", pretrained=False, sublearning=False,
                    yaml_file=None, exp_id=3, path="./Results/TestVoting/",
                    temp=0.90517212721767522, boyan_N0=100,
                    discount_factor=0.7, initial_learn_rate=0.1,
                    lambda_=0.0, init_state=None):
    # Voting variant in which the PolicyMixer itself arbitrates: the meta
    # Q-learner below uses a constant learning rate of 0.0, so meta-level
    # Q-values are never updated and behavior is driven by the voting mixer.
    # NOTE: boyan_N0, initial_learn_rate and lambda_ are accepted but
    # overridden by hardcoded values below.
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 50000
    opt["num_policy_checks"] = 50
    opt["checks_per_policy"] = 10

    actual_domain = RCCarModified(noise=0.1, init_state=init_state)
    agents = load_all_agents(agent_paths, eps=0.0,
                             pretrained=pretrained, yaml_file=yaml_file)
    for i, a in enumerate(agents):
        # Stagger exploration across sub-agents: 0.10, 0.15, 0.20, ...
        a.policy.epsilon = i * 0.05 + 0.1

    # Sanity checks that loading restored the exact pretrained RNG states.
    assert agents[0].policy.representation.random_state.get_state()[2] == 432
    assert agents[0].policy.representation.random_state.get_state()[1][0] == 2308721491
    assert agents[0].policy.random_state.get_state()[1][0] == 1
    assert agents[0].policy.random_state.get_state()[2] == 624
    print "ASSERTIONS PASSED"

    domain = PolicyMixer(actual_domain, agents, voting=True, seed=exp_id,
                         temp=temp, sublearning=sublearning)
    representation = Tabular(domain)
    meta_policy = eGreedy(representation, epsilon=0.3)
    opt['agent'] = Q_Learning(representation=representation,
                              policy=meta_policy,
                              learn_rate_decay_mode="const",
                              boyan_N0=0,
                              lambda_=0,
                              initial_learn_rate=0.0,
                              discount_factor=discount_factor)
    opt['domain'] = domain
    experiment = Experiment(**opt)

    # Seeding to match the standard experiment.
    for agent in agents:
        agent.random_state = np.random.RandomState(
            experiment.randomSeeds[experiment.exp_id - 1])
        agent.init_randomization()
        agent.representation.random_state = np.random.RandomState(
            experiment.randomSeeds[experiment.exp_id - 1])
        agent.representation.init_randomization()
        agent.policy.random_state = np.random.RandomState(
            experiment.randomSeeds[experiment.exp_id - 1])
        agent.policy.init_randomization()
    return experiment
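# Usage sketch (hedged): run several seeds in sequence; the path is a
# placeholder, and run()/save() assume the standard RLPy Experiment interface.
#
#   for run_id in range(1, 6):
#       exp = make_experiment(agent_paths=["./Results/agent0"],  # hypothetical
#                             pretrained=True, yaml_file="params.yml",
#                             exp_id=run_id)
#       exp.run()
#       exp.save()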