def make_experiment(exp_id=1,
                    path="./Results/Experiments/HalfReward/" + getTimeStr() + "/"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function that returns an instance of the Experiment
    class with everything set up.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path

    # Domain
    maze = os.path.join(GridWorldInter.default_map_dir, '11x11-RoomsSeg.txt')
    domain = GridWorldInter(maze, noise=0.01)
    opt["domain"] = domain

    # Representation
    # discretization is only needed for continuous state spaces; it is ignored otherwise
    representation = Tabular(domain, discretization=20)

    # Policy
    # NOTE: epsilon kept low to limit exploration noise for now; revert later
    policy = eGreedy(representation, epsilon=0.1)

    # Agent
    opt["agent"] = Q_Learning(representation=representation, policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.3)
    opt["checks_per_policy"] = 50
    opt["max_steps"] = 12000
    opt["num_policy_checks"] = 20

    # experiment = ExperimentDelayed(**opt)
    experiment = Experiment(**opt)
    return experiment
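# Usage sketch (illustrative; it mirrors the __main__ blocks of the other
# experiment scripts in this repo rather than anything specific to this file):
if __name__ == '__main__':
    experiment = make_experiment(exp_id=1)
    experiment.run(visualize_steps=False,     # should each learning step be shown?
                   visualize_learning=False)  # show policy / value function?
    experiment.plot(save=True)
    experiment.save()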
def run_experiment_params(param_path='./params.yaml'):
    """Build an experiment from a YAML parameter file (position-based goals)."""
    params = type("Parameters", (), load_yaml(param_path))

    def goalfn(state, goal, radius=0.1):
        # radius could easily be made per-goal if needed
        position = state[:2]
        return np.linalg.norm(np.array(position) - np.array(goal)) < radius

    def encode_trial():
        rewards = list(params.domain_params['goalArray'])
        encode = Encoding(rewards, goalfn)
        return encode.strict_encoding

    # Load domain
    params.domain_params['goalfn'] = goalfn
    params.domain_params['encodingFunction'] = encode_trial()
    # params.domain_params['goalArray'] = params.domain_params['goalArray'][::4]
    domain = eval(params.domain)(**params.domain_params)

    # Load representation, policy, and agent
    representation = eval(params.representation)(domain,
                                                 **params.representation_params)
    policy = eval(params.policy)(representation, **params.policy_params)
    agent = eval(params.agent)(policy, representation,
                               discount_factor=domain.discount_factor,
                               **params.agent_params)

    opt = {}
    opt["exp_id"] = params.exp_id
    opt["path"] = params.results_path + getTimeStr() + "/"
    opt["max_steps"] = params.max_steps
    # opt["max_eps"] = params.max_eps
    opt["num_policy_checks"] = params.num_policy_checks
    opt["checks_per_policy"] = params.checks_per_policy
    opt["domain"] = domain
    opt["agent"] = agent

    # Copy the parameter file, domain, and this script alongside the results
    if not os.path.exists(opt["path"]):
        os.makedirs(opt["path"])
    shutil.copy(param_path, opt["path"] + "params.yml")
    shutil.copy(inspect.getfile(eval(params.domain)), opt["path"] + "domain.py")
    shutil.copy(inspect.getfile(inspect.currentframe()), opt["path"] + "exper.py")

    return eval(params.experiment)(**opt)
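# For reference, a hypothetical parameter set for the loader above. Every key
# mirrors an attribute or dict access made by run_experiment_params; the class
# names and numeric values are placeholders borrowed from other scripts in this
# repo, not settings documented for this one.
EXAMPLE_PARAMS = {
    'exp_id': 1,
    'results_path': './Results/ParamExperiments/',
    'max_steps': 12000,
    'num_policy_checks': 20,
    'checks_per_policy': 50,
    'experiment': 'Experiment',
    'domain': 'GridWorldInter',
    'domain_params': {'goalArray': [[1, 1], [7, 7]]},
    'representation': 'Tabular',
    'representation_params': {'discretization': 20},
    'policy': 'eGreedy',
    'policy_params': {'epsilon': 0.1},
    'agent': 'Q_Learning',
    'agent_params': {'initial_learn_rate': 0.3},
}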
def run_experiment_params(param_path='./params.yaml'):
    """Build an experiment from a YAML parameter file (Acrobot-style goal test)."""
    params = type("Parameters", (), load_yaml(param_path))

    def goalfn(s, goal):
        # Goal is reached when the tip height clears the `goal` threshold
        return -np.cos(s[0]) - np.cos(s[1] + s[0]) > goal

    def allMarkovReward(ps, ga, sr, gr):
        # Return the goal reward gr if the last state of the partial trajectory
        # ps satisfies any goal in ga; otherwise return the step reward sr.
        r = sr
        last_state = ps[-1]
        if any(goalfn(last_state, g) for g in ga):
            r = gr
        return r

    def encode_trial():
        rewards = list(params.domain_params['goalArray'])
        print(rewards)
        encode = Encoding(rewards, goalfn)
        return encode.strict_encoding

    # Load domain (only for Acrobot)
    params.domain_params['goalfn'] = goalfn
    params.domain_params['rewardFunction'] = allMarkovReward
    params.domain_params['encodingFunction'] = encode_trial()
    # params.domain_params['goalArray'] = params.domain_params['goalArray'][::4]
    domain = eval(params.domain)(**params.domain_params)

    # Load representation, policy, and agent
    representation = eval(params.representation)(domain,
                                                 **params.representation_params)
    policy = eval(params.policy)(representation, **params.policy_params)
    agent = eval(params.agent)(policy, representation,
                               discount_factor=domain.discount_factor,
                               **params.agent_params)

    opt = {}
    opt["exp_id"] = params.exp_id
    opt["path"] = params.results_path + getTimeStr() + "/"
    opt["max_steps"] = params.max_steps
    # opt["max_eps"] = params.max_eps
    opt["num_policy_checks"] = params.num_policy_checks
    opt["checks_per_policy"] = params.checks_per_policy
    opt["domain"] = domain
    opt["agent"] = agent

    # Copy the parameter file, domain, and this script alongside the results
    if not os.path.exists(opt["path"]):
        os.makedirs(opt["path"])
    shutil.copy(param_path, opt["path"] + "params.yml")
    shutil.copy(inspect.getfile(eval(params.domain)), opt["path"] + "domain.py")
    shutil.copy(inspect.getfile(inspect.currentframe()), opt["path"] + "exper.py")

    return eval(params.experiment)(**opt)
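# Quick illustrative check of the reward pieces above (numbers are made up):
# with both links hanging straight down, the tip height -cos(s0) - cos(s1 + s0)
# is -2, so no goal threshold near 1.0 is cleared and the step reward is kept.
#   goalfn((0.0, 0.0), 1.0)                            # -> False
#   allMarkovReward([(0.0, 0.0)], [1.0], sr=-1, gr=0)  # -> -1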
def run_experiment_params(param_path="./params.yaml"): params = type("Parameters", (), load_yaml(param_path)) def goalfn(s, goal): return -np.cos(s[0]) - np.cos(s[1] + s[0]) > goal def allMarkovReward(ps, ga, sr, gr): r = sr last_state = ps[len(ps) - 1] if any([goalfn(last_state, g) for g in ga]): r = gr return r # # Load domain def encode_trial(): rewards = list(params.domain_params["goalArray"]) print rewards encode = Encoding(rewards, goalfn) return encode.strict_encoding params.domain_params["goalfn"] = goalfn params.domain_params["rewardFunction"] = allMarkovReward params.domain_params["encodingFunction"] = encode_trial() # params.domain_params['goalArray'] = params.domain_params['goalArray'][::4] domain = eval(params.domain)(**params.domain_params) # only for acrobot # Load Representation representation = eval(params.representation)(domain, **params.representation_params) policy = eval(params.policy)(representation, **params.policy_params) agent = eval(params.agent)(policy, representation, discount_factor=domain.discount_factor, **params.agent_params) opt = {} opt["exp_id"] = params.exp_id opt["path"] = params.results_path + getTimeStr() + "/" opt["max_steps"] = params.max_steps # opt["max_eps"] = params.max_eps opt["num_policy_checks"] = params.num_policy_checks opt["checks_per_policy"] = params.checks_per_policy opt["domain"] = domain opt["agent"] = agent if not os.path.exists(opt["path"]): os.makedirs(opt["path"]) shutil.copy(param_path, opt["path"] + "params.yml") shutil.copy(inspect.getfile(eval(params.domain)), opt["path"] + "domain.py") shutil.copy(inspect.getfile(inspect.currentframe()), opt["path"] + "exper.py") return eval(params.experiment)(**opt)
def generate(mappath, goals=2, save=False, savepath="reward_locations"):
    """
    Return a numpy array of goal positions (no repeats, no particular order).
    Goal positions are sampled from the empty cells of the map.

    :param mappath: path to the GridWorld map file
    :param goals: number of goals to place
    :param save: if True, save the array to disk for repeatability
    :param savepath: directory in which the goal array is saved
    """
    map_arr = np.loadtxt(mappath, dtype=np.uint8)
    possibles = np.argwhere(map_arr == 0)
    sample_indices = np.random.choice(len(possibles), goals, replace=False)
    goal_positions = possibles[sample_indices]
    if save:
        if not os.path.exists(savepath):
            os.mkdir(savepath)
        path = os.path.join(savepath, getTimeStr())
        np.savetxt(path, goal_positions, fmt="%1d")
    return goal_positions
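# Hypothetical usage (the map path and goal count are illustrative):
#   goal_array = generate("./9x9-2Path0.txt", goals=3, save=True)
# The resulting array can then serve as the 'goalArray' entry passed to the
# parameter-driven experiment loaders above.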
opt["max_steps"] = 5000 opt["num_policy_checks"] = 50 experiment = ExperimentSegment(**opt) return experiment # ## DEBUG # import ipdb; ipdb.set_trace() # ######## if __name__ == '__main__': dirname, filename = os.path.split(os.path.abspath(__file__)) experiment = make_experiment( 1, path=dirname + "/Results/Experiments/9x9_2PathR1/" + getTimeStr() + "/") experiment.run( visualize_steps=False, # should each learning step be shown? visualize_learning=False, # show policy / value function? saveTrajectories=False) # show performance runs? # visual.saveDomain(experiment, GridWorldInter, maze) # import ipdb; ipdb.set_trace() experiment.domain.showLearning(experiment.agent.representation) # experiment.plotTrials(save=True) experiment.saveWeights() experiment.plot(save=True) #, y="reward") experiment.save()
import numpy as np
import rlpy.Tools.results
from rlpy.Tools import getTimeStr
import pickle
import json
import os

# TODO:
# - Replace loads with default commands
# - Remove pickle, replace with JSON
# - Save parameters

EPS, ENV_NOISE, SEG_RW, STEP_REW = range(4)

# Area under the learning curve (trapezoidal rule); dx = 1, but any constant
# spacing should not matter for comparisons
get_AUC = lambda res: np.trapz(res, dx=1)

DEFAULT_PATH = "./TwoPathExperiments/RESETResults/" + getTimeStr() + "/"


def default_params(params=None):
    if params is None:
        params = {}
    if 'max_eps' not in params:
        params['max_eps'] = 400
    if 'num_policy_checks' not in params:
        params['num_policy_checks'] = 40
    # DO NOT CHANGE the evaluation map
    params['eval_map'] = "9x9-2Path0.txt"
    return params
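# Illustrative use of the helpers above (the input values are made up):
#   params = default_params({'max_eps': 200})  # keeps 200, fills num_policy_checks=40
#   auc = get_AUC([0.0, 0.5, 1.0, 1.0])        # trapezoidal area under a learning curve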
    # Agent
    opt["agent"] = Q_Learning(representation=representation, policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.3,
                              learn_rate_decay_mode='const')
    opt["max_eps"] = max_eps
    opt["checks_per_policy"] = checks_per_policy
    opt["num_policy_checks"] = num_policy_checks

    experiment = ExperimentSegment(**opt)
    return experiment


if __name__ == '__main__':
    dirname, filename = os.path.split(os.path.abspath(__file__))
    experiment = make_experiment(
        1, path=dirname + "/ResetResults/Experiments/9x9_2PathR1/" + getTimeStr() + "/")
    experiment.run(visualize_steps=False,     # should each learning step be shown?
                   visualize_learning=False,  # show policy / value function?
                   saveTrajectories=False)    # save the performance-run trajectories?
    # visual.saveDomain(experiment, GridWorldInter, maze)
    experiment.domain.showLearning(experiment.agent.representation)
    # experiment.plotTrials(save=True)
    # experiment.saveWeights()
    experiment.plot(save=True, x="learning_episode")  # , y="reward"
    # experiment.save()
opt["agent"] = Q_Learning(representation=representation, policy=policy, discount_factor=domain.discount_factor, initial_learn_rate=0.3) opt["checks_per_policy"] = 50 opt["max_steps"] = 5000 opt["num_policy_checks"] = 50 experiment = ExperimentSegment(**opt) return experiment # ## DEBUG # import ipdb; ipdb.set_trace() # ######## if __name__ == '__main__': experiment = make_experiment(1, path="./Results/Experiments/9x9_2Path0/" + getTimeStr() + "/") experiment.run(visualize_steps=False, # should each learning step be shown? visualize_learning=False, # show policy / value function? saveTrajectories=False) # show performance runs? # visual.saveDomain(experiment, GridWorldInter, maze) # import ipdb; ipdb.set_trace() experiment.domain.showLearning(experiment.agent.representation) # experiment.plotTrials(save=True) experiment.plot(save=True) #, y="reward") experiment.save()