import datetime
import os

import joblib

# rllab components used throughout these experiments.
from rllab.algos.trpo import TRPO
from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
from rllab.misc.instrument import run_experiment_lite
from rllab.policies.categorical_mlp_policy import CategoricalMLPPolicy

# Project-local helpers (PolicyLoader, RLPyEnv, HRLEnv, NoisyObservationEnv,
# Bandits, RCCarBarriers, RCCarSlideTurn, ModifiedAcrobot) are assumed to be
# importable from this repository; adjust the import paths to its layout.


def good_inverted_car(directory="./Results/Car/Noisy_500/"):
    """Train a TRPO meta-policy over a good and an 'other' pretrained policy
    on the noiseless RCCarBarriers domain."""
    rccar = RCCarBarriers(noise=0.)
    policies = [PolicyLoader("models/noisy/" + path)
                for path in ['good', 'other']]
    domain = RLPyEnv(rccar)
    env = HRLEnv(domain, policies)
    policy = CategoricalMLPPolicy(env_spec=env.spec)
    baseline = LinearFeatureBaseline(env_spec=env.spec)
    algo = TRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=4000,
        max_path_length=env.horizon,
        n_itr=500,
        discount=0.9,
        step_size=0.001,
        # plot=True,
    )
    # Guard against clobbering previous results; remove once `directory`
    # points somewhere safe.
    assert False, "Make sure to change logging directory before rerunning this experiment"
    run_experiment_lite(
        algo.train(),
        n_parallel=4,  # number of parallel workers for sampling
        snapshot_mode="last",  # only keep the last iteration's snapshot
        script="scripts/run_experiment_lite_rl.py",
        log_dir=directory,
        seed=1,  # fixed seed; omit for a random seed
        # plot=True,
    )
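
# `HRLEnv` is project-local and not defined in this file. From its usage, it
# appears to expose the base domain with one discrete action per pretrained
# sub-policy, so the CategoricalMLPPolicy learns which sub-policy to invoke.
# A minimal sketch under that assumption; the class name, the
# one-base-step-per-meta-action semantics, and all details below are
# illustrative, not this repo's actual implementation.
from rllab.envs.base import Env
from rllab.spaces import Discrete


class SimpleHRLEnv(Env):
    """Meta-environment: meta-action i executes sub-policy i for one base step."""

    def __init__(self, base_env, sub_policies):
        self.base_env = base_env
        self.sub_policies = sub_policies
        self._last_obs = None

    @property
    def action_space(self):
        return Discrete(len(self.sub_policies))

    @property
    def observation_space(self):
        return self.base_env.observation_space

    @property
    def horizon(self):
        return self.base_env.horizon

    def reset(self):
        self._last_obs = self.base_env.reset()
        return self._last_obs

    def step(self, meta_action):
        # Delegate the low-level action choice to the selected sub-policy.
        base_action, _ = self.sub_policies[meta_action].get_action(self._last_obs)
        step = self.base_env.step(base_action)
        self._last_obs = step.observation
        return step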


def test(num=1, path="./Results/Tmp", save=False):
    """Quick in-process sanity check on the modified Acrobot domain.

    Note: `num`, `path`, and `save` are currently unused; this trains
    locally and logs nothing.
    """
    policies = [PolicyLoader("models/Acrobot/" + name)
                for name in ['Mass2_Light', 'Mass1_Light', 'Mass1_Heavy']]
    acrobot = ModifiedAcrobot()
    acrobot.dt = 0.1
    acrobot.episodeCap = 1000
    # acrobot.torque_noise_max = 0.05
    domain = RLPyEnv(acrobot)
    env = HRLEnv(domain, policies)
    policy = CategoricalMLPPolicy(env_spec=env.spec, hidden_sizes=(8, 8))
    baseline = LinearFeatureBaseline(env_spec=env.spec)
    algo = TRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=4000,
        max_path_length=env.horizon,
        n_itr=50,
        discount=0.995,
        step_size=0.001,
        # plot=True,
    )
    algo.train()
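
# Several functions below carry a commented-out `rollout(env, policy)` call,
# suggesting a manual evaluation loop. A minimal sketch of one, assuming the
# standard rllab Env/Policy interface (env.reset(), env.step(action) returning
# a Step namedtuple, policy.get_action(obs)); the helper name and return
# format are assumptions, not this repo's API.
def rollout(env, policy, max_path_length=1000):
    """Run one episode and return the undiscounted return."""
    obs = env.reset()
    total_reward = 0.
    for _ in range(max_path_length):
        action, _ = policy.get_action(obs)
        step = env.step(action)
        obs = step.observation
        total_reward += step.reward
        if step.done:
            break
    return total_reward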


def noisy_bandits():
    """Bandit selection over one good and three untrained policies on the
    noisy RCCarBarriers domain."""
    rccar = RCCarBarriers(noise=0.1)
    policies = [PolicyLoader("models/noisy/" + path)
                for path in ['good', 'untrained', 'untrained', 'untrained']]
    domain = RLPyEnv(rccar)
    band = Bandits(policies, domain, N=100, rmax=rccar.GOAL_REWARD,
                   rmin=-rccar.episodeCap - 20)
    band.run()
    joblib.dump(band.choices, "Results/Bandits/Noisy")
    return band


def two_policy_bandits():
    """Bandit selection between the two pretrained SlideTurn agents."""
    rccar = RCCarSlideTurn(noise=0.1)
    domain = RLPyEnv(rccar)
    policies = [PolicyLoader("models/slideturn_experiment/" + path)
                for path in ['agent0', 'agent1']]
    band = Bandits(policies, domain, N=100, rmax=rccar.GOAL_REWARD,
                   rmin=-rccar.episodeCap)
    band.run()
    joblib.dump(band.choices, "Results/Bandits/TwoPolicy")
    return band
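
# `Bandits` is project-local, but the rmin/rmax arguments suggest episode
# returns are rescaled to [0, 1] and fed to a standard index rule. A minimal
# UCB1 sketch under those assumptions; the `evaluate_policy` helper and the
# normalization are illustrative, not this repo's actual implementation.
import numpy as np


def ucb1_policy_selection(policies, evaluate_policy, N, rmin, rmax):
    """Pick among `policies` for N rounds; return the chosen arm indices."""
    counts = np.zeros(len(policies))
    values = np.zeros(len(policies))
    choices = []
    for t in range(N):
        if t < len(policies):
            arm = t  # play each arm once first
        else:
            ucb = values + np.sqrt(2 * np.log(t) / counts)
            arm = int(np.argmax(ucb))
        raw_return = evaluate_policy(policies[arm])  # one episode's return
        reward = (raw_return - rmin) / (rmax - rmin)  # normalize to [0, 1]
        counts[arm] += 1
        values[arm] += (reward - values[arm]) / counts[arm]  # running mean
        choices.append(arm)
    return choices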


def good_double_bad_car(num=1, directory="./Results/Car/CarNoisy/",
                        exp_name="Noisy_2_untrained", save=False):
    """Meta-policy over one good and two untrained policies on the noisy
    RCCarBarriers domain; `num` independent TRPO runs."""
    rccar = RCCarBarriers(noise=0.1)
    policies = [PolicyLoader("models/noisy/" + path)
                for path in ['good', 'untrained', 'untrained']]
    domain = RLPyEnv(rccar)
    env = HRLEnv(domain, policies)
    # NOTE: policy and baseline are created once and shared across all runs.
    policy = CategoricalMLPPolicy(env_spec=env.spec, hidden_sizes=(64, 32))
    baseline = LinearFeatureBaseline(env_spec=env.spec)
    for i in range(num):
        timestamp = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
        algo = TRPO(
            env=env,
            policy=policy,
            baseline=baseline,
            batch_size=4000,
            max_path_length=env.horizon,
            n_itr=600,
            discount=0.9,
            step_size=0.0001,
            # plot=True,
        )
        run_experiment_lite(
            algo.train(),
            n_parallel=4,  # number of parallel workers for sampling
            snapshot_mode="last",  # only keep the last iteration's snapshot
            script="scripts/run_experiment_lite_rl.py",
            exp_name=exp_name + timestamp,
            log_dir=os.path.join(directory, exp_name + timestamp)
            if save else "./Results/Tmp",
            # No seed given, so a random one is used per run.
            # plot=True,
        )


def good_x_cars(num_agents=5, directory="./Final_Results/Car/CarNoisyAgents/",
                exp_name="NoisyTest", save=False):
    """Meta-policy over one good policy plus `num_agents` untrained ones on
    the noisy RCCarBarriers domain; three repeat runs."""
    rccar = RCCarBarriers(noise=0.1)
    policies = [PolicyLoader("models/noisy/" + path)
                for path in ['good', 'untrained', 'untrained', 'untrained',
                             'untrained', 'untrained']][:1 + num_agents]
    domain = RLPyEnv(rccar)
    env = HRLEnv(domain, policies)
    policy = CategoricalMLPPolicy(env_spec=env.spec, hidden_sizes=(32, 32))
    baseline = LinearFeatureBaseline(env_spec=env.spec)
    exp_name = exp_name + str(num_agents)
    directory = os.path.join(directory, exp_name)
    os.makedirs(directory, exist_ok=True)
    for i in range(3):
        timestamp = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
        algo = TRPO(
            env=env,
            policy=policy,
            baseline=baseline,
            batch_size=2000,
            max_path_length=env.horizon,
            n_itr=500,
            discount=0.995,
            step_size=0.001,
            # plot=True,
        )
        run_experiment_lite(
            algo.train(),
            n_parallel=4,  # number of parallel workers for sampling
            snapshot_mode="last",  # only keep the last iteration's snapshot
            script="scripts/run_experiment_lite_rl.py",
            exp_name=exp_name,
            log_dir=os.path.join(directory, timestamp)
            if save else "./Results/Tmp",
            # No seed given, so a random one is used per run.
            # plot=True,
        )


def slideturn_noisy(val=0.1, directory="./Results/Car/NoisyObs500/",
                    exp_name="Cap_", save=False):
    """Meta-policy on SlideTurn with observation noise of magnitude `val`
    (process noise disabled)."""
    policies = [PolicyLoader("models/slideturn_experiment/" + path)
                for path in ['agent0', 'agent1']]
    rccar = RCCarSlideTurn(noise=0.)  # no process noise; only observation noise
    domain = RLPyEnv(rccar)
    original_env = HRLEnv(domain, policies)
    env = NoisyObservationEnv(original_env, obs_noise=val)
    policy = CategoricalMLPPolicy(env_spec=env.spec)
    baseline = LinearFeatureBaseline(env_spec=env.spec)
    dir_name = os.path.join(directory, exp_name)
    for i in range(1):
        timestamp = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
        algo = TRPO(
            env=env,
            policy=policy,
            baseline=baseline,
            batch_size=4000,
            max_path_length=env.horizon,
            n_itr=500,
            discount=0.9,
            step_size=0.01,
            # plot=True,
        )
        run_experiment_lite(
            algo.train(),
            n_parallel=4,  # number of parallel workers for sampling
            snapshot_mode="last",  # only keep the last iteration's snapshot
            script="scripts/run_experiment_lite_rl.py",
            exp_name=exp_name + timestamp,
            log_dir=os.path.join(dir_name, timestamp)
            if save else './Results/Tmp2',
            # No seed given, so a random one is used per run.
            # plot=True,
        )
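
# `NoisyObservationEnv` may come from rllab or be project-local. A minimal
# sketch of such a wrapper, assuming rllab's ProxyEnv base and additive
# uniform noise scaled by `obs_noise`; the class name and noise model are
# assumptions, and the real class may differ.
from rllab.envs.base import Step
from rllab.envs.proxy_env import ProxyEnv


class UniformNoisyObservationEnv(ProxyEnv):
    """Perturb each observation with i.i.d. uniform noise in [-obs_noise, obs_noise]."""

    def __init__(self, env, obs_noise=0.1):
        super(UniformNoisyObservationEnv, self).__init__(env)
        self.obs_noise = obs_noise

    def _perturb(self, obs):
        return obs + np.random.uniform(-self.obs_noise, self.obs_noise,
                                       size=np.shape(obs))

    def reset(self):
        return self._perturb(self._wrapped_env.reset())

    def step(self, action):
        step = self._wrapped_env.step(action)
        return Step(self._perturb(step.observation), step.reward,
                    step.done, **step.info)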


def slideturn_turn_only(num=1, directory="./Final_Results/Car/SlideTurn/",
                        exp_name="Turn", save=True):
    """Baseline: meta-policy restricted to the turn agent alone on the
    noisy SlideTurn domain; `num` independent TRPO runs."""
    policies = [PolicyLoader("models/slideturn_experiment/" + path)
                for path in ['agent1']]
    directory = os.path.join(directory, exp_name)
    for i in range(num):
        rccar = RCCarSlideTurn(noise=0.1)
        timestamp = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
        domain = RLPyEnv(rccar)
        env = HRLEnv(domain, policies)
        policy = CategoricalMLPPolicy(env_spec=env.spec)
        baseline = LinearFeatureBaseline(env_spec=env.spec)
        algo = TRPO(
            env=env,
            policy=policy,
            baseline=baseline,
            batch_size=1000,
            max_path_length=env.horizon,
            n_itr=503,
            discount=0.995,
            step_size=0.001,
            # plot=True,
        )
        run_experiment_lite(
            algo.train(),
            n_parallel=4,  # number of parallel workers for sampling
            snapshot_mode="last",  # only keep the last iteration's snapshot
            script="scripts/run_experiment_lite_rl.py",
            exp_name=exp_name,
            log_dir=os.path.join(directory, timestamp)
            if save else './Results/Tmp',
            # No seed given, so a random one is used per run.
            # plot=True,
        )
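
# Hypothetical entry point showing how these experiments might be launched
# from the command line; the argparse wiring below is illustrative and not
# part of the original script.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="HRL car/acrobot experiments")
    parser.add_argument("experiment", choices=[
        "test", "noisy_bandits", "two_policy_bandits", "good_double_bad_car",
        "good_x_cars", "slideturn_noisy", "slideturn_turn_only"])
    args = parser.parse_args()
    globals()[args.experiment]()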