def main():
    """Train a DQN agent on an Atari environment chosen on the command line.

    Command-line options:
        --env            Environment ID handed to ``make_atari``.
        --seed           RNG seed handed to ``set_global_seeds``.
        --prioritized    Integer flag; converted with ``bool`` and forwarded
                         to ``HBS.learn`` as ``prioritized_replay``.
        --dueling        Integer flag; converted with ``bool`` and forwarded
                         to ``HBS.models.cnn_to_mlp`` as ``dueling``.
        --num-timesteps  Forwarded to ``HBS.learn`` as ``max_timesteps``.

    The environment is closed when the function exits, whether training
    finished normally or raised.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--env', help='environment ID',
                        default='BreakoutNoFrameskip-v4')
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--prioritized', type=int, default=1)
    parser.add_argument('--dueling', type=int, default=1)
    parser.add_argument('--num-timesteps', type=int, default=int(10e6))
    args = parser.parse_args()

    logger.configure()
    set_global_seeds(args.seed)

    env = make_atari(args.env)
    try:
        env = bench.Monitor(env, logger.get_dir())
        env = HBS.wrap_atari_dqn(env)
        model = HBS.models.cnn_to_mlp(
            convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
            hiddens=[256],
            dueling=bool(args.dueling),
        )
        # NOTE: the object returned by HBS.learn is intentionally not kept;
        # saving it (act.save("pong_model.pkl")) was disabled in the original
        # script and marked XXX.
        HBS.learn(
            env,
            q_func=model,
            lr=1e-4,
            max_timesteps=args.num_timesteps,
            buffer_size=10000,
            exploration_fraction=0.1,
            exploration_final_eps=0.01,
            train_freq=4,
            learning_starts=10000,
            target_network_update_freq=1000,
            gamma=0.99,
            prioritized_replay=bool(args.prioritized),
        )
    finally:
        # Release the environment even when wrapping or training fails.
        env.close()
def main():
    """Train the 'boundary' model on Pendulum-v0 using pre-trained sub-policies.

    Every name in ``actor_list`` is loaded through ``HBS.load`` from
    ``pendulum_model_<name>.pkl`` and the loaded objects are passed to
    ``HBS.learn`` as ``sub_policies``. The object returned by ``HBS.learn``
    is saved to ``pendulum_model_<exp_name>.pkl``.
    """
    env = gym.make("Pendulum-v0")

    # We assume that actor_list names pre-trained policies that are to be
    # used as sub-policies.
    exp_name = 'boundary'
    actor_list = ["half_down", "half_up"]

    sub_policies = []
    for actor_name in actor_list:
        print("=== Actor: {}".format(actor_name))
        sub_policies.append(
            HBS.load("pendulum_model_{}.pkl".format(actor_name), actor_name))

    boundary_model = HBS.models.mlp([256, 256])
    boundary_act = HBS.learn(
        env,
        q_func=boundary_model,
        sub_policies=sub_policies,
        lr=1e-4,
        max_timesteps=400000,
        buffer_size=50000,
        exploration_fraction=0.1,
        exploration_final_eps=0.02,
        print_freq=10,
        exp_name=exp_name,
        callback=callback,
    )

    # Build the path once so the log line always names the file that is
    # actually written (the message used to say "pendulum_model.pkl").
    save_path = "pendulum_model_{}.pkl".format(exp_name)
    print("Saving model to {}".format(save_path))
    boundary_act.save(save_path)
def main():
    """Train an MLP-based model on Pendulum-v0 and save the learned act object.

    The hyperparameters are fixed in the function body; the result of
    ``HBS.learn`` is written to ``cartpole_model.pkl``.
    """
    # NOTE(review): the environment is Pendulum-v0 but the output file is
    # named cartpole_model.pkl -- looks like a leftover; confirm before
    # renaming, since other scripts may load this path.
    pendulum_env = gym.make("Pendulum-v0")
    q_network = HBS.models.mlp([256, 256])

    learn_kwargs = dict(
        q_func=q_network,
        lr=1e-4,
        max_timesteps=400000,
        buffer_size=50000,
        exploration_fraction=0.1,
        exploration_final_eps=0.02,
        print_freq=10,
        callback=callback,
    )
    trained_act = HBS.learn(pendulum_env, **learn_kwargs)

    print("Saving model to cartpole_model.pkl")
    trained_act.save("cartpole_model.pkl")
def main():
    """Train an MLP-based model on MountainCar-v0 with parameter noise enabled.

    The object returned by ``HBS.learn`` is saved to ``mountaincar_model.pkl``.
    """
    mountaincar_env = gym.make("MountainCar-v0")

    # Enabling layer_norm here is important for parameter space noise!
    q_network = HBS.models.mlp([64], layer_norm=True)

    learn_kwargs = dict(
        q_func=q_network,
        lr=1e-3,
        max_timesteps=100000,
        buffer_size=50000,
        exploration_fraction=0.1,
        exploration_final_eps=0.1,
        print_freq=10,
        param_noise=True,
    )
    trained_act = HBS.learn(mountaincar_env, **learn_kwargs)

    print("Saving model to mountaincar_model.pkl")
    trained_act.save("mountaincar_model.pkl")