Code Example #1
# Imports are assumed: HBS appears to be a deepq-style RL library, and logger,
# bench, set_global_seeds and make_atari match the OpenAI baselines helpers.
import argparse

from baselines import bench, logger
from baselines.common import set_global_seeds
from baselines.common.atari_wrappers import make_atari

import HBS


def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--env',
                        help='environment ID',
                        default='BreakoutNoFrameskip-v4')
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--prioritized', type=int, default=1)
    parser.add_argument('--dueling', type=int, default=1)
    parser.add_argument('--num-timesteps', type=int, default=int(10e6))
    args = parser.parse_args()
    logger.configure()
    set_global_seeds(args.seed)
    env = make_atari(args.env)
    env = bench.Monitor(env, logger.get_dir())
    env = HBS.wrap_atari_dqn(env)
    model = HBS.models.cnn_to_mlp(
        convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        hiddens=[256],
        dueling=bool(args.dueling),
    )
    act = HBS.learn(env,
                    q_func=model,
                    lr=1e-4,
                    max_timesteps=args.num_timesteps,
                    buffer_size=10000,
                    exploration_fraction=0.1,
                    exploration_final_eps=0.01,
                    train_freq=4,
                    learning_starts=10000,
                    target_network_update_freq=1000,
                    gamma=0.99,
                    prioritized_replay=bool(args.prioritized))
    # act.save("breakout_model.pkl")  # optional: save the trained policy
    env.close()
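
For completeness, a minimal rollout sketch for a policy saved with act.save(). It assumes HBS.load accepts a single path (Code Example #2 also passes a name argument) and that act() follows the deepq convention of taking a batch of observations; the file name is hypothetical.

import gym
import HBS

def enjoy():
    env = gym.make('BreakoutNoFrameskip-v4')
    env = HBS.wrap_atari_dqn(env)
    act = HBS.load("breakout_model.pkl")  # hypothetical file; load signature assumed
    obs, done = env.reset(), False
    episode_rew = 0
    while not done:
        env.render()
        # act() expects a batch of observations, so add a leading axis
        obs, rew, done, _ = env.step(act(obs[None])[0])
        episode_rew += rew
    print("Episode reward", episode_rew)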
Code Example #2
import gym

import HBS  # assumed: the same deepq-style library as in Code Example #1


def callback(lcl, _glb):
    # Referenced below but never defined in the original; a no-op placeholder
    # is assumed here (see the early-stopping sketch after Code Example #3).
    return False


def main():
    env = gym.make("Pendulum-v0")
    '''
    We assume actor_list names pre-trained policies, saved as
    pendulum_model_<name>.pkl, to be used as sub-policies.
    '''

    exp_name = 'boundary'

    actor_list = ["half_down", "half_up"]
    sub_policies = []
    for actor_name in actor_list:
        print("=== Actor: {}".format(actor_name))
        actor = HBS.load("pendulum_model_{}.pkl".format(actor_name), actor_name)
        sub_policies.append(actor)

    boundary_model = HBS.models.mlp([256, 256])
    boundary_act = HBS.learn(env,
                             q_func=boundary_model,
                             sub_policies=sub_policies,
                             lr=1e-4,
                             max_timesteps=400000,
                             buffer_size=50000,
                             exploration_fraction=0.1,
                             exploration_final_eps=0.02,
                             print_freq=10,
                             exp_name=exp_name,
                             callback=callback)
    print("Saving model to pendulum_model.pkl")
    boundary_act.save("pendulum_model_{}.pkl".format(exp_name))
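
The "half_down" and "half_up" sub-policies above are assumed to be pre-trained. Below is a sketch of how they could be produced, reusing the training pattern of Code Example #3; whatever makes each sub-policy specialize (e.g. reward shaping per region of the state space) is not shown in the original, so this sketch only trains and saves under the file names Code Example #2 expects.

import gym
import HBS

def pretrain(name):
    env = gym.make("Pendulum-v0")
    model = HBS.models.mlp([256, 256])
    act = HBS.learn(env,
                    q_func=model,
                    lr=1e-4,
                    max_timesteps=400000,
                    buffer_size=50000,
                    exploration_fraction=0.1,
                    exploration_final_eps=0.02,
                    print_freq=10)
    # Save under the name Code Example #2 loads.
    act.save("pendulum_model_{}.pkl".format(name))

for name in ["half_down", "half_up"]:
    pretrain(name)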
Code Example #3
import gym

import HBS  # assumed: the same deepq-style library as above


def callback(lcl, _glb):
    # Referenced below but never defined in the original; a no-op placeholder is assumed.
    return False


def main():
    env = gym.make("Pendulum-v0")
    model = HBS.models.mlp([256, 256])
    act = HBS.learn(env,
                    q_func=model,
                    lr=1e-4,
                    max_timesteps=400000,
                    buffer_size=50000,
                    exploration_fraction=0.1,
                    exploration_final_eps=0.02,
                    print_freq=10,
                    callback=callback)
    print("Saving model to cartpole_model.pkl")
    act.save("cartpole_model.pkl")
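
Code Examples #2 and #3 pass a callback that the originals never define. If HBS.learn follows the baselines deepq convention of calling callback(locals, globals) every step, an early-stopping variant could look like the sketch below; the -200 threshold is an arbitrary illustrative value for Pendulum.

def callback(lcl, _glb):
    # Stop training once the mean reward over the last 100 episodes
    # clears the chosen threshold.
    if lcl['t'] > 100:
        mean_rew = sum(lcl['episode_rewards'][-101:-1]) / 100
        return mean_rew >= -200
    return False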
Code Example #4
import gym

import HBS  # assumed: the same deepq-style library as above


def main():
    env = gym.make("MountainCar-v0")
    # Enabling layer_norm here is important for parameter space noise!
    model = HBS.models.mlp([64], layer_norm=True)
    act = HBS.learn(env,
                    q_func=model,
                    lr=1e-3,
                    max_timesteps=100000,
                    buffer_size=50000,
                    exploration_fraction=0.1,
                    exploration_final_eps=0.1,
                    print_freq=10,
                    param_noise=True)
    print("Saving model to mountaincar_model.pkl")
    act.save("mountaincar_model.pkl")