Example #1
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--env',
                        help='environment ID',
                        default='gvgai-testgame1-lvl0-v0')
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--prioritized', type=int, default=1)
    parser.add_argument('--prioritized-replay-alpha', type=float, default=0.6)
    parser.add_argument('--dueling', type=int, default=1)
    parser.add_argument('--num-timesteps', type=int, default=int(10e6))
    parser.add_argument('--checkpoint-freq', type=int, default=10000)
    parser.add_argument('--model_dir', type=str, default=None)

    args = parser.parse_args()
    set_global_seeds(args.seed)
    env, does_need_action_direction, game_name = create_gvgai_environment(
        args.env)

    model_dir = "models/{}/".format(game_name)
    os.makedirs(model_dir, exist_ok=True)
    player_processes, player_connections = create_players(
        args.env, model_dir, 0.1, args.num_timesteps, 0.01, False, 8)

    import models
    from simple import learn

    if does_need_action_direction:
        model = models.cnn_to_mlp_with_action_direction(
            convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
            hiddens=[256],
            dueling=bool(args.dueling),
        )
    else:
        model = models.cnn_to_mlp(
            convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
            hiddens=[256],
            dueling=bool(args.dueling),
        )
    env.close()
    if args.model_dir is not None:
        model_dir = args.model_dir

    learn(args.env,
          q_func=model,
          lr=1e-4,
          max_timesteps=args.num_timesteps,
          buffer_size=1000,
          exploration_fraction=0.1,
          exploration_final_eps=0.01,
          train_freq=1,
          learning_starts=500,
          target_network_update_freq=100,
          gamma=0.99,
          prioritized_replay=bool(args.prioritized),
          prioritized_replay_alpha=args.prioritized_replay_alpha,
          checkpoint_freq=args.checkpoint_freq,
          model_dir=model_dir,
          player_processes=player_processes,
          player_connections=player_connections)
Example #2
def main():
    env = gym.make("CartPole-v0")
    act = simple.learn(env,
                       q_func_cart_pole,
                       exploration_fraction=0.35,
                       final_epsilon=0.1,
                       alpha=1e-3,
                       callback=callback)
    act.save("./cartpole_model.ckpt")
Example #3
def train():
    set_global_seeds(args.seed)
    directory = os.path.join(args.log_dir, '_'.join([args.env, datetime.datetime.now().strftime("%m%d%H%M")]))
    if not os.path.exists(directory):
        os.makedirs(directory)
    else:
        raise ValueError("The directory already exists...", directory)
    json.dump(vars(args), open(os.path.join(directory, 'learning_prop.json'), 'w'))

    env = envs.make(args.env, dirname=directory)

    with tf.device(args.device):
        model = models.mlp([args.num_units]*args.num_layers, init_mean=args.init_mean, init_sd=args.init_sd)

        act, records = simple.learn(
            env,
            q_func=model,
            lr=args.learning_rate,
            lr_decay_factor=args.learning_rate_decay_factor,
            lr_growth_factor=args.learning_rate_growth_factor,
            max_timesteps=args.nb_train_steps,
            buffer_size=args.buffer_size,
            batch_size=args.batch_size,
            exploration_fraction=args.eps_fraction,
            exploration_final_eps=args.eps_min,
            target_network_update_freq=args.target_update_freq,
            print_freq=10,
            checkpoint_freq=int(args.nb_train_steps/10),
            learning_starts=args.nb_warmup_steps,
            gamma=args.gamma,
            prioritized_replay=bool(args.prioritized),
            prioritized_replay_alpha=args.prioritized_replay_alpha,
            callback=None,  # callback
            epoch_steps=args.nb_epoch_steps,
            noise=args.noise,
            varTH=args.varth,
            alg=args.alg,
            gpu_memory=args.gpu_memory,
            act_policy=args.act_policy,
            save_dir=directory,
            nb_test_steps=args.nb_test_steps,
            scope=args.scope,
            test_eps=args.test_eps,
        )
        print("Saving model to model.pkl")
        act.save(os.path.join(directory,"model.pkl"))
        plot(records, directory)
    memo = input("Memo for this experiment?: ")
    with open(os.path.join(directory, "memo.txt"), 'w') as f:
        f.write(memo)
    if args.record == 1:
        env.moviewriter.finish()
Example #4
def main():
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--env', help='environment ID', default='Breakout')
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--prioritized', type=int, default=1)
    parser.add_argument('--num-timesteps', type=int, default=int(10e6))
    parser.add_argument('experiment_id')
    args = parser.parse_args()
    logging_directory = Path('./experiments/{}--{}'.format(args.experiment_id, args.env))
    if not logging_directory.exists():
        logging_directory.mkdir(parents=True)
    logger.configure(str(logging_directory), ['stdout', 'tensorboard', 'json'])
    model_directory = logging_directory / 'models'
    if not model_directory.exists():
        model_directory.mkdir(parents=True)
    set_global_seeds(args.seed)
    env_name = args.env + "NoFrameskip-v4"
    env = make_atari(env_name)
    env = bench.Monitor(env, logger.get_dir())
    env = deepq.wrap_atari_dqn(env)
    model = models.cnn_to_mlp(
        convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        hiddens=[256],
    )
    exploration_schedule = PiecewiseSchedule(
        endpoints=[(0, 1), (1e6, 0.1), (5 * 1e6, 0.01)], outside_value=0.01)

    act = learn(
        env,
        q_func=model,
        beta1=0.9,
        beta2=0.99,
        epsilon=1e-4,
        max_timesteps=args.num_timesteps,
        buffer_size=1000000,
        exploration_schedule=exploration_schedule,
        start_lr=1e-4,
        end_lr=5 * 1e-5,
        start_step=1e6,
        end_step=5 * 1e6,
        train_freq=4,
        print_freq=10,
        batch_size=32,
        learning_starts=50000,
        target_network_update_freq=10000,
        gamma=0.99,
        prioritized_replay=bool(args.prioritized),
        model_directory=model_directory
    )
    act.save(str(model_directory / "act_model.pkl"))
    env.close()
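The exploration_schedule above interpolates epsilon linearly between the listed (timestep, value) endpoints and returns outside_value beyond the last one. A small usage sketch, assuming the PiecewiseSchedule from baselines.common.schedules:

from baselines.common.schedules import PiecewiseSchedule

schedule = PiecewiseSchedule(
    endpoints=[(0, 1), (1e6, 0.1), (5 * 1e6, 0.01)], outside_value=0.01)
schedule.value(0)         # 1.0 at the start of training
schedule.value(500000)    # 0.55, halfway between 1.0 and 0.1
schedule.value(10 ** 7)   # 0.01, past the last endpoint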
Example #5
def main():
    env = gym.make('Gomoku9x9-training-camp-v0', opponent_policy)
    model = models.mlp([64])
    act = simple.learn(env,
                       q_func=model,
                       lr=1e-3,
                       max_timesteps=100000,
                       buffer_size=50000,
                       exploration_fraction=0.1,
                       exploration_final_eps=0.02,
                       print_freq=10,
                       callback=callback)
    print("Saving model to cartpole_model.pkl")
    act.save("cartpole_model.pkl")
Example #6
def main():
    env = envs.create_env(None)
    model = models.mlp([64])
    act = simple.learn(
        env,
        q_func=model,
        lr=1e-3,
        max_timesteps=100000,
        buffer_size=50000,
        exploration_fraction=0.01,
        exploration_final_eps=0.0,
        print_freq=10,
        callback=callback,
        prioritized_replay=True
    )
    print("Saving model to {}_model.pkl".format(envs.VSTR))
    act.save("{}_model.pkl".format(envs.VSTR))
Example #7
def main():
    env = wrap_env(gym.make("PongNoFrameskip-v4"))
    act = simple.learn(env,
                       q_func_pong,
                       n_steps=2000000,
                       exploration_fraction=0.20,
                       final_epsilon=0.01,
                       alpha=1e-3,
                       buffer_size=10000,
                       train_main_every=4,
                       update_target_every=1000,
                       gamma=0.99,
                       print_every=1,
                       pre_run_steps=10000,
                       callback=callback)
    # show_result(env, act)
    act.save("./pong_model.ckpt")
Example #8
def main():

    env = gym.make("CartPole-v0")
    #env = gym.make("MountainCar-v0")
    model = models.mlp([256, 20])
    act = learn(env,
                q_func=model,
                lr=1e-2,
                max_timesteps=100000,
                buffer_size=90000,
                exploration_fraction=0.1,
                exploration_final_eps=0.1,
                print_freq=25,
                checkpoint_path='model_chkpoints/cart_model',
                callback=callback,
                param_noise=True)
    print("Saving model to cartpole_model.pkl")
    act.save("cartpole_model.pkl")
Example #9
def main():
    env = wrap_env(gym.make("BreakoutNoFrameskip-v4"))
    n_steps = 500000
    act = simple.learn(env,
                       q_func_breakout,
                       n_steps=n_steps,
                       exploration_fraction=0.2,
                       final_epsilon=0.01,
                       alpha=5e-4,
                       buffer_size=10000,
                       train_main_every=4,
                       update_target_every=1000,
                       gamma=0.99,
                       print_every=4,
                       pre_run_steps=10000,
                       callback=callback)
    # show_result(env, act)
    act.save("./breakout_model.ckpt")
Example #10
def main():
    env = gym.make("PongNoFrameskip-v4")
    env = ScaledFloatFrame(wrap_dqn(env))
    model = deepq.models.cnn_to_mlp(convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
                                    hiddens=[256],
                                    dueling=True)
    act = simple.learn(env,
                       q_func=model,
                       lr=1e-4,
                       max_timesteps=200000,
                       buffer_size=10000,
                       exploration_fraction=0.1,
                       exploration_final_eps=0.01,
                       train_freq=4,
                       learning_starts=10000,
                       target_network_update_freq=1000,
                       gamma=0.99,
                       prioritized_replay=True,
                       tf_log_dir='./log')
    act.save("pong_model.pkl")
    env.close()
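Once pong_model.pkl has been written, the saved policy can be replayed. A sketch assuming the act object follows the baselines ActWrapper format and the standard deepq.load helper, with the same preprocessing wrappers used for training:

import gym
from baselines import deepq
from baselines.common.atari_wrappers_deprecated import ScaledFloatFrame, wrap_dqn

def enjoy():
    # Rebuild the environment with the same preprocessing used during training.
    env = ScaledFloatFrame(wrap_dqn(gym.make("PongNoFrameskip-v4")))
    act = deepq.load("pong_model.pkl")
    obs, done = env.reset(), False
    while not done:
        env.render()
        # act expects a batch dimension and returns a batch of actions.
        obs, _, done, _ = env.step(act(obs[None])[0])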
Example #11
def train():

    logger.configure()
    set_global_seeds(args.seed)

    directory = os.path.join(
        args.log_dir,
        '_'.join([args.env,
                  datetime.datetime.now().strftime("%m%d%H%M")]))
    if not os.path.exists(directory):
        os.makedirs(directory)
    else:
        raise ValueError("The directory already exists...", directory)
    json.dump(vars(args),
              open(os.path.join(directory, 'learning_prop.json'), 'w'))

    env = make_atari(args.env)
    env = bench.Monitor(env, logger.get_dir())
    env = models.wrap_atari_dqn(env)

    nb_test_steps = args.nb_test_steps if args.nb_test_steps > 0 else None
    reload_path = args.reload_path if args.reload_path else None
    if args.record:
        env = Monitor(env, directory=directory)

    with tf.device(args.device):
        model = models.cnn_to_mlp(
            convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
            hiddens=[args.num_units] * args.num_layers,
            dueling=bool(args.dueling),
            init_mean=args.init_mean,
            init_sd=args.init_sd,
        )

        act, records = simple.learn(
            env,
            q_func=model,
            lr=args.learning_rate,
            lr_decay_factor=args.lr_decay_factor,
            lr_growth_factor=args.lr_growth_factor,
            max_timesteps=args.nb_train_steps,
            buffer_size=args.buffer_size,
            exploration_fraction=args.eps_fraction,
            exploration_final_eps=args.eps_min,
            train_freq=4,
            print_freq=1000,
            checkpoint_freq=int(args.nb_train_steps / 10),
            learning_starts=args.nb_warmup_steps,
            target_network_update_freq=args.target_update_freq,
            gamma=0.99,
            prioritized_replay=bool(args.prioritized),
            prioritized_replay_alpha=args.prioritized_replay_alpha,
            epoch_steps=args.nb_epoch_steps,
            alg=args.alg,
            noise=args.noise,
            gpu_memory=args.gpu_memory,
            varTH=args.varth,
            act_policy=args.act_policy,
            save_dir=directory,
            nb_test_steps=nb_test_steps,
            scope=args.scope,
            test_eps=args.test_eps,
            checkpoint_path=reload_path,
            init_t=args.init_t,
        )
        print("Saving model to model.pkl")
        act.save(os.path.join(directory, "model.pkl"))
    plot(records, directory)
    env.close()