def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--env', help='environment ID', default='gvgai-testgame1-lvl0-v0')
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--prioritized', type=int, default=1)
    parser.add_argument('--prioritized-replay-alpha', type=float, default=0.6)
    parser.add_argument('--dueling', type=int, default=1)
    parser.add_argument('--num-timesteps', type=int, default=int(10e6))
    parser.add_argument('--checkpoint-freq', type=int, default=10000)
    parser.add_argument('--model_dir', type=str, default=None)
    args = parser.parse_args()

    set_global_seeds(args.seed)
    env, does_need_action_direction, game_name = create_gvgai_environment(args.env)

    model_dir = "models/{}/".format(game_name)
    os.makedirs(model_dir, exist_ok=True)

    player_processes, player_connections = create_players(
        args.env, model_dir, 0.1, args.num_timesteps, 0.01, False, 8)

    import models
    from simple import learn

    if does_need_action_direction:
        model = models.cnn_to_mlp_with_action_direction(
            convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
            hiddens=[256],
            dueling=bool(args.dueling),
        )
    else:
        model = models.cnn_to_mlp(
            convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
            hiddens=[256],
            dueling=bool(args.dueling),
        )
    env.close()

    if args.model_dir is not None:
        model_dir = args.model_dir

    learn(args.env,
          q_func=model,
          lr=1e-4,
          max_timesteps=args.num_timesteps,
          buffer_size=1000,
          exploration_fraction=0.1,
          exploration_final_eps=0.01,
          train_freq=1,
          learning_starts=500,
          target_network_update_freq=100,
          gamma=0.99,
          prioritized_replay=bool(args.prioritized),
          prioritized_replay_alpha=args.prioritized_replay_alpha,
          checkpoint_freq=args.checkpoint_freq,
          model_dir=model_dir,
          player_processes=player_processes,
          player_connections=player_connections)
def main(): env = gym.make("CartPole-v0") act = simple.learn(env, q_func_cart_pole, exploration_fraction=0.35, final_epsilon=0.1, alpha=1e-3, callback=callback) act.save("./cartpole_model.ckpt")
def train():
    set_global_seeds(args.seed)
    directory = os.path.join(
        args.log_dir,
        '_'.join([args.env, datetime.datetime.now().strftime("%m%d%H%M")]))
    if not os.path.exists(directory):
        os.makedirs(directory)
    else:
        raise ValueError("The directory already exists...", directory)
    json.dump(vars(args), open(os.path.join(directory, 'learning_prop.json'), 'w'))

    env = envs.make(args.env, dirname=directory)

    with tf.device(args.device):
        model = models.mlp([args.num_units] * args.num_layers,
                           init_mean=args.init_mean, init_sd=args.init_sd)
        act, records = simple.learn(
            env,
            q_func=model,
            lr=args.learning_rate,
            lr_decay_factor=args.learning_rate_decay_factor,
            lr_growth_factor=args.learning_rate_growth_factor,
            max_timesteps=args.nb_train_steps,
            buffer_size=args.buffer_size,
            batch_size=args.batch_size,
            exploration_fraction=args.eps_fraction,
            exploration_final_eps=args.eps_min,
            target_network_update_freq=args.target_update_freq,
            print_freq=10,
            checkpoint_freq=int(args.nb_train_steps / 10),
            learning_starts=args.nb_warmup_steps,
            gamma=args.gamma,
            prioritized_replay=bool(args.prioritized),
            prioritized_replay_alpha=args.prioritized_replay_alpha,
            callback=None,  # callback,
            epoch_steps=args.nb_epoch_steps,
            noise=args.noise,
            varTH=args.varth,
            alg=args.alg,
            gpu_memory=args.gpu_memory,
            act_policy=args.act_policy,
            save_dir=directory,
            nb_test_steps=args.nb_test_steps,
            scope=args.scope,
            test_eps=args.test_eps,
        )

    print("Saving model to model.pkl")
    act.save(os.path.join(directory, "model.pkl"))
    plot(records, directory)

    memo = input("Memo for this experiment?: ")
    f = open(os.path.join(directory, "memo.txt"), 'w')
    f.write(memo)
    f.close()
    if args.record == 1:
        env.moviewriter.finish()
def main():
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--env', help='environment ID', default='Breakout')
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--prioritized', type=int, default=1)
    parser.add_argument('--num-timesteps', type=int, default=int(10e6))
    parser.add_argument('experiment_id')
    args = parser.parse_args()

    logging_directory = Path('./experiments/{}--{}'.format(args.experiment_id, args.env))
    if not logging_directory.exists():
        logging_directory.mkdir(parents=True)
    logger.configure(str(logging_directory), ['stdout', 'tensorboard', 'json'])

    model_directory = logging_directory / 'models'
    if not model_directory.exists():
        model_directory.mkdir(parents=True)

    set_global_seeds(args.seed)
    env_name = args.env + "NoFrameskip-v4"
    env = make_atari(env_name)
    env = bench.Monitor(env, logger.get_dir())
    env = deepq.wrap_atari_dqn(env)

    model = models.cnn_to_mlp(
        convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        hiddens=[256],
    )

    exploration_schedule = PiecewiseSchedule(
        endpoints=[(0, 1), (1e6, 0.1), (5 * 1e6, 0.01)],
        outside_value=0.01)

    act = learn(
        env,
        q_func=model,
        beta1=0.9,
        beta2=0.99,
        epsilon=1e-4,
        max_timesteps=args.num_timesteps,
        buffer_size=1000000,
        exploration_schedule=exploration_schedule,
        start_lr=1e-4,
        end_lr=5 * 1e-5,
        start_step=1e6,
        end_step=5 * 1e6,
        train_freq=4,
        print_freq=10,
        batch_size=32,
        learning_starts=50000,
        target_network_update_freq=10000,
        gamma=0.99,
        prioritized_replay=bool(args.prioritized),
        model_directory=model_directory
    )
    act.save(str(model_directory / "act_model.pkl"))
    env.close()
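# A quick sanity check of the exploration schedule above, assuming PiecewiseSchedule
# is the one from baselines.common.schedules: it linearly interpolates between the
# listed (timestep, value) endpoints and returns outside_value past the last one, so
# epsilon anneals from 1.0 to 0.1 over the first 1e6 steps, then to 0.01 by 5e6 steps.
from baselines.common.schedules import PiecewiseSchedule

schedule = PiecewiseSchedule(endpoints=[(0, 1), (1e6, 0.1), (5 * 1e6, 0.01)],
                             outside_value=0.01)
print(schedule.value(0))           # 1.0
print(schedule.value(500000))      # 0.55, halfway between 1.0 and 0.1
print(schedule.value(int(1e7)))    # 0.01, the outside_value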
def main():
    # pass the opponent policy as a keyword so gym.make can forward it to the env constructor
    env = gym.make('Gomoku9x9-training-camp-v0', opponent_policy=opponent_policy)
    model = models.mlp([64])
    act = simple.learn(env,
                       q_func=model,
                       lr=1e-3,
                       max_timesteps=100000,
                       buffer_size=50000,
                       exploration_fraction=0.1,
                       exploration_final_eps=0.02,
                       print_freq=10,
                       callback=callback)
    print("Saving model to cartpole_model.pkl")
    act.save("cartpole_model.pkl")
def main():
    env = envs.create_env(None)
    model = models.mlp([64])
    act = simple.learn(
        env,
        q_func=model,
        lr=1e-3,
        max_timesteps=100000,
        buffer_size=50000,
        exploration_fraction=0.01,
        exploration_final_eps=0.0,
        print_freq=10,
        callback=callback,
        prioritized_replay=True
    )
    print("Saving model to {}_model.pkl".format(envs.VSTR))
    act.save("{}_model.pkl".format(envs.VSTR))
def main(): env = wrap_env(gym.make("PongNoFrameskip-v4")) act = simple.learn(env, q_func_pong, n_steps=2000000, exploration_fraction=0.20, final_epsilon=0.01, alpha=1e-3, buffer_size=10000, train_main_every=4, update_target_every=1000, gamma=0.99, print_every=1, pre_run_steps=10000, callback=callback) # show_result(env, act) act.save("./pong_model.ckpt")
def main(): env = gym.make("CartPole-v0") #env = gym.make("MountainCar-v0") model = models.mlp([256, 20]) act = learn(env, q_func=model, lr=1e-2, max_timesteps=100000, buffer_size=90000, exploration_fraction=0.1, exploration_final_eps=0.1, print_freq=25, checkpoint_path='model_chkpoints/cart_model', callback=callback, param_noise=True) print("Saving model to cartpole_model.pkl") act.save("cartpole_model.pkl")
def main(): env = wrap_env(gym.make("BreakoutNoFrameskip-v4")) n_steps = 500000 act = simple.learn(env, q_func_breakout, n_steps=n_steps, exploration_fraction=0.2, final_epsilon=0.01, alpha=5e-4, buffer_size=10000, train_main_every=4, update_target_every=1000, gamma=0.99, print_every=4, pre_run_steps=10000, callback=callback) # show_result(env, act) act.save("./breakout_model.ckpt")
def main(): env = gym.make("PongNoFrameskip-v4") env = ScaledFloatFrame(wrap_dqn(env)) model = deepq.models.cnn_to_mlp(convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)], hiddens=[256], dueling=True) act = simple.learn(env, q_func=model, lr=1e-4, max_timesteps=200000, buffer_size=10000, exploration_fraction=0.1, exploration_final_eps=0.01, train_freq=4, learning_starts=10000, target_network_update_freq=1000, gamma=0.99, prioritized_replay=True, tf_log_dir='./log') act.save("pong_model.pkl") env.close()
def train():
    logger.configure()
    set_global_seeds(args.seed)
    directory = os.path.join(
        args.log_dir,
        '_'.join([args.env, datetime.datetime.now().strftime("%m%d%H%M")]))
    if not os.path.exists(directory):
        os.makedirs(directory)
    else:
        raise ValueError("The directory already exists...", directory)
    json.dump(vars(args), open(os.path.join(directory, 'learning_prop.json'), 'w'))

    env = make_atari(args.env)
    env = bench.Monitor(env, logger.get_dir())
    env = models.wrap_atari_dqn(env)

    nb_test_steps = args.nb_test_steps if args.nb_test_steps > 0 else None
    reload_path = args.reload_path if args.reload_path else None
    if args.record:
        env = Monitor(env, directory=directory)

    with tf.device(args.device):
        model = models.cnn_to_mlp(
            convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
            hiddens=[args.num_units] * args.num_layers,
            dueling=bool(args.dueling),
            init_mean=args.init_mean,
            init_sd=args.init_sd,
        )
        act, records = simple.learn(
            env,
            q_func=model,
            lr=args.learning_rate,
            lr_decay_factor=args.lr_decay_factor,
            lr_growth_factor=args.lr_growth_factor,
            max_timesteps=args.nb_train_steps,
            buffer_size=args.buffer_size,
            exploration_fraction=args.eps_fraction,
            exploration_final_eps=args.eps_min,
            train_freq=4,
            print_freq=1000,
            checkpoint_freq=int(args.nb_train_steps / 10),
            learning_starts=args.nb_warmup_steps,
            target_network_update_freq=args.target_update_freq,
            gamma=0.99,
            prioritized_replay=bool(args.prioritized),
            prioritized_replay_alpha=args.prioritized_replay_alpha,
            epoch_steps=args.nb_epoch_steps,
            alg=args.alg,
            noise=args.noise,
            gpu_memory=args.gpu_memory,
            varTH=args.varth,
            act_policy=args.act_policy,
            save_dir=directory,
            nb_test_steps=nb_test_steps,
            scope=args.scope,
            test_eps=args.test_eps,
            checkpoint_path=reload_path,
            init_t=args.init_t,
        )

    print("Saving model to model.pkl")
    act.save(os.path.join(directory, "model.pkl"))
    plot(records, directory)
    env.close()