def main():
    """Entry point: parse CLI options and launch asynchronous A3C training on Doom."""
    import logging
    logging.basicConfig(level=logging.DEBUG)

    parser = argparse.ArgumentParser()
    parser.add_argument('processes', type=int)
    parser.add_argument('--seed', type=int, default=None)
    parser.add_argument('--outdir', type=str, default=None)
    parser.add_argument('--scenario', type=str, default='basic')
    parser.add_argument('--t-max', type=int, default=5)
    parser.add_argument('--beta', type=float, default=1e-2)
    parser.add_argument('--profile', action='store_true')
    parser.add_argument('--steps', type=int, default=8 * 10 ** 7)
    parser.add_argument('--lr', type=float, default=7e-4)
    parser.add_argument('--eval-frequency', type=int, default=10 ** 5)
    parser.add_argument('--eval-n-runs', type=int, default=10)
    parser.add_argument('--use-lstm', action='store_true')
    parser.add_argument('--window-visible', action='store_true')
    parser.set_defaults(window_visible=False)
    parser.set_defaults(use_lstm=False)
    args = parser.parse_args()

    if args.seed is not None:
        random_seed.set_random_seed(args.seed)

    # Simultaneously launching multiple vizdoom processes makes program stuck,
    # so use the global lock
    vizdoom_lock = mp.Lock()

    def make_env(process_idx, test):
        # Serialize environment construction across worker processes.
        with vizdoom_lock:
            return doom_env.DoomEnv(window_visible=args.window_visible,
                                    scenario=args.scenario)

    n_actions = 3

    def model_opt():
        # Build one (model, optimizer) pair; LSTM or feed-forward head.
        net = A3CLSTM(n_actions) if args.use_lstm else A3CFF(n_actions)
        optimizer = rmsprop_async.RMSpropAsync(lr=args.lr, eps=1e-1, alpha=0.99)
        optimizer.setup(net)
        optimizer.add_hook(chainer.optimizer.GradientClipping(40))
        return net, optimizer

    run_a3c.run_a3c(args.processes, make_env, model_opt, phi,
                    t_max=args.t_max, beta=args.beta,
                    profile=args.profile, steps=args.steps,
                    eval_frequency=args.eval_frequency,
                    eval_n_runs=args.eval_n_runs, args=args)
def main():
    """Parse command-line arguments and start A3C training on a Doom scenario."""
    import logging
    logging.basicConfig(level=logging.DEBUG)

    parser = argparse.ArgumentParser()
    parser.add_argument('processes', type=int)
    parser.add_argument('--seed', type=int, default=None)
    parser.add_argument('--outdir', type=str, default=None)
    parser.add_argument('--scenario', type=str, default='basic')
    parser.add_argument('--t-max', type=int, default=5)
    parser.add_argument('--beta', type=float, default=1e-2)
    parser.add_argument('--profile', action='store_true')
    parser.add_argument('--steps', type=int, default=8 * 10 ** 7)
    parser.add_argument('--lr', type=float, default=7e-4)
    parser.add_argument('--eval-frequency', type=int, default=10 ** 5)
    parser.add_argument('--eval-n-runs', type=int, default=10)
    parser.add_argument('--use-lstm', action='store_true')
    parser.add_argument('--window-visible', action='store_true')
    parser.set_defaults(window_visible=False)
    parser.set_defaults(use_lstm=False)
    args = parser.parse_args()

    if args.seed is not None:
        random_seed.set_random_seed(args.seed)

    # Simultaneously launching multiple vizdoom processes makes program stuck,
    # so use the global lock
    env_creation_lock = mp.Lock()

    def make_env(process_idx, test):
        # Only one worker may construct a DoomEnv at a time.
        with env_creation_lock:
            return doom_env.DoomEnv(window_visible=args.window_visible,
                                    scenario=args.scenario)

    n_actions = 3

    def model_opt():
        """Create the agent network and its RMSprop optimizer."""
        if args.use_lstm:
            agent_net = A3CLSTM(n_actions)
        else:
            agent_net = A3CFF(n_actions)
        opt = rmsprop_async.RMSpropAsync(lr=args.lr, eps=1e-1, alpha=0.99)
        opt.setup(agent_net)
        opt.add_hook(chainer.optimizer.GradientClipping(40))
        return agent_net, opt

    run_a3c.run_a3c(args.processes, make_env, model_opt, phi,
                    t_max=args.t_max,
                    beta=args.beta,
                    profile=args.profile,
                    steps=args.steps,
                    eval_frequency=args.eval_frequency,
                    eval_n_runs=args.eval_n_runs,
                    args=args)
def main():
    """Evaluate a saved (or purely random) A3C agent on a Doom scenario."""
    import logging
    logging.basicConfig(level=logging.DEBUG)

    parser = argparse.ArgumentParser()
    parser.add_argument('model', type=str)
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--sleep', type=float, default=0)
    parser.add_argument('--scenario', type=str, default='basic')
    parser.add_argument('--n-runs', type=int, default=10)
    parser.add_argument('--use-lstm', action='store_true')
    parser.add_argument('--window-visible', action='store_true')
    parser.add_argument('--deterministic', action='store_true')
    parser.add_argument('--random', action='store_true')
    parser.set_defaults(window_visible=False)
    parser.set_defaults(use_lstm=False)
    parser.set_defaults(deterministic=False)
    parser.set_defaults(random=False)
    args = parser.parse_args()

    random_seed.set_random_seed(args.seed)

    # Throwaway invisible env, used only to query the action-space size.
    n_actions = doom_env.DoomEnv(window_visible=False,
                                 scenario=args.scenario).n_actions

    if not args.random:
        # Load the trained weights into the matching architecture.
        model = A3CLSTM(n_actions) if args.use_lstm else A3CFF(n_actions)
        serializers.load_hdf5(args.model, model)

    env = doom_env.DoomEnv(window_visible=args.window_visible,
                           scenario=args.scenario, sleep=args.sleep)
    scores = []
    for run_idx in range(args.n_runs):
        if args.random:
            score = eval_single_random_run(env)
        else:
            score = eval_single_run(env, model, phi,
                                    deterministic=args.deterministic)
        print('Run {}: {}'.format(run_idx, score))
        scores.append(score)
    print('Average: {}'.format(sum(scores) / args.n_runs))
def main():
    """Evaluate a saved A3C (DQN-head) model on an ALE ROM and print scores.

    Positional args: the ROM path and the HDF5 model file. Runs
    ``--n-runs`` evaluation episodes and reports each score plus the mean.
    """
    import logging
    logging.basicConfig(level=logging.DEBUG)

    parser = argparse.ArgumentParser()
    parser.add_argument('rom', type=str)
    parser.add_argument('model', type=str)
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--use-sdl', action='store_true')
    parser.add_argument('--n-runs', type=int, default=10)
    parser.add_argument('--deterministic', action='store_true')
    parser.set_defaults(use_sdl=False)
    parser.set_defaults(deterministic=False)
    args = parser.parse_args()

    random_seed.set_random_seed(args.seed)

    n_actions = ale.ALE(args.rom).number_of_actions

    # Load an A3C-DQN model: shared conv head + softmax policy + value head.
    # (The previously defined local ``pv_func`` was never used and has been
    # removed as dead code.)
    head = dqn_head.NIPSDQNHead()
    pi = policy.FCSoftmaxPolicy(head.n_output_channels, n_actions)
    v = v_function.FCVFunction(head.n_output_channels)
    model = chainer.ChainList(head, pi, v)
    serializers.load_hdf5(args.model, model)

    def p_func(s):
        # Policy-only forward pass used by the evaluator.
        head, pi, _ = model
        out = head(s)
        return pi(out)

    scores = []
    for i in range(args.n_runs):
        score = eval_performance(
            args.rom, p_func, deterministic=args.deterministic,
            use_sdl=args.use_sdl)
        print('Run {}: {}'.format(i, score))
        scores.append(score)
    print('Average: {}'.format(sum(scores) / args.n_runs))
def main():
    """Run evaluation episodes of an A3C agent (or a random policy) on Doom."""
    import logging
    logging.basicConfig(level=logging.DEBUG)

    parser = argparse.ArgumentParser()
    parser.add_argument('model', type=str)
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--sleep', type=float, default=0)
    parser.add_argument('--scenario', type=str, default='basic')
    parser.add_argument('--n-runs', type=int, default=10)
    parser.add_argument('--use-lstm', action='store_true')
    parser.add_argument('--window-visible', action='store_true')
    parser.add_argument('--deterministic', action='store_true')
    parser.add_argument('--random', action='store_true')
    parser.set_defaults(window_visible=False)
    parser.set_defaults(use_lstm=False)
    parser.set_defaults(deterministic=False)
    parser.set_defaults(random=False)
    args = parser.parse_args()

    random_seed.set_random_seed(args.seed)

    # Create a hidden env purely to discover how many actions the scenario has.
    probe_env = doom_env.DoomEnv(window_visible=False, scenario=args.scenario)
    n_actions = probe_env.n_actions

    if not args.random:
        if args.use_lstm:
            model = A3CLSTM(n_actions)
        else:
            model = A3CFF(n_actions)
        serializers.load_hdf5(args.model, model)

    env = doom_env.DoomEnv(window_visible=args.window_visible,
                           scenario=args.scenario, sleep=args.sleep)
    scores = []
    for episode in range(args.n_runs):
        if args.random:
            episode_score = eval_single_random_run(env)
        else:
            episode_score = eval_single_run(
                env, model, phi, deterministic=args.deterministic)
        print('Run {}: {}'.format(episode, episode_score))
        scores.append(episode_score)
    print('Average: {}'.format(sum(scores) / args.n_runs))
def main():
    """Evaluate a trained A3C model on an ALE ROM, optionally recording frames."""
    import logging
    logging.basicConfig(level=logging.DEBUG)

    parser = argparse.ArgumentParser()
    parser.add_argument('rom', type=str)
    parser.add_argument('model', type=str)
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--use-sdl', action='store_true')
    parser.add_argument('--n-runs', type=int, default=10)
    parser.add_argument('--deterministic', action='store_true')
    parser.add_argument('--record-screen-dir', type=str, default=None)
    parser.add_argument('--use-lstm', action='store_true')
    parser.set_defaults(use_sdl=False)
    parser.set_defaults(use_lstm=False)
    parser.set_defaults(deterministic=False)
    args = parser.parse_args()

    random_seed.set_random_seed(args.seed)

    n_actions = ale.ALE(args.rom).number_of_actions

    # Load an A3C-DQN model
    model = A3CLSTM(n_actions) if args.use_lstm else A3CFF(n_actions)
    serializers.load_hdf5(args.model, model)

    scores = []
    for run_idx in range(args.n_runs):
        episode_record_dir = None
        if args.record_screen_dir is not None:
            # One fresh sub-directory of screenshots per evaluation episode.
            episode_record_dir = os.path.join(args.record_screen_dir,
                                              str(run_idx))
            os.makedirs(episode_record_dir)
        score = eval_performance(args.rom, model,
                                 deterministic=args.deterministic,
                                 use_sdl=args.use_sdl,
                                 record_screen_dir=episode_record_dir)
        print('Run {}: {}'.format(run_idx, score))
        scores.append(score)
    print('Average: {}'.format(sum(scores) / args.n_runs))
def main():
    """Score a saved A3C model over several ALE episodes; screens may be recorded."""
    import logging
    logging.basicConfig(level=logging.DEBUG)

    parser = argparse.ArgumentParser()
    parser.add_argument('rom', type=str)
    parser.add_argument('model', type=str)
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--use-sdl', action='store_true')
    parser.add_argument('--n-runs', type=int, default=10)
    parser.add_argument('--deterministic', action='store_true')
    parser.add_argument('--record-screen-dir', type=str, default=None)
    parser.add_argument('--use-lstm', action='store_true')
    parser.set_defaults(use_sdl=False)
    parser.set_defaults(use_lstm=False)
    parser.set_defaults(deterministic=False)
    args = parser.parse_args()

    random_seed.set_random_seed(args.seed)

    n_actions = ale.ALE(args.rom).number_of_actions

    # Load an A3C-DQN model
    if args.use_lstm:
        model = A3CLSTM(n_actions)
    else:
        model = A3CFF(n_actions)
    serializers.load_hdf5(args.model, model)

    scores = []
    for i in range(args.n_runs):
        if args.record_screen_dir is None:
            episode_record_dir = None
        else:
            # Give each episode its own screenshot directory.
            episode_record_dir = os.path.join(args.record_screen_dir, str(i))
            os.makedirs(episode_record_dir)
        episode_score = eval_performance(
            args.rom, model,
            deterministic=args.deterministic,
            use_sdl=args.use_sdl,
            record_screen_dir=episode_record_dir)
        print('Run {}: {}'.format(i, episode_score))
        scores.append(episode_score)
    print('Average: {}'.format(sum(scores) / args.n_runs))
def set_seed_and_run(process_idx, run_func):
    """Give this worker process its own random seed, then run its workload.

    :param process_idx: index of the worker process, forwarded to run_func.
    :param run_func: callable taking the process index; the actual work.
    """
    # np.random.randint's upper bound is limited by the integer dtype; with
    # the default dtype (C long) high=2 ** 32 raises ValueError on platforms
    # where long is 32-bit (e.g. Windows). Request int64 explicitly and
    # convert to a plain Python int. On 64-bit Linux this draws exactly the
    # same values as before.
    seed = int(np.random.randint(0, 2 ** 32, dtype=np.int64))
    random_seed.set_random_seed(seed)
    run_func(process_idx)
def train(args):
    """Train a context-aware model with Chainer's Trainer framework.

    Builds dictionaries/vocabulary from ``args.indir``, constructs the model
    and optimizer selected by ``args``, and runs training with early stopping,
    reporting, plotting, best-model snapshots, and an optional step-decay
    learning-rate schedule (RMSprop only). Artifacts are written to
    ``args.out``.

    :param args: parsed CLI namespace; see the option definitions for fields.
    :raises ValueError: if ``args.optimizer`` is neither "rmsprop" nor "adam".
    """
    # Make sure the output directory exists before the trainer writes to it.
    if not os.path.exists(args.out):
        os.makedirs(args.out)

    if args.gpu >= 0:
        cuda.check_cuda_available()
        cuda.get_device(args.gpu).use()
    if args.random_seed:
        set_random_seed(args.random_seed, (args.gpu,))

    # Dictionaries and vocabulary derived from the preprocessed input dir.
    user2index = load_dict(os.path.join(args.indir, USER_DICT_FILENAME))
    item2index = load_dict(os.path.join(args.indir, ITEM_DICT_FILENAME))
    (trimmed_word2count, word2index, aspect2index,
     opinion2index) = read_and_trim_vocab(args.indir, args.trimfreq)
    aspect_opinions = get_aspect_opinions(
        os.path.join(args.indir, TRAIN_FILENAME))

    # Persist the run configuration and vocabularies for later inference.
    export_params(
        args,
        user2index,
        item2index,
        trimmed_word2count,
        word2index,
        aspect2index,
        opinion2index,
        aspect_opinions,
    )

    src_aspect_score = SOURCE_ASPECT_SCORE.get(args.context, "aspect_score_efm")

    data_loader = DataLoader(
        args.indir,
        user2index,
        item2index,
        trimmed_word2count,
        word2index,
        aspect2index,
        opinion2index,
        aspect_opinions,
        src_aspect_score,
    )

    train_iter, val_iter = get_dataset_iterator(
        args.context, data_loader, args.batchsize)

    model = get_context_model(args, data_loader)

    # Fail fast on an unknown optimizer name; previously an unexpected value
    # left `optimizer` unbound and crashed below with UnboundLocalError.
    if args.optimizer == "rmsprop":
        optimizer = O.RMSprop(lr=args.learning_rate, alpha=args.alpha)
    elif args.optimizer == "adam":
        optimizer = O.Adam(amsgrad=args.amsgrad)
    else:
        raise ValueError("unsupported optimizer: {}".format(args.optimizer))
    optimizer.setup(model)
    if args.grad_clip:
        optimizer.add_hook(GradientClipping(args.grad_clip))

    if args.gpu >= 0:
        model.to_gpu(args.gpu)

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, converter=convert, device=args.gpu)

    # Stop when validation loss plateaus, bounded by args.epoch epochs.
    early_stop = triggers.EarlyStoppingTrigger(
        monitor="validation/main/loss",
        patients=args.patients,
        max_trigger=(args.epoch, "epoch"),
    )
    trainer = training.Trainer(updater, stop_trigger=early_stop, out=args.out)

    trainer.extend(
        extensions.Evaluator(val_iter, model, converter=convert, device=args.gpu))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(
        ["epoch", "main/loss", "validation/main/loss", "lr", "elapsed_time"]))
    trainer.extend(extensions.PlotReport(
        ["main/loss", "validation/main/loss"],
        x_key="epoch", file_name="loss.png"))
    trainer.extend(extensions.ProgressBar())
    # Snapshot only the best model seen so far (lowest validation loss).
    trainer.extend(
        extensions.snapshot_object(model, MODEL_FILENAME),
        trigger=triggers.MinValueTrigger("validation/main/loss"),
    )
    trainer.extend(extensions.observe_lr())

    if args.optimizer in ["rmsprop"] and args.schedule_lr:
        # Step-decay schedule: multiply the lr by lr_reduce every `stepsize`
        # epochs (offset by begin_step), floored at min_learning_rate.
        epoch_list = np.array(
            [i for i in range(1, int(args.epoch / args.stepsize) + 1)]
        ).astype(np.int32)
        value_list = args.learning_rate * args.lr_reduce ** epoch_list
        value_list[value_list < args.min_learning_rate] = args.min_learning_rate
        epoch_list *= args.stepsize
        epoch_list += args.begin_step
        trainer.extend(
            schedule_optimizer_value(epoch_list.tolist(), value_list.tolist()))

    trainer.run()