def main(args):
    """Set up, train and evaluate a ``LehnertGridworldGMMRunner``.

    Seeds NumPy with 2019, selects the "pdf" matplotlib backend, optionally
    sets ``CUDA_VISIBLE_DEVICES``, prepares a ``Saver`` and a ``Logger``,
    builds the runner and model configuration dictionaries, and then runs
    setup, training, optional model saving, evaluation and session shutdown.

    Args:
        args: Object exposing the settings read below as attributes
            (``gpus``, ``save``, ``base_dir``, ``learning_rate``, ...).
    """
    np.random.seed(2019)
    matplotlib.use("pdf")

    if args.gpus is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus

    saver = Saver(None)
    # NOTE(review): the source was flattened; the extent of this `if` body is
    # reconstructed -- confirm that all three saver calls belong inside it.
    if args.save:
        name_values = [
            SAVE_PREFIX.format(args.base_dir),
            args.learning_rate,
            args.optimizer,
            args.num_steps,
            args.num_blocks,
            args.num_components,
            args.beta1,
            args.beta2,
        ]
        name_flags = ["oversample"]
        saver.create_dir_name(SAVE_TEMPLATE, name_values, name_flags, args)
        saver.create_dir(add_run_subdir=True)
        saver.save_by_print(args, "settings")

    log_file = saver.get_save_file("main", "log")
    logger = Logger(save_file=log_file, print_logs=True)
    logger.silence_tensorflow()

    runner_settings = {
        cc.LOAD_PATH: args.load_path,
        cc.SAVER: saver,
        cc.LOGGER: logger,
        cc.OVERSAMPLE: args.oversample,
        cc.VALIDATION_FRACTION: args.validation_fraction,
        cc.VALIDATION_FREQ: args.validation_freq,
        cc.BATCH_SIZE: args.batch_size,
        cc.LOAD_MODEL_PATH: args.load_model_path,
        cc.NUM_STEPS: args.num_steps,
        cc.DATA_LIMIT: args.data_limit,
    }

    # Height/width, ONE_Q_VALUE and FC_ONLY are fixed in this entry point
    # rather than read from `args`.
    model_settings = {
        cc.HEIGHT: 30,
        cc.WIDTH: 3,
        cc.WEIGHT_DECAY: args.weight_decay,
        cc.OPTIMIZER: args.optimizer,
        cc.LEARNING_RATE: args.learning_rate,
        cc.NUM_BLOCKS: args.num_blocks,
        cc.NUM_COMPONENTS: args.num_components,
        cc.BETA0: args.beta0,
        cc.BETA1: args.beta1,
        cc.BETA2: args.beta2,
        cc.MIXTURES_MU_INIT_SD: args.mixtures_mu_init_sd,
        cc.MIXTURES_SD_INIT_MU: args.mixtures_sd_init_mu,
        cc.MIXTURES_SD_INIT_SD: args.mixtures_sd_init_sd,
        cc.ONE_Q_VALUE: False,
        cc.FC_ONLY: True,
    }

    runner = LehnertGridworldGMMRunner(runner_settings, model_settings)
    runner.setup()
    runner.main_training_loop()

    if args.save_model:
        runner.save_model()

    runner.evaluate_and_visualize()
    runner.close_model_session()
def main(args):
    """Set up, train and evaluate a ``QRunnerMinAtar``.

    Seeds NumPy with 2019, selects the "pdf" matplotlib backend unless
    ``args.show_graphs`` is set, optionally sets ``CUDA_VISIBLE_DEVICES``,
    prepares a ``Saver`` and a logger, builds the runner, and then runs
    setup, training, optional model saving, evaluation and session shutdown.

    Args:
        args: Object exposing the settings read below as attributes
            (``game``, ``gpus``, ``save``, ``num_blocks``, ...).

    Raises:
        ValueError: If ``args.game`` is not ``constants.GAME_BREAKOUT``.
    """
    np.random.seed(2019)

    if not args.show_graphs:
        matplotlib.use("pdf")

    if args.gpus is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus

    saver = Saver(None)
    # NOTE(review): the source was flattened; the extent of this `if` body is
    # reconstructed -- confirm that all three saver calls belong inside it.
    if args.save:
        dir_variables = [
            SAVE_PREFIX,
            args.game,
            args.num_blocks,
            args.encoder_learning_rate,
            args.encoder_optimizer,
            args.num_steps,
        ]
        # Each switch must be its own list element. Without the comma after
        # "only_one_q_value", Python concatenates the adjacent literals into
        # the single string "only_one_q_valuedisable_batch_norm".
        dir_switches = [
            "no_sample",
            "only_one_q_value",
            "disable_batch_norm",
        ]
        saver.create_dir_name(SAVE_TEMPLATE, dir_variables, dir_switches, args)
        saver.create_dir(add_run_subdir=True)
        saver.save_by_print(args, "settings")

    logger = LearnExpectationContinuousLogger(
        save_file=saver.get_save_file("main", "log"), print_logs=True
    )
    logger.silence_tensorflow()

    # Only one game is handled here; anything else is rejected up front.
    if args.game == constants.GAME_BREAKOUT:
        num_actions = 6
    else:
        raise ValueError("Unknown game.")

    runner = QRunnerMinAtar(
        args.load_path,
        num_actions,
        logger,
        saver,
        args.num_blocks,
        args.encoder_learning_rate,
        args.weight_decay,
        args.encoder_optimizer,
        args.num_steps,
        disable_batch_norm=args.disable_batch_norm,
        disable_softplus=args.disable_softplus,
        no_sample=args.no_sample,
        only_one_q_value=args.only_one_q_value,
        validation_freq=args.validation_freq,
        validation_fraction=args.validation_fraction,
        summaries=args.summaries,
        load_model_path=args.load_model_path,
        zero_sd_after_training=args.zero_sd_after_training,
    )
    runner.setup()
    runner.main_training_loop()

    if args.save_model:
        runner.save_model()

    runner.evaluate_and_visualize()
    runner.close_model_session()
def main(args):
    """Set up and train a ``LehnertGridworldRunner``.

    Seeds NumPy with 2019, selects the "pdf" matplotlib backend, optionally
    sets ``CUDA_VISIBLE_DEVICES``, prepares a ``Saver`` and a ``Logger``,
    builds the runner and model configuration dictionaries, and then runs
    setup, training, optional model saving and session shutdown.

    Args:
        args: Object exposing the settings read below as attributes
            (``gpus``, ``save``, ``base_dir``, ``data_limit``, ...).
    """
    np.random.seed(2019)
    matplotlib.use("pdf")

    if args.gpus is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus

    saver = Saver(None)
    # NOTE(review): the source was flattened; the extent of this `if` body is
    # reconstructed -- confirm that all three saver calls belong inside it.
    if args.save:
        # The data-limit entry is empty when no limit was requested.
        if args.data_limit is None:
            data_limit_tag = ""
        else:
            data_limit_tag = "_{:d}_dl".format(args.data_limit)

        name_values = [
            SAVE_PREFIX.format(args.base_dir),
            args.learning_rate,
            args.optimizer,
            args.num_steps,
            data_limit_tag,
        ]
        name_flags = ["oversample"]
        saver.create_dir_name(SAVE_TEMPLATE, name_values, name_flags, args)
        saver.create_dir(add_run_subdir=True)
        saver.save_by_print(args, "settings")

    log_file = saver.get_save_file("main", "log")
    logger = Logger(save_file=log_file, print_logs=True)
    logger.silence_tensorflow()

    runner_settings = {
        cc.LOAD_PATH: args.load_path,
        cc.SAVER: saver,
        cc.LOGGER: logger,
        cc.OVERSAMPLE: args.oversample,
        cc.VALIDATION_FRACTION: args.validation_fraction,
        cc.VALIDATION_FREQ: args.validation_freq,
        cc.BATCH_SIZE: args.batch_size,
        cc.LOAD_MODEL_PATH: args.load_model_path,
        cc.NUM_STEPS: args.num_steps,
        cc.DATA_LIMIT: args.data_limit,
    }

    model_settings = {
        cc.HEIGHT: args.height,
        cc.WIDTH: args.width,
        cc.WEIGHT_DECAY: args.weight_decay,
        cc.OPTIMIZER: args.optimizer,
        cc.LEARNING_RATE: args.learning_rate,
        # FC_ONLY is the negation of the `conv` setting.
        cc.FC_ONLY: not args.conv,
        cc.DROPOUT_PROB: args.dropout_prob,
    }

    runner = LehnertGridworldRunner(runner_settings, model_settings)
    runner.setup()
    runner.main_training_loop()

    if args.save_model:
        runner.save_model()

    runner.close_model_session()
def main(args):
    """Set up, run and evaluate a ``LehnertGridworldApproxPartitionRunner``.

    Sets ``CUDA_VISIBLE_DEVICES`` to the empty string, selects the "pdf"
    matplotlib backend, prepares a ``Saver`` and a ``Logger``, builds the
    model and runner configuration dictionaries, and then runs setup, the
    main training loop and evaluation.

    Args:
        args: Object exposing the settings read below as attributes
            (``save``, ``base_dir``, ``round_to``, ``epsilon``, ...).
    """
    # An empty value exposes no CUDA devices to this process.
    os.environ["CUDA_VISIBLE_DEVICES"] = ""
    matplotlib.use("pdf")

    saver = Saver(None)
    # NOTE(review): the source was flattened; the extent of this `if` body is
    # reconstructed -- confirm that all three saver calls belong inside it.
    if args.save:
        # The data-limit entry is empty when no limit was requested.
        if args.data_limit is None:
            data_limit_tag = ""
        else:
            data_limit_tag = "_{:d}_dl".format(args.data_limit)

        name_values = [
            SAVE_PREFIX.format(args.base_dir),
            args.round_to,
            args.epsilon,
            data_limit_tag,
        ]
        name_flags = ["hard_t"]
        saver.create_dir_name(SAVE_TEMPLATE, name_values, name_flags, args)
        saver.create_dir(add_run_subdir=True)
        saver.save_by_print(args, "settings")

    log_file = saver.get_save_file("main", "log")
    logger = Logger(save_file=log_file, print_logs=True)
    logger.silence_tensorflow()

    # The model configuration is entirely fixed in this entry point.
    model_settings = {
        cc.HEIGHT: 30,
        cc.WIDTH: 3,
        cc.WEIGHT_DECAY: 0.0001,
        cc.OPTIMIZER: constants.OPT_ADAM,
        cc.LEARNING_RATE: 0.0005,
        cc.FC_ONLY: True,
        cc.DROPOUT_PROB: 0.0,
    }

    runner_settings = {
        cc.LOAD_MODEL_PATH: args.load_model_path,
        cc.ROUND_TO: args.round_to,
        cc.HARD_T: args.hard_t,
        cc.SAVER: saver,
        cc.LOGGER: logger,
        cc.EPSILON: args.epsilon,
    }

    runner = LehnertGridworldApproxPartitionRunner(runner_settings, model_settings)
    runner.setup()
    runner.main_training_loop()
    runner.evaluate_and_visualize()
def main(args):
    """Set up, train and evaluate a ``QGMMPriorRunner``.

    Seeds NumPy with 2019, selects the "pdf" matplotlib backend unless
    ``args.show_graphs`` is set, optionally sets ``CUDA_VISIBLE_DEVICES``,
    prepares a ``Saver`` and a logger, builds the runner, and then runs
    setup, training, optional model saving, evaluation and session shutdown.

    Args:
        args: Object exposing the settings read below as attributes
            (``gpus``, ``save``, ``grid_size``, ``num_pucks``, ...).
    """
    np.random.seed(2019)

    if not args.show_graphs:
        matplotlib.use("pdf")

    if args.gpus is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus

    saver = Saver(None)
    # NOTE(review): the source was flattened; the extent of this `if` body is
    # reconstructed -- confirm that all three saver calls belong inside it.
    if args.save:
        name_values = [
            SAVE_PREFIX.format(args.base_dir),
            args.num_pucks,
            # grid_size is listed twice, exactly as in the original.
            args.grid_size,
            args.grid_size,
            args.num_blocks,
            args.num_components,
            args.beta0,
            args.beta1,
            args.beta2,
            args.encoder_learning_rate,
            args.encoder_optimizer,
            args.num_steps,
        ]
        name_flags = [
            "no_sample",
            "only_one_q_value",
            "gt_q_values",
            "disable_batch_norm",
        ]
        saver.create_dir_name(SAVE_TEMPLATE, name_values, name_flags, args)
        saver.create_dir(add_run_subdir=True)
        saver.save_by_print(args, "settings")

    log_file = saver.get_save_file("main", "log")
    logger = LearnExpectationContinuousLogger(save_file=log_file, print_logs=True)
    logger.silence_tensorflow()

    positional = (
        args.load_path,
        args.grid_size,
        args.num_pucks,
        logger,
        saver,
        args.num_blocks,
        args.num_components,
        args.hiddens,
        args.encoder_learning_rate,
        args.beta0,
        args.beta1,
        args.beta2,
        args.weight_decay,
        args.encoder_optimizer,
        args.num_steps,
    )

    # Every keyword argument carries the same name as the `args` attribute it
    # is read from, so they are collected by name.
    passthrough_names = (
        "disable_batch_norm",
        "disable_softplus",
        "no_sample",
        "only_one_q_value",
        "gt_q_values",
        "disable_resize",
        "oversample",
        "validation_freq",
        "validation_fraction",
        "summaries",
        "load_model_path",
        "include_goal_states",
        "q_values_noise_sd",
        "new_dones",
    )
    options = {name: getattr(args, name) for name in passthrough_names}

    runner = QGMMPriorRunner(*positional, **options)
    runner.setup()
    runner.main_training_loop()

    if args.save_model:
        runner.save_model()

    runner.evaluate_and_visualize()
    runner.close_model_session()
def main(args):
    """Set up, train and evaluate a ``QHMMPriorMultiTask`` runner.

    Asserts that the task index settings are present, seeds NumPy with 2019,
    selects the "pdf" matplotlib backend unless ``args.show_graphs`` or
    ``args.show_qs`` is set, optionally sets ``CUDA_VISIBLE_DEVICES``,
    prepares a ``Saver`` and a logger, builds the runner, and then runs
    setup, training, the optional post-training HMM loop, optional model
    saving, evaluation and session shutdown.

    Args:
        args: Object exposing the settings read below as attributes
            (``active_indices``, ``eval_indices``, ``dones_index``, ...).

    Raises:
        AssertionError: If ``active_indices``, ``eval_indices`` or
            ``dones_index`` is ``None`` (when assertions are enabled).
    """
    assert args.active_indices is not None
    assert args.eval_indices is not None
    assert args.dones_index is not None

    np.random.seed(2019)

    # Use the "pdf" backend only when neither display option is requested.
    if not (args.show_graphs or args.show_qs):
        matplotlib.use("pdf")

    if args.gpus is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus

    saver = Saver(None)
    # NOTE(review): the source was flattened; the extent of this `if` body is
    # reconstructed -- confirm that all three saver calls belong inside it.
    if args.save:
        name_values = [
            SAVE_PREFIX.format(args.base_dir),
            args.num_pucks,
            # grid_size is listed twice, exactly as in the original.
            args.grid_size,
            args.grid_size,
            args.num_blocks,
            args.num_components,
            args.beta0,
            args.beta1,
            args.beta2,
            args.beta3,
            args.encoder_learning_rate,
            args.encoder_optimizer,
            args.num_steps,
        ]
        name_flags = [
            "no_sample",
            "only_one_q_value",
            "disable_batch_norm",
            "train_prior",
            "post_train_prior",
            "post_train_t_and_prior",
            "post_train_hmm",
        ]
        saver.create_dir_name(SAVE_TEMPLATE, name_values, name_flags, args)
        saver.create_dir(add_run_subdir=True)
        saver.save_by_print(args, "settings")

    log_file = saver.get_save_file("main", "log")
    logger = LearnExpectationContinuousLogger(save_file=log_file, print_logs=True)
    logger.silence_tensorflow()

    positional = (
        args.load_path,
        args.grid_size,
        args.num_pucks,
        logger,
        saver,
        args.num_blocks,
        args.num_components,
        args.hiddens,
        args.encoder_learning_rate,
        args.beta0,
        args.beta1,
        args.beta2,
        args.weight_decay,
        args.encoder_optimizer,
        args.num_steps,
        args.active_indices,
        args.eval_indices,
        args.dones_index,
    )

    # Every keyword argument carries the same name as the `args` attribute it
    # is read from, so they are collected by name.
    passthrough_names = (
        "disable_batch_norm",
        "disable_softplus",
        "no_sample",
        "only_one_q_value",
        "oversample",
        "validation_freq",
        "validation_fraction",
        "summaries",
        "load_model_path",
        "beta3",
        "post_train_prior",
        "post_train_hmm",
        "post_train_t_and_prior",
        "save_gifs",
        "hard_abstract_state",
        "zero_sd_after_training",
        "prune_abstraction",
        "prune_threshold",
        "old_bn_settings",
        "include_goal_states",
        "shift_q_values",
        "soft_picture_goals",
        "goal_rewards_threshold",
        "q_values_indices",
        "fast_eval",
        "sample_abstract_state",
        "softmax_policy",
        "softmax_policy_temp",
        "show_qs",
        "fix_prior_training",
        "model_learning_rate",
        "random_shape",
    )
    options = {name: getattr(args, name) for name in passthrough_names}

    runner = QHMMPriorMultiTask(*positional, **options)
    runner.setup()
    runner.main_training_loop()

    if args.post_train_hmm:
        runner.post_hmm_training_loop(args.post_train_hmm_steps)

    if args.save_model:
        runner.save_model()

    runner.evaluate_and_visualize()
    runner.close_model_session()
def main(args):
    """Set up, train and evaluate a ``QHMMPriorRunnerMinAtar``.

    Seeds NumPy with 2019, selects the "pdf" matplotlib backend unless
    ``args.show_graphs`` is set, optionally sets ``CUDA_VISIBLE_DEVICES``,
    prepares a ``Saver`` and a logger, builds the runner, and then runs
    setup, training, the optional post-training HMM loop, optional model
    saving, evaluation and session shutdown.

    Args:
        args: Object exposing the settings read below as attributes
            (``game``, ``gpus``, ``save``, ``num_blocks``, ...).
    """
    np.random.seed(2019)

    if not args.show_graphs:
        matplotlib.use("pdf")

    if args.gpus is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus

    saver = Saver(None)
    # NOTE(review): the source was flattened; the extent of this `if` body is
    # reconstructed -- confirm that all three saver calls belong inside it.
    if args.save:
        name_values = [
            SAVE_PREFIX.format(args.base_dir),
            args.game,
            args.num_blocks,
            args.num_components,
            args.beta0,
            args.beta1,
            args.beta2,
            args.beta3,
            args.encoder_learning_rate,
            args.encoder_optimizer,
            args.num_steps,
        ]
        name_flags = [
            "no_sample",
            "only_one_q_value",
            "disable_batch_norm",
            "train_prior",
            "post_train_prior",
            "post_train_t_and_prior",
            "post_train_hmm",
        ]
        saver.create_dir_name(SAVE_TEMPLATE, name_values, name_flags, args)
        saver.create_dir(add_run_subdir=True)
        saver.save_by_print(args, "settings")

    log_file = saver.get_save_file("main", "log")
    logger = LearnExpectationContinuousLogger(save_file=log_file, print_logs=True)
    logger.silence_tensorflow()

    # The action count is fixed here regardless of `args.game`.
    num_actions = 6

    # The model learning rate falls back to the encoder learning rate when it
    # is not given explicitly.
    model_learning_rate = (
        args.encoder_learning_rate
        if args.model_learning_rate is None
        else args.model_learning_rate
    )

    positional = (
        args.load_path,
        args.game,
        num_actions,
        logger,
        saver,
        args.num_blocks,
        args.num_components,
        args.encoder_learning_rate,
        args.beta0,
        args.beta1,
        args.beta2,
        args.weight_decay,
        args.encoder_optimizer,
        args.num_steps,
    )

    # Keyword arguments that carry the same name as the `args` attribute they
    # are read from; `model_learning_rate` is handled separately because it
    # uses the fallback value computed above.
    passthrough_names = (
        "disable_batch_norm",
        "disable_softplus",
        "no_sample",
        "only_one_q_value",
        "validation_freq",
        "validation_fraction",
        "summaries",
        "load_model_path",
        "beta3",
        "post_train_prior",
        "post_train_hmm",
        "post_train_t_and_prior",
        "save_gifs",
        "zero_sd_after_training",
        "hard_abstract_state",
        "freeze_hmm_no_entropy_at",
        "cluster_predict_qs",
        "cluster_predict_qs_weight",
        "prune_threshold",
        "prune_abstraction",
        "prune_abstraction_new_means",
        "sample_abstract_state",
        "softmax_policy",
        "softmax_policy_temp",
        "discount",
        "fix_prior_training",
        "q_scaling_factor",
        "eval_episodes",
    )
    options = {name: getattr(args, name) for name in passthrough_names}
    options["model_learning_rate"] = model_learning_rate

    runner = QHMMPriorRunnerMinAtar(*positional, **options)
    runner.setup()
    runner.main_training_loop()

    if args.post_train_hmm:
        runner.post_hmm_training_loop(args.post_train_hmm_steps)

    if args.save_model:
        runner.save_model()

    runner.evaluate_and_visualize()
    runner.close_model_session()