Ejemplo n.º 1
0
def main(args):
    """Train, optionally save, and evaluate a LehnertGridworldGMMRunner.

    Seeds NumPy, selects the "pdf" matplotlib backend, optionally restricts
    the visible CUDA devices, prepares the saver and logger, and then drives
    the runner through setup, training, evaluation and session shutdown.

    Args:
        args: Object exposing the option attributes read below
            (presumably an argparse.Namespace -- confirm against the caller).
    """
    np.random.seed(2019)
    matplotlib.use("pdf")

    visible_gpus = args.gpus
    if visible_gpus is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = visible_gpus

    saver = Saver(None)

    if args.save:
        # Values and switch names handed to the saver together with
        # SAVE_TEMPLATE to name the output directory.
        name_values = [
            SAVE_PREFIX.format(args.base_dir),
            args.learning_rate,
            args.optimizer,
            args.num_steps,
            args.num_blocks,
            args.num_components,
            args.beta1,
            args.beta2,
        ]
        name_switches = ["oversample"]

        saver.create_dir_name(SAVE_TEMPLATE, name_values, name_switches, args)
        saver.create_dir(add_run_subdir=True)

    saver.save_by_print(args, "settings")

    log_file = saver.get_save_file("main", "log")
    logger = Logger(save_file=log_file, print_logs=True)
    logger.silence_tensorflow()

    runner_config = {
        cc.LOAD_PATH: args.load_path,
        cc.SAVER: saver,
        cc.LOGGER: logger,
        cc.OVERSAMPLE: args.oversample,
        cc.VALIDATION_FRACTION: args.validation_fraction,
        cc.VALIDATION_FREQ: args.validation_freq,
        cc.BATCH_SIZE: args.batch_size,
        cc.LOAD_MODEL_PATH: args.load_model_path,
        cc.NUM_STEPS: args.num_steps,
        cc.DATA_LIMIT: args.data_limit,
    }

    # Height, width, ONE_Q_VALUE and FC_ONLY are fixed here; everything else
    # comes from the command-line options.
    model_config = {
        cc.HEIGHT: 30,
        cc.WIDTH: 3,
        cc.WEIGHT_DECAY: args.weight_decay,
        cc.OPTIMIZER: args.optimizer,
        cc.LEARNING_RATE: args.learning_rate,
        cc.NUM_BLOCKS: args.num_blocks,
        cc.NUM_COMPONENTS: args.num_components,
        cc.BETA0: args.beta0,
        cc.BETA1: args.beta1,
        cc.BETA2: args.beta2,
        cc.MIXTURES_MU_INIT_SD: args.mixtures_mu_init_sd,
        cc.MIXTURES_SD_INIT_MU: args.mixtures_sd_init_mu,
        cc.MIXTURES_SD_INIT_SD: args.mixtures_sd_init_sd,
        cc.ONE_Q_VALUE: False,
        cc.FC_ONLY: True,
    }

    runner = LehnertGridworldGMMRunner(runner_config, model_config)
    runner.setup()
    runner.main_training_loop()

    if args.save_model:
        runner.save_model()

    runner.evaluate_and_visualize()
    runner.close_model_session()
def main(args):
    """Train, optionally save, and evaluate a QRunnerMinAtar model.

    Seeds NumPy, selects the "pdf" matplotlib backend unless graphs are to be
    shown, optionally restricts the visible CUDA devices, prepares the saver
    and logger, and then drives the runner through setup, training,
    evaluation and session shutdown.

    Args:
        args: Object exposing the option attributes read below
            (presumably an argparse.Namespace -- confirm against the caller).

    Raises:
        ValueError: If ``args.game`` is not ``constants.GAME_BREAKOUT``.
    """
    np.random.seed(2019)

    if not args.show_graphs:
        matplotlib.use("pdf")

    if args.gpus is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus

    saver = Saver(None)

    if args.save:
        dir_variables = [
            SAVE_PREFIX,
            args.game,
            args.num_blocks,
            args.encoder_learning_rate,
            args.encoder_optimizer,
            args.num_steps,
        ]
        # Each switch must be its own list element. The original was missing
        # a comma, so implicit string-literal concatenation merged the last
        # two names into "only_one_q_valuedisable_batch_norm".
        dir_switches = [
            "no_sample",
            "only_one_q_value",
            "disable_batch_norm",
        ]

        saver.create_dir_name(SAVE_TEMPLATE, dir_variables, dir_switches, args)
        saver.create_dir(add_run_subdir=True)

    saver.save_by_print(args, "settings")

    logger = LearnExpectationContinuousLogger(
        save_file=saver.get_save_file("main", "log"), print_logs=True
    )
    logger.silence_tensorflow()

    # Only Breakout is handled here; its action count is hard-coded.
    if args.game == constants.GAME_BREAKOUT:
        num_actions = 6
    else:
        raise ValueError("Unknown game.")

    runner = QRunnerMinAtar(
        args.load_path,
        num_actions,
        logger,
        saver,
        args.num_blocks,
        args.encoder_learning_rate,
        args.weight_decay,
        args.encoder_optimizer,
        args.num_steps,
        disable_batch_norm=args.disable_batch_norm,
        disable_softplus=args.disable_softplus,
        no_sample=args.no_sample,
        only_one_q_value=args.only_one_q_value,
        validation_freq=args.validation_freq,
        validation_fraction=args.validation_fraction,
        summaries=args.summaries,
        load_model_path=args.load_model_path,
        zero_sd_after_training=args.zero_sd_after_training,
    )

    runner.setup()
    runner.main_training_loop()

    if args.save_model:
        runner.save_model()

    runner.evaluate_and_visualize()
    runner.close_model_session()
def main(args):
    """Train and optionally save a LehnertGridworldRunner model.

    Seeds NumPy, selects the "pdf" matplotlib backend, optionally restricts
    the visible CUDA devices, prepares the saver and logger, and then drives
    the runner through setup, training and session shutdown. No evaluation
    step is invoked here.

    Args:
        args: Object exposing the option attributes read below
            (presumably an argparse.Namespace -- confirm against the caller).
    """
    np.random.seed(2019)
    matplotlib.use("pdf")

    visible_gpus = args.gpus
    if visible_gpus is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = visible_gpus

    saver = Saver(None)

    if args.save:
        name_values = [
            SAVE_PREFIX.format(args.base_dir),
            args.learning_rate,
            args.optimizer,
            args.num_steps,
        ]
        # The last entry is a data-limit suffix; it is empty when no limit
        # was given.
        if args.data_limit is None:
            name_values.append("")
        else:
            name_values.append("_{:d}_dl".format(args.data_limit))

        name_switches = ["oversample"]

        saver.create_dir_name(SAVE_TEMPLATE, name_values, name_switches, args)
        saver.create_dir(add_run_subdir=True)

    saver.save_by_print(args, "settings")

    log_file = saver.get_save_file("main", "log")
    logger = Logger(save_file=log_file, print_logs=True)
    logger.silence_tensorflow()

    runner_config = {
        cc.LOAD_PATH: args.load_path,
        cc.SAVER: saver,
        cc.LOGGER: logger,
        cc.OVERSAMPLE: args.oversample,
        cc.VALIDATION_FRACTION: args.validation_fraction,
        cc.VALIDATION_FREQ: args.validation_freq,
        cc.BATCH_SIZE: args.batch_size,
        cc.LOAD_MODEL_PATH: args.load_model_path,
        cc.NUM_STEPS: args.num_steps,
        cc.DATA_LIMIT: args.data_limit,
    }

    model_config = {
        cc.HEIGHT: args.height,
        cc.WIDTH: args.width,
        cc.WEIGHT_DECAY: args.weight_decay,
        cc.OPTIMIZER: args.optimizer,
        cc.LEARNING_RATE: args.learning_rate,
        # FC_ONLY is the negation of the --conv option.
        cc.FC_ONLY: not args.conv,
        cc.DROPOUT_PROB: args.dropout_prob,
    }

    runner = LehnertGridworldRunner(runner_config, model_config)
    runner.setup()
    runner.main_training_loop()

    if args.save_model:
        runner.save_model()

    runner.close_model_session()
Ejemplo n.º 4
0
def main(args):
    """Run and evaluate a LehnertGridworldApproxPartitionRunner.

    Sets CUDA_VISIBLE_DEVICES to the empty string, selects the "pdf"
    matplotlib backend, prepares the saver and logger, and then drives the
    runner through setup, its main loop and evaluation.

    Args:
        args: Object exposing the option attributes read below
            (presumably an argparse.Namespace -- confirm against the caller).
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = ""
    matplotlib.use("pdf")

    saver = Saver(None)

    if args.save:
        name_values = [
            SAVE_PREFIX.format(args.base_dir),
            args.round_to,
            args.epsilon,
        ]
        # The last entry is a data-limit suffix; it is empty when no limit
        # was given.
        if args.data_limit is None:
            name_values.append("")
        else:
            name_values.append("_{:d}_dl".format(args.data_limit))

        name_switches = ["hard_t"]

        saver.create_dir_name(SAVE_TEMPLATE, name_values, name_switches, args)
        saver.create_dir(add_run_subdir=True)

    saver.save_by_print(args, "settings")

    log_file = saver.get_save_file("main", "log")
    logger = Logger(save_file=log_file, print_logs=True)
    logger.silence_tensorflow()

    # The model hyper-parameters are fixed in this script rather than read
    # from args.
    model_config = {
        cc.HEIGHT: 30, cc.WIDTH: 3, cc.WEIGHT_DECAY: 0.0001,
        cc.OPTIMIZER: constants.OPT_ADAM, cc.LEARNING_RATE: 0.0005,
        cc.FC_ONLY: True, cc.DROPOUT_PROB: 0.0
    }

    runner_config = {
        cc.LOAD_MODEL_PATH: args.load_model_path, cc.ROUND_TO: args.round_to,
        cc.HARD_T: args.hard_t, cc.SAVER: saver, cc.LOGGER: logger,
        cc.EPSILON: args.epsilon
    }

    runner = LehnertGridworldApproxPartitionRunner(runner_config, model_config)

    runner.setup()
    runner.main_training_loop()
    runner.evaluate_and_visualize()
def main(args):
    """Train, optionally save, and evaluate a QGMMPriorRunner model.

    Seeds NumPy, selects the "pdf" matplotlib backend unless graphs are to be
    shown, optionally restricts the visible CUDA devices, prepares the saver
    and logger, and then drives the runner through setup, training,
    evaluation and session shutdown.

    Args:
        args: Object exposing the option attributes read below
            (presumably an argparse.Namespace -- confirm against the caller).
    """
    np.random.seed(2019)

    if not args.show_graphs:
        matplotlib.use("pdf")

    visible_gpus = args.gpus
    if visible_gpus is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = visible_gpus

    saver = Saver(None)

    if args.save:
        # grid_size is listed twice on purpose to mirror the original value
        # list (presumably height and width slots in SAVE_TEMPLATE -- confirm).
        name_values = [
            SAVE_PREFIX.format(args.base_dir),
            args.num_pucks,
            args.grid_size,
            args.grid_size,
            args.num_blocks,
            args.num_components,
            args.beta0,
            args.beta1,
            args.beta2,
            args.encoder_learning_rate,
            args.encoder_optimizer,
            args.num_steps,
        ]
        name_switches = [
            "no_sample",
            "only_one_q_value",
            "gt_q_values",
            "disable_batch_norm",
        ]

        saver.create_dir_name(SAVE_TEMPLATE, name_values, name_switches, args)
        saver.create_dir(add_run_subdir=True)

    saver.save_by_print(args, "settings")

    log_file = saver.get_save_file("main", "log")
    logger = LearnExpectationContinuousLogger(save_file=log_file, print_logs=True)
    logger.silence_tensorflow()

    runner = QGMMPriorRunner(
        args.load_path,
        args.grid_size,
        args.num_pucks,
        logger,
        saver,
        args.num_blocks,
        args.num_components,
        args.hiddens,
        args.encoder_learning_rate,
        args.beta0,
        args.beta1,
        args.beta2,
        args.weight_decay,
        args.encoder_optimizer,
        args.num_steps,
        disable_batch_norm=args.disable_batch_norm,
        disable_softplus=args.disable_softplus,
        no_sample=args.no_sample,
        only_one_q_value=args.only_one_q_value,
        gt_q_values=args.gt_q_values,
        disable_resize=args.disable_resize,
        oversample=args.oversample,
        validation_freq=args.validation_freq,
        validation_fraction=args.validation_fraction,
        summaries=args.summaries,
        load_model_path=args.load_model_path,
        include_goal_states=args.include_goal_states,
        q_values_noise_sd=args.q_values_noise_sd,
        new_dones=args.new_dones,
    )

    runner.setup()
    runner.main_training_loop()

    if args.save_model:
        runner.save_model()

    runner.evaluate_and_visualize()
    runner.close_model_session()
def main(args):
    """Train, optionally save, and evaluate a QHMMPriorMultiTask model.

    Asserts that the three index options are present, seeds NumPy, selects
    the "pdf" matplotlib backend unless graphs or Q-values are to be shown,
    optionally restricts the visible CUDA devices, prepares the saver and
    logger, and then drives the runner through setup, training, the optional
    post-HMM training loop, evaluation and session shutdown.

    Args:
        args: Object exposing the option attributes read below
            (presumably an argparse.Namespace -- confirm against the caller).

    Raises:
        AssertionError: If ``active_indices``, ``eval_indices`` or
            ``dones_index`` is None (only when assertions are enabled).
    """
    # These three options are mandatory for this script.
    assert args.active_indices is not None
    assert args.eval_indices is not None
    assert args.dones_index is not None

    np.random.seed(2019)

    # Keep an interactive backend only if something is going to be shown.
    if not (args.show_graphs or args.show_qs):
        matplotlib.use("pdf")

    visible_gpus = args.gpus
    if visible_gpus is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = visible_gpus

    saver = Saver(None)

    if args.save:
        # grid_size is listed twice on purpose to mirror the original value
        # list (presumably height and width slots in SAVE_TEMPLATE -- confirm).
        name_values = [
            SAVE_PREFIX.format(args.base_dir),
            args.num_pucks,
            args.grid_size,
            args.grid_size,
            args.num_blocks,
            args.num_components,
            args.beta0,
            args.beta1,
            args.beta2,
            args.beta3,
            args.encoder_learning_rate,
            args.encoder_optimizer,
            args.num_steps,
        ]
        name_switches = [
            "no_sample",
            "only_one_q_value",
            "disable_batch_norm",
            "train_prior",
            "post_train_prior",
            "post_train_t_and_prior",
            "post_train_hmm",
        ]

        saver.create_dir_name(SAVE_TEMPLATE, name_values, name_switches, args)
        saver.create_dir(add_run_subdir=True)

    saver.save_by_print(args, "settings")

    log_file = saver.get_save_file("main", "log")
    logger = LearnExpectationContinuousLogger(save_file=log_file, print_logs=True)
    logger.silence_tensorflow()

    runner = QHMMPriorMultiTask(
        args.load_path,
        args.grid_size,
        args.num_pucks,
        logger,
        saver,
        args.num_blocks,
        args.num_components,
        args.hiddens,
        args.encoder_learning_rate,
        args.beta0,
        args.beta1,
        args.beta2,
        args.weight_decay,
        args.encoder_optimizer,
        args.num_steps,
        args.active_indices,
        args.eval_indices,
        args.dones_index,
        disable_batch_norm=args.disable_batch_norm,
        disable_softplus=args.disable_softplus,
        no_sample=args.no_sample,
        only_one_q_value=args.only_one_q_value,
        oversample=args.oversample,
        validation_freq=args.validation_freq,
        validation_fraction=args.validation_fraction,
        summaries=args.summaries,
        load_model_path=args.load_model_path,
        beta3=args.beta3,
        post_train_prior=args.post_train_prior,
        post_train_hmm=args.post_train_hmm,
        post_train_t_and_prior=args.post_train_t_and_prior,
        save_gifs=args.save_gifs,
        hard_abstract_state=args.hard_abstract_state,
        zero_sd_after_training=args.zero_sd_after_training,
        prune_abstraction=args.prune_abstraction,
        prune_threshold=args.prune_threshold,
        old_bn_settings=args.old_bn_settings,
        include_goal_states=args.include_goal_states,
        shift_q_values=args.shift_q_values,
        soft_picture_goals=args.soft_picture_goals,
        goal_rewards_threshold=args.goal_rewards_threshold,
        q_values_indices=args.q_values_indices,
        fast_eval=args.fast_eval,
        sample_abstract_state=args.sample_abstract_state,
        softmax_policy=args.softmax_policy,
        softmax_policy_temp=args.softmax_policy_temp,
        show_qs=args.show_qs,
        fix_prior_training=args.fix_prior_training,
        model_learning_rate=args.model_learning_rate,
        random_shape=args.random_shape,
    )

    runner.setup()
    runner.main_training_loop()

    if args.post_train_hmm:
        runner.post_hmm_training_loop(args.post_train_hmm_steps)

    if args.save_model:
        runner.save_model()

    runner.evaluate_and_visualize()
    runner.close_model_session()
def main(args):
    """Train, optionally save, and evaluate a QHMMPriorRunnerMinAtar model.

    Seeds NumPy, selects the "pdf" matplotlib backend unless graphs are to be
    shown, optionally restricts the visible CUDA devices, prepares the saver
    and logger, and then drives the runner through setup, training, the
    optional post-HMM training loop, evaluation and session shutdown.

    Args:
        args: Object exposing the option attributes read below
            (presumably an argparse.Namespace -- confirm against the caller).
    """
    np.random.seed(2019)

    if not args.show_graphs:
        matplotlib.use("pdf")

    visible_gpus = args.gpus
    if visible_gpus is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = visible_gpus

    saver = Saver(None)

    if args.save:
        name_values = [
            SAVE_PREFIX.format(args.base_dir), args.game, args.num_blocks, args.num_components,
            args.beta0, args.beta1, args.beta2, args.beta3,
            args.encoder_learning_rate, args.encoder_optimizer, args.num_steps
        ]
        name_switches = [
            "no_sample", "only_one_q_value", "disable_batch_norm", "train_prior",
            "post_train_prior", "post_train_t_and_prior", "post_train_hmm"
        ]

        saver.create_dir_name(SAVE_TEMPLATE, name_values, name_switches, args)
        saver.create_dir(add_run_subdir=True)

    saver.save_by_print(args, "settings")

    log_file = saver.get_save_file("main", "log")
    logger = LearnExpectationContinuousLogger(save_file=log_file, print_logs=True)
    logger.silence_tensorflow()

    # The action count is hard-coded in this script.
    num_actions = 6

    # The model learning rate falls back to the encoder's when not given.
    if args.model_learning_rate is None:
        model_learning_rate = args.encoder_learning_rate
    else:
        model_learning_rate = args.model_learning_rate

    runner = QHMMPriorRunnerMinAtar(
        args.load_path, args.game, num_actions, logger, saver,
        args.num_blocks, args.num_components, args.encoder_learning_rate,
        args.beta0, args.beta1, args.beta2, args.weight_decay,
        args.encoder_optimizer, args.num_steps,
        disable_batch_norm=args.disable_batch_norm, disable_softplus=args.disable_softplus,
        no_sample=args.no_sample, only_one_q_value=args.only_one_q_value,
        validation_freq=args.validation_freq, validation_fraction=args.validation_fraction,
        summaries=args.summaries, load_model_path=args.load_model_path,
        beta3=args.beta3, post_train_prior=args.post_train_prior,
        post_train_hmm=args.post_train_hmm, post_train_t_and_prior=args.post_train_t_and_prior,
        save_gifs=args.save_gifs, zero_sd_after_training=args.zero_sd_after_training,
        hard_abstract_state=args.hard_abstract_state,
        freeze_hmm_no_entropy_at=args.freeze_hmm_no_entropy_at,
        cluster_predict_qs=args.cluster_predict_qs,
        cluster_predict_qs_weight=args.cluster_predict_qs_weight,
        prune_threshold=args.prune_threshold, prune_abstraction=args.prune_abstraction,
        prune_abstraction_new_means=args.prune_abstraction_new_means,
        sample_abstract_state=args.sample_abstract_state,
        softmax_policy=args.softmax_policy, softmax_policy_temp=args.softmax_policy_temp,
        discount=args.discount, fix_prior_training=args.fix_prior_training,
        model_learning_rate=model_learning_rate, q_scaling_factor=args.q_scaling_factor,
        eval_episodes=args.eval_episodes
    )

    runner.setup()
    runner.main_training_loop()

    if args.post_train_hmm:
        runner.post_hmm_training_loop(args.post_train_hmm_steps)

    if args.save_model:
        runner.save_model()

    runner.evaluate_and_visualize()
    runner.close_model_session()