    def _log_stats(self, epoch):
        logger.log("Epoch {} finished".format(epoch), with_timestamp=True)
        """
        Trainer
        """
        logger.record_dict(self.trainer.get_diagnostics(), prefix='trainer/')
        """
        Misc
        """
        gt.stamp('logging')

    def _log_stats(self, epoch):
        logger.log("Epoch {} finished".format(epoch), with_timestamp=True)
        """
        Policy
        """
        logger.record_dict(self.policy.get_diagnostics(), prefix='policy/')
        """
        Evaluation
        """
        logger.record_dict(self.get_evaluation_diagnostics(), prefix='eval/')
        """
        Misc
        """
        gt.stamp('logging')
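
The `_log_stats` variants above all follow the same pattern: each component exposes a flat diagnostics dict, the dict is recorded under a per-component prefix, and the merged result becomes one tabular row per epoch. Below is a minimal, framework-free sketch of that pattern; `record_dict`, `dump_tabular`, and the metric names are illustrative stand-ins, not the logger API used in these snippets.

import csv

def record_dict(row, diagnostics, prefix):
    # Merge a component's diagnostics into the epoch row under a key prefix.
    for key, value in diagnostics.items():
        row[prefix + key] = value

def dump_tabular(path, row, write_header):
    # Append one row per epoch; write the CSV header only once.
    with open(path, 'a', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=sorted(row))
        if write_header:
            writer.writeheader()
        writer.writerow(row)

# One row per epoch, mirroring the trainer/ and eval/ prefixes used above.
row = {'epoch': 0}
record_dict(row, {'loss': 0.42}, prefix='trainer/')
record_dict(row, {'return': 17.3}, prefix='eval/')
dump_tabular('progress.csv', row, write_header=True)
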
Example #3
    def _log_stats(self, epoch):
        logger.log("Epoch {} finished".format(epoch), with_timestamp=True)
        """
        Replay Buffer
        """
        logger.record_dict(self.replay_buffer.get_diagnostics(),
                           prefix='replay_buffer/')
        """
        Trainer
        """
        logger.record_dict(self.trainer.get_diagnostics(), prefix='trainer/')
        """
        Exploration
        """
        logger.record_dict(self.expl_data_collector.get_diagnostics(),
                           prefix='exploration/')
        expl_paths = self.expl_data_collector.get_epoch_paths()
        logger.record_dict(
            eval_util.get_generic_path_information(expl_paths),
            prefix="exploration/",
        )
        """
        Remote Evaluation
        """
        logger.record_dict(
            ray.get(self.remote_eval_data_collector.get_diagnostics.remote()),
            prefix='remote_evaluation/',
        )
        remote_eval_paths = ray.get(
            self.remote_eval_data_collector.get_epoch_paths.remote())
        logger.record_dict(
            eval_util.get_generic_path_information(remote_eval_paths),
            prefix="remote_evaluation/",
        )

        logger.record_dict(self.check_q_funct_estimate(remote_eval_paths),
                           prefix="check_estimate/")
        """
        Misc
        """
        gt.stamp('logging')
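
Example #3 mixes local calls with Ray actor calls: `self.remote_eval_data_collector` is a Ray actor, so its methods are invoked with `.remote()` and the returned object refs are resolved with `ray.get()`. The sketch below shows that pattern in isolation; the `EvalCollector` actor and its methods are made-up stand-ins for the collector used above.

import ray

ray.init(ignore_reinit_error=True)

@ray.remote
class EvalCollector:
    def __init__(self):
        self._paths = []

    def collect(self, path):
        self._paths.append(path)

    def get_diagnostics(self):
        return {'num paths total': len(self._paths)}

    def get_epoch_paths(self):
        return list(self._paths)

collector = EvalCollector.remote()
ray.get(collector.collect.remote({'rewards': [1.0, 0.5]}))
# .remote() returns object refs immediately; ray.get() blocks for the results.
diagnostics = ray.get(collector.get_diagnostics.remote())
paths = ray.get(collector.get_epoch_paths.remote())
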
Example #4
def setup_logger(
    log_dir,
    text_log_file="debug.log",
    tabular_log_file="progress.csv",
    log_tabular_only=False,
    snapshot_mode="last",
    snapshot_gap=1,
):

    tabular_log_path = osp.join(log_dir, tabular_log_file)
    text_log_path = osp.join(log_dir, text_log_file)

    logger.add_text_output(text_log_path)
    logger.add_tabular_output(tabular_log_path)

    logger.set_snapshot_dir(log_dir)
    logger.set_snapshot_mode(snapshot_mode)
    logger.set_snapshot_gap(snapshot_gap)
    logger.set_log_tabular_only(log_tabular_only)

    logger.log('Logging to: {}'.format(log_dir))
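
A hypothetical call to the `setup_logger` helper above. The directory name and snapshot settings are made up for illustration, and the comment assumes the usual meaning of the "gap" snapshot mode (save a parameter snapshot every `snapshot_gap` epochs).

import os
import os.path as osp

log_dir = osp.join('data', 'my-experiment', 'seed-0')
os.makedirs(log_dir, exist_ok=True)

setup_logger(
    log_dir,
    snapshot_mode='gap',  # assumed: save snapshots every `snapshot_gap` epochs
    snapshot_gap=50,
)
# After this call, logger.record_dict(...) / logger.dump_tabular(...) write to
# data/my-experiment/seed-0/progress.csv and debug.log.
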
Example #5
def setup_logger(
    log_dir,
    env_goal,
    num_epochs,
    text_log_file="debug.log",
    tabular_log_file="progress.csv",
    log_tabular_only=False,
    snapshot_mode="last",
    snapshot_gap=1,
):
    logger.goal = env_goal
    logger.num_epochs = num_epochs
    tabular_log_path = osp.join(log_dir, tabular_log_file)
    text_log_path = osp.join(log_dir, text_log_file)

    logger.add_text_output(text_log_path)
    logger.add_tabular_output(tabular_log_path)

    logger.set_snapshot_dir(log_dir)
    logger.set_snapshot_mode(snapshot_mode)
    logger.set_snapshot_gap(snapshot_gap)
    logger.set_log_tabular_only(log_tabular_only)

    logger.log(f'Logging to: {log_dir}')
def run_experiment_here(
        experiment_function,
        variant,
        seed=None,
        use_gpu=True,
        gpu_id=0,

        # Logger params:
        snapshot_mode='last',
        snapshot_gap=1,

        force_randomize_seed=False,
        log_dir=None,
):
    """
    Run an experiment locally without any serialization.

    :param experiment_function: Function. `variant` will be passed in as its
    only argument.
    :param exp_prefix: Experiment prefix for the save file.
    :param variant: Dictionary passed in to `experiment_function`.
    :param exp_id: Experiment ID. Should be unique across all
    experiments. Note that one experiment may correspond to multiple seeds,.
    :param seed: Seed used for this experiment.
    :param use_gpu: Run with GPU. By default False.
    :param script_name: Name of the running script
    :param log_dir: If set, set the log directory to this. Otherwise,
    the directory will be auto-generated based on the exp_prefix.
    :return:
    """
    torch.set_num_threads(1)

    if force_randomize_seed or seed is None:
        seed = random.randint(0, 100000)
        variant['seed'] = str(seed)

    log_dir = variant['log_dir']

    # By default, the logger appends to the text file if it already exists,
    # so this will not overwrite or erase any existing log file in the same
    # log dir.
    logger.reset()
    setup_logger(
        snapshot_mode=snapshot_mode,
        snapshot_gap=snapshot_gap,
        log_dir=log_dir,
    )

    # Assume this file is at the top level of the repo
    git_infos = get_git_infos([osp.dirname(__file__)])

    run_experiment_here_kwargs = dict(
        variant=variant,
        seed=seed,
        use_gpu=use_gpu,
        snapshot_mode=snapshot_mode,
        snapshot_gap=snapshot_gap,
        git_infos=git_infos,
    )

    exp_setting = dict(
        run_experiment_here_kwargs=run_experiment_here_kwargs
    )

    exp_setting_pkl_path = osp.join(log_dir, 'experiment.pkl')

    # Check if existing result exists
    prev_exp_state = None

    if osp.isfile(exp_setting_pkl_path):
        # Sanity check to make sure the experimental setting
        # of the saved data and the current experiment run is the same
        prev_exp_setting = load_pkl(exp_setting_pkl_path)

        logger.log(f'Log dir is not empty: {os.listdir(log_dir)}')

        if prev_exp_setting != exp_setting:
            logger.log("""Previous experimental setting is not
                        the same as the current experimental setting.
                        Very risky to try to reload the previous state.
                        Exitting""")
            logger.log(f'Previous: {prev_exp_setting}')
            logger.log(f'Current: {exp_setting}')
            exit(1)

        try:
            prev_exp_state = load_gzip_pickle(
                osp.join(log_dir, 'params.zip_pkl'))

            logger.log('Trying to restore the state of the experiment program')

        except FileNotFoundError:
            logger.log("""There is no previous experiment state available.
                            Do not try to restore.""")

            prev_exp_state = None

    # Log the variant
    logger.log("Variant:")
    logger.log(json.dumps(dict_to_safe_json(variant), indent=2))
    variant_log_path = osp.join(log_dir, 'variant.json')
    logger.log_variant(variant_log_path, variant)

    # Save the current experimental setting
    dump_pkl(exp_setting_pkl_path, exp_setting)
    log_git_infos(git_infos, log_dir)

    logger.log(f'Seed: {seed}')
    set_seed(seed)

    logger.log(f'Using GPU: {use_gpu}')
    set_gpu_mode(use_gpu, gpu_id)

    return experiment_function(variant, prev_exp_state)
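
A hypothetical invocation of `run_experiment_here`. The experiment function and the variant contents are invented for illustration; note that `experiment_function` receives the (possibly restored) previous experiment state as its second argument.

import os

def my_experiment(variant, prev_exp_state=None):
    # prev_exp_state is None on a fresh run, or the restored state dict
    # loaded from params.zip_pkl when resuming.
    print('running in', variant['log_dir'], 'with seed', variant['seed'])

variant = {
    'log_dir': './data/demo-run',
    'seed': '0',
}
os.makedirs(variant['log_dir'], exist_ok=True)

run_experiment_here(
    my_experiment,
    variant,
    seed=0,
    use_gpu=False,
    snapshot_mode='last',
)
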
Example #7
            _, r, _, _ = env.step(a)
            mse_loss.append((pred_r - r)**2)
        reward_loss_other_tasks.append(
            np.mean(np.stack(mse_loss), axis=0).tolist())
        reward_loss_other_tasks_std.append(
            np.std(np.stack(mse_loss), axis=0).tolist())

    eval_statistics['reward_loss_other_tasks'] = reward_loss_other_tasks
    eval_statistics[
        'reward_loss_other_tasks_std'] = reward_loss_other_tasks_std
    eval_statistics['average_ensemble_reward_loss_other_tasks_mean'] = np.mean(
        reward_loss_other_tasks, axis=0)
    eval_statistics['average_ensemble_reward_loss_other_tasks_std'] = np.std(
        reward_loss_other_tasks, axis=0)

    eval_statistics['average_task_reward_loss_other_tasks_mean'] = np.mean(
        reward_loss_other_tasks, axis=1)
    eval_statistics['average_task_reward_loss_other_tasks_std'] = np.std(
        reward_loss_other_tasks, axis=1)

    eval_statistics[
        'num_selected_trans_other_tasks'] = num_selected_trans_other_tasks

    logger.log("Epoch {} finished".format(epoch), with_timestamp=True)
    logger.record_dict(eval_statistics, prefix='trainer/')

    write_header = (epoch == 0)
    logger.dump_tabular(with_prefix=False,
                        with_timestamp=False,
                        write_header=write_header)
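
Example #7 aggregates per-step squared reward errors with `np.stack` and per-axis `np.mean`/`np.std`. Assuming `pred_r` holds one prediction per ensemble member (an assumption; the fragment does not show where `pred_r` comes from), the toy numbers below show what the stacking axes mean; the values are invented.

import numpy as np

# Two steps, each with squared errors for three hypothetical ensemble members.
mse_loss = [np.array([0.1, 0.4, 0.2]),
            np.array([0.3, 0.2, 0.6])]

stacked = np.stack(mse_loss)            # shape (num_steps, num_ensemble_members)
per_member_mean = stacked.mean(axis=0)  # each member's average error over steps
per_member_std = stacked.std(axis=0)    # spread of each member's error over steps
print(per_member_mean, per_member_std)
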
Example #8
def experiment(variant,
               bcq_policies,
               bcq_buffers,
               ensemble_params_list,
               prev_exp_state=None):
    # Create the multitask replay buffer based on the buffer list
    train_buffer = MultiTaskReplayBuffer(bcq_buffers_list=bcq_buffers)
    # create multi-task environment and sample tasks
    env = env_producer(variant['domain'], variant['seed'])

    env_max_action = float(env.action_space.high[0])
    obs_dim = int(np.prod(env.observation_space.shape))
    action_dim = int(np.prod(env.action_space.shape))
    vae_latent_dim = 2 * action_dim
    mlp_enconder_input_size = (
        2 * obs_dim + action_dim + 1
        if variant['use_next_obs_in_context']
        else obs_dim + action_dim + 1)

    variant['env_max_action'] = env_max_action
    variant['obs_dim'] = obs_dim
    variant['action_dim'] = action_dim

    variant['mlp_enconder_input_size'] = mlp_enconder_input_size

    # instantiate networks

    mlp_enconder = MlpEncoder(hidden_sizes=[200, 200, 200],
                              input_size=mlp_enconder_input_size,
                              output_size=2 * variant['latent_dim'])
    context_encoder = ProbabilisticContextEncoder(mlp_enconder,
                                                  variant['latent_dim'])

    ensemble_predictor = EnsemblePredictor(ensemble_params_list)

    Qs = FlattenMlp(
        hidden_sizes=variant['Qs_hidden_sizes'],
        input_size=obs_dim + action_dim + variant['latent_dim'],
        output_size=1,
    )
    vae_decoder = VaeDecoder(
        max_action=env_max_action,
        hidden_sizes=variant['vae_hidden_sizes'],
        input_size=obs_dim + vae_latent_dim + variant['latent_dim'],
        output_size=action_dim,
    )
    perturbation_generator = PerturbationGenerator(
        max_action=env_max_action,
        hidden_sizes=variant['perturbation_hidden_sizes'],
        input_size=obs_dim + action_dim + variant['latent_dim'],
        output_size=action_dim,
    )
    trainer = SuperQTrainer(
        ensemble_predictor=ensemble_predictor,
        num_network_ensemble=variant['num_network_ensemble'],
        bcq_policies=bcq_policies,
        std_threshold=variant['std_threshold'],
        is_combine=variant['is_combine'],
        nets=[context_encoder, Qs, vae_decoder, perturbation_generator])

    path_collector = RemotePathCollector(variant)

    algorithm = BatchMetaRLAlgorithm(
        trainer,
        path_collector,
        train_buffer,
        **variant['algo_params'],
    )

    algorithm.to(ptu.device)

    start_epoch = (prev_exp_state['epoch'] + 1
                   if prev_exp_state is not None else 0)

    # Log the variant
    logger.log("Variant:")
    logger.log(json.dumps(dict_to_safe_json(variant), indent=2))

    algorithm.train(start_epoch)
Example #9
    assert len(bcq_buffers) == len(idx_list)

    # Load ensemble parameters
    ensemble_params_list = []
    for idx in idx_list:
        params_dir = ensemble_params_dir + str(idx) + '/itr_200.zip_pkl'
        params = load_gzip_pickle(params_dir)
        ensemble_params_list.extend(
            params['trainer']['network_ensemble_state_dict'])

    # set up logger
    variant['log_dir'] = get_log_dir(variant)

    logger.reset()
    setup_logger(log_dir=variant['log_dir'],
                 snapshot_gap=100,
                 snapshot_mode="gap")

    logger.log(f"Seed: {seed}")
    set_seed(seed)

    logger.log(f'Using GPU: {True}')
    set_gpu_mode(mode=True, gpu_id=0)

    experiment(variant,
               bcq_policies,
               bcq_buffers,
               ensemble_params_list,
               prev_exp_state=None)
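
The snippets above rely on small pickle helpers (`load_pkl`, `dump_pkl`, `load_gzip_pickle`) and on gzip-pickled snapshot files such as `itr_200.zip_pkl`. The definitions below are plausible sketches of those helpers based on how they are used here, not the originals.

import gzip
import pickle

def load_pkl(path):
    with open(path, 'rb') as f:
        return pickle.load(f)

def dump_pkl(path, obj):
    with open(path, 'wb') as f:
        pickle.dump(obj, f)

def load_gzip_pickle(path):
    # Snapshots like 'itr_200.zip_pkl' are assumed to be gzip-compressed pickles.
    with gzip.open(path, 'rb') as f:
        return pickle.load(f)
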
Example #10
            bcq_buffers.append(rp_buffer)
    ray.get(buffer_loader_id_list)

    start = variant['start']
    end = variant['end']
    for i in range(start, end):
        variant['algo_params']['train_goal_id'] = i
        variant['train_goal'] = train_goals[i]

        # set up logger
        variant['log_dir'] = get_log_dir(variant)

        logger.reset()
        setup_logger(log_dir=variant['log_dir'],
                     snapshot_gap=100,
                     snapshot_mode="gap")

        # Log the variant
        logger.log("Variant:")
        logger.log(json.dumps(dict_to_safe_json(variant), indent=2))

        logger.log(f'Seed: {seed}')
        set_seed(seed)

        logger.log(f'Using GPU: {True}')
        set_gpu_mode(mode=True, gpu_id=0)

        gt.reset()

        experiment(variant, bcq_buffers, prev_exp_state=None)
Example #11
    def _log_stats(self, epoch):
        logger.log("Epoch {} finished".format(epoch), with_timestamp=True)

        # """
        # Replay Buffer
        # """
        # logger.record_dict(
        #     self.replay_buffer.get_diagnostics(),
        #     prefix='replay_buffer/'
        # )

        # """
        # Trainer
        # """
        # logger.record_dict(self.trainer.get_diagnostics(), prefix='trainer/')

        # """
        # Exploration
        # """
        # logger.record_dict(
        #     self.expl_data_collector.get_diagnostics(),
        #     prefix='exploration/'
        # )
        expl_paths = self.expl_data_collector.get_epoch_paths()

        average_return_expl = np.mean(
            [sum(path["rewards"]) for path in expl_paths])

        # logger.record_dict(
        #     eval_util.get_generic_path_information(expl_paths),
        #     prefix="exploration/",
        # )
        # """
        # Remote Evaluation
        # """
        # logger.record_dict(
        #     ray.get(self.remote_eval_data_collector.get_diagnostics.remote()),
        #     prefix='remote_evaluation/',
        # )
        remote_eval_paths = ray.get(
            self.remote_eval_data_collector.get_epoch_paths.remote())

        average_return_eval = np.mean(
            [sum(path["rewards"]) for path in remote_eval_paths])

        # logger.record_dict(
        #     eval_util.get_generic_path_information(remote_eval_paths),
        #     prefix="remote_evaluation/",
        # )

        with open(self.fixed_log_dir, 'a') as f:
            if epoch == 0:
                f.write("Epoch,AverageReturnExp,AverageReturnEval\n")

            f.write(
                str(epoch) + "," + str(average_return_expl) + "," +
                str(average_return_eval) + "\n")
        """
        Misc
        """
        gt.stamp('logging')