Code Example #1
    def __init__(self, variant, eval_goals_set):
        ptu.set_gpu_mode(True)
        torch.set_num_threads(1)

        self._single_mdp_path_collectors = [
            RemotePathCollectorSingleMdp.remote(variant, goal)
            for goal in eval_goals_set
        ]
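The `.remote(...)` calls above are Ray's actor-construction syntax, so `RemotePathCollectorSingleMdp` is expected to be a class decorated with `@ray.remote`. Below is a minimal sketch of that pattern with a hypothetical class body; only the decorator and the call shape are taken from the excerpt.

import ray

ray.init()

# Hypothetical stand-in for RemotePathCollectorSingleMdp; the real class
# lives in the repository these excerpts come from.
@ray.remote
class RemotePathCollectorSingleMdp:
    def __init__(self, variant, goal):
        self.variant = variant
        self.goal = goal

    def collect(self):
        # Roll out evaluation paths for this goal and return them.
        return []

# One remote collector per goal, mirroring the list comprehension above.
collectors = [RemotePathCollectorSingleMdp.remote({}, g) for g in (0.0, 1.0)]
paths = ray.get([c.collect.remote() for c in collectors])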
Code Example #2
    def __init__(self, index, variant, candidate_size=10):
        ptu.set_gpu_mode(True)
        torch.set_num_threads(1)

        import sys
        sys.argv = ['']
        del sys

        env_max_action = variant['env_max_action']
        obs_dim = variant['obs_dim']
        action_dim = variant['action_dim']
        latent_dim = variant['latent_dim']
        vae_latent_dim = 2 * action_dim
        mlp_encoder_input_size = (
            2 * obs_dim + action_dim + 1
            if variant['use_next_obs_in_context']
            else obs_dim + action_dim + 1
        )

        mlp_encoder = MlpEncoder(hidden_sizes=[200, 200, 200],
                                 input_size=mlp_encoder_input_size,
                                 output_size=2 * variant['latent_dim'])
        self.context_encoder = ProbabilisticContextEncoder(
            mlp_encoder, variant['latent_dim'])
        self.Qs = FlattenMlp(
            hidden_sizes=variant['Qs_hidden_sizes'],
            input_size=obs_dim + action_dim + latent_dim,
            output_size=1,
        )
        self.vae_decoder = VaeDecoder(
            max_action=variant['env_max_action'],
            hidden_sizes=variant['vae_hidden_sizes'],
            input_size=obs_dim + vae_latent_dim + latent_dim,
            output_size=action_dim,
        )
        self.perturbation_generator = PerturbationGenerator(
            max_action=env_max_action,
            hidden_sizes=variant['perturbation_hidden_sizes'],
            input_size=obs_dim + action_dim + latent_dim,
            output_size=action_dim,
        )

        self.use_next_obs_in_context = variant['use_next_obs_in_context']

        self.env = env_producer(variant['domain'], variant['seed'])
        self.num_evals = variant['num_evals']
        self.max_path_length = variant['max_path_length']

        self.vae_latent_dim = vae_latent_dim
        self.candidate_size = variant['candidate_size']

        self.env.seed(10 * variant['seed'] + 1234 + index)
        set_seed(10 * variant['seed'] + 1234 + index)

        self.env.action_space.np_random.seed(123 + index)
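The seeding pattern above (repeated in the later examples) derives a distinct, reproducible seed for each worker from the base seed and the worker index. A quick arithmetic illustration of the values it produces (plain Python, no repo code assumed):

base_seed = 1
worker_seeds = [10 * base_seed + 1234 + index for index in range(4)]
print(worker_seeds)  # [1244, 1245, 1246, 1247]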
Code Example #3
    def __init__(self, index, variant, candidate_size=10):
        ptu.set_gpu_mode(True)
        torch.set_num_threads(1)

        import sys
        sys.argv = ['']
        del sys

        env_max_action = variant['env_max_action']
        obs_dim = variant['obs_dim']
        action_dim = variant['action_dim']
        latent_dim = variant['latent_dim']
        vae_latent_dim = 2 * action_dim

        self.f = MlpEncoder(
            g_hidden_sizes=variant['g_hidden_sizes'],
            g_input_sizes=obs_dim + action_dim + 1,
            g_latent_dim=variant['g_latent_dim'],
            h_hidden_sizes=variant['h_hidden_sizes'],
            latent_dim=latent_dim,
        )
        self.Qs = FlattenMlp(
            hidden_sizes=variant['Qs_hidden_sizes'],
            input_size=obs_dim + action_dim + latent_dim,
            output_size=1,
        )
        self.vae_decoder = VaeDecoder(
            max_action=variant['env_max_action'],
            hidden_sizes=variant['vae_hidden_sizes'],
            input_size=obs_dim + vae_latent_dim + latent_dim,
            output_size=action_dim,
        )
        self.perturbation_generator = PerturbationGenerator(
            max_action=env_max_action,
            hidden_sizes=variant['perturbation_hidden_sizes'],
            input_size=obs_dim + action_dim + latent_dim,
            output_size=action_dim,
        )

        self.env = env_producer(variant['domain'], variant['seed'])
        self.num_evals = variant['algo_params']['num_evals']
        self.max_path_length = variant['max_path_length']

        self.vae_latent_dim = vae_latent_dim
        self.num_trans_context = variant['num_trans_context']
        self.candidate_size = variant['candidate_size']
        self.seed = variant['seed']
        self.index = index

        self.env.seed(10 * self.seed + 1234 + index)
        set_seed(10 * self.seed + 1234 + index)
Code Example #4
    def __init__(self, variant, goal, candidate_size=10):
        ptu.set_gpu_mode(True)
        torch.set_num_threads(1)

        import sys
        sys.argv = ['']
        del sys

        self.env = env_producer(variant['env_name'], seed=0, goal=goal)
        obs_dim = int(np.prod(self.env.observation_space.shape))
        action_dim = int(np.prod(self.env.action_space.shape))
        reward_dim = 1

        # instantiate networks
        latent_dim = variant['latent_size']
        context_encoder_input_dim = (
            2 * obs_dim + action_dim + reward_dim
            if variant['algo_params']['use_next_obs_in_context']
            else obs_dim + action_dim + reward_dim
        )
        context_encoder_output_dim = (
            latent_dim * 2
            if variant['algo_params']['use_information_bottleneck']
            else latent_dim
        )
        net_size = variant['net_size']
        recurrent = variant['algo_params']['recurrent']
        encoder_model = RecurrentEncoder if recurrent else MlpEncoder

        context_encoder = encoder_model(
            hidden_sizes=[200, 200, 200],
            input_size=context_encoder_input_dim,
            output_size=context_encoder_output_dim,
        )

        policy = TanhGaussianPolicy(
            hidden_sizes=[net_size, net_size, net_size],
            obs_dim=obs_dim + latent_dim,
            latent_dim=latent_dim,
            action_dim=action_dim,
        )
        self.agent = PEARLAgent(latent_dim, context_encoder, policy,
                                **variant['algo_params'])
        self.num_evals = variant['num_evals']
        self.max_path_length = variant['max_path_length']
Code Example #5
    def __init__(self, index, variant, candidate_size=10):
        ptu.set_gpu_mode(True)
        torch.set_num_threads(1)

        import sys
        sys.argv = ['']
        del sys

        self.env = env_producer(variant['domain'], variant['seed'])
        state_dim = self.env.observation_space.low.size
        action_dim = self.env.action_space.low.size
        max_action = float(self.env.action_space.high[0])

        self.policy = BCQ(state_dim, action_dim, max_action,
                          **variant['policy_params'])
        self.num_evals = variant['num_evals']
        self.max_path_length = variant['max_path_length']
        self.seed = variant['seed']
        self.index = index

        self.env.seed(10 * self.seed + 1234 + index)
        set_seed(10 * self.seed + 1234 + index)
Code Example #6
def run_experiment_here(
        experiment_function,
        variant,
        seed=None,
        use_gpu=True,
        gpu_id=0,

        # Logger params:
        snapshot_mode='last',
        snapshot_gap=1,

        force_randomize_seed=False,
        log_dir=None,
):
    """
    Run an experiment locally without any serialization.

    :param experiment_function: Function. `variant` will be passed in as its
    only argument.
    :param exp_prefix: Experiment prefix for the save file.
    :param variant: Dictionary passed in to `experiment_function`.
    :param exp_id: Experiment ID. Should be unique across all
    experiments. Note that one experiment may correspond to multiple seeds,.
    :param seed: Seed used for this experiment.
    :param use_gpu: Run with GPU. By default False.
    :param script_name: Name of the running script
    :param log_dir: If set, set the log directory to this. Otherwise,
    the directory will be auto-generated based on the exp_prefix.
    :return:
    """
    torch.set_num_threads(1)

    if force_randomize_seed or seed is None:
        seed = random.randint(0, 100000)
        variant['seed'] = str(seed)

    log_dir = variant['log_dir']

    # The logger's default mode is to append to the text file
    # if it already exists, so this will not overwrite or erase
    # any existing log file in the same log dir.
    logger.reset()
    setup_logger(
        snapshot_mode=snapshot_mode,
        snapshot_gap=snapshot_gap,
        log_dir=log_dir,
    )

    # Assume this file is at the top level of the repo
    git_infos = get_git_infos([osp.dirname(__file__)])

    run_experiment_here_kwargs = dict(
        variant=variant,
        seed=seed,
        use_gpu=use_gpu,
        snapshot_mode=snapshot_mode,
        snapshot_gap=snapshot_gap,
        git_infos=git_infos,
    )

    exp_setting = dict(
        run_experiment_here_kwargs=run_experiment_here_kwargs
    )

    exp_setting_pkl_path = osp.join(log_dir, 'experiment.pkl')

    # Check if existing result exists
    prev_exp_state = None

    if osp.isfile(exp_setting_pkl_path):
        # Sanity check to make sure the experimental setting
        # of the saved data and the current experiment run is the same
        prev_exp_setting = load_pkl(exp_setting_pkl_path)

        logger.log(f'Log dir is not empty: {os.listdir(log_dir)}')

        if prev_exp_setting != exp_setting:
            logger.log("""Previous experimental setting is not
                        the same as the current experimental setting.
                        Very risky to try to reload the previous state.
                        Exitting""")
            logger.log(f'Previous: {prev_exp_setting}')
            logger.log(f'Current: {exp_setting}')
            exit(1)

        try:
            prev_exp_state = load_gzip_pickle(
                osp.join(log_dir, 'params.zip_pkl'))

            logger.log('Trying to restore the state of the experiment program')

        except FileNotFoundError:
            logger.log("""There is no previous experiment state available.
                            Do not try to restore.""")

            prev_exp_state = None

    # Log the variant
    logger.log("Variant:")
    logger.log(json.dumps(dict_to_safe_json(variant), indent=2))
    variant_log_path = osp.join(log_dir, 'variant.json')
    logger.log_variant(variant_log_path, variant)

    # Save the current experimental setting
    dump_pkl(exp_setting_pkl_path, exp_setting)
    log_git_infos(git_infos, log_dir)

    logger.log(f'Seed: {seed}')
    set_seed(seed)

    logger.log(f'Using GPU: {use_gpu}')
    set_gpu_mode(use_gpu, gpu_id)

    return experiment_function(variant, prev_exp_state)
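A minimal usage sketch for `run_experiment_here` (the `my_experiment` function and the `variant` contents are hypothetical; the only variant key the function itself requires is 'log_dir', which it reads above):

def my_experiment(variant, prev_exp_state):
    # prev_exp_state is None on a fresh run, or the state loaded from
    # params.zip_pkl when an earlier run is being resumed.
    ...

variant = {
    'log_dir': './logs/demo',
    'seed': 0,
}

run_experiment_here(
    my_experiment,
    variant,
    seed=variant['seed'],
    use_gpu=False,
    snapshot_mode='last',
)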
Code Example #7
    assert len(bcq_buffers) == len(idx_list)

    # Load ensemble parameters
    ensemble_params_list = []
    for idx in idx_list:
        params_dir = ensemble_params_dir + str(idx) + '/itr_200.zip_pkl'
        params = load_gzip_pickle(params_dir)
        ensemble_params_list.extend(
            params['trainer']['network_ensemble_state_dict'])

    # set up logger
    variant['log_dir'] = get_log_dir(variant)

    logger.reset()
    setup_logger(log_dir=variant['log_dir'],
                 snapshot_gap=100,
                 snapshot_mode="gap")

    logger.log(f"Seed: {seed}")
    set_seed(seed)

    logger.log(f'Using GPU: {True}')
    set_gpu_mode(mode=True, gpu_id=0)

    experiment(variant,
               bcq_policies,
               bcq_buffers,
               ensemble_params_list,
               prev_exp_state=None)
Code Example #8
    def __init__(self, variant, eval_goals_set):
        ptu.set_gpu_mode(True)

        self._single_mdp_path_collectors = [
            PathCollectorSingleMdp(variant, goal) for goal in eval_goals_set
        ]