Esempio n. 1
0
    def __init__(self, config, section):
        """Build the environment and its components from a config section.

        Reads ``max_episode_steps`` and instantiates, through ``make``, the
        components named by the ``robot_world``, ``state_sampler``,
        ``reward_func``, ``observation_func`` and ``goal_sampler`` options.
        The observation space gets one unbounded float32 ``Box`` per key of
        the observation returned by an initial ``reset()``; the action space
        is a ``Box`` sized by ``robot_world.get_action_dim()``.

        Args:
            config: config object that contains ``section``.
            section: name of the section describing this environment.
        """
        super().__init__(config=config, section=section)
        self.max_episode_steps = config.getint(section, 'max_episode_steps')

        def build(option):
            # The option's value is the name of the section describing the component.
            return make(config, config.get(section, option))

        self.robot_world = build('robot_world')
        self.state_sampler = build('state_sampler')
        self.reward_func = build('reward_func')
        self.get_obs = build('observation_func')
        self.goal_sampler = build('goal_sampler')

        # Reset once up front: the returned observation provides the shapes
        # needed to declare the observation space.
        initial_obs = self.reset()
        self.observation_space = gym.spaces.Dict(OrderedDict(
            (name, gym.spaces.Box(-np.inf, np.inf, shape=initial_obs[name].shape, dtype='float32'))
            for name in initial_obs.keys()
        ))

        if config.has_option(section, 'action_space_bounds'):
            # NOTE(review): this iterates over whatever config.get returns; if
            # that is a plain string the loop walks single characters --
            # confirm config.get yields a sequence of numbers here.
            bounds = [float(x) for x in config.get(section, 'action_space_bounds')]
        else:
            bounds = [-1.0, 1.0]
        self.action_space_bounds = bounds
        low, high = bounds[0], bounds[1]
        self.action_space = gym.spaces.Box(low=low, high=high,
                                           shape=(self.robot_world.get_action_dim(),))

        self.steps = 0

        # The GUI is optional; without the option no renderer is created.
        if config.has_option(section, 'render_gui'):
            self.render_gui = make(config, config.get(section, 'render_gui'))
        else:
            self.render_gui = None

        self.np_random = np.random
Esempio n. 2
0
    def __init__(self, config, section):
        """Read the SAC experiment settings from ``section`` of ``config``.

        ``env_maker`` and ``seed`` are read without a fallback; every other
        option falls back to the default given below when it is absent.
        ``n_workers`` is only read (and only set as an attribute) when
        ``parallel_sampling`` is true.

        Args:
            config: config object that contains ``section``.
            section: name of the section holding the algorithm settings.
        """

        def int_option(option, default):
            return config.getint(section, option, fallback=default)

        def float_option(option, default):
            return config.getfloat(section, option, fallback=default)

        # Setup
        self.experiment_dir = None
        self.config = config
        self.section = section
        self.env_maker = make(config, config.get(section, 'env_maker'))
        self.seed = config.getint(section, 'seed')
        self.snapshot_mode = config.get(section, 'snapshot_mode', fallback='last')

        # SAC hyper parameters
        # NOTE(review): eval() executes whatever expression the config file
        # contains; only use config files from trusted sources.
        default_hidden_sizes = '[256, 256]'
        self.policy_hidden_sizes = eval(
            config.get(section, 'policy_hidden_sizes', fallback=default_hidden_sizes))
        self.qf_hidden_sizes = eval(
            config.get(section, 'qf_hidden_sizes', fallback=default_hidden_sizes))

        # Sizes are parsed as floats so values such as 1e6 are accepted, then
        # truncated to int.
        self.buffer_capacity_in_transitions = int(
            float_option('buffer_capacity_in_transitions', 1e6))
        self.gradient_steps_per_itr = int_option('gradient_steps_per_iteration', 1000)
        self.max_path_length = int_option('max_path_length', 1000)
        self.max_eval_path_length = int_option('max_eval_path_length', 1000)
        self.min_buffer_size = int(float_option('min_buffer_size', 1e4))
        self.target_update_tau = float_option('target_update_tau', 5e-3)
        self.discount = float_option('discount', 0.99)
        self.buffer_batch_size = int_option('buffer_batch_size', 256)
        self.reward_scale = float_option('reward_scale', 1.)
        self.steps_per_epoch = int_option('steps_per_epoch', 1)
        self.batch_size = int_option('batch_size', 1000)
        self.n_epochs = int_option('n_epochs', 1000)

        self.parallel_sampling = config.getboolean(section, 'parallel_sampling', fallback=False)
        if self.parallel_sampling:
            # Required (no fallback) once parallel sampling is enabled.
            self.n_workers = config.getint(section, 'n_workers')
Esempio n. 3
0
def main(args):
    """Run the experiment described by the config file ``args.config_file``.

    Reads the config, creates the experiment directory below the directory
    named by the ``EXPERIMENTS_DIR`` environment variable, saves the config
    to the path given by ``utils.get_config_path``, then builds the algorithm
    named by the ``algo`` option of the ``experiment`` section and trains it.

    Args:
        args: object with a ``config_file`` attribute.

    Raises:
        KeyError: if ``EXPERIMENTS_DIR`` is not set in the environment.
    """
    config = ConfigParser()
    config.read(args.config_file)
    experiment_no = config.get('experiment', 'experiment_no')

    experiments_root = os.environ['EXPERIMENTS_DIR']
    os.makedirs(experiments_root, exist_ok=True)
    experiment_dir = utils.get_experiment_dir(experiments_root, experiment_no, mkdir=True)

    # Save the config under the experiment directory before training starts.
    config.save(utils.get_config_path(experiment_dir, experiment_no))

    algo = make(config, config.get('experiment', 'algo'))
    algo.set_experiment_dir(experiment_dir)
    algo.train()
Esempio n. 4
0
    def __init__(self, config, section):
        """Read the PPO settings and environment makers from ``section``.

        The parameters described by ``_get_parameter_descr_dict()`` are
        fetched with ``config.get_section``. The ``network`` entry, which
        names a config section, is replaced by the object that
        ``get_network`` returns for that section.

        Args:
            config: config object that contains ``section``.
            section: name of the section holding the algorithm settings.
        """
        self._learn = ppo2.learn
        self.experiment_dir = None
        self.config = config
        self.section = section

        # ppo parameters; swap the network section name for the network builder
        ppo_params = config.get_section(section, self._get_parameter_descr_dict())
        ppo_params['network'] = get_network(config, ppo_params['network'])
        self.params = ppo_params

        # env
        self.env_maker = make(config, config.get(section, 'env_maker'))
        self.vec_env_maker = VecEnvMaker(config, config.get(section, 'vec_env_maker'))

        self.seed = config.getint(section, 'seed')
Esempio n. 5
0
def get_network(config, section):
    """
    Return a callable object for building a network.

    The section in config must have exactly one of the options 'type' or
    'entrypoint'. 'type' selects one of the built-in network classes: mlp,
    lstm or mlp_lstm_mlp. 'entrypoint' specifies the entrypoint to a class
    definition that inherits NetworkFn. Inheriting is not a strict
    requirement as long as the object returns a network when called.

    Refer to baselines.common.models for the complete list of values type can
    take and the requirements for using a custom network function.

    Example:

    [my_network]
    type = mlp
    num_layers = 2
    num_hidden = 128

    or

    [my_network]
    entrypoint: roam_rl.openai_baselines.models:MLP
    num_layers = 2
    num_hidden = 128

    Args:
        config: config object that contains ``section``.
        section: name of the section describing the network.

    Returns:
        The object built for the section: an instance of the class selected
        by 'type', or the result of ``make(config, section)`` for 'entrypoint'.

    Raises:
        ValueError: if the section has both 'type' and 'entrypoint', or neither.
        KeyError: if 'type' is not one of the supported values.
    """
    has_type = config.has_option(section, 'type')
    has_entrypoint = config.has_option(section, 'entrypoint')

    # Raised explicitly instead of asserted: assert statements are removed
    # under ``python -O``, which would let an ambiguous section through.
    if has_type and has_entrypoint:
        raise ValueError("cannot specify both type and entrypoint")

    if has_type:
        builders = {'mlp': MLP, 'lstm': LSTM, 'mlp_lstm_mlp': MLP_LSTM_MLP}
        network_type = config.get(section, 'type')
        if network_type not in builders:
            # Still a KeyError, as with a plain dict lookup, but with a message
            # that names the accepted values.
            raise KeyError("unknown network type {!r}; expected one of {}".format(
                network_type, sorted(builders)))
        return builders[network_type](config, section)

    if has_entrypoint:
        return make(config, section)

    raise ValueError("network unknown")