Example 1
def make_remote_base(remote_config, n_actors):
    # Resolve the algorithm class named in the config from the algorithms module.
    base = getattr(algorithms, remote_config['base'])

    def make_env_thunk(index):
        # Capture the actor index so each remote Actor builds its env lazily.
        def thunk():
            return make_env(index, n_actors, **remote_config['env'])

        return thunk

    # Probe a throwaway env (index -1) just to read the spaces, then close it.
    test_env = make_env_thunk(-1)()
    obs_space = test_env.observation_space
    action_space = test_env.action_space
    test_env.close()

    # Resolve each 'neural_network' entry into a network-builder callable; dict
    # values are expanded as keyword arguments to get_network_builder.
    network_kwargs = dict()
    for arg_name, arg_value in remote_config['neural_network'].items():
        if isinstance(arg_value, dict):
            network_kwargs[arg_name] = get_network_builder(**arg_value)
        else:
            network_kwargs[arg_name] = get_network_builder(arg_value)

    # Honor optional per-actor Ray resource requests via Actor.options().
    if 'actor_resource' in remote_config:
        remote_actors = [
            Actor.options(**remote_config['actor_resource']).remote(
                thread_id=actor_id,
                base=base,
                make_env=make_env_thunk(actor_id),
                obs_space=obs_space,
                action_space=action_space,
                **network_kwargs,
                **remote_config['actors']) for actor_id in range(n_actors)
        ]
    else:
        remote_actors = [
            Actor.remote(thread_id=actor_id,
                         base=base,
                         make_env=make_env_thunk(actor_id),
                         obs_space=obs_space,
                         action_space=action_space,
                         **network_kwargs,
                         **remote_config['actors'])
            for actor_id in range(n_actors)
        ]
    return remote_actors
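
A minimal sketch of how this factory might be invoked. The keys below mirror what the function reads from `remote_config`; the concrete values ('DQN', 'mlp', the env kwargs) are placeholders rather than the project's real schema, and `ray.init()` stands in for whatever cluster setup is actually used.

import ray

ray.init()

remote_config = {
    'base': 'DQN',                         # attribute looked up on `algorithms`
    'env': {},                             # extra kwargs forwarded to make_env
    'neural_network': {'network': 'mlp'},  # resolved via get_network_builder
    'actors': {},                          # extra kwargs for each remote Actor
    # 'actor_resource': {'num_cpus': 1},   # optional per-actor Ray resources
}

actors = make_remote_base(remote_config, n_actors=4)
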
Example 2
def run(config):
    # The enclosing function is omitted in the source snippet; the name `run`
    # and its `config` parameter are assumed from how the body reads `config`.
    config_gpu()
    env = make_env(**config['env'])
    env_dict, dtype_dict = get_dtype_dict(env.observation_space, env.action_space)
    # Use the C++-backed prioritized buffer when the config asks for it,
    # otherwise fall back to the Python implementation.
    if config['buffer'].pop('cpp', False):
        dtype_dict['indexes'] = 'uint64'
        replay_buffer = cppPER(env_dict=env_dict, **config['buffer'])
    else:
        replay_buffer = PrioritizedReplayBuffer(env_dict=env_dict, **config['buffer'])
    # Dict observation spaces get key-aware storage for current/next/n-step states.
    if isinstance(env.observation_space, gym.spaces.Dict):
        state_keys = env.observation_space.spaces.keys()
        replay_buffer = DictWrapper(replay_buffer, state_prefix=('', 'next_', 'n_'),
                                    state_keys=state_keys)
    # Same pattern as Example 1: resolve each 'neural_network' entry into a builder.
    network_kwargs = dict()
    for key, value in config['neural_network'].items():
        if isinstance(value, dict):
            network_kwargs[key] = get_network_builder(**value)
        else:
            network_kwargs[key] = get_network_builder(value)

    base = getattr(algorithms, config['base'])
    agent = base(obs_space=env.observation_space, action_space=env.action_space,
                 replay_buff=replay_buffer, dtype_dict=dtype_dict,
                 **config['agent'], **network_kwargs)
    # Optionally warm-start the agent from pretrained weights.
    if 'pretrain_weights' in config:
        agent.load(**config['pretrain_weights'])

    # Write TensorBoard summaries for the whole training run.
    summary_writer = tf.summary.create_file_writer(config.pop('log_dir'))
    with summary_writer.as_default():
        if 'train' in config:
            train_config = config['train']
            agent.train(env, **train_config)
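
A hypothetical config for the entry point above; the keys match what the body reads, and every value is illustrative only.

config = {
    'env': {},                              # kwargs for make_env
    'buffer': {'size': 100_000},            # kwargs for the replay buffer; may
                                            # also carry 'cpp': True
    'neural_network': {'network': 'mlp'},   # resolved via get_network_builder
    'base': 'DQN',                          # algorithm class name on `algorithms`
    'agent': {},                            # extra kwargs for the agent
    'log_dir': 'logs/run0',
    'train': {},                            # kwargs forwarded to agent.train
    # 'pretrain_weights': ...,              # optional warm start
}
run(config)
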
Example 3
def make_remote_base(apex_config,
                     env_dict=None,
                     dtype_dict=None,
                     obs_space=None,
                     action_space=None):
    # defaultdict(dict) lets missing config sections read as empty dicts rather
    # than raising KeyError.
    filler_config = defaultdict(dict)
    filler_config.update(apex_config)
    base = getattr(algorithms, apex_config['base'])
    n_actors = apex_config['num_actors']

    def make_env_thunk(index):
        def thunk():
            return make_env(index, n_actors, **filler_config['env'])

        return thunk

    # Probe a throwaway env (index -2) for the spaces unless they were supplied.
    if obs_space is None or action_space is None:
        test_env = make_env_thunk(-2)()
        obs_space = test_env.observation_space
        action_space = test_env.action_space
        test_env.close()
    if env_dict is None or dtype_dict is None:
        env_dict, dtype_dict = get_dtype_dict(obs_space, action_space)

    # Shared Ray-side counter, e.g. for global step bookkeeping across actors.
    remote_counter = Counter.remote()
    # Same network-builder resolution as in the earlier examples.
    network_kwargs = dict()
    for arg_name, arg_value in filler_config['neural_network'].items():
        if isinstance(arg_value, dict):
            network_kwargs[arg_name] = get_network_builder(**arg_value)
        else:
            network_kwargs[arg_name] = get_network_builder(arg_value)
    # As in Example 2, switch to the C++-backed prioritized buffer on request.
    if filler_config['buffer'].pop('cpp', False):
        dtype_dict['indexes'] = 'uint64'
        main_buffer = cppPER(env_dict=env_dict, **filler_config['buffer'])
    else:
        main_buffer = PrioritizedReplayBuffer(env_dict=env_dict,
                                              **filler_config['buffer'])
    # As in Example 2, dict observation spaces need key-aware buffer storage.
    if isinstance(obs_space, gym.spaces.Dict):
        state_keys = obs_space.spaces.keys()
        main_buffer = DictWrapper(main_buffer,
                                  state_prefix=('', 'next_', 'n_'),
                                  state_keys=state_keys)
    # The learner gets its own optional Ray resource request, mirroring the actors.
    if 'learner_resource' in filler_config:
        remote_learner = Learner.options(
            **filler_config['learner_resource']).remote(
                base=base,
                obs_space=obs_space,
                action_space=action_space,
                **filler_config['learner'],
                **filler_config['alg_args'],
                **network_kwargs)
    else:
        remote_learner = Learner.remote(base=base,
                                        obs_space=obs_space,
                                        action_space=action_space,
                                        **filler_config['learner'],
                                        **filler_config['alg_args'],
                                        **network_kwargs)
    # Training actors take indices 0..n_actors-1; a separate evaluation actor
    # (thread_id='Evaluate', env index -1) runs alongside them.
    if 'actor_resource' in filler_config:
        remote_actors = [
            Actor.options(**filler_config['actor_resource']).remote(
                thread_id=actor_id,
                base=base,
                make_env=make_env_thunk(actor_id),
                remote_counter=remote_counter,
                obs_space=obs_space,
                action_space=action_space,
                env_dict=env_dict,
                **network_kwargs,
                **filler_config['actors'],
                **filler_config['alg_args']) for actor_id in range(n_actors)
        ]
        remote_evaluate = Actor.options(
            **filler_config['actor_resource']).remote(
                thread_id='Evaluate',
                base=base,
                make_env=make_env_thunk(-1),
                remote_counter=remote_counter,
                obs_space=obs_space,
                action_space=action_space,
                **network_kwargs,
                **filler_config['actors'],
                **filler_config['alg_args'])
    else:
        remote_actors = [
            Actor.remote(thread_id=actor_id,
                         base=base,
                         make_env=make_env_thunk(actor_id),
                         remote_counter=remote_counter,
                         obs_space=obs_space,
                         action_space=action_space,
                         **network_kwargs,
                         **filler_config['actors'],
                         **filler_config['alg_args'])
            for actor_id in range(n_actors)
        ]
        remote_evaluate = Actor.remote(thread_id='Evaluate',
                                       base=base,
                                       make_env=make_env_thunk(-1),
                                       remote_counter=remote_counter,
                                       obs_space=obs_space,
                                       action_space=action_space,
                                       **network_kwargs,
                                       **filler_config['actors'],
                                       **filler_config['alg_args'])
    return remote_learner, remote_actors, main_buffer, remote_counter, remote_evaluate
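
Putting Example 3 together: a sketch of a driver that builds the Ape-X components. The config shape is inferred from the keys the function reads; every value is a placeholder, not the project's real schema.

import ray

ray.init()

apex_config = {
    'base': 'DQN',
    'num_actors': 4,
    'env': {},
    'neural_network': {'network': 'mlp'},
    'buffer': {'size': 100_000},
    'learner': {},
    'actors': {},
    'alg_args': {},
}

learner, actors, buffer, counter, evaluator = make_remote_base(apex_config)
# A driver would now pump transitions from the actors into `buffer`, feed
# sampled batches to `learner`, and poll `counter` and `evaluator` for progress.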