Example #1
def state_estimator_model(
        input_shape,
        num_hidden_units=256,
        num_hidden_layers=2,
        output_size=4,  # (x, y, z_cos, z_sin)
        kernel_regularizer=None,
        preprocessor_params=None,
        preprocessor=None,
        name='state_estimator_preprocessor'):
    # TODO: Make this take in observation keys instead of this hardcoded output size.
    obs_preprocessor_params = (preprocessor_params
                               or DEFAULT_STATE_ESTIMATOR_PREPROCESSOR_PARAMS)

    if preprocessor is None:
        preprocessor = get_preprocessor_from_params(None,
                                                    obs_preprocessor_params)

    state_estimator = feedforward_model(
        hidden_layer_sizes=(num_hidden_units, ) * num_hidden_layers,
        output_size=output_size,
        output_activation=tf.keras.activations.tanh,
        kernel_regularizer=kernel_regularizer,  # e.g. tf.keras.regularizers.l2(0.001)
        name='feedforward_state_est')
    model = tfk.Sequential([
        tfk.Input(shape=input_shape, name='pixels', dtype=tf.uint8),
        preprocessor,
        state_estimator,
    ], name=name)
    return model
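A quick usage sketch for this model. The 48x48 RGB input shape and the tfk = tf.keras alias are illustrative assumptions; the snippet itself relies on DEFAULT_STATE_ESTIMATOR_PREPROCESSOR_PARAMS and get_preprocessor_from_params from the surrounding module.

import tensorflow as tf
tfk = tf.keras  # alias assumed by the snippet above

# Hypothetical pixel observation shape; the real one comes from the env.
model = state_estimator_model(input_shape=(48, 48, 3))
pixels = tf.zeros((1, 48, 48, 3), dtype=tf.uint8)
state = model(pixels)  # shape (1, 4): (x, y, z_cos, z_sin), each in [-1, 1] via tanh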
Example #2
def get_dynamics_model_from_variant(variant, env, *args, **kwargs):
    from .dynamics_model import create_dynamics_model

    dynamics_model_params = deepcopy(variant['dynamics_model_params'])
    # dynamics_model_type = deepcopy(dynamics_model_params['type'])
    dynamics_model_kwargs = deepcopy(dynamics_model_params['kwargs'])

    observation_preprocessors_params = dynamics_model_kwargs.pop(
        'observation_preprocessors_params', {}).copy()
    observation_keys = dynamics_model_kwargs.pop('observation_keys',
                                                 None) or env.observation_keys

    encoder_kwargs = dynamics_model_kwargs.pop('encoder_kwargs', {}).copy()
    decoder_kwargs = dynamics_model_kwargs.pop('decoder_kwargs', {}).copy()
    dynamics_latent_dim = dynamics_model_kwargs.pop('dynamics_latent_dim', 16)

    observation_shapes = OrderedDict(
        ((key, value) for key, value in env.observation_shape.items()
         if key in observation_keys))
    action_shape = env.action_shape

    input_shapes = {
        'observations': observation_shapes,
        'actions': action_shape,
    }

    observation_preprocessors = OrderedDict()
    for name, observation_shape in observation_shapes.items():
        preprocessor_params = observation_preprocessors_params.get(name, None)
        if not preprocessor_params:
            observation_preprocessors[name] = None
            continue

        observation_preprocessors[name] = get_preprocessor_from_params(
            env, preprocessor_params)

    action_preprocessor = None
    preprocessors = {
        'observations': observation_preprocessors,
        'actions': action_preprocessor,
    }

    dynamics_model = create_dynamics_model(
        *args,
        input_shapes=input_shapes,
        dynamics_latent_dim=dynamics_latent_dim,
        observation_keys=observation_keys,
        preprocessors=preprocessors,
        encoder_kwargs=encoder_kwargs,
        decoder_kwargs=decoder_kwargs,
        **dynamics_model_kwargs,
        **kwargs)

    return dynamics_model
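For reference, a hedged sketch of the variant layout this getter consumes. Only the key names are fixed by the pops above; the concrete values and the preprocessor type name are illustrative assumptions.

variant = {
    'dynamics_model_params': {
        'kwargs': {
            'observation_keys': ('pixels',),
            'observation_preprocessors_params': {
                'pixels': {'type': 'convnet_preprocessor', 'kwargs': {}},  # assumed type
            },
            'encoder_kwargs': {'hidden_layer_sizes': (256, 256)},
            'decoder_kwargs': {'hidden_layer_sizes': (256, 256)},
            'dynamics_latent_dim': 16,
        },
    },
}
dynamics_model = get_dynamics_model_from_variant(variant, env)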
Example #3
def get_V_function_from_variant(variant, env, *args, **kwargs):
    V_params = variant['V_params']
    V_type = V_params['type']
    V_kwargs = deepcopy(V_params['kwargs'])

    preprocessor_params = V_kwargs.pop('preprocessor_params', None)
    preprocessor = get_preprocessor_from_params(env, preprocessor_params)
    return VALUE_FUNCTIONS[V_type](
        *args,
        observation_shape=env.active_observation_shape,
        observation_preprocessor=preprocessor,
        **V_kwargs,
        **kwargs)
Example #4
def get_Q_function_from_variant(variant, env, *args, **kwargs):
    Q_params = variant['Q_params']
    Q_type = Q_params['type']
    Q_kwargs = deepcopy(Q_params['kwargs'])

    preprocessor_params = Q_kwargs.pop('preprocessor_params', None)
    preprocessor = get_preprocessor_from_params(env, preprocessor_params)

    return VALUE_FUNCTIONS[Q_type](
        *args,
        observation_shape=env.active_observation_shape,
        action_shape=env.action_space.shape,
        observation_preprocessor=preprocessor,
        **Q_kwargs,
        **kwargs)
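Examples #3 and #4 follow the same pattern, differing only in the extra action_shape passed to the Q-function constructor. A minimal sketch of the expected variant slice; the registry key is an assumption about what VALUE_FUNCTIONS contains.

variant = {
    'Q_params': {
        'type': 'double_feedforward_Q_function',  # assumed registry key
        'kwargs': {
            'hidden_layer_sizes': (256, 256),
            'preprocessor_params': None,  # popped before the constructor call
        },
    },
}
Qs = get_Q_function_from_variant(variant, env)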
Example #5
def get_policy_from_variant(variant, env, Qs, *args, **kwargs):
    policy_params = variant['policy_params']
    policy_type = policy_params['type']
    policy_kwargs = deepcopy(policy_params['kwargs'])

    preprocessor_params = policy_kwargs.pop('preprocessor_params', None)
    preprocessor = get_preprocessor_from_params(env, preprocessor_params)

    policy = POLICY_FUNCTIONS[policy_type](env,
                                           *args,
                                           Q=Qs[0],
                                           preprocessor=preprocessor,
                                           **policy_kwargs,
                                           **kwargs)

    return policy
Example #6
def get_Q_function_from_variant(variant, env, *args, **kwargs):
    Q_params = deepcopy(variant['Q_params'])
    Q_type = deepcopy(Q_params['type'])
    Q_kwargs = deepcopy(Q_params['kwargs'])

    observation_preprocessors_params = Q_kwargs.pop(
        'observation_preprocessors_params', {}).copy()
    observation_keys = Q_kwargs.pop('observation_keys',
                                    None) or env.observation_keys
    goal_keys = Q_kwargs.pop('goal_keys', tuple())

    observation_shapes = OrderedDict(
        ((key, value) for key, value in env.observation_shape.items()
         if key in observation_keys + goal_keys))
    action_shape = env.action_shape
    input_shapes = {
        'observations': observation_shapes,
        'actions': action_shape,
    }

    observation_preprocessors = OrderedDict()
    for name, observation_shape in observation_shapes.items():
        preprocessor_params = observation_preprocessors_params.get(name, None)
        if not preprocessor_params:
            observation_preprocessors[name] = None
            continue

        observation_preprocessors[name] = get_preprocessor_from_params(
            env, preprocessor_params)

    action_preprocessor = None
    preprocessors = {
        'observations': observation_preprocessors,
        'actions': action_preprocessor,
    }

    Q_function = VALUE_FUNCTIONS[Q_type](
        *args,
        input_shapes=input_shapes,
        observation_keys=observation_keys,
        goal_keys=goal_keys,
        preprocessors=preprocessors,
        **Q_kwargs,
        **kwargs)

    return Q_function
Example #7
def get_reward_classifier_from_variant(variant, env, *args, **kwargs):
    from .vice_models import create_feedforward_reward_classifier

    classifier_params = variant['classifier_params']
    classifier_type = classifier_params['type']
    classifier_kwargs = deepcopy(classifier_params['kwargs'])

    # TODO Avi maybe have some optional preprocessing
    preprocessor_params = classifier_kwargs.pop('preprocessor_params', None)
    preprocessor = get_preprocessor_from_params(env, preprocessor_params)

    return create_feedforward_reward_classifier(
        *args,
        observation_shape=env.active_observation_shape,
        # action_shape=env.action_space.shape,
        observation_preprocessor=preprocessor,
        **classifier_kwargs,
        **kwargs)
Example #8
def get_distance_estimator_from_variant(variant, env, *args, **kwargs):
    from softlearning.models.ddl.distance_estimator import (
        create_distance_estimator)

    distance_fn_params = deepcopy(variant['distance_fn_params'])
    distance_fn_kwargs = deepcopy(distance_fn_params['kwargs'])

    observation_preprocessors_params = distance_fn_kwargs.pop(
        'observation_preprocessors_params', {}).copy()
    observation_keys = distance_fn_kwargs.pop('observation_keys',
                                              None) or env.observation_keys

    observation_shapes = OrderedDict(
        ((key, value) for key, value in env.observation_shape.items()
         if key in observation_keys))

    input_shapes = {
        's1': observation_shapes,
        's2': observation_shapes,
    }

    observation_preprocessors = OrderedDict()
    for name, observation_shape in observation_shapes.items():
        preprocessor_params = observation_preprocessors_params.get(name, None)
        if not preprocessor_params:
            observation_preprocessors[name] = None
            continue
        observation_preprocessors[name] = get_preprocessor_from_params(
            env, preprocessor_params)

    preprocessors = {
        's1': observation_preprocessors,
        's2': observation_preprocessors,
    }

    distance_fn = create_distance_estimator(
        *args,
        input_shapes=input_shapes,
        observation_keys=observation_keys,
        preprocessors=preprocessors,
        **distance_fn_kwargs,
        **kwargs)
    return distance_fn
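Note that the same observation_preprocessors OrderedDict backs both the 's1' and 's2' entries, so the two states share preprocessor weights. A hedged usage sketch follows; the observation key name and kwargs are illustrative.

variant = {
    'distance_fn_params': {
        'kwargs': {
            'observation_keys': ('state_observation',),  # assumed key name
            'hidden_layer_sizes': (256, 256),
        },
    },
}
distance_fn = get_distance_estimator_from_variant(variant, env)
# The resulting model scores pairs of states, i.e. inputs keyed 's1' and 's2'.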
Example #9
def get_embedding_from_variant(variant, env, *args, **kwargs):
    from softlearning.models.ddl.distance_estimator import (
        create_embedding_fn)

    distance_fn_params = deepcopy(variant['distance_fn_params'])
    distance_fn_kwargs = deepcopy(distance_fn_params['kwargs'])

    observation_preprocessors_params = distance_fn_kwargs.pop(
        'observation_preprocessors_params', {}).copy()
    observation_keys = distance_fn_kwargs.pop('observation_keys',
                                              None) or env.observation_keys

    observation_shapes = OrderedDict(
        ((key, value) for key, value in env.observation_shape.items()
         if key in observation_keys))

    input_shapes = observation_shapes

    observation_preprocessors = OrderedDict()
    for name, observation_shape in observation_shapes.items():
        preprocessor_params = observation_preprocessors_params.get(name, None)
        if not preprocessor_params:
            observation_preprocessors[name] = None
            continue
        observation_preprocessors[name] = get_preprocessor_from_params(
            env, preprocessor_params)

    preprocessors = observation_preprocessors

    assert 'embedding_dim' in distance_fn_kwargs, (
        'Must specify an embedding dimension in the distance function kwargs')
    embedding_dim = distance_fn_kwargs.pop('embedding_dim')

    embedding_fn = create_embedding_fn(
        *args,
        input_shapes=input_shapes,
        embedding_dim=embedding_dim,
        observation_keys=observation_keys,
        preprocessors=preprocessors,
        **distance_fn_kwargs,
        **kwargs)
    return embedding_fn
Example #10
def get_policy_from_params(policy_params, env, *args, **kwargs):
    policy_type = policy_params['type']
    policy_kwargs = deepcopy(policy_params.get('kwargs', {}))

    observation_preprocessors_params = policy_kwargs.pop(
        'observation_preprocessors_params', {})
    observation_keys = policy_kwargs.pop('observation_keys',
                                         None) or env.observation_keys
    goal_keys = policy_kwargs.pop('goal_keys', None) or tuple()

    observation_shapes = OrderedDict(
        ((key, value) for key, value in env.observation_shape.items()
         if key in observation_keys + goal_keys))

    observation_preprocessors = OrderedDict()
    for name, observation_shape in observation_shapes.items():
        preprocessor_params = observation_preprocessors_params.get(name, None)
        if not preprocessor_params:
            observation_preprocessors[name] = None
            continue

        observation_preprocessors[name] = get_preprocessor_from_params(
            env, preprocessor_params)

    if policy_type == 'UniformPolicy':
        action_range = (env.action_space.low, env.action_space.high)
        policy_kwargs['action_range'] = action_range

    policy = POLICY_FUNCTIONS[policy_type](
        *args,
        input_shapes=observation_shapes,
        output_shape=env.action_space.shape,
        observation_keys=observation_keys,
        goal_keys=goal_keys,
        preprocessors=observation_preprocessors,
        **policy_kwargs,
        **kwargs)

    return policy
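The UniformPolicy branch fills in action_range from the env automatically, so callers never pass bounds themselves. A minimal sketch; the registry key and the observation/goal key names are assumptions.

policy_params = {
    'type': 'UniformPolicy',  # assumed to exist in POLICY_FUNCTIONS
    'kwargs': {
        'observation_keys': ('observations',),
        'goal_keys': ('goal',),
    },
}
policy = get_policy_from_params(policy_params, env)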
Example #11
def get_Q_function_from_variant(variant, env, *args, **kwargs):
    Q_params = variant['Q_params']
    Q_type = Q_params['type']
    Q_kwargs = deepcopy(Q_params['kwargs'])
    alg_type = variant['algorithm_params']['type']
    num_q = variant['algorithm_params']['kwargs']['num_q']
    if alg_type == 'TQC':
        num_outputs = variant['algorithm_params']['kwargs']['num_quantiles']
    else:
        num_outputs = 1

    preprocessor_params = Q_kwargs.pop('preprocessor_params', None)
    preprocessor = get_preprocessor_from_params(env, preprocessor_params)

    return VALUE_FUNCTIONS[Q_type](
        *args,
        observation_shape=env.active_observation_shape,
        action_shape=env.action_space.shape,
        num_q=num_q,
        observation_preprocessor=preprocessor,
        output_size=num_outputs,
        **Q_kwargs,
        **kwargs)
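For TQC, the Q-network head emits one output per quantile rather than a single scalar. A sketch of the variant slice this getter reads; the field names mirror the lookups above, while the concrete values and the registry key are assumptions.

variant = {
    'algorithm_params': {
        'type': 'TQC',
        'kwargs': {'num_q': 5, 'num_quantiles': 25},
    },
    'Q_params': {
        'type': 'double_feedforward_Q_function',  # assumed registry key
        'kwargs': {'hidden_layer_sizes': (512, 512)},
    },
}
# With the values above, output_size becomes 25; any non-TQC algorithm gets 1.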
Example #12
def get_reward_classifier_from_variant(variant, env, *args, **kwargs):
    from .vice_models import create_feedforward_reward_classifier_function

    reward_classifier_params = deepcopy(variant['reward_classifier_params'])
    reward_classifier_type = deepcopy(reward_classifier_params['type'])
    assert reward_classifier_type == 'feedforward_classifier', (
        reward_classifier_type)
    reward_classifier_kwargs = deepcopy(reward_classifier_params['kwargs'])

    observation_preprocessors_params = reward_classifier_kwargs.pop(
        'observation_preprocessors_params', {}).copy()
    observation_keys = reward_classifier_kwargs.pop(
        'observation_keys', None) or env.observation_keys

    # TODO: Clean this up
    dynamics_aware = variant['algorithm_params']['type'] == 'VICEDynamicsAware'

    observation_shapes = OrderedDict(
        ((key, value) for key, value in env.observation_shape.items()
         if key in observation_keys))

    if dynamics_aware:
        dynamics_model_kwargs = deepcopy(
            variant['dynamics_model_params']['kwargs'])
        dynamics_latent_dim = dynamics_model_kwargs['dynamics_latent_dim']
        dynamics_features_shape = tf.TensorShape(dynamics_latent_dim)
        input_shapes = {
            'observations': observation_shapes,
            'dynamics_features': dynamics_features_shape
        }
    else:
        input_shapes = observation_shapes

    observation_preprocessors = OrderedDict()
    for name, observation_shape in observation_shapes.items():
        preprocessor_params = observation_preprocessors_params.get(name, None)
        if not preprocessor_params:
            observation_preprocessors[name] = None
            continue

        preprocessor_type = preprocessor_params.get('type')
        if preprocessor_type == 'PickledPreprocessor':
            import pickle
            preprocessor_kwargs = preprocessor_params.pop('kwargs', {})
            assert 'preprocessor_path' in preprocessor_kwargs, (
                'Need to specify a .pkl file to load the preprocessor')
            with open(preprocessor_kwargs['preprocessor_path'], 'rb') as f:
                data = pickle.load(f)
                if 'extract_fn' in preprocessor_kwargs:
                    extract_fn = (variant['reward_classifier_params']['kwargs']
                                  ['observation_preprocessors_params'][name]
                                  ['kwargs'].pop('extract_fn'))
                    # extract_fn = preprocessor_kwargs.pop('extract_fn')
                    preprocessor = extract_fn(data)
                else:
                    preprocessor = data
                if isinstance(preprocessor, tf.keras.Model):
                    preprocessor.trainable = False
                observation_preprocessors[name] = preprocessor

        elif preprocessor_type == 'EmbeddingPreprocessor':
            preprocessor_kwargs = preprocessor_params.pop('kwargs', {})
            observation_preprocessors[name] = get_embedding_from_variant(
                variant, env)

        else:
            observation_preprocessors[name] = get_preprocessor_from_params(
                env, preprocessor_params)

    if dynamics_aware:
        preprocessors = {
            'observations': observation_preprocessors,
            'dynamics_features': None,
        }
    else:
        preprocessors = observation_preprocessors

    reward_classifier = create_feedforward_reward_classifier_function(
        *args,
        input_shapes=input_shapes,
        observation_keys=observation_keys,
        preprocessors=preprocessors,
        **reward_classifier_kwargs,
        **kwargs)

    return reward_classifier
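A sketch of the params the PickledPreprocessor branch expects. The path and the extract_fn are placeholders (the unpickled object's layout is hypothetical); note that a loaded tf.keras.Model is frozen via trainable = False before use.

observation_preprocessors_params = {
    'pixels': {
        'type': 'PickledPreprocessor',
        'kwargs': {
            'preprocessor_path': '/path/to/preprocessor.pkl',  # placeholder path
            # Optional: pull a sub-model out of the unpickled object;
            # this dict layout is a hypothetical example.
            'extract_fn': lambda data: data['preprocessor'],
        },
    },
}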