def state_estimator_model(
        input_shape,
        num_hidden_units=256,
        num_hidden_layers=2,
        output_size=4,  # (x, y, z_cos, z_sin)
        kernel_regularizer=None,
        preprocessor_params=None,
        preprocessor=None,
        name='state_estimator_preprocessor'):
    """Build a pixels -> state estimator as `preprocessor -> feedforward` stack.

    The model takes a uint8 'pixels' input of `input_shape`, runs it through
    `preprocessor` (built from `preprocessor_params` or the module default
    when not supplied), and regresses `output_size` values through a
    tanh-activated feedforward head.

    TODO: Make this take in observation keys instead of this hardcoded
    output size.
    """
    if preprocessor is None:
        params = (preprocessor_params
                  or DEFAULT_STATE_ESTIMATOR_PREPROCESSOR_PARAMS)
        preprocessor = get_preprocessor_from_params(None, params)

    head = feedforward_model(
        hidden_layer_sizes=(num_hidden_units, ) * num_hidden_layers,
        output_size=output_size,
        output_activation=tf.keras.activations.tanh,
        kernel_regularizer=kernel_regularizer,
        name='feedforward_state_est')

    return tfk.Sequential([
        tfk.Input(shape=input_shape, name='pixels', dtype=tf.uint8),
        preprocessor,
        head,
    ], name=name)
def get_dynamics_model_from_variant(variant, env, *args, **kwargs):
    """Instantiate a dynamics model from the experiment variant spec.

    Pops preprocessor/encoder/decoder sub-configs out of
    `variant['dynamics_model_params']['kwargs']` and forwards the rest to
    `create_dynamics_model`.
    """
    from .dynamics_model import create_dynamics_model

    model_params = deepcopy(variant['dynamics_model_params'])
    model_kwargs = deepcopy(model_params['kwargs'])

    preprocessors_params = model_kwargs.pop(
        'observation_preprocessors_params', {}).copy()
    observation_keys = (
        model_kwargs.pop('observation_keys', None) or env.observation_keys)
    encoder_kwargs = model_kwargs.pop('encoder_kwargs', {}).copy()
    decoder_kwargs = model_kwargs.pop('decoder_kwargs', {}).copy()
    dynamics_latent_dim = model_kwargs.pop('dynamics_latent_dim', 16)

    observation_shapes = OrderedDict(
        (key, shape) for key, shape in env.observation_shape.items()
        if key in observation_keys)

    input_shapes = {
        'observations': observation_shapes,
        'actions': env.action_shape,
    }

    # One (possibly None) preprocessor per observation key.
    observation_preprocessors = OrderedDict()
    for key in observation_shapes:
        per_key_params = preprocessors_params.get(key, None)
        observation_preprocessors[key] = (
            get_preprocessor_from_params(env, per_key_params)
            if per_key_params else None)

    preprocessors = {
        'observations': observation_preprocessors,
        'actions': None,
    }

    return create_dynamics_model(
        input_shapes=input_shapes,
        dynamics_latent_dim=dynamics_latent_dim,
        *args,
        observation_keys=observation_keys,
        preprocessors=preprocessors,
        encoder_kwargs=encoder_kwargs,
        decoder_kwargs=decoder_kwargs,
        **model_kwargs,
        **kwargs)
def get_V_function_from_variant(variant, env, *args, **kwargs):
    """Instantiate a V-function of `variant['V_params']['type']`."""
    V_params = variant['V_params']
    V_kwargs = deepcopy(V_params['kwargs'])
    preprocessor = get_preprocessor_from_params(
        env, V_kwargs.pop('preprocessor_params', None))
    return VALUE_FUNCTIONS[V_params['type']](
        observation_shape=env.active_observation_shape,
        *args,
        observation_preprocessor=preprocessor,
        **V_kwargs,
        **kwargs)
def get_Q_function_from_variant(variant, env, *args, **kwargs):
    """Instantiate a Q-function of `variant['Q_params']['type']`.

    NOTE(review): this name is re-defined later in the file; in a single
    module the last definition wins — confirm which variant is intended.
    """
    Q_params = variant['Q_params']
    Q_kwargs = deepcopy(Q_params['kwargs'])
    preprocessor = get_preprocessor_from_params(
        env, Q_kwargs.pop('preprocessor_params', None))
    return VALUE_FUNCTIONS[Q_params['type']](
        observation_shape=env.active_observation_shape,
        action_shape=env.action_space.shape,
        *args,
        observation_preprocessor=preprocessor,
        **Q_kwargs,
        **kwargs)
def get_policy_from_variant(variant, env, Qs, *args, **kwargs):
    """Instantiate a policy of `variant['policy_params']['type']`.

    Only the first Q-function in `Qs` is handed to the policy constructor.
    """
    policy_params = variant['policy_params']
    policy_kwargs = deepcopy(policy_params['kwargs'])
    preprocessor = get_preprocessor_from_params(
        env, policy_kwargs.pop('preprocessor_params', None))
    return POLICY_FUNCTIONS[policy_params['type']](
        env,
        *args,
        Q=Qs[0],
        preprocessor=preprocessor,
        **policy_kwargs,
        **kwargs)
def get_Q_function_from_variant(variant, env, *args, **kwargs):
    """Instantiate a goal-conditioned, multi-input Q-function.

    Builds per-key observation preprocessors and passes dict-structured
    `input_shapes`/`preprocessors` (observations + actions) to the
    constructor looked up in `VALUE_FUNCTIONS`.
    """
    Q_params = deepcopy(variant['Q_params'])
    Q_type = deepcopy(Q_params['type'])
    Q_kwargs = deepcopy(Q_params['kwargs'])

    preprocessors_params = Q_kwargs.pop(
        'observation_preprocessors_params', {}).copy()
    observation_keys = (
        Q_kwargs.pop('observation_keys', None) or env.observation_keys)
    goal_keys = Q_kwargs.pop('goal_keys', tuple())

    relevant_keys = observation_keys + goal_keys
    observation_shapes = OrderedDict(
        (key, shape) for key, shape in env.observation_shape.items()
        if key in relevant_keys)

    input_shapes = {
        'observations': observation_shapes,
        'actions': env.action_shape,
    }

    # One (possibly None) preprocessor per observation key.
    observation_preprocessors = OrderedDict()
    for key in observation_shapes:
        per_key_params = preprocessors_params.get(key, None)
        observation_preprocessors[key] = (
            get_preprocessor_from_params(env, per_key_params)
            if per_key_params else None)

    preprocessors = {
        'observations': observation_preprocessors,
        'actions': None,
    }

    return VALUE_FUNCTIONS[Q_type](
        input_shapes=input_shapes,
        observation_keys=observation_keys,
        goal_keys=goal_keys,
        *args,
        preprocessors=preprocessors,
        **Q_kwargs,
        **kwargs)
def get_reward_classifier_from_variant(variant, env, *args, **kwargs):
    """Instantiate a feedforward reward classifier from the variant spec."""
    from .vice_models import create_feedforward_reward_classifier

    classifier_params = variant['classifier_params']
    classifier_kwargs = deepcopy(classifier_params['kwargs'])

    # TODO Avi maybe have some optional preprocessing
    preprocessor = get_preprocessor_from_params(
        env, classifier_kwargs.pop('preprocessor_params', None))

    return create_feedforward_reward_classifier(
        observation_shape=env.active_observation_shape,
        *args,
        observation_preprocessor=preprocessor,
        **classifier_kwargs,
        **kwargs)
def get_distance_estimator_from_variant(variant, env, *args, **kwargs):
    """Instantiate a DDL distance estimator taking a pair of states.

    Both inputs ('s1', 's2') share the same observation shapes and the same
    preprocessor instances.
    """
    from softlearning.models.ddl.distance_estimator import (
        create_distance_estimator)

    fn_params = deepcopy(variant['distance_fn_params'])
    fn_kwargs = deepcopy(fn_params['kwargs'])

    preprocessors_params = fn_kwargs.pop(
        'observation_preprocessors_params', {}).copy()
    observation_keys = (
        fn_kwargs.pop('observation_keys', None) or env.observation_keys)

    observation_shapes = OrderedDict(
        (key, shape) for key, shape in env.observation_shape.items()
        if key in observation_keys)

    input_shapes = {
        's1': observation_shapes,
        's2': observation_shapes,
    }

    # One (possibly None) preprocessor per observation key, shared by s1/s2.
    observation_preprocessors = OrderedDict()
    for key in observation_shapes:
        per_key_params = preprocessors_params.get(key, None)
        observation_preprocessors[key] = (
            get_preprocessor_from_params(env, per_key_params)
            if per_key_params else None)

    preprocessors = {
        's1': observation_preprocessors,
        's2': observation_preprocessors,
    }

    return create_distance_estimator(
        input_shapes=input_shapes,
        observation_keys=observation_keys,
        *args,
        preprocessors=preprocessors,
        **fn_kwargs,
        **kwargs)
def get_embedding_from_variant(variant, env, *args, **kwargs):
    """Instantiate a DDL embedding function from the variant spec.

    Reads `variant['distance_fn_params']['kwargs']`, which must contain
    'embedding_dim'; builds per-key observation preprocessors and forwards
    everything to `create_embedding_fn`.

    Raises:
        ValueError: if 'embedding_dim' is missing from the distance
            function kwargs.
    """
    from softlearning.models.ddl.distance_estimator import (
        create_embedding_fn)

    distance_fn_params = deepcopy(variant['distance_fn_params'])
    distance_fn_kwargs = deepcopy(distance_fn_params['kwargs'])
    observation_preprocessors_params = distance_fn_kwargs.pop(
        'observation_preprocessors_params', {}).copy()
    observation_keys = distance_fn_kwargs.pop(
        'observation_keys', None) or env.observation_keys

    observation_shapes = OrderedDict(
        (key, shape) for key, shape in env.observation_shape.items()
        if key in observation_keys)
    input_shapes = observation_shapes

    # One (possibly None) preprocessor per observation key.
    observation_preprocessors = OrderedDict()
    for name in observation_shapes:
        preprocessor_params = observation_preprocessors_params.get(name, None)
        observation_preprocessors[name] = (
            get_preprocessor_from_params(env, preprocessor_params)
            if preprocessor_params else None)
    preprocessors = observation_preprocessors

    # Explicit raise instead of `assert`: asserts are stripped under
    # `python -O`, which would turn a missing config key into a confusing
    # KeyError further down.
    if 'embedding_dim' not in distance_fn_kwargs:
        raise ValueError(
            'Must specify an embedding dimension in the distance function'
            ' kwargs')
    embedding_dim = distance_fn_kwargs.pop('embedding_dim')

    return create_embedding_fn(
        input_shapes=input_shapes,
        embedding_dim=embedding_dim,
        observation_keys=observation_keys,
        *args,
        preprocessors=preprocessors,
        **distance_fn_kwargs,
        **kwargs)
def get_policy_from_params(policy_params, env, *args, **kwargs):
    """Instantiate a policy directly from a params dict (not a variant).

    For 'UniformPolicy' the env's action-space bounds are injected as
    `action_range`.
    """
    policy_type = policy_params['type']
    policy_kwargs = deepcopy(policy_params.get('kwargs', {}))

    preprocessors_params = policy_kwargs.pop(
        'observation_preprocessors_params', {})
    observation_keys = (
        policy_kwargs.pop('observation_keys', None) or env.observation_keys)
    goal_keys = policy_kwargs.pop('goal_keys', None) or tuple()

    relevant_keys = observation_keys + goal_keys
    observation_shapes = OrderedDict(
        (key, shape) for key, shape in env.observation_shape.items()
        if key in relevant_keys)

    # One (possibly None) preprocessor per observation key.
    observation_preprocessors = OrderedDict()
    for key in observation_shapes:
        per_key_params = preprocessors_params.get(key, None)
        observation_preprocessors[key] = (
            get_preprocessor_from_params(env, per_key_params)
            if per_key_params else None)

    if policy_type == 'UniformPolicy':
        policy_kwargs['action_range'] = (
            env.action_space.low, env.action_space.high)

    return POLICY_FUNCTIONS[policy_type](
        input_shapes=observation_shapes,
        output_shape=env.action_space.shape,
        observation_keys=observation_keys,
        goal_keys=goal_keys,
        *args,
        preprocessors=observation_preprocessors,
        **policy_kwargs,
        **kwargs)
def get_Q_function_from_variant(variant, env, *args, **kwargs):
    """Instantiate a Q-function ensemble, TQC-aware.

    For algorithm type 'TQC' the output size is the configured number of
    quantiles; otherwise a single scalar output per Q head.
    """
    Q_params = variant['Q_params']
    Q_kwargs = deepcopy(Q_params['kwargs'])

    algorithm_params = variant['algorithm_params']
    num_q = algorithm_params['kwargs']['num_q']
    if algorithm_params['type'] == 'TQC':
        num_outputs = algorithm_params['kwargs']['num_quantiles']
    else:
        num_outputs = 1

    preprocessor = get_preprocessor_from_params(
        env, Q_kwargs.pop('preprocessor_params', None))

    return VALUE_FUNCTIONS[Q_params['type']](
        observation_shape=env.active_observation_shape,
        action_shape=env.action_space.shape,
        *args,
        num_q=num_q,
        observation_preprocessor=preprocessor,
        output_size=num_outputs,
        **Q_kwargs,
        **kwargs)
def get_reward_classifier_from_variant(variant, env, *args, **kwargs):
    """Build a feedforward VICE reward classifier from the variant spec.

    Supports three per-key preprocessor sources: 'PickledPreprocessor'
    (loaded from a .pkl file and frozen if it is a Keras model),
    'EmbeddingPreprocessor' (built via `get_embedding_from_variant`), or any
    other type handled by `get_preprocessor_from_params`. When the algorithm
    type is 'VICEDynamicsAware', an extra 'dynamics_features' input of shape
    `dynamics_latent_dim` is added alongside the observations.
    """
    from .vice_models import create_feedforward_reward_classifier_function
    reward_classifier_params = deepcopy(variant['reward_classifier_params'])
    reward_classifier_type = deepcopy(reward_classifier_params['type'])
    # Only the feedforward classifier is supported here.
    assert reward_classifier_type == 'feedforward_classifier', (
        reward_classifier_type)
    reward_classifier_kwargs = deepcopy(reward_classifier_params['kwargs'])
    observation_preprocessors_params = reward_classifier_kwargs.pop(
        'observation_preprocessors_params', {}).copy()
    observation_keys = reward_classifier_kwargs.pop(
        'observation_keys', None) or env.observation_keys

    # TODO: Clean this up
    dynamics_aware = variant['algorithm_params']['type'] == 'VICEDynamicsAware'

    observation_shapes = OrderedDict(
        ((key, value) for key, value in env.observation_shape.items()
         if key in observation_keys))

    if dynamics_aware:
        # Extra classifier input carrying dynamics-model latent features.
        dynamics_model_kwargs = deepcopy(
            variant['dynamics_model_params']['kwargs'])
        dynamics_latent_dim = dynamics_model_kwargs['dynamics_latent_dim']
        dynamics_features_shape = tf.TensorShape(dynamics_latent_dim)
        input_shapes = {
            'observations': observation_shapes,
            'dynamics_features': dynamics_features_shape
        }
    else:
        input_shapes = observation_shapes

    observation_preprocessors = OrderedDict()
    for name, observation_shape in observation_shapes.items():
        preprocessor_params = observation_preprocessors_params.get(name, None)
        if not preprocessor_params:
            observation_preprocessors[name] = None
            continue
        preprocessor_type = preprocessor_params.get('type')
        if preprocessor_type == 'PickledPreprocessor':
            import pickle
            preprocessor_kwargs = preprocessor_params.pop('kwargs', {})
            assert 'preprocessor_path' in preprocessor_kwargs, (
                'Need to specify a .pkl file to load the preprocessor')
            # SECURITY NOTE(review): pickle.load executes arbitrary code from
            # the file — only load preprocessor paths you trust.
            with open(preprocessor_kwargs['preprocessor_path'], 'rb') as f:
                data = pickle.load(f)
            if 'extract_fn' in preprocessor_kwargs:
                # NOTE(review): this pops 'extract_fn' out of the ORIGINAL
                # `variant` dict (not the deepcopy above), mutating the
                # caller's variant — presumably so the non-picklable callable
                # doesn't survive in the variant; confirm this is intended.
                extract_fn = (variant['reward_classifier_params']['kwargs']
                              ['observation_preprocessors_params'][name]
                              ['kwargs'].pop('extract_fn'))
                # extract_fn = preprocessor_kwargs.pop('extract_fn')
                preprocessor = extract_fn(data)
            else:
                preprocessor = data
            # Freeze loaded Keras models so classifier training does not
            # update them.
            if isinstance(preprocessor, tf.keras.Model):
                preprocessor.trainable = False
            observation_preprocessors[name] = preprocessor
        elif preprocessor_type == 'EmbeddingPreprocessor':
            preprocessor_kwargs = preprocessor_params.pop('kwargs', {})
            observation_preprocessors[name] = get_embedding_from_variant(
                variant, env)
        else:
            observation_preprocessors[name] = get_preprocessor_from_params(
                env, preprocessor_params)

    if dynamics_aware:
        # Dynamics features are consumed raw (no preprocessor).
        preprocessors = {
            'observations': observation_preprocessors,
            'dynamics_features': None,
        }
    else:
        preprocessors = observation_preprocessors

    reward_classifier = create_feedforward_reward_classifier_function(
        input_shapes=input_shapes,
        observation_keys=observation_keys,
        *args,
        preprocessors=preprocessors,
        **reward_classifier_kwargs,
        **kwargs)

    return reward_classifier