def test_log_likelihood_sym(self, output_dim, input_shape):
    gmr = GaussianMLPRegressor(input_shape=input_shape,
                               output_dim=output_dim,
                               optimizer=PenaltyLbfgsOptimizer,
                               optimizer_args=dict())

    new_input_var = tf.compat.v1.placeholder(tf.float32,
                                             shape=(None, ) + input_shape)
    new_ys_var = tf.compat.v1.placeholder(dtype=tf.float32,
                                          name='ys',
                                          shape=(None, output_dim))

    data = np.random.random(size=input_shape)
    label = np.ones(output_dim)

    outputs = gmr.log_likelihood_sym(new_input_var,
                                     new_ys_var,
                                     name='ll_sym')
    ll_from_sym = self.sess.run(outputs,
                                feed_dict={
                                    new_input_var: [data],
                                    new_ys_var: [label]
                                })

    mean, log_std = gmr._f_pdists([data])
    ll = gmr.model.networks['default'].dist.log_likelihood(
        [label], dict(mean=mean, log_std=log_std))
    assert np.allclose(ll, ll_from_sym, rtol=0, atol=1e-5)
def test_fit_normalized(self):
    gmr = GaussianMLPRegressor(input_shape=(1, ), output_dim=1)
    self.sess.run(tf.compat.v1.global_variables_initializer())
    data = np.linspace(-np.pi, np.pi, 1000)
    obs = [{'observations': [[x]], 'returns': [np.sin(x)]} for x in data]

    observations = np.concatenate([p['observations'] for p in obs])
    returns = np.concatenate([p['returns'] for p in obs])
    returns = returns.reshape((-1, 1))

    for i in range(150):
        gmr.fit(observations, returns)
        # New assign operations are created in the first iteration, so
        # take the count from the second iteration as the reference.
        if i == 1:
            assign_ops_counts = np.sum(
                np.array([
                    'Assign' in n.name for n in
                    tf.compat.v1.get_default_graph().as_graph_def().node
                ]).astype(int))

    assign_ops_counts_after = np.sum(
        np.array([
            'Assign' in n.name for n in
            tf.compat.v1.get_default_graph().as_graph_def().node
        ]).astype(int))

    # Repeated fitting must not keep adding assign ops to the graph.
    assert assign_ops_counts == assign_ops_counts_after

    paths = {
        'observations': [[-np.pi], [-np.pi / 2], [-np.pi / 4], [0],
                         [np.pi / 4], [np.pi / 2], [np.pi]]
    }

    prediction = gmr.predict(paths['observations'])
    expected = [[0], [-1], [-0.707], [0], [0.707], [1], [0]]
    assert np.allclose(prediction, expected, rtol=0, atol=0.1)
def test_is_pickleable(self):
    gmr = GaussianMLPRegressor(input_shape=(1, ), output_dim=1)

    with tf.compat.v1.variable_scope(
            'GaussianMLPRegressor/GaussianMLPRegressorModel', reuse=True):
        bias = tf.compat.v1.get_variable(
            'dist_params/mean_network/hidden_0/bias')
        bias.load(tf.ones_like(bias).eval())

    result1 = gmr.predict(np.ones((1, 1)))
    h = pickle.dumps(gmr)

    with tf.compat.v1.Session(graph=tf.Graph()):
        gmr_pickled = pickle.loads(h)
        result2 = gmr_pickled.predict(np.ones((1, 1)))
        assert np.array_equal(result1, result2)
class GaussianMLPBaseline(Baseline):
    """A value function using a Gaussian MLP network."""

    def __init__(
            self,
            env_spec,
            subsample_factor=1.,
            num_seq_inputs=1,
            regressor_args=None,
            name='GaussianMLPBaseline',
    ):
        """
        Gaussian MLP Baseline with Model.

        It fits the input data to a Gaussian distribution estimated by
        an MLP.

        Args:
            env_spec (garage.envs.env_spec.EnvSpec): Environment
                specification.
            subsample_factor (float): The factor to subsample the data. By
                default it is 1.0, which means using all the data.
            num_seq_inputs (int): Number of sequences per input. By default
                it is 1, which means only a single sequence.
            regressor_args (dict): Arguments for the regressor.
        """
        super().__init__(env_spec)
        if regressor_args is None:
            regressor_args = dict()

        self._regressor = GaussianMLPRegressor(
            input_shape=(env_spec.observation_space.flat_dim *
                         num_seq_inputs, ),
            output_dim=1,
            name=name,
            **regressor_args)
        self.name = name

    def fit(self, paths):
        """Fit regressor based on paths."""
        observations = np.concatenate([p['observations'] for p in paths])
        returns = np.concatenate([p['returns'] for p in paths])
        self._regressor.fit(observations, returns.reshape((-1, 1)))

    def predict(self, path):
        """Predict value based on paths."""
        return self._regressor.predict(path['observations']).flatten()

    def get_param_values(self, **tags):
        """Get parameter values."""
        return self._regressor.get_param_values(**tags)

    def set_param_values(self, flattened_params, **tags):
        """Set parameter values to flattened_params."""
        self._regressor.set_param_values(flattened_params, **tags)

    def get_params_internal(self, **tags):
        """Get internal parameters."""
        return self._regressor.get_params_internal(**tags)
class GaussianMLPBaseline(Baseline, Parameterized, Serializable):
    """A value function using a Gaussian MLP network."""

    def __init__(
            self,
            env_spec,
            subsample_factor=1.,
            num_seq_inputs=1,
            regressor_args=None,
            name="GaussianMLPBaseline",
    ):
        """
        Constructor.

        :param env_spec: Environment specification.
        :param subsample_factor: The factor to subsample the data.
        :param num_seq_inputs: Number of sequences per input.
        :param regressor_args: Arguments for the regressor.
        :param name: Name of the baseline.
        """
        Parameterized.__init__(self)
        Serializable.quick_init(self, locals())
        super(GaussianMLPBaseline, self).__init__(env_spec)
        if regressor_args is None:
            regressor_args = dict()

        self._regressor = GaussianMLPRegressor(
            input_shape=(env_spec.observation_space.flat_dim *
                         num_seq_inputs, ),
            output_dim=1,
            name=name,
            **regressor_args)
        self.name = name

    @overrides
    def fit(self, paths):
        """Fit regressor based on paths."""
        observations = np.concatenate([p["observations"] for p in paths])
        returns = np.concatenate([p["returns"] for p in paths])
        self._regressor.fit(observations, returns.reshape((-1, 1)))

    @overrides
    def predict(self, path):
        """Predict value based on paths."""
        return self._regressor.predict(path["observations"]).flatten()

    @overrides
    def get_param_values(self, **tags):
        """Get parameter values."""
        return self._regressor.get_param_values(**tags)

    @overrides
    def set_param_values(self, flattened_params, **tags):
        """Set parameter values to flattened_params."""
        self._regressor.set_param_values(flattened_params, **tags)

    @overrides
    def get_params_internal(self, **tags):
        """Get internal parameters."""
        return self._regressor.get_params_internal(**tags)
def test_fit_smaller_subsample_factor(self):
    gmr = GaussianMLPRegressor(input_shape=(1, ),
                               output_dim=1,
                               subsample_factor=0.9)
    data = np.linspace(-np.pi, np.pi, 1000)
    obs = [{'observations': [[x]], 'returns': [np.sin(x)]} for x in data]

    observations = np.concatenate([p['observations'] for p in obs])
    returns = np.concatenate([p['returns'] for p in obs])
    for _ in range(150):
        gmr.fit(observations, returns.reshape((-1, 1)))

    paths = {
        'observations': [[-np.pi], [-np.pi / 2], [-np.pi / 4], [0],
                         [np.pi / 4], [np.pi / 2], [np.pi]]
    }

    prediction = gmr.predict(paths['observations'])
    expected = [[0], [-1], [-0.707], [0], [0.707], [1], [0]]
    assert np.allclose(prediction, expected, rtol=0, atol=0.1)
def test_is_pickleable2(self):
    gmr = GaussianMLPRegressor(input_shape=(1, ), output_dim=1)

    with tf.compat.v1.variable_scope(
            'GaussianMLPRegressor/GaussianMLPRegressorModel', reuse=True):
        x_mean = tf.compat.v1.get_variable('normalized_vars/x_mean')
        x_mean.load(tf.ones_like(x_mean).eval())
        x1 = gmr.model.networks['default'].x_mean.eval()

    h = pickle.dumps(gmr)
    with tf.compat.v1.Session(graph=tf.Graph()):
        gmr_pickled = pickle.loads(h)
        x2 = gmr_pickled.model.networks['default'].x_mean.eval()

    assert np.array_equal(x1, x2)
def test_fit_unnormalized(self):
    gmr = GaussianMLPRegressor(input_shape=(1, ),
                               output_dim=1,
                               subsample_factor=0.9,
                               normalize_inputs=False,
                               normalize_outputs=False)
    data = np.linspace(-np.pi, np.pi, 1000)
    obs = [{'observations': [[x]], 'returns': [np.sin(x)]} for x in data]

    observations = np.concatenate([p['observations'] for p in obs])
    returns = np.concatenate([p['returns'] for p in obs])
    for _ in range(150):
        gmr.fit(observations, returns.reshape((-1, 1)))

    paths = {
        'observations': [[-np.pi], [-np.pi / 2], [-np.pi / 4], [0],
                         [np.pi / 4], [np.pi / 2], [np.pi]]
    }

    prediction = gmr.predict(paths['observations'])
    expected = [[0], [-1], [-0.707], [0], [0.707], [1], [0]]
    assert np.allclose(prediction, expected, rtol=0, atol=0.1)

    # With normalization disabled, the running statistics should stay at
    # their initial values (zero mean, unit std).
    x_mean = self.sess.run(gmr.model.networks['default'].x_mean)
    x_mean_expected = np.zeros_like(x_mean)
    x_std = self.sess.run(gmr.model.networks['default'].x_std)
    x_std_expected = np.ones_like(x_std)
    assert np.array_equal(x_mean, x_mean_expected)
    assert np.array_equal(x_std, x_std_expected)

    y_mean = self.sess.run(gmr.model.networks['default'].y_mean)
    y_mean_expected = np.zeros_like(y_mean)
    y_std = self.sess.run(gmr.model.networks['default'].y_std)
    y_std_expected = np.ones_like(y_std)
    assert np.allclose(y_mean, y_mean_expected)
    assert np.allclose(y_std, y_std_expected)
class MultiTaskGaussianMLPBaseline(Baseline, Parameterized):
    """A value function using a Gaussian MLP network."""

    def __init__(
            self,
            env_spec,
            extra_dims=0,
            subsample_factor=1.,
            num_seq_inputs=1,
            regressor_args=None,
    ):
        """
        Constructor.

        :param env_spec: Environment specification.
        :param extra_dims: Number of extra input dimensions appended to the
            observation (e.g. for task one-hots and latents).
        :param subsample_factor: The factor to subsample the data.
        :param num_seq_inputs: Number of sequences per input.
        :param regressor_args: Arguments for the regressor.
        """
        Serializable.quick_init(self, locals())
        super(MultiTaskGaussianMLPBaseline, self).__init__(env_spec)
        if regressor_args is None:
            regressor_args = dict()

        self._regressor = GaussianMLPRegressor(
            input_shape=((env_spec.observation_space.flat_dim + extra_dims) *
                         num_seq_inputs, ),
            output_dim=1,
            name="vf",
            use_trust_region=True,
            **regressor_args)

    @overrides
    def fit(self, paths):
        """Fit regressor based on paths."""
        observations = np.concatenate([p["observations"] for p in paths])
        tasks = np.concatenate([p["tasks_gt"] for p in paths])
        latents = np.concatenate([p["latents"] for p in paths])
        aug_obs = np.concatenate([observations, tasks, latents], axis=1)
        returns = np.concatenate([p["returns"] for p in paths])
        self._regressor.fit(aug_obs, returns.reshape((-1, 1)))

    @overrides
    def predict(self, path):
        """Predict value based on paths."""
        inputs = np.concatenate(
            (path["observations"], path["tasks_gt"], path["latents"]),
            axis=1)
        return self._regressor.predict(inputs).flatten()

    @overrides
    def get_param_values(self, **tags):
        """Get parameter values."""
        return self._regressor.get_param_values(**tags)

    @overrides
    def set_param_values(self, flattened_params, **tags):
        """Set parameter values to flattened_params."""
        self._regressor.set_param_values(flattened_params, **tags)
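# Usage sketch (illustrative, not library source): the baseline consumes
# observations augmented with per-step task one-hots ("tasks_gt") and
# latents, so `extra_dims` must equal task_dim + latent_dim. The dimensions
# and the `env_spec` argument below are assumptions for the example.
def _example_multitask_baseline_usage(env_spec):
    obs_dim = env_spec.observation_space.flat_dim
    task_dim, latent_dim, path_len = 2, 3, 100
    baseline = MultiTaskGaussianMLPBaseline(env_spec=env_spec,
                                            extra_dims=task_dim + latent_dim)
    path = dict(observations=np.zeros((path_len, obs_dim)),
                tasks_gt=np.zeros((path_len, task_dim)),
                latents=np.zeros((path_len, latent_dim)),
                returns=np.zeros(path_len))
    baseline.fit([path])
    return baseline.predict(path)  # per-step values, shape (path_len, )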
def test_fit_normalized(self):
    gmr = GaussianMLPRegressor(input_shape=(1, ), output_dim=1)
    data = np.linspace(-np.pi, np.pi, 1000)
    obs = [{'observations': [[x]], 'returns': [np.sin(x)]} for x in data]

    observations = np.concatenate([p['observations'] for p in obs])
    returns = np.concatenate([p['returns'] for p in obs])
    returns = returns.reshape((-1, 1))
    for _ in range(150):
        gmr.fit(observations, returns)

    paths = {
        'observations': [[-np.pi], [-np.pi / 2], [-np.pi / 4], [0],
                         [np.pi / 4], [np.pi / 2], [np.pi]]
    }

    prediction = gmr.predict(paths['observations'])
    expected = [[0], [-1], [-0.707], [0], [0.707], [1], [0]]
    assert np.allclose(prediction, expected, rtol=0, atol=0.1)

    # With normalization enabled, the model's running statistics should
    # match the empirical statistics of the training data.
    x_mean = self.sess.run(gmr.model.networks['default'].x_mean)
    x_mean_expected = np.mean(observations, axis=0, keepdims=True)
    x_std = self.sess.run(gmr.model.networks['default'].x_std)
    x_std_expected = np.std(observations, axis=0, keepdims=True)
    assert np.allclose(x_mean, x_mean_expected)
    assert np.allclose(x_std, x_std_expected)

    y_mean = self.sess.run(gmr.model.networks['default'].y_mean)
    y_mean_expected = np.mean(returns, axis=0, keepdims=True)
    y_std = self.sess.run(gmr.model.networks['default'].y_std)
    y_std_expected = np.std(returns, axis=0, keepdims=True)
    assert np.allclose(y_mean, y_mean_expected)
    assert np.allclose(y_std, y_std_expected)
class CollisionAwareBaseline(Baseline, Parameterized):
    """A value function using a Gaussian MLP network."""

    def __init__(
            self,
            env_spec,
            subsample_factor=1.,
            num_seq_inputs=1,
            regressor_args=None,
    ):
        """
        Constructor.

        :param env_spec: Environment specification.
        :param subsample_factor: The factor to subsample the data.
        :param num_seq_inputs: Number of sequences per input.
        :param regressor_args: Arguments for the regressor.
        """
        Parameterized.__init__(self)
        Serializable.quick_init(self, locals())
        Baseline.__init__(self, env_spec)
        if regressor_args is None:
            regressor_args = dict()

        # One extra input dimension holds the per-step collision flag.
        self._regressor = GaussianMLPRegressor(
            input_shape=((env_spec.observation_space.flat_dim + 1) *
                         num_seq_inputs, ),
            output_dim=1,
            name="Baseline",
            **regressor_args)

    @overrides
    def fit(self, paths):
        """Fit regressor based on paths."""
        observations = np.concatenate([p["observations"] for p in paths])
        # Convert each path's collision flags before concatenating, so that
        # paths of different lengths are handled correctly.
        collisions = np.concatenate(
            [np.float32(p["env_infos"]["in_collision"]) for p in paths])
        collisions = np.expand_dims(collisions, axis=1)
        aug_obs = np.concatenate([observations, collisions], axis=1)
        returns = np.concatenate([p["returns"] for p in paths])
        self._regressor.fit(aug_obs, returns.reshape((-1, 1)))

    @overrides
    def predict(self, path):
        """Predict value based on paths."""
        collisions = np.expand_dims(path["env_infos"]["in_collision"], axis=1)
        inputs = np.concatenate([path["observations"], collisions], axis=1)
        return self._regressor.predict(inputs).flatten()

    @overrides
    def get_param_values(self, **tags):
        """Get parameter values."""
        return self._regressor.get_param_values(**tags)

    @overrides
    def set_param_values(self, flattened_params, **tags):
        """Set parameter values to flattened_params."""
        self._regressor.set_param_values(flattened_params, **tags)

    @overrides
    def get_params_internal(self, **tags):
        """Get internal parameters."""
        return self._regressor.get_params_internal(**tags)
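# Usage sketch (illustrative): CollisionAwareBaseline appends a scalar
# per-step collision flag taken from env_infos["in_collision"] to each
# observation, which is why the regressor's input width is flat_dim + 1.
# `env_spec` and the path contents below are assumptions for the example.
def _example_collision_baseline_usage(env_spec):
    obs_dim = env_spec.observation_space.flat_dim
    path_len = 100
    baseline = CollisionAwareBaseline(env_spec=env_spec)
    path = dict(observations=np.zeros((path_len, obs_dim)),
                env_infos=dict(in_collision=np.zeros(path_len)),
                returns=np.zeros(path_len))
    baseline.fit([path])
    return baseline.predict(path)  # per-step values, shape (path_len, )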
def test_auxiliary(self):
    gmr = GaussianMLPRegressor(input_shape=(1, ), output_dim=5)

    assert gmr.vectorized
    assert gmr.distribution.event_shape.as_list() == [5]
class GaussianMLPBaseline(Baseline):
    """Gaussian MLP Baseline with Model.

    It fits the input data to a Gaussian distribution estimated by an MLP.

    Args:
        env_spec (garage.envs.env_spec.EnvSpec): Environment specification.
        subsample_factor (float): The factor to subsample the data. By
            default it is 1.0, which means using all the data.
        num_seq_inputs (int): Number of sequences per input. By default it
            is 1, which means only a single sequence.
        regressor_args (dict): Arguments for the regressor.
        name (str): Name of the baseline.

    """

    def __init__(
            self,
            env_spec,
            subsample_factor=1.,
            num_seq_inputs=1,
            regressor_args=None,
            name='GaussianMLPBaseline',
    ):
        super().__init__(env_spec)
        if regressor_args is None:
            regressor_args = dict()

        self._regressor = GaussianMLPRegressor(
            input_shape=(env_spec.observation_space.flat_dim *
                         num_seq_inputs, ),
            output_dim=1,
            name=name,
            subsample_factor=subsample_factor,
            **regressor_args)
        self.name = name

    def fit(self, paths):
        """Fit regressor based on paths.

        Args:
            paths (list[dict]): Sample paths.

        """
        observations = np.concatenate([p['observations'] for p in paths])
        returns = np.concatenate([p['returns'] for p in paths])
        self._regressor.fit(observations, returns.reshape((-1, 1)))

    def predict(self, path):
        """Predict value based on a path.

        Args:
            path (dict): A single sample path.

        Returns:
            numpy.ndarray: Predicted values.

        """
        return self._regressor.predict(path['observations']).flatten()

    def get_param_values(self):
        """Get parameter values.

        Returns:
            List[np.ndarray]: A list of values of each parameter.

        """
        return self._regressor.get_param_values()

    def set_param_values(self, flattened_params):
        """Set param values.

        Args:
            flattened_params (np.ndarray): A numpy array of parameter values.

        """
        self._regressor.set_param_values(flattened_params)

    def get_params_internal(self):
        """Get the params, which are the trainable variables.

        Returns:
            List[tf.Variable]: A list of trainable variables in the current
                variable scope.

        """
        return self._regressor.get_params_internal()
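# Usage sketch (illustrative, not library source): fit the baseline on
# sampled paths and query per-step value estimates for a single path.
# `env_spec` is assumed to be a garage EnvSpec, and `hidden_sizes` is an
# assumed GaussianMLPRegressor option passed through `regressor_args`.
def _example_gaussian_mlp_baseline_usage(env_spec, paths):
    baseline = GaussianMLPBaseline(env_spec=env_spec,
                                   regressor_args=dict(hidden_sizes=(32, 32)))
    baseline.fit(paths)  # each path dict needs 'observations' and 'returns'
    return baseline.predict(paths[0])  # per-step values, shape (path_len, )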
def test_auxiliary(self):
    gmr = GaussianMLPRegressor(input_shape=(1, ), output_dim=5)

    assert not gmr.recurrent
    assert gmr.vectorized
    assert gmr.distribution.dim == 5