def test_fit_unnormalized(self):
    """Fit sin(x) with input normalization switched off.

    The regressor is built with ``normalize_inputs=False`` and fitted 150
    times on 1000 samples of sin(x) over [-pi, pi]. The test then checks
    predictions at seven known angles (absolute tolerance 0.1) and
    confirms that the ``x_mean`` / ``x_std`` variables of the default
    network are still all zeros / all ones after fitting.
    """
    regressor = ContinuousMLPRegressorWithModel(
        input_shape=(1, ),
        output_dim=1,
        normalize_inputs=False,
    )

    # One single-step "path" per sample point of the sine curve.
    samples = np.linspace(-np.pi, np.pi, 1000)
    sample_paths = [{
        'observations': [[x]],
        'returns': [np.sin(x)]
    } for x in samples]

    observations = np.concatenate(
        [path['observations'] for path in sample_paths])
    returns = np.concatenate([path['returns'] for path in sample_paths])

    for _ in range(150):
        regressor.fit(observations, returns.reshape((-1, 1)))

    # Query points whose sine values are well known.
    angles = [
        -np.pi, -np.pi / 2, -np.pi / 4, 0, np.pi / 4, np.pi / 2, np.pi
    ]
    paths = {'observations': [[angle] for angle in angles]}

    prediction = regressor.predict(paths['observations'])
    expected = [[0], [-1], [-0.707], [0], [0.707], [1], [0]]
    assert np.allclose(prediction, expected, rtol=0, atol=0.1)

    # Read the normalization variables back from the default network.
    default_network = regressor.model.networks['default']
    x_mean = self.sess.run(default_network.x_mean)
    x_std = self.sess.run(regressor.model.networks['default'].x_std)

    assert np.array_equal(x_mean, np.zeros_like(x_mean))
    assert np.array_equal(x_std, np.ones_like(x_std))
def __init__(
        self,
        env_spec,
        subsample_factor=1.,
        num_seq_inputs=1,
        regressor_args=None,
        name='ContinuousMLPBaselineWithModel',
):
    """Build the baseline around a ContinuousMLPRegressorWithModel.

    Args:
        env_spec (garage.envs.env_spec.EnvSpec): Environment
            specification; its flattened observation dimension sizes the
            regressor input.
        subsample_factor (float): The factor to subsample the data.
            Defaults to 1.0 (use all the data). Note that this
            constructor accepts the value but does not read it.
        num_seq_inputs (int): Number of sequences per input. It
            multiplies the flattened observation dimension to give the
            regressor's input size. Defaults to 1.
        regressor_args (dict): Extra keyword arguments forwarded to the
            regressor. ``None`` means no extra arguments.
        name (str): Name passed to the regressor and stored on the
            baseline as ``self.name``.
    """
    super().__init__(env_spec)

    extra_kwargs = {} if regressor_args is None else regressor_args
    flat_input_dim = env_spec.observation_space.flat_dim * num_seq_inputs

    # The baseline predicts one scalar per observation, so output_dim=1.
    self._regressor = ContinuousMLPRegressorWithModel(
        input_shape=(flat_input_dim, ),
        output_dim=1,
        name=name,
        **extra_kwargs)
    self.name = name
def test_predict_sym(self, output_dim, input_shape):
    """Check that ``predict_sym`` agrees with the regressor's ``_f_predict``.

    A fresh placeholder is wired through ``predict_sym``. Evaluating the
    resulting tensor on one random sample must match what ``_f_predict``
    returns for that sample, within an absolute tolerance of 1e-5.
    """
    regressor = ContinuousMLPRegressorWithModel(
        input_shape=input_shape,
        output_dim=output_dim,
        optimizer=LbfgsOptimizer,
        optimizer_args={},
    )

    # Leading None leaves the batch dimension unspecified.
    batched_shape = (None, ) + input_shape
    sym_input = tf.placeholder(tf.float32, shape=batched_shape)

    sample = np.random.random(size=input_shape)

    sym_output = regressor.predict_sym(sym_input, name='y_hat_sym')
    from_symbolic = self.sess.run(sym_output,
                                  feed_dict={sym_input: [sample]})
    from_predict = regressor._f_predict([sample])

    assert np.allclose(from_predict, from_symbolic, rtol=0, atol=1e-5)
def test_is_pickleable(self):
    """Check that a pickled regressor predicts the same as the original.

    The first hidden layer's bias is overwritten with ones before
    pickling. The regressor is then restored inside a session on a
    brand-new graph, and its prediction for the same input must equal
    the original's.
    """
    regressor = ContinuousMLPRegressorWithModel(input_shape=(1, ),
                                                output_dim=1)

    # Look up the existing bias variable (reuse=True) rather than
    # creating a new one.
    scope_name = ('ContinuousMLPRegressorWithModel/'
                  'NormalizedInputMLPModel')
    with tf.variable_scope(scope_name, reuse=True):
        hidden_bias = tf.get_variable('mlp/hidden_0/bias')
    hidden_bias.load(tf.ones_like(hidden_bias).eval())

    probe = np.ones((1, 1))
    result_before = regressor.predict(probe)
    serialized = pickle.dumps(regressor)

    # Restore in a session bound to a fresh, empty graph.
    with tf.Session(graph=tf.Graph()):
        restored = pickle.loads(serialized)
        result_after = restored.predict(np.ones((1, 1)))

    assert np.array_equal(result_before, result_after)
class ContinuousMLPBaselineWithModel(Baseline):
    """Baseline (value function) backed by a ContinuousMLPRegressorWithModel.

    Fitting, prediction and parameter access are all delegated to the
    wrapped regressor held in ``self._regressor``.
    """

    def __init__(
            self,
            env_spec,
            subsample_factor=1.,
            num_seq_inputs=1,
            regressor_args=None,
            name='ContinuousMLPBaselineWithModel',
    ):
        """Build the baseline around a ContinuousMLPRegressorWithModel.

        Args:
            env_spec (garage.envs.env_spec.EnvSpec): Environment
                specification; its flattened observation dimension sizes
                the regressor input.
            subsample_factor (float): The factor to subsample the data.
                Defaults to 1.0 (use all the data). Note that this
                constructor accepts the value but does not read it.
            num_seq_inputs (int): Number of sequences per input. It
                multiplies the flattened observation dimension to give
                the regressor's input size. Defaults to 1.
            regressor_args (dict): Extra keyword arguments forwarded to
                the regressor. ``None`` means no extra arguments.
            name (str): Name passed to the regressor and stored on the
                baseline as ``self.name``.
        """
        super().__init__(env_spec)

        extra_kwargs = {} if regressor_args is None else regressor_args
        flat_input_dim = (env_spec.observation_space.flat_dim *
                          num_seq_inputs)

        # One scalar value estimate per observation, hence output_dim=1.
        self._regressor = ContinuousMLPRegressorWithModel(
            input_shape=(flat_input_dim, ),
            output_dim=1,
            name=name,
            **extra_kwargs)
        self.name = name

    @overrides
    def fit(self, paths):
        """Fit the regressor on the observations and returns of ``paths``.

        Each element of ``paths`` must provide ``'observations'`` and
        ``'returns'`` entries. They are concatenated across paths, and
        the returns are reshaped into a single column before fitting.
        """
        obs_batch = np.concatenate([path['observations'] for path in paths])
        return_batch = np.concatenate([path['returns'] for path in paths])
        self._regressor.fit(obs_batch, return_batch.reshape((-1, 1)))

    @overrides
    def predict(self, path):
        """Return flattened regressor predictions for one path's observations."""
        predictions = self._regressor.predict(path['observations'])
        return predictions.flatten()

    @overrides
    def get_param_values(self, **tags):
        """Return the wrapped regressor's parameter values."""
        return self._regressor.get_param_values(**tags)

    @overrides
    def set_param_values(self, flattened_params, **tags):
        """Set the wrapped regressor's parameters from ``flattened_params``."""
        self._regressor.set_param_values(flattened_params, **tags)

    @overrides
    def get_params_internal(self, **tags):
        """Return the wrapped regressor's internal parameters."""
        return self._regressor.get_params_internal(**tags)