def test_fit_unnormalized(self):
        """Fit sin(x) with normalize_inputs=False and check the outcome.

        The regressor is fitted 150 times on 1000 samples of sin(x) taken
        over [-pi, pi]. Predictions at seven probe points must match the
        expected sine values within an absolute tolerance of 0.1, and the
        x_mean / x_std values read from the 'default' network must be all
        zeros / all ones respectively.
        """
        regressor = ContinuousMLPRegressor(
            input_shape=(1, ),
            output_dim=1,
            normalize_inputs=False,
        )

        # One single-step sample dict per grid point.
        samples = [{
            'observations': [[x]],
            'returns': [np.sin(x)]
        } for x in np.linspace(-np.pi, np.pi, 1000)]

        train_inputs = np.concatenate([s['observations'] for s in samples])
        train_targets = np.concatenate([s['returns'] for s in samples])
        # The regressor is fed targets as a column, one row per sample.
        target_column = train_targets.reshape((-1, 1))
        for _ in range(150):
            regressor.fit(train_inputs, target_column)

        probe_points = [[-np.pi], [-np.pi / 2], [-np.pi / 4], [0],
                        [np.pi / 4], [np.pi / 2], [np.pi]]
        predicted = regressor.predict(probe_points)
        wanted = [[0], [-1], [-0.707], [0], [0.707], [1], [0]]
        assert np.allclose(predicted, wanted, rtol=0, atol=0.1)

        # Read both statistics before asserting on either of them.
        x_mean = self.sess.run(regressor.model._networks['default'].x_mean)
        x_std = self.sess.run(regressor.model._networks['default'].x_std)
        assert np.array_equal(x_mean, np.zeros_like(x_mean))
        assert np.array_equal(x_std, np.ones_like(x_std))
class ContinuousMLPBaseline(Baseline, Parameterized, Serializable):
    """Baseline that delegates all work to a ContinuousMLPRegressor."""
    def __init__(
        self,
        env_spec,
        subsample_factor=1.,
        num_seq_inputs=1,
        regressor_args=None,
        name='ContinuousMLPBaseline',
    ):
        """
        Build the baseline and the regressor it wraps.

        :param env_spec: environment specification; its
            observation_space.flat_dim determines the regressor input size.
        :param subsample_factor: accepted but not read anywhere in this
            class.
        :param num_seq_inputs: multiplier applied to the flat observation
            dimension to obtain the regressor input size.
        :param regressor_args: optional dict of extra keyword arguments
            forwarded to ContinuousMLPRegressor; None means no extras.
        :param name: name passed to the regressor and stored on self.name.
        """
        Parameterized.__init__(self)
        # locals() is captured here, before any other local name is bound
        # in this method.
        Serializable.quick_init(self, locals())
        super(ContinuousMLPBaseline, self).__init__(env_spec)

        extra_kwargs = {} if regressor_args is None else regressor_args
        flat_input_dim = env_spec.observation_space.flat_dim * num_seq_inputs

        self._regressor = ContinuousMLPRegressor(
            input_shape=(flat_input_dim, ),
            output_dim=1,
            name=name,
            **extra_kwargs)
        self.name = name

    @overrides
    def fit(self, paths):
        """Fit the regressor on the observations and returns of all paths."""
        stacked_obs = np.concatenate([p['observations'] for p in paths])
        stacked_returns = np.concatenate([p['returns'] for p in paths])
        # Returns are handed to the regressor as a single column.
        target_column = stacked_returns.reshape((-1, 1))
        self._regressor.fit(stacked_obs, target_column)

    @overrides
    def predict(self, path):
        """Return the flattened regressor prediction for one path."""
        raw_prediction = self._regressor.predict(path['observations'])
        return raw_prediction.flatten()

    @overrides
    def get_param_values(self, **tags):
        """Return the regressor's parameter values, filtered by tags."""
        return self._regressor.get_param_values(**tags)

    @overrides
    def set_param_values(self, val, **tags):
        """Forward val to the regressor as its new parameter values."""
        self._regressor.set_param_values(val, **tags)

    @overrides
    def get_params_internal(self, **tags):
        """Return the regressor's internal parameters, filtered by tags."""
        return self._regressor.get_params_internal(**tags)
Beispiel #3
0
class ContinuousMLPBaseline(Baseline):
    """Baseline whose value estimates come from a ContinuousMLPRegressor."""
    def __init__(
        self,
        env_spec,
        subsample_factor=1.,
        num_seq_inputs=1,
        regressor_args=None,
        name='ContinuousMLPBaseline',
    ):
        """
        Build the baseline and the regressor it wraps.

        The regressor is later fitted to (observation, return) pairs
        collected from sample paths.

        Args:
            env_spec (garage.envs.env_spec.EnvSpec): Environment
                specification; its observation_space.flat_dim determines
                the regressor input size.
            subsample_factor (float): Accepted but not read anywhere in
                this class.
            num_seq_inputs (int): Multiplier applied to the flat
                observation dimension to obtain the regressor input size.
                Defaults to 1.
            regressor_args (dict): Extra keyword arguments forwarded to
                ContinuousMLPRegressor. None means no extras.
            name (str): Name passed to the regressor and stored on
                self.name.
        """
        super().__init__(env_spec)

        extra_kwargs = {} if regressor_args is None else regressor_args
        flat_input_dim = env_spec.observation_space.flat_dim * num_seq_inputs

        self._regressor = ContinuousMLPRegressor(
            input_shape=(flat_input_dim, ),
            output_dim=1,
            name=name,
            **extra_kwargs)
        self.name = name

    def get_param_values(self, **tags):
        """Return the regressor's parameter values, filtered by tags."""
        return self._regressor.get_param_values(**tags)

    def set_param_values(self, flattened_params, **tags):
        """Forward flattened_params to the regressor as its new values."""
        self._regressor.set_param_values(flattened_params, **tags)

    def get_params_internal(self, **tags):
        """Return the regressor's internal parameters, filtered by tags."""
        return self._regressor.get_params_internal(**tags)

    def fit(self, paths):
        """Fit the regressor on the observations and returns of all paths."""
        stacked_obs = np.concatenate([p['observations'] for p in paths])
        stacked_returns = np.concatenate([p['returns'] for p in paths])
        # Returns are handed to the regressor as a single column.
        target_column = stacked_returns.reshape((-1, 1))
        self._regressor.fit(stacked_obs, target_column)

    def predict(self, path):
        """Return the flattened regressor prediction for one path."""
        raw_prediction = self._regressor.predict(path['observations'])
        return raw_prediction.flatten()
Beispiel #4
0
class ContinuousMLPBaseline(Baseline):
    """Baseline whose value estimates come from a ContinuousMLPRegressor.

    The wrapped regressor is fitted to (observation, return) pairs
    collected from sample paths, and every parameter accessor simply
    delegates to it.

    Args:
        env_spec (garage.envs.env_spec.EnvSpec): Environment specification;
            its observation_space.flat_dim determines the regressor input
            size.
        num_seq_inputs (int): Multiplier applied to the flat observation
            dimension to obtain the regressor input size. Defaults to 1.
        regressor_args (dict): Extra keyword arguments forwarded to
            ContinuousMLPRegressor. None means no extras.
        name (str): Name passed to the regressor and stored on self.name.

    """
    def __init__(self,
                 env_spec,
                 num_seq_inputs=1,
                 regressor_args=None,
                 name='ContinuousMLPBaseline'):
        super().__init__(env_spec)

        extra_kwargs = {} if regressor_args is None else regressor_args
        flat_input_dim = env_spec.observation_space.flat_dim * num_seq_inputs

        self._regressor = ContinuousMLPRegressor(
            input_shape=(flat_input_dim, ),
            output_dim=1,
            name=name,
            **extra_kwargs)
        self.name = name

    def get_param_values(self):
        """Get parameter values.

        Returns:
            The value returned by the wrapped regressor's
            get_param_values().

        """
        return self._regressor.get_param_values()

    def set_param_values(self, flattened_params):
        """Set parameter values.

        Args:
            flattened_params (np.ndarray): Parameter values, forwarded
                unchanged to the wrapped regressor.

        """
        self._regressor.set_param_values(flattened_params)

    def get_params_internal(self):
        """Get the internal parameters of the wrapped regressor.

        Returns:
            The value returned by the wrapped regressor's
            get_params_internal().

        """
        return self._regressor.get_params_internal()

    def fit(self, paths):
        """Fit the regressor on the data gathered from all paths.

        Args:
            paths (Iterable[dict]): Sample paths. Each entry is indexed
                with the keys 'observations' and 'returns'.

        """
        stacked_obs = np.concatenate([p['observations'] for p in paths])
        stacked_returns = np.concatenate([p['returns'] for p in paths])
        # Returns are handed to the regressor as a single column.
        target_column = stacked_returns.reshape((-1, 1))
        self._regressor.fit(stacked_obs, target_column)

    def predict(self, path):
        """Predict values for a single path.

        Args:
            path (dict): Sample path; only its 'observations' entry is
                read.

        Returns:
            numpy.ndarray: The regressor prediction, flattened.

        """
        raw_prediction = self._regressor.predict(path['observations'])
        return raw_prediction.flatten()