def __init__(
        self,
        env_spec,
        extra_dims=0,
        subsample_factor=1.,
        num_seq_inputs=1,
        regressor_args=None,
    ):
        """
        Constructor.

        Builds the internal GaussianMLPRegressor (named "vf", one output,
        trust region enabled) that this baseline delegates to.

        :param env_spec: Environment specification; its
            ``observation_space.flat_dim`` is used to size the regressor
            input.
        :param extra_dims: Number of additional input features added to the
            flat observation dimension before it is multiplied by
            ``num_seq_inputs``.
        :param subsample_factor: Forwarded to the regressor as
            ``subsample_factor`` unless ``regressor_args`` already supplies
            that key.
        :param num_seq_inputs: Multiplier applied to the per-step input
            width to obtain the regressor input width.
        :param regressor_args: Optional dict of extra keyword arguments for
            GaussianMLPRegressor. The dict passed in is not modified.
        """
        Serializable.quick_init(self, locals())
        super(MultiTaskGaussianMLPBaseline, self).__init__(env_spec)
        # Work on a copy so the caller's dict is never mutated, and let an
        # explicit entry in regressor_args win over the constructor argument.
        regressor_args = dict(regressor_args or {})
        regressor_args.setdefault("subsample_factor", subsample_factor)

        self._regressor = GaussianMLPRegressor(
            input_shape=((env_spec.observation_space.flat_dim + extra_dims) *
                         num_seq_inputs, ),
            output_dim=1,
            name="vf",
            use_trust_region=True,
            **regressor_args)
예제 #2
0
    def test_log_likelihood_sym(self, output_dim, input_shape):
        """Compare the symbolic log-likelihood with the distribution's value.

        A single random input and an all-ones label are pushed through the
        op built by ``log_likelihood_sym``; the result must match the
        log-likelihood computed from the regressor's own mean and log-std
        within an absolute tolerance of 1e-5.
        """
        regressor = GaussianMLPRegressor(input_shape=input_shape,
                                         output_dim=output_dim,
                                         optimizer=PenaltyLbfgsOptimizer,
                                         optimizer_args=dict())

        batched_input_shape = (None, ) + input_shape
        input_ph = tf.compat.v1.placeholder(tf.float32,
                                            shape=batched_input_shape)
        label_ph = tf.compat.v1.placeholder(dtype=tf.float32,
                                            name='ys',
                                            shape=(None, output_dim))

        sample = np.random.random(size=input_shape)
        target = np.ones(output_dim)

        ll_op = regressor.log_likelihood_sym(input_ph,
                                             label_ph,
                                             name='ll_sym')
        feed = {input_ph: [sample], label_ph: [target]}
        ll_from_sym = self.sess.run(ll_op, feed_dict=feed)

        # Reference value: evaluate the distribution directly on the
        # parameters the regressor predicts for the same sample.
        mean, log_std = regressor._f_pdists([sample])
        dist = regressor.model.networks['default'].dist
        ll = dist.log_likelihood([target], dict(mean=mean, log_std=log_std))
        assert np.allclose(ll, ll_from_sym, rtol=0, atol=1e-5)
예제 #3
0
    def test_fit_normalized(self):
        """Fit sin(x) and check that repeated fits add no new Assign ops.

        The number of graph nodes whose name contains 'Assign' is recorded
        after the second fit and must be unchanged after all 150 fits.
        Predictions at seven reference points must then be within 0.1 of
        sin(x).
        """

        def count_assign_ops():
            """Count default-graph nodes whose name contains 'Assign'."""
            nodes = tf.get_default_graph().as_graph_def().node
            is_assign = np.array(['Assign' in node.name for node in nodes])
            return np.sum(is_assign.astype(int))

        regressor = GaussianMLPRegressor(input_shape=(1, ), output_dim=1)
        self.sess.run(tf.global_variables_initializer())
        xs = np.linspace(-np.pi, np.pi, 1000)
        samples = [{
            'observations': [[x]],
            'returns': [np.sin(x)]
        } for x in xs]

        observations = np.concatenate([s['observations'] for s in samples])
        targets = np.concatenate([s['returns'] for s in samples])
        targets = targets.reshape((-1, 1))
        for step in range(150):
            regressor.fit(observations, targets)
            # The first fit creates new assign operations, so the reference
            # count is taken after the second one.
            if step == 1:
                assign_ops_counts = count_assign_ops()
        assign_ops_counts_after = count_assign_ops()

        assert assign_ops_counts == assign_ops_counts_after

        query_points = [[x] for x in (-np.pi, -np.pi / 2, -np.pi / 4, 0,
                                      np.pi / 4, np.pi / 2, np.pi)]
        prediction = regressor.predict(query_points)
        expected = [[0], [-1], [-0.707], [0], [0.707], [1], [0]]
        assert np.allclose(prediction, expected, rtol=0, atol=0.1)
예제 #4
0
    def __init__(
            self,
            env_spec,
            subsample_factor=1.,
            num_seq_inputs=1,
            regressor_args=None,
            name='GaussianMLPBaseline',
    ):
        """
        Gaussian MLP Baseline with Model.

        It fits the input data to a gaussian distribution estimated by
        a MLP.

        Args:
            env_spec (garage.envs.env_spec.EnvSpec): Environment specification.
            subsample_factor (float): The factor to subsample the data. By
                default it is 1.0, which means using all the data. It is
                forwarded to the regressor unless `regressor_args` already
                contains a `subsample_factor` entry.
            num_seq_inputs (int): Number of sequence per input. By default
                it is 1, which means only one single sequence. It multiplies
                the flat observation dimension to give the regressor input
                width.
            regressor_args (dict): Arguments for regressor. The dict passed
                in is not modified.
            name (str): Name passed to the regressor and stored on
                `self.name`.
        """
        super().__init__(env_spec)
        # Work on a copy so the caller's dict is never mutated, and let an
        # explicit entry in regressor_args win over the constructor argument.
        regressor_args = dict(regressor_args or {})
        regressor_args.setdefault('subsample_factor', subsample_factor)

        self._regressor = GaussianMLPRegressor(
            input_shape=(env_spec.observation_space.flat_dim *
                         num_seq_inputs, ),
            output_dim=1,
            name=name,
            **regressor_args)
        self.name = name
예제 #5
0
    def __init__(
        self,
        env_spec,
        subsample_factor=1.,
        num_seq_inputs=1,
        regressor_args=None,
        name="GaussianMLPBaseline",
    ):
        """
        Constructor.

        Builds the internal GaussianMLPRegressor (one output) that this
        baseline delegates to.

        :param env_spec: Environment specification; its
            ``observation_space.flat_dim`` is used to size the regressor
            input.
        :param subsample_factor: Forwarded to the regressor as
            ``subsample_factor`` unless ``regressor_args`` already supplies
            that key.
        :param num_seq_inputs: Multiplier applied to the flat observation
            dimension to obtain the regressor input width.
        :param regressor_args: Optional dict of extra keyword arguments for
            GaussianMLPRegressor. The dict passed in is not modified.
        :param name: Name passed to the regressor and stored on
            ``self.name``.
        """
        Parameterized.__init__(self)
        Serializable.quick_init(self, locals())
        super(GaussianMLPBaseline, self).__init__(env_spec)
        # Work on a copy so the caller's dict is never mutated, and let an
        # explicit entry in regressor_args win over the constructor argument.
        regressor_args = dict(regressor_args or {})
        regressor_args.setdefault("subsample_factor", subsample_factor)

        self._regressor = GaussianMLPRegressor(
            input_shape=(env_spec.observation_space.flat_dim *
                         num_seq_inputs, ),
            output_dim=1,
            name=name,
            **regressor_args)
        self.name = name
예제 #6
0
    def test_is_pickleable(self):
        """Predictions must be identical before and after a pickle round trip.

        A hidden-layer bias of the mean network is overwritten with ones so
        that the check does not pass merely because of fresh initial values.
        """
        regressor = GaussianMLPRegressor(input_shape=(1, ), output_dim=1)

        model_scope = 'GaussianMLPRegressor/GaussianMLPRegressorModel'
        with tf.compat.v1.variable_scope(model_scope, reuse=True):
            hidden_bias = tf.compat.v1.get_variable(
                'dist_params/mean_network/hidden_0/bias')
        hidden_bias.load(tf.ones_like(hidden_bias).eval())

        before = regressor.predict(np.ones((1, 1)))
        serialized = pickle.dumps(regressor)

        # Restore inside a brand-new graph/session so nothing is shared with
        # the original regressor.
        with tf.compat.v1.Session(graph=tf.Graph()):
            restored = pickle.loads(serialized)
            after = restored.predict(np.ones((1, 1)))
            assert np.array_equal(before, after)
예제 #7
0
class GaussianMLPBaseline(Baseline):
    """A value function using Gaussian MLP network.

    Fitting, prediction and parameter access are all delegated to an
    internal GaussianMLPRegressor with a single output.
    """

    def __init__(
            self,
            env_spec,
            subsample_factor=1.,
            num_seq_inputs=1,
            regressor_args=None,
            name='GaussianMLPBaseline',
    ):
        """
        Gaussian MLP Baseline with Model.

        It fits the input data to a gaussian distribution estimated by
        a MLP.

        Args:
            env_spec (garage.envs.env_spec.EnvSpec): Environment specification.
            subsample_factor (float): The factor to subsample the data. By
                default it is 1.0, which means using all the data. It is
                forwarded to the regressor unless `regressor_args` already
                contains a `subsample_factor` entry.
            num_seq_inputs (int): Number of sequence per input. By default
                it is 1, which means only one single sequence. It multiplies
                the flat observation dimension to give the regressor input
                width.
            regressor_args (dict): Arguments for regressor. The dict passed
                in is not modified.
            name (str): Name passed to the regressor and stored on
                `self.name`.
        """
        super().__init__(env_spec)
        # Work on a copy so the caller's dict is never mutated, and let an
        # explicit entry in regressor_args win over the constructor argument.
        regressor_args = dict(regressor_args or {})
        regressor_args.setdefault('subsample_factor', subsample_factor)

        self._regressor = GaussianMLPRegressor(
            input_shape=(env_spec.observation_space.flat_dim *
                         num_seq_inputs, ),
            output_dim=1,
            name=name,
            **regressor_args)
        self.name = name

    def fit(self, paths):
        """Fit regressor based on paths.

        Args:
            paths (list[dict]): Sample paths. The 'observations' and
                'returns' entries of every path are concatenated and the
                returns are reshaped to a single column before fitting.
        """
        observations = np.concatenate([p['observations'] for p in paths])
        returns = np.concatenate([p['returns'] for p in paths])
        self._regressor.fit(observations, returns.reshape((-1, 1)))

    def predict(self, path):
        """Predict value based on paths.

        Args:
            path (dict): A sample path; only its 'observations' entry is
                read.

        Returns:
            The regressor's prediction for the observations, flattened.
        """
        return self._regressor.predict(path['observations']).flatten()

    def get_param_values(self, **tags):
        """Get parameter values from the underlying regressor."""
        return self._regressor.get_param_values(**tags)

    def set_param_values(self, flattened_params, **tags):
        """Set parameter values of the underlying regressor."""
        self._regressor.set_param_values(flattened_params, **tags)

    def get_params_internal(self, **tags):
        """Get internal parameters from the underlying regressor."""
        return self._regressor.get_params_internal(**tags)
예제 #8
0
    def __init__(
        self,
        env_spec,
        subsample_factor=1.,
        num_seq_inputs=1,
        regressor_args=None,
        name='GaussianMLPBaseline',
    ):
        """Create the single-output regressor backing this baseline.

        Args:
            env_spec: Environment specification; its
                `observation_space.flat_dim` is used to size the regressor
                input.
            subsample_factor (float): Passed to the regressor as
                `subsample_factor`.
            num_seq_inputs (int): Multiplier applied to the flat observation
                dimension to obtain the regressor input width.
            regressor_args (dict): Extra keyword arguments for the
                regressor, or None for no extras.
            name (str): Name passed to the regressor and stored on
                `self.name`.
        """
        extra_kwargs = dict() if regressor_args is None else regressor_args
        input_dim = env_spec.observation_space.flat_dim * num_seq_inputs

        self._regressor = GaussianMLPRegressor(
            input_shape=(input_dim, ),
            output_dim=1,
            name=name,
            subsample_factor=subsample_factor,
            **extra_kwargs)
        self.name = name
예제 #9
0
class GaussianMLPBaseline(Baseline, Parameterized, Serializable):
    """A value function using gaussian mlp network.

    Fitting, prediction and parameter access are all delegated to an
    internal GaussianMLPRegressor with a single output.
    """
    def __init__(
        self,
        env_spec,
        subsample_factor=1.,
        num_seq_inputs=1,
        regressor_args=None,
        name="GaussianMLPBaseline",
    ):
        """
        Constructor.

        :param env_spec: Environment specification; its
            ``observation_space.flat_dim`` is used to size the regressor
            input.
        :param subsample_factor: Forwarded to the regressor as
            ``subsample_factor`` unless ``regressor_args`` already supplies
            that key.
        :param num_seq_inputs: Multiplier applied to the flat observation
            dimension to obtain the regressor input width.
        :param regressor_args: Optional dict of extra keyword arguments for
            GaussianMLPRegressor. The dict passed in is not modified.
        :param name: Name passed to the regressor and stored on
            ``self.name``.
        """
        Parameterized.__init__(self)
        Serializable.quick_init(self, locals())
        super(GaussianMLPBaseline, self).__init__(env_spec)
        # Work on a copy so the caller's dict is never mutated, and let an
        # explicit entry in regressor_args win over the constructor argument.
        regressor_args = dict(regressor_args or {})
        regressor_args.setdefault("subsample_factor", subsample_factor)

        self._regressor = GaussianMLPRegressor(
            input_shape=(env_spec.observation_space.flat_dim *
                         num_seq_inputs, ),
            output_dim=1,
            name=name,
            **regressor_args)
        self.name = name

    @overrides
    def fit(self, paths):
        """Fit regressor based on paths.

        :param paths: Sample paths; the "observations" and "returns" entries
            of every path are concatenated, and the returns are reshaped to
            a single column before fitting.
        """
        observations = np.concatenate([p["observations"] for p in paths])
        returns = np.concatenate([p["returns"] for p in paths])
        self._regressor.fit(observations, returns.reshape((-1, 1)))

    @overrides
    def predict(self, path):
        """Predict value based on paths.

        :param path: A sample path; only its "observations" entry is read.
        :return: The regressor's prediction for the observations, flattened.
        """
        return self._regressor.predict(path["observations"]).flatten()

    @overrides
    def get_param_values(self, **tags):
        """Get parameter values from the underlying regressor."""
        return self._regressor.get_param_values(**tags)

    @overrides
    def set_param_values(self, flattened_params, **tags):
        """Set parameter values of the underlying regressor."""
        self._regressor.set_param_values(flattened_params, **tags)

    @overrides
    def get_params_internal(self, **tags):
        """Get internal parameters from the underlying regressor."""
        return self._regressor.get_params_internal(**tags)
예제 #10
0
    def test_fit_smaller_subsample_factor(self):
        """Fit sin(x) with ``subsample_factor=0.9`` and check predictions.

        After 150 fits on 1000 evenly spaced samples in [-pi, pi], the
        predictions at seven reference points must be within 0.1 of sin(x).
        """
        regressor = GaussianMLPRegressor(input_shape=(1, ),
                                         output_dim=1,
                                         subsample_factor=0.9)
        xs = np.linspace(-np.pi, np.pi, 1000)
        samples = [{
            'observations': [[x]],
            'returns': [np.sin(x)]
        } for x in xs]

        observations = np.concatenate([s['observations'] for s in samples])
        targets = np.concatenate([s['returns'] for s in samples])
        targets = targets.reshape((-1, 1))
        for _ in range(150):
            regressor.fit(observations, targets)

        query_points = [[x] for x in (-np.pi, -np.pi / 2, -np.pi / 4, 0,
                                      np.pi / 4, np.pi / 2, np.pi)]
        prediction = regressor.predict(query_points)

        expected = [[0], [-1], [-0.707], [0], [0.707], [1], [0]]
        assert np.allclose(prediction, expected, rtol=0, atol=0.1)
예제 #11
0
    def test_is_pickleable2(self):
        """The ``x_mean`` variable must survive a pickle round trip.

        ``x_mean`` is overwritten with ones before pickling so the restored
        value cannot match by virtue of default initialisation alone.
        """
        regressor = GaussianMLPRegressor(input_shape=(1, ), output_dim=1)

        model_scope = 'GaussianMLPRegressor/GaussianMLPRegressorModel'
        with tf.compat.v1.variable_scope(model_scope, reuse=True):
            x_mean_var = tf.compat.v1.get_variable('normalized_vars/x_mean')
        x_mean_var.load(tf.ones_like(x_mean_var).eval())
        before = regressor.model.networks['default'].x_mean.eval()
        serialized = pickle.dumps(regressor)
        # Restore inside a brand-new graph/session so nothing is shared with
        # the original regressor.
        with tf.compat.v1.Session(graph=tf.Graph()):
            restored = pickle.loads(serialized)
            after = restored.model.networks['default'].x_mean.eval()
            assert np.array_equal(before, after)
예제 #12
0
    def test_fit_unnormalized(self):
        """Fit sin(x) with input/output normalisation switched off.

        Besides the prediction check, the normalisation variables must be
        left at zero (means) and one (standard deviations).
        """
        regressor = GaussianMLPRegressor(input_shape=(1, ),
                                         output_dim=1,
                                         subsample_factor=0.9,
                                         normalize_inputs=False,
                                         normalize_outputs=False)
        xs = np.linspace(-np.pi, np.pi, 1000)
        samples = [{
            'observations': [[x]],
            'returns': [np.sin(x)]
        } for x in xs]

        observations = np.concatenate([s['observations'] for s in samples])
        targets = np.concatenate([s['returns'] for s in samples])
        targets = targets.reshape((-1, 1))
        for _ in range(150):
            regressor.fit(observations, targets)

        query_points = [[x] for x in (-np.pi, -np.pi / 2, -np.pi / 4, 0,
                                      np.pi / 4, np.pi / 2, np.pi)]
        prediction = regressor.predict(query_points)

        expected = [[0], [-1], [-0.707], [0], [0.707], [1], [0]]
        assert np.allclose(prediction, expected, rtol=0, atol=0.1)

        # Input statistics are compared exactly.
        x_mean = self.sess.run(regressor.model.networks['default'].x_mean)
        x_std = self.sess.run(regressor.model.networks['default'].x_std)
        assert np.array_equal(x_mean, np.zeros_like(x_mean))
        assert np.array_equal(x_std, np.ones_like(x_std))

        # Output statistics are compared with the default tolerance.
        y_mean = self.sess.run(regressor.model.networks['default'].y_mean)
        y_std = self.sess.run(regressor.model.networks['default'].y_std)

        assert np.allclose(y_mean, np.zeros_like(y_mean))
        assert np.allclose(y_std, np.ones_like(y_std))
예제 #13
0
class MultiTaskGaussianMLPBaseline(Baseline, Parameterized):
    """A value function using gaussian mlp network.

    The regressor input is each observation concatenated with the path's
    "tasks_gt" and "latents" entries.
    """

    def __init__(
            self,
            env_spec,
            extra_dims=0,
            subsample_factor=1.,
            num_seq_inputs=1,
            regressor_args=None,
    ):
        """
        Constructor.

        :param env_spec: Environment specification; its
            ``observation_space.flat_dim`` is used to size the regressor
            input.
        :param extra_dims: Number of additional input features added to the
            flat observation dimension (presumably the combined width of
            the "tasks_gt" and "latents" entries used by ``fit`` and
            ``predict`` -- confirm against the caller).
        :param subsample_factor: Forwarded to the regressor as
            ``subsample_factor`` unless ``regressor_args`` already supplies
            that key.
        :param num_seq_inputs: Multiplier applied to the per-step input
            width to obtain the regressor input width.
        :param regressor_args: Optional dict of extra keyword arguments for
            GaussianMLPRegressor. The dict passed in is not modified.
        """
        # Baseline.__init__ is the only initializer reached through super()
        # here, so the Parameterized mixin is initialised explicitly.
        Parameterized.__init__(self)
        Serializable.quick_init(self, locals())
        super(MultiTaskGaussianMLPBaseline, self).__init__(env_spec)
        # Work on a copy so the caller's dict is never mutated, and let an
        # explicit entry in regressor_args win over the constructor argument.
        regressor_args = dict(regressor_args or {})
        regressor_args.setdefault("subsample_factor", subsample_factor)

        self._regressor = GaussianMLPRegressor(
            input_shape=((env_spec.observation_space.flat_dim + extra_dims) *
                         num_seq_inputs, ),
            output_dim=1,
            name="vf",
            use_trust_region=True,
            **regressor_args)

    @overrides
    def fit(self, paths):
        """Fit regressor based on paths.

        :param paths: Sample paths; each must provide "observations",
            "tasks_gt", "latents" and "returns". The first three are joined
            along axis 1 to form the regressor input, and the returns are
            reshaped to a single column.
        """
        observations = np.concatenate([p["observations"] for p in paths])
        tasks = np.concatenate([p["tasks_gt"] for p in paths])
        latents = np.concatenate([p["latents"] for p in paths])
        aug_obs = np.concatenate([observations, tasks, latents], axis=1)
        returns = np.concatenate([p["returns"] for p in paths])
        self._regressor.fit(aug_obs, returns.reshape((-1, 1)))

    @overrides
    def predict(self, path):
        """Predict value based on paths.

        :param path: A sample path providing "observations", "tasks_gt" and
            "latents".
        :return: The regressor's prediction for the joined inputs, flattened.
        """
        inputs = np.concatenate(
            (path["observations"], path["tasks_gt"], path["latents"]), axis=1)
        return self._regressor.predict(inputs, ).flatten()

    @overrides
    def get_param_values(self, **tags):
        """Get parameter values from the underlying regressor."""
        return self._regressor.get_param_values(**tags)

    @overrides
    def set_param_values(self, flattened_params, **tags):
        """Set parameter values of the underlying regressor."""
        self._regressor.set_param_values(flattened_params, **tags)

    @overrides
    def get_params_internal(self, **tags):
        """Get internal parameters from the underlying regressor."""
        return self._regressor.get_params_internal(**tags)
예제 #14
0
    def test_fit_normalized(self):
        """Fit sin(x) with default normalisation and check the statistics.

        Besides the prediction check, the normalisation variables of the
        default network must match the mean and standard deviation of the
        training inputs and targets.
        """
        regressor = GaussianMLPRegressor(input_shape=(1, ), output_dim=1)
        xs = np.linspace(-np.pi, np.pi, 1000)
        samples = [{
            'observations': [[x]],
            'returns': [np.sin(x)]
        } for x in xs]

        observations = np.concatenate([s['observations'] for s in samples])
        targets = np.concatenate([s['returns'] for s in samples])
        targets = targets.reshape((-1, 1))
        for _ in range(150):
            regressor.fit(observations, targets)

        query_points = [[x] for x in (-np.pi, -np.pi / 2, -np.pi / 4, 0,
                                      np.pi / 4, np.pi / 2, np.pi)]
        prediction = regressor.predict(query_points)

        expected = [[0], [-1], [-0.707], [0], [0.707], [1], [0]]
        assert np.allclose(prediction, expected, rtol=0, atol=0.1)

        # Input statistics.
        x_mean = self.sess.run(regressor.model.networks['default'].x_mean)
        x_std = self.sess.run(regressor.model.networks['default'].x_std)

        assert np.allclose(x_mean,
                           np.mean(observations, axis=0, keepdims=True))
        assert np.allclose(x_std, np.std(observations, axis=0, keepdims=True))

        # Output statistics.
        y_mean = self.sess.run(regressor.model.networks['default'].y_mean)
        y_std = self.sess.run(regressor.model.networks['default'].y_std)

        assert np.allclose(y_mean, np.mean(targets, axis=0, keepdims=True))
        assert np.allclose(y_std, np.std(targets, axis=0, keepdims=True))
예제 #15
0
class CollisionAwareBaseline(Baseline, Parameterized):
    """A value function using gaussian mlp network.

    The regressor input is each observation with one extra column holding
    the path's ``env_infos["in_collision"]`` value for that step.
    """
    def __init__(
        self,
        env_spec,
        subsample_factor=1.,
        num_seq_inputs=1,
        regressor_args=None,
    ):
        """
        Constructor.

        :param env_spec: Environment specification; its
            ``observation_space.flat_dim`` plus one (for the collision
            column) is used to size the regressor input.
        :param subsample_factor: Forwarded to the regressor as
            ``subsample_factor`` unless ``regressor_args`` already supplies
            that key.
        :param num_seq_inputs: Multiplier applied to the per-step input
            width to obtain the regressor input width.
        :param regressor_args: Optional dict of extra keyword arguments for
            GaussianMLPRegressor. The dict passed in is not modified.
        """
        Parameterized.__init__(self)
        Serializable.quick_init(self, locals())
        Baseline.__init__(self, env_spec)
        # Work on a copy so the caller's dict is never mutated, and let an
        # explicit entry in regressor_args win over the constructor argument.
        regressor_args = dict(regressor_args or {})
        regressor_args.setdefault("subsample_factor", subsample_factor)

        self._regressor = GaussianMLPRegressor(
            input_shape=((env_spec.observation_space.flat_dim + 1) *
                         num_seq_inputs, ),
            output_dim=1,
            name="Baseline",
            **regressor_args)

    @overrides
    def fit(self, paths):
        """Fit regressor based on paths.

        :param paths: Sample paths; each must provide "observations",
            "returns" and ``env_infos["in_collision"]``. Paths may have
            different lengths.
        """
        observations = np.concatenate([p["observations"] for p in paths])
        # Convert each path's flags on its own: building a single float32
        # array from all paths at once fails when their lengths differ.
        collisions = np.concatenate([
            np.asarray(p["env_infos"]["in_collision"], dtype=np.float32)
            for p in paths
        ])
        collisions = np.expand_dims(collisions, axis=1)
        aug_obs = np.concatenate([observations, collisions], axis=1)
        returns = np.concatenate([p["returns"] for p in paths])
        self._regressor.fit(aug_obs, returns.reshape((-1, 1)))

    @overrides
    def predict(self, path):
        """Predict value based on paths.

        :param path: A sample path providing "observations" and
            ``env_infos["in_collision"]``.
        :return: The regressor's prediction for the augmented observations,
            flattened.
        """
        collisions = np.expand_dims(path["env_infos"]["in_collision"], axis=1)
        inputs = np.concatenate([path["observations"], collisions], axis=1)
        return self._regressor.predict(inputs, ).flatten()

    @overrides
    def get_param_values(self, **tags):
        """Get parameter values from the underlying regressor."""
        return self._regressor.get_param_values(**tags)

    @overrides
    def set_param_values(self, flattened_params, **tags):
        """Set parameter values of the underlying regressor."""
        self._regressor.set_param_values(flattened_params, **tags)

    @overrides
    def get_params_internal(self, **tags):
        """Get internal parameters from the underlying regressor."""
        return self._regressor.get_params_internal(**tags)
예제 #16
0
    def test_auxiliary(self):
        """Check auxiliary attributes of a regressor with five outputs.

        The regressor must report itself as vectorized and its distribution
        must have an event shape of ``[5]``.
        """
        regressor = GaussianMLPRegressor(input_shape=(1, ), output_dim=5)

        assert regressor.vectorized
        event_shape = regressor.distribution.event_shape
        assert event_shape.as_list() == [5]
예제 #17
0
class GaussianMLPBaseline(Baseline):
    """Baseline backed by a Gaussian MLP regressor.

    The input data is fitted to a gaussian distribution estimated by a MLP.
    Fitting, prediction and parameter access are delegated to an internal
    GaussianMLPRegressor with a single output.

    Args:
        env_spec (garage.envs.env_spec.EnvSpec): Environment specification.
        subsample_factor (float): The factor to subsample the data, passed
            straight to the regressor. The default of 1.0 means all the data
            is used.
        num_seq_inputs (int): Number of sequence per input; multiplies the
            flat observation dimension to give the regressor input width.
            Defaults to 1.
        regressor_args (dict): Extra keyword arguments for the regressor.
        name (str): Name of baseline, also used as the regressor name.

    """

    def __init__(
            self,
            env_spec,
            subsample_factor=1.,
            num_seq_inputs=1,
            regressor_args=None,
            name='GaussianMLPBaseline',
    ):
        super().__init__(env_spec)
        extra_kwargs = dict() if regressor_args is None else regressor_args
        input_dim = env_spec.observation_space.flat_dim * num_seq_inputs

        self._regressor = GaussianMLPRegressor(
            input_shape=(input_dim, ),
            output_dim=1,
            name=name,
            subsample_factor=subsample_factor,
            **extra_kwargs)
        self.name = name

    def fit(self, paths):
        """Train the regressor on a batch of sample paths.

        Args:
            paths (list[dict]): Sample paths. The 'observations' and
                'returns' entries of every path are concatenated, and the
                returns are reshaped to a single column.

        """
        stacked_obs = np.concatenate([path['observations'] for path in paths])
        stacked_returns = np.concatenate([path['returns'] for path in paths])
        targets = stacked_returns.reshape((-1, 1))
        self._regressor.fit(stacked_obs, targets)

    def predict(self, path):
        """Predict values for the observations of one path.

        Args:
            path (dict): A sample path; only its 'observations' entry is
                read.

        Returns:
            numpy.ndarray: Predicted value, flattened.

        """
        prediction = self._regressor.predict(path['observations'])
        return prediction.flatten()

    def get_param_values(self):
        """Return the parameter values of the underlying regressor.

        Returns:
            The value returned by the regressor's `get_param_values`.

        """
        values = self._regressor.get_param_values()
        return values

    def set_param_values(self, flattened_params):
        """Hand new parameter values to the underlying regressor.

        Args:
            flattened_params (np.ndarray): A numpy array of parameter values.

        """
        regressor = self._regressor
        regressor.set_param_values(flattened_params)

    def get_params_internal(self):
        """Return the internal parameters of the underlying regressor.

        Returns:
            The value returned by the regressor's `get_params_internal`.

        """
        params = self._regressor.get_params_internal()
        return params
예제 #18
0
    def test_auxiliary(self):
        """Check auxiliary attributes of a regressor built with output_dim=5.

        Asserts that the regressor is not recurrent, is vectorized, and that
        its distribution reports a dimension of 5.
        """
        gmr = GaussianMLPRegressor(input_shape=(1, ), output_dim=5)

        assert not gmr.recurrent
        assert gmr.vectorized
        assert gmr.distribution.dim == 5