예제 #1
0
 def _create_network_internal(self, observation_input=None):
     """Build the feature network: optional MLP trunk plus a linear head."""
     assert observation_input is not None
     processed_obs = self._process_layer(observation_input,
                                         scope_name="observation_input")
     # With no hidden layers, the linear head reads the raw observation.
     hidden_output = processed_obs
     head_input_size = self.observation_dim
     if self.hidden_sizes:
         with tf.variable_scope("mlp"):
             hidden_output = mlp(
                 processed_obs,
                 self.observation_dim,
                 self.hidden_sizes,
                 self.hidden_activation,
                 W_initializer=he_uniform_initializer(),
                 b_initializer=tf.constant_initializer(0.),
                 pre_nonlin_lambda=self._process_layer,
             )
         head_input_size = self.hidden_sizes[-1]
     with tf.variable_scope("output"):
         return linear(
             hidden_output,
             head_input_size,
             self.feature_dim,
             W_initializer=he_uniform_initializer(),
             b_initializer=tf.constant_initializer(0.),
         )
예제 #2
0
 def _create_network_internal(self, observation_input=None):
     """Observation MLP followed by a nonlinearity applied to a linear head."""
     assert observation_input is not None
     processed_obs = self._process_layer(observation_input,
                                         scope_name="observation_input")
     with tf.variable_scope("mlp"):
         hidden = mlp(
             processed_obs,
             self.observation_dim,
             self.observation_hidden_sizes,
             self.hidden_nonlinearity,
             W_initializer=self.hidden_W_init,
             b_initializer=self.hidden_b_init,
             pre_nonlin_lambda=self._process_layer,
         )
     hidden = self._process_layer(
         hidden,
         scope_name="output_preactivations",
     )
     with tf.variable_scope("output"):
         preactivation = linear(
             hidden,
             self.observation_hidden_sizes[-1],
             self.output_dim,
             W_initializer=self.output_W_init,
             b_initializer=self.output_b_init,
         )
         return self.output_nonlinearity(preactivation)
예제 #3
0
 def _create_network_internal(self, input_tensor=None):
     """Stack of Perceptron sub-networks followed by a final linear layer."""
     assert input_tensor is not None
     current = self._process_layer(input_tensor,
                                   scope_name="input_tensor")
     current_size = self.input_size
     for idx, layer_size in enumerate(self.hidden_sizes):
         perceptron = Perceptron(
             'p{0}'.format(idx),
             current,
             current_size,
             layer_size,
             W_name=self.W_name,
             b_name=self.b_name,
             W_initializer=self.W_initializer,
             b_initializer=self.b_initializer,
             batch_norm_config=self._batch_norm_config,
         )
         # Register the perceptron as a sub-network, then post-process its
         # output before feeding the next layer.
         current = self._process_layer(
             self._add_subnetwork_and_get_output(perceptron))
         current_size = layer_size
     return tf_util.linear(
         current,
         current_size,
         self.output_size,
         W_name=self.W_name,
         b_name=self.b_name,
         W_initializer=self.W_initializer,
         b_initializer=self.b_initializer,
     )
예제 #4
0
    def _create_network_internal(self, feature_input=None, action_input=None):
        """Concatenate feature and action inputs, run them through an MLP,
        and project back to feature_dim with a linear layer."""
        assert feature_input is not None and action_input is not None

        feature_input = self._process_layer(feature_input,
                                            scope_name="feature_input")
        action_input = self._process_layer(action_input,
                                           scope_name="action_input")

        with tf.variable_scope("mlp"):
            # Pre-TF-1.0 concat signature: (concat_dim, values).
            joint_input = tf.concat(1, [feature_input, action_input])
            joint_dim = self.feature_dim + self.action_dim
            hidden = mlp(
                joint_input,
                joint_dim,
                self.hidden_sizes,
                self.hidden_activation,
                b_initializer=tf.constant_initializer(0.),
                pre_nonlin_lambda=self._process_layer,
            )

        with tf.variable_scope("output_linear"):
            return linear(
                hidden,
                self.hidden_sizes[-1],
                self.feature_dim,
                b_initializer=tf.constant_initializer(0.),
            )
    def _create_network_internal(self,
                                 observation_input=None,
                                 action_input=None):
        """Conv + MLP observation trunk, fused with the action through a
        second MLP, ending in a scalar (critic-style) linear head.

        Fixes vs. the previous revision:
        - ``tf.contrib.layers.flatten`` does not take a shape argument (its
          second positional parameter is ``outputs_collections``); flatten
          with ``tf.reshape`` instead, as the sibling builders do.
        - The output layer must consume ``fused_output`` (width
          ``embedded_hidden_sizes[-1]``), not ``observation_output`` — the
          old code built the fusion MLP and then ignored it, and the declared
          input size did not match the tensor actually passed in.
        """
        assert observation_input is not None and action_input is not None
        observation_input = self._process_layer(observation_input,
                                                scope_name="observation_input")
        action_input = self._process_layer(action_input,
                                           scope_name="action_input")
        with tf.variable_scope("conv_network"):
            observation_output, output_shape = conv_network(
                observation_input,
                self.input_shape,
                self.conv_filters,
                self.conv_filter_sizes,
                self.conv_strides,
                self.conv_pads,
                self.observation_hidden_sizes,
                self.hidden_nonlinearity,
                W_initializer=xavier_uniform_initializer(),
                b_initializer=tf.constant_initializer(0.),
                pre_nonlin_lambda=self._process_layer,
            )

        # Collapse all non-batch dimensions before the fully-connected stack.
        output_dim = np.prod(output_shape[1:])
        observation_output = tf.reshape(observation_output, [-1, output_dim])

        with tf.variable_scope("mlp"):
            observation_output = mlp(
                observation_output,
                output_dim,
                self.observation_hidden_sizes,
                self.hidden_nonlinearity,
                W_initializer=xavier_uniform_initializer(),
                b_initializer=tf.constant_initializer(0.),
                pre_nonlin_lambda=self._process_layer,
            )

        # Fuse the processed observation with the action input.
        embedded = tf.concat(1, [observation_output, action_input])
        embedded_dim = self.action_dim + self.observation_hidden_sizes[-1]
        with tf.variable_scope("fusion_mlp"):
            fused_output = mlp(
                embedded,
                embedded_dim,
                self.embedded_hidden_sizes,
                self.hidden_nonlinearity,
                W_initializer=self.hidden_W_init,
                b_initializer=self.hidden_b_init,
                pre_nonlin_lambda=self._process_layer,
            )
            fused_output = self._process_layer(fused_output)

        with tf.variable_scope("output"):
            return linear(
                fused_output,
                self.embedded_hidden_sizes[-1],
                1,
                W_initializer=xavier_uniform_initializer(),
                b_initializer=tf.constant_initializer(0.),
            )
예제 #6
0
    def _create_network_internal(self,
                                 feature_input1=None,
                                 feature_input2=None):
        """MLP over two concatenated feature inputs, with an optional output
        activation applied to the final linear layer."""
        assert feature_input1 is not None and feature_input2 is not None

        feature_input1 = self._process_layer(feature_input1,
                                             scope_name="feature_input1")
        feature_input2 = self._process_layer(feature_input2,
                                             scope_name="feature_input2")

        with tf.variable_scope("mlp"):
            # Pre-TF-1.0 concat signature: (concat_dim, values).
            joint = tf.concat(1, [feature_input1, feature_input2])
            action_output = mlp(
                joint,
                2 * self.feature_dim,
                self.hidden_sizes,
                self.hidden_activation,
                b_initializer=tf.constant_initializer(0.),
                pre_nonlin_lambda=self._process_layer,
            )
        with tf.variable_scope("output"):
            preactivation = linear(
                action_output,
                self.hidden_sizes[-1],
                self.action_dim,
                b_initializer=tf.constant_initializer(0.),
            )
            if self.output_activation is None:
                return preactivation
            return self.output_activation(preactivation)
예제 #7
0
 def test_get_collections(self):
     """Variables created under a scope are retrievable by that scope name;
     an unknown scope name yields an empty collection."""
     in_size, out_size = 5, 10
     input_placeholder = tf.placeholder(tf.float32, [None, in_size])
     scope = 'abc'
     with tf.variable_scope(scope) as _:
         _ = tf_util.linear(input_placeholder, in_size, out_size)
     # TODO(vpong): figure out why this line doesn't work
     # variables = tf.get_collection(tf.GraphKeys.VARIABLES, scope)
     in_scope = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope)
     self.assertEqual(2, len(in_scope))  # W and b
     elsewhere = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "nope")
     self.assertEqual(0, len(elsewhere))
예제 #8
0
 def _create_network_internal(self, input_tensor=None):
     """Single linear layer mapping input_size to output_size."""
     assert input_tensor is not None
     processed = self._process_layer(input_tensor,
                                     scope_name="input_tensor")
     return tf_util.linear(
         processed,
         self.input_size,
         self.output_size,
         W_name=self.W_name,
         b_name=self.b_name,
         W_initializer=self.W_initializer,
         b_initializer=self.b_initializer,
     )
예제 #9
0
 def test_linear_shape(self):
     """linear() maps a (batch, 4) input to a (batch, 3) output."""
     batch_size, in_dim, out_dim = 13, 4, 3
     input_placeholder = tf.placeholder(tf.float32, [None, in_dim])
     linear_output = tf_util.linear(input_placeholder, in_dim, out_dim)
     self.sess.run(tf.global_variables_initializer())
     # y = xW + b
     x = np.random.rand(batch_size, in_dim)
     y = self.sess.run(linear_output,
                       feed_dict={input_placeholder: x})
     self.assertEqual(y.shape, (batch_size, out_dim))
예제 #10
0
    def _create_network_internal(self, observation_input=None):
        """Conv trunk, optional MLP, then a linear projection to feature_dim.

        Fix: ``tf.contrib.layers.flatten`` takes no shape argument — its
        second positional parameter is ``outputs_collections`` — so the old
        call passed ``[-1, output_dim]`` where a collections list was
        expected. Use ``tf.reshape`` instead, matching the sibling network
        builders in this file.
        """
        assert observation_input is not None
        observation_input = self._process_layer(observation_input,
                                                scope_name="observation_input")

        with tf.variable_scope("conv_network"):
            observation_output, output_shape = conv_network(
                observation_input,
                self.input_shape,
                self.conv_filters,
                self.conv_filter_sizes,
                self.conv_strides,
                self.conv_pads,
                self.hidden_sizes,
                self.hidden_activation,
                W_initializer=xavier_uniform_initializer(),
                b_initializer=tf.constant_initializer(0.),
                pre_nonlin_lambda=self._process_layer,
            )

        # Collapse all non-batch dimensions before the fully-connected stack.
        output_dim = np.prod(output_shape[1:])
        observation_output = tf.reshape(observation_output, [-1, output_dim])
        with tf.variable_scope("mlp"):
            observation_output = mlp(
                observation_output,
                output_dim,
                self.hidden_sizes,
                self.hidden_activation,
                W_initializer=xavier_uniform_initializer(),
                b_initializer=tf.constant_initializer(0.),
                pre_nonlin_lambda=self._process_layer,
            )

        # With no hidden layers the head reads the flattened conv features.
        if len(self.hidden_sizes) > 0:
            last_size = self.hidden_sizes[-1]
        else:
            last_size = output_dim

        with tf.variable_scope("output"):
            return linear(
                observation_output,
                last_size,
                self.feature_dim,
                W_initializer=xavier_uniform_initializer(),
                b_initializer=tf.constant_initializer(0.),
            )
예제 #11
0
    def _create_network_internal(self, observation_input=None):
        """Conv trunk -> flatten -> MLP -> nonlinearity(linear head)."""
        assert observation_input is not None
        processed_obs = self._process_layer(observation_input,
                                            scope_name="observation_input")

        with tf.variable_scope("conv_network"):
            conv_out, conv_shape = conv_network(
                processed_obs,
                self.input_shape,
                self.conv_filters,
                self.conv_filter_sizes,
                self.conv_strides,
                self.conv_pads,
                self.observation_hidden_sizes,
                self.hidden_nonlinearity,
                W_initializer=xavier_uniform_initializer(),
                b_initializer=tf.constant_initializer(0.),
                pre_nonlin_lambda=self._process_layer,
            )

        flat_dim = np.prod(conv_shape[1:])
        flat_features = tf.reshape(conv_out, [-1, flat_dim], name="reshape")
        # Expose the flattened conv features for downstream inspection.
        self.reshaped_observation_feature = flat_features

        with tf.variable_scope("mlp"):
            mlp_out = mlp(
                flat_features,
                flat_dim,
                self.observation_hidden_sizes,
                self.hidden_nonlinearity,
                W_initializer=xavier_uniform_initializer(),
                b_initializer=tf.constant_initializer(0.),
                pre_nonlin_lambda=self._process_layer,
            )

        with tf.variable_scope("output"):
            head = linear(
                mlp_out,
                self.observation_hidden_sizes[-1],
                self.output_dim,
                W_initializer=xavier_uniform_initializer(),
                b_initializer=tf.constant_initializer(0.),
            )
            return self.output_nonlinearity(head)
예제 #12
0
 def test_linear_output(self):
     """With W all ones and b zero, linear(x) equals x @ ones((4, 3))."""
     in_dim, out_dim, batch_size = 4, 3, 13
     input_placeholder = tf.placeholder(tf.float32, [None, in_dim])
     linear_output = tf_util.linear(
         input_placeholder,
         in_dim,
         out_dim,
         W_initializer=tf.constant_initializer(1.),
         b_initializer=tf.constant_initializer(0.),
     )
     self.sess.run(tf.global_variables_initializer())
     # y = xW + b
     x = np.random.rand(batch_size, in_dim)
     y = self.sess.run(linear_output,
                       feed_dict={input_placeholder: x})
     self.assertNpEqual(y, np.matmul(x, np.ones((in_dim, out_dim))))
예제 #13
0
    def _create_network_internal(self,
                                 feature_input1=None,
                                 feature_input2=None):
        """Shared MLP over two concatenated feature inputs, then one scalar
        head per action dimension; each head after the first also receives
        the previous head's output (autoregressive structure)."""
        assert feature_input1 is not None and feature_input2 is not None

        feature_input1 = self._process_layer(feature_input1,
                                             scope_name="feature_input1")
        feature_input2 = self._process_layer(feature_input2,
                                             scope_name="feature_input2")

        with tf.variable_scope("mlp"):
            joint = tf.concat(1, [feature_input1, feature_input2])
            shared = mlp(
                joint,
                2 * self.feature_dim,
                self.hidden_sizes,
                self.hidden_activation,
                b_initializer=tf.constant_initializer(0.),
                pre_nonlin_lambda=self._process_layer,
            )

        outputs = []
        for i in range(self.action_dim):
            # First head sees only the shared features; later heads also see
            # the immediately preceding head's output.
            if outputs:
                head_input = tf.concat(1, [shared, outputs[-1]])
            else:
                head_input = shared

            with tf.variable_scope("inverse_output%d" % i):
                head = linear(
                    head_input,
                    head_input.get_shape().as_list()[1],
                    1,
                )

            if self.output_activation is not None:
                head = self.output_activation(head)
            outputs.append(head)

        return tf.concat(1, outputs)
    def _create_network_internal(self, observation_input, action_input):
        """Observation MLP, fused with the action through a second MLP,
        ending in a single linear output (critic-style)."""
        observation_input = self._process_layer(observation_input,
                                                scope_name="observation_input")
        action_input = self._process_layer(action_input,
                                           scope_name="action_input")
        with tf.variable_scope("observation_mlp"):
            obs_features = mlp(
                observation_input,
                self.observation_dim,
                self.observation_hidden_sizes,
                self.hidden_nonlinearity,
                W_initializer=self.hidden_W_init,
                b_initializer=self.hidden_b_init,
                pre_nonlin_lambda=self._process_layer,
            )
            obs_features = self._process_layer(
                obs_features, scope_name="observation_output")
        # Fuse the processed observation with the raw action input.
        fused_input = tf.concat(1, [obs_features, action_input])
        fused_dim = self.action_dim + self.observation_hidden_sizes[-1]
        with tf.variable_scope("fusion_mlp"):
            fused = mlp(
                fused_input,
                fused_dim,
                self.embedded_hidden_sizes,
                self.hidden_nonlinearity,
                W_initializer=self.hidden_W_init,
                b_initializer=self.hidden_b_init,
                pre_nonlin_lambda=self._process_layer,
            )
            fused = self._process_layer(fused)

        with tf.variable_scope("output_linear"):
            return linear(
                fused,
                self.embedded_hidden_sizes[-1],
                1,
                W_initializer=self.output_W_init,
                b_initializer=self.output_b_init,
            )
 def _create_network_internal(self, observation_input, action_input):
     """MLP over concatenated observation and action, with the output
     nonlinearity applied to a final linear layer."""
     observation_input = self._process_layer(observation_input,
                                             scope_name="observation_input")
     action_input = self._process_layer(action_input,
                                        scope_name="action_input")
     joint_input = tf.concat(1, [observation_input, action_input])
     hidden = tf_util.mlp(
         joint_input,
         self.observation_dim + self.action_dim,
         self.hidden_sizes,
         self.hidden_nonlinearity,
         W_initializer=self.hidden_W_init,
         b_initializer=self.hidden_b_init,
         pre_nonlin_lambda=self._process_layer,
     )
     preactivation = tf_util.linear(
         hidden,
         self.hidden_sizes[-1],
         self.output_dim,
         W_initializer=self.output_W_init,
         b_initializer=self.output_b_init,
     )
     return self.output_nonlinearity(preactivation)
 def _create_network_internal(self, observation_input):
     """Observation MLP followed by nonlinearity(linear head)."""
     processed = self._process_layer(observation_input,
                                     scope_name="observation_input")
     with tf.variable_scope("hidden_mlp"):
         hidden = tf_util.mlp(
             processed,
             self.observation_dim,
             self.observation_hidden_sizes,
             self.hidden_nonlinearity,
             W_initializer=self.hidden_W_init,
             b_initializer=self.hidden_b_init,
             pre_nonlin_lambda=self._process_layer,
         )
     hidden = self._process_layer(hidden,
                                  scope_name="hidden_output")
     with tf.variable_scope("output"):
         head = tf_util.linear(
             hidden,
             self.observation_hidden_sizes[-1],
             self.output_dim,
             W_initializer=self.output_W_init,
             b_initializer=self.output_b_init,
         )
         return self.output_nonlinearity(head)
예제 #17
0
    def __init__(
            self,
            env_spec,
            mean_hidden_nonlinearity=tf.nn.relu,
            mean_hidden_sizes=(32, 32),
            std_hidden_nonlinearity=tf.nn.relu,
            std_hidden_sizes=(32, 32),
            min_std=1e-6,
    ):
        """
        Gaussian MLP policy: two separate MLP heads map the flat observation
        to the action mean and the (log) standard deviation.

        :param env_spec: environment spec; the action space must be a Box
        :param mean_hidden_nonlinearity: nonlinearity used for the mean hidden
                                         layers
        :param mean_hidden_sizes: list of hidden_sizes for the fully-connected hidden layers
        :param std_hidden_nonlinearity: nonlinearity used for the std hidden
                                        layers
        :param std_hidden_sizes: list of hidden_sizes for the fully-connected hidden layers
        :param min_std: lower bound applied to the std (via tf.maximum) to
                        avoid numerical issues
        :return:
        """
        Serializable.quick_init(self, locals())
        assert isinstance(env_spec.action_space, Box)
        super(GaussianMLPPolicy, self).__init__(env_spec)

        self.env_spec = env_spec
        # Cap GPU memory so multiple sessions can share one device.
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.2)
        self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        # Create network
        observation_dim = self.env_spec.observation_space.flat_dim
        self.observations_input = tf.placeholder(tf.float32,
                                                 shape=[None, observation_dim])
        action_dim = self.env_spec.action_space.flat_dim
        # Mean head: MLP trunk followed by a linear projection to action_dim.
        with tf.variable_scope('mean') as _:
            mlp_mean_output = tf_util.mlp(self.observations_input,
                                          observation_dim,
                                          mean_hidden_sizes,
                                          mean_hidden_nonlinearity)
            mlp_mean_output_size = mean_hidden_sizes[-1]
            self.mean = tf_util.linear(mlp_mean_output,
                                       mlp_mean_output_size,
                                       action_dim)

        # Std head: independent MLP producing log-std, exponentiated and
        # clipped from below by min_std.
        with tf.variable_scope('log_std') as _:
            mlp_std_output = tf_util.mlp(self.observations_input,
                                         observation_dim,
                                         std_hidden_sizes,
                                         std_hidden_nonlinearity)
            mlp_std_output_size = std_hidden_sizes[-1]
            self.log_std = tf_util.linear(mlp_std_output,
                                          mlp_std_output_size,
                                          action_dim)
            self.std = tf.maximum(tf.exp(self.log_std), min_std)

        self._dist = DiagonalGaussian(action_dim)

        self.actions_output = tf.placeholder(tf.float32, shape=[None, action_dim])
        z = (self.actions_output - self.mean) / self.std
        # Per-dimension Gaussian log-density terms.
        # NOTE(review): the first term is -log(std**2) == -2*log(std), while
        # the standard normal log-density uses -log(std). Confirm whether the
        # extra factor of 2 is intentional before relying on this quantity.
        self.log_likelihood = (- tf.log(self.std**2)
                               - z**2 * 0.5
                               - tf.log(2*np.pi) * 0.5)