def _create_network_internal(self,
                                 observation_input=None,
                                 action_input=None):
        """Build a Q-function graph: conv net over the observation, an MLP on
        the flattened conv features, fusion with the action input, and a
        scalar linear head.

        :param observation_input: observation tensor (required).
        :param action_input: action tensor (required).
        :return: [batch, 1] linear output (the Q-value head).
        """
        assert observation_input is not None and action_input is not None
        observation_input = self._process_layer(observation_input,
                                                scope_name="observation_input")
        action_input = self._process_layer(action_input,
                                           scope_name="action_input")
        with tf.variable_scope("conv_network"):
            observation_output, output_shape = conv_network(
                observation_input,
                self.input_shape,
                self.conv_filters,
                self.conv_filter_sizes,
                self.conv_strides,
                self.conv_pads,
                self.observation_hidden_sizes,
                self.hidden_nonlinearity,
                W_initializer=xavier_uniform_initializer(),
                b_initializer=tf.constant_initializer(0.),
                pre_nonlin_lambda=self._process_layer,
            )

        output_dim = np.prod(output_shape[1:])
        # BUG FIX: tf.contrib.layers.flatten does not accept a target shape
        # argument; use an explicit reshape to [batch, flat_dim] instead.
        observation_output = tf.reshape(observation_output, [-1, output_dim])

        with tf.variable_scope("mlp"):
            observation_output = mlp(
                observation_output,
                output_dim,
                self.observation_hidden_sizes,
                self.hidden_nonlinearity,
                W_initializer=xavier_uniform_initializer(),
                b_initializer=tf.constant_initializer(0.),
                pre_nonlin_lambda=self._process_layer,
            )

        embedded = tf.concat(1, [observation_output, action_input])
        embedded_dim = self.action_dim + self.observation_hidden_sizes[-1]
        with tf.variable_scope("fusion_mlp"):
            fused_output = mlp(
                embedded,
                embedded_dim,
                self.embedded_hidden_sizes,
                self.hidden_nonlinearity,
                W_initializer=self.hidden_W_init,
                b_initializer=self.hidden_b_init,
                pre_nonlin_lambda=self._process_layer,
            )
            fused_output = self._process_layer(fused_output)

        with tf.variable_scope("output"):
            # BUG FIX: the output head must consume the fused features. The
            # original fed observation_output here, leaving fused_output
            # unused and mismatching the declared input size
            # (embedded_hidden_sizes[-1] is fused_output's width).
            return linear(
                fused_output,
                self.embedded_hidden_sizes[-1],
                1,
                W_initializer=xavier_uniform_initializer(),
                b_initializer=tf.constant_initializer(0.),
            )
Example #2
0
 def _create_network_internal(self, observation_input=None):
     """Map an observation through a hidden MLP and a nonlinear output head."""
     assert observation_input is not None
     processed_obs = self._process_layer(observation_input,
                                         scope_name="observation_input")
     with tf.variable_scope("mlp"):
         hidden = mlp(
             processed_obs,
             self.observation_dim,
             self.observation_hidden_sizes,
             self.hidden_nonlinearity,
             W_initializer=self.hidden_W_init,
             b_initializer=self.hidden_b_init,
             pre_nonlin_lambda=self._process_layer,
         )
     hidden = self._process_layer(hidden,
                                  scope_name="output_preactivations")
     with tf.variable_scope("output"):
         preactivation = linear(
             hidden,
             self.observation_hidden_sizes[-1],
             self.output_dim,
             W_initializer=self.output_W_init,
             b_initializer=self.output_b_init,
         )
         return self.output_nonlinearity(preactivation)
Example #3
0
    def _create_network_internal(self, feature_input=None, action_input=None):
        """Forward model: concat(feature, action) -> MLP -> linear prediction.

        :param feature_input: current feature tensor (required).
        :param action_input: action tensor (required).
        :return: [batch, feature_dim] linear prediction of the next feature.
        """
        assert feature_input is not None and action_input is not None

        feature_input = self._process_layer(feature_input,
                                            scope_name="feature_input")
        action_input = self._process_layer(action_input,
                                           scope_name="action_input")

        with tf.variable_scope("mlp"):
            embedded = tf.concat(1, [feature_input, action_input])
            embedded_dim = self.feature_dim + self.action_dim

            # Default weight initializer is used deliberately; stale
            # commented-out truncated_normal experiments were removed.
            feature_output = mlp(
                embedded,
                embedded_dim,
                self.hidden_sizes,
                self.hidden_activation,
                b_initializer=tf.constant_initializer(0.),
                pre_nonlin_lambda=self._process_layer,
            )

        with tf.variable_scope("output_linear"):
            return linear(
                feature_output,
                self.hidden_sizes[-1],
                self.feature_dim,
                b_initializer=tf.constant_initializer(0.),
            )
Example #4
0
 def _create_network_internal(self, observation_input=None):
     """Encode an observation into a feature vector: optional MLP + linear."""
     assert observation_input is not None
     processed = self._process_layer(observation_input,
                                     scope_name="observation_input")
     if self.hidden_sizes:
         with tf.variable_scope("mlp"):
             encoded = mlp(
                 processed,
                 self.observation_dim,
                 self.hidden_sizes,
                 self.hidden_activation,
                 W_initializer=he_uniform_initializer(),
                 b_initializer=tf.constant_initializer(0.),
                 pre_nonlin_lambda=self._process_layer,
             )
         encoded_dim = self.hidden_sizes[-1]
     else:
         # No hidden layers configured: feed the observation straight
         # into the linear output head.
         encoded = processed
         encoded_dim = self.observation_dim
     with tf.variable_scope("output"):
         return linear(
             encoded,
             encoded_dim,
             self.feature_dim,
             W_initializer=he_uniform_initializer(),
             b_initializer=tf.constant_initializer(0.),
         )
    def _create_network_internal(self, observation_input, action_input):
        """Q-network: observation MLP, fuse with the action, then scalar head."""
        obs = self._process_layer(observation_input,
                                  scope_name="observation_input")
        act = self._process_layer(action_input, scope_name="action_input")
        with tf.variable_scope("observation_mlp"):
            obs_features = mlp(
                obs,
                self.observation_dim,
                self.observation_hidden_sizes,
                self.hidden_nonlinearity,
                W_initializer=self.hidden_W_init,
                b_initializer=self.hidden_b_init,
                pre_nonlin_lambda=self._process_layer,
            )
            obs_features = self._process_layer(
                obs_features, scope_name="observation_output")
        # Fuse observation features with the raw (processed) action.
        fused_input = tf.concat(1, [obs_features, act])
        fused_input_dim = self.action_dim + self.observation_hidden_sizes[-1]
        with tf.variable_scope("fusion_mlp"):
            fused = mlp(
                fused_input,
                fused_input_dim,
                self.embedded_hidden_sizes,
                self.hidden_nonlinearity,
                W_initializer=self.hidden_W_init,
                b_initializer=self.hidden_b_init,
                pre_nonlin_lambda=self._process_layer,
            )
            fused = self._process_layer(fused)

        with tf.variable_scope("output_linear"):
            return linear(
                fused,
                self.embedded_hidden_sizes[-1],
                1,
                W_initializer=self.output_W_init,
                b_initializer=self.output_b_init,
            )
Example #6
0
    def _create_network_internal(self, observation_input=None):
        """Conv encoder + MLP + nonlinear output head over an observation.

        Also stashes the flattened conv features on
        ``self.reshaped_observation_feature`` so callers can fetch them.

        :param observation_input: observation tensor (required).
        :return: ``self.output_nonlinearity`` applied to the final linear
            layer, shape [batch, output_dim].
        """
        assert observation_input is not None
        observation_input = self._process_layer(observation_input,
                                                scope_name="observation_input")

        with tf.variable_scope("conv_network"):
            observation_output, output_shape = conv_network(
                observation_input,
                self.input_shape,
                self.conv_filters,
                self.conv_filter_sizes,
                self.conv_strides,
                self.conv_pads,
                self.observation_hidden_sizes,
                self.hidden_nonlinearity,
                W_initializer=xavier_uniform_initializer(),
                b_initializer=tf.constant_initializer(0.),
                pre_nonlin_lambda=self._process_layer,
            )

        # Flatten conv features to [batch, flat_dim].
        output_dim = np.prod(output_shape[1:])
        observation_output = tf.reshape(observation_output, [-1, output_dim],
                                        name="reshape")
        # Exposed as an instance attribute for external inspection.
        self.reshaped_observation_feature = observation_output

        with tf.variable_scope("mlp"):
            observation_output = mlp(
                observation_output,
                output_dim,
                self.observation_hidden_sizes,
                self.hidden_nonlinearity,
                W_initializer=xavier_uniform_initializer(),
                b_initializer=tf.constant_initializer(0.),
                pre_nonlin_lambda=self._process_layer,
            )

        with tf.variable_scope("output"):
            return self.output_nonlinearity(
                linear(
                    observation_output,
                    self.observation_hidden_sizes[-1],
                    self.output_dim,
                    W_initializer=xavier_uniform_initializer(),
                    b_initializer=tf.constant_initializer(0.),
                ))
Example #7
0
    def _create_network_internal(self, observation_input=None):
        """Conv encoder + optional MLP + linear head producing features.

        :param observation_input: observation tensor (required).
        :return: [batch, feature_dim] linear output.
        """
        assert observation_input is not None
        observation_input = self._process_layer(observation_input,
                                                scope_name="observation_input")

        with tf.variable_scope("conv_network"):
            observation_output, output_shape = conv_network(
                observation_input,
                self.input_shape,
                self.conv_filters,
                self.conv_filter_sizes,
                self.conv_strides,
                self.conv_pads,
                self.hidden_sizes,
                self.hidden_activation,
                W_initializer=xavier_uniform_initializer(),
                b_initializer=tf.constant_initializer(0.),
                pre_nonlin_lambda=self._process_layer,
            )

        output_dim = np.prod(output_shape[1:])
        # BUG FIX: tf.contrib.layers.flatten does not accept a target shape
        # argument; use an explicit reshape to [batch, flat_dim] instead.
        observation_output = tf.reshape(observation_output, [-1, output_dim])
        with tf.variable_scope("mlp"):
            observation_output = mlp(
                observation_output,
                output_dim,
                self.hidden_sizes,
                self.hidden_activation,
                W_initializer=xavier_uniform_initializer(),
                b_initializer=tf.constant_initializer(0.),
                pre_nonlin_lambda=self._process_layer,
            )

        # With no hidden layers the MLP is a pass-through, so the head's
        # input width is the flattened conv feature size.
        if len(self.hidden_sizes) > 0:
            last_size = self.hidden_sizes[-1]
        else:
            last_size = output_dim

        with tf.variable_scope("output"):
            return linear(
                observation_output,
                last_size,
                self.feature_dim,
                W_initializer=xavier_uniform_initializer(),
                b_initializer=tf.constant_initializer(0.),
            )
Example #8
0
    def _create_network_internal(self,
                                 feature_input1=None,
                                 feature_input2=None):
        """Autoregressive inverse model: emit one action dimension at a time,
        each head conditioned on the shared embedding and the previous output.
        """
        assert feature_input1 is not None and feature_input2 is not None

        processed1 = self._process_layer(feature_input1,
                                         scope_name="feature_input1")
        processed2 = self._process_layer(feature_input2,
                                         scope_name="feature_input2")

        with tf.variable_scope("mlp"):
            pair_embedding = tf.concat(1, [processed1, processed2])

            shared_features = mlp(
                pair_embedding,
                2 * self.feature_dim,
                self.hidden_sizes,
                self.hidden_activation,
                b_initializer=tf.constant_initializer(0.),
                pre_nonlin_lambda=self._process_layer,
            )

        action_components = []
        for dim_idx in range(self.action_dim):
            # Each head sees the shared features plus the immediately
            # preceding output (none for the first dimension).
            if action_components:
                head_input = tf.concat(
                    1, [shared_features, action_components[-1]])
            else:
                head_input = shared_features

            with tf.variable_scope("inverse_output%d" % dim_idx):
                raw_output = linear(
                    head_input,
                    head_input.get_shape().as_list()[1],
                    1,
                )

            if self.output_activation is not None:
                raw_output = self.output_activation(raw_output)
            action_components.append(raw_output)

        return tf.concat(1, action_components)
Example #9
0
    def _create_network_internal(self, feature_input):
        """Decode a feature vector into a 64x64 image: MLP, reshape to a
        32x32x4 grid, then bilinear resize up to 64x64."""
        decoded = mlp(
            feature_input,
            self.feature_dim,
            (self.output_dim, self.upsample_dim),
            self.hidden_activation,
            W_initializer=xavier_uniform_initializer(),
            b_initializer=tf.constant_initializer(0.),
            pre_nonlin_lambda=self._process_layer,
        )

        # NOTE(review): assumes the final MLP width equals 32*32*4 = 4096 so
        # the reshape below is valid -- confirm upsample_dim matches.
        batch_size = tf.shape(decoded)[0]
        spatial = tf.reshape(decoded, [batch_size, 32, 32, 4])
        return tf.image.resize_images(spatial, [64, 64])
Example #10
0
    def _create_network_internal(self,
                                 feature_input1=None,
                                 feature_input2=None):
        """Inverse model: predict the action linking two feature vectors.

        :param feature_input1: feature tensor for the first state (required).
        :param feature_input2: feature tensor for the second state (required).
        :return: [batch, action_dim] tensor, passed through
            ``self.output_activation`` when one is configured.
        """
        assert feature_input1 is not None and feature_input2 is not None

        feature_input1 = self._process_layer(feature_input1,
                                             scope_name="feature_input1")
        feature_input2 = self._process_layer(feature_input2,
                                             scope_name="feature_input2")

        with tf.variable_scope("mlp"):
            embedded = tf.concat(1, [feature_input1, feature_input2])

            action_output = mlp(
                embedded,
                2 * self.feature_dim,
                self.hidden_sizes,
                self.hidden_activation,
                b_initializer=tf.constant_initializer(0.),
                pre_nonlin_lambda=self._process_layer,
            )
        with tf.variable_scope("output"):
            # Build the linear head once; the original duplicated this call
            # in both branches of the activation check.
            output = linear(
                action_output,
                self.hidden_sizes[-1],
                self.action_dim,
                b_initializer=tf.constant_initializer(0.),
            )
            if self.output_activation is not None:
                output = self.output_activation(output)
            return output
 def _create_network_internal(self, observation_input, action_input):
     """Q-head: concat processed observation/action, MLP, nonlinear output."""
     obs = self._process_layer(observation_input,
                               scope_name="observation_input")
     act = self._process_layer(action_input, scope_name="action_input")
     joint = tf.concat(1, [obs, act])
     joint_dim = self.observation_dim + self.action_dim
     hidden = tf_util.mlp(
         joint,
         joint_dim,
         self.hidden_sizes,
         self.hidden_nonlinearity,
         W_initializer=self.hidden_W_init,
         b_initializer=self.hidden_b_init,
         pre_nonlin_lambda=self._process_layer,
     )
     preactivation = tf_util.linear(
         hidden,
         self.hidden_sizes[-1],
         self.output_dim,
         W_initializer=self.output_W_init,
         b_initializer=self.output_b_init,
     )
     return self.output_nonlinearity(preactivation)
 def _create_network_internal(self, observation_input):
     """Policy head: hidden MLP over the observation, then nonlinear output."""
     obs = self._process_layer(observation_input,
                               scope_name="observation_input")
     with tf.variable_scope("hidden_mlp"):
         hidden = tf_util.mlp(
             obs,
             self.observation_dim,
             self.observation_hidden_sizes,
             self.hidden_nonlinearity,
             W_initializer=self.hidden_W_init,
             b_initializer=self.hidden_b_init,
             pre_nonlin_lambda=self._process_layer,
         )
     hidden = self._process_layer(hidden, scope_name="hidden_output")
     with tf.variable_scope("output"):
         preactivation = tf_util.linear(
             hidden,
             self.observation_hidden_sizes[-1],
             self.output_dim,
             W_initializer=self.output_W_init,
             b_initializer=self.output_b_init,
         )
         return self.output_nonlinearity(preactivation)
Example #13
0
    def __init__(
            self,
            env_spec,
            mean_hidden_nonlinearity=tf.nn.relu,
            mean_hidden_sizes=(32, 32),
            std_hidden_nonlinearity=tf.nn.relu,
            std_hidden_sizes=(32, 32),
            min_std=1e-6,
    ):
        """
        :param env_spec: environment spec; the action space must be a Box.
        :param mean_hidden_nonlinearity: nonlinearity used for the mean hidden
                                         layers
        :param mean_hidden_sizes: list of hidden_sizes for the fully-connected hidden layers
        :param std_hidden_nonlinearity: nonlinearity used for the std hidden
                                        layers
        :param std_hidden_sizes: list of hidden_sizes for the fully-connected hidden layers
        :param min_std: floor applied to the std to avoid numerical issues
        :return:
        """
        Serializable.quick_init(self, locals())
        assert isinstance(env_spec.action_space, Box)
        super(GaussianMLPPolicy, self).__init__(env_spec)

        self.env_spec = env_spec
        # Cap per-process GPU memory so multiple sessions can share a device.
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.2)
        self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        # Create network
        observation_dim = self.env_spec.observation_space.flat_dim
        self.observations_input = tf.placeholder(tf.float32,
                                                 shape=[None, observation_dim])
        action_dim = self.env_spec.action_space.flat_dim
        with tf.variable_scope('mean') as _:
            mlp_mean_output = tf_util.mlp(self.observations_input,
                                          observation_dim,
                                          mean_hidden_sizes,
                                          mean_hidden_nonlinearity)
            mlp_mean_output_size = mean_hidden_sizes[-1]
            self.mean = tf_util.linear(mlp_mean_output,
                                       mlp_mean_output_size,
                                       action_dim)

        with tf.variable_scope('log_std') as _:
            mlp_std_output = tf_util.mlp(self.observations_input,
                                         observation_dim,
                                         std_hidden_sizes,
                                         std_hidden_nonlinearity)
            mlp_std_output_size = std_hidden_sizes[-1]
            self.log_std = tf_util.linear(mlp_std_output,
                                          mlp_std_output_size,
                                          action_dim)
            self.std = tf.maximum(tf.exp(self.log_std), min_std)

        self._dist = DiagonalGaussian(action_dim)

        self.actions_output = tf.placeholder(tf.float32, shape=[None, action_dim])
        z = (self.actions_output - self.mean) / self.std
        # BUG FIX: the diagonal-Gaussian log-density per dimension is
        #   -log(std) - z^2 / 2 - log(2*pi) / 2
        # The original subtracted log(std**2) = 2*log(std), double-counting
        # the normalization term.
        self.log_likelihood = (- tf.log(self.std)
                               - z**2 * 0.5
                               - tf.log(2*np.pi) * 0.5)
Example #14
0
def create_network(in_size, hidden_sizes=(32, 4), nonlinearity=tf.nn.relu):
    """Build a simple MLP graph over a flat placeholder input.

    :param in_size: dimensionality of the flat input.
    :param hidden_sizes: widths of the fully-connected layers (generalized
        from the previously hard-coded ``(32, 4)``; default preserved).
    :param nonlinearity: activation applied between layers (default relu,
        as before).
    :return: (input placeholder, final layer tensor).
    """
    input_ph = tf.placeholder(tf.float32, shape=[None, in_size])
    last_layer = tf_util.mlp(input_ph, in_size, hidden_sizes, nonlinearity)
    return input_ph, last_layer