def build_encoder_conv_(self, x, namespace, share_weights=False):
        # Roughly the same network as used for MinAtar.

        with tf.variable_scope(namespace, reuse=share_weights):

            x = x[:, :, :, tf.newaxis]

            with tf.variable_scope("conv1"):
                x = tf.layers.conv2d(
                    x,
                    16,
                    3, (1, 1),
                    padding="same",
                    activation=tf.nn.relu,
                    kernel_regularizer=agent_utils.get_weight_regularizer(
                        self.weight_decay),
                    kernel_initializer=agent_utils.get_mrsa_initializer())

            x = tf.layers.flatten(x)

            with tf.variable_scope("fc1"):
                x = tf.layers.dense(
                    x,
                    128,
                    activation=tf.nn.relu,
                    kernel_regularizer=agent_utils.get_weight_regularizer(
                        self.weight_decay),
                    kernel_initializer=agent_utils.get_mrsa_initializer())

            with tf.variable_scope("latent"):
                mu = tf.layers.dense(
                    x,
                    32,
                    activation=None,
                    kernel_regularizer=agent_utils.get_weight_regularizer(
                        self.weight_decay),
                    kernel_initializer=agent_utils.get_mrsa_initializer())
                sigma = tf.layers.dense(
                    x,
                    32,
                    activation=None,
                    kernel_regularizer=agent_utils.get_weight_regularizer(
                        self.weight_decay),
                    kernel_initializer=agent_utils.get_mrsa_initializer())
                # reparameterization trick: sample = mu + sd * noise with
                # noise ~ N(0, I); softplus keeps the standard deviation positive
                sd = tf.nn.softplus(sigma)
                noise = tf.random_normal(shape=(tf.shape(mu)[0], 32),
                                         mean=0.0,
                                         stddev=1.0)
                sd_noise_t = noise * sd
                sample = mu + sd_noise_t
                var = tf.square(sd)

        return mu, var, sd, sample, noise
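
    # Illustrative helper (an assumption, not part of the original class):
    # the (mu, var) pair returned by the encoders is what a VAE-style KL
    # penalty against a standard normal prior would consume.
    def gaussian_kl_(self, mu, var):
        # KL(N(mu, var) || N(0, I)), summed over the latent dimensions;
        # the 1e-8 epsilon is an added numerical-safety assumption.
        return 0.5 * tf.reduce_sum(
            var + tf.square(mu) - 1.0 - tf.log(var + 1e-8), axis=1)
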
    def build_encoder_fc_(self, x, namespace, share_weights=False):

        with tf.variable_scope(namespace, reuse=share_weights):

            x = tf.layers.flatten(x)

            with tf.variable_scope("fc1"):
                x = tf.layers.dense(
                    x,
                    128,
                    activation=tf.nn.relu,
                    kernel_regularizer=agent_utils.get_weight_regularizer(
                        self.weight_decay),
                    kernel_initializer=agent_utils.get_mrsa_initializer())

            with tf.variable_scope("fc2"):
                x = tf.layers.dense(
                    x,
                    128,
                    activation=tf.nn.relu,
                    kernel_regularizer=agent_utils.get_weight_regularizer(
                        self.weight_decay),
                    kernel_initializer=agent_utils.get_mrsa_initializer())

            with tf.variable_scope("latent"):
                mu = tf.layers.dense(
                    x,
                    32,
                    activation=None,
                    kernel_regularizer=agent_utils.get_weight_regularizer(
                        self.weight_decay),
                    kernel_initializer=agent_utils.get_mrsa_initializer())
                sigma = tf.layers.dense(
                    x,
                    32,
                    activation=None,
                    kernel_regularizer=agent_utils.get_weight_regularizer(
                        self.weight_decay),
                    kernel_initializer=agent_utils.get_mrsa_initializer())
                # reparameterization trick: sample = mu + sd * noise with
                # noise ~ N(0, I); softplus keeps the standard deviation positive
                sd = tf.nn.softplus(sigma)
                noise = tf.random_normal(shape=(tf.shape(mu)[0], 32),
                                         mean=0.0,
                                         stddev=1.0)
                sd_noise_t = noise * sd
                sample = mu + sd_noise_t
                var = tf.square(sd)

        return mu, var, sd, sample, noise
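
    # Usage sketch (an assumption about how the class wires things up): in
    # TF1, calling an encoder a second time with share_weights=True reuses
    # the variables created by the first call, since share_weights is passed
    # straight to tf.variable_scope(reuse=...):
    #
    #     mu0, var0, sd0, sample0, noise0 = self.build_encoder_fc_(
    #         x0, self.ENCODER_NAMESPACE)
    #     mu1, var1, sd1, sample1, noise1 = self.build_encoder_fc_(
    #         x1, self.ENCODER_NAMESPACE, share_weights=True)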
    def build_predictors_(self):

        with tf.variable_scope(self.ENCODER_NAMESPACE):

            with tf.variable_scope("predict_q"):
                self.q_prediction_t = tf.layers.dense(
                    self.state_sample_t,
                    self.NUM_ACTIONS,
                    activation=None,
                    kernel_regularizer=agent_utils.get_weight_regularizer(
                        self.weight_decay),
                    kernel_initializer=agent_utils.get_mrsa_initializer())
            self.masked_q_prediction_t = tf.reduce_sum(self.actions_mask_t *
                                                       self.q_prediction_t,
                                                       axis=1)
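
            # The reduce_sum above picks out one Q-value per sample, which
            # assumes actions_mask_t is a one-hot action encoding. A sketch
            # of how such a mask is typically built (actions_pl is a
            # hypothetical int placeholder, not shown in this snippet):
            #
            #     self.actions_mask_t = tf.one_hot(
            #         self.actions_pl, self.NUM_ACTIONS, dtype=tf.float32)
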
    def build_network_(self, namespace):

        with tf.variable_scope(namespace):

            if self.target_size is not None:
                x = tf.image.resize_bilinear(self.depth_pl, size=(self.target_size, self.target_size))
            else:
                x = self.depth_pl

            with tf.variable_scope("convs"):

                for i in range(len(self.num_filters_list)):

                    with tf.variable_scope("conv{:d}".format(i + 1)):

                        x = tf.layers.conv2d(
                            x, self.num_filters_list[i], self.filter_size_list[i], self.stride_list[i],
                            padding="SAME", activation=tf.nn.relu,
                            kernel_initializer=utils.get_mrsa_initializer()
                        )

            x = tf.layers.flatten(x, name="flatten")

            with tf.variable_scope("fcs"):

                for i in range(len(self.hiddens)):

                    with tf.variable_scope("fc{:d}".format(i + 1)):

                        x = tf.layers.dense(
                            x, self.hiddens[i], activation=tf.nn.relu
                        )

            with tf.variable_scope("logits"):

                # predict for both hand empty and hand full
                logits = tf.layers.dense(x, self.output_shape[0] * self.output_shape[1] * 2)
                logits = tf.reshape(logits, shape=(-1, self.output_shape[0], self.output_shape[1], 1))

                # mask hand states
                logits = self.mask_hand_states(logits, self.hand_states_pl)

                return tf.reshape(
                    logits, shape=(-1, self.output_shape[0] * self.output_shape[1])
                )
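
    # Sketch only: mask_hand_states is defined elsewhere in the class. Since
    # the dense layer above emits output_shape[0] * output_shape[1] * 2
    # logits and the reshape folds the two hand-state maps into the leading
    # dimension, a plausible (assumed, not original) implementation keeps
    # the map matching each sample's hand state:
    #
    #     def mask_hand_states(self, logits, hand_states_pl):
    #         # [batch * 2, H, W, 1] -> [batch, 2, H, W]
    #         logits = tf.reshape(logits, (
    #             -1, 2, self.output_shape[0], self.output_shape[1]))
    #         mask = tf.one_hot(hand_states_pl, 2, dtype=logits.dtype)
    #         return tf.reduce_sum(
    #             logits * mask[:, :, tf.newaxis, tf.newaxis], axis=1)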

    # A separate example: variant of build_network_ with plain per-action logits.
    def build_network_(self, namespace):

        with tf.variable_scope(namespace):

            x = self.depth_pl

            with tf.variable_scope("convs"):

                for i in range(len(self.num_filters_list)):

                    with tf.variable_scope("conv{:d}".format(i + 1)):

                        x = tf.layers.conv2d(
                            x,
                            self.num_filters_list[i],
                            self.filter_size_list[i],
                            self.stride_list[i],
                            padding="SAME",
                            activation=tf.nn.relu,
                            kernel_initializer=utils.get_mrsa_initializer())

            x = tf.layers.flatten(x, name="flatten")

            with tf.variable_scope("fcs"):

                for i in range(len(self.hiddens)):

                    with tf.variable_scope("fc{:d}".format(i + 1)):

                        x = tf.layers.dense(x,
                                            self.hiddens[i],
                                            activation=tf.nn.relu)

            with tf.variable_scope("logits"):

                logits = tf.layers.dense(x, self.num_actions)

                return logits
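
    # Usage sketch (names are assumptions, not from the original code): the
    # raw logits are typically consumed outside this method, e.g. as a
    # greedy policy or a softmax distribution over actions:
    #
    #     self.logits_t = self.build_network_("network")
    #     self.greedy_action_t = tf.argmax(self.logits_t, axis=1)
    #     self.policy_t = tf.nn.softmax(self.logits_t, axis=1)
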
    def build_model_conv_(self):
        # Roughly the same network as used for MinAtar.

        with tf.variable_scope(self.MODEL_NAMESPACE):

            x = self.states_pl[:, :, :, tf.newaxis]
            batch_size = tf.shape(x)[0]

            with tf.variable_scope("conv1"):
                x = tf.layers.conv2d(
                    x,
                    16,
                    3, (1, 1),
                    padding="same",
                    activation=tf.nn.relu,
                    kernel_regularizer=agent_utils.get_weight_regularizer(
                        self.weight_decay),
                    kernel_initializer=agent_utils.get_mrsa_initializer())

            x = tf.layers.flatten(x)

            with tf.variable_scope("fc1"):
                x = tf.layers.dense(
                    x,
                    128,
                    activation=tf.nn.relu,
                    kernel_regularizer=agent_utils.get_weight_regularizer(
                        self.weight_decay),
                    kernel_initializer=agent_utils.get_mrsa_initializer())

            if self.dropout_prob is not None and self.dropout_prob > 0.0:
                x = tf.layers.dropout(x,
                                      rate=self.dropout_prob,
                                      training=self.is_training)

            with tf.variable_scope("predict_reward"):
                self.reward_prediction_t = tf.layers.dense(
                    x,
                    1,
                    activation=None,
                    kernel_regularizer=agent_utils.get_weight_regularizer(
                        self.weight_decay),
                    kernel_initializer=agent_utils.get_mrsa_initializer())[:,
                                                                           0]

            with tf.variable_scope("predict_transition"):
                self.transition_prediction_t = tf.layers.dense(
                    x,
                    self.NUM_ACTIONS * self.height * self.width,
                    activation=None,
                    kernel_regularizer=agent_utils.get_weight_regularizer(
                        self.weight_decay),
                    kernel_initializer=agent_utils.get_mrsa_initializer())
                self.transition_prediction_t = tf.reshape(
                    self.transition_prediction_t,
                    (batch_size, self.NUM_ACTIONS, self.height, self.width))
                self.masked_transition_prediction_t = tf.reduce_sum(
                    self.transition_prediction_t *
                    self.actions_mask_t[:, :, tf.newaxis, tf.newaxis],
                    axis=1)
                # softmax over all grid positions, then restore the 2D layout
                flat_prediction_t = tf.reshape(
                    self.masked_transition_prediction_t,
                    shape=[batch_size, -1])
                self.masked_transition_prediction_softmax_t = tf.reshape(
                    tf.nn.softmax(flat_prediction_t, axis=1),
                    [batch_size, self.height, self.width])
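
            # Training sketch (an assumption; next_states_pl is a
            # hypothetical placeholder not shown here): the masked transition
            # logits are typically trained with a softmax cross-entropy over
            # the flattened grid, e.g. reusing flat_prediction_t from above:
            #
            #     transition_loss_t = tf.nn.softmax_cross_entropy_with_logits_v2(
            #         labels=tf.reshape(self.next_states_pl, [batch_size, -1]),
            #         logits=flat_prediction_t)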