Example #1
0
    def _build_module(self):
        """
        Build the embedder graph: normalize the input, optionally clip it,
        run it through the configured layers and flatten the result.

        Every intermediate tensor is appended to self.layers, and the flattened
        final tensor is stored in self.output.
        """
        # NOTE: image inputs are expected to arrive as uint8 to keep memory usage low. The rescaling is done
        #  here rather than as an input filter (filters.observation.observation_filter), because a filter
        #  would turn the network input into floats, which take 4x the memory - and every transition
        #  stored in the memory would become 4x more expensive as well.
        normalized_input = self.input / self.input_rescaling - self.input_offset

        # clip the input to the given range, when one was supplied
        clipping_range = self.input_clipping
        if clipping_range is not None:
            normalized_input = tf.clip_by_value(normalized_input, clipping_range[0], clipping_range[1])

        self.layers.append(normalized_input)

        # a scheme is either a key into the predefined self.schemes table or an explicit list of layers
        layers_params = self.schemes[self.scheme] if isinstance(self.scheme, EmbedderScheme) else self.scheme

        # layers order is conv -> batchnorm -> activation -> dropout
        for layer_index, layer_spec in enumerate(layers_params):
            layer_name = '{}_{}'.format(layer_spec.__class__.__name__, layer_index)
            self.layers.append(layer_spec(input_layer=self.layers[-1], name=layer_name))

            post_layers = batchnorm_activation_dropout(self.layers[-1], self.batchnorm,
                                                       self.activation_function, self.dropout,
                                                       self.dropout_rate, layer_index)
            self.layers.extend(post_layers)

        self.output = tf.contrib.layers.flatten(self.layers[-1])
Example #2
0
    def _build_module(self):
        """
        Build optional dense layers followed by a single LSTM layer.

        self.state_in: tuple of placeholders containing the initial state
        self.state_out: tuple of output state

        todo: it appears that the shape of the output is batch, feature
        the code here seems to be slicing off the first element in the batch
        which would definitely be wrong. need to double check the shape

        NOTE(review): the dense output gets a new leading axis (tf.expand_dims at axis 0)
        and the state placeholders have a leading dimension of 1, so the `[:1, :]` slices
        below look like they keep the whole state - confirm before closing the todo.
        """

        self.layers.append(self.input)

        # optionally insert some dense layers before the LSTM
        layers_params = self.schemes[self.scheme] if isinstance(self.scheme, MiddlewareScheme) else self.scheme
        for layer_index, layer_spec in enumerate(layers_params):
            dense_output = tf.layers.dense(self.layers[-1], layer_spec[0], name='fc{}'.format(layer_index))
            self.layers.append(dense_output)

            post_layers = batchnorm_activation_dropout(self.layers[-1], self.batchnorm,
                                                       self.activation_function,
                                                       self.dropout, self.dropout_rate,
                                                       layer_index)
            self.layers.extend(post_layers)

        # add the LSTM layer
        cell = tf.nn.rnn_cell.BasicLSTMCell(self.number_of_lstm_cells, state_is_tuple=True)
        c_size = cell.state_size.c
        h_size = cell.state_size.h

        # zero-valued arrays matching the shapes of the state placeholders below
        self.c_init = np.zeros((1, c_size), np.float32)
        self.h_init = np.zeros((1, h_size), np.float32)
        self.state_init = [self.c_init, self.h_init]

        # placeholders through which the initial LSTM state is fed
        self.c_in = tf.placeholder(tf.float32, [1, c_size])
        self.h_in = tf.placeholder(tf.float32, [1, h_size])
        self.state_in = (self.c_in, self.h_in)

        # add a leading axis of size 1 and pass the original leading dimension as the sequence length
        lstm_features = self.layers[-1]
        sequence_input = tf.expand_dims(lstm_features, [0])
        sequence_length = tf.shape(lstm_features)[:1]
        initial_state = tf.nn.rnn_cell.LSTMStateTuple(self.c_in, self.h_in)

        lstm_outputs, final_state = tf.nn.dynamic_rnn(cell,
                                                      sequence_input,
                                                      initial_state=initial_state,
                                                      sequence_length=sequence_length,
                                                      time_major=False)

        final_c, final_h = final_state
        self.state_out = (final_c[:1, :], final_h[:1, :])
        self.output = tf.reshape(lstm_outputs, [-1, self.number_of_lstm_cells])
Example #3
0
    def _build_module(self, input_layer):
        """
        Build the policy-mean head on top of input_layer.

        Stores the scaled mean tensor in self.policy_mean and exposes it as the
        single element of self.output. For local networks with a non-zero action
        penalty, a penalty term on the pre-activation values is added to
        self.regularizations.

        :param input_layer: the tensor this head is built on
        """
        # mean: the pre-activation values are kept separately, since the penalty below is applied to them
        pre_activation_policy_values_mean = self.dense_layer(self.num_actions)(input_layer, name='fc_mean')
        post_layers = batchnorm_activation_dropout(pre_activation_policy_values_mean, self.batchnorm,
                                                   self.activation_function,
                                                   False, 0, 0)
        policy_values_mean = post_layers[-1]
        self.policy_mean = tf.multiply(policy_values_mean, self.output_scale, name='output_mean')

        # add a penalty on the mean of the squared pre-activation features of the action
        # (a truthy penalty is necessarily non-zero, so a single truthiness check is enough)
        if self.is_local and self.action_penalty:
            squared_pre_activation = tf.square(pre_activation_policy_values_mean)
            self.regularizations += [self.action_penalty * tf.reduce_mean(squared_pre_activation)]

        self.output = [self.policy_mean]
Example #4
0
    def _build_module(self):
        """
        Build the middleware graph by stacking the configured layers on self.input.

        Every intermediate tensor is appended to self.layers, and the last one
        is stored in self.output.
        """
        self.layers.append(self.input)

        # a scheme is either a key into the predefined self.schemes table or an explicit list of layers
        layers_params = self.schemes[self.scheme] if isinstance(self.scheme, MiddlewareScheme) else self.scheme

        for layer_index, layer_spec in enumerate(layers_params):
            layer_name = '{}_{}'.format(layer_spec.__class__.__name__, layer_index)
            self.layers.append(layer_spec(self.layers[-1], name=layer_name))

            # extend with whatever batchnorm_activation_dropout builds on top of the new layer
            post_layers = batchnorm_activation_dropout(self.layers[-1], self.batchnorm,
                                                       self.activation_function, self.dropout,
                                                       self.dropout_rate, layer_index)
            self.layers.extend(post_layers)

        self.output = self.layers[-1]