Esempio n. 1
0
class DDPGCriticNetwork(LayerBasedNetwork):
    """Critic network for DDPG taking a state input and an action input.

    The state input goes through a linear -> batch-normalization -> ReLU
    stack, is concatenated with the action input along axis 1, goes through a
    second linear -> batch-normalization -> ReLU stack and finally through a
    dense layer of size 1 with a tanh activation.
    """

    def __init__(self,
                 scope='ddpg-critic-network',
                 summary_labels=(),
                 size_t0=400,
                 size_t1=300):
        """Create all layers and register them in application order.

        Args:
            scope: Scope name forwarded to the base network.
            summary_labels: Summary labels forwarded to the base network.
            size_t0: Size of the first linear layer (applied to the states).
            size_t1: Size of the second linear layer (applied to the
                concatenation of hidden features and actions).
        """
        super(DDPGCriticNetwork, self).__init__(scope=scope,
                                                summary_labels=summary_labels)

        # First stack: processes the state input only.
        self.t0l = Linear(size=size_t0, scope='linear0')
        self.t0b = TFLayer(layer='batch_normalization',
                           scope='batchnorm0',
                           center=True,
                           scale=True)
        self.t0n = Nonlinearity(name='relu', scope='relu0')

        # Second stack: processes hidden features concatenated with actions.
        self.t1l = Linear(size=size_t1, scope='linear1')
        self.t1b = TFLayer(layer='batch_normalization',
                           scope='batchnorm1',
                           center=True,
                           scale=True)
        self.t1n = Nonlinearity(name='relu', scope='relu1')

        # Output layer: a single unit, weights drawn uniformly from
        # [-3e-3, 3e-3].
        self.t2d = Dense(size=1,
                         activation='tanh',
                         scope='dense0',
                         weights=tf.random_uniform_initializer(minval=-3e-3,
                                                               maxval=3e-3))

        for layer in (self.t0l, self.t0b, self.t0n,
                      self.t1l, self.t1b, self.t1n,
                      self.t2d):
            self.add_layer(layer)

    @staticmethod
    def _single_input(value, kind):
        """Return the single input behind ``value``.

        Args:
            value: Either the input itself or a dict holding exactly one
                input.
            kind: Word used in error messages ('state' or 'action').

        Returns:
            ``value`` itself when it is not a dict, otherwise the only value
            stored in the dict.

        Raises:
            TensorForceError: If ``value`` is None or is a dict that does not
                contain exactly one entry.
        """
        # Compare against None explicitly: truth-testing a tensor is not a
        # valid presence check and may raise inside TensorFlow.
        if value is None:
            raise TensorForceError(
                'DDPG critic network requires a {} input, but none given.'
                .format(kind))
        if not isinstance(value, dict):
            return value
        if len(value) != 1:
            raise TensorForceError(
                'DDPG critic network must have only one {} input, but {} given.'
                .format(kind, len(value)))
        return next(iter(value.values()))

    def tf_apply(self, x, internals, update, return_internals=False):
        """Apply the critic to a state/action input pair.

        Args:
            x: Mapping with the keys 'states' and 'actions'. Each entry is
                either the input itself or a dict with exactly one input.
            internals: Not used by this network.
            update: Forwarded unchanged to every layer's ``apply``.
            return_internals: If true, a ``(output, None)`` pair is returned
                instead of the bare output.

        Returns:
            The output of the final dense layer, or ``(output, None)`` when
            ``return_internals`` is true.

        Raises:
            TensorForceError: If an input is None or is a dict that does not
                hold exactly one entry.
        """
        x_states = self._single_input(x['states'], 'state')
        x_actions = self._single_input(x['actions'], 'action')

        out = self.t0l.apply(x=x_states, update=update)
        out = self.t0b.apply(x=out, update=update)
        out = self.t0n.apply(x=out, update=update)

        # Actions join the network only at the second linear layer.
        joined = tf.concat([out, x_actions], axis=1)
        out = self.t1l.apply(x=joined, update=update)
        out = self.t1b.apply(x=out, update=update)
        out = self.t1n.apply(x=out, update=update)

        out = self.t2d.apply(x=out, update=update)

        # The dense layer's output is returned as is; its last dimension is
        # not squeezed away.

        if return_internals:
            # TODO: internals management; no internals are produced yet.
            return out, None
        return out
Esempio n. 2
0
class EIIE_OutPut(Layer):
    """
    EIIE output layer based on a 2-dimensional convolution.

    The rank-4 input is convolved with a single filter whose window is
    ``(1, x.shape[2])``, a bias is added and a ReLU nonlinearity is applied.

    NOTE(review): earlier documentation listed a second entry ``last_w`` (the
    last weights coming from the environment); ``tf_apply`` only receives
    ``x``, so no such input is consumed by this class.
    """
    def __init__(self,
                 l2_regularization=0.0,
                 l1_regularization=0.0,
                 scope='eieeoutput',
                 summary_labels=()):
        """
        Set up the fixed convolution settings and the nonlinearity.

        The number of filters (1), the stride (1), the padding ('VALID'), the
        use of a bias and the activation ('relu') are fixed by this class; the
        convolution window is derived from the input inside ``tf_apply``.

        Args:
            l2_regularization: L2 regularization weight
            l1_regularization: L1 regularization weight
            scope: Scope name forwarded to the base layer
            summary_labels: Summary labels forwarded to the base layer and to
                the nonlinearity
        """
        self.size = 1
        self.stride = 1
        self.padding = 'VALID'
        # The on/off flag and the variable live in separate attributes so the
        # flag is never overwritten by the variable created in tf_apply.
        self.use_bias = True
        self.bias = None
        activation = 'relu'
        self.l2_regularization = l2_regularization
        self.l1_regularization = l1_regularization
        self.nonlinearity = Nonlinearity(name=activation,
                                         summary_labels=summary_labels)
        super(EIIE_OutPut, self).__init__(scope=scope,
                                          summary_labels=summary_labels)

    def tf_apply(self, x, update):
        """
        Convolve ``x``, add the bias and apply the nonlinearity.

        Args:
            x: Input tensor, must have rank 4
            update: Forwarded unchanged to the nonlinearity

        Returns:
            The tensor produced by the nonlinearity.

        Raises:
            TensorForceError: If the rank of ``x`` is not 4.
        """
        input_rank = util.rank(x)
        if input_rank != 4:
            raise TensorForceError(
                'Invalid input rank for conv2d layer: {}, must be 4'.format(
                    input_rank))

        # The window spans the whole third input dimension, so a 'VALID'
        # convolution reduces that dimension to size 1.
        self.window = (1, x.shape[2])
        filters_shape = self.window + (x.shape[3].value, self.size)
        stddev = min(0.1, sqrt(2.0 / self.size))
        filters_init = tf.random_normal_initializer(mean=0.0,
                                                    stddev=stddev,
                                                    dtype=tf.float32)
        self.filters = tf.get_variable(name='W',
                                       shape=filters_shape,
                                       dtype=tf.float32,
                                       initializer=filters_init)

        if isinstance(self.stride, tuple):
            stride_h, stride_w = self.stride
        else:
            stride_h = stride_w = self.stride
        x = tf.nn.conv2d(input=x,
                         filter=self.filters,
                         strides=(1, stride_h, stride_w, 1),
                         padding=self.padding)

        if self.use_bias:
            bias_shape = (self.size, )
            bias_init = tf.zeros_initializer(dtype=tf.float32)
            self.bias = tf.get_variable(name='b',
                                        shape=bias_shape,
                                        dtype=tf.float32,
                                        initializer=bias_init)
            x = tf.nn.bias_add(value=x, bias=self.bias)

        x = self.nonlinearity.apply(x=x, update=update)

        if 'activations' in self.summary_labels:
            summary = tf.summary.histogram(name='activations', values=x)
            self.summaries.append(summary)

        return x

    def tf_regularization_loss(self):
        """
        Collect the regularization terms of this layer.

        The terms are: the base layer's loss (if any), the L2 and L1
        penalties on the filters and on the bias variable (each only when its
        weight is positive), and the nonlinearity's loss (if any).

        Returns:
            The sum of all collected terms, or None when there are none.
        """
        regularization_loss = super(EIIE_OutPut, self).tf_regularization_loss()
        if regularization_loss is None:
            losses = list()
        else:
            losses = [regularization_loss]

        if self.l2_regularization > 0.0:
            losses.append(self.l2_regularization *
                          tf.nn.l2_loss(t=self.filters))
            # self.bias is None until tf_apply has created the variable.
            if self.bias is not None:
                losses.append(self.l2_regularization *
                              tf.nn.l2_loss(t=self.bias))

        if self.l1_regularization > 0.0:
            losses.append(self.l1_regularization *
                          tf.reduce_sum(input_tensor=tf.abs(x=self.filters)))
            if self.bias is not None:
                losses.append(self.l1_regularization *
                              tf.reduce_sum(input_tensor=tf.abs(x=self.bias)))

        regularization_loss = self.nonlinearity.regularization_loss()
        if regularization_loss is not None:
            losses.append(regularization_loss)

        if len(losses) > 0:
            return tf.add_n(inputs=losses)
        else:
            return None

    def get_variables(self, include_non_trainable=False):
        """
        Return the variables of this layer followed by those of its
        nonlinearity.

        Args:
            include_non_trainable: Forwarded to both ``get_variables`` calls
        """
        layer_variables = super(
            EIIE_OutPut,
            self).get_variables(include_non_trainable=include_non_trainable)
        nonlinearity_variables = self.nonlinearity.get_variables(
            include_non_trainable=include_non_trainable)

        return layer_variables + nonlinearity_variables

    def get_summaries(self):
        """
        Return the summaries of this layer followed by those of its
        nonlinearity.
        """
        layer_summaries = super(EIIE_OutPut, self).get_summaries()
        nonlinearity_summaries = self.nonlinearity.get_summaries()

        return layer_summaries + nonlinearity_summaries