Beispiel #1
0
        class CustomNetwork(LayerBasedNetwork):
            """Network with one two-layer Dense stack per input 'state0'..'state3'.

            Each stack consists of two ``Dense(size=32)`` layers; the outputs of
            the four stacks are combined by multiplying them together with ``*``.
            """

            def __init__(self, scope='layerbased-network', summary_labels=()):
                """Create and register the eight Dense layers.

                Args:
                    scope: Scope name forwarded to the parent constructor.
                    summary_labels: Summary labels forwarded to the parent
                        constructor.
                """
                super(CustomNetwork, self).__init__(scope=scope, summary_labels=summary_labels)

                # (attribute name, layer scope), in registration order.
                layer_specs = (
                    ('layer01', 'state0-1'),
                    ('layer02', 'state0-2'),
                    ('layer11', 'state1-1'),
                    ('layer12', 'state1-2'),
                    ('layer21', 'state2-1'),
                    ('layer22', 'state2-2'),
                    ('layer31', 'state3-1'),
                    ('layer32', 'state3-2'),
                )
                for attribute, layer_scope in layer_specs:
                    layer = Dense(size=32, scope=layer_scope)
                    setattr(self, attribute, layer)
                    self.add_layer(layer=layer)

            def tf_apply(self, x, internals, update, return_internals=False):
                """Run every input through its own stack and multiply the results.

                Args:
                    x: Object indexable by the keys 'state0', 'state1', 'state2'
                        and 'state3'.
                    internals: Not used by this network.
                    update: Passed through to every layer's ``apply`` call.
                    return_internals: If true, return a tuple of the output and
                        an empty list instead of the output alone.

                Returns:
                    The product of the four stack outputs, optionally paired
                    with an empty list.
                """
                stacks = (
                    ('state0', self.layer01, self.layer02),
                    ('state1', self.layer11, self.layer12),
                    ('state2', self.layer21, self.layer22),
                    ('state3', self.layer31, self.layer32),
                )

                outputs = []
                for key, first, second in stacks:
                    hidden = first.apply(x=x[key], update=update)
                    outputs.append(second.apply(x=hidden, update=update))

                # Left-to-right product, same association as a chained `*`.
                product = outputs[0]
                for factor in outputs[1:]:
                    product = product * factor

                if return_internals:
                    return product, []
                return product
Beispiel #2
0
 def tf_apply(self, x, internals, update, return_internals=False):
     """Build a two-layer Dense stack per input and multiply the stack outputs.

     For each of the keys 'state0'..'state3', two ``Dense(size=32)`` layers are
     created, registered via ``self.add_layer`` and applied in sequence to
     ``x[key]``.

     Args:
         x: Object indexable by the keys 'state0', 'state1', 'state2' and
             'state3'.
         internals: Not used by this method.
         update: Passed through to every layer's ``apply`` call.
         return_internals: If true, return a tuple of the output and an empty
             list instead of the output alone.

     Returns:
         The product of the four stack outputs, optionally paired with an
         empty list.
     """
     # (input key, scope of first layer, scope of second layer)
     stack_specs = (
         ('state0', 'state0-1', 'state0-2'),
         ('state1', 'state1-1', 'state1-2'),
         ('state2', 'state2-1', 'state2-2'),
         ('state3', 'state3-1', 'state3-2'),
     )

     outputs = []
     for key, first_scope, second_scope in stack_specs:
         first = Dense(size=32, scope=first_scope)
         self.add_layer(layer=first)
         second = Dense(size=32, scope=second_scope)
         self.add_layer(layer=second)

         hidden = first.apply(x=x[key], update=update)
         outputs.append(second.apply(x=hidden, update=update))

     # Left-to-right product, same association as a chained `*`.
     product = outputs[0]
     for factor in outputs[1:]:
         product = product * factor

     if return_internals:
         return product, []
     return product
        class CustomNetwork(LayerBasedNetwork):
            """Network with one two-layer Dense stack per input type.

            The inputs 'bool', 'int', 'float' and 'bounded' each pass through
            two ``Dense(size=16)`` layers; the four results are multiplied
            together with ``*``.
            """

            def __init__(self, scope='layerbased-network', summary_labels=()):
                """Create and register the eight Dense layers.

                Args:
                    scope: Scope name forwarded to the parent constructor.
                    summary_labels: Summary labels forwarded to the parent
                        constructor.
                """
                super(CustomNetwork, self).__init__(scope=scope, summary_labels=summary_labels)

                # (attribute name, layer scope), in registration order.
                layer_specs = (
                    ('layer_bool1', 'state-bool1'),
                    ('layer_bool2', 'state-bool2'),
                    ('layer_int1', 'state-int1'),
                    ('layer_int2', 'state-int2'),
                    ('layer_float1', 'state-float1'),
                    ('layer_float2', 'state-float2'),
                    ('layer_bounded1', 'state-bounded1'),
                    ('layer_bounded2', 'state-bounded2'),
                )
                for attribute, layer_scope in layer_specs:
                    layer = Dense(size=16, scope=layer_scope)
                    setattr(self, attribute, layer)
                    self.add_layer(layer=layer)

            def tf_apply(self, x, internals, update, return_internals=False):
                """Run every input through its stack and multiply the results.

                Args:
                    x: Object indexable by the keys 'bool', 'int', 'float' and
                        'bounded'.
                    internals: Not used by this network.
                    update: Passed through to every layer's ``apply`` call.
                    return_internals: If true, return a tuple of the output and
                        an empty dict instead of the output alone.

                Returns:
                    The product of the four stack outputs, optionally paired
                    with an empty dict.
                """
                def run_stack(key, first, second):
                    # Feed x[key] through `first`, then its output through `second`.
                    hidden = first.apply(x=x[key], update=update)
                    return second.apply(x=hidden, update=update)

                bool_out = run_stack('bool', self.layer_bool1, self.layer_bool2)
                int_out = run_stack('int', self.layer_int1, self.layer_int2)
                float_out = run_stack('float', self.layer_float1, self.layer_float2)
                bounded_out = run_stack('bounded', self.layer_bounded1, self.layer_bounded2)

                product = bool_out * int_out * float_out * bounded_out
                if return_internals:
                    return product, {}
                return product
Beispiel #4
0
            def tf_apply(self, x, internals, return_internals=False):
                """Build Dense stacks for the non-excluded inputs and multiply them.

                The flags ``exclude_bool``, ``exclude_int``, ``exclude_float``
                and ``exclude_bounded`` come from the enclosing scope. An
                excluded input contributes the constant factor ``1.0``; every
                other input gets two freshly created ``Dense(size=32)`` layers
                that are registered via ``self.add_layer`` and applied in
                sequence.

                Args:
                    x: Object indexable by the keys 'state0'..'state3' (only
                        the keys of non-excluded inputs are read).
                    internals: Not used by this method.
                    return_internals: If true, return a tuple of the output and
                        an empty list instead of the output alone.

                Returns:
                    The product of the four factors, optionally paired with an
                    empty list.
                """
                def build_stack(key, first_scope, second_scope):
                    # Create, register and apply the two layers for x[key].
                    first = Dense(size=32, scope=first_scope)
                    self.add_layer(layer=first)
                    second = Dense(size=32, scope=second_scope)
                    self.add_layer(layer=second)
                    return second.apply(x=first.apply(x=x[key]))

                factor0 = 1.0 if exclude_bool else build_stack('state0', 'state0-1', 'state0-2')
                factor1 = 1.0 if exclude_int else build_stack('state1', 'state1-1', 'state1-2')
                factor2 = 1.0 if exclude_float else build_stack('state2', 'state2-1', 'state2-2')
                factor3 = 1.0 if exclude_bounded else build_stack('state3', 'state3-1', 'state3-2')

                product = factor0 * factor1 * factor2 * factor3
                if return_internals:
                    return product, []
                return product
Beispiel #5
0
        class CustomNetwork(LayerBasedNetwork):
            """Network with an optional two-layer Dense stack per input type.

            The flags ``exclude_bool``, ``exclude_int``, ``exclude_float`` and
            ``exclude_bounded`` come from the enclosing scope and decide which
            stacks are created and applied. The outputs of the active stacks
            are multiplied together.
            """

            def __init__(self, scope='layerbased-network', summary_labels=()):
                """Create and register the Dense layers of all non-excluded types.

                Args:
                    scope: Scope name forwarded to the parent constructor.
                    summary_labels: Summary labels forwarded to the parent
                        constructor.
                """
                super(CustomNetwork, self).__init__(scope=scope, summary_labels=summary_labels)

                # (skip flag, ((attribute name, layer scope), ...)) per input type.
                layer_plan = (
                    (exclude_bool, (
                        ('layer_bool1', 'state-bool1'),
                        ('layer_bool2', 'state-bool2'),
                    )),
                    (exclude_int, (
                        ('layer_int1', 'state-int1'),
                        ('layer_int2', 'state-int2'),
                    )),
                    (exclude_float, (
                        ('layer_float1', 'state-float1'),
                        ('layer_float2', 'state-float2'),
                    )),
                    (exclude_bounded, (
                        ('layer_bounded1', 'state-bounded1'),
                        ('layer_bounded2', 'state-bounded2'),
                    )),
                )
                for skip, specs in layer_plan:
                    if skip:
                        continue
                    for attribute, layer_scope in specs:
                        layer = Dense(size=16, scope=layer_scope)
                        setattr(self, attribute, layer)
                        self.add_layer(layer=layer)

            def tf_apply(self, x, internals, update, return_internals=False):
                """Apply the active stacks and multiply their outputs.

                Args:
                    x: Object indexable by the keys 'bool', 'int', 'float' and
                        'bounded-float' (only the keys of non-excluded types
                        are read).
                    internals: Not used by this network.
                    update: Passed through to every layer's ``apply`` call.
                    return_internals: If true, return a tuple of the output and
                        an empty list instead of the output alone.

                Returns:
                    The product of the active stack outputs, optionally paired
                    with an empty list.
                """
                outputs = []

                if not exclude_bool:
                    hidden = self.layer_bool1.apply(x=x['bool'], update=update)
                    outputs.append(self.layer_bool2.apply(x=hidden, update=update))

                if not exclude_int:
                    hidden = self.layer_int1.apply(x=x['int'], update=update)
                    outputs.append(self.layer_int2.apply(x=hidden, update=update))

                if not exclude_float:
                    hidden = self.layer_float1.apply(x=x['float'], update=update)
                    outputs.append(self.layer_float2.apply(x=hidden, update=update))

                if not exclude_bounded:
                    hidden = self.layer_bounded1.apply(x=x['bounded-float'], update=update)
                    outputs.append(self.layer_bounded2.apply(x=hidden, update=update))

                # Combine by multiplication; the first output seeds the product.
                product = outputs[0]
                for factor in outputs[1:]:
                    product *= factor

                if return_internals:
                    return product, []
                return product
class DDPGCriticNetwork(LayerBasedNetwork):
    """Critic network combining one state input and one action input.

    Stages, in the order they are applied:

    0. ``Linear(size_t0)`` -> batch normalization -> relu, on the state input.
    1. ``Linear(size_t1)`` -> batch normalization -> relu, on the stage-0
       output concatenated with the action input along axis 1.
    2. ``Dense(size=1, activation='tanh')`` with weights drawn uniformly from
       [-3e-3, 3e-3].
    """

    def __init__(self,
                 scope='ddpg-critic-network',
                 summary_labels=(),
                 size_t0=400,
                 size_t1=300):
        """Create and register all layers of the critic.

        Args:
            scope: Scope name forwarded to the parent constructor.
            summary_labels: Summary labels forwarded to the parent constructor.
            size_t0: Size of the first linear layer.
            size_t1: Size of the second linear layer.
        """
        super(DDPGCriticNetwork, self).__init__(scope=scope,
                                                summary_labels=summary_labels)

        self.t0l = Linear(size=size_t0, scope='linear0')
        self.t0b = TFLayer(layer='batch_normalization',
                           scope='batchnorm0',
                           center=True,
                           scale=True)
        self.t0n = Nonlinearity(name='relu', scope='relu0')

        self.t1l = Linear(size=size_t1, scope='linear1')
        self.t1b = TFLayer(layer='batch_normalization',
                           scope='batchnorm1',
                           center=True,
                           scale=True)
        self.t1n = Nonlinearity(name='relu', scope='relu1')

        self.t2d = Dense(size=1,
                         activation='tanh',
                         scope='dense0',
                         weights=tf.random_uniform_initializer(minval=-3e-3,
                                                               maxval=3e-3))

        # Register the layers in the order they are applied.
        for layer in (self.t0l, self.t0b, self.t0n,
                      self.t1l, self.t1b, self.t1n,
                      self.t2d):
            self.add_layer(layer)

    @staticmethod
    def _single_input(value, kind):
        """Return the sole entry of a dict input, or a non-dict input as is.

        Args:
            value: Either a dict with exactly one entry or any other object.
            kind: 'state' or 'action'; only used in the error message.

        Raises:
            TensorForceError: If ``value`` is a dict whose length is not 1.
        """
        if not isinstance(value, dict):
            return value
        if len(value) != 1:
            raise TensorForceError(
                'DDPG critic network must have only one {} input, but {} given.'
                .format(kind, len(value)))
        return next(iter(value.values()))

    def tf_apply(self, x, internals, update, return_internals=False):
        """Compute the critic output for the given states and actions.

        Args:
            x: Mapping with the keys 'states' and 'actions'. Each value is
                either a single-entry dict or the input itself.
            internals: Not used by this network.
            update: Passed through to every layer's ``apply`` call.
            return_internals: If true, return a tuple of the output and
                ``None`` instead of the output alone.

        Returns:
            The output of the final Dense layer, optionally paired with
            ``None``.

        Raises:
            TensorForceError: If 'states' or 'actions' is missing or ``None``,
                or is a dict that does not contain exactly one entry.
        """
        # Explicit presence checks instead of `assert a, b`, which only tests
        # `a` (using `b` as the message), takes the truth value of the input,
        # and is removed under `python -O`.
        for key in ('states', 'actions'):
            if key not in x or x[key] is None:
                raise TensorForceError(
                    'DDPG critic network requires a {!r} input.'.format(key))

        x_states = self._single_input(x['states'], 'state')
        x_actions = self._single_input(x['actions'], 'action')

        out = self.t0l.apply(x=x_states, update=update)
        out = self.t0b.apply(x=out, update=update)
        out = self.t0n.apply(x=out, update=update)

        # The actions join the network only at the second stage.
        out = self.t1l.apply(x=tf.concat([out, x_actions], axis=1),
                             update=update)
        out = self.t1b.apply(x=out, update=update)
        out = self.t1n.apply(x=out, update=update)

        out = self.t2d.apply(x=out, update=update)

        # NOTE: the output is returned without squeezing its last dimension.

        if return_internals:
            # Todo: Internals management
            return out, None
        return out