class CustomNetwork(LayerBasedNetwork):
    """Layer-based network with one two-layer dense branch per state input.

    Each of the inputs ``state0`` .. ``state3`` is passed through its own pair
    of ``Dense(size=32)`` layers; the four branch outputs are then combined
    with the ``*`` operator.
    """

    def __init__(self, scope='layerbased-network', summary_labels=()):
        super(CustomNetwork, self).__init__(scope=scope, summary_labels=summary_labels)

        def register(layer_scope):
            # Create one dense layer, register it with the network, hand it back.
            dense = Dense(size=32, scope=layer_scope)
            self.add_layer(layer=dense)
            return dense

        # Registration order: both layers of branch 0, then branch 1, and so on.
        self.layer01 = register('state0-1')
        self.layer02 = register('state0-2')
        self.layer11 = register('state1-1')
        self.layer12 = register('state1-2')
        self.layer21 = register('state2-1')
        self.layer22 = register('state2-2')
        self.layer31 = register('state3-1')
        self.layer32 = register('state3-2')

    def tf_apply(self, x, internals, update, return_internals=False):
        """Apply every branch to its state input and multiply the results.

        Args:
            x: Mapping indexed by the keys ``'state0'`` .. ``'state3'``.
            internals: Not used by this network.
            update: Forwarded unchanged to every layer's ``apply`` call.
            return_internals: If true, return ``(output, [])`` instead of
                just ``output``.

        Returns:
            The product of the four branch outputs, optionally paired with an
            empty list of internals.
        """
        branches = (
            ('state0', self.layer01, self.layer02),
            ('state1', self.layer11, self.layer12),
            ('state2', self.layer21, self.layer22),
            ('state3', self.layer31, self.layer32),
        )

        outputs = []
        for state_key, first, second in branches:
            hidden = first.apply(x=x[state_key], update=update)
            outputs.append(second.apply(x=hidden, update=update))

        # Multiply left to right, i.e. ((x0 * x1) * x2) * x3.
        combined = outputs[0]
        for branch_output in outputs[1:]:
            combined = combined * branch_output

        if return_internals:
            return combined, []
        return combined
def tf_apply(self, x, internals, update, return_internals=False):
    """Build and apply a two-layer dense branch for each state input.

    For every input ``state0`` .. ``state3`` two ``Dense(size=32)`` layers are
    created, registered via ``self.add_layer`` and applied in sequence. The
    four branch outputs are then combined with the ``*`` operator.

    Args:
        x: Mapping indexed by the keys ``'state0'`` .. ``'state3'``.
        internals: Not used.
        update: Forwarded unchanged to every layer's ``apply`` call.
        return_internals: If true, return ``(output, [])`` instead of just
            ``output``.

    Returns:
        The product of the four branch outputs, optionally paired with an
        empty list of internals.
    """
    # (input key, scope of first layer, scope of second layer)
    branch_specs = (
        ('state0', 'state0-1', 'state0-2'),
        ('state1', 'state1-1', 'state1-2'),
        ('state2', 'state2-1', 'state2-2'),
        ('state3', 'state3-1', 'state3-2'),
    )

    outputs = []
    for state_key, first_scope, second_scope in branch_specs:
        first = Dense(size=32, scope=first_scope)
        self.add_layer(layer=first)
        second = Dense(size=32, scope=second_scope)
        self.add_layer(layer=second)

        hidden = first.apply(x=x[state_key], update=update)
        outputs.append(second.apply(x=hidden, update=update))

    # All branches are built before multiplying: ((x0 * x1) * x2) * x3.
    combined = outputs[0]
    for branch_output in outputs[1:]:
        combined = combined * branch_output

    if return_internals:
        return combined, []
    return combined
class CustomNetwork(LayerBasedNetwork):
    """Layer-based network with one two-layer dense branch per named input.

    The inputs ``bool``, ``int``, ``float`` and ``bounded`` are each passed
    through their own pair of ``Dense(size=16)`` layers; the four branch
    outputs are then combined with the ``*`` operator.
    """

    def __init__(self, scope='layerbased-network', summary_labels=()):
        super(CustomNetwork, self).__init__(scope=scope, summary_labels=summary_labels)

        def register(layer_scope):
            # Create one dense layer, register it with the network, hand it back.
            dense = Dense(size=16, scope=layer_scope)
            self.add_layer(layer=dense)
            return dense

        self.layer_bool1 = register('state-bool1')
        self.layer_bool2 = register('state-bool2')
        self.layer_int1 = register('state-int1')
        self.layer_int2 = register('state-int2')
        self.layer_float1 = register('state-float1')
        self.layer_float2 = register('state-float2')
        self.layer_bounded1 = register('state-bounded1')
        self.layer_bounded2 = register('state-bounded2')

    def tf_apply(self, x, internals, update, return_internals=False):
        """Apply every branch to its input and multiply the results.

        Args:
            x: Mapping indexed by the keys ``'bool'``, ``'int'``, ``'float'``
                and ``'bounded'``.
            internals: Not used by this network.
            update: Forwarded unchanged to every layer's ``apply`` call.
            return_internals: If true, return ``(output, {})`` instead of
                just ``output``.

        Returns:
            The product of the four branch outputs, optionally paired with an
            empty dict of internals.
        """
        branches = (
            ('bool', self.layer_bool1, self.layer_bool2),
            ('int', self.layer_int1, self.layer_int2),
            ('float', self.layer_float1, self.layer_float2),
            ('bounded', self.layer_bounded1, self.layer_bounded2),
        )

        outputs = []
        for input_key, first, second in branches:
            hidden = first.apply(x=x[input_key], update=update)
            outputs.append(second.apply(x=hidden, update=update))

        # Multiply left to right, i.e. ((x0 * x1) * x2) * x3.
        combined = outputs[0]
        for branch_output in outputs[1:]:
            combined = combined * branch_output

        if return_internals:
            return combined, {}
        return combined
def tf_apply(self, x, internals, return_internals=False):
    """Build and apply the non-excluded dense branches, then multiply them.

    The flags ``exclude_bool``, ``exclude_int``, ``exclude_float`` and
    ``exclude_bounded`` are read from the enclosing scope (they are not
    defined in this function). An excluded branch contributes the neutral
    factor ``1.0``; otherwise two ``Dense(size=32)`` layers are created,
    registered via ``self.add_layer`` and applied to the branch's input.

    Args:
        x: Mapping indexed by ``'state0'`` .. ``'state3'`` for the branches
            that are not excluded.
        internals: Not used.
        return_internals: If true, return ``(output, [])`` instead of just
            ``output``.

    Returns:
        The product of the four branch factors, optionally paired with an
        empty list of internals.
    """

    def dense_branch(state_key, first_scope, second_scope):
        # Create, register and apply the two layers of a single branch.
        first = Dense(size=32, scope=first_scope)
        self.add_layer(layer=first)
        second = Dense(size=32, scope=second_scope)
        self.add_layer(layer=second)
        return second.apply(x=first.apply(x=x[state_key]))

    # Flags are checked one after another, in the same order as the branches.
    x0 = 1.0 if exclude_bool else dense_branch('state0', 'state0-1', 'state0-2')
    x1 = 1.0 if exclude_int else dense_branch('state1', 'state1-1', 'state1-2')
    x2 = 1.0 if exclude_float else dense_branch('state2', 'state2-1', 'state2-2')
    x3 = 1.0 if exclude_bounded else dense_branch('state3', 'state3-1', 'state3-2')

    combined = x0 * x1 * x2 * x3

    if return_internals:
        return combined, []
    return combined
class CustomNetwork(LayerBasedNetwork):
    """Layer-based network with optional two-layer dense branches per input.

    The flags ``exclude_bool``, ``exclude_int``, ``exclude_float`` and
    ``exclude_bounded`` are read from the enclosing scope (they are not
    defined in this class). For every branch that is not excluded, two
    ``Dense(size=16)`` layers are created and registered; the outputs of the
    active branches are combined with the ``*`` operator.
    """

    def __init__(self, scope='layerbased-network', summary_labels=()):
        super(CustomNetwork, self).__init__(scope=scope, summary_labels=summary_labels)

        def register(layer_scope):
            # Create one dense layer, register it with the network, hand it back.
            dense = Dense(size=16, scope=layer_scope)
            self.add_layer(layer=dense)
            return dense

        # Layer attributes only exist for branches that are not excluded.
        if not exclude_bool:
            self.layer_bool1 = register('state-bool1')
            self.layer_bool2 = register('state-bool2')

        if not exclude_int:
            self.layer_int1 = register('state-int1')
            self.layer_int2 = register('state-int2')

        if not exclude_float:
            self.layer_float1 = register('state-float1')
            self.layer_float2 = register('state-float2')

        if not exclude_bounded:
            self.layer_bounded1 = register('state-bounded1')
            self.layer_bounded2 = register('state-bounded2')

    def tf_apply(self, x, internals, update, return_internals=False):
        """Apply every active branch to its input and multiply the results.

        Args:
            x: Mapping indexed by ``'bool'``, ``'int'``, ``'float'`` and
                ``'bounded-float'`` for the branches that are not excluded.
            internals: Not used by this network.
            update: Forwarded unchanged to every layer's ``apply`` call.
            return_internals: If true, return ``(output, [])`` instead of
                just ``output``.

        Returns:
            The product of the active branch outputs, optionally paired with
            an empty list of internals.

        Raises:
            IndexError: If all four branches are excluded, since there is no
                first output to start the product from.
        """
        outputs = []

        if not exclude_bool:
            hidden = self.layer_bool1.apply(x=x['bool'], update=update)
            outputs.append(self.layer_bool2.apply(x=hidden, update=update))

        if not exclude_int:
            hidden = self.layer_int1.apply(x=x['int'], update=update)
            outputs.append(self.layer_int2.apply(x=hidden, update=update))

        if not exclude_float:
            hidden = self.layer_float1.apply(x=x['float'], update=update)
            outputs.append(self.layer_float2.apply(x=hidden, update=update))

        if not exclude_bounded:
            hidden = self.layer_bounded1.apply(x=x['bounded-float'], update=update)
            outputs.append(self.layer_bounded2.apply(x=hidden, update=update))

        combined = outputs[0]
        for branch_output in outputs[1:]:
            combined *= branch_output

        # Alternative combination left disabled in the original:
        # import tensorflow as tf
        # x = tf.concat(values=xs, axis=1)

        if return_internals:
            return combined, []
        return combined
class DDPGCriticNetwork(LayerBasedNetwork):
    """Critic network for DDPG that consumes one state and one action input.

    The state input goes through ``Linear -> batch normalization -> ReLU``.
    The result is concatenated with the action input along axis 1 and goes
    through a second ``Linear -> batch normalization -> ReLU`` stage, followed
    by a ``Dense`` layer of size 1 with ``tanh`` activation.
    """

    def __init__(self, scope='ddpg-critic-network', summary_labels=(), size_t0=400, size_t1=300):
        """Create and register the critic's layers.

        Args:
            scope: Scope name passed to the base network.
            summary_labels: Summary labels passed to the base network.
            size_t0: Size of the first linear layer (state input only).
            size_t1: Size of the second linear layer (after the actions have
                been concatenated).
        """
        super(DDPGCriticNetwork, self).__init__(scope=scope, summary_labels=summary_labels)

        # First stage: operates on the state input only.
        self.t0l = Linear(size=size_t0, scope='linear0')
        self.t0b = TFLayer(layer='batch_normalization', scope='batchnorm0', center=True, scale=True)
        self.t0n = Nonlinearity(name='relu', scope='relu0')

        # Second stage: operates on the first stage's output plus the actions.
        self.t1l = Linear(size=size_t1, scope='linear1')
        self.t1b = TFLayer(layer='batch_normalization', scope='batchnorm1', center=True, scale=True)
        self.t1n = Nonlinearity(name='relu', scope='relu1')

        # Output layer; weights start uniformly in [-3e-3, 3e-3].
        self.t2d = Dense(
            size=1,
            activation='tanh',
            scope='dense0',
            weights=tf.random_uniform_initializer(minval=-3e-3, maxval=3e-3)
        )

        self.add_layer(self.t0l)
        self.add_layer(self.t0b)
        self.add_layer(self.t0n)

        self.add_layer(self.t1l)
        self.add_layer(self.t1b)
        self.add_layer(self.t1n)

        self.add_layer(self.t2d)

    @staticmethod
    def _single_input(value, kind):
        """Return the single input held by ``value``.

        Args:
            value: Either a dict holding exactly one entry, or the input
                itself.
            kind: ``'state'`` or ``'action'``; only used in error messages.

        Returns:
            The dict's only value, or ``value`` unchanged if it is not a dict.

        Raises:
            TensorForceError: If ``value`` is ``None`` or is a dict that does
                not contain exactly one entry.
        """
        # Identity check instead of truthiness: taking the truth value of a
        # graph-mode tensor raises TypeError.
        if value is None:
            raise TensorForceError(
                'DDPG critic network requires a {} input, but none was given.'.format(kind)
            )

        if isinstance(value, dict):
            if len(value) != 1:
                raise TensorForceError(
                    'DDPG critic network must have only one {} input, but {} given.'
                    .format(kind, len(value))
                )
            return value[next(iter(sorted(value)))]

        return value

    def tf_apply(self, x, internals, update, return_internals=False):
        """Compute the critic output for the given states and actions.

        Args:
            x: Mapping with the keys ``'states'`` and ``'actions'``. Each
                value is either the input itself or a dict holding exactly
                one input.
            internals: Not used by this network.
            update: Forwarded unchanged to every layer's ``apply`` call.
            return_internals: If true, return ``(output, None)`` instead of
                just ``output``.

        Returns:
            The output of the final dense layer, optionally paired with
            ``None`` because internals are not managed yet.

        Raises:
            KeyError: If ``'states'`` or ``'actions'`` is missing from ``x``.
            TensorForceError: If either input is ``None`` or is a dict that
                does not contain exactly one entry.
        """
        # Both inputs are validated explicitly. The former
        # `assert x['states'], x['actions']` only tested the states and used
        # the actions as the assertion message.
        x_states = self._single_input(x['states'], 'state')
        x_actions = self._single_input(x['actions'], 'action')

        out = self.t0l.apply(x=x_states, update=update)
        out = self.t0b.apply(x=out, update=update)
        out = self.t0n.apply(x=out, update=update)

        # The actions join the network at the second linear layer.
        out = self.t1l.apply(x=tf.concat([out, x_actions], axis=1), update=update)
        out = self.t1b.apply(x=out, update=update)
        out = self.t1n.apply(x=out, update=update)

        out = self.t2d.apply(x=out, update=update)

        # Remove last dimension because we only return Q values for one state and action
        # out = tf.squeeze(out)

        if return_internals:
            # Todo: Internals management
            return out, None
        return out