Example #1
    def network(self, dueling):
        """ Build the Deep Q-Network: a convolutional trunk for image
        (Atari) inputs, or a small MLP for low-dimensional states.
        """
        # Image input (Atari): drop the leading frame-stack dimension
        if len(self.state_dim) > 2:
            inp = Input(self.state_dim[1:])
            x = conv_block(inp, 32, (2, 2), 8)
            x = conv_block(x, 64, (2, 2), 4)
            x = conv_block(x, 64, (2, 2), 3)
            x = Flatten()(x)
            x = Dense(256, activation='relu')(x)
        else:
            inp = Input(self.state_dim)
            x = Flatten()(inp)
            x = Dense(64, activation='relu')(x)
            x = Dense(64, activation='relu')(x)

        if dueling:
            # Estimate the state value V(s) (first unit) and the advantages
            # A(s, a) (remaining units), then aggregate them into Q-values;
            # the mean is taken per sample over the action axis
            x = Dense(self.action_dim + 1, activation='linear')(x)
            x = Lambda(lambda i: K.expand_dims(i[:, 0], -1) + i[:, 1:]
                       - K.mean(i[:, 1:], axis=1, keepdims=True),
                       output_shape=(self.action_dim,))(x)
        else:
            x = Dense(self.action_dim, activation='linear')(x)
        return Model(inp, x)
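The Lambda layer implements the dueling aggregation Q(s, a) = V(s) + A(s, a) - mean_a' A(s, a'): the first unit of the preceding Dense layer is read as the state value V(s) and the remaining self.action_dim units as the advantages. Subtracting the per-sample mean advantage keeps the value/advantage decomposition identifiable.

These snippets assume the usual Keras imports (Input, Dense, Flatten, Lambda, Reshape, concatenate from keras.layers; Model from keras.models; keras.backend as K; RandomUniform from keras.initializers) plus a conv_block helper that is project-specific, not a Keras API. A minimal sketch of what such a helper could look like, assuming a Conv2D + MaxPooling2D pairing (the argument order matches the call sites above; the exact layer choices are an assumption):

    # Minimal sketch of the project-specific conv_block helper; the layer
    # choices (Conv2D + MaxPooling2D) are an assumption inferred from how
    # it is called in these examples, not the project's actual definition.
    from keras.layers import Conv2D, MaxPooling2D

    def conv_block(inp, filters, pool_size=(2, 2), kernel_size=3):
        x = Conv2D(filters, kernel_size, activation='relu', padding='same')(inp)
        return MaxPooling2D(pool_size=pool_size)(x)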
Example #2
    def network(self):
        """ Actor network for policy function approximation, using a tanh
        activation for continuous control. The output is rescaled to the
        environment's action range.
        """
        inp = Input(self.env_dim)

        # Convolutional trunk for image observations
        x = conv_block(inp, 32, (2, 2), 8)
        x = conv_block(x, 64, (2, 2), 4)
        x = conv_block(x, 64, (2, 2), 3)
        x = Flatten()(x)
        x = Dense(256, activation='relu')(x)

        # tanh bounds the raw action to (-1, 1); the Lambda rescales it
        # to (-act_range, act_range)
        x = Dense(self.act_dim,
                  activation='tanh',
                  kernel_initializer=RandomUniform())(x)
        out = Lambda(lambda i: i * self.act_range)(x)

        return Model(inp, out)
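Since the trunk starts with conv_block, env_dim is expected to be an image shape. A brief usage sketch showing the bounded output (the shape, act_dim, and act_range values below are illustrative assumptions, not the project's defaults):

    # Illustrative only: assumes env_dim=(84, 84, 4), act_dim=2, act_range=2.0
    import numpy as np

    actor = self.network()
    state = np.random.rand(1, 84, 84, 4).astype('float32')
    action = actor.predict(state)          # shape (1, 2)
    assert np.all(np.abs(action) <= 2.0)   # bounded by tanh * act_range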
Example #3
    def buildNetwork(self):
        """ Assemble shared layers
        """
        inp = Input(self.env_dim)
        # If we have an image, apply convolutional layers
        if len(self.env_dim) > 2:
            x = Reshape((self.env_dim[1], self.env_dim[2], -1))(inp)
            x = conv_block(x, 32, (2, 2))
            x = conv_block(x, 32, (2, 2))
            x = Flatten()(x)
        else:
            x = Flatten()(inp)
            x = Dense(64, activation='relu')(x)
            x = Dense(128, activation='relu')(x)
        return Model(inp, x)
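This method returns only the shared trunk; separate policy and value heads are then attached on top of its output. A sketch of how that attachment might look (the head sizes and activations are assumptions, chosen as typical actor-critic defaults, not the project's code):

    # Illustrative: stack actor (policy) and critic (value) heads on the trunk
    shared = self.buildNetwork()
    policy = Dense(self.act_dim, activation='softmax')(shared.output)
    value = Dense(1, activation='linear')(shared.output)
    actor = Model(shared.input, policy)
    critic = Model(shared.input, value)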
Example #4
    def network(self):
        """ Assemble the Critic network to predict Q-values from
        (state, action) pairs
        """
        state = Input(self.env_dim)
        action = Input((self.act_dim,))

        # Convolutional trunk over the state
        x = conv_block(state, 32, (2, 2), 8)
        x = conv_block(x, 64, (2, 2), 4)
        x = conv_block(x, 64, (2, 2), 3)
        x = Flatten()(x)
        x = Dense(256, activation='relu')(x)

        # Merge the action into the fully connected layers
        x = concatenate([x, action])
        x = Dense(128, activation='relu')(x)
        out = Dense(1, activation='linear',
                    kernel_initializer=RandomUniform())(x)
        return Model([state, action], out)
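The critic maps a (state, action) pair to a single scalar Q-value. A training sketch (the optimizer, learning rate, and target computation are illustrative assumptions; states, actions, and q_target stand in for a sampled replay batch):

    # Illustrative: fit the critic on Bellman targets from a replay batch
    from keras.optimizers import Adam

    critic = self.network()
    critic.compile(optimizer=Adam(1e-3), loss='mse')
    # q_target = rewards + gamma * Q_target(next_states, actor_target(next_states))
    critic.train_on_batch([states, actions], q_target)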