Example #1
    def build_model(self):
        l2_regularization_kernel = 1e-5

        # Input Layer
        input_states = layers.Input(shape=(self.state_size,), name='input_states')

        # Hidden Layers
        model = layers.Dense(units=300, kernel_regularizer=regularizers.l2(l2_regularization_kernel))(input_states)
        model = layers.BatchNormalization()(model)
        model = layers.LeakyReLU(1e-2)(model)

        model = layers.Dense(units=400, kernel_regularizer=regularizers.l2(l2_regularization_kernel))(model)
        model = layers.BatchNormalization()(model)
        model = layers.LeakyReLU(1e-2)(model)

        model = layers.Dense(units=200, kernel_regularizer=regularizers.l2(l2_regularization_kernel))(model)
        model = layers.BatchNormalization()(model)
        model = layers.LeakyReLU(1e-2)(model)

        # Our output layer - a fully connected layer
        output = layers.Dense(units=self.action_size, activation='tanh', kernel_regularizer=regularizers.l2(l2_regularization_kernel),
                               kernel_initializer=initializers.RandomUniform(minval=-3e-3, maxval=3e-3), name='output_actions')(model)

        # Keras model
        self.model = models.Model(inputs=input_states, outputs=output)

        # Define loss and optimizer
        action_gradients = layers.Input(shape=(self.action_size,))
        loss = K.mean(-action_gradients * output)
        optimizer = optimizers.Adam(lr=1e-4)

        update_operation = optimizer.get_updates(params=self.model.trainable_weights, loss=loss)
        self.train_fn = K.function(inputs=[self.model.input, action_gradients, K.learning_phase()],
            outputs=[], updates=update_operation)
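
A hedged usage sketch of the training function above, assuming `actor` is an instance of this class and the batches come from elsewhere (e.g. a replay buffer and a critic); the shapes are illustrative only:

    import numpy as np

    # Hypothetical batch: state_size=12 and action_size=4 are illustrative values.
    states = np.random.rand(64, 12)
    dQ_da = np.random.rand(64, 4)       # critic's dQ/da, one row per sample
    actor.train_fn([states, dQ_da, 1])  # trailing 1 = training phase for BatchNorm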
Example #2
    def build_model(self):
        l2_kernel_regularization = 1e-5

        # Define input layers
        input_states = layers.Input(shape=(self.state_size, ),
                                    name='input_states')
        input_actions = layers.Input(shape=(self.action_size, ),
                                     name='input_actions')

        # Hidden layers for states
        model_states = layers.Dense(
            units=32,
            kernel_regularizer=regularizers.l2(l2_kernel_regularization))(
                input_states)
        model_states = layers.BatchNormalization()(model_states)
        model_states = layers.LeakyReLU(1e-2)(model_states)

        model_states = layers.Dense(
            units=64,
            kernel_regularizer=regularizers.l2(l2_kernel_regularization))(
                model_states)
        model_states = layers.BatchNormalization()(model_states)
        model_states = layers.LeakyReLU(1e-2)(model_states)

        # Hidden layers for actions
        model_actions = layers.Dense(
            units=64,
            kernel_regularizer=regularizers.l2(l2_kernel_regularization))(
                input_actions)
        model_actions = layers.BatchNormalization()(model_actions)
        model_actions = layers.LeakyReLU(1e-2)(model_actions)

        # Both models merge here
        model = layers.add([model_states, model_actions])

        # Fully connected and batch normalization
        model = layers.Dense(units=32,
                             kernel_regularizer=regularizers.l2(
                                 l2_kernel_regularization))(model)
        model = layers.BatchNormalization()(model)
        model = layers.LeakyReLU(1e-2)(model)

        # Q values / output layer
        Q_values = layers.Dense(
            units=1,
            activation=None,
            kernel_regularizer=regularizers.l2(l2_kernel_regularization),
            kernel_initializer=initializers.RandomUniform(minval=-5e-3,
                                                          maxval=5e-3),
            name='output_Q_values')(model)

        # Wrap everything in a Keras model
        self.model = models.Model(inputs=[input_states, input_actions],
                                  outputs=Q_values)
        optimizer = optimizers.Adam(lr=1e-2)
        self.model.compile(optimizer=optimizer, loss='mse')
        action_gradients = K.gradients(Q_values, input_actions)
        self.get_action_gradients = K.function(
            inputs=[*self.model.input, K.learning_phase()],
            outputs=action_gradients)
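
A hedged usage sketch, assuming `critic` is an instance of this class: the wrapped K.function takes both model inputs plus the learning-phase flag and returns dQ/da, which is exactly what an actor's custom training function consumes.

    # Hypothetical call; trailing 0 = inference phase, so BatchNormalization
    # uses its moving statistics rather than batch statistics.
    grads = critic.get_action_gradients([states, actions, 0])[0]
    # grads has shape (batch_size, action_size)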
Example #3
    def _create_generator(self):
        inputs = layers.Input(shape=(self.args.latent_dims, ))

        x = layers.Dense(128 * 16 * 16)(inputs)
        x = layers.LeakyReLU()(x)
        x = layers.Reshape((16, 16, 128))(x)

        x = layers.Conv2D(256, kernel_size=5, strides=1, padding='same')(x)
        x = layers.LeakyReLU()(x)

        # use a kernel size that is a multiple of the stride to avoid checkerboard artifacts when up-sampling
        x = layers.Conv2DTranspose(256,
                                   kernel_size=4,
                                   strides=2,
                                   padding='same')(x)
        x = layers.LeakyReLU()(x)

        x = layers.Conv2D(256, kernel_size=5, padding='same')(x)
        x = layers.LeakyReLU()(x)

        x = layers.Conv2D(256, kernel_size=5, padding='same')(x)
        x = layers.LeakyReLU()(x)

        outputs = layers.Conv2D(CHANNELS,
                                kernel_size=7,
                                activation='tanh',
                                padding='same')(x)

        generator = models.Model(inputs, outputs)
        return generator
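
For orientation, a hedged shape check (latent_dims=32 and CHANNELS=3 are assumptions, not values from the source): the single stride-2 Conv2DTranspose doubles the initial 16x16 feature map, so this generator emits 32x32 images in [-1, 1] via the tanh output.

    import numpy as np

    # Hypothetical check, assuming `generator` is the model returned above.
    z = np.random.normal(size=(1, 32))  # one latent vector
    fake = generator.predict(z)
    print(fake.shape)                   # (1, 32, 32, 3)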
Example #4
    def build_model(self):
        """Build an actor (policy) network that maps states -> actions."""
        # Define input layer (states)
        states = layers.Input(shape=(self.state_size, ), name='states')
        '''# Add hidden layers
        net = layers.Dense(units=32, activation='relu')(states)
        net = layers.Dense(units=64, activation='relu')(net)
        net = layers.Dense(units=32, activation='relu')(net)
        
        # Try different layer sizes, activations, add batch normalization, regularizers, etc.

        # Add final output layer with sigmoid activation
        raw_actions = layers.Dense(units=self.action_size, activation='sigmoid',
            name='raw_actions')(net)
        '''
        ###################################
        # Add hidden layers
        net = layers.Dense(units=400,
                           kernel_regularizer=regularizers.l2(1e-6))(states)
        net = layers.BatchNormalization()(net)
        net = layers.LeakyReLU(1e-2)(net)
        net = layers.Dense(units=300,
                           kernel_regularizer=regularizers.l2(1e-6))(net)
        net = layers.BatchNormalization()(net)
        net = layers.LeakyReLU(1e-2)(net)

        # Add final output layer with sigmoid activation
        raw_actions = layers.Dense(
            units=self.action_size,
            activation='sigmoid',
            name='raw_actions',
            kernel_initializer=initializers.RandomUniform(minval=-0.003,
                                                          maxval=0.003))(net)
        #######################################

        # Scale [0, 1] output for each action dimension to proper range
        actions = layers.Lambda(lambda x:
                                (x * self.action_range) + self.action_low,
                                name='actions')(raw_actions)

        # Create Keras model
        self.model = models.Model(inputs=states, outputs=actions)

        # Define loss function using action value (Q value) gradients
        action_gradients = layers.Input(shape=(self.action_size, ))
        loss = K.mean(-action_gradients * actions)

        # Incorporate any additional losses here (e.g. from regularizers)

        # Define optimizer and training function
        optimizer = optimizers.Adam(lr=1e-6)
        updates_op = optimizer.get_updates(params=self.model.trainable_weights,
                                           loss=loss)
        self.train_fn = K.function(
            inputs=[self.model.input, action_gradients,
                    K.learning_phase()],
            outputs=[],
            updates=updates_op)
Example #5
def res_block(inputs, size):
    kernel_l2_reg = 1e-3
    net = layers.Dense(size,
                       activation=None,
                       kernel_regularizer=regularizers.l2(kernel_l2_reg),
                       kernel_initializer=initializers.RandomUniform(
                           minval=-5e-3, maxval=5e-3))(inputs)
    net = layers.BatchNormalization()(net)
    net = layers.LeakyReLU(1e-2)(net)

    net = layers.Dense(size,
                       activation=None,
                       kernel_regularizer=regularizers.l2(kernel_l2_reg),
                       kernel_initializer=initializers.RandomUniform(
                           minval=-5e-3, maxval=5e-3))(net)
    net = layers.BatchNormalization()(net)
    net = layers.LeakyReLU(1e-2)(net)
    net = layers.add([inputs, net])
    return net
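
A hedged usage sketch: because the identity shortcut is added without a projection, `inputs` must already have `size` units, so widen the tensor once before the first block (names and sizes here are illustrative, not from the source).

    # Project to the block width, then stack blocks freely.
    x = layers.Dense(128)(states)
    for _ in range(3):
        x = res_block(x, 128)  # the add() inside requires matching widths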
Example #6
    def build_model(self):
        # Define input layers
        inputStates = layers.Input(shape=(self.state_size, ),
                                   name='inputStates')
        inputActions = layers.Input(shape=(self.action_size, ),
                                    name='inputActions')

        # Hidden layers for states
        modelS = layers.Dense(units=128, activation='linear')(inputStates)
        modelS = layers.BatchNormalization()(modelS)
        modelS = layers.LeakyReLU(0.01)(modelS)
        modelS = layers.Dropout(0.3)(modelS)

        modelS = layers.Dense(units=256, activation='linear')(modelS)
        modelS = layers.BatchNormalization()(modelS)
        modelS = layers.LeakyReLU(0.01)(modelS)
        modelS = layers.Dropout(0.3)(modelS)

        modelA = layers.Dense(units=256, activation='linear')(inputActions)
        modelA = layers.LeakyReLU(0.01)(modelA)
        modelA = layers.BatchNormalization()(modelA)
        modelA = layers.Dropout(0.5)(modelA)

        # Merge the state and action pathways
        model = layers.add([modelS, modelA])
        model = layers.Dense(units=256, activation='linear')(model)
        model = layers.BatchNormalization()(model)
        model = layers.LeakyReLU(0.01)(model)

        # Q-value output layer
        Qvalues = layers.Dense(units=1, activation=None,
                               name='outputQvalues')(model)

        # Keras model
        self.model = models.Model(inputs=[inputStates, inputActions],
                                  outputs=Qvalues)
        optimizer = optimizers.Adam()
        self.model.compile(optimizer=optimizer, loss='mse')
        actionGradients = K.gradients(Qvalues, inputActions)
        self.get_action_gradients = K.function(
            inputs=[*self.model.input, K.learning_phase()],
            outputs=actionGradients)
Example #7
    def build_model(self): 
        states = layers.Input(shape=(self.state_size,), name='inputStates')

        # Hidden Layers
        model = layers.Dense(units=128, activation='linear')(states)
        model = layers.BatchNormalization()(model)
        model = layers.LeakyReLU(0.01)(model)
        model = layers.Dropout(0.3)(model)
        
        model = layers.Dense(units=256, activation='linear')(model)
        model = layers.BatchNormalization()(model)
        model = layers.LeakyReLU(0.01)(model)
        model = layers.Dropout(0.3)(model)

        model = layers.Dense(units=512, activation='linear')(model)
        model = layers.BatchNormalization()(model)
        model = layers.LeakyReLU(0.01)(model)
        model = layers.Dropout(0.3)(model)

        model = layers.Dense(units=128, activation='linear')(model)
        model = layers.BatchNormalization()(model)
        model = layers.LeakyReLU(0.01)(model)
        model = layers.Dropout(0.3)(model)

        output = layers.Dense(
            units=self.action_size, 
            activation='tanh', 
            kernel_regularizer=regularizers.l2(0.01),
            name='outputActions')(model)

        # Keras model
        self.model = models.Model(inputs=states, outputs=output)

        # Define loss, optimizer and training function
        actionGradients = layers.Input(shape=(self.action_size,))
        loss = K.mean(-actionGradients * output)
        optimizer = optimizers.Adam()
        update_operation = optimizer.get_updates(params=self.model.trainable_weights, loss=loss)
        self.train_fn = K.function(
            inputs=[self.model.input, actionGradients, K.learning_phase()],
            outputs=[], 
            updates=update_operation)
Example #8
    def _create_discriminator(self):
        inputs = layers.Input(shape=(HEIGHT, WIDTH, CHANNELS))

        x = layers.Conv2D(128, kernel_size=3)(inputs)
        x = layers.LeakyReLU()(x)

        x = layers.Conv2D(128, kernel_size=4, strides=2)(x)
        x = layers.LeakyReLU()(x)

        x = layers.Conv2D(128, kernel_size=4, strides=2)(x)
        x = layers.LeakyReLU()(x)

        x = layers.Conv2D(128, kernel_size=4, strides=2)(x)
        x = layers.LeakyReLU()(x)

        x = layers.Flatten()(x)
        x = layers.Dropout(self.args.dropout)(x)
        outputs = layers.Dense(1, activation='sigmoid')(x)

        discriminator = models.Model(inputs, outputs)
        return discriminator
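
A minimal wiring sketch for training, assuming the usual GAN setup (none of this is from the original source): compile the discriminator on its own, then freeze it inside a combined model that trains the generator to fool it.

    discriminator.compile(optimizer=optimizers.RMSprop(lr=8e-4, clipvalue=1.0),
                          loss='binary_crossentropy')
    discriminator.trainable = False  # frozen only within the combined model

    gan_input = layers.Input(shape=(latent_dims,))  # latent_dims is an assumption
    gan_output = discriminator(generator(gan_input))
    gan = models.Model(gan_input, gan_output)
    gan.compile(optimizer=optimizers.RMSprop(lr=4e-4, clipvalue=1.0),
                loss='binary_crossentropy')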
Example #9
    def residual_block(y,
                       nb_channels_in,
                       nb_channels_out,
                       _strides=(1, 1),
                       _project_shortcut=False):
        """
        Our network consists of a stack of residual blocks. These blocks have the same topology,
        and are subject to two simple rules:
        - If producing spatial maps of the same size, the blocks share the same hyper-parameters (width and filter sizes).
        - Each time the spatial map is down-sampled by a factor of 2, the width of the blocks is multiplied by a factor of 2.
        """
        shortcut = y

        # we modify the residual building block as a bottleneck design to make the network more economical
        y = layers.Conv2D(nb_channels_in,
                          kernel_size=(1, 1),
                          strides=(1, 1),
                          padding='same')(y)
        y = add_common_layers(y)

        # ResNeXt (identical to ResNet when `cardinality` == 1)
        y = grouped_convolution(y, nb_channels_in, _strides=_strides)
        y = add_common_layers(y)

        y = layers.Conv2D(nb_channels_out,
                          kernel_size=(1, 1),
                          strides=(1, 1),
                          padding='same')(y)
        # batch normalization is employed after aggregating the transformations and before adding to the shortcut
        y = layers.BatchNormalization()(y)

        # identity shortcuts used directly when the input and output are of the same dimensions
        if _project_shortcut or _strides != (1, 1):
            # when the dimensions increase, a projection shortcut is used to match dimensions (via 1×1 convolutions)
            # when the shortcuts go across feature maps of two sizes, they are performed with a stride of 2
            shortcut = layers.Conv2D(nb_channels_out,
                                     kernel_size=(1, 1),
                                     strides=_strides,
                                     padding='same')(shortcut)
            shortcut = layers.BatchNormalization()(shortcut)

        y = layers.add([shortcut, y])

        # relu is performed right after each batch normalization,
        # except for the output of the block, where relu is performed after adding to the shortcut
        y = layers.LeakyReLU()(y)

        return y
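
The `grouped_convolution` helper is referenced above but not shown (`add_common_layers` appears as Example #11 below). Below is a minimal sketch of the standard ResNeXt grouped convolution, assuming a module-level `cardinality` constant; it is an assumption, not the original helper:

    def grouped_convolution(y, nb_channels, _strides):
        # With cardinality == 1 this collapses to a plain ResNet 3x3 convolution.
        if cardinality == 1:
            return layers.Conv2D(nb_channels, kernel_size=(3, 3),
                                 strides=_strides, padding='same')(y)

        assert not nb_channels % cardinality
        _d = nb_channels // cardinality

        # Convolve each channel group separately, then concatenate the
        # resulting transformations.
        groups = []
        for j in range(cardinality):
            group = layers.Lambda(
                lambda z, j=j: z[:, :, :, j * _d:j * _d + _d])(y)
            groups.append(layers.Conv2D(_d, kernel_size=(3, 3),
                                        strides=_strides, padding='same')(group))
        return layers.concatenate(groups)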
Example #10
    def build_model(self):
        """Build a critic (value) network that maps (state, action) pairs -> Q-values."""
        # Define input layers
        states = layers.Input(shape=(self.state_size, ), name='states')
        actions = layers.Input(shape=(self.action_size, ), name='actions')

        # Add hidden layer(s) for state pathway
        net_states = layers.Dense(
            units=32,
            activation='relu',
            use_bias=False,
            kernel_regularizer=regularizers.l2(0.01),
            activity_regularizer=regularizers.l1(0.01))(states)
        net_states = layers.BatchNormalization()(net_states)
        net_states = layers.LeakyReLU(1e-2)(net_states)

        net_states = layers.Dense(
            units=64,
            activation='relu',
            use_bias=False,
            kernel_regularizer=regularizers.l2(0.01),
            activity_regularizer=regularizers.l1(0.01))(net_states)
        net_states = layers.BatchNormalization()(net_states)
        net_states = layers.LeakyReLU(1e-2)(net_states)

        net_states = layers.Dense(
            units=128,
            activation='relu',
            use_bias=False,
            kernel_regularizer=regularizers.l2(0.01),
            activity_regularizer=regularizers.l1(0.01))(net_states)
        net_states = layers.BatchNormalization()(net_states)
        net_states = layers.LeakyReLU(1e-2)(net_states)

        # Add hidden layer(s) for action pathway
        net_actions = layers.Dense(
            units=32,
            activation='relu',
            use_bias=False,
            kernel_regularizer=regularizers.l2(0.01),
            activity_regularizer=regularizers.l1(0.01))(actions)
        net_actions = layers.BatchNormalization()(net_actions)
        net_actions = layers.LeakyReLU(1e-2)(net_actions)

        net_actions = layers.Dense(
            units=64,
            activation='relu',
            use_bias=False,
            kernel_regularizer=regularizers.l2(0.01),
            activity_regularizer=regularizers.l1(0.01))(net_actions)
        net_actions = layers.BatchNormalization()(net_actions)
        net_actions = layers.LeakyReLU(1e-2)(net_actions)

        net_actions = layers.Dense(
            units=128,
            activation='relu',
            use_bias=False,
            kernel_regularizer=regularizers.l2(0.01),
            activity_regularizer=regularizers.l1(0.01))(net_actions)
        net_actions = layers.BatchNormalization()(net_actions)
        net_actions = layers.LeakyReLU(1e-2)(net_actions)

        # Try different layer sizes, activations, add batch normalization, regularizers, etc.

        # Combine state and action pathways
        net = layers.Add()([net_states, net_actions])
        net = layers.Activation('relu')(net)

        # Add more layers to the combined network if needed

        # Add final output layer to produce action values (Q values)
        Q_values = layers.Dense(units=1, name='q_values')(net)

        # Create Keras model
        self.model = models.Model(inputs=[states, actions], outputs=Q_values)

        # Define optimizer and compile model for training with built-in loss function
        optimizer = optimizers.Adam()
        self.model.compile(optimizer=optimizer, loss='mse')

        # Compute action gradients (derivative of Q values w.r.t. actions)
        action_gradients = K.gradients(Q_values, actions)

        # Define an additional function to fetch action gradients (to be used by actor model)
        self.get_action_gradients = K.function(
            inputs=[*self.model.input, K.learning_phase()],
            outputs=action_gradients)
Example #11
    def add_common_layers(y):
        y = layers.BatchNormalization()(y)
        y = layers.LeakyReLU()(y)

        return y
Example #12
    def build_model(self):
        """Build a critic (value) network that maps (state, action) pairs -> Q-values."""
        kernel_l2_reg = 1e-5

        # Dense Options
        # units = 200,
        # activation='relu',
        # activation = None,
        # activity_regularizer=regularizers.l2(0.01),
        # kernel_regularizer=regularizers.l2(kernel_l2_reg),
        # bias_initializer=initializers.Constant(1e-2),
        # use_bias = True
        # use_bias=False
        # Define input layers
        states = layers.Input(shape=(self.state_size, ), name='states')
        actions = layers.Input(shape=(self.action_size, ), name='actions')

        # size_repeat = 30
        # state_size = size_repeat*self.state_size
        # action_size = size_repeat*self.action_size
        # block_size = size_repeat*self.state_size + size_repeat*self.action_size
        # print("Critic block size = {}".format(block_size))
        #
        # net_states = layers.concatenate(size_repeat * [states])
        # net_states = layers.BatchNormalization()(net_states)
        # net_states = layers.Dropout(0.2)(net_states)
        #
        # net_actions = layers.concatenate(size_repeat * [actions])
        # net_actions = layers.BatchNormalization()(net_actions)
        # net_actions = layers.Dropout(0.2)(net_actions)
        #
        # # State pathway
        # for _ in range(3):
        #     net_states = res_block(net_states, state_size)
        #
        # # Action pathway
        # for _ in range(2):
        #     net_actions = res_block(net_actions, action_size)
        #
        # # Merge state and action pathways
        # net = layers.concatenate([net_states, net_actions])
        #
        # # Final blocks
        # for _ in range(3):
        #     net = res_block(net, block_size)

        # Add hidden layer(s) for state pathway
        net_states = layers.Dense(
            units=300,
            kernel_regularizer=regularizers.l2(kernel_l2_reg))(states)
        net_states = layers.BatchNormalization()(net_states)
        net_states = layers.LeakyReLU(1e-2)(net_states)

        net_states = layers.Dense(
            units=400,
            kernel_regularizer=regularizers.l2(kernel_l2_reg))(net_states)
        net_states = layers.BatchNormalization()(net_states)
        net_states = layers.LeakyReLU(1e-2)(net_states)

        # Add hidden layer(s) for action pathway
        net_actions = layers.Dense(
            units=400,
            kernel_regularizer=regularizers.l2(kernel_l2_reg))(actions)
        net_actions = layers.BatchNormalization()(net_actions)
        net_actions = layers.LeakyReLU(1e-2)(net_actions)

        # Merge state and action pathways
        net = layers.add([net_states, net_actions])

        net = layers.Dense(
            units=200, kernel_regularizer=regularizers.l2(kernel_l2_reg))(net)
        net = layers.BatchNormalization()(net)
        net = layers.LeakyReLU(1e-2)(net)

        # Add final output layer to produce action values (Q values)
        Q_values = layers.Dense(
            units=1,
            activation=None,
            kernel_regularizer=regularizers.l2(kernel_l2_reg),
            kernel_initializer=initializers.RandomUniform(minval=-5e-3,
                                                          maxval=5e-3),
            # bias_initializer=initializers.RandomUniform(minval=-3e-3, maxval=3e-3),
            name='q_values')(net)

        # Create Keras model
        self.model = models.Model(inputs=[states, actions], outputs=Q_values)

        # Define optimizer and compile model for training with built-in loss function
        optimizer = optimizers.Adam(lr=1e-2)

        self.model.compile(optimizer=optimizer, loss='mse')

        # Compute action gradients (derivative of Q values w.r.t. actions)
        action_gradients = K.gradients(Q_values, actions)

        # Define an additional function to fetch action gradients (to be used by actor model)
        self.get_action_gradients = K.function(
            inputs=[*self.model.input, K.learning_phase()],
            outputs=action_gradients)
Example #13
    def build_model(self):
        """Build an actor (policy) network that maps states -> actions."""
        kernel_l2_reg = 1e-5
        # Define input layer (states)
        states = layers.Input(shape=(self.state_size, ), name='states')

        # size_repeat = 30
        # block_size = size_repeat*self.state_size
        # print("Actor block size = {}".format(block_size))
        #
        # net = layers.concatenate([states]*size_repeat)
        # # net = layers.Dense(block_size,
        # #                    # kernel_initializer=initializers.RandomNormal(mean=1.0, stddev=0.1),
        # #                    #  bias_initializer=initializers.RandomNormal(mean=0.0, stddev=0.01),
        # #                    activation=None,
        # #                    use_bias=False)(states)
        # net = layers.BatchNormalization()(net)
        # net = layers.Dropout(0.2)(net)
        # # net = layers.LeakyReLU(1e-2)(net)
        #
        # for _ in range(5):
        #     net = res_block(net, block_size)

        # Add hidden layers
        net = layers.Dense(
            units=300,
            kernel_regularizer=regularizers.l2(kernel_l2_reg))(states)
        net = layers.BatchNormalization()(net)
        net = layers.LeakyReLU(1e-2)(net)

        net = layers.Dense(
            units=400, kernel_regularizer=regularizers.l2(kernel_l2_reg))(net)
        net = layers.BatchNormalization()(net)
        net = layers.LeakyReLU(1e-2)(net)

        net = layers.Dense(
            units=200, kernel_regularizer=regularizers.l2(kernel_l2_reg))(net)
        net = layers.BatchNormalization()(net)
        net = layers.LeakyReLU(1e-2)(net)

        # Try different layer sizes, activations, add batch normalization, regularizers, etc.

        # # Add final output layer with sigmoid activation
        # raw_actions = layers.Dense(units=self.action_size,
        #                            activation='sigmoid',
        #                            # kernel_regularizer=regularizers.l2(kernel_l2_reg),
        #                            kernel_initializer=initializers.RandomUniform(minval=-3e-3, maxval=3e-3),
        #                            # bias_initializer=initializers.RandomUniform(minval=-3e-3, maxval=3e-3),
        #                            name='raw_actions')(net)
        #
        # # Scale [0, 1] output for each action dimension to proper range
        # actions = layers.Lambda(lambda x: (x * self.action_range) + self.action_low, name='actions')(raw_actions)

        actions = layers.Dense(
            units=self.action_size,
            activation='tanh',
            kernel_regularizer=regularizers.l2(kernel_l2_reg),
            kernel_initializer=initializers.RandomUniform(minval=-3e-3,
                                                          maxval=3e-3),
            name='actions')(net)

        # Create Keras model
        self.model = models.Model(inputs=states, outputs=actions)

        # Define loss function using action value (Q value) gradients
        action_gradients = layers.Input(shape=(self.action_size, ))
        loss = K.mean(-action_gradients * actions)

        # Incorporate any additional losses here (e.g. from regularizers)

        # Define optimizer and training function
        optimizer = optimizers.Adam(lr=1e-4)

        updates_op = optimizer.get_updates(params=self.model.trainable_weights,
                                           loss=loss)
        self.train_fn = K.function(
            inputs=[self.model.input, action_gradients,
                    K.learning_phase()],
            outputs=[],
            updates=updates_op)
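
Putting the last two examples together, a hedged sketch of a single DDPG learning step (replay sampling, target networks, and discounting are elided; `actor`, `critic`, `states`, `actions`, and `Q_targets` are hypothetical names):

    # 1. Fit the critic toward its TD targets.
    critic.model.train_on_batch(x=[states, actions], y=Q_targets)

    # 2. Ask the critic how Q changes with the action...
    action_gradients = critic.get_action_gradients([states, actions, 0])[0]

    # 3. ...and push the actor's policy in that direction.
    actor.train_fn([states, action_gradients, 1])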