Example #1
def GCN_net():
    I1 = Input(shape=(no_agents, no_features), name="gcn_input")
    Adj = Input(shape=(no_agents, no_agents), name="adj")
    gcn = GCNConv(arglist.no_neurons,
                  kernel_initializer=tf.keras.initializers.he_uniform(),
                  activation=tf.keras.layers.LeakyReLU(alpha=0.1),
                  use_bias=False,
                  name="Gcn")([I1, Adj])
    concat = tf.keras.layers.Concatenate(axis=2)([I1, gcn])
    dense = Dense(arglist.no_neurons,
                  kernel_initializer=tf.keras.initializers.he_uniform(),
                  activation=tf.keras.layers.LeakyReLU(alpha=0.1),
                  name="dense_layer")

    last_dense = Dense(no_actions,
                       kernel_initializer=tf.keras.initializers.he_uniform(),
                       name="last_dense_layer")
    # Split the concatenated features into one (batch, features) tensor per agent.
    split = Lambda(lambda x: tf.squeeze(
        tf.split(x, num_or_size_splits=no_agents, axis=1), axis=2))(concat)
    outputs = []
    for j in range(no_agents):
        output = last_dense(dense(split[j]))
        output = tf.keras.activations.tanh(output)
        outputs.append(output)

    V = tf.stack(outputs, axis=1)
    model = Model([I1, Adj], V)
    model._name = "final_network"
    return model
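The layer names suggest that GCNConv comes from spektral.layers and that no_agents, no_features, no_actions and arglist.no_neurons are module-level globals. A minimal smoke test under those assumptions (the adjacency is crudely row-normalised here; Spektral's usual GCN preprocessing would normally be applied instead):

import numpy as np

# Sketch only: assumes the module-level globals mentioned above are defined.
batch = 32
model = GCN_net()
x = np.random.rand(batch, no_agents, no_features).astype(np.float32)
adj = np.ones((batch, no_agents, no_agents), dtype=np.float32)
adj /= adj.sum(axis=-1, keepdims=True)   # crude stand-in for GCN normalisation
q = model.predict([x, adj])              # -> (batch, no_agents, no_actions)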
Example #2
def graph_net(arglist):
    I1 = Input(shape=(no_agents, no_features), name="graph_input")
    Adj = Input(shape=(no_agents, no_agents), name="adj")
    gat = GATConv(
        arglist.no_neurons,
        activation='relu',
        attn_heads=4,
        concat_heads=True,
    )([I1, Adj])
    concat = tf.keras.layers.Concatenate(axis=2)([I1, gat])

    dense = Dense(arglist.no_neurons,
                  kernel_initializer=tf.keras.initializers.he_uniform(),
                  activation=tf.keras.layers.LeakyReLU(alpha=0.1),
                  name="dense_layer")

    last_dense = Dense(no_actions,
                       kernel_initializer=tf.keras.initializers.he_uniform(),
                       name="last_dense_layer")
    split = Lambda(lambda x: tf.squeeze(
        tf.split(x, num_or_size_splits=no_agents, axis=1), axis=2))(concat)
    outputs = []
    for j in range(no_agents):
        output = last_dense(dense(split[j]))
        output = tf.keras.activations.tanh(output)
        outputs.append(output)

    V = tf.stack(outputs, axis=1)
    model = Model([I1, Adj], V)
    model._name = "final_network"

    tf.keras.utils.plot_model(model, show_shapes=True)
    return model
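With attn_heads=4 and concat_heads=True, Spektral's GATConv concatenates the head outputs, so each agent's feature vector after the Concatenate layer has no_features + 4 * arglist.no_neurons entries. A sketch of calling the builder (arglist is presumably an argparse.Namespace in the original code; a stand-in is used here, and the plot_model call above requires pydot and graphviz):

from types import SimpleNamespace

# Hypothetical stand-in for the parsed argument namespace; only the field
# used by graph_net is filled in.
arglist = SimpleNamespace(no_neurons=64)
model = graph_net(arglist)
model.summary()   # per-agent GAT output width: 4 heads * 64 units = 256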
Example #3
def graph_net(arglist):
    I1 = Input(shape=(no_agents, no_features), name="gcn_input")
    Adj = Input(shape=(no_agents, no_agents), name="adj")
    gat = GATConv(
        arglist.no_neurons,
        activation='relu',
        attn_heads=4,
        concat_heads=True,
    )([I1, Adj])
    concat = tf.keras.layers.Concatenate(axis=2)([I1, gat])

    outputs = []
    dense = Dense(arglist.no_neurons,
                  kernel_initializer=tf.keras.initializers.he_uniform(),
                  activation=tf.keras.layers.LeakyReLU(alpha=0.1),
                  name="dense_layer")

    dense2 = Dense(arglist.no_neurons // 2,
                   kernel_initializer=tf.keras.initializers.he_uniform(),
                   activation=tf.keras.layers.LeakyReLU(alpha=0.1),
                   name="sec_dense_layer")

    state_value = Dense(1,
                        kernel_initializer='he_uniform',
                        name="value_output")
    state_value_lambda = Lambda(lambda s: K.expand_dims(s[:, 0], -1),
                                output_shape=(no_actions, ))

    action_advantage = Dense(no_actions,
                             name="advantage_output",
                             kernel_initializer='he_uniform')
    action_advantage_lambda = Lambda(
        lambda a: a - K.mean(a, axis=1, keepdims=True),
        output_shape=(no_actions, ))

    split = Lambda(lambda x: tf.squeeze(
        tf.split(x, num_or_size_splits=no_agents, axis=1), axis=2))(concat)
    for j in range(no_agents):
        V = dense(split[j])
        V2 = dense2(V)
        if arglist.dueling:
            state_value_dense = state_value(V2)
            state_value_n = state_value_lambda(state_value_dense)
            action_adj_dense = action_advantage(V2)
            action_adj_n = action_advantage_lambda(action_adj_dense)
            output = Add()([state_value_n, action_adj_n])
            output = tf.keras.activations.tanh(output)
            outputs.append(output)
        else:
            outputs.append(V2)

    V = tf.stack(outputs, axis=1)
    model = Model([I1, Adj], V)
    model._name = "final_network"
    tf.keras.utils.plot_model(model, show_shapes=True)
    return model
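When arglist.dueling is set, each agent's head follows the dueling-DQN decomposition Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a)); subtracting the mean advantage keeps the split between V and A identifiable. A small numpy illustration of the aggregation the Add layer performs:

import numpy as np

# Dueling aggregation for a batch of 2 samples and 3 actions (illustration only).
V = np.array([[0.5], [-0.2]])                      # state value, shape (batch, 1)
A = np.array([[1.0, 2.0, 3.0], [0.0, 1.0, -1.0]])  # advantages, shape (batch, no_actions)
Q = V + (A - A.mean(axis=1, keepdims=True))        # shape (batch, no_actions)
print(Q)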
Example #4
def graph_net(arglist):
    I = []
    for _ in range(no_agents):
        I.append(Input(shape=(
            arglist.history_size,
            no_features,
        )))

    outputs = []
    temporal_state = None
    for i in range(no_agents):
        if arglist.temporal_mode.lower() == "rnn":
            temporal_state = GRU(arglist.no_neurons)(I[i])
        elif arglist.temporal_mode.lower() == "attention":
            temporal_state = SelfAttention(
                activation=tf.keras.layers.LeakyReLU(alpha=0.1))(I[i])
            temporal_state = Lambda(lambda x: x[:, -1])(temporal_state)
        else:
            raise RuntimeError(
                "Temporal Information Layer should be rnn or attention but %s found!"
                % arglist.temporal_mode)
        dense = Dense(
            arglist.no_neurons,
            kernel_initializer=tf.keras.initializers.he_uniform(),
            activation=tf.keras.layers.LeakyReLU(alpha=0.1))(temporal_state)
        med_dense = Dense(
            arglist.no_neurons,
            kernel_initializer=tf.keras.initializers.he_uniform(),
            activation=tf.keras.layers.LeakyReLU(alpha=0.1))(dense)
        last_dense = Dense(
            no_actions,
            kernel_initializer=tf.keras.initializers.he_uniform())(med_dense)
        outputs.append(last_dense)

    V = tf.stack(outputs, axis=1)
    model = Model(I, V)
    model._name = "final_network"
    tf.keras.utils.plot_model(model, show_shapes=True)
    return model
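This variant takes one input per agent, each holding a short observation history, so prediction needs a list of no_agents arrays of shape (batch, history_size, no_features). A sketch, assuming no_agents, no_features and no_actions are module-level globals and that SelfAttention is a Keras layer returning per-timestep outputs (which is why the attention branch keeps only the last timestep):

import numpy as np
from types import SimpleNamespace

# Hypothetical arglist; the field names mirror the attributes used above.
arglist = SimpleNamespace(history_size=5, no_neurons=64, temporal_mode="rnn")
model = graph_net(arglist)

batch = 16
histories = [np.random.rand(batch, arglist.history_size, no_features).astype(np.float32)
             for _ in range(no_agents)]
q = model.predict(histories)   # -> (batch, no_agents, no_actions)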
Example #5
    def _model(self):
        from keras.models import Model
        from keras.layers import Dense, Input, Conv2D, add, Flatten, BatchNormalization, ReLU
        from keras.optimizers import Adam, SGD
        from keras.losses import mean_squared_error, binary_crossentropy
        from keras.regularizers import l2

        board_input = Input(shape=(6, 7, 3))

        # Start conv block
        conv_1 = Conv2D(self.filters, (4, 4),
                        padding='same',
                        kernel_regularizer=l2(self.l2_reg))(board_input)
        norm_1 = BatchNormalization()(conv_1)
        relu_1 = ReLU()(norm_1)

        # Residual convolution blocks
        res_1 = ResBlock(relu_1, self.filters, self.l2_reg)
        res_2 = ResBlock(res_1, self.filters, self.l2_reg)
        res_3 = ResBlock(res_2, self.filters, self.l2_reg)
        res_4 = ResBlock(res_3, self.filters, self.l2_reg)
        res_5 = ResBlock(res_4, self.filters, self.l2_reg)
        res_6 = ResBlock(res_5, self.filters, self.l2_reg)

        # Policy head
        policy_conv = Conv2D(32, (1, 1),
                             use_bias=False,
                             kernel_regularizer=l2(self.l2_reg))(res_6)
        policy_norm = BatchNormalization()(policy_conv)
        policy_relu = ReLU()(policy_norm)
        policy_flat = Flatten()(policy_relu)

        # Policy output
        policy = Dense(7,
                       activation='softmax',
                       name='policy',
                       use_bias=False,
                       kernel_regularizer=l2(self.l2_reg))(policy_flat)

        # Value head
        # Value head (branches from the final residual block, like the policy head)
        value_conv = Conv2D(32, (1, 1),
                            use_bias=False,
                            kernel_regularizer=l2(self.l2_reg))(res_6)
        value_norm = BatchNormalization()(value_conv)
        value_relu_1 = ReLU()(value_norm)
        value_flat = Flatten()(value_relu_1)

        value_dense = Dense(32,
                            use_bias=False,
                            kernel_regularizer=l2(self.l2_reg))(value_flat)
        value_relu_2 = ReLU()(value_dense)

        # Value output
        value = Dense(1,
                      activation='tanh',
                      name='value',
                      use_bias=False,
                      kernel_regularizer=l2(self.l2_reg))(value_relu_2)

        # Final model
        model = Model(inputs=[board_input], outputs=[policy, value])

        # Compile
        model.compile(optimizer=Adam(0.001),
                      loss={
                          'value': 'mse',
                          'policy': softmax_cross_entropy_with_logits
                      })
        # model.compile(optimizer=SGD(0.1, 0.9), loss={'value': 'mse', 'policy': softmax_cross_entropy_with_logits})

        # Set the model name
        model._name = self.name

        return model
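ResBlock is used above but not defined in this snippet. A plausible sketch matching the ResBlock(input, filters, l2_reg) call signature (an assumption; the repository's own definition may differ):

from keras.layers import Conv2D, BatchNormalization, ReLU, add
from keras.regularizers import l2

def ResBlock(x, filters, l2_reg):
    # Two conv/BN stages with an identity skip connection (sketch).
    y = Conv2D(filters, (4, 4), padding='same', kernel_regularizer=l2(l2_reg))(x)
    y = BatchNormalization()(y)
    y = ReLU()(y)
    y = Conv2D(filters, (4, 4), padding='same', kernel_regularizer=l2(l2_reg))(y)
    y = BatchNormalization()(y)
    y = add([x, y])
    return ReLU()(y)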
Example #6
    def _model(self):
        from keras.models import Model
        from keras.layers import Dense, Input, Conv2D, add, Flatten, BatchNormalization, ReLU
        from keras.optimizers import Adam, SGD
        from keras.losses import mean_squared_error, binary_crossentropy
        from keras.regularizers import l2
        from tensorflow.python.util import deprecation
        deprecation._PRINT_DEPRECATION_WARNINGS = False

        board_input = Input(shape=(2, 6, 7))

        # Start conv block
        conv_1 = Conv2D(self.config.n_filters,
                        (self.config.kernel, self.config.kernel),
                        padding='same',
                        data_format='channels_first',
                        kernel_regularizer=l2(self.config.l2_reg))(board_input)
        norm_1 = BatchNormalization(axis=1)(conv_1)
        relu_1 = ReLU()(norm_1)
        res = relu_1

        # Residual convolution blocks
        for _ in range(self.config.res_layers):
            res = ResBlock(res, self.config.n_filters, self.config.kernel,
                           self.config.l2_reg)

        # Policy head
        policy_conv = Conv2D(2, (1, 1),
                             data_format='channels_first',
                             kernel_regularizer=l2(self.config.l2_reg))(res)
        policy_norm = BatchNormalization(axis=1)(policy_conv)
        policy_relu = ReLU()(policy_norm)
        policy_flat = Flatten()(policy_relu)

        # Policy output
        policy = Dense(7,
                       activation='softmax',
                       name='policy',
                       kernel_regularizer=l2(self.config.l2_reg))(policy_flat)

        # Value head
        value_conv = Conv2D(1, (1, 1),
                            data_format='channels_first',
                            kernel_regularizer=l2(self.config.l2_reg))(res)
        value_norm = BatchNormalization(axis=1)(value_conv)
        value_relu_1 = ReLU()(value_norm)
        value_flat = Flatten()(value_relu_1)

        value_dense = Dense(self.config.value_dense,
                            kernel_regularizer=l2(
                                self.config.l2_reg))(value_flat)
        value_relu_2 = ReLU()(value_dense)

        # Value output
        value = Dense(1,
                      activation='tanh',
                      name='value',
                      kernel_regularizer=l2(self.config.l2_reg))(value_relu_2)

        # Final model
        model = Model(inputs=[board_input], outputs=[policy, value])

        # Compile
        model.compile(optimizer=SGD(0.001, momentum=0.9),
                      loss={
                          'value': objective_function_for_value,
                          'policy': objective_function_for_policy
                      },
                      metrics={'value': [self.mean]})

        # Set the model name
        model._name = self.config.model

        return model
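objective_function_for_policy, objective_function_for_value and self.mean are referenced but not shown here. A hedged sketch of what such objectives typically look like in AlphaZero-style training (the policy output above is already softmaxed, so plain categorical cross-entropy fits; the real definitions may differ):

import keras.backend as K

def objective_function_for_policy(y_true, y_pred):
    # Cross-entropy between the search-derived move distribution and the
    # predicted (already softmaxed) policy.
    return K.categorical_crossentropy(y_true, y_pred)

def objective_function_for_value(y_true, y_pred):
    # Squared error between the final game outcome and the predicted value.
    return K.mean(K.square(y_true - y_pred), axis=-1)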