Example 1
def restricted_attention(x, k):
    # Local self-attention: each position attends only to the 2k + 1
    # positions within distance k of itself, plus a residual connection.
    dim = x.shape[2]

    Wq = layers.Dense(dim)
    Wk = layers.Dense(dim)

    wk = Wk(x)

    # Zero-pad the time axis by k on each side so tf.roll can gather a
    # (2k + 1)-position window around every timestep.
    paddings = tf.constant([[0, 0], [k, k], [0, 0]])
    pk = tf.pad(wk, paddings)
    pv = tf.pad(x, paddings)

    keys = []
    vals = []
    for i in range(-k, k + 1):
        # Shifted copies: position t of roll(pk, i) holds the key from t - i.
        keys.append(tf.roll(pk, i, 1))
        vals.append(tf.roll(pv, i, 1))

    # Stack the window dimension, then trim the padded ends:
    # shapes become (batch, time, 2k + 1, dim).
    keys = tf.stack(keys, 2)
    keys = keys[:, k:-k, :, :]
    vals = tf.stack(vals, 2)
    vals = vals[:, k:-k, :, :]

    # -- missing code --
    query = Wq(x)[..., None]

    # Scaled dot-product attention over each local window.
    dot_product = tf.matmul(keys, query) / np.sqrt(dim)
    atten_weights = layers.Softmax(name='atten_weights', axis=-2)(dot_product)

    val_out = tf.matmul(atten_weights, vals, transpose_a=True)
    val_out = tf.squeeze(val_out, axis=2)
    return x + val_out  # residual connection
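
A minimal smoke test of the function above (a sketch; it assumes the usual imports, and any input with a static last dimension will do):

import numpy as np
import tensorflow as tf
from tensorflow.keras import layers

x = tf.random.normal((2, 10, 8))  # (batch, time, dim)
y = restricted_attention(x, k=2)  # each position sees 2 neighbours per side
print(y.shape)                    # (2, 10, 8): residual output, same shape as x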
Example 2
    def call(self, inputs, **kwargs):

        m = inputs.shape[-1]  # input embedding size

        # NOTE: add_weight inside call() creates fresh, untrained variables on
        # every invocation; they belong in build() -- Example 11 runs the same
        # forward pass with the weights created once.
        W_Query = self.add_weight(shape=[m, self.att_embedding_size * self.heads],
                                  initializer=tf.keras.initializers.RandomNormal(seed=self.seed))
        W_key = self.add_weight(shape=[m, self.att_embedding_size * self.heads],
                                initializer=tf.keras.initializers.RandomNormal(seed=self.seed))
        W_Value = self.add_weight(shape=[m, self.att_embedding_size * self.heads],
                                  initializer=tf.keras.initializers.RandomNormal(seed=self.seed))

        queries = tf.matmul(inputs, W_Query)
        keys = tf.matmul(inputs, W_key)
        values = tf.matmul(inputs, W_Value)

        queries = tf.stack(tf.split(queries, self.heads, axis=2))
        keys = tf.stack(tf.split(keys, self.heads, axis=2))
        values = tf.stack(tf.split(values, self.heads, axis=2))

        # Per-head attention scores (note: no 1/sqrt(d) scaling in this variant).
        att_score = tf.matmul(queries, keys, transpose_b=True)
        att_score = layers.Softmax(axis=-1)(att_score)

        result = tf.matmul(att_score, values)
        result = tf.concat(tf.split(result, self.heads), axis=-1)
        result = tf.squeeze(result, axis=0)

        if self.use_res:
            W_Res = self.add_weight(shape=[m, self.att_embedding_size * self.heads],
                                    initializer=tf.keras.initializers.RandomNormal(seed=self.seed))
            result = result + tf.matmul(inputs, W_Res)

        result = tf.keras.activations.relu(result)

        return result
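
Because of the add_weight pitfall noted above, the projection matrices belong in build(); Example 11 below runs the same forward pass with the weights created once. A sketch of such a build() method (the attribute names match Example 11; the name arguments and everything else are assumptions):

    def build(self, input_shape):
        m = int(input_shape[-1])
        init = tf.keras.initializers.RandomNormal(seed=self.seed)
        shape = [m, self.att_embedding_size * self.heads]
        # Created once, when the input shape becomes known.
        self.W_Query = self.add_weight(name='w_query', shape=shape, initializer=init)
        self.W_key = self.add_weight(name='w_key', shape=shape, initializer=init)
        self.W_Value = self.add_weight(name='w_value', shape=shape, initializer=init)
        if self.use_res:
            self.W_Res = self.add_weight(name='w_res', shape=shape, initializer=init)
        super().build(input_shape)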
Example 3
def multihead_attention_model(inputs):
    # A single attention head, despite the function's name:
    # scores = X @ X^T, output = softmax(scores) @ X.
    query_key = layers.Dot(axes=2)([inputs, inputs])
    attentions = layers.Softmax(axis=-1)(query_key)
    # Contract the key axis of the weights (axis 2) with the time axis of the
    # values (axis 1); a plain axes=1 would sum over the query axis instead.
    qkv = layers.Dot(axes=(2, 1))([attentions, inputs])
    return qkv
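
A quick functional-API check of the block above (a sketch; the input shape is arbitrary):

from tensorflow import keras
from tensorflow.keras import layers

inputs = keras.Input(shape=(20, 64))         # (timesteps, features)
outputs = multihead_attention_model(inputs)
model = keras.Model(inputs, outputs)
model.summary()                              # output shape: (None, 20, 64)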
Example 4
    def call(self, inputs, **kwargs):
        x = self.dense_1(inputs)
        x = self.bn_1(x)
        x = self.lrelu(x)
        logits = self.dense_2(x)
        out = layers.Softmax()(logits)  # Softmax(logits) would misread the tensor as the axis argument

        return out, logits
Example 5
    def __init__(self, scope):
        super(DeepxorModel, self).__init__()  # initialize the Model before setting attributes
        self.scope = scope
        # tf.contrib below requires TensorFlow 1.x.
        self.l_0 = layers.Dense(4096, activation=tf.nn.elu, kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=FLAGS.l2), kernel_initializer=tf.contrib.layers.xavier_initializer())
        self.l_1 = layers.Dense(2048, activation=tf.nn.elu, kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=FLAGS.l2), kernel_initializer=tf.contrib.layers.xavier_initializer())
        self.l_2 = layers.Dense(512, activation=tf.nn.elu, kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=FLAGS.l2), kernel_initializer=tf.contrib.layers.xavier_initializer())
        self.l_3 = layers.Dense(512, activation=tf.nn.elu, kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=FLAGS.l2), kernel_initializer=tf.contrib.layers.xavier_initializer())
        self.logits = layers.Dense(num_actions, kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=FLAGS.l2), kernel_initializer=tf.contrib.layers.xavier_initializer())
        self.policy = layers.Softmax()
        self.values = layers.Dense(1, activation=tf.tanh, kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=FLAGS.l2), kernel_initializer=tf.contrib.layers.xavier_initializer())
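
Only the constructor is shown; a forward pass consistent with this layout might look like the following sketch (an assumption, not the source's actual call()):

    def call(self, inputs):
        x = self.l_3(self.l_2(self.l_1(self.l_0(inputs))))
        return self.policy(self.logits(x)), self.values(x)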
Example 6
    def build_layer7(self, inp):
        layer7 = Sequential([
            layers.Conv2D(256, 2),
            Activation('relu'),
            layers.Conv2D(128, 2),
            Activation('relu'),
            layers.Conv2D(2, 1),
            layers.Softmax()
        ])(inp)
        print('layer 7 ', layer7.shape)
        return keras.Model(inp, layer7)
Example 7
def RSoftmax(x, filters, radix, groups, name):
    bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1
    c = filters // radix // groups
    shape = (groups, radix, c)  # same layout for either data format once pooled

    x = layers.Reshape(shape, name=name + '_0_attn_reshape')(x)
    x = layers.Lambda(lambda x: tf.transpose(x, (0, 2, 1, 3)),
                      name=name + '_attn_transpose')(x)
    x = layers.Softmax(axis=1, name=name + '_attn_softmax')(x)

    shape = (1, 1, filters) if bn_axis == 3 else (filters, 1, 1)
    x = layers.Reshape(shape, name=name + '_1_attn_reshape')(x)
    return x
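
A shape walk-through of RSoftmax (a sketch; channels_last and all sizes are assumptions): with filters=64, radix=2, groups=1 the pooled logits are reshaped to (batch, 1, 2, 32), transposed so the radix splits sit on axis 1, softmaxed across them, and reshaped back.

import tensorflow as tf

logits = tf.random.normal((8, 1, 1, 64))  # pooled split-attention logits
attn = RSoftmax(logits, filters=64, radix=2, groups=1, name='stage1')
print(attn.shape)                         # (8, 1, 1, 64)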
Example 8
    def __init__(self):
        super(KerasModel, self).__init__()
        weight_decay = 1e-4  # L2 penalty applied to every conv kernel

        # Block 1: 32-filter convs -> pool -> dropout
        self.conv1 = layers.Conv2D(
            32, (3, 3),
            padding='same',
            input_shape=(32, 32, 3),
            kernel_regularizer=regularizers.l2(weight_decay))
        self.elu1 = layers.ELU()
        self.bn1 = layers.BatchNormalization()
        self.conv2 = layers.Conv2D(
            32, (3, 3), kernel_regularizer=regularizers.l2(weight_decay))
        self.elu2 = layers.ELU()
        self.bn2 = layers.BatchNormalization()
        self.pool1 = layers.MaxPool2D(pool_size=(2, 2))
        self.dropout1 = layers.Dropout(rate=0.2)

        # Block 2: 64-filter convs -> pool -> dropout
        self.conv3 = layers.Conv2D(
            64, (3, 3),
            padding='same',
            kernel_regularizer=regularizers.l2(weight_decay))
        self.elu3 = layers.ELU()
        self.bn3 = layers.BatchNormalization()
        self.conv4 = layers.Conv2D(
            64, (3, 3), kernel_regularizer=regularizers.l2(weight_decay))
        self.elu4 = layers.ELU()
        self.bn4 = layers.BatchNormalization()
        self.pool2 = layers.MaxPool2D(pool_size=(2, 2))
        self.dropout2 = layers.Dropout(rate=0.3)

        # Block 3: 128-filter convs -> pool -> dropout
        self.conv5 = layers.Conv2D(
            128, (3, 3),
            padding='same',
            kernel_regularizer=regularizers.l2(weight_decay))
        self.elu5 = layers.ELU()
        self.bn5 = layers.BatchNormalization()
        self.conv6 = layers.Conv2D(
            128, (3, 3), kernel_regularizer=regularizers.l2(weight_decay))
        self.elu6 = layers.ELU()
        self.bn6 = layers.BatchNormalization()
        self.pool3 = layers.MaxPool2D(pool_size=(2, 2))
        self.dropout3 = layers.Dropout(rate=0.4)

        # Classifier head: flatten -> 512-unit dense -> 10-way softmax
        self.flatten1 = layers.Flatten()
        self.dense1 = layers.Dense(512)
        self.elu7 = layers.ELU()
        self.dropout4 = layers.Dropout(rate=0.5)
        self.dense2 = layers.Dense(10)
        self.softmax = layers.Softmax()
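
Only __init__ is shown; the forward pass implied by this layout would run along these lines (a sketch, not the source's actual call()):

    def call(self, inputs, training=False):
        x = self.bn1(self.elu1(self.conv1(inputs)), training=training)
        x = self.bn2(self.elu2(self.conv2(x)), training=training)
        x = self.dropout1(self.pool1(x), training=training)
        x = self.bn3(self.elu3(self.conv3(x)), training=training)
        x = self.bn4(self.elu4(self.conv4(x)), training=training)
        x = self.dropout2(self.pool2(x), training=training)
        x = self.bn5(self.elu5(self.conv5(x)), training=training)
        x = self.bn6(self.elu6(self.conv6(x)), training=training)
        x = self.dropout3(self.pool3(x), training=training)
        x = self.dropout4(self.elu7(self.dense1(self.flatten1(x))), training=training)
        return self.softmax(self.dense2(x))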
Example 9
    def call(self, inputs, **kwargs):

        interactions = list()

        # Element-wise product of every pair of field embeddings.
        for i in range(len(inputs) - 1):
            for j in range(i + 1, len(inputs)):
                interactions.append(tf.multiply(inputs[i], inputs[j]))

        interactions = tf.stack(interactions, axis=1)  # (batch, n_pairs, dim)
        att_weight = self.att_layer(interactions)
        att_weight = self.att_proj_layer(att_weight)   # one logit per pair

        att_weight = layers.Softmax(axis=1)(att_weight)  # normalize over pairs
        output = tf.reduce_sum(interactions * att_weight, axis=1)

        return output
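
self.att_layer and self.att_proj_layer are created elsewhere; given the shapes printed in Example 12 (a (None, 276, 4) score tensor that must reduce to one weight per pair), a plausible constructor is the following sketch (the names, sizes, and activation are assumptions):

    def __init__(self, att_units=4, **kwargs):
        super().__init__(**kwargs)
        self.att_layer = tf.keras.layers.Dense(att_units, activation='relu')  # score each interaction
        self.att_proj_layer = tf.keras.layers.Dense(1, use_bias=False)        # one logit per pair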
Example 10
    def __init__(self, num_actions):
        # tf.contrib and tf.placeholder below require TensorFlow 1.x.
        super(RecurrentModel, self).__init__()
        self.l_0 = layers.Dense(
            FLAGS.input_layer,
            activation=tf.nn.elu,
            kernel_regularizer=tf.contrib.layers.l2_regularizer(
                scale=FLAGS.l2),
            kernel_initializer=tf.contrib.layers.xavier_initializer())
        self.l_1 = layers.Dense(
            FLAGS.hidden_layer,
            activation=tf.nn.elu,
            kernel_regularizer=tf.contrib.layers.l2_regularizer(
                scale=FLAGS.l2),
            kernel_initializer=tf.contrib.layers.xavier_initializer())
        self.l_2 = layers.Dense(
            FLAGS.policy_layer,
            activation=tf.nn.elu,
            kernel_regularizer=tf.contrib.layers.l2_regularizer(
                scale=FLAGS.l2),
            kernel_initializer=tf.contrib.layers.xavier_initializer())
        self.l_3 = layers.Dense(
            FLAGS.value_layer,
            activation=tf.nn.elu,
            kernel_regularizer=tf.contrib.layers.l2_regularizer(
                scale=FLAGS.l2),
            kernel_initializer=tf.contrib.layers.xavier_initializer())
        self.logits = layers.Dense(
            num_actions,
            kernel_regularizer=tf.contrib.layers.l2_regularizer(
                scale=FLAGS.l2),
            kernel_initializer=tf.contrib.layers.xavier_initializer())
        self.policy = layers.Softmax()
        self.values = layers.Dense(
            1,
            activation=tf.tanh,
            kernel_regularizer=tf.contrib.layers.l2_regularizer(
                scale=FLAGS.l2),
            kernel_initializer=tf.contrib.layers.xavier_initializer())

        self.seqlen = tf.placeholder(tf.int32, [None])
Example 11
    def call(self, inputs, **kwargs):
        # W_Query / W_key / W_Value are created once outside call()
        # (e.g. in build(); compare Example 2 and the sketch that follows it).
        queries = tf.matmul(inputs, self.W_Query)
        keys = tf.matmul(inputs, self.W_key)
        values = tf.matmul(inputs, self.W_Value)

        queries = tf.stack(tf.split(queries, self.heads, axis=2))
        keys = tf.stack(tf.split(keys, self.heads, axis=2))
        values = tf.stack(tf.split(values, self.heads, axis=2))

        att_score = tf.matmul(queries, keys, transpose_b=True)
        att_score = layers.Softmax(axis=-1)(att_score)

        result = tf.matmul(att_score, values)
        result = tf.concat(tf.split(result, self.heads), axis=-1)
        result = tf.squeeze(result, axis=0)

        if self.use_res:
            result = result + tf.matmul(inputs, self.W_Res)

        result = tf.keras.activations.relu(result)

        return result
Example 12
    def call(self, inputs, **kwargs):

        interactions = list()

        for i in range(len(inputs) - 1):
            for j in range(i + 1, len(inputs)):
                interactions.append(tf.multiply(inputs[i], inputs[j]))
        # print(interactions)
        interactions = tf.stack(interactions, axis=1)
        print("interactions:", interactions)
        att_weight = self.att_layer(interactions)
        print("att_weight:", att_weight)
        # att_weight: Tensor("attention_based_pooling_layer/dense/Identity:0", shape=(None, 276, 4), dtype=float32)
        att_weight = self.att_proj_layer(att_weight)
        print("att_weight:", att_weight)

        att_weight = layers.Softmax(axis=1)(att_weight)
        print("att_weight:", att_weight)

        output = tf.reduce_sum(interactions * att_weight, axis=1)
        print("output:", output)

        return output
Example 13
    def build(self):
        inputs = layers.Input(self.input_size)

        output0 = self._context_module(16, inputs, strides=(1, 1))
        output1 = self._context_module(32, output0, strides=(2, 2))
        output2 = self._context_module(64, output1, strides=(2, 2))
        output3 = self._context_module(128, output2, strides=(2, 2))
        output4 = self._context_module(256, output3, strides=(2, 2))

        decoder0 = self._decoder_block(128, [output3, output4])
        decoder1 = self._decoder_block(64, [output2, decoder0])
        decoder2 = self._decoder_block(32, [output1, decoder1])
        decoder3 = self._decoder_block_last(16, [output0, decoder2])
        output0 = layers.Conv2D(self.num_class, (1, 1))(decoder3)
        output1 = layers.Conv2D(self.num_class, (1, 1))(decoder2)
        output2_up = layers.UpSampling2D(size=(2, 2))(layers.Conv2D(
            self.num_class, (1, 1))(decoder1))

        # Deep supervision: upsample and sum the multi-scale logits, then softmax.
        output_sum = layers.Add()([output2_up, output1])
        output_sum = layers.UpSampling2D(size=(2, 2))(output_sum)
        output_sum = layers.Add()([output_sum, output0])
        output = layers.Softmax()(output_sum)

        return models.Model(inputs=[inputs], outputs=[output])
Example 14
    def __init__(self):
        #weight initializer
        initializer = tf.keras.initializers.RandomNormal(mean=0.0, stddev=1.0)

        #head side
        head_input = keras.Input(shape=(4, ), name='input_head')

        l2_head = layers.Dense(32,
                               activation='relu',
                               name='l2_head',
                               kernel_initializer=initializer)(head_input)

        l3_head = layers.Dense(4,
                               activation='relu',
                               name='l3_head',
                               kernel_initializer=initializer)(l2_head)

        l4_head = layers.BatchNormalization(name='head_norm')(l3_head)

        #grid side
        block_input = keras.Input(shape=(globe.GRID_X, globe.GRID_Y, 1),
                                  name='input_game_state')

        l1 = layers.Conv2D(16,
                           3,
                           padding='same',
                           activation='relu',
                           name='l1',
                           kernel_initializer=initializer)(block_input)

        l2 = layers.Conv2D(16,
                           3,
                           padding='same',
                           activation='relu',
                           name='l2',
                           kernel_initializer=initializer)(l1)

        l3 = layers.Conv2D(4,
                           1,
                           padding='same',
                           activation='relu',
                           name='l3',
                           kernel_initializer=initializer)(l2)

        l4 = layers.GlobalAveragePooling2D(name='pool')(l3)

        l5 = layers.BatchNormalization(name='norm')(l4)

        #combine
        l5 = layers.add([l5, l4_head], name='add')

        l6 = layers.Dense(4,
                          activation='relu',
                          name='last_fully_connected',
                          kernel_initializer=initializer)(l5)

        l7 = layers.Softmax(name='policy')(l6)

        l8 = layers.Multiply(name='mult')([l7, head_input])

        self.model = keras.Model(inputs=[block_input, head_input], outputs=l8)
        self.compile()
Example 15
def build_model(model_type, n_units=64):
    print(model_type)
    sequences = layers.Input(shape=(MAX_LENGTH,))
    embedding_layer = layers.Embedding(MAX_FEATURES, 100, weights=[embedding_matrix], input_length=MAX_LENGTH,
                                       trainable=False)

    # embedding the words into 100 dim vectors

    x = embedding_layer(sequences)

    if model_type not in {'RNN', 'GRU'}:

        # non recurrent networks

        if model_type in {'ATTN_WEIGHTED', 'ATTN_SUM'}:
            # attention layer
            x = restricted_attention(x, k=5)

        # Word-wise FC layers -- make sure the sub_score step sets name="sub_score",
        # e.g. sub_score = layers.Dense(2, name="sub_score")(x)

        # -- missing code --
        x = layers.Dense(32, activation='relu')(x)
        # x = layers.Dense(50, activation='relu')(x)

        if model_type in {'WEIGHTED', 'ATTN_WEIGHTED'}:
            x = layers.Dense(2, name="sub_score")(x)
            x0 = layers.Lambda(lambda x: x[:, :, 0])(x)
            x1 = layers.Lambda(lambda x: x[:, :, 1])(x)
            sum_weights = layers.Softmax(name='sum_weights')(x1)
            x = tf.expand_dims(x0 * sum_weights, 2)
        else:
            x = layers.Dense(1, name="sub_score")(x)
        x = K.sum(x, axis=1)

        # final prediction

        x = tf.sigmoid(x)

        predictions = x

    else:
        # recurrent networks
        if model_type == 'GRU':
            x, _ = GRU(n_units, x)
        else:

            x, _ = RNN(n_units, x)

        x = layers.Dense(32, activation='relu')(x)
        x = layers.Dense(1, activation='sigmoid')(x)

        predictions = x

    model = models.Model(inputs=sequences, outputs=predictions)

    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['binary_accuracy', f1]
    )
    return model