Example #1
    def compare_and_score(self, left, right, ent, feats):
        """ Final layer of the compiled model
        Concatenates several comparisons between the vectors of left and right
        contexts and the entity vector.

        Final dense layer takes all of these comparisons, and the final feature
        vector, and outputs a binary prediction.
        """
        comparisons = []

        left_dot = layers.Dot(axes=1, normalize=True)([left, ent])
        right_dot = layers.Dot(axes=1, normalize=True)([right, ent])
        comparisons += [left_dot, right_dot]

        left_diff = layers.Subtract()([left, ent])
        right_diff = layers.Subtract()([right, ent])
        comparisons += [left_diff, right_diff]

        left_diff_sq = layers.Multiply()([left_diff, left_diff])
        right_diff_sq = layers.Multiply()([right_diff, right_diff])
        comparisons += [left_diff_sq, right_diff_sq]

        left_mult = layers.Multiply()([left, ent])
        right_mult = layers.Multiply()([right, ent])
        comparisons += [left_mult, right_mult]

        if feats is not None:
            comparisons.append(feats)

        comparisons_concat = layers.Concatenate(axis=1)(comparisons)
        out = self.reduce_layer(comparisons_concat)
        return out
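
A minimal standalone sketch (assumptions: plain Keras and random test
vectors, not part of the original example) confirming that
layers.Dot(axes=1, normalize=True), as used for left_dot and right_dot
above, computes the cosine similarity of its inputs:

import numpy as np
from keras import layers, models

a = layers.Input(shape=(4, ))
b = layers.Input(shape=(4, ))
cos = layers.Dot(axes=1, normalize=True)([a, b])
m = models.Model([a, b], cos)

x = np.random.random((2, 4))
y = np.random.random((2, 4))
expected = (x * y).sum(axis=1) / (np.linalg.norm(x, axis=1) * np.linalg.norm(y, axis=1))
assert np.allclose(m.predict([x, y]).ravel(), expected, atol=1e-5)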
Example #2
    def model(self):
        """prepare the model"""
        in_target = layers.Input((1, ), name='in_tgt')
        in_context = layers.Input((1, ), name='in_ctx')

        embedding_dim = self.embedding_dim
        embedding = layers.Embedding(self.vocab_size,
                                     embedding_dim,
                                     input_length=1,
                                     name='embedding')

        target = embedding(in_target)
        target = layers.Reshape((embedding_dim, 1), name='target')(target)

        context = embedding(in_context)
        context = layers.Reshape((embedding_dim, 1), name='context')(context)

        dot_product = layers.Dot(axes=1)([target, context])
        dot_product = layers.Reshape((1, ), name='dot')(dot_product)
        output = layers.Dense(1, activation='sigmoid',
                              name='output')(dot_product)

        model = models.Model(inputs=[in_target, in_context], outputs=output)
        model.compile(loss='binary_crossentropy', optimizer='rmsprop')

        # for the validation model, apply cosine similarity
        similarity = layers.Dot(axes=1, normalize=True)([target, context])
        similarity = layers.Reshape((1, ), name='sim')(similarity)

        validation_model = models.Model(inputs=[in_target, in_context],
                                        outputs=similarity)
        return model, validation_model
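
A hedged usage sketch for the (model, validation_model) pair returned above;
the toy corpus and the already-constructed models are assumptions, not part
of the original example:

import numpy as np
from keras.preprocessing.sequence import skipgrams

corpus = [1, 2, 3, 4, 2, 5, 1, 3, 4, 5]  # toy sequence of word ids
pairs, labels = skipgrams(corpus, vocabulary_size=6, window_size=2)
pairs = np.array(pairs)
model.fit([pairs[:, 0], pairs[:, 1]], np.array(labels), epochs=3)
# validation_model.predict on two word-id arrays returns their cosine similarity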
Example #3
    def build_model(self):

        # This function builds the NN.

        # The two inputs.
        input_state = kl.Input(shape=(4, ))
        input_actions = kl.Input(shape=(self.num_actions, ))

        # Create a NN with three fully connected hidden layers.
        x = kl.Dense(64, activation='tanh')(input_state)
        x = kl.Dropout(0.4)(x)
        x = kl.Dense(32, activation='tanh')(x)
        x = kl.Dense(16, activation='tanh')(x)

        # The regular output layer, for the standard forward pass
        # of the input_state.
        q = kl.Dense(self.num_actions, activation='relu')(x)

        # An alternative output layer, used for training.  Here we
        # multiply the regular output elementwise with a num_actions-element
        # one-hot input_actions mask and take the sum.
        action_q = kl.Dot(1)([q, input_actions])

        # Create two models, one for each output layer, sharing
        # the same hidden layers.
        self.q_model = km.Model(inputs=input_state, outputs=q)
        self.applied_action_model = km.Model(
            inputs=[input_state, input_actions], outputs=action_q)

        # We compile the model that is actually used for training.
        self.applied_action_model.compile(optimizer=ko.SGD(lr=1e-5),
                                          loss="mean_squared_error",
                                          metrics=['accuracy'])
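
A minimal numpy sketch of what kl.Dot(1)([q, input_actions]) computes when
input_actions is a one-hot action mask: it selects the Q-value of the action
actually taken, so the MSE loss only trains that single output:

import numpy as np

q = np.array([[1.0, 2.0, 3.0]])     # predicted Q-values for one state
mask = np.array([[0.0, 1.0, 0.0]])  # one-hot mask: action 1 was taken
print((q * mask).sum(axis=1))       # [2.] == Q(s, a=1)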
Example #4
def test_merge_dot():
    i1 = layers.Input(shape=(4, ))
    i2 = layers.Input(shape=(4, ))
    o = layers.dot([i1, i2], axes=1)
    assert o._keras_shape == (None, 1)
    model = models.Model([i1, i2], o)

    dot_layer = layers.Dot(axes=1)
    o2 = dot_layer([i1, i2])
    assert dot_layer.output_shape == (None, 1)

    x1 = np.random.random((2, 4))
    x2 = np.random.random((2, 4))
    out = model.predict([x1, x2])
    assert out.shape == (2, 1)
    expected = np.zeros((2, 1))
    expected[0, 0] = np.dot(x1[0], x2[0])
    expected[1, 0] = np.dot(x1[1], x2[1])
    assert_allclose(out, expected, atol=1e-4)

    # Test with negative tuple of axes.
    o = layers.dot([i1, i2], axes=(-1, -1))
    assert o._keras_shape == (None, 1)
    model = models.Model([i1, i2], o)
    out = model.predict([x1, x2])
    assert out.shape == (2, 1)
    assert_allclose(out, expected, atol=1e-4)
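
A short sketch extending the test above to 3D inputs (an assumption, not
part of the original test), where Dot acts as a batched matrix product
contracting axis 2 of the first input with axis 1 of the second:

i3 = layers.Input(shape=(2, 3))
i4 = layers.Input(shape=(3, 5))
o3 = layers.dot([i3, i4], axes=(2, 1))
model3 = models.Model([i3, i4], o3)
assert model3.output_shape == (None, 2, 5)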
Example #5
def create_model(embeddings_matrix, vocab_size, context, response, labels):

    context_input = Input(shape=(MAX_SEQUENCE_LEN, ), dtype='float32')
    response_input = Input(shape=(MAX_SEQUENCE_LEN, ), dtype='float32')

    init = RandomUniform(minval=-0.01, maxval=0.01)
    embeddings_layer = Embedding(vocab_size, WORD_EMBEDDINGS_LEN, weights=[embeddings_matrix], input_length=MAX_SEQUENCE_LEN, trainable=True)
    rnn_layer = layers.LSTM(units=UNITS, kernel_initializer=init, dropout=0.2)

    c_x = embeddings_layer(context_input)
    r_x = embeddings_layer(response_input)

    c_x = rnn_layer(c_x)
    r_x = rnn_layer(r_x)

    # This layer needs to be fixed, multiplication by
    # the context is missing
    preds = CustomLayer(output_dim=UNITS)([c_x, r_x])
    preds = layers.Dot(axes=-1)([preds, c_x])

    preds = Dense(1, activation='sigmoid')(preds)

    siamese_model = Model(inputs=[context_input, response_input], outputs=preds)
    op = Adam(lr=0.0001, clipvalue=10.0)
    siamese_model.compile(loss='binary_crossentropy', optimizer=op, metrics=['acc', 'binary_accuracy'])
    siamese_model.summary()
    siamese_model.fit([context, response], labels, batch_size=BATCH_SIZE, epochs=100, validation_split=0.1)
Example #6
def factorization_machine(f_size, k_latent=5, embedding_reg=0.0005):
    def get_embed(x_input, x_size, k_latent):
        if x_size > 0:  #category
            embed = Embedding(
                x_size, k_latent,
                embeddings_regularizer=l2(embedding_reg))(x_input)
            embed = Flatten()(embed)
        else:
            embed = Dense(k_latent,
                          kernel_regularizer=l2(embedding_reg))(x_input)
            #embed = Dense(k_latent)(x_input)
        return embed

    dim_input = len(f_size)
    input_x = [Input(shape=(1, )) for i in range(dim_input)]
    biases = [get_embed(x, size, 1) for (x, size) in zip(input_x, f_size)]
    factors = [
        get_embed(x, size, k_latent) for (x, size) in zip(input_x, f_size)
    ]
    s = Add()(factors)
    diffs = [layers.Subtract()([s, x]) for x in factors]
    dots = [layers.Dot(axes=1)([d, x]) for d, x in zip(diffs, factors)]
    dots = Add()(dots)
    dots_sum = layers.Lambda(lambda x: x / 2)(dots)
    biases_sum = Add()(biases)
    x = Add()([dots_sum, biases_sum])
    model = Model(inputs=input_x, outputs=x)
    #output_f = factors + biases
    #model_features = Model(inputs=input_x, outputs=output_f)
    #model, model_features = build_model_1(X_train, f_size)
    return model
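
The Subtract/Dot/Add chain above is the usual factorization-machine
identity: sum_i (s - f_i) . f_i = s . s - sum_i ||f_i||^2, and half of that
equals the pairwise interaction term sum_{i<j} f_i . f_j. A minimal numpy
check of the identity (random toy factors, not part of the original
example):

import numpy as np

factors = [np.random.random(5) for _ in range(4)]
s = sum(factors)
via_layers = sum((s - f) @ f for f in factors) / 2
pairwise = sum(factors[i] @ factors[j]
               for i in range(4) for j in range(i + 1, 4))
assert np.isclose(via_layers, pairwise)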
Example #7
def gen_model(n_users, n_items, latent_dim, normalize):

    userInputLayer = layers.Input(shape=[1])
    itemInputLayer = layers.Input(shape=[1])

    if normalize is True:
        userVec = layers.Embedding(n_users,
                                   latent_dim,
                                   embeddings_initializer='random_normal',
                                   name='User_Embedding')(userInputLayer)
        itemVec = layers.Embedding(n_items,
                                   latent_dim,
                                   embeddings_initializer='random_normal',
                                   name='Movie_Embedding')(itemInputLayer)
    else:  #non-negative matrix
        userVec = layers.Embedding(
            n_users,
            latent_dim,
            embeddings_initializer='random_normal',
            name='User_Embedding',
            embeddings_constraint=non_neg())(userInputLayer)
        itemVec = layers.Embedding(
            n_items,
            latent_dim,
            embeddings_initializer='random_normal',
            name='Movie_Embedding',
            embeddings_constraint=non_neg())(itemInputLayer)

    userBias = layers.Embedding(n_users, 1,
                                embeddings_initializer='zeros')(userInputLayer)
    itemBias = layers.Embedding(n_items, 1,
                                embeddings_initializer='zeros')(itemInputLayer)

    userVec = layers.Flatten()(userVec)
    userBias = layers.Flatten()(userBias)
    itemVec = layers.Flatten()(itemVec)
    itemBias = layers.Flatten()(itemBias)

    r_hat = layers.Dot(name='Dot', axes=1)([userVec, itemVec])
    r_hat = layers.Add(name='Bias')([r_hat, userBias, itemBias])

    #outputLayer  = layers.Concatenate()([inputLayer_a, inputLayer_b])
    #keras.layers.Concatenate(axis=-1)

    model = models.Model(inputs=[userInputLayer, itemInputLayer],
                         outputs=r_hat)
    model.summary()
    model.compile(loss='mse', optimizer='adam')

    plot_model(model,
               to_file='tmp/model.png',
               show_shapes=True,
               show_layer_names=True)

    return model
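
A hypothetical usage sketch for gen_model above, assuming pydot is installed
and a tmp/ directory exists for the plot_model call; the sizes and random
ratings are stand-ins:

import numpy as np

model = gen_model(n_users=100, n_items=50, latent_dim=16, normalize=True)
users = np.random.randint(0, 100, size=1000)
items = np.random.randint(0, 50, size=1000)
ratings = np.random.uniform(1.0, 5.0, size=1000)
model.fit([users, items], ratings, epochs=2, batch_size=64)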
Example #8
    def get_fd_particle_type_sub_graph(self, s, P_transpose, R, T):

        vs = []
        for i in range(T):
            r = kl.Lambda(
                lambda z: K.sum(z[0] * z[1], axis=-1, keepdims=True))([s, R])
            v = kl.Lambda(lambda z: (self.gamma**i) * z)(r)
            vs.append(v)

            s = kl.Dot(axes=(-1))([P_transpose, s])

        v = kl.Add()(vs)
        return v
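
A numpy sketch of the rollout above for a single sample (the toy numbers are
assumptions): the state distribution s is propagated through the transposed
transition matrix for T steps while the discounted reward s . R accumulates:

import numpy as np

gamma, T = 0.9, 3
s = np.array([1.0, 0.0])                  # initial state distribution
P_T = np.array([[0.8, 0.3], [0.2, 0.7]])  # transposed transition matrix
R = np.array([0.0, 1.0])                  # per-state reward
v = 0.0
for i in range(T):
    v += (gamma ** i) * (s @ R)
    s = P_T @ s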
Example #9
 def get_score(self,user,item, artist=None):
     
     uemb = kl.Flatten()( self.emb_user_mf( user ) )
     iemb = kl.Flatten()( self.emb_item_mf( item ) )
             
     mf_vector = kl.Multiply()( [uemb, iemb] )
     if self.add_dot:
         mf_dot = kl.Dot(1)( [uemb, iemb] )
         mf_vector = kl.Concatenate()( [mf_vector, mf_dot] )
         
     if self.include_artist:
         uemb = kl.Flatten()( self.emb_user_artist_mf( user ) )
         aemb = kl.Flatten()( self.emb_artist_mf( artist ) )
         mf_mul = kl.Multiply()( [uemb, aemb] )
         if self.add_dot:
             mf_dot = kl.Dot(1)( [uemb, aemb] )
             mf_mul = kl.Concatenate()( [mf_mul, mf_dot] )
         
         mf_vector = kl.Concatenate()( [mf_vector, mf_mul] )
             
     res = self.fff(mf_vector)
     
     return res
Example #10
def create_discriminator():
    int_input = layers.Input(shape=(10, ))
    img_input = layers.Input(shape=(28, 28))

    x = layers.Reshape((28, 28, 1))(img_input)
    x = layers.Conv2D(filters=40,
                      kernel_size=(8, 8),
                      activation='relu',
                      padding='same')(x)
    x = layers.MaxPooling2D(pool_size=(3, 3),
                            strides=None,
                            padding='valid',
                            data_format=None)(x)
    x = layers.Dropout(0.5)(x)
    x = layers.Conv2D(filters=60,
                      kernel_size=(5, 5),
                      activation='relu',
                      padding='same')(x)
    x = layers.MaxPooling2D(pool_size=(3, 3),
                            strides=None,
                            padding='valid',
                            data_format=None)(x)
    x = layers.Dropout(0.5)(x)
    x = layers.Conv2D(filters=80,
                      kernel_size=(3, 3),
                      activation='relu',
                      padding='same')(x)
    x = layers.Conv2D(filters=100,
                      kernel_size=(3, 3),
                      activation='relu',
                      padding='same')(x)
    x = layers.MaxPooling2D(pool_size=(3, 3),
                            strides=None,
                            padding='valid',
                            data_format=None)(x)
    x = layers.Flatten()(x)
    x = layers.Dropout(0.5)(x)
    x = layers.Dense(100)(x)
    x = layers.LeakyReLU(alpha=0.2)(x)
    x = layers.Dropout(0.3)(x)
    x = layers.Dense(100)(x)
    x = layers.LeakyReLU(alpha=0.2)(x)
    x = layers.Dense(10, activation="sigmoid")(x)
    output = layers.Dot(-1)([x, int_input])
    model = Model(inputs=[img_input, int_input], outputs=output)
    model.compile(optimizer=SGD(lr=0.04, momentum=0.9),
                  loss='binary_crossentropy')
    model.summary()
    return model
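
A hypothetical smoke test for the discriminator above (shapes and random
data are the only assumptions): the final Dot(-1) contracts the 10-way
sigmoid scores with the one-hot label input, yielding one score per sample:

import numpy as np

disc = create_discriminator()
imgs = np.random.random((4, 28, 28))
labels = np.eye(10)[np.random.randint(0, 10, size=4)]
print(disc.predict([imgs, labels]).shape)  # (4, 1)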
Example #11
    def get_bs_particle_graphs(self, s, Dense_W, V, R, temp):
        Dense_W(s)
        W = Dense_W.weights[0]

        Dense_W_ = kl.Dense(self.num_states,
                            use_bias=False,
                            kernel_constraint=non_neg())
        Dense_W_(s)
        W_ = Dense_W_.weights[0]

        s_embed = kl.Dense(self.num_states, activation='tanh')(s)
        Dense_E = kl.Dense(self.num_states * self.num_states)
        null_input = kl.Lambda(lambda z: 0 * z[:, 0:1])(s_embed)
        E_logit = kl.Dense(self.num_states, activation='tanh')(null_input)
        E_logit = Dense_E(E_logit)
        E_logit = kl.Lambda(lambda z: z / temp)(E_logit)
        E = kl.Activation('sigmoid')(E_logit)
        E = kl.Reshape((self.num_states, self.num_states))(E)

        E = kl.Lambda(lambda z: self.mask * z)(E)

        Dense_W.trainable = False
        logit_1 = Dense_W(s)

        logit_2 = kl.Lambda(lambda z: W_ * (z))(E)
        logit_2 = kl.Dot(axes=1)([logit_2, s])

        logit = kl.Add()([logit_1, logit_2])
        #P = kl.Activation('softmax')(logit)
        P = kl.Lambda(lambda z: z / K.sum(z, axis=-1, keepdims=True))(logit)
        V_ = kl.Dot(axes=-1)([P, V])

        r = kl.Dot(axes=-1)([s, R])

        v = kl.Lambda(lambda z: z[0] + self.gamma * z[1])([r, V_])
        return W, W_, E, P, V_, v
Example #12
    def build_q_model(self, hidden_layer_sizes=(40, 40)):
        """build the Q model.
        Returns one model for prediction and one for training.
        """
        inp_st = layers.Input(shape=(self.state_dim, ))
        prev = inp_st
        for n in hidden_layer_sizes:
            prev = layers.Dense(n, activation='relu')(prev)
        out_ac = layers.Dense(self.action_dim)(prev)
        """we only want to fit the output for the action actually taken.
        We contract the predicted outputs with a mask that is to be provided by input when training.
        """
        inp_mask = layers.Input(shape=(self.action_dim, ))
        out_masked = layers.Dot(axes=1)([out_ac, inp_mask])

        model_train = Model(inputs=[inp_st, inp_mask], outputs=out_masked)
        model_train.compile(loss='mse', optimizer=Adam())

        model_predict = Model(inputs=inp_st, outputs=out_ac)

        return model_train, model_predict
Example #13
 def fit(self, Y, T, X):
     """
     Parameters
     ----------
     Y : outcome
     T : treatment
     X : features
     """
     d_x, d_t, d_y = [np.shape(arr)[1:] for arr in (X, T, Y)]
     self.d_t = d_t  # keep track in case we need to reshape output by dropping singleton dimensions
     self.d_y = d_y  # keep track in case we need to reshape output by dropping singleton dimensions
     d_x, d_t, d_y = [1 if not d else d[0] for d in (d_x, d_t, d_y)]
     x_in, t_in = [L.Input((d,)) for d in (d_x, d_t)]
     # reshape in case we get fewer dimensions than expected from h (e.g. a scalar)
     h_out = L.Reshape((d_y, d_t))(self._h(x_in))
     y_out = L.Dot([2, 1])([h_out, t_in])
     self.theta = Model([x_in], self._h(x_in))
     model = Model([x_in, t_in], y_out)
     model.compile(optimizer, loss='mse')
     model.fit([X, T], Y, **training_options)
     return self
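
A numpy sketch of L.Dot([2, 1]) as used above (toy shapes are assumptions):
for each sample it multiplies the (d_y, d_t) effect matrix h(x) by the
treatment vector t, giving one outcome vector per sample:

import numpy as np

h = np.random.random((1, 3, 2))    # (batch, d_y, d_t)
t = np.random.random((1, 2))       # (batch, d_t)
y = np.einsum('bij,bj->bi', h, t)  # (batch, d_y), matching L.Dot([2, 1])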
Example #14
def build_model(user_n, movie_n, latent_dim):
    print('Building model')
    user_input = layers.Input(shape=[1])
    u_v = layers.Embedding(user_n, latent_dim)(user_input)
    u_v = layers.Flatten()(u_v)

    movie_input = layers.Input(shape=[1])
    m_v = layers.Embedding(movie_n, latent_dim)(movie_input)
    m_v = layers.Flatten()(m_v)

    user_bias = layers.Embedding(user_n, 1)(user_input)
    user_bias = layers.Flatten()(user_bias)

    movie_bias = layers.Embedding(movie_n, 1)(movie_input)
    movie_bias = layers.Flatten()(movie_bias)

    merge = layers.Dot(axes=1)([u_v, m_v])
    result = layers.Add()([merge, user_bias, movie_bias])
    result = layers.Dense(1)(result)
    model = Model(inputs=[user_input, movie_input], outputs=[result])
    model.compile(loss='mse', optimizer="adamax", metrics=[rmse])
    model.summary()
    return model
Example #15
def build_model(char_size=27,
                dim=64,
                iterations=4,
                training=True,
                ilp=False,
                pca=False):
    """Build the model."""
    # Inputs
    # Context: (rules, preds, chars,)
    context = L.Input(shape=(
        None,
        None,
        None,
    ),
                      name='context',
                      dtype='int32')
    query = L.Input(shape=(None, ), name='query', dtype='int32')

    if ilp:
        context, query, templates = ilp

    print('Found %s texts.' % len(CONTEXT_TEXTS))
    word_index = WORD_INDEX
    print('Found %s unique tokens.' % len(word_index))

    embeddings_index = {}
    GLOVE_DIR = os.path.abspath('.') + "/data/glove"
    f = open(os.path.join(GLOVE_DIR, 'glove.6B.100d.txt'),
             'r',
             encoding='utf-8')
    for line in f:
        values = line.split()
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs
    f.close()

    print('Found %s word vectors.' % len(embeddings_index))

    EMBEDDING_DIM = 100

    embedding_matrix = np.zeros((len(word_index) + 1, EMBEDDING_DIM))
    for word, i in word_index.items():
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            # words not found in embedding index will be all-zeros.
            embedding_matrix[i] = embedding_vector

    # Contextual embedding of symbols
    # onehot_weights = np.eye(char_size)
    # onehot_weights[0, 0] = 0 # Clear zero index
    # onehot = L.Embedding(char_size, char_size,
    #                      trainable=False,
    #                      weights=[onehot_weights],
    #                      name='onehot')
    embedding_layer = L.Embedding(len(word_index) + 1,
                                  EMBEDDING_DIM,
                                  weights=[embedding_matrix],
                                  trainable=False)
    embedded_ctx = embedding_layer(
        context)  # (?, rules, preds, chars, char_size)
    embedded_q = embedding_layer(query)  # (?, chars, char_size)

    if ilp:
        # Combine the templates with the context, (?, rules+temps, preds, chars, char_size)
        embedded_ctx = L.Lambda(lambda xs: K.concatenate(xs, axis=1),
                                name='template_concat')(
                                    [templates, embedded_ctx])
        # embedded_ctx = L.concatenate([templates, embedded_ctx], axis=1)

    embed_pred = ZeroGRU(dim, go_backwards=True, name='embed_pred')
    embedded_predq = embed_pred(embedded_q)  # (?, dim)
    # For every rule, for every predicate, embed the predicate
    embedded_ctx_preds = NestedTimeDist(NestedTimeDist(embed_pred,
                                                       name='nest1'),
                                        name='nest2')(embedded_ctx)
    # (?, rules, preds, dim)

    embed_rule = ZeroGRU(dim, name='embed_rule')
    embedded_rules = NestedTimeDist(embed_rule,
                                    name='d_embed_rule')(embedded_ctx_preds)
    # (?, rules, dim)

    # Reused layers over iterations
    repeat_toctx = L.RepeatVector(K.shape(embedded_ctx)[1],
                                  name='repeat_to_ctx')
    diff_sq = L.Lambda(lambda xy: K.square(xy[0] - xy[1]),
                       output_shape=(None, dim),
                       name='diff_sq')
    mult = L.Multiply()
    concat = L.Lambda(lambda xs: K.concatenate(xs, axis=2),
                      output_shape=(None, dim * 5),
                      name='concat')
    att_densel = L.Dense(dim // 2, activation='tanh', name='att_densel')
    att_dense = L.Dense(1, name='att_dense')
    squeeze2 = L.Lambda(lambda x: K.squeeze(x, 2), name='squeeze2')
    softmax1 = L.Softmax(axis=1)
    unifier = NestedTimeDist(ZeroGRU(dim, go_backwards=False, name='unifier'),
                             name='dist_unifier')
    dot11 = L.Dot((1, 1))

    # Reasoning iterations
    state = embedded_predq
    repeated_q = repeat_toctx(embedded_predq)
    outs = list()
    for _ in range(iterations):
        # Compute attention between rule and query state
        ctx_state = repeat_toctx(state)  # (?, rules, dim)
        s_s_c = diff_sq([ctx_state, embedded_rules])
        s_m_c = mult([embedded_rules, state])  # (?, rules, dim)
        sim_vec = concat([s_s_c, s_m_c, ctx_state, embedded_rules, repeated_q])
        sim_vec = att_densel(sim_vec)  # (?, rules, dim//2)
        sim_vec = att_dense(sim_vec)  # (?, rules, 1)
        sim_vec = squeeze2(sim_vec)  # (?, rules)
        sim_vec = softmax1(sim_vec)
        outs.append(sim_vec)

        # Unify every rule and weighted sum based on attention
        new_states = unifier(embedded_ctx_preds, initial_state=[state])
        # (?, rules, dim)
        state = dot11([sim_vec, new_states])

    # Prediction
    out = L.Dense(1, activation='sigmoid', name='out')(state)
    if ilp:
        return outs, out
    elif pca:
        model = Model([context, query], [embedded_rules])
    elif training:
        model = Model([context, query], [out])
        model.compile(loss='binary_crossentropy',
                      optimizer='adam',
                      metrics=['acc'])
    else:
        model = Model([context, query], outs + [out])
    return model
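
A numpy sketch of dot11 = L.Dot((1, 1)) as used for the attention-weighted
sum above (toy shapes are assumptions): the rules axis of the softmaxed
weights is contracted against the rules axis of the per-rule states:

import numpy as np

sim_vec = np.random.random((1, 5))         # (batch, rules) attention weights
new_states = np.random.random((1, 5, 64))  # (batch, rules, dim)
state = np.einsum('br,brd->bd', sim_vec, new_states)  # (batch, dim)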
Example #16
    def build(self, input_shape):
        print(input_shape)
        self.mu = self.add_weight(shape=(1, 1, 1, self.N),
                                  name=self.base_name + 'mu',
                                  initializer=self.mu_initializer,
                                  constraint=self.mu_constraint,
                                  trainable=True)
        self.sigma = self.add_weight(shape=(1, 1, 1, self.N),
                                     name=self.base_name + 'sigma',
                                     initializer=self.sigma_initializer,
                                     constraint=self.sigma_constraint,
                                     trainable=True)
        self.alpha = self.add_weight(shape=(1, 1, 1, self.N),
                                     name=self.base_name + 'alpha',
                                     initializer=self.alpha_initializer,
                                     constraint=self.alpha_constraint,
                                     trainable=True)

        self.conv_theta = KL.Conv2D(self.intermediate_dim, (1, 1),
                                    name=self.base_name + 'conv_theta',
                                    padding='same',
                                    use_bias=True)
        self.conv_theta.build(input_shape)

        self.conv_phi = KL.Conv2D(self.intermediate_dim, (1, 1),
                                  name=self.base_name + 'conv_phi',
                                  padding='same',
                                  use_bias=True)
        self.conv_phi.build(input_shape)

        self.conv_delta = KL.Conv2D(self.N, (1, 1),
                                    name=self.base_name + 'conv_delta',
                                    padding='same',
                                    use_bias=True)
        self.conv_delta.build(input_shape)

        self.conv_g = KL.Conv2D(self.intermediate_dim, (1, 1),
                                name=self.base_name + 'conv_g',
                                padding='same',
                                use_bias=True)
        self.conv_g.build(input_shape)

        self.conv_y = KL.Conv2D(self.channels, (1, 1),
                                name=self.base_name + 'conv_y',
                                padding='same',
                                use_bias=True)
        self.conv_y.build((input_shape[0], input_shape[1], input_shape[2],
                           self.intermediate_dim))

        self.bn_y = KL.BatchNormalization(name=self.base_name + 'bn_y',
                                          gamma_initializer='zeros')
        self.bn_y.build(input_shape)

        self.mat_mul_1 = KL.Dot(axes=2, name=self.base_name + 'mat_mul_1')
        self.mat_mul_1.build([
            (input_shape[0], self.dim1 * self.dim2, self.intermediate_dim),
            (input_shape[0], self.dim1 * self.dim2, self.intermediate_dim)
        ])

        self.mat_mul_2 = KL.Dot(axes=[2, 1], name=self.base_name + 'mat_mul_2')
        self.mat_mul_2.build([
            (input_shape[0], self.dim1 * self.dim2, self.dim1 * self.dim2),
            (input_shape[0], self.dim1 * self.dim2, self.intermediate_dim)
        ])

        self._trainable_weights += (self.conv_theta.trainable_weights +
                                    self.conv_phi.trainable_weights +
                                    self.conv_delta.trainable_weights +
                                    self.conv_g.trainable_weights +
                                    self.conv_y.trainable_weights +
                                    self.bn_y.trainable_weights)

        super(Contextual_Attention, self).build(input_shape)
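
A numpy sketch of the two Dot layers built above (toy sizes are
assumptions): mat_mul_1 forms the (HW x HW) affinity map between theta and
phi features, and mat_mul_2 applies that map to the g features, i.e.
non-local attention:

import numpy as np

hw, c = 16, 8
theta = np.random.random((1, hw, c))
phi = np.random.random((1, hw, c))
g = np.random.random((1, hw, c))
attn = np.einsum('bic,bjc->bij', theta, phi)  # Dot(axes=2)
out = np.einsum('bij,bjc->bic', attn, g)      # Dot(axes=[2, 1])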
Example #17
def AID_CreateModel(input_shape,
                    alpha_hinge=0.2,
                    Spatial_Dropout=False,
                    BN=True,
                    B5_FC1_neurons=1024,
                    similarity='simCos',
                    desc_dim=128,
                    desc_between_0_1=False,
                    BigDesc=False,
                    verbose=True):

    # descriptor model
    in_desc = layers.Input(shape=input_shape, name='input_patches')

    x = layers.Conv2D(64, (3, 3), padding='same', name='block1_conv1')(in_desc)
    if BN:
        x = layers.BatchNormalization(name='block1_BN1')(x)
    x = layers.Activation('relu', name='block1_relu1')(x)

    x = layers.Conv2D(64, (3, 3), padding='same', name='block1_conv2')(x)
    if BN:
        x = layers.BatchNormalization(name='block1_BN2')(x)
    x = layers.Activation('relu', name='block1_relu2')(x)

    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)

    # Block 2
    x = layers.Conv2D(64, (3, 3), padding='same', name='block2_conv1')(x)
    if BN:
        x = layers.BatchNormalization(name='block2_BN1')(x)
    x = layers.Activation('relu', name='block2_relu1')(x)

    x = layers.Conv2D(64, (3, 3), padding='same', name='block2_conv2')(x)
    if BN:
        x = layers.BatchNormalization(name='block2_BN2')(x)
    x = layers.Activation('relu', name='block2_relu2')(x)

    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)

    # Block 3
    x = layers.Conv2D(128, (3, 3), padding='same', name='block3_conv1')(x)
    if BN:
        x = layers.BatchNormalization(name='block3_BN1')(x)
    x = layers.Activation('relu', name='block3_relu1')(x)

    x = layers.Conv2D(128, (3, 3), padding='same', name='block3_conv2')(x)
    if BN:
        x = layers.BatchNormalization(name='block3_BN2')(x)
    x = layers.Activation('relu', name='block3_relu2')(x)

    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)

    # Block 4
    x = layers.Conv2D(128, (3, 3), padding='same', name='block4_conv1')(x)
    if BN:
        x = layers.BatchNormalization(name='block4_BN1')(x)
    x = layers.Activation('relu', name='block4_relu1')(x)

    x = layers.Conv2D(128, (3, 3), padding='same', name='block4_conv2')(x)

    if not BigDesc and BN:
        x = layers.BatchNormalization(name='block4_BN2')(x)

    if Spatial_Dropout:
        x = layers.SpatialDropout2D(0.5, name='block4_Dropout1')(x)

    if not BigDesc:
        x = layers.Activation('relu', name='block4_relu2')(x)

    # Block 5
    x = layers.Flatten(name='block5_flatten1')(x)

    if not BigDesc:
        if B5_FC1_neurons > 0:
            x = layers.Dense(B5_FC1_neurons,
                             activation='relu',
                             name='block5_FC1')(x)

        if desc_between_0_1:
            x = layers.Dense(desc_dim, activation='sigmoid',
                             name='block5_FC2')(x)
        else:
            x = layers.Dense(desc_dim, name='block5_FC2')(x)

    desc_model = Model(in_desc, x, name='aff_desc')

    # similarity model
    if similarity[0:5] == 'simFC':
        if similarity[5:] == '_concat' or similarity[5:] == '_concat_BigDesc':
            sim_type = 'concat'
            desc_dim = 2 * desc_model.output_shape[1]
        elif similarity[5:] == '_diff':
            sim_type = 'diff'
        # two siamese branches
        in_desc1 = layers.Input(shape=input_shape, name='input_patches1')
        in_desc2 = layers.Input(shape=input_shape, name='input_patches2')
        emb_1 = desc_model(in_desc1)
        emb_2 = desc_model(in_desc2)

        # Similarity model
        in_sim = layers.Input(shape=(desc_dim, ), name='input_diff_desc')
        x = layers.Dense(64, activation='relu', name='block1_FC1')(in_sim)
        x = layers.Dense(32, activation='relu', name='block1_FC2')(x)
        x = layers.Dense(1, activation='sigmoid', name='block1_FC3')(x)
        sim_model = Model(in_sim, x, name='sim')

        if sim_type == 'concat':
            x = layers.Concatenate(name='Concat')([emb_1, emb_2])
        else:
            x = layers.Subtract(name='Subtract')([emb_1, emb_2])

        out_net = sim_model(x)

        # Groundtruth Model
        in_GT = layers.Input(shape=(1, ), name='input_GroundTruth')
        GT_model = Model(in_GT, in_GT, name='GroundTruth')
        out_GT = GT_model(in_GT)

        class TopLossLayerClass(layers.Layer):
            def __init__(self, **kwargs):
                super(TopLossLayerClass, self).__init__(**kwargs)

            def call(self, inputs):
                #out_net,  out_GT = inputs
                s, t = inputs  # t=1 -> Positive class, t=0 -> Negative class
                # binary cross-entropy: the log-likelihood must be negated
                loss = -K.sum(t * K.log(s) + (1 - t) * K.log(1 - s))
                self.add_loss(loss)
                return loss

        TopLossLayer_obj = TopLossLayerClass(name='TopLossLayer')

        TopLossLayer = TopLossLayer_obj([out_net, out_GT])
        train_model = Model([in_desc1, in_desc2, in_GT],
                            TopLossLayer,
                            name='TrainModel')
    elif similarity == 'simCos':  # hinge loss
        # Similarity model
        desc_dim = desc_model.output_shape[1]
        in_sim1 = layers.Input(shape=(desc_dim, ), name='input_desc1')
        in_sim2 = layers.Input(shape=(desc_dim, ), name='input_desc2')
        x = layers.Dot(axes=1, normalize=True,
                       name='CosineProximity')([in_sim1,
                                                in_sim2])  # cosine proximity
        sim_model = Model([in_sim1, in_sim2], x, name='sim')

        # three siamese branches
        in_desc1 = layers.Input(shape=input_shape, name='input_patches_anchor')
        in_desc2 = layers.Input(shape=input_shape,
                                name='input_patches_positive')
        in_desc3 = layers.Input(shape=input_shape,
                                name='input_patches_negative')
        emb_1 = desc_model(in_desc1)
        emb_2 = desc_model(in_desc2)
        emb_3 = desc_model(in_desc3)
        sim_type = 'inlist'
        out_net_positive = sim_model([emb_1, emb_2])
        out_net_negative = sim_model([emb_1, emb_3])

        class TopLossLayerClass(layers.Layer):
            def __init__(self, alpha=0.2, **kwargs):
                self.alpha = alpha
                super(TopLossLayerClass, self).__init__(**kwargs)

            def call(self, inputs):
                out_net_positive, out_net_negative = inputs
                # Hinge loss computation
                loss = K.sum(
                    K.maximum(out_net_negative - out_net_positive + self.alpha,
                              0))  #,axis=0)
                self.add_loss(loss)
                return loss

        TopLossLayer_obj = TopLossLayerClass(name='TopLossLayer',
                                             alpha=alpha_hinge)
        TopLossLayer = TopLossLayer_obj([out_net_positive, out_net_negative])
        train_model = Model([in_desc1, in_desc2, in_desc3],
                            TopLossLayer,
                            name='TrainModel')

    if verbose:
        print('\n\n-------> The network architecture for the affine descriptor computation!')
        desc_model.summary()
        print('\n\n-------> The network architecture for the similarity computation!')
        sim_model.summary()
        print('\n\n-------> Train model connections')
        train_model.summary()
    return train_model, sim_type
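
A numpy sketch of the simCos branch above (random descriptors are
assumptions): the triplet hinge loss penalizes a negative pair whose cosine
similarity to the anchor comes within alpha of the positive pair's:

import numpy as np

def cos(a, b):
    return a @ b / (np.linalg.norm(a) * np.linalg.norm(b))

anchor, pos, neg = np.random.random((3, 128))
alpha = 0.2
loss = max(cos(anchor, neg) - cos(anchor, pos) + alpha, 0.0)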
Example #18
 def k_func(x, y):
     return klayers.Dot(axes=[1, 1], normalize=False)([x, y])
Example #19
 def k_func(x, y):
     return klayers.Dot(axes=[1, 2], normalize=True)([x, y])
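
A shape sketch for the two helpers above (toy shapes are assumptions):
axes=[1, 1] contracts the matching first feature axes, while axes=[1, 2]
pairs axis 1 of x with axis 2 of y, L2-normalizing along those axes first
when normalize=True:

from keras import layers as klayers, models as kmodels

x = klayers.Input(shape=(3, 4))
y = klayers.Input(shape=(5, 3))
out = klayers.Dot(axes=[1, 2], normalize=True)([x, y])
assert kmodels.Model([x, y], out).output_shape == (None, 4, 5)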
Example #20
def build_birnn_multifeature_coattention_model(voca_dim,
                                               time_steps,
                                               num_feature_channels,
                                               num_features,
                                               feature_dim,
                                               output_dim,
                                               model_dim,
                                               atten_dim,
                                               mlp_dim,
                                               item_embedding=None,
                                               rnn_depth=1,
                                               mlp_depth=1,
                                               drop_out=0.5,
                                               rnn_drop_out=0.,
                                               rnn_state_drop_out=0.,
                                               trainable_embedding=False,
                                               gpu=False,
                                               return_customized_layers=False):
    """
    Create a bidirectional attention model.

    :param voca_dim: vocabulary dimension size.
    :param time_steps: the length of input
    :param output_dim: the output dimension size
    :param model_dim: rnn dimension size
    :param mlp_dim: the dimension size of fully connected layer
    :param item_embedding: integer, numpy 2D array, or None (default=None)
        If item_embedding is an integer, connect a randomly initialized embedding matrix to the input tensor.
        If item_embedding is a matrix, this matrix will be used as the embedding matrix.
        If item_embedding is None, then connect input tensor to RNN layer directly.
    :param rnn_depth: rnn depth
    :param mlp_depth: the depth of fully connected layers
    :param num_feature_channels: the number of attention channels; this can be used to mimic the multi-head attention mechanism
    :param drop_out: dropout rate of fully connected layers
    :param rnn_drop_out: dropout rate of rnn layers
    :param rnn_state_drop_out: dropout rate of rnn state tensor
    :param trainable_embedding: boolean
    :param gpu: boolean, default=False
        If True, CuDNNLSTM is used instead of LSTM for RNN layer.
    :param return_customized_layers: boolean, default=False
        If True, return model and customized object dictionary, otherwise return model only
    :return: keras model
    """

    if model_dim % 2 == 1:
        model_dim += 1

    if item_embedding is not None:
        inputs = models.Input(shape=(time_steps, ),
                              dtype='int32',
                              name='input0')
        x1 = inputs

        # item embedding
        if isinstance(item_embedding, np.ndarray):
            assert voca_dim == item_embedding.shape[0]
            x1 = layers.Embedding(voca_dim,
                                  item_embedding.shape[1],
                                  input_length=time_steps,
                                  weights=[
                                      item_embedding,
                                  ],
                                  trainable=trainable_embedding,
                                  mask_zero=False,
                                  name='embedding_layer0')(x1)
        elif utils.is_integer(item_embedding):
            x1 = layers.Embedding(voca_dim,
                                  item_embedding,
                                  input_length=time_steps,
                                  trainable=trainable_embedding,
                                  mask_zero=False,
                                  name='embedding_layer0')(x1)
        else:
            raise ValueError(
                "item_embedding must be either integer or numpy matrix")
    else:
        inputs = models.Input(shape=(time_steps, voca_dim),
                              dtype='float32',
                              name='input0')
        x1 = inputs

    inputs1 = list()
    for fi in range(num_feature_channels):
        inputs1.append(
            models.Input(shape=(num_features, feature_dim),
                         dtype='float32',
                         name='input1' + str(fi)))

    feature_map_layer = layers.TimeDistributed(layers.Dense(
        model_dim, name="feature_map_layer", activation="sigmoid"),
                                               name="td_feature_map_layer")
    x2s = list(map(lambda input_: feature_map_layer(input_), inputs1))

    if gpu:
        # rnn encoding
        for i in range(rnn_depth):
            x1 = layers.Bidirectional(layers.CuDNNLSTM(int(model_dim / 2),
                                                       return_sequences=True),
                                      name='bi_lstm_layer' + str(i))(x1)
            x1 = layers.BatchNormalization(name='rnn_batch_norm_layer' +
                                           str(i))(x1)
            x1 = layers.Dropout(rnn_drop_out,
                                name="rnn_dropout_layer" + str(i))(x1)
    else:
        # rnn encoding
        for i in range(rnn_depth):
            x1 = layers.Bidirectional(layers.LSTM(
                int(model_dim / 2),
                return_sequences=True,
                dropout=rnn_drop_out,
                recurrent_dropout=rnn_state_drop_out),
                                      name='bi_lstm_layer' + str(i))(x1)
            x1 = layers.BatchNormalization(name='rnn_batch_norm_layer' +
                                           str(i))(x1)

    coatten_layer = clayers.CoAttentionWeight(name="coattention_weights_layer")
    featnorm_layer1 = clayers.FeatureNormalization(
        name="normalized_coattention_weights_layer1", axis=1)
    featnorm_layer2 = clayers.FeatureNormalization(
        name="normalized_coattention_weights_layer2", axis=2)
    focus_layer1 = layers.Dot((1, 1), name="focus_layer1")
    focus_layer2 = layers.Dot((2, 1), name="focus_layer2")
    pair_layer1 = layers.Concatenate(axis=-1, name="pair_layer1")
    pair_layer2 = layers.Concatenate(axis=-1, name="pair_layer2")

    compare_layer1 = layers.TimeDistributed(layers.Dense(model_dim,
                                                         activation="relu"),
                                            name="compare_layer1")
    compare_layer2 = layers.TimeDistributed(layers.Dense(model_dim,
                                                         activation="relu"),
                                            name="compare_layer2")
    flatten_layer = layers.Flatten(name="flatten_layer")

    xs = list()
    for x2_ in x2s:
        xs += _coatten_compare_aggregate(coatten_layer, featnorm_layer1,
                                         featnorm_layer2, focus_layer1,
                                         focus_layer2, pair_layer1,
                                         pair_layer2, compare_layer1,
                                         compare_layer2, flatten_layer, x1,
                                         x2_)

    x = layers.Concatenate(axis=1, name="concat_feature_layer")(xs)

    # MLP Layers
    for i in range(mlp_depth - 1):
        x = layers.Dense(mlp_dim,
                         activation='selu',
                         kernel_initializer='lecun_normal',
                         name='selu_layer' + str(i))(x)
        x = layers.AlphaDropout(drop_out, name='alpha_layer' + str(i))(x)

    outputs = layers.Dense(output_dim,
                           activation="softmax",
                           name="softmax_layer0")(x)

    model = models.Model([inputs] + inputs1, outputs)

    if return_customized_layers:
        return model, {
            'CoAttentionWeight': clayers.CoAttentionWeight,
            "FeatureNormalization": clayers.FeatureNormalization
        }

    return model
Example #21
    def init_model(self, train, std=0.01):

        #current_item = kl.Input( ( 1, ), name="current_item" )

        item = kl.Input((1, ),
                        dtype=self.intX)  #, batch_shape=(self.,self.steps) )
        user = kl.Input((1, ),
                        dtype=self.intX)  #, batch_shape=(self.batch,1) )

        if self.include_artist:
            artist = kl.Input((1, ),
                              dtype=self.intX)  #, batch_shape=(self.batch,1) )

        emb_user_mf = Embedding(output_dim=self.factors,
                                input_dim=self.num_users,
                                embeddings_regularizer=l2(self.emb_reg))
        emb_user = Embedding(output_dim=self.factors,
                             input_dim=self.num_users,
                             embeddings_regularizer=l2(self.emb_reg))
        emb_item_mf = Embedding(output_dim=self.factors,
                                input_dim=self.num_items,
                                embeddings_regularizer=l2(self.emb_reg))
        emb_item = Embedding(output_dim=self.factors,
                             input_dim=self.num_items,
                             embeddings_regularizer=l2(self.emb_reg))

        if self.include_artist:
            emb_user_artist_mf = Embedding(output_dim=self.factors,
                                           input_dim=self.num_artists,
                                           embeddings_regularizer=l2(
                                               self.emb_reg))
            emb_artist_mf = Embedding(output_dim=self.factors,
                                      input_dim=self.num_artists,
                                      embeddings_regularizer=l2(self.emb_reg))
            emb_artist = Embedding(output_dim=self.factors,
                                   input_dim=self.num_artists,
                                   embeddings_regularizer=l2(self.emb_reg))

        #MF PART

        uemb = kl.Flatten()(emb_user_mf(user))
        iemb = kl.Flatten()(emb_item_mf(item))

        mf_dot = kl.Dot(1)([uemb, iemb])
        mf_mul = kl.Multiply()([uemb, iemb])

        mf_vector = kl.Concatenate()([mf_mul, mf_dot])

        #mf_vector = mf_mul

        if self.include_artist:
            uemb = kl.Flatten()(emb_user_artist_mf(user))
            aemb = kl.Flatten()(emb_artist_mf(artist))
            mf_dot = kl.Dot(1)([uemb, aemb])
            mf_mul = kl.Multiply()([uemb, aemb])

            mf_vector = kl.Concatenate()([mf_vector, mf_mul, mf_dot])

        #MLP PART

        uemb = kl.Flatten()(emb_user(user))
        iemb = kl.Flatten()(emb_item(item))

        mlp_vector = kl.Concatenate()([uemb, iemb])
        if self.include_artist:
            emba = kl.Flatten()(emb_artist(artist))
            mlp_vector = kl.Concatenate()([mlp_vector, emba])

        for i in range(len(self.layers)):
            layer = kl.Dense(self.layers[i],
                             activation='relu',
                             name="layer%d" % i,
                             kernel_regularizer=l2(self.layer_reg))
            mlp_vector = layer(mlp_vector)

        #PRED PART

        comb = kl.Concatenate()([mf_vector, mlp_vector])  #, uemb ] )

        fff = kl.Dense(1,
                       activation='linear',
                       kernel_initializer='lecun_uniform',
                       kernel_regularizer=l2(self.layer_reg))
        res = fff(comb)

        inputs = [user, item]  #+ [artist
        if self.include_artist:
            inputs += [artist]
        outputs = [res]

        predict_model = km.Model(inputs, outputs)

        current_user = kl.Input(
            (1, ),
            name="current_user")  # , batch_shape=(self.batch, self.steps) )
        current_item_pos = kl.Input(
            (1, ), dtype=self.intX,
            name="current_item_pos")  #, batch_shape=(self.batch,1) )
        current_item_neg = kl.Input(
            (1, ), dtype=self.intX,
            name="current_item_neg")  #, batch_shape=(self.batch,1) )

        pred_from_pos = [current_user, current_item_pos]
        pred_from_neg = [current_user, current_item_neg]

        if self.include_artist:
            current_artist_pos = kl.Input(
                (1, ), name="current_artist_pos"
            )  # , batch_shape=(self.batch, self.steps) )
            current_artist_neg = kl.Input(
                (1, ), name="current_artist_neg"
            )  # , batch_shape=(self.batch, self.steps) )
            pred_from_neg += [current_artist_neg]
            pred_from_pos += [current_artist_pos]

        current_res_pos = predict_model(pred_from_pos)  #, current_user ] )
        current_res_neg = predict_model(pred_from_neg)  #, current_user ] )

        inputs = [current_user, current_item_pos,
                  current_item_neg]  #+ [current_user]
        if self.include_artist:
            inputs += [current_artist_pos, current_artist_neg]
        outputs = [current_res_pos, current_res_neg]

        model = km.Model(inputs, outputs)
        model.add_loss(K.mean(self.bpr(outputs)))

        if self.optimizer == 'adam':
            opt = keras.optimizers.Adam(lr=self.learning_rate)
        elif self.optimizer == 'adagrad':
            opt = keras.optimizers.Adagrad(lr=self.learning_rate)
        elif self.optimizer == 'adadelta':
            opt = keras.optimizers.Adadelta(lr=self.learning_rate * 10)
        elif self.optimizer == 'sgd':
            opt = keras.optimizers.SGD(lr=self.learning_rate)

        model.compile(optimizer=opt)

        return model, predict_model
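
A hedged numpy sketch of the BPR objective wired in through add_loss above
(self.bpr itself is not shown in the example): the positive item's predicted
score should exceed the negative item's:

import numpy as np

def bpr(pos_score, neg_score):
    return -np.log(1.0 / (1.0 + np.exp(-(pos_score - neg_score))))

print(bpr(2.0, 0.5))  # small loss when the positive item already wins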
Example #22
def build_model(char_size=27,
                dim=64,
                iterations=4,
                training=True,
                ilp=False,
                pca=False):
    """Build the model."""
    # Inputs
    # Context: (rules, preds, chars,)
    context = L.Input(shape=(
        None,
        None,
        None,
    ),
                      name='context',
                      dtype='int32')
    query = L.Input(shape=(None, ), name='query', dtype='int32')

    # Flatten preds to embed entire rules
    var_flat = L.Lambda(lambda x: K.reshape(
        x, K.stack([K.shape(x)[0], -1,
                    K.prod(K.shape(x)[2:])])),
                        name='var_flat')
    flat_ctx = var_flat(context)  # (?, rules, preds*chars)

    # One-hot embedding of symbols
    onehot_weights = np.eye(char_size)
    onehot_weights[0, 0] = 0  # Clear zero index
    onehot = L.Embedding(char_size,
                         char_size,
                         trainable=False,
                         weights=[onehot_weights],
                         name='onehot')
    embedded_ctx = onehot(flat_ctx)  # (?, rules, preds*chars*char_size)
    embedded_q = onehot(query)  # (?, chars, char_size)

    # Embed predicates
    embed_pred = ZeroGRU(dim,
                         go_backwards=True,
                         return_sequences=True,
                         return_state=True,
                         name='embed_pred')
    embedded_predqs, embedded_predq = embed_pred(embedded_q)  # (?, chars, dim)
    embed_pred.return_sequences = False
    embed_pred.return_state = False
    # Embed every rule
    embedded_rules = L.TimeDistributed(embed_pred,
                                       name='rule_embed')(embedded_ctx)
    # (?, rules, dim)

    # Reused layers over iterations
    concatm1 = L.Concatenate(name='concatm1')
    repeat_toqlen = L.RepeatVector(K.shape(embedded_q)[1],
                                   name='repeat_toqlen')
    mult_cqi = L.Multiply(name='mult_cqi')
    dense_cqi = L.Dense(dim, name='dense_cqi')
    dense_cais = L.Dense(1, name='dense_cais')

    squeeze2 = L.Lambda(lambda x: K.squeeze(x, 2), name='squeeze2')
    softmax1 = L.Softmax(axis=1, name='softmax1')
    dot11 = L.Dot((1, 1), name='dot11')

    repeat_toctx = L.RepeatVector(K.shape(context)[1], name='repeat_toctx')
    memory_dense = L.Dense(dim, name='memory_dense')
    kb_dense = L.Dense(dim, name='kb_dense')
    mult_info = L.Multiply(name='mult_info')
    info_dense = L.Dense(dim, name='info_dense')
    mult_att_dense = L.Multiply(name='mult_att_dense')
    read_att_dense = L.Dense(1, name='read_att_dense')

    mem_info_dense = L.Dense(dim, name='mem_info_dense')
    stack1 = L.Lambda(lambda xs: K.stack(xs, 1),
                      output_shape=(None, dim),
                      name='stack1')
    mult_self_att = L.Multiply(name='mult_self_att')
    self_att_dense = L.Dense(1, name='self_att_dense')
    misa_dense = L.Dense(dim, use_bias=False, name='misa_dense')
    mi_info_dense = L.Dense(dim, name='mi_info_dense')
    add_mip = L.Lambda(lambda xy: xy[0] + xy[1], name='add_mip')
    control_gate = L.Dense(1, activation='sigmoid', name='control_gate')
    gate2 = L.Lambda(lambda xyg: xyg[2] * xyg[0] + (1 - xyg[2]) * xyg[1],
                     name='gate')

    # Init control and memory
    zeros_like = L.Lambda(K.zeros_like, name='zeros_like')
    memory = embedded_predq  # (?, dim)
    control = zeros_like(memory)  # (?, dim)
    pmemories, pcontrols = [memory], [control]

    # Reasoning iterations
    outs = list()
    for i in range(iterations):
        # Control Unit
        qi = L.Dense(dim, name='qi' + str(i))(embedded_predq)  # (?, dim)
        cqi = dense_cqi(concatm1([control, qi]))  # (?, dim)
        cais = dense_cais(mult_cqi([repeat_toqlen(cqi),
                                    embedded_predqs]))  # (?, qlen, 1)
        cais = squeeze2(cais)  # (?, qlen)
        cais = softmax1(cais)  # (?, qlen)
        outs.append(cais)
        new_control = dot11([cais, embedded_predqs])  # (?, dim)

        # Read Unit
        info = mult_info(
            [repeat_toctx(memory_dense(memory)),
             kb_dense(embedded_rules)])  # (?, rules, dim)
        infop = info_dense(concatm1([info, embedded_rules]))  # (?, rules, dim)
        rai = read_att_dense(mult_att_dense([repeat_toctx(new_control),
                                             infop]))  # (?, rules, 1)
        rai = squeeze2(rai)  # (?, rules)
        rai = softmax1(rai)  # (?, rules)
        outs.append(rai)
        read = dot11([rai, embedded_rules])  # (?, dim)

        # Write Unit
        mi_info = mem_info_dense(concatm1([read, memory]))  # (?, dim)
        past_ctrls = stack1(pcontrols)  # (?, i+1, dim)
        sai = self_att_dense(
            mult_self_att([L.RepeatVector(i + 1)(new_control),
                           past_ctrls]))  # (?, i+1, 1)
        sai = squeeze2(sai)  # (?, i+1)
        sai = softmax1(sai)  # (?, i+1)
        outs.append(sai)
        past_mems = stack1(pmemories)  # (?, i+1, dim)
        misa = L.dot([sai, past_mems], (1, 1),
                     name='misa_' + str(i))  # (?, dim)
        mip = add_mip([misa_dense(misa), mi_info_dense(mi_info)])  # (?, dim)
        cip = control_gate(new_control)  # (?, 1)
        outs.append(cip)
        new_memory = gate2([mip, memory, cip])  # (?, dim)

        # Update state
        pcontrols.append(new_control)
        pmemories.append(new_memory)
        memory, control = new_memory, new_control

    # Output Unit
    out = L.Dense(1, activation='sigmoid',
                  name='out')(concatm1([embedded_predq, memory]))
    if training:
        model = Model([context, query], out)
        model.compile(loss='binary_crossentropy',
                      optimizer='adam',
                      metrics=['acc'])
    else:
        model = Model([context, query], outs + [out])
    return model
Example #23
print(x_test_sr.shape, y_test.shape, adjacency_test_near.shape,
      adjacency_test_middle.shape, adjacency_test_distant.shape)

plt.figure(figsize=(20, 10))
plt.plot(y_test, 'r')

################################################ Model: Multi-STGCnet-SR
# input
features = Input(shape=(n, pre_sr))
adjacency_near = Input(shape=(n, n))
adjacency_middle = Input(shape=(n, n))
adjacency_distant = Input(shape=(n, n))

# near
# GCN layer
output_near_start = layers.Dot(axes=1)([adjacency_near, features])
output = layers.Dense(n, activation='relu')(output_near_start)
# GCN layer
output = layers.Dot(axes=1)([adjacency_near, output])
output = layers.Dense(n, activation='relu')(output)
output = layers.Permute((2, 1))(output)
output = layers.LSTM(32, return_sequences=True)(output)
output = layers.LSTM(12, kernel_initializer='random_normal')(output)
output_near_end = layers.Dense(1,
                               activation='relu',
                               kernel_initializer='random_normal')(output)

# middle
# GCN layer
output_middle_start = layers.Dot(axes=1)([adjacency_middle, features])
output = layers.Dense(n, activation='relu')(output_middle_start)
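
A numpy sketch of the GCN step used throughout this example (toy shapes are
assumptions): Dot(axes=1)([A, X]) contracts the first station axis of both
tensors, i.e. it computes A^T X per sample, which equals A X for a symmetric
adjacency matrix:

import numpy as np

A = np.random.random((1, 4, 4))  # (batch, n, n) adjacency
X = np.random.random((1, 4, 6))  # (batch, n, features)
agg = np.einsum('bki,bkj->bij', A, X)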
Example #24
# In[11]:

print(GPUs)

# In[39]:

from keras import layers

with tf.device(GPUs[0]):
    input_a = Input(shape=(INPUT_SHAPE, ))
    processed_a = base_network(input_a)
    # with tf.device(GPUs[1]):
    input_b = Input(shape=(INPUT_SHAPE, ))
    processed_b = base_network(input_b)
    cos_distance = layers.Dot(axes=-1,
                              normalize=True)([processed_a, processed_b])
    siamese_net = Model([input_a, input_b], cos_distance)

# In[40]:

siamese_net.summary()

# ### 7. Compile the model.

# In[41]:


def contrastive_loss(y_true, y_pred):
    '''Contrastive loss from Hadsell-et-al.'06
    http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf
    '''
    margin = 1  # standard Keras-example formulation, margin assumed to be 1
    return K.mean(y_true * K.square(y_pred) +
                  (1 - y_true) * K.square(K.maximum(margin - y_pred, 0)))
Example #25
y_train = labels[:4000]
y_test = labels[4000:]
adjacency_test_near = adjacencys[4000:]

print(x_train_near.shape, y_train.shape, adjacency_train_near.shape)
print(x_test_near.shape, y_test.shape, adjacency_test_near.shape)

################################################ Model: Near Block - Multi-STGCnet
from keras import Input, models, layers

features = Input(shape=(n, pre_sr))
adjacency = Input(shape=(n, n))  # adjacency matrix

#################### spatial component
# GCN layer
output = layers.Dot(axes=1)([adjacency, features])
output = layers.Dense(n, activation='relu')(output)
# GCN layer
output = layers.Dot(axes=1)([adjacency, output])
output = layers.Dense(n, activation='relu')(output)

#################### temporal component
# LSTM
output = layers.Permute((2, 1))(output)
output = layers.LSTM(32, return_sequences=True)(output)
output = layers.LSTM(12, kernel_initializer='random_normal')(output)
# output layer
output = layers.Dense(1, activation='relu',
                      kernel_initializer='random_normal')(output)

model = models.Model(inputs=[features, adjacency], outputs=[output])
Example #26
def build_birnn_feature_coattention_cnn_model(voca_dim,
                                              time_steps,
                                              num_features,
                                              feature_dim,
                                              output_dim,
                                              model_dim,
                                              mlp_dim,
                                              num_filters,
                                              filter_sizes,
                                              item_embedding=None,
                                              rnn_depth=1,
                                              mlp_depth=1,
                                              drop_out=0.5,
                                              rnn_drop_out=0.,
                                              rnn_state_drop_out=0.,
                                              cnn_drop_out=0.5,
                                              pooling='max',
                                              trainable_embedding=False,
                                              gpu=False,
                                              return_customized_layers=False):
    """
    Create A Bidirectional Attention Model.

    :param voca_dim: vocabulary dimension size.
    :param time_steps: the length of input
    :param output_dim: the output dimension size
    :param model_dim: rrn dimension size
    :param mlp_dim: the dimension size of fully connected layer
    :param item_embedding: integer, numpy 2D array, or None (default=None)
        If item_embedding is a integer, connect a randomly initialized embedding matrix to the input tensor.
        If item_embedding is a matrix, this matrix will be used as the embedding matrix.
        If item_embedding is None, then connect input tensor to RNN layer directly.
    :param rnn_depth: rnn depth
    :param mlp_depth: the depth of fully connected layers
    :param num_att_channel: the number of attention channels, this can be used to mimic multi-head attention mechanism
    :param drop_out: dropout rate of fully connected layers
    :param rnn_drop_out: dropout rate of rnn layers
    :param rnn_state_drop_out: dropout rate of rnn state tensor
    :param trainable_embedding: boolean
    :param gpu: boolean, default=False
        If True, CuDNNLSTM is used instead of LSTM for RNN layer.
    :param return_customized_layers: boolean, default=False
        If True, return model and customized object dictionary, otherwise return model only
    :return: keras model
    """
    if model_dim % 2 == 1:
        model_dim += 1

    if item_embedding is not None:
        inputs = models.Input(shape=(time_steps, ),
                              dtype='int32',
                              name='input0')
        x1 = inputs

        # item embedding
        if isinstance(item_embedding, np.ndarray):
            assert voca_dim == item_embedding.shape[0]
            x1 = layers.Embedding(voca_dim,
                                  item_embedding.shape[1],
                                  input_length=time_steps,
                                  weights=[
                                      item_embedding,
                                  ],
                                  trainable=trainable_embedding,
                                  mask_zero=False,
                                  name='embedding_layer0')(x1)
        elif utils.is_integer(item_embedding):
            x1 = layers.Embedding(voca_dim,
                                  item_embedding,
                                  input_length=time_steps,
                                  trainable=trainable_embedding,
                                  mask_zero=False,
                                  name='embedding_layer0')(x1)
        else:
            raise ValueError(
                "item_embedding must be either integer or numpy matrix")
    else:
        inputs = models.Input(shape=(time_steps, voca_dim),
                              dtype='float32',
                              name='input0')
        x1 = inputs

    inputs1 = models.Input(shape=(num_features, feature_dim),
                           dtype='float32',
                           name='input1')
    x2 = layers.Dense(feature_dim, name="feature_map_layer",
                      activation="relu")(inputs1)

    if gpu:
        # rnn encoding
        for i in range(rnn_depth):
            x1 = layers.Bidirectional(layers.CuDNNLSTM(int(model_dim / 2),
                                                       return_sequences=True),
                                      name='bi_lstm_layer' + str(i))(x1)
            x1 = layers.BatchNormalization(name='rnn_batch_norm_layer' +
                                           str(i))(x1)
            x1 = layers.Dropout(rnn_drop_out,
                                name="rnn_dropout_layer" + str(i))(x1)
    else:
        # rnn encoding
        for i in range(rnn_depth):
            x1 = layers.Bidirectional(layers.LSTM(
                int(model_dim / 2),
                return_sequences=True,
                dropout=rnn_drop_out,
                recurrent_dropout=rnn_state_drop_out),
                                      name='bi_lstm_layer' + str(i))(x1)
            x1 = layers.BatchNormalization(name='rnn_batch_norm_layer' +
                                           str(i))(x1)

    # attention
    attens = clayers.CoAttentionWeight(name="coattention_weights_layer")(
        [x1, x2])

    attens1 = clayers.FeatureNormalization(
        name="normalized_coattention_weights_layer1", axis=1)(attens)
    attens2 = clayers.FeatureNormalization(
        name="normalized_coattention_weights_layer2", axis=2)(attens)

    # compare
    focus1 = layers.Dot((1, 1), name="focus_layer1")([attens1, x1])
    focus2 = layers.Dot((2, 1), name="focus_layer2")([attens2, x2])

    pair1 = layers.Concatenate(axis=-1, name="pair_layer1")([x1, focus2])
    pair2 = layers.Concatenate(axis=-1, name="pair_layer2")([x2, focus1])

    x1 = layers.TimeDistributed(layers.Dense(model_dim, activation="relu"),
                                name="compare_layer1")(pair1)
    x2 = layers.TimeDistributed(layers.Dense(model_dim, activation="relu"),
                                name="compare_layer2")(pair2)

    # Multi-Channel CNN for x1
    pooled_outputs = []
    for i in range(len(filter_sizes)):
        conv = layers.Conv1D(num_filters,
                             kernel_size=filter_sizes[i],
                             padding='valid',
                             activation='relu')(x1)
        if pooling == 'max':
            conv = layers.MaxPooling1D(pool_size=time_steps - filter_sizes[i] +
                                       1,
                                       strides=1,
                                       padding='valid')(conv)
        else:
            conv = layers.AveragePooling1D(pool_size=time_steps -
                                           filter_sizes[i] + 1,
                                           strides=1,
                                           padding='valid')(conv)
        pooled_outputs.append(conv)

    if len(pooled_outputs) > 1:
        x1 = layers.Concatenate(name='concated_layer')(pooled_outputs)
    else:
        x1 = pooled_outputs[0]
    x1 = layers.Flatten()(x1)
    x1 = layers.Dropout(cnn_drop_out, name='conv_dropout_layer')(x1)
    x1 = layers.BatchNormalization(name="batch_norm_layer")(x1)

    # Average Pool for x2
    x2 = layers.GlobalAveragePooling1D(name="average_pool_layer")(x2)

    x = layers.Concatenate(axis=1, name="concat_deep_feature_layer")([x1, x2])

    # MLP Layers
    for i in range(mlp_depth - 1):
        x = layers.Dense(mlp_dim,
                         activation='selu',
                         kernel_initializer='lecun_normal',
                         name='selu_layer' + str(i))(x)
        x = layers.AlphaDropout(drop_out, name='alpha_layer' + str(i))(x)

    outputs = layers.Dense(output_dim,
                           activation="softmax",
                           name="softmax_layer0")(x)

    model = models.Model(inputs, outputs)

    if return_customized_layers:
        return model, {
            'CoAttentionWeight': clayers.CoAttentionWeight,
            "FeatureNormalization": clayers.FeatureNormalization
        }

    return model
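
The focus layers above contract different axes of the co-attention matrix. A minimal, self-contained sketch with toy shapes (assumed, not from the original source) showing what Dot((1, 1)) and Dot((2, 1)) return:

from keras import Input, layers, models

attn = Input(shape=(50, 4))   # co-attention scores: (time_steps, num_features)
seq = Input(shape=(50, 64))   # x1: (time_steps, model_dim)
feats = Input(shape=(4, 16))  # x2: (num_features, feature_dim)

focus1 = layers.Dot((1, 1))([attn, seq])    # contracts the time axes -> (None, 4, 64)
focus2 = layers.Dot((2, 1))([attn, feats])  # contracts the feature axes -> (None, 50, 16)

print(models.Model([attn, seq, feats], [focus1, focus2]).output_shape)
# [(None, 4, 64), (None, 50, 16)]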
Ejemplo n.º 27
0
def build_model(char_size=27, dim=64, iterations=4, training=True, ilp=False, pca=False):
  """Build the model."""
  # Inputs
  # Context: (rules, preds, chars,)
  context = L.Input(shape=(None, None, None,), name='context', dtype='int32')
  query = L.Input(shape=(None,), name='query', dtype='int32')

  if ilp:
    context, query, templates = ilp

  # Contextual embedding of symbols
  onehot_weights = np.eye(char_size)
  onehot_weights[0, 0] = 0 # Clear zero index
  onehot = L.Embedding(char_size, char_size,
                       trainable=False,
                       weights=[onehot_weights],
                       name='onehot')
  embedded_ctx = onehot(context) # (?, rules, preds, chars, char_size)
  embedded_q = onehot(query) # (?, chars, char_size)

  if ilp:
    # Combine the templates with the context, (?, rules+temps, preds, chars, char_size)
    embedded_ctx = L.Lambda(lambda xs: K.concatenate(xs, axis=1), name='template_concat')([templates, embedded_ctx])
    # embedded_ctx = L.concatenate([templates, embedded_ctx], axis=1)

  embed_pred = ZeroGRU(dim, go_backwards=True, name='embed_pred')
  embedded_predq = embed_pred(embedded_q) # (?, dim)
  # For every rule, for every predicate, embed the predicate
  embedded_ctx_preds = NestedTimeDist(NestedTimeDist(embed_pred, name='nest1'), name='nest2')(embedded_ctx)
  # (?, rules, preds, dim)

  embed_rule = ZeroGRU(dim, name='embed_rule')
  embedded_rules = NestedTimeDist(embed_rule, name='d_embed_rule')(embedded_ctx_preds)
  # (?, rules, dim)

  # Reused layers over iterations
  repeat_toctx = L.RepeatVector(K.shape(embedded_ctx)[1], name='repeat_to_ctx')
  diff_sq = L.Lambda(lambda xy: K.square(xy[0]-xy[1]), output_shape=(None, dim), name='diff_sq')
  mult = L.Multiply()
  concat = L.Lambda(lambda xs: K.concatenate(xs, axis=2), output_shape=(None, dim*5), name='concat')
  att_densel = L.Dense(dim//2, activation='tanh', name='att_densel')
  att_dense = L.Dense(1, name='att_dense')
  squeeze2 = L.Lambda(lambda x: K.squeeze(x, 2), name='sequeeze2')
  softmax1 = L.Softmax(axis=1)
  unifier = NestedTimeDist(ZeroGRU(dim, go_backwards=False, name='unifier'), name='dist_unifier')
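  # Dot((1, 1)) contracts the rules axis: (?, rules) x (?, rules, dim) -> (?, dim)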
  dot11 = L.Dot((1, 1))

  # Reasoning iterations
  state = embedded_predq
  repeated_q = repeat_toctx(embedded_predq)
  outs = list()
  for _ in range(iterations):
    # Compute attention between rule and query state
    ctx_state = repeat_toctx(state) # (?, rules, dim)
    s_s_c = diff_sq([ctx_state, embedded_rules])
    s_m_c = mult([embedded_rules, state]) # (?, rules, dim)
    sim_vec = concat([s_s_c, s_m_c, ctx_state, embedded_rules, repeated_q])
    sim_vec = att_densel(sim_vec) # (?, rules, dim//2)
    sim_vec = att_dense(sim_vec) # (?, rules, 1)
    sim_vec = squeeze2(sim_vec) # (?, rules)
    sim_vec = softmax1(sim_vec)
    outs.append(sim_vec)

    # Unify every rule and weighted sum based on attention
    new_states = unifier(embedded_ctx_preds, initial_state=[state])
    # (?, rules, dim)
    state = dot11([sim_vec, new_states])

  # Predication
  out = L.Dense(1, activation='sigmoid', name='out')(state)
  if ilp:
    return outs, out
  elif pca:
    model = Model([context, query], [embedded_rules])
  elif training:
    model = Model([context, query], [out])
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['acc'])
  else:
    model = Model([context, query], outs + [out])
  return model
Ejemplo n.º 28
0
def build_inter_coattention_cnn_model(num_feature_channels1,
                                      num_feature_channels2,
                                      num_features1,
                                      num_features2,
                                      feature_dim1,
                                      output_dim,
                                      num_filters,
                                      filter_sizes,
                                      atten_dim,
                                      model_dim,
                                      mlp_dim,
                                      mlp_depth=1,
                                      drop_out=0.5,
                                      pooling='max',
                                      padding='valid',
                                      return_customized_layers=False):
    """
    Create A Multi-Layer Perceptron Model with Coattention Mechanism.
    
    inputs: 
        embeddings: [batch, num_embed_feature, embed_dims] * 3 ## pronoun, A, B
        positional_features: [batch, num_pos_feature] * 2 ## pronoun-A, pronoun-B
        
    outputs: 
        [batch, num_classes] # in our case there should be 3 output classes: A, B, None
        
    :param output_dim: the output dimension size
    :param model_dim: rnn dimension size
    :param mlp_dim: the dimension size of fully connected layer
    :param mlp_depth: the depth of fully connected layers
    :param drop_out: dropout rate of fully connected layers
    :param return_customized_layers: boolean, default=False
        If True, return model and customized object dictionary, otherwise return model only
    :return: keras model
    """
    def _mlp_channel1(feature_dropout_layer, x):
        #x = feature_dropout_layer(x)
        return x

    def _mlp_channel2(feature_map_layer, x):
        x = feature_map_layer(x)
        return x

    # inputs
    inputs1 = list()
    for fi in range(num_feature_channels1):
        inputs1.append(
            models.Input(shape=(num_features1, feature_dim1),
                         dtype='float32',
                         name='input1_' + str(fi)))

    inputs2 = list()
    for fi in range(num_feature_channels2):
        inputs2.append(
            models.Input(shape=(num_features2, ),
                         dtype='float32',
                         name='input2_' + str(fi)))

    # define feature map layers
    # MLP Layers
    feature_dropout_layer1 = layers.TimeDistributed(
        layers.Dropout(rate=drop_out, name="input_dropout_layer"))
    feature_map_layer2 = layers.Dense(feature_dim1,
                                      name="feature_map_layer2",
                                      activation="relu")

    x1 = [_mlp_channel1(feature_dropout_layer1, input_) for input_ in inputs1]
    x2 = [_mlp_channel2(feature_map_layer2, input_) for input_ in inputs2]

    # From mention-pair embeddings
    reshape_layer = layers.Reshape((1, feature_dim1), name="reshape_layer")
    x2 = [reshape_layer(x2_) for x2_ in x2]
    pair1 = layers.Concatenate(
        axis=1, name="concate_pair1_layer")([x1[0], x1[1], x2[0]])
    pair2 = layers.Concatenate(
        axis=1, name="concate_pair2_layer")([x1[0], x1[2], x2[1]])

    coatten_layer = RemappedCoAttentionWeight(atten_dim,
                                              name="coattention_weights_layer")
    featnorm_layer1 = FeatureNormalization(
        name="normalized_coattention_weights_layer1", axis=1)
    featnorm_layer2 = FeatureNormalization(
        name="normalized_coattention_weights_layer2", axis=2)
    focus_layer1 = layers.Dot((1, 1), name="focus_layer1")
    focus_layer2 = layers.Dot((2, 1), name="focus_layer2")
    pair_layer1 = layers.Concatenate(axis=-1, name="pair_layer1")
    pair_layer2 = layers.Concatenate(axis=-1, name="pair_layer2")

    # attention
    attens = coatten_layer([pair1, pair2])
    attens1 = featnorm_layer1(attens)
    attens2 = featnorm_layer2(attens)
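    # Assuming RemappedCoAttentionWeight yields a (batch, len(pair1), len(pair2))
    # score matrix: axis=1 normalization weights pair1 positions, axis=2 weights
    # pair2 positions, and the Dot layers below contract exactly those axes.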
    # compare
    focus1 = focus_layer1([attens1, pair1])
    focus2 = focus_layer2([attens2, pair2])
    pair1 = pair_layer1([pair1, focus2])
    pair2 = pair_layer2([pair2, focus1])

    x = layers.Concatenate(axis=1, name="concate_layer")([pair1, pair2])
    x = layers.TimeDistributed(
        layers.Dropout(rate=drop_out, name="pair_dropout_layer"))(x)
    x = layers.TimeDistributed(
        layers.Dense(mlp_dim, name="pair_feature_map_layer",
                     activation="relu"))(x)
    x = layers.Flatten(name="pair_feature_flatten_layer1")(x)

    #     pooled_outputs = []
    #     for i in range(len(filter_sizes)):
    #         conv = layers.Conv1D(num_filters[i], kernel_size=filter_sizes[i], padding=padding, activation='relu')(x)
    #         if pooling == 'max':
    #             conv = layers.GlobalMaxPooling1D(name='global_pooling_layer' + str(i))(conv)
    #         else:
    #             conv = layers.GlobalAveragePooling1D(name='global_pooling_layer' + str(i))(conv)
    #         pooled_outputs.append(conv)
    #     if len(pooled_outputs) > 1:
    #         x = layers.Concatenate(name='concated_layer')(pooled_outputs)
    #     else:
    #         x = conv

    # MLP Layers
    x = layers.BatchNormalization(name='batch_norm_layer')(x)
    x = layers.Dropout(rate=drop_out, name="dropout_layer")(x)

    for i in range(mlp_depth - 1):
        x = layers.Dense(mlp_dim,
                         activation='selu',
                         kernel_initializer='lecun_normal',
                         name='selu_layer' + str(i))(x)
        x = layers.AlphaDropout(drop_out, name='alpha_layer' + str(i))(x)

    outputs = layers.Dense(output_dim,
                           activation="softmax",
                           name="softmax_layer0")(x)

    model = models.Model(inputs1 + inputs2, outputs)

    if return_customized_layers:
        return model, {
            'RemappedCoAttentionWeight': RemappedCoAttentionWeight,
            "FeatureNormalization": FeatureNormalization
        }

    return model
Ejemplo n.º 29
0
def build_model(char_size=27,
                dim=64,
                iterations=4,
                training=True,
                ilp=False,
                pca=False):
    """Build the model."""
    # Inputs
    # Context: (rules, preds, chars,)
    # context = L.Input(shape=(None, None, None,), name='context', dtype='int32')
    # query = L.Input(shape=(None,), name='query', dtype='int32')

    if ilp:
        context, query, templates = ilp

    # Contextual embedding of symbols
    # texts = []  # list of text samples
    # id_list = []
    # question_list = []
    # label_list = []
    # labels_index = {}  # dictionary mapping label name to numeric id
    # labels = []  # list of label ids
    # TEXT_DATA_DIR = os.path.abspath('.') + "/data/pararule"
    # # TEXT_DATA_DIR = "D:\\AllenAI\\20_newsgroup"
    # Str = '.jsonl'
    # CONTEXT_TEXTS = []
    # test_str = 'test'
    # meta_str = 'meta'

    # for name in sorted(os.listdir(TEXT_DATA_DIR)):
    #   path = os.path.join(TEXT_DATA_DIR, name)
    #   if os.path.isdir(path):
    #     label_id = len(labels_index)
    #     labels_index[name] = label_id
    #     for fname in sorted(os.listdir(path)):
    #       fpath = os.path.join(path, fname)
    #       if Str in fpath:
    #         if test_str not in fpath:
    #           if meta_str not in fpath:
    #             with open(fpath) as f:
    #               for l in json_lines.reader(f):
    #                 if l["id"] not in id_list:
    #                   id_list.append(l["id"])
    #                   questions = l["questions"]
    #                   context = l["context"].replace("\n", " ")
    #                   context = re.sub(r'\s+', ' ', context)
    #                   CONTEXT_TEXTS.append(context)
    #                   for i in range(len(questions)):
    #                     text = questions[i]["text"]
    #                     label = questions[i]["label"]
    #                     if label == True:
    #                       t = 1
    #                     else:
    #                       t = 0
    #                     q = re.sub(r'\s+', ' ', text)
    #                     texts.append(context)
    #                     question_list.append(q)
    #                     label_list.append(int(t))
    #             f.close()
    #       # labels.append(label_id)

    print('Found %s texts.' % len(CONTEXT_TEXTS))

    # MAX_NB_WORDS = 20000
    # MAX_SEQUENCE_LENGTH = 1000
    # tokenizer = Tokenizer(nb_words=MAX_NB_WORDS)
    # tokenizer.fit_on_texts(texts)
    # #sequences = tokenizer.texts_to_sequences(texts)

    word_index = WORD_INDEX
    print('Found %s unique tokens.' % len(word_index))

    #data = pad_sequences(sequences, maxlen=MAX_SEQUENCE_LENGTH)

    # labels = to_categorical(np.asarray(labels))
    #print('Shape of data tensor:', data.shape)
    # print('Shape of label tensor:', labels.shape)

    # split the data into a training set and a validation set
    # indices = np.arange(data.shape[0])
    # np.random.shuffle(indices)
    # data = data[indices]
    # labels = labels[indices]

    embeddings_index = {}
    GLOVE_DIR = os.path.abspath('.') + "/data/glove"
    f = open(os.path.join(GLOVE_DIR, 'glove.6B.100d.txt'),
             'r',
             encoding='utf-8')
    for line in f:
        values = line.split()
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs
    f.close()

    print('Found %s word vectors.' % len(embeddings_index))

    EMBEDDING_DIM = 100

    embedding_matrix = np.zeros((len(word_index) + 1, EMBEDDING_DIM))
    for word, i in word_index.items():
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            # words not found in embedding index will be all-zeros.
            embedding_matrix[i] = embedding_vector

    embedding_layer = L.Embedding(len(word_index) + 1,
                                  EMBEDDING_DIM,
                                  weights=[embedding_matrix],
                                  trainable=False)

    context = L.Input(shape=(
        None,
        None,
        None,
    ),
                      name='context',
                      dtype='int32')
    query = L.Input(shape=(None, ), name='query', dtype='int32')

    embedded_ctx = embedding_layer(
        context)  # (?, rules, preds, chars, char_size)
    embedded_q = embedding_layer(query)  # (?, chars, char_size)
    #onehot_weights = np.eye(char_size)
    #onehot_weights[0, 0] = 0 # Clear zero index
    # onehot = L.Embedding(char_size, char_size,
    #                      trainable=False,
    #                      weights=[onehot_weights],
    #                      name='onehot')
    # embedded_ctx = onehot(context) # (?, rules, preds, chars, char_size)
    # embedded_q = onehot(query) # (?, chars, char_size)

    if ilp:
        # Combine the templates with the context, (?, rules+temps, preds, chars, char_size)
        embedded_ctx = L.Lambda(lambda xs: K.concatenate(xs, axis=1),
                                name='template_concat')(
                                    [templates, embedded_ctx])
        # embedded_ctx = L.concatenate([templates, embedded_ctx], axis=1)

    embed_pred = ZeroGRU(dim, go_backwards=True, name='embed_pred')
    embedded_predq = embed_pred(embedded_q)  # (?, dim)
    # For every rule, for every predicate, embed the predicate
    embedded_ctx_preds = L.TimeDistributed(L.TimeDistributed(embed_pred,
                                                             name='nest1'),
                                           name='nest2')(embedded_ctx)
    # (?, rules, preds, dim)

    # embed_rule = ZeroGRU(dim, go_backwards=True, name='embed_rule')
    # embedded_rules = NestedTimeDist(embed_rule, name='d_embed_rule')(embedded_ctx_preds)
    get_heads = L.Lambda(lambda x: x[:, :, 0, :], name='rule_heads')
    embedded_rules = get_heads(embedded_ctx_preds)
    # (?, rules, dim)

    # Reused layers over iterations
    repeat_toctx = L.RepeatVector(K.shape(embedded_ctx)[1],
                                  name='repeat_to_ctx')
    diff_sq = L.Lambda(lambda xy: K.square(xy[0] - xy[1]),
                       output_shape=(None, dim),
                       name='diff_sq')
    mult = L.Multiply()
    concat = L.Lambda(lambda xs: K.concatenate(xs, axis=2),
                      output_shape=(None, dim * 5),
                      name='concat')
    att_densel = L.Dense(dim // 2, activation='tanh', name='att_densel')
    att_dense = L.Dense(1, activation='sigmoid', name='att_dense')
    squeeze2 = L.Lambda(lambda x: K.squeeze(x, 2), name='sequeeze2')
    rule_mask = L.Lambda(lambda x: K.cast(
        K.any(K.not_equal(x, 0), axis=-1, keepdims=True), 'float32'),
                         name='rule_mask')(embedded_rules)

    unifier = NestedTimeDist(ZeroGRU(dim, name='unifier'), name='dist_unifier')
    dot11 = L.Dot((1, 1))
    # gating = L.Dense(1, activation='sigmoid', name='gating')
    # gate2 = L.Lambda(lambda xyg: xyg[2]*xyg[0] + (1-xyg[2])*xyg[1], name='gate')

    # Reasoning iterations
    state = embedded_predq
    repeated_q = repeat_toctx(embedded_predq)
    outs = list()
    for _ in range(iterations):
        # Compute attention between rule and query state
        ctx_state = repeat_toctx(state)  # (?, rules, dim)
        s_s_c = diff_sq([ctx_state, embedded_rules])
        s_m_c = mult([embedded_rules, state])  # (?, rules, dim)
        sim_vec = concat([s_s_c, s_m_c, ctx_state, embedded_rules, repeated_q])
        sim_vec = att_densel(sim_vec)  # (?, rules, dim//2)
        sim_vec = att_dense(sim_vec)  # (?, rules, 1)
        sim_vec = mult([sim_vec, rule_mask])
        sim_vec = squeeze2(sim_vec)  # (?, rules)
        # sim_vec = L.Softmax(axis=1)(sim_vec)
        outs.append(sim_vec)

        # Unify every rule and weighted sum based on attention
        new_states = unifier(embedded_ctx_preds, initial_state=[state])
        # (?, rules, dim)
        state = dot11([sim_vec, new_states])

        # Apply gating
        # gate = gating(state)
        # outs.append(gate)
        # state = gate2([state, new_state, gate])

    # Predication
    out = L.Dense(1, activation='sigmoid', name='out')(state)
    if ilp:
        return outs, out
    elif pca:
        model = Model([context, query], [embedded_rules])
    elif training:
        model = Model([context, query], [out])
        model.compile(loss='binary_crossentropy',
                      optimizer='adam',
                      metrics=['acc'])
    else:
        model = Model([context, query], outs + [out])
    return model
Ejemplo n.º 30
0
def build_birnn_attention_model(voca_dim,
                                time_steps,
                                output_dim,
                                rnn_dim,
                                mlp_dim,
                                item_embedding=None,
                                rnn_depth=1,
                                mlp_depth=1,
                                num_att_channel=1,
                                drop_out=0.5,
                                rnn_drop_out=0.,
                                rnn_state_drop_out=0.,
                                trainable_embedding=False,
                                gpu=False,
                                return_customized_layers=False):
    """
    Create A Bidirectional Attention Model.

    :param voca_dim: vocabulary dimension size.
    :param time_steps: the length of input
    :param output_dim: the output dimension size
    :param rnn_dim: rnn dimension size
    :param mlp_dim: the dimension size of fully connected layer
    :param item_embedding: integer, numpy 2D array, or None (default=None)
        If item_embedding is an integer, connect a randomly initialized embedding matrix to the input tensor.
        If item_embedding is a matrix, this matrix will be used as the embedding matrix.
        If item_embedding is None, then connect input tensor to RNN layer directly.
    :param rnn_depth: rnn depth
    :param mlp_depth: the depth of fully connected layers
    :param num_att_channel: the number of attention channels, this can be used to mimic multi-head attention mechanism
    :param drop_out: dropout rate of fully connected layers
    :param rnn_drop_out: dropout rate of rnn layers
    :param rnn_state_drop_out: dropout rate of rnn state tensor
    :param trainable_embedding: boolean
    :param gpu: boolean, default=False
        If True, CuDNNLSTM is used instead of LSTM for RNN layer.
    :param return_customized_layers: boolean, default=False
        If True, return model and customized object dictionary, otherwise return model only
    :return: keras model
    """

    if item_embedding is not None:
        inputs = models.Input(shape=(time_steps, ),
                              dtype='int32',
                              name='input0')
        x = inputs

        # item embedding
        if isinstance(item_embedding, np.ndarray):
            assert voca_dim == item_embedding.shape[0]
            x = layers.Embedding(voca_dim,
                                 item_embedding.shape[1],
                                 input_length=time_steps,
                                 weights=[
                                     item_embedding,
                                 ],
                                 trainable=trainable_embedding,
                                 mask_zero=False,
                                 name='embedding_layer0')(x)
        elif utils.is_integer(item_embedding):
            x = layers.Embedding(voca_dim,
                                 item_embedding,
                                 input_length=time_steps,
                                 trainable=trainable_embedding,
                                 mask_zero=False,
                                 name='embedding_layer0')(x)
        else:
            raise ValueError(
                "item_embedding must be either integer or numpy matrix")
    else:
        inputs = models.Input(shape=(time_steps, voca_dim),
                              dtype='float32',
                              name='input0')
        x = inputs

    if gpu:
        # rnn encoding
        for i in range(rnn_depth):
            x = layers.Bidirectional(layers.CuDNNLSTM(rnn_dim,
                                                      return_sequences=True),
                                     name='bi_lstm_layer' + str(i))(x)
            x = layers.BatchNormalization(name='rnn_batch_norm_layer' +
                                          str(i))(x)
            x = layers.Dropout(rnn_drop_out,
                               name="rnn_dropout_layer" + str(i))(x)
    else:
        # rnn encoding
        for i in range(rnn_depth):
            x = layers.Bidirectional(layers.LSTM(
                rnn_dim,
                return_sequences=True,
                dropout=rnn_drop_out,
                recurrent_dropout=rnn_state_drop_out),
                                     name='bi_lstm_layer' + str(i))(x)
            x = layers.BatchNormalization(name='rnn_batch_norm_layer' +
                                          str(i))(x)

    # attention
    attention_heads = []
    x_per = layers.Permute((2, 1), name='permuted_attention_x')(x)
    for h in range(max(1, num_att_channel)):
        attention = clayers.AttentionWeight(name="attention_weights_layer" +
                                            str(h))(x)
        xx = layers.Dot([2, 1], name='focus_head' + str(h) +
                        '_layer0')([x_per, attention])
        attention_heads.append(xx)

    if num_att_channel > 1:
        x = layers.Concatenate(name='focus_layer0')(attention_heads)
    else:
        x = attention_heads[0]

    x = layers.BatchNormalization(name='focused_batch_norm_layer')(x)

    # MLP Layers
    for i in range(mlp_depth - 1):
        x = layers.Dense(mlp_dim,
                         activation='selu',
                         kernel_initializer='lecun_normal',
                         name='selu_layer' + str(i))(x)
        x = layers.AlphaDropout(drop_out, name='alpha_layer' + str(i))(x)

    outputs = layers.Dense(output_dim,
                           activation="softmax",
                           name="softmax_layer0")(x)

    model = models.Model(inputs, outputs)

    if return_customized_layers:
        return model, {'AttentionWeight': clayers.AttentionWeight}
    return model
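
In the attention heads above, x_per is the permuted encoding (batch, rnn_dim, time_steps), and clayers.AttentionWeight (a custom layer, assumed here to emit one weight per time step) supplies the weights; Dot([2, 1]) then contracts the two time axes. A toy, self-contained sketch of that contraction:

from keras import Input, layers, models

seq = Input(shape=(20, 32))          # (time_steps, encoded_dim)
weights = Input(shape=(20, ))        # stand-in for the AttentionWeight output
seq_t = layers.Permute((2, 1))(seq)  # (None, 32, 20)
focused = layers.Dot([2, 1])([seq_t, weights])  # -> (None, 32)

print(models.Model([seq, weights], focused).output_shape)  # (None, 32)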