Example #1
def build_doc2vec_model(conf,dp):
    n_terms = len(dp.idx2word)

    # initialize parameters (embeddings)
    word_embed_data = np.array(dp.word_embed)
    item_embed_data = np.random.rand(dp.get_item_size(), conf.dim_word)
    print(dp.get_item_size())
    print("finish data processing")

    # define model
    word_input = Input(shape=(1,), dtype="int32", name="word_idx")
    item_pos_input = Input(shape=(1,), dtype="int32", name="item_pos_idx")
    item_neg_input = Input(shape=(1,), dtype="int32", name="item_neg_idx")

    word_embed = Embedding(output_dim=conf.dim_word, input_dim=n_terms, input_length=1, name="word_embed",
                           weights=[word_embed_data], trainable=False)
    item_embed = Embedding(output_dim=conf.dim_word, input_dim=dp.get_item_size(), input_length=1, name="item_embed",
                           weights=[item_embed_data], trainable=True)

    word_embed_ = word_embed(word_input)
    item_pos_embed_ = item_embed(item_pos_input)
    item_neg_embed_ = item_embed(item_neg_input)

    word_flatten = Flatten()
    word_embed_ = word_flatten(word_embed_)

    item_pos_embed_ = Flatten()(item_pos_embed_)
    item_neg_embed_ = Flatten()(item_neg_embed_)

    pos_layer_ = Dot(axes=-1, normalize=False, name="pos_layer")([word_embed_, item_pos_embed_])
    neg_layer_ = Dot(axes=-1, normalize=False, name="neg_layer")([word_embed_, item_neg_embed_])
    merge_layer_ = Concatenate(axis=-1, name="merge_layer")([pos_layer_, neg_layer_])


    model = Model(inputs=[word_input, item_pos_input, item_neg_input], outputs=[merge_layer_, pos_layer_])

    def ranking_loss(y_true, y_pred):
        pos = y_pred[:,0]
        neg = y_pred[:,1]
        loss = K.maximum(0.5 + neg - pos, 0.0)
        return K.mean(loss) + 0 * y_true

    def dummy_loss(y_true, y_pred):
        # loss = K.max(y_pred) + 0 * y_true
        loss = y_pred + 0 * y_true
        return loss

    # model = make_parallel(model, int(os.environ['MKL_NUM_THREADS']))

    model.compile(optimizer=Adam(lr=0.01), loss = {'merge_layer' : ranking_loss, "pos_layer": dummy_loss}, loss_weights=[1, 0])

    print("finish model compiling")
    print(model.summary())

    return model, item_embed, word_embed
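
The ranking_loss above is a margin hinge on the difference between the positive and negative scores, max(0, 0.5 + neg - pos), averaged over the batch (the + 0 * y_true term only keeps Keras happy about the unused targets). A standalone NumPy illustration of how it behaves:

import numpy as np

def ranking_loss_np(pos, neg, margin=0.5):
    # hinge on the score difference: zero loss once pos beats neg by at least the margin
    return np.mean(np.maximum(margin + neg - pos, 0.0))

pos_scores = np.array([2.0, 0.1, 1.0])
neg_scores = np.array([0.5, 0.4, 0.9])
print(ranking_loss_np(pos_scores, neg_scores))  # per-pair losses 0.0, 0.8, 0.4 -> mean 0.4
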
Example #2
    def get_cnn_model1(self):
        get_diag = Lambda(lambda xin: K.sum(xin*T.eye(self.max_opt_count),axis=2),output_shape=(self.max_opt_count,))
        transp_out = Lambda(lambda xin: K.permute_dimensions(xin,(0,2,1)),output_shape=(self.max_opt_count,self.word_vec_size))
        apply_weights = Lambda(lambda xin: (K.expand_dims(xin[0],axis=-1)*K.expand_dims(xin[1],axis=2)).sum(axis=1), output_shape=(self.word_vec_size,self.max_opt_count))
        tile_q = Lambda(lambda xin: K.tile(xin,(1,self.max_opt_count,1,1)),output_shape=(self.max_opt_count,self.max_q_length,self.word_vec_size))
        exp_dims = Lambda(lambda xin: K.expand_dims(xin,1), output_shape=(1,self.max_q_length,self.word_vec_size))
        exp_dims2 = Lambda(lambda xin: K.expand_dims(xin,3), output_shape=(None,self.word_vec_size,1))
        exp_layer = Lambda(lambda xin: K.exp(xin), output_shape=(self.max_sent_para,self.max_opt_count))
        final_weights = Lambda(lambda xin: xin/K.cast(K.sum(xin, axis=1, keepdims=True), K.floatx()),output_shape=(self.max_sent_para,self.max_opt_count))
        mask_weights = Lambda(lambda xin: T.switch(T.eq(xin,0),np.NINF,xin), output_shape=(self.max_sent_para,self.max_opt_count))
        glob_pool = Lambda(lambda xin: K.mean(xin, axis=[1, 2]),output_shape=(100,))

        filter_sizes = [3,4,5]
        num_filters = 100
        q_input = Input(shape=(self.max_q_length, self.word_vec_size), name='question_input')
        q_exp = exp_dims(q_input)
        q_rep = tile_q(q_exp)
        option_input = Input(shape=(self.max_opt_count, self.max_option_length,self.word_vec_size), name='option_input')
        opt_q = Concatenate(axis=2)([q_rep,option_input])

        cnn_input = Input(shape=(None, self.word_vec_size), name='cnn_input')
        cnn_reshape = exp_dims2(cnn_input)

        conv_0 = Conv2D(num_filters, kernel_size=(filter_sizes[0], self.word_vec_size), padding='valid', kernel_initializer='normal', activation='linear')(cnn_reshape)
        conv_1 = Conv2D(num_filters, kernel_size=(filter_sizes[1], self.word_vec_size), padding='valid', kernel_initializer='normal', activation='linear')(cnn_reshape)
        conv_2 = Conv2D(num_filters, kernel_size=(filter_sizes[2], self.word_vec_size), padding='valid', kernel_initializer='normal', activation='linear')(cnn_reshape)

        meanpool_0 = glob_pool(conv_0)
        meanpool_1 = glob_pool(conv_1)
        meanpool_2 = glob_pool(conv_2)
        concatenated_tensor = Concatenate(axis=1)([meanpool_0, meanpool_1, meanpool_2])

        cnn_model = Model(inputs=cnn_input,outputs=concatenated_tensor)
        cnn_td_opt = TimeDistributed(cnn_model)(opt_q)
        
        doc_input = Input(shape=(self.max_sent_para, self.max_words_sent, self.word_vec_size), name='doc_input')
        cnn_doc = TimeDistributed(cnn_model)(doc_input)
        att_wts = Dot(axes=2,normalize=True)([cnn_doc,cnn_td_opt])
        att_wts = mask_weights(att_wts)
        att_wts = exp_layer(att_wts)
        att_wts = final_weights(att_wts)
        out = apply_weights([cnn_doc,att_wts])

        out = transp_out(out)
        dp = Dot(axes=2,normalize=True)([out,cnn_td_opt])
        out = get_diag(dp)
        probs = MaskedSoftmax()([out,option_input])
        main_model = Model(inputs=[q_input,doc_input,option_input],outputs=probs)
        sgd = SGD(lr=0.1, decay=0., momentum=0., nesterov=False)
        main_model.compile(loss='categorical_crossentropy',optimizer=sgd,metrics=['accuracy'])
        main_model.summary()
        return main_model
Example #3
def createSimilarityBranch(embedding_size,
                           mode='innerprod',
                           negative_size=0,
                           graph='A',
                           scale_negative=False):
    """ Branch of global network: computes similarity between embeddings of given two nodes in a graph
    """
    inputT = Input(shape=(
        1,
        embedding_size,
    ), name='Embedding_Target')
    inputC = Input(shape=(
        1,
        embedding_size,
    ), name='Embedding_Context')
    inputs = [inputT, inputC]

    layer_name = 'Output_Similarity'
    if mode == 'l2':  # l2 distance
        similarity = Lambda(l2_dist,
                            output_shape=l2_output_shape_sim,
                            name=layer_name)(inputs)
    elif mode == 'cossim':  # cosine similarity
        similarity = Dot(axes=-1, normalize=True, name=layer_name)(inputs)
    elif mode == 'innerprod':  # inner product
        similarity = Dot(axes=-1, name=layer_name)(inputs)
    else:  # softmax (default)
        inputNS = Input(shape=(
            negative_size,
            embedding_size,
        ),
                        name='Embedding_NS')
        inputs.append(inputNS)
        similarityTC = Dot(axes=-1)([inputT, inputC])
        similarityCNS = Dot(axes=-1)([inputC, inputNS])
        similarity = Concatenate(axis=-1)([similarityTC, similarityCNS])
        similarity = Activation('softmax', name=layer_name)(similarity)

    # normalize negative samples loss
    if scale_negative:

        def normalizeNS(x):
            #            from keras import backend as K
            #            x = K.eval(x)
            #            for i in range(1,len(x)):
            #                x[i] = x[i]/negative_size
            #            return K.variable(x)
            return x / negative_size

        similarity = Activation(normalizeNS)(similarity)

    return Model(inputs, similarity, name='Branch_Similarity' + graph)
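
In the default (softmax) branch above, the positive target-context score and the negative-sample scores are concatenated and passed through a softmax, so the branch can be trained with a one-hot target on the positive slot. A short usage sketch (assuming createSimilarityBranch is in scope; the sizes are made up):

# Usage sketch (assumes createSimilarityBranch above is in scope; sizes are made up).
branch = createSimilarityBranch(embedding_size=64, mode='softmax', negative_size=5, graph='A')
branch.summary()  # three inputs (target, context, negatives); softmax over 1 positive + 5 negative scores
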
Example #4
def make_word2vec_model(embedding_dim, num_words):
    '''
    embedding_dim: (int) embedding dimension
    num_words: (int) size of the vocabulary
    '''

    word_input = Input(shape=(1, ), dtype='int32')
    context_input = Input(shape=(1, ), dtype='int32')
    print('*' * 10)
    print('input layer: ', word_input)

    word_embedding = Embedding(num_words, embedding_dim)
    we = word_embedding(word_input)
    print('word embedding layer: ', we)

    context_embedding = Embedding(num_words, embedding_dim)
    ce = Reshape((embedding_dim, 1))(context_embedding(context_input))
    print('context embedding layer: ', ce)

    dots = Dot((1, 2))([ce, we])
    print('merge layer: ', dots)

    flat = Flatten()(dots)
    print('flatten layer: ', flat)
    print('*' * 10)

    acts = Activation('sigmoid')(flat)

    model = Model(inputs=[word_input, context_input], outputs=acts)
    model.compile('adam', loss='binary_crossentropy')
    return model
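
A rough training sketch for the skip-gram model above, assuming make_word2vec_model is in scope and that (word, context) pairs come from keras.preprocessing.sequence.skipgrams; the corpus here is random toy data and the exact array shapes may need adjusting for a given Keras version:

# Rough training sketch (assumes make_word2vec_model above is in scope).
import numpy as np
from keras.preprocessing.sequence import skipgrams

num_words, embedding_dim = 1000, 64
model = make_word2vec_model(embedding_dim, num_words)

corpus = [np.random.randint(1, num_words, size=50).tolist() for _ in range(10)]  # toy ID sequences
for sequence in corpus:
    pairs, labels = skipgrams(sequence, vocabulary_size=num_words, window_size=4, negative_samples=1.0)
    if pairs:
        words, contexts = (np.array(col, dtype='int32').reshape(-1, 1) for col in zip(*pairs))
        model.train_on_batch([words, contexts], np.array(labels, dtype='int32').reshape(-1, 1))
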
Example #5
def build_model(preprocessor):
    inputs = Input(shape=(X_max_len,))

    # encoder network
    x = Embedding(X_vocab_size, 200, input_length=X_max_len, mask_zero=False, weights=preprocessor.init_vectors)(inputs)
    hidden_state_encoder = Bidirectional(LSTM(hidden_size, return_sequences=True), merge_mode='concat')(x)
    # hidden_state_encoder = Bidirectional(LSTM(hidden_size), merge_mode='concat')(hidden_state_encoder)
    e = Flatten()(hidden_state_encoder)
    e = Dropout(0.3)(e)
    g = Dense(max_seq_len*X_max_len)(e)
    g = Dropout(0.3)(g)
    g = Dense(max_seq_len*X_max_len, kernel_regularizer=regularizers.l2(0.01), activity_regularizer=regularizers.l2(0.000001))(g)
    g = Reshape((X_max_len, max_seq_len))(g)
    distribution_over_input = Activation(softMaxAxis1, name='attention_wts')(g)
    encoder_output_after_attention = Dot(axes=(1,1))([distribution_over_input, hidden_state_encoder])

    # decoder network
    for _ in range(num_layers):
        x = LSTM(hidden_size, return_sequences=True)(encoder_output_after_attention)
    hidden_state_output = TimeDistributed(Dense(100))(x)
    hidden_state_output = TimeDistributed(Dropout(0.3))(hidden_state_output)
    hidden_state_output = TimeDistributed(Dense(y_vocab_size+1))(hidden_state_output)
    y = Activation('softmax')(hidden_state_output)
    model = Model(inputs=[inputs], outputs=[y])
    rmsprop = RMSprop(lr=0.0001, rho=0.9, epsilon=1e-08, decay=0.0)
    model.compile(loss='categorical_crossentropy', optimizer=rmsprop, metrics=['accuracy'])
    return model
Example #6
def add_attention_layer(query_embedding,
                        doc_embedding,
                        layer_name,
                        query_mask=None,
                        doc_mask=None,
                        mask=False):
    dot_prod = Dot(axes=-1, name=layer_name)([doc_embedding, query_embedding])
    norm_sim = Activation('softmax')(dot_prod)
    if mask:
        max_sim = Lambda(lambda x: max_pooling_with_mask(x[0], x[1]),
                         output_shape=lambda inp_shp: (
                             inp_shp[0][0],
                             inp_shp[0][2],
                         ))([norm_sim, query_mask])
        mean_sim = Lambda(lambda x: mean_pooling_with_mask(x[0], x[1], x[2]),
                          output_shape=lambda inp_shp: (
                              inp_shp[0][0],
                              inp_shp[0][2],
                          ))([norm_sim, doc_mask, query_mask])
    else:
        max_sim = Lambda(max_pooling,
                         output_shape=lambda inp_shp: (
                             inp_shp[0],
                             inp_shp[2],
                         ))(norm_sim)
        mean_sim = Lambda(mean_pooling,
                          output_shape=lambda inp_shp: (
                              inp_shp[0],
                              inp_shp[2],
                          ))(norm_sim)
    return norm_sim, max_sim, mean_sim
Example #7
    def __init__(self, episodes=None, existing_model_file=None, bag_of_words=False):
        if existing_model_file and os.path.exists(existing_model_file):
            self.model = load_model(existing_model_file)
            return
        elif episodes is None:
            raise RuntimeError("must provide either existing model or input data")
        
        max_token_length = len(episodes[0].get_tokens())
        scene_feat_length = len(episodes[0].get_referent())    
        max_token_id = int(max([x for e in episodes for x in e.get_tokens()]))
        self.bag_of_words = bag_of_words
        
        if bag_of_words:    
            tokens_input = Input(shape=(max_token_id,), name="utterance_input", dtype='float32')
            tokens = Dropout(0.5)(tokens_input)
            prediction = Dense(scene_feat_length, name="hidden_layer", 
                               kernel_regularizer=l1(0.001))(tokens)
    
        else:
            tokens_input = Input(shape=(max_token_length,), name="utterance_input", dtype='int32')
            embeddings = Embedding(output_dim=scene_feat_length, input_dim=(max_token_id+1),
                                   name="embedding_layer")(tokens_input)
            prediction = GRU(scene_feat_length, activation=None,  name="recurrent_layer", 
                            dropout=0.2, recurrent_dropout=0.2, kernel_regularizer=l1(0.001))(embeddings) 

        scene_input = Input(shape=(scene_feat_length,), name="scene_input", dtype="float32")
        dotproduct = Dot(axes=1)([prediction, scene_input])
        output = Activation("sigmoid")(dotproduct)
        rmsprop = RMSprop(lr=0.01, rho=0.9, epsilon=1e-08, decay=0.001)
        model = Model(inputs=[tokens_input,scene_input], outputs=output)        
        model.compile(optimizer=rmsprop,loss='binary_crossentropy', metrics=['accuracy'])
        self.model = model
Example #8
def build(pfp_len=2048, rxnfp_len=2048, l2v=0.01):
    input_pfp = Input(shape=(pfp_len, ))
    input_rxnfp = Input(shape=(rxnfp_len, ))

    input_pfp_h1 = Dense(1024, activation='elu')(input_pfp)
    input_pfp_h2 = Dropout(0.3)(input_pfp_h1)
    input_pfp_h3 = Highway_self(activation='elu')(input_pfp_h2)
    input_pfp_h4 = Highway_self(activation='elu')(input_pfp_h3)
    input_pfp_h5 = Highway_self(activation='elu')(input_pfp_h4)
    input_pfp_h6 = Highway_self(activation='elu')(input_pfp_h5)
    input_pfp_h7 = Highway_self(activation='elu')(input_pfp_h6)

    input_rxnfp_h1 = Dense(1024, activation='elu')(input_rxnfp)
    merged_h1 = Dot(axes=1, normalize=False)([input_pfp_h7, input_rxnfp_h1])

    output = Dense(1, activation='sigmoid')(merged_h1)
    model = Model([input_pfp, input_rxnfp], output)

    model.count_params()
    model.summary()

    adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)

    model.compile(loss='binary_crossentropy',
                  optimizer=adam,
                  metrics=[
                      'acc',
                      keras.metrics.Precision(name='precision'),
                      keras.metrics.Recall(name='recall')
                  ])
    return model
Example #9
def build_model(preprocessor):
    inputs = Input(shape=(X_max_len, ))

    # encoder network
    x = Embedding(X_vocab_size,
                  200,
                  input_length=X_max_len,
                  mask_zero=False,
                  weights=preprocessor.init_vectors)(inputs)
    hidden_state_encoder = LSTM(hidden_size, return_sequences=True)(x)
    e = Flatten()(hidden_state_encoder)
    g = Dense(max_seq_len * X_max_len)(e)
    g = Reshape((X_max_len, max_seq_len))(g)
    distribution_over_input = Activation(softMaxAxis1,
                                         name='attention_wts')(g)
    encoder_output_after_attention = Dot(axes=(1, 1))(
        [distribution_over_input, hidden_state_encoder])

    # decoder network
    for _ in range(num_layers):
        x = LSTM(hidden_size,
                 return_sequences=True)(encoder_output_after_attention)
    hidden_state_output = TimeDistributed(Dense(y_vocab_size + 1))(x)
    y = Activation('softmax')(hidden_state_output)
    model = Model(inputs=[inputs], outputs=[y])
    model.compile(loss='categorical_crossentropy',
                  optimizer='rmsprop',
                  metrics=['accuracy'])
    return model
Example #10
 def _merge_cosim_layer(self, model_1, model_2):
     '''
     Computes the cosine similarity between two tensors.
     :param model_1: The first tensor.
     :param model_2: The second tensor.
     :return: The cosine similarity value between model_1 and model_2.
     '''
     return Dot(axes=1, normalize=True)([model_1, model_2])
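
Dot(axes=1, normalize=True) L2-normalizes both inputs along the dot axis, so the result is the row-wise cosine similarity. A minimal standalone check of that behaviour (written against tf.keras here, which should match the standalone Keras layer used in these snippets):

import numpy as np
from tensorflow.keras.layers import Dot, Input
from tensorflow.keras.models import Model

a_in, b_in = Input(shape=(4,)), Input(shape=(4,))
cosine = Dot(axes=1, normalize=True)([a_in, b_in])
check_model = Model([a_in, b_in], cosine)

a = np.random.rand(3, 4).astype('float32')
b = np.random.rand(3, 4).astype('float32')
keras_cos = check_model.predict([a, b]).ravel()
numpy_cos = np.sum(a * b, axis=1) / (np.linalg.norm(a, axis=1) * np.linalg.norm(b, axis=1))
print(np.allclose(keras_cos, numpy_cos, atol=1e-5))  # expected: True
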
Example #11
    def train(self, walks, dataset, size, window, negative, pair_ratio, epochs, lambda0):
        vocab_size = len(self.G.nodes())
        k = len(self.G.nums_type)

        # pairwise model
        input_target = Input((1,))
        embedding_target = Embedding(vocab_size, size, name="emb_0")
        target = embedding_target(input_target)

        input_context = Input((1,))

        context = Embedding(vocab_size, size)(input_context)

        dot_product = Dot(axes=2)([target, context])
        dot_product = Reshape((1,))(dot_product)
        output = Activation('sigmoid')(dot_product)

        model_p = Model(inputs=[input_target, input_context], outputs=output)
        model_p.summary()
        model_p.compile(loss='binary_crossentropy', optimizer='rmsprop', loss_weights=[1])

        # tuplewise model
        input_tuplew = Input(shape=(k, ), name='input', dtype='int32')
        tuplew = embedding_target(input_tuplew)
        conv = Conv1D(32, 3, activation='relu',name='conv')(tuplew)
        pooling = GlobalMaxPooling1D(name='pooling')(conv)
        output_tuplew = Dense(1,activation='sigmoid')(pooling)

        model_t = Model(inputs=input_tuplew, outputs=output_tuplew)
        model_t.summary()
        model_t.compile(loss='binary_crossentropy', optimizer='rmsprop', loss_weights=[lambda0])

        for epoch in range(epochs):

            random.shuffle(walks)

            loss_p = 0.
            loss_t = 0.
            for walk in tqdm(walks,ascii=True):
                pairs, labels_p = skipgrams(walk, vocab_size, negative_samples=negative, window_size=window)
                tuples, labels_t = self._tuple_sample(walk, negative_samples=negative, pair_ratio=pair_ratio)

                if pairs:
                    x_pair = [np.array(x) for x in zip(*pairs)]
                    y_pair = np.array(labels_p, dtype=np.int32)
                    loss_p += model_p.train_on_batch(x_pair,y_pair)

                if tuples:
                    x_tuple = np.asarray(tuples)
                    y_tuple = np.array(labels_t, dtype=np.int32)
                    loss_t += model_t.train_on_batch(x_tuple,y_tuple)                 

            print("epoch:",epoch+1)
            print("loss:",loss_p/len(walks),loss_t/len(walks),(loss_p+lambda0*loss_t)/len(walks))

        self.wv = model_p.get_layer('emb_0').get_weights()[0]
        self.model_t = model_t
Example #12
    def create_deepconn_dp(self):
        ''' simple dot product instead of factorization machine for final layer.
		this simplification yielded similar results in the paper and should work
		better on small data due to less overfitting. '''
        dotproduct = Dot(axes=1)([self.towerU, self.towerI])
        output = Add()([self.outNeuron, dotproduct])
        model = Model(inputs=[self.inputU, self.inputI], outputs=[output])
        model.compile(optimizer='adam', loss='mse')
        return model
Example #13
    def generate_embeddedModel(self):
        userModel = self.userEmbeddingModel()
        movieModel = self.movieEmbeddingModel()
        x = Dot(axes=-1)([userModel.output, movieModel.output])

        model = Model([userModel.input, movieModel.input], x)
        print('ID : ', id(self))
        print(model.summary())
        self.deep_model = model
        return model
Example #14
    def get_gru_baseline(self):
        lstm_qo = GRU(100,return_sequences=False)
        get_diag = Lambda(lambda xin: K.sum(xin*T.eye(self.max_opt_count),axis=2),output_shape=(self.max_opt_count,))
        transp_out = Lambda(lambda xin: K.permute_dimensions(xin,(0,2,1)),output_shape=(self.max_opt_count,100))
        apply_weights = Lambda(lambda xin: (K.expand_dims(xin[0],axis=-1)*K.expand_dims(xin[1],axis=2)).sum(axis=1), output_shape=(100,self.max_opt_count))
        tile_q = Lambda(lambda xin: K.tile(xin,(1,self.max_opt_count,1,1)),output_shape=(self.max_opt_count,self.max_q_length,self.word_vec_size))
        exp_dims = Lambda(lambda xin: K.expand_dims(xin,1), output_shape=(1,self.max_q_length,self.word_vec_size))
        exp_layer = Lambda(lambda xin: K.exp(xin), output_shape=(self.max_sent_para,self.max_opt_count))
        mask_weights = Lambda(lambda xin: T.switch(T.eq(xin,0),np.NINF,xin), output_shape=(self.max_sent_para,self.max_opt_count))
        final_weights = Lambda(lambda xin: xin/K.cast(K.sum(xin, axis=1, keepdims=True), K.floatx()),output_shape=(self.max_sent_para,self.max_opt_count))


        q_input = Input(shape=(self.max_q_length, self.word_vec_size), name='question_input')
        q_exp = exp_dims(q_input)
        q_rep = tile_q(q_exp)
        option_input = Input(shape=(self.max_opt_count, self.max_option_length,self.word_vec_size), name='option_input')
        opt_q = Concatenate(axis=2)([q_rep,option_input])

        lstm_input = Input(shape=(None, self.word_vec_size), name='lstm_input')
        lstm_mask = Masking(mask_value=0.)(lstm_input)
        lstm_out = lstm_qo(lstm_mask)

        lstm_model = Model(inputs=lstm_input,outputs=lstm_out)
        lstm_td_opt = TimeDistributed(lstm_model)(opt_q)
        
        doc_input = Input(shape=(self.max_sent_para, self.max_words_sent, self.word_vec_size), name='doc_input')
        lstm_doc = TimeDistributed(lstm_model)(doc_input)
        att_wts = Dot(axes=2,normalize=True)([lstm_doc,lstm_td_opt])
        att_wts = mask_weights(att_wts)
        att_wts = exp_layer(att_wts)
        att_wts = final_weights(att_wts)
        out = apply_weights([lstm_doc,att_wts])

        out = transp_out(out)
        dp = Dot(axes=2,normalize=True)([out,lstm_td_opt])
        out = get_diag(dp)
        probs = MaskedSoftmax()([out,option_input])
        main_model = Model(inputs=[q_input,doc_input,option_input],outputs=probs)
        sgd = SGD(lr=0.1, decay=0., momentum=0., nesterov=False)
        main_model.compile(loss='categorical_crossentropy',optimizer=sgd,metrics=['accuracy'])
        main_model.summary()
        return main_model
Example #15
def conv_network(x1_shape, input_dims, embedding_matrix):
    input_a = Input(shape=x1_shape[1:])
    input_b = Input(shape=x1_shape[1:])

    def create_base_network(x1_shape, input_dims, embedding_matrix):
        inp = Input(x1_shape[1:])
        embedding_block = Embedding(input_dim=input_dims,
                                    output_dim=300,
                                    weights=[embedding_matrix],
                                    input_length=40,
                                    trainable=False)(inp)
        embedding_block = Convolution1D(
            64,
            5,
        )(embedding_block)
        embedding_block = PReLU()(embedding_block)
        embedding_block = Dropout(0.2)(embedding_block)
        embedding_block = Convolution1D(
            64,
            5,
        )(embedding_block)
        embedding_block = PReLU()(embedding_block)
        embedding_block = GlobalMaxPooling1D()(embedding_block)
        embedding_block = BatchNormalization()(embedding_block)
        embedding_block = Model(inputs=inp, outputs=embedding_block)
        return embedding_block

    base_network = create_base_network(x1_shape, input_dims, embedding_matrix)
    processed_a = base_network(input_a)  # shared
    processed_b = base_network(input_b)  # layers

    distance1 = Lambda(euclidean_distance,
                       output_shape=eucl_dist_output_shape)(
                           [processed_a, processed_b])

    distance2 = Dot(axes=1, normalize=True)([processed_a, processed_b])

    merged = Concatenate()([processed_a, processed_b, distance2])

    merged = BatchNormalization()(merged)
    merged = Dense(64)(merged)
    merged = PReLU()(merged)
    merged = Dropout(0.1)(merged)
    merged = BatchNormalization()(merged)
    merged = Dense(64)(merged)
    merged = PReLU()(merged)

    merged = BatchNormalization()(merged)
    merged = Dense(1, activation="sigmoid")(merged)
    model = Model(inputs=[input_a, input_b], outputs=merged)
    model.compile(loss="binary_crossentropy",
                  optimizer=Adam(1e-3),
                  metrics=["accuracy"])
    return model
Example #16
 def gater(self):
     dim_inputs_data = Input(shape=(self.train_dim[1],))
     dim_svm_yhat = Input(shape=(self.experts,))
     layer_1 = Dense(self.hidden_units,
                     activation='sigmoid')(dim_inputs_data)
     layer_2 = Dense(self.experts, name='layer_op_2',
                     activation='sigmoid', use_bias=False)(layer_1)
     layer_3 = Dot(1)([layer_2, dim_svm_yhat])
     out_layer = Dense(1, activation='tanh')(layer_3)
     model = Model(inputs=[dim_inputs_data, dim_svm_yhat], outputs=out_layer)
     adam = optimizers.Adam(lr=0.01)
     model.compile(loss='mse', optimizer=adam, metrics=['accuracy'])
     return model
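
With 2-D inputs of shape (batch, experts), Dot(1) reduces to a row-wise weighted sum, i.e. the gate output weights each expert's prediction. A purely illustrative NumPy version of that combination:

import numpy as np

gate = np.array([[0.2, 0.5, 0.3]])          # layer_2 output: one weight per expert
expert_yhat = np.array([[1.0, -1.0, 1.0]])  # dim_svm_yhat: each expert's prediction
combined = np.sum(gate * expert_yhat, axis=1, keepdims=True)  # what Dot(1) computes on 2-D inputs
print(combined)  # [[0.]] -> 0.2*1.0 + 0.5*(-1.0) + 0.3*1.0
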
Example #17
def createDistanceBranch(embedding_size, mode='l2'):
    """ Branch of global network: computes all pairwise distances between node embeddings of different graphs
    """
    input1 = Input(shape=(embedding_size, ), name='Input_EmbeddingA')
    input2 = Input(shape=(embedding_size, ), name='Input_EmbeddingB')

    layer_name = 'Output_DistanceAB'
    if mode == 'innerprod':  # inner product
        distance12 = Dot(axes=-1, name=layer_name)([input1, input2])
    elif mode == 'cossim':  # cosine similarity
        distance12 = Dot(axes=-1, normalize=True,
                         name=layer_name)([input1, input2])
    else:  # euclidean distance (default)
        #        def l2_graphdist(vects):
        #            x, y = vects
        #            sum_square = K.sum(K.square(x - y), axis=1, keepdims=True)
        #            return K.sigmoid(K.sqrt(K.maximum(sum_square, K.epsilon())))-.5
        #        distance12 =  Lambda(l2_graphdist, output_shape=l2_output_shape, name=layer_name)([input1,input2])
        distance12 = Lambda(l2_dist,
                            output_shape=l2_output_shape_dist,
                            name=layer_name)([input1, input2])
    return Model([input1, input2], distance12, name='Branch_Distance')
Example #18
def add_attention_layer_with_query_weighting(query_embedding,
                                             doc_embedding,
                                             layer_name,
                                             attention_level,
                                             query_weight,
                                             query_mask=None,
                                             doc_mask=None,
                                             mask=False):
    """
       Dot -> softmax -> pooling -> (mask) -> weighting

    """

    dot_prod = Dot(axes=-1, name=layer_name)([doc_embedding, query_embedding])
    norm_sim = Activation('softmax')(dot_prod)
    if mask:
        max_sim = Lambda(lambda x: max_pooling_with_mask(x[0], x[1]),
                         output_shape=lambda inp_shp: (
                             inp_shp[0][0],
                             inp_shp[0][2],
                         ))([norm_sim, query_mask])
        mean_sim = Lambda(lambda x: mean_pooling_with_mask(x[0], x[1], x[2]),
                          output_shape=lambda inp_shp: (
                              inp_shp[0][0],
                              inp_shp[0][2],
                          ))([norm_sim, doc_mask, query_mask])
    else:
        max_sim = Lambda(max_pooling,
                         output_shape=lambda inp_shp: (
                             inp_shp[0],
                             inp_shp[2],
                         ))(norm_sim)
        mean_sim = Lambda(mean_pooling,
                          output_shape=lambda inp_shp: (
                              inp_shp[0],
                              inp_shp[2],
                          ))(norm_sim)
    if attention_level <= 1:
        setattr(K, 'params', {'attention_level': attention_level})
        max_sim = Lambda(lambda x: elementwise_prod(x[0], x[1]),
                         output_shape=lambda inp_shp: (
                             inp_shp[0][0],
                             inp_shp[0][1],
                         ))([max_sim, query_weight])
        mean_sim = Lambda(lambda x: elementwise_prod(x[0], x[1]),
                          output_shape=lambda inp_shp:
                          (inp_shp[0][0], inp_shp[0][1]))(
                              [mean_sim, query_weight])
    return norm_sim, max_sim, mean_sim
Example #19
    def __init__(self, n_users, n_items, config=None):
        super().__init__(n_users, n_items, config)

        self.implicit = np.zeros((self.n_items, self.n_users, ))

        # Defaults
        default = {'n_factors': 40, 'reg_bias': 0.00005, 'reg_latent': 0.00003, 'implicit_thresh': 4.0,
                   'implicit_thresh_crosstrain': 4.75}
        
        default.update(self.config)
        self.config = default

        n_factors = self.config['n_factors']
        reg_bias = l2(self.config['reg_bias'])
        reg_latent = l2(self.config['reg_latent'])

        self.implicit_thresh = self.config.get('implicit_thresh', 4.0)
        self.implicit_thresh_crosstrain = self.config.get('implicit_thresh_crosstrain', 4.75)

        input_u = Input((1,))
        input_i = Input((1,))

        vec_u = Embedding(self.n_users, n_factors, input_length=1, embeddings_regularizer=reg_latent)(input_u)
        vec_u_r = Flatten()(vec_u)

        vec_implicit = Embedding(self.n_items, self.n_users, input_length=1, trainable=False, name='implicit')(
            input_i)

        implicit_factors = Dense(n_factors, kernel_initializer='normal', activation='linear',
                                 kernel_regularizer=reg_latent)(vec_implicit)

        implicit_factors = Flatten()(implicit_factors)

        mf = Dot(1)([implicit_factors, vec_u_r])

        bias_u = Embedding(self.n_users, 1, input_length=1, embeddings_initializer='zeros',
                           embeddings_regularizer=reg_bias)(input_u)
        bias_u_r = Flatten()(bias_u)
        bias_i = Embedding(self.n_items, 1, input_length=1, embeddings_initializer='zeros',
                           embeddings_regularizer=reg_bias)(input_i)
        bias_i_r = Flatten()(bias_i)

        added = Concatenate()([bias_u_r, bias_i_r, mf])

        mf_out = BiasLayer(bias_initializer=bias_init, name='bias', activation='sigmoid')(added)

        self.model = Model(inputs=[input_u, input_i], outputs=mf_out)

        self.compile()
Example #20
def declare_model(vocabulary_size, embedding_dim):
    word_input = Input(shape=(1, ))
    w_emb = Embedding(vocabulary_size, embedding_dim)(word_input)

    context_input = Input(shape=(1, ))
    c_emb = Embedding(vocabulary_size, embedding_dim)(context_input)

    dist = Dot(axes=2)([w_emb, c_emb])
    dist = Reshape((1, ), input_shape=(1, 1))(dist)

    o = Activation('sigmoid')(dist)

    model = Model(inputs=[word_input, context_input], outputs=o)

    return model
Example #21
def add_context2query_layer(query_embedding, biattention_matrix,
                            attention_level, max_query_len, max_doc_len):
    # Following the context-to-query implementation of BiDAF model
    # query_embedding: batch_size * max_query_len * nb_filters
    # biattention_matrix: batch_size * max_query_len * max_doc_len
    norm_biattention = Softmax(axis=-2)(biattention_matrix)
    # Activation('softmax', axis=-2)(biattention_matrix)
    reshape_norm_biatt = Reshape((
        max_doc_len,
        max_query_len,
    ))(norm_biattention)
    context_embedding = Dot(axes=[-1, -2],
                            name="context2query-%d" % attention_level)(
                                [reshape_norm_biatt, query_embedding])
    return context_embedding
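
The Dot(axes=[-1, -2]) call contracts the query dimension of the reshaped attention matrix, shape (batch, max_doc_len, max_query_len), against the query dimension of query_embedding, shape (batch, max_query_len, nb_filters), leaving one attended query vector per document position. A quick shape check with hypothetical sizes (tf.keras assumed):

from tensorflow.keras.layers import Dot, Input

max_query_len, max_doc_len, nb_filters = 20, 300, 128           # hypothetical sizes
att = Input(shape=(max_doc_len, max_query_len))                 # reshaped attention weights
query = Input(shape=(max_query_len, nb_filters))                # query embedding
context = Dot(axes=[-1, -2])([att, query])
print(context.shape)  # (None, 300, 128): one attended query vector per document position
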
Example #22
def lstm_network(x1_shape, input_dims, embedding_matrix):
    input_a = Input(shape=x1_shape[1:])
    input_b = Input(shape=x1_shape[1:])

    def create_base_network(x1_shape, input_dims, embedding_matrix):
        inp = Input(x1_shape[1:])
        embedding_block = Embedding(input_dim=input_dims,
                                    output_dim=300,
                                    weights=[embedding_matrix],
                                    input_length=40,
                                    trainable=False)(inp)
        #embedding_block = LSTM(32, return_sequences=True)(embedding_block)
        #embedding_block = BatchNormalization()(embedding_block)
        embedding_block = LSTM(32, return_sequences=True)(embedding_block)
        embedding_block = Lambda(lambda x: K.sum(x, axis=1))(embedding_block)
        embedding_block = Model(inputs=inp, outputs=embedding_block)
        return embedding_block

    base_network = create_base_network(x1_shape, input_dims, embedding_matrix)
    processed_a = base_network(input_a)  # shared
    processed_b = base_network(input_b)  # layers

    distance1 = Lambda(
        lambda x: K.mean(K.abs(x[1] - x[0]), axis=1, keepdims=True))([
            processed_a,
            processed_b,
        ])
    distance1 = BatchNormalization()(distance1)
    distance2 = Dot(axes=1, normalize=True)([
        processed_a,
        processed_b,
    ])

    merged = Concatenate()([processed_a, processed_b, distance2])

    merged = Dense(32)(merged)
    merged = PReLU()(merged)
    merged = BatchNormalization()(merged)
    merged = Dense(32)(merged)
    merged = PReLU()(merged)
    merged = BatchNormalization()(merged)

    merged = Dense(1, activation="sigmoid")(merged)
    model = Model(inputs=[input_a, input_b], outputs=merged)
    model.compile(loss="binary_crossentropy",
                  optimizer=Adam(1e-3),
                  metrics=["accuracy"])
    return model
Example #23
def create_factorization_model(user_size, item_size, hidden_size, **kwargs):
    ''' Basic generalization of matrix factorization models.
	user_size and item_size are number of users and items, respectively
	hidden_size determines number of attributes
	optional arguments:
	regularization: amount of L2 regularization to apply, none by default
	activations: vector of activations for user and item hidden layers. default is "linear", use "relu" for non-negative matrix factorization
	more_complex: learns additional weights for each attribute instead of taking simple dot product. False by default.
	useIntercepts: whether to use user and item intercepts. common practice, but false by default here
	squash: if output is bounded, it can help to tell that to the model, but then need to normalize output to fall between 0 and 1. False by default.
	'''
    inputU = Input(shape=(user_size, ), name="user_1hot")
    inputI = Input(shape=(item_size, ), name="item_1hot")
    regularization = kwargs.get('regularization')
    regularizer = regularizers.l2(regularization) if regularization else None
    if hidden_size:
        activations = kwargs.get('activations') or ["linear", "linear"]
        print(activations)
        hiddenU = Dense(hidden_size,
                        activation=activations[0],
                        name="user_hidden",
                        kernel_regularizer=regularizer,
                        use_bias=True)(inputU)
        hiddenI = Dense(hidden_size,
                        activation=activations[1],
                        name="item_hidden",
                        kernel_regularizer=regularizer,
                        use_bias=True)(inputI)
        output = Dense(1, kernel_regularizer=to_one(regularization or .01))(
            Multiply(name="aspect_points")(
                [hiddenU, hiddenI])) if kwargs.get('more_complex') else Dot(
                    axes=1)([hiddenU, hiddenI])
        if kwargs.get('useIntercepts'):
            intercept = Dense(1, use_bias=True,
                              kernel_regularizer=regularizer)(
                                  Concatenate()([inputU, inputI]))
            output = Add(name="prediction")([output, intercept])
    else:
        output = Dense(1,
                       name="prediction",
                       use_bias=True,
                       kernel_regularizer=regularizer)(Concatenate()(
                           [inputU, inputI]))  # same as user_item model
    if kwargs.get('squash'):
        output = Dense(1)(Dense(1, activation="sigmoid")(output))
    model = Model(inputs=[inputU, inputI], outputs=[output])
    model.compile(optimizer='adam', loss='mse')
    return model
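
One possible way to call the factory above with its defaults (hedged sketch: the sizes are made up, the inputs are one-hot user and item vectors, and the more_complex path is avoided because it depends on a to_one regularizer defined elsewhere):

# Usage sketch, assuming create_factorization_model above is in scope.
import numpy as np

user_size, item_size, hidden_size = 200, 100, 16
model = create_factorization_model(user_size, item_size, hidden_size, regularization=1e-4)

n = 512
users = np.eye(user_size, dtype='float32')[np.random.randint(0, user_size, n)]   # one-hot users
items = np.eye(item_size, dtype='float32')[np.random.randint(0, item_size, n)]   # one-hot items
ratings = np.random.uniform(1.0, 5.0, size=(n, 1)).astype('float32')             # fake targets
model.fit([users, items], ratings, epochs=2, batch_size=64)
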
Example #24
def MFModel(users_num, movies_num, latent_dim=666):
    user_input = Input(shape=[1])
    item_input = Input(shape=[1])
    user_vec = Embedding(users_num + 1, latent_dim)(user_input)
    user_vec = Flatten()(user_vec)
    item_vec = Embedding(movies_num + 1, latent_dim)(item_input)
    item_vec = Flatten()(item_vec)
    user_bias = Embedding(users_num + 1, 1, embeddings_initializer="zeros")(user_input)
    user_bias = Flatten()(user_bias)
    item_bias = Embedding(movies_num + 1, 1, embeddings_initializer="zeros")(item_input)
    item_bias = Flatten()(item_bias)
    r_hat = Dot(axes=1)([user_vec, item_vec])  # dot the latent vectors, not the biases
    r_hat = Add()([r_hat, user_bias, item_bias])
    model = keras.models.Model([user_input, item_input], r_hat)

    return model
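
MFModel returns an uncompiled model whose inputs are integer user and movie IDs. A hedged training sketch with made-up sizes and random ratings:

# Training sketch, assuming MFModel above is in scope.
import numpy as np

users_num, movies_num = 1000, 500
model = MFModel(users_num, movies_num, latent_dim=32)
model.compile(optimizer='adam', loss='mse')

n = 2048
user_ids = np.random.randint(1, users_num + 1, size=(n, 1))
movie_ids = np.random.randint(1, movies_num + 1, size=(n, 1))
ratings = np.random.uniform(1.0, 5.0, size=(n, 1)).astype('float32')
model.fit([user_ids, movie_ids], ratings, epochs=2, batch_size=256)
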
Example #25
def alignments2vec(x, y, V, tokenizer):
    # inputs
    w_inputs = Input(shape=(1, ), dtype='int32')
    w = Embedding(V, vec_length)(w_inputs)

    # context
    c_inputs = Input(shape=(1, ), dtype='int32')
    c = Embedding(V, vec_length)(c_inputs)
    o = Dot(axes=2)([w, c])
    o = Reshape((1, ), input_shape=(1, 1))(o)
    o = Activation('sigmoid')(o)

    SkipGram = Model(inputs=[w_inputs, c_inputs], outputs=o)
    SkipGram.summary()
    SkipGram.compile(loss='binary_crossentropy', optimizer='adam')

    history = SkipGram.fit_generator(
        generate_vec_batch(x, y, batch_size, tokenizer, SkipGram),
        steps_per_epoch=steps_per_epoch,
        epochs=300,  #len(x_train)//batch_size//steps_per_epoch,
        validation_data=generate_vec_batch(x, y, batch_size, tokenizer,
                                           SkipGram),
        validation_steps=steps_per_epoch)

    print(history.history.keys())
    # summarize history for accuracy
    # summarize history for loss
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

    f = open('alignment_vec.txt', 'w')
    f.write('{} {}\n'.format(V - 1, vec_length))
    vectors = SkipGram.get_weights()[0]
    for word, i in tokenizer.word_index.items():
        f.write('{} {}\n'.format(word, ' '.join(map(str,
                                                    list(vectors[i, :])))))
    f.close()

    w2v = gensim.models.KeyedVectors.load_word2vec_format(
        './alignment_vec.txt', binary=False)
    print(w2v.most_similar(positive=['a' * word_length]))
Example #26
    def maxpool_cosine_score_model(self, input_dim):
        """Define a model with bi-LSTM layers and without attention."""

        input_a = Input(shape=(input_dim,))
        input_b = Input(shape=(input_dim,))
        if self.type_of_weights == "shared":
            drop_layer = Dropout(self.emb_drop_val)
            drop_a = drop_layer(input_a)
            drop_b = drop_layer(input_b)
            embedding_layer = self.create_embedding_layer(self.max_sequence_length)
            emb_a = embedding_layer(drop_a)
            emb_b = embedding_layer(drop_b)
            lstm_layer = self.create_lstm_layer_max_pooling(self.max_sequence_length)
            lstm_a = lstm_layer(emb_a)
            lstm_b = lstm_layer(emb_b)
        elif self.type_of_weights == "separate":
            drop_layer_a = Dropout(self.emb_drop_val)
            drop_layer_b = Dropout(self.emb_drop_val)
            drop_a = drop_layer_a(input_a)
            drop_b = drop_layer_b(input_b)
            embedding_layer_a = self.create_embedding_layer(self.max_sequence_length)
            embedding_layer_b = self.create_embedding_layer(self.max_sequence_length)
            emb_a = embedding_layer_a(drop_a)
            emb_b = embedding_layer_b(drop_b)
            lstm_layer_a = self.create_lstm_layer_max_pooling(self.max_sequence_length)
            lstm_layer_b = self.create_lstm_layer_max_pooling(self.max_sequence_length)
            lstm_a = lstm_layer_a(emb_a)
            lstm_b = lstm_layer_b(emb_b)
        if self.pooling is None or self.pooling == "max":
            lstm_a = Lambda(self.max_pooling, output_shape=self.max_pooling_output_shape,
                                name="max_pooling_a")(lstm_a)
            lstm_b = Lambda(self.max_pooling, output_shape=self.max_pooling_output_shape,
                                name="max_pooling_b")(lstm_b)
            if self.type_of_weights == "shared":
                dropout = Dropout(self.maxpool_drop_val)
                lstm_a = dropout(lstm_a)
                lstm_b = dropout(lstm_b)
            elif self.type_of_weights == "separate":
                dropout_a = Dropout(self.maxpool_drop_val)
                dropout_b = Dropout(self.maxpool_drop_val)
                lstm_a = dropout_a(lstm_a)
                lstm_b = dropout_b(lstm_b)
        cosine = Dot(normalize=True, axes=-1)([lstm_a, lstm_b])
        model = Model([input_a, input_b], cosine, name="score_model")
        return model
Example #27
def get_model():
    logging.info("Building the model")
    input1 = Input(shape=(MAXLEN, 21), dtype='float32', name='input1')
    input2 = Input(shape=(MAXLEN, 21), dtype='float32', name='input2')
    feature_model = get_dense_features()
    vector1 = feature_model(input1)
    vector2 = feature_model(input2)
    net = Dot(axes=1)([vector1, vector2])
    net = Activation('sigmoid')(net)

    model = Model(inputs=[input1, input2], outputs=net)
    model.summary()
    logging.info('Compiling the model')
    optimizer = RMSprop()

    model.compile(optimizer=optimizer, loss='binary_crossentropy')
    logging.info('Compilation finished')
    return model
Example #28
def add_query2context_layer(doc_embedding, biattention_matrix, attention_level,
                            max_query_len, max_doc_len, nb_filters):
    # doc_embedding: batch_size * max_doc_len * nb_filters
    # biattention_matrix: batch_size * max_query_len * max_doc_len
    max_biattention = Lambda(max_pooling,
                             output_shape=lambda inp_shp: (
                                 inp_shp[0],
                                 inp_shp[2],
                             ))(biattention_matrix)
    norm_biatt = Activation('softmax')(max_biattention)
    reshape_doc_embedding = Reshape((
        nb_filters,
        max_doc_len,
    ))(doc_embedding)
    context_embedding = Dot(axes=-1,
                            name="query2context-%d" % attention_level)(
                                [reshape_doc_embedding, norm_biatt])
    return context_embedding
Example #29
def SiameseLSTM(max_token_length, hidden_size, embedding_size=300):
    text_input_1 = Input(shape=(max_token_length, embedding_size),
                         name='text_1')
    text_mask_1 = Masking(mask_value=0.0, name='text_mask_1')(text_input_1)
    # text_dropout_1 = Dropout(.5, name='text_dropout_1')(text_mask_1)

    text_input_2 = Input(shape=(max_token_length, embedding_size),
                         name='text_2')
    text_mask_2 = Masking(mask_value=0.0, name='text_mask_2')(text_input_2)
    # text_dropout_2 = Dropout(.5, name='text_dropout_2')(text_mask_2)

    lstm_1_a = Bidirectional(
        GRU(units=hidden_size, return_sequences=True,
            name='RNN_1_a'))(text_mask_1)

    lstm_1_b = Bidirectional(
        GRU(units=hidden_size, return_sequences=False,
            name='RNN_1_b'))(lstm_1_a)
    """
    lstm_1_c = Bidirectional(GRU(units=hidden_size,
                                 return_sequences=False,
                                 name='RNN_1_c'))(lstm_1_b)
    """

    lstm_2_a = Bidirectional(
        GRU(units=hidden_size, return_sequences=True,
            name='RNN_2_a'))(text_mask_2)

    lstm_2_b = Bidirectional(
        GRU(units=hidden_size, return_sequences=False,
            name='RNN_2_b'))(lstm_2_a)
    """
    lstm_2_c = Bidirectional(GRU(units=hidden_size,
                                 return_sequences=False,
                                 name='RNN_2_c'))(lstm_2_b)
    """

    cosine_similarity = Dot(axes=1, normalize=True,
                            name='cosine_similarity')([lstm_1_b, lstm_2_b])

    model = Model(inputs=[text_input_1, text_input_2],
                  outputs=cosine_similarity)

    return model
Example #30
def get(args):
    # get configuration from args
    # input ID sequence
    w_input = Input(shape=(1,), dtype='int32', name='input')

    # embedding sequence
    w = Embedding(input_dim=args.vocabulary_size,
                  output_dim=args.embedding_size,
                  embeddings_initializer='glorot_uniform')(w_input)

    # context
    c_input = Input(shape=(1,), dtype='int32', name='context')
    c = Embedding(input_dim=args.vocabulary_size,
                  output_dim=args.embedding_size,
                  embeddings_initializer='glorot_uniform')(c_input)

    # output (sigmoid of the word-context dot product)
    output_ = Dot(axes=2)([w, c])
    output_ = Reshape((1,), input_shape=(1, 1))(output_)
    output = Activation('sigmoid')(output_)

    # model
    SkipGram_model = Model(inputs=[w_input, c_input], outputs=output)

    # initialize the optimizer
    ADAM_ = Adam(lr=args.lr)

    # compile the SkipGram_model
    SkipGram_model.compile(loss='binary_crossentropy', optimizer=ADAM_)

    # save a picture of the SkipGram_model architecture
    print(time.strftime('%Y-%m-%d %H:%M:%S') +
          ' Save picture of SkipGram_model architecture')
    plot_model(SkipGram_model, show_shapes=True,
               to_file=args.model_picture)

    # show the information of the SkipGram_model
    print(time.strftime('%Y-%m-%d %H:%M:%S') + " Model summary")
    print(SkipGram_model.summary())

    # return the skip gram model
    return SkipGram_model