def load_model():
    query = Input(shape=(None, WORD_DEPTH))
    pos_doc = Input(shape=(None, WORD_DEPTH))
    neg_docs = [Input(shape=(None, WORD_DEPTH)) for j in range(J)]

    BATCH_SIZE = 50
    filepath = 'weight.h5'

    query_conv = Convolution1D(K, FILTER_LENGTH, padding="same", input_shape=(None, WORD_DEPTH), activation="tanh")(query)
    query_max = Lambda(lambda x: backend.max(x, axis=1), output_shape=(K,))(query_conv)  # See section 3.4.
    query_sem = Dense(L, activation="tanh", input_dim=K)(query_max)  # See section 3.5.

    doc_conv = Convolution1D(K, FILTER_LENGTH, padding="same", input_shape=(None, WORD_DEPTH), activation="tanh")
    doc_max = Lambda(lambda x: backend.max(x, axis=1), output_shape=(K,))
    doc_sem = Dense(L, activation="tanh", input_dim=K)

    pos_doc_conv = doc_conv(pos_doc)
    neg_doc_convs = [doc_conv(neg_doc) for neg_doc in neg_docs]

    pos_doc_max = doc_max(pos_doc_conv)
    neg_doc_maxes = [doc_max(neg_doc_conv) for neg_doc_conv in neg_doc_convs]

    pos_doc_sem = doc_sem(pos_doc_max)
    neg_doc_sems = [doc_sem(neg_doc_max) for neg_doc_max in neg_doc_maxes]

    R_Q_D_p = dot([query_sem, pos_doc_sem], axes=1, normalize=True)  # See equation (4).
    R_Q_D_ns = [dot([query_sem, neg_doc_sem], axes=1, normalize=True) for neg_doc_sem in neg_doc_sems]

    concat_Rs = concatenate([R_Q_D_p] + R_Q_D_ns)
    concat_Rs = Reshape((J + 1, 1))(concat_Rs)

    weight = np.array([1]).reshape(1, 1, 1)
    with_gamma = Convolution1D(1, 1, padding="same", input_shape=(J + 1, 1), activation="linear", use_bias=False, weights=[weight])(concat_Rs)  # See equation (5).
    with_gamma = Reshape((J + 1,))(with_gamma)

    prob = Activation("softmax")(with_gamma)  # See equation (5).

    model = Model(inputs=[query, pos_doc] + neg_docs, outputs=prob)
    model.compile(optimizer="adadelta", loss="categorical_crossentropy")
    model.load_weights(filepath)

    return model

def keras_skip_gram(trainlist1, weight1, weight2):
    N, d = weight1.shape
    negative_num = trainlist1[0][2].shape[1]

    shared_layer1 = Embedding(input_dim=N, output_dim=d, weights=[weight1])  # shared_layer1 is the output layer
    shared_layer2 = Embedding(input_dim=N, output_dim=d, weights=[weight2])  # shared_layer2 is the hidden layer

    input_target = Input(shape=(1,), dtype='int32', name='input_1')
    input_source = Input(shape=(1,), dtype='int32', name='input_2')
    input_negative = Input(shape=(negative_num,), dtype='int32', name='input_3')

    target = shared_layer1(input_target)
    source = shared_layer2(input_source)
    negative = shared_layer1(input_negative)

    positive_dot = dot([source, target], axes=(2), normalize=False)
    negative_dot = dot([source, negative], axes=(2), normalize=False)
    all_dot = concatenate([positive_dot, negative_dot], axis=2)
    sigmoid_sample = Activation('sigmoid')(all_dot)

    model = Model(inputs=[input_target, input_source, input_negative], outputs=[sigmoid_sample])
    sgd2 = optimizers.SGD(lr=0.025, nesterov=True)
    model.compile(loss='binary_crossentropy', optimizer=sgd2)

    for [a1, a2, a4, y1] in trainlist1:
        loss = model.train_on_batch([a1, a2, a4], y1)

    embed_output = shared_layer1.get_weights()[0]
    embed_hidden = shared_layer2.get_weights()[0]
    return embed_output, embed_hidden

def buildBLTSM(numFeatures, LRate):
    # MODEL WITH ATTENTION
    nb_lstm_cells = 128
    nb_classes = 4
    nb_hidden_units = 512

    # Logistic regression for learning the attention parameters with a standalone feature as input
    input_attention = Input(shape=(nb_lstm_cells * 2,))
    u = Dense(nb_lstm_cells * 2, activation='softmax')(input_attention)

    # Bi-directional Long Short-Term Memory for learning the temporal aggregation
    input_feature = Input(shape=(None, numFeatures))
    x = Masking(mask_value=0.)(input_feature)
    x = Dense(nb_hidden_units, activation='relu')(x)
    x = Dropout(0.5)(x)
    x = Dense(nb_hidden_units, activation='relu')(x)
    x = Dropout(0.5)(x)
    y = Bidirectional(LSTM(nb_lstm_cells, return_sequences=True, dropout=0.5))(x)

    # To compute the final weights for the frames which sum to unity
    alpha = dot([u, y], axes=-1)  # inner prod.
    alpha = Activation('softmax')(alpha)

    # Weighted pooling to get the utterance-level representation
    z = dot([alpha, y], axes=1)

    # Get posterior probability for each emotional class
    output = Dense(nb_classes, activation='softmax')(z)

    model = Model(inputs=[input_attention, input_feature], outputs=output)
    model.compile(loss='categorical_crossentropy', optimizer=RMSprop(lr=LRate, rho=0.9, epsilon=None, decay=0.0), metrics=['categorical_accuracy'])
    # model.compile(loss='categorical_crossentropy', optimizer=RMSprop(lr=0.001), metrics=['categorical_accuracy'])

    return model

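# Usage sketch (an assumption added for illustration, not part of the original source): the
# attention branch above takes a standalone vector of length 2 * nb_lstm_cells per utterance;
# a constant uniform vector is one simple choice. All names and sizes below are hypothetical toy data.
def _demo_buildBLTSM():
    import numpy as np
    model = buildBLTSM(numFeatures=36, LRate=0.001)
    n_utt, n_frames = 8, 100
    u_train = np.full((n_utt, 256), 1.0 / 256)            # constant attention input (2 * 128)
    x_train = np.random.rand(n_utt, n_frames, 36)         # frame-level features
    y_train = np.eye(4)[np.random.randint(0, 4, n_utt)]   # one-hot labels for 4 emotion classes
    model.fit([u_train, x_train], y_train, batch_size=4, epochs=1)
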
def siamese():
    input_tensor = Input(shape=INPUT_SHAPE)
    vgg_model1 = Model(input_tensor, vgg_16_base(input_tensor))
    # vgg_model2 = Model(input_tensor, vgg_16_base(input_tensor))
    # vgg_model3 = Model(input_tensor, vgg_16_base(input_tensor))

    input_im1 = Input(shape=INPUT_SHAPE, name='Anchor')    # anchor image
    input_im2 = Input(shape=INPUT_SHAPE, name='Positive')  # positive example image
    input_im3 = Input(shape=INPUT_SHAPE, name='Negtive')   # negative example image

    out_im1 = vgg_model1(input_im1)
    out_im2 = vgg_model1(input_im2)
    out_im3 = vgg_model1(input_im3)
    # out_im1 = Dense(128)(out_im1)
    # out_im2 = Dense(128)(out_im2)
    # out_im3 = Dense(128)(out_im3)

    right_cos = dot([out_im1, out_im2], -1, normalize=True)
    wrong_cos = dot([out_im1, out_im3], -1, normalize=True)

    Triple_loss = Lambda(lambda x: K.relu(MARGIN + x[0] - x[1]))([wrong_cos, right_cos])

    train_model = Model(inputs=[input_im1, input_im2, input_im3], outputs=Triple_loss)
    model_encoder = Model(inputs=input_im1, outputs=out_im1)
    model_right_encoder = Model(inputs=input_im2, outputs=out_im2)

    train_model.compile(optimizer=SGD(lr=1e-4, decay=1e-6), loss=lambda y_true, y_pred: y_pred)  # ignore y_true
    model_encoder.compile(optimizer='adam', loss='mse')
    model_right_encoder.compile(optimizer='adam', loss='mse')

    return train_model, model_encoder, model_right_encoder

def get_word2vec_model(self):
    with tf.device('/gpu:0'):
        input_target = Input((1,))
        input_context = Input((1,))

        target = self.word_embd(input_target)
        target = Reshape((opt.word_embd_size, 1))(target)
        context = self.word_embd(input_context)
        context = Reshape((opt.word_embd_size, 1))(context)

        # setup a cosine similarity operation which will be output in a secondary model
        similarity = merge.dot([target, context], axes=0, normalize=True)

        # now perform the dot product operation to get a similarity measure
        dot_product = merge.dot([target, context], axes=1)
        dot_product = Reshape((1,))(dot_product)

        # add the sigmoid output layer
        output = Dense(1, activation='sigmoid')(dot_product)

        # create the primary training model
        model = Model(input=[input_target, input_context], output=output)
        model.compile(loss='binary_crossentropy', optimizer='rmsprop')
        model.summary()

        validation_model = Model(input=[input_target, input_context], output=similarity)

    return model, validation_model

def create_softmax_la_network(input_shape, nb_lstm_cells=128, nb_classes=7):
    '''
    input_shape: (time_steps, features,)
    '''
    with K.name_scope('BLSTMLayer'):
        # Bi-directional Long Short-Term Memory for learning the temporal aggregation
        input_feature = Input(shape=input_shape)
        x = Masking(mask_value=globalvars.masking_value)(input_feature)
        x = Dense(globalvars.nb_hidden_units, activation='relu')(x)
        x = Dropout(0.5)(x)
        x = Dense(globalvars.nb_hidden_units, activation='relu')(x)
        x = Dropout(0.5)(x)
        y = Bidirectional(LSTM(nb_lstm_cells, return_sequences=True, dropout=0.5))(x)

    with K.name_scope('AttentionLayer'):
        # Logistic regression for learning the attention parameters with a standalone feature as input
        input_attention = Input(shape=(nb_lstm_cells * 2,))
        u = Dense(nb_lstm_cells * 2, activation='softmax')(input_attention)

        # To compute the final weights for the frames which sum to unity
        alpha = dot([u, y], axes=-1)  # inner prod.
        alpha = Activation('softmax')(alpha)

    with K.name_scope('WeightedPooling'):
        # Weighted pooling to get the utterance-level representation
        z = dot([alpha, y], axes=1)

    # Get posterior probability for each emotional class
    output = Dense(nb_classes, activation='softmax')(z)

    return Model(inputs=[input_attention, input_feature], outputs=output)

def create_emb_model(maxlen, word_rep=100):
    # maxlen = None
    num_u = word_rep
    vocabulary_size = 51253
    reduction_d_a = 64
    reduction_r = 16
    FILTER_LENGTH = 2 * num_u
    K = 128

    # Embedding network
    word_emb = Embedding(input_dim=vocabulary_size + 1, output_dim=word_rep, input_length=maxlen, mask_zero=True)
    biLSTM_H = Bidirectional(LSTM(num_u, return_sequences=True, name="LSTM"), merge_mode='concat', name="Bidirectional_LSTM")

    # multi-layer perceptron, using attention
    mlp_hid_1 = Dense(reduction_d_a, activation="tanh", name="mlp_tanh")
    mlp_hid_2 = Dense(reduction_r, activation="softmax", name="mlp_softmax")  # activity_regularizer=penalization_l2, name="mlp_softmax")

    # learning to rank architecture
    Conv1D_Feature = Convolution1D(1, 2 * num_u, padding="same", input_shape=(2 * num_u, reduction_r), activation="linear", use_bias=False, name="position_aware")
    f_conv = Convolution1D(K, FILTER_LENGTH, padding="same", activation="tanh", name="feature_conv")
    f_max = Lambda(lambda x: backend.max(x, axis=1), output_shape=(K,), name="feature_max")
    sem_h1 = Dense(K, activation="tanh", name="sem_h1")
    sem_h2 = Dense(K, activation="tanh", name="sem_h2")
    sem_out = Dense(K, activation="tanh", name="sme_out")

    qry = Input(shape=(maxlen,), name="qry_input")
    doc = Input(shape=(maxlen,), name="doc_input")
    # qry_rep = Masking(mask_value=.0)(qry)
    # doc_rep = Masking(mask_value=.0)(doc)
    qry_rep = Masking(mask_value=.0)(word_emb(qry))
    doc_rep = Masking(mask_value=.0)(word_emb(doc))

    # query feature map
    qry_H = biLSTM_H(qry_rep)
    q_h1 = mlp_hid_1(qry_H)
    q_A = mlp_hid_2(q_h1)
    M = dot([qry_H, q_A], axes=1, normalize=False, name="with_Attention_qry")  # normalize expects a bool, not the string "False"
    M = Reshape((2 * num_u, reduction_r))(M)
    # M = Reshape((2 * num_u, ))(M)

    # doc feature map
    doc_H = biLSTM_H(doc_rep)
    d_h1 = mlp_hid_1(doc_H)
    d_A = mlp_hid_2(d_h1)
    M_d = dot([doc_H, d_A], axes=1, normalize=False, name="with_Attention_doc")
    M_d = Reshape((2 * num_u, reduction_r))(M_d)
    # M_d = Reshape((2 * num_u, ))(M_d)

    # ranking task
    conv_qry = f_max(f_conv(M))
    # conv_qry = Reshape((2 * num_u,))(conv_qry)
    ref_qry = sem_out(sem_h2(sem_h1(conv_qry)))

    conv_doc = f_max(f_conv(M_d))
    # conv_doc = Reshape((2 * num_u,))(conv_doc)
    ref_doc = sem_out(sem_h2(sem_h1(conv_doc)))

    # similarity
    similarity = dot([ref_qry, ref_doc], axes=1, normalize=True, name="cosine_similarity")
    # similarity = dot([M, M_d], axes=1, normalize=True, name="cosine_similarity")
    predict = Activation("sigmoid", name="predict_layer")(similarity)

    model = Model(inputs=[qry, doc], outputs=predict)
    model.compile(optimizer="adadelta", loss="binary_crossentropy", metrics=['accuracy'])
    model.summary()
    return model

def inner_loss(self, y_true, y_pred, margin=1.0):
    # output = keras.layers.concatenate([video_vector, p_vector, n_vector], axis=1)
    video_vector, p_vector, n_vector = tf.unstack(y_pred, axis=1)
    right_cos = dot([video_vector, p_vector], -1, normalize=True)
    wrong_cos = dot([video_vector, n_vector], -1, normalize=True)
    loss = Lambda(lambda x: K.relu(margin + x[0] - x[1]))([wrong_cos, right_cos])
    # loss = Lambda(lambda x: x[0] - x[1])([wrong_cos, right_cos])
    return loss

def create_model():
    WORD_DEPTH = 51253
    K = 300  # Dimensionality of the projection layer. See section 3.1.
    L = 128  # Dimensionality of latent semantic space. See section 3.1.
    J = 3    # Number of random unclicked documents serving as negative examples for a query. See section 3.

    # Input tensors holding the query, positive (clicked) document, and negative (unclicked) documents.
    query = Input(shape=(WORD_DEPTH,), name="query")
    pos_doc = Input(shape=(WORD_DEPTH,), name="pos_doc")
    neg_docs = [Input(shape=(WORD_DEPTH,), name="neg_doc_" + str(j)) for j in range(J)]

    # Latent Semantic Model
    # projection from high dimension to low.
    proj = Dense(K, name="proj_1", activation="tanh")
    proj_2 = Dense(K, name="proj_2", activation="tanh")
    sem = Dense(L, name="sem", activation="tanh")

    query_proj = proj(query)
    pos_doc_proj = proj(pos_doc)
    neg_doc_projs = [proj(neg_doc) for neg_doc in neg_docs]

    query_proj2 = proj_2(query_proj)
    pos_doc_proj2 = proj_2(pos_doc_proj)
    neg_doc_proj2s = [proj_2(neg_doc_proj) for neg_doc_proj in neg_doc_projs]

    query_sem = sem(query_proj2)
    pos_doc_sem = sem(pos_doc_proj2)
    neg_doc_sems = [sem(neg_doc_proj2) for neg_doc_proj2 in neg_doc_proj2s]

    # This layer calculates the cosine similarity between the semantic representations of
    # a query and a document.
    R_Q_D_p = dot([query_sem, pos_doc_sem], axes=1, normalize=True, name="pos_cos")  # See equation (5).
    R_Q_D_ns = [dot([query_sem, neg_doc_sem], axes=1, normalize=True, name="neg_cos_" + str(i)) for i, neg_doc_sem in enumerate(neg_doc_sems)]  # See equation (5).

    concat_Rs = concatenate([R_Q_D_p] + R_Q_D_ns, name="concat_without_gamma")
    concat_Rs = Reshape((J + 1, 1))(concat_Rs)

    # In this step, we multiply each R(Q, D) value by gamma. In the paper, gamma is
    # described as a smoothing factor for the softmax function, and it's set empirically
    # on a held-out data set.
    weight = np.full((1, 1, 1), 1)
    # We're also going to learn gamma's value by pretending it's a single 1 x 1 kernel.
    with_gamma = Convolution1D(1, 1, padding="same", input_shape=(J + 1, 1), activation="linear", use_bias=False, weights=[weight])(concat_Rs)  # See equation (5).
    with_gamma = Reshape((J + 1,))(with_gamma)

    # Finally, we use the softmax function to calculate P(D+|Q).
    prob = Activation("softmax")(with_gamma)  # See equation (5).

    # We now have everything we need to define our model.
    model = Model(inputs=[query, pos_doc] + neg_docs, outputs=prob)
    model.compile(optimizer="adadelta", loss="categorical_crossentropy", metrics=['accuracy'])
    model.summary()
    return model

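# Standalone check (added as an illustration, not from the original code): the 1 x 1 linear
# Conv1D with a single filter and no bias used above multiplies every R(Q, D) score by one
# shared, learnable scalar (gamma) before the softmax. Names below are hypothetical.
def _demo_gamma_conv():
    import numpy as np
    from keras.layers import Input, Convolution1D
    from keras.models import Model

    gamma = 2.5
    scores_in = Input(shape=(4, 1))  # J + 1 = 4 similarity scores
    scaled = Convolution1D(1, 1, padding="same", activation="linear",
                           use_bias=False, weights=[np.full((1, 1, 1), gamma)])(scores_in)
    m = Model(scores_in, scaled)
    print(m.predict(np.array([[[0.1], [0.2], [0.3], [0.4]]])))  # each score multiplied by gamma
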
def __init__(self, K=300, L=128, J=1, WORD_DEPTH=50005):
    # Input tensors holding the query, positive (clicked) document, and negative (unclicked) documents.
    query = Input(shape=(WORD_DEPTH,))
    pos_doc = Input(shape=(WORD_DEPTH,))
    neg_docs = [Input(shape=(WORD_DEPTH,)) for j in range(J)]

    dense = Dense(L, activation="tanh")
    query_sem = dense(query)
    # query_sem = Dense(L, activation="tanh")(query)  # See section 3.5.

    # doc_sem = Dense(L, activation="tanh")
    # shared dense
    doc_sem = dense
    pos_doc_sem = doc_sem(pos_doc)
    neg_doc_sems = [doc_sem(neg_doc) for neg_doc in neg_docs]

    # This layer calculates the cosine similarity between the semantic representations of
    # a query and a document.
    R_Q_D_p = dot([query_sem, pos_doc_sem], axes=1, normalize=True)  # See equation (4).
    R_Q_D_ns = [dot([query_sem, neg_doc_sem], axes=1, normalize=True) for neg_doc_sem in neg_doc_sems]  # See equation (4).

    concat_Rs = concatenate([R_Q_D_p] + R_Q_D_ns)
    concat_Rs = Reshape((J + 1, 1))(concat_Rs)

    # In this step, we multiply each R(Q, D) value by gamma. In the paper, gamma is
    # described as a smoothing factor for the softmax function, and it's set empirically
    # on a held-out data set. We're going to learn gamma's value by pretending it's
    # a single 1 x 1 kernel.
    weight = np.array([1]).reshape(1, 1, 1)
    with_gamma = Convolution1D(1, 1, padding="same", input_shape=(J + 1, 1), activation="linear", use_bias=False, weights=[weight])(concat_Rs)  # See equation (5).
    with_gamma = Reshape((J + 1,))(with_gamma)

    # Finally, we use the softmax function to calculate P(D+|Q).
    prob = Activation("softmax")(with_gamma)  # See equation (5).

    # We now have everything we need to define our model.
    self.model = Model(inputs=[query, pos_doc] + neg_docs, outputs=prob)
    self.model.compile(optimizer="adadelta", loss="categorical_crossentropy")

    self.encoder = Model(inputs=query, outputs=query_sem)

def buildBLTSM(numFeaturesAudio, numFeaturesText, LRate=0.001):
    nb_lstm_cells = 128
    nb_classes = 4
    nb_hidden_units = 512  # 128

    # MODEL AUDIO WITH ATTENTION
    # Input attention
    input_attention = Input(shape=(nb_lstm_cells * 2,))
    u = Dense(nb_lstm_cells * 2, activation='softmax')(input_attention)

    # Input Audio and Text
    input_featureAudio = Input(shape=(None, numFeaturesAudio))
    input_featureText = Input(shape=(None, numFeaturesText))

    # Both models in a parallel structure
    x1 = Masking(mask_value=0.)(input_featureText)
    x1 = Dense(nb_hidden_units, activation='relu')(x1)
    x1 = Dropout(0.5)(x1)
    x1 = Dense(nb_hidden_units, activation='relu')(x1)
    x1 = Dropout(0.5)(x1)
    y1 = Bidirectional(LSTM(nb_lstm_cells, return_sequences=True, dropout=0.5))(x1)

    x2 = Masking(mask_value=0.)(input_featureAudio)
    x2 = Dense(nb_hidden_units, activation='relu')(x2)
    x2 = Dropout(0.5)(x2)
    x2 = Dense(nb_hidden_units, activation='relu')(x2)
    x2 = Dropout(0.5)(x2)
    y2 = Bidirectional(LSTM(nb_lstm_cells, return_sequences=True, dropout=0.5))(x2)

    # Attention step, parallel for both models
    alpha1 = dot([u, y1], axes=-1)  # inner prod.
    alpha1 = Activation('softmax')(alpha1)
    alpha2 = dot([u, y2], axes=-1)  # inner prod.
    alpha2 = Activation('softmax')(alpha2)

    z1 = dot([alpha1, y1], axes=1)
    z2 = dot([alpha2, y2], axes=1)

    # Merge step
    mrg = concatenate([z1, z2])
    # mrg = Concatenate()([z1, z2])

    # Dense layer and final output
    refOut = Dense(nb_hidden_units, activation='relu')(mrg)
    output = Dense(nb_classes, activation='softmax')(refOut)

    model = Model(inputs=[input_attention, input_featureAudio, input_featureText], outputs=output)
    model.compile(loss='categorical_crossentropy', optimizer=RMSprop(lr=LRate, rho=0.9, epsilon=None, decay=0.0), metrics=['categorical_accuracy'])  # mean_squared_error #categorical_crossentropy

    return model

def __init__(self):
    # Input tensors holding the query, positive (clicked) document, and negative (unclicked) documents.
    # The first dimension is None because the queries and documents can vary in length.
    query = Input(shape=(None, WORD_DEPTH))
    pos_doc = Input(shape=(None, WORD_DEPTH))
    neg_docs = [Input(shape=(None, WORD_DEPTH)) for j in range(J)]

    query_conv = Convolution1D(K, FILTER_LENGTH, padding="same", input_shape=(None, WORD_DEPTH), activation="tanh")(query)  # See equation (2).
    query_max = Lambda(lambda x: backend.max(x, axis=1), output_shape=(K,))(query_conv)  # See section 3.4.
    query_sem = Dense(L, activation="tanh", input_dim=K)(query_max)  # See section 3.5.

    # The document equivalent of the above query model.
    doc_conv = Convolution1D(K, FILTER_LENGTH, padding="same", input_shape=(None, WORD_DEPTH), activation="tanh")
    doc_max = Lambda(lambda x: backend.max(x, axis=1), output_shape=(K,))
    doc_sem = Dense(L, activation="tanh", input_dim=K)

    pos_doc_conv = doc_conv(pos_doc)
    neg_doc_convs = [doc_conv(neg_doc) for neg_doc in neg_docs]

    pos_doc_max = doc_max(pos_doc_conv)
    neg_doc_maxes = [doc_max(neg_doc_conv) for neg_doc_conv in neg_doc_convs]

    pos_doc_sem = doc_sem(pos_doc_max)
    neg_doc_sems = [doc_sem(neg_doc_max) for neg_doc_max in neg_doc_maxes]

    # This layer calculates the cosine similarity between the semantic representations of
    # a query and a document.
    R_Q_D_p = dot([query_sem, pos_doc_sem], axes=1, normalize=True)  # See equation (4).
    R_Q_D_ns = [dot([query_sem, neg_doc_sem], axes=1, normalize=True) for neg_doc_sem in neg_doc_sems]  # See equation (4).

    concat_Rs = concatenate([R_Q_D_p] + R_Q_D_ns)
    concat_Rs = Reshape((J + 1, 1))(concat_Rs)

    # In this step, we multiply each R(Q, D) value by gamma. In the paper, gamma is
    # described as a smoothing factor for the softmax function, and it's set empirically
    # on a held-out data set. We're going to learn gamma's value by pretending it's
    # a single 1 x 1 kernel.
    weight = np.array([1]).reshape(1, 1, 1)
    with_gamma = Convolution1D(1, 1, padding="same", input_shape=(J + 1, 1), activation="linear", use_bias=False, weights=[weight])(concat_Rs)  # See equation (5).
    with_gamma = Reshape((J + 1,))(with_gamma)

    # Finally, we use the softmax function to calculate P(D+|Q).
    prob = Activation("softmax")(with_gamma)  # See equation (5).

    # We now have everything we need to define our model.
    self.model = Model(inputs=[query, pos_doc] + neg_docs, outputs=prob)
    self.model.compile(optimizer="adadelta", loss="categorical_crossentropy")

    self.encoder = Model(inputs=query, outputs=query_sem)

def build(self, vector_dim, vocab_size, lr):
    """ returns a word2vec model """
    print("Building keras model...")

    stddev = 1.0 / vector_dim
    print("Setting initializer standard deviation to: {}".format(stddev))
    initializer = RandomNormal(mean=0.0, stddev=stddev, seed=10)

    word_input = Input(shape=(1,), name="word_input")
    context_input = Input(shape=(1,), name="context_input")

    Ebd = Embedding(input_dim=vocab_size + 1, output_dim=vector_dim, name="embedding", embeddings_initializer=initializer)
    word = Ebd(word_input)
    context = Ebd(context_input)

    merged = dot([word, context], axes=2, normalize=True, name="cos")
    merged = Flatten()(merged)
    output = Dense(1, activation='sigmoid', name="output")(merged)

    optimizer = SGD(lr=lr)
    model = Model(inputs=[word_input, context_input], outputs=output)
    model.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=['accuracy'])
    self.model = model

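# Training sketch (assumed, not part of the original class): feed the model built above with
# (word, context) pairs plus negative samples generated by keras.preprocessing.sequence.skipgrams.
# `w2v` (an object exposing .model) and `corpus` (integer-encoded sentences) are hypothetical names.
def _train_word2vec(w2v, corpus, vocab_size, epochs=1):
    import numpy as np
    from keras.preprocessing.sequence import skipgrams

    for _ in range(epochs):
        for sentence in corpus:
            pairs, labels = skipgrams(sentence, vocabulary_size=vocab_size,
                                      window_size=4, negative_samples=1.0)
            if not pairs:
                continue
            words = np.array([p[0] for p in pairs], dtype='int32').reshape(-1, 1)
            contexts = np.array([p[1] for p in pairs], dtype='int32').reshape(-1, 1)
            w2v.model.train_on_batch([words, contexts], np.array(labels, dtype='float32'))
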
def testEmbeddingLayerCosineSim(self):
    """
    Test Keras 'Embedding' layer returned by 'get_embedding_layer' function for a simple word similarity task.
    """
    keras_w2v_model = self.model_cos_sim
    keras_w2v_model_wv = keras_w2v_model.wv

    embedding_layer = keras_w2v_model_wv.get_keras_embedding()

    input_a = Input(shape=(1,), dtype='int32', name='input_a')
    input_b = Input(shape=(1,), dtype='int32', name='input_b')
    embedding_a = embedding_layer(input_a)
    embedding_b = embedding_layer(input_b)
    similarity = dot([embedding_a, embedding_b], axes=2, normalize=True)

    model = Model(inputs=[input_a, input_b], outputs=similarity)
    model.compile(optimizer='sgd', loss='mse')

    word_a = 'graph'
    word_b = 'trees'
    output = model.predict([
        np.asarray([keras_w2v_model.wv.get_index(word_a)]),
        np.asarray([keras_w2v_model.wv.get_index(word_b)])
    ])
    # output is the cosine distance between the two words (as a similarity measure)

    self.assertTrue(type(output[0][0][0]) == np.float32)  # verify that a float is returned

def word_embedding(self):
    input_target = Input((1,))
    input_context = Input((1,))

    target = Embedding(self.vocab_size, self.latent_dim, name='target-embedding', mask_zero=False)(input_target)
    context = Embedding(self.vocab_size, self.latent_dim, name="context-embedding", mask_zero=False)(input_context)

    dot_product = merge.dot([target, context], axes=2, normalize=False, name="dot")
    # dot_product = Reshape((1,))(dot_product)
    dot_product = Flatten()(dot_product)

    # add the sigmoid output layer
    output = Dense(1, activation='sigmoid', name='output')(dot_product)

    # setup a cosine similarity operation which will be output in a secondary model
    # similarity = merge.dot([target, context], axes=2, normalize=True)

    # create the primary training model
    self.model = Model(input=[input_target, input_context], output=output)
    self.model.compile(loss='binary_crossentropy', optimizer='rmsprop')

    # create a secondary validation model to run our similarity checks during training
    # self.validation_model = Model(input=[input_target, input_context], output=similarity)

    self.model_checkpoint = ModelCheckpoint(
        filepath=SAVE_DIR / 'word2vect-model-{epoch:02d}.hdf5',
        verbose=1,
        save_best_only=True)

def build_pixel_comparison_network(input_shape):
    channels, height, width = input_shape
    input = Input(shape=(height, width, channels))

    first = Flatten()(Lambda(lambda x: x[:, :, :, :1])(input))
    second = Flatten()(Lambda(lambda x: x[:, :, :, 1:])(input))

    # second = Lambda(lambda x: -x)(second)
    # difference = add([first, second])
    # raw_result = Lambda(lambda x: K.mean(K.abs(x), axis=1, keepdims=True))(difference)
    # prob_zero = Lambda(lambda x: x / 255.0)(raw_result)
    # prob_one = Lambda(lambda x: 1.0 - x)(prob_zero)

    prob_one = dot([first, second], axes=1, normalize=True)
    prob_zero = Lambda(lambda x: 1.0 - x)(prob_one)

    output = concatenate([prob_zero, prob_one])

    return Model(inputs=input, outputs=output)

def testEmbeddingLayerCosineSim(self):
    """
    Test Keras 'Embedding' layer returned by 'get_embedding_layer' function for a simple word similarity task.
    """
    keras_w2v_model = self.model_cos_sim
    keras_w2v_model_wv = keras_w2v_model.wv

    embedding_layer = keras_w2v_model_wv.get_keras_embedding()

    input_a = Input(shape=(1,), dtype='int32', name='input_a')
    input_b = Input(shape=(1,), dtype='int32', name='input_b')
    embedding_a = embedding_layer(input_a)
    embedding_b = embedding_layer(input_b)
    similarity = dot([embedding_a, embedding_b], axes=2, normalize=True)

    model = Model(input=[input_a, input_b], output=similarity)
    model.compile(optimizer='sgd', loss='mse')

    word_a = 'graph'
    word_b = 'trees'
    output = model.predict([
        np.asarray([keras_w2v_model.wv.vocab[word_a].index]),
        np.asarray([keras_w2v_model.wv.vocab[word_b].index])
    ])
    # output is the cosine distance between the two words (as a similarity measure)

    self.assertTrue(type(output[0][0][0]) == np.float32)  # verify that a float is returned

def get_model():
    # customer inputs
    customer_input = Input(shape=(1,), name='customer_input', dtype='int64')
    # map each customer to a coefficient vector of size n_latent_factors (building the customer matrix)
    customer_embedding = Embedding(n_customers, n_latent_factors, name='customer_embedding')(customer_input)
    # Embedding returns a 3-D tensor of shape (n_customers, 1, n_latent_factors);
    # it has to be flattened to the 2-D shape (n_customers, n_latent_factors)
    customer_vec = Flatten(name='FlattenCustomers')(customer_embedding)

    # item inputs
    item_input = Input(shape=(1,), name='item_input', dtype='int64')
    # map each item to a coefficient vector of size n_latent_factors (building the item matrix)
    item_embedding = Embedding(n_items, n_latent_factors, name='item_embedding')(item_input)
    # Embedding returns a 3-D tensor of shape (n_items, 1, n_latent_factors);
    # it has to be flattened to the 2-D shape (n_items, n_latent_factors)
    item_vec = Flatten(name='FlattenItems')(item_embedding)

    # rating matrix = customer matrix * item matrix
    sim = dot([customer_vec, item_vec], name='Simalarity-Dot-Product', axes=1)
    # sim = K.dot(customer_vec, t_item_vec)  # Dot(), name='Simalarity-Dot-Product', axes=1)

    return keras.models.Model([customer_input, item_input], sim)

def get_model(self, num_classes, activation='sigmoid'):
    max_len = opt.max_len
    voca_size = opt.unigram_hash_size + 1

    with tf.device('/gpu:0'):
        embd = Embedding(voca_size, opt.embd_size, name='uni_embd')

        t_uni = Input((max_len,), name="input_1")
        t_uni_embd = embd(t_uni)  # token

        w_uni = Input((max_len,), name="input_2")
        w_uni_mat = Reshape((max_len, 1))(w_uni)  # weight

        uni_embd_mat = dot([t_uni_embd, w_uni_mat], axes=1)
        uni_embd = Reshape((opt.embd_size,))(uni_embd_mat)

        embd_out = Dropout(rate=0.5)(uni_embd)
        relu = Activation('relu', name='relu1')(embd_out)
        outputs = Dense(num_classes, activation=activation)(relu)

        model = Model(inputs=[t_uni, w_uni], outputs=outputs)
        optm = keras.optimizers.Nadam(opt.lr)
        # model = Sequential()
        # optm = 'adadelta'
        model.compile(loss='binary_crossentropy',
                      # model.compile(loss='mse',
                      optimizer=optm,
                      metrics=[top1_acc])
        model.summary(print_fn=lambda x: self.logger.info(x))
    return model

def nn_model(frame=4, input_shape=[5, 5], num_actions=5):
    with tf.name_scope('deep_q_network'):
        with tf.name_scope('input'):
            # 5*5*4
            input_state = Input(shape=(frame, input_shape[0], input_shape[1]))
            input_action = Input(shape=(num_actions,))

        with tf.name_scope('fc2'):
            flattened = Flatten()(input_state)
            dense2 = Dense(128, kernel_initializer='glorot_uniform', activation='relu')(flattened)

        with tf.name_scope('output'):
            q_values = Dense(num_actions, activation=None)(dense2)
            q_v = dot([q_values, input_action], axes=1)

    network_model = Model(inputs=[input_state, input_action], outputs=q_v)  # option 1: takes a state and an action, outputs a single Q-value
    q_values_func = K.function([input_state], [q_values])  # option 2: takes a state, outputs the Q-values for all actions
    network_model.summary()
    return network_model, q_values_func

def get_model():
    allinput = []
    embedded = []
    for i, col in tqdm(enumerate(train_features)):
        ini_input = Input(shape=(1,), dtype='int32')
        reshape = makeembed(cnt[i], 64, ini_input)
        embedded.append(reshape)
        allinput.append(ini_input)

    dotpreds = dot([embedded[0], embedded[1]], axes=1)

    for i, col in tqdm(enumerate(train_features)):
        if i == 0:
            preds = embedded[i]
        else:
            preds = concatenate([embedded[i], preds])

    preds = concatenate([dotpreds, preds])
    preds = Dense(128, activation='relu')(preds)
    preds = Dropout(0.5)(preds)
    preds = Dense(1, activation='sigmoid')(preds)

    model = Model(inputs=allinput, outputs=preds)
    opt = RMSprop(lr=1e-3)
    model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['acc'])
    return model

def build_cf_model(n_users, n_movies, dim, isBest=False):
    u_input = Input(shape=(1,))
    if isBest:
        u = Embedding(n_users, dim, embeddings_regularizer=l2(1e-5))(u_input)
    else:
        u = Embedding(n_users, dim)(u_input)
    u = Reshape((dim,))(u)
    u = Dropout(0.1)(u)

    m_input = Input(shape=(1,))
    if isBest:
        m = Embedding(n_movies, dim, embeddings_regularizer=l2(1e-5))(m_input)
    else:
        m = Embedding(n_movies, dim)(m_input)
    m = Reshape((dim,))(m)
    m = Dropout(0.1)(m)

    if isBest:
        u_bias = Embedding(n_users, 1, embeddings_regularizer=l2(1e-5))(u_input)
    else:
        u_bias = Embedding(n_users, 1)(u_input)
    u_bias = Reshape((1,))(u_bias)

    if isBest:
        m_bias = Embedding(n_movies, 1, embeddings_regularizer=l2(1e-5))(m_input)
    else:
        m_bias = Embedding(n_movies, 1)(m_input)
    m_bias = Reshape((1,))(m_bias)

    out = dot([u, m], -1)
    out = add([out, u_bias, m_bias])
    if isBest:
        out = Lambda(lambda x: x + K.constant(3.581712))(out)

    model = Model(inputs=[u_input, m_input], outputs=out)
    return model

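# Usage sketch (assumed, not part of the original source): the matrix-factorization model above
# outputs a scalar rating for a (user id, movie id) pair, so it is typically compiled with an
# MSE-style loss and fit on two parallel index arrays. The toy data below is hypothetical.
def _demo_build_cf_model():
    import numpy as np
    model = build_cf_model(n_users=1000, n_movies=1700, dim=16, isBest=True)
    model.compile(loss='mse', optimizer='adam')
    users = np.random.randint(0, 1000, size=(5000, 1))
    movies = np.random.randint(0, 1700, size=(5000, 1))
    ratings = np.random.uniform(1, 5, size=(5000, 1))
    model.fit([users, movies], ratings, batch_size=256, epochs=2, validation_split=0.1)
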
def get_model():
    user_embeddings = Embedding(u_cnt, 64, embeddings_initializer=RandomNormal(0, 0.01), input_length=1, trainable=True)
    song_embeddings = Embedding(s_cnt, 64, embeddings_initializer=RandomNormal(0, 0.01), input_length=1, trainable=True)

    uid_input = Input(shape=(1,), dtype='int32')
    embedded_usr = user_embeddings(uid_input)
    embedded_usr = Reshape((64,))(embedded_usr)

    sid_input = Input(shape=(1,), dtype='int32')
    embedded_song = song_embeddings(sid_input)
    embedded_song = Reshape((64,))(embedded_song)

    preds = dot([embedded_usr, embedded_song], axes=1)
    preds = Activation('sigmoid')(preds)

    model = Model(inputs=[uid_input, sid_input], outputs=preds)
    opt = RMSprop(lr=1e-3)
    model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['acc'])
    return model

def get_model(self, cate_type):
    if cate_type == 'bm':
        lr = opt.bm_lr
        embd_size = opt.bm_embd_size
        unigram_hash_size = opt.bm_unigram_hash_size
        image_fc_size = 1024
        hidden_size = 1024
        output_size = 556
    elif cate_type == 's':
        lr = opt.s_lr
        embd_size = opt.s_embd_size
        unigram_hash_size = opt.s_unigram_hash_size
        image_fc_size = 1024
        hidden_size = 1024
        output_size = 3191
    elif cate_type == 'd':
        lr = opt.d_lr
        embd_size = opt.d_embd_size
        unigram_hash_size = opt.d_unigram_hash_size
        image_fc_size = 512
        hidden_size = 512
        output_size = 405

    voca_size = unigram_hash_size + 1

    with tf.device('/gpu:0'):
        # input
        t_uni = Input((opt.max_len,), name='t_uni')
        w_uni = Input((opt.max_len,), name='w_uni')
        w_uni_mat = Reshape((opt.max_len, 1))(w_uni)
        image_feature = Input((opt.image_size,), name='image_feature')

        # Embedding
        t_uni_embd = Embedding(voca_size, embd_size)(t_uni)
        uni_embd_mat = dot([t_uni_embd, w_uni_mat], axes=1)
        uni_embd = Reshape((embd_size,))(uni_embd_mat)
        embd_relu = Activation('relu')(uni_embd)

        # image
        image_fc = Dense(image_fc_size, activation='relu')(image_feature)

        # concatenate
        concat_wordImage = concatenate([embd_relu, image_fc], axis=1)

        FIANL_hidden = Dense(hidden_size, activation=None)(concat_wordImage)
        dropout = Dropout(rate=0.5)(FIANL_hidden)
        relu = Activation('relu')(dropout)
        embd_image_out = Dense(output_size, activation='sigmoid', name='EmbdImage')(relu)

        # define Model
        model = Model(inputs=[t_uni, w_uni, image_feature], outputs=embd_image_out)
        optm = keras.optimizers.Nadam(lr)
        model.load_weights('../data/model/s/weights')
        model.compile(loss='binary_crossentropy', optimizer=optm, metrics=[top1_acc])
        model.summary(print_fn=lambda x: self.logger.info(x))
    return model

def shallow_deep():
    """ shallow & deep model """
    inputs = [uwi] + udc + \
             [iwi1] + idc1 + [idd1] + [idb1] + \
             [iwi2] + idc2 + [idd2] + [idb2] + \
             [iwi3] + idc3 + [idd3] + [idb3] + \
             [iwi4] + idc4 + [idd4] + [idb4] + \
             [iwi5] + idc5 + [idd5] + [idb5]

    ufv1 = ufv()
    ifv_1 = ifv1()
    ifv_2 = ifv2()
    ifv_3 = ifv3()
    ifv_4 = ifv4()
    ifv_5 = ifv5()

    uipp = dot([ufv1, ifv_1], axes=1, normalize=True)
    uinps = [
        dot([ufv1, ifv_2], axes=1, normalize=True),
        dot([ufv1, ifv_3], axes=1, normalize=True),
        dot([ufv1, ifv_4], axes=1, normalize=True),
        dot([ufv1, ifv_5], axes=1, normalize=True)
    ]

    outputs = concatenate([uipp] + uinps)
    outputs = Reshape((4 + 1, 1))(outputs)

    weight = np.array([1]).reshape(1, 1, 1)
    with_gamma = Convolution1D(1, 1, padding="same", input_shape=(4 + 1, 1), activation="linear", use_bias=False, weights=[weight])(outputs)
    with_gamma = Reshape((4 + 1,))(with_gamma)
    prob = Activation("softmax")(with_gamma)

    model = Model(inputs=inputs, outputs=prob)
    model.compile(optimizer="adadelta", loss="categorical_crossentropy")
    plot_model(model, to_file="ns-shallow&deep.png", show_shapes=True)

def Keras_skip_gram(G, walks, iteration):
    """ Keras to run word2vec algorithm with skip_gram model. """
    walks_sentences = [list(np.array(walk)) for walk in walks]

    embedding1 = np.random.uniform(-1 / G.embedding_size, 1 / G.embedding_size, (G.vocabulary, G.embedding_size))
    embedding2 = np.random.uniform(-1 / G.embedding_size, 1 / G.embedding_size, (G.vocabulary, G.embedding_size))

    shared_layer1 = Embedding(input_dim=G.vocabulary, output_dim=G.embedding_size, weights=[embedding1])
    shared_layer2 = Embedding(input_dim=G.vocabulary, output_dim=G.embedding_size, weights=[embedding2])

    input_target = Input(shape=(1,), dtype='int32', name='input_1')
    input_source = Input(shape=(1,), dtype='int32', name='input_2')
    input_negative = Input(shape=(G.negative,), dtype='int32', name='input_3')

    target = shared_layer1(input_target)
    source = shared_layer2(input_source)
    negative = shared_layer1(input_negative)

    positive_dot = dot([source, target], axes=(2), normalize=False)
    negative_dot = dot([source, negative], axes=(2), normalize=False)
    all_dot = concatenate([positive_dot, negative_dot], axis=2)
    sigmoid_sample = Activation('sigmoid')(all_dot)

    model = Model(inputs=[input_target, input_source, input_negative], outputs=[sigmoid_sample])
    sgd2 = optimizers.SGD(lr=0.025, nesterov=True)
    model.compile(loss='binary_crossentropy', optimizer=sgd2)

    train_list = skip_train(walks_sentences, G.window_size)
    for i in range(iteration):
        for [a1, a2, a4, y1] in train_list:
            loss = model.train_on_batch([a1, a2, a4], y1)

    embed = shared_layer2.get_weights()[0]
    return embed

def LSTM_2(input_shape, nb_classes, nb_lstm_cells=128):
    '''
    input_shape: (time_steps, features,)
    Dense layers -> No activation
    BLSTM -> activation softsign
    '''
    tf.logging.set_verbosity(tf.logging.ERROR)  # avoid warnings caused by the version change

    with K.name_scope('BLSTMLayer'):
        # Bi-directional Long Short-Term Memory for learning the temporal aggregation
        input_feature = Input(shape=input_shape)
        nb_lstm_cells = 128
        x = Masking(mask_value=globalvars.masking_value)(input_feature)
        x = LSTM(nb_lstm_cells, return_sequences=True, dropout=0.5)(x)
        x = LSTM(nb_lstm_cells, return_sequences=True, dropout=0.5)(x)
        y = LSTM(nb_lstm_cells, return_sequences=True, dropout=0.5)(x)

    with K.name_scope('AttentionLayer'):
        # Logistic regression for learning the attention parameters with a standalone feature as input
        input_attention = Input(shape=(nb_lstm_cells * 2,))
        u = Dense(nb_lstm_cells, activation='softsign')(input_attention)

        # To compute the final weights for the frames which sum to unity
        alpha = dot([u, y], axes=-1)  # inner prod.
        alpha = Activation('softmax')(alpha)

    with K.name_scope('WeightedPooling'):
        # Weighted pooling to get the utterance-level representation
        z = dot([alpha, y], axes=1)

    with K.name_scope('OUTPUT'):
        # Get posterior probability for each emotional class
        output = Dense(nb_classes, activation='softmax')(z)

    model = Model(name=inspect.stack()[0][3], inputs=[input_attention, input_feature], outputs=output)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

def MF(n_U, n_M, F):
    U_input = Input(shape=(1,), dtype='int64', name='users')
    M_input = Input(shape=(1,), dtype='int64', name='movies')

    U_embedding = Embedding(input_dim=max_userid, output_dim=F)(U_input)
    M_embedding = Embedding(input_dim=max_movieid, output_dim=F)(M_input)

    predicted_preference = dot(inputs=[U_embedding, M_embedding], axes=2)
    predicted_preference = Flatten()(predicted_preference)

    model = Model(inputs=[U_input, M_input], outputs=predicted_preference)
    return model

def MatchScore(l, r, mode="euclidean"):
    if mode == "euclidean":
        return Lambda(compute_euclidean_match_score, output_shape=out_shape)([l, r])
    elif mode == "cos":
        return Lambda(compute_cos_match_score, output_shape=out_shape)([l, r])
    elif mode == "dot":
        return dot([l, r], axes=-1)
    else:
        raise ValueError("Unknown match score mode %s" % mode)

def LAYER(input1, input2, max_len=max_len):
    Avg = Dropout(rate=0.5)(input1)
    Avg = BatchNormalization()(Avg)
    Avg = GlobalAveragePooling1D()(Avg)

    mat = Reshape((max_len, 1))(input2)
    Dot = dot([input1, mat], axes=1)
    Dot = Flatten()(Dot)
    Dot = Dropout(rate=0.5)(Dot)
    Dot = BatchNormalization()(Dot)

    return Avg, Dot

def create_model(MAX_QRY_LENGTH=50, MAX_DOC_LENGTH=2900, NUM_OF_FEATS=10, PSGS_SIZE=[(50, 1)], NUM_OF_FILTERS=5, tau=1):
    alpha_size = len(PSGS_SIZE)
    psgMat = Input(shape=(MAX_QRY_LENGTH, MAX_DOC_LENGTH, 1,), name="passage")
    homoMat = Input(shape=(NUM_OF_FEATS,), name="h_feats")

    # Conv2D, Mean pooling, Max pooling
    M, K, r = [], [], []
    for idx, PSG_SIZE in enumerate(PSGS_SIZE):
        tau = PSG_SIZE[0] // 2  # floor division so strides/pool sizes stay integers
        pool_size = (MAX_QRY_LENGTH - PSG_SIZE[0]) // tau + 1
        # Convolution
        m_1 = Convolution2D(filters=NUM_OF_FILTERS, kernel_size=PSG_SIZE, strides=tau, padding='valid', name="pConv2D_" + str(idx))(psgMat)
        M.append(m_1)
        # Mean pooling
        k_1 = AveragePooling2D(pool_size=(pool_size, 1), strides=1, name="pAvePool_" + str(idx))(M[idx])
        K.append(k_1)
        # Max pooling
        r_1 = GlobalMaxPooling2D(name="pMaxPool_" + str(idx))(K[idx])
        r.append(r_1)

    concat_r = concatenate(r)

    # Fusion Matrix and predict relevance
    # get h(q, d)
    # MLP(DENSE(len(r(q,d))))
    phi_h = Dense(alpha_size, activation="softmax", name="TrainMat")(homoMat)
    dot_prod = dot([concat_r, phi_h], axes=1, name="rel_dot")
    # tanh(dot(r.transpose * h))
    # pred = Activation("tanh", name="activation_tanh")(dot_prod)
    pred = Dense(1, activation="sigmoid", name="activation_sigmoid")(dot_prod)

    # We now have everything we need to define our model.
    model = Model(inputs=[psgMat, homoMat], outputs=pred)
    model.summary()
    '''
    from keras.utils import plot_model
    plot_model(model, to_file='model.png')
    '''
    return model

def crossatt(self, x):
    doc, query, doc_mask, q_mask = x[0], x[1], x[2], x[3]

    trans_doc = K.permute_dimensions(doc, (0, 2, 1))
    match_score = K.tanh(dot([query, trans_doc], (2, 1)))

    query_to_doc_att = K.softmax(K.sum(match_score, axis=1))
    doc_to_query_att = K.softmax(K.sum(match_score, axis=-1))

    alpha = query_to_doc_att * doc_mask
    a_sum = K.sum(alpha, axis=1)
    _a_sum = K.expand_dims(a_sum, -1)
    alpha = alpha / _a_sum

    beta = doc_to_query_att * q_mask
    b_sum = K.sum(beta, axis=1)
    _b_sum = K.expand_dims(b_sum, 1)
    beta = beta / _b_sum

    doc_vector = dot([trans_doc, alpha], (2, 1))
    trans_que = K.permute_dimensions(query, (0, 2, 1))
    que_vector = dot([trans_que, beta], (2, 1))

    final_hidden = K.concatenate([doc_vector, que_vector])
    return final_hidden

word_model = Sequential()
word_model.add(Embedding(vocab_size, embed_size,
                         embeddings_initializer="glorot_uniform",
                         input_length=1))
word_model.add(Reshape((embed_size,)))

context_model = Sequential()
context_model.add(Embedding(vocab_size, embed_size,
                            embeddings_initializer="glorot_uniform",
                            input_length=1))
context_model.add(Reshape((embed_size,)))

match = dot([word_model.output, context_model.output], 1)
output = Dense(1, kernel_initializer='glorot_uniform', activation='sigmoid')(match)

model = Model(inputs=[word_model.input, context_model.input], outputs=output)
model.compile(loss='mean_squared_error', optimizer='adam')
model.summary()

def loadData():
    '''
    I love green eggs and ham.
    (context, word):
    ([I, green], love)
    ([love, eggs], green)
    ([green, and], eggs)
    -------------->
question_input = Input(shape=(question_maxlen,))

# story encoder memory
story_encoder = Embedding(input_dim=vocab_size,
                          output_dim=EMBEDDING_SIZE,
                          input_length=story_maxlen)(story_input)
story_encoder = Dropout(0.3)(story_encoder)

# question encoder
question_encoder = Embedding(input_dim=vocab_size,
                             output_dim=EMBEDDING_SIZE,
                             input_length=question_maxlen)(question_input)
question_encoder = Dropout(0.3)(question_encoder)

# match between story and question
match = dot([story_encoder, question_encoder], axes=[2, 2])

# encode story into vector space of question
story_encoder_c = Embedding(input_dim=vocab_size,
                            output_dim=question_maxlen,
                            input_length=story_maxlen)(story_input)
story_encoder_c = Dropout(0.3)(story_encoder_c)

# combine match and story vectors
response = add([match, story_encoder_c])
response = Permute((2, 1))(response)

# combine response and question vectors
answer = concatenate([response, question_encoder], axis=-1)
answer = LSTM(LATENT_SIZE)(answer)
answer = Dropout(0.3)(answer)

doc_conv = Convolution1D(K, FILTER_LENGTH, padding="same", input_shape=(None, WORD_DEPTH), activation="tanh")
doc_max = Lambda(lambda x: backend.max(x, axis=1), output_shape=(K,))
doc_sem = Dense(L, activation="tanh", input_dim=K)

pos_doc_conv = doc_conv(pos_doc)
neg_doc_convs = [doc_conv(neg_doc) for neg_doc in neg_docs]

pos_doc_max = doc_max(pos_doc_conv)
neg_doc_maxes = [doc_max(neg_doc_conv) for neg_doc_conv in neg_doc_convs]

pos_doc_sem = doc_sem(pos_doc_max)
neg_doc_sems = [doc_sem(neg_doc_max) for neg_doc_max in neg_doc_maxes]

# This layer calculates the cosine similarity between the semantic representations of
# a query and a document.
R_Q_D_p = dot([query_sem, pos_doc_sem], axes=1, normalize=True)  # See equation (4).
R_Q_D_ns = [dot([query_sem, neg_doc_sem], axes=1, normalize=True) for neg_doc_sem in neg_doc_sems]  # See equation (4).

concat_Rs = concatenate([R_Q_D_p] + R_Q_D_ns)
concat_Rs = Reshape((J + 1, 1))(concat_Rs)

# In this step, we multiply each R(Q, D) value by gamma. In the paper, gamma is
# described as a smoothing factor for the softmax function, and it's set empirically
# on a held-out data set. We're going to learn gamma's value by pretending it's
# a single 1 x 1 kernel.
weight = np.array([1]).reshape(1, 1, 1)
with_gamma = Convolution1D(1, 1, padding="same", input_shape=(J + 1, 1), activation="linear", use_bias=False, weights=[weight])(concat_Rs)  # See equation (5).
with_gamma = Reshape((J + 1,))(with_gamma)

# Finally, we use the softmax function to calculate P(D+|Q).
prob = Activation("softmax")(with_gamma)  # See equation (5).