def build_model(self):
    """Co-attention fusion of self.sent1 (B, L1, dim) and self.sent2 (B, L2, dim).

    Computes bilinear attention between the two sequences, builds aligned
    comparison features in both directions, max-pools each side over time,
    and returns a (B, dim) fused feature through a final dense + gelu.
    Masks (if given) are (B, L) with 1 for real tokens and 0 for padding.
    """
    from tensorflow.python.keras.layers import Dense, Dot

    dim = self.sent1.get_shape().as_list()[-1]

    # Bilinear similarity: project sent2, then dot with sent1 -> (B, L1, L2).
    temp_W = tf.layers.dense(self.sent2, dim, name="dense")  # (B, L2, dim)
    temp_W = Dot(axes=[2, 2])([self.sent1, temp_W])  # (B, L1, L2)

    if self.sent1_mask is not None:
        s1_mask_exp = tf.expand_dims(self.sent1_mask, axis=2)  # (B, L1, 1)
        s2_mask_exp = tf.expand_dims(self.sent2_mask, axis=1)  # (B, 1, L2)
        # Large negative bias drives padded positions to ~0 after softmax.
        temp_W1 = temp_W - (1 - s1_mask_exp) * 1e20
        temp_W2 = temp_W - (1 - s2_mask_exp) * 1e20
    else:
        temp_W1 = temp_W
        temp_W2 = temp_W

    W1 = tf.nn.softmax(temp_W1, axis=1)  # attention over sent1 positions
    W2 = tf.nn.softmax(temp_W2, axis=2)  # attention over sent2 positions

    M1 = Dot(axes=[2, 1])([W2, self.sent2])  # (B, L1, dim): sent2 aligned to sent1
    # BUG FIX: W1 is normalized over axis 1 (the L1 axis), so it must be
    # contracted with sent1 along that axis.  The previous axes=[2, 1] only
    # type-checked when L1 == L2 and aggregated over the wrong dimension;
    # axes=[1, 1] yields (B, L2, dim), matching the `M2 - self.sent2` below.
    M2 = Dot(axes=[1, 1])([W1, self.sent1])  # (B, L2, dim): sent1 aligned to sent2

    s1_cat = tf.concat([M2 - self.sent2, M2 * self.sent2], axis=-1)
    s2_cat = tf.concat([M1 - self.sent1, M1 * self.sent1], axis=-1)

    # Shared comparison projection (reuse ties the two weights together).
    S1 = tf.layers.dense(s1_cat, dim, activation=tf.nn.relu, name="cat_dense")
    S2 = tf.layers.dense(s2_cat, dim, activation=tf.nn.relu, name="cat_dense", reuse=True)

    if self.is_training:
        S1 = dropout(S1, dropout_prob=0.1)
        # BUG FIX: dropout was applied to S1 twice while S2 got none.
        S2 = dropout(S2, dropout_prob=0.1)

    if self.sent1_mask is not None:
        # S2 is aligned with sent1 (length L1); S1 with sent2 (length L2).
        S2 = S2 * tf.expand_dims(self.sent1_mask, axis=2)
        S1 = S1 * tf.expand_dims(self.sent2_mask, axis=2)

    # Max-pool each side over its time axis and fuse.
    C1 = tf.reduce_max(S1, axis=1)
    C2 = tf.reduce_max(S2, axis=1)
    C_cat = tf.concat([C1, C2], axis=1)
    return gelu(tf.layers.dense(C_cat, dim))
def build_model(self, vocab_size: int, vector_dim: int):
    """Assemble and compile the GloVe Keras model.

    :param vocab_size: The number of distinct words.
    :param vector_dim: The vector dimension of each word.
    :return: the Keras GloVe model.
    """
    # One scalar word id per sample for each role.
    central_in = Input((1, ), name="central_word_id")
    context_in = Input((1, ), name="context_word_id")

    # Separate embedding + bias tables for central and context words.
    central_vec = Embedding(vocab_size, vector_dim, input_length=1, name=CNTRL_EMB)(central_in)
    central_b = Embedding(vocab_size, 1, input_length=1, name=CNTRL_BS)(central_in)
    context_vec = Embedding(vocab_size, vector_dim, input_length=1, name=CTX_EMB)(context_in)
    context_b = Embedding(vocab_size, 1, input_length=1, name=CTX_BS)(context_in)

    # GloVe score: w_i . w~_j + b_i + b~_j, flattened to shape (1,).
    similarity = Dot(axes=-1)([central_vec, context_vec])
    similarity = Reshape((1, ))(similarity)
    prediction = Add()([similarity, Reshape((1, ))(central_b), Reshape((1, ))(context_b)])

    model = Model(inputs=[central_in, context_in], outputs=prediction)
    model.compile(loss=self.custom_loss, optimizer=Adagrad(lr=self.lr))
    print(model.summary())
    return model
def build_model(self, f_sizes):
    """Build a factorization-machine model plus a feature-extractor model.

    :param f_sizes: number of distinct values per sparse feature
    :return: (compiled training model, model emitting factors + biases)
    """
    n_fields = len(f_sizes)  # +1
    # One scalar-id input per sparse feature column.
    input_x = [Input(shape=(1, )) for _ in range(n_fields)]

    biases = [self.get_embed(inp, size, 1) for inp, size in zip(input_x, f_sizes)]
    factors = [self.get_embed(inp, size) for inp, size in zip(input_x, f_sizes)]

    # Pairwise interactions via the <sum - v_i, v_i> trick.
    total = Add()(factors)
    dots = [Dot(axes=1)([Subtract()([total, v]), v]) for v in factors]

    merged = Concatenate()(biases + dots)
    merged = BatchNormalization()(merged)
    output = Dense(1, activation='relu', kernel_regularizer=l2(self.kernel_l2))(merged)

    model = Model(inputs=input_x, outputs=[output])
    model.compile(optimizer=Adam(clipnorm=0.5), loss='mean_squared_error')  # TODO: radam

    # Companion model exposing the learned factors and biases.
    model_features = Model(inputs=input_x, outputs=factors + biases)
    return model, model_features
def build_trainable_graph(self, network):
    """Wrap `network` so the MSE loss sees only the Q-value of the taken action."""
    mask_in = Input(shape=(self.action_len, ), name='a_mask_inp')
    # The one-hot action mask zeroes every Q-value except the action taken.
    picked_q = Dot(axes=-1, name='qs_a')([network.output, mask_in])
    trainable = Model(inputs=[network.input, mask_in], outputs=picked_q)
    trainable.compile(optimizer=self.optimizer, loss='mse', metrics=['mae'])
    return trainable
def tnet(inputs, num_features):
    """PointNet T-Net: predict a (num_features x num_features) transform and apply it."""
    # Start the predicted transform at the identity matrix and keep it
    # near-orthogonal via the activity regularizer.
    identity_bias = Constant(np.eye(num_features).flatten())
    ortho_reg = OrthogonalRegularizer(num_features)

    y = conv_bn(inputs, 32)
    y = conv_bn(y, 64)
    y = conv_bn(y, 512)
    y = GlobalMaxPooling1D()(y)
    y = dense_bn(y, 256)
    y = dense_bn(y, 128)
    y = Dense(num_features * num_features,
              kernel_initializer='zeros',
              bias_initializer=identity_bias,
              activity_regularizer=ortho_reg)(y)
    transform = Reshape((num_features, num_features))(y)
    # Apply the learned transform to every input point.
    return Dot(axes=(2, 1))([inputs, transform])
def build_model_1(f_size):
    """Build the FM model and a companion model exposing factors + biases.

    `f_size` holds the per-feature cardinalities; the module-level
    `k_latent` and `kernel_reg` supply the latent size and l2 strength.
    """
    n_fields = len(f_size)
    inputs = [Input(shape=(1, )) for _ in range(n_fields)]

    biases = [get_embed(inp, size, 1) for inp, size in zip(inputs, f_size)]
    factors = [get_embed(inp, size, k_latent) for inp, size in zip(inputs, f_size)]

    # <sum_j v_j - v_i, v_i> recovers all pairwise interactions per feature.
    total = Add()(factors)
    dots = [Dot(axes=1)([Subtract()([total, v]), v]) for v in factors]

    merged = Concatenate()(biases + dots)
    merged = BatchNormalization()(merged)
    output = Dense(1, activation='relu', kernel_regularizer=l2(kernel_reg))(merged)

    model = Model(inputs=inputs, outputs=[output])
    model.compile(optimizer=Adam(clipnorm=0.5), loss='mean_squared_error')

    model_features = Model(inputs=inputs, outputs=factors + biases)
    return model, model_features
def build(self):
    """Build the transformer-based code-search models and store them on self.

    Produces four sub-models:
      * code_repr_model: (meth_name, apiseq, tokens3) -> code vector
      * desc_repr_model: desc -> description vector
      * sim_model: cosine similarity between code and description vectors
      * training_model: outputs a margin ranking loss over (good, bad) descs
    """
    # One transformer encoder per input channel.  NOTE: `droput_rate` (sic)
    # is the keyword actually declared by transformer.EncoderModel elsewhere
    # in the project, so the misspelling must be kept here.
    self.transformer_meth = transformer.EncoderModel(
        vocab_size=self.vocab_size, model_dim=self.hidden_dims,
        embed_dim=self.embed_dims, ffn_dim=self.lstm_dims, droput_rate=0.2,
        n_heads=2, max_len=self.meth_name_len, name='methT')
    self.transformer_apiseq = transformer.EncoderModel(
        vocab_size=self.vocab_size, model_dim=self.hidden_dims,
        embed_dim=self.embed_dims, ffn_dim=self.lstm_dims, droput_rate=0.2,
        n_heads=4, max_len=self.apiseq_len, name='apiseqT')
    self.transformer_desc = transformer.EncoderModel(
        vocab_size=self.vocab_size, model_dim=self.hidden_dims,
        embed_dim=self.embed_dims, ffn_dim=self.lstm_dims, droput_rate=0.2,
        n_heads=4, max_len=self.desc_len, name='descT')
    # self.transformer_ast = EncoderModel(vocab_size=self.vocab_size, model_dim=self.hidden_dims, embed_dim=self.embed_dims, ffn_dim=self.lstm_dims, droput_rate=0.2, n_heads=4, max_len=128)
    self.transformer_tokens = transformer.EncoderModel(
        vocab_size=self.vocab_size, model_dim=self.hidden_dims,
        embed_dim=self.embed_dims, ffn_dim=self.lstm_dims, droput_rate=0.2,
        n_heads=8, max_len=self.tokens_len, name='tokensT')

    # create path to store model Info

    # 1 -- CodeNN: the three code-side input channels (int token ids)
    meth_name = Input(shape=(self.meth_name_len,), dtype='int32', name='meth_name')
    apiseq = Input(shape=(self.apiseq_len,), dtype='int32', name='apiseq')
    tokens3 = Input(shape=(self.tokens_len,), dtype='int32', name='tokens3')

    # method name
    # embedding layer (transformer encoder output)
    meth_name_out = self.transformer_meth(meth_name)
    # max pooling over the time axis -> (batch, features)
    maxpool = Lambda(lambda x: k.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]),
                     name='maxpooling_methodname')
    method_name_pool = maxpool(meth_name_out)
    activation = Activation('tanh', name='active_method_name')
    method_name_repr = activation(method_name_pool)

    # apiseq
    # embedding layer
    apiseq_out = self.transformer_apiseq(apiseq)
    # max pooling
    maxpool = Lambda(lambda x: k.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]),
                     name='maxpooling_apiseq')
    apiseq_pool = maxpool(apiseq_out)
    activation = Activation('tanh', name='active_apiseq')
    apiseq_repr = activation(apiseq_pool)

    # tokens
    # embedding layer
    tokens_out = self.transformer_tokens(tokens3)
    # max pooling
    maxpool = Lambda(lambda x: k.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]),
                     name='maxpooling_tokens')
    tokens_pool = maxpool(tokens_out)
    activation = Activation('tanh', name='active_tokens')
    tokens_repr = activation(tokens_pool)

    # fusion method_name, apiseq, tokens -> single code representation
    merge_method_name_api = Concatenate(name='merge_methname_api')([method_name_repr, apiseq_repr])
    merge_code_repr = Concatenate(name='merge_code_repr')([merge_method_name_api, tokens_repr])
    code_repr = Dense(self.hidden_dims, activation='tanh', name='dense_coderepr')(merge_code_repr)

    self.code_repr_model = Model(inputs=[meth_name, apiseq, tokens3], outputs=[code_repr], name='code_repr_model')
    self.code_repr_model.summary()

    # Auxiliary model exposing the raw tokens-encoder output for inspection.
    self.output = Model(inputs=self.code_repr_model.input,
                        outputs=self.code_repr_model.get_layer('tokensT').output)
    self.output.summary()

    # 2 -- description channel
    desc = Input(shape=(self.desc_len,), dtype='int32', name='desc')

    # desc
    # embedding layer
    desc_out = self.transformer_desc(desc)
    # max pooling
    maxpool = Lambda(lambda x: k.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]),
                     name='maxpooling_desc')
    desc_pool = maxpool(desc_out)
    activation = Activation('tanh', name='active_desc')
    desc_repr = activation(desc_pool)

    self.desc_repr_model = Model(inputs=[desc], outputs=[desc_repr], name='desc_repr_model')
    self.desc_repr_model.summary()

    # 3 -- cosine similarity between code and description vectors
    code_repr = self.code_repr_model([meth_name, apiseq, tokens3])
    desc_repr = self.desc_repr_model([desc])
    cos_sim = Dot(axes=1, normalize=True, name='cos_sim')([code_repr, desc_repr])

    sim_model = Model(inputs=[meth_name, apiseq, tokens3, desc], outputs=[cos_sim], name='sim_model')
    self.sim_model = sim_model
    self.sim_model.summary()

    # 4 -- training model: hinge loss max(1e-6, margin - (sim_good - sim_bad))
    good_sim = sim_model([self.meth_name, self.apiseq, self.tokens, self.desc_good])
    bad_sim = sim_model([self.meth_name, self.apiseq, self.tokens, self.desc_bad])
    loss = Lambda(lambda x: k.maximum(1e-6, self.margin - (x[0] - x[1])),
                  output_shape=lambda x: x[0], name='loss')([good_sim, bad_sim])

    self.training_model = Model(inputs=[self.meth_name, self.apiseq, self.tokens, self.desc_good, self.desc_bad],
                                outputs=[loss], name='training_model')
    self.training_model.summary()
def MULT(self, sent1, sent2, sent1_mask, sent2_mask):
    """Mutual top-k attention between two sequence tensors.

    Assumes sent1 is (B, L1, dim) and sent2 is (B, L2, dim) — inferred from
    the shape indexing below; TODO confirm against callers.  Masks are
    (B, L) with 1 for real tokens and 0 for padding, or None.  Each side is
    gated by a top-20 attention-weighted summary of the other side, run
    through a TextCNN, and the two dense features are concatenated.
    """
    from tensorflow.python.keras.layers import Dense, Dot
    dim = sent1.get_shape().as_list()[-1]
    length1 = sent1.get_shape().as_list()[1]
    length2 = sent2.get_shape().as_list()[1]

    # Bilinear similarity: project sent2, then dot with sent1 -> (B, L1, L2).
    temp_W = tf.layers.dense(sent2, dim, name="dense")  # (B, L2, dim)
    temp_W = Dot(axes=[2, 2])([sent1, temp_W])  # (B, L1, L2)

    if sent1_mask is not None:
        s1_mask_exp = tf.expand_dims(sent1_mask, axis=2)  # (B, L1, 1)
        s2_mask_exp = tf.expand_dims(sent2_mask, axis=1)  # (B, 1, L2)
        # Large negative bias drives padded positions to ~0 after softmax.
        temp_W1 = temp_W - (1 - s1_mask_exp) * 1e20
        temp_W2 = temp_W - (1 - s2_mask_exp) * 1e20
    else:
        temp_W1 = temp_W
        temp_W2 = temp_W

    W1 = tf.nn.softmax(temp_W1, axis=1)  # attention over sent1 positions
    W2 = tf.nn.softmax(temp_W2, axis=2)  # attention over sent2 positions
    W1 = tf.transpose(W1, perm=[0, 2, 1])  # (B, L2, L1)

    # Keep only the 20 strongest alignments per query position.
    w1_val, w1_index = tf.nn.top_k(W1, k=20)
    w2_val, w2_index = tf.nn.top_k(W2, k=20)
    # Tile each sentence so batch_gather can pick per-position top-k vectors.
    sent1_repeat = tf.tile(tf.expand_dims(sent1, axis=1), [1, length2, 1, 1])
    sent2_repeat = tf.tile(tf.expand_dims(sent2, axis=1), [1, length1, 1, 1])
    sent1_top = tf.batch_gather(sent1_repeat, w1_index)
    sent2_top = tf.batch_gather(sent2_repeat, w2_index)

    # Renormalise the truncated weights so each top-k row sums to 1.
    w1_val = w1_val / tf.reduce_sum(w1_val, axis=2, keepdims=True)
    w2_val = w2_val / tf.reduce_sum(w2_val, axis=2, keepdims=True)
    w1_val = tf.expand_dims(w1_val, axis=3)
    w2_val = tf.expand_dims(w2_val, axis=3)

    # Aligned context vectors: M1 summarises sent2 per sent1 position
    # ((B, L1, dim)); M2 summarises sent1 per sent2 position ((B, L2, dim)).
    M1 = tf.reduce_sum(w2_val * sent2_top, axis=2)
    M2 = tf.reduce_sum(w1_val * sent1_top, axis=2)
    # M1 = Dot(axes=[2, 1])([W2, sent2])
    # M2 = Dot(axes=[1, 1])([W1, sent1])
    # s1_cat = tf.concat([M2 - sent2, M2 * sent2], axis=-1)
    # s2_cat = tf.concat([M1 - sent1, M1 * sent1], axis=-1)
    # S1 = tf.layers.dense(s1_cat, dim, activation=tf.nn.relu, name="cat_dense")
    # S2 = tf.layers.dense(s2_cat, dim, activation=tf.nn.relu, name="cat_dense", reuse=True)
    # if self.is_training:
    #     S1 = dropout(S1, dropout_prob=0.1)
    #     S2 = dropout(S2, dropout_prob=0.1)
    #
    # Gate each sentence by its aligned summary of the other.
    S1 = M1 * sent1
    S2 = M2 * sent2
    if sent1_mask is not None:
        S1 = S1 * tf.expand_dims(sent1_mask, axis=2)
        S2 = S2 * tf.expand_dims(sent2_mask, axis=2)

    from layers.ParallelInfo import TextCNN
    cnn1 = TextCNN(dim, [1, 2, 3, 4, 5], dim, scope_name="cnn1")
    cnn2 = TextCNN(dim, [1, 2, 3, 4, 5], dim, scope_name="cnn2")
    S1 = cnn1(S1)
    S2 = cnn2(S2)
    feature1 = tf.layers.dense(S1, dim, activation=tf.tanh)
    feature2 = tf.layers.dense(S2, dim, activation=tf.tanh)
    feature_total = tf.concat([feature1, feature2], axis=1)
    return feature_total
def _build(self, lambda_u=0.0001, lambda_v=0.0001, optimizer='rmsprop', loss='mse', metrics='mse', initializer='uniform'):
    """Create and compile the biased-MF rating model with implicit feedback.

    prediction = mu + user_bias + item_bias
                 + dot(user_emb + mean(feedback_emb), item_emb)

    :param lambda_u: l2 penalty for user-side embeddings.
    :param lambda_v: l2 penalty for item-side embeddings.
    :param optimizer: despite the string default, the code reads
        `optimizer.optimizer` / `optimizer.kwargs`, so callers presumably
        pass a config object — TODO confirm.
    :param loss: tf.keras loss name (resolved via getattr).
    :param metrics: passed straight through to compile().
    :param initializer: initializer for user/feedback embeddings.
    """
    # init session on first time ref
    sess = self.session

    # user embedding
    user_InputLayer = Input(shape=(1,), dtype='int32', name='user_input')
    user_EmbeddingLayer = Embedding(input_dim=self.user_num,
                                    output_dim=self.embedding_dim,
                                    input_length=1,
                                    name='user_embedding',
                                    embeddings_regularizer=l2(lambda_u),
                                    embeddings_initializer=initializer)(user_InputLayer)
    user_EmbeddingLayer = Flatten(name='user_flatten')(user_EmbeddingLayer)

    # implicit feedback: variable-length item-id history; id 0 is padding
    # (mask_zero=True), hence the `item_num + 1` vocabulary size.
    feedback_InputLayer = Input(shape=(None,), dtype='int32', name='implicit_feedback')
    feedback_EmbeddingLayer = Embedding(input_dim=self.item_num + 1,
                                        output_dim=self.embedding_dim,
                                        name='implicit_feedback_embedding',
                                        embeddings_regularizer=l2(lambda_v),
                                        embeddings_initializer=initializer,
                                        mask_zero=True)(feedback_InputLayer)
    feedback_EmbeddingLayer = MeanPoolingLayer()(feedback_EmbeddingLayer)
    # user vector = free embedding + mean-pooled implicit-feedback embedding
    user_EmbeddingLayer = Add()([user_EmbeddingLayer, feedback_EmbeddingLayer])

    # user bias (zero-initialised scalar per user)
    user_BiasLayer = Embedding(input_dim=self.user_num, output_dim=1,
                               input_length=1, name='user_bias',
                               embeddings_regularizer=l2(lambda_u),
                               embeddings_initializer=Zeros())(user_InputLayer)
    user_BiasLayer = Flatten()(user_BiasLayer)

    # item embedding
    item_InputLayer = Input(shape=(1,), dtype='int32', name='item_input')
    item_EmbeddingLayer = Embedding(input_dim=self.item_num,
                                    output_dim=self.embedding_dim,
                                    input_length=1,
                                    name='item_embedding',
                                    embeddings_regularizer=l2(lambda_v),
                                    embeddings_initializer=RandomNormal(mean=0, stddev=1))(item_InputLayer)
    item_EmbeddingLayer = Flatten(name='item_flatten')(item_EmbeddingLayer)

    # item bias (zero-initialised scalar per item)
    item_BiasLayer = Embedding(input_dim=self.item_num, output_dim=1,
                               input_length=1, name='item_bias',
                               embeddings_regularizer=l2(lambda_v),
                               embeddings_initializer=Zeros())(item_InputLayer)
    item_BiasLayer = Flatten()(item_BiasLayer)

    # rating prediction
    dotLayer = Dot(axes=-1, name='dot_layer')([user_EmbeddingLayer, item_EmbeddingLayer])
    # add mu, user bias and item bias
    dotLayer = ConstantLayer(mu=self.mu)(dotLayer)
    dotLayer = Add()([dotLayer, user_BiasLayer])
    dotLayer = Add()([dotLayer, item_BiasLayer])

    # create model
    self._model = Model(inputs=[user_InputLayer, item_InputLayer, feedback_InputLayer], outputs=[dotLayer])

    # compile model
    optimizer_instance = getattr(tf.keras.optimizers, optimizer.optimizer)(**optimizer.kwargs)
    losses = getattr(tf.keras.losses, loss)
    self._model.compile(optimizer=optimizer_instance, loss=losses, metrics=metrics)

    # pick user_embedding and user_bias for aggregating
    # (keyed by the top-level variable scope name, e.g. "user_embedding")
    self._trainable_weights = {v.name.split("/")[0]: v for v in self._model.trainable_weights}
    LOGGER.debug(f"trainable weights {self._trainable_weights}")
    self._aggregate_weights = {"user_embedding": self._trainable_weights["user_embedding"],
                               "user_bias": self._trainable_weights["user_bias"]}
def build(self):
    """Build the bi-LSTM code-search models and store them on self.

    Produces: code_repr_model (methodname + apiseq + tokens -> code vector),
    desc_repr_model (desc -> vector), sim_model (cosine similarity) and
    training_model (margin ranking loss over good/bad description pairs).
    """
    # 1 -- CodeNN: the three code-side input channels (int token ids)
    methodname = Input(shape=(self.methname_len, ), dtype='int32', name='methodname')
    apiseq = Input(shape=(self.apiseq_len, ), dtype='int32', name='apiseq')
    tokens = Input(shape=(self.tokens_len, ), dtype='int32', name='tokens')

    # methodname
    # embedding layer, optionally warm-started from a .npy weight file
    init_emd_weights = np.load(self.data_dir + self.init_embed_weights_methodname) if self.init_embed_weights_methodname is not None else None
    init_emd_weights = init_emd_weights if init_emd_weights is None else [init_emd_weights]
    embedding = Embedding(input_dim=self.vocab_size,
                          output_dim=self.embed_dims,
                          weights=init_emd_weights,
                          mask_zero=False,
                          name='embedding_methodname')
    methodname_embedding = embedding(methodname)

    # dropout
    dropout = Dropout(0.25, name='dropout_methodname_embed')
    methodname_dropout = dropout(methodname_embedding)

    # forward rnn
    fw_rnn = LSTM(self.lstm_dims,
                  recurrent_dropout=0.2,
                  return_sequences=True,
                  name='lstm_methodname_fw')
    # backward rnn
    bw_rnn = LSTM(self.lstm_dims,
                  recurrent_dropout=0.2,
                  return_sequences=True,
                  go_backwards=True,
                  name='lstm_methodname_bw')
    methodname_fw = fw_rnn(methodname_dropout)
    methodname_bw = bw_rnn(methodname_dropout)
    dropout = Dropout(0.25, name='dropout_methodname_rnn')
    methodname_fw_dropout = dropout(methodname_fw)
    methodname_bw_dropout = dropout(methodname_bw)

    # max pooling over time, then concat fw/bw directions
    maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]),
                     name='maxpooling_methodname')
    methodname_pool = Concatenate(name='concat_methodname_lstm')(
        [maxpool(methodname_fw_dropout), maxpool(methodname_bw_dropout)])
    activation = Activation('tanh', name='active_methodname')
    methodname_repr = activation(methodname_pool)

    # apiseq
    # embedding layer (no pre-trained weights for this channel)
    embedding = Embedding(input_dim=self.vocab_size,
                          output_dim=self.embed_dims,
                          mask_zero=False,
                          name='embedding_apiseq')
    apiseq_embedding = embedding(apiseq)

    # dropout
    dropout = Dropout(0.25, name='dropout_apiseq_embed')
    apiseq_dropout = dropout(apiseq_embedding)

    # forward rnn
    fw_rnn = LSTM(self.lstm_dims,
                  return_sequences=True,
                  recurrent_dropout=0.2,
                  name='lstm_apiseq_fw')
    # backward rnn
    bw_rnn = LSTM(self.lstm_dims,
                  return_sequences=True,
                  recurrent_dropout=0.2,
                  go_backwards=True,
                  name='lstm_apiseq_bw')
    apiseq_fw = fw_rnn(apiseq_dropout)
    apiseq_bw = bw_rnn(apiseq_dropout)
    dropout = Dropout(0.25, name='dropout_apiseq_rnn')
    apiseq_fw_dropout = dropout(apiseq_fw)
    apiseq_bw_dropout = dropout(apiseq_bw)

    # max pooling
    maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]),
                     name='maxpooling_apiseq')
    apiseq_pool = Concatenate(name='concat_apiseq_lstm')(
        [maxpool(apiseq_fw_dropout), maxpool(apiseq_bw_dropout)])
    activation = Activation('tanh', name='active_apiseq')
    apiseq_repr = activation(apiseq_pool)

    # tokens
    # embedding layer, optionally warm-started from a .npy weight file
    init_emd_weights = np.load(self.data_dir + self.init_embed_weights_tokens) if self.init_embed_weights_tokens is not None else None
    init_emd_weights = init_emd_weights if init_emd_weights is None else [init_emd_weights]
    embedding = Embedding(input_dim=self.vocab_size,
                          output_dim=self.embed_dims,
                          weights=init_emd_weights,
                          mask_zero=False,
                          name='embedding_tokens')
    tokens_embedding = embedding(tokens)

    # dropout
    dropout = Dropout(0.25, name='dropout_tokens_embed')
    tokens_dropout = dropout(tokens_embedding)

    # forward rnn
    fw_rnn = LSTM(self.lstm_dims,
                  recurrent_dropout=0.2,
                  return_sequences=True,
                  name='lstm_tokens_fw')
    # backward rnn
    bw_rnn = LSTM(self.lstm_dims,
                  recurrent_dropout=0.2,
                  return_sequences=True,
                  go_backwards=True,
                  name='lstm_tokens_bw')
    tokens_fw = fw_rnn(tokens_dropout)
    tokens_bw = bw_rnn(tokens_dropout)
    dropout = Dropout(0.25, name='dropout_tokens_rnn')
    tokens_fw_dropout = dropout(tokens_fw)
    tokens_bw_dropout = dropout(tokens_bw)

    # max pooling
    maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]),
                     name='maxpooling_tokens')
    tokens_pool = Concatenate(name='concat_tokens_lstm')(
        [maxpool(tokens_fw_dropout), maxpool(tokens_bw_dropout)])
    activation = Activation('tanh', name='active_tokens')
    tokens_repr = activation(tokens_pool)

    # fusion methodname, apiseq, tokens -> single code representation
    merge_methname_api = Concatenate(name='merge_methname_api')(
        [methodname_repr, apiseq_repr])
    merge_code_repr = Concatenate(name='merge_code_repr')(
        [merge_methname_api, tokens_repr])
    code_repr = Dense(self.hidden_dims, activation='tanh', name='dense_coderepr')(merge_code_repr)

    self.code_repr_model = Model(inputs=[methodname, apiseq, tokens], outputs=[code_repr], name='code_repr_model')
    self.code_repr_model.summary()

    # 2 -- description channel (same embedding + bi-LSTM + pooling pipeline)
    desc = Input(shape=(self.desc_len, ), dtype='int32', name='desc')

    # desc
    # embedding layer
    init_emd_weights = np.load(self.data_dir + self.init_embed_weights_desc) if self.init_embed_weights_desc is not None else None
    init_emd_weights = init_emd_weights if init_emd_weights is None else [init_emd_weights]
    embedding = Embedding(input_dim=self.vocab_size,
                          output_dim=self.embed_dims,
                          weights=init_emd_weights,
                          mask_zero=False,
                          name='embedding_desc')
    desc_embedding = embedding(desc)

    # dropout
    dropout = Dropout(0.25, name='dropout_desc_embed')
    desc_dropout = dropout(desc_embedding)

    # forward rnn
    fw_rnn = LSTM(self.lstm_dims,
                  recurrent_dropout=0.2,
                  return_sequences=True,
                  name='lstm_desc_fw')
    # backward rnn
    bw_rnn = LSTM(self.lstm_dims,
                  recurrent_dropout=0.2,
                  return_sequences=True,
                  go_backwards=True,
                  name='lstm_desc_bw')
    desc_fw = fw_rnn(desc_dropout)
    desc_bw = bw_rnn(desc_dropout)
    dropout = Dropout(0.25, name='dropout_desc_rnn')
    desc_fw_dropout = dropout(desc_fw)
    desc_bw_dropout = dropout(desc_bw)

    # max pooling
    maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]),
                     name='maxpooling_desc')
    desc_pool = Concatenate(name='concat_desc_lstm')(
        [maxpool(desc_fw_dropout), maxpool(desc_bw_dropout)])
    activation = Activation('tanh', name='active_desc')
    desc_repr = activation(desc_pool)

    self.desc_repr_model = Model(inputs=[desc], outputs=[desc_repr], name='desc_repr_model')
    self.desc_repr_model.summary()

    # 3 -- cosine similarity between code and description vectors
    code_repr = self.code_repr_model([methodname, apiseq, tokens])
    desc_repr = self.desc_repr_model([desc])
    cos_sim = Dot(axes=1, normalize=True, name='cos_sim')([code_repr, desc_repr])

    sim_model = Model(inputs=[methodname, apiseq, tokens, desc], outputs=[cos_sim], name='sim_model')
    self.sim_model = sim_model
    self.sim_model.summary()

    # 4 -- training model: hinge loss max(1e-6, margin - sim_good + sim_bad)
    good_sim = sim_model(
        [self.methodname, self.apiseq, self.tokens, self.desc_good])
    bad_sim = sim_model(
        [self.methodname, self.apiseq, self.tokens, self.desc_bad])
    loss = Lambda(lambda x: K.maximum(1e-6, self.margin - x[0] + x[1]),
                  output_shape=lambda x: x[0],
                  name='loss')([good_sim, bad_sim])

    self.training_model = Model(inputs=[
        self.methodname, self.apiseq, self.tokens, self.desc_good,
        self.desc_bad
    ],
                                outputs=[loss],
                                name='training_model')
    self.training_model.summary()
output_dim=G.embedding_dimension, weights=[embeddingTwo]) word_embedding = shared_embedding_layer(word_index) word_embedding = Lambda(lambda x: x * 1)(word_embedding) context_embeddings = shared_embedding_layer2(context) negative_words_embedding = shared_embedding_layer(negative_samples) negative_words_embedding = Lambda(lambda x: x * 1)(negative_words_embedding) # Now the context words are averaged to get the CBOW vector cbow = Lambda(lambda x: K.mean(x, axis=1), output_shape=(G.embedding_dimension, ))(context_embeddings) # The context is multiplied (dot product) with current word and negative sampled words print(type(word_embedding)) print(type(cbow)) word_context_product = Dot(axes=-1)([word_embedding, cbow]) word_context_product = Lambda(lambda x: tf.math.sigmoid(x))( word_context_product) # word_context_product = Dense(1,activation = "sigmoid")(word_context_product) print(K.shape(word_embedding)) print(K.shape(word_context_product)) print(K.shape(cbow)) negative_context_product = Dot(axes=-1)([negative_words_embedding, cbow]) # negative_context_product = Dense(1, activation = "sigmoid")(negative_context_product) boost = 1 import sys if len(sys.argv) > 5: boost = float(sys.argv[5]) if boost > 1: negative_context_product = Lambda(lambda x: x * boost)(
def deepSimDEF_network(args, model_ind, max_ann_len=None, go_term_embedding_file_path=None, sub_ontology_interested=None, go_term_indeces=None, model_summary=False):
    """Instantiate (or reload from checkpoint) one deepSimDEF fold model.

    Builds a two-channel (gene pair) network: per sub-ontology GO-term
    embeddings are max-pooled and flattened into a gene vector; the two gene
    vectors are either compared with cosine similarity or concatenated and
    passed through dense (optionally highway) layers to one output unit.

    :param args: hyper-parameter namespace (attributes unpacked below).
    :param model_ind: fold index; used in layer names and checkpoint paths.
    :param max_ann_len: dict sub-ontology -> maximum annotation length.
    :param go_term_embedding_file_path: dict sub-ontology -> embedding file.
    :param sub_ontology_interested: iterable of sub-ontology keys.
    :param go_term_indeces: dict sub-ontology -> GO-term index mapping.
    :param model_summary: when True, print the Keras model summary.
    :return: a compiled Keras Model.
    """
    embedding_dim = args.embedding_dim
    activation_hidden = args.activation_hidden
    activation_highway = args.activation_highway
    activation_output = args.activation_output
    dropout = args.dropout
    embedding_dropout = args.embedding_dropout
    annotation_dropout = args.annotation_dropout
    pretrained_embedding = args.pretrained_embedding
    updatable_embedding = args.updatable_embedding
    loss = args.loss
    optimizer = args.optimizer
    learning_rate = args.learning_rate
    checkpoint = args.checkpoint
    verbose = args.verbose
    highway_layer = args.highway_layer
    cosine_similarity = args.cosine_similarity
    deepsimdef_mode = args.deepsimdef_mode

    _inputs = []  # used to represent the input data to the network (from different channels)
    _embeddings = {}  # used for weight-sharing of the embeddings
    _denses = []  # used for weight-sharing of dense layers whenever needed
    _Gene_channel = []  # for the middle part up-until highway

    if checkpoint:
        # Resume path: restore architecture + weights instead of building anew.
        with open('{}/model_{}.json'.format(checkpoint, model_ind + 1), 'r') as json_file:
            model = model_from_json(json_file.read())  # load the json model
        model.load_weights('{}/model_{}.h5'.format(checkpoint, model_ind + 1))  # load weights into new model
        if deepsimdef_mode == 'training':
            model.compile(loss=loss, optimizer=optimizer)
        if verbose:
            print("Loaded model {} from disk".format(model_ind + 1))
        return model

    for i in range(2):  # bottom-half of the network, 2 for 2 channels
        _GO_term_channel = []  # for bottom-half until flattening maxpooled embeddings
        for sbo in sub_ontology_interested:
            _inputs.append(Input(shape=(max_ann_len[sbo], ), dtype='int32'))
            if sbo in _embeddings:
                embedding_layer = _embeddings[sbo]  # for the second pair when we need weight-sharing
            else:
                if pretrained_embedding:
                    embedding_matrix = load_embedding(go_term_embedding_file_path[sbo], embedding_dim, go_term_indeces[sbo])
                    if verbose:
                        print("Loaded {} word vectors for {} (Model {})".format(len(embedding_matrix), sbo, model_ind + 1))
                    embedding_layer = Embedding(input_dim=len(go_term_indeces[sbo]) + 1,
                                                output_dim=embedding_dim,
                                                weights=[embedding_matrix],
                                                input_length=max_ann_len[sbo],
                                                trainable=updatable_embedding,
                                                name="embedding_{}_{}".format(sbo, model_ind))
                else:  # without using pre-trained word embedings
                    embedding_layer = Embedding(input_dim=len(go_term_indeces[sbo]) + 1,
                                                output_dim=embedding_dim,
                                                input_length=max_ann_len[sbo],
                                                name="embedding_{}_{}".format(sbo, model_ind))
                _embeddings[sbo] = embedding_layer
            GO_term_emb = embedding_layer(_inputs[-1])
            if 0 < annotation_dropout:
                GO_term_emb = DropAnnotation(annotation_dropout)(GO_term_emb)
            if 0 < embedding_dropout:
                GO_term_emb = SpatialDropout1D(embedding_dropout)(GO_term_emb)
            # Collapse the annotation sequence into one vector per sub-ontology.
            GO_term_emb = MaxPooling1D(pool_size=max_ann_len[sbo])(GO_term_emb)
            GO_term_emb = Flatten()(GO_term_emb)
            _GO_term_channel.append(GO_term_emb)
        # Gene vector = concat of its sub-ontology vectors (or the single one).
        Gene_emb = Concatenate(axis=-1)(_GO_term_channel) if 1 < len(sub_ontology_interested) else _GO_term_channel[0]
        # The dense layer is shared between the two gene channels.
        Dns = _denses[0] if len(_denses) == 1 else Dense(units=embedding_dim * len(sub_ontology_interested), activation=activation_hidden)
        _denses.append(Dns)
        Gene_emb = Dns(Gene_emb)
        Gene_emb = Dropout(dropout)(Gene_emb)
        _Gene_channel.append(Gene_emb)

    if cosine_similarity:
        preds = Dot(axes=1, normalize=True)(_Gene_channel)
    else:
        merge = Concatenate(axis=-1)(_Gene_channel)
        if highway_layer:
            # NOTE(review): statement grouping under this `if` was reconstructed
            # from whitespace-mangled source — confirm the dropout belongs here.
            merge = highway(merge, activation=activation_highway)
            merge = Dropout(dropout)(merge)
        merge = Dense(units=embedding_dim * len(sub_ontology_interested), activation=activation_hidden)(merge)
        merge = Dropout(dropout)(merge)
        preds = Dense(units=1, activation=activation_output)(merge)

    model = Model(inputs=_inputs, outputs=preds)
    model.compile(loss=loss, optimizer=optimizer)
    model.optimizer.lr = learning_rate  # setting the learning rate of the model optimizer
    if model_summary:
        print(model.summary())
    if verbose:
        print("Model for fold number {} instantiated!!\n".format(model_ind + 1))
    return model
def build(self):
    """Build the hybrid code-search models (transformer + LSTM tokens branch).

    Mirrors the pure-transformer variant, except the `tokens3` channel is
    encoded with an Embedding + bi-directional LSTM pipeline.  See the
    NOTE(review) comments in the tokens section for two suspicious spots.
    """
    # One transformer encoder per input channel.  NOTE: `droput_rate` (sic)
    # is the keyword actually declared by transformer.EncoderModel elsewhere
    # in the project, so the misspelling must be kept here.
    self.transformer_meth = transformer.EncoderModel(
        vocab_size=self.vocab_size, model_dim=self.hidden_dims,
        embed_dim=self.embed_dims, ffn_dim=self.lstm_dims, droput_rate=0.2,
        n_heads=8, max_len=self.meth_name_len, name='methT')
    self.transformer_apiseq = transformer.EncoderModel(
        vocab_size=self.vocab_size, model_dim=self.hidden_dims,
        embed_dim=self.embed_dims, ffn_dim=self.lstm_dims, droput_rate=0.2,
        n_heads=8, max_len=self.apiseq_len, name='apiseqT')
    self.transformer_desc = transformer.EncoderModel(
        vocab_size=self.vocab_size, model_dim=self.hidden_dims,
        embed_dim=self.embed_dims, ffn_dim=self.lstm_dims, droput_rate=0.2,
        n_heads=8, max_len=self.desc_len, name='descT')
    # self.transformer_ast = EncoderModel(vocab_size=self.vocab_size, model_dim=self.hidden_dims, embed_dim=self.embed_dims, ffn_dim=self.lstm_dims, droput_rate=0.2, n_heads=4, max_len=128)
    self.transformer_tokens = transformer.EncoderModel(
        vocab_size=self.vocab_size, model_dim=self.hidden_dims,
        embed_dim=self.embed_dims, ffn_dim=self.lstm_dims, droput_rate=0.2,
        n_heads=8, max_len=self.tokens_len, name='tokensT')

    # create path to store model Info

    # 1 -- CodeNN: the three code-side input channels (int token ids)
    meth_name = Input(shape=(self.meth_name_len,), dtype='int32', name='meth_name')
    apiseq = Input(shape=(self.apiseq_len,), dtype='int32', name='apiseq')
    tokens3 = Input(shape=(self.tokens_len,), dtype='int32', name='tokens3')

    # method name
    # embedding layer
    meth_name_out = self.transformer_meth(meth_name)
    # max pooling over the time axis
    maxpool = Lambda(lambda x: k.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]),
                     name='maxpooling_methodname')
    method_name_pool = maxpool(meth_name_out)
    activation = Activation('tanh', name='active_method_name')
    method_name_repr = activation(method_name_pool)

    # apiseq
    # embedding layer
    apiseq_out = self.transformer_apiseq(apiseq)
    # max pooling
    maxpool = Lambda(lambda x: k.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]),
                     name='maxpooling_apiseq')
    apiseq_pool = maxpool(apiseq_out)
    activation = Activation('tanh', name='active_apiseq')
    apiseq_repr = activation(apiseq_pool)

    # tokens
    # embedding layer (LSTM branch; optionally warm-started from .npy weights)
    init_emd_weights = np.load(
        self.data_dir + self.init_embed_weights_tokens) if self.init_embed_weights_tokens is not None else None
    init_emd_weights = init_emd_weights if init_emd_weights is None else [init_emd_weights]
    embedding = Embedding(
        input_dim=self.vocab_size,
        output_dim=self.embed_dims,
        weights=init_emd_weights,
        mask_zero=False,
        name='embedding_tokens'
    )
    tokens_embedding = embedding(tokens3)

    # dropout
    dropout = Dropout(0.25, name='dropout_tokens_embed')
    tokens_dropout = dropout(tokens_embedding)

    # forward rnn
    fw_rnn = LSTM(self.lstm_dims, return_sequences=True, name='lstm_tokens_fw')
    # backward rnn
    bw_rnn = LSTM(self.lstm_dims, return_sequences=True, go_backwards=True, name='lstm_tokens_bw')
    tokens_fw = fw_rnn(tokens_dropout)
    tokens_bw = bw_rnn(tokens_dropout)
    dropout = Dropout(0.25, name='dropout_tokens_rnn')
    tokens_fw_dropout = dropout(tokens_fw)
    tokens_bw_dropout = dropout(tokens_bw)

    # max pooling
    maxpool = Lambda(lambda x: k.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]),
                     name='maxpooling_tokens')
    tokens_pool = Concatenate(name='concat_tokens_lstm')([maxpool(tokens_fw_dropout), maxpool(tokens_bw_dropout)])
    # NOTE(review): the next line immediately overwrites the concatenated
    # LSTM pooling above, leaving the whole fw/bw LSTM branch as dead code —
    # confirm which path was actually intended.
    tokens_pool = maxpool(tokens_dropout)
    activation = Activation('tanh', name='active_tokens')
    tokens_repr = activation(tokens_pool)
    # NOTE(review): hard-codes batch size 128 and feature dim 256; this will
    # break for any other batch size — presumably a debugging shortcut.
    tokens_repr = tf.reshape(tokens_repr, [128, 256])

    # fusion method_name, apiseq, tokens -> single code representation
    merge_method_name_api = Concatenate(name='merge_methname_api')([method_name_repr, apiseq_repr])
    merge_code_repr = Concatenate(name='merge_code_repr')([merge_method_name_api, tokens_repr])
    print(merge_code_repr)
    code_repr = Dense(self.hidden_dims, activation='tanh', name='dense_coderepr')(merge_code_repr)

    self.code_repr_model = Model(inputs=[meth_name, apiseq, tokens3], outputs=[code_repr], name='code_repr_model')
    self.code_repr_model.summary()

    # self.output = Model(inputs=self.code_repr_model.input,
    #                     outputs=self.code_repr_model.get_layer('tokensT').output)
    # self.output.summary()

    # 2 -- description channel
    desc = Input(shape=(self.desc_len,), dtype='int32', name='desc')

    # desc
    # embedding layer
    desc_out = self.transformer_desc(desc)
    # max pooling
    maxpool = Lambda(lambda x: k.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]),
                     name='maxpooling_desc')
    desc_pool = maxpool(desc_out)
    activation = Activation('tanh', name='active_desc')
    desc_repr = activation(desc_pool)

    self.desc_repr_model = Model(inputs=[desc], outputs=[desc_repr], name='desc_repr_model')
    self.desc_repr_model.summary()

    # 3 -- cosine similarity between code and description vectors
    code_repr = self.code_repr_model([meth_name, apiseq, tokens3])
    desc_repr = self.desc_repr_model([desc])
    cos_sim = Dot(axes=1, normalize=True, name='cos_sim')([code_repr, desc_repr])

    sim_model = Model(inputs=[meth_name, apiseq, tokens3, desc], outputs=[cos_sim], name='sim_model')
    self.sim_model = sim_model
    self.sim_model.summary()

    # 4 -- training model: hinge loss max(1e-6, margin - (sim_good - sim_bad))
    good_sim = sim_model([self.meth_name, self.apiseq, self.tokens, self.desc_good])
    bad_sim = sim_model([self.meth_name, self.apiseq, self.tokens, self.desc_bad])
    loss = Lambda(lambda x: k.maximum(1e-6, self.margin - (x[0] - x[1])),
                  output_shape=lambda x: x[0], name='loss')([good_sim, bad_sim])

    self.training_model = Model(inputs=[self.meth_name, self.apiseq, self.tokens, self.desc_good, self.desc_bad],
                                outputs=[loss], name='training_model')
    self.training_model.summary()
def build(self, lambda_u=0.0001, lambda_v=0.0001, optimizer='rmsprop', loss='mse', metrics='mse', initializer='uniform'):
    """
    Init session and create model architecture.

    :param lambda_u: lambda value of l2 norm for user embeddings.
    :param lambda_v: lambda value of l2 norm for item embeddings.
    :param optimizer: optimizer config; `.optimizer` / `.kwargs` are read below.
    :param loss: loss type (tf.keras loss name).
    :param metrics: evaluation metrics.
    :param initializer: initializer of embedding
    :return:
    """
    # Touch the session property so it is created on first reference.
    sess = self.session

    # User tower: scalar id -> flattened embedding vector.
    u_in = Input(shape=(1, ), dtype='int32', name='user_input')
    u_emb = Embedding(input_dim=self.user_num,
                      output_dim=self.embedding_dim,
                      input_length=1,
                      name='user_embedding',
                      embeddings_regularizer=l2(lambda_u),
                      embeddings_initializer=initializer)(u_in)
    u_emb = Flatten(name='user_flatten')(u_emb)

    # Item tower: scalar id -> flattened embedding vector.
    i_in = Input(shape=(1, ), dtype='int32', name='item_input')
    i_emb = Embedding(input_dim=self.item_num,
                      output_dim=self.embedding_dim,
                      input_length=1,
                      name='item_embedding',
                      embeddings_regularizer=l2(lambda_v),
                      embeddings_initializer=initializer)(i_in)
    i_emb = Flatten(name='item_flatten')(i_emb)

    # Predicted rating is the dot product of the two embeddings.
    rating = Dot(axes=-1, name='dot_layer')([u_emb, i_emb])
    self._model = Model(inputs=[u_in, i_in], outputs=[rating])

    # Resolve optimizer/loss from tf.keras by name and compile.
    optimizer_instance = getattr(tf.keras.optimizers, optimizer.optimizer)(**optimizer.kwargs)
    losses = getattr(tf.keras.losses, loss)
    self._model.compile(optimizer=optimizer_instance, loss=losses, metrics=metrics)

    # Keep the user-side weights; only these are aggregated (federated setup).
    self._trainable_weights = {
        v.name.split("/")[0]: v
        for v in self._model.trainable_weights
    }
    self._aggregate_weights = {
        "user_embedding": self._trainable_weights["user_embedding"]
    }