def cnn_rnn_tmp(nb_words, EMBEDDING_DIM, \ embedding_matrix, MAX_SEQUENCE_LENGTH, \ num_rnn, num_dense, rate_drop_rnn, \ rate_drop_dense, act): ''' This is the more complex cnn rnn model model: input layer; embedding layer; more complex cnn based attention layer; rnn layer; dense layer; output layer ''' embedding_layer = Embedding(nb_words, EMBEDDING_DIM, weights=[embedding_matrix], input_length=MAX_SEQUENCE_LENGTH, trainable=True) rnn_layer = Bidirectional( GRU(num_rnn, dropout=rate_drop_rnn, recurrent_dropout=rate_drop_rnn)) cnn_layer = Conv1D(activation="relu", padding="valid", strides=1, filters=32, kernel_size=4) conv1 = Conv1D(filters=128, kernel_size=1, padding='same', activation='relu') conv2 = Conv1D(filters=128, kernel_size=2, padding='same', activation='relu') conv3 = Conv1D(filters=128, kernel_size=3, padding='same', activation='relu') conv4 = Conv1D(filters=128, kernel_size=4, padding='same', activation='relu') conv5 = Conv1D(filters=32, kernel_size=5, padding='same', activation='relu') conv6 = Conv1D(filters=32, kernel_size=6, padding='same', activation='relu') pooling_layer = GlobalMaxPooling1D() cnn_dense = Dense(300) cnn_dropout1 = Dropout(0.2) cnn_dropout2 = Dropout(0.2) cnn_batchnormalization = BatchNormalization() cnn_repeatvector = RepeatVector(EMBEDDING_DIM) cnn_dense1 = Dense(300) cnn_timedistributed = TimeDistributed(Dense(1)) sequence_1_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32') embedded_sequences_1 = embedding_layer(sequence_1_input) sequence_2_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32') embedded_sequences_2 = embedding_layer(sequence_2_input) conv1a = conv1(embedded_sequences_1) glob1a = GlobalAveragePooling1D()(conv1a) glob1a = Dropout(0.5)(glob1a) glob1a = BatchNormalization()(glob1a) conv1b = conv1(embedded_sequences_2) glob1b = GlobalAveragePooling1D()(conv1b) glob1b = Dropout(0.5)(glob1b) glob1b = BatchNormalization()(glob1b) conv2a = conv2(embedded_sequences_1) glob2a = GlobalAveragePooling1D()(conv2a) glob2a = Dropout(0.5)(glob2a) glob2a = BatchNormalization()(glob2a) conv2b = conv2(embedded_sequences_2) glob2b = GlobalAveragePooling1D()(conv2b) glob2b = Dropout(0.5)(glob2b) glob2b = BatchNormalization()(glob2b) conv3a = conv3(embedded_sequences_1) glob3a = GlobalAveragePooling1D()(conv3a) glob3a = Dropout(0.5)(glob3a) glob3a = BatchNormalization()(glob3a) conv3b = conv3(embedded_sequences_2) glob3b = GlobalAveragePooling1D()(conv3b) glob3b = Dropout(0.5)(glob3b) glob3b = BatchNormalization()(glob3b) conv4a = conv4(embedded_sequences_1) glob4a = GlobalAveragePooling1D()(conv4a) glob4a = Dropout(0.5)(glob4a) glob4a = BatchNormalization()(glob4a) conv4b = conv4(embedded_sequences_2) glob4b = GlobalAveragePooling1D()(conv4b) glob4b = Dropout(0.5)(glob4b) glob4b = BatchNormalization()(glob4b) conv5a = conv5(embedded_sequences_1) glob5a = GlobalAveragePooling1D()(conv5a) glob5a = Dropout(0.5)(glob5a) glob5a = BatchNormalization()(glob5a) conv5b = conv5(embedded_sequences_2) glob5b = GlobalAveragePooling1D()(conv5b) glob5b = Dropout(0.5)(glob5b) glob5b = BatchNormalization()(glob5b) conv6a = conv6(embedded_sequences_1) glob6a = GlobalAveragePooling1D()(conv6a) glob6a = Dropout(0.5)(glob6a) glob6a = BatchNormalization()(glob6a) conv6b = conv6(embedded_sequences_2) glob6b = GlobalAveragePooling1D()(conv6b) glob6b = Dropout(0.5)(glob6b) glob6b = BatchNormalization()(glob6b) cnn_1 = concatenate([glob1a, glob2a, glob3a, glob4a, glob5a, glob6a]) cnn_2 = concatenate([glob1b, glob2b, glob3b, glob4b, glob5b, glob6b]) cnn_1_t = 
cnn_dense1(cnn_1) cnn_2_t = cnn_dense1(cnn_2) a1 = multiply([cnn_1_t, embedded_sequences_1]) a2 = multiply([cnn_2_t, embedded_sequences_2]) a1 = Permute([2, 1])(a1) a2 = Permute([2, 1])(a2) a1 = Lambda(lambda x: K.sum(x, axis=1))(a1) a2 = Lambda(lambda x: K.sum(x, axis=1))(a2) a1 = Activation('sigmoid')(a1) a2 = Activation('sigmoid')(a2) embedded_sequences_1 = Permute([2, 1])(embedded_sequences_1) embedded_sequences_2 = Permute([2, 1])(embedded_sequences_2) x1 = multiply([a1, embedded_sequences_1]) x2 = multiply([a2, embedded_sequences_2]) x1 = Permute([2, 1])(x1) x2 = Permute([2, 1])(x2) x1 = rnn_layer(x1) x2 = rnn_layer(x2) merged = multiply([x1, x2]) merged = Dropout(rate_drop_dense)(merged) merged = BatchNormalization()(merged) merged = Dense(num_dense, activation=act)(merged) merged = Dropout(rate_drop_dense)(merged) merged = BatchNormalization()(merged) preds = Dense(3, activation='softmax')(merged) ######################################## ## train the model ######################################## model = Model(inputs=[sequence_1_input, sequence_2_input], outputs=preds) model.compile(loss='categorical_crossentropy', optimizer='nadam', metrics=['acc']) model.summary() # print(STAMP) return model
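# --- Usage sketch (not from the original source). Shows one plausible way to build and fit the
# --- model returned by cnn_rnn_tmp. All sizes and hyper-parameters below are illustrative
# --- assumptions; note EMBEDDING_DIM is set to 300 so the embedding width matches the
# --- 300-unit cnn_dense1 projection that the attention multiply relies on.
import numpy as np

NB_WORDS, EMB_DIM, MAX_LEN = 20000, 300, 30
dummy_embedding_matrix = np.random.normal(size=(NB_WORDS, EMB_DIM))

model = cnn_rnn_tmp(NB_WORDS, EMB_DIM, dummy_embedding_matrix, MAX_LEN,
                    num_rnn=128, num_dense=128,
                    rate_drop_rnn=0.25, rate_drop_dense=0.25, act='relu')

# two integer-encoded sentences per sample, 3-way softmax target
x1 = np.random.randint(1, NB_WORDS, size=(256, MAX_LEN))
x2 = np.random.randint(1, NB_WORDS, size=(256, MAX_LEN))
y = np.eye(3)[np.random.randint(0, 3, size=256)]
model.fit([x1, x2], y, batch_size=64, epochs=1)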
def cnn_rnn(nb_words, EMBEDDING_DIM, \ embedding_matrix, MAX_SEQUENCE_LENGTH, \ num_rnn, num_dense, rate_drop_rnn, \ rate_drop_dense, act): ''' This is the basic cnn rnn model model: input layer; embedding layer; cnn based attention layer; rnn layer; dense layer; output layer ''' embedding_layer = Embedding(nb_words, EMBEDDING_DIM, weights=[embedding_matrix], input_length=MAX_SEQUENCE_LENGTH, trainable=False) rnn_layer = Bidirectional( GRU(num_rnn, dropout=rate_drop_rnn, recurrent_dropout=rate_drop_rnn)) cnn_layer = Conv1D(activation="relu", padding="valid", strides=1, filters=128, kernel_size=2) # cnn_layer1 = Conv1D(activation="relu", padding="valid", strides=1, filters=64, kernel_size=4) pooling_layer = GlobalMaxPooling1D() cnn_dense = Dense(300) cnn_dropout1 = Dropout(0.35) cnn_dropout2 = Dropout(0.35) cnn_batchnormalization = BatchNormalization() cnn_repeatvector = RepeatVector(EMBEDDING_DIM) cnn_dense1 = Dense(300) cnn_timedistributed = TimeDistributed(Dense(1)) sequence_1_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32') embedded_sequences_1 = embedding_layer(sequence_1_input) sequence_2_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32') embedded_sequences_2 = embedding_layer(sequence_2_input) cnn_1 = cnn_layer(embedded_sequences_1) # cnn_1 = cnn_layer1(cnn_1) cnn_1 = pooling_layer(cnn_1) cnn_1 = cnn_dropout1(cnn_1) cnn_1 = cnn_dense(cnn_1) cnn_1 = cnn_dropout2(cnn_1) cnn_1 = cnn_batchnormalization(cnn_1) cnn_2 = cnn_layer(embedded_sequences_2) # cnn_2 = cnn_layer1(cnn_2) cnn_2 = pooling_layer(cnn_2) cnn_2 = cnn_dropout1(cnn_2) cnn_2 = cnn_dense(cnn_2) cnn_2 = cnn_dropout2(cnn_2) cnn_2 = cnn_batchnormalization(cnn_2) # cnn_1 = cnn_repeatvector(cnn_1) # cnn_2 = cnn_repeatvector(cnn_2) cnn_1_t = cnn_dense1(cnn_1) cnn_2_t = cnn_dense1(cnn_2) # cnn_1_t = cnn_timedistributed(cnn_1) # cnn_2_t = cnn_timedistributed(cnn_2) # cnn_1_t = Permute([2, 1])(cnn_1_t) # cnn_2_t = Permute([2, 1])(cnn_2_t) a1 = multiply([cnn_1_t, embedded_sequences_1]) a2 = multiply([cnn_2_t, embedded_sequences_2]) a1 = Permute([2, 1])(a1) a2 = Permute([2, 1])(a2) a1 = Lambda(lambda x: K.sum(x, axis=1))(a1) a2 = Lambda(lambda x: K.sum(x, axis=1))(a2) a1 = Activation('softmax')(a1) a2 = Activation('softmax')(a2) embedded_sequences_1 = Permute([2, 1])(embedded_sequences_1) embedded_sequences_2 = Permute([2, 1])(embedded_sequences_2) x1 = multiply([a1, embedded_sequences_1]) x2 = multiply([a2, embedded_sequences_2]) x1 = Permute([2, 1])(x1) x2 = Permute([2, 1])(x2) x1 = rnn_layer(x1) x2 = rnn_layer(x2) merged = multiply([x1, x2]) merged = Dropout(rate_drop_dense)(merged) merged = BatchNormalization()(merged) merged = Dense(num_dense, activation=act)(merged) merged = Dropout(rate_drop_dense)(merged) merged = BatchNormalization()(merged) preds = Dense(3, activation='softmax')(merged) # x1 = TimeDistributed(Dense(EMBEDDING_DIM, activation='relu'))(embedded_sequences_1) # x1 = Lambda(lambda x: K.max(x, axis=1), output_shape=(EMBEDDING_DIM, ))(x1) # y1 = TimeDistributed(Dense(EMBEDDING_DIM, activation='relu'))(embedded_sequences_2) # y1 = Lambda(lambda x: K.max(x, axis=1), output_shape=(EMBEDDING_DIM, ))(y1) ######################################## ## train the model ######################################## model = Model(inputs=[sequence_1_input, sequence_2_input], outputs=preds) model.compile(loss='categorical_crossentropy', optimizer='nadam', metrics=['acc']) model.summary() # print(STAMP) return model
def __init__(self, title_word_length, content_word_length, title_char_length, content_char_length, fs_btm_tw_cw_length, fs_btm_tc_length, class_num, word_embedding_matrix, char_embedding_matrix, optimizer_name, lr, metrics): # set attributes self.title_word_length = title_word_length self.content_word_length = content_word_length self.title_char_length = title_char_length self.content_char_length = content_char_length self.fs_btm_tw_cw_length = fs_btm_tw_cw_length self.fs_btm_tc_length = fs_btm_tc_length self.class_num = class_num self.word_embedding_matrix = word_embedding_matrix self.char_embedding_matrix = char_embedding_matrix self.optimizer_name = optimizer_name self.lr = lr self.metrics = metrics # Placeholder for input (title and content) title_word_input = Input(shape=(title_word_length, ), dtype='int32', name="title_word_input") cont_word_input = Input(shape=(content_word_length, ), dtype='int32', name="content_word_input") title_char_input = Input(shape=(title_char_length, ), dtype='int32', name="title_char_input") cont_char_input = Input(shape=(content_char_length, ), dtype='int32', name="content_char_input") # Embedding layer with K.tf.device("/cpu:0"): word_embedding_layer = Embedding(len(word_embedding_matrix), 256, weights=[word_embedding_matrix], trainable=True, name='word_embedding') title_word_emb = word_embedding_layer(title_word_input) cont_word_emb = word_embedding_layer(cont_word_input) char_embedding_layer = Embedding(len(char_embedding_matrix), 256, weights=[char_embedding_matrix], trainable=True, name='char_embedding') title_char_emb = char_embedding_layer(title_char_input) cont_char_emb = char_embedding_layer(cont_char_input) # Create a convolution + max pooling layer title_content_features = list() for win_size in range(1, 8): # batch_size x doc_len x embed_size title_content_features.append( GlobalMaxPooling1D()(Conv1D(100, win_size, activation='relu', padding='same')(title_word_emb))) title_content_features.append( GlobalMaxPooling1D()(Conv1D(100, win_size, activation='relu', padding='same')(cont_word_emb))) title_content_features.append( GlobalMaxPooling1D()(Conv1D(100, win_size, activation='relu', padding='same')(title_char_emb))) title_content_features.append( GlobalMaxPooling1D()(Conv1D(100, win_size, activation='relu', padding='same')(cont_char_emb))) # add btm_tw_cw features + btm_tc features fs_btm_tw_cw_input = Input(shape=(fs_btm_tw_cw_length, ), dtype='float32', name="fs_btm_tw_cw_input") fs_btm_tc_input = Input(shape=(fs_btm_tc_length, ), dtype='float32', name="fs_btm_tc_input") fs_btm_raw_features = concatenate( [fs_btm_tw_cw_input, fs_btm_tc_input]) fs_btm_emb_features = Dense( 1024, activation='relu', name='fs_btm_embedding')(fs_btm_raw_features) fs_btm_emb_features = Dropout( 0.5, name='fs_btm_embedding_dropout')(fs_btm_emb_features) title_content_features.append(fs_btm_emb_features) title_content_features = concatenate(title_content_features) # Full connection title_content_features = Dense( 3600, activation='relu', name='fs_embedding')(title_content_features) title_content_features = Dropout( 0.5, name='fs_embedding_dropout')(title_content_features) # Prediction preds = Dense(class_num, activation='sigmoid', name='prediction')(title_content_features) self._model = Model([ title_word_input, cont_word_input, title_char_input, cont_char_input, fs_btm_tw_cw_input, fs_btm_tc_input ], preds) if 'rmsprop' == optimizer_name: optimizer = optimizers.RMSprop(lr=lr) elif 'adam' == optimizer_name: optimizer = optimizers.Adam(lr=lr, beta_1=0.9, beta_2=0.999, 
epsilon=1e-08) else: optimizer = None self._model.compile(loss=binary_crossentropy_sum, optimizer=optimizer, metrics=metrics) self._model.summary()
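# --- binary_crossentropy_sum is used above but not defined in this excerpt. A minimal sketch of
# --- one plausible definition (an assumption, not the original code): per-sample binary
# --- cross-entropy summed over the label axis, a common objective for multi-label sigmoid outputs.
from keras import backend as K

def binary_crossentropy_sum(y_true, y_pred):
    # sum (rather than average) the element-wise binary cross-entropy over the classes
    return K.sum(K.binary_crossentropy(y_true, y_pred), axis=-1)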
batch_size, vocabulary, skip_step=num_steps) hidden_size = 500 use_dropout = True ################################# # showing difference between time distributed and not time distributed # return sequence and not return sequence. # this site = key. # https://machinelearningmastery.com/timedistributed-layer-for-long-short-term-memory-networks-in-python/ #''' model = Sequential() model.add(Embedding(vocabulary, hidden_size, input_length=num_steps)) model.add(LSTM(hidden_size, return_sequences=True)) model.add(LSTM(hidden_size, return_sequences=False)) if use_dropout: model.add(Dropout(0.5)) model.add(Dense(vocabulary)) model.add(Activation('softmax')) #''' ''' model = Sequential() model.add(Embedding(vocabulary, hidden_size, input_length=num_steps)) model.add(LSTM(hidden_size, return_sequences=True)) model.add(LSTM(hidden_size, return_sequences=True)) if use_dropout: model.add(Dropout(0.5)) model.add(TimeDistributed(Dense(vocabulary)))
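# --- Shape sketch (not from the original source) contrasting the two endings described in the
# --- comments above. Assumed sizes: vocabulary=10000, num_steps=30, hidden_size=500.
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense, TimeDistributed

vocab, steps, hidden = 10000, 30, 500

# return_sequences=False: one hidden vector per sequence -> one next-word distribution
m1 = Sequential([Embedding(vocab, hidden, input_length=steps),
                 LSTM(hidden, return_sequences=False),
                 Dense(vocab, activation='softmax')])
print(m1.output_shape)  # (None, 10000)

# return_sequences=True + TimeDistributed: a word distribution at every timestep
m2 = Sequential([Embedding(vocab, hidden, input_length=steps),
                 LSTM(hidden, return_sequences=True),
                 TimeDistributed(Dense(vocab, activation='softmax'))])
print(m2.output_shape)  # (None, 30, 10000)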
y_one_hot_labels = np.asarray(y_one_hot)

# split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_seq, y_one_hot_labels, test_size=0.2)

num_words = 2000
vec_size = 128
output_shape = 2

# build the model
data_input = Input(shape=[maxlen])
word_vec = Embedding(input_dim=num_words + 1,
                     input_length=maxlen,
                     output_dim=vec_size,
                     mask_zero=False,
                     name='Embedding')(data_input)
x = Conv1D(filters=128, kernel_size=[3], strides=1, padding='same', activation='relu')(word_vec)
x = GlobalMaxPool1D()(x)
x = Dropout(0.1)(x)
x = Dense(500, activation='relu')(x)
x = Dropout(0.1)(x)
x = Dense(output_shape, activation='softmax')(x)
model = Model(inputs=data_input, outputs=x)
model.compile(loss='categorical_crossentropy', optimizer='adam',
''' You've already prepared your sequences of text, with each of the sequences consisting of four words. It's time to build your LSTM model! Your model will be trained on the first three words of each sequence, predicting the 4th one. You are going to use an Embedding layer that will essentially learn to turn words into vectors. These vectors will then be passed to a simple LSTM layer. Our output is a Dense layer with as many neurons as words in the vocabulary and softmax activation. This is because we want to obtain the highest probable next word out of all possible words. The size of the vocabulary of words (the unique number of words) is stored in vocab_size. ''' # Import the Embedding, LSTM and Dense layer from keras.layers import Embedding, LSTM, Dense model = Sequential() # Add an Embedding layer with the right parameters model.add(Embedding(input_dim=vocab_size, output_dim=8, input_length=3)) # Add a 32 unit LSTM layer model.add(LSTM(32)) # Add a hidden Dense layer of 32 units and an output layer of vocab_size with softmax model.add(Dense(32, activation='relu')) model.add(Dense(vocab_size, activation='softmax')) model.summary()
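# --- Training sketch (not part of the original exercise code). Assumptions: X holds the first
# --- three word ids of each sequence, shape (n_samples, 3); y is the one-hot encoded 4th word,
# --- shape (n_samples, vocab_size); the epoch count is illustrative.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X, y, epochs=100, verbose=0)

# predict the most probable next word id for a new 3-word seed (hypothetical seed_ids array)
# next_word_id = model.predict(seed_ids).argmax(axis=-1)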
# Use same mean and stdev of embeddings the GloVe has when generating the random init. all_embs = np.stack(embeddings_index.values()) emb_mean, emb_std = all_embs.mean(), all_embs.std() word_index = tokenizer.word_index nb_words = min(max_features, len(word_index)) embedding_matrix = np.random.normal(emb_mean, emb_std, (nb_words, embed_size)) for word, i in word_index.items(): if i >= max_features: continue embedding_vector = embeddings_index.get(word) if embedding_vector is not None: embedding_matrix[i] = embedding_vector # CNN model model = Sequential() model.add( Embedding(max_features, embed_size, weights=[embedding_matrix], trainable=False)) model.add(Conv1D(128, 7, activation='relu', padding='same')) model.add(MaxPooling1D(2)) model.add(Conv1D(128, 7, activation='relu', padding='same')) model.add(GlobalMaxPooling1D()) model.add(Dropout(dropout_rate)) model.add(Dense(32, activation='relu')) model.add(Dense(6, activation='sigmoid')) #multi-label (k-hot encoding) adam = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
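# --- Continuation sketch (not in the original excerpt): compiling with the adam optimizer defined
# --- above. binary_crossentropy matches the 6 independent sigmoid outputs (k-hot labels);
# --- x_train / y_train, batch size and epochs are illustrative assumptions.
model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])
# model.fit(x_train, y_train, batch_size=256, epochs=2, validation_split=0.1)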
def build(self): ''' 1. Build Code Representation Model ''' logger.debug('Building Code Representation Model') methname = Input(shape=(self.data_params['methname_len'], ), dtype='int32', name='methname') apiseq = Input(shape=(self.data_params['apiseq_len'], ), dtype='int32', name='apiseq') tokens = Input(shape=(self.data_params['tokens_len'], ), dtype='int32', name='tokens') ## method name representation ## #1.embedding init_emb_weights = np.load( self.config['workdir'] + self.model_params['init_embed_weights_methname'] ) if self.model_params[ 'init_embed_weights_methname'] is not None else None init_emb_weights = init_emb_weights if init_emb_weights is None else [ init_emb_weights ] embedding = Embedding( input_dim=self.data_params['n_words'], output_dim=self.model_params.get('n_embed_dims', 100), weights=init_emb_weights, mask_zero= False, #Whether 0 in the input is a special "padding" value that should be masked out. #If set True, all subsequent layers in the model must support masking, otherwise an exception will be raised. name='embedding_methname') methname_embedding = embedding(methname) dropout = Dropout(0.25, name='dropout_methname_embed') methname_dropout = dropout(methname_embedding) #2.rnn f_rnn = LSTM(self.model_params.get('n_lstm_dims', 128), recurrent_dropout=0.2, return_sequences=True, name='lstm_methname_f') b_rnn = LSTM(self.model_params.get('n_lstm_dims', 128), return_sequences=True, recurrent_dropout=0.2, name='lstm_methname_b', go_backwards=True) methname_f_rnn = f_rnn(methname_dropout) methname_b_rnn = b_rnn(methname_dropout) dropout = Dropout(0.25, name='dropout_methname_rnn') methname_f_dropout = dropout(methname_f_rnn) methname_b_dropout = dropout(methname_b_rnn) #3.maxpooling maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False), output_shape=lambda x: (x[0], x[2]), name='maxpool_methname') methname_pool = Concatenate(name='concat_methname_lstms')( [maxpool(methname_f_dropout), maxpool(methname_b_dropout)]) activation = Activation('tanh', name='active_methname') methname_repr = activation(methname_pool) ## API Sequence Representation ## #1.embedding embedding = Embedding( input_dim=self.data_params['n_words'], output_dim=self.model_params.get('n_embed_dims', 100), #weights=weights, mask_zero= False, #Whether 0 in the input is a special "padding" value that should be masked out. #If set True, all subsequent layers must support masking, otherwise an exception will be raised. 
name='embedding_apiseq') apiseq_embedding = embedding(apiseq) dropout = Dropout(0.25, name='dropout_apiseq_embed') apiseq_dropout = dropout(apiseq_embedding) #2.rnn f_rnn = LSTM(self.model_params.get('n_lstm_dims', 100), return_sequences=True, recurrent_dropout=0.2, name='lstm_apiseq_f') b_rnn = LSTM(self.model_params.get('n_lstm_dims', 100), return_sequences=True, recurrent_dropout=0.2, name='lstm_apiseq_b', go_backwards=True) apiseq_f_rnn = f_rnn(apiseq_dropout) apiseq_b_rnn = b_rnn(apiseq_dropout) dropout = Dropout(0.25, name='dropout_apiseq_rnn') apiseq_f_dropout = dropout(apiseq_f_rnn) apiseq_b_dropout = dropout(apiseq_b_rnn) #3.maxpooling maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False), output_shape=lambda x: (x[0], x[2]), name='maxpool_apiseq') apiseq_pool = Concatenate(name='concat_apiseq_lstms')( [maxpool(apiseq_f_dropout), maxpool(apiseq_b_dropout)]) activation = Activation('tanh', name='active_apiseq') apiseq_repr = activation(apiseq_pool) ## Tokens Representation ## #1.embedding init_emb_weights = np.load( self.config['workdir'] + self.model_params['init_embed_weights_tokens'] ) if self.model_params[ 'init_embed_weights_tokens'] is not None else None init_emb_weights = init_emb_weights if init_emb_weights is None else [ init_emb_weights ] embedding = Embedding( input_dim=self.data_params['n_words'], output_dim=self.model_params.get('n_embed_dims', 100), weights=init_emb_weights, #mask_zero=True,#Whether 0 in the input is a special "padding" value that should be masked out. #If set True, all subsequent layers must support masking, otherwise an exception will be raised. name='embedding_tokens') tokens_embedding = embedding(tokens) dropout = Dropout(0.25, name='dropout_tokens_embed') tokens_dropout = dropout(tokens_embedding) #4.maxpooling maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False), output_shape=lambda x: (x[0], x[2]), name='maxpool_tokens') tokens_pool = maxpool(tokens_dropout) activation = Activation('tanh', name='active_tokens') tokens_repr = activation(tokens_pool) ## concatenate the representation of code ## merged_methname_api = Concatenate(name='merge_methname_api')( [methname_repr, apiseq_repr]) merged_code_repr = Concatenate(name='merge_coderepr')( [merged_methname_api, tokens_repr]) code_repr = Dense(self.model_params.get('n_hidden', 400), activation='tanh', name='dense_coderepr')(merged_code_repr) self._code_repr_model = Model(inputs=[methname, apiseq, tokens], outputs=[code_repr], name='code_repr_model') print('\nsummary of code representation model') self._code_repr_model.summary() fname = self.config['workdir'] + 'models/' + self.model_params[ 'model_name'] + '/_code_repr_model.png' #plot_model(self._code_repr_model, show_shapes=True, to_file=fname) ''' 2. Build Desc Representation Model ''' ## Desc Representation ## logger.debug('Building Desc Representation Model') desc = Input(shape=(self.data_params['desc_len'], ), dtype='int32', name='desc') #1.embedding init_emb_weights = np.load( self.config['workdir'] + self.model_params['init_embed_weights_desc'] ) if self.model_params['init_embed_weights_desc'] is not None else None init_emb_weights = init_emb_weights if init_emb_weights is None else [ init_emb_weights ] embedding = Embedding( input_dim=self.data_params['n_words'], output_dim=self.model_params.get('n_embed_dims', 100), weights=init_emb_weights, mask_zero= True, #Whether 0 in the input is a special "padding" value that should be masked out. 
#If set True, all subsequent layers must support masking, otherwise an exception will be raised. name='embedding_desc') desc_embedding = embedding(desc) dropout = Dropout(0.25, name='dropout_desc_embed') desc_dropout = dropout(desc_embedding) #2. rnn f_rnn = LSTM(self.model_params.get('n_lstm_dims', 100), return_sequences=True, recurrent_dropout=0.2, name='lstm_desc_f') b_rnn = LSTM(self.model_params.get('n_lstm_dims', 100), return_sequences=True, recurrent_dropout=0.2, name='lstm_desc_b', go_backwards=True) desc_f_rnn = f_rnn(desc_dropout) desc_b_rnn = b_rnn(desc_dropout) dropout = Dropout(0.25, name='dropout_desc_rnn') desc_f_dropout = dropout(desc_f_rnn) desc_b_dropout = dropout(desc_b_rnn) #3. maxpooling maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False), output_shape=lambda x: (x[0], x[2]), name='maxpool_desc') desc_pool = Concatenate(name='concat_desc_rnns')( [maxpool(desc_f_dropout), maxpool(desc_b_dropout)]) activation = Activation('tanh', name='active_desc') desc_repr = activation(desc_pool) self._desc_repr_model = Model(inputs=[desc], outputs=[desc_repr], name='desc_repr_model') print('\nsummary of desc representation model') self._desc_repr_model.summary() fname = self.config['workdir'] + 'models/' + self.model_params[ 'model_name'] + '/_desc_repr_model.png' #plot_model(self._desc_repr_model, show_shapes=True, to_file=fname) """ 3: calculate the cosine similarity between code and desc """ logger.debug('Building similarity model') code_repr = self._code_repr_model([methname, apiseq, tokens]) desc_repr = self._desc_repr_model([desc]) cos_sim = Dot(axes=1, normalize=True, name='cos_sim')([code_repr, desc_repr]) sim_model = Model(inputs=[methname, apiseq, tokens, desc], outputs=[cos_sim], name='sim_model') self._sim_model = sim_model #for model evaluation print("\nsummary of similarity model") self._sim_model.summary() fname = self.config['workdir'] + 'models/' + self.model_params[ 'model_name'] + '/_sim_model.png' #plot_model(self._sim_model, show_shapes=True, to_file=fname) ''' 4:Build training model ''' good_sim = sim_model( [self.methname, self.apiseq, self.tokens, self.desc_good]) # similarity of good output bad_sim = sim_model( [self.methname, self.apiseq, self.tokens, self.desc_bad]) #similarity of bad output loss = Lambda(lambda x: K.maximum( 1e-6, self.model_params['margin'] - x[0] + x[1]), output_shape=lambda x: x[0], name='loss')([good_sim, bad_sim]) logger.debug('Building training model') self._training_model = Model(inputs=[ self.methname, self.apiseq, self.tokens, self.desc_good, self.desc_bad ], outputs=[loss], name='training_model') print('\nsummary of training model') self._training_model.summary() fname = self.config['workdir'] + 'models/' + self.model_params[ 'model_name'] + '/_training_model.png'
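# --- Compilation sketch (an assumption about how such a training model is typically used, not
# --- code from this excerpt): the margin ranking loss is already computed inside the graph by
# --- the 'loss' Lambda, so the model can be compiled with a pass-through loss that ignores
# --- y_true and simply minimises the Lambda output.
from keras import backend as K

def compile_training_model(training_model, optimizer='adam'):
    # y_true is a dummy target; the graph's own 'loss' output is minimised directly
    training_model.compile(loss=lambda y_true, y_pred: K.mean(y_pred, axis=-1),
                           optimizer=optimizer)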
##====================
# helper functions for building Keras layers
# define a slice operation
def slice(x, index):
    return x[:, :, index]


##===========================
# build base
# use max pooling rather than k-max pooling; experiments showed max pooling works slightly better than k-max pooling
print('Build model...')
main_input = Input(shape=(maxlen, ), dtype='int32')
embedding_map = Embedding(output_dim=embedding_dims,
                          input_dim=max_features,
                          input_length=maxlen,
                          W_regularizer=l2(reg_conf[0]))(main_input)
##
convs = []
for index in range(embedding_dims):
    # print("i:", index)
    t = Lambda(slice,
               output_shape=(maxlen, 1),
               arguments={'index': index},
               name='slice_' + str(index + 1))(embedding_map)
    x = Reshape((maxlen, 1, 1))(t)  # (batch, height, width, channels)
    # first conv and pooling layer
    x = Convolution2D(m1, w1,
print(len(input_test), 'test sequences') print(input_train[0]) # Reverse sequences # input_train = [x[::-1] for x in input_train] # input_test = [x[::-1] for x in input_test] print('Pad sequences (samples x time)') input_train = sequence.pad_sequences(input_train, maxlen=maxlen) input_test = sequence.pad_sequences(input_test, maxlen=maxlen) print(input_train[0]) print('input_train shape:', input_train.shape) print('input_test shape:', input_test.shape) model = Sequential() model.add(Embedding(max_features, 32)) model.add(Bidirectional(LSTM(32))) # model.add(LSTM(32)) # model.add(SimpleRNN(32)) model.add(Dense(1, activation='sigmoid')) model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc']) history = model.fit(input_train, y_train, epochs=10, batch_size=128, validation_split=0.2) print(history.history) acc = history.history['acc']
def build_rnn_search_model(source_vacabuary_size, source_embedding_dim, source_initia_embedding,
                           encoder_rnn_output_dim_list, attention_context_dim,
                           decoder_rnn_output_dim, decoder_rnn_output_dropout_rate,
                           target_vacabuary_size, target_embedding_dim, target_initia_embedding,
                           decoder_hidden_unit_numbers, decoder_hidden_unit_activation_functions,
                           beam_search_max_output_length, beam_size,
                           optimizer='rmsprop', weight_regularizer=None, devices=None):
    # TODO: apply constraints
    source_word = Input((None, ), dtype='int32')
    source_word_mask = Input((None, ), dtype='int32')
    # source_word = trim_right_padding(source_word)
    source_embedding = Embedding(source_vacabuary_size, source_embedding_dim,
                                 weights=[source_initia_embedding],
                                 W_regularizer=weight_regularizer)
    encoder_output = source_embedding(source_word)

    # multiple bi-directional rnn layers
    for encoder_rnn_output_dim in encoder_rnn_output_dim_list:
        recurrent_left_to_right = GRU(encoder_rnn_output_dim, return_sequences=True,
                                      W_regularizer=weight_regularizer,
                                      U_regularizer=weight_regularizer,
                                      b_regularizer=weight_regularizer)
        recurrent_right_to_left = GRU(encoder_rnn_output_dim, return_sequences=True,
                                      go_backwards=True,
                                      W_regularizer=weight_regularizer,
                                      U_regularizer=weight_regularizer,
                                      b_regularizer=weight_regularizer)
        h1 = recurrent_left_to_right(encoder_output, source_word_mask)
        h2 = recurrent_right_to_left(encoder_output, source_word_mask)
        encoder_output = BiDirectionalLayer()([h1, h2])

    # the output of the last bi-directional RNN layer is the source context
    source_context = encoder_output

    # attention
    attention = AttentionLayer(attention_context_dim=attention_context_dim,
                               W_a_regularizer=weight_regularizer,
                               U_a_regularizer=weight_regularizer,
                               v_a_regularizer=weight_regularizer)

    # decoder
    decoder_input_sequence = Input((None, ), dtype='int32')  # starting with bos
    decoder_input_sequence_mask = Input((None, ), dtype='int32')
    decoder_rnn_cell = GRU(decoder_rnn_output_dim, return_sequences=True,
                           W_regularizer=weight_regularizer,
                           U_regularizer=weight_regularizer,
                           b_regularizer=weight_regularizer)
    target_embedding = Embedding(target_vacabuary_size, target_embedding_dim,
                                 weights=[target_initia_embedding],
                                 W_regularizer=weight_regularizer)
    rnn_decoder = RNNDecoderLayer(decoder_rnn_cell, attention, target_embedding)
    rnn_decoder_output = rnn_decoder([decoder_input_sequence, source_context],
                                     [decoder_input_sequence_mask, source_word_mask])
    rnn_decoder_output_dropout = Dropout(decoder_rnn_output_dropout_rate)
    rnn_decoder_output = rnn_decoder_output_dropout(rnn_decoder_output)

    mlp_classifier_hidden_layers = []
    for decoder_hidden_unit_number, decoder_hidden_unit_activation_function in zip(
            decoder_hidden_unit_numbers, decoder_hidden_unit_activation_functions):
        layer = Dense(decoder_hidden_unit_number,
                      activation=decoder_hidden_unit_activation_function,
                      W_regularizer=weight_regularizer,
                      b_regularizer=weight_regularizer)
        mlp_classifier_hidden_layers.append(layer)
    mlp_classifier_output_layer = Dense(output_dim=target_vacabuary_size, activation='softmax',
                                        W_regularizer=weight_regularizer,
                                        b_regularizer=weight_regularizer)
    mlp_classifier = MLPClassifierLayer(mlp_classifier_hidden_layers, mlp_classifier_output_layer)
    time_distributed_mlp_classifier = TimeDistributed(mlp_classifier)
    time_distributed_mlp_classifier_output = time_distributed_mlp_classifier(
        rnn_decoder_output, mask=decoder_input_sequence_mask)

    # the output and its mask will be used to generate a proper loss function by the optimizer
    rnn_search_model = Model(input=[
        source_word, source_word_mask,
        decoder_input_sequence, decoder_input_sequence_mask
    ], output=time_distributed_mlp_classifier_output)

    # training with multiple devices
    if devices:
        rnn_search_model = convert_to_model_with_parallel_training(rnn_search_model, devices)

    # TODO: try other losses, such as an importance-sampling based loss, e.g. sampled_softmax_loss
    # (this will need to extend the Keras model, which assumes that the loss function does not
    # hold any trainable parameters)
    rnn_search_model.compile(optimizer=optimizer,
                             loss='categorical_crossentropy',
                             metrics=['accuracy'])

    beam_search_initial_input = Input(shape=(1, ))
    rnn_decoder_with_beam_search = RNNDecoderLayerWithBeamSearch(
        beam_search_max_output_length, beam_size, decoder_rnn_cell, attention,
        target_embedding, mlp_classifier)
    beam_search_output_lattice = rnn_decoder_with_beam_search(
        [beam_search_initial_input, source_context])
    rnn_search_runtime_model = Model(
        input=[source_word, source_word_mask, beam_search_initial_input],
        output=beam_search_output_lattice)
    return (rnn_search_model, rnn_search_runtime_model)
# convert text to int sequence x_train1 = tokenizer.texts_to_sequences(x_train1) x_train2 = tokenizer.texts_to_sequences(x_train2) # max_sequence_len = 50 , as it gives a good measure max_sequence_len = 50 # pad the sequences x_train1 = pad_sequences(x_train1, maxlen=max_sequence_len, padding='pre') x_train2 = pad_sequences(x_train2, maxlen=max_sequence_len, padding='pre') # model - siamese lstm inp1 = Input(shape=(max_sequence_len, ), name='sentence_1') inp2 = Input(shape=(max_sequence_len, ), name='sentence_2') emb = Embedding(output_dim=40, input_dim=vocab_len, input_length=max_sequence_len) encoder = LSTM(80) e1 = encoder(emb(inp1)) e2 = encoder(emb(inp2)) x = concatenate([e1, e2]) x = Dense(20, activation='relu')(x) out = Dense(1, activation='sigmoid')(x) model = Model(inputs=[inp1, inp2], outputs=out) model.summary() # compile the model model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
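# --- Training sketch (not in the original excerpt). Assumption: the label vector marks whether
# --- the two sentences in a pair are duplicates (1) or not (0); batch size and epochs are
# --- illustrative. A random dummy label vector is used here only to make the call concrete.
import numpy as np

y_dummy = np.random.randint(0, 2, size=(len(x_train1), ))
model.fit([x_train1, x_train2], y_dummy, batch_size=64, epochs=5, validation_split=0.1)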
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.layers import Embedding
from keras.layers import LSTM
from keras.layers import Conv1D, MaxPooling1D
from keras import optimizers

max_features = 26
embedding_size = 256
kernel_size = 5
filters = 250
pool_size = 2
lstm_output_size = 64

# print('Building model...')
model = Sequential()
model.add(Embedding(max_features, embedding_size))
model.add(Dropout(0.2))
model.add(Conv1D(filters, kernel_size, padding='valid', activation='relu', strides=1))
model.add(MaxPooling1D(pool_size=pool_size))
model.add(LSTM(lstm_output_size))
model.add(Dense(1))
model.add(Activation('sigmoid'))
model.compile(loss='binary_crossentropy', optimizer=optimizers.Adam(), metrics=['acc'])
def basic_cnn(nb_words, EMBEDDING_DIM, \ embedding_matrix, MAX_SEQUENCE_LENGTH, \ num_rnn, num_dense, rate_drop_rnn, \ rate_drop_dense, act): ''' This is the basic cnn model model: input layer; embedding layer; several cnn layer; dense layer; output layer ''' embedding_layer = Embedding(nb_words, EMBEDDING_DIM, weights=[embedding_matrix], input_length=MAX_SEQUENCE_LENGTH, trainable=True) conv1 = Conv1D(filters=128, kernel_size=1, padding='same', activation='relu') conv2 = Conv1D(filters=128, kernel_size=2, padding='same', activation='relu') conv3 = Conv1D(filters=128, kernel_size=3, padding='same', activation='relu') conv4 = Conv1D(filters=128, kernel_size=4, padding='same', activation='relu') conv5 = Conv1D(filters=32, kernel_size=5, padding='same', activation='relu') conv6 = Conv1D(filters=32, kernel_size=6, padding='same', activation='relu') sequence_1_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32') embedded_sequences_1 = embedding_layer(sequence_1_input) sequence_2_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32') embedded_sequences_2 = embedding_layer(sequence_2_input) conv1a = conv1(embedded_sequences_1) glob1a = GlobalAveragePooling1D()(conv1a) conv1b = conv1(embedded_sequences_2) glob1b = GlobalAveragePooling1D()(conv1b) conv2a = conv2(embedded_sequences_1) glob2a = GlobalAveragePooling1D()(conv2a) conv2b = conv2(embedded_sequences_2) glob2b = GlobalAveragePooling1D()(conv2b) conv3a = conv3(embedded_sequences_1) glob3a = GlobalAveragePooling1D()(conv3a) conv3b = conv3(embedded_sequences_2) glob3b = GlobalAveragePooling1D()(conv3b) conv4a = conv4(embedded_sequences_1) glob4a = GlobalAveragePooling1D()(conv4a) conv4b = conv4(embedded_sequences_2) glob4b = GlobalAveragePooling1D()(conv4b) conv5a = conv5(embedded_sequences_1) glob5a = GlobalAveragePooling1D()(conv5a) conv5b = conv5(embedded_sequences_2) glob5b = GlobalAveragePooling1D()(conv5b) conv6a = conv6(embedded_sequences_1) glob6a = GlobalAveragePooling1D()(conv6a) conv6b = conv6(embedded_sequences_2) glob6b = GlobalAveragePooling1D()(conv6b) mergea = concatenate([glob1a, glob2a, glob3a, glob4a, glob5a, glob6a]) mergeb = concatenate([glob1b, glob2b, glob3b, glob4b, glob5b, glob6b]) # We take the explicit absolute difference between the two sentences # Furthermore we take the multiply different entries to get a different measure of equalness diff = Lambda(lambda x: K.abs(x[0] - x[1]), output_shape=(4 * 128 + 2 * 32, ))([mergea, mergeb]) mul = Lambda(lambda x: x[0] * x[1], output_shape=(4 * 128 + 2 * 32, ))([mergea, mergeb]) merge = concatenate([diff, mul]) # The MLP that determines the outcome x = Dropout(0.2)(merge) x = BatchNormalization()(x) x = Dense(300, activation='relu')(x) x = Dropout(0.2)(x) x = BatchNormalization()(x) preds = Dense(3, activation='softmax')(x) ######################################## ## train the model ######################################## model = Model(inputs=[sequence_1_input, sequence_2_input], outputs=preds) model.compile(loss='categorical_crossentropy', optimizer='nadam', metrics=['acc']) model.summary() # print(STAMP) return model
test_size=0.05, random_state=1) # print(x_train.shape, x_test.shape, y_train.shape, y_test.shape) # w2v_weight.shape, embedding_size, max_sentence_length # Metric from modules import recall, precision, f1score # Building Network # CNN model = Sequential() model.add( Embedding(vocab_size, embedding_size, input_length=max_sentence_length, weights=[w2v_weight])) model.add(Conv1D(filters=256, kernel_size=5, activation='relu')) model.add(Dropout(0.2)) model.add(Conv1D(filters=128, kernel_size=5, activation='relu')) model.add(Dropout(0.2)) model.add(Flatten()) model.add(Dense(98)) model.add(BatchNormalization()) model.add(ReLU()) # model.add(GlobalMaxPooling1D()) model.add(Dense(98, activation="softmax")) model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["acc", precision, recall, f1score]) model.summary()
def basic_attention(nb_words, EMBEDDING_DIM, \ embedding_matrix, MAX_SEQUENCE_LENGTH, \ num_rnn, num_dense, rate_drop_rnn, \ rate_drop_dense, act): ''' This is the basic attention model model: input layer; embedding layer; rnn layer; attention layer; dense layer; output layer ''' embedding_layer = Embedding(nb_words, EMBEDDING_DIM, weights=[embedding_matrix], input_length=MAX_SEQUENCE_LENGTH, trainable=True) rnn_layer = Bidirectional( GRU(num_rnn, dropout=rate_drop_rnn, recurrent_dropout=rate_drop_rnn, return_sequences=True)) attention_W = TimeDistributed(Dense(350, activation='tanh')) attention_w = TimeDistributed(Dense(1)) attention_softmax = Activation('softmax') attention_sum = Lambda(lambda x: K.sum(x, axis=1)) sequence_1_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32') embedded_sequences_1 = embedding_layer(sequence_1_input) x1 = rnn_layer(embedded_sequences_1) attention1 = attention_W(x1) attention1 = attention_w(attention1) attention1 = attention_softmax(attention1) attention1 = Permute([2, 1])(attention1) x1 = Permute([2, 1])(x1) x1 = multiply([attention1, x1]) x1 = Permute([2, 1])(x1) x1 = attention_sum(x1) sequence_2_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32') embedded_sequences_2 = embedding_layer(sequence_2_input) x2 = rnn_layer(embedded_sequences_2) attention2 = attention_W(x2) attention2 = attention_w(attention2) attention2 = attention_softmax(attention2) attention2 = Permute([2, 1])(attention2) x2 = Permute([2, 1])(x2) x2 = multiply([attention2, x2]) x2 = Permute([2, 1])(x2) x2 = attention_sum(x2) merged = multiply([x1, x2]) merged = Dropout(rate_drop_dense)(merged) merged = BatchNormalization()(merged) merged = Dense(num_dense, activation=act)(merged) merged = Dropout(rate_drop_dense)(merged) merged = BatchNormalization()(merged) preds = Dense(3, activation='softmax')(merged) ######################################## ## train the model ######################################## model = Model(inputs=[sequence_1_input, sequence_2_input], outputs=preds) model.compile(loss='categorical_crossentropy', optimizer='nadam', metrics=['acc']) model.summary() # print(STAMP) return model
X = numpy.array(id_data) Y = numpy.array(labels) histories = [] accu = [] for trainidx, testidx in kf.split(X): print(trainidx) print(testidx) train_data, train_labels, test_data, test_labels = X[trainidx], Y[ trainidx], X[testidx], Y[testidx] train_data = sequence.pad_sequences(train_data, maxlen=param["max_len"]) test_data = sequence.pad_sequences(test_data, maxlen=param["max_len"]) embedding_layer = Embedding(output_dim=vocab_dim, input_dim=n_symbols, trainable=False) embedding_layer.build( (None, )) # if you don't do this, the next step won't work embedding_layer.set_weights([embedding_weights]) param = { "max_len": 64, "batch_size": 32, #16? "embed_dims": 128, "filters": 16, "filter_size": 4, "hidden_dims": 64, "epochs": 10 }
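# --- Continuation sketch (an assumption, not from the original excerpt): one plausible way to
# --- attach the pre-initialised embedding_layer to a small Conv1D classifier using the
# --- hyper-parameters in param, then train on the current fold. Binary labels are assumed.
from keras.models import Sequential
from keras.layers import Conv1D, GlobalMaxPooling1D, Dense

fold_model = Sequential()
fold_model.add(embedding_layer)  # frozen embedding, weights set from embedding_weights above
fold_model.add(Conv1D(param["filters"], param["filter_size"], activation='relu'))
fold_model.add(GlobalMaxPooling1D())
fold_model.add(Dense(param["hidden_dims"], activation='relu'))
fold_model.add(Dense(1, activation='sigmoid'))
fold_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])

# history = fold_model.fit(train_data, train_labels, batch_size=param["batch_size"],
#                          epochs=param["epochs"], validation_data=(test_data, test_labels))
# histories.append(history)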