# Assumed imports for this snippet (Keras 2.x with a TensorFlow backend;
# CuDNNGRU/CuDNNLSTM require a GPU). `f1` is a custom metric assumed to be
# defined elsewhere in the project.
from keras import backend as K
from keras import optimizers
from keras.layers import (Input, Embedding, Bidirectional, CuDNNGRU, CuDNNLSTM,
                          BatchNormalization, SpatialDropout1D, Dropout, Dense,
                          GlobalMaxPool1D, Lambda, add, concatenate, Concatenate)
from keras.models import Model


def rnn(embedding_matrix, config):
    if config['rnn'] == 'gru' and config['gpu']:
        encode = Bidirectional(
            CuDNNGRU(config['rnn_output_size'], return_sequences=True))
        encode2 = Bidirectional(
            CuDNNGRU(config['rnn_output_size'], return_sequences=True))
        encode3 = Bidirectional(
            CuDNNGRU(config['rnn_output_size'], return_sequences=True))
    else:
        # Note: this branch also uses CuDNN layers (GPU-only), widens encode2
        # and encode3, and keeps a GRU as the third encoder, as in the
        # original. The widened encode2 output (4x rnn_output_size after the
        # bidirectional doubling) will not match encode's output (2x) in the
        # residual add() below; the widths likely need to agree, as they do
        # on the GRU branch.
        encode = Bidirectional(
            CuDNNLSTM(config['rnn_output_size'], return_sequences=True))
        encode2 = Bidirectional(
            CuDNNLSTM(config['rnn_output_size'] * 2, return_sequences=True))
        encode3 = Bidirectional(
            CuDNNGRU(config['rnn_output_size'] * 4, return_sequences=True))

    q1 = Input(shape=(config['max_length'],), dtype='int32', name='q1_input')
    q2 = Input((config['max_length'],), dtype='int32', name='q2_input')
    embedding_layer = Embedding(embedding_matrix.shape[0],
                                embedding_matrix.shape[1],
                                trainable=config['embed_trainable'],
                                weights=[embedding_matrix]
                                # mask_zero=True
                                )

    q1_embed = embedding_layer(q1)
    q2_embed = embedding_layer(q2)  # (bsz, max_length, emb_dims)
    q1_embed = BatchNormalization(axis=2)(q1_embed)
    q2_embed = BatchNormalization(axis=2)(q2_embed)
    q1_embed = SpatialDropout1D(config['spatial_dropout_rate'])(q1_embed)
    q2_embed = SpatialDropout1D(config['spatial_dropout_rate'])(q2_embed)

    q1_encoded = encode(q1_embed)
    q2_encoded = encode(q2_embed)
    q1_encoded = Dropout(0.2)(q1_encoded)
    q2_encoded = Dropout(0.2)(q2_encoded)

    # bidirectional
    # q1_encoded = encode2(q1_encoded)
    # q2_encoded = encode2(q2_encoded)

    # ResNet-style: feed the embeddings along with the first encoder's
    # output into the second encoder.
    rnn_layer2_input1 = concatenate([q1_embed, q1_encoded])
    rnn_layer2_input2 = concatenate([q2_embed, q2_encoded])
    q1_encoded2 = encode2(rnn_layer2_input1)
    q2_encoded2 = encode2(rnn_layer2_input2)

    # add residual shortcut
    res_block1 = add([q1_encoded, q1_encoded2])
    res_block2 = add([q2_encoded, q2_encoded2])
    rnn_layer3_input1 = concatenate([q1_embed, res_block1])
    rnn_layer3_input2 = concatenate([q2_embed, res_block2])
    # rnn_layer3_input1 = concatenate([q1_embed, q1_encoded, q1_encoded2])
    # rnn_layer3_input2 = concatenate([q2_embed, q2_encoded, q2_encoded2])
    q1_encoded3 = encode3(rnn_layer3_input1)
    q2_encoded3 = encode3(rnn_layer3_input2)

    # merged1 = GlobalMaxPool1D()(q1_encoded3)
    # merged2 = GlobalMaxPool1D()(q2_encoded3)
    # q1_encoded = concatenate([q1_encoded, q1_encoded2], axis=-1)
    # q2_encoded = concatenate([q2_encoded, q2_encoded2], axis=-1)
    # merged1 = concatenate([q1_encoded2, q1_embed], axis=-1)
    # merged2 = concatenate([q2_encoded2, q2_embed], axis=-1)

    # TODO: add attention rep, max-pooling rep
    q1_encoded3 = concatenate([q1_encoded, q1_encoded2, q1_encoded3])
    q2_encoded3 = concatenate([q2_encoded, q2_encoded2, q2_encoded3])
    merged1 = GlobalMaxPool1D()(q1_encoded3)
    merged2 = GlobalMaxPool1D()(q2_encoded3)
    # avg1 = GlobalAvgPool1D()(q1_encoded3)
    # avg2 = GlobalAvgPool1D()(q2_encoded3)
    # merged1 = concatenate([max1, avg1])
    # merged2 = concatenate([max2, avg2])

    # Matching features: absolute difference and element-wise product.
    sub_rep = Lambda(lambda x: K.abs(x[0] - x[1]))([merged1, merged2])
    mul_rep = Lambda(lambda x: x[0] * x[1])([merged1, merged2])
    # jaccard_rep = Lambda(lambda x: x[0] * x[1] / (
    #     K.sum(x[0] ** 2, axis=1, keepdims=True) +
    #     K.sum(x[1] ** 2, axis=1, keepdims=True) -
    #     K.sum(K.abs(x[0] * x[1]), axis=1, keepdims=True)))([merged1, merged2])
    # merged = Concatenate()([merged1, merged2, mul_rep, sub_rep, jaccard_rep])

    # Hand-crafted features go through their own small dense branch.
    feature_input = Input(shape=(config['feature_length'],))
    feature_dense = BatchNormalization()(feature_input)
    feature_dense = Dense(config['dense_dim'],
                          activation='relu')(feature_dense)

    merged = Concatenate()([merged1, merged2, mul_rep, sub_rep, feature_dense])

    # Classifier
    dense = Dropout(config['dense_dropout'])(merged)
    dense = BatchNormalization()(dense)
    dense = Dense(config['dense_dim'], activation='relu')(dense)
    dense = Dropout(config['dense_dropout'])(dense)
    dense = BatchNormalization()(dense)
    predictions = Dense(1, activation='sigmoid')(dense)

    model = Model(inputs=[q1, q2, feature_input], outputs=predictions)
    opt = optimizers.get(config['optimizer'])
    K.set_value(opt.lr, config['learning_rate'])
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=[f1])
    return model
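# A minimal usage sketch (hypothetical values, not from the source) showing
# the config keys rnn() reads. `embedding_matrix` would be a (vocab, dim)
# array of pretrained vectors, and the custom `f1` metric must already be
# defined for compile() to succeed.
import numpy as np

config = {
    'rnn': 'gru', 'gpu': True, 'rnn_output_size': 64, 'max_length': 30,
    'embed_trainable': False, 'spatial_dropout_rate': 0.2,
    'feature_length': 10, 'dense_dim': 128, 'dense_dropout': 0.3,
    'optimizer': 'adam', 'learning_rate': 1e-3,
}
embedding_matrix = np.random.normal(size=(40000, 300)).astype('float32')
model = rnn(embedding_matrix, config)
model.summary()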
# `inp` is assumed to be defined as below; printed out, it would look like:
# Tensor("input_1:0", shape=(?, 200), dtype=float32)
inp = Input(shape=(200,))  # (reconstructed from the printed shape above)

embed_size = 128
x = Embedding(max_features, embed_size)(inp)
# this will have a shape of (None, 200, 128)
x = LSTM(60, return_sequences=True, name='lstm_layer')(x)
# this will have a shape of (None, 200, 60): the first number (None) is the
# batch size, followed by the time steps (200) and the output size (60).
# Note: 60 is the number of LSTM units (the output dimension per time step),
# not "60 hidden layers" as
# https://www.kaggle.com/sbongo/for-beginners-tackling-toxic-using-keras
# puts it. The "unrolled" picture of the LSTM corresponds to the 200 time
# steps; with return_sequences=False the output would collapse to (None, 60).
x = GlobalMaxPool1D()(x)
x = Dropout(0.1)(x)
x = Dense(50, activation="relu")(x)
x = Dropout(0.1)(x)
x = Dense(6, activation="sigmoid")(x)
model = Model(inputs=inp, outputs=x)
model.compile(loss="binary_crossentropy",
              optimizer='adam',
              metrics=['accuracy'])
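# A quick standalone sketch (tf.keras assumed) contrasting the two LSTM output
# shapes discussed above: return_sequences=True keeps the per-time-step
# outputs, while the default keeps only the last step.
from tensorflow.keras.layers import Input, Embedding, LSTM

demo_inp = Input(shape=(200,))
demo_emb = Embedding(20000, 128)(demo_inp)
seq_out = LSTM(60, return_sequences=True)(demo_emb)  # shape (None, 200, 60)
last_out = LSTM(60)(demo_emb)                        # shape (None, 60)
print(seq_out.shape, last_out.shape)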
# (the Embedding(...) constructor for embedding_layer is truncated above;
#  its final argument is:)
                            trainable=False)

# 2) MODEL LAYERS ............................................
# Creating a 1-D ConvNet with global max pooling.
# Since the input has size N x T, we pass T, which is MAX_SEQUENCE_LENGTH.
input_ = Input(shape=(MAX_SEQUENCE_LENGTH,))
layer = embedding_layer(input_)
layer = Conv1D(128, 3, activation='relu')(layer)
layer = Dropout(0.3)(layer)
layer = MaxPool1D(3)(layer)
layer = Conv1D(128, 3, activation='relu')(layer)
layer = Dropout(0.4)(layer)
layer = MaxPool1D(3)(layer)
layer = Conv1D(128, 3, activation='relu')(layer)
layer = GlobalMaxPool1D()(layer)
layer = Dense(128, activation='relu')(layer)
output = Dense(len(possible_labels), activation='sigmoid')(layer)

# ----------------- MODEL COMPILE -------------------------
model = Model(input_, output)
model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

# ----------------- MODEL TRAINING --------------------------
train = model.fit(data,
                  targets,
                  batch_size=BATCH_SIZE,
                  epochs=EPOCH,
                  validation_split=VALIDATION_SPLIT)
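# A plausible sketch of the truncated embedding-layer construction above
# (an assumption following the usual pretrained-embedding pattern; num_words,
# EMBEDDING_DIM, and embedding_matrix are hypothetical names):
from tensorflow.keras.layers import Embedding

embedding_layer = Embedding(num_words,                # vocabulary size (assumed)
                            EMBEDDING_DIM,            # vector size (assumed)
                            weights=[embedding_matrix],
                            input_length=MAX_SEQUENCE_LENGTH,
                            trainable=False)          # freeze pretrained vectors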
def create_model(self):
    if self.config.model_type == 'LSTM':
        # model = Sequential()
        # model.add(Embedding(len(self.word_dict), self.config.max_words,
        #                     input_length=self.X.shape[1]))
        # model.add(LSTM(self.config.dim, return_sequences=True,
        #                recurrent_dropout=self.config.dropout))
        # model.add(Dropout(self.config.dropout))
        # model.add(LSTM(self.config.dim, return_sequences=True,
        #                recurrent_dropout=self.config.dropout))
        # model.add(Dropout(self.config.dropout))
        # model.add(LSTM(self.config.dim, recurrent_dropout=self.config.dropout))
        # model.add(Dense(self.config.dim, activation='relu'))
        # model.add(Dense(3, activation='softmax'))

        # model = Sequential()
        # model.add(Embedding(len(self.word_dict), self.config.max_words,
        #                     input_length=self.X.shape[1]))
        # model.add(LSTM(self.config.dim, dropout=self.config.dropout,
        #                recurrent_dropout=self.config.dropout))
        # model.add(Dropout(self.config.dropout))
        # model.add(Dense(3, activation='softmax'))

        inp = Input(shape=(self.config.max_len,))
        x = Embedding(self.config.max_words, self.config.max_len,
                      input_length=self.X.shape[1])(inp)
        x = Bidirectional(LSTM(self.config.dim,
                               return_sequences=True,
                               dropout=self.config.dropout,
                               recurrent_dropout=self.config.dropout,
                               kernel_regularizer=l2(0.01),
                               recurrent_regularizer=l2(0.01),
                               bias_regularizer=l2(0.01)))(x)
        x = GlobalMaxPool1D()(x)
        x = Dense(self.config.dim, activation="sigmoid")(x)
        x = Dropout(self.config.dropout)(x)
        x = Dense(3, activation='softmax')(x)
        model = Model(inputs=inp, outputs=x)
        self.model = model

    if self.config.model_type == 'GRU':
        model = Sequential()
        model.add(Embedding(len(self.word_dict), self.config.max_words,
                            input_length=self.X.shape[1]))
        model.add(GRU(self.config.dim, return_sequences=True,
                      recurrent_dropout=self.config.dropout))
        model.add(Dropout(self.config.dropout))
        model.add(GRU(self.config.dim, return_sequences=True,
                      recurrent_dropout=self.config.dropout))
        model.add(Dropout(self.config.dropout))
        model.add(GRU(self.config.dim, recurrent_dropout=self.config.dropout))
        model.add(Dense(self.config.dim, activation='relu'))
        model.add(Dense(3, activation='softmax'))
        self.model = model

    if self.config.model_type == 'MLP':
        model = Sequential()
        model.add(Dense(len(self.word_dict), input_shape=(self.X.shape[1],),
                        activation="relu"))
        model.add(Dropout(self.config.dropout))
        model.add(Dense(self.config.dim, activation='relu'))
        model.add(Dropout(self.config.dropout))
        model.add(Dense(self.config.dim, activation="relu"))
        model.add(Dropout(self.config.dropout))
        model.add(Dense(self.config.dim, activation="relu"))
        model.add(Dense(3, activation='softmax'))
        self.model = model

    if self.config.debug:
        print(self.model.summary())
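# A minimal sketch (hypothetical names, not the source's class) of the config
# object create_model() reads; the real project presumably builds this
# elsewhere.
class Config:
    model_type = 'LSTM'   # one of 'LSTM', 'GRU', 'MLP'
    max_len = 100         # padded sequence length
    max_words = 20000     # vocabulary cap for the Embedding layer
    dim = 64              # recurrent/dense width
    dropout = 0.3
    debug = True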
max_features, max_len = 2000, 500
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
x_train = pad_sequences(x_train, maxlen=max_len)
x_test = pad_sequences(x_test, maxlen=max_len)

model = Sequential()
model.add(Embedding(max_features, 128, input_length=max_len,
                    name='embedding_layer'))
model.add(Conv1D(32, 7, activation='relu'))
model.add(MaxPooling1D(5))
model.add(Conv1D(32, 7, activation='relu'))
model.add(GlobalMaxPool1D())
# A sigmoid is added here so that binary_crossentropy receives probabilities
# rather than raw logits.
model.add(Dense(1, activation='sigmoid'))
model.summary()
model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['accuracy'])
model.fit(x_train,
          y_train,
          epochs=20,
          batch_size=128,
          validation_split=0.2,
          callbacks=[
              # (the callbacks list is truncated in the original)
          ])
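# The callbacks list above is cut off in the source; a typical choice for this
# kind of run (an assumption, not the original's code) would be:
from keras.callbacks import EarlyStopping, ModelCheckpoint

callbacks = [
    EarlyStopping(monitor='val_loss', patience=2),
    ModelCheckpoint('imdb_conv1d.h5', save_best_only=True),
]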
model = Sequential()
model.add(Embedding(max_features,
                    embed_size,
                    weights=[embedding_matrix],
                    trainable=True,
                    name='Word-Embedding-Layer'))
model.add(Dropout(0.4, name='Dropout-Regularization-1'))  # Best = 0.3
model.add(Bidirectional(LSTM(12,
                             return_sequences=True,
                             dropout=0.35,
                             recurrent_dropout=0.35,
                             kernel_initializer=glorot_normal(seed=None)),
                        name='BDLSTM'))  # Best = 300, 0.25, 0.25
model.add(GlobalMaxPool1D(name='Global-Max-Pool-1d'))
model.add(Dense(y_binary.shape[1], activation="softmax",
                name='FC-Output-Layer'))
model.compile(loss='categorical_crossentropy',
              optimizer='nadam',
              metrics=['mse', 'acc'])

# Note: when both are given, validation_data takes precedence over
# validation_split, so the split below is effectively ignored.
history = model.fit(xtrain,
                    ytrain,
                    validation_split=0.2,
                    validation_data=(xtest, ytest),
                    batch_size=5000,
                    epochs=1,
                    callbacks=[early_stop],
                    verbose=1)
# history = model.fit(X_train, Y_train, epochs=42, batch_size=50, verbose=1)
print(history.history.keys())
from tqdm.notebook import tqdm
from tensorflow import keras
from tensorflow.keras.layers import (Input, Embedding, Bidirectional, LSTM,
                                     GlobalMaxPool1D, Dense, Dropout)
from tensorflow.keras.models import Model
import pandas as pd
import numpy as np
import tensorflow as tf
import os
import re

#%%
train_data = pd.read_csv("c:/temp/train_data.csv", index_col=('Unnamed: 0'))
embedding_matrix = pd.read_csv('c:/temp/embedding_matrix.csv',
                               index_col=('Unnamed: 0'))
target = pd.read_csv('c:/temp/target.csv', index_col=('Unnamed: 0'))

input_layer = Input(shape=(50,))
# Embedding expects a NumPy array, so convert the DataFrame loaded above.
embedding_layer = Embedding(40000, 300,
                            weights=[embedding_matrix.to_numpy()])(input_layer)
LSTM_layer = Bidirectional(LSTM(128, return_sequences=True))(embedding_layer)
maxpool_layer = GlobalMaxPool1D()(LSTM_layer)
dense_layer_1 = Dense(64, activation="relu")(maxpool_layer)
dropout_1 = Dropout(0.5)(dense_layer_1)
dense_layer_2 = Dense(32, activation="relu")(dropout_1)
dropout_2 = Dropout(0.5)(dense_layer_2)
output_layer = Dense(1, activation="sigmoid")(dropout_2)
model = Model(input_layer, output_layer)
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.summary()

#%%
# Include the epoch in the file name (uses `str.format`)
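# A sketch of what that comment presumably leads into: a ModelCheckpoint whose
# filepath embeds the epoch number via str.format. The path and the use of
# train_data/target as the fit arrays are assumptions, not the source's code.
checkpoint_path = "c:/temp/training/cp-{epoch:04d}.ckpt"
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 save_weights_only=True,
                                                 verbose=1)
model.fit(train_data.to_numpy(), target.to_numpy(),
          epochs=10, callbacks=[cp_callback])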
# In[ ]:
print('Build model...')
comment_input = Input((maxlen,))

# We start off with an efficient embedding layer which maps our vocab indices
# into embedding_dims dimensions.
comment_emb = Embedding(max_features, embedding_dims,
                        input_length=maxlen)(comment_input)

# We add a GlobalMaxPool1D, which will extract information from the embeddings
# of all words in the document.
comment_emb = SpatialDropout1D(0.25)(comment_emb)
max_emb = GlobalMaxPool1D()(comment_emb)

# Normalized dense layer followed by dropout.
main = BatchNormalization()(max_emb)
main = Dense(64)(main)
main = Dropout(0.5)(main)

# We project onto a six-unit output layer, and squash it with sigmoids:
output = Dense(6, activation='sigmoid')(main)

model = Model(inputs=comment_input, outputs=output)
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
# OPTION 1 = CONV1, LSTM, BI_LSTM
# Bidirectional
if nn == 'bi':
    main_embeddings_2 = Bidirectional(
        LSTM(embedding_dim, dropout=0.2,
             recurrent_dropout=0.2))(main_embeddings_1)
elif nn == 'lstm':
    main_embeddings_2 = LSTM(embedding_dim,
                             dropout=0.2,
                             recurrent_dropout=0.2)(main_embeddings_1)
else:
    output_1 = Conv1D(128, 5, activation='relu')(main_embeddings_1)
    output_2 = GlobalMaxPool1D()(output_1)
    main_embeddings_2 = Dense(10, activation='relu')(output_2)

# OPTION 2 = SIGMOID, SOFTMAX
main_embeddings_3 = Dense(10, activation='softmax')(main_embeddings_2)
merged = main_embeddings_3

# Final predictions
hidden1 = Dense(10, activation='relu')(merged)
hidden2 = Dense(10, activation='relu')(hidden1)
predictions = Dense(1, activation='sigmoid')(hidden2)

# Create model
model = Model(inputs=main_embeddings_input, outputs=predictions)
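# main_embeddings_input and main_embeddings_1 are defined elsewhere in the
# source; a plausible sketch (an assumption following the usual pattern;
# max_sequence_length and vocab_size are hypothetical names) is:
from tensorflow.keras.layers import Input, Embedding

main_embeddings_input = Input(shape=(max_sequence_length,))
main_embeddings_1 = Embedding(vocab_size,
                              embedding_dim)(main_embeddings_input)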
# Assumed module-level imports for this test: math, re, numpy as np,
# pandas as pd, tensorflow as tf (1.x), keras.preprocessing.sequence, the
# Keras layers used below, EarlyStopping, and the package's DeepFM class.
# Print statements are written Python 3 style; the original was Python 2.
def test_deepin_fm(self):
    try:
        import nltk
        nltk.download('movie_reviews')
        from nltk.corpus import movie_reviews
    except ImportError:
        self.skipTest(
            "NLTK is not installed. Reinstall with option 'tests'")

    # Download some text data, process it, and create a feature-extraction
    # layer to plug in.
    print("processing text...")
    samplesize = 2000
    reviews = []
    labels = []
    for rf in movie_reviews.fileids():
        review = movie_reviews.open(rf).read()
        reviews.append(review)
        labels.append(rf.find('pos/') != -1)
    textdata = pd.DataFrame({
        'text': reviews,
        'pos': labels,
        'offset_': np.ones(len(reviews))
    })

    # Pre-process text (do the same thing Ralph does: keep only consecutive
    # alphabetical characters). The original's trailing .encode('utf8') was a
    # Python 2 remnant and is dropped; it would yield bytes here.
    textdata['cleantext'] = textdata['text'].map(
        lambda x: " ".join(re.findall('[A-Za-z]+', x)))
    tokens = [i.lower().split(" ") for i in textdata['cleantext']]
    textdata['len'] = [len(t) for t in tokens]
    textdata.len.describe()
    textdata['cat1'] = np.random.randint(0, 9, size=samplesize)
    textdata['cat2'] = np.random.randint(0, 2, size=samplesize)
    textdata['real1'] = np.random.uniform(0, 1, size=samplesize)
    textdata['latenty'] = (textdata.cat1 - 2 * math.pi * textdata.cat2 +
                           textdata.real1 -
                           math.exp(1) * textdata.pos.astype('float') +
                           textdata.real1 * textdata.pos.astype('float') +
                           np.random.normal(size=samplesize))
    # Convert to a binary indicator.
    textdata['y'] = (textdata['latenty'] > 0).astype('int')

    # The sequence-length cutoff is going to be the 75th percentile.
    cutoff = int(textdata.len.describe()['75%'])
    tokens = [r[0:min(len(r), cutoff)] for r in tokens]

    # Build the vocabulary.
    vocab = set()
    for r in tokens:
        for w in r:
            if w not in vocab:
                vocab.add(w)
    vocabsize = len(vocab)
    vocab_indices = {}
    index = 1
    for v in vocab:
        vocab_indices[v] = index
        index += 1
    tokens_indexed = []
    for r in tokens:
        tokens_indexed.append([vocab_indices[w] for w in r])
    sequence_mat = sequence.pad_sequences(tokens_indexed,
                                          maxlen=cutoff,
                                          value=0,
                                          padding='post',
                                          truncating='post')

    # Build the feature-extraction layer: a CNN mimicking Ralph's
    # architecture (but of significantly lower dimensionality).
    embed_dim = 10
    word_seq = Input(batch_shape=(None, sequence_mat.shape[1]),
                     name='wordind_seq')
    word_embeddings = Embedding(input_dim=vocabsize + 1,
                                output_dim=1,
                                input_length=cutoff,
                                mask_zero=False)(word_seq)
    word_conv = Convolution1D(filters=10,
                              kernel_size=3,
                              activation='relu',
                              use_bias=True)(word_embeddings)
    pooler = GlobalMaxPool1D()(word_conv)
    word_dense_layer = Dense(units=10, activation='relu')(pooler)
    word_final_layer = Dense(units=embed_dim,
                             name='textfeats')(word_dense_layer)

    # Collect relevant values for the deepFM model.
    features = [['cat1'], ['cat2'], ['real1'], ['offset_'], ['textseq']]
    feature_dim = [
        len(textdata['cat1'].unique()),
        len(textdata['cat2'].unique()), 1, 1, embed_dim
    ]
    deep_inputs = [word_seq]
    deep_feature = [word_final_layer]
    deepin = [False, False, False, False, True]
    bias_only = [False, False, False, True, False]
    # It doesn't matter what we assign to the deep feature, so just say None.
    realvalued = [False, False, True, False, None]
    inputs = [
        textdata['cat1'], textdata['cat2'], textdata['real1'],
        pd.Categorical(textdata['offset_']).codes, sequence_mat
    ]

    # Build the deep-in FM.
    difm_obj = DeepFM(features,
                      feature_dim,
                      realval=realvalued,
                      deepin_feature=deepin,
                      deepin_inputs=deep_inputs,
                      deepin_layers=deep_feature)
    tf.set_random_seed(1)
    np.random.seed(1)
    difm = difm_obj.build_model(embed_dim,
                                deep_out=False,
                                bias_only=bias_only,
                                dropout_input=0,
                                dropout_layer=0)
    print(difm.summary())
    earlyend = EarlyStopping(monitor='val_loss')
    # tf.train.AdamOptimizer is a TF 1.x API; tf.keras.optimizers.Adam is the
    # TF 2 equivalent.
    difm.compile(loss='binary_crossentropy',
                 metrics=['accuracy'],
                 optimizer=tf.train.AdamOptimizer())
    try:
        from keras.utils import plot_model
        plot_model(difm, to_file="difm.png")
    except Exception:
        pass
    difm.fit(x=inputs,
             y=textdata['y'],
             batch_size=100,
             epochs=2,
             verbose=1,
             callbacks=[earlyend],
             validation_split=.1,
             shuffle=True)

    # Now add a deep-out layer for the interactions.
    tf.set_random_seed(1)
    np.random.seed(1)
    diofm = difm_obj.build_model(embed_dim, deep_out=True)
    # print(diofm.summary())
    earlyend = EarlyStopping(monitor='val_loss')
    diofm.compile(loss='binary_crossentropy',
                  metrics=['accuracy'],
                  optimizer=tf.train.AdamOptimizer())
    diofm.fit(x=inputs,
              y=textdata['y'],
              batch_size=100,
              epochs=100,
              verbose=1,
              callbacks=[earlyend],
              validation_split=.1,
              shuffle=True)