def train_lstm_fusion(X_train, y_train, X_dev, y_dev, embedding_weights, reg=0.0, embed_glove=False):
    '''Trains an LSTM network using my recurrent attention layer, which is
    based on Cheng et al. deep attention fusion and ideas from Section 3.1
    of Luong et al. 2015 (http://arxiv.org/pdf/1508.04025v5.pdf).

    Side effects: saves the best-val_acc weights to lstm_memfusion_best.hdf5
    and pickles the train/dev loss and accuracy curves to four .p files.
    '''
    # Checkpoint whichever epoch achieves the best validation accuracy.
    saver = ModelCheckpoint(filepath="lstm_memfusion_best.hdf5", monitor='val_acc',
                            verbose=1, save_best_only=True)

    sentences = Input(shape=(max_sen_length,), dtype='int32')
    if embed_glove:
        # Initialise the embedding from the pre-trained GloVe vectors.
        embedded = Embedding(input_dim=vocab_size, output_dim=vocab_dim,
                             input_length=max_sen_length, mask_zero=True,
                             weights=[embedding_weights])(sentences)
    else:
        # Otherwise learn the embedding from a random initialisation.
        embedded = Embedding(input_dim=vocab_size, output_dim=vocab_dim,
                             input_length=max_sen_length, mask_zero=True)(sentences)
    embedded = Dropout(0.15)(embedded)

    encoded = LSTM(vocab_dim, dropout_U=0.25, return_sequences=True)(embedded)
    # NOTE: attention needs to be twice that of LSTMem for r*cell_in operation to be valid
    context = TDistSoftAttention(LSTMMem(vocab_dim/2, dropout_U=0.25,
                                         return_mem=True))(encoded)
    fused = AttnFusion(vocab_dim, dropout_U=0.3, W_regularizer=l2(0.0),
                       U_regularizer=l2(0.0), return_sequences=False)(context)
    fused = Highway(activity_regularizer=activity_l2(reg))(fused)
    prediction = Dense(nb_classes, activation='softmax',
                       activity_regularizer=activity_l2(reg))(fused)

    # Callbacks that record per-batch/per-epoch loss and accuracy curves.
    history = LossHistory()
    val_history = ValLossHistory()
    acc = AccHistory()
    val_acc = ValAccHistory()

    model = Model(input=sentences, output=prediction)
    model.compile(optimizer='adadelta', loss='categorical_crossentropy',
                  metrics=['accuracy'])
    model.fit(X_train, y_train, nb_epoch=40, batch_size=300,
              validation_data=(X_dev, y_dev),
              callbacks=[saver, early_stop_val, history, val_history, acc, val_acc])

    # Persist the training curves for later inspection/plotting.
    pickle.dump(history.losses, open("lstm_memfusion_trainloss.p", "wb"))
    pickle.dump(val_history.losses, open("lstm_memfusion_devloss.p", "wb"))
    pickle.dump(acc.losses, open("lstm_memfusion_trainacc.p", "wb"))
    pickle.dump(val_acc.losses, open("lstm_memfusion_devacc.p", "wb"))
def dense_highway_nn(input_len, num_units, dropout=0.5):
    '''Build and compile a dense + highway binary classifier.

    Args:
        input_len: number of input features per example.
        num_units: width of the hidden dense layer.
        dropout: dropout rate applied after the dense layer.

    Returns:
        A compiled Keras Model (binary crossentropy, adam, accuracy metric).
    '''
    input_x = Input(shape=(input_len, ))
    dense = Dense(num_units, init='normal', activation='relu')(input_x)
    # Use a distinct name for the layer output so the `dropout` rate
    # argument is not shadowed (the original rebound the parameter).
    dropped = Dropout(dropout)(dense)
    highway = Highway()(dropped)
    output_y = Dense(1, activation='sigmoid')(highway)
    model = Model(input=[input_x], output=[output_y])
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])
    return model
def get_model(go_id, parent_id): filepath = DATA_ROOT + CUR_LEVEL + parent_id + '/' + go_id + '.hdf5' model = Sequential() model.add(Dense(8000, activation='relu', input_dim=8000)) model.add(Highway()) model.add(Dense(1, activation='sigmoid')) model.compile( loss='binary_crossentropy', optimizer='rmsprop', class_mode='binary') # Loading saved weights print 'Loading weights for ' + go_id model.load_weights(filepath) return model
def model(train_df, test_df): # Training batch_size = 64 nb_epoch = 64 train_data, test_data = train_df['data'].values, test_df['data'].values train_label, test_label = train_df['sequence'].values, test_df[ 'sequence'].values for i in range(len(train_label)): train_label[i] = AAINDEX[train_label[i][-1]] for i in range(len(test_label)): test_label[i] = AAINDEX[test_label[i][-1]] train_label = np_utils.to_categorical(train_label, 20) test_label = np_utils.to_categorical(test_label, 20) # train_data = numpy.hstack(train_data).reshape(train_data.shape[0], 8000) # test_data = numpy.hstack(test_data).reshape(test_data.shape[0], 8000) print('X_train shape: ', train_data.shape) print('X_test shape: ', test_data.shape) model = Sequential() model.add(Dense(8000, activation='relu', input_dim=8000)) model.add(Highway()) model.add(Dense(1, activation='sigmoid')) model.compile(loss='categorical_crossentropy', optimizer='rmsprop') model_path = DATA_ROOT + 'fofe_sequence.hdf5' checkpointer = ModelCheckpoint(filepath=model_path, verbose=1, save_best_only=True) earlystopper = EarlyStopping(monitor='val_loss', patience=7, verbose=1) model.fit(X=train_data, y=train_label, batch_size=batch_size, nb_epoch=nb_epoch, show_accuracy=True, verbose=1, validation_split=0.2, callbacks=[checkpointer, earlystopper]) # Loading saved weights print 'Loading weights' model.load_weights(model_path) pred_data = model.predict_classes(test_data, batch_size=batch_size) return classification_report(list(test_label), pred_data)
def model(labels, data, go_id): # set parameters: # Training batch_size = 100 nb_epoch = 100 pdb.set_trace() train, test = train_test_split(labels, data, batch_size=batch_size) train_label, train_data = train test_label, test_data = test test_label_rep = test_label shap = numpy.shape(train_data) print('X_train shape: ', shap) print('X_test shape: ', test_data.shape) model = Sequential() model.add(Dense(shap[1], activation='relu', input_dim=shap[1])) model.add(Highway()) model.add(Dense(1, activation='sigmoid')) print 'compiling model' model.compile(loss='binary_crossentropy', optimizer='rmsprop', class_mode="binary") print 'running at most 60 epochs' checkpointer = ModelCheckpoint(filepath="bestmodel.hdf5", verbose=1, save_best_only=True) earlystopper = EarlyStopping(monitor='val_loss', patience=5, verbose=1) model.fit(train_data, train_label, batch_size=batch_size, nb_epoch=nb_epoch, shuffle=True, show_accuracy=True, validation_split=0.3, callbacks=[checkpointer, earlystopper]) # # Loading saved weights print 'Loading weights' model.load_weights('bestmodel.hdf5') pred_data = model.predict_classes(test_data, batch_size=batch_size) # Saving the model tresults = model.evaluate(test_data, test_label, show_accuracy=True) print tresults return classification_report(list(test_label_rep), pred_data)
def get_model( go_id, parent_id, level): filepath = DATA_ROOT + 'level_' + str(level) + '/' + parent_id + '/' + go_id + '.hdf5' if not os.path.exists(filepath): return None key = parent_id + "_" + go_id if key in go_model: return go_model[key] model = Sequential() model.add(Dense(8000, activation='relu', input_dim=8000)) model.add(Highway()) model.add(Dense(1, activation='sigmoid')) model.compile( loss='binary_crossentropy', optimizer='rmsprop', class_mode='binary') # Loading saved weights print 'Loading weights for ' + parent_id + '-' + go_id try: model.load_weights(filepath) except Exception, e: print 'Could not load weights for %s %s %d' % (parent_id, go_id, level) return None
def model(labels, data, go_id): # set parameters: # Convolution filter_length = 7 nb_filter = 64 pool_length = 2 k=7 # LSTM lstm_output_size = 70 # Training batch_size = 32 nb_epoch = 12 train, test = train_test_split( labels, data, batch_size=batch_size) train_label, train_data = train test_label, test_data = test test_label_rep = test_label model = Sequential() model.add(Convolution1D( input_dim=20, input_length=500, nb_filter=320, filter_length=20, border_mode="valid", activation="relu", subsample_length=1)) model.add(MaxPooling1D(pool_length=10, stride=10)) model.add(Dropout(0.2)) model.add(Convolution1D( nb_filter=320, filter_length=20, border_mode="valid", activation="relu", subsample_length=1)) model.add(MaxPooling1D(pool_length=10, stride=10)) model.add(Dropout(0.2)) model.add(Flatten()) model.add(Highway()) model.add(Dropout(0.5)) model.add(Dense(output_dim=1000)) model.add(Activation('relu')) model.add(Dense(output_dim=1)) model.add(Activation('sigmoid')) print 'compiling model' model.compile( loss='binary_crossentropy', optimizer='rmsprop', class_mode="binary") print 'running at most 60 epochs' model_path = DATA_ROOT + go_id + '.hdf5' checkpointer = ModelCheckpoint( filepath=model_path, verbose=1, save_best_only=True) earlystopper = EarlyStopping(monitor='val_loss', patience=5, verbose=1) model.fit( train_data, train_label, batch_size=batch_size, nb_epoch=60, shuffle=True, show_accuracy=True, validation_split=0.3, callbacks=[checkpointer, earlystopper]) # # Loading saved weights print 'Loading weights' model.load_weights(model_path) pred_data = model.predict_classes(test_data, batch_size=batch_size) # Saving the model # tresults = model.evaluate(test_data, test_label,show_accuracy=True) # print tresults return classification_report(list(test_label_rep), pred_data)
def model(df, parent_id, go_id): # Training batch_size = 64 nb_epoch = 64 # Split pandas DataFrame n = len(df) split = 0.8 m = int(n * split) train, test = df[:m], df[m:] # train, test = train_test_split( # labels, data, batch_size=batch_size) train_label, train_data = train['labels'], train['data'] if len(train_data) < 100: raise Exception("No training data for " + go_id) test_label, test_data = test['labels'], test['data'] test_label_rep = test_label train_data = train_data.as_matrix() test_data = test_data.as_matrix() train_data = numpy.hstack(train_data).reshape(train_data.shape[0], 8000) test_data = numpy.hstack(test_data).reshape(test_data.shape[0], 8000) shape = numpy.shape(train_data) print('X_train shape: ', shape) print('X_test shape: ', test_data.shape) model = Sequential() model.add(Dense(8000, activation='relu', input_dim=8000)) model.add(Highway()) model.add(Dense(1, activation='sigmoid')) model.compile(loss='binary_crossentropy', optimizer='rmsprop', class_mode='binary') model_path = DATA_ROOT + parent_id + '/' + go_id + '.hdf5' checkpointer = ModelCheckpoint(filepath=model_path, verbose=1, save_best_only=True) earlystopper = EarlyStopping(monitor='val_loss', patience=7, verbose=1) model.fit(X=train_data, y=train_label, batch_size=batch_size, nb_epoch=nb_epoch, show_accuracy=True, verbose=1, validation_split=0.2, callbacks=[checkpointer, earlystopper]) # Loading saved weights print 'Loading weights' model.load_weights(model_path) pred_data = model.predict_classes(test_data, batch_size=batch_size) return classification_report(list(test_label_rep), pred_data)