def train(self):
    """Build, fit, persist, and evaluate a BiLSTM token-tagging model.

    Reads self.num_words, self.num_entities and the train/validation/test
    splits from the instance; saves the fitted model under ../models/.

    Returns:
        tuple: (fitted keras Model, keras History from training).
    """
    # Fixed-length input of 120 token ids, embedded into 50-dim vectors.
    seq_input = Input(shape=(120,))
    x = Embedding(input_dim=self.num_words, output_dim=50, input_length=120)(seq_input)
    x = Dropout(0.1)(x)
    x = Bidirectional(LSTM(units=100, return_sequences=True, recurrent_dropout=0.1))(x)
    # Per-timestep softmax over the entity label set.
    out = TimeDistributed(Dense(self.num_entities, activation="softmax"))(x)

    net = Model(seq_input, out)
    net.compile(optimizer="rmsprop", loss="categorical_crossentropy", metrics=["accuracy"])

    history = net.fit(x=self.X_train, y=np.array(self.Y_train), batch_size=64, epochs=10,
                      validation_data=(self.X_validation, self.Y_validation))

    # Microsecond suffix gives a (mostly) unique filename per run.
    net.save("../models/ner_" + str(datetime.utcnow().microsecond))

    test_eval = net.evaluate(self.X_test, self.Y_test, verbose=0)
    print('Test loss:', test_eval[0])
    print('Test accuracy:', test_eval[1])
    return net, history
def hyperopt_train_test(params):
    """Train and score one model for a hyperopt trial described by *params*.

    Reads the global ``dmc_parameters`` configuration dict and the global
    ``X_tr``/``y_tr``/``X_vl``/``y_vl`` data arrays.

    Returns:
        tuple: (loss, acc, prox, fScore) measured on the validation split.
    """
    # hyperopt samples the exponent; recover the actual epsilon value here.
    eps = 10**params['epsilon_exp']
    opt = optimizers.adam(lr=params['learning_rate'], epsilon=eps)

    if dmc_parameters["use_embedding_layer"]:
        # Integer-encoded sequences go through a learned embedding.
        seq_in = Input(shape=(dmc_parameters["max_seq_len"], ))
        x = Embedding(input_dim=dmc_parameters["one_hot_vector_len"],
                      output_dim=params['embedding_layer_output'],
                      input_length=dmc_parameters["max_seq_len"])(seq_in)
        x = Dropout(rate=params['embedding_dropout'])(x)
    else:
        # One-hot input consumed directly, no embedding.
        seq_in = Input(shape=(dmc_parameters["max_seq_len"], dmc_parameters["one_hot_vector_len"]))
        x = seq_in

    # Either BiLSTM stack can be disabled by sampling a zero unit count.
    if params['bi_lstm1_units'] > 0:
        x = Bidirectional(CuDNNLSTM(units=params['bi_lstm1_units'], return_sequences=True))(x)
    if params['bi_lstm2_units'] > 0:
        x = Bidirectional(CuDNNLSTM(units=params['bi_lstm2_units'], return_sequences=True))(x)

    if dmc_parameters["use_crf_layer"]:
        crf = CRF(dmc_parameters["num_tags"])  # CRF layer
        out = crf(x)  # output
        net = Model(seq_in, out)
        net.compile(optimizer=opt, loss=losses.crf_loss,
                    metrics=[metrics.crf_accuracy, avg_proximity_metric()])
    else:
        out = TimeDistributed(Dense(dmc_parameters["num_tags"], activation="softmax"))(x)
        net = Model(seq_in, out)
        net.compile(optimizer=opt, loss="categorical_crossentropy",
                    metrics=["accuracy", avg_proximity_metric()])
    net.summary()

    # Stop on stalled validation loss and keep the best weights seen.
    early_stop = EarlyStopping(monitor='val_loss', min_delta=0,
                               patience=dmc_parameters["patience"],
                               verbose=False, mode='min', restore_best_weights=True)
    history = net.fit(X_tr, np.array(y_tr),
                      batch_size=dmc_parameters['batch_size'],
                      epochs=dmc_parameters["epochs"],
                      validation_data=(X_vl, np.array(y_vl)),
                      verbose=False, shuffle=True, callbacks=[early_stop])

    loss, acc, prox = net.evaluate(x=X_vl, y=np.array(y_vl),
                                   batch_size=dmc_parameters['batch_size'], verbose=False)

    # Convert one-hot targets and predictions back to tag strings for seqeval.
    validation_labels = deepMirCut.pred2label(y_vl, dmc_parameters)
    validation_pred = net.predict(X_vl, verbose=False)
    pred_labels = deepMirCut.pred2label(validation_pred, dmc_parameters)
    fScore = f1_score(validation_labels, pred_labels)
    return loss, acc, prox, fScore
# --- fragment: tail of a model-building/training script; the start of this
# --- definition (and the layer call this closes) is outside the visible view.
model) # softmax output layer
model = Model(model_input, out)
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
model.summary()
# Single-epoch fit with a 10% validation split; `callbacks` comes from the
# enclosing scope.
history = model.fit(X_train, np.array(y_train), batch_size=32, epochs=1, validation_split=0.1, verbose=1, callbacks=callbacks)
loss, accuracy = model.evaluate(X_test, np.array(y_test))
# save model
print('saved model to ', args.output_model_path)
model.save(MODEL_FILE)
# Copy the locally saved model file to the output path; presumably file_io is
# tensorflow.python.lib.io.file_io so the destination may be a GCS URI — TODO confirm.
with file_io.FileIO(MODEL_FILE, mode='rb') as input_f:
    with file_io.FileIO(args.output_model_path + '/' + MODEL_FILE, mode='wb+') as output_f:
        output_f.write(input_f.read())
# write out metrics
# NOTE(review): the name/numberValue/format keys look like a Kubeflow Pipelines
# metrics payload — confirm against the consumer. Fragment ends mid-literal;
# the dict continues beyond this view.
metrics = { 'metrics': [{ 'name': 'accuracy-score', 'numberValue': accuracy, 'format': "PERCENTAGE",
# --- fragment: the closing arguments of a model.compile(...) call plus the
# --- training/saving part of a script; the call's start is outside this view.
metrics = ["accuracy"]);
print(model.summary());
# Early stopping on validation accuracy (maximize), patience of 3 epochs.
monitor = EarlyStopping(monitor = "val_acc", min_delta = 0.0001, patience = 3, verbose = 1, mode = "max");
board = TensorBoard(log_dir = "log/{}".format(arguments.id));
# `cues` is an optional second input stream, enabled by the --cues flag.
model.fit(([inputs, cues] if arguments.cues else inputs), outputs, validation_split = arguments.vs, batch_size = arguments.bs, epochs = arguments.epochs, callbacks = [monitor, board], verbose = 1);
if arguments.debug:
    # Re-evaluates on the training data itself (debug sanity check only).
    print("model.evaluate() on training: {}" "".format(model.evaluate(([inputs, cues] if arguments.cues else inputs), outputs, verbose = 1)));
model.save(arguments.id + ".h5");
#
# in a few, rare circumstances, we allow ourselves to re-interpret variable
# names, as is the case of .inputs. and .outputs. here: now turning our focus
# to the evaluation data.
#
n = 0;
unknown = 0;
# Fresh zero-filled buffers sized for the evaluation set `test`.
inputs = np.zeros((len(test), LENGTH), dtype = int);
cues = np.zeros((len(test), LENGTH), dtype = int);
# Fragment ends mid-statement; the `golds` allocation continues beyond this view.
golds = np.zeros((len(test), LENGTH, len(classes) - (2 if arguments.cues else 0)),
# --- fragment: the Embedding(...) call is cut at the start (outside this view);
# --- appears to be an exported notebook (In[..] cell markers).
weights=[embedding_matrix], input_length=MAX_SEQUENCE_LENGTH)(input)
model = Bidirectional(LSTM(100, return_sequences=True, dropout=0.50), merge_mode='concat')(model)
model = TimeDistributed(Dense(100, activation='relu'))(model)
# Flatten collapses the time dimension: one prediction per sequence, not per token.
model = Flatten()(model)
model = Dense(100, activation='relu')(model)
output = Dense(27, activation='softmax')(model)  # 27-way classification head
model = Model(input, output)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# In[ ]:
model.fit(X_train, Y_train, validation_split=0.15, epochs=20, verbose=2)
# In[ ]:
# evaluate the model
loss, accuracy = model.evaluate(X_test, Y_test, verbose=2)
print('Accuracy: %f' % (accuracy * 100))
# In[66]:
from sklearn.metrics import classification_report, confusion_matrix
Y_pred = model.predict(X_test)
y_pred = np.array([np.argmax(pred) for pred in Y_pred])
# NOTE(review): y_pred holds argmaxed class indices, but Y_test was usable with
# categorical_crossentropy above and so is presumably one-hot; if so,
# classification_report is comparing mismatched label formats — confirm and
# argmax Y_test as well before reporting.
print(' Classification Report:\n', classification_report(Y_test, y_pred), '\n')
# --- fragment: tail of a prediction-to-label conversion helper; the function
# --- header and outer loop start outside this view. Indentation below
# --- reconstructs the apparent nesting.
        out_i.append(idx2tag[p_i])
    out.append(out_i)
    return out
# evaluate the model
#test_pred=model.predict(X_test)
# from sklearn.metrics import classification_report
# y_pred = model.predict(X_test, batch_size=64, verbose=1)
# y_pred_bool = np.argmax(y_pred, axis=1)
# print(y_pred)
#print(classification_report(y_test, y_pred_bool))
# model.evaluate returns the compiled metrics in declaration order; the names
# bound here (f1_score, precision, recall) shadow any same-named imports.
loss, accuracy, f1_score, precision, recall = model.evaluate(X_test, y_test, verbose=0)
print("loss:%f accuracy:%f f1_score:%f precision:%f recall:%f" % (loss, accuracy, f1_score, precision, recall))
#pred_labels = pred2label(test_pred)
#test_labels = pred2label(y_test)
#print(pred_labels)
#print(classification_report(test_labels,pred_labels))
# Alternative per-sentence micro-averaged scoring, kept disabled:
# f1,pr,rec=0,0,0
# for i in range(len(test_labels)):
#     pr+=precision_score(test_labels[i],pred_labels[i],average='micro')
#     f1+=f1_score(test_labels[i],pred_labels[i],average='micro')
#     rec+=recall_score(test_labels[i],pred_labels[i],average='micro')
# pr/=len(test_labels)
# rec/=len(test_labels)
# f1/=len(test_labels)
# set titles sub_fig1.set_title('Accuracy') sub_fig2.set_title('Loss') print(hist) # set values and labels sub_fig1.plot(hist["crf_viterbi_accuracy"], label='acc') sub_fig1.plot(hist["val_crf_viterbi_accuracy"], label='val_acc') sub_fig1.legend(loc="lower right") sub_fig2.plot(hist["loss"], label='loss') sub_fig2.plot(hist["val_loss"], label='val_loss') sub_fig2.legend(loc="upper right") plt.xlabel('epoch') # show figure plt.show() score = model.evaluate(X_te, np.array(y_te), batch_size=batch_size, verbose=1) print(model.metrics_names) print("Score:") print(score) # ## Prediction on test set from seqeval.metrics import precision_score, recall_score, f1_score, classification_report # print("Input:") # print(X_te[0]) # print("Supposed output:") # print(y_te) # print(np.array(y_te)) test_pred = model.predict([X_te], verbose=1) # print("Prediction result:") # print(test_pred[0]) idx2tag = {i: w for w, i in tags2idx.items()}
def train_eval(data_path, model_name, option='simple', emb_path=None):
    """Train a BiLSTM sequence labeller on the data in *data_path* and save it
    (and the preprocessing maps needed to reuse it) as *model_name*.

    Parameters
    ----------
    data_path : str
        Directory containing the 'train', 'val' and 'test' data files.
    model_name : str
        Basename for the saved model ('<name>.hdf5') and the preprocessing
        JSON ('<name>-preproc.json').
    option : str
        'simple' (default softmax head), 'emb' (use a pretrained embedding
        loaded from *emb_path*) or 'crf' (CRF output layer).
    emb_path : str, optional
        Path to a gensim KeyedVectors file; only read when option == 'emb'.

    Returns
    -------
    list[list[str]]
        Predicted label sequences for the test set.

    Raises
    ------
    Exception
        If a data file or the embedding file cannot be loaded.
    """
    # get the data
    try:
        X_train, y_train = get_data(data_path + '/train')
        X_val, y_val = get_data(data_path + '/val')
        X_test, y_test = get_data(data_path + '/test')
    except Exception as exc:  # was a bare except; keep the message, chain the cause
        raise Exception("Some data file does not exist") from exc

    # preprocess the texts (in place)
    for X in [X_train, X_val, X_test]:
        preprocess_text(X)

    # Keras needs the sequences to be numerical and padded, as well as the labels.
    # Collect the full vocabulary and the label inventory first.
    words = list(set([w for sent in X_train + X_val + X_test for w in sent]))
    labels = list(set([l for sent in y_train for l in sent]))
    words.append('--PAD--')
    # Bug fix: the old code computed `enumerate(labels) + 1` here, which raises
    # TypeError. When there is no generic 'O' tag to absorb padding, padding
    # gets its own label class instead.
    if 'O' not in labels:
        labels.append('--PAD--')
    n_labels = len(labels)
    n_words = len(words)
    words2num = {word: i for i, word in enumerate(words)}
    labels2num = {label: i for i, label in enumerate(labels)}
    # a trick for NER: padded positions share the 'O' class when one exists
    if '--PAD--' not in labels2num:
        labels2num['--PAD--'] = labels2num['O']

    [X_train_num, X_val_num, X_test_num
     ] = [process_sequences(X, words2num) for X in [X_train, X_val, X_test]]
    [y_train_num, y_val_num, y_test_num
     ] = [process_sequences(y, labels2num) for y in [y_train, y_val, y_test]]
    # One-hot encode the numeric label sequences.
    [y_train_num, y_val_num, y_test_num] = [[to_categorical(i, num_classes=n_labels) for i in y]
                                            for y in [y_train_num, y_val_num, y_test_num]]

    if option == 'emb':
        try:
            emb_dict = KeyedVectors.load(emb_path)
        except Exception as exc:
            raise Exception("Embedding file does not exist") from exc
        # Build a matrix for the indexes with the vector values of corresponding
        # words. If the word does not exist in the embedding, keep zeros.
        emb_matrix = np.zeros((len(words), emb_dict.vector_size))
        for i, w in enumerate(words):
            if w in emb_dict:
                emb_matrix[i] = emb_dict[w]

    # We build a Bidirectional LSTM over variable-length sequences.
    inputs = Input(shape=(None, ))
    if option == 'emb':
        x = Embedding(input_dim=n_words, output_dim=emb_dict.vector_size,
                      weights=[emb_matrix])(inputs)
    else:
        x = Embedding(input_dim=n_words, output_dim=50)(inputs)
    x = Dropout(0.1)(x)
    x = Bidirectional(
        LSTM(units=50, return_sequences=True, recurrent_dropout=0.1))(x)

    if option == 'crf':
        # Bug fix: the CRF must be applied to the BiLSTM feature tensor before
        # the Model is built (the old code called crf() on the finished Model
        # object, which is not a layer input).
        crf = CRF(n_labels)  # CRF layer
        out = crf(x)
        model = Model(inputs, out)
        model.compile(optimizer="rmsprop", loss=crf.loss_function, metrics=[crf.accuracy])
    else:
        # TimeDistributed keeps the outputs for each sequence position separated.
        out = TimeDistributed(Dense(n_labels, activation="softmax"))(x)
        model = Model(inputs, out)
        model.compile(optimizer="rmsprop", loss="categorical_crossentropy",
                      metrics=["accuracy"])

    # Fit the model using the validation data
    model.fit(X_train_num, np.array(y_train_num), batch_size=32, epochs=5,
              validation_data=(X_val_num, np.array(y_val_num)), verbose=1)

    # Save the model and the preprocessing needed to reuse it
    model.save('{}.hdf5'.format(model_name), overwrite=True)
    formatter = {
        'labels': labels,
        'words': words,  # bug fix: previously stored the literal string 'words'
        'words2num': words2num,
        'labels2num': labels2num
    }
    with open('{}-preproc.json'.format(model_name), 'w+') as f:
        json.dump(formatter, f)

    # Evaluate the model on the test data
    predictions = model.predict(X_test_num)
    results = model.evaluate(X_test_num, np.array(y_test_num))
    print("Overall results for the predictions: {}".format(results))
    # These values are not very clear because of class imbalance,
    # so make a better per-label evaluation as well.
    predictions = np.argmax(predictions, axis=-1)
    predictions = [[labels[i] for i in pred] for pred in predictions]
    evaluate(y_test, predictions, labels)
    return predictions