def train():
    """Build, fit and save a BiLSTM-CRF tagger, then plot its accuracy curves.

    Uses names defined outside this function: ``max_len``, ``n_words``,
    ``n_tags``, ``x_train`` and ``y_train``. The weights are written to
    ``models/bilstm-crf.h5``; the training history is printed as a DataFrame
    and the training / validation Viterbi accuracy are plotted.
    """
    token_ids = Input(shape=(max_len, ))

    # 100-dimensional embedding (mask_zero=True).
    embedding = Embedding(input_dim=n_words + 1,
                          output_dim=100,
                          input_length=max_len,
                          mask_zero=True)
    hidden = embedding(token_ids)

    # Bidirectional LSTM with recurrent dropout.
    recurrent = Bidirectional(
        LSTM(units=50, return_sequences=True, recurrent_dropout=0.1))
    hidden = recurrent(hidden)

    # Per-timestep dense layer feeding the CRF.
    projection = TimeDistributed(Dense(50, activation="relu"))
    hidden = projection(hidden)

    crf = CRF(n_tags)
    tag_scores = crf(hidden)

    model = Model(token_ids, tag_scores)
    model.compile(optimizer="rmsprop",
                  loss=crf.loss_function,
                  metrics=[crf.accuracy])
    model.summary()

    history = model.fit(x_train,
                        np.array(y_train),
                        batch_size=64,
                        epochs=5,
                        validation_split=0.1,
                        verbose=1)
    save_load_utils.save_all_weights(model, filepath="models/bilstm-crf.h5")

    hist = pd.DataFrame(history.history)
    print(hist)

    plt.figure(figsize=(12, 12))
    for column in ("crf_viterbi_accuracy", "val_crf_viterbi_accuracy"):
        plt.plot(hist[column])
    plt.show()
def test_save_and_load_all_weights():
    '''
    Test save_all_weights and load_all_weights.

    Saves model and optimizer weights (but not the configuration) from one
    model, loads them into a freshly built identical model and checks that
    both the layer kernel and the optimizer slot carry the marker values.
    The temporary ``model.h5`` file is removed even when an assertion fails.
    '''
    weights_file = 'model.h5'

    def make_model():
        _x = Input((10, ))
        _y = Dense(10)(_x)
        _m = Model(_x, _y)
        _m.compile('adam', 'mean_squared_error')
        # Forces creation of the optimizer weights so they can be edited.
        _m._make_train_function()
        return _m

    # make a model
    m1 = make_model()

    # set weights
    w1 = m1.layers[1].kernel  # dense layer
    w1value = K.get_value(w1)
    w1value[0, 0:4] = [1, 3, 3, 7]
    K.set_value(w1, w1value)

    # set optimizer weights
    ow1 = m1.optimizer.weights[3]  # momentum weights
    ow1value = K.get_value(ow1)
    ow1value[0, 0:3] = [4, 2, 0]
    K.set_value(ow1, ow1value)

    try:
        # save all weights
        save_all_weights(m1, weights_file)

        # new model
        m2 = make_model()

        # load all weights
        load_all_weights(m2, weights_file)

        # check weights
        assert_allclose(K.get_value(m2.layers[1].kernel)[0, 0:4],
                        [1, 3, 3, 7])
        # check optimizer weights
        assert_allclose(K.get_value(m2.optimizer.weights[3])[0, 0:3],
                        [4, 2, 0])
    finally:
        # Clean up regardless of the outcome; the file may not exist if
        # saving itself failed.
        if os.path.exists(weights_file):
            os.remove(weights_file)
def trainKerasModel(max_len, num_LSTM_Units, learning_rate, vector_dim,
                    num_docs, embedding_matrix, embeddingLayerFlag,
                    embeddingFlag, dropout, batch_size, epochs, X_Train,
                    y_Train, experiment):
    """Create, train and save a CRF-topped Keras model.

    The architecture comes from ``createModelArchitecture``. Training uses
    RMSprop with ``learning_rate``, early stopping on ``val_loss`` (patience
    2) and a 15% validation split. The weights are saved without optimizer
    state under the module-level ``path`` prefix, in a file whose name
    encodes the hyper-parameters, both embedding flags and ``experiment``.
    """
    model, crf = createModelArchitecture(max_len, num_LSTM_Units, vector_dim,
                                         num_docs, embedding_matrix, dropout)
    print("Model Architecture Created")

    callbacks = [EarlyStopping(patience=2, monitor='val_loss')]
    optimizer = RMSprop(lr=learning_rate)
    model.compile(loss=crf.loss_function,
                  optimizer=optimizer,
                  metrics=[crf.accuracy])
    model.summary()

    fit_options = dict(batch_size=batch_size,
                       epochs=epochs,
                       validation_split=0.15,
                       verbose=1,
                       callbacks=callbacks)
    history = model.fit(X_Train, np.array(y_Train), **fit_options)
    print("Model Training Done, Saving...")

    # The two flags are joined as-is, so they must already be strings.
    name_parts = [
        'KerasModel',
        '_lstm', str(num_LSTM_Units),
        '_lr', str(learning_rate),
        '_dropOut', str(dropout),
        '_bSize', str(batch_size),
        '_epochs', str(epochs),
        '_', embeddingLayerFlag,
        '_', embeddingFlag,
        '_', str(experiment),
        'exp.h5',
    ]
    modelName = ''.join(name_parts)

    save_load_utils.save_all_weights(model,
                                     path + modelName,
                                     include_optimizer=False)
    print("Model Saved")
def train(train_x, train_y, train_docs, valid_x, valid_docs, word_vector,
          wv_for_score_model, score_clf):
    """Train the BiLSTM model batch by batch, checkpointing on validation F1.

    For every epoch the training triples ``(x, y, doc)`` are fed through
    ``batch_iter``; each batch is combined with a dictionary-feature matrix
    from ``matrix_index`` and passed to ``model.train_on_batch``. After
    ``FLAGS.evaluate_epoch`` epochs, every ``FLAGS.checkpoint_every`` batches
    the model is scored with ``evaluateVal``; on a new best F1 the
    predictions are written via ``predict`` and the weights are saved to
    ``FLAGS.model_path``.

    Args:
        train_x, train_y, train_docs: parallel training inputs, labels and
            source documents.
        valid_x, valid_docs: validation inputs and their documents.
        word_vector: passed to ``build_bilstm_model``.
        wv_for_score_model, score_clf: forwarded unchanged to
            ``evaluateVal`` and ``predict``.

    Both log files are opened in a ``with`` block, so they are closed even
    if training raises.
    """
    with open(FLAGS.val_acc_predict, 'w', encoding='utf8') as resultWriter, \
            open(FLAGS.train_loss, 'w', encoding='utf8') as trainloss:
        model_object = dnnModel(FLAGS)
        model = model_object.build_bilstm_model(word_vector)
        maxF1 = 0.0

        file_list = ['../dictionary/trainSenDict',
                     '../dictionary/theme1117.txt']
        dictionary_list = get_dict(file_list)
        # The validation features do not change, so compute them once.
        valid_dict_index = matrix_index(valid_docs, dictionary_list,
                                        FLAGS.max_len)

        for i in range(FLAGS.epoch):
            resultWriter.write('recycle number is ' + str(i) + '\n')
            trainloss.write('recycle number is ' + str(i) + '\n')
            batchIter = batch_iter(len(train_x),
                                   zip(train_x, train_y, train_docs),
                                   FLAGS.batch_size, False)
            j = 0
            for zip_xydoc in batchIter:
                print(str(i) + ' th epoch, ' + str(j) + ' th batch.')
                j += 1
                print('train.....')
                batch_x, batch_y, batch_doc = zip(*zip_xydoc)
                batch_x = np.array(list(batch_x))
                batch_y = np.array(list(batch_y))
                dict_index = matrix_index(batch_doc, dictionary_list,
                                          FLAGS.max_len)
                trainHistory = model.train_on_batch([batch_x, dict_index],
                                                    batch_y)
                print("train loss is: " + str(trainHistory[0]) + '\n')
                # NOTE(review): no separator is written between successive
                # loss values - confirm whether a newline was intended.
                trainloss.write(str(trainHistory[0]))

                if (i > FLAGS.evaluate_epoch
                        and j % FLAGS.checkpoint_every == 0):
                    f1 = evaluateVal(model,
                                     valid_x,
                                     FLAGS.val_label_file,
                                     valid_docs,
                                     valid_dict_index,
                                     wv_for_score_model,
                                     score_clf,
                                     window=FLAGS.window_size)
                    print("the validation f1 score is " + str(f1))
                    if (f1 > maxF1):
                        print(
                            '####################update model#####################################################'
                        )
                        maxF1 = f1
                        resultWriter.write("the validation f1 is " +
                                           str(f1) + '\n')
                        predict(model, FLAGS.val_data_file,
                                FLAGS.result_file, wv_for_score_model,
                                score_clf, FLAGS.window_size, False)
                        resultWriter.flush()
                        trainloss.flush()
                        save_load_utils.save_all_weights(
                            model, FLAGS.model_path)

                # Release the batch arrays before the next batch is built.
                del batch_x
                del batch_y
            del batchIter
def save(self, path):
    """Write the weights of ``self.model`` to ``path``.

    Args:
        path (str): destination file for the model weights
    """
    model = self.model
    save_load_utils.save_all_weights(model, filepath=path)
def save(self, path):
    """Store the weights of the wrapped model.

    Args:
        path (str): file the weights of ``self.model`` are saved to
    """
    save_load_utils.save_all_weights(filepath=path, model=self.model)
def on_epoch_end(self, epoch, logs=None):
    """
    At the end of each epoch, compute the F1 score for the validation data.

    In case of a multi-output model, compute one value per output and average
    them all to obtain the overall F1 score. Save the model's weights
    whenever the averaged F1 beats the best score seen so far.

    Args:
        epoch: index of the epoch that just finished (not used here).
        logs: metrics dictionary supplied by the training loop; the
            per-output ``val_<output>_f1`` entries and the averaged
            ``val_f1`` are written into it. A fresh dict is used when
            omitted, so no state is shared between calls.
    """
    if logs is None:
        logs = {}

    self.compute_epoch_training_F1()
    in_length = len(self.model.input_layers)  # X data - to predict from
    out_length = len(self.model.output_layers)  # Number of tasks

    # Compute the model predictions
    predictions = self.model.predict(self.validation_data[:in_length])

    # In case of single output: a bare array is always one task, even when
    # its number of rows happens to equal the number of outputs.
    is_sequence = isinstance(predictions, (list, tuple))
    if not is_sequence or len(predictions) != out_length:
        predictions = [predictions]

    vals_acc = []
    vals_recall = []
    vals_f1 = []
    reports = ""

    # Iterate over all output predictions
    for i, pred in enumerate(predictions):
        pred_array = np.asarray(pred)
        truth = self.validation_data[in_length + i]
        _val_acc, _val_recall, _val_f1 = self.compute_scores(pred_array, truth)

        # Classification report
        reports += "For task " + str(i + 1) + "\n"
        reports += "===================================================================================="
        reports += self.classification_report(i, pred_array,
                                              truth) + "\n\n\n"

        # Add scores internally
        vals_acc.append(_val_acc)
        vals_recall.append(_val_recall)
        vals_f1.append(_val_f1)

        # Add F1 score to be logged
        f1_name = "val_" + self.model.output_layers[i].name + "_f1"
        logs[f1_name] = _val_f1

    # Add classification reports for all the predictions/tasks
    self.test_report.append(reports)

    # Add internally
    f1_mean = sum(vals_f1) / len(vals_f1)
    self.test_acc.append(sum(vals_acc) / len(vals_acc))
    self.test_recall.append(sum(vals_recall) / len(vals_recall))
    self.test_f1s.append(f1_mean)

    # Add to log
    logs["val_f1"] = f1_mean

    # Save best model's weights
    if f1_mean > self.best_score:
        self.best_score = f1_mean
        save_load_utils.save_all_weights(self.model, self.model_save_path)
def build_model(x, y, vocab_size, max_len):
    """Build and train a two-layer bidirectional LSTM + CRF model.

    The best checkpoint (by ``val_loss``) is written to
    ``./data/weights.hdf5`` during training. Afterwards the architecture
    (JSON), the final weights and the pickled training history are written
    under ``./data``. Nothing is returned.

    :param x: training inputs passed to ``model.fit``
    :param y: training targets passed to ``model.fit``
    :param vocab_size: vocabulary size; the embedding has ``vocab_size + 1`` rows
    :param max_len: input sequence length for the embedding layer
    :return: None
    """
    # TODO: read from an existing Word2Vec model, to enhance embedding performance
    model = Sequential()
    model.add(
        Embedding(input_dim=vocab_size + 1,
                  output_dim=EMBEDDING_SIZE,
                  input_length=max_len,
                  mask_zero=True))

    # Two identical BiLSTM + dropout stages.
    for _ in range(2):
        model.add(Bidirectional(LSTM(HIDDEN_UNITS, return_sequences=True)))
        model.add(Dropout(DROPOUT_RATE))

    # TODO: consider to add a CNN layer to get higher accuracies
    model.add(TimeDistributed(Dense(HIDDEN_UNITS, activation='relu')))

    # CAUTION: sparse_target is True for index targets, False for one-hot.
    crf = CRF(5)
    model.add(crf)
    model.summary()
    model.compile(optimizer='adam',
                  loss=crf.loss_function,
                  metrics=[crf.accuracy])

    training_callbacks = [
        ModelCheckpoint(filepath='./data/weights.hdf5',
                        monitor='val_loss',
                        verbose=1,
                        save_best_only=True),
        EarlyStopping(monitor="val_loss", patience=2),
        TerminateOnNaN(),
    ]
    history = model.fit(x,
                        y,
                        batch_size=BATCH_SIZE,
                        epochs=EPOCH_NUM,
                        validation_split=0.1,
                        callbacks=training_callbacks)

    # Save model architecture and weights
    with open('./data/model_architecture.json', 'w') as arch_file:
        arch_file.write(model.to_json())
    save_load_utils.save_all_weights(model, './data/model_weights.hdf5')

    with open('./data/history', 'wb') as history_file:
        pickle.dump(history.history, history_file)
def save(self, file_path):
    """
    Persist the model, its config, its architecture and the encoder.

    Everything is written below the directory ``file_path`` (prepared with
    ``mkdir``): ``KerasNER.model``, ``KerasNER.config``, ``KerasNER.json``
    and ``encoder``. When the ``use_crf`` parameter is set, only the weights
    are written through ``save_load_utils``; otherwise ``model.save`` is used.
    """
    target_dir = Path(file_path)
    mkdir(target_dir)

    weights_file = target_dir / "KerasNER.model"
    config_file = target_dir / "KerasNER.config"
    architecture_file = target_dir / "KerasNER.json"
    encoder_target = target_dir / "encoder"

    uses_crf = self.config.get_parameter("use_crf")
    if not uses_crf:
        self.model.save(str(weights_file))
    else:
        save_load_utils.save_all_weights(self.model, str(weights_file))

    self.config.save(config_file)

    # human-readable model architecture in json
    with open(architecture_file, "w") as handle:
        handle.write(self.model.to_json())

    self.encoder.save(encoder_target)
def save_model(model, filename):
    """Save the weights of ``model`` to ``filename`` via ``save_load_utils``."""
    save_load_utils.save_all_weights(model, filepath=filename)
def save(self, path):
    """Save the weights of ``self.model`` to the file at ``path``."""
    wrapped_model = self.model
    save_load_utils.save_all_weights(wrapped_model, filepath=path)
def save_model(model, name, result_folder):
    """Save ``model``'s weights as ``<result_folder>/<name>.model`` and log it."""
    file_name = name + '.model'
    target = os.path.join(result_folder, file_name)
    save_load_utils.save_all_weights(model, filepath=target)
    # Logged after the save call has returned.
    logger.info('saving model under ' + target)
def model_with_padding(self, DICT, n_char):
    """Train a three-layer BiLSTM-CRF on sequences padded to a fixed length.

    Sequences and numeric labels come from ``self.get_seq(DICT)``; both are
    post-padded to ``self.maxSeqLength`` and the labels are one-hot encoded
    with ``self.lab_len`` classes. The model is trained for
    ``self.epochsN`` epochs with batch size 32 and no validation split, and
    the weights are saved to ``max_seq_<epochsN>_32b.h5``.

    Args:
        DICT: mapping handed to ``self.get_seq``.
        n_char: number of distinct symbols; the embedding has ``n_char + 1``
            rows.
    """
    sequences, labels = self.get_seq(DICT)

    pad_to = self.maxSeqLength
    X = pad_sequences(sequences, maxlen=pad_to, padding='post')
    padded_labels = pad_sequences(labels, maxlen=pad_to, padding='post')
    y = [
        to_categorical(row, num_classes=self.lab_len)
        for row in padded_labels
    ]

    # Early stopping / checkpoint callbacks are intentionally not used here.

    # Set up the keras model
    char_ids = Input(shape=(self.maxSeqLength, ))
    hidden = Embedding(n_char + 1, 200, name="embed")(char_ids)

    # Stack of bidirectional LSTMs: (units, layer name)
    for units, layer_name in ((128, "lstm1"), (64, "lstm2"), (64, "lstm3")):
        cell = LSTM(units,
                    return_sequences=True,
                    recurrent_dropout=0.5,
                    dropout=0.5)
        hidden = Bidirectional(cell, merge_mode="concat",
                               name=layer_name)(hidden)

    hidden = TimeDistributed(Dense(self.lab_len, activation="relu"))(hidden)
    crf = CRF(self.lab_len)  # CRF layer
    tag_scores = crf(hidden)

    model = Model(char_ids, tag_scores)
    model.compile(optimizer="rmsprop",
                  loss=crf.loss_function,
                  metrics=[crf.accuracy])
    model.summary()

    # Batch sizes previously tried, per the original note: 32, 147, 245, 735.
    history = model.fit(X,
                        np.array(y),
                        batch_size=32,
                        epochs=self.epochsN,
                        validation_split=0.0,
                        verbose=1)

    # save the weights after the final epoch
    weights_name = 'max_seq_%s_32b.h5' % self.epochsN
    save_load_utils.save_all_weights(model, weights_name)
# Script fragment: train the multi-input tagger, persist weights and history,
# then decode the test set. All data arrays and the model are defined outside
# this fragment.
model.summary()
# The sentence input is passed twice, followed by the two POS inputs and the
# feature input.
history = model.fit([X_train_sents, X_train_sents, X_train_pos, X_train_npos, X_train_features],
                    y_train_ner,
                    batch_size=BATCH_SIZE,
                    epochs=MAX_EPOCHS,
                    verbose=2)
hist_dict = history.history

# save the model
# because we are using keras-contrib, we must save weights like this, and load into network
# (see decoding.ipynb)
save_load_utils.save_all_weights(model, '../model/nltkposcrf_model.h5')
np.save('../model/nltkhist_dict.npy', hist_dict)
print("models saved!\n")

preds = model.predict([X_test_sents, X_test_sents, X_test_pos, X_test_npos, X_test_features])
# Collapse the last axis to the index of the highest score.
preds = np.argmax(preds, axis=-1)
# Bare expression: has no effect when run as a script (notebook leftover).
preds.shape
print(preds[:5])
# Drop the trailing singleton axis of the gold labels.
trues = np.squeeze(y_test_ner, axis=-1)
# Bare expression: has no effect when run as a script (notebook leftover).
trues.shape
# Script fragment: finish the character-embedding BiLSTM-CRF model, train it,
# report on the test set and save it. `model`, the data arrays and the label
# helpers are defined outside this fragment.
model.add(TimeDistributed(LSTM(BiRNN_UNITS, return_sequences=False)))
model.add(Bidirectional(LSTM(BiRNN_UNITS // 2, return_sequences=True)))
crf = CRF(len(class_labels), sparse_target=True)
model.add(crf)
model.summary()
model.compile('adam', loss=crf.loss_function, metrics=[crf.accuracy])
model.fit(train_x, train_y, epochs=EPOCHS, validation_data=[test_x, test_y])

# Keep only the positions where test_x > 0 for both predictions and gold
# labels (presumably 0 marks padding - TODO confirm).
test_y_pred = model.predict(test_x).argmax(-1)[test_x > 0]
test_y_true = test_y[test_x > 0]

print('\n---- Result of Character Emebdding + BiLSTM-CRF ----\n')
# NOTE(review): the return value is discarded; confirm this helper prints
# the report itself.
classification_report(test_y_true, test_y_pred, class_labels)
plotConfusionMatrix(test_y_true, test_y_pred, class_labels,
                    reverseDictionary(dictionary_labels))

model.save('modelo.h5')  # creates the HDF5 file 'modelo.h5'
# Additionally save the weights through keras-contrib.
save_load_utils.save_all_weights(model, 'pesos.h5')

# serialize model to JSON
model_json = model.to_json()
with open("model.json", "w") as json_file:
    json_file.write(model_json)
def model_no_padding(self, DICT, n_char):
    """Train a BiLSTM-CRF on unpadded sequences, batching by sequence length.

    Each training record's ``'corpus'`` is replaced in place by its
    ``DICT`` lookups, then sequences and labels are grouped by length so
    every ``model.fit`` call receives equally long sequences. After each
    epoch the weights are saved to ``mini-batch-results/epoch_<n>.h5``, the
    model is tested with ``self.test_minibatch`` and scored with
    ``self.eval()``; the best epoch's file is finally copied to
    ``minibatch_<n>.h5``.

    Args:
        DICT: mapping from corpus symbols to integer ids.
        n_char: number of distinct symbols; the embedding has ``n_char + 1``
            rows.
    """
    # convert BIO tags to numbers
    self.convert_tags()

    # Replace every symbol by its id (None for symbols missing from DICT).
    lookup = DICT.get
    for record in self.train_data:
        record['corpus'] = [lookup(symbol) for symbol in record['corpus']]

    # Group sequences and labels by their own length.
    train_l_d = {}
    train_l_labels = {}
    for record in self.train_data:
        symbols = record['corpus']
        train_l_d.setdefault(len(symbols), []).append(symbols)
        tags = record['bion']
        train_l_labels.setdefault(len(tags), []).append(tags)

    sizes = list(train_l_d)

    # Set up the keras model
    char_ids = Input(shape=(None, ), dtype='int32')
    hidden = Embedding(n_char + 1, 200, name="embed")(char_ids)
    for units, layer_name in ((128, "lstm1"), (64, "lstm2"), (64, "lstm3")):
        cell = LSTM(units,
                    return_sequences=True,
                    recurrent_dropout=0.5,
                    dropout=0.5)
        hidden = Bidirectional(cell, merge_mode="concat",
                               name=layer_name)(hidden)
    hidden = TimeDistributed(Dense(self.num_labs, activation="relu"))(hidden)
    crf = CRF(self.num_labs)  # CRF layer
    tag_scores = crf(hidden)

    model = Model(char_ids, tag_scores)
    model.compile(optimizer="rmsprop",
                  loss=crf.loss_function,
                  metrics=[crf.accuracy])
    model.summary()

    f_best = -1
    f_index = -1

    # OK, start actually training
    for epoch in range(self.epochsN):
        print("Epoch", epoch, "start at", datetime.now())

        # The order of the length groups is randomised, except during the
        # first few epochs.
        if epoch > 2:
            random.shuffle(sizes)

        for size in sizes:
            tx = np.array(list(train_l_d[size]))
            ty = [
                to_categorical(tags, num_classes=self.num_labs)
                for tags in train_l_labels[size]
            ]
            # This trains in mini-batches
            model.fit(tx, np.array(ty), verbose=0, epochs=1)
        print("Trained at", datetime.now())

        # save all epochs
        epoch_file = 'mini-batch-results/epoch_%s.h5' % epoch
        save_load_utils.save_all_weights(model, epoch_file)

        # test the results
        self.test_minibatch(DICT, model)
        f = self.eval()
        if f > f_best:
            f_best, f_index = f, epoch

    # Pick the best model, and save it with a useful name
    print("Choosing the best epoch")
    best_source = "mini-batch-results/epoch_%s.h5" % f_index
    best_target = "minibatch_%s.h5" % f_index
    shutil.copyfile(best_source, best_target)
# (Tail of a call that starts before this fragment.)
y_train, batch_size=32, epochs=2, validation_data=(x_test, y_test))

# Model3: BERT -> BiLSTM -> CRF
nerbertbilstm = NerBiLSTM_Bert(config)
model = nerbertbilstm.model
model.compile(optimizer=Adam(1e-4), loss=crf_loss, metrics=[crf_accuracy])

# Miscellaneous
from keras.callbacks import ModelCheckpoint, Callback


class LossHistory(Callback):
    """Callback that records the ``'loss'`` entry of ``logs`` after every batch."""

    def on_train_begin(self, logs={}):
        # Start each training run with an empty loss list.
        self.losses = []

    def on_batch_end(self, batch, logs={}):
        # Appends None when 'loss' is absent from logs.
        self.losses.append(logs.get('loss'))


checkpointer = ModelCheckpoint(filepath="bilstm_1102_k205_tf130.w",
                               verbose=0,
                               save_best_only=True,
                               save_weights_only=True)
losshistory = LossHistory()

# Saving and loading the model
# TODO: is this keras_contrib API the only way to do it?
from keras_contrib.utils import save_load_utils

model_path = 'xxx'
save_load_utils.save_all_weights(model, model_path)  # save the model
save_load_utils.load_all_weights(model, model_path)  # load the model
# Notebook-export fragment: build a BiLSTM-CRF tagger, train it one epoch at a
# time while saving the weights after each epoch, then evaluate and predict.
# Hyper-parameters and data arrays are defined outside this fragment.
model = Sequential()
model.add(Embedding(max_features, embedding_size, input_length=maxlen))
model.add(Bidirectional(LSTM(hidden_size, return_sequences=True)))
model.add(TimeDistributed(Dense(hidden_size, activation='softmax')))
crf = (CRF(n_tags))
model.add(crf)
model.compile(optimizer="rmsprop", loss=crf.loss_function, metrics=[crf.accuracy])

# One fit call per epoch so that a separate weights file can be written for
# every epoch index.
for i in range(nb_epoch):
    model.fit(X_train, Y_train, batch_size=batch_size, epochs=1)
    save_load_utils.save_all_weights(model, '../models/bilstm_crf_' + str(i) + '.h5')

# In[ ]:

score = model.evaluate(X_test, Y_test)
print('Test score:', score[0])
print('Test accuracy:', score[1])

# In[ ]:

pred = model.predict(X_test)

# In[ ]:

# For every sentence, the arg-max index of each word's prediction vector.
pred_argmax = [[np.argmax(word) for word in sent] for sent in pred]
# Script fragment: train the two-input / two-output (slot + intent) model,
# then persist architecture, weights and history. Data arrays, `modelname`
# and `callbacks_list` are defined outside this fragment.
history = model.fit([trn_text_idx, trn_char_idx], [trn_slot_idx, trn_int_idx],
                    validation_data=([dev_text_idx, dev_char_idx], [dev_slot_idx, dev_int_idx]),
                    batch_size=BATCH_SIZE,
                    epochs=MAX_EPOCHS,
                    callbacks=callbacks_list,
                    verbose=0)
hist_dict = history.history

# save architecture with json
with open('model/' + modelname + '.json', 'w') as f:
    f.write(model.to_json())

# save weights
save_load_utils.save_all_weights(model, 'model/' + modelname + '.h5')

# save training history
np.save('model/' + modelname + '_dict.npy', hist_dict)

# load test: read back the weights file that was just written
model.load_weights('model/' + modelname + '.h5')

from sklearn.metrics import classification_report, f1_score
from sklearn.metrics import accuracy_score, precision_score, recall_score


# remove nulls and pads and get F1 on only labels
# (the remainder of this function lies beyond this fragment)
def procslots(trues, preds, nonull=True):
    tru_slots = []
    prd_slots = []
    for i in range(len(trues)):
def save_crf_model(self, path, name):
    """Save the weights of ``self.model`` to ``path + name``.

    ``DataSaverLoader.directory_exists(path)`` is called first; ``path`` and
    ``name`` are concatenated as-is, without inserting a separator.
    """
    # Directory check happens before anything is written.
    DataSaverLoader.directory_exists(path)

    target = path + name
    save_load_utils.save_all_weights(self.model, filepath=target)
def save_model(self, filepath):
    """Save the weights of ``self.model`` to ``filepath``."""
    save_load_utils.save_all_weights(model=self.model, filepath=filepath)
def save(self, path):
    """Write the weights of the wrapped model to ``path``."""
    save_load_utils.save_all_weights(filepath=path, model=self.model)
def save_embedding_bilstm2_crf_model(model, filename):
    """Save the weights of ``model`` to ``filename`` via ``save_load_utils``."""
    save_load_utils.save_all_weights(model, filepath=filename)
def get_measures(yTrue, yPred):
    """Print precision, recall and F1 for the flattened label arrays.

    Both arrays are flattened before scoring; the scores are computed with
    ``average=None`` and printed one block after another, flushing stdout
    after every line.
    """
    flat_true = yTrue.reshape(1, -1).squeeze()
    flat_pred = yPred.reshape(1, -1).squeeze()

    # Compute all three metrics before printing anything.
    results = (
        ("Precision=", precision_score(flat_true, flat_pred, average=None)),
        ("Recall=", recall_score(flat_true, flat_pred, average=None)),
        ("F1 score=", f1_score(flat_true, flat_pred, average=None)),
    )
    for caption, values in results:
        print(caption, flush=True)
        print(values, flush=True)


# Training-set report is disabled:
# print("Train...", flush=True)
# get_measures(y_tr_true, pred_train)
print("Test...", flush=True)
get_measures(y_true, y_pred)

# Dumping the raw arrays is disabled:
# np.save("y_tr_true.npy", y_tr_true)
# np.save("pred_train.npy", pred_train)
# np.save("y_true.npy", y_true)
# np.save("y_pred.npy", y_pred)

weights_path = os.path.join(log_dir, state_dict)
save_load_utils.save_all_weights(model, weights_path)
def save_model(model, save_model_path):
    """Save the weights of ``model`` to ``save_model_path``."""
    save_load_utils.save_all_weights(model, filepath=save_model_path)
def save_model(model, save_model_path):
    """Write ``model``'s weights to the file ``save_model_path``."""
    target = save_model_path
    save_load_utils.save_all_weights(model=model, filepath=target)
def save_embedding_bilstm2_crf_model(model, filename):
    """Write the weights of ``model`` to the file ``filename``."""
    save_load_utils.save_all_weights(filepath=filename, model=model)
def save_trained_model(self, name):
    """Save the weights of ``self.model`` to ``name``.

    When ``self.verbose`` is truthy an ``[INFO]`` line naming the target is
    printed before saving.
    """
    if self.verbose:
        notice = "\n[INFO] Saving trained model to '" + name + "'\n"
        print(notice)

    save_load_utils.save_all_weights(self.model, filepath=name)
def main():
    """Prepare word/char inputs, train the tagger, save it and print samples.

    Steps visible in this function:
      1. Load the dataset and build (or, when the outer name ``trained`` is
         truthy, load) the word/tag index dictionaries.
      2. Encode and pad sentences (``max_len`` = 75) and characters
         (``max_len_chars`` = 10).
      3. Split into train/test; when ``trained`` is truthy hand over to
         ``main2`` and return.
      4. Otherwise create, fit, evaluate and save the model, then print
         sample predictions.

    NOTE(review): the section after ``del model`` references names that are
    not assigned anywhere in this function (``loaded_model``,
    ``load_all_weights``) - see the notes inline.
    """
    data, words, tags, pos = load_dataset()
    # unique
    n_words = len(words)  # total words in vocab
    n_tags = len(tags)  # total tags in vocab
    n_pos = len(pos)
    print("Data Loaded successfully..")
    getter = SentenceGetter(data)
    # list of (word,POS,Tag)
    sentences = getter.sentences
    print("First sentence")
    print(sentences[0])
    max_len = 75  # length of each sequence/sentence

    # if model is trained, load previous results
    # (`trained` is not defined in this function; it is read from outside.)
    if trained:
        assert trained == True, "Trained must be True"
        # load trained indices
        word2idx = save_load_word_idx("word2idx.pkl", load=True)
        idx2word = save_load_word_idx("idx2word.pkl", load=True)
        tag2idx = save_load_word_idx("tag2idx.pkl", load=True)
        idx2tag = save_load_word_idx("idx2tag.pkl", load=True)
    else:
        assert trained == False
        # save trained indices
        # Word ids start at 2: 0 is reserved for "ENDPAD" and 1 for "UNK".
        word2idx = {w: i + 2 for i, w in enumerate(words)}
        word2idx["ENDPAD"] = 0
        word2idx["UNK"] = 1
        idx2word = {i: w for w, i in word2idx.items()}
        # Tag ids start at 1: 0 is reserved for "ENDPAD".
        tag2idx = {t: i + 1 for i, t in enumerate(tags)}
        tag2idx["ENDPAD"] = 0
        idx2tag = {i: t for t, i in tag2idx.items()}
        save_load_word_idx("word2idx.pkl", word2idx=word2idx, save=True)
        save_load_word_idx("idx2word.pkl", word2idx=idx2word, save=True)
        save_load_word_idx("tag2idx.pkl", word2idx=tag2idx, save=True)
        save_load_word_idx("idx2tag.pkl", word2idx=idx2tag, save=True)

    # Raises KeyError if "demonstrators" is not in the vocabulary.
    print("word2idx[\"demonstrators\"].................",
          word2idx["demonstrators"])

    # convert sequence of sentences into corresponding int vectors
    X = [[word2idx[w[0]] for w in s] for s in sentences]
    # max length of sequence/sentence
    print("Max length of sequence(len(sentence)):", max([len(x) for x in X]))
    # add padding for same length i.e, max_len= 75 with "0" value
    X = pad_sequences(maxlen=max_len,
                      sequences=X,
                      truncating='post',
                      padding='post',
                      value=0)
    y = [[tag2idx[w[2]] for w in s] for s in sentences]
    y = pad_sequences(maxlen=max_len,
                      sequences=y,
                      padding="post",
                      truncating="post",
                      value=tag2idx["ENDPAD"])
    # y: class vector to be converted into a matrix (integers from 0 to num_classes).
    # num_classes: total number of classes.
    # y = [to_categorical(i, num_classes = n_tags + 1) for i in y]

    # create a set with all possible chars
    chars = set([chars for word in words for chars in word])
    n_chars = len(chars)
    max_len_chars = 10
    # i, len_word = max([(i,len(word)) for i,word in enumerate(words)])
    # Find the longest word (first one wins on ties), for the message below.
    _max, imax = -1, 0
    for i, w in enumerate(words):
        if len(w) > _max:
            _max, imax = len(w), i
    print("Actual max len chars(n_chars){} and word is {}:".format(
        _max, words[imax]))

    # create char2idx for converting chars as vector of integers to feed to LSTM
    char2idx = {char: i + 2 for i, char in enumerate(chars)}
    char2idx["ENDPAD"] = 0  # to ignore this by mask_zero = True
    char2idx["UNK"] = 1
    # vice versa
    idx2char = {i: char for char, i in char2idx.items()}

    # generate char_sequence for input to model
    X_char = []
    for sentence in sentences:
        sent_seq = []
        # max_len = 75
        for i in range(max_len):
            word_seq = []  # char sequence for words
            for j in range(max_len_chars):
                try:
                    # chars of specific sentence of i
                    word_seq.append(char2idx.get(sentence[i][0][j]))
                # NOTE(review): bare except - intended for the out-of-range
                # word/char index, but it hides every other error as well.
                except:
                    # if char-sequence is out of range , pad it with "PAD" tag
                    word_seq.append(char2idx.get("ENDPAD"))
            sent_seq.append(word_seq)
        # append sentence sequences as character-by-character to X_char for Model input
        X_char.append(np.array(sent_seq))
    print(X_char[:1])
    print("shape of one X_char[0]: ", X_char[0].shape)
    print("shape of X_char:{} ".format(np.array(X_char).shape))
    print("shape of X:{} ".format(X.shape))

    from sklearn.model_selection import train_test_split

    # split data with test_size=0.1 (10% held out for testing)
    # Both calls use the same random_state - presumably so the word-level and
    # char-level splits stay aligned; TODO confirm.
    X_tr, X_te, y_tr, y_te = train_test_split(X,
                                              y,
                                              test_size=0.1,
                                              shuffle=True,
                                              random_state=2018)
    X_char_tr, X_char_te, _, _ = train_test_split(X_char,
                                                  y,
                                                  test_size=0.1,
                                                  shuffle=True,
                                                  random_state=2018)
    print("shape of X_char_tr:{} ".format(np.array(X_char_tr).shape))
    print("shape of X_char_te:{} ".format(np.array(X_char_te).shape))
    print("shape of y_tr:{} ".format(np.array(y_tr).shape))
    print(
        "Reshaped X_char_tr:",
        np.array(X_char_tr).reshape(
            (len(X_char_tr), max_len, max_len_chars)).shape)
    print("Reshaped y_tr:",
          np.array(y_tr).reshape(len(y_tr), max_len, 1).shape)
    print("X_tr : ", X_tr.shape)
    # import sys
    # sys.exit(0)

    # With an already trained model, delegate to main2 and stop here.
    if trained:
        # model.evaluate(X_te, np.array(y_te), verbose=1)
        main2(X,
              X_te,
              y_te,
              words=words,
              tags=tags,
              idx2word=idx2word,
              idx2tag=idx2tag)
        return

    model = create_model(max_len, n_words, n_tags, n_pos, max_len_chars,
                         n_chars)
    # second input to be fed like : model.fit([X_tr, second_input])
    # second_input_emb = np.array(X_pos[:len(X_tr)])
    # second_input_hot = np.array(X_pos[:len(X_tr)])
    # score = model.evaluate([X_te, np.array(X_pos[len(X_tr):])], np.array(y_te), verbose=1)
    # #print accuracy
    # print("%s: %.2f%%" % (model.metrics_names[1], score[1]*100))
    # score = model.evaluate([X_te, np.array(X_pos[len(X_tr):])], np.array(y_te), verbose=1)
    # print("%s: %.2f%%" % (model.metrics_names[1], score[1]*100))
    # history = model.fit([X_tr, second_input_hot], np.array(y_tr),\
    # batch_size=32, epochs=2, validation_split=0.1, verbose=1)

    # Train on word ids plus char ids; labels get a trailing singleton axis.
    history = model.fit([X_tr, np.array(X_char_tr)], np.array(y_tr).reshape(len(y_tr),max_len,1),\
        batch_size=32, epochs=5, validation_split=0.1, verbose=1)

    # TODO: pass second arg to model.evaluate()
    # score = model.evaluate(X_te, y_te, batch_size=16)
    # evaluate the model for training examples and print accuracy =>98.63%
    # score = model.evaluate([X_te, np.array(X_pos[len(X_tr):])], np.array(y_te), verbose=1)
    # print("%s: %.2f%%" % (model.metrics_names[1], score[1]*100))
    # NOTE(review): y_tr is reshaped with len(y_te) here, whereas the fit
    # call above uses len(y_tr) - looks like a mismatch; confirm.
    score = model.evaluate([X_tr, np.array(X_char_tr)],
                           np.array(y_tr).reshape(len(y_te), max_len, 1),
                           verbose=1)
    print("%s: %.2f%%" % (model.metrics_names[1], score[1] * 100))

    # save model on disk
    # save_path = "C:\\Users\\Usman Ahmad\\Desktop\\P3_LSTM_saved.pkl"
    model.save("My_Custom_Model3.h5")
    from keras_contrib.utils import save_load_utils

    # save using keras_contrib.utils.save_load_utils
    save_load_utils.save_all_weights(model, "Model_saved_using_contrib.h5")
    model.save_weights("model_weights.h5")
    with open("model_architecture.json", "w") as json_file:
        json_file.write(model.to_json())
    # print("Saved model to disk"
    # serialize weights to HDF5
    print("Saved model to disk")
    plot_history(history)
    print(model.summary())
    print('*' * 50)

    if words is not None and tags is not None:
        i = 2318
        # NOTE(review): only the word input is passed here, while fit() above
        # was given [words, chars] - confirm this matches the model's inputs.
        p = model.predict(np.array([X_te[i]]))
        p = np.argmax(p, axis=-1)
        print("{:15} ({:5}): {}".format("Word", "True", "Pred"))
        for w, pred in zip(X_te[i], p[0]):
            if w != 0:
                # NOTE(review): word ids were built as position + 2, but the
                # lookup uses w - 1; confirm the offset.
                print("{:15}: {}".format(words[w - 1], tags[pred]))
    print('*' * 50)
    print(p)
    print("len(p) = ", len(p))
    # for x in X_te[i]:
    #     print(words[x], end = " ")
    # print(" ")

    del model
    # NOTE(review): module `k` is not imported in the visible code - confirm
    # the intended package name.
    from k.models import load_model

    # loaded_model = load_model("My_Custom_Model3.h5")
    # loaded_model = ""
    # NOTE(review): `loaded_model` is never assigned in this function (both
    # assignments above are commented out) and `load_all_weights` is used
    # unqualified - confirm where these come from.
    load_all_weights(loaded_model, "Model_saved_using_contrib.h5")
    print("Model Loaded.. Evaluating again")
    score = loaded_model.evaluate(X_te, np.array(y_te), verbose=1)
    print("%s: %.2f%%" % (loaded_model.metrics_names[1], score[1] * 100))
    # The same evaluation is run and printed a second time.
    score = loaded_model.evaluate(X_te, np.array(y_te), verbose=1)
    print("%s: %.2f%%" % (loaded_model.metrics_names[1], score[1] * 100))
    # score = loaded_model.evaluate(X,Y, verbose=1)
    # print accuracy
    # print("%s: %.2f%%" % (loaded_model.metrics_names[1], score[1]*100))

    if words is not None and tags is not None:
        i = 2319
        p = loaded_model.predict(np.array([X[i]]))
        # p = loaded_model.predict([np.array(X[i]),second_input[i]])
        p = np.argmax(p, axis=-1)
        print("{:15} ({:5}): {}".format("Word", "True", "Pred"))
        for w, pred in zip(X[i], p[0]):  # p[0] = p[[1,3,4,5]]
            print("{:15}: {}".format(words[w], tags[pred]))
def main(self, glove):
    """Train a word-level BiLSTM-CRF on top of frozen pre-trained embeddings.

    Loads GloVe (``glove`` truthy) or word2vec vectors, converts the training
    corpus to embedding indices and numeric labels, groups the sequences by
    length and trains one ``model.fit`` call per length group and epoch.
    After every epoch the weights are saved to
    ``words-results/epoch_<n>.h5``, the model is tested with
    ``self.test_model`` and scored with ``self.eval()``; the best epoch's
    file is finally copied to ``words_glove_<n>.h5``.

    Args:
        glove: selects the embedding loader and is forwarded to
            ``self.test_model``.
    """
    # get word embeddings
    utils = wordUtils.Utils()
    if glove:
        self.words_list, self.embedding_matrix = utils.load_glove()
    else:
        self.words_list, self.embedding_matrix = utils.load_word2vec()
    # Unknown words map to the index one past the last known word.
    unword_n = len(self.words_list)

    # Build the word -> index table once instead of scanning the word list
    # for every token; setdefault keeps the first occurrence, which is what
    # list.index would return.
    word_to_index = {}
    for position, word in enumerate(self.words_list):
        word_to_index.setdefault(word, position)

    # get the training corpus
    cr = corpusreader.CorpusReader(self.textfile, self.annotfile)
    corpus = cr.trainseqs
    print(len(corpus))

    print("Processing training data", datetime.now())
    train = []
    for doc in corpus:
        train.append({
            # index of each token in the embedding matrix
            'tokens': [
                word_to_index.get(token, unword_n)
                for token in doc['tokens']
            ],
            # convert SOBIE tags to numbers
            'bion': [self.lablist[tag] for tag in doc['bio']],
        })

    # get all sizes from the sequences with training data
    train_l_d = {}
    train_l_labels = {}
    for seq in train:
        train_l_d.setdefault(len(seq['tokens']), []).append(seq['tokens'])
        train_l_labels.setdefault(len(seq['bion']), []).append(seq['bion'])
    sizes = list(train_l_d)

    # Sanity check: every length group must pair sequences and labels of
    # equal count and equal length.
    for size in sizes:
        group = train_l_d[size]
        group_labels = train_l_labels[size]
        if len(group) != len(group_labels):
            print("merda")
        for m in range(len(group)):
            if len(group[m]) != len(group_labels[m]):
                print("XXX")

    word_ids = Input(shape=(None,))
    # Frozen embedding initialised from the pre-trained matrix.
    el = Embedding(len(self.words_list) + 1,
                   200,
                   weights=[self.embedding_matrix],
                   trainable=False)(word_ids)
    hidden = Bidirectional(
        LSTM(units=50, return_sequences=True,
             recurrent_dropout=0.1))(el)  # variational biLSTM
    hidden = TimeDistributed(Dense(
        50, activation="relu"))(hidden)  # a dense layer as suggested by neuralNer
    crf = CRF(self.lab_len)  # CRF layer
    out = crf(hidden)  # output

    model = Model(word_ids, out)
    model.compile(optimizer="rmsprop",
                  loss=crf.loss_function,
                  metrics=[crf.accuracy])
    model.summary()

    # Test set locations do not change between epochs.
    test_data = 'corpus_char/tmVarCorpus/treated/test_data.txt'
    test_labels = 'corpus_char/tmVarCorpus/treated/test_labels.tsv'

    f_best = -1
    f_index = -1

    # OK, start actually training
    for epoch in range(self.epochsN):
        print("Epoch", epoch, "start at", datetime.now())
        # Train in batches of different sizes - randomize the order of sizes
        # Except for the first few epochs
        if epoch > 2:
            random.shuffle(sizes)
        for size in sizes:
            tx = np.array(train_l_d[size])
            ty = [
                to_categorical(tags, num_classes=self.lab_len)
                for tags in train_l_labels[size]
            ]
            # This trains in mini-batches
            model.fit(tx, np.array(ty), verbose=0, epochs=1)
        print("Trained at", datetime.now())

        # save all epochs
        save_load_utils.save_all_weights(
            model, 'words-results/epoch_%s.h5' % epoch)

        # test the results
        self.test_model(test_data, test_labels, model, glove)
        f = self.eval()
        if f > f_best:
            f_best = f
            f_index = epoch

    # Pick the best model, and save it with a useful name
    print("Choosing the best epoch")
    shutil.copyfile("words-results/epoch_%s.h5" % f_index,
                    "words_glove_%s.h5" % f_index)
def save_model(self):
    """Save the weights of ``self.model`` to ``<filepath>/<steps_counter>.hdf5``."""
    file_name = str(self.steps_counter) + '.hdf5'
    target = self.filepath + '/' + file_name
    save_load_utils.save_all_weights(self.model, filepath=target)
# Script fragment: unpack the prepared data, train a BiLSTM-CRF tagger on
# word ids only, score it on the test set and save the weights. `data_dict`,
# `X_tr` and `X_te` are defined outside this fragment.
y_tr = data_dict["y_tr"]
y_te = data_dict["y_te"]
max_len = 75
n_words = data_dict["n_words"]
n_tags = data_dict["n_tags"]
tag2idx = data_dict["tag2idx"]
pos2idx = data_dict["pos2idx"]
word2idx = data_dict["word2idx"]

## Model definition
input = Input(shape=(max_len,))
model = Embedding(input_dim=n_words + 1, output_dim=20, input_length=max_len)(input)  # 20-dim embedding
model = Bidirectional(LSTM(units=50, return_sequences=True, recurrent_dropout=0.1))(model)  # variational biLSTM
model = TimeDistributed(Dense(50, activation="relu"))(model)  # a dense layer as suggested by neuralNer
crf = CRF(n_tags)  # CRF layer
out = crf(model)  # output
model = Model(input, out)
model.compile(optimizer="rmsprop", loss=crf.loss_function, metrics=[crf.accuracy])
print(model.summary())

history = model.fit(X_tr, np.array(y_tr), batch_size=32, epochs=100, validation_split=0.1, verbose=1)

# Testing
test_pred = model.predict(X_te, verbose=1)
# Reverse mapping from tag index to tag string.
idx2tag = {i: w for w, i in tag2idx.items()}
pred_labels = pred2label(test_pred)
test_labels = pred2label(y_te)
# Scores are reported for the "Destination" label only.
print("Recall, Precision and F-score are", get_recall_precision(test_labels, pred_labels, "Destination"))

# The target name has no file extension.
save_load_utils.save_all_weights(model,"BILSTM+CRF_without_pos_without_embeddings")