import numpy as np
import tensorflow as tf

# pickle_load, read_data, BiLSTM_CRF and FLAGS are assumed to come from the
# project's own utility, data and model modules defined elsewhere.


def main(_):
    # Load the word -> index mapping and the pre-trained word vectors.
    word2idx = pickle_load(FLAGS.word2idx_path)
    word2vec = pickle_load(FLAGS.word2vec_path)
    embedding_size = list(word2vec.values())[0].shape[0]

    # Build the embedding matrix; row 0 is reserved for the UNK token.
    word_embeddings = np.zeros([len(word2idx) + 1, embedding_size])
    for word in word2idx:
        word_embeddings[word2idx[word]] = word2vec[word]
    FLAGS.n_words = word_embeddings.shape[0]
    FLAGS.embedding_size = word_embeddings.shape[1]

    # Read and index the train/validation sets, padded to max_sequence_len.
    train_data = read_data(FLAGS.train_data_path, word2idx, FLAGS.max_sequence_len)
    valid_data = read_data(FLAGS.valid_data_path, word2idx, FLAGS.max_sequence_len)

    graph = tf.Graph()
    with tf.Session(graph=graph) as sess:
        model = BiLSTM_CRF(FLAGS, sess, word_embeddings)
        model.build_model()
        model.train(train_data, valid_data)
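
# ---------------------------------------------------------------------------
# For reference, a minimal sketch of how the CRF objective inside a TF 1.x
# BiLSTM-CRF is typically wired up. This is NOT the project's actual
# implementation: the function name and tensor shapes are illustrative
# assumptions, though tf.contrib.crf.crf_log_likelihood is the standard
# TF 1.x API for this step.
def crf_loss_sketch(logits, labels, sequence_lengths):
    # logits: [batch, max_sequence_len, n_tags] emission scores from the BiLSTM
    # labels: [batch, max_sequence_len] gold tag indices
    # sequence_lengths: [batch] true (unpadded) sentence lengths
    log_likelihood, transition_params = tf.contrib.crf.crf_log_likelihood(
        logits, labels, sequence_lengths)
    # Minimize the mean negative log-likelihood; transition_params is reused
    # for Viterbi decoding at prediction time.
    return tf.reduce_mean(-log_likelihood), transition_params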
import os

import numpy as np
from keras.callbacks import (ModelCheckpoint, EarlyStopping,
                             ReduceLROnPlateau, TensorBoard)

# BiLSTM_CRF is assumed to be the project's own Keras model wrapper.

os.environ["CUDA_VISIBLE_DEVICES"] = "1"

# Character embedding matrix and pre-indexed train/dev splits.
char_embedding_mat = np.load('data/char_embedding_matrix.npy')
X_train = np.load('data/X_train.npy')
X_dev = np.load('data/X_dev.npy')
y_train = np.load('data/y_train.npy')
y_dev = np.load('data/y_dev.npy')

# Earlier configuration, kept for reference:
# ner_model = BiLSTM_CRF(n_input=200, n_vocab=char_embedding_mat.shape[0],
#                        n_embed=100, embedding_mat=char_embedding_mat,
#                        keep_prob=0.5, n_lstm=100, keep_prob_lstm=0.8,
#                        n_entity=7, optimizer='adam', batch_size=64, epochs=500)

ner_model = BiLSTM_CRF(n_input=200, n_vocab=char_embedding_mat.shape[0],
                       n_embed=100, embedding_mat=char_embedding_mat,
                       keep_prob=0.5, n_lstm=256, keep_prob_lstm=0.6,
                       n_entity=7, optimizer='adam', batch_size=16, epochs=50)

cp_folder, cp_file = 'checkpoints', 'bilstm_crf_weights_best.hdf5'
log_filepath = 'logs/bilstm_crf_summaries'

# Save the best weights by validation loss, stop early after 10 stalled
# epochs, decay the learning rate on plateaus, and log to TensorBoard.
# Note: ReduceLROnPlateau's `epsilon` argument was renamed `min_delta` in
# Keras >= 2.1.6, so this snippet targets an older Keras.
cb = [ModelCheckpoint(os.path.join(cp_folder, cp_file), monitor='val_loss',
                      verbose=1, save_best_only=True, save_weights_only=True,
                      mode='min'),
      EarlyStopping(min_delta=1e-8, patience=10, mode='min'),
      ReduceLROnPlateau(factor=0.2, patience=6, verbose=0, mode='min',
                        epsilon=1e-6, cooldown=4, min_lr=1e-8),
      TensorBoard(log_dir=log_filepath, write_graph=True, write_images=True,
                  histogram_freq=1)]

ner_model.train(X_train, y_train, X_dev, y_dev, cb)
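
# ---------------------------------------------------------------------------
# For reference, a minimal sketch of what a Keras BiLSTM-CRF of this shape
# might look like. This is NOT the project's actual BiLSTM_CRF class: the
# builder function below is hypothetical, and the CRF layer is assumed to be
# the one from keras_contrib.
from keras.models import Sequential
from keras.layers import Embedding, Dropout, Bidirectional, LSTM
from keras_contrib.layers import CRF

def build_bilstm_crf(n_input, n_vocab, n_embed, embedding_mat,
                     keep_prob, n_lstm, keep_prob_lstm, n_entity):
    model = Sequential()
    # Character embeddings initialized from the pre-trained matrix.
    model.add(Embedding(n_vocab, n_embed, weights=[embedding_mat],
                        input_length=n_input))
    model.add(Dropout(1 - keep_prob))  # keep_prob -> Keras dropout *rate*
    model.add(Bidirectional(LSTM(n_lstm, return_sequences=True,
                                 dropout=1 - keep_prob_lstm,
                                 recurrent_dropout=1 - keep_prob_lstm)))
    # CRF output layer over the n_entity tag set; sparse_target lets y hold
    # integer tag indices instead of one-hot vectors.
    crf = CRF(n_entity, sparse_target=True)
    model.add(crf)
    model.compile(optimizer='adam', loss=crf.loss_function,
                  metrics=[crf.accuracy])
    return model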