def main():
    parser = argparse.ArgumentParser(
        description='Tuning with bi-directional LSTM-CNN')
    parser.add_argument('--num_epochs', type=int, default=100,
                        help='Number of training epochs')
    parser.add_argument('--batch_size', type=int, default=1,
                        help='Number of sentences in each batch')
    parser.add_argument('--hidden_size', type=int, default=200,
                        help='Number of hidden units in RNN')
    parser.add_argument('--num_filters', type=int, default=35,
                        help='Number of filters in CNN')
    parser.add_argument('--min_filter_width', type=int, default=3,
                        help='Minimum filter width (character ngram size) in CNN')
    parser.add_argument('--max_filter_width', type=int, default=7,
                        help='Maximum filter width (character ngram size) in CNN')
    parser.add_argument('--embedDimension', type=int, default=300,
                        help='embedding dimension')
    parser.add_argument('--learning_rate', type=float, default=0.4,
                        help='Learning rate')
    parser.add_argument('--decay_rate', type=float, default=0.1,
                        help='Decay rate of learning rate')
    parser.add_argument('--gamma', type=float, default=0.0,
                        help='weight for regularization')
    parser.add_argument('--schedule', type=int, default=1,
                        help='schedule for learning rate decay')
    parser.add_argument('--embedding_vectors', help='path for embedding dict')
    parser.add_argument('--train')
    parser.add_argument('--trainAux')
    parser.add_argument('--dev')
    parser.add_argument('--test')
    parser.add_argument('--ner_tag_field_l1', type=int, default=1,
                        help='ner tag field')
    parser.add_argument('--ner_tag_field_l2', type=int, default=1,
                        help='ner tag field')
    parser.add_argument('--use_gpu', type=int, default=0, help='use gpu')
    parser.add_argument('--save_dir')
    parser.add_argument('--vocabChar')
    parser.add_argument('--vocabOutput')
    parser.add_argument('--vocabOutputAux')
    parser.add_argument('--vocabInput')
    args = parser.parse_args()

    use_gpu = args.use_gpu
    train_path = args.train
    train_path_aux = args.trainAux
    dev_path = args.dev
    test_path = args.test
    num_epochs = args.num_epochs
    batch_size = args.batch_size
    hidden_size = args.hidden_size
    num_filters = args.num_filters
    min_filter_width = args.min_filter_width
    max_filter_width = args.max_filter_width
    learning_rate = args.learning_rate
    momentum = 0.01 * learning_rate
    decay_rate = args.decay_rate
    gamma = args.gamma
    schedule = args.schedule
    save_dir = args.save_dir
    # create the output folder if it does not exist
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    embedding_path = args.embedding_vectors

    inputVocabulary = Vocab()
    charVocabulary = CharVocab()
    targetVocabulary = Vocab()
    targetVocabularyAux = Vocab()

    # read the character vocabulary if the vocabChar argument is given
    if args.vocabChar:
        with open(args.vocabChar, "r") as f:
            charVocabulary.__dict__ = json.load(f)
        charVocabulary.set_freeze()
        charVocabulary.process()
    # read the main-task output vocabulary if the vocabOutput argument is given
    if args.vocabOutput:
        with open(args.vocabOutput, "r") as f:
            targetVocabulary.__dict__ = json.load(f)
        targetVocabulary.set_freeze()
        targetVocabulary.process()
    # read the auxiliary-task output vocabulary if the vocabOutputAux argument is given
    if args.vocabOutputAux:
        with open(args.vocabOutputAux, "r") as f:
            targetVocabularyAux.__dict__ = json.load(f)
        targetVocabularyAux.set_freeze()
        targetVocabularyAux.process()

    embedding_vocab = None
    if args.embedding_vectors:
        print(args.embedding_vectors)
        # load the pre-trained embeddings and build the input vocabulary from them
        embedd_dict, embedding_vocab, reverse_word_vocab, vocabularySize, embeddingDimension = load_embeddings(
            embedding_path)
        print("Read Word Embedding of dimension " + str(embeddingDimension) +
              " for " + str(vocabularySize) + " number of words")
        for everyWord in embedding_vocab:
            inputVocabulary.add(everyWord)
        inputVocabulary.set_freeze()
        inputVocabulary.process()
    else:
        # otherwise read a previously saved input vocabulary
        if args.vocabInput:
            with open(args.vocabInput, "r") as f:
                inputVocabulary.__dict__ = json.load(f)
            inputVocabulary.set_freeze()
            inputVocabulary.process()
        else:
            print("Neither pre-trained word embeddings nor input vocabulary is specified")
            exit()

    # initialize the character vocabulary with beginning and end markers
    if charVocabulary.__is_empty__():
        charVocabulary.add("<S>")
        charVocabulary.add("</S>")

    # read the main-task training split
    trainCorpus, trainLabelsRaw, maxTrainLength = readCoNLL(
        train_path, charVocabulary, targetVocabulary, args.ner_tag_field_l1,
        embedding_vocab)
    print("Train Corpus contains " + str(len(trainCorpus)) +
          " sentences and maximum sentence length is " + str(maxTrainLength))
    trainCorpusRawSorted = trainCorpus
    trainLabelsRawSorted = trainLabelsRaw

    # read the auxiliary-task training split
    trainCorpusAux, trainLabelsRawAux, maxTrainLengthAux = readCoNLL(
        train_path_aux, charVocabulary, targetVocabularyAux,
        args.ner_tag_field_l2, embedding_vocab)
    print("Auxiliary Train Corpus contains " + str(len(trainCorpusAux)) +
          " sentences and maximum sentence length is " + str(maxTrainLengthAux))
    trainCorpusRawSortedAux = trainCorpusAux
    trainLabelsRawSortedAux = trainLabelsRawAux

    # read the dev and test splits (main task only)
    devCorpus, devLabelsRaw, maxDevLength = readCoNLL(
        dev_path, charVocabulary, targetVocabulary, args.ner_tag_field_l1,
        embedding_vocab)
    print("Dev Corpus contains " + str(len(devCorpus)) +
          " sentences and maximum sentence length is " + str(maxDevLength))
    testCorpus, testLabelsRaw, maxTestLength = readCoNLL(
        test_path, charVocabulary, targetVocabulary, args.ner_tag_field_l1,
        embedding_vocab)
    print("Test Corpus contains " + str(len(testCorpus)) +
          " sentences and maximum sentence length is " + str(maxTestLength))

    # save the output vocabularies and the character vocabulary
    if not targetVocabulary.get_freeze():
        print(targetVocabulary._tok_to_ind)
        tmp_filename = '%s/output.vocab' % (save_dir)
        with open(tmp_filename, "w") as f:
            json.dump(targetVocabulary.__dict__, f)
        targetVocabulary.set_freeze()
    if not targetVocabularyAux.get_freeze():
        print(targetVocabularyAux._tok_to_ind)
        tmp_filename = '%s/output_aux.vocab' % (save_dir)
        with open(tmp_filename, "w") as f:
            json.dump(targetVocabularyAux.__dict__, f)
        targetVocabularyAux.set_freeze()
    if not charVocabulary.get_freeze():
        tmp_filename = '%s/char.vocab' % (save_dir)
        with open(tmp_filename, "w") as f:
            json.dump(charVocabulary.__dict__, f)
        charVocabulary.set_freeze()

    # initialize word embeddings randomly, then overwrite the rows of words
    # that have pre-trained vectors
    embeddingDimension = args.embedDimension
    word_embedding = np.random.uniform(
        -0.1, 0.1, (inputVocabulary.__len__(), embeddingDimension))
    if args.embedding_vectors:
        for everyWord in inputVocabulary._tok_to_ind:
            if everyWord in embedding_vocab:
                word_embedding[inputVocabulary.__get_word__(
                    everyWord)] = embedd_dict[embedding_vocab[everyWord]]
        # save the input vocabulary
        tmp_filename = '%s/input.vocab' % (save_dir)
        with open(tmp_filename, "w") as f:
            json.dump(inputVocabulary.__dict__, f)
        inputVocabulary.set_freeze()
        del embedd_dict
        del reverse_word_vocab
        del vocabularySize
        del embedding_vocab

    print("Read " + str(targetVocabulary.__len__()) + " number of target words")
    print("Read " + str(targetVocabularyAux.__len__()) + " number of auxiliary target words")
    print("Read " + str(inputVocabulary.__len__()) + " number of input words")
    print("Read " + str(charVocabulary.__len__()) + " number of characters")
    print("Number of epochs = " + str(num_epochs))
    print("Mini-Batch size = " + str(batch_size))
    print("Bi-LSTM Hidden size = " + str(hidden_size))
    print("Features per CNN filter = " + str(num_filters))
    print("Minimum ngrams for CNN filter = " + str(min_filter_width))
    print("Maximum ngrams for CNN filter = " + str(max_filter_width))
    print("Initial Learning Rate = " + str(learning_rate))

    # create the Bi-LSTM-CNN network with output layers for the main and auxiliary tag sets
    network = BiCNNLSTMTranstion(inputVocabulary.__len__(), embeddingDimension,
                                 min_filter_width, max_filter_width,
                                 charVocabulary.__len__(), num_filters,
                                 hidden_size, targetVocabulary.__len__(),
                                 word_embedding,
                                 targetVocabularyAux.__len__())

    lr = learning_rate
    lr_aux = learning_rate * 0.1
    optim = SGD(network.parameters(), lr=lr, momentum=momentum,
                weight_decay=gamma, nesterov=True)

    num_batches = len(trainCorpus) // batch_size + 1
    dev_f1 = 0.0
    dev_acc = 0.0
    dev_precision = 0.0
    dev_recall = 0.0
    test_f1 = 0.0
    test_acc = 0.0
    test_precision = 0.0
    test_recall = 0.0
    best_epoch = 0

    print("Training....")
    if use_gpu == 1:
        network.cuda()

    prev_error = 100000.0
    for epoch in range(1, num_epochs + 1):
        print('Epoch %d ( learning rate=%.4f, decay rate=%.4f (schedule=%d)): ' %
              (epoch, lr, decay_rate, schedule))
        train_err = 0.
        train_corr = 0.
        train_total = 0.
        start_time = time.time()
        num_back = 0
        network.train()
        count = 0
        count_batch = 0
        # alternate between main-task (l1) and auxiliary-task (l2) sentences
        l1_indices = list(range(len(trainCorpusRawSorted)))
        l2_indices = list(range(len(trainCorpusRawSortedAux)))
        with tqdm(total=(len(trainCorpusRawSorted) +
                         len(trainCorpusRawSortedAux))) as pbar:
            for l1, l2 in zip_longest(l1_indices, l2_indices):
                if l1 is not None:
                    # main-task update (task id 0)
                    x_input, batch_length, current_batch_size, current_max_sequence_length, y_output, mask, y_prev = constructBatch(
                        [trainCorpusRawSorted[l1]], [trainLabelsRawSorted[l1]],
                        inputVocabulary, targetVocabulary, charVocabulary,
                        max_filter_width, args.use_gpu)
                    optim.zero_grad()
                    loss, _ = network.loss(x_input, batch_length,
                                           current_batch_size,
                                           current_max_sequence_length,
                                           y_output, mask, y_prev, 0, use_gpu)
                    loss.backward()
                    optim.step()
                    train_err += loss.item()
                    train_total += batch_length.data.sum()
                    count = count + current_batch_size
                    count_batch = count_batch + 1
                if l2 is not None:
                    # auxiliary-task update (task id 1)
                    x_input, batch_length, current_batch_size, current_max_sequence_length, y_output, mask, y_prev = constructBatch(
                        [trainCorpusRawSortedAux[l2]],
                        [trainLabelsRawSortedAux[l2]], inputVocabulary,
                        targetVocabularyAux, charVocabulary, max_filter_width,
                        args.use_gpu)
                    optim.zero_grad()
                    loss, _ = network.loss(x_input, batch_length,
                                           current_batch_size,
                                           current_max_sequence_length,
                                           y_output, mask, y_prev, 1, use_gpu)
                    loss.backward()
                    optim.step()
                time_ave = (time.time() - start_time) / count
                time_left = (num_batches - count_batch) * time_ave
                pbar.update(2)

        print('train: %d loss: %.4f, time: %.2fs' %
              (num_batches, train_err / count, time.time() - start_time))

        # evaluate on the dev split and write the predictions to a file
        network.eval()
        tmp_filename = '%s/_dev%d' % (save_dir, epoch)
        current_epoch_loss = 0.0
        with codecs.open(tmp_filename, "w", encoding="utf8",
                         errors="ignore") as writer:
            for inputs, labels in batch(devCorpus, devLabelsRaw, 1):
                x_input, batch_length, current_batch_size, current_max_sequence_length, y_output, mask, y_prev = constructBatch(
                    inputs, labels, inputVocabulary, targetVocabulary,
                    charVocabulary, max_filter_width, args.use_gpu)
                loss, _ = network.loss(x_input, batch_length,
                                       current_batch_size,
                                       current_max_sequence_length, y_output,
                                       mask, y_prev, 0, use_gpu)
                current_epoch_loss = current_epoch_loss + loss.item()
                loss, preds = network.forward(x_input, batch_length,
                                              current_batch_size,
                                              current_max_sequence_length,
                                              y_output, mask, y_prev, 0,
                                              use_gpu)
                count = 0
                for i in range(len(inputs)):
                    for j in range(len(inputs[i])):
                        writer.write(inputs[i][j] + " " + labels[i][j] + " " +
                                     targetVocabulary.__get_index__(
                                         preds[i][j].item()).upper())
                        writer.write("\n")
                    writer.write("\n")

        acc, precision, recall, f1 = evaluate(tmp_filename, save_dir)
        print('dev loss: %.2f, dev acc: %.2f%%, precision: %.2f%%, recall: %.2f%%, F1: %.2f%%' %
              (current_epoch_loss, acc, precision, recall, f1))

        if current_epoch_loss > prev_error:
            # the dev loss increased: reload the previous best model and decay the learning rate
            lr = lr * 0.7
            optim = SGD(network.parameters(), lr=lr, momentum=momentum,
                        weight_decay=gamma, nesterov=True)
            network.load_state_dict(torch.load(save_dir + "/model"))
            network.eval()
            if lr < 0.002:
                # the learning rate decayed below 0.002: run a final test evaluation and stop
                network.eval()
                tmp_filename = '%s/_test%d' % (save_dir, epoch)
                with codecs.open(tmp_filename, "w", encoding="utf8",
                                 errors="ignore") as writer:
                    for inputs, labels in batch(testCorpus, testLabelsRaw, 1):
                        x_input, batch_length, current_batch_size, current_max_sequence_length, y_output, mask, y_prev = constructBatch(
                            inputs, labels, inputVocabulary, targetVocabulary,
                            charVocabulary, max_filter_width, args.use_gpu)
                        loss, preds = network.forward(
                            x_input, batch_length, current_batch_size,
                            current_max_sequence_length, y_output, mask,
                            y_prev, 0, use_gpu)
                        count = 0
                        for i in range(len(inputs)):
                            for j in range(len(inputs[i])):
                                writer.write(inputs[i][j] + " " +
                                             labels[i][j] + " " +
                                             targetVocabulary.__get_index__(
                                                 preds[i][j].item()).upper())
                                writer.write("\n")
                            writer.write("\n")
                acc, precision, recall, f1 = evaluate(tmp_filename, save_dir)
                print('test acc: %.2f%%, precision: %.2f%%, recall: %.2f%%, F1: %.2f%%' %
                      (acc, precision, recall, f1))
                exit()
        else:
            # the dev loss improved: save the model and evaluate on the test split
            prev_error = current_epoch_loss
            torch.save(network.state_dict(), save_dir + "/model")
            network.eval()
            tmp_filename = '%s/_test%d' % (save_dir, epoch)
            with codecs.open(tmp_filename, "w", encoding="utf8",
                             errors="ignore") as writer:
                for inputs, labels in batch(testCorpus, testLabelsRaw, 1):
                    x_input, batch_length, current_batch_size, current_max_sequence_length, y_output, mask, y_prev = constructBatch(
                        inputs, labels, inputVocabulary, targetVocabulary,
                        charVocabulary, max_filter_width, args.use_gpu)
                    loss, preds = network.forward(x_input, batch_length,
                                                  current_batch_size,
                                                  current_max_sequence_length,
                                                  y_output, mask, y_prev, 0,
                                                  use_gpu)
                    count = 0
                    for i in range(len(inputs)):
                        for j in range(len(inputs[i])):
                            writer.write(inputs[i][j] + " " + labels[i][j] +
                                         " " + targetVocabulary.__get_index__(
                                             preds[i][j].item()).upper())
                            writer.write("\n")
                        writer.write("\n")
            acc, precision, recall, f1 = evaluate(tmp_filename, save_dir)
            print('test acc: %.2f%%, precision: %.2f%%, recall: %.2f%%, F1: %.2f%%' %
                  (acc, precision, recall, f1))
            torch.save(network.state_dict(), save_dir + "/model")
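# A hypothetical invocation of the multi-task tuning script defined above.
# The script file name and the data/embedding paths are illustrative
# placeholders; only flags declared in its argparse block are used:
#
#   python tune_multitask.py \
#       --train data/l1.train.conll --trainAux data/l2.train.conll \
#       --dev data/l1.dev.conll --test data/l1.test.conll \
#       --embedding_vectors embeddings/vectors.txt --embedDimension 300 \
#       --ner_tag_field_l1 1 --ner_tag_field_l2 1 \
#       --save_dir models/multitask_run --use_gpu 1
#
# When --embedding_vectors is given, the input vocabulary is built from the
# embedding file and written to <save_dir>/input.vocab; otherwise --vocabInput
# must point to a previously saved input vocabulary.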
def main():
    parser = argparse.ArgumentParser(
        description='Training a Sequence Labeler with bi-directional LSTM-CNN')
    parser.add_argument('--num_epochs', type=int, default=100,
                        help='Number of training epochs')
    parser.add_argument('--batch_size', type=int, default=5,
                        help='Number of sentences in each batch')
    parser.add_argument('--hidden_size', type=int, default=200,
                        help='Number of hidden units in RNN')
    parser.add_argument('--num_filters', type=int, default=35,
                        help='Number of filters in CNN')
    parser.add_argument('--min_filter_width', type=int, default=3,
                        help='Minimum filter width (character ngram size) in CNN')
    parser.add_argument('--max_filter_width', type=int, default=7,
                        help='Maximum filter width (character ngram size) in CNN')
    parser.add_argument('--embedDimension', type=int, default=300,
                        help='embedding dimension')
    parser.add_argument('--learning_rate', type=float, default=0.4,
                        help='Learning rate')
    parser.add_argument('--gamma', type=float, default=0.0,
                        help='weight for regularization')
    parser.add_argument('--embedding_vectors', help='path for embedding dict')
    parser.add_argument('--embedding_dict_new', help='path for embedding dict')
    parser.add_argument('--train')
    parser.add_argument('--dev')
    parser.add_argument('--test')
    parser.add_argument('--vocabChar')
    parser.add_argument('--vocabOutput')
    parser.add_argument('--vocabInput')
    parser.add_argument('--ner_tag_field', type=int, default=1,
                        help='ner tag field')
    parser.add_argument('--use_gpu', type=int, default=1, help='use gpu')
    parser.add_argument('--fineTune', type=bool, default=False,
                        help='fineTune pretrained word embeddings')
    parser.add_argument('--save-dir')
    parser.add_argument('--perform_evaluation', type=bool, default=False,
                        help='perform evaluation only')
    parser.add_argument('--deploy', type=bool, default=False, help='deploy')
    parser.add_argument('--train_from', type=str, default="")
    args = parser.parse_args()

    train_path = args.train
    dev_path = args.dev
    test_path = args.test
    num_epochs = args.num_epochs
    batch_size = args.batch_size
    hidden_size = args.hidden_size
    num_filters = args.num_filters
    min_filter_width = args.min_filter_width
    max_filter_width = args.max_filter_width
    learning_rate = args.learning_rate
    momentum = 0.01 * learning_rate
    gamma = args.gamma
    embedding_path = args.embedding_vectors
    save_dir = args.save_dir
    # create the output folder if it does not exist
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    evaluation = args.perform_evaluation

    inputVocabulary = Vocab()
    charVocabulary = CharVocab()
    targetVocabulary = Vocab()

    # read the character vocabulary if the vocabChar argument is given
    if args.vocabChar:
        with open(args.vocabChar, "r") as f:
            charVocabulary.__dict__ = json.load(f)
        charVocabulary.set_freeze()
        charVocabulary.process()
    # read the output vocabulary if the vocabOutput argument is given
    if args.vocabOutput:
        with open(args.vocabOutput, "r") as f:
            targetVocabulary.__dict__ = json.load(f)
        targetVocabulary.set_freeze()
        targetVocabulary.process()

    embedding_vocab = None
    # if the path to pre-trained embeddings is given
    if args.embedding_vectors:
        print(args.embedding_vectors)
        # load the pre-trained embeddings
        embedd_dict, embedding_vocab, reverse_word_vocab, vocabularySize, embeddingDimension = load_embeddings(
            embedding_path)
        print("Read Word Embedding of dimension " + str(embeddingDimension) +
              " for " + str(vocabularySize) + " number of words")
        # add the words to the input vocabulary, which is a dictionary of words
        for everyWord in embedding_vocab:
            inputVocabulary.add(everyWord)
        inputVocabulary.set_freeze()
        inputVocabulary.process()
    else:
        # read the input vocabulary if the vocabInput argument is given
        if args.vocabInput:
            with open(args.vocabInput, "r") as f:
                inputVocabulary.__dict__ = json.load(f)
            inputVocabulary.set_freeze()
            inputVocabulary.process()
        else:
            print("Neither pre-trained word embeddings nor input vocabulary is specified")
            exit()

    # if the character vocabulary is empty, initialize it with beginning and end markers
    if charVocabulary.__is_empty__():
        charVocabulary.add("<S>")
        charVocabulary.add("</S>")

    if evaluation:
        # if we are performing evaluation, we do not require the train and dev splits
        if not args.deploy:
            # if we are not deploying the model, we are interested in testing the model
            testCorpus, testLabelsRaw, maxTestLength = readCoNLL(
                test_path, charVocabulary, targetVocabulary,
                args.ner_tag_field, inputVocabulary)
            print("Test Corpus contains " + str(len(testCorpus)) +
                  " sentences and maximum sentence length is " +
                  str(maxTestLength))
            print("Read " + str(len(charVocabulary)) + " number of characters")
        else:
            # if we are deploying the model, we are getting predictions on a plain corpus
            testCorpus, maxTestLength = readUnlabeledData(test_path)
            print("Test Corpus contains " + str(len(testCorpus)) +
                  " sentences and maximum sentence length is " +
                  str(maxTestLength))
    else:
        # read the train split
        trainCorpus, trainLabelsRaw, maxTrainLength = readCoNLL(
            train_path, charVocabulary, targetVocabulary, args.ner_tag_field,
            embedding_vocab)
        print("Train Corpus contains " + str(len(trainCorpus)) +
              " sentences and maximum sentence length is " +
              str(maxTrainLength))
        trainCorpusRawSorted = trainCorpus
        trainLabelsRawSorted = trainLabelsRaw
        # read the dev split
        devCorpus, devLabelsRaw, maxDevLength = readCoNLL(
            dev_path, charVocabulary, targetVocabulary, args.ner_tag_field,
            embedding_vocab)
        print("Dev Corpus contains " + str(len(devCorpus)) +
              " sentences and maximum sentence length is " + str(maxDevLength))
        # read the test split
        testCorpus, testLabelsRaw, maxTestLength = readCoNLL(
            test_path, charVocabulary, targetVocabulary, args.ner_tag_field,
            embedding_vocab)
        print("Test Corpus contains " + str(len(testCorpus)) +
              " sentences and maximum sentence length is " +
              str(maxTestLength))

    if not targetVocabulary.get_freeze():
        # save the output vocabulary
        print(targetVocabulary._tok_to_ind)
        tmp_filename = '%s/output.vocab' % (save_dir)
        with open(tmp_filename, "w") as f:
            json.dump(targetVocabulary.__dict__, f)
        targetVocabulary.set_freeze()
    if not charVocabulary.get_freeze():
        # save the character vocabulary
        tmp_filename = '%s/char.vocab' % (save_dir)
        with open(tmp_filename, "w") as f:
            json.dump(charVocabulary.__dict__, f)
        charVocabulary.set_freeze()

    # initialize word embeddings randomly
    embeddingDimension = args.embedDimension
    word_embedding = np.random.uniform(
        -0.1, 0.1, (inputVocabulary.__len__(), embeddingDimension))
    if args.embedding_vectors:
        # pre-trained word embeddings are given; overwrite the corresponding rows
        # of word_embedding with the pre-trained vectors
        for everyWord in inputVocabulary._tok_to_ind:
            if everyWord in embedding_vocab:
                word_embedding[inputVocabulary.__get_word__(
                    everyWord)] = embedd_dict[embedding_vocab[everyWord]]
        # save the input vocabulary
        tmp_filename = '%s/input.vocab' % (save_dir)
        with open(tmp_filename, "w") as f:
            json.dump(inputVocabulary.__dict__, f)
        inputVocabulary.set_freeze()
        del embedd_dict
        del reverse_word_vocab
        del vocabularySize
        del embedding_vocab

    print("Read " + str(targetVocabulary.__len__()) + " number of target words")
    print("Read " + str(inputVocabulary.__len__()) + " number of input words")
    print("Read " + str(charVocabulary.__len__()) + " number of characters")
    print("Number of epochs = " + str(num_epochs))
    print("Mini-Batch size = " + str(batch_size))
    print("Bi-LSTM Hidden size = " + str(hidden_size))
    print("Features per CNN filter = " + str(num_filters))
    print("Minimum ngrams for CNN filter = " + str(min_filter_width))
    print("Maximum ngrams for CNN filter = " + str(max_filter_width))
    print("Initial Learning Rate = " + str(learning_rate))

    use_gpu = args.use_gpu
    # create the Bi-LSTM-CNN network
    network = BiCNNLSTMTranstion(inputVocabulary.__len__(), embeddingDimension,
                                 min_filter_width, max_filter_width,
                                 charVocabulary.__len__(), num_filters,
                                 hidden_size, targetVocabulary.__len__(),
                                 word_embedding, args.fineTune)
    print(network)
    lr = learning_rate
    optim = SGD(network.parameters(), lr=lr, momentum=momentum,
                weight_decay=gamma, nesterov=True)

    if not evaluation:
        num_batches = len(trainCorpus) // batch_size + 1
        dev_f1 = 0.0
        dev_acc = 0.0
        dev_precision = 0.0
        dev_recall = 0.0
        test_f1 = 0.0
        test_acc = 0.0
        test_precision = 0.0
        test_recall = 0.0
        best_epoch = 0

    if evaluation:
        # if we are performing evaluation, load the trained model
        network.load_state_dict(torch.load(save_dir + "/model"))
        # save the output vocabulary as a plain text file
        tmp_filename = '%s/output.vocab.plain' % (save_dir)
        with open(tmp_filename, "w") as f:
            for index in range(len(targetVocabulary._ind_to_tok)):
                f.write(targetVocabulary._ind_to_tok[index])
                f.write("\n")
        # save the input vocabulary as a plain text file
        tmp_filename = '%s/input.vocab.plain' % (save_dir)
        with open(tmp_filename, "w") as f:
            for index in range(len(inputVocabulary._ind_to_tok)):
                f.write(inputVocabulary._ind_to_tok[index])
                f.write("\n")
        # save the character vocabulary as a plain text file
        tmp_filename = '%s/char.vocab.plain' % (save_dir)
        with open(tmp_filename, "w") as f:
            for index in range(len(charVocabulary._ind_to_tok)):
                f.write(charVocabulary._ind_to_tok[index])
                f.write("\n")

        print("Performing Evaluation")
        if args.use_gpu == 0:
            network.cpu()
        network.eval()
        tmp_filename = '%s/_test_new' % (save_dir)
        if args.use_gpu == 1:
            print("Using GPU....")
            network.cuda()
        with codecs.open(tmp_filename, "w", encoding="utf8",
                         errors="ignore") as writer:
            for inputs, labels in batch(testCorpus, testLabelsRaw, 1):
                # for every sentence in the test data, convert the input to tensors
                x_input, batch_length, current_batch_size, current_max_sequence_length, y_output, mask, y_prev = constructBatch(
                    inputs, labels, inputVocabulary, targetVocabulary,
                    charVocabulary, max_filter_width, args.use_gpu)
                # get the predictions by calling the forward() function of the model
                loss, preds, probs = network.forward(
                    x_input, batch_length, current_batch_size,
                    current_max_sequence_length, y_output, mask, y_prev,
                    args.use_gpu)
                count = 0
                # write the tokens, per-label probabilities and predicted labels to the file
                for i in range(len(inputs)):
                    for j in range(len(inputs[i])):
                        writer.write(inputs[i][j])
                        for k in range(probs[i][j].size()[0]):
                            writer.write(" " + str(probs[i][j][k].item()))
                        writer.write(" " + inputs[i][j] + " " + labels[i][j] +
                                     " " + targetVocabulary.__get_index__(
                                         preds[i][j].item()).upper())
                        writer.write("\n")
                    writer.write("\n")
        # calculate the F-score on the predicted output
        acc, precision, recall, f1 = evaluate(tmp_filename, save_dir)
        print('test acc: %.2f%%, precision: %.2f%%, recall: %.2f%%, F1: %.2f%%' %
              (acc, precision, recall, f1))
    else:
        if args.use_gpu == 1:
            print("Using GPU....")
            network.cuda()
        if args.train_from:
            # optionally continue training from an existing checkpoint
            print("Loading pre-trained model from " + args.train_from)
            network.load_state_dict(torch.load(args.train_from))
        print("Training....")
        prev_error = 1000.0

        # evaluate the (possibly pre-loaded) model on the dev split before training
        network.eval()
        tmp_filename = '%s/_dev' % (save_dir)
        current_epoch_loss = 0.0
        with codecs.open(tmp_filename, "w", encoding="utf8",
                         errors="ignore") as writer:
            for inputs, labels in batch(devCorpus, devLabelsRaw, 1):
                # for every sentence in the dev data, convert the input to tensors
                x_input, batch_length, current_batch_size, current_max_sequence_length, y_output, mask, y_prev = constructBatch(
                    inputs, labels, inputVocabulary, targetVocabulary,
                    charVocabulary, max_filter_width, args.use_gpu)
                # get the loss by calling the loss() function of the model
                loss, _ = network.loss(x_input, batch_length,
                                       current_batch_size,
                                       current_max_sequence_length, y_output,
                                       mask, y_prev, args.use_gpu)
                current_epoch_loss = current_epoch_loss + loss.item()
                # get the predictions by calling the forward() function of the model
                loss, preds, probs = network.forward(
                    x_input, batch_length, current_batch_size,
                    current_max_sequence_length, y_output, mask, y_prev,
                    args.use_gpu)
                count = 0
                # write the tokens, gold labels and predicted labels to the file
                for i in range(len(inputs)):
                    for j in range(len(inputs[i])):
                        writer.write(inputs[i][j] + " " + labels[i][j] + " " +
                                     targetVocabulary.__get_index__(
                                         preds[i][j].item()).upper())
                        writer.write("\n")
                    writer.write("\n")
        # calculate the F-score on the predicted output
        acc, precision, recall, f1 = evaluate(tmp_filename, save_dir)
        print('dev loss: %.2f, dev acc: %.2f%%, precision: %.2f%%, recall: %.2f%%, F1: %.2f%%' %
              (current_epoch_loss, acc, precision, recall, f1))

        for epoch in range(1, num_epochs + 1):
            print('Epoch %d ( learning rate=%.4f ): ' % (epoch, lr))
            train_err = 0.
            train_corr = 0.
            train_total = 0.
            start_time = time.time()
            num_back = 0
            network.train()
            count = 0
            count_batch = 0
            with tqdm(total=(len(trainCorpusRawSorted))) as pbar:
                for inputs, labels in batch(trainCorpusRawSorted,
                                            trainLabelsRawSorted, batch_size):
                    # for every batch of training sentences, convert the input to tensors
                    x_input, batch_length, current_batch_size, current_max_sequence_length, y_output, mask, y_prev = constructBatch(
                        inputs, labels, inputVocabulary, targetVocabulary,
                        charVocabulary, max_filter_width, args.use_gpu)
                    optim.zero_grad()
                    # get the loss by calling the loss() function of the model
                    loss, _ = network.loss(x_input, batch_length,
                                           current_batch_size,
                                           current_max_sequence_length,
                                           y_output, mask, y_prev,
                                           args.use_gpu)
                    # compute gradients with backward() and call optim.step() to update the parameters
                    loss.backward()
                    optim.step()
                    train_err += loss.item()
                    train_total += batch_length.data.sum()
                    count = count + current_batch_size
                    count_batch = count_batch + 1
                    time_ave = (time.time() - start_time) / count
                    time_left = (num_batches - count_batch) * time_ave
                    pbar.update(1)
            print('train: %d loss: %.4f, time: %.2fs' %
                  (num_batches, train_err / count, time.time() - start_time))

            # evaluate on the dev split and write the predictions to a file
            network.eval()
            tmp_filename = '%s/_dev%d' % (save_dir, epoch)
            current_epoch_loss = 0.0
            with codecs.open(tmp_filename, "w", encoding="utf8",
                             errors="ignore") as writer:
                for inputs, labels in batch(devCorpus, devLabelsRaw, 1):
                    # for every sentence in the dev data, convert the input to tensors
                    x_input, batch_length, current_batch_size, current_max_sequence_length, y_output, mask, y_prev = constructBatch(
                        inputs, labels, inputVocabulary, targetVocabulary,
                        charVocabulary, max_filter_width, args.use_gpu)
                    # get the loss by calling the loss() function of the model
                    loss, _ = network.loss(x_input, batch_length,
                                           current_batch_size,
                                           current_max_sequence_length,
                                           y_output, mask, y_prev,
                                           args.use_gpu)
                    current_epoch_loss = current_epoch_loss + loss.item()
                    # get the predictions by calling the forward() function of the model
                    loss, preds, probs = network.forward(
                        x_input, batch_length, current_batch_size,
                        current_max_sequence_length, y_output, mask, y_prev,
                        args.use_gpu)
                    count = 0
                    # write the tokens, gold labels and predicted labels to the file
                    for i in range(len(inputs)):
                        for j in range(len(inputs[i])):
                            writer.write(inputs[i][j] + " " + labels[i][j] +
                                         " " + targetVocabulary.__get_index__(
                                             preds[i][j].item()).upper())
                            writer.write("\n")
                        writer.write("\n")
            # calculate the F-score on the predicted output
            acc, precision, recall, f1 = evaluate(tmp_filename, save_dir)
            print('dev loss: %.2f, dev acc: %.2f%%, precision: %.2f%%, recall: %.2f%%, F1: %.2f%%' %
                  (current_epoch_loss, acc, precision, recall, f1))

            if epoch > 1:
                if current_epoch_loss > prev_error:
                    # the validation loss increased: load the previous epoch's model and reduce the learning rate
                    lr = lr * 0.7
                    optim = SGD(network.parameters(), lr=lr, momentum=momentum,
                                weight_decay=gamma, nesterov=True)
                    network.load_state_dict(torch.load(save_dir + "/model"))
                    network.eval()
                    if lr < 0.002:
                        # the learning rate fell below 0.002: run a final test evaluation and stop training
                        network.eval()
                        tmp_filename = '%s/_test%d' % (save_dir, epoch)
                        with codecs.open(tmp_filename, "w", encoding="utf8",
                                         errors="ignore") as writer:
                            for inputs, labels in batch(testCorpus,
                                                        testLabelsRaw, 1):
                                # for every sentence in the test data, convert the input to tensors
                                x_input, batch_length, current_batch_size, current_max_sequence_length, y_output, mask, y_prev = constructBatch(
                                    inputs, labels, inputVocabulary,
                                    targetVocabulary, charVocabulary,
                                    max_filter_width, args.use_gpu)
                                # get the predictions by calling the forward() function of the model
                                loss, preds, probs = network.forward(
                                    x_input, batch_length, current_batch_size,
                                    current_max_sequence_length, y_output,
                                    mask, y_prev, args.use_gpu)
                                count = 0
                                # write the tokens, per-label probabilities and predicted labels to the file
                                for i in range(len(inputs)):
                                    for j in range(len(inputs[i])):
                                        writer.write(inputs[i][j])
                                        for k in range(probs[i][j].size()[0]):
                                            writer.write(
                                                " " + str(probs[i][j][k].item()))
                                        writer.write(
                                            " " + inputs[i][j] + " " +
                                            labels[i][j] + " " +
                                            targetVocabulary.__get_index__(
                                                preds[i][j].item()).upper())
                                        writer.write("\n")
                                    writer.write("\n")
                        # calculate the F-score on the predicted output
                        acc, precision, recall, f1 = evaluate(
                            tmp_filename, save_dir)
                        print('test acc: %.2f%%, precision: %.2f%%, recall: %.2f%%, F1: %.2f%%' %
                              (acc, precision, recall, f1))
                        exit()
                else:
                    # the validation loss improved: save the model and evaluate on the test split
                    prev_error = current_epoch_loss
                    torch.save(network.state_dict(), save_dir + "/model")
                    network.eval()
                    tmp_filename = '%s/_test%d' % (save_dir, epoch)
                    with codecs.open(tmp_filename, "w", encoding="utf8",
                                     errors="ignore") as writer:
                        for inputs, labels in batch(testCorpus, testLabelsRaw,
                                                    1):
                            # for every sentence in the test data, convert the input to tensors
                            x_input, batch_length, current_batch_size, current_max_sequence_length, y_output, mask, y_prev = constructBatch(
                                inputs, labels, inputVocabulary,
                                targetVocabulary, charVocabulary,
                                max_filter_width, args.use_gpu)
                            # get the predictions by calling the forward() function of the model
                            loss, preds, probs = network.forward(
                                x_input, batch_length, current_batch_size,
                                current_max_sequence_length, y_output, mask,
                                y_prev, args.use_gpu)
                            count = 0
                            # write the tokens, gold labels and predicted labels to the file
                            for i in range(len(inputs)):
                                for j in range(len(inputs[i])):
                                    writer.write(
                                        inputs[i][j] + " " + labels[i][j] +
                                        " " + targetVocabulary.__get_index__(
                                            preds[i][j].item()).upper())
                                    writer.write("\n")
                                writer.write("\n")
                    # calculate the F-score on the predicted output
                    acc, precision, recall, f1 = evaluate(
                        tmp_filename, save_dir)
                    print('test acc: %.2f%%, precision: %.2f%%, recall: %.2f%%, F1: %.2f%%' %
                          (acc, precision, recall, f1))
            else:
                # first epoch: just record the loss and save the model
                prev_error = current_epoch_loss
                torch.save(network.state_dict(), save_dir + "/model")
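# Hypothetical invocations of the sequence-labeler training script defined
# above. The script file name and the data/model paths are illustrative
# placeholders; only flags declared in its argparse block are used:
#
#   # training
#   python train_tagger.py --train data/train.conll --dev data/dev.conll \
#       --test data/test.conll --embedding_vectors embeddings/vectors.txt \
#       --save-dir models/run1 --use_gpu 1
#
#   # evaluation only, reusing the vocabularies and model saved under --save-dir
#   python train_tagger.py --test data/test.conll \
#       --vocabInput models/run1/input.vocab \
#       --vocabOutput models/run1/output.vocab \
#       --vocabChar models/run1/char.vocab \
#       --save-dir models/run1 --perform_evaluation True --use_gpu 0
#
# Note that argparse's type=bool converts any non-empty string to True, so
# flags such as --perform_evaluation, --deploy and --fineTune are effectively
# enabled by passing any value (passing "False" would still enable them).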