def process_sentences(writer, merged_summary, epoch, data, model, is_training, learningrate, config, name):
    """Process all the sentences with the labeler, return evaluation metrics.

    Args:
        writer: summary writer (e.g. a TensorBoard FileWriter) receiving one
            summary per processed batch.
        merged_summary: merged summary op handed through to the model.
        epoch: 0-based epoch index (printed 1-based; also used for the global step).
        data: indexable collection of sentences.
        model: labeler exposing process_batch(...).
        is_training: when True, batches are shuffled and the model trains.
        learningrate: learning rate forwarded to the model.
        config: dict with "batch_equal_size", "max_batch_size", "garbage_collection".
        name: label under which the evaluator reports results.

    Returns:
        dict of evaluation metrics from MLTEvaluator.get_results(name).
    """
    evaluator = MLTEvaluator(config)
    batches_of_sentence_ids = create_batches_of_sentence_ids(
        data, config["batch_equal_size"], config["max_batch_size"])
    if is_training:
        random.shuffle(batches_of_sentence_ids)
    # Batch size is bounded by config["max_batch_size"] (not a fixed 32).
    for count, sentence_ids_in_batch in enumerate(batches_of_sentence_ids):
        print('############### Epoch', epoch + 1, 'Batch', count + 1,
              'of', len(batches_of_sentence_ids), '###############')
        batch = [data[i] for i in sentence_ids_in_batch]
        summary, cost, sentence_scores, token_scores_list = model.process_batch(
            merged_summary, batch, is_training, learningrate)
        evaluator.append_data(cost, batch, sentence_scores, token_scores_list)
        # Global step = batches-per-epoch * epoch + batch index, so steps are
        # monotonically increasing across epochs.
        writer.add_summary(summary, len(batches_of_sentence_ids) * epoch + count)
        # Optionally force full garbage collection between batches to bound
        # memory growth; loop until a collection pass frees nothing.
        while config["garbage_collection"] and gc.collect() > 0:
            pass
    results = evaluator.get_results(name)
    for key in results:
        print(key + ": " + str(results[key]))
    return results
def process_sentences(data_train, data, model, is_training, learningrate, config, name, epoch):
    """Process all the sentences with the labeler, return evaluation metrics.

    Args:
        data_train: indexable collection the batch sentences are taken from.
        data: collection used to build the batch id lists.
        model: labeler exposing process_batch(...).
        is_training: when True, batches are shuffled and the model trains.
        learningrate: learning rate forwarded to the model.
        config: dict with "batch_equal_size", "max_batch_size", "garbage_collection".
        name: label under which the evaluator reports results; when "dev",
            results are also written to a per-epoch text file.
        epoch: epoch index, used in the result file name.

    Returns:
        dict of evaluation metrics from MLTEvaluator.get_results(name).
    """
    evaluator = MLTEvaluator(config)
    batches_of_sentence_ids = create_batches_of_sentence_ids(
        data, config["batch_equal_size"], config["max_batch_size"])
    if is_training:
        random.shuffle(batches_of_sentence_ids)
    for sentence_ids_in_batch in batches_of_sentence_ids:
        # NOTE(review): batch ids are built from `data` but index into
        # `data_train` — assumes the two are aligned; TODO confirm with callers.
        batch = [data_train[i] for i in sentence_ids_in_batch]
        cost, sentence_scores, token_scores = model.process_batch(
            data_train, batch, is_training, learningrate)
        evaluator.append_data(cost, batch, sentence_scores, token_scores, name, epoch)
        # Optionally force garbage collection between batches to bound memory.
        while config["garbage_collection"] and gc.collect() > 0:
            pass
    results = evaluator.get_results(name)
    for key in results:
        print(key + ": " + str(results[key]))
    if name == "dev":
        # Context manager guarantees the file is closed even if write() raises
        # (the original closed it manually, leaking the handle on error).
        with codecs.open("./result_with_know_c2h/result_train_" + str(epoch) + ".txt",
                         encoding="utf-8", mode="w") as f1:
            f1.write(str(results))
    return results
def process_sentences(data, model, is_training, learningrate, config, name):
    """Process all the sentences with the labeler, return evaluation metrics.

    Args:
        data: indexable collection of sentences.
        model: labeler exposing process_batch(...).
        is_training: when True, batches are shuffled and the model trains.
        learningrate: learning rate forwarded to the model.
        config: dict with "batch_equal_size", "max_batch_size", "garbage_collection".
        name: label under which the evaluator reports results.

    Returns:
        dict of evaluation metrics from MLTEvaluator.get_results(name).
    """
    evaluator = MLTEvaluator(config)
    batches_of_sentence_ids = create_batches_of_sentence_ids(
        data, config["batch_equal_size"], config["max_batch_size"])
    if is_training:
        random.shuffle(batches_of_sentence_ids)
    for sentence_ids_in_batch in batches_of_sentence_ids:
        batch = [data[i] for i in sentence_ids_in_batch]
        # process_batch returns 8 values; only the first 4 are consumed here,
        # so the trailing weight tensors are bound to underscore-prefixed names.
        (cost, sentence_scores, token_scores_list, token_probs,
         _token_probs_all_labels, _unsup_weights, _selective_weights,
         _indicative_weights) = model.process_batch(batch, is_training, learningrate)
        evaluator.append_data(cost, batch, sentence_scores, token_scores_list, token_probs)
        # Optionally force garbage collection between batches to bound memory.
        while config["garbage_collection"] and gc.collect() > 0:
            pass
    results = evaluator.get_results(name)
    for key in results:
        print(key + ": " + str(results[key]))
    return results
import sys

from model import MLTModel
from evaluator import MLTEvaluator
from experiment import read_input_files

if __name__ == "__main__":
    # Usage: script <saved-model-path> <input-file>
    model = MLTModel.load(sys.argv[1])
    data = read_input_files(sys.argv[2], -1)
    batch_size = 32

    # Evaluator
    evaluator = MLTEvaluator(model.config)

    for start in range(0, len(data), batch_size):
        batch = data[start:start + batch_size]
        # Inference only: no training, zero learning rate.
        cost, sentence_scores, token_scores_list = model.process_batch_inference(batch, False, 0.0)

        # Emit one line per token: token fields, token score, sentence score;
        # a blank line separates sentences.
        for j, (sentence, sentence_score) in enumerate(zip(batch, sentence_scores)):
            for k, token in enumerate(sentence):
                print(" ".join([str(x) for x in token]) + "\t"
                      + str(token_scores_list[0][j][k]) + "\t"
                      + str(sentence_score))
            print("")

        # Evaluator
        evaluator.append_data(cost, batch, sentence_scores, token_scores_list)

    # Evaluator
    results = evaluator.get_results("test")
    for key in results:
        sys.stderr.write(key + ": " + str(results[key]) + "\n")