예제 #1
0
# Evaluate a previously saved BiDAF model on the SQuAD dev file.

# Show only ERROR-and-above messages from TensorFlow; the script's own
# logging goes through the root logger at INFO level.
tf.logging.set_verbosity(tf.logging.ERROR)
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# --- Paths (machine-specific; every path hangs off base_folder) ---
base_folder = '/Users/huihui/git/SogouMRCToolkit/'
data_folder = '{}data/'.format(base_folder)
dev_file = '{}dev-v1.1.json'.format(data_folder)
vocab_save_path = '{}data/vocab.json'.format(base_folder)
model_dir = '{}models/bidaf/best_weights'.format(base_folder)

# --- Data: the same dev file feeds both the reader and the evaluator ---
reader = SquadReader()
eval_data = reader.read(dev_file)
evaluator = SquadEvaluator(dev_file)

# --- Vocabulary: loaded from the saved file rather than rebuilt ---
vocab = Vocabulary()
vocab.load(vocab_save_path)

test_batch_generator = BatchGenerator(vocab, eval_data, batch_size=60)

# --- Model: build, load weights from model_dir, then run ---
model = BiDAF(vocab)
model.load(model_dir)
# NOTE(review): local variables are initialised after the weights are loaded,
# presumably because they are not restored by model.load() -- confirm.
model.session.run(tf.local_variables_initializer())

model.inference(test_batch_generator)  # inference on the dev batches
model.evaluate(test_batch_generator, evaluator)

# NOTE(review): earlier drafts kept commented-out direct calls to
# evaluator.exact_match_score / SquadEvaluator.f1_score here; they were never
# executed, so they are not reproduced.
# Set up logging, the CoQA reader, and a Vocabulary that is loaded from
# vocab_filepath when that file exists and built (then saved) otherwise.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# NOTE(review): the meaning of the -1 argument is not visible here -- confirm
# against CoQAReader's signature.
coqa_reader = CoQAReader(-1)

# Dataset location: a machine-specific absolute path that keeps its trailing
# slash ("coqa/").
data_folder = os.path.join(
    "/", "home", "baheti", "QADialogueSystem", "Data", "QA_datasets", "coqa/")
train_filename = "coqa-train-v1.0.json"
eval_filename = "coqa-dev-v1.0.json"

vocab = Vocabulary(do_lowercase=True)
vocab_filepath = os.path.join("models", "vocab.txt")

if not os.path.exists(vocab_filepath):
    # No saved vocabulary yet: build one from train + dev and save it.
    # train_data / eval_data are bound only on this branch.
    print("creating vocab as new")
    train_data = coqa_reader.read(
        os.path.join(data_folder, train_filename), 'train')
    eval_data = coqa_reader.read(
        os.path.join(data_folder, eval_filename), 'dev')
    vocab.build_vocab(train_data + eval_data)
    vocab.save(vocab_filepath)
else:
    # A saved vocabulary exists: load it instead of reading the datasets.
    print("loading from filepath")
    vocab.load(vocab_filepath)

# SQuAD seq2seq train set, Moses-tokenized (alternative configuration, currently commented out)
# DATA_DIR = os.path.join("/", "home", "baheti", "QADialogueSystem", "RuleBasedQuestionsToAnswer", "squad_seq2seq_train_moses_tokenized")
# coqa_format_test_save_file = os.path.join(DATA_DIR, "squad_seq2seq_predicted_responses_test_coqa_format.json")
# src_squad_seq2seq_predicted_responses_file = os.path.join(DATA_DIR, "src_squad_seq2seq_predicted_responses_test.txt")
# predictions_save_file = "coqa_predictions_on_squad_seq2seq_predicted_responses_test.txt"

# SQuAD seq2seq dev set, Moses-tokenized
DATA_DIR = os.path.join("..", "RuleBasedQuestionsToAnswer",