def main(evaluate=False):
    """Compute answer spans for the HotpotQA corpus, or evaluate the span detector."""
    corpus = HotpotQuestions()
    dev_qs = corpus.get_dev()
    train_qs = corpus.get_train()

    tokenizer = CoreNLPTokenizer()

    def tokenize(text):
        return tokenizer.tokenize(text).words()

    if evaluate:
        # Report how well the detector recovers the annotated answers.
        print("Train:")
        evaluate_question_detector(train_qs, tokenize, FastNormalizedAnswerDetector(), compute_f1s=True)
        print("Dev:")
        evaluate_question_detector(dev_qs, tokenize, FastNormalizedAnswerDetector(), compute_f1s=True)
    else:
        # Attach detected answer spans to each question and write out the corpus.
        train = compute_answer_spans(train_qs, FastNormalizedAnswerDetector(), tokenize)
        dev = compute_answer_spans(dev_qs, FastNormalizedAnswerDetector(), tokenize)
        HotpotQuestions.make_corpus(train, dev)
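
# A minimal sketch of how main() might be driven from the command line. The
# run_from_cli() helper and its argparse wiring are assumptions for illustration;
# only the `evaluate` parameter comes from main() itself.
def run_from_cli():
    import argparse
    parser = argparse.ArgumentParser(
        description="Compute or evaluate answer spans for HotpotQA questions")
    parser.add_argument("--evaluate", action="store_true",
                        help="report detector accuracy instead of building the corpus")
    args = parser.parse_args()
    main(evaluate=args.evaluate)
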
def init():
    """Pool initializer: give each worker process its own tokenizer."""
    global PROCESS_TOK
    PROCESS_TOK = CoreNLPTokenizer()
    Finalize(PROCESS_TOK, PROCESS_TOK.shutdown, exitpriority=100)
def init():
    """Pool initializer for workers that also need document access:
    set up a per-process tokenizer and document database."""
    global PROCESS_TOK, PROCESS_DB
    PROCESS_TOK = CoreNLPTokenizer()
    Finalize(PROCESS_TOK, PROCESS_TOK.shutdown, exitpriority=100)
    PROCESS_DB = DocDB()
    Finalize(PROCESS_DB, PROCESS_DB.close, exitpriority=100)
def init_encoding_handler(encoding_dir):
    """Pool initializer for workers that read pre-computed document encodings."""
    global DOC_ENCS_HANDLER, PROCESS_TOK
    DOC_ENCS_HANDLER = DocumentEncodingHandler(encoding_dir)
    PROCESS_TOK = CoreNLPTokenizer()
    Finalize(PROCESS_TOK, PROCESS_TOK.shutdown, exitpriority=100)
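
# A sketch of how these initializers are typically used with a multiprocessing
# Pool: each worker process builds its own tokenizer (and, for the variants above,
# its own DocDB or DocumentEncodingHandler), and Finalize registers a cleanup hook
# that runs when the worker exits. The helper names and pool size below are
# assumptions for illustration, not part of the original code.
from multiprocessing import Pool

def tokenize_words(text):
    # Runs inside a worker; uses the global tokenizer created by init().
    return PROCESS_TOK.tokenize(text).words()

def tokenize_all(texts, n_workers=8):
    # init() runs once per worker process. For the encoding-handler variant,
    # pass initializer=init_encoding_handler, initargs=(encoding_dir,) instead.
    with Pool(n_workers, initializer=init) as pool:
        return pool.map(tokenize_words, texts)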