def main(): parser = argparse.ArgumentParser("Preprocess SQuAD data") parser.add_argument("--train_file", default=config.SQUAD_TRAIN) parser.add_argument("--dev_file", default=config.SQUAD_DEV) parser.add_argument("--weighted-questions", action='store_true') if not exists(config.CORPUS_DIR): mkdir(config.CORPUS_DIR) target_dir = join(config.CORPUS_DIR, SquadCorpus.NAME) if exists(target_dir) and len(listdir(target_dir)) > 0: raise ValueError("Files already exist in " + target_dir) args = parser.parse_known_args()[0] tokenizer = NltkAndPunctTokenizer() print("Parsing train...") train = list( parse_squad_data(args.train_file, "train", tokenizer, weighted_samples=args.weighted_questions)) print("Parsing dev...") dev = list(parse_squad_data(args.dev_file, "dev", tokenizer)) print("Saving...") SquadCorpus.make_corpus(train, dev) print("Done")
def main(): parser = argparse.ArgumentParser("Preprocess SQuAD data") #basedir = join(expanduser("~"), "data", "squad") basedir = join(expanduser("~"), "azayats", "data", "squad") parser.add_argument("--train_file", default=join(basedir, "train-v1.1.json")) parser.add_argument("--dev_file", default=join(basedir, "dev-v1.1.json")) if not exists(config.CORPUS_DIR): mkdir(config.CORPUS_DIR) target_dir = join(config.CORPUS_DIR, SquadCorpus.NAME) if exists(target_dir) and len(listdir(target_dir)) > 0: raise ValueError("Files already exist in " + target_dir) args = parser.parse_args() tokenzier = NltkAndPunctTokenizer() print("Parsing train...") train = list(parse_squad_data(args.train_file, "train", tokenzier)) print("Parsing dev...") dev = list(parse_squad_data(args.dev_file, "dev", tokenzier)) print("Saving...") SquadCorpus.make_corpus(train, dev) print("Done")
def main():
    # Compare how well two ways of recovering the answer string match the
    # annotated answer: naively joining tokens vs. mapping back to the original text
    data = SquadCorpus()
    string_f1 = 0
    mapped_string_f1 = 0
    docs = data.get_train()
    n_questions = 0

    for doc in tqdm(docs):
        for para in doc.paragraphs:
            words = flatten_iterable(para.text)
            for question in para.questions:
                n_questions += 1
                span_answer = question.answer[0]
                span_str = " ".join(words[span_answer.para_word_start:span_answer.para_word_end + 1])
                raw_answer = span_answer.text
                mapped_str = para.get_original_text(span_answer.para_word_start,
                                                    span_answer.para_word_end)
                string_f1 += f1_score(raw_answer, span_str)
                mapped_string_f1 += f1_score(raw_answer, mapped_str)

    print(string_f1 / n_questions)
    print(mapped_string_f1 / n_questions)
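# For reference, a minimal sketch of the token-level F1 used above, modeled on
# the official SQuAD evaluation script. This is an assumption about what
# `f1_score` does, not the repo's exact implementation (the official script
# also strips articles and punctuation when normalizing).
def _token_f1_sketch(prediction: str, ground_truth: str) -> float:
    from collections import Counter
    pred_tokens = prediction.lower().split()
    gold_tokens = ground_truth.lower().split()
    common = Counter(pred_tokens) & Counter(gold_tokens)
    num_same = sum(common.values())
    if num_same == 0:
        return 0.0
    precision = num_same / len(pred_tokens)
    recall = num_same / len(gold_tokens)
    return 2 * precision * recall / (precision + recall)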
def main():
    # Namespace(directory='C:/Users/boidiyv/document-qa-master', dump=False, fake=False, verbose=False)
    parser = argparse.ArgumentParser("Preprocess SQuAD data")
    # parser = argparse.ArgumentParser()
    parser.add_argument('--document-qa/docqa/squad', type=Path)
    parser.add_argument("--train_file", default=config.SQUAD_TRAIN)
    parser.add_argument("--dev_file", default=config.SQUAD_DEV)
    # parser.add_argument("--document-qa-master", type=lambda p: Path(p).absolute(),
    #                     default=Path(__file__).absolute().parent / "document-qa-master",
    #                     help="Path to the data directory")

    if not exists(config.CORPUS_DIR):
        mkdir(config.CORPUS_DIR)
    target_dir = join(config.CORPUS_DIR, SquadCorpus.NAME)
    if exists(target_dir) and len(listdir(target_dir)) > 0:
        raise ValueError("Files already exist in " + target_dir)

    # Parse an empty argument list so the defaults are used (e.g. when run from a notebook)
    args = parser.parse_args([])
    tokenizer = NltkAndPunctTokenizer()
    print("Parsing train...")
    train = list(parse_squad_data(args.train_file, "train", tokenizer))
    print(train)
    print("Parsing dev...")
    dev = list(parse_squad_data(args.dev_file, "dev", tokenizer))
    print("Saving...")
    SquadCorpus.make_corpus(train, dev)
    print("Done")
def show_unk(corpus: SquadCorpus, vec_name: str,
             context: bool = True, question: bool = True):
    vecs = corpus.get_pruned_word_vecs(vec_name)
    docs = corpus.get_train()
    lower_unk = Counter()
    unk = Counter()

    for doc in docs:
        for para in doc.paragraphs:
            if context:
                for sent in para.text:
                    for word in sent:
                        if word not in vecs:
                            unk[word] += 1
                            word = word.lower()
                            if word not in vecs:
                                lower_unk[word] += 1
            if question:
                for question in para.questions:
                    for word in question.words:
                        if word not in vecs:
                            unk[word] += 1
                            word = word.lower()
                            if word not in vecs:
                                lower_unk[word] += 1

    print("\n".join("%s: %d" % (k, v) for k, v in lower_unk.most_common()))
def show_squad_errors(answers):
    print("Loading answers..")
    answer_df = pd.read_csv(answers)

    print("Loading questions..")
    corpus = SquadCorpus()
    questions = {}
    docs = {}
    for doc in corpus.get_dev():
        for para in doc.paragraphs:
            for q in para.questions:
                questions[q.question_id] = q
                docs[q.question_id] = doc

    answer_df.sort_values(["question_id", "rank"], inplace=True)
    grouped = list(answer_df.groupby(["question_id"]))
    np.random.shuffle(grouped)

    for question_id, group in grouped:
        q = questions[question_id]
        doc = docs[question_id]
        cur_best_score = group.text_f1.iloc[0]
        cur_best_conf = group.predicted_score.iloc[0]
        cur_best_ix = group.index[0]
        for i in range(1, len(group)):
            ix = group.index[i]
            conf = group.predicted_score[ix]
            if conf > cur_best_conf:
                score = group.text_f1[ix]
                if score < cur_best_score:
                    # We hurt ourselves!
                    print("Oh no!")
                    print(" ".join(q.words))
                    print(q.answer.answer_text)
                    print("Best score was %.4f (conf=%.4f), but now is %.4f (conf=%.4f)" % (
                        cur_best_score, cur_best_conf, score, conf))
                    cur_para = doc.paragraphs[group.para_number[cur_best_ix]]
                    new_para = doc.paragraphs[group.para_number[ix]]
                    p1_s, p1_e = group.predicted_start[cur_best_ix], group.predicted_end[cur_best_ix]
                    p2_s, p2_e = group.predicted_start[ix], group.predicted_end[ix]
                    print(" ".join(display_para(cur_para.get_context(), None, q.words, p1_s, p1_e)))
                    print()
                    print(" ".join(display_para(new_para.get_context(), None, q.words, p2_s, p2_e)))
                    input()
                else:
                    cur_best_score = score
                    cur_best_ix = ix
                    cur_best_conf = conf
def show_in_context_unks(corpus: SquadCorpus, vec_name):
    data = corpus.get_train()
    np.random.shuffle(data)
    vecs = corpus.get_pruned_word_vecs(vec_name)

    for doc in data:
        paragraphs = list(doc.paragraphs)
        np.random.shuffle(paragraphs)
        for para in paragraphs:
            sentences = list(para.text) + [x.words for x in para.questions]
            np.random.shuffle(sentences)
            for words in sentences:
                for i, word in enumerate(words):
                    if word.lower() not in vecs:
                        # Temporarily mark the OOV word, print its context window, then restore it
                        words[i] = "{{{" + word + "}}}"
                        print(" ".join(words[max(0, i - 10):min(len(words), i + 10)]))
                        words[i] = word
def main():
    corpus = SquadCorpus()
    prepro = SquadTfIdfRanker(NltkPlusStopWords(True), OPTS.num_per_orig, True)
    orig_data = corpus.get_train() if OPTS.split == 'train' else corpus.get_dev()
    orig_lens = [len(p.text[0])
                 for doc in orig_data
                 for p in doc.paragraphs
                 for q in p.questions]
    new_data = preprocess_par(orig_data, corpus.evidence, prepro, n_processes=1)
    new_lens = [len(p.text) for q in new_data for p in q.paragraphs]
    print('%d original, mean %.2f words' % (len(orig_lens), np.mean(orig_lens)))
    print('%d new, mean %.2f words' % (len(new_lens), np.mean(new_lens)))
    if OPTS.out_file:
        write_output(OPTS.split, new_data, OPTS.out_file)
def main():
    corpus = SquadCorpus()
    if OPTS.normalize_before_ranking:
        normalizer = WordNormalizer()
    else:
        normalizer = None
    if OPTS.use_vec_dist:
        word_vecs = corpus.get_pruned_word_vecs('glove.840B.300d')
        prepro = SquadVectorTfIdfRanker(NltkPlusStopWords(True), OPTS.num_per_orig,
                                        True, word_vecs, word_normalizer=normalizer)
    else:
        prepro = SquadTfIdfRanker(NltkPlusStopWords(True), OPTS.num_per_orig,
                                  True, word_normalizer=normalizer)
    orig_data = corpus.get_train() if OPTS.split == 'train' else corpus.get_dev()
    orig_lens = [len(p.text[0])
                 for doc in orig_data
                 for p in doc.paragraphs
                 for q in p.questions]
    new_data = preprocess_par(orig_data, corpus.evidence, prepro, n_processes=1)
    new_lens = [len(p.text) for q in new_data for p in q.paragraphs]
    print('%d original, mean %.2f words' % (len(orig_lens), np.mean(orig_lens)))
    print('%d new, mean %.2f words' % (len(new_lens), np.mean(new_lens)))
    if OPTS.out_file:
        write_output(OPTS.split, new_data, OPTS.out_file)
def show_features(corpus: SquadCorpus, vec_name):
    print("Loading train docs")
    data = corpus.get_train()
    np.random.shuffle(data)
    data = data[:100]

    print("Loading vectors")
    vecs = corpus.get_pruned_word_vecs(vec_name)
    fe = BasicWordFeatures()

    grouped_by_features = defaultdict(Counter)
    print("start")
    for doc in data:
        paragraphs = list(doc.paragraphs)
        np.random.shuffle(paragraphs)
        for para in paragraphs:
            sentences = list(para.text) + [x.words for x in para.questions]
            np.random.shuffle(sentences)
            for words in sentences:
                for word in words:
                    if word.lower() not in vecs:
                        x = fe.get_word_features(word)
                        for feature_ix, val in enumerate(x):
                            if val > 0:
                                grouped_by_features[feature_ix][word] += 1

    for feature_ix in sorted(grouped_by_features.keys()):
        name = BasicWordFeatures.features_names[feature_ix]
        if name in ["Len"]:
            continue
        vals = grouped_by_features[feature_ix]
        print()
        print("*" * 30)
        print("%s-%d %d (%d)" % (name, feature_ix, len(vals), sum(vals.values())))
        for k, v in vals.most_common(30):
            print("%s: %d" % (k, v))
def show_nums(corpus: SquadCorpus):
    n_regex = re.compile(".*[0-9].*")
    data = corpus.get_train()
    np.random.shuffle(data)
    for doc in data:
        paragraphs = list(doc.paragraphs)
        np.random.shuffle(paragraphs)
        for para in paragraphs:
            sentences = list(para.context) + [x.words for x in para.questions]
            np.random.shuffle(sentences)
            for words in sentences:
                for word in words:
                    if n_regex.match(word) is not None:
                        print(word)
def check_preprocess_squad():
    data = SquadCorpus().get_train()
    remove_cross = WithIndicators(True)
    for doc in tqdm(data):
        for para in doc.paragraphs:
            q = para.questions[np.random.randint(0, len(para.questions))]
            text, ans, inv = remove_cross.encode_paragraph(
                q.words, para.text, para.paragraph_num == 0,
                q.answer.answer_spans, para.spans)
            if len(inv) != len(text):
                raise ValueError()
            for i in range(len(inv) - 1):
                if inv[i, 0] > inv[i + 1, 0]:
                    raise ValueError()
            for (s1, e1), (s2, e2) in zip(ans, q.answer.answer_spans):
                if tuple(inv[s1]) != tuple(para.spans[s2]):
                    raise ValueError()
                if tuple(inv[e1]) != tuple(para.spans[e2]):
                    raise ValueError()
def build_corpus_subset(output):
    docs = SquadCorpus().get_dev()
    titles = [clean_title(doc.title) for doc in docs]
    for i, t in enumerate(titles):
        if t == "Sky (United Kingdom)":
            titles[i] = "Sky UK"

    with sqlite3.connect(DOCUMENT_READER_DB) as conn:
        c = conn.cursor()
        c.execute("CREATE TEMPORARY TABLE squad_docs(id)")
        c.executemany("INSERT INTO squad_docs VALUES (?)", [(x,) for x in titles])
        c.execute("ATTACH DATABASE ? AS db2", (output,))
        c.execute("CREATE TABLE db2.documents (id PRIMARY KEY, text);")
        c.execute("INSERT INTO db2.documents SELECT * FROM documents WHERE id in squad_docs")
        c.close()
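# Hypothetical usage of the subset builder above; the output path is an example,
# not from the repo. The ATTACH DATABASE pattern lets sqlite copy the matching
# rows directly into a brand-new file without round-tripping documents through Python.
# build_corpus_subset("/tmp/squad_dev_docs.db")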
def main():
    parser = argparse.ArgumentParser(description='Train a model on document-level SQuAD')
    parser.add_argument('mode', choices=["paragraph", "confidence", "shared-norm", "merge", "sigmoid"])
    parser.add_argument("name", help="Output directory")
    args = parser.parse_args()
    mode = args.mode
    out = args.name + "-" + datetime.now().strftime("%m%d-%H%M%S")

    corpus = SquadCorpus()
    if mode == "merge":
        # Adds paragraph start tokens, since we will be concatenating paragraphs together
        pre = WithIndicators(True, para_tokens=False, doc_start_token=False)
    else:
        pre = None

    model = get_model(50, 100, args.mode, pre)

    if mode == "paragraph":
        # Run in the "standard" known-paragraph setting
        if model.preprocessor is not None:
            raise NotImplementedError()
        n_epochs = 26
        train_batching = ClusteredBatcher(45, ContextLenBucketedKey(3), True, False)
        eval_batching = ClusteredBatcher(45, ContextLenKey(), False, False)
        data = DocumentQaTrainingData(corpus, None, train_batching, eval_batching)
        eval = [LossEvaluator(), SpanEvaluator(bound=[17], text_eval="squad")]
    else:
        eval_set_mode = {
            "confidence": "flatten",
            "sigmoid": "flatten",
            "shared-norm": "group",
            "merge": "merge"
        }[mode]
        eval_dataset = RandomParagraphSetDatasetBuilder(100, eval_set_mode, True, 0)

        if mode == "confidence" or mode == "sigmoid":
            if mode == "sigmoid":
                # needs to be trained for a really long time for reasons unknown, even this might be too small
                n_epochs = 100
            else:
                n_epochs = 50  # more epochs since we only "see" the label every other epoch-ish
            train_batching = ClusteredBatcher(45, ContextLenBucketedKey(3), True, False)
            data = PreprocessedData(
                SquadCorpus(),
                SquadTfIdfRanker(NltkPlusStopWords(True), 4, True, model.preprocessor),
                StratifyParagraphsBuilder(train_batching, 1),
                eval_dataset,
                eval_on_verified=False,
            )
        else:
            n_epochs = 26
            data = PreprocessedData(
                SquadCorpus(),
                SquadTfIdfRanker(NltkPlusStopWords(True), 4, True, model.preprocessor),
                StratifyParagraphSetsBuilder(25, args.mode == "merge", True, 1),
                eval_dataset,
                eval_on_verified=False,
            )

        eval = [LossEvaluator(), MultiParagraphSpanEvaluator(17, "squad")]
        data.preprocess(1)

    with open(__file__, "r") as f:
        notes = f.read()
    notes = args.mode + "\n" + notes

    trainer.start_training(data, model, train_params(n_epochs), eval,
                           model_dir.ModelDir(out), notes)
def main():
    show_unk(SquadCorpus(), "glove.840B.300d")
def main(): parser = argparse.ArgumentParser("Train rejection model on SQuAD") parser.add_argument("--corpus_dir", type=str, default="~/data/document-qa") parser.add_argument("--output_dir", type=str, default="~/model/document-qa/squad") parser.add_argument("--lm_dir", type=str, default="~/data/lm") parser.add_argument("--exp_id", type=str, default="rejection") parser.add_argument("--lr", type=float, default=0.5) parser.add_argument("--epoch", type=int, default=20) parser.add_argument("--dim", type=int, default=100) parser.add_argument("--batch_size", type=int, default=45) parser.add_argument("--l2", type=float, default=0) parser.add_argument("--mode", choices=["input", "output", "both", "none"], default="both") parser.add_argument("--top_layer_only", action="store_true") args = parser.parse_args() print("Arguments : ", args) out = args.output_dir + "_" + args.exp_id + "_lr" + str( args.lr) + "-" + datetime.now().strftime("%m%d-%H%M%S") dim = args.dim batch_size = args.batch_size out = expanduser(out) lm_dir = expanduser(args.lm_dir) corpus_dir = expanduser(args.corpus_dir) print("Make global recurrent_layer...") recurrent_layer = CudnnGru( dim, w_init=tf.keras.initializers.TruncatedNormal(stddev=0.05)) params = trainer.TrainParams(trainer.SerializableOptimizer( "Adadelta", dict(learning_rate=args.lr)), ema=0.999, max_checkpoints_to_keep=2, async_encoding=10, num_epochs=args.epoch, log_period=30, eval_period=1200, save_period=1200, best_weights=("dev", "b17/text-f1"), eval_samples=dict(dev=None, train=8000)) lm_reduce = MapperSeq( ElmoLayer(args.l2, layer_norm=False, top_layer_only=args.top_layer_only), DropoutLayer(0.5), ) model = AttentionWithElmo( encoder=DocumentAndQuestionEncoder(SingleSpanAnswerEncoder()), lm_model=SquadContextConcatSkip(lm_dir=lm_dir), append_before_atten=(args.mode == "both" or args.mode == "output"), append_embed=(args.mode == "both" or args.mode == "input"), max_batch_size=128, word_embed=FixedWordEmbedder(vec_name="glove.840B.300d", word_vec_init_scale=0, learn_unk=False, cpu=True), char_embed=CharWordEmbedder(LearnedCharEmbedder(word_size_th=14, char_th=49, char_dim=20, init_scale=0.05, force_cpu=True), MaxPool(Conv1d(100, 5, 0.8)), shared_parameters=True), embed_mapper=SequenceMapperSeq( VariationalDropoutLayer(0.8), recurrent_layer, VariationalDropoutLayer(0.8), ), lm_reduce=None, lm_reduce_shared=lm_reduce, per_sentence=False, memory_builder=NullBiMapper(), attention=BiAttention(TriLinear(bias=True), True), match_encoder=SequenceMapperSeq( FullyConnected(dim * 2, activation="relu"), ResidualLayer( SequenceMapperSeq( VariationalDropoutLayer(0.8), recurrent_layer, VariationalDropoutLayer(0.8), StaticAttentionSelf(TriLinear(bias=True), ConcatWithProduct()), FullyConnected(dim * 2, activation="relu"), )), VariationalDropoutLayer(0.8)), predictor=BoundsPredictor( ChainBiMapper(first_layer=recurrent_layer, second_layer=recurrent_layer))) batcher = ClusteredBatcher(batch_size, ContextLenKey(), False, False) data = DocumentQaTrainingData(SquadCorpus(corpus_dir), None, batcher, batcher) with open(__file__, "r") as f: notes = f.read() notes = str(sorted(args.__dict__.items(), key=lambda x: x[0])) + "\n" + notes trainer.start_training( data, model, params, [LossEvaluator(), SpanEvaluator(bound=[17], text_eval="squad")], ModelDir(out), notes)
def main():
    parser = argparse.ArgumentParser(description='Evaluate a model on SQuAD')
    parser.add_argument('model', help='model directory to evaluate')
    parser.add_argument("-o", "--official_output", type=str,
                        help="where to output an official result file")
    parser.add_argument('-n', '--sample_questions', type=int, default=None,
                        help="(for testing) run on a subset of questions")
    parser.add_argument('--answer_bounds', nargs='+', type=int, default=[17],
                        help="Max size of answer")
    parser.add_argument('-b', '--batch_size', type=int, default=200,
                        help="Batch size, larger sizes can be faster but use more memory")
    parser.add_argument('-s', '--step', default=None,
                        help="Weights to load, can be a checkpoint step or 'latest'")
    # Add ja_test choice to test Multilingual QA dataset.
    parser.add_argument('-c', '--corpus',
                        choices=["dev", "train", "ja_test", "pred"], default="dev")
    parser.add_argument('--no_ema', action="store_true",
                        help="Don't use EMA weights even if they exist")
    # Add ja_test choice to test Multilingual QA pipeline.
    parser.add_argument('-p', '--pred_filepath', default=None,
                        help="The csv file path if you try pred mode")
    args = parser.parse_args()

    model_dir = ModelDir(args.model)
    corpus = SquadCorpus()
    if args.corpus == "dev":
        questions = corpus.get_dev()
    # Add ja_test choice to test Multilingual QA pipeline.
    elif args.corpus == "ja_test":
        questions = corpus.get_ja_test()
    # This is for prediction mode for MLQA pipeline.
    elif args.corpus == "pred":
        questions = create_pred_dataset(args.pred_filepath)
    else:
        questions = corpus.get_train()
    questions = split_docs(questions)

    if args.sample_questions:
        # Sort for a deterministic order before taking the random sample
        questions = sorted(questions, key=lambda x: x.question_id)
        np.random.RandomState(0).shuffle(questions)
        questions = questions[:args.sample_questions]

    questions.sort(key=lambda x: x.n_context_words, reverse=True)
    dataset = ParagraphAndQuestionDataset(questions, FixedOrderBatcher(args.batch_size, True))

    evaluators = [SpanEvaluator(args.answer_bounds, text_eval="squad")]
    if args.official_output is not None:
        evaluators.append(RecordSpanPrediction(args.answer_bounds[0]))

    if args.step is not None:
        if args.step == "latest":
            checkpoint = model_dir.get_latest_checkpoint()
        else:
            checkpoint = model_dir.get_checkpoint(int(args.step))
    else:
        checkpoint = model_dir.get_best_weights()
        if checkpoint is not None:
            print("Using best weights")
        else:
            print("Using latest checkpoint")
            checkpoint = model_dir.get_latest_checkpoint()

    model = model_dir.get_model()

    evaluation = trainer.test(model, evaluators, {args.corpus: dataset},
                              corpus.get_resource_loader(), checkpoint,
                              not args.no_ema)[args.corpus]

    # Print the scalar results in a two-column table
    scalars = evaluation.scalars
    cols = list(sorted(scalars.keys()))
    table = [cols]
    header = ["Metric", ""]
    table.append([("%s" % scalars[x] if x in scalars else "-") for x in cols])
    print_table([header] + transpose_lists(table))

    # Save the official output
    if args.official_output is not None:
        quid_to_para = {}
        for x in questions:
            quid_to_para[x.question_id] = x.paragraph

        q_id_to_answers = {}
        q_ids = evaluation.per_sample["question_id"]
        spans = evaluation.per_sample["predicted_span"]
        for q_id, (start, end) in zip(q_ids, spans):
            text = quid_to_para[q_id].get_original_text(start, end)
            q_id_to_answers[q_id] = text

        with open(args.official_output, "w") as f:
            json.dump(q_id_to_answers, f)
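# Example invocation of the evaluation entry point above (the script and model
# directory names here are hypothetical):
#   python squad_eval.py models/squad-run -o answers.json -c dev -b 100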
def main():
    parser = argparse.ArgumentParser(description='Evaluate a model on document-level SQuAD')
    parser.add_argument('model', help='model to use')
    parser.add_argument('output', type=str,
                        help="Store the per-paragraph results in csv format in this file")
    parser.add_argument('-n', '--n_sample', type=int, default=None,
                        help="(for testing) sample documents")
    parser.add_argument('-s', '--async', type=int, default=10,
                        help="Encode batches asynchronously, queueing up to this many")
    parser.add_argument('-a', '--answer_bound', type=int, default=17,
                        help="Max answer span length")
    parser.add_argument('-p', '--n_paragraphs', type=int, default=None,
                        help="Max number of paragraphs to use")
    parser.add_argument('-b', '--batch_size', type=int, default=200,
                        help="Batch size, larger sizes can be faster but use more memory")
    parser.add_argument('-c', '--corpus', choices=["dev", "train", "doc-rd-dev"], default="dev")
    parser.add_argument('--no_ema', action="store_true",
                        help="Don't use EMA weights even if they exist")
    args = parser.parse_args()

    model_dir = ModelDir(args.model)

    print("Loading data")
    questions = []
    ranker = SquadTfIdfRanker(NltkPlusStopWords(True), args.n_paragraphs, force_answer=False)

    if args.corpus == "doc-rd-dev":
        docs = SquadCorpus().get_dev()
        if args.n_sample is not None:
            docs.sort(key=lambda x: x.doc_id)
            np.random.RandomState(0).shuffle(docs)
            docs = docs[:args.n_sample]

        print("Fetching document reader docs...")
        doc_rd_versions = get_doc_rd_doc(docs)
        print("Ranking and matching with questions...")
        for doc in tqdm(docs):
            doc_questions = flatten_iterable(x.questions for x in doc.paragraphs)
            paragraphs = doc_rd_versions[doc.title]
            ranks = ranker.rank([x.words for x in doc_questions], [x.text for x in paragraphs])
            for i, question in enumerate(doc_questions):
                para_ranks = np.argsort(ranks[i])
                for para_rank, para_num in enumerate(para_ranks[:args.n_paragraphs]):
                    # Just use dummy answer spans for these pairs
                    questions.append(RankedParagraphQuestion(
                        question.words,
                        TokenSpans(question.answer.answer_text, np.zeros((0, 2), dtype=np.int32)),
                        question.question_id, paragraphs[para_num], para_rank, para_num))
        rl = ResourceLoader()
    else:
        if args.corpus == "dev":
            docs = SquadCorpus().get_dev()
        else:
            docs = SquadCorpus().get_train()
        rl = SquadCorpus().get_resource_loader()

        if args.n_sample is not None:
            docs.sort(key=lambda x: x.doc_id)
            np.random.RandomState(0).shuffle(docs)
            docs = docs[:args.n_sample]

        for q in ranker.ranked_questions(docs):
            for i, p in enumerate(q.paragraphs):
                questions.append(RankedParagraphQuestion(
                    q.question,
                    TokenSpans(q.answer_text, p.answer_spans),
                    q.question_id,
                    ParagraphWithInverse([p.text], p.original_text, p.spans),
                    i, p.paragraph_num))

    print("Split %d docs into %d paragraphs" % (len(docs), len(questions)))

    questions = sorted(questions, key=lambda x: (x.n_context_words, len(x.question)), reverse=True)
    for q in questions:
        if len(q.answer.answer_spans.shape) != 2:
            raise ValueError()

    checkpoint = model_dir.get_best_weights()
    if checkpoint is not None:
        print("Using best weights")
    else:
        print("Using latest checkpoint")
        checkpoint = model_dir.get_latest_checkpoint()
        if checkpoint is None:
            raise ValueError("No checkpoints found")

    data = ParagraphAndQuestionDataset(questions, FixedOrderBatcher(args.batch_size, True))

    model = model_dir.get_model()
    # `async` became a reserved keyword in Python 3.7, so fetch the flag with getattr
    evaluation = trainer.test(
        model, [RecordParagraphSpanPrediction(args.answer_bound, True)],
        {args.corpus: data}, rl, checkpoint,
        not args.no_ema, getattr(args, "async"))[args.corpus]

    print("Saving result")
    output_file = args.output

    df = pd.DataFrame(evaluation.per_sample)
    df.sort_values(["question_id", "rank"], inplace=True, ascending=True)

    group_by = ["question_id"]
    f1 = compute_ranked_scores(df, "predicted_score", "text_f1", group_by)
    em = compute_ranked_scores(df, "predicted_score", "text_em", group_by)
    table = [["N Paragraphs", "EM", "F1"]]
    table += list([str(i + 1), "%.4f" % e, "%.4f" % f]
                  for i, (e, f) in enumerate(zip(em, f1)))
    print_table(table)

    df.to_csv(output_file, index=False)
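# A minimal sketch of what `compute_ranked_scores` is assumed to compute: for
# each question, walk its paragraphs in rank order and, at every cutoff k, take
# the metric of the highest-confidence paragraph seen so far; the result is the
# mean over questions at each cutoff. Names and padding details here are
# assumptions, not the repo's exact implementation.
def _ranked_scores_sketch(df, conf_col, metric_col, group_cols):
    import numpy as np
    per_question = []
    for _, group in df.groupby(group_cols):
        conf = group[conf_col].values
        metric = group[metric_col].values
        best, scores = 0, []
        for k in range(len(group)):
            if conf[k] > conf[best]:
                best = k
            scores.append(metric[best])
        per_question.append(scores)
    n = max(len(s) for s in per_question)
    # Pad with each question's final score so every question counts at each cutoff
    padded = [s + [s[-1]] * (n - len(s)) for s in per_question]
    return np.array(padded).mean(axis=0)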
def main():
    data = split_docs(SquadCorpus().get_train())
    np.random.shuffle(data)
    for point in data:
        print(" ".join(point.question))
def main(): parser = argparse.ArgumentParser("Train our ELMo model on SQuAD") parser.add_argument("loss_mode", choices=['default', 'confidence']) parser.add_argument("output_dir") parser.add_argument("--dim", type=int, default=90) parser.add_argument("--l2", type=float, default=0) parser.add_argument("--mode", choices=["input", "output", "both", "none"], default="both") parser.add_argument("--top_layer_only", action="store_true") parser.add_argument("--no-tfidf", action='store_true', help="Don't add TF-IDF negative examples") args = parser.parse_args() out = args.output_dir + "-" + datetime.now().strftime("%m%d-%H%M%S") dim = args.dim recurrent_layer = CudnnGru(dim, w_init=TruncatedNormal(stddev=0.05)) if args.loss_mode == 'default': n_epochs = 24 answer_encoder = SingleSpanAnswerEncoder() predictor = BoundsPredictor( ChainBiMapper(first_layer=recurrent_layer, second_layer=recurrent_layer)) batcher = ClusteredBatcher(45, ContextLenKey(), False, False) data = DocumentQaTrainingData(SquadCorpus(), None, batcher, batcher) elif args.loss_mode == 'confidence': if args.no_tfidf: prepro = SquadDefault() n_epochs = 15 else: prepro = SquadTfIdfRanker(NltkPlusStopWords(True), 4, True) n_epochs = 50 answer_encoder = DenseMultiSpanAnswerEncoder() predictor = ConfidencePredictor(ChainBiMapper( first_layer=recurrent_layer, second_layer=recurrent_layer, ), AttentionEncoder(), FullyConnected(80, activation="tanh"), aggregate="sum") eval_dataset = RandomParagraphSetDatasetBuilder( 100, 'flatten', True, 0) train_batching = ClusteredBatcher(45, ContextLenBucketedKey(3), True, False) data = PreprocessedData(SquadCorpus(), prepro, StratifyParagraphsBuilder(train_batching, 1), eval_dataset, eval_on_verified=False) data.preprocess(1) params = trainer.TrainParams(trainer.SerializableOptimizer( "Adadelta", dict(learning_rate=1.0)), ema=0.999, max_checkpoints_to_keep=2, async_encoding=10, num_epochs=n_epochs, log_period=30, eval_period=1200, save_period=1200, best_weights=("dev", "b17/text-f1"), eval_samples=dict(dev=None, train=8000)) lm_reduce = MapperSeq( ElmoLayer(args.l2, layer_norm=False, top_layer_only=args.top_layer_only), DropoutLayer(0.5), ) model = AttentionWithElmo( encoder=DocumentAndQuestionEncoder(answer_encoder), lm_model=SquadContextConcatSkip(), append_before_atten=(args.mode == "both" or args.mode == "output"), append_embed=(args.mode == "both" or args.mode == "input"), max_batch_size=128, word_embed=FixedWordEmbedder(vec_name="glove.840B.300d", word_vec_init_scale=0, learn_unk=False, cpu=True), char_embed=CharWordEmbedder(LearnedCharEmbedder(word_size_th=14, char_th=49, char_dim=20, init_scale=0.05, force_cpu=True), MaxPool(Conv1d(100, 5, 0.8)), shared_parameters=True), embed_mapper=SequenceMapperSeq( VariationalDropoutLayer(0.8), recurrent_layer, VariationalDropoutLayer(0.8), ), lm_reduce=None, lm_reduce_shared=lm_reduce, per_sentence=False, memory_builder=NullBiMapper(), attention=BiAttention(TriLinear(bias=True), True), match_encoder=SequenceMapperSeq( FullyConnected(dim * 2, activation="relu"), ResidualLayer( SequenceMapperSeq( VariationalDropoutLayer(0.8), recurrent_layer, VariationalDropoutLayer(0.8), StaticAttentionSelf(TriLinear(bias=True), ConcatWithProduct()), FullyConnected(dim * 2, activation="relu"), )), VariationalDropoutLayer(0.8)), predictor=predictor) with open(__file__, "r") as f: notes = f.read() notes = str(sorted(args.__dict__.items(), key=lambda x: x[0])) + "\n" + notes trainer.start_training( data, model, params, [LossEvaluator(), SpanEvaluator(bound=[17], 
text_eval="squad")], ModelDir(out), notes)
def main():
    parser = argparse.ArgumentParser(description='Evaluate a model on SQuAD')
    parser.add_argument('model', help='model directory to evaluate')
    parser.add_argument("-o", "--official_output", type=str,
                        help="where to output an official result file")
    parser.add_argument('-n', '--sample_questions', type=int, default=None,
                        help="(for testing) run on a subset of questions")
    parser.add_argument('--answer_bounds', nargs='+', type=int, default=[17],
                        help="Max size of answer")
    parser.add_argument('-b', '--batch_size', type=int, default=200,
                        help="Batch size, larger sizes can be faster but use more memory")
    parser.add_argument('-s', '--step', default=None,
                        help="Weights to load, can be a checkpoint step or 'latest'")
    parser.add_argument('-c', '--corpus', choices=["dev", "train"], default="dev")
    parser.add_argument('--no_ema', action="store_true",
                        help="Don't use EMA weights even if they exist")
    parser.add_argument('--none_prob', action="store_true",
                        help="Output none probability for samples")
    parser.add_argument('--elmo', action="store_true", help="Use elmo model")
    parser.add_argument('--per_question_loss_file', type=str, default=None,
                        help="Run question by question and output a question_id -> loss output to this file")
    args = parser.parse_known_args()[0]

    model_dir = ModelDir(args.model)

    corpus = SquadCorpus()
    if args.corpus == "dev":
        questions = corpus.get_dev()
    else:
        questions = corpus.get_train()
    questions = split_docs(questions)

    if args.sample_questions:
        # Sort for a deterministic order before taking the random sample
        questions = sorted(questions, key=lambda x: x.question_id)
        np.random.RandomState(0).shuffle(questions)
        questions = questions[:args.sample_questions]

    questions.sort(key=lambda x: x.n_context_words, reverse=True)
    dataset = ParagraphAndQuestionDataset(questions, FixedOrderBatcher(args.batch_size, True))

    evaluators = [SpanEvaluator(args.answer_bounds, text_eval="squad")]
    if args.official_output is not None:
        evaluators.append(RecordSpanPrediction(args.answer_bounds[0]))
    if args.per_question_loss_file is not None:
        evaluators.append(RecordSpanPredictionScore(args.answer_bounds[0],
                                                    args.batch_size, args.none_prob))

    if args.step is not None:
        if args.step == "latest":
            checkpoint = model_dir.get_latest_checkpoint()
        else:
            checkpoint = model_dir.get_checkpoint(int(args.step))
    else:
        checkpoint = model_dir.get_best_weights()
        if checkpoint is not None:
            print("Using best weights")
        else:
            print("Using latest checkpoint")
            checkpoint = model_dir.get_latest_checkpoint()

    model = model_dir.get_model()
    if args.elmo:
        model.lm_model.lm_vocab_file = './elmo-params/squad_train_dev_all_unique_tokens.txt'
        model.lm_model.options_file = './elmo-params/options_squad_lm_2x4096_512_2048cnn_2xhighway_skip.json'
        model.lm_model.weight_file = './elmo-params/squad_context_concat_lm_2x4096_512_2048cnn_2xhighway_skip.hdf5'
        model.lm_model.embed_weights_file = None

    evaluation = trainer.test(model, evaluators, {args.corpus: dataset},
                              corpus.get_resource_loader(), checkpoint,
                              not args.no_ema)[args.corpus]

    # Print the scalar results in a two-column table
    scalars = evaluation.scalars
    cols = list(sorted(scalars.keys()))
    table = [cols]
    header = ["Metric", ""]
    table.append([("%s" % scalars[x] if x in scalars else "-") for x in cols])
    print_table([header] + transpose_lists(table))

    # Save the official output
    if args.official_output is not None:
        quid_to_para = {}
        for x in questions:
            quid_to_para[x.question_id] = x.paragraph

        q_id_to_answers = {}
        q_ids = evaluation.per_sample["question_id"]
        spans = evaluation.per_sample["predicted_span"]
        for q_id, (start, end) in zip(q_ids, spans):
            text = quid_to_para[q_id].get_original_text(start, end)
            q_id_to_answers[q_id] = text

        with open(args.official_output, "w") as f:
            json.dump(q_id_to_answers, f)

    if args.per_question_loss_file is not None:
        print("Saving result")
        output_file = args.per_question_loss_file
        ids = evaluation.per_sample["question_ids"]
        f1s = evaluation.per_sample["text_f1"]
        ems = evaluation.per_sample["text_em"]
        losses = evaluation.per_sample["loss"]

        if args.none_prob:
            none_probs = evaluation.per_sample["none_probs"]
            """
            results = {question_id: {'f1': float(f1), 'em': float(em),
                                     'loss': float(loss), 'none_prob': float(none_prob)}
                       for question_id, f1, em, loss, none_prob in
                       zip(ids, f1s, ems, losses, none_probs)}
            """
            results = {question_id: float(none_prob)
                       for question_id, none_prob in zip(ids, none_probs)}
        else:
            results = {question_id: {'f1': float(f1), 'em': float(em), 'loss': float(loss)}
                       for question_id, f1, em, loss in zip(ids, f1s, ems, losses)}

        with open(output_file, 'w') as f:
            json.dump(results, f)