Example #1
def show_unk(corpus: SquadCorpus,
             vec_name: str,
             context: bool = True,
             question: bool = True):
    vecs = corpus.get_pruned_word_vecs(vec_name)
    docs = corpus.get_train()

    lower_unk = Counter()  # lowercased words missing from the vectors
    unk = Counter()        # words missing from the vectors as-is

    for doc in docs:
        for para in doc.paragraphs:
            if context:
                for sent in para.text:
                    for word in sent:
                        if word not in vecs:
                            unk[word] += 1
                        word = word.lower()
                        if word not in vecs:
                            lower_unk[word] += 1
            if question:
                # use a distinct name so the `question` flag is not shadowed
                for q in para.questions:
                    for word in q.words:
                        if word not in vecs:
                            unk[word] += 1
                        word = word.lower()
                        if word not in vecs:
                            lower_unk[word] += 1

    print("\n".join("%s: %d" % (k, v) for k, v in lower_unk.most_common()))
Example #2
def main():
    data = SquadCorpus()

    string_f1 = 0
    mapped_string_f1 = 0

    docs = data.get_train()
    n_questions = 0

    for doc in tqdm(docs):
        for para in doc.paragraphs:
            words = flatten_iterable(para.text)
            for question in para.questions:
                n_questions += 1
                span_answer = question.answer[0]
                span_str = " ".join(
                    words[span_answer.para_word_start:span_answer.para_word_end + 1])
                raw_answer = span_answer.text
                mapped_str = para.get_original_text(
                    span_answer.para_word_start, span_answer.para_word_end)

                string_f1 += f1_score(raw_answer, span_str)
                mapped_string_f1 += f1_score(raw_answer, mapped_str)

    print(string_f1 / n_questions)
    print(mapped_string_f1 / n_questions)
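This comparison hinges on f1_score. Assuming it follows the official SQuAD token-overlap definition, a minimal self-contained stand-in looks like the sketch below (the official script additionally lowercases and strips articles and punctuation, which is omitted here):

from collections import Counter

def f1_score(prediction: str, ground_truth: str) -> float:
    # Token-overlap F1 between the predicted and reference answer strings.
    pred_tokens = prediction.split()
    gold_tokens = ground_truth.split()
    common = Counter(pred_tokens) & Counter(gold_tokens)  # per-token overlap counts
    num_same = sum(common.values())
    if num_same == 0:
        return 0.0
    precision = num_same / len(pred_tokens)
    recall = num_same / len(gold_tokens)
    return 2 * precision * recall / (precision + recall)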
Example #3
def show_nums(corpus: SquadCorpus):
    n_regex = re.compile(".*[0-9].*")
    data = corpus.get_train()
    np.random.shuffle(data)

    for doc in data:
        paragraphs = list(doc.paragraphs)
        np.random.shuffle(paragraphs)
        for para in paragraphs:
            sentences = list(para.context) + [x.words for x in para.questions]
            np.random.shuffle(sentences)
            for words in sentences:
                for word in words:
                    if n_regex.match(word) is not None:
                        print(word)
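Because the leading and trailing .* make the anchored match equivalent to an unanchored search, re.search(r"[0-9]", word) expresses the same digit test more directly; a quick check of the equivalence for single tokens:

import re

n_regex = re.compile(".*[0-9].*")
for token in ["iPhone7", "seven", "3.14"]:
    # Both tests agree on whether a (newline-free) token contains a digit.
    assert bool(n_regex.match(token)) == bool(re.search(r"[0-9]", token))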
Example #4
def show_in_context_unks(corpus: SquadCorpus, vec_name):
    data = corpus.get_train()
    np.random.shuffle(data)
    vecs = corpus.get_pruned_word_vecs(vec_name)

    for doc in data:
        paragraphs = list(doc.paragraphs)
        np.random.shuffle(paragraphs)
        for para in paragraphs:
            sentences = list(para.text) + [x.words for x in para.questions]
            np.random.shuffle(sentences)
            for words in sentences:
                for i, word in enumerate(words):
                    if word.lower() not in vecs:
                        # temporarily mark the OOV word, print a +/-10 word window, then restore it
                        words[i] = "{{{" + word + "}}}"
                        print(" ".join(words[max(0, i - 10):min(len(words), i + 10)]))
                        words[i] = word
Example #5
def main():
    corpus = SquadCorpus()
    prepro = SquadTfIdfRanker(NltkPlusStopWords(True), OPTS.num_per_orig, True)
    orig_data = corpus.get_train() if OPTS.split == 'train' else corpus.get_dev()
    orig_lens = [
        len(p.text[0]) for doc in orig_data for p in doc.paragraphs
        for q in p.questions
    ]
    new_data = preprocess_par(orig_data,
                              corpus.evidence,
                              prepro,
                              n_processes=1)
    new_lens = [len(p.text) for q in new_data for p in q.paragraphs]
    print('%d original, mean %.2f words' %
          (len(orig_lens), np.mean(orig_lens)))
    print('%d new, mean %.2f words' % (len(new_lens), np.mean(new_lens)))
    if OPTS.out_file:
        write_output(OPTS.split, new_data, OPTS.out_file)
Example #6
def main():
  corpus = SquadCorpus()
  if OPTS.normalize_before_ranking:
    normalizer = WordNormalizer()
  else:
    normalizer = None
  if OPTS.use_vec_dist:
    word_vecs = corpus.get_pruned_word_vecs('glove.840B.300d')
    prepro = SquadVectorTfIdfRanker(NltkPlusStopWords(True), OPTS.num_per_orig, True, word_vecs, word_normalizer=normalizer)
  else:
    prepro = SquadTfIdfRanker(NltkPlusStopWords(True), OPTS.num_per_orig, True, word_normalizer=normalizer)
  orig_data = corpus.get_train() if OPTS.split == 'train' else corpus.get_dev()
  orig_lens = [len(p.text[0]) for doc in orig_data for p in doc.paragraphs
               for q in p.questions] 
  new_data = preprocess_par(orig_data, corpus.evidence, prepro, n_processes=1)
  new_lens = [len(p.text) for q in new_data for p in q.paragraphs]
  print('%d original, mean %.2f words' % (len(orig_lens), np.mean(orig_lens)))
  print('%d new, mean %.2f words' % (len(new_lens), np.mean(new_lens)))
  if OPTS.out_file:
    write_output(OPTS.split, new_data, OPTS.out_file)
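Examples #5 and #6 rank candidate paragraphs for each question by TF-IDF similarity before preprocessing. SquadTfIdfRanker's internals are not shown here; a minimal sketch of the general technique, using scikit-learn as a stand-in:

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def rank_paragraphs(question: str, paragraphs: list, top_k: int = 2) -> list:
    # Fit TF-IDF over the candidate paragraphs, score each against the
    # question, and keep the top_k paragraphs by cosine similarity.
    vectorizer = TfidfVectorizer(stop_words="english")
    para_vecs = vectorizer.fit_transform(paragraphs)
    q_vec = vectorizer.transform([question])
    scores = cosine_similarity(q_vec, para_vecs)[0]
    order = scores.argsort()[::-1][:top_k]
    return [paragraphs[i] for i in order]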
Example #7
def show_features(corpus: SquadCorpus, vec_name):
    print("Loading train docs")
    data = corpus.get_train()
    np.random.shuffle(data)
    data = data[:100]

    print("Loading vectors")
    vecs = corpus.get_pruned_word_vecs(vec_name)
    fe = BasicWordFeatures()

    grouped_by_features = defaultdict(Counter)

    print("start")

    for doc in data:
        paragraphs = list(doc.paragraphs)
        np.random.shuffle(paragraphs)
        for para in paragraphs:
            sentences = list(para.text) + [x.words for x in para.questions]
            np.random.shuffle(sentences)
            for words in sentences:
                for word in words:
                    if word.lower() not in vecs:
                        x = fe.get_word_features(word)
                        for i, val in enumerate(x):
                            if val > 0:
                                grouped_by_features[i][word] += 1

    for i in sorted(grouped_by_features.keys()):
        name = BasicWordFeatures.features_names[i]
        if name in ["Len"]:
            continue
        vals = grouped_by_features[i]
        print()
        print("*" * 30)
        print("%s-%d %d (%d)" % (name, i, len(vals), sum(vals.values())))
        for k, v in vals.most_common(30):
            print("%s: %d" % (k, v))
Example #8
def main():
    parser = argparse.ArgumentParser(description='Evaluate a model on SQuAD')
    parser.add_argument('model', help='model directory to evaluate')
    parser.add_argument("-o", "--official_output", type=str,
                        help="where to output an official result file")
    parser.add_argument('-n', '--sample_questions', type=int, default=None,
                        help="(for testing) run on a subset of questions")
    parser.add_argument('--answer_bounds', nargs='+', type=int, default=[17],
                        help="Max size of answer")
    parser.add_argument('-b', '--batch_size', type=int, default=200,
                        help="Batch size, larger sizes can be faster but uses more memory")
    parser.add_argument('-s', '--step', default=None,
                        help="Weights to load, can be a checkpoint step or 'latest'")
    # Add ja_test choice to test Multilingual QA dataset.
    parser.add_argument(
        '-c', '--corpus', choices=["dev", "train", "ja_test", "pred"], default="dev")
    parser.add_argument('--no_ema', action="store_true",
                        help="Don't use EMA weights even if they exist")
    # Add ja_test choice to test Multilingual QA pipeline.
    parser.add_argument('-p', '--pred_filepath', default=None,
                        help="The csv file path if you try pred mode")
    args = parser.parse_args()

    model_dir = ModelDir(args.model)

    corpus = SquadCorpus()
    if args.corpus == "dev":
        questions = corpus.get_dev()
    # Add ja_test choice to test Multilingual QA pipeline.
    elif args.corpus == "ja_test":
        questions = corpus.get_ja_test()
    # This is for prediction mode for MLQA pipeline.
    elif args.corpus == "pred":
        questions = create_pred_dataset(args.pred_filepath)
    else:
        questions = corpus.get_train()
    questions = split_docs(questions)

    if args.sample_questions:
        # sort for a deterministic order, then shuffle with a fixed seed before sampling
        questions = sorted(questions, key=lambda x: x.question_id)
        np.random.RandomState(0).shuffle(questions)
        questions = questions[:args.sample_questions]

    questions.sort(key=lambda x: x.n_context_words, reverse=True)
    dataset = ParagraphAndQuestionDataset(
        questions, FixedOrderBatcher(args.batch_size, True))

    evaluators = [SpanEvaluator(args.answer_bounds, text_eval="squad")]
    if args.official_output is not None:
        evaluators.append(RecordSpanPrediction(args.answer_bounds[0]))

    if args.step is not None:
        if args.step == "latest":
            checkpoint = model_dir.get_latest_checkpoint()
        else:
            checkpoint = model_dir.get_checkpoint(int(args.step))
    else:
        checkpoint = model_dir.get_best_weights()
        if checkpoint is not None:
            print("Using best weights")
        else:
            print("Using latest checkpoint")
            checkpoint = model_dir.get_latest_checkpoint()

    model = model_dir.get_model()

    evaluation = trainer.test(model, evaluators, {args.corpus: dataset},
                              corpus.get_resource_loader(), checkpoint, not args.no_ema)[args.corpus]

    # Print the scalar results in a two column table
    scalars = evaluation.scalars
    cols = list(sorted(scalars.keys()))
    table = [cols]
    header = ["Metric", ""]
    table.append([("%s" % scalars[x] if x in scalars else "-") for x in cols])
    print_table([header] + transpose_lists(table))

    # Save the official output
    if args.official_output is not None:
        quid_to_para = {}
        for x in questions:
            quid_to_para[x.question_id] = x.paragraph

        q_id_to_answers = {}
        q_ids = evaluation.per_sample["question_id"]
        spans = evaluation.per_sample["predicted_span"]
        for q_id, (start, end) in zip(q_ids, spans):
            text = quid_to_para[q_id].get_original_text(start, end)
            q_id_to_answers[q_id] = text

        with open(args.official_output, "w") as f:
            json.dump(q_id_to_answers, f)
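The file written for --official_output is a flat question_id -> answer-string JSON, the format the official SQuAD evaluation script consumes. Reading it back (the path here is whatever was passed to -o):

import json

with open("official_output.json") as f:  # hypothetical path; use your -o argument
    predictions = json.load(f)           # {"<question_id>": "<answer text>", ...}
print("%d answers" % len(predictions))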
Example #9
def main():
    parser = argparse.ArgumentParser(description='Evaluate a model on SQuAD')
    parser.add_argument('model', help='model directory to evaluate')
    parser.add_argument("-o", "--official_output", type=str, help="where to output an official result file")
    parser.add_argument('-n', '--sample_questions', type=int, default=None,
                        help="(for testing) run on a subset of questions")
    parser.add_argument('--answer_bounds', nargs='+', type=int, default=[17],
                        help="Max size of answer")
    parser.add_argument('-b', '--batch_size', type=int, default=200,
                        help="Batch size, larger sizes can be faster but uses more memory")
    parser.add_argument('-s', '--step', default=None,
                        help="Weights to load, can be a checkpoint step or 'latest'")
    parser.add_argument('-c', '--corpus', choices=["dev", "train"], default="dev")
    parser.add_argument('--no_ema', action="store_true", help="Don't use EMA weights even if they exist")
    parser.add_argument('--none_prob', action="store_true", help="Output none probability for samples")
    parser.add_argument('--elmo', action="store_true", help="Use elmo model")
    parser.add_argument('--per_question_loss_file', type=str, default=None,
            help="Run question by question and output a question_id -> loss output to this file")
    args = parser.parse_known_args()[0]

    model_dir = ModelDir(args.model)

    corpus = SquadCorpus()
    if args.corpus == "dev":
        questions = corpus.get_dev()
    else:
        questions = corpus.get_train()
    questions = split_docs(questions)

    if args.sample_questions:
        # sort for a deterministic order, then shuffle with a fixed seed before sampling
        questions = sorted(questions, key=lambda x: x.question_id)
        np.random.RandomState(0).shuffle(questions)
        questions = questions[:args.sample_questions]

    questions.sort(key=lambda x: x.n_context_words, reverse=True)
    dataset = ParagraphAndQuestionDataset(questions, FixedOrderBatcher(args.batch_size, True))

    evaluators = [SpanEvaluator(args.answer_bounds, text_eval="squad")]
    if args.official_output is not None:
        evaluators.append(RecordSpanPrediction(args.answer_bounds[0]))
    if args.per_question_loss_file is not None:
        evaluators.append(RecordSpanPredictionScore(args.answer_bounds[0], args.batch_size, args.none_prob))

    if args.step is not None:
        if args.step == "latest":
            checkpoint = model_dir.get_latest_checkpoint()
        else:
            checkpoint = model_dir.get_checkpoint(int(args.step))
    else:
        checkpoint = model_dir.get_best_weights()
        if checkpoint is not None:
            print("Using best weights")
        else:
            print("Using latest checkpoint")
            checkpoint = model_dir.get_latest_checkpoint()

    model = model_dir.get_model()
    if args.elmo:
        model.lm_model.lm_vocab_file = './elmo-params/squad_train_dev_all_unique_tokens.txt'
        model.lm_model.options_file = './elmo-params/options_squad_lm_2x4096_512_2048cnn_2xhighway_skip.json'
        model.lm_model.weight_file = './elmo-params/squad_context_concat_lm_2x4096_512_2048cnn_2xhighway_skip.hdf5'
        model.lm_model.embed_weights_file = None

    evaluation = trainer.test(model, evaluators, {args.corpus: dataset},
                              corpus.get_resource_loader(), checkpoint, not args.no_ema)[args.corpus]

    # Print the scalar results in a two column table
    scalars = evaluation.scalars
    cols = list(sorted(scalars.keys()))
    table = [cols]
    header = ["Metric", ""]
    table.append([("%s" % scalars[x] if x in scalars else "-") for x in cols])
    print_table([header] + transpose_lists(table))

    # Save the official output
    if args.official_output is not None:
        quid_to_para = {}
        for x in questions:
            quid_to_para[x.question_id] = x.paragraph

        q_id_to_answers = {}
        q_ids = evaluation.per_sample["question_id"]
        spans = evaluation.per_sample["predicted_span"]
        for q_id, (start, end) in zip(q_ids, spans):
            text = quid_to_para[q_id].get_original_text(start, end)
            q_id_to_answers[q_id] = text

        with open(args.official_output, "w") as f:
            json.dump(q_id_to_answers, f)

    if args.per_question_loss_file is not None:
        print("Saving result")
        output_file = args.per_question_loss_file
        ids = evaluation.per_sample["question_ids"]
        f1s = evaluation.per_sample["text_f1"]
        ems = evaluation.per_sample["text_em"]
        losses = evaluation.per_sample["loss"]

        if args.none_prob:
            none_probs = evaluation.per_sample["none_probs"]
            # Alternatively, keep the per-question metrics alongside the none probability:
            # results = {question_id: {'f1': float(f1), 'em': float(em), 'loss': float(loss),
            #                          'none_prob': float(none_prob)}
            #            for question_id, f1, em, loss, none_prob
            #            in zip(ids, f1s, ems, losses, none_probs)}
            results = {question_id: float(none_prob)
                       for question_id, none_prob in zip(ids, none_probs)}
        else:
            results = {question_id: {'f1': float(f1), 'em': float(em), 'loss': float(loss)}
                       for question_id, f1, em, loss in zip(ids, f1s, ems, losses)}

        with open(output_file, 'w') as f:
            json.dump(results, f)
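To aggregate the per-question file written above (without --none_prob, each value is a dict with the f1/em/loss fields produced by the code):

import json
import numpy as np

with open("per_question_loss.json") as f:  # hypothetical path; use your --per_question_loss_file
    results = json.load(f)

print("mean F1:   %.4f" % np.mean([r["f1"] for r in results.values()]))
print("mean EM:   %.4f" % np.mean([r["em"] for r in results.values()]))
print("mean loss: %.4f" % np.mean([r["loss"] for r in results.values()]))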