Example #1
    def check_eval(eval_treebank, ep, flag='dev'):
        # nonlocal best_eval_fscore
        # nonlocal best_eval_model_path
        # nonlocal best_eval_processed

        dev_start_time = time.time()

        eval_predicted = []
        for dev_start_index in range(0, len(eval_treebank),
                                     args.eval_batch_size):
            subbatch_trees = eval_treebank[dev_start_index:dev_start_index +
                                           args.eval_batch_size]
            subbatch_sentences = [[(leaf.tag, leaf.word)
                                   for leaf in tree.leaves()]
                                  for tree in subbatch_trees]
            predicted, _ = parser.parse_batch(subbatch_sentences)
            del _
            eval_predicted.extend([p.convert() for p in predicted])

        eval_fscore = evaluate.evalb(args.evalb_dir, eval_treebank,
                                     eval_predicted)

        logger.info(flag + ' eval '
                    'epoch {} '
                    "fscore {} "
                    "elapsed {} "
                    "total-elapsed {}".format(
                        ep,
                        eval_fscore,
                        format_elapsed(dev_start_time),
                        format_elapsed(start_time),
                    ))
        return eval_fscore
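# `format_elapsed` is used above but not defined in this snippet. A minimal sketch of
# a compatible helper (an assumption; the original implementation may differ):
import time

def format_elapsed(start_time):
    # Render the wall-clock time since `start_time`, e.g. "0h01m23s" or "1d2h03m04s".
    elapsed = int(time.time() - start_time)
    minutes, seconds = divmod(elapsed, 60)
    hours, minutes = divmod(minutes, 60)
    days, hours = divmod(hours, 24)
    formatted = "{}h{:02}m{:02}s".format(hours, minutes, seconds)
    return "{}d{}".format(days, formatted) if days > 0 else formatted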
Example #2
def run_test(args):
    print("Loading test trees from {}...".format(args.test_path))
    test_treebank = treebanks.load_trees(args.test_path, args.test_path_text,
                                         args.text_processing)
    print("Loaded {:,} test examples.".format(len(test_treebank)))

    if len(args.model_path) != 1:
        raise NotImplementedError("Ensembling multiple parsers is not "
                                  "implemented in this version of the code.")

    model_path = args.model_path[0]
    print("Loading model from {}...".format(model_path))
    parser = parse_chart.ChartParser.from_trained(model_path)
    if args.no_predict_tags and parser.f_tag is not None:
        print("Removing part-of-speech tagging head...")
        parser.f_tag = None
    if args.parallelize:
        parser.parallelize()
    elif torch.cuda.is_available():
        parser.cuda()

    print("Parsing test sentences...")
    start_time = time.time()

    test_predicted = parser.parse(
        test_treebank.without_gold_annotations(),
        subbatch_max_tokens=args.subbatch_max_tokens,
    )

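    # A huge pformat margin keeps each predicted tree's bracketing on one line,
    # both when printing to stdout below and when writing to the output file.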
    if args.output_path == "-":
        for tree in test_predicted:
            print(tree.pformat(margin=1e100))
    elif args.output_path:
        with open(args.output_path, "w") as outfile:
            for tree in test_predicted:
                outfile.write("{}\n".format(tree.pformat(margin=1e100)))

    # The tree loader does some preprocessing to the trees (e.g. stripping TOP
    # symbols or SPMRL morphological features). We compare with the input file
    # directly to be extra careful about not corrupting the evaluation. We also
    # allow specifying a separate "raw" file for the gold trees: the inputs to
    # our parser have traces removed and may have predicted tags substituted,
    # and we may wish to compare against the raw gold trees to make sure we
    # haven't made a mistake. As far as we can tell all of these variations give
    # equivalent results.
    ref_gold_path = args.test_path
    if args.test_path_raw is not None:
        print("Comparing with raw trees from", args.test_path_raw)
        ref_gold_path = args.test_path_raw

    test_fscore = evaluate.evalb(args.evalb_dir,
                                 test_treebank.trees,
                                 test_predicted,
                                 ref_gold_path=ref_gold_path)

    print("test-fscore {} "
          "test-elapsed {}".format(
              test_fscore,
              format_elapsed(start_time),
          ))
Example #3
def run_test(args):
    print("Loading test trees from {}...".format(args.test_path))
    test_treebank = trees.load_trees(args.test_path)
    print("Loaded {:,} test examples.".format(len(test_treebank)))

    print("Loading model from {}...".format(args.model_path_base))
    # model = dy.ParameterCollection()
    # [parser] = dy.load(args.model_path_base, model)
    parser = torch.load(args.model_path_base)

    print("Parsing test sentences...")

    start_time = time.time()

    test_predicted = []
    for tree in test_treebank:
        # dy.renew_cg()
        parser.eval()
        sentence = [(leaf.tag, leaf.word) for leaf in tree.leaves()]
        predicted, _ = parser.parse(sentence)
        test_predicted.append(predicted.convert())

    test_fscore = evaluate.evalb(args.evalb_dir, test_treebank, test_predicted)

    print("test-fscore {} "
          "test-elapsed {}".format(
              test_fscore,
              format_elapsed(start_time),
          ))
    def check_dev(epoch_num):
        nonlocal best_dev_fscore
        nonlocal best_model_path
        nonlocal best_dev_processed

        dev_start_time = time.time()

        parser.eval()

        dev_predicted = []

        for dev_start_index in range(0, len(dev_treebank),
                                     args.eval_batch_size):
            subbatch_trees = dev_treebank[dev_start_index:dev_start_index +
                                          args.eval_batch_size]
            subbatch_sentences = [[(leaf.tag, leaf.word)
                                   for leaf in tree.leaves()]
                                  for tree in subbatch_trees]

            (
                predicted,
                _,
            ) = parser.parse_batch(subbatch_sentences)
            del _

            dev_predicted.extend([p.convert() for p in predicted])
        dev_fscore = evaluate.evalb(args.evalb_dir, dev_treebank,
                                    dev_predicted)

        print("\n"
              "dev-fscore {} "
              "dev-elapsed {} "
              "total-elapsed {}".format(dev_fscore,
                                        format_elapsed(dev_start_time),
                                        format_elapsed(start_time)))

        if dev_fscore.fscore > best_dev_fscore:
            if best_model_path is not None:
                extensions = [".pt"]
                for ext in extensions:
                    path = best_model_path + ext
                    if os.path.exists(path):
                        print(
                            "Removing previous model file {}...".format(path))
                        os.remove(path)

            best_dev_fscore = dev_fscore.fscore
            best_model_path = "{}_best_dev={:.2f}".format(
                args.model_path_base, dev_fscore.fscore)
            best_dev_processed = total_processed
            print("Saving new best model to {}...".format(best_model_path))
            torch.save(
                {
                    "spec": parser.spec,
                    "state_dict": parser.state_dict(),
                    "trainer": trainer.state_dict(),
                },
                best_model_path + ".pt",
            )
Example #5
    def check_dev():
        nonlocal best_dev_fscore
        nonlocal best_dev_model_path
        nonlocal dev_efscore

        dev_start_time = time.time()

        dev_predicted = []
        for dev_start_index in range(0, len(dev_treebank),
                                     args.eval_batch_size):
            subbatch_trees = dev_treebank[dev_start_index:dev_start_index +
                                          args.eval_batch_size]
            subbatch_sentences = [[(leaf.tag, leaf.word)
                                   for leaf in tree.leaves()]
                                  for tree in subbatch_trees]
            predicted, _ = parser.parse_batch(subbatch_sentences)
            del _
            dev_predicted.extend([p.convert() for p in predicted])

        dev_fscore = evaluate.evalb(args.evalb_dir, dev_treebank,
                                    dev_predicted)

        print(" dev-fscore {} "
              "dev-elapsed {} "
              "total-elapsed {}".format(
                  dev_fscore,
                  format_elapsed(dev_start_time),
                  format_elapsed(start_time),
              ),
              flush=True)

        dev_efscore = evaluate_EDITED.Evaluate(dev_treebank, dev_predicted)

        print(" dev-Efscore: {}".format(dev_efscore), flush=True)

        # MJ - keep model with best efscore
        if dev_efscore.efscore > best_dev_fscore:
            best_dev_fscore = dev_efscore.efscore
            if best_dev_model_path is not None:
                extensions = [".pt"]
                for ext in extensions:
                    path = best_dev_model_path + ext
                    if os.path.exists(path):
                        print(
                            " Removing previous model file {}...".format(path),
                            flush=True)
                        os.remove(path)

            best_dev_model_path = "{}_Edev={:.4}".format(
                args.model_path_base, best_dev_fscore)
            print(" Saving new best model to {}...".format(
                best_dev_model_path),
                  flush=True)
            torch.save(
                {
                    'spec': parser.spec,
                    'state_dict': parser.state_dict(),
                    'trainer': trainer.state_dict(),
                }, best_dev_model_path + ".pt")
def run_parse_extra(args):
    if args.output_path != '-' and os.path.exists(args.output_path):
        print("Error: output file already exists:", args.output_path)
        return

    print("Loading parse trees from {}...".format(args.input_path))
    treebank = trees.load_trees(args.input_path)
    if args.max_len_eval > 0:
        treebank = [
            tree for tree in treebank
            if len(list(tree.leaves())) <= args.max_len_eval
        ]
    print("Loaded {:,} parse tree examples.".format(len(treebank)))

    print("Loading model from {}...".format(args.model_path_base))
    assert args.model_path_base.endswith(
        ".pt"), "Only pytorch savefiles supported"

    info = torch_load(args.model_path_base)
    assert 'hparams' in info['spec'], "Older savefiles not supported"
    parser = parse_nk.NKChartParser.from_spec(info['spec'], info['state_dict'])

    print("Parsing test sentences...")
    start_time = time.time()

    new_treebank = []
    for start_index in range(0, len(treebank), args.eval_batch_size):
        subbatch_trees = treebank[start_index:start_index +
                                  args.eval_batch_size]
        subbatch_sentences = [[(leaf.tag, leaf.word) for leaf in tree.leaves()]
                              for tree in subbatch_trees]
        predicted, _ = parser.parse_batch(subbatch_sentences)
        del _
        new_treebank.extend([p.convert() for p in predicted])

    assert len(treebank) == len(new_treebank), (len(treebank),
                                                len(new_treebank))

    if args.write_parse is not None:
        print('writing to {}'.format(args.write_parse))
        f = open(args.write_parse, 'w')
        for x, y in zip(new_treebank, treebank):
            gold = '(ROOT {})'.format(y.linearize())
            pred = '(ROOT {})'.format(x.linearize())
            ex = dict(gold=gold, pred=pred)
            f.write(json.dumps(ex) + '\n')
        f.close()

    test_fscore = evaluate.evalb(args.evalb_dir,
                                 treebank,
                                 new_treebank,
                                 ref_gold_path=None)

    print("test-fscore {} "
          "test-elapsed {}".format(
              test_fscore,
              format_elapsed(start_time),
          ))
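# `torch_load` is called above but not defined in this snippet. A plausible sketch
# (an assumption): load onto the GPU when available, otherwise map tensors to CPU.
import torch

def torch_load(load_path):
    if torch.cuda.is_available():
        return torch.load(load_path)
    return torch.load(load_path, map_location=lambda storage, location: storage)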
Example #7
def run_test(args):
    print("Loading test trees from {}...".format(args.test_path))
    test_treebank = trees.load_trees(args.test_path)
    print("Loaded {:,} test examples.".format(len(test_treebank)))

    print("Loading model from {}...".format(args.model_path_base))
    assert args.model_path_base.endswith(
        ".pt"), "Only pytorch savefiles supported"

    info = torch_load(args.model_path_base)
    assert 'hparams' in info['spec'], "Older savefiles not supported"
    parser = SAPar_model.SAChartParser.from_spec(info['spec'],
                                                 info['state_dict'])

    print("Parsing test sentences...")
    start_time = time.time()

    test_predicted = []
    for start_index in tqdm(range(0, len(test_treebank),
                                  args.eval_batch_size)):
        subbatch_trees = test_treebank[start_index:start_index +
                                       args.eval_batch_size]
        subbatch_sentences = [[(leaf.tag, leaf.word) for leaf in tree.leaves()]
                              for tree in subbatch_trees]
        predicted, _ = parser.parse_batch(subbatch_sentences)
        del _
        test_predicted.extend([p.convert() for p in predicted])

    # The tree loader does some preprocessing to the trees (e.g. stripping TOP
    # symbols or SPMRL morphological features). We compare with the input file
    # directly to be extra careful about not corrupting the evaluation. We also
    # allow specifying a separate "raw" file for the gold trees: the inputs to
    # our parser have traces removed and may have predicted tags substituted,
    # and we may wish to compare against the raw gold trees to make sure we
    # haven't made a mistake. As far as we can tell all of these variations give
    # equivalent results.
    ref_gold_path = args.test_path
    if args.test_path_raw is not None:
        print("Comparing with raw trees from", args.test_path_raw)
        ref_gold_path = args.test_path_raw

    test_fscore = evaluate.evalb(args.evalb_dir,
                                 test_treebank,
                                 test_predicted,
                                 ref_gold_path=ref_gold_path)

    model_name = args.model_path_base[args.model_path_base.rfind('/') +
                                      1:args.model_path_base.rfind('.')]
    output_file = './results/' + model_name + '.txt'
    with open(output_file, "w") as outfile:
        for tree in test_predicted:
            outfile.write("{}\n".format(tree.linearize()))

    print("test-fscore {} "
          "test-elapsed {}".format(
              test_fscore,
              format_elapsed(start_time),
          ))
    def check_dev():
        nonlocal best_dev_fscore
        nonlocal best_dev_model_path
        nonlocal best_dev_processed

        dev_start_time = time.time()

        dev_predicted = []
        for dev_start_index in range(0, len(dev_treebank),
                                     args.eval_batch_size):
            subbatch_trees = dev_treebank[dev_start_index:dev_start_index +
                                          args.eval_batch_size]
            subbatch_sentences = [[(leaf.tag, leaf.word)
                                   for leaf in tree.leaves()]
                                  for tree in subbatch_trees]
            predicted, _ = parser.parse_batch(
                subbatch_sentences,
                span_index=span_index,
                k=K,
                zero_empty=parser.zero_empty,
                train_nn=args.train_through_nn,
            )
            del _
            dev_predicted.extend([p.convert() for p in predicted])

        dev_fscore = evaluate.evalb(args.evalb_dir, dev_treebank,
                                    dev_predicted)

        print("dev-fscore {} "
              "dev-elapsed {} "
              "total-elapsed {}".format(
                  dev_fscore,
                  format_elapsed(dev_start_time),
                  format_elapsed(start_time),
              ))

        if dev_fscore.fscore > best_dev_fscore:
            if best_dev_model_path is not None:
                extensions = [".pt"]
                for ext in extensions:
                    path = best_dev_model_path + ext
                    if os.path.exists(path):
                        print(
                            "Removing previous model file {}...".format(path))
                        os.remove(path)

            best_dev_fscore = dev_fscore.fscore
            best_dev_model_path = "{}_dev={:.2f}".format(
                args.model_path_base, dev_fscore.fscore)
            best_dev_processed = total_processed
            print("Saving new best model to {}...".format(best_dev_model_path))
            torch.save(
                {
                    'spec': parser.spec,
                    'state_dict': parser.state_dict(),
                    'trainer': trainer.state_dict(),
                }, best_dev_model_path + ".pt")
Example #9
def run_ensemble(args):
    print("Loading test trees from {}...".format(args.test_path))
    test_treebank = trees.load_trees(args.test_path)
    print("Loaded {:,} test examples.".format(len(test_treebank)))

    parsers = []
    for model_path_base in args.model_path_base:
        print("Loading model from {}...".format(model_path_base))
        assert model_path_base.endswith(".pt"), "Only pytorch savefiles supported"

        info = torch_load(model_path_base)
        assert 'hparams' in info['spec'], "Older savefiles not supported"
        parser = parse_nk.NKChartParser.from_spec(info['spec'], info['state_dict'])
        parsers.append(parser)

    # Ensure that label scores charts produced by the models can be combined
    # using simple averaging
    ref_label_vocab = parsers[0].label_vocab
    for parser in parsers:
        assert parser.label_vocab.indices == ref_label_vocab.indices

    print("Parsing test sentences...")
    start_time = time.time()

    test_predicted = []
    # Ensemble by averaging label score charts from different models
    # We did not observe any benefits to doing weighted averaging, probably
    # because all our parsers output label scores of around the same magnitude
    for start_index in range(0, len(test_treebank), args.eval_batch_size):
        subbatch_trees = test_treebank[start_index:start_index+args.eval_batch_size]
        subbatch_sentences = [[(leaf.tag, leaf.word) for leaf in tree.leaves()] for tree in subbatch_trees]

        chart_lists = []
        for parser in parsers:
            charts = parser.parse_batch(subbatch_sentences, return_label_scores_charts=True)
            chart_lists.append(charts)

        subbatch_charts = [np.mean(list(sentence_charts), 0) for sentence_charts in zip(*chart_lists)]
        predicted, _ = parsers[0].decode_from_chart_batch(subbatch_sentences, subbatch_charts)
        del _
        test_predicted.extend([p.convert() for p in predicted])

    test_fscore = evaluate.evalb(args.evalb_dir, test_treebank, test_predicted, ref_gold_path=args.test_path)

    print(
        "test-fscore {} "
        "test-elapsed {}".format(
            test_fscore,
            format_elapsed(start_time),
        )
    )
def run_test(args):

    test_path = args.test_ptb_path

    if args.dataset == "ctb":
        test_path = args.test_ctb_path

    print("Loading model from {}...".format(args.model_path_base))
    assert args.model_path_base.endswith(
        ".pt"), "Only pytorch savefiles supported"

    info = torch_load(args.model_path_base)
    assert "hparams" in info["spec"], "Older savefiles not supported"
    parser = Lparser.ChartParser.from_spec(info["spec"], info["state_dict"])
    parser.eval()

    print("Loading test trees from {}...".format(test_path))
    test_treebank = trees.load_trees(test_path)
    print("Loaded {:,} test examples.".format(len(test_treebank)))

    print("Parsing test sentences...")
    start_time = time.time()

    punct_set = "." "``" "''" ":" ","
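    # Note: the adjacent string literals above concatenate into the single string
    # ".``'':," (so membership tests are substring checks), not a set of tags.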

    parser.eval()
    test_predicted = []
    for start_index in range(0, len(test_treebank), args.eval_batch_size):
        subbatch_trees = test_treebank[start_index:start_index +
                                       args.eval_batch_size]

        subbatch_sentences = [[(leaf.tag, leaf.word) for leaf in tree.leaves()]
                              for tree in subbatch_trees]

        (
            predicted,
            _,
        ) = parser.parse_batch(subbatch_sentences)
        del _
        test_predicted.extend([p.convert() for p in predicted])

    test_fscore = evaluate.evalb(args.evalb_dir, test_treebank, test_predicted)
    print("test-fscore {} "
          "test-elapsed {}".format(
              test_fscore,
              format_elapsed(start_time),
          ))
    def check_dev():
        nonlocal best_dev_fscore
        nonlocal best_dev_model_path
        nonlocal best_dev_processed

        dev_start_time = time.time()

        dev_predicted = parser.parse(
            dev_treebank.without_gold_annotations(),
            subbatch_max_tokens=args.subbatch_max_tokens,
        )
        dev_fscore = evaluate.evalb(args.evalb_dir, dev_treebank.trees, dev_predicted)

        print(
            "dev-fscore {} "
            "dev-elapsed {} "
            "total-elapsed {}".format(
                dev_fscore,
                format_elapsed(dev_start_time),
                format_elapsed(start_time),
            )
        )

        if dev_fscore.fscore > best_dev_fscore:
            if best_dev_model_path is not None:
                extensions = [".pt"]
                for ext in extensions:
                    path = best_dev_model_path + ext
                    if os.path.exists(path):
                        print("Removing previous model file {}...".format(path))
                        os.remove(path)

            best_dev_fscore = dev_fscore.fscore
            best_dev_model_path = "{}_dev={:.2f}".format(
                args.model_path_base, dev_fscore.fscore
            )
            best_dev_processed = total_processed
            print("Saving new best model to {}...".format(best_dev_model_path))
            torch.save(
                {
                    "config": parser.config,
                    "state_dict": parser.state_dict(),
                    "optimizer": optimizer.state_dict(),
                },
                best_dev_model_path + ".pt",
            )
Example #12
    def check_dev():
        nonlocal best_dev_fscore
        nonlocal best_dev_model_path

        dev_start_time = time.time()

        dev_predicted = []
        for tree in dev_treebank:
            # dy.renew_cg()
            parser.eval()
            sentence = [(leaf.tag, leaf.word) for leaf in tree.leaves()]
            predicted, _ = parser.parse(sentence)
            dev_predicted.append(predicted.convert())

        dev_fscore = evaluate.evalb(args.evalb_dir, dev_treebank,
                                    dev_predicted)

        print("dev-fscore {} "
              "dev-elapsed {} "
              "total-elapsed {}".format(
                  dev_fscore,
                  format_elapsed(dev_start_time),
                  format_elapsed(start_time),
              ))

        if dev_fscore.fscore > best_dev_fscore:
            if best_dev_model_path is not None:
                # for ext in [".data", ".meta"]:
                #     path = best_dev_model_path + ext
                #     if os.path.exists(path):
                #         print("Removing previous model file {}...".format(path))
                #         os.remove(path)
                path = best_dev_model_path
                if os.path.exists(path):
                    print("Removing previous model file {}...".format(path))
                    os.remove(path)

            best_dev_fscore = dev_fscore.fscore
            # best_dev_model_path = "{}_dev={:.2f}".format(
            best_dev_model_path = "{}_dev={:.2f}.pth".format(
                args.model_path_base, dev_fscore.fscore)
            print("Saving new best model to {}...".format(best_dev_model_path))
            # dy.save(best_dev_model_path, [parser])
            torch.save(parser, best_dev_model_path)
Example #13
def check_performance(parser, treebank, sentence_embeddings, args):
    dev_start_time = time.time()

    dev_predicted = []
    for tree_index, tree in enumerate(treebank):
        if tree_index % 100 == 0:
            dy.renew_cg()
        if sentence_embeddings is not None:
            elmo_embeddings = dy.inputTensor(sentence_embeddings[str(tree_index)][:, :, :])
        else:
            elmo_embeddings = None
        sentence = [(leaf.tag, leaf.word) for leaf in tree.leaves]
        predicted = parser.span_parser(sentence,
                                       is_train=False,
                                       elmo_embeddings=elmo_embeddings)
        dev_predicted.append(predicted.convert())

    dev_fscore = evaluate.evalb('EVALB', treebank, dev_predicted, args=args, name="dev")
    return dev_fscore, dev_start_time
def run_test(args):
    print("Loading test trees from {}...".format(args.test_path))
    test_treebank = treebanks.load_trees(
        args.test_path, args.test_path_text, args.text_processing
    )
    print("Loaded {:,} test examples.".format(len(test_treebank)))

    print("Loading model from {}...".format(args.model_path))
    parser = Parser(args.model_path, batch_size=args.batch_size)

    print("Parsing test sentences...")
    start_time = time.time()

    if args.output_path == "-":
        output_file = sys.stdout
    elif args.output_path:
        output_file = open(args.output_path, "w")
    else:
        output_file = None

    test_predicted = []
    for predicted_tree in parser.parse_sents(
        inputs_from_treebank(test_treebank, predict_tags=args.predict_tags)
    ):
        test_predicted.append(predicted_tree)
        if output_file is not None:
            print(predicted_tree.pformat(margin=1e100), file=output_file)

    test_fscore = evaluate.evalb(args.evalb_dir, test_treebank.trees, test_predicted)

    print(
        "test-fscore {} "
        "test-elapsed {}".format(
            test_fscore,
            format_elapsed(start_time),
        )
    )
Example #15
    def check_dev():
        nonlocal best_dev_fscore
        nonlocal best_dev_model_path

        dev_start_time = time.time()

        dev_predicted = run_eval(parser, dev_treebank)
        dev_fscore = evaluate.evalb(args.evalb_dir, dev_treebank,
                                    dev_predicted)

        print("dev-fscore {} "
              "dev-elapsed {} "
              "total-elapsed {}".format(
                  dev_fscore,
                  format_elapsed(dev_start_time),
                  format_elapsed(start_time),
              ))

        if dev_fscore.fscore > best_dev_fscore:
            if best_dev_model_path is not None:
                # for ext in [".data", ".meta"]:
                #     path = best_dev_model_path + ext
                #     if os.path.exists(path):
                #         print("Removing previous model file {}...".format(path))
                #         os.remove(path)
                path = best_dev_model_path
                if os.path.exists(path):
                    print("Removing previous model file {}...".format(path))
                    os.remove(path)

            best_dev_fscore = dev_fscore.fscore
            # best_dev_model_path = "{}_dev={:.2f}".format(
            best_dev_model_path = "{}_dev={:.2f}.pth".format(
                args.model_path_base, dev_fscore.fscore)
            print("Saving new best model to {}...".format(best_dev_model_path))
            # dy.save(best_dev_model_path, [parser])
            torch.save(parser, best_dev_model_path)
Example #16
def run_test(args):
    print("Loading test trees from {}...".format(args.test_path))
    test_treebank = trees.load_trees(args.test_path)
    print("Loaded {:,} test examples.".format(len(test_treebank)))

    print("Loading model from {}...".format(args.model_path_base))
    # model = dy.ParameterCollection()
    # [parser] = dy.load(args.model_path_base, model)
    parser = torch.load(args.model_path_base)
    if torch.cuda.is_available():
        parser = parser.cuda()

    print("Parsing test sentences...")

    start_time = time.time()

    test_predicted = run_eval(parser, test_treebank)
    test_fscore = evaluate.evalb(args.evalb_dir, test_treebank, test_predicted)

    print("test-fscore {} "
          "test-elapsed {}".format(
              test_fscore,
              format_elapsed(start_time),
          ))
Example #17
def main(args):

    parser = Parser(args.grammar, args.expand_binaries)
    print(
        'Grammar rules:',
        f'{parser.grammar.num_lexical_rules:,} lexical,',
        f'{parser.grammar.num_unary_rules:,} unary,',
        f'{parser.grammar.num_binary_rules:,} binary.'
    )

    if args.infile:

        print(f'Predicting trees for tokens in `{args.infile}`.')
        print(f'Writing trees to file `{args.outfile}`...')

        if args.parallel:
            trees = predict_from_file_parallel(
                parser, args.infile, args.num_lines, args.tokenize)
        else:
            trees = predict_from_file(
                parser, args.infile, args.num_lines, args.tokenize)

        with open(args.outfile, 'w') as fout:
            print('\n'.join(trees), file=fout)

        if args.show:
            show(args.outfile)

        print('Evaluating bracket score...')
        if args.goldfile:
            try:
                evalb(args.evalb_dir, args.outfile, args.goldfile, args.result, args.ignore_empty)
                if args.show:
                    show(args.result)
            except Exception:
                exit('Could not evaluate trees. Maybe you did not parse the entire file?')

        print(f'Finished. Results saved to `{args.result}`.')

    elif args.treefile:
        num_trees = 10 if args.num_lines is None else args.num_lines

        parses = predict_from_trees(parser, args.treefile)

        fscores = []
        for i in range(num_trees):
            gold, pred, prec, rec, fscore = next(parses)
            fscores.append(fscore)

            print(f'Tree {i}, f1={fscore:.3f}.')
            print()
            print('Gold:')
            gold.pretty_print()
            print()
            print('Pred:')
            pred.pretty_print()
            print()

        print()
        print('All F1 =', ' '.join([f'{fscore:.3f}' for fscore in fscores]))
        print('Avg F1 = ', sum(fscores) / len(fscores))

    elif args.syneval:
        syneval(parser, args.syneval, args.outfile, parallel=args.parallel, short=args.short)

    else:
        if args.sent:
            sentence = tokenize.word_tokenize(args.sent)
        else:
            # Demo: use a default test-sentence with gold tree.
            sentence, gold = SENT.split(), GOLD

        print('Parsing sentence...')
        start = time.time()
        tree, score = parser.parse(sentence, use_numpy=args.use_numpy)
        elapsed = time.time() - start
        tree.un_chomsky_normal_form()

        print('Predicted.')
        print()
        tree.pretty_print()
        print('Logprob:', score)
        print()

        if not args.sent:
            gold = Tree.fromstring(gold)
            prec, recall, fscore = parser.evalb(
                gold.pformat(margin=np.inf), tree.pformat(margin=np.inf))
            print('Gold.')
            gold.pretty_print()
            print(f'Precision = {prec:.3f}')
            print(f'Recall = {recall:.3f}')
            print(f'F1 = {fscore:.3f}')
            print()

        print(f'Parse-time: {elapsed:.3f}s.')

        if args.perplexity:
            perplexity = parser.perplexity(sentence)
            print('Perplexity:', round(perplexity, 2))
    def check_dev():
        nonlocal best_dev_fscore
        nonlocal best_dev_model_path
        nonlocal best_dev_processed

        dev_start_time = time.time()

        dev_predicted = []
        eval_batch_size = args.eval_batch_size
        for dev_start_index in range(0, len(dev_treebank), eval_batch_size):
            subbatch_trees = dev_treebank[dev_start_index:dev_start_index \
                                        + eval_batch_size]
            subbatch_sent_ids = dev_sent_ids[dev_start_index:dev_start_index \
                    + eval_batch_size]
            if hparams.seg:
                subbatch_txt = [tree[0] for tree in subbatch_trees]
                subbatch_lbl = [tree[1] for tree in subbatch_trees]
                subbatch_sentences = [[(lbl,txt) for lbl,txt in zip(sent_lbl,sent_txt)] \
                                      for sent_lbl,sent_txt in zip(subbatch_lbl,subbatch_txt)]
            else:
                subbatch_sentences = [[(leaf.tag, leaf.word) for leaf in \
                                 tree.leaves()] for tree in subbatch_trees]

            subbatch_features = load_features(subbatch_sent_ids, dev_feat_dict)

            predicted, _ = parser.parse_batch(subbatch_sentences, \
                    subbatch_sent_ids, subbatch_features)

            del _
            if hparams.seg:
                dev_predicted.extend(predicted)
            else:
                dev_predicted.extend([p.convert() for p in predicted])

        if hparams.seg:
            dev_fscore = evaluate.seg_fscore(dev_treebank, dev_predicted)
        else:
            dev_fscore = evaluate.evalb(args.evalb_dir, dev_treebank,
                                        dev_predicted)
        """
        with open('tmp_preds.txt','w') as f:
            for pred in dev_predicted:
                f.write(pred.linearize())
                f.write('\n')
        with open('tmp_gold.txt','w') as f:
            for gold in dev_treebank:
                f.write(gold.linearize())
                f.write('\n')
        """
        print("dev-fscore {} "
              "dev-elapsed {} "
              "total-elapsed {}".format(
                  dev_fscore,
                  format_elapsed(dev_start_time),
                  format_elapsed(start_time),
              ))

        sys.stdout.flush()

        if dev_fscore.fscore > best_dev_fscore:
            if best_dev_model_path is not None:
                extensions = [".pt"]
                for ext in extensions:
                    path = best_dev_model_path + ext
                    if os.path.exists(path):
                        print(
                            "Removing previous model file {}...".format(path))
                        os.remove(path)

            best_dev_fscore = dev_fscore.fscore
            best_dev_model_path = "{}_dev={:.2f}".format(
                args.model_path_base, dev_fscore.fscore)
            best_dev_processed = total_processed
            print("Saving new best model to {}...".format(best_dev_model_path))
            torch.save(
                {
                    'spec': parser.spec,
                    'state_dict': parser.state_dict(),
                    'trainer': trainer.state_dict(),
                }, best_dev_model_path + ".pt")
            sys.stdout.flush()
Example #19
print("Parsing test sentences using tensorflow...")
start_time = time.time()

test_predicted = []
for start_index in range(0, len(test_treebank), args.eval_batch_size):
# for start_index in range(0, 2, 2):
    print(start_index, format_elapsed(start_time))
    subbatch_trees = test_treebank[start_index:start_index+args.eval_batch_size]
    subbatch_sentences = [[(leaf.tag, leaf.word) for leaf in tree.leaves()] for tree in subbatch_trees]
    predicted, _ = tf_parse_batch(subbatch_sentences)

    del _
    test_predicted.extend([p.convert() for p in predicted])

test_fscore = evaluate.evalb(args.evalb_dir, test_treebank[:len(test_predicted)], test_predicted)

print('Done', format_elapsed(start_time))
str(test_fscore)

#%%

input_node_names = [the_inp_tokens.name.split(':')[0], the_inp_mask.name.split(':')[0]]
output_node_names = [the_out_chart.name.split(':')[0], the_out_tags.name.split(':')[0]]

print("Input node names:", input_node_names)
print("Output node names:", output_node_names)

graph_def = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, output_node_names)
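# A possible next step (not part of the original snippet): serialize the frozen
# graph to disk. The output filename here is just a placeholder.
with open('frozen_parser.pb', 'wb') as f:
    f.write(graph_def.SerializeToString())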

#%%
Example #20
def test_on_parses(args):
    if not os.path.exists(args.experiment_directory):
        os.mkdir(args.experiment_directory)
    model = dy.ParameterCollection()
    [parser] = dy.load(args.model_path_base, model)

    treebank = trees.load_trees(args.input_file, strip_top=True, filter_none=True)
    output = [tree.linearize() for tree in treebank]
    with open(os.path.join(args.experiment_directory, 'parses.txt'), 'w') as f:
        f.write('\n'.join(output))
    sentence_embeddings = h5py.File(args.elmo_embeddings_file_path, 'r')

    test_predicted = []
    start_time = time.time()
    total_log_likelihood = 0
    total_confusion_matrix = {}
    total_turned_off = 0
    ranks = []
    num_correct = 0
    total = 0
    for tree_index, tree in enumerate(treebank):
        if tree_index % 100 == 0:
            print(tree_index)
            dy.renew_cg()
        sentence = [(leaf.tag, leaf.word) for leaf in tree.leaves]
        elmo_embeddings_np = sentence_embeddings[str(tree_index)][:, :, :]
        assert elmo_embeddings_np.shape[1] == len(sentence), (
            elmo_embeddings_np.shape[1], len(sentence), [word for pos, word in sentence])
        elmo_embeddings = dy.inputTensor(elmo_embeddings_np)
        predicted, (additional_info, c, t) = parser.span_parser(sentence, is_train=False,
                                                                elmo_embeddings=elmo_embeddings)
        num_correct += c
        total += t
        rank = additional_info[3]
        ranks.append(rank)
        total_log_likelihood += additional_info[-1]
        test_predicted.append(predicted.convert())
    print('pos accuracy', num_correct / total)
    print("total time", time.time() - start_time)
    print("total loglikelihood", total_log_likelihood)
    print("total turned off", total_turned_off)
    print(total_confusion_matrix)

    print(ranks)
    print("avg", np.mean(ranks), "median", np.median(ranks))

    dev_fscore_without_labels = evaluate.evalb('EVALB/', treebank, test_predicted,
                                               args=args,
                                               erase_labels=True,
                                               name="without-labels")
    print("dev-fscore without labels", dev_fscore_without_labels)

    dev_fscore_without_labels = evaluate.evalb('EVALB/', treebank, test_predicted,
                                               args=args,
                                               erase_labels=True,
                                               flatten=True,
                                               name="without-label-flattened")
    print("dev-fscore without labels and flattened", dev_fscore_without_labels)

    dev_fscore_without_labels = evaluate.evalb('EVALB/', treebank, test_predicted,
                                               args=args,
                                               erase_labels=False,
                                               flatten=True,
                                               name="flattened")
    print("dev-fscore with labels and flattened", dev_fscore_without_labels)

    test_fscore = evaluate.evalb('EVALB/', treebank, test_predicted, args=args,
                                 name="regular")

    print(
        "test-fscore {} "
        "test-elapsed {}".format(
            test_fscore,
            format_elapsed(start_time),
        )
    )
    with open(os.path.join(args.experiment_directory, "confusion_matrix.pickle"), "wb") as f:
        pickle.dump(total_confusion_matrix, f)
Example #21
def run_test_qbank(args):
    if not os.path.exists(args.experiment_directory):
        os.mkdir(args.experiment_directory)

    print("Loading model from {}...".format(args.model_path_base))
    model = dy.ParameterCollection()
    [parser] = dy.load(args.model_path_base, model)

    all_trees = trees.load_trees(args.question_bank_trees_path)

    if args.stanford_split == 'true':
        print('using stanford split')
        split_to_indices = {
            'train': list(range(0, 1000)) + list(range(2000, 3000)),
            'dev': list(range(1000, 1500)) + list(range(3000, 3500)),
            'test': list(range(1500, 2000)) + list(range(3500, 4000))
        }
    else:
        print('not using stanford split')
        split_to_indices = {
            'train': range(0, 2000),
            'dev': range(2000, 3000),
            'test': range(3000, 4000)
        }

    test_indices = split_to_indices[args.split]
    qb_embeddings_file = h5py.File('../question-bank.hdf5', 'r')
    dev_predicted = []
    for test_index in test_indices:
        if len(dev_predicted) % 100 == 0:
            dy.renew_cg()
        tree = all_trees[test_index]
        sentence = [(leaf.tag, leaf.word) for leaf in tree.leaves]
        test_embeddings_np = qb_embeddings_file[str(test_index)][:, :, :]
        assert test_embeddings_np.shape[1] == len(sentence)
        test_embeddings = dy.inputTensor(test_embeddings_np)
        predicted, _ = parser.span_parser(sentence, is_train=False,
                                          elmo_embeddings=test_embeddings)
        dev_predicted.append(predicted.convert())

    test_treebank = [all_trees[index] for index in test_indices]
    dev_fscore_without_labels = evaluate.evalb(args.evalb_dir, test_treebank, dev_predicted,
                                               args=args,
                                               erase_labels=True,
                                               name="without-labels")
    print("dev-fscore without labels", dev_fscore_without_labels)

    dev_fscore_without_labels = evaluate.evalb(args.evalb_dir, test_treebank, dev_predicted,
                                               args=args,
                                               erase_labels=True,
                                               flatten=True,
                                               name="without-label-flattened")
    print("dev-fscore without labels and flattened", dev_fscore_without_labels)

    dev_fscore_without_labels = evaluate.evalb(args.evalb_dir, test_treebank, dev_predicted,
                                               args=args,
                                               erase_labels=False,
                                               flatten=True,
                                               name="flattened")
    print("dev-fscore with labels and flattened", dev_fscore_without_labels)

    test_fscore = evaluate.evalb(args.evalb_dir, test_treebank, dev_predicted, args=args,
                                 name="regular")

    print("regular", test_fscore)
def run_export(args):
    if args.test_path is not None:
        print("Loading test trees from {}...".format(args.test_path))
        test_treebank = treebanks.load_trees(
            args.test_path, args.test_path_text, args.text_processing
        )
        print("Loaded {:,} test examples.".format(len(test_treebank)))
    else:
        test_treebank = None

    print("Loading model from {}...".format(args.model_path))
    parser = Parser(args.model_path, batch_size=args.batch_size)
    model = parser._parser
    if model.pretrained_model is None:
        raise ValueError(
            "Exporting is only defined when using a pre-trained transformer "
            "encoder. For CharLSTM-based model, just distribute the pytorch "
            "checkpoint directly. You may manually delete the 'optimizer' "
            "field to reduce file size by discarding the optimizer state."
        )

    if test_treebank is not None:
        print("Parsing test sentences (predicting tags)...")
        start_time = time.time()
        test_inputs = inputs_from_treebank(test_treebank, predict_tags=True)
        test_predicted = list(parser.parse_sents(test_inputs))
        test_fscore = evaluate.evalb(args.evalb_dir, test_treebank.trees, test_predicted)
        test_elapsed = format_elapsed(start_time)
        print("test-fscore {} test-elapsed {}".format(test_fscore, test_elapsed))

        print("Parsing test sentences (not predicting tags)...")
        start_time = time.time()
        test_inputs = inputs_from_treebank(test_treebank, predict_tags=False)
        notags_test_predicted = list(parser.parse_sents(test_inputs))
        notags_test_fscore = evaluate.evalb(
            args.evalb_dir, test_treebank.trees, notags_test_predicted
        )
        notags_test_elapsed = format_elapsed(start_time)
        print(
            "test-fscore {} test-elapsed {}".format(notags_test_fscore, notags_test_elapsed)
        )

    print("Exporting tokenizer...")
    model.retokenizer.tokenizer.save_pretrained(args.output_dir)

    print("Exporting config...")
    config = model.pretrained_model.config
    config.benepar = model.config
    config.save_pretrained(args.output_dir)

    if args.compress:
        print("Compressing weights...")
        state_dict = get_compressed_state_dict(model.cpu())
        print("Saving weights...")
    else:
        print("Exporting weights...")
        state_dict = model.cpu().state_dict()
    torch.save(state_dict, os.path.join(args.output_dir, "benepar_model.bin"))

    del model, parser, state_dict

    print("Loading exported model from {}...".format(args.output_dir))
    exported_parser = Parser(args.output_dir, batch_size=args.batch_size)

    if test_treebank is None:
        print()
        print("Export complete.")
        print("Did not verify model accuracy because no treebank was provided.")
        return

    print("Parsing test sentences (predicting tags)...")
    start_time = time.time()
    test_inputs = inputs_from_treebank(test_treebank, predict_tags=True)
    exported_predicted = list(exported_parser.parse_sents(test_inputs))
    exported_fscore = evaluate.evalb(
        args.evalb_dir, test_treebank.trees, exported_predicted
    )
    exported_elapsed = format_elapsed(start_time)
    print(
        "exported-fscore {} exported-elapsed {}".format(
            exported_fscore, exported_elapsed
        )
    )

    print("Parsing test sentences (not predicting tags)...")
    start_time = time.time()
    test_inputs = inputs_from_treebank(test_treebank, predict_tags=False)
    notags_exported_predicted = list(exported_parser.parse_sents(test_inputs))
    notags_exported_fscore = evaluate.evalb(
        args.evalb_dir, test_treebank.trees, notags_exported_predicted
    )
    notags_exported_elapsed = format_elapsed(start_time)
    print(
        "exported-fscore {} exported-elapsed {}".format(
            notags_exported_fscore, notags_exported_elapsed
        )
    )

    print()
    print("Export and verification complete.")
    fscore_delta = evaluate.FScore(
        recall=notags_exported_fscore.recall - notags_test_fscore.recall,
        precision=notags_exported_fscore.precision - notags_test_fscore.precision,
        fscore=notags_exported_fscore.fscore - notags_test_fscore.fscore,
        complete_match=(
            notags_exported_fscore.complete_match - notags_test_fscore.complete_match
        ),
        tagging_accuracy=(
            exported_fscore.tagging_accuracy - test_fscore.tagging_accuracy
        ),
    )
    print("delta-fscore {}".format(fscore_delta))
Example #23
    def check_dev(epoch_num):
        nonlocal best_dev_score
        nonlocal best_model_path

        dev_start_time = time.time()

        parser.eval()

        dev_predicted = []

        for dev_start_index in range(0, len(dev_treebank), args.eval_batch_size):
            subbatch_trees = dev_treebank[dev_start_index:dev_start_index+args.eval_batch_size]
            subbatch_sentences = [[(leaf.tag, leaf.word) for leaf in tree.leaves()] for tree in subbatch_trees]

            predicted, _ = parser.parse_batch(subbatch_sentences)
            del _

            dev_predicted.extend([p.convert() for p in predicted])

        dev_fscore = evaluate.evalb(args.evalb_dir, dev_treebank, dev_predicted)

        print(
            "dev-fscore {} "
            "dev-elapsed {} "
            "total-elapsed {}".format(
                dev_fscore,
                format_elapsed(dev_start_time),
                format_elapsed(start_time),
            )
        )

        dev_pred_head = [[leaf.father for leaf in tree.leaves()] for tree in dev_predicted]
        dev_pred_type = [[leaf.type for leaf in tree.leaves()] for tree in dev_predicted]
        assert len(dev_pred_head) == len(dev_pred_type)
        assert len(dev_pred_type) == len(dep_dev_type)
        stats, stats_nopunc, stats_root, num_inst = dep_eval.eval(len(dev_pred_head), dep_dev_word, dep_dev_pos,
                                                                  dev_pred_head, dev_pred_type,
                                                                  dep_dev_headid, dep_dev_type,
                                                                  dep_dev_lengs, punct_set=punct_set,
                                                                  symbolic_root=False)
        dev_ucorr, dev_lcorr, dev_total, dev_ucomlpete, dev_lcomplete = stats
        dev_ucorr_nopunc, dev_lcorr_nopunc, dev_total_nopunc, dev_ucomlpete_nopunc, dev_lcomplete_nopunc = stats_nopunc
        dev_root_corr, dev_total_root = stats_root
        dev_total_inst = num_inst
        print(
            'W. Punct: ucorr: %d, lcorr: %d, total: %d, uas: %.2f%%, las: %.2f%%, ucm: %.2f%%, lcm: %.2f%%' % (
                dev_ucorr, dev_lcorr, dev_total, dev_ucorr * 100 / dev_total, dev_lcorr * 100 / dev_total,
                dev_ucomlpete * 100 / dev_total_inst, dev_lcomplete * 100 / dev_total_inst))
        print(
            'Wo Punct: ucorr: %d, lcorr: %d, total: %d, uas: %.2f%%, las: %.2f%%, ucm: %.2f%%, lcm: %.2f%%' % (
                dev_ucorr_nopunc, dev_lcorr_nopunc, dev_total_nopunc,
                dev_ucorr_nopunc * 100 / dev_total_nopunc,
                dev_lcorr_nopunc * 100 / dev_total_nopunc,
                dev_ucomlpete_nopunc * 100 / dev_total_inst, dev_lcomplete_nopunc * 100 / dev_total_inst))
        print('Root: corr: %d, total: %d, acc: %.2f%%' % (
            dev_root_corr, dev_total_root, dev_root_corr * 100 / dev_total_root))

        dev_uas = dev_ucorr_nopunc * 100 / dev_total_nopunc
        dev_las = dev_lcorr_nopunc * 100 / dev_total_nopunc

        if dev_fscore.fscore + dev_las > best_dev_score :
            if best_model_path is not None:
                extensions = [".pt"]
                for ext in extensions:
                    path = best_model_path + ext
                    if os.path.exists(path):
                        print("Removing previous model file {}...".format(path))
                        os.remove(path)

            best_dev_score = dev_fscore.fscore + dev_las
            best_model_path = "{}_best_dev={:.2f}_devuas={:.2f}_devlas={:.2f}".format(
                args.model_path_base, dev_fscore.fscore, dev_uas,dev_las)
            print("Saving new best model to {}...".format(best_model_path))
            torch.save({
                'spec': parser.spec,
                'state_dict': parser.state_dict(),
                'trainer' : trainer.state_dict(),
                }, best_model_path + ".pt")
def run_test(args):
    print("Loading test trees from {}...".format(args.test_path))
    if args.test_lbls:
        test_txt = [
            l.strip().split() for l in open(args.test_path, 'r').readlines()
        ]
        test_lbls = [
            l.strip().split() for l in open(args.test_lbls, 'r').readlines()
        ]
        test_sent_ids = [
            l.strip() for l in open(args.test_sent_id_path, 'r').readlines()
        ]
        test_treebank = [(txt, lbl) for txt, lbl in zip(test_txt, test_lbls)]
    else:
        test_treebank, test_sent_ids = trees.load_trees_with_idx(args.test_path, \
            args.test_sent_id_path, strip_top=False)

    if not args.new_set:
        test_pause_path = os.path.join(args.feature_path, args.test_prefix + \
            '_pause.pickle')
        with open(test_pause_path, 'rb') as f:
            test_pause_data = pickle.load(f, encoding='latin1')

        # to_remove = set(test_sent_ids).difference(set(test_pause_data.keys()))
        # to_remove = sorted([test_sent_ids.index(i) for i in to_remove])
        # for x in to_remove[::-1]:
        #     test_treebank.pop(x)
        #     test_sent_ids.pop(x)

    print("Loaded {:,} test examples.".format(len(test_treebank)))

    print("Loading model from {}...".format(args.model_path_base))
    assert args.model_path_base.endswith(".pt"), "Only pytorch files supported"

    info = torch_load(args.model_path_base)
    print(info.keys())
    assert 'hparams' in info['spec'], "Older savefiles not supported"

    parser = parse_model.SpeechParser.from_spec(info['spec'], \
            info['state_dict'])

    from prettytable import PrettyTable
    total_params = 0
    table = PrettyTable(["Modules", "Parameters"])
    for name, parameter in parser.named_parameters():
        if not parameter.requires_grad: continue
        param = parameter.numel()
        table.add_row([name, param])
        total_params += param

    parser.eval()  # turn off dropout at evaluation time
    label_vocab = parser.label_vocab
    #print("{} ({:,}): {}".format("label", label_vocab.size, \
    #        sorted(value for value in label_vocab.values)))

    test_feat_dict = {}
    if info['spec']['speech_features'] is not None:
        speech_features = info['spec']['speech_features']
        print("Loading speech features for test set...")
        for feat_type in speech_features:
            print("\t", feat_type)
            feat_path = os.path.join(args.feature_path, \
                    args.test_prefix + '_' + feat_type + '.pickle')
            with open(feat_path, 'rb') as f:
                feat_data = pickle.load(f, encoding='latin1')
            test_feat_dict[feat_type] = feat_data

    print("Parsing test sentences...")
    start_time = time.time()

    test_predicted = []
    test_scores = []
    pscores = []
    gscores = []
    with torch.no_grad():
        for start_index in range(0, len(test_treebank), args.eval_batch_size):
            subbatch_treebank = test_treebank[start_index:start_index \
                    + args.eval_batch_size]
            subbatch_sent_ids = test_sent_ids[start_index:start_index \
                    + args.eval_batch_size]
            if args.test_lbls:  # EKN using this instead of the seg flag bc it's an hparam
                subbatch_txt = [turn[0] for turn in subbatch_treebank]
                subbatch_lbl = [turn[1] for turn in subbatch_treebank]
                subbatch_sentences = [[(lbl,txt) for lbl,txt in zip(sent_lbl,sent_txt)] for \
                                   sent_lbl,sent_txt in zip(subbatch_lbl,subbatch_txt)]
            else:
                subbatch_sentences = [[(leaf.tag, leaf.word) for leaf in \
                    tree.leaves()] for tree in subbatch_treebank]
                subbatch_trees = [t.convert() for t in subbatch_treebank]
            subbatch_features = load_features(subbatch_sent_ids, test_feat_dict\
                    , args.sp_off)
            predicted, scores = parser.parse_batch(subbatch_sentences, \
                        subbatch_sent_ids, subbatch_features)
            if not args.get_scores:
                del scores
            else:
                charts = parser.parse_batch(subbatch_sentences, \
                        subbatch_sent_ids, subbatch_features, subbatch_trees, True)
                for i in range(len(charts)):
                    decoder_args = dict(sentence_len=len(subbatch_sentences[i]),\
                            label_scores_chart=charts[i],\
                            gold=subbatch_trees[i],\
                            label_vocab=parser.label_vocab, \
                            is_train=False, \
                            backoff=True)
                    p_score, _, _, _, _ = chart_helper.decode(
                        False, **decoder_args)
                    g_score, _, _, _, _ = chart_helper.decode(
                        True, **decoder_args)
                    pscores.append(p_score)
                    gscores.append(g_score)
                test_scores += scores
            if args.test_lbls:
                test_predicted.extend(predicted)
            else:
                test_predicted.extend([p.convert() for p in predicted])

    # DEBUG
    # print(test_scores)
    #print(test_score_offsets)

    with open(args.output_path, 'w') as output_file:
        for tree in test_predicted:
            if args.test_lbls:
                #import pdb;pdb.set_trace()
                lbls = ' '.join(tree)
                output_file.write("{}\n".format(lbls))
            else:
                output_file.write("{}\n".format(tree.linearize()))
    print("Output written to:", args.output_path)

    if args.get_scores:
        with open(args.output_path + '.scores', 'w') as output_file:
            for score1, score2, score3 in zip(test_scores, pscores, gscores):
                output_file.write("{}\t{}\t{}\n".format(
                    score1, score2, score3))
        print("Output scores written to:", args.output_path + '.scores')

    if args.write_gold:
        with open(args.test_prefix + '_sent_ids.txt', 'w') as sid_file:
            for sent_id in test_sent_ids:
                sid_file.write("{}\n".format(sent_id))
        print("Sent ids written to:", args.test_prefix + '_sent_ids.txt')

        with open(args.test_prefix + '_gold.txt', 'w') as gold_file:
            for tree in test_treebank:
                gold_file.write("{}\n".format(tree.linearize()))
        print("Gold trees written to:", args.test_prefix + '_gold.txt')

    # The tree loader does some preprocessing to the trees (e.g. stripping TOP
    # symbols or SPMRL morphological features). We compare with the input file
    # directly to be extra careful about not corrupting the evaluation. We also
    # allow specifying a separate "raw" file for the gold trees: the inputs to
    # our parser have traces removed and may have predicted tags substituted,
    # and we may wish to compare against the raw gold trees to make sure we
    # haven't made a mistake. As far as we can tell all of these variations give
    # equivalent results.
    ref_gold_path = args.test_path
    if args.test_path_raw is not None:
        print("Comparing with raw trees from", args.test_path_raw)
        ref_gold_path = args.test_path_raw
    else:
        # Evaluating on a subset of the test set, so skip comparison against the full reference file
        ref_gold_path = None

    if args.test_lbls:
        test_fscore = evaluate.seg_fscore(test_treebank,
                                          test_predicted,
                                          is_train=False)
    else:
        test_fscore = evaluate.evalb(args.evalb_dir, test_treebank, test_predicted,
                                     ref_gold_path=ref_gold_path, is_train=False)

    print("test-fscore {} "
          "test-elapsed {}".format(
              test_fscore,
              format_elapsed(start_time),
          ))
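The load_features helper called in this example is not shown. Below is a minimal sketch of what it is assumed to do, batching per-sentence speech features keyed by sentence id; the names, dict layout, and the sp_off switch are assumptions for illustration, not the repository's implementation.

import numpy as np

def load_features(sent_ids, feat_dict, sp_off=False):
    # Return None when speech features are disabled or none were loaded.
    if sp_off or not feat_dict:
        return None
    batch_features = []
    for sent_id in sent_ids:
        # Gather every loaded feature type for this sentence id (assumed layout).
        sent_feats = {feat_type: np.asarray(feats[sent_id])
                      for feat_type, feats in feat_dict.items()}
        batch_features.append(sent_feats)
    return batch_features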
Beispiel #25
0
def run_test(args):

    const_test_path = args.consttest_ptb_path

    dep_test_path = args.deptest_ptb_path

    if args.dataset == 'ctb':
        const_test_path = args.consttest_ctb_path
        dep_test_path = args.deptest_ctb_path

    print("Loading model from {}...".format(args.model_path_base))
    assert args.model_path_base.endswith(".pt"), "Only pytorch savefiles supported"

    info = torch_load(args.model_path_base)
    assert 'hparams' in info['spec'], "Older savefiles not supported"
    parser = Zparser.ChartParser.from_spec(info['spec'], info['state_dict'])
    parser.eval()

    dep_test_reader = CoNLLXReader(dep_test_path, parser.type_vocab)
    print('Reading dependency parsing data from %s' % dep_test_path)

    dep_test_data = []
    test_inst = dep_test_reader.getNext()
    # Preallocate gold head ids and sentence lengths for up to 40,000 sentences
    # of at most 300 tokens each.
    dep_test_headid = np.zeros([40000, 300], dtype=int)
    dep_test_type = []
    dep_test_word = []
    dep_test_pos = []
    dep_test_lengs = np.zeros(40000, dtype=int)
    cun = 0
    while test_inst is not None:
        inst_size = test_inst.length()
        dep_test_lengs[cun] = inst_size
        sent = test_inst.sentence
        dep_test_data.append((sent.words, test_inst.postags, test_inst.heads, test_inst.types))
        for i in range(inst_size):
            dep_test_headid[cun][i] = test_inst.heads[i]
        dep_test_type.append(test_inst.types)
        dep_test_word.append(sent.words)
        dep_test_pos.append(sent.postags)
        test_inst = dep_test_reader.getNext()
        cun = cun + 1

    dep_test_reader.close()

    print("Loading test trees from {}...".format(const_test_path))
    test_treebank = trees.load_trees(const_test_path, dep_test_headid, dep_test_type, dep_test_word)
    print("Loaded {:,} test examples.".format(len(test_treebank)))

    print("Parsing test sentences...")
    start_time = time.time()

    # Adjacent string literals concatenate here, producing one string of punctuation marks.
    punct_set = '.' '``' "''" ':' ','

    parser.eval()
    test_predicted = []
    for start_index in range(0, len(test_treebank), args.eval_batch_size):
        subbatch_trees = test_treebank[start_index:start_index + args.eval_batch_size]

        subbatch_sentences = [[(leaf.tag, leaf.word) for leaf in tree.leaves()] for tree in subbatch_trees]

        predicted, _ = parser.parse_batch(subbatch_sentences)
        del _
        test_predicted.extend([p.convert() for p in predicted])

    test_fscore = evaluate.evalb(args.evalb_dir, test_treebank, test_predicted)
    print(
        "test-fscore {} "
        "test-elapsed {}".format(
            test_fscore,
            format_elapsed(start_time),
        )
    )

    test_pred_head = [[leaf.father for leaf in tree.leaves()] for tree in test_predicted]
    test_pred_type = [[leaf.type for leaf in tree.leaves()] for tree in test_predicted]
    assert len(test_pred_head) == len(test_pred_type)
    assert len(test_pred_type) == len(dep_test_type)
    stats, stats_nopunc, stats_root, test_total_inst = dep_eval.eval(len(test_pred_head), dep_test_word, dep_test_pos,
                                                                     test_pred_head,
                                                                     test_pred_type, dep_test_headid, dep_test_type,
                                                                     dep_test_lengs, punct_set=punct_set,
                                                                     symbolic_root=False)

    test_ucorrect, test_lcorrect, test_total, test_ucomplete_match, test_lcomplete_match = stats
    test_ucorrect_nopunc, test_lcorrect_nopunc, test_total_nopunc, test_ucomplete_match_nopunc, test_lcomplete_match_nopunc = stats_nopunc
    test_root_correct, test_total_root = stats_root

    print(
        'best test W. Punct: ucorr: %d, lcorr: %d, total: %d, uas: %.2f%%, las: %.2f%%, ucm: %.2f%%, lcm: %.2f%%' % (
            test_ucorrect, test_lcorrect, test_total, test_ucorrect * 100 / test_total,
            test_lcorrect * 100 / test_total,
            test_ucomplete_match * 100 / test_total_inst, test_lcomplete_match * 100 / test_total_inst
            ))
    print(
        'best test Wo Punct: ucorr: %d, lcorr: %d, total: %d, uas: %.2f%%, las: %.2f%%, ucm: %.2f%%, lcm: %.2f%% ' % (
            test_ucorrect_nopunc, test_lcorrect_nopunc, test_total_nopunc,
            test_ucorrect_nopunc * 100 / test_total_nopunc,
            test_lcorrect_nopunc * 100 / test_total_nopunc,
            test_ucomplete_match_nopunc * 100 / test_total_inst,
            test_lcomplete_match_nopunc * 100 / test_total_inst))
    print('best test Root: corr: %d, total: %d, acc: %.2f%%' % (
        test_root_correct, test_total_root, test_root_correct * 100 / test_total_root))
    print(
        '============================================================================================================================')
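For reference, the UAS/LAS figures printed above follow the usual attachment-score arithmetic: unlabeled accuracy counts tokens whose predicted head matches the gold head, and labeled accuracy additionally requires the dependency type to match. The simplified helper below only illustrates that computation; it is a stand-in, not the repository's dep_eval.eval, which also handles punctuation filtering and complete-match counts.

def attachment_scores(pred_heads, pred_types, gold_heads, gold_types, lengths):
    # pred_heads/gold_heads: per-sentence lists of head indices;
    # pred_types/gold_types: matching dependency labels; lengths: sentence lengths.
    ucorrect = lcorrect = total = 0
    for sent_idx, length in enumerate(lengths):
        for i in range(length):
            total += 1
            if pred_heads[sent_idx][i] == gold_heads[sent_idx][i]:
                ucorrect += 1
                if pred_types[sent_idx][i] == gold_types[sent_idx][i]:
                    lcorrect += 1
    return 100.0 * ucorrect / total, 100.0 * lcorrect / total  # UAS, LAS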
def run_test(args):
    print("Loading test trees from {}...".format(args.test_path))
    test_treebank = trees.load_trees(args.test_path)
    print("Loaded {:,} test examples.".format(len(test_treebank)))

    print("Loading model from {}...".format(args.model_path_base))
    assert args.model_path_base.endswith(
        ".pt"), "Only pytorch savefiles supported"

    info = torch_load(args.model_path_base)
    assert 'hparams' in info['spec'], "Older savefiles not supported"
    parser = parse_jc.NKChartParser.from_spec(info['spec'], info['state_dict'])

    if args.redo_vocab:
        print(
            "Loading memory bank trees from {} for generating label vocab...".
            format(args.train_path))
        train_treebank = trees.load_trees(args.train_path)
        parser.label_vocab = gen_label_vocab(
            [tree.convert() for tree in train_treebank])

    print("Parsing test sentences...")
    start_time = time.time()

    if args.use_neighbours:
        index_const = index.FaissIndex if args.library == "faiss" else index.AnnoyIndex
        span_index = index_const(
            num_labels=len(parser.label_vocab.values),
            metric=parser.metric,
        )
        prefix = index.get_index_prefix(
            index_base_path=args.index_path,
            full_model_path=args.model_path_base,
            nn_prefix=args.nn_prefix,
        )
        span_index.load(prefix)

        # also remove relu
        parser.no_relu = args.no_relu
        if args.no_relu:
            parser.remove_relu()

    test_predicted = []
    for start_index in range(0, len(test_treebank), args.eval_batch_size):
        subbatch_trees = test_treebank[start_index:start_index +
                                       args.eval_batch_size]
        subbatch_sentences = [[(leaf.tag, leaf.word) for leaf in tree.leaves()]
                              for tree in subbatch_trees]
        predicted, _ = parser.parse_batch(
            subbatch_sentences,
            span_index=span_index if args.use_neighbours else None,
            k=args.k,
            zero_empty=args.zero_empty,
        )
        del _
        test_predicted.extend([p.convert() for p in predicted])

    # The tree loader does some preprocessing to the trees (e.g. stripping TOP
    # symbols or SPMRL morphological features). We compare with the input file
    # directly to be extra careful about not corrupting the evaluation. We also
    # allow specifying a separate "raw" file for the gold trees: the inputs to
    # our parser have traces removed and may have predicted tags substituted,
    # and we may wish to compare against the raw gold trees to make sure we
    # haven't made a mistake. As far as we can tell all of these variations give
    # equivalent results.
    ref_gold_path = args.test_path
    if args.test_path_raw is not None:
        print("Comparing with raw trees from", args.test_path_raw)
        ref_gold_path = args.test_path_raw

    test_fscore = evaluate.evalb(
        args.evalb_dir,
        test_treebank,
        test_predicted,
        ref_gold_path=ref_gold_path,
    )

    print("test-fscore {} "
          "test-elapsed {}".format(
              test_fscore,
              format_elapsed(start_time),
          ))
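The FaissIndex / AnnoyIndex wrappers loaded above come from the repository's index module and are not reproduced here. As a rough illustration of the underlying lookup only, the snippet below runs nearest-neighbour search over span representations with the plain faiss API; the dimensions, the placeholder data, and how parse_batch consumes the neighbours are assumptions for this sketch.

import numpy as np
import faiss  # assumes the faiss library is installed

dim = 256                                                    # assumed span-representation size
span_vectors = np.random.rand(10000, dim).astype('float32')  # placeholder span encodings
span_labels = np.random.randint(0, 50, size=10000)           # placeholder label ids

nn_index = faiss.IndexFlatL2(dim)   # exact L2 search over the stored spans
nn_index.add(span_vectors)

query = np.random.rand(1, dim).astype('float32')
distances, neighbour_ids = nn_index.search(query, 8)         # k = 8 neighbours
neighbour_labels = span_labels[neighbour_ids[0]]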
Beispiel #27
0
def evaluate_on_brown_corpus(args):
    if not os.path.exists(args.experiment_directory):
        os.mkdir(args.experiment_directory)

    model = dy.ParameterCollection()
    [parser] = dy.load(args.model_path_base, model)
    assert parser.use_elmo == args.use_elmo, (parser.use_elmo, args.use_elmo)

    directories = ['cf', 'cg', 'ck', 'cl', 'cm', 'cn', 'cp', 'cr']
    for directory in directories:
        print('-' * 100)
        print(directory)
        input_file = '../brown/' + directory + '/' + directory + '.all.mrg'
        expt_name = args.experiment_directory + '/' + directory
        if not os.path.exists(expt_name):
            os.mkdir(expt_name)
        cleaned_corpus_path = trees.cleanup_text(input_file)
        treebank = trees.load_trees(cleaned_corpus_path, strip_top=True, filter_none=True)
        sentences = [[(leaf.tag, leaf.word) for leaf in tree.leaves] for tree in treebank]
        tokenized_lines = [' '.join([word for pos, word in sentence]) for sentence in sentences]
        if args.use_elmo:
            embedding_file = compute_elmo_embeddings(tokenized_lines, expt_name)
        else:
            embedding_file = None
        dev_predicted = []
        num_correct = 0
        total = 0
        for tree_index, tree in enumerate(treebank):
            if tree_index % 100 == 0:
                print(tree_index)
                dy.renew_cg()
            sentence = sentences[tree_index]
            if args.use_elmo:
                embeddings_np = embedding_file[str(tree_index)][:, :, :]
                assert embeddings_np.shape[1] == len(sentence), (
                    embeddings_np.shape[1], len(sentence))
                embeddings = dy.inputTensor(embeddings_np)
            else:
                embeddings = None
            predicted, (additional_info, c, t) = parser.span_parser(sentence, is_train=False,
                                                                    elmo_embeddings=embeddings)
            num_correct += c
            total += t
            dev_predicted.append(predicted.convert())

        dev_fscore_without_labels = evaluate.evalb('EVALB/', treebank, dev_predicted,
                                                   args=args,
                                                   erase_labels=True,
                                                   name="without-labels",
                                                   expt_name=expt_name)
        print("dev-fscore without labels", dev_fscore_without_labels)

        dev_fscore_without_labels_flattened = evaluate.evalb('EVALB/', treebank, dev_predicted,
                                                              args=args,
                                                              erase_labels=True,
                                                              flatten=True,
                                                              name="without-label-flattened",
                                                              expt_name=expt_name)
        print("dev-fscore without labels and flattened", dev_fscore_without_labels_flattened)

        dev_fscore_flattened = evaluate.evalb('EVALB/', treebank, dev_predicted,
                                              args=args,
                                              erase_labels=False,
                                              flatten=True,
                                              name="flattened",
                                              expt_name=expt_name)
        print("dev-fscore with labels and flattened", dev_fscore_flattened)

        test_fscore = evaluate.evalb('EVALB/', treebank, dev_predicted, args=args,
                                     name="regular",
                                     expt_name=expt_name)

        print("regular", test_fscore)
        pos_fraction = num_correct / total
        print('pos fraction', pos_fraction)
        with open(expt_name + '/pos_accuracy.txt', 'w') as f:
            f.write(str(pos_fraction))
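compute_elmo_embeddings above is assumed to return an object indexable as embedding_file[str(i)][:, :, :], i.e. one dataset per sentence keyed by its index. A minimal sketch of writing and reading such a file with h5py follows; the shapes, in particular the leading layer dimension, are assumptions.

import h5py

def write_sentence_embeddings(path, per_sentence_arrays):
    # per_sentence_arrays: list of numpy arrays shaped (num_layers, sentence_len, dim)
    with h5py.File(path, 'w') as f:
        for i, arr in enumerate(per_sentence_arrays):
            f.create_dataset(str(i), data=arr)

def read_sentence_embeddings(path):
    # Caller indexes the returned file as f[str(i)][:, :, :].
    return h5py.File(path, 'r')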
def run_test(args):
    print("Loading test trees from {}...".format(args.test_path))
    test_treebank = trees.load_trees(args.test_path)
    print("Loaded {:,} test examples.".format(len(test_treebank)))

    print("Loading model from {}...".format(args.model_path_base))
    assert args.model_path_base.endswith(
        ".pt"), "Only pytorch savefiles supported"

    info = torch_load(args.model_path_base)
    assert 'hparams' in info['spec'], "Older savefiles not supported"
    parser = parse_nk.NKChartParser.from_spec(info['spec'], info['state_dict'])
    hparams = info['spec']['hparams']
    if ('use_extra_info' in hparams) and hparams['use_extra_info']:
        loaded_test_info = h5py.File(args.test_path + '.hdf5', 'r')
        test_info = list()
        for i in range(len(test_treebank)):
            item_info = list()
            for key in sorted(loaded_test_info.keys()):
                item_info.append(loaded_test_info[key + '/' + str(i)])
            item_info = np.array(item_info)
            item_info = np.concatenate([
                -1e8 * np.ones((item_info.shape[0], item_info.shape[1], 1)),
                item_info
            ], axis=2)
            item_info = np.concatenate([
                item_info,
                -1e8 * np.ones((item_info.shape[0], 1, item_info.shape[2]))
            ], axis=1)
            test_info.append(item_info)
        print("Loaded and processed extra info from constituency test.")
    else:
        test_info = None

    print("Parsing test sentences...")
    start_time = time.time()

    test_predicted = []
    for start_index in range(0, len(test_treebank), args.eval_batch_size):
        subbatch_trees = test_treebank[start_index:start_index +
                                       args.eval_batch_size]
        subbatch_sentences = [[(leaf.tag, leaf.word)
                               for leaf in tree.leaves()]
                              for tree in subbatch_trees]
        subbatch_info = test_info[
            start_index:start_index +
            args.eval_batch_size] if test_info is not None else None
        predicted, _ = parser.parse_batch(subbatch_sentences,
                                          extra_info=subbatch_info)
        del _
        test_predicted.extend([p.convert() for p in predicted])

    # The tree loader does some preprocessing to the trees (e.g. stripping TOP
    # symbols or SPMRL morphological features). We compare with the input file
    # directly to be extra careful about not corrupting the evaluation. We also
    # allow specifying a separate "raw" file for the gold trees: the inputs to
    # our parser have traces removed and may have predicted tags substituted,
    # and we may wish to compare against the raw gold trees to make sure we
    # haven't made a mistake. As far as we can tell all of these variations give
    # equivalent results.
    ref_gold_path = args.test_path
    if args.test_path_raw is not None:
        print("Comparing with raw trees from", args.test_path_raw)
        ref_gold_path = args.test_path_raw

    try:
        test_fscore = evaluate.evalb(args.evalb_dir,
                                     test_treebank,
                                     test_predicted,
                                     ref_gold_path=ref_gold_path)
        print("labeled-fscore {} ".format(test_fscore))
    except Exception:
        print('Failed to predict labeled score.')

    for rm_punct in [True]:
        for compute_level in ["corpus"]:
            clean_test_treebank = [
                tokens2list(
                    tree.linearize().replace('(', ' ( ').replace(')', ' ) ').split(),
                    rm_punct) for tree in test_treebank
            ]
            clean_test_predicted = [
                tokens2list(
                    tree.linearize().replace('(', ' ( ').replace(')', ' ) ').split(),
                    rm_punct) for tree in test_predicted
            ]
            unlabeled_fscore = evaluate.evaluate_unlabeled(
                clean_test_treebank, clean_test_predicted, compute_level)
            print("unlabeled-fscore ({} {}) {}".format(rm_punct, compute_level,
                                                       unlabeled_fscore))
    gold_bracket_num = sum(
        [len(get_brackets(x)[0]) for x in clean_test_treebank])
    predicted_bracket_num = sum(
        [len(get_brackets(x)[0]) for x in clean_test_predicted])
    print("# gold brackets: {}".format(gold_bracket_num))
    print("# predicted brackets: {}".format(predicted_bracket_num))
    for i, tree in enumerate(clean_test_treebank):
        print(tree2str(tree))
        print(tree2str(clean_test_predicted[i]))
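The helpers tokens2list, get_brackets, and tree2str used in this last example are not included above. As an illustration of the bracket counting only, here is a simple stack-based extraction of word spans from a linearized tree split into tokens; the repository's get_brackets may differ, for example in punctuation handling.

def get_spans(tokens):
    # tokens: e.g. ['(', 'S', '(', 'NP', '(', 'DT', 'the', ')', '(', 'NN', 'cat', ')', ')', ')']
    spans, stack, word_idx = [], [], 0
    for i, tok in enumerate(tokens):
        if tok == '(':
            stack.append(word_idx)           # remember where this bracket opens
        elif tok == ')':
            start = stack.pop()
            spans.append((start, word_idx))  # bracket covers words [start, word_idx)
        elif i > 0 and tokens[i - 1] == '(':
            pass                             # nonterminal label, not a word
        else:
            word_idx += 1                    # a word/leaf token
    return spans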