def test_evaluate_from_args(self):
        kebab_args = ["evaluate", str(self.FIXTURES_ROOT / "bidaf" / "serialization" / "model.tar.gz"),
                      str(self.FIXTURES_ROOT / "data" / "squad.json"),
                      "--cuda-device", "-1"]

        args = self.parser.parse_args(kebab_args)
        metrics = evaluate_from_args(args)
        assert metrics.keys() == {'span_acc', 'end_acc', 'start_acc', 'em', 'f1', 'loss'}
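The tests above assume a `self.parser` prepared by the test fixture. A minimal sketch of that setup, modeled on Example 13 further down, follows; note that the `add_subparser` signature varies between AllenNLP releases (older versions take the command name plus the subparsers object, newer ones take only the subparsers object), so treat the exact call as an assumption.

import argparse

from allennlp.commands.evaluate import Evaluate

# Hypothetical fixture setup: register only the "evaluate" subcommand
# on a bare ArgumentParser, as Example 13 does explicitly.
parser = argparse.ArgumentParser(description="Testing")
subparsers = parser.add_subparsers(title='Commands', metavar='')
Evaluate().add_subparser('evaluate', subparsers)  # older two-argument signature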
Example 2
    def test_evaluate_from_args(self):
        kebab_args = ["evaluate", "tests/fixtures/bidaf/serialization/model.tar.gz",
                      "--evaluation-data-file", "tests/fixtures/data/squad.json",
                      "--cuda-device", "-1"]

        args = self.parser.parse_args(kebab_args)
        metrics = evaluate_from_args(args)
        assert metrics.keys() == {'span_acc', 'end_acc', 'start_acc', 'em', 'f1'}
Example 3
    def test_evaluate_from_args(self):
        kebab_args = ["evaluate", str(self.FIXTURES_ROOT / "bidaf" / "serialization" / "model.tar.gz"),
                      str(self.FIXTURES_ROOT / "data" / "squad.json"),
                      "--cuda-device", "-1"]

        args = self.parser.parse_args(kebab_args)
        metrics = evaluate_from_args(args)
        assert metrics.keys() == {'span_acc', 'end_acc', 'start_acc', 'em', 'f1'}
Example 4
def evaluate(config, serialization_dir, bert_path, trainable):
    if os.path.exists(serialization_dir):
        print(f"{serialization_dir} exists, removing...")
        shutil.rmtree(serialization_dir)

    print("#" * 40)
    print("# Training")
    print("#" * 40)
    os.environ["BERT_DIMS"] = str(
        BertModel.from_pretrained(bert_path).config.hidden_size)
    os.environ["BERT_PATH"] = bert_path
    os.environ["TRAINABLE"] = str(int(trainable))
    train_model_from_file(config, serialization_dir)

    print("#" * 40)
    print("# Evaluating")
    print("#" * 40)
    args = eval_args(serialization_dir)
    evaluate_from_args(args)
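In the snippet above, `eval_args` is defined elsewhere in that project. A plausible sketch, assuming it fills in the same `argparse.Namespace` attributes that Example 10 below sets by hand (the evaluation data path and output path here are placeholders), is:

import argparse
import os


def eval_args(serialization_dir):
    # Hypothetical helper: populate the Namespace attributes that
    # evaluate_from_args reads, mirroring Example 10.
    args = argparse.Namespace()
    args.file_friendly_logging = False
    args.archive_file = os.path.join(serialization_dir, "model.tar.gz")
    args.weights_file = None
    args.cuda_device = -1
    args.overrides = ""
    args.input_file = "data/dev.json"  # placeholder evaluation data file
    args.embedding_sources_mapping = ""
    args.extend_vocab = False
    args.batch_size = None
    args.batch_weight_key = None
    args.output_file = os.path.join(serialization_dir, "metrics.json")
    args.predictions_output_file = None
    return args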
Example 5
    def test_output_file_evaluate_from_args(self):
        output_file = str(self.TEST_DIR / "metrics.json")
        kebab_args = ["evaluate", str(self.FIXTURES_ROOT / "bidaf" / "serialization" / "model.tar.gz"),
                      str(self.FIXTURES_ROOT / "data" / "squad.json"),
                      "--cuda-device", "-1",
                      "--output-file", output_file]
        args = self.parser.parse_args(kebab_args)
        computed_metrics = evaluate_from_args(args)
        with open(output_file, 'r') as file:
            saved_metrics = json.load(file)
        assert computed_metrics == saved_metrics
Example 6
    def test_output_file_evaluate_from_args(self):
        output_file = str(self.TEST_DIR / "metrics.json")
        kebab_args = ["evaluate", str(self.FIXTURES_ROOT / "bidaf" / "serialization" / "model.tar.gz"),
                      str(self.FIXTURES_ROOT / "data" / "squad.json"),
                      "--cuda-device", "-1",
                      "--output-file", output_file]
        args = self.parser.parse_args(kebab_args)
        computed_metrics = evaluate_from_args(args)
        with open(output_file, 'r') as file:
            saved_metrics = json.load(file)
        assert computed_metrics == saved_metrics
Example 7
    def test_evaluate_from_args(self):
        kebab_args = [
            "evaluate", "tests/fixtures/bidaf/serialization/model.tar.gz",
            "--evaluation-data-file", "tests/fixtures/data/squad.json",
            "--cuda-device", "-1"
        ]

        args = self.parser.parse_args(kebab_args)
        metrics = evaluate_from_args(args)
        assert metrics.keys() == {
            'span_acc', 'end_acc', 'start_acc', 'em', 'f1'
        }
Example 8
    def test_evaluate_from_args(self):
        kebab_args = [
            u"evaluate",
            unicode(self.FIXTURES_ROOT / u"bidaf" / u"serialization" /
                    u"model.tar.gz"),
            unicode(self.FIXTURES_ROOT / u"data" / u"squad.json"),
            u"--cuda-device", u"-1"
        ]

        args = self.parser.parse_args(kebab_args)
        metrics = evaluate_from_args(args)
        assert set(metrics.keys()) == set(
            [u'span_acc', u'end_acc', u'start_acc', u'em', u'f1'])
Example 9
    def test_evaluate_from_args(self):
        parser = argparse.ArgumentParser(description="Testing")
        subparsers = parser.add_subparsers(title='Commands', metavar='')
        add_subparser(subparsers)

        raw_args = ["evaluate",
                    "--config_file", "tests/fixtures/bidaf/experiment.json",
                    "--evaluation_data_file", "tests/fixtures/data/squad.json"]

        args = parser.parse_args(raw_args)

        metrics = evaluate_from_args(args)

        assert metrics == {'full_span_acc': 0.0, 'span_end_acc': 0.0, 'span_start_acc': 0.0}
Example 10
    def restore_and_evaluate(self) -> Dict[str, Any]:
        allennlp_args = argparse.Namespace()
        allennlp_args.file_friendly_logging = False
        allennlp_args.archive_file = "result/model.tar.gz"
        allennlp_args.weights_file = None
        allennlp_args.cuda_device = -1
        allennlp_args.overrides = ""
        allennlp_args.input_file = "data/movie_review/test.tsv"
        allennlp_args.embedding_sources_mapping = ""
        allennlp_args.extend_vocab = False
        allennlp_args.batch_size = None
        allennlp_args.batch_weight_key = None
        allennlp_args.output_file = "evaluation/evaluation"
        allennlp_args.predictions_output_file = "evaluation/pred"

        metric = evaluate_from_args(allennlp_args)
        return metric
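Because `output_file` is set, the metrics dictionary is also written to disk as JSON; a minimal follow-up sketch (path taken from the snippet above) reads it back the same way the test_output_file_evaluate_from_args examples do:

import json

# Load the metrics that evaluate_from_args dumped to allennlp_args.output_file.
with open("evaluation/evaluation", "r") as saved:
    saved_metrics = json.load(saved)
print(saved_metrics)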
Example 11
    def test_output_file_evaluate_from_args(self):
        output_file = str(self.TEST_DIR / "metrics.json")
        kebab_args = [
            "evaluate",
            str(self.FIXTURES_ROOT / "simple_tagger_with_span_f1" /
                "serialization" / "model.tar.gz"),
            str(self.FIXTURES_ROOT / "data" / "conll2003.txt"),
            "--cuda-device",
            "-1",
            "--output-file",
            output_file,
        ]
        args = self.parser.parse_args(kebab_args)
        computed_metrics = evaluate_from_args(args)
        with open(output_file, "r") as file:
            saved_metrics = json.load(file)
        assert computed_metrics == saved_metrics
Example 12
    def test_evaluate_from_args(self):
        parser = argparse.ArgumentParser(description="Testing")
        subparsers = parser.add_subparsers(title='Commands', metavar='')
        add_subparser(subparsers)

        raw_args = [
            "evaluate", "--archive_file",
            "tests/fixtures/bidaf/serialization/model.tar.gz",
            "--evaluation_data_file", "tests/fixtures/data/squad.json"
        ]

        args = parser.parse_args(raw_args)

        metrics = evaluate_from_args(args)

        assert metrics.keys() == {
            'span_acc', 'end_acc', 'start_acc', 'em', 'f1'
        }
Example 13
    def test_evaluate_from_args(self):
        parser = argparse.ArgumentParser(description="Testing")
        subparsers = parser.add_subparsers(title='Commands', metavar='')
        Evaluate().add_subparser('evaluate', subparsers)

        snake_args = ["evaluate",
                      "--archive_file", "tests/fixtures/bidaf/serialization/model.tar.gz",
                      "--evaluation_data_file", "tests/fixtures/data/squad.json",
                      "--cuda_device", "-1"]

        kebab_args = ["evaluate",
                      "--archive-file", "tests/fixtures/bidaf/serialization/model.tar.gz",
                      "--evaluation-data-file", "tests/fixtures/data/squad.json",
                      "--cuda-device", "-1"]

        for raw_args in [snake_args, kebab_args]:
            args = parser.parse_args(raw_args)
            metrics = evaluate_from_args(args)
            assert metrics.keys() == {'span_acc', 'end_acc', 'start_acc', 'em', 'f1'}
Example 14
    def test_evaluate_from_args(self):
        kebab_args = [
            "evaluate",
            str(self.FIXTURES_ROOT / "simple_tagger_with_span_f1" /
                "serialization" / "model.tar.gz"),
            str(self.FIXTURES_ROOT / "data" / "conll2003.txt"),
            "--cuda-device",
            "-1",
        ]

        args = self.parser.parse_args(kebab_args)
        metrics = evaluate_from_args(args)
        assert metrics.keys() == {
            "accuracy",
            "accuracy3",
            "precision-overall",
            "recall-overall",
            "f1-measure-overall",
            "loss",
        }
Example 15
    def test_evaluate_from_args(self):
        snake_args = [
            "evaluate", "--archive_file",
            "tests/fixtures/bidaf/serialization/model.tar.gz",
            "--evaluation_data_file", "tests/fixtures/data/squad.json",
            "--cuda_device", "-1"
        ]

        kebab_args = [
            "evaluate", "--archive-file",
            "tests/fixtures/bidaf/serialization/model.tar.gz",
            "--evaluation-data-file", "tests/fixtures/data/squad.json",
            "--cuda-device", "-1"
        ]

        for raw_args in [snake_args, kebab_args]:
            args = self.parser.parse_args(raw_args)
            metrics = evaluate_from_args(args)
            assert metrics.keys() == {
                'span_acc', 'end_acc', 'start_acc', 'em', 'f1'
            }
Example 16
def generate_probs(model_dir, inp_fp, weights_fp, type_, out_fp, out_ext,
                   cuda_device, overwrite, batch_size, extraction_ratio,
                   hparams):
    import_submodules('imojie')

    if out_fp is None:
        inp_base = os.path.basename(inp_fp)
        inp_dir = os.path.basename(os.path.dirname(inp_fp))
        eval_dir = os.path.basename(os.path.dirname(
            os.path.dirname(inp_fp)))  # train, dev or test
        inp_base = inp_base.replace('extractions', eval_dir)
        prob_dir = model_dir + '/prob'
        os.makedirs(prob_dir, exist_ok=True)
        out_fp = prob_dir + '/' + inp_base + '.' + inp_dir + '.' + out_ext
    else:
        os.makedirs(os.path.dirname(out_fp), exist_ok=True)

    if os.path.exists(out_fp) and not overwrite:
        print('found ', out_fp)
        return

    args = argparse.Namespace()
    args.archive_file = model_dir
    args.cuda_device = cuda_device
    args.embedding_sources_mapping = {}
    args.extend_vocab = None
    args.batch_weight_key = ''
    args.output_file = ''
    args.overrides = "{'model': {'token_based_metric': null}, 'iterator': {'batch_size': " + str(
        batch_size) + ", \
        'instances_per_epoch': null}, 'trainer':{'num_epochs':1}, 'dataset_reader': {'max_tokens': 10000, \
            'gradients': false, 'max_extractions': 30, 'extraction_ratio': " + str(
            extraction_ratio) + ", 'probability': true \
                 }, 'validation_dataset_reader': null}"

    args.weights_file = weights_fp
    args.input_file = inp_fp
    probs = evaluate_from_args(args)
    probsD = dict()
    # For some reason the last batch results are repeated in the probs
    # Not an issue as they are just overwritten while forming the probsD
    for i in range(len(probs['example_ids'])):
        probsD[probs['example_ids'][i]] = probs['probs'][i]
    with open(inp_fp) as inp_file:
        lines = inp_file.readlines()

    all_fields = []
    for line_number, line in enumerate(lines):
        line = line.strip('\n')
        fields = line.split('\t')
        if line_number not in probsD:  # the example is too large and rejected by dataloader ('max_tokens' argument)
            continue
        # Removing appended extractions after reranking
        fields[0] = fields[0].split('[SEP]')[0].strip()
        fields[2] = str(probsD[line_number])
        all_fields.append('\t'.join(fields))

    # if type_ == 'single':
    #     all_fields = []
    #     for line_number, line in enumerate(lines):
    #         line = line.strip('\n')
    #         fields = line.split('\t')
    #         if line_number not in probsD: # the example is too large and rejected by dataloader ('max_tokens' argument)
    #             continue
    #         # Removing appended extractions after reranking
    #         fields[0] = fields[0].split('[SEP]')[0].strip()
    #         fields[2] = str(probsD[line_number])
    #         all_fields.append('\t'.join(fields))
    # elif type_ == 'append':
    #     all_fields = []
    #     all_examples = dict()
    #     extractions = []
    #     lines = lines + ['']
    #     for line_number, line in enumerate(lines):
    #         line = line.strip('\n')
    #         if line_number != len(lines)-1:
    #             sentence, extraction, confidence = line.split('\t')
    #         else:
    #             sentence, extraction, confidence = '', '', 1
    #         if line_number == 0:
    #             old_sentence = sentence
    #         if line_number == len(lines)-1 or sentence != old_sentence:
    #             # if line_number == len(lines)-1:
    #                 # extractions.append(extraction)
    #                 # old_sentence = sentence
    #             all_examples[line_number-1] = [old_sentence, extractions]

    #             old_sentence = sentence
    #             extractions = []
    #         extractions.append(extraction)

    #     for line_number in probsD:
    #         # assert line_number in all_examples
    #         if line_number not in all_examples:
    #             continue
    #         sentence, extractions = all_examples[line_number]
    #         for ext_num, extraction in enumerate(extractions):
    #             confidence = probsD[line_number][ext_num].item()
    #             out = sentence+'\t'+extraction+'\t'+str(confidence)
    #             all_fields.append(out)

    # sorting all_fields according to the confidences assigned by bert_encoder
    all_fields_sorted = []
    prev_sent = None
    exts = []
    for f in all_fields:
        sent = f.split('\t')[0]
        if sent != prev_sent:
            if prev_sent is not None:
                exts = sorted(exts,
                              reverse=True,
                              key=lambda x: float(x.split('\t')[2]))
                if hparams.topk is not None:
                    exts = exts[:hparams.topk]
                all_fields_sorted.extend(exts)
            prev_sent = sent
            exts = [f]
        else:
            exts.append(f)
    exts = sorted(exts, reverse=True, key=lambda x: float(x.split('\t')[2]))
    all_fields_sorted.extend(exts)

    with open(out_fp, 'w') as out_file:
        out_file.write('\n'.join(all_fields_sorted))

    print('Probabilities written to: ', out_fp)
    return
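A hypothetical invocation of generate_probs above (every path, the batch size, and the hyperparameter object are placeholders; hparams only needs a topk attribute here):

import argparse

# Placeholder hyperparameters: generate_probs only reads hparams.topk.
hparams = argparse.Namespace(topk=5)

generate_probs(
    model_dir="models/rescorer",                      # trained AllenNLP model directory
    inp_fp="predictions/test/beam/extractions.tsv",   # tab-separated extractions to re-score
    weights_fp="models/rescorer/best.th",
    type_="single",
    out_fp="models/rescorer/prob/test.prob",          # explicit output path
    out_ext="prob",
    cuda_device=-1,
    overwrite=True,
    batch_size=32,
    extraction_ratio=1.0,
    hparams=hparams,
)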
Example 17
def generate_probs(model_dir, inp_fp, weights_fp, topk, out_ext, cuda_device,
                   overwrite, batch_size, extraction_ratio, out):
    import_submodules('imojie')

    args = argparse.Namespace()
    args.archive_file = model_dir
    args.cuda_device = cuda_device
    args.embedding_sources_mapping = {}
    args.extend_vocab = None
    args.batch_weight_key = ''
    args.output_file = ''
    args.overrides = "{'model': {'token_based_metric': null}, 'iterator': {'batch_size': " + str(
        batch_size) + ", \
        'instances_per_epoch': null}, 'trainer':{'num_epochs':1}, 'dataset_reader': {'max_tokens': 10000, \
            'gradients': false, 'max_extractions': 30, 'extraction_ratio': " + str(
            extraction_ratio) + ", 'probability': true \
                 }, 'validation_dataset_reader': null}"

    args.weights_file = weights_fp
    args.input_file = inp_fp
    probs = evaluate_from_args(args)

    probsD = dict()
    # For some reason the last batch results are repeated in the probs
    # Not an issue as they are just overwritten while forming the probsD
    for i in range(len(probs['example_ids'])):
        probsD[probs['example_ids'][i]] = probs['probs'][i]
    with open(inp_fp) as inp_file:
        lines = inp_file.readlines()

    all_fields = []
    for line_number, line in enumerate(lines):
        line = line.strip('\n')
        fields = line.split('\t')
        if line_number not in probsD:  # the example is too large and rejected by dataloader ('max_tokens' argument)
            continue
        # Removing appended extractions after reranking
        fields[0] = fields[0].split('[SEP]')[0].strip()
        fields[2] = str(probsD[line_number])
        fields.append(str(line_number))
        all_fields.append('\t'.join(fields))

    if topk is None:
        return all_fields
    else:
        # sorting all_fields according to the confidences assigned by bert_encoder
        all_fields_sorted = []
        prev_sent = None
        exts = []
        for f in all_fields:
            sent = f.split('\t')[0]
            if sent != prev_sent:
                if prev_sent is not None:
                    exts = toolz.unique(exts, key=lambda x: x.split('\t')[1])
                    exts = sorted(exts,
                                  reverse=True,
                                  key=lambda x: float(x.split('\t')[2]))
                    if topk is not None:
                        exts = exts[:topk]
                    all_fields_sorted.extend(exts)
                prev_sent = sent
                exts = [f]
            else:
                exts.append(f)
        exts = sorted(exts,
                      reverse=True,
                      key=lambda x: float(x.split('\t')[2]))
        all_fields_sorted.extend(exts)
        with open(out, 'w') as out_file:
            out_file.write('\n'.join(all_fields_sorted))
        print('Probabilities written to: ', out)

        return all_fields_sorted