Exemple #1
0
    def test_works_with_known_model(self):
        """Run ``predict`` on two JSON-lines inputs and check the output schema.

        Two passage/question records are written to ``self.infile``, the
        command-line entry point is invoked with the archive at
        ``self.bidaf_model_path``, and the test then verifies that
        ``self.outfile`` holds two JSON objects with exactly the expected keys.
        The temporary directory ``self.tempdir`` is removed at the end.
        """
        input_records = (
            '{"passage": "the seahawks won the super bowl in 2016", '
            ' "question": "when did the seahawks win the super bowl?"}\n',
            '{"passage": "the mariners won the super bowl in 2037", '
            ' "question": "when did the mariners win the super bowl?"}\n',
        )
        with open(self.infile, 'w') as input_handle:
            for record in input_records:
                input_handle.write(record)

        # Positional part: executable, command, archive, input file.
        cli_args = ["run.py", "predict", str(self.bidaf_model_path), str(self.infile)]
        cli_args.extend(["--output-file", str(self.outfile)])
        cli_args.append("--silent")
        sys.argv = cli_args

        main()

        assert os.path.exists(self.outfile)

        predictions = []
        with open(self.outfile, 'r') as output_handle:
            for raw_line in output_handle:
                predictions.append(json.loads(raw_line))

        assert len(predictions) == 2

        expected_keys = {
            "span_start_logits", "span_end_logits",
            "passage_question_attention", "question_tokens",
            "passage_tokens", "span_start_probs", "span_end_probs",
            "best_span", "best_span_str",
        }
        for prediction in predictions:
            assert set(prediction.keys()) == expected_keys

        shutil.rmtree(self.tempdir)
Exemple #2
0
    def test_using_dataset_reader_works_with_known_model(self):
        """Run ``predict`` with ``--use-dataset-reader`` and check the output.

        The input file is ``self.bidaf_data_path`` rather than a hand-written
        JSON-lines file.  Five predictions are expected, each carrying a
        ``loss`` entry in addition to the other prediction keys.  The temporary
        directory ``self.tempdir`` is removed at the end.
        """
        cli_args = [
            "run.py",                    # executable
            "predict",                   # command
            str(self.bidaf_model_path),  # archive
            str(self.bidaf_data_path),   # input_file
        ]
        cli_args.extend(["--output-file", str(self.outfile)])
        cli_args.extend(["--silent", "--use-dataset-reader"])
        sys.argv = cli_args

        main()

        assert os.path.exists(self.outfile)

        predictions = []
        with open(self.outfile, 'r') as output_handle:
            for raw_line in output_handle:
                predictions.append(json.loads(raw_line))

        assert len(predictions) == 5

        expected_keys = {
            "span_start_logits", "span_end_logits",
            "passage_question_attention", "question_tokens",
            "passage_tokens", "span_start_probs", "span_end_probs",
            "best_span", "best_span_str", "loss",
        }
        for prediction in predictions:
            assert set(prediction.keys()) == expected_keys

        shutil.rmtree(self.tempdir)
Exemple #3
0
    def test_other_modules(self):
        """Check that ``--include-package`` makes an out-of-tree predictor usable.

        A throwaway package ``testpackage`` is created under ``self.TEST_DIR``
        containing a copy of the BiDAF predictor module re-registered as
        ``duplicate-test-predictor``.  Running ``predict`` with that predictor
        name must raise ``ConfigurationError`` until ``--include-package
        testpackage`` is supplied, after which two predictions with the usual
        key set are expected.

        ``sys.path`` is restored in a ``finally`` clause so that a failure in
        this test does not leak the temporary directory into later tests.
        """
        # Create a new package in a temporary dir
        packagedir = self.TEST_DIR / 'testpackage'
        packagedir.mkdir()  # pylint: disable=no-member
        (packagedir / '__init__.py').touch()  # pylint: disable=no-member

        # And add that directory to the path
        sys.path.insert(0, str(self.TEST_DIR))
        try:
            # Write out a duplicate predictor there, but registered under a different name.
            from srl_model.predictors import bidaf
            with open(bidaf.__file__) as f:
                code = f.read().replace("""@Predictor.register('machine-comprehension')""",
                                        """@Predictor.register('duplicate-test-predictor')""")

            with open(os.path.join(packagedir, 'predictor.py'), 'w') as f:
                f.write(code)

            self.infile = os.path.join(self.TEST_DIR, "inputs.txt")
            self.outfile = os.path.join(self.TEST_DIR, "outputs.txt")

            with open(self.infile, 'w') as f:
                f.write("""{"passage": "the seahawks won the super bowl in 2016", """
                        """ "question": "when did the seahawks win the super bowl?"}\n""")
                f.write("""{"passage": "the mariners won the super bowl in 2037", """
                        """ "question": "when did the mariners win the super bowl?"}\n""")

            sys.argv = ["run.py",      # executable
                        "predict",     # command
                        str(self.bidaf_model_path),
                        str(self.infile),     # input_file
                        "--output-file", str(self.outfile),
                        "--predictor", "duplicate-test-predictor",
                        "--silent"]

            # Should raise ConfigurationError, because predictor is unknown
            with pytest.raises(ConfigurationError):
                main()

            # But once we include testpackage, it should be known
            sys.argv.extend(["--include-package", "testpackage"])
            main()

            assert os.path.exists(self.outfile)

            with open(self.outfile, 'r') as f:
                results = [json.loads(line) for line in f]

            assert len(results) == 2
            # The duplicate predictor is a copy registered under another name,
            # so no extra output fields are expected.
            expected_keys = {"span_start_logits", "span_end_logits",
                             "passage_question_attention", "question_tokens",
                             "passage_tokens", "span_start_probs", "span_end_probs",
                             "best_span", "best_span_str"}
            for result in results:
                assert set(result.keys()) == expected_keys
        finally:
            # Always undo the sys.path change, even when an assertion fails.
            sys.path.remove(str(self.TEST_DIR))
Exemple #4
0
    def test_fails_without_required_args(self):
        """``predict`` given an archive but no input file must exit with status 2."""
        # Executable, command and archive path only; the input file is left out.
        sys.argv = ["run.py", "predict", "/path/to/archive"]

        with self.assertRaises(SystemExit) as raised:
            main()

        # 2 is the exit status argparse uses for incorrect usage.
        exit_status = raised.exception.code
        assert exit_status == 2
Exemple #5
0
    def test_alternative_file_formats(self):
        """Check that a predictor may use CSV for both its input and output lines.

        A ``bidaf-csv`` predictor is registered whose ``load_line`` parses one
        CSV row into a passage/question dict and whose ``dump_line`` emits one
        CSV row.  Two rows are fed through ``predict`` and the two resulting
        rows are validated field by field.

        Files handed to the ``csv`` module are opened with ``newline=''`` as
        the ``csv`` documentation requires; without it, platforms that
        translate ``\\n`` on write produce corrupted line endings.
        """
        @Predictor.register('bidaf-csv')  # pylint: disable=unused-variable
        class BidafCsvPredictor(BidafPredictor):
            """Same as the BiDAF predictor, but using CSV inputs and outputs."""
            def load_line(self, line: str) -> JsonDict:
                # Parse a single CSV row holding exactly (passage, question).
                reader = csv.reader([line])
                passage, question = next(reader)
                return {"passage": passage, "question": question}

            def dump_line(self, outputs: JsonDict) -> str:
                # Emit: first start prob, first end prob, the unpacked
                # best_span entries, and the best span string.
                output = io.StringIO()
                writer = csv.writer(output)
                row = [outputs["span_start_probs"][0],
                       outputs["span_end_probs"][0],
                       *outputs["best_span"],
                       outputs["best_span_str"]]

                writer.writerow(row)
                return output.getvalue()

        # newline='' lets csv.writer control the line terminator itself.
        with open(self.infile, 'w', newline='') as f:
            writer = csv.writer(f)
            writer.writerow(["the seahawks won the super bowl in 2016",
                             "when did the seahawks win the super bowl?"])
            writer.writerow(["the mariners won the super bowl in 2037",
                             "when did the mariners win the super bowl?"])

        sys.argv = ["run.py",      # executable
                    "predict",     # command
                    str(self.bidaf_model_path),
                    str(self.infile),     # input_file
                    "--output-file", str(self.outfile),
                    "--predictor", 'bidaf-csv',
                    "--silent"]

        main()
        assert os.path.exists(self.outfile)

        with open(self.outfile, 'r', newline='') as f:
            results = list(csv.reader(f))

        assert len(results) == 2
        for row in results:
            assert len(row) == 5
            start_prob, end_prob, span_start, span_end, span = row
            for prob in (start_prob, end_prob):
                assert 0 <= float(prob) <= 1
            # Span indices must be ordered and no larger than 8.
            assert 0 <= int(span_start) <= int(span_end) <= 8
            assert span != ''

        shutil.rmtree(self.tempdir)
Exemple #6
0
    def test_fails_on_unknown_command(self):
        """An argument list with no recognised command must exit with status 2."""
        # NOTE(review): sys.argv[0] is conventionally the program name, so the
        # parser presumably sees "unknown_model" as the command -- confirm
        # against how main() slices sys.argv.
        positional = ["bogus", "unknown_model", "bogus file"]
        options = ["--output-file", "bogus out file", "--silent"]
        sys.argv = positional + options

        with self.assertRaises(SystemExit) as raised:
            main()

        # 2 is the exit status argparse uses for incorrect usage.
        exit_status = raised.exception.code
        assert exit_status == 2
Exemple #7
0
    def test_can_specify_predictor(self):
        """Check that ``--predictor`` selects an explicitly named predictor.

        A ``bidaf-explicit`` predictor is registered that adds an ``explicit``
        entry to every prediction.  After running ``predict`` with
        ``--predictor bidaf-explicit`` both output records must contain that
        extra key next to the usual ones.  The temporary directory
        ``self.tempdir`` is removed at the end.
        """
        @Predictor.register('bidaf-explicit')  # pylint: disable=unused-variable
        class Bidaf3Predictor(BidafPredictor):
            """BiDAF predictor that tags each prediction with ``explicit``."""
            def predict_json(self, inputs: JsonDict) -> JsonDict:
                prediction = super().predict_json(inputs)
                prediction["explicit"] = True
                return prediction

        input_records = (
            '{"passage": "the seahawks won the super bowl in 2016", '
            ' "question": "when did the seahawks win the super bowl?"}\n',
            '{"passage": "the mariners won the super bowl in 2037", '
            ' "question": "when did the mariners win the super bowl?"}\n',
        )
        with open(self.infile, 'w') as input_handle:
            for record in input_records:
                input_handle.write(record)

        # Positional part: executable, command, archive, input file.
        cli_args = ["run.py", "predict", str(self.bidaf_model_path), str(self.infile)]
        cli_args.extend(["--output-file", str(self.outfile)])
        cli_args.extend(["--predictor", "bidaf-explicit"])
        cli_args.append("--silent")
        sys.argv = cli_args

        main()
        assert os.path.exists(self.outfile)

        predictions = []
        with open(self.outfile, 'r') as output_handle:
            for raw_line in output_handle:
                predictions.append(json.loads(raw_line))

        assert len(predictions) == 2

        # The overriding predictor contributes the extra "explicit" field.
        expected_keys = {
            "span_start_logits", "span_end_logits",
            "passage_question_attention", "question_tokens",
            "passage_tokens", "span_start_probs", "span_end_probs",
            "best_span", "best_span_str", "explicit",
        }
        for prediction in predictions:
            assert set(prediction.keys()) == expected_keys

        shutil.rmtree(self.tempdir)
Exemple #8
0
    def test_subcommand_overrides(self):
        """Check that ``main`` uses a subcommand passed via ``subcommand_overrides``.

        A fake ``evaluate`` subcommand records whether its ``add_subparser``
        was invoked; the test passes it to ``main`` and asserts the flag is set.
        """
        def noop(_):
            return None

        class FakeEvaluate(Subcommand):
            # Flipped to True on the instance once add_subparser has run.
            add_subparser_called = False

            def add_subparser(self, name, parser):
                parser_kwargs = {"description": "fake", "help": "fake help"}
                subparser = parser.add_parser(name, **parser_kwargs)

                # The handler installed as ``func`` does nothing.
                subparser.set_defaults(func=noop)
                self.add_subparser_called = True

                return subparser

        override = FakeEvaluate()

        sys.argv = ["allennlp.run", "evaluate"]
        main(subcommand_overrides={"evaluate": override})

        assert override.add_subparser_called
Exemple #9
0
    def test_other_modules(self):
        """Check that ``configure`` can describe a class from an out-of-tree package.

        A throwaway package ``configuretestpackage`` is created under
        ``self.TEST_DIR`` containing a copy of the BiDAF predictor module
        re-registered as ``configure-test-predictor``.  The ``configure``
        command is run on that class with stdout captured, and the captured
        text must mention the new registration name.

        Both ``sys.stdout`` and ``sys.path`` are restored in ``finally``
        clauses so that a failure here cannot leave stdout redirected or the
        temporary directory on the import path for later tests.
        """
        # Create a new package in a temporary dir
        packagedir = self.TEST_DIR / 'configuretestpackage'
        packagedir.mkdir()  # pylint: disable=no-member
        (packagedir / '__init__.py').touch()  # pylint: disable=no-member

        # And add that directory to the path
        sys.path.insert(0, str(self.TEST_DIR))
        try:
            # Write out a duplicate predictor there, but registered under a different name.
            from srl_model.predictors import bidaf
            with open(bidaf.__file__) as f:
                code = f.read().replace(
                    """@Predictor.register('machine-comprehension')""",
                    """@Predictor.register('configure-test-predictor')""")

            with open(os.path.join(packagedir, 'predictor.py'), 'w') as f:
                f.write(code)

            sys.argv = [
                "run.py",  # executable
                "configure",  # command
                "configuretestpackage.predictor.BidafPredictor"
            ]

            # Capture stdout only for the duration of the command itself.
            stdout_saved = sys.stdout
            stdout_captured = StringIO()
            sys.stdout = stdout_captured
            try:
                main()
            finally:
                sys.stdout = stdout_saved

            output = stdout_captured.getvalue()
            assert "configure-test-predictor" in output
        finally:
            # Always undo the sys.path change, even when an assertion fails.
            sys.path.remove(str(self.TEST_DIR))
Exemple #10
0
    def test_other_modules(self):
        """Check that ``train`` can use a model supplied via ``--include-package``.

        A throwaway package ``testpackage`` is created under ``self.TEST_DIR``
        containing a copy of the simple tagger module re-registered as
        ``duplicate-test-tagger``.  Training with a config that names that
        model must raise ``ConfigurationError`` until ``--include-package
        testpackage`` is supplied.  Finally, the config file is rewritten with
        a different ``num_epochs`` and a further run is expected to raise
        ``ConfigurationError`` again.

        ``sys.path`` is restored in a ``finally`` clause so that a failure in
        this test does not leak the temporary directory into later tests.
        """
        # Create a new package in a temporary dir
        packagedir = self.TEST_DIR / 'testpackage'
        packagedir.mkdir()  # pylint: disable=no-member
        (packagedir / '__init__.py').touch()  # pylint: disable=no-member

        # And add that directory to the path
        sys.path.insert(0, str(self.TEST_DIR))
        try:
            # Write out a duplicate model there, but registered under a different name.
            from srl_model.models import simple_tagger
            with open(simple_tagger.__file__) as model_file:
                code = model_file.read().replace(
                    """@Model.register("simple_tagger")""",
                    """@Model.register("duplicate-test-tagger")""")

            with open(packagedir / 'model.py', 'w') as new_model_file:
                new_model_file.write(code)

            # Copy fixture there too.
            shutil.copy(self.FIXTURES_ROOT / 'data' / 'sequence_tagging.tsv',
                        self.TEST_DIR)
            data_path = str(self.TEST_DIR / 'sequence_tagging.tsv')

            # Write out config file
            config_path = self.TEST_DIR / 'config.json'
            config_json = """{
                "model": {
                        "type": "duplicate-test-tagger",
                        "text_field_embedder": {
                                "tokens": {
                                        "type": "embedding",
                                        "embedding_dim": 5
                                }
                        },
                        "encoder": {
                                "type": "lstm",
                                "input_size": 5,
                                "hidden_size": 7,
                                "num_layers": 2
                        }
                },
                "dataset_reader": {"type": "sequence_tagging"},
                "train_data_path": "$$$",
                "validation_data_path": "$$$",
                "iterator": {"type": "basic", "batch_size": 2},
                "trainer": {
                        "num_epochs": 2,
                        "optimizer": "adam"
                }
            }""".replace('$$$', data_path)
            with open(config_path, 'w') as config_file:
                config_file.write(config_json)

            serialization_dir = self.TEST_DIR / 'serialization'

            # Run train with using the non-allennlp module.
            sys.argv = [
                "bin/allennlp", "train",
                str(config_path), "-s",
                str(serialization_dir)
            ]

            # Shouldn't be able to find the model.
            with pytest.raises(ConfigurationError):
                main()

            # Now add the --include-package flag and it should work.
            # We also need to add --recover since the output directory already exists.
            sys.argv.extend(["--recover", "--include-package", 'testpackage'])

            main()

            # Rewrite out config file, but change a value.
            with open(config_path, 'w') as new_config_file:
                new_config_file.write(
                    config_json.replace('"num_epochs": 2,', '"num_epochs": 4,'))

            # This should fail because the config.json does not match that in the serialization directory.
            with pytest.raises(ConfigurationError):
                main()
        finally:
            # Always undo the sys.path change, even when an assertion fails.
            sys.path.remove(str(self.TEST_DIR))
Exemple #11
0
#!/usr/bin/env python
import logging
import os
import sys

# Any non-empty ALLENNLP_DEBUG value in the environment selects DEBUG logging.
LEVEL = logging.DEBUG if os.environ.get("ALLENNLP_DEBUG") else logging.INFO

# Prepend the parent of the directory containing this script to the import
# path (presumably so that ``srl_model`` resolves when run from a checkout).
sys.path.insert(
    0,
    os.path.dirname(
        os.path.abspath(os.path.join(__file__, os.pardir))
    ),
)
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
    level=LEVEL,
)

# Imported only after sys.path and logging have been set up above.
from srl_model.commands import main  # pylint: disable=wrong-import-position

if __name__ == "__main__":
    main(prog="allennlp")