Example #1
    def test_works_with_known_model(self):
        with open(self.infile, "w") as f:
            f.write(
                """{"sentence": "the seahawks won the super bowl in 2016"}\n"""
            )
            f.write(
                """{"sentence": "the mariners won the super bowl in 2037"}\n"""
            )

        sys.argv = [
            "__main__.py",  # executable
            "predict",  # command
            str(self.classifier_model_path),
            str(self.infile),  # input_file
            "--output-file",
            str(self.outfile),
            "--silent",
        ]

        main()

        assert os.path.exists(self.outfile)

        with open(self.outfile, "r") as f:
            results = [json.loads(line) for line in f]

        assert len(results) == 2
        for result in results:
            assert set(result.keys()) == {"label", "logits", "probs"}

        shutil.rmtree(self.tempdir)
Example #2
    def test_other_modules(self):
        # Create a new package in a temporary dir
        packagedir = self.TEST_DIR / "testpackage"
        packagedir.mkdir()
        (packagedir / "__init__.py").touch()

        # And add that directory to the path
        with push_python_path(self.TEST_DIR):
            # Write out a duplicate predictor there, but registered under a different name.
            from allennlp.predictors import text_classifier

            with open(text_classifier.__file__) as f:
                code = f.read().replace(
                    """@Predictor.register("text_classifier")""",
                    """@Predictor.register("duplicate-test-predictor")""",
                )

            with open(os.path.join(packagedir, "predictor.py"), "w") as f:
                f.write(code)

            self.infile = os.path.join(self.TEST_DIR, "inputs.txt")
            self.outfile = os.path.join(self.TEST_DIR, "outputs.txt")

            with open(self.infile, "w") as f:
                f.write(
                    """{"sentence": "the seahawks won the super bowl in 2016"}\n"""
                )
                f.write(
                    """{"sentence": "the mariners won the super bowl in 2037"}\n"""
                )

            sys.argv = [
                "__main__.py",  # executable
                "predict",  # command
                str(self.classifier_model_path),
                str(self.infile),  # input_file
                "--output-file",
                str(self.outfile),
                "--predictor",
                "duplicate-test-predictor",
                "--silent",
            ]

            # Should raise ConfigurationError, because predictor is unknown
            with pytest.raises(ConfigurationError):
                main()

            # But once we include testpackage, it should be known
            sys.argv.extend(["--include-package", "testpackage"])
            main()

            assert os.path.exists(self.outfile)

            with open(self.outfile, "r") as f:
                results = [json.loads(line) for line in f]

            assert len(results) == 2
            # The duplicate predictor should produce the same fields as the original
            for result in results:
                assert set(result.keys()) == {"label", "logits", "probs"}
Example #3
    def test_using_dataset_reader_works_with_specified_multitask_head(self):
        sys.argv = [
            "__main__.py",  # executable
            "predict",  # command
            str(self.classifier_model_path),
            "unittest",  # "path" of the input data, but it's not really a path for VQA
            "--output-file",
            str(self.outfile),
            "--silent",
            "--use-dataset-reader",
            "--multitask-head",
            "vqa",
        ]

        main()

        assert os.path.exists(self.outfile)

        with open(self.outfile, "r") as f:
            results = [json.loads(line) for line in f]

        assert len(results) == 3
        for result in results:
            assert "vqa_best_answer" in result.keys()

        shutil.rmtree(self.tempdir)
Example #4
    def test_using_dataset_reader_works_with_known_model(self):

        sys.argv = [
            "__main__.py",  # executable
            "predict",  # command
            str(self.classifier_model_path),
            str(self.classifier_data_path),  # input_file
            "--output-file",
            str(self.outfile),
            "--silent",
            "--use-dataset-reader",
        ]

        main()

        assert os.path.exists(self.outfile)

        with open(self.outfile, "r") as f:
            results = [json.loads(line) for line in f]

        assert len(results) == 3
        for result in results:
            assert set(result.keys()) == {"label", "logits", "loss", "probs"}

        shutil.rmtree(self.tempdir)
Example #5
    def test_average_embedding_works(self):
        sentence = "Michael went to the store to buy some eggs ."
        with open(self.sentences_path, 'w') as f:
            f.write(sentence)

        sys.argv = ["run.py",  # executable
                    "elmo",  # command
                    self.sentences_path,
                    self.output_path,
                    "--average",
                    "--options-file",
                    self.options_file,
                    "--weight-file",
                    self.weight_file]

        main()

        assert os.path.exists(self.output_path)

        embedder = ElmoEmbedder(options_file=self.options_file, weight_file=self.weight_file)
        expected_embedding = embedder.embed_sentence(sentence.split())
        expected_embedding = (expected_embedding[0] + expected_embedding[1] + expected_embedding[2]) / 3

        with h5py.File(self.output_path, 'r') as h5py_file:
            assert set(h5py_file.keys()) == {"0", "sentence_to_index"}
            # The vectors in the test configuration are smaller (32 length)
            embedding = h5py_file.get("0")
            assert embedding.shape == (len(sentence.split()), 32)
            numpy.testing.assert_allclose(embedding, expected_embedding, rtol=1e-4)
            assert json.loads(h5py_file.get("sentence_to_index")[0]) == {sentence: "0"}
Example #6
    def test_other_modules(self):
        # Create a new package in a temporary dir
        packagedir = self.TEST_DIR / 'configuretestpackage'
        packagedir.mkdir()  # pylint: disable=no-member
        (packagedir / '__init__.py').touch()  # pylint: disable=no-member

        # And add that directory to the path
        sys.path.insert(0, str(self.TEST_DIR))

        # Write out a duplicate predictor there, but registered under a different name.
        from allennlp.predictors import bidaf
        with open(bidaf.__file__) as f:
            code = f.read().replace("""@Predictor.register('machine-comprehension')""",
                                    """@Predictor.register('configure-test-predictor')""")

        with open(os.path.join(packagedir, 'predictor.py'), 'w') as f:
            f.write(code)

        # Capture stdout
        stdout_saved = sys.stdout
        stdout_captured = StringIO()
        sys.stdout = stdout_captured

        sys.argv = ["run.py",      # executable
                    "configure",     # command
                    "configuretestpackage.predictor.BidafPredictor"]

        main()
        output = stdout_captured.getvalue()
        assert "configure-test-predictor" in output

        sys.stdout = stdout_saved

        sys.path.remove(str(self.TEST_DIR))
Example #7
    def test_batch_embedding_works(self):
        sentences = [
                u"Michael went to the store to buy some eggs .",
                u"Joel rolled down the street on his skateboard .",
                u"test / this is a first sentence",
                u"Take a look , then , at Tuesday 's elections in New York City , New Jersey and Virginia :"
        ]

        with open(self.sentences_path, u'w') as f:
            for line in sentences:
                f.write(line + u'\n')

        sys.argv = [u"run.py",  # executable
                    u"elmo",  # command
                    self.sentences_path,
                    self.output_path,
                    u"--all",
                    u"--options-file",
                    self.options_file,
                    u"--weight-file",
                    self.weight_file]

        main()

        assert os.path.exists(self.output_path)

        with h5py.File(self.output_path, u'r') as h5py_file:
            assert set(h5py_file.keys()) == set([u"0", u"1", u"2", u"3", u"sentence_to_index"])
            # The vectors in the test configuration are smaller (32 length)
            for sentence_id, sentence in izip([u"0", u"1", u"2", u"3"], sentences):
                assert h5py_file.get(sentence_id).shape == (3, len(sentence.split()), 32)
            assert (json.loads(h5py_file.get(u"sentence_to_index")[0]) ==
                    dict((sentences[i], unicode(i)) for i in range(len(sentences))))
Example #8
    def test_batch_prediction_works_with_known_model(self):
        tempdir = tempfile.mkdtemp()
        infile = os.path.join(tempdir, "inputs.txt")
        outfile = os.path.join(tempdir, "outputs.txt")

        with open(infile, 'w') as f:
            f.write("""{"passage": "the seahawks won the super bowl in 2016", """
                    """ "question": "when did the seahawks win the super bowl?"}\n""")
            f.write("""{"passage": "the mariners won the super bowl in 2037", """
                    """ "question": "when did the mariners win the super bowl?"}\n""")

        sys.argv = ["run.py",  # executable
                    "predict",  # command
                    "tests/fixtures/bidaf/serialization/model.tar.gz",
                    infile,  # input_file
                    "--output-file", outfile,
                    "--silent",
                    "--batch-size", '2']

        main()

        assert os.path.exists(outfile)
        with open(outfile, 'r') as f:
            results = [json.loads(line) for line in f]

        assert len(results) == 2
        for result in results:
            assert set(result.keys()) == {"span_start_logits", "span_end_logits",
                                          "passage_question_attention", "question_tokens",
                                          "passage_tokens", "span_start_probs", "span_end_probs",
                                          "best_span", "best_span_str"}

        shutil.rmtree(tempdir)
Example #10
    def test_empty_sentences_are_filtered(self):
        tempdir = tempfile.mkdtemp()
        sentences_path = os.path.join(tempdir, "sentences.txt")
        output_path = os.path.join(tempdir, "output.txt")

        sentences = ["A", "", "", "B"]

        with open(sentences_path, 'w') as f:
            for line in sentences:
                f.write(line + '\n')

        sys.argv = [
            "run.py",  # executable
            "elmo",  # command
            sentences_path,
            output_path,
            "--all",
            "--options-file",
            self.options_file,
            "--weight-file",
            self.weight_file
        ]

        main()

        assert os.path.exists(output_path)

        with h5py.File(output_path, 'r') as h5py_file:
            assert len(h5py_file.keys()) == 2
            assert set(h5py_file.keys()) == set(["A", "B"])
Example #11
    def test_using_dataset_reader_works_with_known_model(self):

        sys.argv = ["run.py",      # executable
                    "predict",     # command
                    str(self.bidaf_model_path),
                    str(self.bidaf_data_path),     # input_file
                    "--output-file", str(self.outfile),
                    "--silent",
                    "--use-dataset-reader"]

        main()

        assert os.path.exists(self.outfile)

        with open(self.outfile, 'r') as f:
            results = [json.loads(line) for line in f]

        assert len(results) == 5
        for result in results:
            assert set(result.keys()) == {"span_start_logits", "span_end_logits",
                                          "passage_question_attention", "question_tokens",
                                          "passage_tokens", "span_start_probs", "span_end_probs",
                                          "best_span", "best_span_str", "loss"}

        shutil.rmtree(self.tempdir)
Example #12
    def test_works_with_known_model(self):
        with open(self.infile, 'w') as f:
            f.write("""{"passage": "the seahawks won the super bowl in 2016", """
                    """ "question": "when did the seahawks win the super bowl?"}\n""")
            f.write("""{"passage": "the mariners won the super bowl in 2037", """
                    """ "question": "when did the mariners win the super bowl?"}\n""")

        sys.argv = ["run.py",      # executable
                    "predict",     # command
                    str(self.bidaf_model_path),
                    str(self.infile),     # input_file
                    "--output-file", str(self.outfile),
                    "--silent"]

        main()

        assert os.path.exists(self.outfile)

        with open(self.outfile, 'r') as f:
            results = [json.loads(line) for line in f]

        assert len(results) == 2
        for result in results:
            assert set(result.keys()) == {"span_start_logits", "span_end_logits",
                                          "passage_question_attention", "question_tokens",
                                          "passage_tokens", "span_start_probs", "span_end_probs",
                                          "best_span", "best_span_str"}

        shutil.rmtree(self.tempdir)
Example #13
    def test_other_modules(self):
        # Create a new package in a temporary dir
        packagedir = self.TEST_DIR / u'configuretestpackage'
        packagedir.mkdir()  # pylint: disable=no-member
        (packagedir / u'__init__.py').touch()  # pylint: disable=no-member

        # And add that directory to the path
        sys.path.insert(0, unicode(self.TEST_DIR))

        # Write out a duplicate predictor there, but registered under a different name.
        from allennlp.predictors import bidaf
        with open(bidaf.__file__) as f:
            code = f.read().replace(u"""@Predictor.register('machine-comprehension')""",
                                    u"""@Predictor.register('configure-test-predictor')""")

        with open(os.path.join(packagedir, u'predictor.py'), u'w') as f:
            f.write(code)

        # Capture stdout
        stdout_saved = sys.stdout
        stdout_captured = StringIO()
        sys.stdout = stdout_captured

        sys.argv = [u"run.py",      # executable
                    u"configure",     # command
                    u"configuretestpackage.predictor.BidafPredictor"]

        main()
        output = stdout_captured.getvalue()
        assert u"configure-test-predictor" in output

        sys.stdout = stdout_saved

        sys.path.remove(unicode(self.TEST_DIR))
Example #14
    def test_base_predictor(self):
        # Tests when no Predictor is found and the base class implementation is used
        model_path = str(self.classifier_model_path)
        archive = load_archive(model_path)
        model_type = archive.config.get("model").get("type")
        # Makes sure that we don't have a DEFAULT_PREDICTOR for it. Otherwise the base class
        # implementation wouldn't be used
        del DEFAULT_PREDICTORS["basic_classifier"]
        assert model_type not in DEFAULT_PREDICTORS

        # Doesn't use a --predictor
        sys.argv = [
            "__main__.py",  # executable
            "predict",  # command
            model_path,
            str(self.classifier_data_path),  # input_file
            "--output-file",
            str(self.outfile),
            "--silent",
            "--use-dataset-reader",
        ]
        main()
        assert os.path.exists(self.outfile)
        with open(self.outfile, "r") as f:
            results = [json.loads(line) for line in f]

        assert len(results) == 3
        for result in results:
            assert set(result.keys()) == {"logits", "probs", "label", "loss"}
        DEFAULT_PREDICTORS["basic_classifier"] = "text_classifier"
Example #15
    def test_top_embedding_works(self):
        sentence = "Michael went to the store to buy some eggs ."
        with open(self.sentences_path, 'w') as f:
            f.write(sentence)

        sys.argv = [
            "run.py",  # executable
            "elmo",  # command
            self.sentences_path,
            self.output_path,
            "--top",
            "--options-file",
            self.options_file,
            "--weight-file",
            self.weight_file
        ]

        main()

        assert os.path.exists(self.output_path)

        embedder = ElmoEmbedder(options_file=self.options_file,
                                weight_file=self.weight_file)
        expected_embedding = embedder.embed_sentence(sentence.split())[2]

        with h5py.File(self.output_path, 'r') as h5py_file:
            assert list(h5py_file.keys()) == [sentence]
            # The vectors in the test configuration are smaller (32 length)
            embedding = h5py_file.get(sentence)
            assert embedding.shape == (len(sentence.split()), 32)
            numpy.testing.assert_allclose(embedding,
                                          expected_embedding,
                                          rtol=1e-4)
Example #16
    def test_batch_prediction_works_with_known_model(self):
        with open(self.infile, 'w') as f:
            f.write("""{"passage": "the seahawks won the super bowl in 2016", """
                    """ "question": "when did the seahawks win the super bowl?"}\n""")
            f.write("""{"passage": "the mariners won the super bowl in 2037", """
                    """ "question": "when did the mariners win the super bowl?"}\n""")

        sys.argv = ["run.py",  # executable
                    "predict",  # command
                    str(self.bidaf_model_path),
                    str(self.infile),  # input_file
                    "--output-file", str(self.outfile),
                    "--silent",
                    "--batch-size", '2']

        main()

        assert os.path.exists(self.outfile)
        with open(self.outfile, 'r') as f:
            results = [json.loads(line) for line in f]

        assert len(results) == 2
        for result in results:
            assert set(result.keys()) == {"span_start_logits", "span_end_logits",
                                          "passage_question_attention", "question_tokens",
                                          "passage_tokens", "span_start_probs", "span_end_probs",
                                          "best_span", "best_span_str"}

        shutil.rmtree(self.tempdir)
Example #17
    def test_duplicate_sentences(self):
        tempdir = tempfile.mkdtemp()
        sentences_path = os.path.join(tempdir, "sentences.txt")
        output_path = os.path.join(tempdir, "output.txt")

        sentences = [
                "Michael went to the store to buy some eggs .",
                "Michael went to the store to buy some eggs .",
        ]

        with open(sentences_path, 'w') as f:
            for line in sentences:
                f.write(line + '\n')

        sys.argv = ["run.py",  # executable
                    "elmo",  # command
                    sentences_path,
                    output_path,
                    "--all",
                    "--options-file",
                    self.options_file,
                    "--weight-file",
                    self.weight_file]

        main()

        assert os.path.exists(output_path)

        with h5py.File(output_path, 'r') as h5py_file:
            assert len(h5py_file.keys()) == 1
            assert set(h5py_file.keys()) == set(sentences)
            # The vectors in the test configuration are smaller (32 length)
            for sentence in set(sentences):
                assert h5py_file.get(sentence).shape == (3, len(sentence.split()), 32)
Example #18
    def test_using_dataset_reader_works_with_known_model(self):

        sys.argv = [
            u"run.py",  # executable
            u"predict",  # command
            unicode(self.bidaf_model_path),
            unicode(self.bidaf_data_path),  # input_file
            u"--output-file",
            unicode(self.outfile),
            u"--silent",
            u"--use-dataset-reader"
        ]

        main()

        assert os.path.exists(self.outfile)

        with open(self.outfile, u'r') as f:
            results = [json.loads(line) for line in f]

        assert len(results) == 5
        for result in results:
            assert set(result.keys()) == set([
                u"span_start_logits", u"span_end_logits",
                u"passage_question_attention", u"question_tokens",
                u"passage_tokens", u"span_start_probs", u"span_end_probs",
                u"best_span", u"best_span_str", u"loss"
            ])

        shutil.rmtree(self.tempdir)
Example #19
    def test_empty_sentences_are_filtered(self):
        tempdir = tempfile.mkdtemp()
        sentences_path = os.path.join(tempdir, "sentences.txt")
        output_path = os.path.join(tempdir, "output.txt")

        sentences = [
                "A",
                "",
                "",
                "B"
        ]

        with open(sentences_path, 'w') as f:
            for line in sentences:
                f.write(line + '\n')

        sys.argv = ["run.py",  # executable
                    "elmo",  # command
                    sentences_path,
                    output_path,
                    "--all",
                    "--options-file",
                    self.options_file,
                    "--weight-file",
                    self.weight_file]

        main()

        assert os.path.exists(output_path)

        with h5py.File(output_path, 'r') as h5py_file:
            assert len(h5py_file.keys()) == 2
            assert set(h5py_file.keys()) == set(["A", "B"])
Example #20
    def test_duplicate_sentences(self):
        sentences = [
                "Michael went to the store to buy some eggs .",
                "Michael went to the store to buy some eggs .",
        ]

        with open(self.sentences_path, 'w') as f:
            for line in sentences:
                f.write(line + '\n')

        sys.argv = ["run.py",  # executable
                    "elmo",  # command
                    self.sentences_path,
                    self.output_path,
                    "--all",
                    "--options-file",
                    self.options_file,
                    "--weight-file",
                    self.weight_file]

        main()

        assert os.path.exists(self.output_path)

        with h5py.File(self.output_path, 'r') as h5py_file:
            assert len(h5py_file.keys()) == 3
            assert set(h5py_file.keys()) == {"0", "1", "sentence_to_index"}
            # The vectors in the test configuration are smaller (32 length)
            for sentence_id, sentence in zip(["0", "1"], sentences):
                assert h5py_file.get(sentence_id).shape == (3, len(sentence.split()), 32)
Example #21
    def test_average_embedding_works(self):
        tempdir = tempfile.mkdtemp()
        sentences_path = os.path.join(tempdir, "sentences.txt")
        output_path = os.path.join(tempdir, "output.txt")

        sentence = "Michael went to the store to buy some eggs ."
        with open(sentences_path, 'w') as f:
            f.write(sentence)

        sys.argv = ["run.py",  # executable
                    "elmo",  # command
                    sentences_path,
                    output_path,
                    "--average",
                    "--options-file",
                    self.options_file,
                    "--weight-file",
                    self.weight_file]

        main()

        assert os.path.exists(output_path)

        embedder = ElmoEmbedder(options_file=self.options_file, weight_file=self.weight_file)
        expected_embedding = embedder.embed_sentence(sentence.split())
        expected_embedding = (expected_embedding[0] + expected_embedding[1] + expected_embedding[2]) / 3

        with h5py.File(output_path, 'r') as h5py_file:
            assert list(h5py_file.keys()) == [sentence]
            # The vectors in the test configuration are smaller (32 length)
            embedding = h5py_file.get(sentence)
            assert embedding.shape == (len(sentence.split()), 32)
            numpy.testing.assert_allclose(embedding, expected_embedding, rtol=1e-4)
Example #22
    def test_batch_embedding_works_with_sentences_as_keys(self):
        sentences = [
                "Michael went to the store to buy some eggs .",
                "Joel rolled down the street on his skateboard ."
        ]

        with open(self.sentences_path, 'w') as f:
            for line in sentences:
                f.write(line + '\n')

        sys.argv = ["run.py",  # executable
                    "elmo",  # command
                    self.sentences_path,
                    self.output_path,
                    "--all",
                    "--options-file",
                    self.options_file,
                    "--weight-file",
                    self.weight_file,
                    "--use-sentence-keys"]
        main()

        assert os.path.exists(self.output_path)

        with h5py.File(self.output_path, 'r') as h5py_file:
            assert set(h5py_file.keys()) == set(sentences)
            # The vectors in the test configuration are smaller (32 length)
            for sentence in sentences:
                assert h5py_file.get(sentence).shape == (3, len(sentence.split()), 32)
Example #23
    def test_batch_embedding_works_with_forget_sentences(self):
        sentences = [
                "Michael went to the store to buy some eggs .",
                "Joel rolled down the street on his skateboard .",
                "test / this is a first sentence",
                "Take a look , then , at Tuesday 's elections in New York City , New Jersey and Virginia :"
        ]

        with open(self.sentences_path, 'w') as f:
            for line in sentences:
                f.write(line + '\n')

        sys.argv = ["run.py",  # executable
                    "elmo",  # command
                    self.sentences_path,
                    self.output_path,
                    "--all",
                    "--options-file",
                    self.options_file,
                    "--weight-file",
                    self.weight_file,
                    "--forget-sentences"]

        main()

        assert os.path.exists(self.output_path)

        with h5py.File(self.output_path, 'r') as h5py_file:
            assert set(h5py_file.keys()) == {"0", "1", "2", "3"}
            # The vectors in the test configuration are smaller (32 length)
            for sentence_id, sentence in zip(["0", "1", "2", "3"], sentences):
                assert h5py_file.get(sentence_id).shape == (3, len(sentence.split()), 32)
Example #24
def run(exp_name, config_file):
    print(os.getcwd())
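    # NOTE: `override_dict` is assumed to be defined elsewhere in the original module
    # (presumably a JSON/Jsonnet string of config overrides passed to `-o`).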
    command = f"allennlp train ./configs/{exp_name}/{config_file}.json -s ./output/debug/{exp_name}/{config_file.split('.')[0]}  -f  -o {override_dict} --include-package allennlp_plugins"
    print(sys.argv)
    print(command)
    sys.argv = command.split()

    main()
Example #25
    def test_other_modules(self):
        # Create a new package in a temporary dir
        packagedir = os.path.join(self.TEST_DIR, 'testpackage')
        pathlib.Path(packagedir).mkdir()
        pathlib.Path(os.path.join(packagedir, '__init__.py')).touch()

        # And add that directory to the path
        sys.path.insert(0, self.TEST_DIR)

        # Write out a duplicate predictor there, but registered under a different name.
        from allennlp.service.predictors import bidaf
        with open(bidaf.__file__) as f:
            code = f.read().replace("""@Predictor.register('machine-comprehension')""",
                                    """@Predictor.register('duplicate-test-predictor')""")

        with open(os.path.join(packagedir, 'predictor.py'), 'w') as f:
            f.write(code)

        infile = os.path.join(self.TEST_DIR, "inputs.txt")
        outfile = os.path.join(self.TEST_DIR, "outputs.txt")

        with open(infile, 'w') as f:
            f.write("""{"passage": "the seahawks won the super bowl in 2016", """
                    """ "question": "when did the seahawks win the super bowl?"}\n""")
            f.write("""{"passage": "the mariners won the super bowl in 2037", """
                    """ "question": "when did the mariners win the super bowl?"}\n""")

        sys.argv = ["run.py",      # executable
                    "predict",     # command
                    "tests/fixtures/bidaf/serialization/model.tar.gz",
                    infile,     # input_file
                    "--output-file", outfile,
                    "--predictor", "duplicate-test-predictor",
                    "--silent"]

        # Should raise ConfigurationError, because predictor is unknown
        with pytest.raises(ConfigurationError):
            main()

        # But once we include testpackage, it should be known
        sys.argv.extend(["--include-package", "testpackage"])
        main()

        assert os.path.exists(outfile)

        with open(outfile, 'r') as f:
            results = [json.loads(line) for line in f]

        assert len(results) == 2
        # The duplicate predictor should produce the same fields as the original
        for result in results:
            assert set(result.keys()) == {"span_start_logits", "span_end_logits",
                                          "passage_question_attention", "question_tokens",
                                          "passage_tokens", "span_start_probs", "span_end_probs",
                                          "best_span", "best_span_str"}

        sys.path.remove(self.TEST_DIR)
Example #27
    def test_alternative_file_formats(self):
        tempdir = tempfile.mkdtemp()
        infile = os.path.join(tempdir, "inputs.txt")
        outfile = os.path.join(tempdir, "outputs.txt")

        @Predictor.register('bidaf-csv')  # pylint: disable=unused-variable
        class BidafCsvPredictor(BidafPredictor):
            """same as bidaf predictor but using CSV inputs and outputs"""
            def load_line(self, line: str) -> JsonDict:
                reader = csv.reader([line])
                passage, question = next(reader)
                return {"passage": passage, "question": question}

            def dump_line(self, outputs: JsonDict) -> str:
                output = io.StringIO()
                writer = csv.writer(output)
                row = [outputs["span_start_probs"][0],
                       outputs["span_end_probs"][0],
                       *outputs["best_span"],
                       outputs["best_span_str"]]

                writer.writerow(row)
                return output.getvalue()

        with open(infile, 'w') as f:
            writer = csv.writer(f)
            writer.writerow(["the seahawks won the super bowl in 2016",
                             "when did the seahawks win the super bowl?"])
            writer.writerow(["the mariners won the super bowl in 2037",
                             "when did the mariners win the super bowl?"])


        sys.argv = ["run.py",      # executable
                    "predict",     # command
                    "tests/fixtures/bidaf/serialization/model.tar.gz",
                    infile,     # input_file
                    "--output-file", outfile,
                    "--silent"]

        main(predictor_overrides={'bidaf': 'bidaf-csv'})
        assert os.path.exists(outfile)

        with open(outfile, 'r') as f:
            reader = csv.reader(f)
            results = [row for row in reader]

        assert len(results) == 2
        for row in results:
            assert len(row) == 5
            start_prob, end_prob, span_start, span_end, span = row
            for prob in (start_prob, end_prob):
                assert 0 <= float(prob) <= 1
            assert 0 <= int(span_start) <= int(span_end) <= 8
            assert span != ''

        shutil.rmtree(tempdir)
Example #28
    def test_alternative_file_formats(self):
        tempdir = tempfile.mkdtemp()
        infile = os.path.join(tempdir, "inputs.txt")
        outfile = os.path.join(tempdir, "outputs.txt")

        @Predictor.register('bidaf-csv')  # pylint: disable=unused-variable
        class BidafCsvPredictor(BidafPredictor):
            """same as bidaf predictor but using CSV inputs and outputs"""
            def load_line(self, line: str) -> JsonDict:
                reader = csv.reader([line])
                passage, question = next(reader)
                return {"passage": passage, "question": question}

            def dump_line(self, outputs: JsonDict) -> str:
                output = io.StringIO()
                writer = csv.writer(output)
                row = [outputs["span_start_probs"][0],
                       outputs["span_end_probs"][0],
                       *outputs["best_span"],
                       outputs["best_span_str"]]

                writer.writerow(row)
                return output.getvalue()

        with open(infile, 'w') as f:
            writer = csv.writer(f)
            writer.writerow(["the seahawks won the super bowl in 2016",
                             "when did the seahawks win the super bowl?"])
            writer.writerow(["the mariners won the super bowl in 2037",
                             "when did the mariners win the super bowl?"])

        sys.argv = ["run.py",      # executable
                    "predict",     # command
                    "tests/fixtures/bidaf/serialization/model.tar.gz",
                    infile,     # input_file
                    "--output-file", outfile,
                    "--predictor", 'bidaf-csv',
                    "--silent"]

        main()
        assert os.path.exists(outfile)

        with open(outfile, 'r') as f:
            reader = csv.reader(f)
            results = [row for row in reader]

        assert len(results) == 2
        for row in results:
            assert len(row) == 5
            start_prob, end_prob, span_start, span_end, span = row
            for prob in (start_prob, end_prob):
                assert 0 <= float(prob) <= 1
            assert 0 <= int(span_start) <= int(span_end) <= 8
            assert span != ''

        shutil.rmtree(tempdir)
Example #29
    def test_can_specify_extra_args(self):
        @Predictor.register("classification-extra-args")
        class ExtraArgsPredictor(TextClassifierPredictor):
            def __init__(
                self,
                model: Model,
                dataset_reader: DatasetReader,
                frozen: bool = True,
                tag: str = "",
            ) -> None:
                super().__init__(model, dataset_reader, frozen)
                self.tag = tag

            def predict_json(self, inputs: JsonDict) -> JsonDict:
                result = super().predict_json(inputs)
                result["tag"] = self.tag
                return result

        with open(self.infile, "w") as f:
            f.write("""{"sentence": "the seahawks won the super bowl in 2016"}\n""")
            f.write("""{"sentence": "the mariners won the super bowl in 2037"}\n""")

        sys.argv = [
            "__main__.py",  # executable
            "predict",  # command
            str(self.classifier_model_path),
            str(self.infile),  # input_file
            "--output-file",
            str(self.outfile),
            "--predictor",
            "classification-extra-args",
            "--silent",
            "--predictor-args",
            """{"tag": "fish"}""",
        ]

        main()
        assert os.path.exists(self.outfile)

        with open(self.outfile, "r") as f:
            results = [json.loads(line) for line in f]

        assert len(results) == 2
        # Overridden predictor should output extra field
        for result in results:
            assert set(result.keys()) == {
                "label",
                "logits",
                "tag",
                "probs",
                "tokens",
                "token_ids",
            }
            assert result["tag"] == "fish"

        shutil.rmtree(self.tempdir)
Example #30
    def test_remove_with_missing_positionals(self, capsys):
        sys.argv = [
            "allennlp",
            "cached-path",
            "--cache-dir",
            str(self.TEST_DIR),
            "--remove",
        ]
        with pytest.raises(RuntimeError, match="Missing positional"):
            main()
Example #31
    def test_fails_without_required_args(self):
        sys.argv = ["run.py",            # executable
                    "predict",           # command
                    "/path/to/archive",  # archive, but no input file
                   ]

        with self.assertRaises(SystemExit) as cm:  # pylint: disable=invalid-name
            main()

        assert cm.exception.code == 2  # argparse code for incorrect usage
Example #33
    def test_can_specify_predictor(self):
        @Predictor.register("bidaf-explicit")
        class Bidaf3Predictor(BidafPredictor):
            """same as bidaf predictor but with an extra field"""
            def predict_json(self, inputs: JsonDict) -> JsonDict:
                result = super().predict_json(inputs)
                result["explicit"] = True
                return result

        with open(self.infile, "w") as f:
            f.write(
                """{"passage": "the seahawks won the super bowl in 2016", """
                """ "question": "when did the seahawks win the super bowl?"}\n"""
            )
            f.write(
                """{"passage": "the mariners won the super bowl in 2037", """
                """ "question": "when did the mariners win the super bowl?"}\n"""
            )

        sys.argv = [
            "run.py",  # executable
            "predict",  # command
            str(self.bidaf_model_path),
            str(self.infile),  # input_file
            "--output-file",
            str(self.outfile),
            "--predictor",
            "bidaf-explicit",
            "--silent",
        ]

        main()
        assert os.path.exists(self.outfile)

        with open(self.outfile, "r") as f:
            results = [json.loads(line) for line in f]

        assert len(results) == 2
        # Overridden predictor should output extra field
        for result in results:
            assert set(result.keys()) == {
                "span_start_logits",
                "span_end_logits",
                "passage_question_attention",
                "question_tokens",
                "passage_tokens",
                "span_start_probs",
                "span_end_probs",
                "best_span",
                "best_span_str",
                "token_offsets",
                "explicit",
            }

        shutil.rmtree(self.tempdir)
Example #34
    def test_fails_without_required_args(self):
        sys.argv = [
            "run.py",  # executable
            "predict",  # command
            "/path/to/archive",  # archive, but no input file
        ]

        with self.assertRaises(SystemExit) as cm:
            main()

        assert cm.exception.code == 2  # argparse code for incorrect usage
Example #35
    def test_can_specify_predictor(self):
        @Predictor.register('bidaf-explicit')  # pylint: disable=unused-variable
        class Bidaf3Predictor(BidafPredictor):
            """same as bidaf predictor but with an extra field"""
            def predict_json(self,
                             inputs: JsonDict,
                             cuda_device: int = -1) -> JsonDict:
                result = super().predict_json(inputs)
                result["explicit"] = True
                return result

        tempdir = tempfile.mkdtemp()
        infile = os.path.join(tempdir, "inputs.txt")
        outfile = os.path.join(tempdir, "outputs.txt")

        with open(infile, 'w') as f:
            f.write(
                """{"passage": "the seahawks won the super bowl in 2016", """
                """ "question": "when did the seahawks win the super bowl?"}\n"""
            )
            f.write(
                """{"passage": "the mariners won the super bowl in 2037", """
                """ "question": "when did the mariners win the super bowl?"}\n"""
            )

        sys.argv = [
            "run.py",  # executable
            "predict",  # command
            "tests/fixtures/bidaf/serialization/model.tar.gz",
            infile,  # input_file
            "--output-file",
            outfile,
            "--predictor",
            "bidaf-explicit",
            "--silent"
        ]

        main()
        assert os.path.exists(outfile)

        with open(outfile, 'r') as f:
            results = [json.loads(line) for line in f]

        assert len(results) == 2
        # Overridden predictor should output extra field
        for result in results:
            assert set(result.keys()) == {
                "span_start_logits", "span_end_logits",
                "passage_question_attention", "question_tokens",
                "passage_tokens", "span_start_probs", "span_end_probs",
                "best_span", "best_span_str", "explicit"
            }

        shutil.rmtree(tempdir)
Example #36
    def test_subcommand_plugin_is_available(self):
        # Test originally copied from
        # `allennlp.tests.commands.main_test.TestMain.test_subcommand_plugin_is_available`.

        sys.argv = ["allennlp"]

        with io.StringIO() as buf, redirect_stdout(buf):
            main()
            output = buf.getvalue()

        self.assertIn("    serve", output)
Example #37
    def test_fails_without_required_args(self):
        sys.argv = [
            "__main__.py",  # executable
            "predict",  # command
            "/path/to/archive",  # archive, but no input file
        ]

        with pytest.raises(SystemExit) as cm:
            main()

        assert cm.value.code == 2  # argparse code for incorrect usage
Example #38
    def test_fails_on_unknown_command(self):
        sys.argv = ["bogus",         # command
                    "unknown_model", # model_name
                    "bogus file",    # input_file
                    "--output-file", "bogus out file",
                    "--silent"]

        with self.assertRaises(SystemExit) as cm:  # pylint: disable=invalid-name
            main()

        assert cm.exception.code == 2  # argparse code for incorrect usage
Example #40
    def test_inspect_with_bad_options(self, capsys):
        sys.argv = [
            "allennlp",
            "cached-path",
            "--cache-dir",
            str(self.TEST_DIR),
            "--inspect",
            "--extract-archive",
        ]
        with pytest.raises(RuntimeError, match="--extract-archive"):
            main()
Example #41
def run(exp_name, config_file, qianru = '--recover'):
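    # `qianru` holds the flag controlling reuse of the serialization directory; per the
    # commented-out mapping below it was originally meant to accept 'r' (--recover) or 'f' (--force).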
    # if qianru == 'r':
    #     qianru = '--recover'
    # elif qianru == 'f':
    #     qianru = '--force'
    # else:
    #     raise Exception
    print(os.getcwd())
    command = f"allennlp train ./configs/{exp_name}/{config_file}.json -s ./output/{exp_name}/{config_file.split('.')[0]}   {qianru}   --include-package my_library"
    sys.argv = command.split()
    print(sys.argv)
    main()
Example #42
    def test_can_specify_predictor(self):

        @Predictor.register('bidaf-explicit')  # pylint: disable=unused-variable
        class Bidaf3Predictor(BidafPredictor):
            """same as bidaf predictor but with an extra field"""
            def predict_json(self, inputs: JsonDict, cuda_device: int = -1) -> JsonDict:
                result = super().predict_json(inputs)
                result["explicit"] = True
                return result

        tempdir = tempfile.mkdtemp()
        infile = os.path.join(tempdir, "inputs.txt")
        outfile = os.path.join(tempdir, "outputs.txt")

        with open(infile, 'w') as f:
            f.write("""{"passage": "the seahawks won the super bowl in 2016", """
                    """ "question": "when did the seahawks win the super bowl?"}\n""")
            f.write("""{"passage": "the mariners won the super bowl in 2037", """
                    """ "question": "when did the mariners win the super bowl?"}\n""")

        sys.argv = ["run.py",      # executable
                    "predict",     # command
                    "tests/fixtures/bidaf/serialization/model.tar.gz",
                    infile,     # input_file
                    "--output-file", outfile,
                    "--predictor", "bidaf-explicit",
                    "--silent"]

        main()
        assert os.path.exists(outfile)

        with open(outfile, 'r') as f:
            results = [json.loads(line) for line in f]

        assert len(results) == 2
        # Overridden predictor should output extra field
        for result in results:
            assert set(result.keys()) == {"span_start_logits", "span_end_logits",
                                          "passage_question_attention", "question_tokens",
                                          "passage_tokens", "span_start_probs", "span_end_probs",
                                          "best_span", "best_span_str", "explicit"}

        shutil.rmtree(tempdir)
Example #43
    def test_other_modules(self):
        # Create a new package in a temporary dir
        packagedir = self.TEST_DIR / 'configuretestpackage'
        packagedir.mkdir()  # pylint: disable=no-member
        (packagedir / '__init__.py').touch()  # pylint: disable=no-member

        # And add that directory to the path
        sys.path.insert(0, str(self.TEST_DIR))

        # Write out a duplicate predictor there, but registered under a different name.
        from allennlp.predictors import bidaf
        with open(bidaf.__file__) as f:
            code = f.read().replace("""@Predictor.register('machine-comprehension')""",
                                    """@Predictor.register('configure-test-predictor')""")

        with open(os.path.join(packagedir, 'predictor.py'), 'w') as f:
            f.write(code)

        app = None

        # Monkeypatch the run function
        def run_wizard(args: argparse.Namespace) -> None:
            nonlocal app

            app = make_app(args.include_package)
            app.testing = True

        configure_command._run_wizard = run_wizard

        sys.argv = ["run.py",      # executable
                    "configure",     # command
                    "--include-package", "configuretestpackage.predictor"]

        main()

        client = app.test_client()

        response = client.get('/api/config/?class=allennlp.predictors.predictor.Predictor&get_choices=true')
        data = json.loads(response.get_data())
        choices = data.get('choices', ())
        assert 'configuretestpackage.predictor.BidafPredictor' in choices
Example #44
    def test_subcommand_overrides(self):
        def do_nothing(_):
            pass

        class FakeEvaluate(Subcommand):
            add_subparser_called = False

            def add_subparser(self, name, parser):
                subparser = parser.add_parser(name,
                                              description="fake",
                                              help="fake help")

                subparser.set_defaults(func=do_nothing)
                self.add_subparser_called = True

                return subparser

        fake_evaluate = FakeEvaluate()

        sys.argv = ["allennlp.run", "evaluate"]
        main(subcommand_overrides={"evaluate": fake_evaluate})

        assert fake_evaluate.add_subparser_called
Example #45
#!/usr/bin/env python
import logging
import os
import sys

if os.environ.get("ALLENNLP_DEBUG"):
    LEVEL = logging.DEBUG
else:
    LEVEL = logging.INFO

sys.path.insert(0, os.path.dirname(os.path.abspath(os.path.join(__file__, os.pardir))))
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
                    level=LEVEL)

from allennlp.commands import main  # pylint: disable=wrong-import-position

if __name__ == "__main__":
    main(prog="python -m allennlp.run")
Example #46
#!/usr/bin/env python
import logging
import os
import sys

if os.environ.get("ALLENNLP_DEBUG"):
    LEVEL = logging.DEBUG
else:
    LEVEL = logging.INFO

sys.path.insert(0, os.path.dirname(os.path.abspath(os.path.join(__file__, os.pardir))))
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
                    level=LEVEL)

from allennlp.commands import main  # pylint: disable=wrong-import-position

if __name__ == "__main__":
    main(prog="allennlp")
Example #47
    def test_other_modules(self):
        # Create a new package in a temporary dir
        packagedir = self.TEST_DIR / 'testpackage'
        packagedir.mkdir()  # pylint: disable=no-member
        (packagedir / '__init__.py').touch()  # pylint: disable=no-member

        # And add that directory to the path
        sys.path.insert(0, str(self.TEST_DIR))

        # Write out a duplicate model there, but registered under a different name.
        from allennlp.models import simple_tagger
        with open(simple_tagger.__file__) as model_file:
            code = model_file.read().replace("""@Model.register("simple_tagger")""",
                                             """@Model.register("duplicate-test-tagger")""")

        with open(packagedir / 'model.py', 'w') as new_model_file:
            new_model_file.write(code)

        # Copy fixture there too.
        shutil.copy(self.FIXTURES_ROOT / 'data' / 'sequence_tagging.tsv', self.TEST_DIR)
        data_path = str(self.TEST_DIR / 'sequence_tagging.tsv')

        # Write out config file
        config_path = self.TEST_DIR / 'config.json'
        config_json = """{
                "model": {
                        "type": "duplicate-test-tagger",
                        "text_field_embedder": {
                                "tokens": {
                                        "type": "embedding",
                                        "embedding_dim": 5
                                }
                        },
                        "encoder": {
                                "type": "lstm",
                                "input_size": 5,
                                "hidden_size": 7,
                                "num_layers": 2
                        }
                },
                "dataset_reader": {"type": "sequence_tagging"},
                "train_data_path": "$$$",
                "validation_data_path": "$$$",
                "iterator": {"type": "basic", "batch_size": 2},
                "trainer": {
                        "num_epochs": 2,
                        "optimizer": "adam"
                }
            }""".replace('$$$', data_path)
        with open(config_path, 'w') as config_file:
            config_file.write(config_json)

        serialization_dir = self.TEST_DIR / 'serialization'

        # Run train with using the non-allennlp module.
        sys.argv = ["bin/allennlp",
                    "train", str(config_path),
                    "-s", str(serialization_dir)]

        # Shouldn't be able to find the model.
        with pytest.raises(ConfigurationError):
            main()

        # Now add the --include-package flag and it should work.
        # We also need to add --recover since the output directory already exists.
        sys.argv.extend(["--recover", "--include-package", 'testpackage'])

        main()

        # Rewrite out config file, but change a value.
        with open(config_path, 'w') as new_config_file:
            new_config_file.write(config_json.replace('"num_epochs": 2,', '"num_epochs": 4,'))

        # This should fail because the config.json does not match that in the serialization directory.
        with pytest.raises(ConfigurationError):
            main()

        sys.path.remove(str(self.TEST_DIR))