Example #1
def _get_predictor(args: argparse.Namespace, predictors: Dict[str, str]) -> Predictor:
    archive = load_archive(args.archive_file,
                           weights_file=args.weights_file,
                           cuda_device=args.cuda_device,
                           overrides=args.overrides)

    if args.predictor:
        # Predictor explicitly specified, so use it
        return Predictor.from_archive(archive, args.predictor)

    # Otherwise, use the mapping
    model_type = archive.config.get("model").get("type")
    if model_type not in predictors:
        raise ConfigurationError("no known predictor for model type {}".format(model_type))
    return Predictor.from_archive(archive, predictors[model_type])
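A minimal sketch of how a helper like this might be driven from the command line. The flag names, the placeholder archive path, and the predictors mapping below are illustrative assumptions, not part of the example above:

# Hypothetical driver for _get_predictor; flags, paths, and mapping are assumptions.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('archive_file')
parser.add_argument('--weights-file', default=None)
parser.add_argument('--cuda-device', type=int, default=-1)
parser.add_argument('--overrides', default="")
parser.add_argument('--predictor', default=None)
args = parser.parse_args(['model.tar.gz'])  # placeholder archive path

# Maps the archive's model "type" to a registered predictor name.
predictors = {'srl': 'semantic-role-labeling', 'bidaf': 'machine-comprehension'}
predictor = _get_predictor(args, predictors)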
Example #2
def get_predictor(file_path):
    """get the predictor"""
    archive = load_archive(file_path)
    file_type = archive.config.get("model").get("type")
    predictor_cf = Predictor.from_archive(archive,
                                          DEFAULT_PREDICTORS[file_type])
    return predictor_cf
Example #3
    def test_uses_named_inputs(self):
        inputs = {
            "sentence":
            "The squirrel wrote a unit test to make sure its nuts worked as designed."
        }

        archive = load_archive(self.FIXTURES_ROOT / 'srl' / 'serialization' /
                               'model.tar.gz')
        predictor = Predictor.from_archive(archive, 'semantic-role-labeling')

        result = predictor.predict_json(inputs)

        words = result.get("words")
        assert words == [
            "The", "squirrel", "wrote", "a", "unit", "test", "to", "make",
            "sure", "its", "nuts", "worked", "as", "designed", "."
        ]
        num_words = len(words)

        verbs = result.get("verbs")
        assert verbs is not None
        assert isinstance(verbs, list)

        assert any(v["verb"] == "wrote" for v in verbs)
        assert any(v["verb"] == "make" for v in verbs)
        assert any(v["verb"] == "worked" for v in verbs)

        for verb in verbs:
            tags = verb.get("tags")
            assert tags is not None
            assert isinstance(tags, list)
            assert all(isinstance(tag, str) for tag in tags)
            assert len(tags) == num_words
Example #4
    def test_uses_named_inputs(self):
        inputs = {
                "sentence": "The squirrel wrote a unit test to make sure its nuts worked as designed."
        }

        archive = load_archive('tests/fixtures/srl/serialization/model.tar.gz')
        predictor = Predictor.from_archive(archive, 'semantic-role-labeling')

        result = predictor.predict_json(inputs)
        words = result.get("words")
        assert words == ["The", "squirrel", "wrote", "a", "unit", "test",
                         "to", "make", "sure", "its", "nuts", "worked", "as", "designed", "."]
        num_words = len(words)

        verbs = result.get("verbs")
        assert verbs is not None
        assert isinstance(verbs, list)

        assert any(v["verb"] == "wrote" for v in verbs)
        assert any(v["verb"] == "make" for v in verbs)
        assert any(v["verb"] == "worked" for v in verbs)

        for verb in verbs:
            tags = verb.get("tags")
            assert tags is not None
            assert isinstance(tags, list)
            assert all(isinstance(tag, str) for tag in tags)
            assert len(tags) == num_words
Example #5
    def test_uses_named_inputs(self):
        inputs = {
            "premise": "I always write unit tests for my code.",
            "hypothesis": "One time I didn't write any unit tests for my code."
        }

        archive = load_archive(self.FIXTURES_ROOT / 'decomposable_attention' /
                               'serialization' / 'model.tar.gz')
        predictor = Predictor.from_archive(archive, 'textual-entailment')
        result = predictor.predict_json(inputs)

        # Label probs should be 3 floats that sum to one
        label_probs = result.get("label_probs")
        assert label_probs is not None
        assert isinstance(label_probs, list)
        assert len(label_probs) == 3
        assert all(isinstance(x, float) for x in label_probs)
        assert all(x >= 0 for x in label_probs)
        assert sum(label_probs) == approx(1.0)

        # Logits should be 3 floats that softmax to label_probs
        label_logits = result.get("label_logits")
        assert label_logits is not None
        assert isinstance(label_logits, list)
        assert len(label_logits) == 3
        assert all(isinstance(x, float) for x in label_logits)

        exps = [math.exp(x) for x in label_logits]
        sumexps = sum(exps)
        for e, p in zip(exps, label_probs):
            assert e / sumexps == approx(p)
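The logits check above is just a numerical softmax identity. A standalone sketch of the same computation (the logit values here are made up):

# Softmax mirroring the assertions above; the logits are arbitrary examples.
import math

def softmax(logits):
    exps = [math.exp(x) for x in logits]
    total = sum(exps)
    return [e / total for e in exps]

label_probs = softmax([2.0, -1.0, 0.5])
assert abs(sum(label_probs) - 1.0) < 1e-9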
Example #6
    def test_batch_prediction(self):
        inputs = [
                {"sentence": "What a great test sentence."},
                {"sentence": "Here's another good, interesting one."}
        ]

        archive = load_archive('tests/fixtures/constituency_parser/serialization/model.tar.gz')
        predictor = Predictor.from_archive(archive, 'constituency-parser')
        results = predictor.predict_batch_json(inputs)

        result = results[0]
        assert len(result["spans"]) == 21 # number of possible substrings of the sentence.
        assert len(result["class_probabilities"]) == 21
        assert result["tokens"] == ["What", "a", "great", "test", "sentence", "."]
        assert isinstance(result["trees"], str)

        for class_distribution in result["class_probabilities"]:
            self.assertAlmostEqual(sum(class_distribution), 1.0, places=4)

        result = results[1]

        assert len(result["spans"]) == 36 # number of possible substrings of the sentence.
        assert len(result["class_probabilities"]) == 36
        assert result["tokens"] == ["Here", "'s", "another", "good", ",", "interesting", "one", "."]
        assert isinstance(result["trees"], str)

        for class_distribution in result["class_probabilities"]:
            self.assertAlmostEqual(sum(class_distribution), 1.0, places=4)
Example #7
def _get_predictor(args: argparse.Namespace, predictors: Dict[str, str]) -> Predictor:
    archive = load_archive(args.archive_file, cuda_device=args.cuda_device, overrides=args.overrides)
    model_type = archive.config.get("model").get("type")
    if model_type not in predictors:
        raise ConfigurationError("no known predictor for model type {}".format(model_type))
    predictor = Predictor.from_archive(archive, predictors[model_type])
    return predictor
Example #8
    def test_uses_named_inputs(self):
        inputs = {
                "premise": "I always write unit tests for my code.",
                "hypothesis": "One time I didn't write any unit tests for my code."
        }

        archive = load_archive('tests/fixtures/decomposable_attention/serialization/model.tar.gz')
        predictor = Predictor.from_archive(archive, 'textual-entailment')
        result = predictor.predict_json(inputs)

        # Label probs should be 3 floats that sum to one
        label_probs = result.get("label_probs")
        assert label_probs is not None
        assert isinstance(label_probs, list)
        assert len(label_probs) == 3
        assert all(isinstance(x, float) for x in label_probs)
        assert all(x >= 0 for x in label_probs)
        assert sum(label_probs) == approx(1.0)

        # Logits should be 3 floats that softmax to label_probs
        label_logits = result.get("label_logits")
        assert label_logits is not None
        assert isinstance(label_logits, list)
        assert len(label_logits) == 3
        assert all(isinstance(x, float) for x in label_logits)

        exps = [math.exp(x) for x in label_logits]
        sumexps = sum(exps)
        for e, p in zip(exps, label_probs):
            assert e / sumexps == approx(p)
Example #9
    def setUp(self):
        super().setUp()

        archive = load_archive(
            'tests/fixtures/bidaf/serialization/model.tar.gz')
        self.bidaf_predictor = Predictor.from_archive(archive,
                                                      'machine-comprehension')
Example #11
    def test_batch_prediction(self):
        inputs = [
                {
                        "question": "What kind of test succeeded on its first attempt?",
                        "passage": "One time I was writing a unit test, and it succeeded on the first attempt."
                },
                {
                        "question": "What kind of test succeeded on its first attempt at batch processing?",
                        "passage": "One time I was writing a unit test, and it always failed!"
                }
        ]

        archive = load_archive('/Users/vibhav/Workspace_8_17/VigneshAmazon/personal/bid_store/bidaf.model')
        predictor = Predictor.from_archive(archive, 'machine-comprehension')

        results = predictor.predict_batch_json(inputs)
        assert len(results) == 2

        for result in results:
            best_span = result.get("best_span")
            best_span_str = result.get("best_span_str")
            start_probs = result.get("span_start_probs")
            end_probs = result.get("span_end_probs")
            assert best_span is not None
            assert isinstance(best_span, list)
            assert len(best_span) == 2
            assert all(isinstance(x, int) for x in best_span)
            assert best_span[0] <= best_span[1]

            assert isinstance(best_span_str, str)
            assert best_span_str != ""
Example #12
    def test_textual_entailment(self):
        predictor = Predictor.from_archive(
            load_archive(DEFAULT_MODELS['textual-entailment']),
            'textual-entailment')

        result = predictor.predict_json({
            "premise":
            "An interplanetary spacecraft is in orbit around a gas giant's icy moon.",
            "hypothesis":
            "The spacecraft has the ability to travel between planets."
        })

        assert result["label_probs"][0] > 0.7  # entailment

        result = predictor.predict_json({
            "premise":
            "Two women are wandering along the shore drinking iced tea.",
            "hypothesis":
            "Two women are sitting on a blanket near some rocks talking about politics."
        })

        assert result["label_probs"][1] > 0.8  # contradiction

        result = predictor.predict_json({
            "premise":
            "A large, gray elephant walked beside a herd of zebras.",
            "hypothesis": "The elephant was lost."
        })

        assert result["label_probs"][2] > 0.7  # neutral
Example #13
    def test_prediction_with_no_verbs(self):

        input1 = {"sentence": "Blah no verb sentence."}
        archive = load_archive(self.FIXTURES_ROOT / 'srl' / 'serialization' /
                               'model.tar.gz')
        predictor = Predictor.from_archive(archive, 'semantic-role-labeling')
        result = predictor.predict_json(input1)
        assert result == {
            'words': ['Blah', 'no', 'verb', 'sentence', '.'],
            'verbs': []
        }

        input2 = {"sentence": "This sentence has a verb."}
        results = predictor.predict_batch_json([input1, input2])
        assert results[0] == {
            'words': ['Blah', 'no', 'verb', 'sentence', '.'],
            'verbs': []
        }
        assert results[1] == {
            'words': ['This', 'sentence', 'has', 'a', 'verb', '.'],
            'verbs': [{
                'verb': 'has',
                'description': 'This sentence has a verb .',
                'tags': ['O', 'O', 'O', 'O', 'O', 'O']
            }]
        }
Example #14
    def test_uses_named_inputs(self):
        inputs = {
                "question": "What kind of test succeeded on its first attempt?",
                "passage": "One time I was writing a unit test, and it succeeded on the first attempt."
        }

        archive = load_archive('tests/fixtures/bidaf/serialization/model.tar.gz')
        predictor = Predictor.from_archive(archive, 'machine-comprehension')

        result = predictor.predict_json(inputs)

        best_span = result.get("best_span")
        assert best_span is not None
        assert isinstance(best_span, list)
        assert len(best_span) == 2
        assert all(isinstance(x, int) for x in best_span)
        assert best_span[0] <= best_span[1]

        best_span_str = result.get("best_span_str")
        assert isinstance(best_span_str, str)
        assert best_span_str != ""

        for probs_key in ("span_start_probs", "span_end_probs"):
            probs = result.get(probs_key)
            assert probs is not None
            assert all(isinstance(x, float) for x in probs)
            assert sum(probs) == approx(1.0)
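best_span is asserted to be a [start, end] pair of token indices. Given a tokenized passage (not part of this result), a sketch like the following recovers the answer text, assuming the end index is inclusive:

# Hypothetical helper; assumes `passage_tokens` is the tokenized passage and
# that best_span's end index is inclusive.
def span_to_text(passage_tokens, best_span):
    start, end = best_span
    return " ".join(passage_tokens[start:end + 1])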
Example #15
    def test_uses_named_inputs(self):
        inputs = {
            "title":
            "Interferring Discourse Relations in Context",
            "paperAbstract":
            ("We investigate various contextual effects on text "
             "interpretation, and account for them by providing "
             "contextual constraints in a logical theory of text "
             "interpretation. On the basis of the way these constraints "
             "interact with the other knowledge sources, we draw some "
             "general conclusions about the role of domain-specific "
             "information, top-down and bottom-up discourse information "
             "flow, and the usefulness of formalisation in discourse theory.")
        }

        archive = load_archive('tests/fixtures/model.tar.gz')
        predictor = Predictor.from_archive(archive, 'paper-classifier')

        result = predictor.predict_json(inputs)

        label = result.get("label")
        assert label in ['AI', 'ML', 'ACL']

        class_probabilities = result.get("class_probabilities")
        assert class_probabilities is not None
        assert all(cp > 0 for cp in class_probabilities)
        assert sum(class_probabilities) == approx(1.0)
Example #16
    def test_uses_named_inputs(self):
        inputs = {
            "question":
            "What kind of test succeeded on its first attempt?",
            "passage":
            "One time I was writing a unit test, and it succeeded on the first attempt."
        }

        archive = load_archive(
            'tests/fixtures/bidaf/serialization/model.tar.gz')
        predictor = Predictor.from_archive(archive, 'machine-comprehension')

        result = predictor.predict_json(inputs)

        best_span = result.get("best_span")
        assert best_span is not None
        assert isinstance(best_span, list)
        assert len(best_span) == 2
        assert all(isinstance(x, int) for x in best_span)
        assert best_span[0] <= best_span[1]

        best_span_str = result.get("best_span_str")
        assert isinstance(best_span_str, str)
        assert best_span_str != ""

        for probs_key in ("span_start_probs", "span_end_probs"):
            probs = result.get(probs_key)
            assert probs is not None
            assert all(isinstance(x, float) for x in probs)
            assert sum(probs) == approx(1.0)
Example #17
def _get_predictor(args: argparse.Namespace) -> Predictor:
    archive = load_archive(args.archive_file,
                           weights_file=args.weights_file,
                           cuda_device=args.cuda_device,
                           overrides=args.overrides)

    return Predictor.from_archive(archive, args.predictor)
Example #18
def main(args):
    # Executing this file with no extra options runs the simple service with a
    # default local archive and the toxic-comments-classifier predictor. There's
    # no good reason you'd want to do this, except possibly to test changes to
    # the stock HTML.

    parser = argparse.ArgumentParser(description='Serve up a simple model')

    parser.add_argument('--archive-path',
                        type=str,
                        help='path to trained archive file')
    parser.add_argument('--predictor', type=str, help='name of predictor')
    parser.add_argument('--title',
                        type=str,
                        help='change the default page title',
                        default="AllenNLP Demo")
    parser.add_argument('--field-name',
                        type=str,
                        action='append',
                        help='field names to include in the demo')
    parser.add_argument('--user',
                        type=str,
                        help='demo app user name',
                        default="admin")
    parser.add_argument('--password',
                        type=str,
                        help='demo app user password',
                        default="AllenNLP")
    parser.add_argument('--port',
                        type=int,
                        default=8000,
                        help='port to serve the demo on')

    parser.add_argument('--include-package',
                        type=str,
                        action='append',
                        default=[],
                        help='additional packages to include')

    args = parser.parse_args(args)

    # Load modules
    for package_name in args.include_package:
        import_submodules(package_name)

    archive = load_archive(args.archive_path or 'tmp/model.tar.gz')
    predictor = Predictor.from_archive(
        archive, args.predictor or 'toxic-comments-classifier')
    field_names = args.field_name or ['comment_text']

    app = make_app(predictor=predictor,
                   field_names=field_names,
                   title=args.title,
                   user=args.user,
                   password=args.password)

    app.css.append_css(
        {'external_url': 'https://codepen.io/chriddyp/pen/bWLwgP.css'})
    #app.css.append_css({'external_url': 'static_html/demo.css'})
    print(f"Model loaded, serving demo on port {args.port}")
    app.run_server(port=args.port, debug=True)
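Since main takes its argument list explicitly, a conventional entry point (assumed here, not shown in the original) would forward sys.argv:

# Assumed entry point for the demo script above.
if __name__ == "__main__":
    import sys
    main(sys.argv[1:])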
Example #19
def main():
    # Executing this file runs the simple service with the bidaf test fixture
    # and the machine-comprehension predictor. There's no good reason you'd want
    # to do this (except maybe to test changes to the stock HTML), but this shows
    # you what you'd do in your own code to run your own demo.

    # Make sure all the classes you need for your Model / Predictor / DatasetReader / etc...
    # are imported here, because otherwise they can't be constructed ``from_params``.

    archive = load_archive('tests/fixtures/bidaf/serialization/model.tar.gz')
    predictor = Predictor.from_archive(archive, 'machine-comprehension')

    def sanitizer(prediction: JsonDict) -> JsonDict:
        """
        Only want best_span results.
        """
        return {
            key: value
            for key, value in prediction.items() if key.startswith("best_span")
        }

    app = make_app(predictor=predictor,
                   field_names=['passage', 'question'],
                   sanitizer=sanitizer)

    http_server = WSGIServer(('0.0.0.0', 8888), app)
    http_server.serve_forever()
Example #20
    def test_uses_named_inputs(self):
        inputs = {
            "document":
            "This is a single string document about a test. Sometimes it "
            "contains coreferent parts."
        }
        archive = load_archive(
            'tests/fixtures/coref/serialization/model.tar.gz')
        predictor = Predictor.from_archive(archive, 'coreference-resolution')
        result = predictor.predict_json(inputs)

        document = result["document"]
        assert document == [
            'This', 'is', 'a', 'single', 'string', 'document', 'about', 'a',
            'test', '.', 'Sometimes', 'it', 'contains', 'coreferent', 'parts',
            '.'
        ]

        clusters = result["clusters"]
        assert isinstance(clusters, list)
        for cluster in clusters:
            assert isinstance(cluster, list)
            for mention in cluster:
                # Spans should be integer indices.
                assert isinstance(mention[0], int)
                assert isinstance(mention[1], int)
                # Spans should be inside document.
                assert 0 < mention[0] <= len(document)
                assert 0 < mention[1] <= len(document)
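Each mention is a [start, end] pair of token indices into document. A sketch for rendering clusters as readable strings, assuming the spans are inclusive:

# Hypothetical helper; assumes [start, end] spans are inclusive token indices.
def cluster_texts(document, clusters):
    return [[" ".join(document[start:end + 1]) for start, end in cluster]
            for cluster in clusters]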
Example #21
def _get_predictor(args: argparse.Namespace) -> Predictor:
    archive = load_archive(args.archive_file,
                           weights_file=args.weights_file,
                           cuda_device=args.cuda_device,
                           overrides=args.overrides)

    if args.predictor:
        # Predictor explicitly specified, so use it
        return Predictor.from_archive(archive, args.predictor)

    # Otherwise, use the mapping
    model_type = archive.config.get("model").get("type")
    if model_type not in DEFAULT_PREDICTORS:
        raise ConfigurationError(f"No known predictor for model type {model_type}.\n"
                                 f"Specify one with the --predictor flag.")
    return Predictor.from_archive(archive, DEFAULT_PREDICTORS[model_type])
Example #22
    def get_predictor(self, model_path, cuda_device):
        check_for_gpu(cuda_device)
        archive = load_archive(model_path,
                               weights_file=None,
                               cuda_device=cuda_device,
                               overrides="")

        return Predictor.from_archive(archive, None)
Example #23
    def test_batch_prediction(self):
        inputs = {
                "sentence": "The squirrel wrote a unit test to make sure its nuts worked as designed."
        }
        archive = load_archive('tests/fixtures/srl/serialization/model.tar.gz')
        predictor = Predictor.from_archive(archive, 'semantic-role-labeling')
        result = predictor.predict_batch_json([inputs, inputs])
        assert result[0] == result[1]
Example #25
    def __init__(self, archive_path):
        print("Initializing Str2Srl...")
        # Fall back to the packaged SRL model when no archive path is given.
        self.archive_path = archive_path or "srl-model-2018.05.25.tar.gz"
        self.archive = load_archive(self.archive_path, cuda_device=0)
        self.srl = Predictor.from_archive(self.archive)
        self.batch_size = 30
        print("Initializing Str2Srl Done...")
Example #26
def run(port: int, workers: int, config: Dict[str, str]) -> None:
    """Run the server programatically"""
    print("Starting a sanic server on port {}.".format(port))
    app = make_app()
    app.predictors = {
        name: Predictor.from_archive(load_archive(archive_file))
        for name, archive_file in config.items()
    }
    app.run(port=port, host="0.0.0.0", workers=workers)
Example #27
    def test_predictor_with_direct_parser(self):
        archive_dir = 'tests/fixtures/semantic_parsing/nlvr_direct_semantic_parser/serialization'
        archive = load_archive(os.path.join(archive_dir, 'model.tar.gz'))
        predictor = Predictor.from_archive(archive, 'nlvr-parser')

        result = predictor.predict_json(self.inputs)
        assert 'logical_form' in result
        assert 'denotations' in result
        assert len(result['denotations']) == 2  # Because there are two worlds in the input.
Example #28
def main(args):
    # Executing this file with no extra options runs the simple service with the bidaf test fixture
    # and the machine-comprehension predictor. There's no good reason you'd want
    # to do this, except possibly to test changes to the stock HTML.

    parser = argparse.ArgumentParser(description='Serve up a simple model')

    parser.add_argument('--archive-path',
                        type=str,
                        help='path to trained archive file')
    parser.add_argument('--predictor', type=str, help='name of predictor')
    parser.add_argument('--static-dir',
                        type=str,
                        help='serve index.html from this directory')
    parser.add_argument('--title',
                        type=str,
                        help='change the default page title',
                        default="AllenNLP Demo")
    parser.add_argument('--field-name',
                        type=str,
                        action='append',
                        help='field names to include in the demo')
    parser.add_argument('--port',
                        type=int,
                        default=8000,
                        help='port to serve the demo on')

    parser.add_argument('--include-package',
                        type=str,
                        action='append',
                        default=[],
                        help='additional packages to include')

    args = parser.parse_args(args)

    # Load modules
    for package_name in args.include_package:
        import_submodules(package_name)

    archive = load_archive(
        args.archive_path or 'tests/fixtures/bidaf/serialization/model.tar.gz',
        cuda_device=0)
    predictor = Predictor.from_archive(
        archive, args.predictor or 'machine-comprehension')
    field_names = args.field_name or ['passage', 'question']

    app = make_app(predictor=predictor,
                   field_names=field_names,
                   static_dir=args.static_dir,
                   title=args.title)
    CORS(app)

    http_server = WSGIServer(('0.0.0.0', args.port), app)
    print(f"Model loaded, serving demo on port {args.port}")
    http_server.serve_forever()
Example #29
def get_predictor(args):
    archive = load_archive(args.archive_file,
                           weights_file=None,
                           cuda_device=args.cuda_device,
                           overrides="")

    # Otherwise, use the mapping
    model_type = archive.config.get("model").get("type")
    if model_type != 'srl':
        raise Exception('the given model is not for srl.')
    return Predictor.from_archive(archive, 'semantic-role-labeling')
Example #30
    def test_uses_named_inputs(self):
        inputs = {
                "premise": "I always write unit tests for my code.",
                "hypothesis": "One time I didn't write any unit tests for my code."
        }

        archive = load_archive('tests/fixtures/decomposable_attention/serialization/model.tar.gz')
        predictor = Predictor.from_archive(archive, 'textual-entailment')
        result = predictor.predict_json(inputs)

        assert "label_probs" in result
Example #31
    def setUp(self):
        super().setUp()
        if self.client is None:
            self.app = make_app()
            self.app.predictors = {
                name: Predictor.from_archive(load_archive(archive_file))
                for name, archive_file in TEST_ARCHIVE_FILES.items()
            }

            self.app.testing = True
            self.client = self.app.test_client
Example #32
    def test_predictor_with_direct_parser(self):
        archive_dir = self.FIXTURES_ROOT / 'semantic_parsing' / 'nlvr_direct_semantic_parser' / 'serialization'
        archive = load_archive(os.path.join(archive_dir, 'model.tar.gz'))
        predictor = Predictor.from_archive(archive, 'nlvr-parser')

        result = predictor.predict_json(self.inputs)
        assert 'logical_form' in result
        assert 'denotations' in result
        # result['denotations'] is a list corresponding to k-best logical forms, where k is 1 by
        # default.
        assert len(result['denotations'][0]) == 2  # Because there are two worlds in the input.
Example #33
    def test_uses_named_inputs(self):
        inputs = {
            "para_id":
            "4",
            "sentence_texts": [
                "Plants die.", "They are buried in sediment.",
                "Bacteria is buried in the sediment.",
                "Large amounts of sediment gradually pile on top of the original sediment.",
                "Pressure builds up.", "Heat increases.",
                "The chemical structure of the buried sediment and plants changes.",
                "The sediment and plants are at least one mile underground.",
                "The buried area is extremely hot.",
                "More chemical changes happen eand the buried material becomes oil."
            ],
            "participants": ["plants", "bacteria", "sediment", "oil"],
            "states":
            [[
                "?", "?", "sediment", "sediment", "sediment", "sediment",
                "sediment", "sediment", "one mile underground",
                "one mile underground", "-"
            ],
             [
                 "?", "?", "?", "sediment", "sediment", "sediment", "sediment",
                 "sediment", "sediment", "sediment", "-"
             ],
             [
                 "?", "?", "?", "?", "?", "?", "?", "?", "underground",
                 "underground", "underground"
             ],
             ["-", "-", "-", "-", "-", "-", "-", "-", "-", "-", "underground"]]
        }

        archive = load_archive(
            '../tests/fixtures/prostruct/prostruct_toy_model.tar.gz')
        predictor = Predictor.from_archive(archive, 'prostruct_prediction')
        result = predictor.predict_json(inputs)
        assert (result['para_id'] == '4')
        assert (result["sentence_texts"] == [
            "Plants die.", "They are buried in sediment.",
            "Bacteria is buried in the sediment.",
            "Large amounts of sediment gradually pile on top of the original sediment.",
            "Pressure builds up.", "Heat increases.",
            "The chemical structure of the buried sediment and plants changes.",
            "The sediment and plants are at least one mile underground.",
            "The buried area is extremely hot.",
            "More chemical changes happen eand the buried material becomes oil."
        ])
        assert (result['participants'] == [
            "plants", "bacteria", "sediment", "oil"
        ])
        # This changes with a new model (but some label must be predicted).
        print(f"result['top1_labels']: {result['top1_labels']}")
        assert (len(result['top1_labels']) > 1)
Example #34
def predict(args: argparse.Namespace, docdb) -> None:
    archive = load_archive(args.archive_file, cuda_device=args.cuda_device, overrides=args.overrides)
    predictor = Predictor.from_archive(archive, "drwikilookup")

    predictor.set_docdb(docdb)

    # ExitStack allows us to conditionally context-manage `output_file`, which may or may not exist
    with ExitStack() as stack:
        input_file = stack.enter_context(args.input_file)  # type: ignore
        output_file = stack.enter_context(args.output_file)  # type: ignore

        _run(predictor, input_file, output_file, args.batch_size, args.cuda_device)
Example #35
    def test_answer_present(self):
        inputs = {
            "question": "Who is 18 years old?",
            "table": "Name\tAge\nShallan\t16\nKaladin\t18"
        }

        archive_dir = 'tests/fixtures/semantic_parsing/wikitables/serialization/'
        archive = load_archive(os.path.join(archive_dir, 'model.tar.gz'))
        predictor = Predictor.from_archive(archive, 'wikitables-parser')

        result = predictor.predict_json(inputs)
        answer = result.get("answer")
        assert answer is not None
Example #36
    def test_prediction_with_no_verbs(self):

        input1 = {"sentence": "Blah no verb sentence."}
        archive = load_archive('tests/fixtures/srl/serialization/model.tar.gz')
        predictor = Predictor.from_archive(archive, 'semantic-role-labeling')
        result = predictor.predict_json(input1)
        assert result == {'words': ['Blah', 'no', 'verb', 'sentence', '.'], 'verbs': []}

        input2 = {"sentence": "This sentence has a verb."}
        results = predictor.predict_batch_json([input1, input2])
        assert results[0] == {'words': ['Blah', 'no', 'verb', 'sentence', '.'], 'verbs': []}
        assert results[1] == {'words': ['This', 'sentence', 'has', 'a', 'verb', '.'],
                              'verbs': [{'verb': 'has', 'description': 'This sentence has a verb .',
                                         'tags': ['O', 'O', 'O', 'O', 'O', 'O']}]}
Example #37
    def test_uses_named_inputs(self):
        inputs = {
                "source": "What kind of test succeeded on its first attempt?",
        }

        archive = load_archive('tests/fixtures/encoder_decoder/simple_seq2seq/serialization/model.tar.gz')
        predictor = Predictor.from_archive(archive, 'simple_seq2seq')

        result = predictor.predict_json(inputs)

        predicted_tokens = result.get("predicted_tokens")
        assert predicted_tokens is not None
        assert isinstance(predicted_tokens, list)
        assert all(isinstance(x, str) for x in predicted_tokens)
Example #38
    def test_uses_named_inputs(self):
        inputs = {
                "sentence": "What a great test sentence.",
        }

        archive = load_archive('tests/fixtures/constituency_parser/serialization/model.tar.gz')
        predictor = Predictor.from_archive(archive, 'constituency-parser')
        result = predictor.predict_json(inputs)

        assert len(result["spans"]) == 21 # number of possible substrings of the sentence.
        assert len(result["class_probabilities"]) == 21
        assert result["tokens"] == ["What", "a", "great", "test", "sentence", "."]
        assert isinstance(result["trees"], str)

        for class_distribution in result["class_probabilities"]:
            self.assertAlmostEqual(sum(class_distribution), 1.0, places=4)
Example #39
    def test_batch_prediction(self):
        batch_inputs = [
                {
                        "premise": "I always write unit tests for my code.",
                        "hypothesis": "One time I didn't write any unit tests for my code."
                },
                {
                        "premise": "I also write batched unit tests for throughput!",
                        "hypothesis": "Batch tests are slower."
                },
        ]

        archive = load_archive('tests/fixtures/decomposable_attention/serialization/model.tar.gz')
        predictor = Predictor.from_archive(archive, 'textual-entailment')
        results = predictor.predict_batch_json(batch_inputs)
        print(results)
        assert len(results) == 2

        for result in results:
            # Logits should be 3 floats that softmax to label_probs
            label_logits = result.get("label_logits")
            # Label probs should be 3 floats that sum to one
            label_probs = result.get("label_probs")
            assert label_probs is not None
            assert isinstance(label_probs, list)
            assert len(label_probs) == 3
            assert all(isinstance(x, float) for x in label_probs)
            assert all(x >= 0 for x in label_probs)
            assert sum(label_probs) == approx(1.0)

            assert label_logits is not None
            assert isinstance(label_logits, list)
            assert len(label_logits) == 3
            assert all(isinstance(x, float) for x in label_logits)

            exps = [math.exp(x) for x in label_logits]
            sumexps = sum(exps)
            for e, p in zip(exps, label_probs):
                assert e / sumexps == approx(p)
Example #40
    def test_uses_named_inputs(self):
        inputs = {"document": "This is a single string document about a test. Sometimes it "
                              "contains coreferent parts."}
        archive = load_archive('tests/fixtures/coref/serialization/model.tar.gz')
        predictor = Predictor.from_archive(archive, 'coreference-resolution')
        result = predictor.predict_json(inputs)

        document = result["document"]
        assert document == ['This', 'is', 'a', 'single', 'string',
                            'document', 'about', 'a', 'test', '.', 'Sometimes',
                            'it', 'contains', 'coreferent', 'parts', '.']

        clusters = result["clusters"]
        assert isinstance(clusters, list)
        for cluster in clusters:
            assert isinstance(cluster, list)
            for mention in cluster:
                # Spans should be integer indices.
                assert isinstance(mention[0], int)
                assert isinstance(mention[1], int)
                # Spans should be inside document.
                assert 0 < mention[0] <= len(document)
                assert 0 < mention[1] <= len(document)
Example #41
    def test_batch_prediction(self):
        inputs = [
                {
                        "question": "What kind of test succeeded on its first attempt?",
                        "passage": "One time I was writing a unit test, and it succeeded on the first attempt."
                },
                {
                        "question": "What kind of test succeeded on its first attempt at batch processing?",
                        "passage": "One time I was writing a unit test, and it always failed!"
                }
        ]

        archive = load_archive('tests/fixtures/bidaf/serialization/model.tar.gz')
        predictor = Predictor.from_archive(archive, 'machine-comprehension')

        results = predictor.predict_batch_json(inputs)
        assert len(results) == 2

        for result in results:
            best_span = result.get("best_span")
            best_span_str = result.get("best_span_str")
            start_probs = result.get("span_start_probs")
            end_probs = result.get("span_end_probs")
            assert best_span is not None
            assert isinstance(best_span, list)
            assert len(best_span) == 2
            assert all(isinstance(x, int) for x in best_span)
            assert best_span[0] <= best_span[1]

            assert isinstance(best_span_str, str)
            assert best_span_str != ""

            for probs in (start_probs, end_probs):
                assert probs is not None
                assert all(isinstance(x, float) for x in probs)
                assert sum(probs) == approx(1.0)
Example #42
    def test_build_hierplane_tree(self):
        tree = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
        archive = load_archive('tests/fixtures/constituency_parser/serialization/model.tar.gz')
        predictor = Predictor.from_archive(archive, 'constituency-parser')

        hierplane_tree = predictor._build_hierplane_tree(tree, 0, is_root=True)

        # pylint: disable=bad-continuation
        correct_tree = {
                'text': 'the dog chased the cat',
                "linkNameToLabel": LINK_TO_LABEL,
                "nodeTypeToStyle": NODE_TYPE_TO_STYLE,
                'root': {
                        'word': 'the dog chased the cat',
                        'nodeType': 'S',
                        'attributes': ['S'],
                        'link': 'S',
                        'children': [{
                                'word': 'the dog',
                                'nodeType': 'NP',
                                'attributes': ['NP'],
                                'link': 'NP',
                                'children': [{
                                        'word': 'the',
                                        'nodeType': 'D',
                                        'attributes': ['D'],
                                        'link': 'D'
                                        },
                                        {
                                        'word': 'dog',
                                        'nodeType': 'N',
                                        'attributes': ['N'],
                                        'link': 'N'}
                                        ]
                                },
                                {
                                'word': 'chased the cat',
                                'nodeType': 'VP',
                                'attributes': ['VP'],
                                'link': 'VP',
                                'children': [{
                                    'word': 'chased',
                                    'nodeType': 'V',
                                    'attributes': ['V'],
                                    'link': 'V'
                                    },
                                    {
                                    'word':
                                    'the cat',
                                    'nodeType': 'NP',
                                    'attributes': ['NP'],
                                    'link': 'NP',
                                    'children': [{
                                            'word': 'the',
                                            'nodeType': 'D',
                                            'attributes': ['D'],
                                            'link': 'D'
                                            },
                                            {
                                            'word': 'cat',
                                            'nodeType': 'N',
                                            'attributes': ['N'],
                                            'link': 'N'}
                                        ]
                                    }
                                ]
                            }
                        ]
                    }
                }
        # pylint: enable=bad-continuation
        assert correct_tree == hierplane_tree
Example #43
import json
from collections import defaultdict

from allennlp.common.util import JsonDict
from allennlp.common.testing import AllenNlpTestCase
from allennlp.models.archival import load_archive
from allennlp.service.predictors import Predictor
from allennlp.service.server_flask import make_app
from allennlp.service.db import InMemoryDemoDatabase

TEST_ARCHIVE_FILES = {
        'machine-comprehension': 'tests/fixtures/bidaf/serialization/model.tar.gz',
        'semantic-role-labeling': 'tests/fixtures/srl/serialization/model.tar.gz',
        'textual-entailment': 'tests/fixtures/decomposable_attention/serialization/model.tar.gz'
}

PREDICTORS = {
        name: Predictor.from_archive(load_archive(archive_file),
                                     predictor_name=name)
        for name, archive_file in TEST_ARCHIVE_FILES.items()
}


class CountingPredictor(Predictor):
    """
    bogus predictor that just returns a copy of its inputs
    and also counts how many times it was called with a given input
    """
    # pylint: disable=abstract-method
    def __init__(self):                 # pylint: disable=super-init-not-called
        self.calls = defaultdict(int)

    def predict_json(self, inputs: JsonDict, cuda_device: int = -1) -> JsonDict:
        key = json.dumps(inputs)
        # Count the call and, per the docstring, return a copy of the inputs.
        self.calls[key] += 1
        return dict(inputs)
Example #44
    def setUp(self):
        super().setUp()

        archive = load_archive('tests/fixtures/bidaf/serialization/model.tar.gz')
        self.bidaf_predictor = Predictor.from_archive(archive, 'machine-comprehension')