Beispiel #1
0
    def test_from_archive_does_not_consume_params(self):
        """Build a predictor twice from the same loaded archive.

        If the first ``Predictor.from_archive`` call consumed the archive's
        params, the second call would raise and fail this test.
        """
        model_path = (self.FIXTURES_ROOT / 'bidaf' / 'serialization' /
                      'model.tar.gz')
        archive = load_archive(model_path)

        # The second iteration is the real check: it reuses the same archive.
        for _ in range(2):
            Predictor.from_archive(archive, 'machine-comprehension')
Beispiel #2
0
    def test_prediction_with_no_verbs(self):
        """Check the SRL predictor on a verbless sentence, alone and batched.

        The verbless sentence must yield an empty ``verbs`` list both for a
        single prediction and when batched together with a sentence that does
        contain a verb.
        """
        verbless = {"sentence": "Blah no verb sentence."}
        with_verb = {"sentence": "This sentence has a verb."}

        expected_verbless = {
            'words': ['Blah', 'no', 'verb', 'sentence', '.'],
            'verbs': [],
        }
        expected_with_verb = {
            'words': ['This', 'sentence', 'has', 'a', 'verb', '.'],
            'verbs': [{
                'verb': 'has',
                'description': 'This sentence has a verb .',
                'tags': ['O', 'O', 'O', 'O', 'O', 'O'],
            }],
        }

        model_path = (self.FIXTURES_ROOT / 'srl' / 'serialization' /
                      'model.tar.gz')
        predictor = Predictor.from_archive(load_archive(model_path),
                                           'semantic-role-labeling')

        # Single-instance prediction.
        assert predictor.predict_json(verbless) == expected_verbless

        # Batched prediction mixing the verbless and the verb-bearing input.
        batch_output = predictor.predict_batch_json([verbless, with_verb])
        assert batch_output[0] == expected_verbless
        assert batch_output[1] == expected_with_verb
Beispiel #3
0
    def test_uses_named_inputs(self):
        """Run the SRL predictor on one sentence and validate the output shape.

        Verifies the tokenisation, that the expected verbs are reported, and
        that every verb entry carries one string tag per word.
        """
        expected_words = [
            "The", "squirrel", "wrote", "a", "unit", "test", "to", "make",
            "sure", "its", "nuts", "worked", "as", "designed", "."
        ]

        model_path = (self.FIXTURES_ROOT / 'srl' / 'serialization' /
                      'model.tar.gz')
        predictor = Predictor.from_archive(load_archive(model_path),
                                           'semantic-role-labeling')

        result = predictor.predict_json({
            "sentence":
            "The squirrel wrote a unit test to make sure its nuts worked as designed."
        })
        print(result)

        words = result.get("words")
        assert words == expected_words
        word_count = len(words)

        verbs = result.get("verbs")
        assert verbs is not None
        assert isinstance(verbs, list)

        # Each of these verbs must show up in at least one entry.
        for expected_verb in ("wrote", "make", "worked"):
            assert any(entry["verb"] == expected_verb for entry in verbs)

        # Every entry needs exactly one string tag per word.
        for entry in verbs:
            entry_tags = entry.get("tags")
            assert entry_tags is not None
            assert isinstance(entry_tags, list)
            assert all(isinstance(tag, str) for tag in entry_tags)
            assert len(entry_tags) == word_count
    def test_uses_named_inputs(self):
        """Run the textual-entailment predictor on one premise/hypothesis pair.

        Checks that ``label_probs`` is a 3-element probability distribution
        and that applying softmax to ``label_logits`` reproduces it.
        """
        model_path = (self.FIXTURES_ROOT / 'decomposable_attention' /
                      'serialization' / 'model.tar.gz')
        predictor = Predictor.from_archive(load_archive(model_path),
                                           'textual-entailment')
        result = predictor.predict_json({
            "premise": "I always write unit tests for my code.",
            "hypothesis": "One time I didn't write any unit tests for my code."
        })

        # Probabilities: three non-negative floats summing to one.
        label_probs = result.get("label_probs")
        assert label_probs is not None
        assert isinstance(label_probs, list)
        assert len(label_probs) == 3
        assert all(isinstance(prob, float) for prob in label_probs)
        assert all(prob >= 0 for prob in label_probs)
        assert sum(label_probs) == approx(1.0)

        # Logits: three floats.
        label_logits = result.get("label_logits")
        assert label_logits is not None
        assert isinstance(label_logits, list)
        assert len(label_logits) == 3
        assert all(isinstance(logit, float) for logit in label_logits)

        # A hand-computed softmax of the logits must match the probabilities.
        exponentials = [math.exp(logit) for logit in label_logits]
        normalizer = sum(exponentials)
        for exponential, prob in zip(exponentials, label_probs):
            assert exponential / normalizer == approx(prob)
Beispiel #5
0
    def test_uses_named_inputs(self):
        """Run the machine-comprehension predictor on one question/passage pair.

        Validates the best span (two ordered ints), its non-empty string form,
        and that the start/end probability lists each sum to one.
        """
        model_path = (self.FIXTURES_ROOT / 'bidaf' / 'serialization' /
                      'model.tar.gz')
        predictor = Predictor.from_archive(load_archive(model_path),
                                           'machine-comprehension')

        result = predictor.predict_json({
            "question":
            "What kind of test succeeded on its first attempt?",
            "passage":
            "One time I was writing a unit test, and it succeeded on the first attempt."
        })

        span = result.get("best_span")
        assert span is not None
        assert isinstance(span, list)
        assert len(span) == 2
        assert all(isinstance(index, int) for index in span)
        span_start, span_end = span[0], span[1]
        assert span_start <= span_end

        span_text = result.get("best_span_str")
        assert isinstance(span_text, str)
        assert span_text != ""

        for key in ("span_start_probs", "span_end_probs"):
            distribution = result.get(key)
            assert distribution is not None
            assert all(isinstance(value, float) for value in distribution)
            assert sum(distribution) == approx(1.0)
Beispiel #6
0
    def setUp(self):
        """Run the parent set-up, then store a BiDAF fixture predictor.

        The predictor is built from the ``bidaf`` fixture archive and kept on
        ``self.bidaf_predictor``.
        """
        super().setUp()

        model_path = (self.FIXTURES_ROOT / 'bidaf' / 'serialization' /
                      'model.tar.gz')
        self.bidaf_predictor = Predictor.from_archive(
            load_archive(model_path), 'machine-comprehension')
Beispiel #7
0
    def test_batch_prediction(self):
        """Batch-predict two sentences with the biaffine dependency parser.

        Each result must have one predicted head and one string dependency
        label per word.
        """
        model_path = (self.FIXTURES_ROOT / 'biaffine_dependency_parser' /
                      'serialization' / 'model.tar.gz')
        predictor = Predictor.from_archive(load_archive(model_path),
                                           'biaffine-dependency-parser')

        sentences = (
            "What kind of test succeeded on its first attempt?",
            "What kind of test succeeded on its first attempt at batch processing?",
        )
        batch = [{"sentence": sentence} for sentence in sentences]

        results = predictor.predict_batch_json(batch)
        assert len(results) == 2

        for output in results:
            word_count = len(output.get("words"))

            heads = output.get("predicted_heads")
            assert len(heads) == word_count

            dependencies = output.get("predicted_dependencies")
            assert len(dependencies) == word_count
            assert isinstance(dependencies, list)
            assert all(isinstance(label, str) for label in dependencies)
Beispiel #8
0
    def test_uses_named_inputs(self):
        """Run the coreference predictor on a short document.

        Checks the tokenised ``document`` and that ``clusters`` is a list of
        lists of mentions, where each mention is a pair of integer token
        indices that fall inside the document.
        """
        inputs = {
            "document":
            "This is a single string document about a test. Sometimes it "
            "contains coreferent parts."
        }
        archive = load_archive(self.FIXTURES_ROOT / 'coref' / 'serialization' /
                               'model.tar.gz')
        predictor = Predictor.from_archive(archive, 'coreference-resolution')

        result = predictor.predict_json(inputs)

        document = result["document"]
        assert document == [
            'This', 'is', 'a', 'single', 'string', 'document', 'about', 'a',
            'test', '.', 'Sometimes', 'it', 'contains', 'coreferent', 'parts',
            '.'
        ]

        clusters = result["clusters"]
        assert isinstance(clusters, list)
        for cluster in clusters:
            assert isinstance(cluster, list)
            for mention in cluster:
                # Spans should be integer indices.
                assert isinstance(mention[0], int)
                assert isinstance(mention[1], int)
                # Spans should be inside document. As indices into
                # `document`, 0 is a valid position and len(document) is one
                # past the last token, so the valid range is [0, len).
                assert 0 <= mention[0] < len(document)
                assert 0 <= mention[1] < len(document)
Beispiel #9
0
def main(args):
    """Parse command-line options, load a model archive and serve a demo.

    ``args`` is the list of command-line tokens handed to ``argparse``.
    ``--archive-path`` and ``--predictor`` are required; the remaining options
    are optional. After the predictor is built, the app is wrapped with CORS
    and served on all interfaces at the requested port. This call blocks in
    ``serve_forever``.
    """
    option_specs = [
        ('--archive-path', dict(type=str,
                                required=True,
                                help='path to trained archive file')),
        ('--predictor', dict(type=str,
                             required=True,
                             help='name of predictor')),
        ('--static-dir', dict(type=str,
                              help='serve index.html from this directory')),
        ('--title', dict(type=str,
                         help='change the default page title',
                         default="AllenNLP Demo")),
        ('--field-name', dict(type=str,
                              action='append',
                              help='field names to include in the demo')),
        ('--port', dict(type=int,
                        default=8000,
                        help='port to serve the demo on')),
        ('--include-package', dict(type=str,
                                   action='append',
                                   default=[],
                                   help='additional packages to include')),
    ]

    arg_parser = argparse.ArgumentParser(description='Serve up a simple model')
    # Register options in declaration order so the generated help is unchanged.
    for flag, spec in option_specs:
        arg_parser.add_argument(flag, **spec)

    options = arg_parser.parse_args(args)

    # Import the extra packages before the archive is loaded.
    for package_name in options.include_package:
        import_submodules(package_name)

    predictor = Predictor.from_archive(load_archive(options.archive_path),
                                       options.predictor)

    app = make_app(predictor=predictor,
                   field_names=options.field_name,
                   static_dir=options.static_dir,
                   title=options.title)
    CORS(app)

    server = WSGIServer(('0.0.0.0', options.port), app)
    print(f"Model loaded, serving demo on port {options.port}")
    server.serve_forever()
Beispiel #10
0
 def test_batch_prediction(self):
     """Batch-predict the same SRL input twice and expect identical results."""
     model_path = (self.FIXTURES_ROOT / 'srl' / 'serialization' /
                   'model.tar.gz')
     predictor = Predictor.from_archive(load_archive(model_path),
                                        'semantic-role-labeling')

     sample = {
         "sentence":
         "The squirrel wrote a unit test to make sure its nuts worked as designed."
     }
     # The same dict is submitted in both batch slots.
     outputs = predictor.predict_batch_json([sample, sample])
     assert outputs[0] == outputs[1]
Beispiel #11
0
    def test_predictor_with_direct_parser(self):
        """Run the NLVR parser predictor built from the direct-parser fixture.

        The prediction for ``self.inputs`` must contain a logical form and
        denotations, with two denotations for the first logical form.
        """
        serialization_dir = (self.FIXTURES_ROOT / 'semantic_parsing' /
                             'nlvr_direct_semantic_parser' / 'serialization')
        model_path = os.path.join(serialization_dir, 'model.tar.gz')
        predictor = Predictor.from_archive(load_archive(model_path),
                                           'nlvr-parser')

        result = predictor.predict_json(self.inputs)
        for required_key in ('logical_form', 'denotations'):
            assert required_key in result

        # result['denotations'] has one entry per k-best logical form, and k
        # is 1 by default, so only the first entry is inspected.
        first_denotations = result['denotations'][0]
        # Two denotations, because there are two worlds in the input.
        assert len(first_denotations) == 2
    def test_answer_present_with_batch_predict(self):
        """Batch-predict one wikitables question and require an ``answer``."""
        model_path = (self.FIXTURES_ROOT / 'semantic_parsing' / 'wikitables' /
                      'serialization' / 'model.tar.gz')
        predictor = Predictor.from_archive(load_archive(model_path),
                                           'wikitables-parser')

        batch = [{
            "question": "Who is 18 years old?",
            "table": "Name\tAge\nShallan\t16\nKaladin\t18"
        }]
        first_output = predictor.predict_batch_json(batch)[0]
        assert first_output.get("answer") is not None
    def test_uses_named_inputs(self):
        """Run the simple seq2seq predictor on one source sentence.

        The result must expose ``predicted_tokens`` as a list of strings.
        """
        model_path = (self.FIXTURES_ROOT / 'encoder_decoder' /
                      'simple_seq2seq' / 'serialization' / 'model.tar.gz')
        predictor = Predictor.from_archive(load_archive(model_path),
                                           'simple_seq2seq')

        result = predictor.predict_json({
            "source": "What kind of test succeeded on its first attempt?",
        })

        tokens = result.get("predicted_tokens")
        assert tokens is not None
        assert isinstance(tokens, list)
        assert all(isinstance(token, str) for token in tokens)
Beispiel #14
0
    def test_predictor_uses_dataset_reader_to_determine_pos_set(self):
        """Check that the dataset reader's flag selects the POS tags used.

        The same sentence is converted to an instance twice: once with the
        predictor as loaded, and once after setting
        ``use_language_specific_pos`` on its dataset reader. The ``pos_tags``
        labels are expected to differ accordingly.
        """
        # pylint: disable=protected-access
        model_path = (self.FIXTURES_ROOT / 'biaffine_dependency_parser' /
                      'serialization' / 'model.tar.gz')
        predictor = Predictor.from_archive(load_archive(model_path),
                                           'biaffine-dependency-parser')

        inputs = {
            "sentence": "Dogs eat cats.",
        }

        def current_pos_labels():
            # Rebuild the instance each time so the reader flag is re-read.
            instance = predictor._json_to_instance(inputs)
            return instance.fields["pos_tags"].labels

        assert current_pos_labels() == ['NOUN', 'VERB', 'NOUN', 'PUNCT']

        predictor._dataset_reader.use_language_specific_pos = True

        assert current_pos_labels() == ['NNS', 'VBP', 'NNS', '.']
Beispiel #15
0
    def test_uses_named_inputs(self):
        """Run the constituency-parser predictor on one six-token sentence.

        Checks the span and class-probability counts, the tokens, the type of
        ``trees``, and that each class distribution sums to one.
        """
        model_path = (self.FIXTURES_ROOT / 'constituency_parser' /
                      'serialization' / 'model.tar.gz')
        predictor = Predictor.from_archive(load_archive(model_path),
                                           'constituency-parser')

        result = predictor.predict_json({
            "sentence": "What a great test sentence.",
        })

        # 21 is the number of possible substrings of the sentence.
        expected_span_count = 21
        assert len(result["spans"]) == expected_span_count
        assert len(result["class_probabilities"]) == expected_span_count
        assert result["tokens"] == [
            "What", "a", "great", "test", "sentence", "."
        ]
        assert isinstance(result["trees"], str)

        for distribution in result["class_probabilities"]:
            self.assertAlmostEqual(sum(distribution), 1.0, places=4)
    def test_batch_prediction(self):
        """Batch-predict two premise/hypothesis pairs with the entailment model.

        For each result, ``label_probs`` must be a 3-element probability
        distribution and softmax over ``label_logits`` must reproduce it.
        """
        pairs = (
            ("I always write unit tests for my code.",
             "One time I didn't write any unit tests for my code."),
            ("I also write batched unit tests for throughput!",
             "Batch tests are slower."),
        )
        batch = [{"premise": premise, "hypothesis": hypothesis}
                 for premise, hypothesis in pairs]

        model_path = (self.FIXTURES_ROOT / 'decomposable_attention' /
                      'serialization' / 'model.tar.gz')
        predictor = Predictor.from_archive(load_archive(model_path),
                                           'textual-entailment')
        results = predictor.predict_batch_json(batch)
        print(results)
        assert len(results) == 2

        for output in results:
            logits = output.get("label_logits")
            probs = output.get("label_probs")

            # Probabilities: three non-negative floats summing to one.
            assert probs is not None
            assert isinstance(probs, list)
            assert len(probs) == 3
            assert all(isinstance(prob, float) for prob in probs)
            assert all(prob >= 0 for prob in probs)
            assert sum(probs) == approx(1.0)

            # Logits: three floats.
            assert logits is not None
            assert isinstance(logits, list)
            assert len(logits) == 3
            assert all(isinstance(logit, float) for logit in logits)

            # A hand-computed softmax of the logits must match the probabilities.
            exponentials = [math.exp(logit) for logit in logits]
            normalizer = sum(exponentials)
            for exponential, prob in zip(exponentials, probs):
                assert exponential / normalizer == approx(prob)
Beispiel #17
0
    def test_batch_prediction(self):
        """Batch-predict two question/passage pairs with the BiDAF fixture.

        Each result must contain an ordered two-int best span, a non-empty
        span string, and start/end probability lists that each sum to one.
        """
        first_example = {
            "question":
            "What kind of test succeeded on its first attempt?",
            "passage":
            "One time I was writing a unit test, and it succeeded on the first attempt."
        }
        second_example = {
            "question":
            "What kind of test succeeded on its first attempt at batch processing?",
            "passage":
            "One time I was writing a unit test, and it always failed!"
        }

        model_path = (self.FIXTURES_ROOT / 'bidaf' / 'serialization' /
                      'model.tar.gz')
        predictor = Predictor.from_archive(load_archive(model_path),
                                           'machine-comprehension')

        results = predictor.predict_batch_json([first_example, second_example])
        assert len(results) == 2

        for output in results:
            span = output.get("best_span")
            span_text = output.get("best_span_str")
            distributions = (output.get("span_start_probs"),
                             output.get("span_end_probs"))

            assert span is not None
            assert isinstance(span, list)
            assert len(span) == 2
            assert all(isinstance(index, int) for index in span)
            assert span[0] <= span[1]

            assert isinstance(span_text, str)
            assert span_text != ""

            for distribution in distributions:
                assert distribution is not None
                assert all(isinstance(value, float) for value in distribution)
                assert sum(distribution) == approx(1.0)
    def test_uses_named_inputs(self):
        """Run the wikitables parser predictor on one question/table pair.

        This mostly checks that the predictor runs. The action sequence and
        logical form are validated only when an action sequence is returned.
        """
        model_path = (self.FIXTURES_ROOT / 'semantic_parsing' / 'wikitables' /
                      'serialization' / 'model.tar.gz')
        predictor = Predictor.from_archive(load_archive(model_path),
                                           'wikitables-parser')

        result = predictor.predict_json({
            "question": "names",
            "table": "name\tdate\nmatt\t2017\npradeep\t2018"
        })

        actions = result.get("best_action_sequence")
        if not actions:
            # We don't currently disallow endless loops in the decoder, and an
            # untrained seq2seq model will easily get itself into a loop. An
            # endless loop isn't a finished logical form, so decoding returns
            # no finished states and therefore no actions. There is nothing
            # more to verify in that case.
            return

        assert len(actions) > 1
        assert all(isinstance(action, str) for action in actions)

        assert result.get("logical_form") is not None
Beispiel #19
0
    def test_batch_prediction(self):
        """Batch-predict two sentences with the constituency parser.

        For each result, checks the span and class-probability counts, the
        tokens, the type of ``trees``, and that each class distribution sums
        to one.
        """
        batch = [{
            "sentence": "What a great test sentence."
        }, {
            "sentence": "Here's another good, interesting one."
        }]

        # Per input: (number of possible substrings, expected tokens).
        expectations = [
            (21, ["What", "a", "great", "test", "sentence", "."]),
            (36, ["Here", "'s", "another", "good", ",", "interesting", "one",
                  "."]),
        ]

        model_path = (self.FIXTURES_ROOT / 'constituency_parser' /
                      'serialization' / 'model.tar.gz')
        predictor = Predictor.from_archive(load_archive(model_path),
                                           'constituency-parser')
        results = predictor.predict_batch_json(batch)

        for position, (span_count, tokens) in enumerate(expectations):
            # Index explicitly so a short result list still raises.
            output = results[position]

            assert len(output["spans"]) == span_count
            assert len(output["class_probabilities"]) == span_count
            assert output["tokens"] == tokens
            assert isinstance(output["trees"], str)

            for distribution in output["class_probabilities"]:
                self.assertAlmostEqual(sum(distribution), 1.0, places=4)
Beispiel #20
0
    def test_build_hierplane_tree(self):
        """Compare ``_build_hierplane_tree`` output with a hand-built tree.

        A fixed bracketed parse is converted by the constituency-parser
        predictor and must equal the expected nested dictionary.
        """
        parse = Tree.fromstring(
            "(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
        model_path = (self.FIXTURES_ROOT / 'constituency_parser' /
                      'serialization' / 'model.tar.gz')
        predictor = Predictor.from_archive(load_archive(model_path),
                                           'constituency-parser')

        actual_tree = predictor._build_hierplane_tree(parse, 0, is_root=True)

        def node(label, text, children=None):
            # In the expected output the same label fills nodeType,
            # attributes and link; leaves carry no 'children' key at all.
            entry = {
                'word': text,
                'nodeType': label,
                'attributes': [label],
                'link': label,
            }
            if children is not None:
                entry['children'] = children
            return entry

        subject = node('NP', 'the dog', [node('D', 'the'), node('N', 'dog')])
        direct_object = node('NP', 'the cat',
                             [node('D', 'the'), node('N', 'cat')])
        predicate = node('VP', 'chased the cat',
                         [node('V', 'chased'), direct_object])

        expected_tree = {
            'text': 'the dog chased the cat',
            "linkNameToLabel": LINK_TO_LABEL,
            "nodeTypeToStyle": NODE_TYPE_TO_STYLE,
            'root': node('S', 'the dog chased the cat', [subject, predicate]),
        }
        assert expected_tree == actual_tree
Beispiel #21
0
    def test_uses_named_inputs(self):
        """Run the biaffine dependency parser predictor on one sentence.

        Checks that heads and string dependency labels line up with the
        words, that the loss entries are present, and that the hierplane tree
        (minus its ``nodeTypeToStyle`` and ``linkToPosition`` entries) equals
        the expected structure.
        """
        model_path = (self.FIXTURES_ROOT / 'biaffine_dependency_parser' /
                      'serialization' / 'model.tar.gz')
        predictor = Predictor.from_archive(load_archive(model_path),
                                           'biaffine-dependency-parser')

        result = predictor.predict_json({
            "sentence": "Please could you parse this sentence?",
        })

        words = result.get("words")
        heads = result.get("predicted_heads")
        assert len(heads) == len(words)

        dependencies = result.get("predicted_dependencies")
        assert len(dependencies) == len(words)
        assert isinstance(dependencies, list)
        assert all(isinstance(label, str) for label in dependencies)

        for loss_key in ("loss", "arc_loss", "tag_loss"):
            assert result.get(loss_key) is not None

        # Remove the two entries that are not compared below; the pops mutate
        # the dictionary that `result` still holds.
        tree = result.get("hierplane_tree")
        tree.pop("nodeTypeToStyle")
        tree.pop("linkToPosition")

        def node(text, label, attribute, start, end, children=None):
            # The same label fills nodeType and link; only the root has
            # a 'children' key.
            entry = {
                'word': text,
                'nodeType': label,
                'attributes': [attribute],
                'link': label,
                'spans': [{'start': start, 'end': end}],
            }
            if children is not None:
                entry['children'] = children
            return entry

        expected_tree = {
            'text': 'Please could you parse this sentence ?',
            'root': node('Please', 'det', 'INTJ', 0, 7, [
                node('could', 'nummod', 'VERB', 7, 13),
                node('you', 'nummod', 'PRON', 13, 17),
                node('parse', 'nummod', 'VERB', 17, 23),
                node('this', 'nummod', 'DET', 23, 28),
                node('sentence', 'nummod', 'NOUN', 28, 37),
                node('?', 'nummod', 'PUNCT', 37, 39),
            ]),
        }
        assert result.get("hierplane_tree") == expected_tree
Beispiel #22
0
def demo_model(archive_file: str, predictor_name: str) -> Predictor:
    """Load the archive at ``archive_file`` and return a predictor for it.

    The loaded archive and ``predictor_name`` are passed to
    ``Predictor.from_archive``.
    """
    return Predictor.from_archive(load_archive(archive_file), predictor_name)