Example no. 1
    def test_batch_prediction(self):
        inputs = [
            {
                "sentence": "What kind of test succeeded on its first attempt?"
            },
            {
                "sentence":
                "What kind of test succeeded on its first attempt at batch processing?"
            },
        ]

        archive = load_archive(self.FIXTURES_ROOT /
                               "biaffine_dependency_parser" / "serialization" /
                               "model.tar.gz")
        predictor = Predictor.from_archive(archive,
                                           "biaffine-dependency-parser")

        results = predictor.predict_batch_json(inputs)
        assert len(results) == 2

        for result in results:
            sequence_length = len(result.get("words"))
            predicted_heads = result.get("predicted_heads")
            assert len(predicted_heads) == sequence_length

            predicted_dependencies = result.get("predicted_dependencies")
            assert len(predicted_dependencies) == sequence_length
            assert isinstance(predicted_dependencies, list)
            assert all(isinstance(x, str) for x in predicted_dependencies)
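Every example in this collection follows the same three-step pattern: load an archive with load_archive, wrap it with Predictor.from_archive, then call a predict method. A minimal, self-contained sketch of that pattern (the archive path and predictor name are placeholders, not taken from any particular example):

from allennlp.models.archival import load_archive
from allennlp.predictors import Predictor

# Placeholder archive path and predictor name:
archive = load_archive("model.tar.gz")
predictor = Predictor.from_archive(archive, "biaffine-dependency-parser")
# predict_json handles a single input; predict_batch_json (above) handles a list.
print(predictor.predict_json({"sentence": "Single inputs go through predict_json."}))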
Example no. 2
 def test_sentence(self, sentence):
     # Load pre-trained model
     archive = load_archive('model.tar.gz')
     # Load the predictor and run it on the sentence
     predictor = Predictor.from_archive(archive, 'event2mind_predictor')
     result = predictor.predict(sentence)
     print(result)
Example no. 3
    def __init__(self,
                 target_namespace: str,
                 span_predictor_model: str,
                 source_tokenizer: Tokenizer = None,
                 target_tokenizer: Tokenizer = None,
                 source_token_indexers: Dict[str, TokenIndexer] = None,
                 lazy: bool = False,
                 add_rule: bool = True,
                 embed_span: bool = True,
                 add_question: bool = True,
                 add_followup_ques: bool = True,
                 train_using_gold: bool = True) -> None:
        super().__init__(lazy)
        self._target_namespace = target_namespace
        self._source_tokenizer = source_tokenizer or WordTokenizer()
        self._target_tokenizer = target_tokenizer or self._source_tokenizer
        self._source_token_indexers = source_token_indexers or {"tokens": SingleIdTokenIndexer()}
        self.add_rule = add_rule
        self.embed_span = embed_span
        self.add_question = add_question
        self.add_followup_ques = add_followup_ques
        self.train_using_gold = train_using_gold
        if "tokens" not in self._source_token_indexers or \
                not isinstance(self._source_token_indexers["tokens"], SingleIdTokenIndexer):
            raise ConfigurationError("CopyNetDatasetReader expects 'source_token_indexers' to contain "
                                     "a 'single_id' token indexer called 'tokens'.")
        self._target_token_indexers: Dict[str, TokenIndexer] = {
                "tokens": SingleIdTokenIndexer(namespace=self._target_namespace)
        }

        archive = load_archive(span_predictor_model)
        self.dataset_reader = DatasetReader.from_params(archive.config.duplicate()["dataset_reader"])
        self.span_predictor = Predictor.from_archive(archive, 'sharc_predictor')
Example no. 4
    def test_batch_prediction(self):
        inputs = [
            {"sentence": "What a great test sentence."},
            {"sentence": "Here's another good, interesting one."},
        ]

        archive = load_archive(
            FIXTURES_ROOT / "syntax" / "constituency_parser" / "serialization" / "model.tar.gz"
        )
        predictor = Predictor.from_archive(archive, "constituency-parser")
        results = predictor.predict_batch_json(inputs)

        result = results[0]
        assert len(result["spans"]) == 21  # number of possible substrings of the sentence.
        assert len(result["class_probabilities"]) == 21
        assert result["tokens"] == ["What", "a", "great", "test", "sentence", "."]
        assert isinstance(result["trees"], str)

        for class_distribution in result["class_probabilities"]:
            self.assertAlmostEqual(sum(class_distribution), 1.0, places=4)

        result = results[1]

        assert len(result["spans"]) == 36  # number of possible substrings of the sentence.
        assert len(result["class_probabilities"]) == 36
        assert result["tokens"] == ["Here", "'s", "another", "good", ",", "interesting", "one", "."]
        assert isinstance(result["trees"], str)

        for class_distribution in result["class_probabilities"]:
            self.assertAlmostEqual(sum(class_distribution), 1.0, places=4)
Example no. 5
    def test_uses_named_inputs(self):
        """
        Tests whether the model outputs conform to the expected format.
        """
        inputs = {
                "sentence": "Angela Merkel met and spoke to her EU counterparts during the climate summit."
        }

        archive = load_archive(self.FIXTURES_ROOT /
                               'srl' / 'serialization' / 'model.tar.gz')
        predictor = Predictor.from_archive(archive, 'open-information-extraction')

        result = predictor.predict_json(inputs)

        words = result.get("words")
        assert words == ["Angela", "Merkel", "met", "and", "spoke", "to", "her", "EU", "counterparts",
                         "during", "the", "climate", "summit", "."]
        num_words = len(words)

        verbs = result.get("verbs")
        assert verbs is not None
        assert isinstance(verbs, list)

        for verb in verbs:
            tags = verb.get("tags")
            assert tags is not None
            assert isinstance(tags, list)
            assert all(isinstance(tag, str) for tag in tags)
            assert len(tags) == num_words
Example no. 6
 def __init__(self,
              model_path: str,
              predictor_type: str,
              cuda_device: int = -1):
     self.predictor_type = predictor_type
     self.predictor = Predictor.from_archive(
         load_archive(model_path, cuda_device=cuda_device), predictor_type)
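A possible way to use the constructor above, shown as a self-contained sketch; the class name PredictorWrapper is an assumption (the example shows only the __init__), and the archive path and predictor name are placeholders:

from allennlp.models.archival import load_archive
from allennlp.predictors import Predictor

class PredictorWrapper:  # hypothetical name for the class this __init__ belongs to
    def __init__(self, model_path: str, predictor_type: str, cuda_device: int = -1):
        self.predictor_type = predictor_type
        self.predictor = Predictor.from_archive(
            load_archive(model_path, cuda_device=cuda_device), predictor_type)

wrapper = PredictorWrapper("model.tar.gz", "semantic-role-labeling")
print(wrapper.predictor.predict_json({"sentence": "She wrote a test."}))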
Example no. 7
    def test_model_internals(self):
        archive = load_archive(FIXTURES_ROOT / "bidaf" / "serialization" / "model.tar.gz")
        predictor = Predictor.from_archive(archive, "reading_comprehension")

        inputs = {
            "question": "What kind of test succeeded on its first attempt?",
            "passage": "One time I was writing a unit test, and it succeeded on the first attempt.",
        }

        # Context manager to capture model internals
        with predictor.capture_model_internals() as internals:
            predictor.predict_json(inputs)

        assert internals is not None
        assert len(internals) == 24

        linear_50_1 = internals[23]
        print(linear_50_1)
        assert "Linear(in_features=50, out_features=1, bias=True)" in linear_50_1["name"]
        assert len(linear_50_1["output"][0]) == 17
        assert all(len(a) == 1 for a in linear_50_1["output"][0])

        # hooks should be gone
        for module in predictor._model.modules():
            assert not module._forward_hooks
Example no. 8
    def test_uses_named_inputs(self):
        inputs = {
            "question":
            "What kind of test succeeded on its first attempt?",
            "passage":
            "One time I was writing a unit test, and it succeeded on the first attempt.",
        }

        archive = load_archive(FIXTURES_ROOT / "bidaf" / "serialization" /
                               "model.tar.gz")
        predictor = Predictor.from_archive(archive, "reading-comprehension")

        result = predictor.predict_json(inputs)

        best_span = result.get("best_span")
        assert best_span is not None
        assert isinstance(best_span, list)
        assert len(best_span) == 2
        assert all(isinstance(x, int) for x in best_span)
        assert best_span[0] <= best_span[1]

        best_span_str = result.get("best_span_str")
        assert isinstance(best_span_str, str)
        assert best_span_str != ""

        for probs_key in ("span_start_probs", "span_end_probs"):
            probs = result.get(probs_key)
            assert probs is not None
            assert all(isinstance(x, float) for x in probs)
            assert sum(probs) == approx(1.0)
Example no. 9
    def test_with_token_characters_indexer(self):

        inputs = {"sentence": "I always write unit tests for my code."}

        archive = load_archive(self.FIXTURES_ROOT / "basic_classifier" /
                               "serialization" / "model.tar.gz")
        predictor = Predictor.from_archive(archive)
        predictor._dataset_reader._token_indexers[
            "chars"] = TokenCharactersIndexer(min_padding_length=1)
        predictor._model._text_field_embedder._token_embedders[
            "chars"] = EmptyEmbedder()

        hotflipper = Hotflip(predictor)
        hotflipper.initialize()
        attack = hotflipper.attack_from_json(inputs, "tokens", "grad_input_1")
        assert attack is not None
        assert "final" in attack
        assert "original" in attack
        assert "outputs" in attack
        assert len(attack["final"][0]) == len(
            attack["original"])  # hotflip replaces words without removing

        # This checks for a bug that arose with a change in the pytorch API.  We want to be sure we
        # can handle the case where we have to re-encode a vocab item because we didn't save it in
        # our fake embedding matrix (see Hotflip docstring for more info).
        hotflipper = Hotflip(predictor, max_tokens=50)
        hotflipper.initialize()
        hotflipper._first_order_taylor(grad=torch.rand((10, )).numpy(),
                                       token_idx=torch.tensor(60),
                                       sign=1)
Example no. 10
    def test_batch_prediction(self):
        inputs = [
                {u"sentence": u"What a great test sentence."},
                {u"sentence": u"Here's another good, interesting one."}
        ]

        archive = load_archive(self.FIXTURES_ROOT / u'constituency_parser' / u'serialization' / u'model.tar.gz')
        predictor = Predictor.from_archive(archive, u'constituency-parser')
        results = predictor.predict_batch_json(inputs)

        result = results[0]
        assert len(result[u"spans"]) == 21 # number of possible substrings of the sentence.
        assert len(result[u"class_probabilities"]) == 21
        assert result[u"tokens"] == [u"What", u"a", u"great", u"test", u"sentence", u"."]
        assert isinstance(result[u"trees"], str)

        for class_distribution in result[u"class_probabilities"]:
            self.assertAlmostEqual(sum(class_distribution), 1.0, places=4)

        result = results[1]

        assert len(result[u"spans"]) == 36 # number of possible substrings of the sentence.
        assert len(result[u"class_probabilities"]) == 36
        assert result[u"tokens"] == [u"Here", u"'s", u"another", u"good", u",", u"interesting", u"one", u"."]
        assert isinstance(result[u"trees"], str)

        for class_distribution in result[u"class_probabilities"]:
            self.assertAlmostEqual(sum(class_distribution), 1.0, places=4)
Example no. 11
    def test_predictions_to_labeled_instances(self):
        inputs = {
            "document":
            "This is a single string document about a test. Sometimes it "
            "contains coreferent parts."
        }
        archive = load_archive(self.FIXTURES_ROOT / 'coref' / 'serialization' /
                               'model.tar.gz')
        predictor = Predictor.from_archive(archive, 'coreference-resolution')

        instance = predictor._json_to_instance(inputs)
        outputs = predictor._model.forward_on_instance(instance)
        new_instances = predictor.predictions_to_labeled_instances(
            instance, outputs)
        assert new_instances is not None

        for new_instance in new_instances:
            assert 'span_labels' in new_instance
            assert len(new_instance['span_labels']) == 60  # one label per enumerated candidate span
            true_top_spans = set(tuple(span) for span in outputs['top_spans'])
            pred_clust_spans = set()
            for i, span in enumerate(outputs['top_spans']):
                if new_instance['span_labels'][i]:
                    pred_clust_spans.add(tuple(span))
            assert true_top_spans == pred_clust_spans
Example no. 12
    def test_uses_named_inputs(self):
        inputs = {
            "document":
            "This is a single string document about a test. Sometimes it "
            "contains coreferent parts."
        }
        archive = load_archive(self.FIXTURES_ROOT / 'coref' / 'serialization' /
                               'model.tar.gz')
        predictor = Predictor.from_archive(archive, 'coreference-resolution')

        result = predictor.predict_json(inputs)

        document = result["document"]
        assert document == [
            'This', 'is', 'a', 'single', 'string', 'document', 'about', 'a',
            'test', '.', 'Sometimes', 'it', 'contains', 'coreferent', 'parts',
            '.'
        ]

        clusters = result["clusters"]
        assert isinstance(clusters, list)
        for cluster in clusters:
            assert isinstance(cluster, list)
            for mention in cluster:
                # Spans should be integer indices.
                assert isinstance(mention[0], int)
                assert isinstance(mention[1], int)
                # Spans should be inside document.
                assert 0 <= mention[0] < len(document)
                assert 0 <= mention[1] < len(document)
Example no. 13
    def test_uses_named_inputs(self):
        inputs = {
            "sentence": "The squirrel wrote a unit test to make sure its nuts worked as designed."
        }

        archive = load_archive(FIXTURES_ROOT / "syntax" / "srl" / "serialization" / "model.tar.gz")
        predictor = Predictor.from_archive(archive, "semantic-role-labeling")

        result_json = predictor.predict_json(inputs)
        self.assert_predict_result(result_json)

        words = [
            "The",
            "squirrel",
            "wrote",
            "a",
            "unit",
            "test",
            "to",
            "make",
            "sure",
            "its",
            "nuts",
            "worked",
            "as",
            "designed",
            ".",
        ]

        result_words = predictor.predict_tokenized(words)
        self.assert_predict_result(result_words)
Example no. 14
def get_predictor(args):
    archive = load_archive(args.archive_file,
                           weights_file=None,
                           cuda_device=args.cuda_device,
                           overrides="")

    model_type = archive.config.get("model").get("type")
    if model_type not in ('srl', 'coref'):
        raise Exception("The given model must be 'srl' or 'coref'.")

    if model_type == 'srl':
        return Predictor.from_archive(archive,
                                      'semantic-role-labeling'), model_type
    if model_type == 'coref':
        return Predictor.from_archive(archive,
                                      'coreference-resolution'), model_type
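get_predictor expects an argparse-style namespace with archive_file and cuda_device attributes. A hedged usage sketch, with a placeholder archive path:

import argparse

args = argparse.Namespace(archive_file="srl-model.tar.gz", cuda_device=-1)
predictor, model_type = get_predictor(args)
print(model_type)  # either 'srl' or 'coref'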
Example no. 15
def my_sample_fever():
    logger = logging.getLogger()
    dictConfig({
        'version': 1,
        'formatters': {
            'default': {
                'format':
                '[%(asctime)s] %(levelname)s in %(module)s: %(message)s',
            }
        },
        'handlers': {
            'wsgi': {
                'class': 'logging.StreamHandler',
                'stream': 'ext://sys.stderr',
                'formatter': 'default'
            }
        },
        'root': {
            'level': 'INFO',
            'handlers': ['wsgi']
        },
        # Named loggers must be nested under the 'loggers' key for dictConfig.
        'loggers': {
            'allennlp': {
                'level': 'INFO',
                'handlers': ['wsgi']
            }
        },
    })

    logger.info("My sample FEVER application")
    with open(os.getenv("CONFIG_PATH", "configs/predict_docker.json")) as config_file:
        config = json.load(config_file)

    # Create document retrieval model
    logger.info("Load FEVER Document database from {0}".format(
        config["database"]))
    db = FEVERDocumentDatabase(config["database"])

    logger.info("Load DrQA Document retrieval index from {0}".format(
        config['index']))
    retrieval_method = RetrievalMethod.by_name("top_docs")(db, config["index"],
                                                           config["n_docs"],
                                                           config["n_sents"])

    # Load the pre-trained predictor and model from the .tar.gz in the config file.
    # Override the database location for our model as this now comes from a read-only volume
    logger.info("Load Model from {0}".format(config['model']))
    archive = load_archive(config["model"],
                           cuda_device=config["cuda_device"],
                           overrides='{"dataset_reader":{"database":"' +
                           config["database"] + '" }}')
    predictor = Predictor.from_archive(archive, predictor_name="fever")

    # The prediction function that is passed to the web server for FEVER2.0
    def baseline_predict(instances):
        predictions = []
        for instance in instances:
            predictions.append(
                predict_single(predictor, retrieval_method, instance))
        return predictions

    return fever_web_api(baseline_predict)
Example no. 16
    def test_batch_prediction(self):
        inputs = [
                {"sentence": "What a great test sentence."},
                {"sentence": "Here's another good, interesting one."}
        ]

        archive = load_archive(self.FIXTURES_ROOT / 'constituency_parser' / 'serialization' / 'model.tar.gz')
        predictor = Predictor.from_archive(archive, 'constituency-parser')
        results = predictor.predict_batch_json(inputs)

        result = results[0]
        assert len(result["spans"]) == 21 # number of possible substrings of the sentence.
        assert len(result["class_probabilities"]) == 21
        assert result["tokens"] == ["What", "a", "great", "test", "sentence", "."]
        assert isinstance(result["trees"], str)

        for class_distribution in result["class_probabilities"]:
            self.assertAlmostEqual(sum(class_distribution), 1.0, places=4)

        result = results[1]

        assert len(result["spans"]) == 36 # number of possible substrings of the sentence.
        assert len(result["class_probabilities"]) == 36
        assert result["tokens"] == ["Here", "'s", "another", "good", ",", "interesting", "one", "."]
        assert isinstance(result["trees"], str)

        for class_distribution in result["class_probabilities"]:
            self.assertAlmostEqual(sum(class_distribution), 1.0, places=4)
Example no. 17
    def test_uses_named_inputs(self):
        inputs = {
                "question": "What kind of test succeeded on its first attempt?",
                "passage": "One time I was writing a unit test, and it succeeded on the first attempt."
        }

        archive = load_archive(self.FIXTURES_ROOT / 'bidaf' / 'serialization' / 'model.tar.gz')
        predictor = Predictor.from_archive(archive, 'machine-comprehension')

        result = predictor.predict_json(inputs)

        best_span = result.get("best_span")
        assert best_span is not None
        assert isinstance(best_span, list)
        assert len(best_span) == 2
        assert all(isinstance(x, int) for x in best_span)
        assert best_span[0] <= best_span[1]

        best_span_str = result.get("best_span_str")
        assert isinstance(best_span_str, str)
        assert best_span_str != ""

        for probs_key in ("span_start_probs", "span_end_probs"):
            probs = result.get(probs_key)
            assert probs is not None
            assert all(isinstance(x, float) for x in probs)
            assert sum(probs) == approx(1.0)
Example no. 18
    def test_uses_named_inputs(self):
        inputs = {
                "sentence": "The squirrel wrote a unit test to make sure its nuts worked as designed."
        }

        archive = load_archive(self.FIXTURES_ROOT / 'srl' / 'serialization' / 'model.tar.gz')
        predictor = Predictor.from_archive(archive, 'semantic-role-labeling')

        result = predictor.predict_json(inputs)

        words = result.get("words")
        assert words == ["The", "squirrel", "wrote", "a", "unit", "test",
                         "to", "make", "sure", "its", "nuts", "worked", "as", "designed", "."]
        num_words = len(words)

        verbs = result.get("verbs")
        assert verbs is not None
        assert isinstance(verbs, list)

        assert any(v["verb"] == "wrote" for v in verbs)
        assert any(v["verb"] == "make" for v in verbs)
        assert any(v["verb"] == "worked" for v in verbs)

        for verb in verbs:
            tags = verb.get("tags")
            assert tags is not None
            assert isinstance(tags, list)
            assert all(isinstance(tag, str) for tag in tags)
            assert len(tags) == num_words
Example no. 19
    def test_uses_named_inputs(self):
        """
        Tests whether the model outputs conform to the expected format.
        """
        inputs = {
                "sentence": "Angela Merkel met and spoke to her EU counterparts during the climate summit."
        }

        archive = load_archive(self.FIXTURES_ROOT /
                               'srl' / 'serialization' / 'model.tar.gz')
        predictor = Predictor.from_archive(archive, 'open-information-extraction')

        result = predictor.predict_json(inputs)

        words = result.get("words")
        assert words == ["Angela", "Merkel", "met", "and", "spoke", "to", "her", "EU", "counterparts",
                         "during", "the", "climate", "summit", "."]
        num_words = len(words)

        verbs = result.get("verbs")
        assert verbs is not None
        assert isinstance(verbs, list)

        for verb in verbs:
            tags = verb.get("tags")
            assert tags is not None
            assert isinstance(tags, list)
            assert all(isinstance(tag, str) for tag in tags)
            assert len(tags) == num_words
Example no. 20
    def test_uses_named_inputs(self):
        inputs = {
                "premise": "I always write unit tests for my code.",
                "hypothesis": "One time I didn't write any unit tests for my code."
        }

        archive = load_archive(self.FIXTURES_ROOT / 'decomposable_attention' / 'serialization' / 'model.tar.gz')
        predictor = Predictor.from_archive(archive, 'textual-entailment')
        result = predictor.predict_json(inputs)

        # Label probs should be 3 floats that sum to one
        label_probs = result.get("label_probs")
        assert label_probs is not None
        assert isinstance(label_probs, list)
        assert len(label_probs) == 3
        assert all(isinstance(x, float) for x in label_probs)
        assert all(x >= 0 for x in label_probs)
        assert sum(label_probs) == approx(1.0)

        # Logits should be 3 floats that softmax to label_probs
        label_logits = result.get("label_logits")
        assert label_logits is not None
        assert isinstance(label_logits, list)
        assert len(label_logits) == 3
        assert all(isinstance(x, float) for x in label_logits)

        exps = [math.exp(x) for x in label_logits]
        sumexps = sum(exps)
        for e, p in zip(exps, label_probs):
            assert e / sumexps == approx(p)
Example no. 21
    def test_batch_prediction(self):
        inputs = [
                {
                        "sentence": "What kind of test succeeded on its first attempt?",
                },
                {
                        "sentence": "What kind of test succeeded on its first attempt at batch processing?",
                }
        ]

        archive = load_archive(self.FIXTURES_ROOT / 'biaffine_dependency_parser'
                               / 'serialization' / 'model.tar.gz')
        predictor = Predictor.from_archive(archive, 'biaffine-dependency-parser')

        results = predictor.predict_batch_json(inputs)
        assert len(results) == 2

        for result in results:
            sequence_length = len(result.get("words"))
            predicted_heads = result.get("predicted_heads")
            assert len(predicted_heads) == sequence_length

            predicted_dependencies = result.get("predicted_dependencies")
            assert len(predicted_dependencies) == sequence_length
            assert isinstance(predicted_dependencies, list)
            assert all(isinstance(x, str) for x in predicted_dependencies)
Example no. 22
 def test_name(self, name):
     # Load pre-trained model
     archive = load_archive('./pre_trained/model.tar.gz')
     # Load predictor and predict the language of the name
     predictor = Predictor.from_archive(archive, 'name-predictor')
     result = predictor.predict(name)
     print(result)
Example no. 23
    def test_uses_named_inputs(self):
        inputs = {
            "premise": "I always write unit tests for my code.",
            "hypothesis": "One time I didn't write any unit tests for my code."
        }

        archive = load_archive(self.FIXTURES_ROOT / 'decomposable_attention' /
                               'serialization' / 'model.tar.gz')
        predictor = Predictor.from_archive(archive, 'textual-entailment')
        result = predictor.predict_json(inputs)

        # Label probs should be 3 floats that sum to one
        label_probs = result.get("label_probs")
        assert label_probs is not None
        assert isinstance(label_probs, list)
        assert len(label_probs) == 3
        assert all(isinstance(x, float) for x in label_probs)
        assert all(x >= 0 for x in label_probs)
        assert sum(label_probs) == approx(1.0)

        # Logits should be 3 floats that softmax to label_probs
        label_logits = result.get("label_logits")
        assert label_logits is not None
        assert isinstance(label_logits, list)
        assert len(label_logits) == 3
        assert all(isinstance(x, float) for x in label_logits)

        exps = [math.exp(x) for x in label_logits]
        sumexps = sum(exps)
        for e, p in zip(exps, label_probs):
            assert e / sumexps == approx(p)
Example no. 24
def load_claim_extraction_model(model_path: str = MODEL_PATH,
                                weight_path: str = WEIGHT_PATH):
    """
    Load the Conditional Random field model using allennlp used by titipat in the repo.

    see: http://github.com/titipata/detecting-scientific-claim
    :param model_path: location of model, can be downloaded offline or link can be given
    :param weight_path: location of model weight, can be downloaded offline or link can be given
    :return: the model using the WEIGHT_PATH specified
    """
    archive = load_archive(model_path)
    predictor = Predictor.from_archive(archive, 'discourse_crf_predictor')
    # NOTE(alpha_darklord): We build the CRF head the same way allennlp builds it;
    # for reference, see: http://github.com/titipata/detecting-scientific-claim
    model = predictor._model
    for param in list(model.parameters()):
        param.requires_grad = False  # not to train weights
    embedding_dim = 300
    num_classes, constraints, include_start_end_transitions = 2, None, False
    model.crf = ConditionalRandomField(
        num_classes,
        constraints,
        include_start_end_transitions=include_start_end_transitions)
    model.label_projection_layer = TimeDistributed(
        Linear(2 * embedding_dim, num_classes))
    model.load_state_dict(
        torch.load(cached_path(weight_path), map_location='cpu'))
    return model
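Hedged usage of the loader above, relying on the module-level MODEL_PATH and WEIGHT_PATH defaults it references:

model = load_claim_extraction_model()
model.eval()  # the frozen model is intended for inference only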
Example no. 25
def allennlp(
    path_to_senteval: str,
    path_to_allennlp_archive: str,
    output_filepath: str = None,
    weights_file: str = None,
    cuda_device: int = -1,
    output_dict_field: str = "embeddings",
    predictor_name: str = None,
    include_package: List[str] = None,
    prototyping_config: bool = False,
    verbose: bool = False,
) -> None:
    """Evaluates a trained AllenNLP model against the SentEval benchmark."""

    from allennlp.models.archival import load_archive
    from allennlp.predictors import Predictor

    # SentEval prepare and batcher
    def prepare(params, samples):
        return

    @torch.no_grad()
    def batcher(params, batch):
        batch = _cleanup_batch(batch)
        # Re-tokenize the input text using the tokenizer of the dataset reader
        inputs = [{"text": " ".join(tokens)} for tokens in batch]
        outputs = params.predictor.predict_batch_json(inputs)
        # AllenNLP models return a dictionary, so access the embeddings with the given key.
        embeddings = [output[output_dict_field] for output in outputs]

        embeddings = np.vstack(embeddings)
        return embeddings

    # Allows us to import custom dataset readers and models that may exist in the AllenNLP archive.
    # See: https://tinyurl.com/whkmoqh
    include_package = include_package or []
    for package_name in include_package:
        common_util.import_module_and_submodules(package_name)

    # Load the archived Model
    archive = load_archive(
        path_to_allennlp_archive,
        cuda_device=cuda_device,
        weights_file=weights_file,
        overrides="{'trainer.use_amp': true}",
    )
    predictor = Predictor.from_archive(archive, predictor_name)
    typer.secho(
        f'{SUCCESS} Model from AllenNLP archive "{path_to_allennlp_archive}" loaded successfully.',
        fg=typer.colors.GREEN,
        bold=True,
    )

    # Performs a few setup steps and returns the SentEval params
    params_senteval = _setup_senteval(path_to_senteval, prototyping_config, verbose)
    params_senteval["predictor"] = predictor
    _run_senteval(params_senteval, path_to_senteval, batcher, prepare, output_filepath)

    return
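Although this function is written as a typer CLI command, it can also be called directly. A sketch in which every path and name is a placeholder:

allennlp(
    path_to_senteval="SentEval",
    path_to_allennlp_archive="model.tar.gz",
    predictor_name="my-text-predictor",
)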
Example no. 26
 def __init__(self, final_candidates_filename, filter_by_corpus, output_df, encoding="utf-8"):
     self.final_candidates_filename = final_candidates_filename
     self.encoding = encoding
     self.output_df = output_df
     self.filter_by_corpus = filter_by_corpus
     self.predictor = Predictor.from_archive(
         load_archive('https://s3-us-west-2.amazonaws.com/allennlp/models/coref-model-2018.02.05.tar.gz',
                      weights_file=None, overrides=""), 'coreference-resolution')
Example no. 27
 def __init__(self, data_path, output_name, encoding="utf-8"):
     self.data_path = data_path
     self.output_name = output_name
     self.encoding = encoding
     self.predictor = Predictor.from_archive(
         load_archive('https://s3-us-west-2.amazonaws.com/allennlp/models/coref-model-2018.02.05.tar.gz',
                      weights_file=None,
                      overrides=""), 'coreference-resolution')
Example no. 28
def _get_predictor(args: argparse.Namespace) -> Predictor:
    check_for_gpu(args.cuda_device)
    archive = load_archive(args.archive_path,
                           weights_file=args.weights_file,
                           cuda_device=args.cuda_device,
                           overrides=args.overrides)

    return Predictor.from_archive(archive, args.predictor)
Example no. 29
 def test_sentence(self, sentence):
     # Load pre-trained model
     archive = load_archive('./pre-trained/bert.tar.gz')
     # Load the predictor and classify the sentence
     predictor = Predictor.from_archive(archive,
                                        'sentence_classifier_predictor')
     result = predictor.predict(sentence)
     print(result)
Example no. 30
def get_predictor(args):
    print(f"Loading Model from {args.archive_file}")
    archive = load_archive(
        args.archive_file,
        cuda_device=args.cuda_device,
    )

    return Predictor.from_archive(archive, predictor_name="base_predictor")
Example no. 31
    def test_sentence2instance(self):
        inputs = {"sentence": "我是大哥大"}

        archive = load_archive('tests/fixture/model.tar.gz')
        predictor = Predictor.from_archive(archive, 'sentence-segment')

        result = predictor.predict_json(inputs)
        print(result)
Example no. 32
    def setUp(self):
        super().setUp()

        importlib.import_module("allennlp_rc.models")
        archive = load_archive("allennlp_server/tests/fixtures/bidaf/model.tar.gz")
        self.bidaf_predictor = Predictor.from_archive(
            archive, "allennlp_rc.predictors.ReadingComprehensionPredictor"
        )
Example no. 33
def load_predictor(model_dir: str,
                   predictor_name: str,
                   cuda_device: int = -1,
                   archive_filename: str = "model.tar.gz",
                   weights_file: Optional[str] = None) -> Predictor:
    archive_path = join(model_dir, archive_filename)
    archive = load_archive(archive_path, cuda_device=cuda_device, weights_file=weights_file)
    return Predictor.from_archive(archive, predictor_name)
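For instance (the model directory and predictor name are placeholders):

predictor = load_predictor("trained_models/srl", "semantic-role-labeling")
result = predictor.predict_json({"sentence": "Load once, predict many times."})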
Example no. 34
 def test_batch_prediction(self):
     inputs = {
         "sentence": "The squirrel wrote a unit test to make sure its nuts worked as designed."
     }
     archive = load_archive(self.FIXTURES_ROOT / "srl" / "serialization" / "model.tar.gz")
     predictor = Predictor.from_archive(archive, "semantic-role-labeling")
     result = predictor.predict_batch_json([inputs, inputs])
     assert result[0] == result[1]
Example no. 35
 def test_batch_prediction(self):
     inputs = {
             "sentence": "The squirrel wrote a unit test to make sure its nuts worked as designed."
     }
     archive = load_archive(self.FIXTURES_ROOT / 'srl' / 'serialization' / 'model.tar.gz')
     predictor = Predictor.from_archive(archive, 'semantic-role-labeling')
     result = predictor.predict_batch_json([inputs, inputs])
     assert result[0] == result[1]
Example no. 36
def main(args):
    # Executing this file with no extra options runs the simple service with the bidaf test fixture
    # and the machine-comprehension predictor. There's no good reason you'd want
    # to do this, except possibly to test changes to the stock HTML.

    parser = argparse.ArgumentParser(description='Serve up a simple model')

    parser.add_argument('--archive-path',
                        type=str,
                        required=True,
                        help='path to trained archive file')
    parser.add_argument('--predictor',
                        type=str,
                        required=True,
                        help='name of predictor')
    parser.add_argument('--static-dir',
                        type=str,
                        help='serve index.html from this directory')
    parser.add_argument('--title',
                        type=str,
                        help='change the default page title',
                        default="AllenNLP Demo")
    parser.add_argument('--field-name',
                        type=str,
                        required=True,
                        action='append',
                        help='field names to include in the demo')
    parser.add_argument('--port',
                        type=int,
                        default=8000,
                        help='port to serve the demo on')

    parser.add_argument('--include-package',
                        type=str,
                        action='append',
                        default=[],
                        help='additional packages to include')

    args = parser.parse_args(args)

    # Load modules
    for package_name in args.include_package:
        import_submodules(package_name)

    archive = load_archive(args.archive_path)
    predictor = Predictor.from_archive(archive, args.predictor)
    field_names = args.field_name

    app = make_app(predictor=predictor,
                   field_names=field_names,
                   static_dir=args.static_dir,
                   title=args.title)
    CORS(app)

    http_server = WSGIServer(('0.0.0.0', args.port), app)
    print(f"Model loaded, serving demo on port {args.port}")
    http_server.serve_forever()
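The entry point above could be exercised directly like this (archive path, predictor, and field names are placeholders):

main([
    "--archive-path", "model.tar.gz",
    "--predictor", "machine-comprehension",
    "--field-name", "question",
    "--field-name", "passage",
])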
Example no. 37
    def test_coref_resolved(self):
        """Tests I/O of coref_resolved method"""

        document = "This is a test sentence."
        archive = load_archive(FIXTURES_ROOT / "coref" / "serialization" /
                               "model.tar.gz")
        predictor = Predictor.from_archive(archive, "coreference-resolution")
        result = predictor.coref_resolved(document)
        assert isinstance(result, str)
Example no. 38
    def test_prediction_with_no_verbs(self):
        """
        Tests whether the model copes with sentences without verbs.
        """
        input1 = {"sentence": "Blah no verb sentence."}
        archive = load_archive(self.FIXTURES_ROOT /
                               'srl' / 'serialization' / 'model.tar.gz')
        predictor = Predictor.from_archive(archive, 'open-information-extraction')

        result = predictor.predict_json(input1)
        assert result == {'words': ['Blah', 'no', 'verb', 'sentence', '.'], 'verbs': []}
Example no. 39
    def test_predictor_with_direct_parser(self):
        archive_dir = self.FIXTURES_ROOT / 'semantic_parsing' / 'nlvr_direct_semantic_parser' / 'serialization'
        archive = load_archive(os.path.join(archive_dir, 'model.tar.gz'))
        predictor = Predictor.from_archive(archive, 'nlvr-parser')

        result = predictor.predict_json(self.inputs)
        assert 'logical_form' in result
        assert 'denotations' in result
        # result['denotations'] is a list corresponding to k-best logical forms, where k is 1 by
        # default.
        assert len(result['denotations'][0]) == 2  # Because there are two worlds in the input.
Example no. 40
    def test_atis_parser_batch_predicted_sql_present(self):
        inputs = [{
                "utterance": "show me flights to seattle",
        }]

        archive_path = self.FIXTURES_ROOT / 'semantic_parsing' / 'atis' / 'serialization' / 'model.tar.gz'
        archive = load_archive(archive_path)
        predictor = Predictor.from_archive(archive, 'atis-parser')

        result = predictor.predict_batch_json(inputs)
        predicted_sql_query = result[0].get("predicted_sql_query")
        assert predicted_sql_query is not None
Example no. 41
 def test_copynet_predictions(self):
     archive = load_archive(self.FIXTURES_ROOT / 'encoder_decoder' / 'copynet_seq2seq' /
                            'serialization' / 'model.tar.gz')
     predictor = Predictor.from_archive(archive, 'seq2seq')
     model = predictor._model
     end_token = model.vocab.get_token_from_index(model._end_index, model._target_namespace)
     output_dict = predictor.predict("these tokens should be copied over : hello world")
     assert len(output_dict["predictions"]) == model._beam_search.beam_size
     assert len(output_dict["predicted_tokens"]) == model._beam_search.beam_size
     for predicted_tokens in output_dict["predicted_tokens"]:
         assert all(isinstance(x, str) for x in predicted_tokens)
         assert end_token not in predicted_tokens
Example no. 42
    def test_answer_present_with_batch_predict(self):
        inputs = [{
                "question": "Who is 18 years old?",
                "table": "Name\tAge\nShallan\t16\nKaladin\t18"
        }]

        archive_path = self.FIXTURES_ROOT / 'semantic_parsing' / 'wikitables' / 'serialization' / 'model.tar.gz'
        archive = load_archive(archive_path)
        predictor = Predictor.from_archive(archive, 'wikitables-parser')

        result = predictor.predict_batch_json(inputs)
        answer = result[0].get("answer")
        assert answer is not None
Example no. 43
    def test_prediction_with_no_verbs(self):

        input1 = {"sentence": "Blah no verb sentence."}
        archive = load_archive(self.FIXTURES_ROOT / 'srl' / 'serialization' / 'model.tar.gz')
        predictor = Predictor.from_archive(archive, 'semantic-role-labeling')
        result = predictor.predict_json(input1)
        assert result == {'words': ['Blah', 'no', 'verb', 'sentence', '.'], 'verbs': []}

        input2 = {"sentence": "This sentence has a verb."}
        results = predictor.predict_batch_json([input1, input2])
        assert results[0] == {'words': ['Blah', 'no', 'verb', 'sentence', '.'], 'verbs': []}
        assert results[1] == {'words': ['This', 'sentence', 'has', 'a', 'verb', '.'],
                              'verbs': [{'verb': 'has', 'description': 'This sentence has a verb .',
                                         'tags': ['O', 'O', 'O', 'O', 'O', 'O']}]}
Example no. 44
    def test_uses_named_inputs_with_simple_seq2seq(self):
        inputs = {
                "source": "What kind of test succeeded on its first attempt?",
        }

        archive = load_archive(self.FIXTURES_ROOT / 'encoder_decoder' / 'simple_seq2seq' /
                               'serialization' / 'model.tar.gz')
        predictor = Predictor.from_archive(archive, 'seq2seq')

        result = predictor.predict_json(inputs)

        predicted_tokens = result.get("predicted_tokens")
        assert predicted_tokens is not None
        assert isinstance(predicted_tokens, list)
        assert all(isinstance(x, str) for x in predicted_tokens)
Example no. 45
    def test_uses_named_inputs(self):
        inputs = {"document": "This is a single string document about a test. Sometimes it "
                              "contains coreferent parts."}
        archive = load_archive(self.FIXTURES_ROOT / 'coref' / 'serialization' / 'model.tar.gz')
        predictor = Predictor.from_archive(archive, 'coreference-resolution')

        result = predictor.predict_json(inputs)
        self.assert_predict_result(result)

        document = ['This', 'is', 'a', 'single', 'string',
                    'document', 'about', 'a', 'test', '.', 'Sometimes',
                    'it', 'contains', 'coreferent', 'parts', '.']

        result_doc_words = predictor.predict_tokenized(document)
        self.assert_predict_result(result_doc_words)
Example no. 46
    def test_uses_named_inputs(self):
        inputs = {
                "sentence": "The squirrel wrote a unit test to make sure its nuts worked as designed."
        }

        archive = load_archive(self.FIXTURES_ROOT / 'srl' / 'serialization' / 'model.tar.gz')
        predictor = Predictor.from_archive(archive, 'semantic-role-labeling')

        result_json = predictor.predict_json(inputs)
        self.assert_predict_result(result_json)

        words = ["The", "squirrel", "wrote", "a", "unit", "test",
                 "to", "make", "sure", "its", "nuts", "worked", "as", "designed", "."]

        result_words = predictor.predict_tokenized(words)
        self.assert_predict_result(result_words)
Example no. 47
    def test_uses_named_inputs(self):
        inputs = {
                "sentence": "What a great test sentence.",
        }

        archive = load_archive(self.FIXTURES_ROOT / 'constituency_parser' / 'serialization' / 'model.tar.gz')
        predictor = Predictor.from_archive(archive, 'constituency-parser')

        result = predictor.predict_json(inputs)

        assert len(result["spans"]) == 21 # number of possible substrings of the sentence.
        assert len(result["class_probabilities"]) == 21
        assert result["tokens"] == ["What", "a", "great", "test", "sentence", "."]
        assert isinstance(result["trees"], str)

        for class_distribution in result["class_probabilities"]:
            self.assertAlmostEqual(sum(class_distribution), 1.0, places=4)
Example no. 48
    def test_predictor_uses_dataset_reader_to_determine_pos_set(self):
        # pylint: disable=protected-access
        archive = load_archive(self.FIXTURES_ROOT / 'biaffine_dependency_parser'
                               / 'serialization' / 'model.tar.gz')
        predictor = Predictor.from_archive(archive, 'biaffine-dependency-parser')

        inputs = {
                "sentence": "Dogs eat cats.",
        }
        instance_with_ud_pos = predictor._json_to_instance(inputs)
        tags = instance_with_ud_pos.fields["pos_tags"].labels
        assert tags == ['NOUN', 'VERB', 'NOUN', 'PUNCT']

        predictor._dataset_reader.use_language_specific_pos = True

        instance_with_ptb_pos = predictor._json_to_instance(inputs)
        tags = instance_with_ptb_pos.fields["pos_tags"].labels
        assert tags == ['NNS', 'VBP', 'NNS', '.']
Example no. 49
    def test_atis_parser_uses_named_inputs(self):
        inputs = {
                "utterance": "show me the flights to seattle",
        }

        archive_path = self.FIXTURES_ROOT / 'semantic_parsing' / 'atis' / 'serialization' / 'model.tar.gz'
        archive = load_archive(archive_path)
        predictor = Predictor.from_archive(archive, 'atis-parser')

        result = predictor.predict_json(inputs)
        action_sequence = result.get("best_action_sequence")
        if action_sequence:
            # An untrained model will likely get into a loop and not produce any finished states.
            # When the model gets into a loop it will not produce any valid SQL, so we don't get
            # any actions. This basically just tests that the model runs.
            assert len(action_sequence) > 1
            assert all([isinstance(action, str) for action in action_sequence])
            predicted_sql_query = result.get("predicted_sql_query")
            assert predicted_sql_query is not None
Example no. 50
    def test_uses_named_inputs(self):
        inputs = {"paragraphs": [{"qas": [{"followup": "y", "yesno": "x", "question": "When was the first one?",
                                           "answers": [{"answer_start": 0, "text": "One time"}], "id": "C_q#0"},
                                          {"followup": "n", "yesno": "x", "question": "What were you doing?",
                                           "answers": [{"answer_start": 15, "text": "writing a"}], "id": "C_q#1"},
                                          {"followup": "m", "yesno": "y", "question": "How often?",
                                           "answers": [{"answer_start": 4, "text": "time I"}], "id": "C_q#2"}],
                                  "context": "One time I was writing a unit test,\
                                   and it succeeded on the first attempt."}]}

        archive = load_archive(self.FIXTURES_ROOT / 'dialog_qa' / 'serialization' / 'model.tar.gz')
        predictor = Predictor.from_archive(archive, 'dialog_qa')

        result = predictor.predict_json(inputs)

        best_span_str_list = result.get("best_span_str")
        for best_span_str in best_span_str_list:
            assert isinstance(best_span_str, str)
            assert best_span_str != ""
Example no. 51
    def test_answer_present(self):
        inputs = {
                'question':  'Mike was snowboarding on the snow and hit a piece of ice. He went much faster on the ice because _____ is smoother. (A) snow (B) ice',  # pylint: disable=line-too-long
                'world_literals': {'world1': 'snow', 'world2': 'ice'},  # Added to avoid world tagger
                'qrspec': '[smoothness, +speed]',
                'entitycues': 'smoothness: smoother\nspeed:faster'
        }

        archive_path = self.FIXTURES_ROOT / 'semantic_parsing' / 'quarel' / 'serialization_parser_zeroshot' / 'model.tar.gz'  # pylint: disable=line-too-long
        archive = load_archive(archive_path)
        predictor = Predictor.from_archive(archive, 'quarel-parser')

        result = predictor.predict_json(inputs)
        answer_index = result.get('answer_index')
        assert answer_index is not None

        # Check input modality where entity cues are not given
        del inputs['entitycues']
        result = predictor.predict_json(inputs)
        answer_index = result.get('answer_index')
        assert answer_index is not None
Example no. 52
    def test_uses_named_inputs(self):
        inputs = {
                "sentence": "Please could you parse this sentence?",
        }

        archive = load_archive(self.FIXTURES_ROOT / 'biaffine_dependency_parser'
                               / 'serialization' / 'model.tar.gz')
        predictor = Predictor.from_archive(archive, 'biaffine-dependency-parser')

        result = predictor.predict_json(inputs)

        words = result.get("words")
        predicted_heads = result.get("predicted_heads")
        assert len(predicted_heads) == len(words)

        predicted_dependencies = result.get("predicted_dependencies")
        assert len(predicted_dependencies) == len(words)
        assert isinstance(predicted_dependencies, list)
        assert all(isinstance(x, str) for x in predicted_dependencies)

        assert result.get("loss") is not None
        assert result.get("arc_loss") is not None
        assert result.get("tag_loss") is not None

        hierplane_tree = result.get("hierplane_tree")
        hierplane_tree.pop("nodeTypeToStyle")
        hierplane_tree.pop("linkToPosition")
        # pylint: disable=line-too-long,bad-continuation
        assert result.get("hierplane_tree") == {'text': 'Please could you parse this sentence ?',
                                                'root': {'word': 'Please', 'nodeType': 'det', 'attributes': ['INTJ'], 'link': 'det', 'spans': [{'start': 0, 'end': 7}],
                                                    'children': [
                                                            {'word': 'could', 'nodeType': 'nummod', 'attributes': ['VERB'], 'link': 'nummod', 'spans': [{'start': 7, 'end': 13}]},
                                                            {'word': 'you', 'nodeType': 'nummod', 'attributes': ['PRON'], 'link': 'nummod', 'spans': [{'start': 13, 'end': 17}]},
                                                            {'word': 'parse', 'nodeType': 'nummod', 'attributes': ['VERB'], 'link': 'nummod', 'spans': [{'start': 17, 'end': 23}]},
                                                            {'word': 'this', 'nodeType': 'nummod', 'attributes': ['DET'], 'link': 'nummod', 'spans': [{'start': 23, 'end': 28}]},
                                                            {'word': 'sentence', 'nodeType': 'nummod', 'attributes':['NOUN'], 'link': 'nummod', 'spans': [{'start': 28, 'end': 37}]},
                                                            {'word': '?', 'nodeType': 'nummod', 'attributes': ['PUNCT'], 'link': 'nummod', 'spans': [{'start': 37, 'end': 39}]}
                                                            ]
                                                        }
                                               }
Example no. 53
def main(args):
    # Executing this file with no extra options runs the simple service with the bidaf test fixture
    # and the machine-comprehension predictor. There's no good reason you'd want
    # to do this, except possibly to test changes to the stock HTML.

    parser = argparse.ArgumentParser(description='Serve up a simple model')

    parser.add_argument('--archive-path', type=str, required=True, help='path to trained archive file')
    parser.add_argument('--predictor', type=str, required=True, help='name of predictor')
    parser.add_argument('--static-dir', type=str, help='serve index.html from this directory')
    parser.add_argument('--title', type=str, help='change the default page title', default="AllenNLP Demo")
    parser.add_argument('--field-name', type=str, required=True, action='append',
                        help='field names to include in the demo')
    parser.add_argument('--port', type=int, default=8000, help='port to serve the demo on')

    parser.add_argument('--include-package',
                        type=str,
                        action='append',
                        default=[],
                        help='additional packages to include')

    args = parser.parse_args(args)

    # Load modules
    for package_name in args.include_package:
        import_submodules(package_name)

    archive = load_archive(args.archive_path)
    predictor = Predictor.from_archive(archive, args.predictor)
    field_names = args.field_name

    app = make_app(predictor=predictor,
                   field_names=field_names,
                   static_dir=args.static_dir,
                   title=args.title)
    CORS(app)

    http_server = WSGIServer(('0.0.0.0', args.port), app)
    print(f"Model loaded, serving demo on port {args.port}")
    http_server.serve_forever()
Example no. 54
    def test_batch_prediction(self):
        batch_inputs = [
                {
                        "premise": "I always write unit tests for my code.",
                        "hypothesis": "One time I didn't write any unit tests for my code."
                },
                {
                        "premise": "I also write batched unit tests for throughput!",
                        "hypothesis": "Batch tests are slower."
                },
        ]

        archive = load_archive(self.FIXTURES_ROOT / 'decomposable_attention' / 'serialization' / 'model.tar.gz')
        predictor = Predictor.from_archive(archive, 'textual-entailment')
        results = predictor.predict_batch_json(batch_inputs)
        print(results)
        assert len(results) == 2

        for result in results:
            # Logits should be 3 floats that softmax to label_probs
            label_logits = result.get("label_logits")
            # Label probs should be 3 floats that sum to one
            label_probs = result.get("label_probs")
            assert label_probs is not None
            assert isinstance(label_probs, list)
            assert len(label_probs) == 3
            assert all(isinstance(x, float) for x in label_probs)
            assert all(x >= 0 for x in label_probs)
            assert sum(label_probs) == approx(1.0)

            assert label_logits is not None
            assert isinstance(label_logits, list)
            assert len(label_logits) == 3
            assert all(isinstance(x, float) for x in label_logits)

            exps = [math.exp(x) for x in label_logits]
            sumexps = sum(exps)
            for e, p in zip(exps, label_probs):
                assert e / sumexps == approx(p)
Example no. 55
    def test_uses_named_inputs(self):
        inputs = {
                "source": "personx gave persony a present",
        }

        archive = load_archive(self.FIXTURES_ROOT / 'event2mind' /
                               'serialization' / 'model.tar.gz')
        predictor = Predictor.from_archive(archive, 'event2mind')

        result = predictor.predict_json(inputs)

        token_names = [
                'xintent_top_k_predicted_tokens',
                'xreact_top_k_predicted_tokens',
                'oreact_top_k_predicted_tokens'
        ]

        for token_name in token_names:
            all_predicted_tokens = result.get(token_name)
            for predicted_tokens in all_predicted_tokens:
                assert isinstance(predicted_tokens, list)
                assert all(isinstance(x, str) for x in predicted_tokens)
Example no. 56
    def test_batch_prediction(self):
        inputs = [{"paragraphs": [{"qas": [{"followup": "y", "yesno": "x", "question": "When was the first one?",
                                            "answers": [{"answer_start": 0, "text": "One time"}], "id": "C_q#0"},
                                           {"followup": "n", "yesno": "x", "question": "What were you doing?",
                                            "answers": [{"answer_start": 15, "text": "writing a"}], "id": "C_q#1"},
                                           {"followup": "m", "yesno": "y", "question": "How often?",
                                            "answers": [{"answer_start": 4, "text": "time I"}], "id": "C_q#2"}],
                                   "context": "One time I was writing a unit test,\
                                    and it succeeded on the first attempt."}]},
                  {"paragraphs": [{"qas": [{"followup": "y", "yesno": "x", "question": "When was the first one?",
                                            "answers": [{"answer_start": 0, "text": "One time"}], "id": "C_q#0"},
                                           {"followup": "n", "yesno": "x", "question": "What were you doing?",
                                            "answers": [{"answer_start": 15, "text": "writing a"}], "id": "C_q#1"},
                                           {"followup": "m", "yesno": "y", "question": "How often?",
                                            "answers": [{"answer_start": 4, "text": "time I"}], "id": "C_q#2"}],
                                   "context": "One time I was writing a unit test,\
                                    and it succeeded on the first attempt."}]}]

        archive = load_archive(self.FIXTURES_ROOT / 'dialog_qa' / 'serialization' / 'model.tar.gz')
        predictor = Predictor.from_archive(archive, 'dialog_qa')

        results = predictor.predict_batch_json(inputs)
        assert len(results) == 2
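
The batch test stops at checking the result count. The exact per-dialog fields depend on the model's output dict, so a safe way to explore a result without assuming specific key names is simply to print them:

    results = predictor.predict_batch_json(inputs)
    for i, result in enumerate(results):
        print(f'result {i} keys: {sorted(result.keys())}')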
Example no. 57
    def test_uses_named_inputs(self):
        inputs = {
                "question": "names",
                "table": "name\tdate\nmatt\t2017\npradeep\t2018"
        }

        archive_path = self.FIXTURES_ROOT / 'semantic_parsing' / 'wikitables' / 'serialization' / 'model.tar.gz'
        archive = load_archive(archive_path)
        predictor = Predictor.from_archive(archive, 'wikitables-parser')

        result = predictor.predict_json(inputs)

        action_sequence = result.get("best_action_sequence")
        if action_sequence:
            # We don't currently disallow endless loops in the decoder, and an untrained seq2seq
            # model will easily get itself into a loop.  An endless loop isn't a finished logical
            # form, so decoding doesn't return any finished states, which means no actions.  So,
            # sadly, we don't have a great test here.  This is just testing that the predictor
            # runs, basically.
            assert len(action_sequence) > 1
            assert all(isinstance(action, str) for action in action_sequence)

            logical_form = result.get("logical_form")
            assert logical_form is not None
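
Note the input format: "table" is a flat string with tab-separated columns and newline-separated rows, header row first. A small sketch building it from structured data:

    header = ['name', 'date']
    rows = [['matt', '2017'], ['pradeep', '2018']]
    table = '\n'.join('\t'.join(cells) for cells in [header] + rows)
    assert table == 'name\tdate\nmatt\t2017\npradeep\t2018'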
Example no. 58
    def test_uses_named_inputs(self):
        inputs = {"document": "This is a single string document about a test. Sometimes it "
                              "contains coreferent parts."}
        archive = load_archive(self.FIXTURES_ROOT / 'coref' / 'serialization' / 'model.tar.gz')
        predictor = Predictor.from_archive(archive, 'coreference-resolution')

        result = predictor.predict_json(inputs)

        document = result["document"]
        assert document == ['This', 'is', 'a', 'single', 'string',
                            'document', 'about', 'a', 'test', '.', 'Sometimes',
                            'it', 'contains', 'coreferent', 'parts', '.']

        clusters = result["clusters"]
        assert isinstance(clusters, list)
        for cluster in clusters:
            assert isinstance(cluster, list)
            for mention in cluster:
                # Spans should be integer indices.
                assert isinstance(mention[0], int)
                assert isinstance(mention[1], int)
                # Spans should be inside document.
                assert 0 < mention[0] <= len(document)
                assert 0 < mention[1] <= len(document)
Example no. 59
    def test_batch_prediction(self):
        inputs = [
                {
                        "question": "What kind of test succeeded on its first attempt?",
                        "passage": "One time I was writing a unit test, and it succeeded on the first attempt."
                },
                {
                        "question": "What kind of test succeeded on its first attempt at batch processing?",
                        "passage": "One time I was writing a unit test, and it always failed!"
                }
        ]

        archive = load_archive(self.FIXTURES_ROOT / 'bidaf' / 'serialization' / 'model.tar.gz')
        predictor = Predictor.from_archive(archive, 'machine-comprehension')

        results = predictor.predict_batch_json(inputs)
        assert len(results) == 2

        for result in results:
            best_span = result.get("best_span")
            best_span_str = result.get("best_span_str")
            start_probs = result.get("span_start_probs")
            end_probs = result.get("span_end_probs")
            assert best_span is not None
            assert isinstance(best_span, list)
            assert len(best_span) == 2
            assert all(isinstance(x, int) for x in best_span)
            assert best_span[0] <= best_span[1]

            assert isinstance(best_span_str, str)
            assert best_span_str != ""

            for probs in (start_probs, end_probs):
                assert probs is not None
                assert all(isinstance(x, float) for x in probs)
                assert sum(probs) == approx(1.0)
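
For a single instance, the reading-comprehension predictors of this generation also expose a predict(question, passage) convenience method; a hedged one-off sketch:

    result = predictor.predict(
            question='What kind of test succeeded on its first attempt?',
            passage='One time I was writing a unit test, and it succeeded on the first attempt.')
    print(result['best_span_str'])  # the extracted answer span as a string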
Example no. 60
    def test_build_hierplane_tree(self):
        tree = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
        archive = load_archive(self.FIXTURES_ROOT / 'constituency_parser' / 'serialization' / 'model.tar.gz')
        predictor = Predictor.from_archive(archive, 'constituency-parser')

        hierplane_tree = predictor._build_hierplane_tree(tree, 0, is_root=True)

        # pylint: disable=bad-continuation
        correct_tree = {
                'text': 'the dog chased the cat',
                "linkNameToLabel": LINK_TO_LABEL,
                "nodeTypeToStyle": NODE_TYPE_TO_STYLE,
                'root': {
                        'word': 'the dog chased the cat',
                        'nodeType': 'S',
                        'attributes': ['S'],
                        'link': 'S',
                        'children': [{
                                'word': 'the dog',
                                'nodeType': 'NP',
                                'attributes': ['NP'],
                                'link': 'NP',
                                'children': [{
                                        'word': 'the',
                                        'nodeType': 'D',
                                        'attributes': ['D'],
                                        'link': 'D'
                                        },
                                        {
                                        'word': 'dog',
                                        'nodeType': 'N',
                                        'attributes': ['N'],
                                        'link': 'N'}
                                        ]
                                },
                                {
                                'word': 'chased the cat',
                                'nodeType': 'VP',
                                'attributes': ['VP'],
                                'link': 'VP',
                                'children': [{
                                    'word': 'chased',
                                    'nodeType': 'V',
                                    'attributes': ['V'],
                                    'link': 'V'
                                    },
                                    {
                                    'word': 'the cat',
                                    'nodeType': 'NP',
                                    'attributes': ['NP'],
                                    'link': 'NP',
                                    'children': [{
                                            'word': 'the',
                                            'nodeType': 'D',
                                            'attributes': ['D'],
                                            'link': 'D'
                                            },
                                            {
                                            'word': 'cat',
                                            'nodeType': 'N',
                                            'attributes': ['N'],
                                            'link': 'N'}
                                        ]
                                    }
                                ]
                            }
                        ]
                    }
                }
        # pylint: enable=bad-continuation
        assert correct_tree == hierplane_tree
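
_build_hierplane_tree is normally called internally; end to end, the serialized parse and the hierplane payload come back together. A sketch of that round trip, where the output keys 'trees' and 'hierplane_tree' are assumptions about this predictor's output dict:

    result = predictor.predict_json({'sentence': 'The dog chased the cat.'})
    print(result.get('trees'))  # bracketed PTB-style parse string, if present
    hierplane_tree = result.get('hierplane_tree')
    if hierplane_tree:
        print(hierplane_tree['root']['nodeType'])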