Example #1
0
    def set_annotations(self, docs: Iterable[Doc], outputs: Dict):
        """Attach udify's predictions (computed in ``self.predict``) to ``docs``.

        For each sentence, writes ``dep_``, ``pos_`` and ``lemma_`` onto its
        tokens, sets the predicted dependency heads, and marks the owning doc
        as parsed.

        Raises:
            ValueError: if the model's word sequence does not match the doc's
                tokenization (the two must be aligned token-for-token).
        """
        try:
            from udify.models.udify_model import OUTPUTS as UdifyOUTPUTS  # type: ignore
        except ImportError:
            # NOTE(review): presumably Errors.E0 raises — confirm; otherwise
            # UdifyOUTPUTS is unbound below and a NameError follows.
            Errors.E0(package="unofficial-udify")

        # outputs are aligned sentence-by-sentence with the flattened docs.
        for sent, output in zip(flatten_docs_to_sents(docs), outputs):
            words = output[UdifyOUTPUTS.words]
            _doc_tokens = [token.text for token in sent]
            if words != _doc_tokens:
                # Tokenization drifted between spaCy and the model output;
                # annotations cannot be aligned, so fail loudly.
                # Fixed message: added the missing newline after the first
                # sentence and corrected the "occured" typo.
                raise ValueError("Internal error has occurred.\n"
                                 f"Input text: {sent.text}\n"
                                 f"Input tokens: {_doc_tokens}\n"
                                 f"Model words: {words}")

            for token, dep, upos, lemma in zip(
                    sent,
                    output[UdifyOUTPUTS.predicted_dependencies],
                    output[UdifyOUTPUTS.upos],
                    output[UdifyOUTPUTS.lemmas],
            ):
                token.dep_ = dep
                token.lemma_ = lemma
                token.pos_ = upos
            sent = set_heads(sent, output[UdifyOUTPUTS.predicted_heads])
            sent.doc.is_parsed = True
Example #2
0
def test_flatten_docs_to_sens(vocab):
    """flatten_docs_to_sents should collect every sentence across all docs."""
    pipeline = spacy.blank("en")
    pipeline.add_pipe(Sentencizer("."))
    inputs = ["Foo is bar. Bar is baz.", "It is a sentence."]
    # Two sentences in the first doc plus one in the second -> three total.
    sentences = flatten_docs_to_sents(pipeline.pipe(inputs))
    assert len(sentences) == 3
Example #3
0
 def predict(self, docs: Iterable[Doc]) -> Dict:
     """Run the udify model on every sentence of *docs* and return its raw outputs.

     Each sentence is converted to an AllenNLP instance via the dataset
     reader, and the model is evaluated over all instances at once.
     """
     self.model.eval()  # switch to inference mode
     sentences = flatten_docs_to_sents(docs)
     with torch.no_grad():  # no gradients needed for prediction
         instances = [
             self.dataset_reader.text_to_instance([tok.text for tok in sentence])
             for sentence in sentences
         ]
         return self.model.forward_on_instances(instances)