Esempio n. 1
0
    def from_naf(self, article, naf):
        def _int(x):
            return None if x is None else int(x)
        naf = KafNafParser(BytesIO(naf.encode("utf-8")))

        deps = {dep.get_to(): (dep.get_function(), dep.get_from())
                for dep in naf.get_dependencies()}
        tokendict = {token.get_id(): token for token in naf.get_tokens()}

        for term in naf.get_terms():
            tokens = [tokendict[id] for id in term.get_span().get_span_ids()]
            for token in tokens:
                tid = term.get_id()
                tok = {"aid": article,
                       "token_id": token.get_id(),
                       "offset": _int(token.get_offset()),
                       "sentence": _int(token.get_sent()),
                       "para": _int(token.get_para()),
                       "word": token.get_text(),
                       "term_id": tid,
                       "lemma": term.get_lemma(),
                       "pos": term.get_pos()}
                if tid in deps:
                    rel, parent = deps[tid]
                    tok['parent'] = parent
                    tok['relation'] = rel.split("/")[-1]
                yield tok
Esempio n. 2
0
File: nlpipe.py Progetto: BBie/amcat
    def from_naf(self, naf):
        naf = KafNafParser(BytesIO(naf.encode("utf-8")))
        tokendict = {token.get_id(): token for token in naf.get_tokens()}

        for term in naf.get_terms():
            tokens = [tokendict[id] for id in term.get_span().get_span_ids()]
            for token in tokens:
                yield {"aid": article.pk,
                       "token_id": token.get_id(),
                       "offset": token.get_offset(),
                       "sentence": token.get_sent(),
                       "para": token.get_para(),
                       "word": token.get_text(),
                       "term_id": term.get_id(),
                       "lemma": term.get_lemma(),
                       "pos": term.get_pos()}