def test_create_terms():
    """Can we create terms via the create_{term,wf} functions?

    Creates four word-form/term pairs, then reads them back through
    get_tokens()/get_terms() and checks the round trip matches the input.
    """
    naf = KafNafParser(type="NAF")
    # (word, lemma, pos, morphofeat); renamed from `input` to avoid
    # shadowing the builtin.
    fixtures = [(u'dit', u'dit', u'O', u'VNW'),
                (u'is', u'zijn', u'V', u'WW'),
                (u'een', u'een', u'D', u'LID'),
                (u'test', u'test', u'N', u'N')]
    offset = 0
    for (word, lemma, pos, morph) in fixtures:
        token = naf.create_wf(word, 1, offset)  # all tokens in sentence 1
        offset += len(word)
        term = naf.create_term(lemma, pos, morph, [token])
    tokens = {t.get_id(): t for t in naf.get_tokens()}
    assert_equal(len(tokens), 4)
    result = {}
    for term in naf.get_terms():
        for token_id in term.get_span().get_span_ids():
            token = tokens[token_id]
            result[term.get_id()] = (token.get_text(), term.get_lemma(),
                                     term.get_pos(), term.get_morphofeat())
    # Lexical sort of term ids is adequate for the four terms created here.
    result = [result[tid] for tid in sorted(result.keys())]
    assert_equal(fixtures, result)
def test_create_terms():
    """Can we create terms via the create_{term,wf} functions?

    Builds a tiny four-token NAF document and verifies that the
    token/term layers read back exactly what was written.
    """
    naf = KafNafParser(type="NAF")
    # Each row is (word, lemma, pos, morphofeat). Named `expected`
    # instead of `input` so the builtin is not shadowed; the unused
    # `sent` variable and the duplicated `offset = 0` are dropped.
    expected = [(u'dit', u'dit', u'O', u'VNW'),
                (u'is', u'zijn', u'V', u'WW'),
                (u'een', u'een', u'D', u'LID'),
                (u'test', u'test', u'N', u'N')]
    offset = 0
    for (word, lemma, pos, morph) in expected:
        token = naf.create_wf(word, 1, offset)  # sentence id fixed at 1
        offset += len(word)
        term = naf.create_term(lemma, pos, morph, [token])
    tokens = {t.get_id(): t for t in naf.get_tokens()}
    assert_equal(len(tokens), 4)
    result = {}
    for term in naf.get_terms():
        for token_id in term.get_span().get_span_ids():
            token = tokens[token_id]
            result[term.get_id()] = (token.get_text(), term.get_lemma(),
                                     term.get_pos(), term.get_morphofeat())
    # Sorting term ids lexically is fine for fewer than ten terms.
    result = [result[tid] for tid in sorted(result.keys())]
    assert_equal(expected, result)
def corenlp2naf(xml_bytes, annotators):
    """Convert CoreNLP XML output into a serialized NAF document.

    :param xml_bytes: raw XML bytes as produced by CoreNLP
    :param annotators: annotator names that were run; those present in
        LAYERMAP are recorded as linguistic processors on the NAF
    :return: the NAF document serialized to bytes
    :raises Exception: re-raises (after logging) any parse failure
    """
    naf = KafNafParser(type="NAF")
    try:
        doc = Document(xml_bytes)
    except Exception:  # was a bare except: don't swallow SystemExit etc.
        log.exception("Error on parsing xml")
        raise
    terms = {}  # (xml_sentid, xml_tokenid) : term
    for sent in doc.sentences:
        for t in sent.tokens:
            wf = naf.create_wf(t.word, sent.id, t.character_offset_begin)
            term = naf.create_term(t.lemma, POSMAP[t.pos], t.pos, [wf])
            terms[sent.id, t.id] = term
            if t.ner not in (None, 'O'):
                naf.create_entity(t.ner, [term.get_id()])
        if sent.collapsed_ccprocessed_dependencies:
            # (removed dead `dependencies = True` flag; it was never read)
            for dep in sent.collapsed_ccprocessed_dependencies.links:
                if dep.type != 'root':
                    child = terms[sent.id, dep.dependent.idx]
                    parent = terms[sent.id, dep.governor.idx]
                    comment = "{t}({o}, {s})".format(
                        s=child.get_lemma(), t=dep.type,
                        o=parent.get_lemma())
                    naf.create_dependency(child.get_id(), parent.get_id(),
                                          dep.type, comment=comment)
    if doc.coreferences:
        for coref in doc.coreferences:
            cterms = set()
            for m in coref.mentions:
                cterms |= {terms[m.sentence.id, t.id].get_id()
                           for t in m.tokens}
            naf.create_coreference("term", cterms)
    for annotator in annotators:
        if annotator in LAYERMAP:
            # Explicit keyword instead of fragile format(**locals()).
            naf.create_linguistic_processor(
                LAYERMAP[annotator],
                "CoreNLP {annotator}".format(annotator=annotator),
                get_corenlp_version())
    s = BytesIO()
    naf.dump(s)
    return s.getvalue()
def corenlp2naf(xml_bytes, annotators):
    """Build a NAF document from CoreNLP XML output and serialize it.

    :param xml_bytes: raw XML bytes emitted by CoreNLP
    :param annotators: names of the annotators that were run; any that
        appear in LAYERMAP are registered as linguistic processors
    :return: bytes of the dumped NAF document
    :raises Exception: parse errors are logged and re-raised
    """
    naf = KafNafParser(type="NAF")
    try:
        doc = Document(xml_bytes)
    except Exception:  # narrowed from bare except (PEP 8)
        log.exception("Error on parsing xml")
        raise
    # Map (xml sentence id, xml token id) -> created NAF term.
    terms = {}
    for sent in doc.sentences:
        for t in sent.tokens:
            wf = naf.create_wf(t.word, sent.id, t.character_offset_begin)
            term = naf.create_term(t.lemma, POSMAP[t.pos], t.pos, [wf])
            terms[sent.id, t.id] = term
            if t.ner not in (None, 'O'):
                naf.create_entity(t.ner, [term.get_id()])
        if sent.collapsed_ccprocessed_dependencies:
            # Dead `dependencies = True` assignment removed (never read).
            for dep in sent.collapsed_ccprocessed_dependencies.links:
                if dep.type != 'root':
                    child = terms[sent.id, dep.dependent.idx]
                    parent = terms[sent.id, dep.governor.idx]
                    comment = "{t}({o}, {s})".format(
                        s=child.get_lemma(), t=dep.type,
                        o=parent.get_lemma())
                    naf.create_dependency(child.get_id(), parent.get_id(),
                                          dep.type, comment=comment)
    if doc.coreferences:
        for coref in doc.coreferences:
            cterms = {terms[m.sentence.id, t.id].get_id()
                      for m in coref.mentions
                      for t in m.tokens}
            naf.create_coreference("term", cterms)
    for annotator in annotators:
        if annotator in LAYERMAP:
            # format(**locals()) replaced with an explicit keyword arg.
            naf.create_linguistic_processor(
                LAYERMAP[annotator],
                "CoreNLP {annotator}".format(annotator=annotator),
                get_corenlp_version())
    out = BytesIO()
    naf.dump(out)
    return out.getvalue()