Esempio n. 1
0
#!/usr/bin/env python3
# coding: utf-8
"""Automatic annotation of potential relations with semantic label."""

import sys

sys.path.append('../../../../../tools/data-api/derinet-python/')
import derinet_api

# Load DeriNet from the file given as the second command-line argument.
derinet = derinet_api.DeriNet(sys.argv[2])


def searchLexeme(lem, p=None):
    """Search lemma in DeriNet. Return None if lemma is not in DeriNet.

    ``lem`` is a word from the annotated data, optionally followed by its
    part of speech after an en dash (``lemma–pos``). ``p`` is used as the
    part of speech only when ``lem`` does not carry one itself; previously
    it was always overwritten and therefore ignored.
    """
    def divideWord(word):
        """Return lemma and pos of word in annotated data (sep=ALT+0150)."""
        parts = word.split('–')
        # The literal string 'None' in the data stands for "no pos given".
        if len(parts) > 1 and parts[1] != 'None':
            return parts[0], parts[1]
        return parts[0], None

    lem, annotated_pos = divideWord(lem)
    if annotated_pos is not None:
        # A pos embedded in the annotated word takes precedence over p.
        p = annotated_pos
    candidates = derinet.search_lexemes(lem, pos=p)
    if len(candidates) == 0:  # not in
        return None
Esempio n. 2
0
#!/usr/bin/env python3
# coding: utf-8

import sys
sys.path.append('../../../../../../tools/data-api/derinet-python/')
import derinet_api

# Load DeriNet from the hard-coded 1.5.1 release file (relative path).
der = derinet_api.DeriNet('../../../../../releases/cs/derinet-1-5-1.tsv')


def checkAmbig(word):
    """Look up an annotated word in DeriNet and return the search result.

    ``word`` is split on the en dash into lemma, optional part of speech
    and optional morphological tag. A part of speech given as the literal
    string 'None' is treated as unspecified.
    """
    fields = word.split('–')

    pos_given = len(fields) > 1 and fields[1] != 'None'
    p = fields[1] if pos_given else None
    m = fields[2] if len(fields) > 2 else None

    return der.search_lexemes(lemma=fields[0], pos=p, morph=m)


# Handle every command-line argument after the script name in turn.
for arg in sys.argv[1:]:

    # Header line naming the argument being processed.
    print('-' * 5, arg, '-' * 5)
    lnum = 0
Esempio n. 3
0

def add_verb_deriv(data_folder, suppress_warnings=True, almost_silent=True):
    """Add verb derivation pairs read from the TSV files in data_folder.

    Processes, in sorted order, every file in ``data_folder`` whose name
    ends with '_final_populated.tsv' and does not start with 'aspect'.
    For each line, the third and sixth tab-separated columns (with
    surrounding double quotes stripped) are passed to ``get_info`` with
    'V' to obtain the child and the parent, which are then handed to
    ``check_and_add``.

    With ``almost_silent`` false, each child/parent pair is printed.
    ``suppress_warnings`` is forwarded to ``check_and_add`` unchanged.
    """
    fnames = sorted(
        name for name in os.listdir(data_folder)
        if name.endswith('_final_populated.tsv')
        and not name.startswith('aspect'))

    for fname in fnames:
        print('\nAdding {}'.format(fname))
        path = os.path.join(data_folder, fname)
        with open(path, 'r', encoding='utf-8') as import_file:
            for line in import_file:
                columns = line.strip().split('\t')
                child = get_info(columns[2].strip('"'), 'V')
                parent = get_info(columns[5].strip('"'), 'V')
                if not almost_silent:
                    print('Setting {} <- {}'.format(child, parent))
                check_and_add(child, parent,
                              suppress_warnings=suppress_warnings)


if __name__ == "__main__":
    # Load the network from original_derinet_fname into the module-level
    # name `derinet`.
    derinet = derinet_api.DeriNet(original_derinet_fname)
    # Run the edit steps in this order; the helpers and the *_fname values
    # are defined outside this block.
    add_changed_edge_probabilities(changed_edge_fname)
    correct_changed_edge_probabilities(dvojiti_rodice_fname)
    add_jk((jk_fname_01, jk_fname_02))
    # The verb-derivation import is currently disabled.
    #add_verb_deriv(verb_data_folder)

    # Write the result to new_derinet_fname, requesting sorted output.
    derinet.save(fname=new_derinet_fname, sort=True)
Esempio n. 4
0
#!/usr/bin/env python3
# coding: utf-8
"""Lists unmotivated lemmas."""

import sys

sys.path.append('../../../../../../tools/data-api/derinet-python/')
import derinet_api

# Load DeriNet from the hard-coded 1.7 release file (relative path).
derinet = derinet_api.DeriNet('../../../../../releases/cs/derinet-1-7.tsv')


def divideWord(word):
    """Split an annotated word into its lemma and part of speech.

    Used for ambiguous words, where the two are joined by an underscore.
    Returns a ``(lemma, pos)`` tuple; ``pos`` is None when the word has no
    second field or when that field is the literal string 'None'.
    Any fields after the second are ignored.
    """
    parts = word.split('_')
    # A missing pos is treated the same as the 'None' marker.
    tag = parts[1] if len(parts) > 1 else 'None'
    return parts[0], (None if tag == 'None' else tag)


def searchLexeme(lem, p=None):
    """Search lemma in DeriNet. Raise warnings for not being inside the
    DeriNet and for homonymous lemma.
    """