Ejemplo n.º 1
0
def parse_toloka_lrwc(lines):
    """Lazily turn TSV lines into ``LRWCRecord`` objects.

    ``skip_header`` is applied to ``lines`` first, then the remaining
    lines are handed to ``parse_tsv``.  Every parsed row must unpack into
    exactly five fields: hyponym, hypernym, genitive, judgement and
    confidence.  The last two are converted with ``parse_judgement`` and
    ``parse_confidence`` before the record is built.

    Args:
        lines: Source of TSV lines.  NOTE(review): presumably an iterator
            that ``skip_header`` advances in place -- confirm against
            the helper.

    Yields:
        One ``LRWCRecord`` per parsed row.

    Raises:
        ValueError: If a row does not contain exactly five fields.
    """
    skip_header(lines)
    for hyponym, hypernym, genitive, raw_judgement, raw_confidence in parse_tsv(lines):
        yield LRWCRecord(
            hyponym,
            hypernym,
            genitive,
            parse_judgement(raw_judgement),
            parse_confidence(raw_confidence),
        )
Ejemplo n.º 2
0
def parse_meta(file, encoding='utf8'):
    """Lazily read a TSV stream as one dict per row.

    ``file`` is wrapped in a ``TextIOWrapper`` using ``encoding``, the
    decoded text is passed through ``parse_tsv``, and the value returned
    by ``skip_header`` supplies the dict keys for every following row.

    Args:
        file: Object accepted by ``TextIOWrapper`` (a binary buffer).
        encoding: Text encoding used for decoding; defaults to ``'utf8'``.

    Yields:
        A dict pairing each header entry with the cell at the same
        position.  ``zip`` stops at the shorter of the two, so surplus
        cells or surplus header names are dropped.
    """
    # Keep the wrapper bound to a local for the generator's lifetime.
    decoded = TextIOWrapper(file, encoding)
    table = parse_tsv(decoded)
    # NOTE(review): presumably skip_header consumes and returns the first
    # row of the table -- confirm against the helper.
    columns = skip_header(table)
    yield from (dict(zip(columns, cells)) for cells in table)
Ejemplo n.º 3
0
def parse_ruadrect(lines):
    """Lazily turn TSV lines into ``RuADReCTRecord`` objects.

    The lines are parsed with ``parse_tsv``, ``skip_header`` is applied to
    the parsed rows, and the cells of every remaining row are passed
    positionally to ``RuADReCTRecord``.

    Args:
        lines: Source of TSV lines accepted by ``parse_tsv``.

    Yields:
        One ``RuADReCTRecord`` per remaining row.
    """
    table = parse_tsv(lines)
    skip_header(table)
    yield from (RuADReCTRecord(*row) for row in table)
Ejemplo n.º 4
0
def parse_simlex(lines):
    """Lazily turn TSV lines into ``SimlexRecord`` objects.

    ``skip_header`` is applied to ``lines`` first, then the remaining
    lines are handed to ``parse_tsv``.  Every parsed row must unpack into
    exactly three fields: two words and a score, the latter converted
    with ``float``.

    Args:
        lines: Source of TSV lines.  NOTE(review): presumably an iterator
            that ``skip_header`` advances in place -- confirm against
            the helper.

    Yields:
        One ``SimlexRecord(word1, word2, score)`` per parsed row.

    Raises:
        ValueError: If a row does not contain exactly three fields or its
            score cannot be converted by ``float``.
    """
    skip_header(lines)
    for row in parse_tsv(lines):
        word1, word2, raw_score = row
        yield SimlexRecord(word1, word2, float(raw_score))