def parse_toloka_lrwc(lines):
    """Lazily yield one ``LRWCRecord`` per TSV row read from *lines*.

    ``skip_header`` is applied to the raw *lines* before they are handed
    to ``parse_tsv`` (presumably to drop a header line -- confirm against
    the helper). Every row must unpack into exactly five fields:
    hyponym, hypernym, genitive, judgement, confidence. The last two are
    converted with ``parse_judgement`` and ``parse_confidence``; the
    first three are passed through unchanged.
    """
    skip_header(lines)
    for row in parse_tsv(lines):
        # Unpacking enforces the five-column layout of each row.
        hyponym, hypernym, genitive, raw_judgement, raw_confidence = row
        verdict = parse_judgement(raw_judgement)
        certainty = parse_confidence(raw_confidence)
        yield LRWCRecord(hyponym, hypernym, genitive, verdict, certainty)
def parse_meta(file, encoding='utf8'):
    """Lazily yield each data row of a TSV stream as a ``dict``.

    *file* is wrapped in a ``TextIOWrapper`` using *encoding*, so it is
    expected to be a binary buffered stream. The wrapped text is parsed
    with ``parse_tsv``; the value returned by ``skip_header`` on the
    parsed rows supplies the keys, and every remaining row is zipped
    against it. ``zip`` stops at the shorter of the two, so surplus
    cells or surplus header names are silently dropped.
    """
    text = TextIOWrapper(file, encoding)
    table = parse_tsv(text)
    columns = skip_header(table)
    for values in table:
        yield {name: value for name, value in zip(columns, values)}
def parse_ruadrect(lines):
    """Lazily yield one ``RuADReCTRecord`` per parsed TSV row of *lines*.

    The rows come from ``parse_tsv``; ``skip_header`` is called on the
    parsed rows and its result is discarded (presumably it consumes the
    header row -- confirm against the helper). Each remaining row is
    splatted positionally into ``RuADReCTRecord``, so the row length
    must match what that constructor accepts.
    """
    table = parse_tsv(lines)
    skip_header(table)
    yield from (RuADReCTRecord(*fields) for fields in table)
def parse_simlex(lines):
    """Lazily yield one ``SimlexRecord`` per TSV row read from *lines*.

    ``skip_header`` is applied to the raw *lines* before ``parse_tsv``
    sees them (presumably to drop a header line -- confirm against the
    helper). Every row must consist of exactly three fields: two words
    and a score. The score is converted with ``float``, which raises
    ``ValueError`` on non-numeric text.
    """
    skip_header(lines)
    for row in parse_tsv(lines):
        # Unpacking enforces the three-column layout of each row.
        first, second, raw_score = row
        yield SimlexRecord(first, second, float(raw_score))