def parse_toloka_lrwc(lines):
    """Yield LRWCRecord items from a Toloka LRWC TSV stream.

    The first line is a header and is discarded; judgement and confidence
    columns are converted via the module's parse_judgement/parse_confidence
    helpers.
    """
    skip_header(lines)
    for hyponym, hypernym, genitive, raw_judgement, raw_confidence in parse_tsv(lines):
        yield LRWCRecord(
            hyponym, hypernym, genitive,
            parse_judgement(raw_judgement),
            parse_confidence(raw_confidence)
        )
def parse_buriy(lines, max_text=10000000):
    """Yield BuriyRecord items from a Buriy news dump CSV stream.

    max_text raises the csv field-size limit so very long article bodies
    parse without error. Empty or '-' editions and empty topics are
    normalized to None via maybe_none.
    """
    rows = parse_csv(lines, max_field=max_text)
    skip_header(rows)
    for timestamp, url, edition, topics, title, text in rows:
        yield BuriyRecord(
            timestamp=parse_timestamp(timestamp),
            url=url,
            edition=maybe_none(edition, ('', '-')),
            topics=maybe_none(topics),
            title=title,
            text=text
        )
def parse_news(lines):
    """Yield NewsRecord items (with social-share Stats) from a news CSV stream.

    NOTE(review): a second ``parse_news`` is defined later in this file and
    shadows this one at import time — confirm which is meant to be public.
    """
    # tass raises "field larger than field limit"
    rows = parse_csv(lines, max_field=100000000)
    skip_header(rows)
    for row in rows:
        (timestamp, url, edition, topics, authors, title, text,
         fb, vk, ok, twitter, lj, tg, likes, views, comments) = none_row(row)
        timestamp = datetime.strptime(timestamp, '%Y-%m-%d %H:%M:%S')
        authors = authors.split(',') if authors else authors
        # empty texts in meduza
        text = text or ''
        counters = (fb, vk, ok, twitter, lj, tg, likes, views, comments)
        stats = Stats(*(maybe_int(value) for value in counters))
        yield NewsRecord(
            timestamp, url, edition, topics,
            authors, title, text, stats
        )
def parse_news(lines):
    """Yield NewsRecord items from a CSV stream that needs line repair.

    Rows whose cell count does not match the header (plus one trailing
    empty cell) are dropped; embedded newlines in text fields are repaired
    with fix_new_line.
    """
    rows = parse_csv(fix_csv(lines))
    header = skip_header(rows)
    expected = len(header) + 1
    for row in rows:
        cells = list(none_row(row))
        if len(cells) != expected:
            # extra , before EOL
            # rare Д.Акулинин, а также М.Кузовлев.\n\",-,-,-,-,-,-,-,-,-
            continue
        (timestamp, url, edition, topics, authors, title, text,
         fb, vk, ok, twitter, lj, tg, likes, views, comments, _) = cells
        timestamp = datetime.strptime(timestamp, '%Y-%m-%d %H:%M:%S')
        authors = authors.split(',') if authors else authors
        counters = (fb, vk, ok, twitter, lj, tg, likes, views, comments)
        stats = Stats(*(maybe_int(value) for value in counters))
        yield NewsRecord(
            timestamp, url,
            fix_new_line(edition), fix_new_line(topics),
            authors,
            fix_new_line(title), fix_new_line(text),
            stats
        )
def parse_meta(file, encoding='utf8'):
    """Yield one dict per TSV row, keyed by the header columns.

    file is a binary file object; it is wrapped for text decoding with the
    given encoding. Kept as a generator so no line is consumed before the
    caller starts iterating.
    """
    text_stream = TextIOWrapper(file, encoding)
    rows = parse_tsv(text_stream)
    header = skip_header(rows)
    for values in rows:
        yield dict(zip(header, values))
def parse_lenta(lines):
    """Yield LentaRecord items from a Lenta.ru CSV stream.

    Consistency fix: this duplicated the body of parse_lenta_; it now
    delegates with LentaRecord as the record class. parse_lenta_ is a
    generator function, so evaluation stays lazy exactly as before.
    """
    return parse_lenta_(lines, LentaRecord)
def parse_lenta_(lines, lenta_class):
    """Yield one lenta_class record per CSV row, header skipped.

    Generalized Lenta parser: lenta_class is any record constructor that
    accepts the row cells positionally.
    """
    rows = parse_csv(lines)
    skip_header(rows)
    for row in rows:
        yield lenta_class(*row)
def parse_ruadrect(lines):
    """Yield RuADReCTRecord items from a TSV stream, header skipped."""
    rows = parse_tsv(lines)
    skip_header(rows)
    for row in rows:
        yield RuADReCTRecord(*row)
def parse_simlex(lines):
    """Yield SimlexRecord(word1, word2, score) from a SimLex TSV stream.

    The score column is converted from string to float.
    """
    skip_header(lines)
    for left, right, raw_score in parse_tsv(lines):
        yield SimlexRecord(left, right, float(raw_score))