예제 #1
0
def split_sections(texts, size):
    """Yield one ``Section`` per token chunk of every text in *texts*.

    Each text is tokenized and its tokens are passed to ``chop`` together
    with *size* (presumably the maximum number of tokens per chunk --
    confirm against ``chop``).  For every resulting chunk a ``Section`` is
    built from:

    * the index of the text within *texts*,
    * ``start`` -- the ``.start`` of the chunk's first token,
    * ``stop`` -- the ``.stop`` of the chunk's last token,
    * the slice ``text[start:stop]`` of the original text.
    """
    for source, text in enumerate(texts):
        for chunk in chop(tokenize(text), size):
            # The section spans from the first token's start to the last
            # token's stop, so the slice keeps whatever lies between tokens.
            start = chunk[0].start
            stop = chunk[-1].stop
            yield Section(source, start, stop, text[start:stop])
예제 #2
0
def map_deepavlov(items,
                  host,
                  port,
                  batch_size=DEEPPAVLOV_BATCH,
                  mode=DEEPPAVLOV):
    """Yield the markups returned by ``call_deeppavlov`` for *items*.

    *items* are grouped with ``chop(items, batch_size)``; each group is
    sent in a single ``call_deeppavlov(batch, host, port, mode)`` call and
    the markups it returns are yielded one by one, in the order received.
    """
    for batch in chop(items, batch_size):
        yield from call_deeppavlov(batch, host, port, mode)
예제 #3
0
def map_deepavlov(texts,
                  host,
                  port,
                  section_size=DEEPPAVLOV_SECTION,
                  batch_size=DEEPPAVLOV_BATCH,
                  mode=DEEPPAVLOV):
    """Yield one markup per group of annotated sections built from *texts*.

    Pipeline, in order:

    1. ``patch_texts`` is applied to *texts*.
    2. ``split_sections`` cuts the patched texts using *section_size*.
    3. ``chop`` groups the sections using *batch_size*.
    4. ``map_batches`` processes the batches with *host*, *port* and *mode*.
    5. ``group_sections`` regroups the result, and ``sections_markup`` turns
       each group into the yielded value.
    """
    patched = patch_texts(texts)
    pieces = split_sections(patched, section_size)
    # Sections are batched together for speed.
    batches = chop(pieces, batch_size)
    # Same sections as above, now carrying annotation.
    annotated = map_batches(batches, host, port, mode)
    # Regroup by originating text and emit one markup per group.
    for group in group_sections(annotated):
        yield sections_markup(group)
예제 #4
0
def map_slovnet(items, host, port):
    """Yield everything ``call_slovnet`` returns for *items*.

    *items* are grouped with ``chop(items, SLOVNET_CHUNK)``; each group is
    passed to ``call_slovnet(chunk, host, port)`` and its results are
    yielded in the order they are produced.
    """
    for part in chop(items, SLOVNET_CHUNK):
        results = call_slovnet(part, host, port)
        yield from results
예제 #5
0
파일: stanza.py 프로젝트: natasha/naeval
def map_stanza(items, host, port, batch_size=STANZA_BATCH):
    """Yield the markups returned by ``call_stanza`` for *items*.

    *items* are grouped with ``chop(items, batch_size)``; each group is
    sent in a single ``call_stanza(batch, host, port)`` call and the
    markups it returns are yielded one by one, in the order received.
    """
    for batch in chop(items, batch_size):
        yield from call_stanza(batch, host, port)