Esempi in Python per segmentor

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: atf2cts

Metodo/funzione: segmentor

Esempi su hotexamples.com: 3

segmentor in Python: 3 esempi trovati. Questi sono i migliori esempi reali in Python per atf2cts.segmentor, estratti da progetti open source. Li puoi valutare, per aiutarci a migliorare la qualità dei nostri esempi.

Esempio n. 1

Mostra file

File: cdli2cts.py Progetto: rillian/cdli-cts

def _extract_cdli_id(atf):
    '''Return the candidate CDLI id at the start of an atf record.

    Returns '' when no candidate can be located; the caller decides
    whether the returned text actually looks like an id.
    '''
    if atf.startswith('&P'):
        # Drop the '&' sigil and any trailing garbage.
        return atf[1:8]
    if atf.startswith('&'):
        # Handle broken entries with whitespace around the id.
        tokens = atf.split()
        print('Warning: whitespace at the start of &-line.')
        # A record with a single token has no second field to read the
        # id from; return '' so the caller reports it instead of
        # crashing with an IndexError.
        return tokens[1][0:7] if len(tokens) > 1 else ''
    return ''


def _extract_language(lines):
    '''Return the language from the first '#atf' line mentioning 'lang'.

    Returns None when there is no such line, or when that line has
    nothing usable after a spurious equal sign.
    '''
    for line in lines:
        if line.startswith('#atf') and 'lang' in line:
            parts = line.split('lang')
            # Skip spurious equal signs.
            # These are invalid syntax, but occur sometimes.
            if parts[1].strip() == '=':
                del parts[1]
            # NOTE(review): a header such as '#atf: lang = akk' still
            # yields '= akk' here, as before - confirm whether the
            # leading '=' should be stripped in that case too.
            if len(parts) > 1:
                return parts[1].strip()
            # Nothing left after dropping the '='.
            return None
    return None


def read_atf(data_path):
    '''Read and segment the atf data export.

    Opens 'cdliatf_unblocked.atf' under data_path as UTF-8 text and
    hands the open file to atf2cts.segmentor. For each item the
    segmentor yields, this generator yields a tuple
    (cdli_id, language, atf), where language is None if no language
    header was found.

    Items whose start does not look like a CDLI id ('P' followed by
    decimal digits) are reported on stdout and skipped.

    The file is closed once the generator is exhausted or closed.
    '''
    filename = os.path.join(data_path, 'cdliatf_unblocked.atf')
    # Use a context manager so the handle is released even if the
    # consumer stops iterating early and closes the generator.
    with io.open(filename, encoding='utf-8') as fp:
        for atf in atf2cts.segmentor(fp):
            lines = atf.splitlines()
            # Parse out the CDLI id code.
            cdli_id = _extract_cdli_id(atf)
            # Check if we found what looks like a cdli id.
            if not cdli_id.startswith('P') or not cdli_id[1:].isdecimal():
                print("Error: ATF record doesn't start with a CDLI id!")
                # An empty record has no first line to show.
                print(lines[0] if lines else '')
                continue
            # Parse out the language header, if any.
            language = _extract_language(lines)
            yield (cdli_id, language, atf)

Esempio n. 2

Mostra file

def test_segmentor(count):
    '''Verify segmentation of multiple blocks.

    Join `count` copies of the test file, separated by a blank line,
    and check that the segmentor returns the same number of blocks.'''
    with io.open(test_filename, encoding='utf-8') as source:
        single = source.read()
    # An empty test file would make the count check meaningless.
    assert single

    joined = '\n\n'.join(repeat(single, count))
    stream = io.StringIO(joined)
    blocks = list(atf2cts.segmentor(stream))
    assert len(blocks) == count

Esempio n. 3

Mostra file

def test_segmentor_single():
    '''Check that the test file segments into exactly one block.'''
    with io.open(test_filename, encoding='utf-8') as source:
        blocks = list(atf2cts.segmentor(source))
    assert len(blocks) == 1