Esempio n. 1
0
def test_txt(tmp_path):
    """Round-trip plain-text lines through write_file/read_file."""
    original = ["text"] * 10
    path = tmp_path / 'data.txt'
    pipe(original).write_file(path, 'txt')
    loaded = pipe().read_file(path, 'txt').collect()

    # read_file keeps the trailing newline that write_file adds per line.
    expected = [line + '\n' for line in original]
    assert loaded == expected
Esempio n. 2
0
def test_merge_feats(data2):
    """merge_feats rebuilds the 'feats' field from unwound feature columns."""
    unwound = pipe().read_conllu(data2).unwind_feats().remove_fields(
        'feats').collect()
    reference = pipe().read_conllu(data2).collect()

    merged = [[token.get('feats') for token in sentence]
              for sentence in pipe(unwound).merge_feats()]
    expected = [[token.get('feats') for token in sentence]
                for sentence in reference]
    assert merged == expected
Esempio n. 3
0
def test_only_fields(data2):
    """only_fields keeps exactly the requested fields (set or varargs form)."""
    for args in ({'id', 'form'},), ('id', 'form'):
        sentences = pipe().read_conllu(data2).only_fields(*args).collect()
        for sentence in sentences:
            assert [token.keys() for token in sentence
                    ] == [{'id', 'form'}] * len(sentence)
Esempio n. 4
0
def test_pipe():
    """pipe() chains sub-pipes; collecting a source-less pipe raises."""
    chained = pipe(range(10),
                   pipe().filter(lambda x: x < 5),
                   pipe(range(10)).map(lambda x: x * 2))
    assert chained.collect() == [0, 2, 4, 6, 8]

    # A pipe with no data source cannot be collected.
    with pytest.raises(RuntimeError):
        pipe().collect()
Esempio n. 5
0
def test_to_instance(data2):
    """to_instance/to_sentence round-trip via an index and its inverse."""
    original = pipe().read_conllu(data2).collect()
    index = pipe(original).create_index(fields=set(FIELDS) - {ID, HEAD})
    inverse = create_inverse_index(index)

    instances = pipe().read_conllu(data2).to_instance(index)
    restored = pipe(instances).to_sentence(inverse)

    assert restored.collect() == original
Esempio n. 6
0
def test_only_projective(data2, data5):
    """only_projective filters by projectivity; False inverts the filter."""
    kept = pipe().read_conllu(data2).only_projective().collect()
    assert [sentence.is_projective() for sentence in kept] == [True, True]

    # data5 contains no projective sentences at all.
    assert pipe().read_conllu(data5).only_projective().collect() == []

    inverted = pipe().read_conllu(data5).only_projective(False).collect()
    assert [sentence.is_projective() for sentence in inverted] == [False]
Esempio n. 7
0
def test_batch():
    """batch groups items; a size function scales each item's footprint."""
    batched = pipe(range(10)).filter(lambda x: x < 5).stream(10).batch(3)
    assert batched.collect() == [[0, 1, 2], [3, 4, 0], [1, 2, 3], [4]]

    # Each item counts as 2 toward the capacity of 3 -> pairs.
    halved = pipe(range(5)).batch(3, size=lambda _: 2)
    assert halved.collect() == [[0, 1], [2, 3], [4]]

    # Each item fills the whole batch -> singletons.
    singles = pipe(range(5)).batch(3, size=lambda _: 3)
    assert singles.collect() == [[0], [1], [2], [3], [4]]
Esempio n. 8
0
def test_replace_missing(data4):
    """replace_missing stores a substitute in a new field where 'form' is absent."""
    sentences = pipe().read_conllu(data4).collect()
    del sentences[0][0].form
    first = sentences[0]

    pipe(sentences).replace_missing('form', '__missing__', 'new').collect()
    assert [token.get('new') for token in first
            ] == ['__missing__'] + [None] * 6

    # A None substitute leaves the new field unset everywhere.
    pipe(sentences).replace_missing('form', None, 'new').collect()
    assert [token.get('new') for token in first] == [None] * 7
Esempio n. 9
0
def test_from_conllu():
    """from_conllu parses an in-memory string; it must be the pipe's source."""
    parsed = pipe().from_conllu(_DATA1_CONLLU)
    forms = [[token.form for token in sentence] for sentence in parsed.collect()]
    assert forms == [['vámonos', 'vamos', 'nos', 'al', 'a', 'el', 'mar'],
                     ['Sue', 'likes', 'coffee', 'and', 'Bill', 'likes', 'tea']]

    # Not allowed after another operation ...
    with pytest.raises(RuntimeError):
        pipe().map(lambda x: x).from_conllu(_DATA1_CONLLU)

    # ... nor on a pipe that already has a source.
    with pytest.raises(RuntimeError):
        pipe(range(10)).from_conllu(_DATA1_CONLLU)
Esempio n. 10
0
def test_replace(data4):
    """replace substitutes regex matches (or missing values) into a new field."""
    sentences = pipe().read_conllu(data4).replace(
        'form', r"[0-9]+|[0-9]+\.[0-9]+|[0-9]+[0-9,]+", '__number__',
        'new').collect()
    first = sentences[0]
    assert [token.get('new') for token in first] == [
        'Posledná', 'revízia', 'vyšla', 'v', 'roku', '__number__', '.'
    ]

    # A None pattern targets tokens that are missing the field entirely.
    del first[0].form
    pipe(sentences).replace('form', None, '__missing__', 'new').collect()
    assert [token.get('new') for token in first] == [
        '__missing__', 'revízia', 'vyšla', 'v', 'roku', '__number__', '.'
    ]
Esempio n. 11
0
def test_remove_fields(data2):
    """remove_fields drops the named fields (set or varargs form)."""
    for args in ({'id', 'form'},), ('id', 'form'):
        sentences = pipe().read_conllu(data2).remove_fields(*args).collect()
        for field in ('id', 'form'):
            # No token in any sentence retains the removed field.
            assert [[field in token.keys() for token in sentence]
                    for sentence in sentences
                    ] == [[False] * len(sentence) for sentence in sentences]
Esempio n. 12
0
def test_flatten_tokens(data1):
    """flatten yields individual tokens; only_words/lowercase act per token."""
    tokens = (pipe().read_conllu(data1)
              .flatten()
              .only_words()
              .lowercase('form')
              .collect())
    expected = ['vamos', 'nos', 'a', 'el', 'mar',
                'sue', 'likes', 'coffee', 'and', 'bill', 'tea']
    assert [token.form for token in tokens] == expected
Esempio n. 13
0
def test_upos_feats(data2):
    """upos_feats stores combined 'POS=<upos>|<feats>' strings in a new field."""
    sentences = pipe().read_conllu(data2).upos_feats('new').collect()
    expected = [
        'POS=PRON|Case=Nom|Number=Plur',
        'POS=VERB|Number=Plur|Person=3|Tense=Pres',
        'POS=CONJ',
        'POS=VERB|Number=Plur|Person=3|Tense=Pres',
        'POS=NOUN|Number=Plur',
        'POS=PUNCT',
    ]
    assert [token.get('new') for token in sentences[0]] == expected
Esempio n. 14
0
def test_only_universal_deprel(data4):
    """only_universal_deprel strips language-specific subtypes from deprel/deps."""
    sentences = pipe().read_conllu(data4).collect()
    head_token = sentences[0][0]
    head_token.deprel = 'test:test'
    head_token.deps = '1:test1:test:test|2:test2'
    sentences = pipe(sentences).only_universal_deprel().collect()

    # String-encoded deps: everything after the first ':' subtype is removed.
    assert sentences[0][0].deprel == 'test'
    assert sentences[0][0].deps == '1:test1|2:test2'
    assert sentences[0][-1].deps == '3:punct'

    # Parsed deps: same stripping applied to (head, relation) tuples.
    sentences = pipe().read_conllu(data4, parse_deps=True).collect()
    sentences[0][0].deps = {(1, 'test1:test'), (2, 'test2')}
    sentences = pipe(sentences).only_universal_deprel().collect()

    assert sentences[0][0].deprel == 'amod'
    assert sentences[0][0].deps == {(1, 'test1'), (2, 'test2')}
    assert sentences[0][-1].deps == {(3, 'punct')}
Esempio n. 15
0
def test_unwind_feats(data2):
    """unwind_feats expands each morphological feature into a 'feats:<name>' field."""
    sentences = pipe().read_conllu(data2).unwind_feats().collect()

    numbers = [[token.get('feats:Number') for token in s] for s in sentences]
    assert numbers == [['Plur', 'Plur', None, 'Plur', 'Plur', None],
                       ['Sing', 'Sing', None, 'Sing', None]]

    cases = [[token.get('feats:Case') for token in s] for s in sentences]
    assert cases == [['Nom', None, None, None, None, None],
                     ['Nom', None, None, None, None]]
Esempio n. 16
0
def test_hdf5(data2, data3, tmp_path):
    """Instances survive a write_file/read_file round-trip in HDF5 format.

    The original test duplicated the whole round-trip verbatim for data2
    and data3; the shared logic is factored into a local helper.
    """

    def _roundtrip(data):
        # Build instances from the CoNLL-U data via a freshly created index.
        index = pipe().read_conllu(data).create_index()
        instances = pipe().read_conllu(data).to_instance(index).collect()

        # Write and re-read through the HDF5 backend (file is overwritten
        # between calls, matching the original test's behavior).
        filename = tmp_path / 'data.hdf5'
        pipe(instances).write_file(filename, 'hdf5')
        restored = pipe().read_file(filename, 'hdf5').collect()

        for original, loaded in zip(instances, restored):
            equal_instance(original, loaded)

    _roundtrip(data2)
    _roundtrip(data3)
Esempio n. 17
0
def test_to_flatten():
    """flatten undoes batching and leaves an already-flat pipe unchanged."""
    rebatched = pipe(range(10)).batch(3).flatten()
    assert rebatched.collect() == list(range(10))

    untouched = pipe(range(10)).flatten()
    assert untouched.collect() == list(range(10))
Esempio n. 18
0
def test_collu(data2, tmp_path):
    """Sentences survive a write_file/read_file round-trip in CoNLL-U format."""
    # NOTE(review): name looks like a typo for 'test_conllu'; kept so pytest
    # discovery and any external references stay stable.
    sentences = pipe().read_conllu(data2).collect()
    target = tmp_path / 'data.conllu'

    pipe(sentences).write_file(target, 'conllu')
    restored = pipe().read_file(target, 'conllu').collect()
    assert restored == sentences
Esempio n. 19
0
def test_read_write_file():
    """read_file rejects an unknown format name with ValueError."""
    with pytest.raises(ValueError):
        pipe().read_file('temp', 'unknown').collect()
Esempio n. 20
0
def test_collect():
    """collect supports a pre-seeded list `l` and an optional item limit."""
    # The seeded list is extended with every piped item.
    assert pipe(range(5)).collect(l=[-1]) == [-1, 0, 1, 2, 3, 4]
    # A positional limit caps how many items are appended.
    assert pipe(range(5)).collect(3, [-1]) == [-1, 0, 1, 2]
Esempio n. 21
0
def test_first():
    """first returns the first item, or the given default (None) when empty."""
    assert pipe(range(5)).first() == 0
    # Fixed PEP 8 idiom: `is None` instead of `== None`; behavior unchanged.
    assert pipe([]).first() is None
    assert pipe([]).first(0) == 0
Esempio n. 22
0
def test_count():
    """count consumes the pipe and returns the number of items, 0 when empty."""
    assert pipe(range(5)).count() == 5
    assert pipe([]).count() == 0
Esempio n. 23
0
def test_only_words(data1):
    """only_words drops multiword-token ranges; text joins the remaining forms."""
    texts = pipe().read_conllu(data1).only_words().text().collect()
    assert texts == [
        'vamos nos a el mar ', 'Sue likes coffee and Bill tea '
    ]
Esempio n. 24
0
def test_map():
    """map applies the given function to every item in the pipe."""
    doubled = pipe(range(10)).map(lambda value: 2 * value)
    assert doubled.collect() == [2 * i for i in range(10)]
Esempio n. 25
0
def test_filter():
    """filter keeps only items satisfying the predicate."""
    small = pipe(range(10)).filter(lambda value: value < 5)
    assert small.collect() == list(range(5))
Esempio n. 26
0
def test_split_chars(data2):
    """split_chars stores each form's character tuple under 'form:chars'."""
    sentences = pipe().read_conllu(data2).split_chars('form').collect()
    for sentence in sentences:
        assert [token['form:chars'] for token in sentence
                ] == [tuple(token.form) for token in sentence]
Esempio n. 27
0
def test_uppercase(data1):
    """uppercase transforms the 'form' field before the text rendering."""
    texts = pipe().read_conllu(data1).only_words().uppercase('form').text()
    expected = [sentence.upper() for sentence in
                ('vamos nos a el mar ', 'Sue likes coffee and Bill tea ')]
    assert texts.collect() == expected
Esempio n. 28
0
def test_filter_field(data1):
    """filter_field removes the field from every token failing the predicate."""
    tokens = pipe().read_conllu(data1).flatten().filter_field(
        'form', lambda s: False).collect()
    # With an always-false predicate no token keeps a 'form' field.
    assert all('form' not in token for token in tokens)
Esempio n. 29
0
def test_map_token(data1):
    """Tokens mapped to None are dropped, leaving empty sentences."""
    result = pipe().read_conllu(data1).map_token(lambda token: None).collect()
    assert result == [[], []]
Esempio n. 30
0
def test_filter_token(data1):
    """An always-false token predicate empties every sentence."""
    result = pipe().read_conllu(data1).filter_token(
        lambda token: False).collect()
    assert result == [[], []]