Ejemplo n.º 1
0
def test_empty_set_format_error():
    """
    Check that serializing a token raises FormatError when one of its
    feature attributes is left with an empty collection of values.
    """
    source_line = (
        '33	cintre	cintre	NOUN	_	Gender=Fem|Number=Sing	'
        '30	nmod	2:nsubj|4:root	SpaceAfter=No'
    )
    parsed = Token(source_line)

    # 'Gender' starts with the single value 'Fem'; popping it empties the
    # collection while keeping the attribute itself in place.
    parsed.feats['Gender'].pop()

    with pytest.raises(FormatError):
        parsed.conll()
Ejemplo n.º 2
0
def test_all_empty_deps_component_error():
    """
    Check that serializing a token raises FormatError after the leading
    component of one of its deps entries has been replaced with None.
    """
    source_line = (
        '33	cintre	cintre	NOUN	_	Gender=Fem|Number=Sing	'
        '30	nmod	2:nsubj|4:root	SpaceAfter=No'
    )
    parsed = Token(source_line)

    # Keep every component after the first and put None in the first slot.
    trailing = list(parsed.deps['2'][1:])
    parsed.deps['2'] = [None] + trailing

    with pytest.raises(FormatError):
        parsed.conll()
Ejemplo n.º 3
0
def test_to_string():
    """
    Check that an unmodified token serializes back to the exact line it was
    parsed from.
    """
    source_line = (
        '26	surmonté	surmonter	VERB	_	'
        'Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part	22	acl	_	_'
    )
    parsed = Token(source_line)

    rendered = parsed.conll()
    assert rendered == source_line
Ejemplo n.º 4
0
def test_feats_keep_case_insensitive_order():
    """
    Check that features whose attributes are already in case-insensitive
    order ('gender' before 'Number') are output in that same order.
    """
    source_line = (
        '10	gave	give	VERB	_	gender=Fem|Number=Sing	'
        '0	root	_	SpaceAfter=No'
    )
    rendered = Token(source_line).conll()

    assert rendered == source_line
Ejemplo n.º 5
0
def test_deps_parsing():
    """
    Check that each entry of the deps column is parsed into a 4-tuple keyed
    by its head index, and that the token still round-trips to its line.
    """
    source_line = (
        '33	cintre	cintre	NOUN	_	Gender=Masc|Number=Sing	'
        '30	nmod	2:nsubj|4:nmod	SpaceAfter=No'
    )
    parsed = Token(source_line)

    # Only the relation is given in the input, so the other slots are None.
    nsubj_entry = ('nsubj', None, None, None)
    nmod_entry = ('nmod', None, None, None)

    assert parsed.deps['2'] == nsubj_entry
    assert parsed.deps['4'] == nmod_entry
    assert parsed.conll() == source_line
Ejemplo n.º 6
0
def test_feats_induce_case_insensitive_order():
    """
    Check that features given out of order are output sorted by attribute
    name without regard to case ('gender' ahead of 'Number').
    """
    source_line = (
        '10	gave	give	VERB	_	Number=Sing|gender=Fem	'
        '0	root	_	SpaceAfter=No'
    )
    expected_line = (
        '10	gave	give	VERB	_	gender=Fem|Number=Sing	'
        '0	root	_	SpaceAfter=No'
    )

    rendered = Token(source_line).conll()

    assert rendered == expected_line
Ejemplo n.º 7
0
def test_deps_sort_order_decimal():
    """
    Check that enhanced dependencies with decimal head indices (10.1, 10.2)
    are output in ascending numeric order alongside integer indices.
    """
    source_line = (
        '10	gave	give	VERB	_	Number=Sing|Gender=Fem	'
        '0	root	10.2:nsubj|2:nmod|10.1:nsubj	SpaceAfter=No'
    )
    expected_line = (
        '10	gave	give	VERB	_	Gender=Fem|Number=Sing	'
        '0	root	2:nmod|10.1:nsubj|10.2:nsubj	SpaceAfter=No'
    )

    rendered = Token(source_line).conll()

    assert rendered == expected_line
Ejemplo n.º 8
0
def test_deps_sort_order_double_digits():
    """
    Check that enhanced dependencies are ordered by the numeric value of
    their index (2 before 10) rather than by string comparison.
    """
    source_line = (
        '10	gave	give	VERB	_	Number=Sing|Gender=Fem	'
        '0	root	10:nsubj|2:nmod	SpaceAfter=No'
    )
    expected_line = (
        '10	gave	give	VERB	_	Gender=Fem|Number=Sing	'
        '0	root	2:nmod|10:nsubj	SpaceAfter=No'
    )

    rendered = Token(source_line).conll()

    assert rendered == expected_line
Ejemplo n.º 9
0
def test_deps_sort_order():
    """
    Check that enhanced dependencies given out of order are output sorted
    by their index.
    """
    source_line = (
        '10	gave	give	VERB	_	Number=Sing|Gender=Fem	'
        '0	root	4:nsubj|2:nmod	SpaceAfter=No'
    )
    expected_line = (
        '10	gave	give	VERB	_	Gender=Fem|Number=Sing	'
        '0	root	2:nmod|4:nsubj	SpaceAfter=No'
    )

    rendered = Token(source_line).conll()

    assert rendered == expected_line
Ejemplo n.º 10
0
def test_remove_feature_to_string():
    """
    Check that a feature deleted from a token no longer appears in the
    token's serialized line.
    """
    source_line = (
        '33	cintre	cintre	NOUN	_	Gender=Masc|Number=Sing	'
        '30	nmod	_	SpaceAfter=No'
    )
    expected_line = (
        '33	cintre	cintre	NOUN	_	'
        'Number=Sing	30	nmod	_	SpaceAfter=No'
    )
    parsed = Token(source_line)

    del parsed.feats['Gender']

    rendered = parsed.conll()
    assert rendered == expected_line
Ejemplo n.º 11
0
def test_modify_dict_field_to_string():
    """
    Check a token's serialized line after adding a second value to an
    existing feature: both values are output, comma separated.
    """
    source_line = (
        '33	cintre	cintre	NOUN	_	Gender=Masc|Number=Sing	'
        '30	nmod	_	SpaceAfter=No'
    )
    expected_line = (
        '33	cintre	cintre	NOUN	_	'
        'Gender=Fem,Masc|Number=Sing	30	nmod	_	SpaceAfter=No'
    )
    parsed = Token(source_line)

    parsed.feats['Gender'].add('Fem')

    rendered = parsed.conll()
    assert rendered == expected_line
Ejemplo n.º 12
0
def test_modify_unit_field_to_string():
    """
    Check a token's serialized line after reassigning one of its
    single-valued fields (here, the lemma).
    """
    source_line = (
        '33	cintre	cintre	NOUN	_	Gender=Masc|Number=Sing	'
        '30	nmod	_	SpaceAfter=No'
    )
    expected_line = (
        '33	cintre	pain	NOUN	_	'
        'Gender=Masc|Number=Sing	30	nmod	_	SpaceAfter=No'
    )
    parsed = Token(source_line)

    parsed.lemma = 'pain'

    rendered = parsed.conll()
    assert rendered == expected_line
Ejemplo n.º 13
0
def test_del_values():
    """
    Check that entries can be deleted from both the feats and misc columns,
    and that a misc column left with no entries is output as '_'.
    """
    source_line = (
        '33	cintre	cintre	NOUN	_	Gender=Fem|Number=Sing	'
        '30	nmod	2:nsubj|4:root	SpaceAfter=No'
    )
    wanted = (
        '33	cintre	cintre	NOUN	_	Number=Sing	'
        '30	nmod	2:nsubj|4:root	_'
    )
    parsed = Token(source_line)

    del parsed.feats['Gender']
    del parsed.misc['SpaceAfter']

    rendered = parsed.conll()
    assert wanted == rendered
Ejemplo n.º 14
0
def test_misc_parsing_output():
    """
    Check the serialized form of the misc column after editing it: a key
    mapped to None is output bare, and keys and their values come out sorted.
    """
    source_line = (
        '33	cintre	cintre	NOUN	_	Gender=Fem|Number=Sing	'
        '30	nmod	2:nsubj|4:root	SpaceAfter=No'
    )
    wanted = (
        '33	cintre	cintre	NOUN	_	Gender=Fem|Number=Sing	'
        '30	nmod	2:nsubj|4:root	Independent|OtherTest=X,Y,Z|SpaceAfter=No,Yes'
    )
    parsed = Token(source_line)

    parsed.misc['Independent'] = None
    parsed.misc['SpaceAfter'].add('Yes')

    # Values are inserted out of alphabetical order on purpose.
    parsed.misc['OtherTest'] = set()
    for value in ('X', 'Z', 'Y'):
        parsed.misc['OtherTest'].add(value)

    rendered = parsed.conll()
    assert wanted == rendered