Exemple #1
0
def test_empty_set_format_error():
    """
    Check that serializing a token fails once one of its feature value
    collections has been emptied.
    """
    source = (
        '33\tcintre\tcintre\tNOUN\t_\tGender=Fem|Number=Sing\t'
        '30\tnmod\t2:nsubj|4:root\tSpaceAfter=No'
    )
    parsed = Token(source)

    # 'Gender' starts with the single value 'Fem'; popping it leaves the
    # feature present but with no values.
    gender_values = parsed.feats['Gender']
    gender_values.pop()

    with pytest.raises(FormatError):
        parsed.conll()
Exemple #2
0
def test_all_empty_deps_component_error():
    """
    Check that serializing a token fails when every component of one of its
    deps entries is None.
    """
    source = (
        '33\tcintre\tcintre\tNOUN\t_\tGender=Fem|Number=Sing\t'
        '30\tnmod\t2:nsubj|4:root\tSpaceAfter=No'
    )
    parsed = Token(source)

    # Swap the first component of the entry for head '2' with None and keep
    # the remaining components exactly as they were parsed.
    parsed.deps['2'] = [None, *parsed.deps['2'][1:]]

    with pytest.raises(FormatError):
        parsed.conll()
Exemple #3
0
def test_to_string():
    """
    Check that an unmodified token serializes back to the exact line it was
    built from.
    """
    source = (
        '26\tsurmonté\tsurmonter\tVERB\t_\t'
        'Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part\t22\tacl\t_\t_'
    )
    parsed = Token(source)
    rendered = parsed.conll()

    assert rendered == source
Exemple #4
0
def test_feats_keep_case_insensitive_order():
    """
    Check that features already ordered case insensitively by attribute name
    are output in that same order.
    """
    source = (
        '10\tgave\tgive\tVERB\t_\tgender=Fem|Number=Sing\t'
        '0\troot\t_\tSpaceAfter=No'
    )
    parsed = Token(source)

    # 'gender' precedes 'Number' only when case is ignored, so a round trip
    # must leave the line untouched.
    assert parsed.conll() == source
Exemple #5
0
def test_deps_parsing():
    """
    Check that the deps column is parsed into per-head tuples and that the
    token still serializes back to its original line.
    """
    source = (
        '33\tcintre\tcintre\tNOUN\t_\tGender=Masc|Number=Sing\t'
        '30\tnmod\t2:nsubj|4:nmod\tSpaceAfter=No'
    )
    parsed = Token(source)
    deps = parsed.deps

    # Each head maps to a 4-tuple: the relation followed by three None
    # components.
    assert deps['2'] == ('nsubj', None, None, None)
    assert deps['4'] == ('nmod', None, None, None)
    assert parsed.conll() == source
Exemple #6
0
def test_feats_induce_case_insensitive_order():
    """
    Check that features given out of order are output sorted case
    insensitively by attribute name.
    """
    source = (
        '10\tgave\tgive\tVERB\t_\tNumber=Sing|gender=Fem\t'
        '0\troot\t_\tSpaceAfter=No'
    )
    # Same line, but with 'gender' moved ahead of 'Number'.
    expected = (
        '10\tgave\tgive\tVERB\t_\tgender=Fem|Number=Sing\t'
        '0\troot\t_\tSpaceAfter=No'
    )

    rendered = Token(source).conll()

    assert rendered == expected
Exemple #7
0
def test_deps_sort_order_decimal():
    """
    Check that enhanced dependencies with decimal head ids are output in
    ascending order of those ids.
    """
    source = (
        '10\tgave\tgive\tVERB\t_\tNumber=Sing|Gender=Fem\t'
        '0\troot\t10.2:nsubj|2:nmod|10.1:nsubj\tSpaceAfter=No'
    )
    # Heads 10.2, 2, 10.1 are expected back as 2, 10.1, 10.2; the features
    # are expected back with 'Gender' ahead of 'Number'.
    expected = (
        '10\tgave\tgive\tVERB\t_\tGender=Fem|Number=Sing\t'
        '0\troot\t2:nmod|10.1:nsubj|10.2:nsubj\tSpaceAfter=No'
    )

    rendered = Token(source).conll()

    assert rendered == expected
Exemple #8
0
def test_deps_sort_order_double_digits():
    """
    Check that enhanced dependencies are ordered by the numeric value of the
    head id rather than by its string form.
    """
    source = (
        '10\tgave\tgive\tVERB\t_\tNumber=Sing|Gender=Fem\t'
        '0\troot\t10:nsubj|2:nmod\tSpaceAfter=No'
    )
    # As strings '10' sorts before '2'; the expected output puts 2 first.
    expected = (
        '10\tgave\tgive\tVERB\t_\tGender=Fem|Number=Sing\t'
        '0\troot\t2:nmod|10:nsubj\tSpaceAfter=No'
    )

    rendered = Token(source).conll()

    assert rendered == expected
Exemple #9
0
def test_deps_sort_order():
    """
    Check that enhanced dependencies given out of order are output sorted by
    head id.
    """
    source = (
        '10\tgave\tgive\tVERB\t_\tNumber=Sing|Gender=Fem\t'
        '0\troot\t4:nsubj|2:nmod\tSpaceAfter=No'
    )
    # Heads 4, 2 are expected back as 2, 4.
    expected = (
        '10\tgave\tgive\tVERB\t_\tGender=Fem|Number=Sing\t'
        '0\troot\t2:nmod|4:nsubj\tSpaceAfter=No'
    )

    rendered = Token(source).conll()

    assert rendered == expected
Exemple #10
0
def test_remove_feature_to_string():
    """
    Check the serialized token after one feature has been deleted outright.
    """
    source = (
        '33\tcintre\tcintre\tNOUN\t_\tGender=Masc|Number=Sing\t'
        '30\tnmod\t_\tSpaceAfter=No'
    )
    # Identical to the source line except that 'Gender=Masc' is gone.
    expected = (
        '33\tcintre\tcintre\tNOUN\t_\t'
        'Number=Sing\t30\tnmod\t_\tSpaceAfter=No'
    )

    parsed = Token(source)
    del parsed.feats['Gender']

    assert parsed.conll() == expected
Exemple #11
0
def test_modify_dict_field_to_string():
    """
    Check the serialized token after a second value has been added to an
    existing feature.
    """
    source = (
        '33\tcintre\tcintre\tNOUN\t_\tGender=Masc|Number=Sing\t'
        '30\tnmod\t_\tSpaceAfter=No'
    )
    # 'Gender' is expected to list both values, 'Fem' ahead of 'Masc'.
    expected = (
        '33\tcintre\tcintre\tNOUN\t_\t'
        'Gender=Fem,Masc|Number=Sing\t30\tnmod\t_\tSpaceAfter=No'
    )

    parsed = Token(source)
    gender_values = parsed.feats['Gender']
    gender_values.add('Fem')

    assert parsed.conll() == expected
Exemple #12
0
def test_modify_unit_field_to_string():
    """
    Check the serialized token after one of its single-valued fields has been
    reassigned.
    """
    source = (
        '33\tcintre\tcintre\tNOUN\t_\tGender=Masc|Number=Sing\t'
        '30\tnmod\t_\tSpaceAfter=No'
    )
    # Only the lemma column differs from the source line.
    expected = (
        '33\tcintre\tpain\tNOUN\t_\t'
        'Gender=Masc|Number=Sing\t30\tnmod\t_\tSpaceAfter=No'
    )

    parsed = Token(source)
    parsed.lemma = 'pain'

    assert parsed.conll() == expected
Exemple #13
0
def test_del_values():
    """
    Check that entries can be deleted from both the feats and misc columns
    and that the output reflects the deletions.
    """
    source = (
        '33\tcintre\tcintre\tNOUN\t_\tGender=Fem|Number=Sing\t'
        '30\tnmod\t2:nsubj|4:root\tSpaceAfter=No'
    )
    parsed = Token(source)

    del parsed.feats['Gender']
    del parsed.misc['SpaceAfter']

    # With its only entry removed, the misc column is expected to be '_'.
    wanted = (
        '33\tcintre\tcintre\tNOUN\t_\tNumber=Sing\t'
        '30\tnmod\t2:nsubj|4:root\t_'
    )
    rendered = parsed.conll()

    assert wanted == rendered
Exemple #14
0
def test_misc_parsing_output():
    """
    Check the CoNLL-U output of the misc column after adding a valueless key,
    a second value on an existing key, and a new multi-valued key.
    """
    source = (
        '33\tcintre\tcintre\tNOUN\t_\tGender=Fem|Number=Sing\t'
        '30\tnmod\t2:nsubj|4:root\tSpaceAfter=No'
    )
    parsed = Token(source)

    # A key mapped to None is expected to be output bare, without '=value'.
    parsed.misc['Independent'] = None
    parsed.misc['SpaceAfter'].add('Yes')

    # Values are inserted out of alphabetical order on purpose; the expected
    # output lists them as X,Y,Z.
    parsed.misc['OtherTest'] = set()
    for value in ('X', 'Z', 'Y'):
        parsed.misc['OtherTest'].add(value)

    wanted = (
        '33\tcintre\tcintre\tNOUN\t_\tGender=Fem|Number=Sing\t'
        '30\tnmod\t2:nsubj|4:root\tIndependent|OtherTest=X,Y,Z|SpaceAfter=No,Yes'
    )
    assert wanted == parsed.conll()