Beispiel #1
0
def test_encode_decode_different_levels(encoder_dict):
    """Check that encoding then decoding returns an equal pattern.

    The pattern mixes element levels: one 'pos'-level element followed by
    a bracketed, comma-separated group of 'form'-level elements.
    """
    encoder = BitEncoder(encoder_dict, SNGram)

    elements = [
        PatternElement('Noun', 'pos'),
        SNGram.LEFT_BRACKET,
        PatternElement('The', 'form'),
        SNGram.COMMA,
        PatternElement('quick', 'form'),
        SNGram.COMMA,
        PatternElement('brown', 'form'),
        SNGram.RIGHT_BRACKET,
    ]
    original = SNGram.from_element_list(elements)

    # Round trip: the decoded pattern must compare equal to the input.
    encoded = encoder.encode(original)
    round_tripped = encoder.decode(encoded)
    assert round_tripped == original
Beispiel #2
0
def test_encode_decode_unknown_bitencoder():
    """Check that an out-of-vocabulary element decodes to the unknown token.

    The encoder is built with a 'form' vocabulary that does not contain
    'The', plus an explicit unknown token. After an encode/decode round
    trip, the 'The' element is expected to come back as the unknown token
    while every other element is unchanged.
    """
    unknown_token = "__unknown__"

    test = BitEncoder({'form': {'fox', 'quick', 'brown'}}, SNGram,
                      unknown_token)

    pattern_list = [
        PatternElement('fox', 'form'), SNGram.LEFT_BRACKET,
        PatternElement('The', 'form'), SNGram.COMMA,
        PatternElement('quick', 'form'), SNGram.COMMA,
        PatternElement('brown', 'form'), SNGram.RIGHT_BRACKET
    ]
    pattern = SNGram.from_element_list(pattern_list)

    # Copy the list before editing it: a plain assignment would only alias
    # pattern_list, so the replacement below would also rewrite the input
    # elements that `pattern` was built from.
    expected_pattern_list = list(pattern_list)
    # Index 2 holds 'The', the only element missing from the vocabulary.
    expected_pattern_list[2] = PatternElement(unknown_token, 'form')
    expected_pattern = SNGram.from_element_list(expected_pattern_list)

    assert test.decode(test.encode(pattern)) == expected_pattern