Ejemplo n.º 1
0
def test_encode_decode_unknown_huffman():
    """Round-trip a pattern holding an out-of-vocabulary form through Huffman.

    'The' is not among the forms handed to the encoder, so after an
    encode/decode round trip the test expects that element to come back as
    the configured unknown token, with every other element unchanged.
    """
    unknown_token = "__unknown__"

    # Numeric values are presumably frequencies/weights for the Huffman
    # tree -- confirm against HuffmanEncoder.
    vocabulary = {'form': {'fox': 1, 'quick': 2, 'brown': 3}}
    test = HuffmanEncoder(vocabulary, SNGram, unknown=unknown_token)

    pattern_list = [
        PatternElement('fox', 'form'), SNGram.LEFT_BRACKET,
        PatternElement('The', 'form'), SNGram.COMMA,
        PatternElement('quick', 'form'), SNGram.COMMA,
        PatternElement('brown', 'form'), SNGram.RIGHT_BRACKET
    ]
    pattern = SNGram.from_element_list(pattern_list)

    # Work on a shallow copy: a plain assignment would only alias
    # pattern_list, so the replacement below would also rewrite the list the
    # input pattern was built from. If SNGram keeps a reference to that list
    # (not visible from this test), input and expectation would become the
    # same pattern and the assertion would pass vacuously.
    expected_pattern_list = list(pattern_list)
    expected_pattern_list[2] = PatternElement(unknown_token, 'form')
    expected_pattern = SNGram.from_element_list(expected_pattern_list)

    assert test.decode(test.encode(pattern)) == expected_pattern
Ejemplo n.º 2
0
def test_sngram_from_element_list(sngram, expected):
    """Check the first 'form' pattern against one rebuilt from its elements.

    The first entry of ``sngram.get_pattern_list(['form'])`` must equal the
    SNGram constructed by ``from_element_list`` from ``expected['repr']``,
    using the fixture's own bracket and comma markers.
    """
    first_form_pattern = sngram.get_pattern_list(['form'])[0]

    rebuilt = SNGram.from_element_list(
        expected['repr'],
        left_bracket=sngram.LEFT_BRACKET,
        right_bracket=sngram.RIGHT_BRACKET,
        comma=sngram.COMMA,
    )

    assert first_form_pattern == rebuilt
Ejemplo n.º 3
0
def test_encode_decode(encoder):
    """Encoding then decoding a pattern must give back an equal pattern."""
    elements = [
        PatternElement('fox', 'form'), SNGram.LEFT_BRACKET,
        PatternElement('The', 'form'), SNGram.COMMA,
        PatternElement('quick', 'form'), SNGram.COMMA,
        PatternElement('brown', 'form'), SNGram.RIGHT_BRACKET,
    ]
    pattern = SNGram.from_element_list(elements)

    round_tripped = encoder.decode(encoder.encode(pattern))

    assert round_tripped == pattern
Ejemplo n.º 4
0
def test_encode_decode_different_levels(encoder_dict):
    """Round-trip a pattern mixing 'pos' and 'form' elements via BitEncoder.

    The head element is tagged 'pos' while the bracketed ones are tagged
    'form'; the decoded result must equal the original pattern.
    """
    bit_encoder = BitEncoder(encoder_dict, SNGram)

    elements = [
        PatternElement('Noun', 'pos'), SNGram.LEFT_BRACKET,
        PatternElement('The', 'form'), SNGram.COMMA,
        PatternElement('quick', 'form'), SNGram.COMMA,
        PatternElement('brown', 'form'), SNGram.RIGHT_BRACKET,
    ]
    pattern = SNGram.from_element_list(elements)

    encoded = bit_encoder.encode(pattern)

    assert bit_encoder.decode(encoded) == pattern
Ejemplo n.º 5
0
def test_encode_decode_with_full_token(encoder):
    """Round-trip a pattern that mixes dict tokens with PatternElements.

    The first and last word positions are given as plain ``{'form': ...}``
    dicts rather than PatternElement instances; the decoded result must
    still equal the original pattern.
    """
    elements = [
        {'form': 'fox'}, SNGram.LEFT_BRACKET,
        PatternElement('The', 'form'), SNGram.COMMA,
        PatternElement('quick', 'form'), SNGram.COMMA,
        {'form': 'brown'}, SNGram.RIGHT_BRACKET,
    ]
    pattern = SNGram.from_element_list(elements)

    round_tripped = encoder.decode(encoder.encode(pattern))

    assert round_tripped == pattern
Ejemplo n.º 6
0
def test_encode_unknown_not_set_huffman():
    """HuffmanEncoder without an unknown token must reject unseen forms.

    'The' is absent from the forms given to the encoder and no ``unknown``
    argument is supplied, so ``encode`` is expected to raise EncodeError.
    """
    vocabulary = {'form': {'fox': 1, 'quick': 2, 'brown': 3}}
    huffman = HuffmanEncoder(vocabulary, SNGram)

    pattern = SNGram.from_element_list([
        PatternElement('fox', 'form'), SNGram.LEFT_BRACKET,
        PatternElement('The', 'form'), SNGram.COMMA,
        PatternElement('quick', 'form'), SNGram.COMMA,
        PatternElement('brown', 'form'), SNGram.RIGHT_BRACKET,
    ])

    with pytest.raises(EncodeError):
        huffman.encode(pattern)
Ejemplo n.º 7
0
def test_encode_unknown_not_set_bitencoder():
    """BitEncoder without an unknown token must reject unseen forms.

    'The' is absent from the set of forms given to the encoder and no
    unknown token is supplied, so ``encode`` is expected to raise
    EncodeError.
    """
    known_forms = {'fox', 'quick', 'brown'}
    bit_encoder = BitEncoder({'form': known_forms}, SNGram)

    pattern = SNGram.from_element_list([
        PatternElement('fox', 'form'), SNGram.LEFT_BRACKET,
        PatternElement('The', 'form'), SNGram.COMMA,
        PatternElement('quick', 'form'), SNGram.COMMA,
        PatternElement('brown', 'form'), SNGram.RIGHT_BRACKET,
    ])

    with pytest.raises(EncodeError):
        bit_encoder.encode(pattern)
Ejemplo n.º 8
0
def test_append(encoder):
    """Build an encoded pattern item by item and decode it as a whole.

    Each element is encoded with ``encode_item`` and joined onto the running
    result with ``append``, starting from empty bytes; decoding the final
    value must equal the SNGram built directly from the same element list.
    """
    elements = [
        PatternElement('fox', 'form'), SNGram.LEFT_BRACKET,
        PatternElement('The', 'form'), SNGram.COMMA,
        PatternElement('quick', 'form'), SNGram.COMMA,
        PatternElement('brown', 'form'), SNGram.RIGHT_BRACKET,
    ]
    expected_pattern = SNGram.from_element_list(elements)

    encoded = b''
    for item in elements:
        encoded_item = encoder.encode_item(item)
        encoded = encoder.append(encoded, encoded_item)

    assert encoder.decode(encoded) == expected_pattern
Ejemplo n.º 9
0
def test_encode_decode_item_with_full_token(encoder):
    """A single dict token encoded with ``encode_item`` must decode back.

    The decoded value is compared with the one-element SNGram built from
    the same ``{'form': 'fox'}`` dict.
    """
    token = {'form': 'fox'}
    expected_pattern = SNGram.from_element_list([token])

    encoded_item = encoder.encode_item(token)

    assert encoder.decode(encoded_item) == expected_pattern
Ejemplo n.º 10
0
def test_encode_item(encoder):
    """A single PatternElement encoded with ``encode_item`` must decode back.

    The decoded value is compared with the one-element SNGram built from
    the same PatternElement.
    """
    fox = PatternElement('fox', 'form')
    expected_pattern = SNGram.from_element_list([fox])

    encoded_item = encoder.encode_item(fox)

    assert encoder.decode(encoded_item) == expected_pattern