def test_encode_decode_unknown_huffman():
    """Out-of-vocabulary tokens round-trip as the configured unknown token.

    'The' is absent from the frequency table handed to the encoder, so the
    decoded pattern is expected to carry ``unknown_token`` in its place
    while every other element is preserved.
    """
    unknown_token = "__unknown__"
    test = HuffmanEncoder({'form': {
        'fox': 1,
        'quick': 2,
        'brown': 3
    }}, SNGram, unknown=unknown_token)

    pattern_list = [
        PatternElement('fox', 'form'),
        SNGram.LEFT_BRACKET,
        PatternElement('The', 'form'),
        SNGram.COMMA,
        PatternElement('quick', 'form'),
        SNGram.COMMA,
        PatternElement('brown', 'form'),
        SNGram.RIGHT_BRACKET
    ]
    pattern = SNGram.from_element_list(pattern_list)

    # Work on a copy: assigning into an alias of ``pattern_list`` would also
    # rewrite the list already handed to ``from_element_list`` above, which
    # could silently turn the input into the expected value if SNGram keeps
    # a reference to its argument.
    expected_pattern_list = list(pattern_list)
    expected_pattern_list[2] = PatternElement(unknown_token, 'form')
    expected_pattern = SNGram.from_element_list(expected_pattern_list)

    assert test.decode(test.encode(pattern)) == expected_pattern
def test_sngram_from_element_list(sngram, expected):
    """Check ``SNGram.from_element_list`` against ``get_pattern_list``.

    The first pattern returned by ``sngram.get_pattern_list(['form'])`` must
    equal the SNGram built from ``expected['repr']`` using the fixture's own
    bracket and comma symbols.
    """
    first_pattern = sngram.get_pattern_list(['form'])[0]
    rebuilt = SNGram.from_element_list(
        expected['repr'],
        left_bracket=sngram.LEFT_BRACKET,
        right_bracket=sngram.RIGHT_BRACKET,
        comma=sngram.COMMA,
    )
    assert first_pattern == rebuilt
def test_encode_decode(encoder):
    """Encoding then decoding a pattern must give back an equal pattern."""
    elements = [
        PatternElement('fox', 'form'),
        SNGram.LEFT_BRACKET,
        PatternElement('The', 'form'),
        SNGram.COMMA,
        PatternElement('quick', 'form'),
        SNGram.COMMA,
        PatternElement('brown', 'form'),
        SNGram.RIGHT_BRACKET,
    ]
    original = SNGram.from_element_list(elements)

    encoded = encoder.encode(original)
    assert encoder.decode(encoded) == original
def test_encode_decode_different_levels(encoder_dict):
    """Round-trip a pattern whose elements are tagged 'pos' and 'form'.

    A ``BitEncoder`` built from ``encoder_dict`` must decode its own
    encoding back to an equal pattern.
    """
    elements = [
        PatternElement('Noun', 'pos'),
        SNGram.LEFT_BRACKET,
        PatternElement('The', 'form'),
        SNGram.COMMA,
        PatternElement('quick', 'form'),
        SNGram.COMMA,
        PatternElement('brown', 'form'),
        SNGram.RIGHT_BRACKET,
    ]
    test = BitEncoder(encoder_dict, SNGram)
    original = SNGram.from_element_list(elements)

    encoded = test.encode(original)
    assert test.decode(encoded) == original
def test_encode_decode_with_full_token(encoder):
    """Round-trip a pattern mixing plain dict tokens with PatternElements."""
    head_token = {'form': 'fox'}
    last_token = {'form': 'brown'}
    elements = [
        head_token,
        SNGram.LEFT_BRACKET,
        PatternElement('The', 'form'),
        SNGram.COMMA,
        PatternElement('quick', 'form'),
        SNGram.COMMA,
        last_token,
        SNGram.RIGHT_BRACKET,
    ]
    original = SNGram.from_element_list(elements)

    encoded = encoder.encode(original)
    assert encoder.decode(encoded) == original
def test_encode_unknown_not_set_huffman():
    """HuffmanEncoder without an unknown token must reject unseen tokens.

    'The' is not in the frequency table and no ``unknown`` argument is
    given, so ``encode`` is expected to raise ``EncodeError``.
    """
    frequencies = {'form': {'fox': 1, 'quick': 2, 'brown': 3}}
    test = HuffmanEncoder(frequencies, SNGram)

    elements = [
        PatternElement('fox', 'form'),
        SNGram.LEFT_BRACKET,
        PatternElement('The', 'form'),
        SNGram.COMMA,
        PatternElement('quick', 'form'),
        SNGram.COMMA,
        PatternElement('brown', 'form'),
        SNGram.RIGHT_BRACKET,
    ]
    # Build the pattern outside the ``raises`` block so that only the
    # encode call itself is allowed to produce the error.
    pattern = SNGram.from_element_list(elements)

    with pytest.raises(EncodeError):
        test.encode(pattern)
def test_encode_unknown_not_set_bitencoder():
    """BitEncoder without an unknown token must reject unseen tokens.

    'The' is not in the vocabulary set handed to the encoder, so ``encode``
    is expected to raise ``EncodeError``.
    """
    vocabulary = {'form': {'fox', 'quick', 'brown'}}
    test = BitEncoder(vocabulary, SNGram)

    elements = [
        PatternElement('fox', 'form'),
        SNGram.LEFT_BRACKET,
        PatternElement('The', 'form'),
        SNGram.COMMA,
        PatternElement('quick', 'form'),
        SNGram.COMMA,
        PatternElement('brown', 'form'),
        SNGram.RIGHT_BRACKET,
    ]
    # Build the pattern outside the ``raises`` block so that only the
    # encode call itself is allowed to produce the error.
    pattern = SNGram.from_element_list(elements)

    with pytest.raises(EncodeError):
        test.encode(pattern)
def test_append(encoder):
    """Appending item encodings one by one must decode to the full pattern.

    Starting from empty bytes, each element is encoded with ``encode_item``
    and joined with ``encoder.append``; decoding the result must equal the
    SNGram built directly from the same element list.
    """
    elements = [
        PatternElement('fox', 'form'),
        SNGram.LEFT_BRACKET,
        PatternElement('The', 'form'),
        SNGram.COMMA,
        PatternElement('quick', 'form'),
        SNGram.COMMA,
        PatternElement('brown', 'form'),
        SNGram.RIGHT_BRACKET,
    ]
    reference = SNGram.from_element_list(elements)

    accumulated = b''
    for item in elements:
        encoded_item = encoder.encode_item(item)
        accumulated = encoder.append(accumulated, encoded_item)

    assert encoder.decode(accumulated) == reference
def test_encode_decode_item_with_full_token(encoder):
    """A single dict token encoded via ``encode_item`` decodes to a
    one-element pattern built from that same token."""
    element = {'form': 'fox'}
    single_item_pattern = SNGram.from_element_list([element])

    encoded = encoder.encode_item(element)
    assert encoder.decode(encoded) == single_item_pattern
def test_encode_item(encoder):
    """A single ``PatternElement`` encoded via ``encode_item`` decodes to a
    one-element pattern built from that same element."""
    element = PatternElement('fox', 'form')
    single_item_pattern = SNGram.from_element_list([element])

    encoded = encoder.encode_item(element)
    assert encoder.decode(encoded) == single_item_pattern