def test_encode_decode_different_levels(encoder_dict):
    """Check that encode followed by decode returns an equal SNGram.

    The pattern starts with a 'pos'-level element and is followed by a
    bracketed, comma-separated group of 'form'-level elements, so a single
    pattern mixes two element levels.

    ``encoder_dict`` is handed straight to ``BitEncoder``; presumably it is
    supplied by a pytest fixture defined elsewhere -- confirm in conftest.
    """
    codec = BitEncoder(encoder_dict, SNGram)

    elements = [
        PatternElement('Noun', 'pos'),
        SNGram.LEFT_BRACKET,
        PatternElement('The', 'form'),
        SNGram.COMMA,
        PatternElement('quick', 'form'),
        SNGram.COMMA,
        PatternElement('brown', 'form'),
        SNGram.RIGHT_BRACKET,
    ]
    original = SNGram.from_element_list(elements)

    encoded = codec.encode(original)
    round_tripped = codec.decode(encoded)
    assert round_tripped == original
def test_encode_decode_unknown_bitencoder():
    """Check that an out-of-vocabulary element decodes to the unknown token.

    The encoder is built with a 'form' vocabulary of 'fox', 'quick' and
    'brown' plus an explicit unknown token. The input pattern contains
    'The', which is not in that vocabulary, so the round trip is expected to
    equal the same pattern with that one element replaced by the unknown
    token.
    """
    unknown_token = "__unknown__"
    test = BitEncoder({'form': {'fox', 'quick', 'brown'}}, SNGram, unknown_token)

    pattern_list = [
        PatternElement('fox', 'form'),
        SNGram.LEFT_BRACKET,
        PatternElement('The', 'form'),
        SNGram.COMMA,
        PatternElement('quick', 'form'),
        SNGram.COMMA,
        PatternElement('brown', 'form'),
        SNGram.RIGHT_BRACKET,
    ]
    pattern = SNGram.from_element_list(pattern_list)

    # Work on a copy: plain assignment would alias ``pattern_list``, so the
    # substitution below would also rewrite the list ``pattern`` was built
    # from and could make the comparison pass vacuously.
    expected_pattern_list = list(pattern_list)
    expected_pattern_list[2] = PatternElement(unknown_token, 'form')
    expected_pattern = SNGram.from_element_list(expected_pattern_list)

    assert test.decode(test.encode(pattern)) == expected_pattern