def decode(string): """ Decode SMILE format string into a Python Object :param basestring string: SMILE formatted data string :returns: Decoded python object :rtype: list | dict """ log.debug('Decoding: {!r}'.format(string)) state = DecodeState(string) while state.mode not in (DecodeMode.BAD, DecodeMode.DONE): if state.mode == DecodeMode.HEAD: head = state.pull_bits(3) if not (head and head.startswith(HEADER_BYTE_1+HEADER_BYTE_2+HEADER_BYTE_3)): state.mode = DecodeMode.BAD state.error = 'Invalid Header!' continue state.mode = DecodeMode.ROOT features = state.pull_byte() version = features & HEADER_BIT_VERSION shared_keys = bool(features & HEADER_BIT_HAS_SHARED_NAMES) shared_values = bool((features & HEADER_BIT_HAS_SHARED_STRING_VALUES) >> 1) raw_binary = bool((features & HEADER_BIT_HAS_RAW_BINARY) >> 2) state.header = SmileHeader(version, raw_binary, shared_keys, shared_values) elif state.mode in (DecodeMode.ROOT, DecodeMode.ARRAY, DecodeMode.VALUE): byt = state.pull_byte() if byt is None: log.debug('No bytes left to read!') state.mode = DecodeMode.DONE break log.debug('Pulled Byte: 0x{:x}'.format(byt)) if state.in_array[state.nested_depth]: if state.first_array_element[state.nested_depth]: state.first_array_element[state.nested_depth] = False elif byt != TOKEN_LITERAL_END_ARRAY: state.write(',') if byt == NULL_BIT: log.debug('Token: Null Bit (skip)') elif 0x01 <= byt <= 0x1F: log.debug('Token: Shared Value String') state.copy_shared_value_string() elif TOKEN_LITERAL_EMPTY_STRING <= byt <= TOKEN_LITERAL_TRUE: # Simple literals, numbers if byt == TOKEN_LITERAL_EMPTY_STRING: log.debug('Token: Empty String') state.write('""') elif byt == TOKEN_LITERAL_NULL: log.debug('Token: Literal Null') state.write('null') elif byt == TOKEN_LITERAL_FALSE: log.debug('Token: Literal False') state.write('false') elif byt == TOKEN_LITERAL_TRUE: log.debug('Token: Literal True') state.write('true') elif TOKEN_PREFIX_INTEGER <= byt < TOKEN_PREFIX_FP: # Integral numbers log.debug('Token: Integral Numbers') smile_value_length = byt & 0x03 if smile_value_length < 2: state.zzvarint_decode() elif smile_value_length == 2: # BigInteger log.warn('Not Yet Implemented: Value BigInteger') else: # Reserved for future use log.warn('Reserved: integral numbers with length >= 3') elif TOKEN_PREFIX_FP <= byt <= 0x2B: # Floating point numbers if byt == TOKEN_BYTE_FLOAT_32: fp = state.pull_bits(5) b1 = fp[0] b2 = fp[1] << 7 b3 = fp[2] << 7 << 7 b4 = fp[3] << 7 << 7 << 7 b5 = fp[4] << 7 << 7 << 7 << 7 byt = (b1 | b2 | b3 | b4 | b5) try: flt = util.bits_to_float(byt) except struct.error: flt = util.long_bits_to_float(byt) state.write(flt) elif byt == TOKEN_BYTE_FLOAT_64: fp = state.pull_bits(9) b1 = fp[0] b2 = fp[1] << 7 b3 = fp[2] << 7 << 7 b4 = fp[3] << 7 << 7 << 7 b5 = fp[4] << 7 << 7 << 7 << 7 b6 = fp[4] << 7 << 7 << 7 << 7 << 7 b7 = fp[4] << 7 << 7 << 7 << 7 << 7 << 7 b8 = fp[4] << 7 << 7 << 7 << 7 << 7 << 7 << 7 b9 = fp[4] << 7 << 7 << 7 << 7 << 7 << 7 << 7 << 7 byt = (b1 | b2 | b3 | b4 | b5 | b6 | b7 | b8 | b9) flt = util.long_bits_to_float(byt) state.write(flt) else: log.warn('Not Yet Implemented') elif 0x2C <= byt <= 0x3F: # Reserved for future use log.warn('Reserved: 0x2C <= value <= 0x3F') elif 0x40 <= byt <= 0x7F: # Tiny/short ASCII log.debug('Token: Tiny/short ASCII') smile_value_length = (byt & 0x3F) + 1 state.copy_value_string(smile_value_length) elif 0x80 <= byt <= 0xBF: # Tiny/short Unicode log.debug('Token: Tiny/short Unicode') smile_value_length = (byt & 0x3F) + 2 state.copy_value_string(smile_value_length) elif 0xC0 <= byt <= 0xDF: # Small Integers log.debug('Token: Small Integer') state.write(util.zigzag_decode(byt & 0x1F)) else: # Misc binary / text / structure markers if TOKEN_MISC_LONG_TEXT_ASCII <= byt < TOKEN_MISC_LONG_TEXT_UNICODE: # Long (variable length) ASCII text log.debug('Token: Long (var length) ASCII Test') state.copy_variable_length_string() elif TOKEN_MISC_LONG_TEXT_UNICODE <= byt < INT_MISC_BINARY_7BIT: log.warn('Not Yet Implemented: Value Long Unicode') elif INT_MISC_BINARY_7BIT <= byt < TOKEN_PREFIX_SHARED_STRING_LONG: log.warn('Not Yet Implemented: Value Long Shared String Reference') elif TOKEN_PREFIX_SHARED_STRING_LONG <= byt < HEADER_BIT_VERSION: # Binary, 7-bit encoded log.warn('Not Yet Implemented: Value Binary') elif HEADER_BIT_VERSION <= byt < TOKEN_LITERAL_START_ARRAY: log.warn('Reserved: 0xF0 <= value <= 0xF8') elif byt == TOKEN_LITERAL_START_ARRAY: # START_ARRAY log.debug('Token: Start Array') state.write('[') state.nested_depth += 1 state.in_array[state.nested_depth] = True state.first_array_element[state.nested_depth] = True state.first_key[state.nested_depth] = False elif byt == TOKEN_LITERAL_END_ARRAY: # END_ARRAY log.debug('Token: End Array') state.write(']') state.nested_depth -= 1 elif byt == TOKEN_LITERAL_START_OBJECT: # START_OBJECT log.debug('Token: Start Object') state.write('{') state.nested_depth += 1 state.in_array[state.nested_depth] = False state.first_array_element[state.nested_depth] = False state.first_key[state.nested_depth] = True state.mode = DecodeMode.KEY continue elif byt == TOKEN_LITERAL_END_OBJECT: log.debug('Token: End Object') log.warn('Reserved: value == 0xFB') elif byt == BYTE_MARKER_END_OF_STRING: log.error('Found end-of-String marker (0xFC) in value mode') elif byt == INT_MISC_BINARY_RAW: log.warn('Not Yet Implemented: Raw Binary Data') elif byt == BYTE_MARKER_END_OF_CONTENT: log.debug('Token: End Marker') state.mode = DecodeMode.DONE break if not state.in_array[state.nested_depth]: state.mode = DecodeMode.KEY elif state.mode == DecodeMode.KEY: byt = state.pull_byte() if byt is None or byt == BYTE_MARKER_END_OF_CONTENT: log.debug('No bytes left to read!') state.mode = DecodeMode.DONE break log.debug('Pulled Byte: 0x{:x}'.format(byt)) try: if state.first_key[state.nested_depth]: state.first_key[state.nested_depth] = False elif byt != TOKEN_LITERAL_END_OBJECT: state.write(',') except IndexError: state.first_key.append(False) # Byte ranges are divided in 4 main sections (64 byte values each) if 0x00 <= byt <= 0x1F: log.warn('Reserved: 0x01 <= key <= 0x1F') elif byt == TOKEN_LITERAL_EMPTY_STRING: # Empty String log.debug('Token: Literal Empty String') state.write('""') elif TOKEN_LITERAL_NULL <= byt <= 0x2F: log.warn('Reserved: 0x21 <= key <= 0x2F') elif TOKEN_PREFIX_KEY_SHARED_LONG <= byt <= 0x33: # "Long" shared key name reference log.warn('Not Yet Implemented: Long Shared Key Name Reference') elif byt == 0x32: # Long (not-yet-shared) Unicode name, 64 bytes or more log.warn('Not Yet Implemented: Long Key Name') elif 0x35 <= byt <= 0x39: log.warn('Reserved: 0x35 <= key <= 0x39') elif byt == 0x3A: log.error('0x3A NOT allowed in Key mode') elif 0x3B <= byt <= 0x3F: log.warn('Reserved: 0x3B <= key <= 0x3F') elif TOKEN_PREFIX_KEY_SHARED_SHORT <= byt <= 0x7F: # "Short" shared key name reference (1 byte lookup) log.debug('Token: Short Shared Key Name Reference') state.copy_shared_key_string() elif TOKEN_PREFIX_KEY_ASCII <= byt <= 0xBF: # Short Ascii names # 5 LSB used to indicate lengths from 2 to 32 (bytes == chars) log.debug('Token: Short ASCII Name') smile_key_length = (byt & 0x3F) + 1 state.copy_key_string(smile_key_length) elif TOKEN_PREFIX_KEY_UNICODE <= byt <= TOKEN_RESERVED: # Short Unicode names # 5 LSB used to indicate lengths from 2 to 57 log.debug('Token: Short Unicode Name') smile_key_length = (byt - 0xC0) + 2 state.copy_key_string(smile_key_length) elif TOKEN_LITERAL_START_ARRAY <= byt <= TOKEN_LITERAL_START_OBJECT: log.warn('Reserved: 0xF8 <= key <= 0xFA') elif byt == TOKEN_LITERAL_END_OBJECT: log.debug('Token: Literal End Object') state.write('}') state.nested_depth -= 1 try: in_arry = state.in_array[state.nested_depth] except IndexError: in_arry = False if in_arry: state.mode = DecodeMode.VALUE else: state.mode = DecodeMode.KEY continue elif byt >= BYTE_MARKER_END_OF_STRING: log.warn('Reserved: key >= 0xFC') state.mode = DecodeMode.VALUE elif state.mode == DecodeMode.BAD: if state.error is None: state.error = 'Unknown Error!' break elif state.mode == DecodeMode.DONE: log.debug('Decoding Done!') break if state.mode == DecodeMode.BAD: raise SMILEDecodeError('Bad State: {}'.format(state.error), state.get_value()) ret_val = state.get_value() try: jsonified = json.loads(ret_val) except (ValueError, UnicodeDecodeError): msg = 'Unable to jsonify string: {!r}'.format(ret_val) log.exception(msg) raise SMILEDecodeError(msg, ret_val) return jsonified
def decode(string): """ Decode SMILE format string into a Python Object :param basestring string: SMILE formatted data string :returns: Decoded python object :rtype: list | dict """ log.debug('Decoding: {!r}'.format(string)) state = DecodeState(string) while state.mode not in (DecodeMode.BAD, DecodeMode.DONE): if state.mode == DecodeMode.HEAD: head = state.pull_bits(3) if not (head and head.startswith(HEADER_BYTE_1+HEADER_BYTE_2+HEADER_BYTE_3)): state.mode = DecodeMode.BAD state.error = 'Invalid Header!' continue state.mode = DecodeMode.ROOT features = state.pull_byte() version = features & HEADER_BIT_VERSION shared_keys = bool(features & HEADER_BIT_HAS_SHARED_NAMES) shared_values = bool((features & HEADER_BIT_HAS_SHARED_STRING_VALUES) >> 1) raw_binary = bool((features & HEADER_BIT_HAS_RAW_BINARY) >> 2) state.header = SmileHeader(version, raw_binary, shared_keys, shared_values) elif state.mode in (DecodeMode.ROOT, DecodeMode.ARRAY, DecodeMode.VALUE): byt = state.pull_byte() if byt is None: log.debug('No bytes left to read!') state.mode = DecodeMode.DONE break log.debug('Pulled Byte: 0x{:x}'.format(byt)) if state.in_array[state.nested_depth]: if state.first_array_element[state.nested_depth]: state.first_array_element[state.nested_depth] = False elif byt != TOKEN_LITERAL_END_ARRAY: state.write(',') if byt == NULL_BIT: log.debug('Token: Null Bit (skip)') elif 0x01 <= byt <= 0x1F: log.debug('Token: Shared Value String') state.copy_shared_value_string() elif TOKEN_LITERAL_EMPTY_STRING <= byt <= TOKEN_LITERAL_TRUE: # Simple literals, numbers if byt == TOKEN_LITERAL_EMPTY_STRING: log.debug('Token: Empty String') state.write('""') elif byt == TOKEN_LITERAL_NULL: log.debug('Token: Literal Null') state.write('null') elif byt == TOKEN_LITERAL_FALSE: log.debug('Token: Literal False') state.write('false') elif byt == TOKEN_LITERAL_TRUE: log.debug('Token: Literal True') state.write('true') elif TOKEN_PREFIX_INTEGER <= byt < TOKEN_PREFIX_FP: # Integral numbers log.debug('Token: Integral Numbers') smile_value_length = byt & 0x03 if smile_value_length < 2: state.zzvarint_decode() elif smile_value_length == 2: # BigInteger log.warn('Not Yet Implemented: Value BigInteger') else: # Reserved for future use log.warn('Reserved: integral numbers with length >= 3') elif TOKEN_PREFIX_FP <= byt <= 0x2B: # Floating point numbers if byt == TOKEN_BYTE_FLOAT_32: fp = state.pull_bits(5) b1 = fp[0] b2 = fp[1] << 7 b3 = fp[2] << 7 << 7 b4 = fp[3] << 7 << 7 << 7 b5 = fp[4] << 7 << 7 << 7 << 7 byt = (b1 | b2 | b3 | b4 | b5) try: flt = util.bits_to_float(byt) except struct.error: flt = util.long_bits_to_float(byt) state.write(flt) elif byt == TOKEN_BYTE_FLOAT_64: fp = state.pull_bits(9) b1 = fp[0] b2 = fp[1] << 7 b3 = fp[2] << 7 << 7 b4 = fp[3] << 7 << 7 << 7 b5 = fp[4] << 7 << 7 << 7 << 7 b6 = fp[4] << 7 << 7 << 7 << 7 << 7 b7 = fp[4] << 7 << 7 << 7 << 7 << 7 << 7 b8 = fp[4] << 7 << 7 << 7 << 7 << 7 << 7 << 7 b9 = fp[4] << 7 << 7 << 7 << 7 << 7 << 7 << 7 << 7 byt = (b1 | b2 | b3 | b4 | b5 | b6 | b7 | b8 | b9) flt = util.long_bits_to_float(byt) state.write(flt) else: log.warn('Not Yet Implemented') elif 0x2C <= byt <= 0x3F: # Reserved for future use log.warn('Reserved: 0x2C <= value <= 0x3F') elif 0x40 <= byt <= 0x5F or 0x80 <= byt <= 0x9F: # Tiny ASCII/Unicode log.debug('Token: Tiny ASCII/Unicode') smile_value_length = (byt & 0x1F) + 1 state.copy_value_string(smile_value_length) elif 0x60 <= byt <= 0x7F or 0xA0 <= byt <= 0xBF: # Small ASCII/Unicode log.debug('Token: Small ASCII/Unicode') smile_value_length = (byt & 0x1F) + 33 state.copy_value_string(smile_value_length) elif 0xC0 <= byt <= 0xDF: # Small Integers log.debug('Token: Small Integer') state.write(util.zigzag_decode(byt & 0x1F)) else: # Misc binary / text / structure markers if TOKEN_MISC_LONG_TEXT_ASCII <= byt < TOKEN_MISC_LONG_TEXT_UNICODE: # Long (variable length) ASCII text log.debug('Token: Long (var length) ASCII Test') state.copy_variable_length_string() elif TOKEN_MISC_LONG_TEXT_UNICODE <= byt < INT_MISC_BINARY_7BIT: log.warn('Not Yet Implemented: Value Long Unicode') elif INT_MISC_BINARY_7BIT <= byt < TOKEN_PREFIX_SHARED_STRING_LONG: log.warn('Not Yet Implemented: Value Long Shared String Reference') elif TOKEN_PREFIX_SHARED_STRING_LONG <= byt < HEADER_BIT_VERSION: # Binary, 7-bit encoded log.warn('Not Yet Implemented: Value Binary') elif HEADER_BIT_VERSION <= byt < TOKEN_LITERAL_START_ARRAY: log.warn('Reserved: 0xF0 <= value <= 0xF8') elif byt == TOKEN_LITERAL_START_ARRAY: # START_ARRAY log.debug('Token: Start Array') state.write('[') state.nested_depth += 1 state.in_array[state.nested_depth] = True state.first_array_element[state.nested_depth] = True state.first_key[state.nested_depth] = False elif byt == TOKEN_LITERAL_END_ARRAY: # END_ARRAY log.debug('Token: End Array') state.write(']') state.nested_depth -= 1 elif byt == TOKEN_LITERAL_START_OBJECT: # START_OBJECT log.debug('Token: Start Object') state.write('{') state.nested_depth += 1 state.in_array[state.nested_depth] = False state.first_array_element[state.nested_depth] = False state.first_key[state.nested_depth] = True state.mode = DecodeMode.KEY continue elif byt == TOKEN_LITERAL_END_OBJECT: log.debug('Token: End Object') log.warn('Reserved: value == 0xFB') elif byt == BYTE_MARKER_END_OF_STRING: log.error('Found end-of-String marker (0xFC) in value mode') elif byt == INT_MISC_BINARY_RAW: log.warn('Not Yet Implemented: Raw Binary Data') elif byt == BYTE_MARKER_END_OF_CONTENT: log.debug('Token: End Marker') state.mode = DecodeMode.DONE break if not state.in_array[state.nested_depth]: state.mode = DecodeMode.KEY elif state.mode == DecodeMode.KEY: byt = state.pull_byte() if byt is None or byt == BYTE_MARKER_END_OF_CONTENT: log.debug('No bytes left to read!') state.mode = DecodeMode.DONE break log.debug('Pulled Byte: 0x{:x}'.format(byt)) try: if state.first_key[state.nested_depth]: state.first_key[state.nested_depth] = False elif byt != TOKEN_LITERAL_END_OBJECT: state.write(',') except IndexError: state.first_key.append(False) # Byte ranges are divided in 4 main sections (64 byte values each) if 0x00 <= byt <= 0x1F: log.warn('Reserved: 0x01 <= key <= 0x1F') elif byt == TOKEN_LITERAL_EMPTY_STRING: # Empty String log.debug('Token: Literal Empty String') state.write('""') elif TOKEN_LITERAL_NULL <= byt <= 0x2F: log.warn('Reserved: 0x21 <= key <= 0x2F') elif TOKEN_PREFIX_KEY_SHARED_LONG <= byt <= 0x33: # "Long" shared key name reference log.warn('Not Yet Implemented: Long Shared Key Name Reference') elif byt == 0x32: # Long (not-yet-shared) Unicode name, 64 bytes or more log.warn('Not Yet Implemented: Long Key Name') elif 0x35 <= byt <= 0x39: log.warn('Reserved: 0x35 <= key <= 0x39') elif byt == 0x3A: log.error('0x3A NOT allowed in Key mode') elif 0x3B <= byt <= 0x3F: log.warn('Reserved: 0x3B <= key <= 0x3F') elif TOKEN_PREFIX_KEY_SHARED_SHORT <= byt <= 0x7F: # "Short" shared key name reference (1 byte lookup) log.debug('Token: Short Shared Key Name Reference') state.copy_shared_key_string() elif TOKEN_PREFIX_KEY_ASCII <= byt <= 0xBF: # Short Ascii names # 5 LSB used to indicate lengths from 2 to 32 (bytes == chars) log.debug('Token: Short ASCII Name') smile_key_length = (byt & 0x1F) + 1 state.copy_key_string(smile_key_length) elif TOKEN_PREFIX_KEY_UNICODE <= byt <= TOKEN_RESERVED: # Short Unicode names # 5 LSB used to indicate lengths from 2 to 57 log.debug('Token: Short Unicode Name') smile_key_length = (byt - 0xC0) + 2 state.copy_key_string(smile_key_length) elif TOKEN_LITERAL_START_ARRAY <= byt <= TOKEN_LITERAL_START_OBJECT: log.warn('Reserved: 0xF8 <= key <= 0xFA') elif byt == TOKEN_LITERAL_END_OBJECT: log.debug('Token: Literal End Object') state.write('}') state.nested_depth -= 1 try: in_arry = state.in_array[state.nested_depth] except IndexError: in_arry = False if in_arry: state.mode = DecodeMode.VALUE else: state.mode = DecodeMode.KEY continue elif byt >= BYTE_MARKER_END_OF_STRING: log.warn('Reserved: key >= 0xFC') state.mode = DecodeMode.VALUE elif state.mode == DecodeMode.BAD: if state.error is None: state.error = 'Unknown Error!' break elif state.mode == DecodeMode.DONE: log.debug('Decoding Done!') break if state.mode == DecodeMode.BAD: raise SMILEDecodeError('Bad State: {}'.format(state.error), state.get_value()) ret_val = state.get_value() try: jsonified = json.loads(ret_val) except (ValueError, UnicodeDecodeError): msg = 'Unable to jsonify string: {!r}'.format(ret_val) log.exception(msg) raise SMILEDecodeError(msg, ret_val) return jsonified
def zzvarint_decode(self): self.write(util.zigzag_decode(self.varint_decode()))