def decode_guess(buf, pos, depth=0, path=None):
    """Guess how to decode a length-delimited field with no known type.

    An anonymous (empty typedef) message decode is attempted first. If that
    raises a DecoderException -- including the one raised here when the
    guess depth is over ``max_guess_recursion`` -- the field is decoded with
    the configured default binary type instead.

    Args:
        buf: Buffer holding the encoded data.
        pos: Offset in ``buf`` where the field payload starts.
        depth: Current guess depth; the nested message decode is given
            ``depth + 1``.
        path: Field numbers leading to this field, only used for the debug
            log message.

    Returns:
        A 2-tuple ``(result, type_name)``. ``result`` is whatever
        ``decode_lendelim_message`` returned (with ``type_name`` set to
        ``'message'``) or whatever the default binary decoder returned
        (with ``type_name`` set to the default binary type).

    Raises:
        BlackboxProtobufException: If the configured default binary type is
            not a length-delimited wire type.
    """
    if path is None:
        path = []

    try:
        # Raised inside the try on purpose: hitting the recursion limit is
        # handled the same way as any other failed message guess.
        if depth > max_guess_recursion:
            raise DecoderException(
                ("Maximum guess recursion exceeded during decoding."
                 " current_depth: %d limit: %d") % (depth, max_guess_recursion))
        guessed = decode_lendelim_message(
            buf, {}, pos, depth=depth+1, path=path)
    except DecoderException as exc:
        # Failing here is normal and expected; the log entry exists for the
        # case where someone wants to know why a field is not decoding.
        readable_path = "->".join(str(step) for step in path)
        logging.debug(("Attempted to decode lengh delimited message at %s, but "
                       "failed to find a message, treating field as binary. "
                       "Exception:\n %r"), readable_path, str(exc))

        types = blackboxprotobuf.lib.types
        default_type = types.default_binary_type
        if types.wiretypes[default_type] != wire_format.WIRETYPE_LENGTH_DELIMITED:
            raise BlackboxProtobufException(
                "Incorrect \'default_type\' specified in wiretypes.py: %s"
                % default_type)
        return types.decoders[default_type](buf, pos), default_type
    else:
        return guessed, 'message'
def decode_string(value, pos):
    """Read a varint length prefix, then decode that many bytes as UTF-8.

    Bytes that are not valid UTF-8 are handled with the 'backslashreplace'
    error handler instead of failing the decode.

    Args:
        value: Buffer holding the encoded data.
        pos: Offset in ``value`` of the length prefix.

    Returns:
        A 2-tuple ``(text, end)`` where ``end`` is the offset just past the
        string payload.

    Raises:
        DecoderException: If the decode call raises TypeError.
    """
    str_len, start = varint.decode_varint(value, pos)
    stop = start + str_len
    try:
        text = value[start:stop].decode('utf-8', 'backslashreplace')
    except TypeError as exc:
        message = "Error decoding UTF-8 string %s" % value[start:stop]
        six.raise_from(DecoderException(message), exc)
    return text, stop
def decode_struct(fmt, buf, pos):
    """Unpack a single value described by a ``struct`` format string.

    Args:
        fmt: Format string understood by the ``struct`` module.
        buf: Buffer holding the encoded data.
        pos: Offset in ``buf`` where the value starts.

    Returns:
        A 2-tuple ``(value, new_pos)``: the first unpacked item and the
        offset just past the bytes that ``fmt`` covers.

    Raises:
        DecoderException: If ``struct.unpack`` rejects the bytes (for
            example because fewer bytes are available than ``fmt`` needs).
    """
    new_pos = pos + struct.calcsize(fmt)
    chunk = buf[pos:new_pos]
    try:
        unpacked = struct.unpack(fmt, chunk)
    except struct.error as exc:
        six.raise_from(
            DecoderException(
                "Error deocding format string %s from bytes: %s"
                % (fmt, binascii.hexlify(chunk))),
            exc)
    return unpacked[0], new_pos
def decode_bytes(value, pos):
    """Read a varint length prefix and return that many raw bytes.

    Args:
        value: Buffer holding the encoded data.
        pos: Offset in ``value`` of the length prefix.

    Returns:
        A 2-tuple ``(data, end)`` where ``data`` is the payload slice and
        ``end`` is the offset just past it.

    Raises:
        DecoderException: If the decoded length is negative or larger than
            the number of bytes left in ``value``.
    """
    length, pos = varint.decode_varint(value, pos)
    available = len(value) - pos

    # A length can never be negative. Presumably decode_varint is a signed
    # decode, so a malformed prefix could produce one -- reject it instead
    # of returning an end position that moves backwards.
    if length < 0:
        raise DecoderException(
            "Error decoding bytes. Decoded length %d is negative" % length)

    # Slicing never raises IndexError; a too-large length would silently
    # yield a truncated payload and an end offset past the buffer. Check
    # the bounds explicitly.
    if length > available:
        raise DecoderException(
            ("Error decoding bytes. Decoded length %d is longer than bytes"
             " available %d") % (length, available))

    end = pos + length
    return value[pos:end], end
def decode_string(value, pos):
    """Read a varint length prefix, then strictly decode the bytes as UTF-8.

    Args:
        value: Buffer holding the encoded data.
        pos: Offset in ``value`` of the length prefix.

    Returns:
        A 2-tuple ``(text, end)`` where ``end`` is the offset just past the
        string payload.

    Raises:
        DecoderException: If the payload is not valid UTF-8 or the decode
            call raises TypeError.
    """
    str_len, start = varint.decode_varint(value, pos)
    stop = start + str_len
    try:
        # Strict decoding on purpose: backslash escaping isn't easily
        # reversible.
        text = value[start:stop].decode("utf-8")
    except (TypeError, UnicodeDecodeError) as exc:
        message = "Error decoding UTF-8 string %s" % value[start:stop]
        six.raise_from(DecoderException(message), exc)
    return text, stop
def decode_group(buf, typedef=None, pos=0, end=None, depth=0, path=None):
    """Decode a protobuf group by delegating to ``decode_message``.

    Args:
        buf: Buffer holding the encoded data.
        typedef: Type definition for the group, or None to guess.
        pos: Offset in ``buf`` where the group body starts.
        end: Offset to stop at, passed through to ``decode_message``.
        depth: Current guess depth.
        path: Field numbers leading to this group.

    Returns:
        Whatever ``decode_message`` returns when called with ``group=True``.

    Raises:
        DecoderException: If the guess depth exceeds ``max_guess_recursion``.
    """
    # Guessing (no typedef) goes one level deeper; a known typedef resets
    # the guess depth.
    depth = depth + 1 if typedef is None else 0

    if depth > max_guess_recursion:
        raise DecoderException(
            "Maximum guess recursion exceeded. current_depth: %d limit: %d"
            % (depth, max_guess_recursion))

    return decode_message(
        buf, typedef, pos, end, group=True, depth=depth, path=path)
def decode_varint(buf, pos):
    """Decode a signed varint from ``buf`` starting at ``pos``.

    Args:
        buf: Buffer holding the encoded data. On Python 2 it is converted
            with ``str()`` before decoding.
        pos: Offset in ``buf`` where the varint starts.

    Returns:
        A 2-tuple ``(value, pos)`` as produced by
        ``decoder._DecodeSignedVarint``.

    Raises:
        DecoderException: If the underlying decoder raises TypeError,
            IndexError or its own decode error.
    """
    if six.PY2:
        # Convert buffer to string
        buf = str(buf)

    try:
        value, next_pos = decoder._DecodeSignedVarint(buf, pos)
    except (TypeError, IndexError, decoder._DecodeError) as exc:
        # Show up to eight bytes from the failing offset for context.
        preview = binascii.hexlify(buf[pos:pos + 8])
        six.raise_from(
            DecoderException("Error decoding varint from %s..." % preview),
            exc)
    else:
        return (value, next_pos)
def length_wrapper(buf, pos):
    """Decode a length-prefixed run of values using ``wrapped_decoder``.

    Reads a varint length, then repeatedly applies ``wrapped_decoder``
    until the decoded region has been consumed.

    Args:
        buf: Buffer holding the encoded data.
        pos: Offset in ``buf`` of the length prefix.

    Returns:
        A 2-tuple ``(values, pos)``: the decoded values in order and the
        offset reached after the last one.

    Raises:
        DecoderException: If the last value read runs past the end of the
            length-prefixed region.
    """
    packed_len, cursor = varint.decode_varint(buf, pos)
    limit = cursor + packed_len

    values = []
    while cursor < limit:
        item, cursor = wrapped_decoder(buf, cursor)
        values.append(item)

    if cursor <= limit:
        return values, cursor

    raise DecoderException(
        ("Error decoding packed field. Packed length larger than"
         " buffer: decoded = %d, left = %d") % (packed_len, len(buf) - cursor))
def _group_by_number(buf, pos, end, path):
    """Split a serialized message into raw per-field buffers.

    Walks ``buf`` from ``pos`` to ``end``, reading each field tag and using
    the wire type to work out how many bytes the field occupies. Payloads
    are not decoded here; they are only sliced out and grouped. For
    length-delimited fields the slice includes the length prefix.

    This forces the whole message to be parsed at once.

    Args:
        buf: Buffer holding the encoded message.
        pos: Offset in ``buf`` to start reading at.
        end: Offset in ``buf`` to stop reading at.
        path: List of field numbers leading to this message, attached to
            any DecoderException raised.

    Returns:
        A 2-tuple ``(output_map, pos)``. ``output_map`` maps the field
        number (as a string) to ``(wire_type, [field_buf, ...])``, e.g.
        ``{"2": (<wiretype>, [<data>])}``. ``pos`` is the offset after the
        last field read.

    Raises:
        DecoderException: On an undecodable tag, a field seen with two
            different wire types, a group or unknown wire type, a negative
            length, or a field that extends beyond ``end``.
    """
    output_map = {}
    while pos < end:
        # Read in a field
        try:
            if six.PY2:
                tag, pos = decoder._DecodeVarint(str(buf), pos)
            else:
                tag, pos = decoder._DecodeVarint(buf, pos)
        except (IndexError, decoder._DecodeError) as exc:
            six.raise_from(
                DecoderException(
                    "Error decoding length from buffer: %r..."
                    % (binascii.hexlify(buf[pos:pos + 8])),
                    path=path,
                ),
                exc,
            )

        field_number, wire_type = wire_format.UnpackTag(tag)

        # We want field numbers as strings everywhere
        field_number = str(field_number)

        # Build a per-field path. Rebinding ``path`` itself here would make
        # every later field's error path include all earlier field numbers.
        field_path = path[:] + [field_number]

        # This should never happen
        if (field_number in output_map
                and output_map[field_number][0] != wire_type):
            raise DecoderException(
                "Field %s has mistmatched wiretypes. Previous: %s Now: %s"
                % (field_number, output_map[field_number][0], wire_type),
                path=field_path,
            )

        length = None
        if wire_type == wire_format.WIRETYPE_VARINT:
            # We have to read the whole varint to figure out its size
            _, new_pos = varint.decode_varint(buf, pos)
            length = new_pos - pos
        elif wire_type == wire_format.WIRETYPE_FIXED32:
            length = 4
        elif wire_type == wire_format.WIRETYPE_FIXED64:
            length = 8
        elif wire_type == wire_format.WIRETYPE_LENGTH_DELIMITED:
            # Read the length from the start of the message and add on the
            # size of the length prefix itself.
            bytes_length, new_pos = varint.decode_varint(buf, pos)
            if bytes_length < 0:
                # Presumably decode_varint is a signed decode, so a
                # malformed prefix could be negative. That would move
                # ``pos`` backwards and could loop forever.
                raise DecoderException(
                    "Decoded negative length %d for field %s"
                    % (bytes_length, field_number),
                    path=field_path,
                )
            length = bytes_length + (new_pos - pos)
        elif wire_type in [
                wire_format.WIRETYPE_START_GROUP,
                wire_format.WIRETYPE_END_GROUP,
        ]:
            raise DecoderException(
                "GROUP wire types not supported", path=field_path)
        else:
            raise DecoderException(
                "Got unkown wire type: %d" % wire_type, path=field_path)

        if pos + length > end:
            raise DecoderException(
                "Decoded length for field %s goes over end: %d > %d"
                % (field_number, pos + length, end),
                path=field_path,
            )

        field_buf = buf[pos:pos + length]
        if field_number in output_map:
            output_map[field_number][1].append(field_buf)
        else:
            output_map[field_number] = (wire_type, [field_buf])
        pos += length
    return output_map, pos
def decode_message(buf, config, typedef=None, pos=0, end=None, depth=0, path=None):
    """Decode a protobuf message with no length delimiter.

    The message is first split into raw per-field buffers by
    ``_group_by_number``; each field is then decoded according to its
    wire type and any type recorded in ``typedef``.

    Args:
        buf: Buffer holding the encoded message.
        config: Object providing ``get_default_type(wire_type)``; also
            passed on to ``_try_decode_lendelim_fields``.
        typedef: Optional type definition keyed by field number. It is
            deep-copied, so the caller's dict is not modified.
        pos: Offset in ``buf`` to start decoding at.
        end: Offset to stop at; defaults to ``len(buf)``.
        depth: Accepted for interface compatibility; not used in this
            function body.
        path: Field numbers leading to this message, used in error
            reporting.

    Returns:
        A 3-tuple ``(output, typedef, pos)``: decoded values keyed by field
        key, the updated typedef copy, and the offset after the message.

    Raises:
        DecoderException: If a typedef type does not match the wire type
            (or propagated from the field decoders).
        TypedefException: If a typedef names a type with no decoder.
    """
    # TODO recalculate and re-add path for errors
    if end is None:
        end = len(buf)

    logging.debug("End: %d", end)
    if typedef is None:
        typedef = {}
    else:
        # Don't want to accidentally modify the original
        typedef = copy.deepcopy(typedef)

    if path is None:
        path = []

    output = {}

    grouped_fields, pos = _group_by_number(buf, pos, end, path)
    for field_number, (wire_type, buffers) in grouped_fields.items():
        # wire_type should already be validated by _group_by_number

        # Per-field path. Rebinding ``path`` here would make each field's
        # path include the numbers of all previously processed fields.
        field_path = path[:] + [field_number]
        field_typedef = typedef.get(field_number, {})
        field_key = _get_field_key(field_number, typedef, field_path)

        fixed_size = wire_type in [
            wire_format.WIRETYPE_FIXED32,
            wire_format.WIRETYPE_FIXED64,
            wire_format.WIRETYPE_VARINT,
        ]
        typed_non_message = (
            "type" in field_typedef and field_typedef["type"] != "message")

        # Easy cases. Fixed size or bytes/string
        if fixed_size or typed_non_message:
            if "type" not in field_typedef:
                field_typedef["type"] = config.get_default_type(wire_type)
            elif (blackboxprotobuf.lib.types.WIRETYPES[field_typedef["type"]]
                  != wire_type):
                # have a type, but it doesn't match the wiretype
                raise DecoderException(
                    "Type %s from typedef did not match wiretype %s for "
                    "field %s" % (field_typedef["type"], wire_type, field_key),
                    path=field_path,
                )

            # we already have a type, just map the decoder
            if field_typedef["type"] not in blackboxprotobuf.lib.types.DECODERS:
                raise TypedefException(
                    "Got unkown type %s for field_number %s"
                    % (field_typedef["type"], field_number),
                    path=field_path,
                )

            # Distinct local names so the ``buf`` parameter is not
            # clobbered by the loop variables below.
            field_decoder = blackboxprotobuf.lib.types.DECODERS[
                field_typedef["type"]]
            decoded = [field_decoder(field_buf, 0) for field_buf in buffers]

            # this shouldn't happen, but let's check just in case: every
            # decoder must consume exactly its own buffer
            for field_buf, (_, consumed) in zip(buffers, decoded):
                assert len(field_buf) == consumed

            field_outputs = [value for (value, _) in decoded]
            if len(field_outputs) == 1:
                output[field_key] = field_outputs[0]
            else:
                output[field_key] = field_outputs

        elif wire_type == wire_format.WIRETYPE_LENGTH_DELIMITED:
            _try_decode_lendelim_fields(
                buffers, field_key, field_typedef, output, config)

        # Save the field typedef/type back to the typedef
        typedef[field_number] = field_typedef

    return output, typedef, pos
def decode_message(buf, typedef=None, pos=0, end=None, group=False, depth=0, path=None):
    """Decode a protobuf message with no length delimiter.

    Fields are read one at a time from ``buf``. Each field is decoded with
    the type recorded in ``typedef`` if there is one, otherwise with the
    default for its wire type; length-delimited fields with no known type
    are guessed via ``decode_guess``. Message fields that fail to decode
    with their recorded typedef fall back to any ``alt_typedefs`` and then
    to an anonymous decode, which is recorded as a new alternative.

    Args:
        buf: Buffer holding the encoded message.
        typedef: Optional type definition keyed by field number. It is
            deep-copied, so the caller's dict is not modified.
        pos: Offset in ``buf`` to start decoding at.
        end: Offset to stop at; defaults to ``len(buf)``.
        group: True when decoding the body of a group, in which case an
            END_GROUP tag terminates decoding.
        depth: Current guess depth, passed on to ``decode_guess`` and
            ``decode_group``.
        path: Field numbers leading to this message, used in error
            reporting.

    Returns:
        A 3-tuple ``(output, typedef, pos)``: decoded values keyed by field
        name (or number, or ``"<number>-<alt>"`` for alternative typedefs),
        the updated typedef copy, and the offset after the message.

    Raises:
        DecoderException: On malformed input, a wire type mismatch, an
            END_GROUP outside a group, or a group with no END_GROUP.
    """
    if end is None:
        end = len(buf)

    if typedef is None:
        typedef = {}
    else:
        # Don't want to accidentally modify the original
        typedef = copy.deepcopy(typedef)

    if path is None:
        path = []

    output = {}

    while pos < end:
        # Read in a field
        try:
            if six.PY2:
                tag, pos = decoder._DecodeVarint(str(buf), pos)
            else:
                tag, pos = decoder._DecodeVarint(buf, pos)
        except (IndexError, decoder._DecodeError) as exc:
            six.raise_from(DecoderException(
                "Error decoding length from buffer: %r..." %
                (binascii.hexlify(buf[pos : pos+8]))), exc)

        field_number, wire_type = wire_format.UnpackTag(tag)

        # Convert to str
        field_number = str(field_number)
        orig_field_number = field_number

        field_path = path[:]
        field_path.append(field_number)

        if wire_type not in blackboxprotobuf.lib.types.wire_type_defaults:
            raise DecoderException(
                '%d is not a valid wire type at pos %d.' % (wire_type, pos),
                field_path)

        field_typedef = None
        if field_number in typedef:
            field_typedef = typedef[field_number]
        else:
            field_typedef = {}
            field_typedef['type'] = \
                blackboxprotobuf.lib.types.wire_type_defaults[wire_type]

        field_type = field_typedef['type']

        # If field_type is None, it's either an unsupported wire type,
        # length delim or group. Length delim we have to try and decode
        # first.
        field_out = None
        if field_type is None:
            if wire_type == wire_format.WIRETYPE_LENGTH_DELIMITED:
                out, field_type = decode_guess(
                    buf, pos, depth=depth, path=field_path)
                if field_type == 'message':
                    field_out, message_typedef, pos = out
                    field_typedef['message_typedef'] = message_typedef
                else:
                    field_out, pos = out
            elif wire_type == wire_format.WIRETYPE_END_GROUP:
                # TODO Should probably match the field_number to START_GROUP
                if not group:
                    raise DecoderException(
                        "Found END_GROUP before START_GROUP", field_path)
                # exit out
                return output, typedef, pos
            else:
                raise DecoderException(
                    "Could not find default type for wiretype: %d" % wire_type,
                    field_path)
        else:
            if field_type == 'message':
                # TODO probably big enough to factor out
                message_typedef = None
                # Check for an anonymous type
                if 'message_typedef' in field_typedef:
                    message_typedef = field_typedef['message_typedef']
                # Check for type defined by message type name
                elif 'message_type_name' in field_typedef:
                    message_typedef = blackboxprotobuf.lib.known_messages[
                        field_typedef['message_type_name']]

                try:
                    field_out, message_typedef, pos = decode_lendelim_message(
                        buf, message_typedef, pos, path=field_path)
                    # Save type definition
                    field_typedef['message_typedef'] = message_typedef
                except DecoderException as exc:
                    # If this is the root message just fail
                    if pos == 0:
                        six.reraise(*sys.exc_info())
                    logging.debug(
                        ("Encountered exception when decoding message at %s "
                         "with known typdef. Trying alt typedefs and then "
                         "anonymous. Exception: \n%s"),
                        "->".join(map(str, field_path)), str(exc))

                if field_out is None and 'alt_typedefs' in field_typedef:
                    # check for an alternative type definition
                    for alt_field_number, alt_typedef in field_typedef['alt_typedefs'].items():
                        try:
                            field_out, message_typedef, pos = decode_lendelim_message(
                                buf, alt_typedef, pos, path=field_path)
                        except DecoderException as exc:
                            logging.debug(
                                ("Encountered exception when decoding message at %s with alt_typedef %s. Trying anonymous decoding next. Exception:\n%s"),
                                "->".join(map(str, field_path)),
                                str(alt_field_number),
                                str(exc))

                        if field_out is not None:
                            # Found working typedef
                            field_typedef['alt_typedefs'][alt_field_number] = message_typedef
                            field_number = field_number + "-" + alt_field_number
                            break

                if field_out is None:
                    # Still no typedef, try anonymous, and let the error
                    # propagate if it fails
                    field_out, message_typedef, pos = \
                        decode_lendelim_message(buf, {}, pos, path=field_path)
                    if 'alt_typedefs' in field_typedef:
                        # get the next higher alt field number
                        alt_field_number = str(
                            max(map(int, field_typedef['alt_typedefs'].keys()))
                            + 1)
                    else:
                        field_typedef['alt_typedefs'] = {}
                        alt_field_number = '1'
                    field_typedef['alt_typedefs'][alt_field_number] = message_typedef
                    field_number = field_number + "-" + alt_field_number
            elif field_type == 'group':
                group_typedef = None
                # Check for an anonymous type
                if 'group_typedef' in field_typedef:
                    group_typedef = field_typedef['group_typedef']
                field_out, group_typedef, pos = \
                    decode_group(buf, group_typedef, pos, depth=depth, path=field_path)
                # Save type definition
                field_typedef['group_typedef'] = group_typedef
            else:
                # Verify wiretype matches
                if blackboxprotobuf.lib.types.wiretypes[field_type] != wire_type:
                    raise DecoderException(
                        "Invalid wiretype for field number %s. %s is not wiretype %s"
                        % (field_number, field_type, wire_type), field_path)

                # Simple type, just look up the decoder
                try:
                    field_out, pos = \
                        blackboxprotobuf.lib.types.decoders[field_type](buf, pos)
                except DecoderException as exc:
                    exc.set_path(field_path)
                    six.reraise(*sys.exc_info())

        field_typedef['type'] = field_type
        if 'name' not in field_typedef:
            field_typedef['name'] = ''

        # Named fields are keyed by name, unless an alternative typedef
        # was used (the "<number>-<alt>" key is kept in that case).
        field_key = field_number
        if ('-' not in field_number and 'name' in field_typedef
                and field_typedef['name'] != ''):
            field_key = field_typedef['name']

        # Deal with repeats. Values are stored under field_key, so the
        # existing value must be looked up by field_key as well (looking it
        # up by field_number fails for named fields).
        if field_key in output:
            existing = output[field_key]
            if isinstance(field_out, list):
                if isinstance(existing, list):
                    output[field_key] += field_out
                else:
                    # list.append() returns None, so build the merged list
                    # explicitly, keeping the earlier value first.
                    output[field_key] = [existing] + field_out
            else:
                if isinstance(existing, list):
                    existing.append(field_out)
                else:
                    output[field_key] = [existing, field_out]
        else:
            output[field_key] = field_out

        # Record the (possibly new) field typedef. When the field was
        # already in typedef this re-stores the same dict object.
        typedef[orig_field_number] = field_typedef

    if pos > end:
        raise DecoderException(
            "Field sizes are greater than designated length. pos: %d end_pos: %d"
            % (pos, end))
    # Should never hit here as a group
    if group:
        raise DecoderException("Got START_GROUP with no END_GROUP.")
    return output, typedef, pos