예제 #1
0
def decode_guess(buf, pos, depth=0, path=None):
    """Guess how to decode a length-delimited field that has no known type.

    The field is first tried as an embedded message with an empty typedef.
    If that raises a DecoderException (this includes the recursion-limit
    error raised below, which is caught by the same handler), the field is
    decoded with the configured default binary type instead.

    Args:
        buf: Buffer holding the encoded data.
        pos: Offset in ``buf`` where the length-delimited field starts.
        depth: Current guess depth, compared against ``max_guess_recursion``.
        path: List of field identifiers leading to this field. Only used to
            build the debug log message here; defaults to an empty list.

    Returns:
        A ``(decoded, type_name)`` pair. ``decoded`` is whatever the chosen
        decoder returned and ``type_name`` is ``'message'`` or the name of
        the default binary type.

    Raises:
        BlackboxProtobufException: If the configured default binary type is
            not a length-delimited wire type.
    """
    if path is None:
        path = []

    try:
        if depth > max_guess_recursion:
            raise DecoderException(("Maximum guess recursion exceeded during decoding."
                                    " current_depth: %d limit: %d") % (depth, max_guess_recursion))
        decoded_message = decode_lendelim_message(buf, {}, pos, depth=depth+1, path=path)
        return decoded_message, 'message'
    except DecoderException as exc:
        # Failing to find a message is the normal, expected outcome for plain
        # binary fields. The debug log is kept so that a field which should
        # have decoded as a message can be investigated.
        location = "->".join(str(step) for step in path)
        logging.debug(("Attempted to decode lengh delimited message at %s, but "
                       "failed to find a message, treating field as binary. "
                       "Exception:\n %r"), location, str(exc))

        types = blackboxprotobuf.lib.types
        fallback_type = types.default_binary_type
        # The fallback decoder is handed a length-delimited field, so the
        # configured default must be of that wire type.
        if types.wiretypes[fallback_type] != wire_format.WIRETYPE_LENGTH_DELIMITED:
            raise BlackboxProtobufException(
                "Incorrect \'default_type\' specified in wiretypes.py: %s"
                % fallback_type)
        fallback_decoder = types.decoders[fallback_type]
        return fallback_decoder(buf, pos), fallback_type
예제 #2
0
def decode_string(value, pos):
    """Decode a length-prefixed UTF-8 string.

    A varint length is read at ``pos``; the bytes that follow are decoded as
    UTF-8 using the ``backslashreplace`` error handler.

    Args:
        value: Buffer holding the encoded data.
        pos: Offset of the varint length prefix.

    Returns:
        A ``(text, end)`` tuple where ``end`` is the offset just past the
        string bytes.

    Raises:
        DecoderException: If decoding raises a TypeError.
    """
    size, start = varint.decode_varint(value, pos)
    stop = start + size
    try:
        text = value[start:stop].decode('utf-8', 'backslashreplace')
    except TypeError as exc:
        error = DecoderException("Error decoding UTF-8 string %s" % value[start:stop])
        six.raise_from(error, exc)
    else:
        return text, stop
예제 #3
0
def decode_struct(fmt, buf, pos):
    """Decode a single value described by a python ``struct`` format string.

    Args:
        fmt: ``struct`` format string; its calculated size decides how many
            bytes are consumed.
        buf: Buffer holding the encoded data.
        pos: Offset in ``buf`` where the value starts.

    Returns:
        A ``(value, new_pos)`` tuple: the first unpacked item and the offset
        just past the consumed bytes.

    Raises:
        DecoderException: If ``struct.unpack`` rejects the sliced bytes.
    """
    width = struct.calcsize(fmt)
    stop = pos + width
    try:
        unpacked = struct.unpack(fmt, buf[pos:stop])
    except struct.error as exc:
        hex_dump = binascii.hexlify(buf[pos:stop])
        six.raise_from(
            DecoderException("Error deocding format string %s from bytes: %s" %
                             (fmt, hex_dump)), exc)
    else:
        # Only the first unpacked item is returned
        return unpacked[0], stop
예제 #4
0
def decode_bytes(value, pos):
    """Decode a varint length prefix and return that many raw bytes.

    Args:
        value: Buffer holding the encoded data.
        pos: Offset of the varint length prefix.

    Returns:
        A ``(data, end)`` tuple: the slice of ``value`` holding the payload
        and the offset just past it.

    Raises:
        DecoderException: If the decoded length is negative or is larger
            than the number of bytes left in ``value``.
    """
    length, pos = varint.decode_varint(value, pos)

    # NOTE(review): the varint decoder appears to return signed values, so a
    # malformed prefix could yield a negative length. Accepting it would move
    # the read position backwards, so reject it up front.
    if length < 0:
        raise DecoderException(
            "Error decoding bytes. Decoded length %d is negative" % length)

    # Slicing never raises IndexError: an out-of-range slice is silently
    # truncated. The bounds therefore have to be checked explicitly, otherwise
    # a short buffer returns partial data together with an `end` offset that
    # lies past the buffer.
    available = len(value) - pos
    if length > available:
        raise DecoderException(
            ("Error decoding bytes. Decoded length %d is longer than bytes"
             " available %d") % (length, available))

    end = pos + length
    return value[pos:end], end
예제 #5
0
def decode_string(value, pos):
    """Decode a length-prefixed, strictly valid UTF-8 string.

    A varint length is read at ``pos`` and the bytes that follow are decoded
    as UTF-8 with the default (strict) error handling.

    Args:
        value: Buffer holding the encoded data.
        pos: Offset of the varint length prefix.

    Returns:
        A ``(text, end)`` tuple where ``end`` is the offset just past the
        string bytes.

    Raises:
        DecoderException: If decoding raises TypeError or UnicodeDecodeError.
    """
    size, start = varint.decode_varint(value, pos)
    stop = start + size
    try:
        # Decoding is strict on purpose: backslash escaping of invalid bytes
        # isn't easily reversible.
        text = value[start:stop].decode("utf-8")
    except (TypeError, UnicodeDecodeError) as exc:
        error = DecoderException("Error decoding UTF-8 string %s" %
                                 value[start:stop])
        six.raise_from(error, exc)
    else:
        return text, stop
예제 #6
0
def decode_group(buf, typedef=None, pos=0, end=None, depth=0, path=None):
    """Decode a protobuf group by delegating to ``decode_message``.

    Args:
        buf: Buffer holding the encoded data.
        typedef: Type definition for the group, or None to guess.
        pos: Offset in ``buf`` where the group contents start.
        end: Offset to stop at, forwarded unchanged to ``decode_message``.
        depth: Current guess depth of the caller.
        path: Field path, forwarded unchanged to ``decode_message``.

    Returns:
        Whatever ``decode_message`` returns when called with ``group=True``.

    Raises:
        DecoderException: If the guess depth exceeds ``max_guess_recursion``.
    """
    # Only untyped groups count towards the guess depth; a supplied typedef
    # resets the counter.
    depth = depth + 1 if typedef is None else 0

    if depth <= max_guess_recursion:
        return decode_message(buf, typedef, pos, end, group=True, depth=depth, path=path)

    raise DecoderException(
        "Maximum guess recursion exceeded. current_depth: %d limit: %d"
        % (depth, max_guess_recursion))
예제 #7
0
def decode_varint(buf, pos):
    """Decode a signed varint from ``buf`` starting at ``pos``.

    Args:
        buf: Buffer holding the encoded data. Converted with ``str()`` first
            when running on Python 2.
        pos: Offset in ``buf`` where the varint starts.

    Returns:
        A ``(value, pos)`` tuple as produced by the protobuf signed varint
        decoder.

    Raises:
        DecoderException: If the underlying decoder raises TypeError,
            IndexError or the protobuf decode error.
    """
    # The Python 2 decoder is given a str rather than the original buffer
    data = str(buf) if six.PY2 else buf
    try:
        number, next_pos = decoder._DecodeSignedVarint(data, pos)
    except (TypeError, IndexError, decoder._DecodeError) as exc:
        # Include up to 8 bytes from the failing offset to aid debugging
        preview = binascii.hexlify(data[pos:pos + 8])
        six.raise_from(
            DecoderException("Error decoding varint from %s..." % preview), exc)
    return (number, next_pos)
예제 #8
0
 def length_wrapper(buf, pos):
     """Decode a length-prefixed run of repeated values.

     A varint byte length is read at ``pos``; ``wrapped_decoder`` is then
     called repeatedly until at least that many bytes have been consumed.

     Returns:
         A ``(values, pos)`` tuple: the decoded items in order and the offset
         just past the packed data.

     Raises:
         DecoderException: If the last item read ends past the declared
             length.
     """
     packed_len, cursor = varint.decode_varint(buf, pos)
     stop = cursor + packed_len
     values = []
     while cursor < stop:
         item, cursor = wrapped_decoder(buf, cursor)
         values.append(item)
     # Landing exactly on the declared end is the only valid outcome
     if cursor <= stop:
         return values, cursor
     raise DecoderException(
         ("Error decoding packed field. Packed length larger than"
          " buffer: decoded = %d, left = %d") %
         (packed_len, len(buf) - cursor))
예제 #9
0
def _group_by_number(buf, pos, end, path):
    """Split a message into raw field buffers grouped by field number.

    This forces the whole message to be parsed at once.

    Args:
        buf: Buffer holding the encoded message.
        pos: Offset in ``buf`` where the message starts.
        end: Offset in ``buf`` where the message ends (exclusive).
        path: List of field identifiers leading to this message. It is only
            used for error reporting and is never modified.

    Returns:
        A ``(output_map, pos)`` tuple. ``output_map`` maps the field number
        (as a string) to ``(wire_type, [field_buffer, ...])``, e.g.::

            {
                "2": (<wiretype>, [<data>])
            }

        For length-delimited fields each buffer still includes its varint
        length prefix. ``pos`` is the offset after the last field read.

    Raises:
        DecoderException: If a tag cannot be decoded, a field number appears
            with two different wire types, the wire type is a group or is
            unknown, or a field extends past ``end``.
    """

    output_map = {}
    while pos < end:
        # Read in a field
        try:
            if six.PY2:
                tag, pos = decoder._DecodeVarint(str(buf), pos)
            else:
                tag, pos = decoder._DecodeVarint(buf, pos)
        except (IndexError, decoder._DecodeError) as exc:
            six.raise_from(
                DecoderException(
                    "Error decoding length from buffer: %r..." %
                    (binascii.hexlify(buf[pos:pos + 8])),
                    path=path,
                ),
                exc,
            )

        field_number, wire_type = wire_format.UnpackTag(tag)

        # We want field numbers as strings everywhere
        field_number = str(field_number)

        # Build the path for this field only. Rebinding `path` here would make
        # every later sibling field inherit the numbers of the fields before
        # it, producing misleading paths in error messages.
        field_path = path + [field_number]

        # This should never happen
        if (field_number in output_map
                and output_map[field_number][0] != wire_type):
            raise DecoderException(
                "Field %s has mistmatched wiretypes. Previous: %s Now: %s" %
                (field_number, output_map[field_number][0], wire_type),
                path=field_path,
            )

        length = None
        if wire_type == wire_format.WIRETYPE_VARINT:
            # We actually have to read in the whole varint to figure out its size
            _, new_pos = varint.decode_varint(buf, pos)
            length = new_pos - pos
        elif wire_type == wire_format.WIRETYPE_FIXED32:
            length = 4
        elif wire_type == wire_format.WIRETYPE_FIXED64:
            length = 8
        elif wire_type == wire_format.WIRETYPE_LENGTH_DELIMITED:
            # Read the length from the start of the message
            # add on the length of the length tag as well
            bytes_length, new_pos = varint.decode_varint(buf, pos)
            length = bytes_length + (new_pos - pos)
        elif wire_type in [
                wire_format.WIRETYPE_START_GROUP,
                wire_format.WIRETYPE_END_GROUP,
        ]:
            raise DecoderException("GROUP wire types not supported",
                                   path=field_path)
        else:
            raise DecoderException("Got unkown wire type: %d" % wire_type,
                                   path=field_path)
        if pos + length > end:
            raise DecoderException(
                "Decoded length for field %s goes over end: %d > %d" %
                (field_number, pos + length, end),
                path=field_path,
            )

        field_buf = buf[pos:pos + length]

        if field_number in output_map:
            output_map[field_number][1].append(field_buf)
        else:
            output_map[field_number] = (wire_type, [field_buf])
        pos += length
    return output_map, pos
예제 #10
0
def decode_message(buf,
                   config,
                   typedef=None,
                   pos=0,
                   end=None,
                   depth=0,
                   path=None):
    """Decode a protobuf message with no length delimiter.

    Args:
        buf: Buffer holding the encoded message.
        config: Object providing ``get_default_type(wire_type)``; it is also
            forwarded to the length-delimited field decoder.
        typedef: Optional type definition keyed by field number. It is deep
            copied, so the caller's dictionary is never modified.
        pos: Offset in ``buf`` where the message starts.
        end: Offset where the message ends; defaults to ``len(buf)``.
        depth: Accepted for interface compatibility; not used in this
            function.
        path: List of field identifiers leading to this message, used for
            error reporting. Defaults to an empty list and is not modified.

    Returns:
        A ``(output, typedef, pos)`` tuple: the decoded values keyed by field
        key, the (possibly extended) copy of the typedef, and the offset
        after the last field read.

    Raises:
        DecoderException: If a typedef type does not match the field's wire
            type.
        TypedefException: If a field type has no registered decoder.
    """
    # TODO recalculate and re-add path for errors
    if end is None:
        end = len(buf)

    logging.debug("End: %d", end)
    if typedef is None:
        typedef = {}
    else:
        # Don't want to accidentally modify the original
        typedef = copy.deepcopy(typedef)

    if path is None:
        path = []

    output = {}

    grouped_fields, pos = _group_by_number(buf, pos, end, path)
    for (field_number, (wire_type, buffers)) in grouped_fields.items():
        # wire_type should already be validated by _group_by_number

        # Per-field path. Rebinding `path` here would make every later field
        # inherit the numbers of the fields processed before it.
        field_path = path + [field_number]
        field_typedef = typedef.get(field_number, {})
        field_key = _get_field_key(field_number, typedef, field_path)
        # Easy cases. Fixed size or bytes/string
        if (wire_type in [
                wire_format.WIRETYPE_FIXED32,
                wire_format.WIRETYPE_FIXED64,
                wire_format.WIRETYPE_VARINT,
        ] or ("type" in field_typedef and field_typedef["type"] != "message")):

            if "type" not in field_typedef:
                field_typedef["type"] = config.get_default_type(wire_type)
            else:
                # have a type, but make sure it matches the wiretype
                if (blackboxprotobuf.lib.types.WIRETYPES[field_typedef["type"]]
                        != wire_type):
                    raise DecoderException(
                        "Type %s from typedef did not match wiretype %s for "
                        "field %s" %
                        (field_typedef["type"], wire_type, field_key),
                        path=field_path,
                    )

            # we already have a type, just map the decoder
            if field_typedef[
                    "type"] not in blackboxprotobuf.lib.types.DECODERS:
                raise TypedefException(
                    "Got unkown type %s for field_number %s" %
                    (field_typedef["type"], field_number),
                    path=field_path,
                )

            # Named `field_decoder`/`field_buf` so that neither the `buf`
            # parameter nor a module called `decoder` is shadowed.
            field_decoder = blackboxprotobuf.lib.types.DECODERS[
                field_typedef["type"]]
            decoded = [field_decoder(field_buf, 0) for field_buf in buffers]

            # this shouldn't happen, but let's check just in case
            for field_buf, (_, consumed) in zip(buffers, decoded):
                assert len(field_buf) == consumed

            field_outputs = [value for (value, _) in decoded]
            # A single occurrence is stored bare, repeats as a list
            if len(field_outputs) == 1:
                output[field_key] = field_outputs[0]
            else:
                output[field_key] = field_outputs

        elif wire_type == wire_format.WIRETYPE_LENGTH_DELIMITED:
            _try_decode_lendelim_fields(buffers, field_key, field_typedef,
                                        output, config)

        # Save the field typedef/type back to the typedef
        typedef[field_number] = field_typedef

    return output, typedef, pos
예제 #11
0
def decode_message(buf, typedef=None, pos=0, end=None, group=False, depth=0, path=None):
    """Decode a protobuf message with no length delimiter.

    Fields are read one at a time. Fields missing from ``typedef`` get the
    default type for their wire type; length-delimited fields without a
    default are guessed via ``decode_guess``.

    Args:
        buf: Buffer holding the encoded message.
        typedef: Optional type definition keyed by field number (as a string).
            It is deep copied, so the caller's dictionary is never modified.
        pos: Offset in ``buf`` where the message starts.
        end: Offset where the message ends; defaults to ``len(buf)``.
        group: True when decoding the contents of a group, in which case an
            END_GROUP tag terminates decoding.
        depth: Current guess depth, forwarded to ``decode_guess`` and
            ``decode_group``.
        path: List of field identifiers leading to this message, used for
            error reporting. Defaults to an empty list and is not modified.

    Returns:
        A ``(output, typedef, pos)`` tuple: decoded values keyed by field name
        (or field number when unnamed or decoded with an alternate typedef),
        the updated copy of the typedef, and the offset after the last byte
        read. Repeated occurrences of a key are merged into a single list.

    Raises:
        DecoderException: On malformed input (bad tag, invalid or mismatched
            wire type, unbalanced group tags, fields overrunning ``end``).
    """
    if end is None:
        end = len(buf)

    if typedef is None:
        typedef = {}
    else:
        # Don't want to accidentally modify the original
        typedef = copy.deepcopy(typedef)

    if path is None:
        path = []

    output = {}

    while pos < end:
        # Read in a field
        try:
            if six.PY2:
                tag, pos = decoder._DecodeVarint(str(buf), pos)
            else:
                tag, pos = decoder._DecodeVarint(buf, pos)
        except (IndexError, decoder._DecodeError) as exc:
            six.raise_from(DecoderException(
                "Error decoding length from buffer: %r..." %
                (binascii.hexlify(buf[pos : pos+8]))), exc)

        field_number, wire_type = wire_format.UnpackTag(tag)

        # Convert to str
        field_number = str(field_number)
        # field_number may get an "-<alt>" suffix below; the typedef is always
        # stored under the plain number.
        orig_field_number = field_number

        field_path = path[:]
        field_path.append(field_number)

        if wire_type not in blackboxprotobuf.lib.types.wire_type_defaults:
            raise DecoderException('%d is not a valid wire type at pos %d.' % (wire_type, pos), field_path)

        field_typedef = None
        if field_number in typedef:
            field_typedef = typedef[field_number]
        else:
            field_typedef = {}
            field_typedef['type'] = blackboxprotobuf.lib.types.wire_type_defaults[wire_type]

        field_type = field_typedef['type']

        # If field_type is None, its either an unsupported wire type, length delim or group
        # length delim we have to try and decode first
        field_out = None
        if field_type is None:
            if wire_type == wire_format.WIRETYPE_LENGTH_DELIMITED:
                out, field_type = decode_guess(buf, pos, depth=depth, path=field_path)
                if field_type == 'message':
                    field_out, message_typedef, pos = out
                    field_typedef['message_typedef'] = message_typedef
                else:
                    field_out, pos = out
            elif wire_type == wire_format.WIRETYPE_END_GROUP:
                # TODO Should probably match the field_number to START_GROUP
                if not group:
                    raise DecoderException("Found END_GROUP before START_GROUP", field_path)
                # exit out
                return output, typedef, pos
            else:
                raise DecoderException("Could not find default type for wiretype: %d" % wire_type, field_path)
        else:
            if field_type == 'message':
                #TODO probably big enough to factor out
                message_typedef = None
                # Check for a anonymous type
                if 'message_typedef' in field_typedef:
                    message_typedef = field_typedef['message_typedef']
                # Check for type defined by message type name
                elif 'message_type_name' in field_typedef:
                    message_typedef = blackboxprotobuf.lib.known_messages[
                        field_typedef['message_type_name']]

                try:
                    field_out, message_typedef, pos = decode_lendelim_message(
                        buf, message_typedef, pos, path=field_path)
                    # Save type definition
                    field_typedef['message_typedef'] = message_typedef
                except DecoderException as exc:
                    # If this is the root message just fail
                    if pos == 0:
                        six.reraise(*sys.exc_info())
                    logging.debug(
                        ("Encountered exception when decoding message at %s "
                         "with known typdef. Trying alt typedefs and then "
                         "anonymous. Exception: \n%s"),
                        "->".join(map(str, field_path)), str(exc))

                if field_out is None and 'alt_typedefs' in field_typedef:
                    # check for an alternative type definition
                    for alt_field_number, alt_typedef in field_typedef['alt_typedefs'].items():
                        try:
                            field_out, message_typedef, pos = decode_lendelim_message(
                                buf, alt_typedef, pos, path=field_path)
                        except DecoderException as exc:
                            logging.debug(
                                ("Encountered exception when decoding message at %s with alt_typedef %s. Trying anonymous decoding next. Exception:\n%s"),
                                "->".join(map(str, field_path)),
                                str(alt_field_number),
                                str(exc))

                        if field_out is not None:
                            # Found working typedef
                            field_typedef['alt_typedefs'][alt_field_number] = message_typedef
                            field_number = field_number + "-" + alt_field_number
                            break

                if field_out is None:
                    # Still no typedef, try anonymous, and let the error propogate if it fails
                    field_out, message_typedef, pos = \
                        decode_lendelim_message(buf, {}, pos, path=field_path)

                    if 'alt_typedefs' in field_typedef:
                        # get the next higher alt field number
                        alt_field_number = str(
                            max(map(int, field_typedef['alt_typedefs'].keys()))
                            + 1)
                    else:
                        field_typedef['alt_typedefs'] = {}
                        alt_field_number = '1'

                    field_typedef['alt_typedefs'][alt_field_number] = message_typedef
                    field_number = field_number + "-" + alt_field_number
            elif field_type == 'group':
                group_typedef = None
                # Check for a anonymous type
                if 'group_typedef' in field_typedef:
                    group_typedef = field_typedef['group_typedef']
                field_out, group_typedef, pos = \
                    decode_group(buf, group_typedef, pos, depth=depth, path=field_path)
                # Save type definition
                field_typedef['group_typedef'] = group_typedef
            else:
                # Verify wiretype matches
                if blackboxprotobuf.lib.types.wiretypes[field_type] != wire_type:
                    raise DecoderException(
                        "Invalid wiretype for field number %s. %s is not wiretype %s"
                        % (field_number, field_type, wire_type), field_path)

                # Simple type, just look up the decoder
                try:
                    field_out, pos = blackboxprotobuf.lib.types.decoders[field_type](buf, pos)
                except DecoderException as exc:
                    exc.set_path(field_path)
                    six.reraise(*sys.exc_info())
        field_typedef['type'] = field_type
        if 'name' not in field_typedef:
            field_typedef['name'] = ''

        # Named fields are keyed by name, unless an alternate typedef was used
        field_key = field_number
        if '-' not in field_number and field_typedef['name'] != '':
            field_key = field_typedef['name']

        # Deal with repeats. The existing value must be looked up by
        # field_key: output is keyed by name for named fields, so indexing by
        # field_number would raise KeyError.
        if field_key in output:
            previous = output[field_key]
            if isinstance(field_out, list):
                if isinstance(previous, list):
                    previous.extend(field_out)
                else:
                    # list.append() returns None, so build the merged list
                    # explicitly, keeping the earlier value first.
                    output[field_key] = [previous] + field_out
            else:
                if isinstance(previous, list):
                    previous.append(field_out)
                else:
                    output[field_key] = [previous, field_out]
        else:
            output[field_key] = field_out

        # Always record the typedef. For known fields this re-stores the same
        # dictionary object; for new fields it makes sure they are not lost.
        typedef[orig_field_number] = field_typedef
    if pos > end:
        raise DecoderException(
            "Field sizes are greater than designated length. pos: %d end_pos: %d" % (pos, end))
    # Should never hit here as a group
    if group:
        raise DecoderException("Got START_GROUP with no END_GROUP.")
    return output, typedef, pos