def _element_to_bson(key: Any, value: Any, check_keys: bool, opts: CodecOptions) -> bytes:
    """Serialize one document entry (key plus value) to BSON bytes.

    Raises InvalidDocument when ``key`` is not a ``str`` or, with
    ``check_keys`` enabled, when it starts with ``$`` or contains ``.``.
    """
    if not isinstance(key, str):
        raise InvalidDocument("documents must have only string keys, key was %r" % (key,))

    # The '$' / '.' restrictions are only enforced when the caller asks.
    if check_keys and key.startswith("$"):
        raise InvalidDocument("key %r must not start with '$'" % (key,))
    if check_keys and "." in key:
        raise InvalidDocument("key %r must not contain '.'" % (key,))

    return _name_value_to_bson(_make_name(key), value, check_keys, opts)
def _make_c_string_check(string: Union[str, bytes]) -> bytes:
    """Return ``string`` as a NUL-terminated 'C' string, rejecting embedded NULs.

    ``str`` input is encoded with ``_utf_8_encode``; ``bytes`` input is
    validated with ``_utf_8_decode`` and returned with a terminator appended.

    Raises InvalidDocument if the input contains a NUL character, and
    InvalidStringData if ``bytes`` input fails UTF-8 validation.
    """
    if not isinstance(string, bytes):
        if "\x00" in string:
            raise InvalidDocument("BSON keys / regex patterns must not contain a NUL character")
        encoded = _utf_8_encode(string)[0]
        return cast(bytes, encoded) + b"\x00"

    if b"\x00" in string:
        raise InvalidDocument("BSON keys / regex patterns must not contain a NUL character")
    try:
        # Decoding is done purely for validation; the result is discarded.
        _utf_8_decode(string, None, True)
    except UnicodeError:
        raise InvalidStringData("strings in documents must be valid UTF-8: %r" % string)
    else:
        return string + b"\x00"
def _element_to_bson(key, value, check_keys, opts):
    """Serialize one document entry (key plus value) to BSON.

    Raises InvalidDocument when ``key`` is not a ``string_type`` or, with
    ``check_keys`` enabled, when it starts with ``$`` or contains ``.``.
    The key is converted with ``_make_name`` only when ``opts.use_unicode``
    is true; otherwise it is handed on unchanged.
    """
    if not isinstance(key, string_type):
        raise InvalidDocument("documents must have only string keys, "
                              "key was %r" % (key, ))

    # The '$' / '.' restrictions are only enforced when the caller asks.
    if check_keys and key.startswith("$"):
        raise InvalidDocument("key %r must not start with '$'" % (key, ))
    if check_keys and "." in key:
        raise InvalidDocument("key %r must not contain '.'" % (key, ))

    name = _make_name(key) if opts.use_unicode else key
    return _name_value_to_bson(name, value, check_keys, opts)
def _make_name(string):
    """Encode a BSON key as a NUL-terminated 'C' string.

    Raises InvalidDocument if the key itself contains a NUL character.
    """
    # Keys can only be text in python 3, so only the text form is checked.
    has_nul = "\x00" in string
    if has_nul:
        raise InvalidDocument("BSON keys / regex patterns must not "
                              "contain a NUL character")
    encoded = _utf_8_encode(string)[0]
    return encoded + b"\x00"
def _name_value_to_bson(name, value, check_keys, opts):
    """Encode a single name, value pair.

    The encoder is looked up in three steps: an exact-type hit in
    ``_ENCODERS``, then the value's ``_type_marker`` in ``_MARKERS``, then
    an ``isinstance`` scan over the types registered in ``_ENCODERS``. A hit
    in the second or third step is cached in ``_ENCODERS`` under the exact
    type so the next value of that type takes the first step.

    Raises InvalidDocument when no encoder matches.
    """

    # First see if the type is already cached. KeyError will only ever
    # happen once per subtype.
    # NOTE(review): a KeyError raised inside the encoder itself is also
    # swallowed here and the value is then retried below.
    try:
        return _ENCODERS[type(value)](name, value, check_keys, opts)
    except KeyError:
        pass

    # Second, fall back to trying _type_marker. This has to be done
    # before the loop below since users could subclass one of our
    # custom types that subclasses a python built-in (e.g. Binary)
    marker = getattr(value, "_type_marker", None)
    if isinstance(marker, int) and marker in _MARKERS:
        func = _MARKERS[marker]
        # Cache this type for faster subsequent lookup.
        _ENCODERS[type(value)] = func
        return func(name, value, check_keys, opts)

    # If all else fails test each base type. This will only happen once for
    # a subtype of a supported base type.
    # Iterate over a snapshot of the keys: this function inserts into
    # _ENCODERS, and an insert made while the dict itself is being iterated
    # (e.g. from another thread) raises "dictionary changed size during
    # iteration".
    for base in tuple(_ENCODERS):
        if isinstance(value, base):
            func = _ENCODERS[base]
            # Cache this type for faster subsequent lookup.
            _ENCODERS[type(value)] = func
            return func(name, value, check_keys, opts)

    raise InvalidDocument("cannot convert value of type %s to bson" %
                          type(value))
Beispiel #6
0
def _make_c_string(string, check_null=False):
    """Return ``string`` as a NUL-terminated 'C' string.

    Text (``text_type``) is UTF-8 encoded; anything else is treated as an
    already-encoded byte string and must decode as UTF-8. With
    ``check_null`` set, input containing a NUL is rejected.

    Raises InvalidDocument for an embedded NUL (when checked) and
    InvalidStringData for byte strings that are not valid UTF-8.
    """
    is_text = isinstance(string, text_type)

    # The NUL probe has to match the kind of string being searched.
    if check_null and ("\x00" if is_text else ZERO) in string:
        raise InvalidDocument("BSON keys / regex patterns must not "
                              "contain a NULL character")

    if is_text:
        return string.encode("utf-8") + ZERO

    try:
        # Decoded only to validate the encoding; the result is discarded.
        string.decode("utf-8")
        return string + ZERO
    except UnicodeError:
        raise InvalidStringData("strings in documents must be valid "
                                "UTF-8: %r" % string)
Beispiel #7
0
def _p_skeleton(query_part):
    """
    Generate a "skeleton" of a document (or embedded document). A
    skeleton is a (unicode) string indicating the keys present in
    a document, but not the values, and is used to group queries
    together which have identical key patterns regardless of the
    particular values used. Keys in the skeleton are always sorted
    lexicographically.

    Raises :class:`~bson.errors.InvalidDocument` when the document
    cannot be converted into a skeleton (this usually indicates that
    the type of a key or value in the document is not known to
    Professor).

    """
    t = type(query_part)
    if t == list:
        out = []
        for element in query_part:
            sub = skeleton(element)
            if sub is not None:
                out.append(sub)
        return u'[%s]' % ','.join(out)
    elif t in (dict, SON):
        out = []
        for key in sorted(query_part.keys()):
            sub = skeleton(query_part[key])
            if sub is not None:
                out.append('%s:%s' % (key, sub))
            else:
                out.append(key)
        return u'{%s}' % ','.join(out)
    elif t not in BSON_TYPES:
        raise InvalidDocument('unknown BSON type %r' % t)
def _name_value_to_bson(name,
                        value,
                        check_keys,
                        opts,
                        in_custom_call=False,
                        in_fallback_call=False):
    """Encode a single name, value pair.

    Lookup order: cached exact type, ``_type_marker``, the type registry's
    custom encoder, an ``isinstance`` scan of the built-in types, and
    finally the registry's fallback encoder. ``in_custom_call`` and
    ``in_fallback_call`` are set on the recursive calls so that the custom
    and fallback encoders are each applied at most once per value.

    Raises InvalidDocument when nothing can encode ``value``.
    """
    value_type = type(value)

    # Fast path: the exact type is already cached. KeyError will only ever
    # happen once per subtype.
    try:
        return _ENCODERS[value_type](name, value, check_keys, opts)
    except KeyError:
        pass

    # Next, try _type_marker. This has to come before the built-in scan
    # since users could subclass one of our custom types that subclasses
    # a python built-in (e.g. Binary)
    type_marker = getattr(value, "_type_marker", None)
    if isinstance(type_marker, int) and type_marker in _MARKERS:
        encoder = _MARKERS[type_marker]
        # Remember the encoder for this exact type.
        _ENCODERS[value_type] = encoder
        return encoder(name, value, check_keys, opts)

    # Then consult the type registry. Subtypes of registered custom types
    # are not auto-encoded: the lookup is by exact type only.
    if not in_custom_call:
        encoder_map = opts.type_registry._encoder_map
        if encoder_map:
            custom_encoder = encoder_map.get(value_type)
            if custom_encoder is not None:
                return _name_value_to_bson(name,
                                           custom_encoder(value),
                                           check_keys,
                                           opts,
                                           in_custom_call=True)

    # Then look for a built-in base type. This will only happen once for
    # a subtype of a supported base type. Unlike in the C-extensions, this
    # is done after trying the custom type encoder because checking for each
    # subtype is expensive.
    matched = next(
        (base for base in _BUILT_IN_TYPES if isinstance(value, base)), None)
    if matched is not None:
        encoder = _ENCODERS[matched]
        # Remember the encoder for this exact type.
        _ENCODERS[value_type] = encoder
        return encoder(name, value, check_keys, opts)

    # As a last resort, use the fallback encoder if the user provided one
    # and it has not already been applied to this value.
    fallback_encoder = opts.type_registry._fallback_encoder
    if in_fallback_call or fallback_encoder is None:
        raise InvalidDocument("cannot encode object: %r, of type: %r" %
                              (value, type(value)))
    return _name_value_to_bson(name,
                               fallback_encoder(value),
                               check_keys,
                               opts,
                               in_fallback_call=True)
Beispiel #9
0
def desanitize(value):
    """Does the inverse of :func:`~professor.skeleton.sanitize`.

    Recursively walks lists and dicts, turning the escaped key fragments
    ``_$_`` and ``_,_`` back into ``$`` and ``.``. Only exact ``list`` and
    ``dict`` instances are traversed; any other value is returned
    unchanged if its type is in ``BSON_TYPES``.

    Raises :class:`InvalidDocument` for a value whose type is neither
    ``list``, ``dict`` nor in ``BSON_TYPES``.
    """
    t = type(value)
    if t == list:
        # Build a real list: on Python 3 map() is lazy and would hand the
        # caller an iterator instead of the converted list.
        return [desanitize(item) for item in value]
    elif t == dict:
        # items() exists on both Python 2 and 3; iteritems() does not.
        return {k.replace('_$_', '$').replace('_,_', '.'): desanitize(v)
                for k, v in value.items()}
    elif t not in BSON_TYPES:
        raise InvalidDocument('unknown BSON type %r' % t)
    return value
Beispiel #10
0
def sanitize(value):
    """Sanitize a value (e.g. a document) for safe storage in MongoDB.

    Converts periods (``.``) and dollar signs (``$``) in key names to the
    escaped versions ``_,_`` and ``_$_``, recursing through lists and
    dicts. Only exact ``list`` and ``dict`` instances are traversed; any
    other value is returned unchanged if its type is in ``BSON_TYPES``.
    See :func:`~professor.skeleton.desanitize` for the inverse.

    Raises :class:`InvalidDocument` for a value whose type is neither
    ``list``, ``dict`` nor in ``BSON_TYPES``.
    """
    t = type(value)
    if t == list:
        # Build a real list: on Python 3 map() is lazy and would hand the
        # caller an iterator instead of the converted list.
        return [sanitize(item) for item in value]
    elif t == dict:
        # items() exists on both Python 2 and 3; iteritems() does not.
        return {k.replace('$', '_$_').replace('.', '_,_'): sanitize(v)
                for k, v in value.items()}
    elif t not in BSON_TYPES:
        raise InvalidDocument('unknown BSON type %r' % t)
    return value
Beispiel #11
0
def _dict_to_bson(dict, check_keys, top_level=True):
    try:
        elements = ""
        if top_level and "_id" in dict:
            elements += _element_to_bson("_id", dict["_id"], False)
        for (key, value) in dict.iteritems():
            if not top_level or key != "_id":
                elements += _element_to_bson(key, value, check_keys)
    except AttributeError:
        raise TypeError("encoder expected a mapping type but got: %r" % dict)

    length = len(elements) + 5
    if length > 4 * 1024 * 1024:
        raise InvalidDocument("document too large - BSON documents are"
                              "limited to 4 MB")
    return struct.pack("<i", length) + elements + "\x00"
Beispiel #12
0
def _name_value_to_bson(name, value, check_keys, opts,
                        in_fallback_call=False):
    """Encode a single name, value pair.

    A custom encoder registered for the exact type of ``value`` is applied
    first and its result is what gets encoded. The encoder is then looked
    up by exact type in ``_ENCODERS``, by ``_type_marker`` in ``_MARKERS``,
    and by an ``isinstance`` scan over the types in ``_ENCODERS``. If none
    match, the registry's fallback encoder is tried once
    (``in_fallback_call`` marks that recursive call).

    Raises InvalidDocument when nothing can encode ``value``.
    """
    # Custom encoder (if any) takes precedence over default encoders.
    # Using 'if' instead of 'try...except' for performance since this will
    # usually not be true.
    # No support for auto-encoding subtypes of registered custom types.
    if opts.type_registry._encoder_map:
        custom_encoder = opts.type_registry._encoder_map.get(type(value))
        if custom_encoder is not None:
            value = custom_encoder(value)

    # First see if the type is already cached. KeyError will only ever
    # happen once per subtype.
    try:
        return _ENCODERS[type(value)](name, value, check_keys, opts)
    except KeyError:
        pass

    # Second, fall back to trying _type_marker. This has to be done
    # before the loop below since users could subclass one of our
    # custom types that subclasses a python built-in (e.g. Binary)
    marker = getattr(value, "_type_marker", None)
    if isinstance(marker, int) and marker in _MARKERS:
        func = _MARKERS[marker]
        # Cache this type for faster subsequent lookup.
        _ENCODERS[type(value)] = func
        return func(name, value, check_keys, opts)

    # If all else fails test each base type. This will only happen once for
    # a subtype of a supported base type.
    # Iterate over a snapshot of the keys: this function inserts into
    # _ENCODERS, and an insert made while the dict itself is being iterated
    # (e.g. from another thread) raises "dictionary changed size during
    # iteration".
    for base in tuple(_ENCODERS):
        if isinstance(value, base):
            func = _ENCODERS[base]
            # Cache this type for faster subsequent lookup.
            _ENCODERS[type(value)] = func
            return func(name, value, check_keys, opts)

    # As a last resort, try using the fallback encoder, if the user has
    # provided one.
    fallback_encoder = opts.type_registry._fallback_encoder
    if not in_fallback_call and fallback_encoder is not None:
        return _name_value_to_bson(
            name, fallback_encoder(value), check_keys, opts,
            in_fallback_call=True)

    raise InvalidDocument(
        "cannot convert value of type %s to bson" % type(value))
Beispiel #13
0
def _element_to_bson(key, value, check_keys, uuid_subtype):
    """Encode a single key, value pair as one BSON element.

    The element is the type tag for ``value``, followed by ``key`` as a C
    string, followed by the encoded value. All fixed-width numbers are
    packed explicitly little-endian ("<"), as the BSON format requires,
    rather than in the byte order of the host machine.

    :Parameters:
      - `key`: the element name; must be an instance of ``str_type``.
      - `value`: the value to encode.
      - `check_keys`: if true, reject keys that start with ``$`` or
        contain ``.``; the flag is passed on when encoding embedded
        documents and arrays.
      - `uuid_subtype`: how :class:`uuid.UUID` values are stored.
        ``JAVA_LEGACY`` and ``CSHARP_LEGACY`` select the respective legacy
        byte layouts; any other value is used directly as the binary
        subtype.

    Raises :class:`InvalidDocument` for a non-string key, a key rejected
    by `check_keys`, or a value of unsupported type, and
    :class:`OverflowError` for integers that do not fit in 8 bytes.
    """
    if not isinstance(key, str_type):
        # Wrap the key in a tuple so that a tuple key is reported instead
        # of being unpacked as %-format arguments (which raises TypeError).
        raise InvalidDocument("documents must have only string keys, "
                              "key was %r" % (key,))

    if check_keys:
        if key.startswith("$"):
            raise InvalidDocument("key %r must not start with '$'" % (key,))
        if "." in key:
            raise InvalidDocument("key %r must not contain '.'" % (key,))

    name = _make_c_string(key, True)
    if isinstance(value, float):
        return BSONNUM + name + struct.pack("<d", value)

    if _use_uuid and isinstance(value, uuid.UUID):
        # Java Legacy
        if uuid_subtype == JAVA_LEGACY:
            # Python 3.0(.1) returns a bytearray instance for bytes (3.1
            # and newer just return a bytes instance). Convert that to
            # binary_type (here and below) for compatibility.
            from_uuid = binary_type(value.bytes)
            # Each 8-byte half is stored byte-reversed.
            as_legacy_java = from_uuid[0:8][::-1] + from_uuid[8:16][::-1]
            value = Binary(as_legacy_java, subtype=OLD_UUID_SUBTYPE)
        # C# legacy
        elif uuid_subtype == CSHARP_LEGACY:
            # Microsoft GUID representation.
            value = Binary(binary_type(value.bytes_le),
                           subtype=OLD_UUID_SUBTYPE)
        # Python
        else:
            value = Binary(binary_type(value.bytes), subtype=uuid_subtype)

    if isinstance(value, Binary):
        subtype = value.subtype
        if subtype == 2:
            # Subtype 2 carries an extra inner length prefix.
            value = struct.pack("<i", len(value)) + value
        return (BSONBIN + name +
                struct.pack("<i", len(value)) + b(chr(subtype)) + value)
    if isinstance(value, Code):
        cstring = _make_c_string(value)
        if not value.scope:
            length = struct.pack("<i", len(cstring))
            return BSONCOD + name + length + cstring
        scope = _dict_to_bson(value.scope, False, uuid_subtype, False)
        # 8 = the two int32 length fields (total length and code length).
        full_length = struct.pack("<i", 8 + len(cstring) + len(scope))
        length = struct.pack("<i", len(cstring))
        return BSONCWS + name + full_length + length + cstring + scope
    if isinstance(value, binary_type):
        if PY3:
            # Python3 special case. Store 'bytes' as BSON binary subtype 0.
            return (BSONBIN + name +
                    struct.pack("<i", len(value)) + ZERO + value)
        cstring = _make_c_string(value)
        length = struct.pack("<i", len(cstring))
        return BSONSTR + name + length + cstring
    if isinstance(value, text_type):
        cstring = _make_c_string(value)
        length = struct.pack("<i", len(cstring))
        return BSONSTR + name + length + cstring
    if isinstance(value, dict):
        return BSONOBJ + name + _dict_to_bson(value, check_keys, uuid_subtype, False)
    if isinstance(value, (list, tuple)):
        # Arrays are encoded as documents keyed "0", "1", ...
        as_dict = SON(zip([str(i) for i in range(len(value))], value))
        return BSONARR + name + _dict_to_bson(as_dict, check_keys, uuid_subtype, False)
    if isinstance(value, ObjectId):
        return BSONOID + name + value.binary
    # Booleans must be tested before int: bool is a subclass of int.
    if value is True:
        return BSONBOO + name + ONE
    if value is False:
        return BSONBOO + name + ZERO
    if isinstance(value, int):
        # TODO this is an ugly way to check for this...
        if value > MAX_INT64 or value < MIN_INT64:
            raise OverflowError("BSON can only handle up to 8-byte ints")
        if value > MAX_INT32 or value < MIN_INT32:
            return BSONLON + name + struct.pack("<q", value)
        return BSONINT + name + struct.pack("<i", value)
    # 2to3 will convert long to int here since there is no long in python3.
    # That's OK. The previous if block will match instead.
    if isinstance(value, long_type):
        if value > MAX_INT64 or value < MIN_INT64:
            raise OverflowError("BSON can only handle up to 8-byte ints")
        return BSONLON + name + struct.pack("<q", value)
    if isinstance(value, datetime.datetime):
        if value.utcoffset() is not None:
            value = value - value.utcoffset()
        # Floor division keeps the arithmetic in integers on Python 3 and
        # matches the Python 2 result of "/" on ints.
        millis = int(calendar.timegm(value.timetuple()) * 1000 +
                     value.microsecond // 1000)
        return BSONDAT + name + struct.pack("<q", millis)
    if isinstance(value, Timestamp):
        # Both fields are unsigned 32-bit; the increment is written first.
        time = struct.pack("<I", value.time)
        inc = struct.pack("<I", value.inc)
        return BSONTIM + name + inc + time
    if value is None:
        return BSONNUL + name
    if isinstance(value, (RE_TYPE, Regex)):
        pattern = value.pattern
        # Flag letters are appended in alphabetical order.
        flags = ""
        if value.flags & re.IGNORECASE:
            flags += "i"
        if value.flags & re.LOCALE:
            flags += "l"
        if value.flags & re.MULTILINE:
            flags += "m"
        if value.flags & re.DOTALL:
            flags += "s"
        if value.flags & re.UNICODE:
            flags += "u"
        if value.flags & re.VERBOSE:
            flags += "x"
        return BSONRGX + name + _make_c_string(pattern, True) + \
               _make_c_string(flags)
    if isinstance(value, DBRef):
        # Encoded as the document from as_doc(); key checking is disabled.
        return _element_to_bson(key, value.as_doc(), False, uuid_subtype)
    if isinstance(value, MinKey):
        return BSONMIN + name
    if isinstance(value, MaxKey):
        return BSONMAX + name
    if isinstance(value, Decimal):
        return BSONDECIMAL + name + value._to_bson_element_value()

    raise InvalidDocument("cannot convert value of type %s to bson" %
                          type(value))
Beispiel #14
0
def _element_to_bson(key, value, check_keys):
    """Encode a single key, value pair as one BSON element.

    The element is a one-byte type tag, followed by ``key`` as a C string,
    followed by the little-endian encoded value.

    :Parameters:
      - `key`: the element name; must be a ``basestring``.
      - `value`: the value to encode.
      - `check_keys`: if true, reject keys that start with ``$`` or
        contain ``.``; the flag is passed on when encoding embedded
        documents and arrays.

    Raises :class:`InvalidDocument` for a non-string key, a key rejected
    by `check_keys`, or a value of unsupported type, and
    :class:`OverflowError` for integers that do not fit in 8 bytes.
    """
    if not isinstance(key, basestring):
        # Wrap the key in a tuple so that a tuple key is reported instead
        # of being unpacked as %-format arguments (which raises TypeError).
        raise InvalidDocument("documents must have only string keys, "
                              "key was %r" % (key,))

    if check_keys:
        if key.startswith("$"):
            raise InvalidDocument("key %r must not start with '$'" % (key,))
        if "." in key:
            raise InvalidDocument("key %r must not contain '.'" % (key,))

    name = _make_c_string(key, True)
    if isinstance(value, float):
        return "\x01" + name + struct.pack("<d", value)

    # Use Binary w/ subtype 3 for UUID instances
    try:
        import uuid

        if isinstance(value, uuid.UUID):
            value = Binary(value.bytes, subtype=3)
    except ImportError:
        # No uuid module available: UUID values cannot occur.
        pass

    if isinstance(value, Binary):
        subtype = value.subtype
        if subtype == 2:
            # Subtype 2 carries an extra inner length prefix.
            value = struct.pack("<i", len(value)) + value
        return "\x05%s%s%s%s" % (name, struct.pack(
            "<i", len(value)), chr(subtype), value)
    if isinstance(value, Code):
        cstring = _make_c_string(value)
        scope = _dict_to_bson(value.scope, False, False)
        # 8 = the two int32 length fields (total length and code length).
        full_length = struct.pack("<i", 8 + len(cstring) + len(scope))
        length = struct.pack("<i", len(cstring))
        return "\x0F" + name + full_length + length + cstring + scope
    # str and unicode are encoded identically (basestring covers both).
    if isinstance(value, basestring):
        cstring = _make_c_string(value)
        length = struct.pack("<i", len(cstring))
        return "\x02" + name + length + cstring
    if isinstance(value, dict):
        return "\x03" + name + _dict_to_bson(value, check_keys, False)
    if isinstance(value, (list, tuple)):
        # Arrays are encoded as documents keyed "0", "1", ...
        as_dict = SON(zip([str(i) for i in range(len(value))], value))
        return "\x04" + name + _dict_to_bson(as_dict, check_keys, False)
    if isinstance(value, ObjectId):
        return "\x07" + name + value.binary
    # Booleans must be tested before int: bool is a subclass of int.
    if value is True:
        return "\x08" + name + "\x01"
    if value is False:
        return "\x08" + name + "\x00"
    if isinstance(value, int):
        # TODO this is an ugly way to check for this...
        if value > MAX_INT64 or value < MIN_INT64:
            raise OverflowError("BSON can only handle up to 8-byte ints")
        if value > MAX_INT32 or value < MIN_INT32:
            return "\x12" + name + struct.pack("<q", value)
        return "\x10" + name + struct.pack("<i", value)
    if isinstance(value, long):
        # XXX No long type in Python 3
        if value > MAX_INT64 or value < MIN_INT64:
            raise OverflowError("BSON can only handle up to 8-byte ints")
        return "\x12" + name + struct.pack("<q", value)
    if isinstance(value, datetime.datetime):
        if value.utcoffset() is not None:
            value = value - value.utcoffset()
        millis = int(
            calendar.timegm(value.timetuple()) * 1000 +
            value.microsecond / 1000)
        return "\x09" + name + struct.pack("<q", millis)
    if isinstance(value, Timestamp):
        # Both fields are unsigned 32-bit; the increment is written first.
        time = struct.pack("<I", value.time)
        inc = struct.pack("<I", value.inc)
        return "\x11" + name + inc + time
    if value is None:
        return "\x0A" + name
    if isinstance(value, RE_TYPE):
        pattern = value.pattern
        # Flag letters are appended in alphabetical order.
        flags = ""
        if value.flags & re.IGNORECASE:
            flags += "i"
        if value.flags & re.LOCALE:
            flags += "l"
        if value.flags & re.MULTILINE:
            flags += "m"
        if value.flags & re.DOTALL:
            flags += "s"
        if value.flags & re.UNICODE:
            flags += "u"
        if value.flags & re.VERBOSE:
            flags += "x"
        return "\x0B" + name + _make_c_string(pattern, True) + \
            _make_c_string(flags)
    if isinstance(value, DBRef):
        # Encoded as the document from as_doc(); key checking is disabled.
        return _element_to_bson(key, value.as_doc(), False)
    if isinstance(value, MinKey):
        return "\xFF" + name
    if isinstance(value, MaxKey):
        return "\x7F" + name

    raise InvalidDocument("cannot convert value of type %s to bson" %
                          type(value))