def _element_to_bson(key: Any, value: Any, check_keys: bool, opts: CodecOptions) -> bytes:
    """Serialize one document entry (key plus value) to BSON bytes.

    The key must be a ``str``.  When ``check_keys`` is true it must also
    neither begin with ``$`` nor contain ``.``.  The validated key is turned
    into a BSON name with ``_make_name`` and the pair is handed to
    ``_name_value_to_bson``.

    :raises InvalidDocument: if the key is not a ``str`` or fails the
        ``check_keys`` restrictions.
    """
    if not isinstance(key, str):
        raise InvalidDocument("documents must have only string keys, key was %r" % (key,))

    if check_keys:
        # Pick the first applicable complaint; '$' prefix is tested before '.'.
        problem = None
        if key.startswith("$"):
            problem = "key %r must not start with '$'"
        elif "." in key:
            problem = "key %r must not contain '.'"
        if problem is not None:
            raise InvalidDocument(problem % (key,))

    return _name_value_to_bson(_make_name(key), value, check_keys, opts)
def _make_c_string_check(string: Union[str, bytes]) -> bytes:
    """Build a NUL-terminated C string, rejecting embedded NUL characters.

    ``str`` input is encoded with ``_utf_8_encode``.  ``bytes`` input is
    passed through ``_utf_8_decode`` only as a validity check (the decoded
    result is discarded) and is then returned unchanged plus the terminator.

    :raises InvalidDocument: if the input contains a NUL character.
    :raises InvalidStringData: if ``bytes`` input raises ``UnicodeError``
        while being decoded.
    """
    if not isinstance(string, bytes):
        if "\x00" in string:
            raise InvalidDocument("BSON keys / regex patterns must not contain a NUL character")
        encoded = _utf_8_encode(string)[0]
        return cast(bytes, encoded) + b"\x00"

    if b"\x00" in string:
        raise InvalidDocument("BSON keys / regex patterns must not contain a NUL character")
    try:
        _utf_8_decode(string, None, True)
    except UnicodeError:
        raise InvalidStringData("strings in documents must be valid UTF-8: %r" % string)
    return string + b"\x00"
def _element_to_bson(key, value, check_keys, opts):
    """Validate a document key and encode the key/value pair.

    The key must be an instance of ``string_type``; with ``check_keys`` set
    it may not start with ``$`` or contain ``.``.  When ``opts.use_unicode``
    is true the key is converted with ``_make_name``; otherwise it is passed
    to ``_name_value_to_bson`` as-is.

    Raises InvalidDocument when the key is rejected.
    """
    if not isinstance(key, string_type):
        raise InvalidDocument(
            "documents must have only string keys, key was %r" % (key, ))

    if check_keys:
        if key.startswith("$"):
            raise InvalidDocument("key %r must not start with '$'" % (key, ))
        if "." in key:
            raise InvalidDocument("key %r must not contain '.'" % (key, ))

    # NOTE(review): without use_unicode the raw key is forwarded unencoded;
    # confirm downstream encoders expect that.
    name = _make_name(key) if opts.use_unicode else key
    return _name_value_to_bson(name, value, check_keys, opts)
def _make_name(string):
    """Encode a text key as a NUL-terminated byte string for use as a BSON key.

    Raises InvalidDocument if the key contains a NUL character.
    """
    # Keys are always text on Python 3, so a text membership test suffices.
    if "\x00" not in string:
        return _utf_8_encode(string)[0] + b"\x00"
    raise InvalidDocument("BSON keys / regex patterns must not "
                          "contain a NUL character")
def _name_value_to_bson(name, value, check_keys, opts):
    """Encode one (name, value) pair by dispatching on the value's type.

    Lookup order:

    1. the exact type of ``value`` in ``_ENCODERS``;
    2. the value's ``_type_marker`` attribute in ``_MARKERS``;
    3. an ``isinstance`` scan over the types registered in ``_ENCODERS``.

    A hit in step 2 or 3 is stored in ``_ENCODERS`` under the exact type so
    the next value of that type is resolved by step 1.

    Raises InvalidDocument when no encoder matches.
    """
    value_type = type(value)

    # Step 1: exact-type cache.  The try deliberately wraps the encoder call
    # too, so a KeyError raised inside the encoder also falls through.
    try:
        return _ENCODERS[value_type](name, value, check_keys, opts)
    except KeyError:
        pass

    # Step 2: _type_marker.  Must precede the isinstance scan because users
    # may subclass one of our custom types that itself subclasses a python
    # built-in (e.g. Binary).
    marker = getattr(value, "_type_marker", None)
    if isinstance(marker, int) and marker in _MARKERS:
        encoder = _MARKERS[marker]
        _ENCODERS[value_type] = encoder
        return encoder(name, value, check_keys, opts)

    # Step 3: scan every registered base type; happens once per subtype
    # thanks to the cache write below.
    for base in _ENCODERS:
        if not isinstance(value, base):
            continue
        encoder = _ENCODERS[base]
        _ENCODERS[value_type] = encoder
        return encoder(name, value, check_keys, opts)

    raise InvalidDocument("cannot convert value of type %s to bson" % value_type)
def _make_c_string(string, check_null=False):
    """Return ``string`` as UTF-8 bytes followed by the ``ZERO`` terminator.

    ``text_type`` input is encoded as UTF-8.  Any other input is treated as
    an already-encoded byte string: it is decoded once purely to validate it
    and is then returned unchanged plus the terminator.

    When ``check_null`` is true, input containing a NUL raises
    InvalidDocument.  Byte input that fails to decode raises
    InvalidStringData.
    """
    nul_message = ("BSON keys / regex patterns must not "
                   "contain a NULL character")

    if not isinstance(string, text_type):
        if check_null and ZERO in string:
            raise InvalidDocument(nul_message)
        try:
            # Validation only; the decoded text is thrown away.
            string.decode("utf-8")
            return string + ZERO
        except UnicodeError:
            raise InvalidStringData("strings in documents must be valid "
                                    "UTF-8: %r" % string)

    if check_null and "\x00" in string:
        raise InvalidDocument(nul_message)
    return string.encode("utf-8") + ZERO
def _p_skeleton(query_part):
    """
    Build the "skeleton" of a document (or embedded document).

    The skeleton is a (unicode) string listing which keys are present in a
    document while ignoring their values; it is used to group together
    queries that share a key pattern regardless of the values involved.
    Keys always appear in lexicographically sorted order.

    Lists produce ``[...]`` containing the skeletons of their elements
    (elements whose skeleton is ``None`` are omitted).  Dicts and ``SON``
    produce ``{...}`` with one ``key`` or ``key:sub`` entry per key.  Any
    other value whose type is in ``BSON_TYPES`` yields ``None``.

    Raises :class:`~bson.errors.InvalidDocument` when the value's type is
    not one of the above (usually meaning the type of a key or value in the
    document is not known to Professor).
    """
    kind = type(query_part)

    if kind == list:
        subs = (skeleton(element) for element in query_part)
        return u'[%s]' % ','.join([sub for sub in subs if sub is not None])

    if kind in (dict, SON):
        pieces = []
        for key in sorted(query_part.keys()):
            sub = skeleton(query_part[key])
            # Leaf values contribute only their key name.
            pieces.append(key if sub is None else '%s:%s' % (key, sub))
        return u'{%s}' % ','.join(pieces)

    if kind not in BSON_TYPES:
        raise InvalidDocument('unknown BSON type %r' % kind)

    # Known scalar BSON type: it has no skeleton of its own.
    return None
def _name_value_to_bson(name, value, check_keys, opts, in_custom_call=False, in_fallback_call=False): """Encode a single name, value pair.""" # First see if the type is already cached. KeyError will only ever # happen once per subtype. try: return _ENCODERS[type(value)](name, value, check_keys, opts) except KeyError: pass # Second, fall back to trying _type_marker. This has to be done # before the loop below since users could subclass one of our # custom types that subclasses a python built-in (e.g. Binary) marker = getattr(value, "_type_marker", None) if isinstance(marker, int) and marker in _MARKERS: func = _MARKERS[marker] # Cache this type for faster subsequent lookup. _ENCODERS[type(value)] = func return func(name, value, check_keys, opts) # Third, check if a type encoder is registered for this type. # Note that subtypes of registered custom types are not auto-encoded. if not in_custom_call and opts.type_registry._encoder_map: custom_encoder = opts.type_registry._encoder_map.get(type(value)) if custom_encoder is not None: return _name_value_to_bson(name, custom_encoder(value), check_keys, opts, in_custom_call=True) # Fourth, test each base type. This will only happen once for # a subtype of a supported base type. Unlike in the C-extensions, this # is done after trying the custom type encoder because checking for each # subtype is expensive. for base in _BUILT_IN_TYPES: if isinstance(value, base): func = _ENCODERS[base] # Cache this type for faster subsequent lookup. _ENCODERS[type(value)] = func return func(name, value, check_keys, opts) # As a last resort, try using the fallback encoder, if the user has # provided one. fallback_encoder = opts.type_registry._fallback_encoder if not in_fallback_call and fallback_encoder is not None: return _name_value_to_bson(name, fallback_encoder(value), check_keys, opts, in_fallback_call=True) raise InvalidDocument("cannot encode object: %r, of type: %r" % (value, type(value)))
def desanitize(value):
    """Does the inverse of :func:`~professor.skeleton.sanitize`.

    Recursively walks lists and dicts, turning the escaped sequences
    ``_$_`` and ``_,_`` in dict keys back into ``$`` and ``.``.  Only
    exact ``list`` and ``dict`` instances are descended into; any other
    value is returned unchanged if its type is in ``BSON_TYPES``.

    :returns: a new list/dict for container input, otherwise ``value``.
    :raises InvalidDocument: if a value's type is not a list, a dict, or a
        member of ``BSON_TYPES``.
    """
    t = type(value)
    if t == list:
        # Build a real list: ``map`` would return a lazy iterator on
        # Python 3 and defer both the work and any InvalidDocument error.
        return [desanitize(item) for item in value]
    elif t == dict:
        # ``items()`` works on Python 2 and 3; ``iteritems()`` is 2-only.
        return dict((k.replace('_$_', '$').replace('_,_', '.'), desanitize(v))
                    for k, v in value.items())
    elif t not in BSON_TYPES:
        raise InvalidDocument('unknown BSON type %r' % t)
    else:
        return value
def sanitize(value):
    """Sanitize a value (e.g. a document) for safe storage in MongoDB.

    Converts periods (``.``) and dollar signs (``$``) in key names to the
    escaped versions ``_,_`` and ``_$_``, recursing through lists and
    dicts.  Only exact ``list`` and ``dict`` instances are descended into;
    any other value is returned unchanged if its type is in ``BSON_TYPES``.
    See :func:`~professor.skeleton.desanitize` for the inverse.

    :returns: a new list/dict for container input, otherwise ``value``.
    :raises InvalidDocument: if a value's type is not a list, a dict, or a
        member of ``BSON_TYPES``.
    """
    t = type(value)
    if t == list:
        # Build a real list: ``map`` would return a lazy iterator on
        # Python 3 and defer both the work and any InvalidDocument error.
        return [sanitize(item) for item in value]
    elif t == dict:
        # ``items()`` works on Python 2 and 3; ``iteritems()`` is 2-only.
        return dict((k.replace('$', '_$_').replace('.', '_,_'), sanitize(v))
                    for k, v in value.items())
    elif t not in BSON_TYPES:
        raise InvalidDocument('unknown BSON type %r' % t)
    else:
        return value
def _dict_to_bson(dict, check_keys, top_level=True):
    """Encode a mapping as a complete BSON document.

    The output is a little-endian int32 total length, the encoded elements,
    and a trailing NUL byte.  For a top-level document an ``_id`` entry, if
    present, is written first and is encoded without key checking.

    NOTE(review): written for Python 2 byte strings (``iteritems()`` and
    ``str`` concatenated with ``struct.pack`` output).

    :param dict: the mapping to encode (the name shadows the builtin but is
        kept so keyword callers keep working).
    :param check_keys: forwarded to ``_element_to_bson`` for every key other
        than a top-level ``_id``.
    :param top_level: whether this is the outermost document.
    :raises TypeError: if an AttributeError occurs while encoding, which is
        taken to mean ``dict`` is not a mapping.
    :raises InvalidDocument: if the encoded document exceeds 4 MB.
    """
    try:
        # Collect the pieces and join once rather than growing a string
        # with repeated ``+=``.
        parts = []
        if top_level and "_id" in dict:
            parts.append(_element_to_bson("_id", dict["_id"], False))
        for (key, value) in dict.iteritems():
            if not top_level or key != "_id":
                parts.append(_element_to_bson(key, value, check_keys))
    except AttributeError:
        # NOTE(review): this also catches AttributeErrors raised while
        # encoding nested values, not only a missing ``iteritems``.
        raise TypeError("encoder expected a mapping type but got: %r" % dict)

    elements = "".join(parts)
    # 4 bytes for the length prefix plus 1 for the trailing NUL.
    length = len(elements) + 5
    if length > 4 * 1024 * 1024:
        raise InvalidDocument("document too large - BSON documents are "
                              "limited to 4 MB")
    return struct.pack("<i", length) + elements + "\x00"
def _name_value_to_bson(name, value, check_keys, opts, in_fallback_call=False):
    """Encode one (name, value) pair, honouring user-registered encoders.

    A custom encoder registered for the exact type of ``value`` in
    ``opts.type_registry._encoder_map`` is applied first and replaces the
    value.  The (possibly replaced) value is then dispatched through:

    1. its exact type in ``_ENCODERS``;
    2. its ``_type_marker`` attribute in ``_MARKERS``;
    3. an ``isinstance`` scan over the types in ``_ENCODERS``;
    4. ``opts.type_registry._fallback_encoder``, whose output is encoded by
       re-entering this function with ``in_fallback_call`` set so the
       fallback is not applied twice.

    Raises InvalidDocument when nothing can encode the value.
    """
    # Custom encoders take precedence over the defaults.  A plain truthiness
    # test is used because the map is usually empty.  Subtypes of registered
    # custom types are not auto-encoded (exact-type lookup only).
    encoder_map = opts.type_registry._encoder_map
    if encoder_map:
        custom_encoder = encoder_map.get(type(value))
        if custom_encoder is not None:
            value = custom_encoder(value)

    # Exact-type cache; the try covers the encoder call as well, so a
    # KeyError raised inside it also falls through.
    try:
        return _ENCODERS[type(value)](name, value, check_keys, opts)
    except KeyError:
        pass

    value_type = type(value)

    # _type_marker has to be tried before the isinstance scan since users
    # could subclass one of our custom types that subclasses a python
    # built-in (e.g. Binary).
    marker = getattr(value, "_type_marker", None)
    if isinstance(marker, int) and marker in _MARKERS:
        encoder = _MARKERS[marker]
        _ENCODERS[value_type] = encoder
        return encoder(name, value, check_keys, opts)

    # Scan all registered base types; the cache write makes this a one-off
    # cost per subtype.
    for base in _ENCODERS:
        if not isinstance(value, base):
            continue
        encoder = _ENCODERS[base]
        _ENCODERS[value_type] = encoder
        return encoder(name, value, check_keys, opts)

    # Last resort: the user-supplied fallback encoder, at most once.
    fallback_encoder = opts.type_registry._fallback_encoder
    if in_fallback_call or fallback_encoder is None:
        raise InvalidDocument(
            "cannot convert value of type %s to bson" % type(value))
    return _name_value_to_bson(
        name, fallback_encoder(value), check_keys, opts, True)
def _element_to_bson(key, value, check_keys, uuid_subtype):
    """Encode a single key/value pair as a BSON element.

    The element is the type byte, the key as a C string, and the encoded
    value.  The value's Python type selects the BSON type; the checks run
    in the order written below, so e.g. ``True``/``False`` are handled
    before the generic ``int`` branch.

    :param key: document key; must be an instance of ``str_type``.
    :param value: the value to encode.
    :param check_keys: when true, reject keys starting with ``$`` or
        containing ``.``; also forwarded to nested documents and arrays.
    :param uuid_subtype: selects how ``uuid.UUID`` values are converted to
        ``Binary`` (``JAVA_LEGACY``, ``CSHARP_LEGACY``, or used directly as
        the binary subtype); also forwarded to nested documents.
    :raises InvalidDocument: for an invalid key or an unsupported value type.
    :raises OverflowError: for integers outside the signed 64-bit range.
    """
    if not isinstance(key, str_type):
        # Wrap in a 1-tuple: a tuple key would otherwise be unpacked as the
        # format arguments and raise TypeError instead.
        raise InvalidDocument("documents must have only string keys, "
                              "key was %r" % (key,))

    if check_keys:
        if key.startswith("$"):
            raise InvalidDocument("key %r must not start with '$'" % (key,))
        if "." in key:
            raise InvalidDocument("key %r must not contain '.'" % (key,))

    name = _make_c_string(key, True)

    # All struct formats carry an explicit '<' so the output is
    # little-endian regardless of the host's native byte order.
    if isinstance(value, float):
        return BSONNUM + name + struct.pack("<d", value)

    if _use_uuid:
        if isinstance(value, uuid.UUID):
            # Java Legacy
            if uuid_subtype == JAVA_LEGACY:
                # Python 3.0(.1) returns a bytearray instance for bytes (3.1
                # and newer just return a bytes instance). Convert that to
                # binary_type (here and below) for compatibility.
                from_uuid = binary_type(value.bytes)
                # Each 8-byte half is byte-reversed.
                as_legacy_java = from_uuid[0:8][::-1] + from_uuid[8:16][::-1]
                value = Binary(as_legacy_java, subtype=OLD_UUID_SUBTYPE)
            # C# legacy
            elif uuid_subtype == CSHARP_LEGACY:
                # Microsoft GUID representation.
                value = Binary(binary_type(value.bytes_le),
                               subtype=OLD_UUID_SUBTYPE)
            # Python
            else:
                value = Binary(binary_type(value.bytes), subtype=uuid_subtype)

    if isinstance(value, Binary):
        subtype = value.subtype
        if subtype == 2:
            # Subtype 2 carries an extra inner length prefix.
            value = struct.pack("<i", len(value)) + value
        return (BSONBIN + name +
                struct.pack("<i", len(value)) + b(chr(subtype)) + value)
    if isinstance(value, Code):
        cstring = _make_c_string(value)
        if not value.scope:
            length = struct.pack("<i", len(cstring))
            return BSONCOD + name + length + cstring
        scope = _dict_to_bson(value.scope, False, uuid_subtype, False)
        # 8 = the two int32 length fields (total length and string length).
        full_length = struct.pack("<i", 8 + len(cstring) + len(scope))
        length = struct.pack("<i", len(cstring))
        return BSONCWS + name + full_length + length + cstring + scope
    if isinstance(value, binary_type):
        if PY3:
            # Python3 special case. Store 'bytes' as BSON binary subtype 0.
            return (BSONBIN + name +
                    struct.pack("<i", len(value)) + ZERO + value)
        cstring = _make_c_string(value)
        length = struct.pack("<i", len(cstring))
        return BSONSTR + name + length + cstring
    if isinstance(value, text_type):
        cstring = _make_c_string(value)
        length = struct.pack("<i", len(cstring))
        return BSONSTR + name + length + cstring
    if isinstance(value, dict):
        return BSONOBJ + name + _dict_to_bson(value, check_keys,
                                              uuid_subtype, False)
    if isinstance(value, (list, tuple)):
        # Arrays are encoded as documents keyed by "0", "1", ...
        as_dict = SON(zip([str(i) for i in range(len(value))], value))
        return BSONARR + name + _dict_to_bson(as_dict, check_keys,
                                              uuid_subtype, False)
    if isinstance(value, ObjectId):
        return BSONOID + name + value.binary
    # Identity checks so booleans are caught before the int branch below.
    if value is True:
        return BSONBOO + name + ONE
    if value is False:
        return BSONBOO + name + ZERO
    if isinstance(value, int):
        # TODO this is an ugly way to check for this...
        if value > MAX_INT64 or value < MIN_INT64:
            raise OverflowError("BSON can only handle up to 8-byte ints")
        if value > MAX_INT32 or value < MIN_INT32:
            return BSONLON + name + struct.pack("<q", value)
        return BSONINT + name + struct.pack("<i", value)
    # 2to3 will convert long to int here since there is no long in python3.
    # That's OK. The previous if block will match instead.
    if isinstance(value, long_type):
        if value > MAX_INT64 or value < MIN_INT64:
            raise OverflowError("BSON can only handle up to 8-byte ints")
        return BSONLON + name + struct.pack("<q", value)
    if isinstance(value, datetime.datetime):
        if value.utcoffset() is not None:
            value = value - value.utcoffset()
        # Floor division keeps this integer arithmetic on Python 3 as well
        # (a plain '/' would produce a float there).
        millis = int(calendar.timegm(value.timetuple()) * 1000 +
                     value.microsecond // 1000)
        return BSONDAT + name + struct.pack("<q", millis)
    if isinstance(value, Timestamp):
        # Both halves are unsigned 32-bit; the increment is written first.
        time = struct.pack("<I", value.time)
        inc = struct.pack("<I", value.inc)
        return BSONTIM + name + inc + time
    if value is None:
        return BSONNUL + name
    if isinstance(value, (RE_TYPE, Regex)):
        pattern = value.pattern
        flags = ""
        if value.flags & re.IGNORECASE:
            flags += "i"
        if value.flags & re.LOCALE:
            flags += "l"
        if value.flags & re.MULTILINE:
            flags += "m"
        if value.flags & re.DOTALL:
            flags += "s"
        if value.flags & re.UNICODE:
            flags += "u"
        if value.flags & re.VERBOSE:
            flags += "x"
        return BSONRGX + name + _make_c_string(pattern, True) + \
            _make_c_string(flags)
    if isinstance(value, DBRef):
        # Encoded as its document form, without key checking.
        return _element_to_bson(key, value.as_doc(), False, uuid_subtype)
    if isinstance(value, MinKey):
        return BSONMIN + name
    if isinstance(value, MaxKey):
        return BSONMAX + name
    if isinstance(value, Decimal):
        return BSONDECIMAL + name + value._to_bson_element_value()

    raise InvalidDocument("cannot convert value of type %s to bson" %
                          type(value))
def _element_to_bson(key, value, check_keys):
    """Encode a single key/value pair as a BSON element.

    The element is the type byte, the key as a C string, and the encoded
    value.  The value's Python type selects the BSON type; the checks run
    in the order written below, so ``True``/``False`` are handled before
    the generic ``int`` branch.

    NOTE(review): relies on Python 2 names (``basestring``, ``unicode``,
    ``long``) and on ``str`` being a byte string.

    :param key: document key; must be a ``basestring``.
    :param value: the value to encode.
    :param check_keys: when true, reject keys starting with ``$`` or
        containing ``.``; also forwarded to nested documents and arrays.
    :raises InvalidDocument: for an invalid key or an unsupported value type.
    :raises OverflowError: for integers outside the signed 64-bit range.
    """
    if not isinstance(key, basestring):
        # Wrap in a 1-tuple: a tuple key would otherwise be unpacked as the
        # format arguments and raise TypeError instead.
        raise InvalidDocument("documents must have only string keys, "
                              "key was %r" % (key,))

    if check_keys:
        if key.startswith("$"):
            raise InvalidDocument("key %r must not start with '$'" % (key,))
        if "." in key:
            raise InvalidDocument("key %r must not contain '.'" % (key,))

    name = _make_c_string(key, True)
    if isinstance(value, float):
        return "\x01" + name + struct.pack("<d", value)

    # Use Binary w/ subtype 3 for UUID instances
    try:
        import uuid

        if isinstance(value, uuid.UUID):
            value = Binary(value.bytes, subtype=3)
    except ImportError:
        # No uuid module available: UUID values cannot occur, carry on.
        pass

    if isinstance(value, Binary):
        subtype = value.subtype
        if subtype == 2:
            # Subtype 2 carries an extra inner length prefix.
            value = struct.pack("<i", len(value)) + value
        return "\x05%s%s%s%s" % (name, struct.pack(
            "<i", len(value)), chr(subtype), value)
    if isinstance(value, Code):
        cstring = _make_c_string(value)
        scope = _dict_to_bson(value.scope, False, False)
        # 8 = the two int32 length fields (total length and string length).
        full_length = struct.pack("<i", 8 + len(cstring) + len(scope))
        length = struct.pack("<i", len(cstring))
        return "\x0F" + name + full_length + length + cstring + scope
    if isinstance(value, str):
        cstring = _make_c_string(value)
        length = struct.pack("<i", len(cstring))
        return "\x02" + name + length + cstring
    if isinstance(value, unicode):
        cstring = _make_c_string(value)
        length = struct.pack("<i", len(cstring))
        return "\x02" + name + length + cstring
    if isinstance(value, dict):
        return "\x03" + name + _dict_to_bson(value, check_keys, False)
    if isinstance(value, (list, tuple)):
        # Arrays are encoded as documents keyed by "0", "1", ...
        as_dict = SON(zip([str(i) for i in range(len(value))], value))
        return "\x04" + name + _dict_to_bson(as_dict, check_keys, False)
    if isinstance(value, ObjectId):
        return "\x07" + name + value.binary
    # Identity checks so booleans are caught before the int branch below.
    if value is True:
        return "\x08" + name + "\x01"
    if value is False:
        return "\x08" + name + "\x00"
    if isinstance(value, int):
        # TODO this is an ugly way to check for this...
        if value > MAX_INT64 or value < MIN_INT64:
            raise OverflowError("BSON can only handle up to 8-byte ints")
        if value > MAX_INT32 or value < MIN_INT32:
            return "\x12" + name + struct.pack("<q", value)
        return "\x10" + name + struct.pack("<i", value)
    if isinstance(value, long):
        # XXX No long type in Python 3
        if value > MAX_INT64 or value < MIN_INT64:
            raise OverflowError("BSON can only handle up to 8-byte ints")
        return "\x12" + name + struct.pack("<q", value)
    if isinstance(value, datetime.datetime):
        if value.utcoffset() is not None:
            value = value - value.utcoffset()
        millis = int(
            calendar.timegm(value.timetuple()) * 1000 +
            value.microsecond / 1000)
        return "\x09" + name + struct.pack("<q", millis)
    if isinstance(value, Timestamp):
        # Both halves are unsigned 32-bit; the increment is written first.
        time = struct.pack("<I", value.time)
        inc = struct.pack("<I", value.inc)
        return "\x11" + name + inc + time
    if value is None:
        return "\x0A" + name
    if isinstance(value, RE_TYPE):
        pattern = value.pattern
        flags = ""
        if value.flags & re.IGNORECASE:
            flags += "i"
        if value.flags & re.LOCALE:
            flags += "l"
        if value.flags & re.MULTILINE:
            flags += "m"
        if value.flags & re.DOTALL:
            flags += "s"
        if value.flags & re.UNICODE:
            flags += "u"
        if value.flags & re.VERBOSE:
            flags += "x"
        return "\x0B" + name + _make_c_string(pattern, True) + \
            _make_c_string(flags)
    if isinstance(value, DBRef):
        # Encoded as its document form, without key checking.
        return _element_to_bson(key, value.as_doc(), False)
    if isinstance(value, MinKey):
        return "\xFF" + name
    if isinstance(value, MaxKey):
        return "\x7F" + name

    raise InvalidDocument("cannot convert value of type %s to bson" %
                          type(value))