def decode_all(data, codec_options=DEFAULT_CODEC_OPTIONS):
    """Decode a buffer of concatenated BSON documents into a list.

    :Parameters:
      - `data`: bytes holding zero or more BSON documents laid end to end,
        each starting with its 4-byte size prefix.
      - `codec_options`: a ``CodecOptions`` instance; its ``document_class``
        selects how each document is materialized.

    Returns a list with one entry per document.  When
    ``_raw_document_class`` accepts ``codec_options.document_class`` each
    entry is that class built from the document's raw bytes; otherwise each
    entry is the result of ``_elements_to_dict``.

    Raises ``bson._CODEC_OPTIONS_TYPE_ERROR`` if `codec_options` is not a
    ``CodecOptions``.  Raises ``bson.InvalidBSON`` for malformed input; any
    other exception raised while decoding is re-raised as ``InvalidBSON``
    with the original traceback.
    """
    if not isinstance(codec_options, CodecOptions):
        raise bson._CODEC_OPTIONS_TYPE_ERROR

    # Smallest possible document: 4-byte size prefix + 1 terminator byte.
    min_doc_size = 5

    docs = []
    position = 0
    data_len = len(data)
    end = data_len - 1
    use_raw = _raw_document_class(codec_options.document_class)
    try:
        while position < end:
            obj_size = bson._UNPACK_INT(data[position:position + 4])[0]
            # Reject sizes that are too small as well as sizes that overrun
            # the buffer.  A zero or negative size would otherwise leave
            # ``position`` stuck (or moving backwards) and the terminator
            # check below could be satisfied by the previous document's
            # trailing NUL byte.
            if obj_size < min_doc_size or data_len - position < obj_size:
                raise bson.InvalidBSON("invalid object size")
            obj_end = position + obj_size - 1
            if data[obj_end:position + obj_size] != b"\x00":
                raise bson.InvalidBSON("bad eoo")
            if use_raw:
                docs.append(
                    codec_options.document_class(
                        data[position:obj_end + 1], codec_options))
            else:
                docs.append(
                    _elements_to_dict(
                        data, position + 4, obj_end, codec_options))
            position += obj_size
        return docs
    except bson.InvalidBSON:
        raise
    except Exception:
        # Change exception type to InvalidBSON but preserve traceback.
        _, exc_value, exc_tb = sys.exc_info()
        bson.reraise(bson.InvalidBSON, exc_value, exc_tb)
def getvals(data, opts):
    """Yield ``(name, value)`` for each top-level element of a BSON document.

    :Parameters:
      - `data`: the document's bytes, starting with its 4-byte size prefix.
      - `opts`: passed through unchanged to ``bson._element_to_dict``.

    Elements are decoded lazily, one per iteration, by
    ``bson._element_to_dict``; whatever that helper raises propagates to the
    consumer of the generator.
    """
    # NOTE: a leftover ``ipdb.set_trace()`` breakpoint used to sit here; it
    # halted every caller and required a third-party debugger at runtime.
    obj_size = bson._UNPACK_INT(data[:4])[0]
    # Offset of the document's terminator byte; elements end before it.
    obj_end = obj_size - 1
    pos = 4
    while pos < obj_end:
        ename, value, pos = bson._element_to_dict(data, pos, obj_end, opts)
        yield ename, value
def getvals(data, opts):
    """Iterate over the top-level elements of a BSON document.

    :Parameters:
      - `data`: the document's bytes, starting with its 4-byte size prefix.
      - `opts`: passed through unchanged to ``bson._element_to_dict``.

    Yields ``(name, value)`` tuples in the order the elements appear.
    Decoding is lazy; errors raised by ``bson._element_to_dict`` surface
    when the generator is advanced.
    """
    # NOTE: the interactive ``ipdb`` breakpoint that used to open this
    # function was debugging residue and has been removed so callers are no
    # longer dropped into a debugger.
    obj_size = bson._UNPACK_INT(data[:4])[0]
    # The last byte of the document is its terminator, not an element.
    obj_end = obj_size - 1
    pos = 4
    while pos < obj_end:
        ename, value, pos = bson._element_to_dict(data, pos, obj_end, opts)
        yield ename, value
def _get_object(data, position, obj_end, opts, dummy):
    """Slice the raw bytes of the embedded document starting at `position`.

    Reads the 4-byte size prefix at `position`, checks that the document
    ends with a NUL byte and that it ends before `obj_end`, then returns
    ``(raw_bytes, next_position)`` without decoding the contents.
    `opts` and `dummy` are accepted but not used here.

    Raises ``bson.InvalidBSON`` when the terminator is missing ("bad eoo")
    or the document reaches `obj_end` ("invalid object length").
    """
    size = bson._UNPACK_INT(data[position:position + 4])[0]
    stop = position + size
    terminator = stop - 1
    if data[terminator:stop] != b"\x00":
        raise bson.InvalidBSON("bad eoo")
    if terminator >= obj_end:
        raise bson.InvalidBSON("invalid object length")
    return data[position:stop], stop
def getval(data, opts, name):
    """Return the value of the first top-level element called `name`.

    `data` is a BSON document's bytes starting with its 4-byte size prefix;
    `opts` is forwarded to ``bson._element_to_dict``.  Elements are decoded
    in order and the scan stops at the first match.

    Raises ``KeyError(name)`` if no element has that name.
    """
    # Offset of the terminator byte: the size prefix minus one.
    doc_end = bson._UNPACK_INT(data[:4])[0] - 1
    cursor = 4
    while cursor < doc_end:
        key, found, cursor = bson._element_to_dict(
            data, cursor, doc_end, opts)
        if key == name:
            return found
    raise KeyError(name)
def __inflated(self):
    """Return the decoded form of the raw document, decoding it on first use.

    The result is stored on the instance, so later calls return the same
    dict without decoding again.
    """
    if self.__inflated_doc is not None:
        return self.__inflated_doc

    # The object's size was already validated when this document was
    # created, so it is not checked again here.
    last_offset = _UNPACK_INT(self.__raw[:4])[0] - 1
    self.__inflated_doc = dict(
        _iterate_elements(self.__raw, 4, last_offset, self.__codec_options))
    return self.__inflated_doc
def _get_string(data, position, obj_end, opts, dummy):
    """Decode the length-prefixed, NUL-terminated string at `position`.

    Returns ``(text, next_position)`` where ``text`` is the payload decoded
    as UTF-8 (terminator excluded) and ``next_position`` is the offset just
    past the terminator.  `opts` and `dummy` are accepted but not used here.

    Raises ``bson.InvalidBSON`` if the declared length is below 1 or does
    not fit before `obj_end`, or if the final byte is not NUL.
    """
    length = bson._UNPACK_INT(data[position:position + 4])[0]
    start = position + 4
    if length < 1 or obj_end - start < length:
        raise bson.InvalidBSON("invalid string length")
    # The declared length counts the trailing NUL byte.
    terminator = start + length - 1
    if data[terminator:terminator + 1] != b"\x00":
        raise bson.InvalidBSON("invalid end of string")
    text = data[start:terminator].decode('utf8')
    return text, terminator + 1
def __inflated(self):
    """Return the decoded form of the raw document, decoding it on first use.

    The decoded dict is stored on the instance only after the element
    lengths have been checked against the document size, so a failed decode
    never leaves a partially filled dict behind for later calls.

    Raises ``InvalidBSON`` if the position reported after the last element
    does not equal the document size minus one.
    """
    if self.__inflated_doc is None:
        # We already validated the object's size when this document was
        # created, so no need to do that again. We still need to check the
        # size of all the elements and compare to the document size.
        object_size = _UNPACK_INT(self.__raw[:4])[0] - 1
        # Start from the offset handed to _iterate_elements.  Starting from
        # 0 made a document with no elements (nothing yielded, so position
        # is never updated) fail the length check below.
        position = 4
        inflated = {}
        for key, value, position in _iterate_elements(
                self.__raw, 4, object_size, self.__codec_options):
            inflated[key] = value
        if position != object_size:
            raise InvalidBSON('bad object or element length')
        # Publish only a fully validated result; assigning earlier would let
        # a second call return the partial dict without raising.
        self.__inflated_doc = inflated
    return self.__inflated_doc
def _iter_elements(data, opts, recurser=None):
    """Yield ``(path, value)`` pairs for the elements of a BSON document.

    `data` is the document's bytes starting with its 4-byte size prefix and
    `opts` is forwarded to the per-type decoders in ``_ELEMENT_GETTER``.
    ``path`` is a list of element names leading to ``value``.

    When `recurser` is truthy, an element of type ``bson.BSONOBJ`` whose
    name is ``in recurser`` is not yielded itself; instead its decoded value
    is walked recursively with ``recurser(name)`` and each nested pair is
    yielded with the element's name prepended to its path.
    """
    doc_end = bson._UNPACK_INT(data[:4])[0] - 1
    cursor = 4
    while cursor < doc_end:
        kind = data[cursor]
        name, cursor = _get_c_string(data, cursor + 1)
        value, cursor = _ELEMENT_GETTER[kind](
            data, cursor, doc_end, opts, name)
        if recurser and kind == bson.BSONOBJ and name in recurser:
            nested = _iter_elements(value, opts, recurser(name))
            for subpath, subvalue in nested:
                yield [name] + subpath, subvalue
        else:
            yield [name], value
def _iter_elements(data, opts, recurser=None):
    """Walk a BSON document, yielding ``(path, value)`` for its elements.

    `data` holds the document's bytes beginning with the 4-byte size prefix;
    `opts` is handed to whichever ``_ELEMENT_GETTER`` decoder matches each
    element's type byte.  ``path`` is a list of names locating ``value``.

    If `recurser` is truthy and an element is of type ``bson.BSONOBJ`` with
    a name contained in `recurser`, the element's value is traversed with
    ``recurser(name)`` and the nested pairs are yielded with the element's
    name put in front of their paths.  Every other element is yielded as
    ``([name], value)``.
    """
    size = bson._UNPACK_INT(data[:4])[0]
    limit = size - 1
    offset = 4
    while offset < limit:
        type_byte = data[offset]
        offset += 1
        key, offset = _get_c_string(data, offset)
        getter = _ELEMENT_GETTER[type_byte]
        value, offset = getter(data, offset, limit, opts, key)
        if recurser and type_byte == bson.BSONOBJ and key in recurser:
            for tail, leaf in _iter_elements(value, opts, recurser(key)):
                yield [key] + tail, leaf
            continue
        yield [key], value
def _get_object(data, position, obj_end, opts, dummy):
    """Decode the embedded BSON document that starts at `position`.

    Returns ``(value, next_position)``.  ``value`` is one of:

    - ``opts.document_class(raw_bytes, opts)`` when ``_raw_document_class``
      accepts ``opts.document_class``;
    - a ``bson.DBRef`` when the decoded document has a ``"$ref"`` key (the
      ``"$ref"``, ``"$id"`` and ``"$db"`` keys are popped out and the
      remaining document is passed as the fourth argument);
    - otherwise the document returned by ``_elements_to_dict``.

    Raises ``bson.InvalidBSON`` when the terminator byte is missing
    ("bad eoo") or the document reaches `obj_end` ("invalid object length").
    `dummy` is accepted but not used here.
    """
    size = bson._UNPACK_INT(data[position:position + 4])[0]
    next_position = position + size
    terminator = next_position - 1
    if data[terminator:next_position] != b"\x00":
        raise bson.InvalidBSON("bad eoo")
    if terminator >= obj_end:
        raise bson.InvalidBSON("invalid object length")

    if _raw_document_class(opts.document_class):
        raw = data[position:next_position]
        return (opts.document_class(raw, opts), next_position)

    doc = _elements_to_dict(
        data, position + 4, terminator, opts, subdocument=True)
    if "$ref" not in doc:
        return doc, next_position

    # Pop the reference keys in this order before handing over what is left.
    ref = doc.pop("$ref")
    ident = doc.pop("$id", None)
    database = doc.pop("$db", None)
    return (bson.DBRef(ref, ident, database, doc), next_position)
def get_fields(data, opts, trie):
    """Collect ``(path, value)`` pairs for the elements selected by `trie`.

    `data` is a BSON document's bytes starting with its 4-byte size prefix;
    `opts` is forwarded to the ``_ELEMENT_GETTER`` decoders.  For each
    element, in document order:

    - if its name is ``in trie``, ``([name], value)`` is collected;
    - otherwise, if ``trie.get(name)`` is truthy, the element's value is
      searched recursively with that result and every nested pair is
      collected with ``name`` prepended to its path.

    Returns the collected pairs as a list.
    """
    doc_end = bson._UNPACK_INT(data[:4])[0] - 1
    cursor = 4
    found = []
    while cursor < doc_end:
        kind = data[cursor]
        name, cursor = _get_c_string(data, cursor + 1)
        value, cursor = _ELEMENT_GETTER[kind](
            data, cursor, doc_end, opts, name)
        if name in trie:
            found.append(([name], value))
        else:
            subtrie = trie.get(name)
            if subtrie:
                for subpath, subvalue in get_fields(value, opts, subtrie):
                    found.append(([name] + subpath, subvalue))
    return found
def _decode_batch(batch): """ NOTE: This implementation is the same as the bson.decode_iter implementation, with some minor differences. This methods decodes a raw bson data batch, and yields dict items for every valid batch item. Batch items that cannot be decoded from their bson format will be skipped. """ position = 0 end = len(batch) - 1 while position < end: obj_size = bson._UNPACK_INT(batch[position:position + 4])[0] elements = batch[position:position + obj_size] position += obj_size try: yield bson._bson_to_dict(elements, codec_options.DEFAULT_CODEC_OPTIONS) except bson.InvalidBSON as err: logging.warning("ignored invalid record: {}".format(str(err))) continue
def get_fields(data, opts, trie):
    """Return ``(path, value)`` pairs for the elements that `trie` selects.

    `data` holds a BSON document's bytes beginning with the 4-byte size
    prefix, and `opts` is passed to the ``_ELEMENT_GETTER`` decoders.

    An element whose name is ``in trie`` contributes ``([name], value)``.
    For any other element, a truthy ``trie.get(name)`` triggers a recursive
    search of the element's value, and each nested result is returned with
    ``name`` put in front of its path.  Results keep document order.
    """
    size = bson._UNPACK_INT(data[:4])[0]
    limit = size - 1
    offset = 4
    matches = []
    while offset < limit:
        type_byte = data[offset]
        offset += 1
        key, offset = _get_c_string(data, offset)
        getter = _ELEMENT_GETTER[type_byte]
        value, offset = getter(data, offset, limit, opts, key)
        if key in trie:
            matches.append(([key], value))
            continue
        child = trie.get(key)
        if not child:
            continue
        for tail, leaf in get_fields(value, opts, child):
            matches.append(([key] + tail, leaf))
    return matches
def _obj_end(self):
    """Return the size stored in the first four bytes of ``_raw``, minus one."""
    return bson._UNPACK_INT(self._raw[:4])[0] - 1