def load(fh, as_class=dict, tz_aware=True, uuid_subtype=bson.OLD_UUID_SUBTYPE):
    """Decode BSON data to multiple documents.

    `fh` must be a file-like object of concatenated, valid, BSON-encoded
    documents.

    :Parameters:
      - `fh`: a file-like object supporting ``.read()``
      - `as_class` (optional): the class to use for the resulting documents
      - `tz_aware` (optional): if ``True``, return timezone-aware
        :class:`~datetime.datetime` instances
    """
    while True:
        obj_size = fh.read(S_INT32)
        if len(obj_size) == 0:
            return
        obj_size = struct.unpack("<i", obj_size)[0]
        data = fh.read(obj_size - S_INT32)
        if len(data) + S_INT32 < obj_size:
            raise bson.InvalidBSON("objsize too large")
        if not data or data[-1] != ZERO:
            raise bson.InvalidBSON("bad eoo")
        elements = data[:-1]
        yield bson._elements_to_dict(elements, as_class, tz_aware, uuid_subtype)

def load(fh, as_class=dict, tz_aware=True, uuid_subtype=bson.OLD_UUID_SUBTYPE):
    """Decode BSON data to multiple documents.

    `fh` must be a file-like object of concatenated, valid, BSON-encoded
    documents.

    :Parameters:
      - `fh`: a file-like object supporting ``.read()``
      - `as_class` (optional): the class to use for the resulting documents
      - `tz_aware` (optional): if ``True``, return timezone-aware
        :class:`~datetime.datetime` instances
    """
    while True:
        obj_size = fh.read(S_INT32)
        if len(obj_size) == 0:
            return
        obj_size = struct.unpack("<i", obj_size)[0]
        data = fh.read(obj_size - S_INT32)
        if len(data) + S_INT32 < obj_size:
            raise bson.InvalidBSON("objsize too large")
        # if bytes(data[-1]) != ZERO:
        #     raise bson.InvalidBSON("bad eoo")
        if data[-1] != 0:  # on Python 3, indexing bytes yields an int
            raise bson.InvalidBSON("bad eoo")
        elements = data[:-1]
        yield bson._elements_to_dict(elements, as_class, tz_aware, uuid_subtype)

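# A minimal usage sketch for the load() generator above ("dump.bson" is a
# placeholder path). The snippet leans on module-level names not shown; the
# assumption here is that they look like this:
import struct
import bson

S_INT32 = struct.calcsize("<i")  # 4: width of the BSON int32 size prefix
ZERO = b"\x00"                   # the end-of-object terminator byte

with open("dump.bson", "rb") as fh:
    for doc in load(fh):
        print(doc.get("_id"))
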
def __inflated(self):
    if self.__inflated_doc is None:
        # We already validated the object's size when this document was
        # created, so no need to do that again.
        # Use SON to preserve ordering of elements.
        self.__inflated_doc = _elements_to_dict(
            self.__raw, 4, len(self.__raw) - 1, self.__codec_options, SON())
    return self.__inflated_doc

def __inflated(self):
    if self.__inflated_doc is None:
        # We already validated the object's size when this document was
        # created, so no need to do that again.
        # Use SON to preserve ordering of elements.
        self.__inflated_doc = _elements_to_dict(self.__raw, 4, len(self.__raw) - 1,
                                                self.__codec_options, SON())
    return self.__inflated_doc

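# The 4 and len(raw) - 1 offsets above bracket the element list inside a BSON
# document: a 4-byte little-endian size prefix (which counts itself) and a
# trailing zero byte ("end of object"). A self-contained Python 3 sketch of
# that framing, hand-building {"a": 1}:
import struct

elements = b"\x10a\x00" + struct.pack("<i", 1)  # type 0x10 (int32), name "a", value 1
doc = struct.pack("<i", 4 + len(elements) + 1) + elements + b"\x00"

assert struct.unpack("<i", doc[:4])[0] == len(doc)  # size prefix counts itself
assert doc[4:-1] == elements                        # the span handed to _elements_to_dict
assert doc[-1] == 0                                 # the EOO terminator
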
def _read(self):
    try:
        size_bits = self.fh.read(4)
        size = struct.unpack("<i", size_bits)[0] - 4  # BSON size prefix includes itself
        data = self.fh.read(size)
        if len(data) != size:
            raise struct.error("Unable to cleanly read expected BSON chunk; "
                               "EOF, underfull buffer, or invalid object size.")
        if data[size - 1] != "\x00":  # Python 2: indexing a str yields a str
            raise InvalidBSON("Bad EOO in BSON Data")
        chunk = data[:size - 1]
        doc = _elements_to_dict(chunk, dict, True)
        return doc
    except struct.error, e:
        # print >> sys.stderr, "Parsing length record failed: %s" % e
        self.eof = True
        raise StopIteration(e)

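# The "\x00" comparison above only holds on Python 2, where indexing a str
# yields a one-character str; on Python 3, indexing bytes yields an int. A
# version-agnostic variant of that check, sketched with a one-byte slice
# (same data/size/InvalidBSON names as the snippet above):
if data[size - 1:size] != b"\x00":  # slicing yields bytes/str on both 2 and 3
    raise InvalidBSON("Bad EOO in BSON Data")
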
def _get_object(data, position, as_class, tz_aware,
                uuid_subtype=OLD_UUID_SUBTYPE, compile_re=True):
    obj_size = struct.unpack("<i", data[position:position + 4])[0]
    # Skip the 4-byte size prefix and the trailing zero byte; what remains
    # is the element list.
    encoded = data[position + 4:position + obj_size - 1]
    object = _elements_to_dict(encoded, as_class, tz_aware, uuid_subtype,
                               compile_re)
    position += obj_size
    if "$ref" in object:
        return (DBRef(object.pop("$ref"), object.pop("$id"),
                      object.pop("$db", None), object), position)
    if '__customtype__' in object:
        code = object.pop('__customtype__')
        unpickler = Extension.getunpickler(code)
        if not unpickler:
            raise ValueError("Unknown custom type: %r" % code)
        return unpickler(code, object), position
    return object, position

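# The "$ref" branch above is what turns a {"$ref": ..., "$id": ...}
# sub-document back into a DBRef. A hedged round-trip sketch using PyMongo's
# public helpers (bson.encode/bson.decode exist in recent PyMongo releases;
# older ones expose bson.BSON.encode instead):
import bson
from bson.dbref import DBRef

raw = bson.encode({"owner": DBRef("users", 42)})
doc = bson.decode(raw)
assert isinstance(doc["owner"], DBRef)  # the sub-document decoded as a DBRef
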
def readBSONFile( fileName , callback ):
    inp = open( fileName , "rb" )  # binary mode: BSON is binary data
    while True:
        x = inp.read(4)
        if len(x) == 0:
            break
        if len(x) < 4:
            raise Exception( "bad - need int for length and only got %d bytes" % len(x) )
        obj_size = struct.unpack( "<i" , x )[0]
        elements = inp.read( obj_size - 5 )
        callback( bson._elements_to_dict( elements , dict , True ) )
        inp.read(1)  # consume the trailing EOO byte left by the -5 above

def readBSONFile(fileName, callback):
    inp = open(fileName, "rb")  # binary mode: BSON is binary data
    while True:
        x = inp.read(4)
        if len(x) == 0:
            break
        if len(x) < 4:
            raise Exception(
                "bad - need int for length and only got %d bytes" % len(x))
        obj_size = struct.unpack("<i", x)[0]
        elements = inp.read(obj_size - 5)
        callback(bson._elements_to_dict(elements, dict, True))
        inp.read(1)  # consume the trailing EOO byte left by the -5 above

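# Hypothetical usage of readBSONFile: collect every document from a dump file
# via the callback ("dump.bson" is a placeholder path; struct and bson imports
# are assumed at module level):
docs = []
readBSONFile("dump.bson", docs.append)
print("read %d documents" % len(docs))
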
class BSONSource(pump.Source):
    """Reads bson file."""

    def __init__(self, opts, spec, source_bucket, source_node,
                 source_map, sink_map, ctl, cur):
        super(BSONSource, self).__init__(opts, spec, source_bucket, source_node,
                                         source_map, sink_map, ctl, cur)
        self.done = False
        self.f = None

    @staticmethod
    def can_handle(opts, spec):
        return spec.startswith(BSON_SCHEME) and \
            os.path.isfile(spec.replace(BSON_SCHEME, ""))

    @staticmethod
    def check(opts, spec):
        return 0, {'spec': spec,
                   'buckets': [{'name': os.path.basename(spec),
                                'nodes': [{'hostname': 'N/A'}]}]}

    @staticmethod
    def provide_design(opts, source_spec, source_bucket, source_map):
        return 0, None

    def provide_batch(self):
        if self.done:
            return 0, None
        if not self.f:
            try:
                self.f = open(self.spec.replace(BSON_SCHEME, ""))
            except IOError, e:
                return "error: could not open bson: %s; exception: %s" % \
                    (self.spec, e), None

        batch = pump.Batch(self)
        batch_max_size = self.opts.extra['batch_max_size']
        batch_max_bytes = self.opts.extra['batch_max_bytes']

        cmd = couchbaseConstants.CMD_TAP_MUTATION
        vbucket_id = 0x0000ffff
        cas, exp, flg = 0, 0, 0

        while (self.f and
               batch.size() < batch_max_size and
               batch.bytes < batch_max_bytes):
            doc_size_buf = self.f.read(4)
            if not doc_size_buf:
                self.done = True
                self.f.close()
                self.f = None
                break
            doc_size, = struct.unpack("<i", doc_size_buf)
            doc_buf = self.f.read(doc_size - 4)
            if not doc_buf:
                self.done = True
                self.f.close()
                self.f = None
                break
            doc = bson._elements_to_dict(doc_buf, dict, True)
            key = doc['_id']
            doc_json = json.dumps(doc)
            msg = (cmd, vbucket_id, key, flg, exp, cas, '', doc_json, 0, 0, 0)
            batch.append(msg, len(doc))

        if batch.size() <= 0:
            return 0, None
        return 0, batch

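# Every snippet above hand-rolls the same loop: read the int32 size prefix,
# read the rest of the document, strip the EOO byte, and decode the elements.
# Modern PyMongo ships that loop as a public helper, bson.decode_file_iter;
# a minimal sketch, with "backup.bson" as a placeholder path:
import bson

with open("backup.bson", "rb") as fh:
    for doc in bson.decode_file_iter(fh):
        print(doc.get("_id"))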