def object_hook(dct):
    """Convert one decoded JSON object into its BSON counterpart.

    Intended for use as a ``json`` ``object_hook``: it receives every
    decoded JSON object and returns either a replacement value or the
    dict itself.

    Args:
        dct: The decoded JSON object.

    Returns:
        ``dct`` with its ``_id`` replaced by an ObjectId when an ``_id``
        key is present; otherwise the value built from the first
        recognised ``$``-key (checked in the order below); otherwise
        ``dct`` unchanged.
    """
    if '_id' in dct:
        # Documents carrying a plain _id keep their shape; only the id
        # itself is converted (in place).
        dct['_id'] = ObjectId(str(dct["_id"]))
        return dct
    if "$oid" in dct:
        return ObjectId(str(dct["$oid"]))
    if "$ref" in dct:
        return DBRef(dct["$ref"], dct["$id"], dct.get("$db", None))
    if "$date" in dct:
        # $date is milliseconds since the epoch.
        return datetime.datetime.fromtimestamp(
            float(dct["$date"]) / 1000.0, utc)
    if "$regex" in dct:
        # $options is optional: not every producer emits it.
        options = dct.get("$options") or ""
        flags = 0
        if "i" in options:
            flags |= re.IGNORECASE
        if "m" in options:
            flags |= re.MULTILINE
        return re.compile(dct["$regex"], flags)
    if "$minKey" in dct:
        return MinKey()
    if "$maxKey" in dct:
        return MaxKey()
    if _use_uuid and "$uuid" in dct:
        return uuid.UUID(dct["$uuid"])
    return dct
def _parse_canonical_minkey(doc: Any) -> MinKey: """Decode a JSON MinKey to bson.min_key.MinKey.""" if type(doc["$minKey"]) is not int or doc["$minKey"] != 1: raise TypeError("$minKey value must be 1: %s" % (doc, )) if len(doc) != 1: raise TypeError("Bad $minKey, extra field(s): %s" % (doc, )) return MinKey()
def test_minkey_pickling(self):
    """Check a MinKey against a stored pickle payload via round_trip_pickle."""
    # NOTE(review): the payload was originally named ``pickled_with_3`` --
    # presumably captured under Python 3; confirm.
    stored_payload = (b'\x80\x04\x95\x1e\x00\x00\x00\x00\x00\x00\x00\x8c'
                      b'\x0cbson.min_key\x94\x8c\x06MinKey\x94\x93\x94)'
                      b'\x81\x94.')
    self.round_trip_pickle(MinKey(), stored_payload)
def object_hook(dct, compile_re=True):
    """Map one decoded JSON object onto the BSON value it encodes.

    The first recognised ``$``-key wins; a dict with none of them is
    returned as-is. When ``compile_re`` is false, ``$regex`` yields a
    ``Regex`` instead of a compiled pattern.
    """
    if "$oid" in dct:
        return ObjectId(str(dct["$oid"]))

    if "$ref" in dct:
        return DBRef(dct["$ref"], dct["$id"], dct.get("$db", None))

    if "$date" in dct:
        seconds = float(dct["$date"]) / 1000.0
        return EPOCH_AWARE + datetime.timedelta(seconds=seconds)

    if "$regex" in dct:
        # PyMongo always adds $options but some other tools may not.
        flags = 0
        for letter in dct.get("$options", ""):
            flags |= _RE_OPT_TABLE.get(letter, 0)
        build = re.compile if compile_re else Regex
        return build(dct["$regex"], flags)

    if "$minKey" in dct:
        return MinKey()

    if "$maxKey" in dct:
        return MaxKey()

    if "$binary" in dct:
        # Integer subtypes are normalised to hex text (stored back on
        # dct) so both spellings share one parsing path.
        if isinstance(dct["$type"], int):
            dct["$type"] = "%02x" % dct["$type"]
        type_hex = dct["$type"]
        subtype = int(type_hex, 16)
        if subtype >= 0xffffff80:  # Handle mongoexport values
            subtype = int(type_hex[6:], 16)
        payload = base64.b64decode(dct["$binary"].encode())
        return Binary(payload, subtype)

    if "$code" in dct:
        return Code(dct["$code"], dct.get("$scope"))

    if bson.has_uuid() and "$uuid" in dct:
        return bson.uuid.UUID(dct["$uuid"])

    return dct
def test_qop_gt_13(monty_find, mongo_find):
    """$gt against MaxKey: both backends must return the same three docs."""
    max_k = MaxKey()
    values = (ObjectId(b"000000000000"), max_k, MinKey(), 55)
    docs = [{"a": value} for value in values]
    spec = {"a": {"$gt": max_k}}

    monty_c = monty_find(docs, spec)
    mongo_c = mongo_find(docs, spec)

    walked = FieldWalker(docs[1]).go("a").get()
    assert walked.value == [max_k]

    expected_hits = 3
    assert mongo_c.count() == expected_hits
    assert monty_c.count() == mongo_c.count()
    for _ in range(expected_hits):
        assert next(mongo_c) == next(monty_c)
def object_hook(dct, json_options=DEFAULT_JSON_OPTIONS):
    """Translate one decoded JSON object into the BSON value it encodes.

    Keys are tested in a fixed order and the first match decides the
    result; a dict without any recognised ``$``-key is returned as-is.
    ``json_options`` is forwarded to the ``$date`` and ``$binary``
    helpers.
    """
    if "$oid" in dct:
        value = ObjectId(str(dct["$oid"]))
    elif "$ref" in dct:
        value = DBRef(dct["$ref"], dct["$id"], dct.get("$db", None))
    elif "$date" in dct:
        value = _get_date(dct, json_options)
    elif "$regex" in dct:
        # PyMongo always adds $options but some other tools may not.
        flags = 0
        for letter in dct.get("$options", ""):
            flags |= _RE_OPT_TABLE.get(letter, 0)
        value = Regex(dct["$regex"], flags)
    elif "$minKey" in dct:
        value = MinKey()
    elif "$maxKey" in dct:
        value = MaxKey()
    elif "$binary" in dct:
        value = _get_binary(dct, json_options)
    elif "$code" in dct:
        value = Code(dct["$code"], dct.get("$scope"))
    elif "$uuid" in dct:
        value = uuid.UUID(dct["$uuid"])
    elif "$undefined" in dct:
        value = None
    elif "$numberLong" in dct:
        value = Int64(dct["$numberLong"])
    elif "$timestamp" in dct:
        stamp = dct["$timestamp"]
        value = Timestamp(stamp["t"], stamp["i"])
    elif "$numberDecimal" in dct:
        value = Decimal128(dct["$numberDecimal"])
    else:
        value = dct
    return value
def test_qop_lt_13(monty_find, mongo_find):
    """$lt against MinKey: both backends must return the same three docs."""
    min_k = MinKey()
    values = (ObjectId(b"000000000000"), MaxKey(), min_k, 55)
    docs = [{"a": value} for value in values]
    spec = {"a": {"$lt": min_k}}

    monty_c = monty_find(docs, spec)
    mongo_c = mongo_find(docs, spec)

    expected_hits = 3
    assert mongo_c.count() == expected_hits
    assert monty_c.count() == mongo_c.count()
    for _ in range(expected_hits):
        assert next(mongo_c) == next(monty_c)
def object_hook(dct: dict):
    """Helper function for converting json to mongo bson.

    Args:
        dct: A decoded JSON object.

    Returns:
        The value built from the first recognised ``$``-key (checked in
        the order below), or ``dct`` itself when none is present.
    """
    if "$oid" in dct:
        return ObjectId(str(dct["$oid"]))
    if "$ref" in dct:
        return DBRef(dct["$ref"], dct["$id"], dct.get("$db", None))
    if "$date" in dct:
        # $date is milliseconds since the epoch.
        return datetime.datetime.fromtimestamp(float(dct["$date"]) / 1000.0, utc)
    if "$regex" in dct:
        # $options is optional: not every producer emits it.
        options = dct.get("$options") or ""
        flags = 0
        if "i" in options:
            flags |= re.IGNORECASE
        if "m" in options:
            flags |= re.MULTILINE
        return re.compile(dct["$regex"], flags)
    if "$minKey" in dct:
        return MinKey()
    if "$maxKey" in dct:
        return MaxKey()
    if _use_uuid and "$uuid" in dct:
        return uuid.UUID(dct["$uuid"])
    return dct
def object_hook(dct):
    """Convert one decoded JSON object into its BSON counterpart.

    Args:
        dct: A decoded JSON object.

    Returns:
        The value built from the first recognised ``$``-key (checked in
        the order below), or ``dct`` itself when none is present.
    """
    if "$oid" in dct:
        return ObjectId(str(dct["$oid"]))
    if "$ref" in dct:
        return DBRef(dct["$ref"], dct["$id"], dct.get("$db", None))
    if "$date" in dct:
        # $date is milliseconds since the epoch.
        secs = float(dct["$date"]) / 1000.0
        return EPOCH_AWARE + datetime.timedelta(seconds=secs)
    if "$regex" in dct:
        # $options is optional: not every producer emits it.
        options = dct.get("$options") or ""
        flags = 0
        if "i" in options:
            flags |= re.IGNORECASE
        if "m" in options:
            flags |= re.MULTILINE
        return re.compile(dct["$regex"], flags)
    if "$minKey" in dct:
        return MinKey()
    if "$maxKey" in dct:
        return MaxKey()
    if "$binary" in dct:
        # $type may arrive as an int or as hex text; normalise to an
        # int subtype without touching the caller's dict.
        type_hex = dct["$type"]
        if isinstance(type_hex, int):
            type_hex = "%02x" % type_hex
        subtype = int(type_hex, 16)
        if subtype >= 0xffffff80:
            # Sign-extended 8-digit form: keep only the low byte.
            subtype = int(type_hex[6:], 16)
        return Binary(base64.b64decode(dct["$binary"].encode()), subtype)
    if "$code" in dct:
        return Code(dct["$code"], dct.get("$scope"))
    if bson.has_uuid() and "$uuid" in dct:
        return bson.uuid.UUID(dct["$uuid"])
    return dct
def object_hook(dct):
    """Convert one decoded JSON object into its BSON counterpart.

    Args:
        dct: A decoded JSON object. It is not modified.

    Returns:
        The value built from the first recognised ``$``-key (checked in
        the order below), or ``dct`` itself when none is present.
    """
    if "$oid" in dct:
        return ObjectId(str(dct["$oid"]))
    if "$ref" in dct:
        return DBRef(dct["$ref"], dct["$id"], dct.get("$db", None))
    if "$date" in dct:
        # $date is milliseconds since the epoch.
        secs = float(dct["$date"]) / 1000.0
        return EPOCH_AWARE + datetime.timedelta(seconds=secs)
    if "$regex" in dct:
        # $options is optional: not every producer emits it.
        options = dct.get("$options") or ""
        flags = 0
        if "i" in options:
            flags |= re.IGNORECASE
        if "m" in options:
            flags |= re.MULTILINE
        return re.compile(dct["$regex"], flags)
    if "$minKey" in dct:
        return MinKey()
    if "$maxKey" in dct:
        return MaxKey()
    if "$binary" in dct:
        # Normalise int subtypes to hex text locally so both spellings
        # share one parsing path without rewriting the caller's dict.
        type_hex = dct["$type"]
        if isinstance(type_hex, int):
            type_hex = "%02x" % type_hex
        subtype = int(type_hex, 16)
        if subtype >= 0xffffff80:
            # Handle mongoexport values
            subtype = int(type_hex[6:], 16)
        return Binary(base64.b64decode(dct["$binary"].encode()), subtype)
    if "$code" in dct:
        return Code(dct["$code"], dct.get("$scope"))
    if bson.has_uuid() and "$uuid" in dct:
        return bson.uuid.UUID(dct["$uuid"])
    return dct
def _parse_canonical_minkey(doc): """Decode a JSON MinKey to bson.min_key.MinKey.""" if type(doc['$minKey']) is not int or doc['$minKey'] != 1: raise TypeError('$minKey value must be 1: %s' % (doc, )) if len(doc) != 1: raise TypeError('Bad $minKey, extra field(s): %s' % (doc, )) return MinKey()
def test_qop_type_20(monty_find, mongo_find):
    """$type -1 (minKey) must match the MinKey document on both backends."""
    docs = [{"a": MinKey()}]
    minkey_type = -1  # minKey
    spec = {"a": {"$type": minkey_type}}

    monty_c = monty_find(docs, spec)
    mongo_c = mongo_find(docs, spec)

    assert mongo_c.count() == 1
    assert monty_c.count() == mongo_c.count()
def test_sort_8(monty_sort, mongo_sort):
    """Descending sort over MinKey/MaxKey must order ids identically."""
    docs = [{"a": key} for key in (MinKey(), MaxKey())]
    sort = [("a", -1)]

    monty_c = monty_sort(docs, sort)
    mongo_c = mongo_sort(docs, sort)

    # One comparison per inserted document, consumed in lockstep.
    for _ in docs:
        assert next(mongo_c)["_id"] == next(monty_c)["_id"]
def object_hook(dct, compile_re=True):
    """Convert one decoded JSON object into its BSON counterpart.

    Args:
        dct: A decoded JSON object. It is not modified.
        compile_re: When true, ``$regex`` becomes a compiled ``re``
            pattern; otherwise a ``Regex`` instance.

    Returns:
        The value built from the first recognised ``$``-key (checked in
        the order below), or ``dct`` itself when none is present.
    """
    if "$oid" in dct:
        return ObjectId(str(dct["$oid"]))
    if "$numberLong" in dct:
        return int(dct["$numberLong"])
    if "$decimal" in dct:
        v = str(dct["$decimal"])
        if "$precision" in dct:
            # $precision is read as a (precision, scale) pair.
            precision = dct["$precision"][0]
            scale = dct["$precision"][1]
            return Decimal(v, precision, scale)
        return Decimal(v)
    if "$ref" in dct:
        return DBRef(dct["$ref"], dct["$id"], dct.get("$db", None))
    if "$date" in dct:
        try:
            # Numeric form: milliseconds since the epoch.
            secs = float(dct["$date"]) / 1000.0
            return EPOCH_AWARE + datetime.timedelta(seconds=secs)
        except ValueError:
            # Text form: a plain calendar date.
            return datetime.datetime.strptime(dct["$date"], "%Y-%m-%d")
    if "$timestamp" in dct:
        try:
            ms = long_type(dct["$timestamp"])
            # Floor division keeps the seconds part an integer under
            # Python 3 as well (true division would yield a float).
            return Timestamp(ms // 1000, ms % 1000 * 1000)
        except ValueError:
            dt = datetime.datetime.strptime(dct["$timestamp"],
                                            "%Y-%m-%d-%H.%M.%S.%f")
            secs = long_type(time.mktime(dt.timetuple()))
            return Timestamp(secs, dt.microsecond)
    if "$regex" in dct:
        flags = 0
        # PyMongo always adds $options but some other tools may not.
        for opt in dct.get("$options", ""):
            flags |= _RE_OPT_TABLE.get(opt, 0)
        if compile_re:
            return re.compile(dct["$regex"], flags)
        return Regex(dct["$regex"], flags)
    if "$minKey" in dct:
        return MinKey()
    if "$maxKey" in dct:
        return MaxKey()
    if "$binary" in dct:
        # In this variant $type is decimal (int or decimal text).
        subtype = int(dct["$type"])
        return Binary(base64.b64decode(dct["$binary"].encode()), subtype)
    if "$code" in dct:
        return Code(dct["$code"], dct.get("$scope"))
    if bson.has_uuid() and "$uuid" in dct:
        return bson.uuid.UUID(dct["$uuid"])
    return dct
def check_encode_then_decode(self, doc_class=dict, decoder=decode, encoder=encode):
    """Assert that a fixed set of documents survives encoder -> decoder.

    Each sample is checked twice: once wrapped in ``doc_class`` and once
    as given. A generated-document property check runs at the end.
    """
    # Work around http://bugs.jython.org/issue1728
    on_jython = sys.platform.startswith('java')
    if on_jython:
        doc_class = SON

    def round_trips(document):
        wrapped = doc_class(document)
        self.assertEqual(document, (decoder(encoder(wrapped))))
        self.assertEqual(document, decoder(encoder(document)))

    round_trips({})
    round_trips({"test": u"hello"})

    decoded_hello = decoder(encoder({"hello": "world"}))["hello"]
    self.assertTrue(isinstance(decoded_hello, text_type))

    round_trips({"mike": -10120})
    round_trips({"long": Int64(10)})
    round_trips({"really big long": 2147483648})
    round_trips({u"hello": 0.0013109})
    round_trips({"something": True})
    round_trips({"false": False})
    round_trips({"an array": [1, True, 3.8, u"world"]})
    round_trips({"an object": doc_class({"test": u"something"})})
    round_trips({"a binary": Binary(b"test", 100)})
    round_trips({"a binary": Binary(b"test", 128)})
    round_trips({"a binary": Binary(b"test", 254)})
    round_trips({"another binary": Binary(b"test", 2)})
    round_trips(SON([(u'test dst', datetime.datetime(1993, 4, 4, 2))]))
    round_trips(
        SON([(u'test negative dst', datetime.datetime(1, 1, 1, 1, 1, 1))]))
    round_trips({"big float": float(10000000000)})
    round_trips({"ref": DBRef("coll", 5)})
    round_trips({"ref": DBRef("coll", 5, foo="bar", bar=4)})
    round_trips({"ref": DBRef("coll", 5, "foo")})
    round_trips({"ref": DBRef("coll", 5, "foo", foo="bar")})
    round_trips({"ref": Timestamp(1, 2)})
    round_trips({"foo": MinKey()})
    round_trips({"foo": MaxKey()})
    round_trips({"$field": Code("function(){ return true; }")})
    round_trips({
        "$field": Code("return function(){ return x; }", scope={'x': False})
    })

    def encode_then_decode(doc):
        # NOTE(review): this uses the module-level ``encode`` rather than
        # the ``encoder`` argument -- confirm that is intentional.
        options = CodecOptions(document_class=doc_class)
        return doc_class(doc) == decoder(encode(doc), options)

    qcheck.check_unittest(self, encode_then_decode,
                          qcheck.gen_mongo_dict(3))
def test_qop_lt_12(monty_find, mongo_find):
    """$lt MaxKey must select the single MinKey document on both backends."""
    lowest, highest = MinKey(), MaxKey()
    docs = [{"a": lowest}, {"a": highest}]
    spec = {"a": {"$lt": highest}}

    monty_c = monty_find(docs, spec)
    mongo_c = mongo_find(docs, spec)

    assert mongo_c.count() == 1
    assert monty_c.count() == mongo_c.count()
    assert next(mongo_c) == next(monty_c)
def test_qop_gt_12(monty_find, mongo_find):
    """$gt MinKey must select the single MaxKey document on both backends."""
    lowest, highest = MinKey(), MaxKey()
    docs = [{"a": lowest}, {"a": highest}]
    spec = {"a": {"$gt": lowest}}

    monty_c = monty_find(docs, spec)
    mongo_c = mongo_find(docs, spec)

    walked = FieldWalker(docs[1]).go("a").get()
    assert walked.value == [highest]

    assert mongo_c.count() == 1
    assert monty_c.count() == mongo_c.count()
    assert next(mongo_c) == next(monty_c)
def test_encode_then_decode(self):
    """Assert that a fixed set of documents survives BSON.encode -> decode."""

    def round_trips(document):
        encoded = BSON.encode(document)
        self.assertEqual(document, (encoded).decode())

    round_trips({})
    round_trips({"test": u"hello"})

    decoded_hello = BSON.encode({"hello": "world"}).decode()["hello"]
    self.assertTrue(isinstance(decoded_hello, unicode))

    round_trips({"mike": -10120})
    round_trips({"long": long(10)})
    round_trips({"really big long": 2147483648})
    round_trips({u"hello": 0.0013109})
    round_trips({"something": True})
    round_trips({"false": False})
    round_trips({"an array": [1, True, 3.8, u"world"]})
    round_trips({"an object": {"test": u"something"}})
    round_trips({"a binary": Binary(b("test"), 100)})
    round_trips({"a binary": Binary(b("test"), 128)})
    round_trips({"a binary": Binary(b("test"), 254)})
    round_trips({"another binary": Binary(b("test"), 2)})
    round_trips(SON([(u'test dst', datetime.datetime(1993, 4, 4, 2))]))
    round_trips(
        SON([(u'test negative dst', datetime.datetime(1, 1, 1, 1, 1, 1))]))
    round_trips({"big float": float(10000000000)})
    round_trips({"ref": DBRef("coll", 5)})
    round_trips({"ref": DBRef("coll", 5, foo="bar", bar=4)})
    round_trips({"ref": DBRef("coll", 5, "foo")})
    round_trips({"ref": DBRef("coll", 5, "foo", foo="bar")})
    round_trips({"ref": Timestamp(1, 2)})
    round_trips({"foo": MinKey()})
    round_trips({"foo": MaxKey()})
    round_trips({"$field": Code("function(){ return true; }")})
    round_trips({
        "$field": Code("return function(){ return x; }", scope={'x': False})
    })

    # Work around http://bugs.jython.org/issue1728
    on_affected_jython = (sys.platform.startswith('java')
                          and sys.version_info[:3] == (2, 5, 2))
    doc_class = SON if on_affected_jython else dict

    def encode_then_decode(doc):
        return doc == (BSON.encode(doc)).decode(as_class=doc_class)

    qcheck.check_unittest(self, encode_then_decode,
                          qcheck.gen_mongo_dict(3))
def get_sub_table_bound(self):
    """Return the configured sub-table bounds with key sentinels resolved.

    Reads ``subtbl_bd`` from ``self.config_rd``, evaluates it, and
    replaces the textual markers ``'MinKey()'`` (under ``"LowBound"``)
    and ``'MaxKey()'`` (under ``"UpBound"``) with real ``MinKey`` /
    ``MaxKey`` instances.

    Returns:
        The bounds mapping, or ``None`` when ``subtbl_bd`` is unset or
        empty.
    """
    raw_bound = self.config_rd.get("subtbl_bd")
    if raw_bound is None or '' == raw_bound:
        return None

    # SECURITY(review): eval() executes whatever expression the
    # configuration holds. Only safe while the configuration is trusted;
    # consider ast.literal_eval if the value is always a plain literal.
    sub_tbl_bound = eval(raw_bound)

    # get LowBound (a missing section is treated as empty)
    low_bound = sub_tbl_bound.get("LowBound") or {}
    for sub_bound in low_bound:
        if 'MinKey()' == low_bound.get(sub_bound):
            low_bound[sub_bound] = MinKey()

    # get UpBound (a missing section is treated as empty)
    up_bound = sub_tbl_bound.get("UpBound") or {}
    for sub_bound in up_bound:
        if 'MaxKey()' == up_bound.get(sub_bound):
            up_bound[sub_bound] = MaxKey()

    return sub_tbl_bound
def test_qop_gte_12(monty_find, mongo_find):
    """$gte MinKey must select both documents on both backends."""
    lowest, highest = MinKey(), MaxKey()
    docs = [{"a": lowest}, {"a": highest}]
    spec = {"a": {"$gte": lowest}}

    monty_c = monty_find(docs, spec)
    mongo_c = mongo_find(docs, spec)

    expected_hits = 2
    assert mongo_c.count() == expected_hits
    assert monty_c.count() == mongo_c.count()
    for _ in range(expected_hits):
        assert next(mongo_c) == next(monty_c)
def test_sort_19(monty_sort, mongo_sort):
    """Ascending sort over mixed BSON types must order ids identically."""
    values = [
        ["x", True],
        None,
        [],
        [5, []],
        {"s": 7},
        {"s": [9]},
        {"s": 10},
        6,
        4,
        [5, None],
        [5, [1]],
        [Decimal128("4.5"), Binary(b"0")],
        [{"s": 5}, False],
        [{"s": 9}],
        [True, "y"],
        Binary(b"a"),
        b"bytes",
        ["abc"],
        "banana",
        "appple",
        [Regex("^a", "ix")],
        Regex("^b"),
        Code("x", {"m": 0}),
        Code("y"),
        Code("y", {}),
        Code("y", {"m": 0}),
        MinKey(),
        MaxKey(),
        Timestamp(0, 1),
        Timestamp(1, 1),
        ObjectId(b"000000000000"),
        ObjectId(b"000000000001"),
        datetime(1900, 1, 1),
        datetime(1900, 1, 2),
    ]
    docs = [{"a": value} for value in values]
    sort = [("a", 1)]

    monty_c = monty_sort(docs, sort)
    mongo_c = mongo_sort(docs, sort)

    # One comparison per inserted document, consumed in lockstep.
    for _ in docs:
        assert next(mongo_c)["_id"] == next(monty_c)["_id"]
def splitChunkMiddle(self, chunk):
    """Issue "split" commands at evenly spaced points inside one chunk.

    Returns False when the chunk has no 'size' key, when any of its min
    values equals MinKey() or any of its max values equals MaxKey(), or
    when the computed step is zero. Otherwise returns True unless the
    number of failed split commands equals the number of split points
    (so an empty list of split points also yields False).

    NOTE(review): the body uses ``iteritems().next()`` and ``long``, which
    exist only on Python 2 -- confirm the target interpreter.
    """
    if 'size' in chunk:
        # Chunks bounded by MinKey/MaxKey are skipped.
        for key, value in chunk['min'].iteritems():
            if value == MinKey():
                print("We skipped {} due to having a minKey on {}".format(
                    chunk['_id'], key))
                return False
        for key, value in chunk['max'].iteritems():
            if value == MaxKey():
                print("We skipped {} due to having a maxKey on {}".format(
                    chunk['_id'], key))
                return False
        # Only the first (key, value) pair yielded by each bound is used.
        key_name = chunk['min'].iteritems().next()[0]
        min_value = Decimal(chunk['min'].iteritems().next()[1])
        max_value = Decimal(chunk['max'].iteritems().next()[1])
        # NOTE(review): if both operands are ints on Python 2 the division
        # floors before math.ceil is applied -- confirm that is intended.
        desired_chunks = Decimal(
            math.ceil(chunk['size'] / self.max_chunk_size()))
        # The step is (min - max) / desired_chunks, i.e. negative whenever
        # max > min. Decimal(0e-50) is Decimal(0.0).
        step_size = ((min_value - max_value) / desired_chunks).quantize(
            Decimal(0e-50), rounding=decimal.ROUND_DOWN)
        if (step_size == 0 or step_size == -0):
            print("Step was %s" % step_size)
            return False
        # NOTE(review): range() receives Decimal arguments here, and the
        # non-signed branch walks min -> max with the step computed above;
        # verify both against the intended interpreter and data.
        if (Decimal(min_value).is_signed() or Decimal(max_value).is_signed()):
            split_points = range(max_value, min_value, step_size)
        else:
            split_points = range(min_value, max_value, step_size)
        errors = 0
        for split_point in split_points:
            try:
                self.conn.admin.command(
                    "split", chunk['ns'], middle={key_name: long(split_point)})
            except Exception as e:
                print(e)
                print("Failed to run split due to {}".format(e))
                pass
                # NOTE(review): nesting of this increment was ambiguous in
                # the reviewed text; it is read as one count per failed
                # split -- confirm.
                errors += 1
        return True if errors != len(split_points) else False
    else:
        return False
def test_encode_then_decode(self):
    """Assert that a fixed set of documents survives BSON.encode -> decode."""

    def round_trips(document):
        encoded = BSON.encode(document)
        self.assertEqual(document, (encoded).decode())

    round_trips({})
    round_trips({"test": u"hello"})

    decoded_hello = BSON.encode({"hello": "world"}).decode()["hello"]
    self.assert_(isinstance(decoded_hello, unicode))

    round_trips({"mike": -10120})
    round_trips({"long": long(10)})
    round_trips({"really big long": 2147483648})
    round_trips({u"hello": 0.0013109})
    round_trips({"something": True})
    round_trips({"false": False})
    round_trips({"an array": [1, True, 3.8, u"world"]})
    round_trips({"an object": {"test": u"something"}})
    round_trips({"a binary": Binary("test", 100)})
    round_trips({"a binary": Binary("test", 128)})
    round_trips({"a binary": Binary("test", 254)})
    round_trips({"another binary": Binary("test")})
    round_trips(SON([(u'test dst', datetime.datetime(1993, 4, 4, 2))]))
    round_trips(
        SON([(u'test negative dst', datetime.datetime(1, 1, 1, 1, 1, 1))]))
    round_trips({"big float": float(10000000000)})
    round_trips({"ref": DBRef("coll", 5)})
    round_trips({"ref": DBRef("coll", 5, foo="bar", bar=4)})
    round_trips({"ref": DBRef("coll", 5, "foo")})
    round_trips({"ref": DBRef("coll", 5, "foo", foo="bar")})
    round_trips({"ref": Timestamp(1, 2)})
    round_trips({"foo": MinKey()})
    round_trips({"foo": MaxKey()})

    def encode_then_decode(document):
        return document == (BSON.encode(document)).decode()

    qcheck.check_unittest(self, encode_then_decode,
                          qcheck.gen_mongo_dict(3))
def object_hook(dct):
    """Convert one decoded JSON object into its BSON counterpart.

    Args:
        dct: A decoded JSON object.

    Returns:
        The value built from the first recognised ``$``-key (checked in
        the order below), or ``dct`` itself when none is present.
    """
    if "$oid" in dct:
        return ObjectId(str(dct["$oid"]))
    if "$ref" in dct:
        return DBRef(dct["$ref"], dct["$id"], dct.get("$db", None))
    if "$date" in dct:
        # $date is milliseconds since the epoch.
        secs = float(dct["$date"]) / 1000.0
        return EPOCH_AWARE + datetime.timedelta(seconds=secs)
    if "$regex" in dct:
        # $options is optional: not every producer emits it.
        options = dct.get("$options") or ""
        flags = 0
        if "i" in options:
            flags |= re.IGNORECASE
        if "m" in options:
            flags |= re.MULTILINE
        return re.compile(dct["$regex"], flags)
    if "$minKey" in dct:
        return MinKey()
    if "$maxKey" in dct:
        return MaxKey()
    if _use_uuid and "$uuid" in dct:
        return uuid.UUID(dct["$uuid"])
    return dct
def object_hook(dct, compile_re=True):
    """Convert one decoded JSON object into its BSON counterpart.

    Args:
        dct: A decoded JSON object.
        compile_re: When true, ``$regex`` becomes a compiled ``re``
            pattern; otherwise a ``Regex`` instance.

    Returns:
        The value built from the first recognised ``$``-key (checked in
        the order below), or ``dct`` itself when none is present.

    Raises:
        ValueError: If a textual ``$date`` has a malformed fractional
            part or UTC offset.
    """
    if "$oid" in dct:
        return ObjectId(str(dct["$oid"]))
    if "$ref" in dct:
        return DBRef(dct["$ref"], dct["$id"], dct.get("$db", None))
    if "$date" in dct:
        dtm = dct["$date"]
        # mongoexport 2.6 and newer
        if isinstance(dtm, str):
            # datetime.datetime.strptime is new in python 2.5
            naive = datetime.datetime(
                *(time.strptime(dtm[:19], "%Y-%m-%dT%H:%M:%S")[0:6]))
            # Fractional seconds are optional and of variable length:
            # consume "<sep><digits>" if present, and treat whatever
            # follows as the offset.
            rest = dtm[19:]
            micros = 0
            if rest[:1] in (".", ","):
                end = 1
                while end < len(rest) and rest[end] in "0123456789":
                    end += 1
                digits = rest[1:end]
                if not digits:
                    raise ValueError("invalid format for fractional seconds")
                # Scale to microseconds; precision beyond that is dropped.
                micros = int(digits[:6].ljust(6, "0"))
                rest = rest[end:]
            offset = rest
            aware = naive.replace(microsecond=micros, tzinfo=utc)
            if not offset or offset == 'Z':
                # UTC
                return aware
            else:
                if len(offset) == 5:
                    # Offset from mongoexport is in format (+|-)HHMM
                    secs = (int(offset[1:3]) * 3600 + int(offset[3:]) * 60)
                elif ':' in offset and len(offset) == 6:
                    # RFC-3339 format (+|-)HH:MM
                    hours, minutes = offset[1:].split(':')
                    secs = (int(hours) * 3600 + int(minutes) * 60)
                else:
                    # Not RFC-3339 compliant or mongoexport output.
                    raise ValueError("invalid format for offset")
                if offset[0] == "-":
                    secs *= -1
                return aware - datetime.timedelta(seconds=secs)
        # mongoexport 2.6 and newer, time before the epoch (SERVER-15275)
        elif isinstance(dtm, dict):
            secs = float(dtm["$numberLong"]) / 1000.0
        # mongoexport before 2.6
        else:
            secs = float(dtm) / 1000.0
        return EPOCH_AWARE + datetime.timedelta(seconds=secs)
    if "$regex" in dct:
        flags = 0
        # PyMongo always adds $options but some other tools may not.
        for opt in dct.get("$options", ""):
            flags |= _RE_OPT_TABLE.get(opt, 0)
        if compile_re:
            return re.compile(dct["$regex"], flags)
        else:
            return Regex(dct["$regex"], flags)
    if "$minKey" in dct:
        return MinKey()
    if "$maxKey" in dct:
        return MaxKey()
    if "$binary" in dct:
        # Integer subtypes are normalised to hex text (stored back on
        # dct) so both spellings share one parsing path.
        if isinstance(dct["$type"], int):
            dct["$type"] = "%02x" % dct["$type"]
        subtype = int(dct["$type"], 16)
        if subtype >= 0xffffff80:
            # Handle mongoexport values
            subtype = int(dct["$type"][6:], 16)
        return Binary(base64.b64decode(dct["$binary"].encode()), subtype)
    if "$code" in dct:
        return Code(dct["$code"], dct.get("$scope"))
    if bson.has_uuid() and "$uuid" in dct:
        return bson.uuid.UUID(dct["$uuid"])
    if "$undefined" in dct:
        return None
    if "$numberLong" in dct:
        # 2to3 will change this to int. PyMongo 3.0 supports
        # a new type, Int64, to avoid round trip issues.
        return int(dct["$numberLong"])
    if "$timestamp" in dct:
        tsp = dct["$timestamp"]
        return Timestamp(tsp["t"], tsp["i"])
    return dct
ord(BSONBIN): _get_binary, ord(BSONUND): lambda u, v, w, x, y, z: (None, w), # Deprecated undefined ord(BSONOID): _get_oid, ord(BSONBOO): _get_boolean, ord(BSONDAT): _get_date, ord(BSONNUL): lambda u, v, w, x, y, z: (None, w), ord(BSONRGX): _get_regex, ord(BSONREF): _get_ref, # Deprecated DBPointer ord(BSONCOD): _get_code, ord(BSONSYM): _get_string, # Deprecated symbol ord(BSONCWS): _get_code_w_scope, ord(BSONINT): _get_int, ord(BSONTIM): _get_timestamp, ord(BSONLON): _get_int64, ord(BSONDEC): _get_decimal128, ord(BSONMIN): lambda u, v, w, x, y, z: (MinKey(), w), ord(BSONMAX): lambda u, v, w, x, y, z: (MaxKey(), w) } if _USE_C: def _element_to_dict(data, view, position, obj_end, opts): return _cbson._element_to_dict(data, position, obj_end, opts) else: def _element_to_dict(data, view, position, obj_end, opts): """Decode a single key, value pair.""" element_type = data[position] position += 1 element_name, position = _get_c_string(data, view, position, opts) try:
BSONBIN: _get_binary, BSONUND: lambda v, w, x, y, z: (None, w), # Deprecated undefined BSONOID: _get_oid, BSONBOO: _get_boolean, BSONDAT: _get_date, BSONNUL: lambda v, w, x, y, z: (None, w), BSONRGX: _get_regex, BSONREF: _get_ref, # Deprecated DBPointer BSONCOD: _get_code, BSONSYM: _get_string, # Deprecated symbol BSONCWS: _get_code_w_scope, BSONINT: _get_int, BSONTIM: _get_timestamp, BSONLON: _get_int64, BSONDEC: _get_decimal128, BSONMIN: lambda v, w, x, y, z: (MinKey(), w), BSONMAX: lambda v, w, x, y, z: (MaxKey(), w) } def _element_to_dict(data, position, obj_end, opts): """Decode a single key, value pair.""" element_type = data[position:position + 1] position += 1 element_name, position = _get_c_string(data, position, opts) try: value, position = _ELEMENT_GETTER[element_type](data, position, obj_end, opts, element_name) except KeyError: _raise_unknown_type(element_type, element_name)
def test_minkey(self):
    """Round-trip a document whose only value is a MinKey."""
    document = {"m": MinKey()}
    self.round_trip(document)
def presplit(host, database, collection, shardkey, shardnumber=None,
             chunkspershard=1, verbose=False):
    """
    Presplit chunks for sharding.

    Get information about the number of shards, then split chunks and
    distribute over shards. Currently assumes shardkey to be hex string,
    for example ObjectId or UUID.

    host: host and port to connect to, e.g. "192.168.0.1:27017",
          "localhost:30000"
    database: database name to enable sharding
    collection: collection name to shard
    shardkey: shardkey to pre-split on (must be hex string, e.g. ObjectId
              or UUID)
    shardnumber: if None, automatically presplit over all available shards.
                 if integer, only presplit over the given number of shards
                 (maximum is the number of actual shards)
    chunkspershard: multiplied by the number of shards to get the total
                    number of splits
    verbose: if True, print progress, swallowed moveChunk errors and the
             final chunk distribution

    Returns None. The balancer is stopped first and is not restarted by
    this function.
    """
    con = Connection(host)
    namespace = '%s.%s' % (database, collection)

    # disable balancer
    con['config']['settings'].update({'_id': "balancer"},
                                     {'$set': {'stopped': True}},
                                     upsert=True)

    # enable sharding on database if not yet enabled
    db_info = con['config']['databases'].find_one({'_id': database})
    if not db_info or db_info['partitioned'] is False:
        con['admin'].command(SON({'enableSharding': database}))

    # shard collection if not yet sharded
    coll_info = con['config']['collections'].find_one({'_id': namespace})
    if coll_info and not coll_info['dropped']:
        # if it is sharded already, quit. something is not right.
        if verbose:
            print("collection already sharded.")
        return
    else:
        con[database][collection].ensure_index(shardkey)
        con['admin'].command(SON({'shardCollection': namespace,
                                  'key': {shardkey: 1}}))

    # get shard number and names and calculate split points
    shards = list(con['config']['shards'].find())

    if len(shards) == 1:
        if verbose:
            print("only one shard found. no pre-splitting required.")
        return

    # limit number of shards if shardnumber given
    if shardnumber and shardnumber <= len(shards):
        shards = shards[:shardnumber]

    shard_names = [s['_id'] for s in shards]
    splits_total = len(shards) * chunkspershard
    # Floor division: range() and the "%x" format below need an int, and
    # true division would produce a float.
    split_interval = 16**4 // splits_total
    split_points = ["%0.4x" % s
                    for s in range(split_interval,
                                   splits_total * split_interval,
                                   split_interval)]

    # pre-splitting commands
    for s in split_points:
        con['admin'].command(SON([('split', namespace),
                                  ('middle', {shardkey: s})]))

    # MinKey addresses the first chunk, which has no lower split point.
    split_points = [MinKey()] + split_points

    # move chunks to shards (catch the one error where the chunk resides
    # on that shard already)
    for i, s in enumerate(split_points):
        target_shard = shard_names[i % len(shards)]
        try:
            if verbose:
                print('moving chunk %s in collection %s to shard %s.'
                      % (s, namespace, target_shard))
            con['admin'].command(SON([('moveChunk', namespace),
                                      ('find', {shardkey: s}),
                                      ('to', target_shard)]))
        except OperationFailure as e:
            # Best effort: failures are only reported in verbose mode.
            if verbose:
                print(e)

    if verbose:
        print('chunk distribution:', end=' ')
        chunk_group = (con['config']['chunks']
                       .group(key={'shard': 1}, condition={'ns': namespace},
                              initial={'nChunks': 0},
                              reduce=(" function (doc, out) "
                                      "{ out.nChunks++; } ")))
        print(', '.join(["%s: %i" % (ch['shard'], ch['nChunks'])
                         for ch in chunk_group]))
BSONARR: _get_array, BSONBIN: _get_binary, BSONUND: lambda w, x, y, z: (None, x), # Deprecated undefined BSONOID: _get_oid, BSONBOO: _get_boolean, BSONDAT: _get_date, BSONNUL: lambda w, x, y, z: (None, x), BSONRGX: _get_regex, BSONREF: _get_ref, # Deprecated DBPointer BSONCOD: _get_code, BSONSYM: _get_string, # Deprecated symbol BSONCWS: _get_code_w_scope, BSONINT: _get_int, BSONTIM: _get_timestamp, BSONLON: _get_int64, BSONMIN: lambda w, x, y, z: (MinKey(), x), BSONMAX: lambda w, x, y, z: (MaxKey(), x)} def _element_to_dict(data, position, obj_end, opts): """Decode a single key, value pair.""" element_type = data[position:position + 1] position += 1 element_name, position = _get_c_string(data, position) value, position = _ELEMENT_GETTER[element_type](data, position, obj_end, opts) return element_name, value, position def _elements_to_dict(data, position, obj_end, opts, subdocument=None): """Decode a BSON document."""