Beispiel #1
0
def object_hook(dct):
    """Convert one decoded JSON object into its BSON/Python counterpart.

    Meant to be used as a ``json`` ``object_hook``. The first matching marker
    key decides the result:

    * ``_id``     -> the same dict, with ``_id`` replaced by an ``ObjectId``
    * ``$oid``    -> ``ObjectId``
    * ``$ref``    -> ``DBRef`` (``$db`` is optional)
    * ``$date``   -> ``datetime`` built from milliseconds since the epoch
    * ``$regex``  -> compiled pattern; only the ``i`` and ``m`` options are
      honoured, and a missing ``$options`` key means "no flags"
    * ``$minKey`` / ``$maxKey`` -> ``MinKey()`` / ``MaxKey()``
    * ``$uuid``   -> ``uuid.UUID`` (only when ``_use_uuid`` is truthy)

    Any other dict is returned unchanged.
    """
    if '_id' in dct:
        # Mutates the incoming dict in place and hands the same object back.
        dct['_id'] = ObjectId(str(dct["_id"]))
        return dct
    if "$oid" in dct:
        return ObjectId(str(dct["$oid"]))
    if "$ref" in dct:
        return DBRef(dct["$ref"], dct["$id"], dct.get("$db", None))
    if "$date" in dct:
        return datetime.datetime.fromtimestamp(
            float(dct["$date"]) / 1000.0, utc)
    if "$regex" in dct:
        # Some producers omit $options entirely; treat that as no flags
        # instead of failing with KeyError.
        options = dct.get("$options", "")
        flags = 0
        if "i" in options:
            flags |= re.IGNORECASE
        if "m" in options:
            flags |= re.MULTILINE
        return re.compile(dct["$regex"], flags)
    if "$minKey" in dct:
        return MinKey()
    if "$maxKey" in dct:
        return MaxKey()
    if _use_uuid and "$uuid" in dct:
        return uuid.UUID(dct["$uuid"])
    return dct
def _parse_canonical_minkey(doc: Any) -> MinKey:
    """Turn the canonical JSON form ``{"$minKey": 1}`` into a ``MinKey``.

    Raises:
        TypeError: if the ``$minKey`` value is not exactly the int ``1``, or
            if the document carries any key besides ``$minKey``.
    """
    marker = doc["$minKey"]
    # An exact type test (not isinstance) so that ``True`` is rejected too.
    is_int_one = type(marker) is int and marker == 1
    if not is_int_one:
        raise TypeError("$minKey value must be 1: %s" % (doc, ))
    if len(doc) != 1:
        raise TypeError("Bad $minKey, extra field(s): %s" % (doc, ))
    return MinKey()
    def test_minkey_pickling(self):
        """Pass a MinKey and a stored pickle payload to round_trip_pickle."""
        # Pre-recorded payload; the leading \x80\x04 is the pickle
        # protocol-4 header.
        stored_payload = (
            b'\x80\x04\x95\x1e\x00\x00\x00\x00\x00\x00\x00\x8c'
            b'\x0cbson.min_key\x94\x8c\x06MinKey\x94\x93\x94)'
            b'\x81\x94.'
        )
        self.round_trip_pickle(MinKey(), stored_payload)
Beispiel #4
0
def object_hook(dct, compile_re=True):
    """Map a decoded JSON object onto the BSON/Python value it represents.

    Marker keys are checked in a fixed order (``$oid``, ``$ref``, ``$date``,
    ``$regex``, ``$minKey``, ``$maxKey``, ``$binary``, ``$code``, ``$uuid``);
    the first one present decides the result. A dict without any marker is
    returned unchanged.

    Args:
        dct: the dict produced by the JSON decoder.
        compile_re: when true a ``$regex`` document becomes a compiled
            ``re`` pattern, otherwise a ``Regex`` instance.
    """
    if "$oid" in dct:
        oid_text = str(dct["$oid"])
        return ObjectId(oid_text)
    if "$ref" in dct:
        return DBRef(dct["$ref"], dct["$id"], dct.get("$db", None))
    if "$date" in dct:
        # $date carries milliseconds since the epoch.
        seconds = float(dct["$date"]) / 1000.0
        return EPOCH_AWARE + datetime.timedelta(seconds=seconds)
    if "$regex" in dct:
        # PyMongo always adds $options but some other tools may not.
        pattern_flags = 0
        for letter in dct.get("$options", ""):
            pattern_flags |= _RE_OPT_TABLE.get(letter, 0)
        build = re.compile if compile_re else Regex
        return build(dct["$regex"], pattern_flags)
    if "$minKey" in dct:
        return MinKey()
    if "$maxKey" in dct:
        return MaxKey()
    if "$binary" in dct:
        # An integer $type is normalised to its hex text form (stored back
        # into the dict, as before) so both spellings share one code path.
        if isinstance(dct["$type"], int):
            dct["$type"] = "%02x" % dct["$type"]
        type_hex = dct["$type"]
        subtype = int(type_hex, 16)
        if subtype >= 0xffffff80:  # Handle mongoexport values
            subtype = int(type_hex[6:], 16)
        payload = base64.b64decode(dct["$binary"].encode())
        return Binary(payload, subtype)
    if "$code" in dct:
        return Code(dct["$code"], dct.get("$scope"))
    if bson.has_uuid() and "$uuid" in dct:
        return bson.uuid.UUID(dct["$uuid"])
    return dct
def test_qop_gt_13(monty_find, mongo_find):
    """Check that monty and mongo agree on ``$gt: MaxKey`` over mixed types.

    The same documents and query are given to both finders; the test pins the
    mongo count at 3 and then compares the first three results pairwise.
    """
    oid_0 = ObjectId(b"000000000000")
    max_k = MaxKey()
    min_k = MinKey()
    docs = [{"a": value} for value in (oid_0, max_k, min_k, 55)]
    spec = {"a": {"$gt": max_k}}

    monty_c = monty_find(docs, spec)
    mongo_c = mongo_find(docs, spec)

    walked = FieldWalker(docs[1]).go("a").get()
    assert walked.value == [max_k]
    assert mongo_c.count() == 3
    assert monty_c.count() == mongo_c.count()
    for _ in range(3):
        assert next(mongo_c) == next(monty_c)
def object_hook(dct, json_options=DEFAULT_JSON_OPTIONS):
    """Translate a decoded JSON object into the BSON value it stands for.

    The first marker key found, in the order tested below, selects the
    conversion; a dict with no marker key is returned unchanged.
    ``json_options`` is forwarded to the ``$date`` and ``$binary`` helpers.
    """
    if "$oid" in dct:
        oid_text = str(dct["$oid"])
        return ObjectId(oid_text)
    if "$ref" in dct:
        return DBRef(dct["$ref"], dct["$id"], dct.get("$db", None))
    if "$date" in dct:
        return _get_date(dct, json_options)
    if "$regex" in dct:
        # PyMongo always adds $options but some other tools may not.
        regex_flags = 0
        for letter in dct.get("$options", ""):
            regex_flags |= _RE_OPT_TABLE.get(letter, 0)
        return Regex(dct["$regex"], regex_flags)
    if "$minKey" in dct:
        return MinKey()
    if "$maxKey" in dct:
        return MaxKey()
    if "$binary" in dct:
        return _get_binary(dct, json_options)
    if "$code" in dct:
        scope = dct.get("$scope")
        return Code(dct["$code"], scope)
    if "$uuid" in dct:
        return uuid.UUID(dct["$uuid"])
    if "$undefined" in dct:
        # The undefined marker maps to None.
        return None
    if "$numberLong" in dct:
        return Int64(dct["$numberLong"])
    if "$timestamp" in dct:
        stamp = dct["$timestamp"]
        return Timestamp(stamp["t"], stamp["i"])
    if "$numberDecimal" in dct:
        return Decimal128(dct["$numberDecimal"])
    return dct
def test_qop_lt_13(monty_find, mongo_find):
    """Check that monty and mongo agree on ``$lt: MinKey`` over mixed types.

    Both finders get the same documents and query; the mongo count is pinned
    at 3 and the first three results are compared pairwise.
    """
    oid_0 = ObjectId(b"000000000000")
    max_k = MaxKey()
    min_k = MinKey()
    docs = [{"a": value} for value in (oid_0, max_k, min_k, 55)]
    spec = {"a": {"$lt": min_k}}

    monty_c = monty_find(docs, spec)
    mongo_c = mongo_find(docs, spec)

    assert mongo_c.count() == 3
    assert monty_c.count() == mongo_c.count()
    for _ in range(3):
        assert next(mongo_c) == next(monty_c)
Beispiel #8
0
def object_hook(dct: dict):
    """Convert one decoded JSON object into its BSON/Python counterpart.

    Args:
        dct: a dict produced by the JSON decoder.

    Returns:
        The value selected by the first marker key present (``$oid``,
        ``$ref``, ``$date``, ``$regex``, ``$minKey``, ``$maxKey``, ``$uuid``),
        or ``dct`` itself when no marker key is found. For ``$regex`` only
        the ``i`` and ``m`` options are honoured, and a missing ``$options``
        key is treated as "no flags".
    """
    if "$oid" in dct:
        return ObjectId(str(dct["$oid"]))
    if "$ref" in dct:
        return DBRef(dct["$ref"], dct["$id"], dct.get("$db", None))
    if "$date" in dct:
        # $date carries milliseconds since the epoch.
        return datetime.datetime.fromtimestamp(float(dct["$date"]) / 1000.0,
                                               utc)
    if "$regex" in dct:
        # Not every producer emits $options; default to no flags rather than
        # raising KeyError.
        options = dct.get("$options", "")
        flags = 0
        if "i" in options:
            flags |= re.IGNORECASE
        if "m" in options:
            flags |= re.MULTILINE
        return re.compile(dct["$regex"], flags)
    if "$minKey" in dct:
        return MinKey()
    if "$maxKey" in dct:
        return MaxKey()
    if _use_uuid and "$uuid" in dct:
        return uuid.UUID(dct["$uuid"])
    return dct
Beispiel #9
0
def object_hook(dct):
    """Convert one decoded JSON object into its BSON/Python counterpart.

    The first marker key present decides the result (``$oid``, ``$ref``,
    ``$date``, ``$regex``, ``$minKey``, ``$maxKey``, ``$binary``, ``$code``,
    ``$uuid``); a dict without any marker is returned unchanged.

    For ``$regex`` only the ``i`` and ``m`` options are honoured, and a
    missing ``$options`` key is treated as "no flags". For ``$binary`` the
    ``$type`` value is passed to ``Binary`` as-is.
    """
    if "$oid" in dct:
        return ObjectId(str(dct["$oid"]))
    if "$ref" in dct:
        return DBRef(dct["$ref"], dct["$id"], dct.get("$db", None))
    if "$date" in dct:
        # $date carries milliseconds since the epoch.
        secs = float(dct["$date"]) / 1000.0
        return EPOCH_AWARE + datetime.timedelta(seconds=secs)
    if "$regex" in dct:
        # Not every producer emits $options; default to no flags rather than
        # raising KeyError.
        options = dct.get("$options", "")
        flags = 0
        if "i" in options:
            flags |= re.IGNORECASE
        if "m" in options:
            flags |= re.MULTILINE
        return re.compile(dct["$regex"], flags)
    if "$minKey" in dct:
        return MinKey()
    if "$maxKey" in dct:
        return MaxKey()
    if "$binary" in dct:
        return Binary(base64.b64decode(dct["$binary"].encode()), dct["$type"])
    if "$code" in dct:
        return Code(dct["$code"], dct.get("$scope"))
    if bson.has_uuid() and "$uuid" in dct:
        return bson.uuid.UUID(dct["$uuid"])
    return dct
Beispiel #10
0
def object_hook(dct):
    """Convert one decoded JSON object into its BSON/Python counterpart.

    The first marker key present decides the result (``$oid``, ``$ref``,
    ``$date``, ``$regex``, ``$minKey``, ``$maxKey``, ``$binary``, ``$code``,
    ``$uuid``); a dict without any marker is returned unchanged.

    For ``$regex`` only the ``i`` and ``m`` options are honoured, and a
    missing ``$options`` key is treated as "no flags".
    """
    if "$oid" in dct:
        return ObjectId(str(dct["$oid"]))
    if "$ref" in dct:
        return DBRef(dct["$ref"], dct["$id"], dct.get("$db", None))
    if "$date" in dct:
        # $date carries milliseconds since the epoch.
        secs = float(dct["$date"]) / 1000.0
        return EPOCH_AWARE + datetime.timedelta(seconds=secs)
    if "$regex" in dct:
        # Not every producer emits $options; default to no flags rather than
        # raising KeyError.
        options = dct.get("$options", "")
        flags = 0
        if "i" in options:
            flags |= re.IGNORECASE
        if "m" in options:
            flags |= re.MULTILINE
        return re.compile(dct["$regex"], flags)
    if "$minKey" in dct:
        return MinKey()
    if "$maxKey" in dct:
        return MaxKey()
    if "$binary" in dct:
        # An integer $type is rewritten (in the dict) to two-digit hex text so
        # that integer and string inputs share the parsing below.
        if isinstance(dct["$type"], int):
            dct["$type"] = "%02x" % dct["$type"]
        subtype = int(dct["$type"], 16)
        if subtype >= 0xffffff80:  # Handle mongoexport values
            subtype = int(dct["$type"][6:], 16)
        return Binary(base64.b64decode(dct["$binary"].encode()), subtype)
    if "$code" in dct:
        return Code(dct["$code"], dct.get("$scope"))
    if bson.has_uuid() and "$uuid" in dct:
        return bson.uuid.UUID(dct["$uuid"])
    return dct
Beispiel #11
0
def _parse_canonical_minkey(doc):
    """Decode a JSON MinKey to bson.min_key.MinKey."""
    if type(doc['$minKey']) is not int or doc['$minKey'] != 1:
        raise TypeError('$minKey value must be 1: %s' % (doc, ))
    if len(doc) != 1:
        raise TypeError('Bad $minKey, extra field(s): %s' % (doc, ))
    return MinKey()
Beispiel #12
0
def test_qop_type_20(monty_find, mongo_find):
    """Check that ``$type: -1`` matches a MinKey value in both backends."""
    minkey_type_code = -1  # minKey
    docs = [{"a": MinKey()}]
    spec = {"a": {"$type": minkey_type_code}}

    monty_c = monty_find(docs, spec)
    mongo_c = mongo_find(docs, spec)

    assert mongo_c.count() == 1
    assert monty_c.count() == mongo_c.count()
Beispiel #13
0
def test_sort_8(monty_sort, mongo_sort):
    """Check that a descending sort on MinKey/MaxKey orders both alike.

    Results are compared one by one through their ``_id`` fields.
    """
    docs = [{"a": MinKey()}, {"a": MaxKey()}]
    sort = [("a", -1)]

    monty_c = monty_sort(docs, sort)
    mongo_c = mongo_sort(docs, sort)

    # One comparison per input document.
    for _ in docs:
        mongo_doc = next(mongo_c)
        monty_doc = next(monty_c)
        assert mongo_doc["_id"] == monty_doc["_id"]
Beispiel #14
0
def object_hook(dct, compile_re=True):
    """Convert one decoded JSON object into its BSON/Python counterpart.

    The first marker key present decides the result; a dict without any
    marker key is returned unchanged.

    Args:
        dct: the dict produced by the JSON decoder.
        compile_re: when true a ``$regex`` document becomes a compiled
            ``re`` pattern, otherwise a ``Regex`` instance.

    Notes:
        * ``$date`` is first read as milliseconds since the epoch; if that
          fails with ``ValueError`` it is parsed as ``YYYY-MM-DD``.
        * ``$timestamp`` is first read as an integer number of milliseconds;
          if that fails with ``ValueError`` it is parsed with the format
          ``%Y-%m-%d-%H.%M.%S.%f``.
    """
    if "$oid" in dct:
        return ObjectId(str(dct["$oid"]))
    if "$numberLong" in dct:
        return int(dct["$numberLong"])
    if "$decimal" in dct:
        v = str(dct["$decimal"])
        if "$precision" in dct:
            # $precision is a [precision, scale] pair.
            precision = dct["$precision"][0]
            scale = dct["$precision"][1]
            d = Decimal(v, precision, scale)
        else:
            d = Decimal(v)
        return d
    if "$ref" in dct:
        return DBRef(dct["$ref"], dct["$id"], dct.get("$db", None))
    if "$date" in dct:
        try:
            secs = float(dct["$date"]) / 1000.0
            return EPOCH_AWARE + datetime.timedelta(seconds=secs)
        except ValueError:
            return datetime.datetime.strptime(dct["$date"], "%Y-%m-%d")
    if "$timestamp" in dct:
        try:
            ms = long_type(dct["$timestamp"])
            # Floor division keeps the seconds component an integer on
            # Python 3 as well ("/" would produce a float there).
            return Timestamp(ms // 1000, ms % 1000 * 1000)
        except ValueError:
            dt = datetime.datetime.strptime(dct["$timestamp"],
                                            "%Y-%m-%d-%H.%M.%S.%f")
            secs = long_type(time.mktime(dt.timetuple()))
            return Timestamp(secs, dt.microsecond)
    if "$regex" in dct:
        flags = 0
        # PyMongo always adds $options but some other tools may not.
        for opt in dct.get("$options", ""):
            flags |= _RE_OPT_TABLE.get(opt, 0)

        if compile_re:
            return re.compile(dct["$regex"], flags)
        else:
            return Regex(dct["$regex"], flags)
    if "$minKey" in dct:
        return MinKey()
    if "$maxKey" in dct:
        return MaxKey()
    if "$binary" in dct:
        # Here $type is decimal text (an integer is normalised to it and
        # stored back into the dict).
        if isinstance(dct["$type"], int):
            dct["$type"] = "%d" % dct["$type"]
        subtype = int(dct["$type"])
        return Binary(base64.b64decode(dct["$binary"].encode()), subtype)
    if "$code" in dct:
        return Code(dct["$code"], dct.get("$scope"))
    if bson.has_uuid() and "$uuid" in dct:
        return bson.uuid.UUID(dct["$uuid"])
    return dct
Beispiel #15
0
    def check_encode_then_decode(self,
                                 doc_class=dict,
                                 decoder=decode,
                                 encoder=encode):
        """Assert that a fixed set of documents survives encode -> decode.

        Each sample is round-tripped twice: once wrapped in ``doc_class`` and
        once as given. A generated-document check through ``qcheck`` runs
        last.
        """
        # Work around http://bugs.jython.org/issue1728
        if sys.platform.startswith('java'):
            doc_class = SON

        def assert_round_trips(doc):
            wrapped_result = decoder(encoder(doc_class(doc)))
            self.assertEqual(doc, wrapped_result)
            plain_result = decoder(encoder(doc))
            self.assertEqual(doc, plain_result)

        for sample in ({}, {"test": u"hello"}):
            assert_round_trips(sample)

        # A decoded string value is expected to come back as text_type.
        decoded_hello = decoder(encoder({"hello": "world"}))["hello"]
        self.assertTrue(isinstance(decoded_hello, text_type))

        remaining_samples = [
            {"mike": -10120},
            {"long": Int64(10)},
            {"really big long": 2147483648},
            {u"hello": 0.0013109},
            {"something": True},
            {"false": False},
            {"an array": [1, True, 3.8, u"world"]},
            {"an object": doc_class({"test": u"something"})},
            {"a binary": Binary(b"test", 100)},
            {"a binary": Binary(b"test", 128)},
            {"a binary": Binary(b"test", 254)},
            {"another binary": Binary(b"test", 2)},
            SON([(u'test dst', datetime.datetime(1993, 4, 4, 2))]),
            SON([(u'test negative dst',
                  datetime.datetime(1, 1, 1, 1, 1, 1))]),
            {"big float": float(10000000000)},
            {"ref": DBRef("coll", 5)},
            {"ref": DBRef("coll", 5, foo="bar", bar=4)},
            {"ref": DBRef("coll", 5, "foo")},
            {"ref": DBRef("coll", 5, "foo", foo="bar")},
            {"ref": Timestamp(1, 2)},
            {"foo": MinKey()},
            {"foo": MaxKey()},
            {"$field": Code("function(){ return true; }")},
            {"$field": Code("return function(){ return x; }",
                            scope={'x': False})},
        ]
        for sample in remaining_samples:
            assert_round_trips(sample)

        def encode_then_decode(doc):
            # NOTE(review): this uses the module-level ``encode`` rather than
            # the ``encoder`` argument - confirm that is intended.
            expected = doc_class(doc)
            options = CodecOptions(document_class=doc_class)
            return expected == decoder(encode(doc), options)

        qcheck.check_unittest(self, encode_then_decode,
                              qcheck.gen_mongo_dict(3))
def test_qop_lt_12(monty_find, mongo_find):
    """Check that monty and mongo agree on ``$lt: MaxKey`` for Min/MaxKey."""
    min_k = MinKey()
    max_k = MaxKey()
    docs = [{"a": bound} for bound in (min_k, max_k)]
    spec = {"a": {"$lt": max_k}}

    monty_c = monty_find(docs, spec)
    mongo_c = mongo_find(docs, spec)

    assert mongo_c.count() == 1
    assert monty_c.count() == mongo_c.count()
    mongo_doc = next(mongo_c)
    assert mongo_doc == next(monty_c)
def test_qop_gt_12(monty_find, mongo_find):
    """Check that monty and mongo agree on ``$gt: MinKey`` for Min/MaxKey."""
    min_k = MinKey()
    max_k = MaxKey()
    docs = [{"a": bound} for bound in (min_k, max_k)]
    spec = {"a": {"$gt": min_k}}

    monty_c = monty_find(docs, spec)
    mongo_c = mongo_find(docs, spec)

    walked = FieldWalker(docs[1]).go("a").get()
    assert walked.value == [max_k]
    assert mongo_c.count() == 1
    assert monty_c.count() == mongo_c.count()
    mongo_doc = next(mongo_c)
    assert mongo_doc == next(monty_c)
Beispiel #18
0
    def test_encode_then_decode(self):
        """Assert that a fixed set of documents survives BSON encode/decode.

        A generated-document check through ``qcheck`` runs last.
        """
        def assert_round_trips(doc):
            self.assertEqual(doc, (BSON.encode(doc)).decode())

        for sample in ({}, {"test": u"hello"}):
            assert_round_trips(sample)

        # A decoded string value is expected to come back as unicode.
        decoded_hello = BSON.encode({"hello": "world"}).decode()["hello"]
        self.assertTrue(isinstance(decoded_hello, unicode))

        remaining_samples = [
            {"mike": -10120},
            {"long": long(10)},
            {"really big long": 2147483648},
            {u"hello": 0.0013109},
            {"something": True},
            {"false": False},
            {"an array": [1, True, 3.8, u"world"]},
            {"an object": {"test": u"something"}},
            {"a binary": Binary(b("test"), 100)},
            {"a binary": Binary(b("test"), 128)},
            {"a binary": Binary(b("test"), 254)},
            {"another binary": Binary(b("test"), 2)},
            SON([(u'test dst', datetime.datetime(1993, 4, 4, 2))]),
            SON([(u'test negative dst',
                  datetime.datetime(1, 1, 1, 1, 1, 1))]),
            {"big float": float(10000000000)},
            {"ref": DBRef("coll", 5)},
            {"ref": DBRef("coll", 5, foo="bar", bar=4)},
            {"ref": DBRef("coll", 5, "foo")},
            {"ref": DBRef("coll", 5, "foo", foo="bar")},
            {"ref": Timestamp(1, 2)},
            {"foo": MinKey()},
            {"foo": MaxKey()},
            {"$field": Code("function(){ return true; }")},
            {"$field": Code("return function(){ return x; }",
                            scope={'x': False})},
        ]
        for sample in remaining_samples:
            assert_round_trips(sample)

        # Work around http://bugs.jython.org/issue1728
        on_affected_jython = (sys.platform.startswith('java')
                              and sys.version_info[:3] == (2, 5, 2))
        doc_class = SON if on_affected_jython else dict

        def encode_then_decode(doc):
            decoded = (BSON.encode(doc)).decode(as_class=doc_class)
            return doc == decoded

        qcheck.check_unittest(self, encode_then_decode,
                              qcheck.gen_mongo_dict(3))
Beispiel #19
0
    def get_sub_table_bound(self):
        """Return the sub-table bounds configured under ``subtbl_bd``.

        The configured value is evaluated as a Python expression and is
        expected to yield a dict that may contain ``LowBound`` and
        ``UpBound`` mappings. Inside those mappings the placeholder strings
        ``'MinKey()'`` and ``'MaxKey()'`` are replaced with real ``MinKey``
        and ``MaxKey`` instances respectively.

        Returns:
            The evaluated dict, or ``None`` when ``subtbl_bd`` is unset or
            is the empty string.
        """
        raw_bound = self.config_rd.get("subtbl_bd")
        if raw_bound is None or '' == raw_bound:
            return None

        # SECURITY NOTE(review): eval() runs arbitrary code taken from the
        # configuration. Kept because the accepted syntax of existing configs
        # is not visible here; consider ast.literal_eval if configs are plain
        # literals, and never feed this from untrusted input.
        sub_tbl_bound = eval(raw_bound)

        # A missing bound section is skipped instead of raising TypeError.
        low_bound = sub_tbl_bound.get("LowBound") or {}
        for field, value in low_bound.items():
            if 'MinKey()' == value:
                low_bound[field] = MinKey()

        up_bound = sub_tbl_bound.get("UpBound") or {}
        for field, value in up_bound.items():
            if 'MaxKey()' == value:
                up_bound[field] = MaxKey()

        return sub_tbl_bound
Beispiel #20
0
def test_qop_gte_12(monty_find, mongo_find):
    """Check that monty and mongo agree on ``$gte: MinKey`` for Min/MaxKey."""
    min_k = MinKey()
    max_k = MaxKey()
    docs = [{"a": bound} for bound in (min_k, max_k)]
    spec = {"a": {"$gte": min_k}}

    monty_c = monty_find(docs, spec)
    mongo_c = mongo_find(docs, spec)

    assert mongo_c.count() == 2
    assert monty_c.count() == mongo_c.count()
    for _ in range(2):
        mongo_doc = next(mongo_c)
        assert mongo_doc == next(monty_c)
Beispiel #21
0
def test_sort_19(monty_sort, mongo_sort):
    """Check that an ascending sort over mixed-type values orders both alike.

    Results are compared one by one through their ``_id`` fields.
    """
    values = [
        ["x", True],
        None,
        [],
        [5, []],
        {"s": 7},
        {"s": [9]},
        {"s": 10},
        6,
        4,
        [5, None],
        [5, [1]],
        [Decimal128("4.5"), Binary(b"0")],
        [{"s": 5}, False],
        [{"s": 9}],
        [True, "y"],
        Binary(b"a"),
        b"bytes",
        ["abc"],
        "banana",
        "appple",
        [Regex("^a", "ix")],
        Regex("^b"),
        Code("x", {"m": 0}),
        Code("y"),
        Code("y", {}),
        Code("y", {"m": 0}),
        MinKey(),
        MaxKey(),
        Timestamp(0, 1),
        Timestamp(1, 1),
        ObjectId(b"000000000000"),
        ObjectId(b"000000000001"),
        datetime(1900, 1, 1),
        datetime(1900, 1, 2),
    ]
    docs = [{"a": value} for value in values]
    sort = [("a", 1)]

    monty_c = monty_sort(docs, sort)
    mongo_c = mongo_sort(docs, sort)

    # One comparison per input document.
    for _ in docs:
        mongo_doc = next(mongo_c)
        monty_doc = next(monty_c)
        assert mongo_doc["_id"] == monty_doc["_id"]
Beispiel #22
0
 def splitChunkMiddle(self, chunk):
     """Issue ``split`` commands at evenly spaced points inside ``chunk``.

     ``chunk`` is read as a mapping with the keys ``size``, ``min``, ``max``,
     ``_id`` and ``ns``. Returns ``False`` when ``size`` is absent, when any
     bound is a MinKey/MaxKey, when the computed step is zero, or when every
     split attempt failed (which includes the case of no split points at
     all); returns ``True`` otherwise.

     NOTE(review): uses Python 2-only APIs (``dict.iteritems``, iterator
     ``.next()``, ``long``).
     """
     if 'size' in chunk:
         # Chunks bounded by MinKey/MaxKey have no numeric range to divide.
         for key, value in chunk['min'].iteritems():
             if value == MinKey():
                 print("We skipped {} due to having a minKey on {}".format(
                     chunk['_id'], key))
                 return False
         for key, value in chunk['max'].iteritems():
             if value == MaxKey():
                 print("We skipped {} due to having a maxKey on {}".format(
                     chunk['_id'], key))
                 return False
         # Only the first key/value pair of each bound is used.
         key_name = chunk['min'].iteritems().next()[0]
         min_value = Decimal(chunk['min'].iteritems().next()[1])
         max_value = Decimal(chunk['max'].iteritems().next()[1])
         # NOTE(review): if both operands are ints this division truncates on
         # Python 2 before ceil() is applied - confirm the intended rounding.
         desired_chunks = Decimal(
             math.ceil(chunk['size'] / self.max_chunk_size()))
         # NOTE(review): (min - max) is negative whenever min < max, which
         # makes the step negative - confirm the intended sign.
         # NOTE(review): 0e-50 is the float 0.0, so this quantizes to a whole
         # number; a string exponent such as '0e-50' may have been intended.
         step_size = ((min_value - max_value) / desired_chunks).quantize(
             Decimal(0e-50), rounding=decimal.ROUND_DOWN)
         if (step_size == 0 or step_size == -0):
             print("Step was %s" % step_size)
             return False
         # NOTE(review): range() is called with Decimal arguments here -
         # confirm this is accepted by the interpreter in use.
         if (Decimal(min_value).is_signed()
                 or Decimal(max_value).is_signed()):
             split_points = range(max_value, min_value, step_size)
         else:
             split_points = range(min_value, max_value, step_size)
         errors = 0
         for split_point in split_points:
             try:
                 self.conn.admin.command(
                     "split",
                     chunk['ns'],
                     middle={key_name: long(split_point)})
             except Exception as e:
                 # Best effort: a failed split is reported and counted, and
                 # the loop moves on to the next split point.
                 print(e)
                 print("Failed to  run split due to {}".format(e))
                 pass
                 errors += 1
         # True unless every attempt failed; also False when there were no
         # split points (0 errors == 0 points).
         return True if errors != len(split_points) else False
     else:
         return False
    def test_encode_then_decode(self):
        """Assert that a fixed set of documents survives BSON encode/decode.

        A generated-document check through ``qcheck`` runs last.
        """
        def assert_round_trips(doc):
            self.assertEqual(doc, (BSON.encode(doc)).decode())

        for sample in ({}, {"test": u"hello"}):
            assert_round_trips(sample)

        # A decoded string value is expected to come back as unicode.
        decoded_hello = BSON.encode({"hello": "world"}).decode()["hello"]
        self.assert_(isinstance(decoded_hello, unicode))

        remaining_samples = [
            {"mike": -10120},
            {"long": long(10)},
            {"really big long": 2147483648},
            {u"hello": 0.0013109},
            {"something": True},
            {"false": False},
            {"an array": [1, True, 3.8, u"world"]},
            {"an object": {"test": u"something"}},
            {"a binary": Binary("test", 100)},
            {"a binary": Binary("test", 128)},
            {"a binary": Binary("test", 254)},
            {"another binary": Binary("test")},
            SON([(u'test dst', datetime.datetime(1993, 4, 4, 2))]),
            SON([(u'test negative dst',
                  datetime.datetime(1, 1, 1, 1, 1, 1))]),
            {"big float": float(10000000000)},
            {"ref": DBRef("coll", 5)},
            {"ref": DBRef("coll", 5, foo="bar", bar=4)},
            {"ref": DBRef("coll", 5, "foo")},
            {"ref": DBRef("coll", 5, "foo", foo="bar")},
            {"ref": Timestamp(1, 2)},
            {"foo": MinKey()},
            {"foo": MaxKey()},
        ]
        for sample in remaining_samples:
            assert_round_trips(sample)

        def encode_then_decode(doc):
            decoded = (BSON.encode(doc)).decode()
            return doc == decoded

        qcheck.check_unittest(self, encode_then_decode,
                              qcheck.gen_mongo_dict(3))
Beispiel #24
0
def object_hook(dct):
    """Convert one decoded JSON object into its BSON/Python counterpart.

    The first marker key present decides the result (``$oid``, ``$ref``,
    ``$date``, ``$regex``, ``$minKey``, ``$maxKey``, ``$uuid``); a dict
    without any marker is returned unchanged. ``$uuid`` is only handled when
    ``_use_uuid`` is truthy.

    For ``$regex`` only the ``i`` and ``m`` options are honoured, and a
    missing ``$options`` key is treated as "no flags".
    """
    if "$oid" in dct:
        return ObjectId(str(dct["$oid"]))
    if "$ref" in dct:
        return DBRef(dct["$ref"], dct["$id"], dct.get("$db", None))
    if "$date" in dct:
        # $date carries milliseconds since the epoch.
        secs = float(dct["$date"]) / 1000.0
        return EPOCH_AWARE + datetime.timedelta(seconds=secs)
    if "$regex" in dct:
        # Not every producer emits $options; default to no flags rather than
        # raising KeyError.
        options = dct.get("$options", "")
        flags = 0
        if "i" in options:
            flags |= re.IGNORECASE
        if "m" in options:
            flags |= re.MULTILINE
        return re.compile(dct["$regex"], flags)
    if "$minKey" in dct:
        return MinKey()
    if "$maxKey" in dct:
        return MaxKey()
    if _use_uuid and "$uuid" in dct:
        return uuid.UUID(dct["$uuid"])
    return dct
def object_hook(dct, compile_re=True):
    """Convert one decoded JSON object into its BSON/Python counterpart.

    The first marker key present decides the result; a dict without any
    marker key is returned unchanged.

    Args:
        dct: the dict produced by the JSON decoder.
        compile_re: when true a ``$regex`` document becomes a compiled
            ``re`` pattern, otherwise a ``Regex`` instance.

    ``$date`` accepts three shapes: an ISO-8601 string
    (``YYYY-MM-DDTHH:MM:SS``, optionally followed by a three-digit fraction,
    optionally followed by ``Z``, ``+HHMM``/``-HHMM`` or ``+HH:MM``/``-HH:MM``),
    a ``{"$numberLong": ...}`` document of milliseconds, or a plain number of
    milliseconds since the epoch.

    Raises:
        ValueError: if a ``$date`` string carries an offset in none of the
            accepted formats.
    """
    if "$oid" in dct:
        return ObjectId(str(dct["$oid"]))
    if "$ref" in dct:
        return DBRef(dct["$ref"], dct["$id"], dct.get("$db", None))
    if "$date" in dct:
        dtm = dct["$date"]
        # mongoexport 2.6 and newer
        if isinstance(dtm, str):
            # datetime.datetime.strptime is new in python 2.5
            naive = datetime.datetime(
                *(time.strptime(dtm[:19], "%Y-%m-%dT%H:%M:%S")[0:6]))
            if dtm[19:20] in ("", "Z", "+", "-"):
                # No fractional seconds: whatever follows the seconds field
                # (possibly nothing) is the UTC offset.
                micros = 0
                offset = dtm[19:]
            else:
                # The %f format is new in python 2.6
                # Character 19 is the fraction separator; the next three
                # characters are milliseconds.
                micros = int(dtm[20:23]) * 1000
                offset = dtm[23:]
            aware = naive.replace(microsecond=micros, tzinfo=utc)
            if not offset or offset == 'Z':
                # UTC
                return aware
            else:
                if len(offset) == 5:
                    # Offset from mongoexport is in format (+|-)HHMM
                    secs = (int(offset[1:3]) * 3600 + int(offset[3:]) * 60)
                elif ':' in offset and len(offset) == 6:
                    # RFC-3339 format (+|-)HH:MM
                    hours, minutes = offset[1:].split(':')
                    secs = (int(hours) * 3600 + int(minutes) * 60)
                else:
                    # Not RFC-3339 compliant or mongoexport output.
                    raise ValueError("invalid format for offset")
                if offset[0] == "-":
                    secs *= -1
                # Subtracting the offset converts the local reading to UTC.
                return aware - datetime.timedelta(seconds=secs)
        # mongoexport 2.6 and newer, time before the epoch (SERVER-15275)
        elif isinstance(dtm, dict):
            secs = float(dtm["$numberLong"]) / 1000.0
        # mongoexport before 2.6
        else:
            secs = float(dtm) / 1000.0
        return EPOCH_AWARE + datetime.timedelta(seconds=secs)
    if "$regex" in dct:
        flags = 0
        # PyMongo always adds $options but some other tools may not.
        for opt in dct.get("$options", ""):
            flags |= _RE_OPT_TABLE.get(opt, 0)

        if compile_re:
            return re.compile(dct["$regex"], flags)
        else:
            return Regex(dct["$regex"], flags)
    if "$minKey" in dct:
        return MinKey()
    if "$maxKey" in dct:
        return MaxKey()
    if "$binary" in dct:
        # An integer $type is rewritten (in the dict) to two-digit hex text so
        # that integer and string inputs share the parsing below.
        if isinstance(dct["$type"], int):
            dct["$type"] = "%02x" % dct["$type"]
        subtype = int(dct["$type"], 16)
        if subtype >= 0xffffff80:  # Handle mongoexport values
            subtype = int(dct["$type"][6:], 16)
        return Binary(base64.b64decode(dct["$binary"].encode()), subtype)
    if "$code" in dct:
        return Code(dct["$code"], dct.get("$scope"))
    if bson.has_uuid() and "$uuid" in dct:
        return bson.uuid.UUID(dct["$uuid"])
    if "$undefined" in dct:
        return None
    if "$numberLong" in dct:
        # 2to3 will change this to int. PyMongo 3.0 supports
        # a new type, Int64, to avoid round trip issues.
        return int(dct["$numberLong"])
    if "$timestamp" in dct:
        tsp = dct["$timestamp"]
        return Timestamp(tsp["t"], tsp["i"])
    return dct
    ord(BSONBIN): _get_binary,
    ord(BSONUND): lambda u, v, w, x, y, z: (None, w),  # Deprecated undefined
    ord(BSONOID): _get_oid,
    ord(BSONBOO): _get_boolean,
    ord(BSONDAT): _get_date,
    ord(BSONNUL): lambda u, v, w, x, y, z: (None, w),
    ord(BSONRGX): _get_regex,
    ord(BSONREF): _get_ref,  # Deprecated DBPointer
    ord(BSONCOD): _get_code,
    ord(BSONSYM): _get_string,  # Deprecated symbol
    ord(BSONCWS): _get_code_w_scope,
    ord(BSONINT): _get_int,
    ord(BSONTIM): _get_timestamp,
    ord(BSONLON): _get_int64,
    ord(BSONDEC): _get_decimal128,
    ord(BSONMIN): lambda u, v, w, x, y, z: (MinKey(), w),
    ord(BSONMAX): lambda u, v, w, x, y, z: (MaxKey(), w)
}

if _USE_C:

    def _element_to_dict(data, view, position, obj_end, opts):
        return _cbson._element_to_dict(data, position, obj_end, opts)
else:

    def _element_to_dict(data, view, position, obj_end, opts):
        """Decode a single key, value pair."""
        element_type = data[position]
        position += 1
        element_name, position = _get_c_string(data, view, position, opts)
        try:
    BSONBIN: _get_binary,
    BSONUND: lambda v, w, x, y, z: (None, w),  # Deprecated undefined
    BSONOID: _get_oid,
    BSONBOO: _get_boolean,
    BSONDAT: _get_date,
    BSONNUL: lambda v, w, x, y, z: (None, w),
    BSONRGX: _get_regex,
    BSONREF: _get_ref,  # Deprecated DBPointer
    BSONCOD: _get_code,
    BSONSYM: _get_string,  # Deprecated symbol
    BSONCWS: _get_code_w_scope,
    BSONINT: _get_int,
    BSONTIM: _get_timestamp,
    BSONLON: _get_int64,
    BSONDEC: _get_decimal128,
    BSONMIN: lambda v, w, x, y, z: (MinKey(), w),
    BSONMAX: lambda v, w, x, y, z: (MaxKey(), w)
}


def _element_to_dict(data, position, obj_end, opts):
    """Decode a single key, value pair.

    Reads the element type at ``position``, then the element name through
    ``_get_c_string``, then looks up the matching entry of ``_ELEMENT_GETTER``
    and calls it to obtain the value and the next position. A ``KeyError``
    raised inside the ``try`` is reported through ``_raise_unknown_type``.

    NOTE(review): no ``return`` statement is visible in this excerpt, so as
    shown the function returns ``None`` after a successful decode; the
    snippet may be truncated - confirm against the full source.
    """
    # A one-element slice rather than an index; the result is used as the
    # lookup key for _ELEMENT_GETTER below.
    element_type = data[position:position + 1]
    position += 1
    element_name, position = _get_c_string(data, position, opts)
    try:
        # The try covers the getter call as well as the table lookup, so a
        # KeyError raised inside the getter takes the same path as an
        # unknown element type.
        value, position = _ELEMENT_GETTER[element_type](data, position,
                                                        obj_end, opts,
                                                        element_name)
    except KeyError:
        _raise_unknown_type(element_type, element_name)
 def test_minkey(self):
     """Pass a document holding a MinKey to ``self.round_trip``."""
     document = {"m": MinKey()}
     self.round_trip(document)
Beispiel #29
0
def presplit(host, database, collection, shardkey, shardnumber=None,
             chunkspershard=1, verbose=False):
    """
    Presplit chunks for sharding.

    Get information about the number of shards, then split chunks and
    distribute over shards. Currently assumes shardkey to be hex string, for
    example ObjectId or UUID.

    host: host and port to connect to, e.g. "192.168.0.1:27017",
          "localhost:30000"
    database: database name to enable sharding
    collection: collection name to shard
    shardkey: shardkey to pre-split on (must be hex string, e.g. ObjectId or
              UUID)
    shardnumber: if None, automatically presplit over all available shards.
                 if integer, only presplit over the given number of shards
                 (maximum is the number of actual shards)
    chunkspershard: multiplied by the number of shards used to get the total
                    number of splits
    verbose: if True, print progress and the final chunk distribution

    Returns None. Returns early (after the balancer has been stopped) when
    the collection is already sharded or only one shard exists.
    """
    con = Connection(host)
    namespace = '%s.%s' % (database, collection)

    # disable balancer
    con['config']['settings'].update({'_id': "balancer"},
                                     {'$set': {'stopped': True}}, upsert=True)

    # enable sharding on database if not yet enabled
    db_info = con['config']['databases'].find_one({'_id': database})
    if not db_info or db_info['partitioned'] is False:
        con['admin'].command(SON({'enableSharding': database}))

    # shard collection if not yet sharded
    coll_info = con['config']['collections'].find_one({'_id': namespace})
    if coll_info and not coll_info['dropped']:
        # if it is sharded already, quit. something is not right.
        if verbose:
            print("collection already sharded.")
        return
    else:
        con[database][collection].ensure_index(shardkey)
        con['admin'].command(SON({'shardCollection': namespace,
                                  'key': {shardkey: 1}}))

    # get shard number and names and calculate split points
    shards = list(con['config']['shards'].find())

    if len(shards) == 1:
        if verbose:
            print("only one shard found. no pre-splitting required.")
        return

    # limit number of shards if shardnumber given
    if shardnumber and shardnumber <= len(shards):
        shards = shards[:shardnumber]

    shard_names = [s['_id'] for s in shards]
    splits_total = len(shards) * chunkspershard
    # Floor division: range() and the "%x" format below need an int, and "/"
    # yields a float on Python 3.
    split_interval = 16**4 // splits_total
    # Split points are 4-digit hex prefixes spread evenly over 0000..ffff.
    split_points = ["%0.4x" % s for s in range(split_interval,
                                               splits_total * split_interval,
                                               split_interval)]

    # pre-splitting commands
    for s in split_points:
        con['admin'].command(SON([('split', namespace),
                                  ('middle', {shardkey: s})]))

    # MinKey is prepended so the chunk below the first split point is moved
    # as well.
    split_points = [MinKey()] + split_points

    # move chunks to shards (catch the one error where the chunk resides
    # on that shard already)
    for i, s in enumerate(split_points):
        # Chunks are assigned to shards round-robin.
        target_shard = shard_names[i % len(shards)]
        try:
            if verbose:
                print('moving chunk %s in collection %s to shard %s.'
                      % (s, namespace, target_shard))
            con['admin'].command(SON([('moveChunk', namespace),
                                      ('find', {shardkey: s}),
                                      ('to', target_shard)]))
        except OperationFailure as e:
            if verbose:
                print(e)

    if verbose:
        print('chunk distribution:', end=' ')
        chunk_group = (con['config']['chunks']
                       .group(key={'shard': 1}, condition={'ns': namespace},
                              initial={'nChunks': 0},
                              reduce=(" function (doc, out) "
                                      "{ out.nChunks++; } ")))
        print(', '.join(["%s: %i" % (ch['shard'], ch['nChunks'])
                         for ch in chunk_group]))
Beispiel #30
0
    BSONARR: _get_array,
    BSONBIN: _get_binary,
    BSONUND: lambda w, x, y, z: (None, x),  # Deprecated undefined
    BSONOID: _get_oid,
    BSONBOO: _get_boolean,
    BSONDAT: _get_date,
    BSONNUL: lambda w, x, y, z: (None, x),
    BSONRGX: _get_regex,
    BSONREF: _get_ref,  # Deprecated DBPointer
    BSONCOD: _get_code,
    BSONSYM: _get_string,  # Deprecated symbol
    BSONCWS: _get_code_w_scope,
    BSONINT: _get_int,
    BSONTIM: _get_timestamp,
    BSONLON: _get_int64,
    BSONMIN: lambda w, x, y, z: (MinKey(), x),
    BSONMAX: lambda w, x, y, z: (MaxKey(), x)}


def _element_to_dict(data, position, obj_end, opts):
    """Decode one element and return ``(name, value, next_position)``.

    The element type is the one-element slice of ``data`` at ``position``;
    the name is read next through ``_get_c_string`` and the value through
    the ``_ELEMENT_GETTER`` entry registered for that type.
    """
    type_marker = data[position:position + 1]
    name_start = position + 1
    element_name, value_start = _get_c_string(data, name_start)
    read_value = _ELEMENT_GETTER[type_marker]
    value, next_position = read_value(data, value_start, obj_end, opts)
    return element_name, value, next_position


def _elements_to_dict(data, position, obj_end, opts, subdocument=None):
    """Decode a BSON document."""