def test_regex(self):
    """Round-trip regex values and check bsonjs (de)serialization of them."""
    ignore_case_inputs = (
        re.compile("a*b", re.IGNORECASE),
        Regex("a*b", re.IGNORECASE),
    )
    for candidate in ignore_case_inputs:
        decoded = self.round_tripped({"r": candidate})["r"]
        self.assertEqual("a*b", decoded.pattern)

        decoded = self.round_tripped(
            {"r": Regex("a*b", re.IGNORECASE)})["r"]
        self.assertEqual("a*b", decoded.pattern)
        self.assertEqual(re.IGNORECASE, decoded.flags)

    # Every flag combined must come back unchanged.
    all_flags = re.I | re.M | re.S | re.U | re.X
    compiled = re.compile("a*b", all_flags)
    decoded = self.round_tripped({"r": compiled})["r"]
    self.assertEqual(all_flags, decoded.flags)

    # Some tools may not add $options if no flags are set.
    without_options = bsonjs_loads('{"r": {"$regex": "a*b"}}')['r']
    self.assertEqual(0, without_options.flags)

    with_options = bsonjs_loads(
        '{"r": {"$regex": ".*", "$options": "ilm"}}')['r']
    self.assertEqual(Regex(".*", "ilm"), with_options)

    # Order should be $regex then $options
    expected_json = '{ "regex" : { "$regex" : ".*", "$options" : "mx" } }'
    self.assertEqual(
        expected_json,
        bsonjs_dumps({"regex": Regex(".*", re.M | re.X)}))
    self.assertEqual(
        expected_json,
        bsonjs_dumps({"regex": re.compile(b".*", re.M | re.X)}))
def test_regex(self):
    """Round-trip regex values and check bsonjs (de)serialization of them."""
    ignore_case_inputs = (
        re.compile("a*b", re.IGNORECASE),
        Regex("a*b", re.IGNORECASE),
    )
    for candidate in ignore_case_inputs:
        decoded = self.round_tripped({"r": candidate})["r"]
        self.assertEqual("a*b", decoded.pattern)

        decoded = self.round_tripped(
            {"r": Regex("a*b", re.IGNORECASE)})["r"]
        self.assertEqual("a*b", decoded.pattern)
        self.assertEqual(re.IGNORECASE, decoded.flags)

    # Every flag combined must come back unchanged.
    all_flags = re.I | re.M | re.S | re.U | re.X
    compiled = re.compile("a*b", all_flags)
    decoded = self.round_tripped({"r": compiled})["r"]
    self.assertEqual(all_flags, decoded.flags)

    # Some tools may not add $options if no flags are set; loading such a
    # document is expected to raise ValueError here.
    # https://jira.mongodb.org/browse/CDRIVER-3773
    with self.assertRaises(ValueError):
        bsonjs_loads('{"r": {"$regex": "a*b"}}')

    with_options = bsonjs_loads(
        '{"r": {"$regex": ".*", "$options": "ilm"}}')['r']
    self.assertEqual(Regex(".*", "ilm"), with_options)

    # Order should be $regex then $options
    expected_json = '{ "regex" : { "$regex" : ".*", "$options" : "mx" } }'
    self.assertEqual(
        expected_json,
        bsonjs_dumps({"regex": Regex(".*", re.M | re.X)}))
    self.assertEqual(
        expected_json,
        bsonjs_dumps({"regex": re.compile(b".*", re.M | re.X)}))
def test_regex(self):
    """Round-trip regex values and check json_util (de)serialization."""
    ignore_case_inputs = (
        re.compile("a*b", re.IGNORECASE),
        Regex("a*b", re.IGNORECASE),
    )
    for candidate in ignore_case_inputs:
        decoded = self.round_tripped({"r": candidate})["r"]
        self.assertEqual("a*b", decoded.pattern)

        decoded = self.round_tripped(
            {"r": Regex("a*b", re.IGNORECASE)})["r"]
        self.assertEqual("a*b", decoded.pattern)
        self.assertEqual(re.IGNORECASE, decoded.flags)

    # Every flag combined must come back unchanged.
    all_flags = re.I | re.M | re.S | re.U | re.X
    compiled = re.compile("a*b", all_flags)
    decoded = self.round_tripped({"r": compiled})["r"]
    self.assertEqual(all_flags, decoded.flags)

    # Some tools may not add $options if no flags are set.
    without_options = json_util.loads('{"r": {"$regex": "a*b"}}')['r']
    self.assertEqual(0, without_options.flags)

    with_options = json_util.loads(
        '{"r": {"$regex": ".*", "$options": "ilm"}}')['r']
    self.assertEqual(Regex('.*', 'ilm'), with_options)

    # Check order: $regex must be emitted before $options.
    expected_json = '{"$regex": ".*", "$options": "mx"}'
    self.assertEqual(
        expected_json, json_util.dumps(Regex('.*', re.M | re.X)))
    self.assertEqual(
        expected_json, json_util.dumps(re.compile(b'.*', re.M | re.X)))
def test_sort_12(monty_sort, mongo_sort):
    """Descending sort on Regex values must order documents identically."""
    docs = [
        {"a": Regex("^a")},
        {"a": Regex("^b")},
    ]
    sort = [("a", -1)]

    monty_cursor = monty_sort(docs, sort)
    mongo_cursor = mongo_sort(docs, sort)

    # Walk both cursors in lockstep, one step per inserted document.
    for _ in docs:
        assert next(mongo_cursor)["_id"] == next(monty_cursor)["_id"]
def test_qop_nin_10(monty_find, mongo_find):
    """``$nin`` with a nested list holding a Regex: no document matches."""
    docs = [{"a": [Regex("*")]}]
    spec = {"a": {"$nin": [[Regex("*")]]}}

    monty_cursor = monty_find(docs, spec)
    mongo_cursor = mongo_find(docs, spec)

    assert mongo_cursor.count() == 0
    assert monty_cursor.count() == mongo_cursor.count()
def test_qop_lt_28(monty_find, mongo_find):
    """``$lt`` with a Regex operand must fail with OperationFailure."""
    stored_regex = Regex("^0")
    operand_regex = Regex("^a")

    docs = [{"a": stored_regex}]
    spec = {"a": {"$lt": operand_regex}}

    monty_cursor = monty_find(docs, spec)

    # Can't have RegEx as arg to predicate
    with pytest.raises(OperationFailure):
        next(monty_cursor)
def search_artists():
    """Search the artist collection using the submitted form fields.

    Every non-empty form field becomes a query criterion.  The two control
    fields ``name_search_condition`` and ``alias_search_condition`` are
    removed from the query; when one is a non-zero integer the matching
    field (``name`` / ``aliases.name``) is matched as a regular expression
    instead of by equality.

    A control field that is missing or empty is treated as ``0`` (exact
    match).  Previously this raised ``KeyError``, because empty form values
    are dropped before the flag is popped.

    Returns:
        Up to 50 matching documents sorted by ``rating.value`` descending,
        or an empty list when no criteria remain.

    Raises:
        ValueError: if a control field is present but not an integer.
    """
    # Empty values are dropped so that blank inputs do not constrain the query.
    params = {key: value for key, value in request.forms.items() if value}

    # Pop the flags unconditionally so they never leak into the Mongo query.
    name_as_regex = int(params.pop('name_search_condition', 0))
    if name_as_regex and 'name' in params:
        params['name'] = Regex(params['name'])

    alias_as_regex = int(params.pop('alias_search_condition', 0))
    if alias_as_regex and 'aliases.name' in params:
        params['aliases.name'] = Regex(params['aliases.name'])

    if not params:
        return []

    projection = {
        '_id': 0,
        'name': 1,
        'aliases.name': 1,
        'rating.value': 1,
    }
    cursor = collection.find(params, projection)
    return cursor.sort([('rating.value', -1)]).limit(50)
def salesReportByCountry(self, year):
    """Total ``extendedPrice`` per country and month for one year.

    Documents whose ``dateOfPurchase`` starts with ``str(year)`` are fetched
    through ``self.genericFetch`` and accumulated into a nested mapping
    ``{country: {month: total}}``.  The month key is ``dateOfPurchase[5:7]``
    (presumably the ``MM`` part of a ``YYYY-MM-DD`` string -- confirm against
    the stored data).
    """
    from bson.regex import Regex

    year_prefix = Regex('^' + str(year))
    query = {'dateOfPurchase': {'$regex': year_prefix}}
    proj = {
        'dateOfPurchase': 1,
        'extendedPrice': 1,
        'country': 1,
        '_id': 0,
    }
    order = [('country', 1), ('dateOfPurchase', 1)]

    totals = {}
    # Fold the flat result set into per-country, per-month sums.
    for doc in self.genericFetch(query, proj, order):
        country = doc['country']
        purchased_on = doc['dateOfPurchase']
        month = str(purchased_on[5:7])

        if country not in totals:
            totals[country] = {month: doc['extendedPrice']}
            continue

        by_month = totals[country]
        if month in by_month:
            amount = by_month[month] + doc['extendedPrice']
        else:
            amount = doc['extendedPrice']
        by_month[month] = amount

    return totals
def object_hook(dct, compile_re=True):
    """Convert a decoded JSON dict with a ``$``-prefixed marker to a BSON type.

    The markers are tested in a fixed order and the first one present wins.
    A dict without any known marker is returned unchanged.  When
    ``compile_re`` is true a ``$regex`` document becomes a compiled Python
    pattern, otherwise a ``Regex`` instance.
    """
    if "$oid" in dct:
        return ObjectId(str(dct["$oid"]))

    if "$ref" in dct:
        return DBRef(dct["$ref"], dct["$id"], dct.get("$db", None))

    if "$date" in dct:
        # $date is divided by 1000 and applied as seconds to EPOCH_AWARE.
        seconds = float(dct["$date"]) / 1000.0
        return EPOCH_AWARE + datetime.timedelta(seconds=seconds)

    if "$regex" in dct:
        re_flags = 0
        # PyMongo always adds $options but some other tools may not.
        for option_char in dct.get("$options", ""):
            re_flags |= _RE_OPT_TABLE.get(option_char, 0)
        if not compile_re:
            return Regex(dct["$regex"], re_flags)
        return re.compile(dct["$regex"], re_flags)

    if "$minKey" in dct:
        return MinKey()

    if "$maxKey" in dct:
        return MaxKey()

    if "$binary" in dct:
        # Normalise an integer $type to hex text (written back into dct).
        if isinstance(dct["$type"], int):
            dct["$type"] = "%02x" % dct["$type"]
        binary_subtype = int(dct["$type"], 16)
        if binary_subtype >= 0xffffff80:
            # Handle mongoexport values
            binary_subtype = int(dct["$type"][6:], 16)
        payload = base64.b64decode(dct["$binary"].encode())
        return Binary(payload, binary_subtype)

    if "$code" in dct:
        return Code(dct["$code"], dct.get("$scope"))

    if bson.has_uuid() and "$uuid" in dct:
        return bson.uuid.UUID(dct["$uuid"])

    return dct
def search(prefix: str, mongo_client: MongoClient, collection_name: str) -> Cursor:
    """Find documents whose ``DATA_FIELD`` starts with ``prefix``.

    The query runs against ``mongo_client[DB_NAME][collection_name]`` and
    projects ``DATA_FIELD`` only (``ID_FIELD`` is excluded).

    Raises:
        IOError: if ``mongo_client`` is ``None``.
    """
    if mongo_client is None:
        raise IOError("Failed to connect to the DB.")

    starts_with = Regex(f'^{prefix}')
    target = mongo_client[DB_NAME][collection_name]
    criteria = {DATA_FIELD: {"$regex": starts_with}}
    projection = {DATA_FIELD: 1, ID_FIELD: 0}
    return target.find(criteria, projection)
def object_hook(dct, json_options=DEFAULT_JSON_OPTIONS):
    """Convert a decoded JSON dict with a ``$``-prefixed marker to a BSON type.

    The markers are tested in a fixed order and the first one present wins.
    A dict without any known marker is returned unchanged.  ``json_options``
    is forwarded to the ``$date`` and ``$binary`` helpers.
    """
    if "$oid" in dct:
        return ObjectId(str(dct["$oid"]))

    if "$ref" in dct:
        return DBRef(dct["$ref"], dct["$id"], dct.get("$db", None))

    if "$date" in dct:
        return _get_date(dct, json_options)

    if "$regex" in dct:
        re_flags = 0
        # PyMongo always adds $options but some other tools may not.
        for option_char in dct.get("$options", ""):
            re_flags |= _RE_OPT_TABLE.get(option_char, 0)
        return Regex(dct["$regex"], re_flags)

    if "$minKey" in dct:
        return MinKey()

    if "$maxKey" in dct:
        return MaxKey()

    if "$binary" in dct:
        return _get_binary(dct, json_options)

    if "$code" in dct:
        return Code(dct["$code"], dct.get("$scope"))

    if "$uuid" in dct:
        return uuid.UUID(dct["$uuid"])

    if "$undefined" in dct:
        # $undefined is mapped to None.
        return None

    if "$numberLong" in dct:
        return Int64(dct["$numberLong"])

    if "$timestamp" in dct:
        stamp = dct["$timestamp"]
        return Timestamp(stamp["t"], stamp["i"])

    if "$numberDecimal" in dct:
        return Decimal128(dct["$numberDecimal"])

    return dct
async def test_command_with_regex(self, test_db):
    """Regex values returned by ``command('aggregate', ...)`` are ``Regex``."""
    # Store one compiled Python pattern and one bson Regex.
    await test_db.test.insert_one({'r': re.compile('.*')})
    await test_db.test.insert_one({'r': Regex('.*')})

    reply = await test_db.command('aggregate', 'test', pipeline=[])

    for returned in reply['result']:
        assert isinstance(returned['r'], Regex)
def test_validate_regex_valid_regex():
    """A ``Regex`` passed to a ``Regex``-typed field is kept as an equal Regex."""

    class MyModel(Model):
        field: Regex

    expected = Regex("^.*$")
    model = MyModel(field=expected)

    assert isinstance(model.field, Regex)
    assert model.field == expected
def query_nlike(self, value):
    """Build a query matching fields that do NOT contain ``value``.

    Only the first element of a list ``value`` is used.  The match is
    case-insensitive and ``value`` is inserted into the pattern unescaped.
    """
    # WILDCARD NOT CONTAINS
    needle = value[0] if isinstance(value, list) else value
    contains = Regex('.*' + needle + '.*', 'i')
    return MongoQuery({self.field: {'$not': contains}})
def query_nend(self, value):
    """Build a query matching fields that do NOT end with ``value``.

    Only the first element of a list ``value`` is used.  The match is
    case-insensitive and ``value`` is inserted into the pattern unescaped,
    so regex metacharacters in it are interpreted as regex syntax.

    Returns:
        A ``MongoQuery`` wrapping ``{field: {'$not': <regex>}}``.
    """
    # DOESN'T END WITH
    if isinstance(value, list):
        value = value[0]
    # The prefix must be '.*' (any leading text).  The previous '*.' began
    # the pattern with a quantifier that had nothing to repeat, which is
    # not a valid regular expression.
    ends_with = Regex('.*' + value + '$', 'i')
    return MongoQuery({self.field: {'$not': ends_with}})
def query_nstart(self, value):
    """Build a query matching fields that do NOT start with ``value``.

    Only the first element of a list ``value`` is used.  The match is
    case-insensitive and ``value`` is inserted into the pattern unescaped.
    """
    # DOESN'T START WITH
    needle = value[0] if isinstance(value, list) else value
    starts_with = Regex('^' + needle + '.*', 'i')
    return MongoQuery({self.field: {'$not': starts_with}})
def bbc_articles():
    """Return the serialized articles matching the request's filters.

    ``title`` and ``tag`` query arguments (default: empty string) are used
    as regular expressions; the scraped date comes from
    ``get_scraped_date(request)``.  All three conditions must hold.
    """
    # TODO consider adding more query parameters
    title = request.args.get('title', '')
    tag = request.args.get('tag', '')
    scraped_date = get_scraped_date(request)

    conditions = [
        {'title': Regex(title)},
        {'tag': Regex(tag)},
        {'scraped_date': scraped_date},
    ]
    matches = collection.find({"$and": conditions})
    return dumps(matches)
def _get_regex(
    data: Any, view: Any, position: int, dummy0: Any, opts: CodecOptions, dummy1: Any
) -> Tuple[Regex, int]:
    """Decode a BSON regex to bson.regex.Regex.

    Two consecutive C strings are read starting at ``position``: the pattern
    first, then the flags.  Returns the ``Regex`` together with the position
    following the second string.
    """
    pattern, after_pattern = _get_c_string(data, view, position, opts)
    bson_flags, after_flags = _get_c_string(data, view, after_pattern, opts)
    return Regex(pattern, bson_flags), after_flags
def _get_regex(data, position, as_class, tz_aware, uuid_subtype, compile_re):
    """Decode a BSON regex starting at ``position``.

    Two consecutive C strings are read: the pattern first, then the flags.
    Returns ``(value, new_position)`` where ``value`` is the result of
    ``Regex.try_compile()`` when ``compile_re`` is true and the ``Regex``
    itself otherwise.
    """
    pattern, after_pattern = _get_c_string(data, position)
    bson_flags, after_flags = _get_c_string(data, after_pattern)
    decoded = Regex(pattern, bson_flags)
    if not compile_re:
        return decoded, after_flags
    return decoded.try_compile(), after_flags
def _parse_canonical_regex(doc): """Decode a JSON regex to bson.regex.Regex.""" regex = doc['$regularExpression'] if len(doc) != 1: raise TypeError('Bad $regularExpression, extra field(s): %s' % (doc, )) if len(regex) != 2: raise TypeError('Bad $regularExpression must include only "pattern"' 'and "options" components: %s' % (doc, )) return Regex(regex['pattern'], regex['options'])
def test_command_with_regex(self):
    """Regex values returned by ``command('aggregate', ...)`` are ``Regex``."""
    database = self.client.pymongo_test
    database.test.drop()

    # Store one compiled Python pattern and one bson Regex.
    database.test.insert_one({'r': re.compile('.*')})
    database.test.insert_one({'r': Regex('.*')})

    reply = database.command('aggregate', 'test', pipeline=[])

    for returned in reply['result']:
        self.assertTrue(isinstance(returned['r'], Regex))
def test_qop_type_11(monty_find, mongo_find):
    """``$type: 11`` matches both ``Regex`` and compiled Python patterns."""
    docs = [
        {"a": Regex("^a")},
        {"a": re.compile("^a")},
    ]
    spec = {"a": {"$type": 11}}  # regex

    monty_cursor = monty_find(docs, spec)
    mongo_cursor = mongo_find(docs, spec)

    assert mongo_cursor.count() == 2
    assert monty_cursor.count() == mongo_cursor.count()
def test_sort_19(monty_sort, mongo_sort):
    """Ascending sort over mixed BSON types must match MongoDB's order."""
    docs = [
        # arrays, null, embedded documents and numbers
        {"a": ["x", True]},
        {"a": None},
        {"a": []},
        {"a": [5, []]},
        {"a": {"s": 7}},
        {"a": {"s": [9]}},
        {"a": {"s": 10}},
        {"a": 6},
        {"a": 4},
        {"a": [5, None]},
        {"a": [5, [1]]},
        {"a": [Decimal128("4.5"), Binary(b"0")]},
        {"a": [{"s": 5}, False]},
        {"a": [{"s": 9}]},
        {"a": [True, "y"]},
        # binary data and strings
        {"a": Binary(b"a")},
        {"a": b"bytes"},
        {"a": ["abc"]},
        {"a": "banana"},
        {"a": "appple"},
        # regex and code
        {"a": [Regex("^a", "ix")]},
        {"a": Regex("^b")},
        {"a": Code("x", {"m": 0})},
        {"a": Code("y")},
        {"a": Code("y", {})},
        {"a": Code("y", {"m": 0})},
        # min/max keys, timestamps, object ids and dates
        {"a": MinKey()},
        {"a": MaxKey()},
        {"a": Timestamp(0, 1)},
        {"a": Timestamp(1, 1)},
        {"a": ObjectId(b"000000000000")},
        {"a": ObjectId(b"000000000001")},
        {"a": datetime(1900, 1, 1)},
        {"a": datetime(1900, 1, 2)},
    ]
    sort = [("a", 1)]

    monty_cursor = monty_sort(docs, sort)
    mongo_cursor = mongo_sort(docs, sort)

    # Walk both cursors in lockstep, one step per inserted document.
    for _ in docs:
        assert next(mongo_cursor)["_id"] == next(monty_cursor)["_id"]
def _parse_legacy_regex(doc):
    """Decode a legacy ``{"$regex": ..., "$options": ...}`` document.

    When ``$regex`` already holds a ``Regex`` the document is the ``$regex``
    query operator and is returned untouched.  Otherwise the option letters
    are folded into integer flags and a ``Regex`` is returned.
    """
    pattern = doc["$regex"]
    # Check if this is the $regex query operator.
    if isinstance(pattern, Regex):
        return doc

    re_flags = 0
    # PyMongo always adds $options but some other tools may not.
    for option_char in doc.get("$options", ""):
        re_flags |= _RE_OPT_TABLE.get(option_char, 0)
    return Regex(pattern, re_flags)
def test_validate_pattern_valid_bson_regex():
    """A bson ``Regex`` given to a ``Pattern`` field becomes a ``Pattern``."""

    class MyModel(Model):
        field: Pattern

    source = Regex("^.*$", flags="im")
    # Integer flags that Python's re reports for the same I | M combination.
    expected_flags = re.compile("", flags=re.I | re.M).flags

    model = MyModel(field=source)

    assert isinstance(model.field, Pattern)
    assert model.field.pattern == source.pattern
    assert model.field.flags == expected_flags
def object_hook(dct, compile_re=True):
    """Convert a decoded JSON dict with a ``$``-prefixed marker to a BSON type.

    The markers are tested in a fixed order and the first one present wins.
    A dict without any known marker is returned unchanged.  When
    ``compile_re`` is true a ``$regex`` document becomes a compiled Python
    pattern, otherwise a ``Regex`` instance.
    """
    if "$oid" in dct:
        return ObjectId(str(dct["$oid"]))

    if "$numberLong" in dct:
        return int(dct["$numberLong"])

    if "$decimal" in dct:
        decimal_text = str(dct["$decimal"])
        if "$precision" not in dct:
            return Decimal(decimal_text)
        # $precision is indexed as [precision, scale].
        precision_spec = dct["$precision"]
        return Decimal(decimal_text, precision_spec[0], precision_spec[1])

    if "$ref" in dct:
        return DBRef(dct["$ref"], dct["$id"], dct.get("$db", None))

    if "$date" in dct:
        try:
            # Numeric form: divided by 1000, applied as seconds to EPOCH_AWARE.
            seconds = float(dct["$date"]) / 1000.0
            return EPOCH_AWARE + datetime.timedelta(seconds=seconds)
        except ValueError:
            # Otherwise fall back to a "YYYY-MM-DD" string.
            return datetime.datetime.strptime(dct["$date"], "%Y-%m-%d")

    if "$timestamp" in dct:
        try:
            millis = long_type(dct["$timestamp"])
            return Timestamp(millis / 1000, millis % 1000 * 1000)
        except ValueError:
            # Otherwise fall back to a formatted date-time string.
            parsed = datetime.datetime.strptime(dct["$timestamp"],
                                                "%Y-%m-%d-%H.%M.%S.%f")
            seconds = long_type(time.mktime(parsed.timetuple()))
            return Timestamp(seconds, parsed.microsecond)

    if "$regex" in dct:
        re_flags = 0
        # PyMongo always adds $options but some other tools may not.
        for option_char in dct.get("$options", ""):
            re_flags |= _RE_OPT_TABLE.get(option_char, 0)
        if not compile_re:
            return Regex(dct["$regex"], re_flags)
        return re.compile(dct["$regex"], re_flags)

    if "$minKey" in dct:
        return MinKey()

    if "$maxKey" in dct:
        return MaxKey()

    if "$binary" in dct:
        # Normalise an integer $type to decimal text (written back into dct).
        if isinstance(dct["$type"], int):
            dct["$type"] = "%d" % dct["$type"]
        binary_subtype = int(dct["$type"])
        payload = base64.b64decode(dct["$binary"].encode())
        return Binary(payload, binary_subtype)

    if "$code" in dct:
        return Code(dct["$code"], dct.get("$scope"))

    if bson.has_uuid() and "$uuid" in dct:
        return bson.uuid.UUID(dct["$uuid"])

    return dct
def test_qop_regex_9(monty_find, mongo_find):
    """A ``Regex`` used directly as the field condition matches the string."""
    docs = [{"a": "apple"}]
    spec = {"a": Regex("^a")}

    monty_cursor = monty_find(docs, spec)
    mongo_cursor = mongo_find(docs, spec)

    assert mongo_cursor.count() == 1
    assert monty_cursor.count() == mongo_cursor.count()
def test_qop_nin_12(monty_find, mongo_find):
    """``$nin`` with an invalid regex must fail with OperationFailure."""
    docs = [{"a": "apple"}]
    spec = {"a": {"$nin": [Regex("*")]}}

    monty_cursor = monty_find(docs, spec)

    # Regular expression is invalid
    with pytest.raises(OperationFailure):
        next(monty_cursor)
def _parse_canonical_regex(doc): """Decode a JSON regex to bson.regex.Regex.""" regex = doc['$regularExpression'] if len(doc) != 1: raise TypeError('Bad $regularExpression, extra field(s): %s' % (doc,)) if len(regex) != 2: raise TypeError('Bad $regularExpression must include only "pattern"' 'and "options" components: %s' % (doc,)) opts = regex['options'] if not isinstance(opts, str): raise TypeError('Bad $regularExpression options, options must be ' 'string, was type %s' % (type(opts))) return Regex(regex['pattern'], opts)
def test_qop_not_4(monty_find, mongo_find):
    """``$not`` with a Regex that matches the value excludes the document."""
    docs = [{"a": "apple"}]
    spec = {"a": {"$not": Regex("^a")}}

    monty_cursor = monty_find(docs, spec)
    mongo_cursor = mongo_find(docs, spec)

    assert mongo_cursor.count() == 0
    assert monty_cursor.count() == mongo_cursor.count()