def test_bson_regex(self):
    """Regex values round-trip through BSON encode/decode, with and without flags.

    NOTE(review): uses the py2/py3 ``b()`` string helper and combines
    re.L/re.U with a str pattern -- this looks Python-2 era; confirm it is
    still exercised on current interpreters.
    """
    # Invalid Python regex, though valid PCRE.
    bson_re1 = Regex(r'[\w-\.]')
    self.assertEqual(r'[\w-\.]', bson_re1.pattern)
    self.assertEqual(0, bson_re1.flags)

    doc1 = {'r': bson_re1}
    doc1_bson = b('\x11\x00\x00\x00'  # document length
                  '\x0br\x00[\\w-\\.]\x00\x00'  # r: regex
                  '\x00')  # document terminator

    self.assertEqual(doc1_bson, BSON.encode(doc1))
    # compile_re=False: the pattern is not a valid Python regex, so decoding
    # must return a bson Regex rather than calling re.compile().
    self.assertEqual(doc1, BSON(doc1_bson).decode(compile_re=False))

    # Valid Python regex, with flags.
    re2 = re.compile('.*', re.I | re.L | re.M | re.S | re.U | re.X)
    bson_re2 = Regex('.*', re.I | re.L | re.M | re.S | re.U | re.X)
    doc2_with_re = {'r': re2}
    doc2_with_bson_re = {'r': bson_re2}
    doc2_bson = b("\x12\x00\x00\x00"  # document length
                  "\x0br\x00.*\x00ilmsux\x00"  # r: regex
                  "\x00")  # document terminator

    # A native pattern and an equivalent Regex encode to identical bytes.
    self.assertEqual(doc2_bson, BSON.encode(doc2_with_re))
    self.assertEqual(doc2_bson, BSON.encode(doc2_with_bson_re))

    # Built-in re objects don't support ==. Compare pattern and flags.
    self.assertEqual(re2.pattern, BSON(doc2_bson).decode()['r'].pattern)
    self.assertEqual(re2.flags, BSON(doc2_bson).decode()['r'].flags)
    self.assertEqual(doc2_with_bson_re,
                     BSON(doc2_bson).decode(compile_re=False))
def inner_match_fields(_fields, _matched_fields, _reference=None):
    """
    Get list of matched fields inside the reference fields.

    Mutates ``_matched_fields`` in place (the list doubles as the result) and
    recurses into nested 'ref' and 'ref-section-field' entries.

    NOTE(review): ``possible_regex_list`` is read from the enclosing scope;
    this function is only meaningful nested where that name is defined.

    Args:
        _fields: list of referenced fields
        _matched_fields: list of text regex from the pipeline builder
        _reference: reference object to which the reference field refers
    Returns:
        list of fields where the regex matched
    """
    for regex_ in possible_regex_list:
        try:
            # Prefer a compiled PCRE-style pattern ('ims' options); fall back
            # to the raw pattern text if compilation fails.
            runtime_regex = Regex(regex_, 'ims').try_compile()
        except Exception:
            runtime_regex = regex_
        for field in _fields:
            try:
                res = runtime_regex.findall(str(field.get('value')))
                if len(res) > 0:
                    # Credit the enclosing reference (when given) instead of
                    # the leaf field.
                    inner_value = _reference if _reference else field
                    # removing duplicated from list
                    if inner_value not in _matched_fields:
                        _matched_fields.append(inner_value)
                if field['type'] == 'ref':
                    inner_match_fields(field['reference']['summaries'],
                                       _matched_fields, field)
                if field['type'] == 'ref-section-field':
                    inner_match_fields(field['references']['fields'],
                                       _matched_fields, field)
            except Exception:
                # Best-effort scan: skip fields that cannot be matched.
                continue
def find_match_fields(result: R, possible_regex_list=None):
    """Collect the fields of *result* whose value matches any pipeline regex.

    Args:
        result: Generic search result
        possible_regex_list: list of text regex from the pipeline builder
    Returns:
        list of fields where the regex matched, or None when nothing matched
        (or no regex list was supplied)
    """
    matched = []
    candidate_fields = result.fields
    if not possible_regex_list:
        return None

    for pattern_text in possible_regex_list:
        # Prefer a compiled 'imsx' pattern; fall back to the raw text.
        try:
            matcher = Regex(pattern_text, 'imsx').try_compile()
        except Exception:
            matcher = pattern_text
        for candidate in candidate_fields:
            try:
                if matcher.findall(str(candidate.get('value'))):
                    matched.append(candidate)
            except Exception:
                # Best-effort scan: ignore fields that cannot be matched.
                continue

    return matched if matched else None
def test_bson_classes(self):
    """Each BSON value class matches an equal instance of itself, whether it
    comes from MockupDB or from PyMongo, in both Command positions."""
    _id = '5a918f9fa08bff9c7688d3e1'
    pairs = [
        (Binary(b'foo'), Binary(b'foo')),
        (Code('foo'), Code('foo')),
        (Code('foo', {'x': 1}), Code('foo', {'x': 1})),
        (DBRef('coll', 1), DBRef('coll', 1)),
        (DBRef('coll', 1, 'db'), DBRef('coll', 1, 'db')),
        (Decimal128('1'), Decimal128('1')),
        (MaxKey(), MaxKey()),
        (MinKey(), MinKey()),
        (ObjectId(_id), ObjectId(_id)),
        (Regex('foo', 'i'), Regex('foo', 'i')),
        (Timestamp(1, 2), Timestamp(1, 2)),
    ]
    for pymongo_obj, mockup_obj in pairs:
        # Basic case.
        self.assertTrue(
            Matcher(Command(y=mockup_obj)).matches(Command(y=mockup_obj)),
            "MockupDB %r doesn't equal itself" % (mockup_obj, ))

        # First Command argument is special, try comparing the second also.
        self.assertTrue(
            Matcher(Command('x', y=mockup_obj)).matches(
                Command('x', y=mockup_obj)),
            "MockupDB %r doesn't equal itself" % (mockup_obj, ))

        # In practice, users pass PyMongo classes in message specs.
        self.assertTrue(
            Matcher(Command(y=mockup_obj)).matches(Command(y=pymongo_obj)),
            "PyMongo %r != MockupDB %r" % (pymongo_obj, mockup_obj))

        self.assertTrue(
            Matcher(Command('x', y=mockup_obj)).matches(
                Command('x', y=pymongo_obj)),
            "PyMongo %r != MockupDB %r" % (pymongo_obj, mockup_obj))
def test_bson_regex(self):
    """Regex values round-trip through BSON encode/decode, with and without
    flags, and agree with hand-written BSON byte fixtures."""
    # Invalid Python regex, though valid PCRE.
    bson_re1 = Regex(r'[\w-\.]')
    self.assertEqual(r'[\w-\.]', bson_re1.pattern)
    self.assertEqual(0, bson_re1.flags)

    doc1 = {'r': bson_re1}
    doc1_bson = (
        b'\x11\x00\x00\x00'  # document length
        b'\x0br\x00[\\w-\\.]\x00\x00'  # r: regex
        b'\x00')  # document terminator

    self.assertEqual(doc1_bson, BSON.encode(doc1))
    self.assertEqual(doc1, BSON(doc1_bson).decode())

    # Valid Python regex, with flags.
    re2 = re.compile(u'.*', re.I | re.M | re.S | re.U | re.X)
    bson_re2 = Regex(u'.*', re.I | re.M | re.S | re.U | re.X)

    doc2_with_re = {'r': re2}
    doc2_with_bson_re = {'r': bson_re2}
    doc2_bson = (
        b"\x11\x00\x00\x00"  # document length
        b"\x0br\x00.*\x00imsux\x00"  # r: regex
        b"\x00")  # document terminator

    # A native pattern and an equivalent Regex encode to identical bytes.
    self.assertEqual(doc2_bson, BSON.encode(doc2_with_re))
    self.assertEqual(doc2_bson, BSON.encode(doc2_with_bson_re))

    # Built-in re objects don't support ==; compare pattern and flags.
    self.assertEqual(re2.pattern, BSON(doc2_bson).decode()['r'].pattern)
    self.assertEqual(re2.flags, BSON(doc2_bson).decode()['r'].flags)
def test_regex_from_native(self):
    """from_native copies both pattern text and flags from a compiled re."""
    # Pattern text is preserved; a flag-less pattern maps to flags == 0.
    self.assertEqual('.*', Regex.from_native(re.compile('.*')).pattern)
    self.assertEqual(0, Regex.from_native(re.compile(b'')).flags)

    combined_flags = re.I | re.L | re.M | re.S | re.X
    byte_pattern = re.compile(b'', combined_flags)
    self.assertEqual(combined_flags, Regex.from_native(byte_pattern).flags)

    text_pattern = re.compile('', re.U)
    self.assertEqual(re.U, Regex.from_native(text_pattern).flags)
def test_regex_from_native(self):
    """from_native preserves the source pattern's text and its flag set."""
    self.assertEqual('.*', Regex.from_native(re.compile('.*')).pattern)
    self.assertEqual(0, Regex.from_native(re.compile(b'')).flags)

    # All byte-compatible flags survive the conversion together.
    many_flags = re.I | re.L | re.M | re.S | re.X
    self.assertEqual(many_flags,
                     Regex.from_native(re.compile(b'', many_flags)).flags)

    # So does the Unicode flag on a text pattern.
    self.assertEqual(re.U, Regex.from_native(re.compile('', re.U)).flags)
def test_regex_pickling(self):
    """A Regex survives a pickle round-trip against a fixed payload."""
    reg = Regex(".?")
    # Bytes produced by pickling Regex(".?") under Python 3 (protocol 4);
    # kept literal to guard against accidental serialization drift.
    pickled_with_3 = (b'\x80\x04\x959\x00\x00\x00\x00\x00\x00\x00\x8c\n'
                      b'bson.regex\x94\x8c\x05Regex\x94\x93\x94)\x81\x94}'
                      b'\x94(\x8c\x07pattern\x94\x8c\x02.?\x94\x8c\x05flag'
                      b's\x94K\x00ub.')
    self.round_trip_pickle(reg, pickled_with_3)
async def get_flows_and_count_db(
        *,
        current_page: int,
        page_size: int,
        sorter: str = None,
        flow_name: str,
        language: str,
        updated_at: list[date],
        triggered_counts: list[int]) -> tuple[list[FlowSchemaDb], int]:
    """Fetch one page of active flows plus the total match count.

    Args:
        current_page: 1-based page index.
        page_size: number of flows per page.
        sorter: optional sort specification forwarded to get_flows_db.
        flow_name: substring matched case-insensitively against flow names.
        language: NOTE(review): not used in the query -- confirm intent.
        updated_at: optional [start, end] date pair bounding updated_at.
        triggered_counts: optional [min, max] pair for triggered_count.

    Returns:
        (flows for the requested page, total number of matching flows).
    """
    if updated_at:
        updated_at_start, updated_at_end = updated_at
    # "..." entries are placeholders for absent criteria; form_query is
    # expected to drop them.  (Fixed: the key strings had pointless f-string
    # prefixes, and the return annotation was a tuple *expression* rather
    # than the type `tuple[...]`.)
    db_key = [("name", {
        "$ne": None
    }),
              ("name",
               Regex(f".*{escape(flow_name)}.*", "i") if flow_name else ...),
              ("triggered_count", {
                  "$gte": triggered_counts[0],
                  "$lte": triggered_counts[1]
              } if triggered_counts else ...), ("is_active", True),
              ("updated_at", {
                  "$gte": make_timezone_aware(updated_at_start),
                  "$lte": make_timezone_aware(updated_at_end)
              } if updated_at else ...)]
    query = form_query(db_key)
    flows = await get_flows_db(current_page=current_page,
                               page_size=page_size,
                               sorter=sorter,
                               query=query)
    total = await get_flows_count_db(query=query)
    return flows, total
def gen_regex_search_query(search_query):
    """Build a BSON Regex for a case-insensitive, non-Unicode search.

    TODO: sanitize this user input before querying the DB with it -- an
    attacker-controlled pattern can be invalid or arbitrarily expensive.

    Args:
        search_query: raw regex text supplied by the user.

    Returns:
        bson.Regex with IGNORECASE set and UNICODE cleared.
    """
    pattern = re.compile(search_query)
    regex = Regex.from_native(pattern)
    # Fixed: the previous XOR toggles (flags ^= ...) only behaved correctly
    # because a str pattern always starts with re.UNICODE set and never has
    # re.IGNORECASE here; clear/set explicitly so intent can't invert.
    regex.flags &= ~re.UNICODE
    regex.flags |= re.IGNORECASE
    return regex
def test_exception_wrapping(self):
    """Decoding errors surface as InvalidBSON with the original traceback."""
    # No matter what exception is raised while trying to decode BSON,
    # the final exception always matches InvalidBSON and the original
    # traceback is preserved.

    # Invalid Python regex, though valid PCRE.
    # Causes an error in re.compile().
    bad_doc = BSON.encode({'r': Regex(r'[\w-\.]')})

    try:
        decode_all(bad_doc)
    except InvalidBSON:
        exc_type, exc_value, exc_tb = sys.exc_info()
        # Original re error was captured and wrapped in InvalidBSON.
        self.assertEqual(exc_value.args[0], 'bad character range')

        # Traceback includes bson module's call into re module.
        for filename, lineno, fname, text in traceback.extract_tb(exc_tb):
            if filename.endswith('re.py') and fname == 'compile':
                # Traceback was correctly preserved.
                break
        else:
            self.fail('Traceback not captured')
    else:
        self.fail('InvalidBSON not raised')
def get_response_statements(self): """ Return only statements that are in response to another statement. A statement must exist which lists the closest matching statement in the in_response_to field. Otherwise, the logic adapter may find a closest matching statement that does not have a known response. """ # '程序员涤生' 原来的逻辑是根据in_response_to字段来判断是否是问题,如果一个句子出现在了其他句子的in_response_to字段中,那么该句子可以做为问题, # 因此需要先查出in_response_to字段中的text,然后查出在这些text集合中的句子,做为问题,这样的效率非常慢, # 通过在句子中加入Q和A标记,我们可以利用正则来直接匹配出表示问题的句子, # 并且我们只返回text字段,大大提升了查询的效率。 pattern = re.compile('^Q ') regex = Regex.from_native(pattern) # response_query = self.statements.find({'text': 'Q 今天天气怎么样?'}, {'text': 1}) response_query = self.statements.find({'text': {'$regex': regex}}, {'text': 1}) statement_objects = [] statement_vec = [] import datetime as dt starttime2 = dt.datetime.now() for r in response_query: try: # 此处考虑直接使用text对应的向量,从系统启动时就构建好的text-vec索引文件中获取 text_vec_indx = IntentClassifier().text_vec_indx vec = text_vec_indx.get(r['text'],None) if vec is not None: # 注意:下面这两个数组一定要保证长度一样,否则计算相似度的时候根据索引来取原文本会出先位置偏移,导致无法获取正确的答案!! statement_vec.append(vec) statement_objects.append(self.mongo_to_object({'text': r['text']})) except Exception as e: logging.warning("出现异常%s,问题句子为:%s", str(e), r['text']) endtime2 = dt.datetime.now() logging.debug("===========get_response_statements的for循环构造向量耗时: %s秒", (endtime2 - starttime2).seconds) return statement_objects, statement_vec
def query_nlike(self, value):
    """WILDCARD NOT CONTAINS: field must not contain *value* (case-insensitive)."""
    term = value[0] if isinstance(value, list) else value
    pattern = Regex('.*' + term + '.*', 'i')
    return MongoQuery({self.field: {'$not': pattern}})
def query_nend(self, value):
    """DOESN'T END WITH: field must not end with *value* (case-insensitive).

    Args:
        value: the suffix to exclude; a list is reduced to its first element.

    Returns:
        MongoQuery negating an anchored suffix regex on self.field.
    """
    if isinstance(value, list):
        value = value[0]
    # Fixed: the pattern previously started with '*.', which is an invalid
    # regex (a quantifier with nothing to repeat); the intended leading
    # wildcard is '.*', matching the sibling query_nlike/query_nstart.
    return MongoQuery(
        {self.field: {
            '$not': Regex('.*' + value + '$', 'i')
        }})
async def get_portal_user(username: str):
    """
    Retrieve the active portal user matching *username* (case-insensitive).

    Args:
        username: exact username to look up; matched anchored and escaped.

    Returns:
        The first matching user document, or None when nothing matches.
    """
    # Local import keeps this fix self-contained in the function.
    from re import escape

    # Fixed: the raw username used to be interpolated into the regex, so
    # metacharacters (e.g. '.', '+') could broaden the match or make the
    # pattern invalid; escape it so only the literal name matches.
    query = {"username": Regex(f"^{escape(username)}$", "i"), "is_active": True}
    async for user in portal_user_collection.find(query):
        return user
def query_nstart(self, value):
    """DOESN'T START WITH: field must not begin with *value* (case-insensitive)."""
    term = value[0] if isinstance(value, list) else value
    pattern = Regex('^' + term + '.*', 'i')
    return MongoQuery({self.field: {'$not': pattern}})
def test_jsonify_Regex(self):
    """jsonify renders a bson Regex as its MongoDB extended-JSON form."""
    pattern = Regex("bb|[^b]{2}")
    payload = {'a': 1, 'regex': pattern}
    # The expected plain-JSON shape for a Regex with no options.
    expected = {'a': 1, 'regex': {'$regex': "bb|[^b]{2}", "$options": ""}}
    rendered = jsonify(payload).response
    reference = flask_jsonify(expected).response
    assert rendered == reference
async def find_files_in_directory(self, path):
    """Return the string ids of records whose absolute_path starts with *path*."""
    # Anchor at the start and escape the path so it matches literally.
    anchored = re.compile('^' + re.escape(path))
    prefix_regex = Regex.from_native(anchored)
    # str patterns always carry re.UNICODE; XOR clears it before the regex
    # is serialized into the query.
    prefix_regex.flags ^= re.UNICODE
    matches = []
    async for record in self._db.find({'absolute_path': {
            '$regex': prefix_regex
    }}):
        matches.append(str(record['_id']))
    return matches
def extract_condition(val):
    # NOTE(review): `key` is read from the enclosing scope -- this function
    # only makes sense nested where `key` is defined.
    if isinstance(val, dict):
        for code, sval in val.items():
            # Values written as '/pattern/' are treated as regexes; the
            # surrounding slashes are stripped.  Assumes sval is a non-empty
            # string ending with '/' -- TODO confirm upstream validation.
            if sval[0] == '/':
                val[code] = Regex(sval[1:-1])
        return Condition(key, val)
    elif val == 1:
        # 1 means "the field must exist".
        return Condition(key, modifier='exists')
    elif val == 0:
        # 0 means "the field must not exist".
        return Condition(key, modifier='not_exists')
    # Implicitly returns None for any other scalar value.
def test_regex_comparison(self):
    """Regex equality compares both the pattern and the flags."""
    # Different patterns are unequal.
    self.assertNotEqual(Regex('a'), Regex('b'))
    # Same pattern, different flags: still unequal.
    self.assertNotEqual(Regex('a', re.I), Regex('a', re.M))
    # Same pattern and flags: equal.
    self.assertEqual(Regex('a', re.I), Regex('a', re.I))
def do_query(self, tag_part, min_sentiment, min_relevance):
    """Find documents having a tag that contains *tag_part* with sentiment
    and relevance at or above the given thresholds; return each match as a
    pretty-printed JSON string."""
    tag_regex = Regex.from_native(re.compile('.*{0}.*'.format(tag_part)))
    cursor = self.db.find({
        'tags': {
            '$elemMatch': {
                'tag': tag_regex,
                'sentiment': {'$gte': min_sentiment},
                'relevance': {'$gte': min_relevance},
            }
        }
    })
    return [bson.json_util.dumps(doc, sort_keys=True, indent=4)
            for doc in cursor]
def remove_attachment_id_from_flow(flow_collection: Collection, url: str) -> None:
    """
    Remove attachment_id from old collection and update url with new bucket
    """
    filename = os.path.split(url)[-1]
    # Case-insensitive match on any flow step whose url ends with the file name.
    docs = flow_collection.find(
        {"flow.data.url": Regex(f".*{re.escape(filename)}$", "i")})
    for doc in docs:
        for step in doc['flow']:
            is_media = step['type'] in ('image', 'video')
            if is_media and step['data']['url'].endswith(filename):
                step['data']['url'] = url
                step['data'].pop('attachment_id', None)
        flow_collection.replace_one({"_id": doc['_id']}, doc)
def getQueryCondition(queryConditions):
    """Translate per-field query descriptors into a Mongo ``$or`` filter.

    Args:
        queryConditions: list of dicts with "name", "type" and "query" keys.
            "text"/"array"/"datetime" fields become case-insensitive
            substring regexes; "number" fields become numeric $eq filters.

    Returns:
        A ``{"$or": [...]}`` Mongo filter document.
    """
    conditions = []
    for fieldInfo in queryConditions:
        if fieldInfo["type"] == 'text' or fieldInfo[
                "type"] == 'array' or fieldInfo["type"] == 'datetime':
            # NOTE(review): fieldInfo['query'] reaches the regex unescaped,
            # so user-supplied metacharacters are interpreted -- confirm.
            pattern = re.compile(r'.*' + fieldInfo['query'] + '.*', re.I)
            regex = Regex.from_native(pattern)
            # str patterns carry re.UNICODE; XOR clears it for the query.
            regex.flags ^= re.UNICODE
            conditions.append({fieldInfo["name"]: regex})
        elif fieldInfo["type"] == 'number':
            if fieldInfo["query"] != "":
                try:
                    num = float(fieldInfo["query"])
                    conditions.append({fieldInfo["name"]: {"$eq": num}})
                except ValueError:
                    # Unparseable number: sentinel that can never match real
                    # data.  (Fixed: was a bare `except:` that also swallowed
                    # unrelated errors such as KeyboardInterrupt.)
                    conditions.append({fieldInfo["name"]: {"$eq": -99999}})
            else:
                # Empty query: match anything above the sentinel floor.
                conditions.append({fieldInfo["name"]: {"$gte": -99999}})
    return {"$or": conditions}
async def update_message_db(message_item: UpdateMessageResponse,
                            current_user: CurrentUserSchema,
                            language: str = 'EN') -> str:
    """Grade a message with a new response and move its text variation to the
    newly selected question.

    NOTE(review): this block looks truncated -- `result3` is never assigned
    after its initialisation and no final return is visible; confirm the
    remainder of the function.

    Args:
        message_item: id, text and new_response of the message being graded.
        current_user: the admin performing the update (for audit fields).
        language: NOTE(review): unused in the visible portion -- confirm.
    """
    query = {"_id": ObjectId(message_item.id)}
    result1 = result2 = result3 = 0
    # add selected answer to message if it's not same with original response/graded response
    message_from_db = await message_collection.find_one(query)
    graded_response = message_from_db.get('adminportal', {}).get('answer')
    original_response = message_from_db.get('chatbot', {}).get('qnid')
    response = graded_response or original_response
    if not graded_response and response == message_item.new_response:
        return 'No questions updated'
    updated_info_query = {
        "updated_at": get_local_datetime_now(),
        "updated_by": ObjectId(current_user.userId),
    }
    # add graded response to message
    set_message_query = updated_info_query | {
        "adminportal.graded": True,
        "adminportal.answer": ObjectId(message_item.new_response)
    }
    result1 = await message_collection.update_one(query,
                                                  {'$set': set_message_query})
    # delete variation from main question and add variation to new question
    # (anchored, escaped, case-insensitive match on the variation text)
    query = {
        "_id": ObjectId(response),
        "alternate_questions.text": Regex(f"^{escape(message_item.text)}$",
                                          "i"),
        "is_active": True
    }
    if question_db := await question_collection.find_one(
            query):  # remove variation if found match
        for idx, v in enumerate(question_db['alternate_questions']):
            if v['text'].lower() == message_item.text.lower():
                question_db['alternate_questions'].pop(idx)
                question_db |= updated_info_query
                result2 = await question_collection.replace_one(
                    {"_id": question_db['_id']}, question_db)
                break
def find(self, page_size, page_index, keyword=None):
    """Return one page of worklog documents plus the total match count.

    Args:
        page_size: documents per page.
        page_index: 1-based page number.
        keyword: optional text searched in both title and content.

    Returns:
        (cursor over the requested page, total matching document count)
    """
    if keyword is not None:
        # NOTE(review): keyword is embedded in the regex unescaped, so
        # metacharacters are interpreted -- confirm this is intended.
        reg = Regex(r'%s' % keyword)
        # Build the $or filter once and reuse it for both the page fetch and
        # the count (previously duplicated inline).
        query = {'$or': [{'title': reg}, {'content': reg}]}
        cursor = self.db.worklog\
            .find(query)\
            .sort("creationdate", -1)\
            .skip((page_index - 1) * page_size)\
            .limit(page_size)
        total = self.db.worklog.find(query).count()
    else:
        cursor = self.db.worklog.find().sort("creationdate", -1).skip(
            (page_index - 1) * page_size).limit(page_size)
        total = self.db.worklog.count()
    return cursor, total
def references(self, object_: CmdbObject, filter: dict, limit: int, skip: int,
               sort: str, order: int, user: UserModel = None,
               permission: AccessControlPermission = None, *args, **kwargs) \
        -> IterationResult[CmdbObject]:
    """Iterate over all objects that reference *object_*.

    Builds an aggregation pipeline: the caller's filter, a $lookup of each
    object's type, a match for either a 'ref' field allowing object_'s type
    or a 'ref-section' pointing at it, restricted to objects whose field
    values actually hold object_.public_id.

    Args:
        object_: the referenced object.
        filter: extra match stage(s); a dict or a list of stages.
        limit, skip, sort, order: pagination/ordering forwarded to iterate().
        user, permission: access-control context forwarded to iterate().

    Returns:
        IterationResult[CmdbObject] of referencing objects.
    """
    query = []
    if isinstance(filter, dict):
        query.append(filter)
    elif isinstance(filter, list):
        query += filter
    # Join each object's type definition so its field metadata is queryable.
    query.append(
        Builder.lookup_(_from='framework.types', _local='type_id',
                        _foreign='public_id', _as='type'))
    query.append(Builder.unwind_({'path': '$type'}))
    # 'ref' fields may store ref_types as text (regex match) or as the
    # numeric public_id itself -- cover both.
    field_ref_query = {
        'type.fields.type': 'ref',
        '$or': [{
            'type.fields.ref_types': Regex(f'.*{object_.type_id}.*', 'i')
        }, {
            'type.fields.ref_types': object_.type_id
        }]
    }
    section_ref_query = {
        'type.render_meta.sections.type': 'ref-section',
        'type.render_meta.sections.reference.type_id': object_.type_id
    }
    query.append(
        Builder.match_(Builder.or_([field_ref_query, section_ref_query])))
    # Keep only objects that actually point at this instance.
    query.append(Builder.match_({'fields.value': object_.public_id}))
    return self.iterate(filter=query, limit=limit, skip=skip, sort=sort,
                        order=order, user=user, permission=permission)
def do_query(self, tag_part, min_sentiment, min_relevance):
    """Query for documents whose tags contain *tag_part* with sentiment and
    relevance meeting the given minimums, serialized as pretty JSON."""
    elem_match = {
        'tag': Regex.from_native(re.compile('.*{0}.*'.format(tag_part))),
        'sentiment': {'$gte': min_sentiment},
        'relevance': {'$gte': min_relevance},
    }
    documents = self.db.find({'tags': {'$elemMatch': elem_match}})
    return [bson.json_util.dumps(d, sort_keys=True, indent=4)
            for d in documents]
def choose_user_job_recommend(self, conditon):
    """For each supported language collection, collect the jobs whose
    responsibility matches *conditon* (case-insensitive)."""
    collections = {
        'python': 'zhilian_python_BJ',
        'java': 'zhilian_java_BJ'
    }
    recommended = {}
    native_pattern = re.compile(conditon)
    regex = Regex.from_native(native_pattern)
    # NOTE(review): this *replaces* all flags with UNICODE only -- confirm
    # that dropping the compiled pattern's other flags is intended.
    regex.flags = re.UNICODE
    for language, collection_name in collections.items():
        cursor = self.connect()[collection_name].find(
            {"responsibility": {
                "$regex": regex,
                "$options": "i"
            }})
        recommended[language] = [job for job in cursor]
    print(recommended)
    return recommended
def show_symbols(path):
    """Return the URL-quoted 191$a symbols under *path*, numerically sorted,
    as a JSON response.

    Args:
        path: symbol prefix to anchor the search on (regex-escaped here).
    """
    path = re.escape(path)
    query = QueryDocument(
        Condition(
            tag='191',
            subfields={'a': Regex('^' + path)},
        ),
    )
    print(f" the query is -- {query.to_json()}")
    bibset = BibSet.from_query(query, projection={'191': True}, skip=0, limit=0)
    # (Cleaned up: removed unused `data` / duplicate get_value call and
    # leftover commented-out code.)
    symbols = [bib.get_value('191', 'a') for bib in bibset.records]
    # Sort by the integer formed from each symbol's embedded digits.
    # NOTE(review): raises ValueError for a symbol containing no digits.
    return_data = sorted((quote(symbol) for symbol in symbols),
                         key=lambda x: int(''.join(c for c in x
                                                   if c.isdigit())))
    return (jsonify(return_data))
def references(self, object_: CmdbObject, filter: dict, limit: int, skip: int,
               sort: str, order: int, user: UserModel = None,
               permission: AccessControlPermission = None, *args, **kwargs) \
        -> IterationResult[CmdbObject]:
    """Iterate over all objects with a 'ref' field referencing *object_*.

    Builds an aggregation pipeline: the caller's filter, a $lookup of each
    object's type, a match for 'ref' fields allowing object_'s type, and a
    final match on objects whose field values hold object_.public_id.

    Args:
        object_: the referenced object.
        filter: extra match stage(s); a dict or a list of stages.
        limit, skip, sort, order: pagination/ordering forwarded to iterate().
        user, permission: access-control context forwarded to iterate().

    Returns:
        IterationResult[CmdbObject] of referencing objects.
    """
    query = []
    if isinstance(filter, dict):
        query.append(filter)
    elif isinstance(filter, list):
        query += filter
    # Join each object's type definition from framework.types.
    query.append({
        '$lookup': {
            'from': 'framework.types',
            'localField': 'type_id',
            'foreignField': 'public_id',
            'as': 'type'
        }
    })
    query.append({'$unwind': {'path': '$type'}})
    # 'ref' fields may store ref_types as text (regex match) or as the
    # numeric public_id itself -- cover both.
    query.append({
        '$match': {
            'type.fields.type': 'ref',
            '$or': [{
                'type.fields.ref_types': Regex(f'.*{object_.type_id}.*', 'i')
            }, {
                'type.fields.ref_types': object_.type_id
            }]
        }
    })
    # Keep only objects that actually point at this instance.
    query.append({'$match': {'fields.value': object_.public_id}})
    return self.iterate(filter=query, limit=limit, skip=skip, sort=sort,
                        order=order, user=user, permission=permission)
from bson import Regex
from dlx import DB
from dlx.marc import BibSet, QueryDocument, Condition

from config import Config

# Connect using the configured MongoDB connection string.
DB.connect(Config.connect_string)

# Bibs that have any 191 field and whose 269$a starts with "1975"
# (presumably a 1975 date -- confirm against the MARC spec in use).
query = QueryDocument(Condition(tag='191', modifier='exists'),
                      Condition(tag='269', subfields={'a': Regex('^1975')}))

print(query.to_json())

# Project only field 191; skip/limit 0 fetches the whole result set.
bibset = BibSet.from_query(query, projection={'191': True}, skip=0, limit=0)
print('There are {} results'.format(bibset.count))

bibset.cache()

for bib in bibset.records:
    print('id: {}, symbol: {}'.format(bib.id, bib.get_value('191', 'a')))

print(bibset.to_xml())
def setUp(self):
    """Promote the inherited native pattern to a bson Regex before each test."""
    from bson import Regex  # deferred import keeps bson out of module load
    super(BSONRegexWithoutFlagTest, self).setUp()
    self.regex = Regex.from_native(self.regex)
def setUp(self):
    """Run the base set-up, then wrap the test pattern in a bson Regex."""
    super(BSONRegexLocaleTest, self).setUp()

    from bson import Regex  # imported lazily, only when the fixture runs
    self.regex = Regex.from_native(self.regex)
def test_regex_hash(self):
    """Regex instances are unhashable."""
    with self.assertRaises(TypeError):
        hash(Regex('hello'))