Beispiel #1
0
    def test_bson_regex(self):
        """Round-trip bson.Regex values through BSON encode/decode.

        NOTE(review): written for a Python-2-era PyMongo test suite (uses
        the ``b()`` bytes helper and passes ``re.L`` with a str pattern,
        which Python 3's ``re`` rejects) — confirm the target environment.
        """
        # Invalid Python regex, though valid PCRE.
        bson_re1 = Regex(r'[\w-\.]')
        self.assertEqual(r'[\w-\.]', bson_re1.pattern)
        self.assertEqual(0, bson_re1.flags)

        doc1 = {'r': bson_re1}
        doc1_bson = b('\x11\x00\x00\x00'  # document length
                      '\x0br\x00[\\w-\\.]\x00\x00'  # r: regex
                      '\x00')  # document terminator

        self.assertEqual(doc1_bson, BSON.encode(doc1))
        # compile_re=False keeps the decoded value as bson.Regex instead
        # of compiling it (compiling would fail: bad character range).
        self.assertEqual(doc1, BSON(doc1_bson).decode(compile_re=False))

        # Valid Python regex, with flags.
        re2 = re.compile('.*', re.I | re.L | re.M | re.S | re.U | re.X)
        bson_re2 = Regex('.*', re.I | re.L | re.M | re.S | re.U | re.X)

        doc2_with_re = {'r': re2}
        doc2_with_bson_re = {'r': bson_re2}
        doc2_bson = b("\x12\x00\x00\x00"  # document length
                      "\x0br\x00.*\x00ilmsux\x00"  # r: regex
                      "\x00")  # document terminator

        # A native pattern and the equivalent bson.Regex must encode to
        # identical bytes.
        self.assertEqual(doc2_bson, BSON.encode(doc2_with_re))
        self.assertEqual(doc2_bson, BSON.encode(doc2_with_bson_re))

        # Built-in re objects don't support ==. Compare pattern and flags.
        self.assertEqual(re2.pattern, BSON(doc2_bson).decode()['r'].pattern)
        self.assertEqual(re2.flags, BSON(doc2_bson).decode()['r'].flags)

        self.assertEqual(doc2_with_bson_re,
                         BSON(doc2_bson).decode(compile_re=False))
Beispiel #2
0
 def inner_match_fields(_fields, _matched_fields, _reference=None):
     """
     Get list of matched fields inside the reference fields

     NOTE(review): ``possible_regex_list`` is a free variable resolved from
     an enclosing scope (it is not a parameter) — confirm this function is
     nested inside the function that defines it.

     Args:
         _fields: list of referenced fields
         _matched_fields: accumulator list of fields matched so far
             (mutated in place)
         _reference: reference object to which the reference field refers
     Returns:
         list of fields where the regex matched
     """
     for regex_ in possible_regex_list:
         try:
             # Prefer a compiled case-insensitive/multiline/dotall regex;
             # fall back to the raw pattern string if compilation fails.
             runtime_regex = Regex(regex_, 'ims').try_compile()
         except Exception:
             runtime_regex = regex_
         for field in _fields:
             try:
                 res = runtime_regex.findall(str(field.get('value')))
                 if len(res) > 0:
                     # Record the enclosing reference when given, else the
                     # field itself.
                     inner_value = _reference if _reference else field
                     # removing duplicated from list
                     if inner_value not in _matched_fields:
                         _matched_fields.append(inner_value)
                 # Recurse into referenced documents so nested fields are
                 # checked against the same regex list.
                 if field['type'] == 'ref':
                     inner_match_fields(field['reference']['summaries'],
                                        _matched_fields, field)
                 if field['type'] == 'ref-section-field':
                     inner_match_fields(field['references']['fields'],
                                        _matched_fields, field)
             except Exception:
                 continue
    def find_match_fields(result: R, possible_regex_list=None):
        """
        Collect the fields of a search result whose values match any of
        the supplied regex patterns.

        Args:
            result: Generic search result
            possible_regex_list: list of text regex from the pipeline builder

        Returns:
            list of fields where the regex matched, or None when nothing
            matched or no patterns were supplied
        """
        hits = []
        candidate_fields = result.fields
        if not possible_regex_list:
            return None
        for raw_pattern in possible_regex_list:
            # Compile with i/m/s/x options when possible; otherwise fall
            # back to the raw pattern string.
            try:
                matcher = Regex(raw_pattern, 'imsx').try_compile()
            except Exception:
                matcher = raw_pattern
            for candidate in candidate_fields:
                try:
                    if matcher.findall(str(candidate.get('value'))):
                        hits.append(candidate)
                except Exception:
                    continue
        return hits or None
Beispiel #4
0
    def test_bson_classes(self):
        """Matcher equality holds for every BSON class: between two
        MockupDB-side values, and between a PyMongo value and its
        MockupDB counterpart."""
        _id = '5a918f9fa08bff9c7688d3e1'

        equal_pairs = [
            (Binary(b'foo'), Binary(b'foo')),
            (Code('foo'), Code('foo')),
            (Code('foo', {'x': 1}), Code('foo', {'x': 1})),
            (DBRef('coll', 1), DBRef('coll', 1)),
            (DBRef('coll', 1, 'db'), DBRef('coll', 1, 'db')),
            (Decimal128('1'), Decimal128('1')),
            (MaxKey(), MaxKey()),
            (MinKey(), MinKey()),
            (ObjectId(_id), ObjectId(_id)),
            (Regex('foo', 'i'), Regex('foo', 'i')),
            (Timestamp(1, 2), Timestamp(1, 2)),
        ]
        for pymongo_val, mockup_val in equal_pairs:
            # Basic case.
            self.assertTrue(
                Matcher(Command(y=mockup_val)).matches(Command(y=mockup_val)),
                "MockupDB %r doesn't equal itself" % (mockup_val,))

            # First Command argument is special, try comparing the second also.
            self.assertTrue(
                Matcher(Command('x', y=mockup_val)).matches(
                    Command('x', y=mockup_val)),
                "MockupDB %r doesn't equal itself" % (mockup_val,))

            # In practice, users pass PyMongo classes in message specs.
            self.assertTrue(
                Matcher(Command(y=mockup_val)).matches(Command(y=pymongo_val)),
                "PyMongo %r != MockupDB %r" % (pymongo_val, mockup_val))

            self.assertTrue(
                Matcher(Command('x', y=mockup_val)).matches(
                    Command('x', y=pymongo_val)),
                "PyMongo %r != MockupDB %r" % (pymongo_val, mockup_val))
    def test_bson_regex(self):
        """Round-trip bson.Regex values through BSON encode/decode
        (Python-3 variant using native bytes literals)."""
        # Invalid Python regex, though valid PCRE.
        bson_re1 = Regex(r'[\w-\.]')
        self.assertEqual(r'[\w-\.]', bson_re1.pattern)
        self.assertEqual(0, bson_re1.flags)

        doc1 = {'r': bson_re1}
        doc1_bson = (
            b'\x11\x00\x00\x00'  # document length
            b'\x0br\x00[\\w-\\.]\x00\x00'  # r: regex
            b'\x00')  # document terminator

        self.assertEqual(doc1_bson, BSON.encode(doc1))
        self.assertEqual(doc1, BSON(doc1_bson).decode())

        # Valid Python regex, with flags.
        re2 = re.compile(u'.*', re.I | re.M | re.S | re.U | re.X)
        bson_re2 = Regex(u'.*', re.I | re.M | re.S | re.U | re.X)

        doc2_with_re = {'r': re2}
        doc2_with_bson_re = {'r': bson_re2}
        doc2_bson = (
            b"\x11\x00\x00\x00"  # document length
            b"\x0br\x00.*\x00imsux\x00"  # r: regex
            b"\x00")  # document terminator

        # A native pattern and the equivalent bson.Regex must encode to
        # identical bytes.
        self.assertEqual(doc2_bson, BSON.encode(doc2_with_re))
        self.assertEqual(doc2_bson, BSON.encode(doc2_with_bson_re))

        # Built-in re objects don't support ==; compare pattern and flags.
        self.assertEqual(re2.pattern, BSON(doc2_bson).decode()['r'].pattern)
        self.assertEqual(re2.flags, BSON(doc2_bson).decode()['r'].flags)
    def test_regex_from_native(self):
        """Regex.from_native preserves the pattern and flags of a
        compiled re object."""
        self.assertEqual('.*', Regex.from_native(re.compile('.*')).pattern)
        self.assertEqual(0, Regex.from_native(re.compile(b'')).flags)

        combined_flags = re.I | re.L | re.M | re.S | re.X
        bytes_regex = re.compile(b'', combined_flags)
        self.assertEqual(combined_flags,
                         Regex.from_native(bytes_regex).flags)

        text_regex = re.compile('', re.U)
        self.assertEqual(re.U, Regex.from_native(text_regex).flags)
    def test_regex_from_native(self):
        """Regex.from_native keeps the original pattern and flags."""
        self.assertEqual('.*', Regex.from_native(re.compile('.*')).pattern)
        # A bytes pattern compiled without flags reports flags == 0.
        self.assertEqual(0, Regex.from_native(re.compile(b'')).flags)

        regex = re.compile(b'', re.I | re.L | re.M | re.S | re.X)
        self.assertEqual(
            re.I | re.L | re.M | re.S | re.X,
            Regex.from_native(regex).flags)

        # The UNICODE flag survives conversion as well.
        unicode_regex = re.compile('', re.U)
        self.assertEqual(re.U, Regex.from_native(unicode_regex).flags)
 def test_regex_pickling(self):
     """A Regex instance round-trips through a pickle produced by
     Python 3 (pickle protocol 4)."""
     regex_obj = Regex(".?")
     python3_pickle = (
         b'\x80\x04\x959\x00\x00\x00\x00\x00\x00\x00\x8c\n'
         b'bson.regex\x94\x8c\x05Regex\x94\x93\x94)\x81\x94}'
         b'\x94(\x8c\x07pattern\x94\x8c\x02.?\x94\x8c\x05flag'
         b's\x94K\x00ub.')
     self.round_trip_pickle(regex_obj, python3_pickle)
Beispiel #9
0
async def get_flows_and_count_db(
        *,
        current_page: int,
        page_size: int,
        sorter: str = None,
        flow_name: str,
        language: str,
        updated_at: list[date],
        triggered_counts: list[int]) -> tuple[list[FlowSchemaDb], int]:
    """Fetch one page of active flows plus the total count for the query.

    Args:
        current_page: 1-based page index.
        page_size: number of rows per page.
        sorter: optional sort expression forwarded to get_flows_db.
        flow_name: case-insensitive substring filter on the flow name.
        language: unused in this function — TODO confirm whether a
            language filter was intended.
        updated_at: [start, end] date pair, or falsy to skip the filter.
        triggered_counts: [min, max] bounds, or falsy to skip the filter.

    Returns:
        Tuple of (flows for the requested page, total matching count).
    """
    if updated_at:
        updated_at_start, updated_at_end = updated_at
    # The keys were needlessly written as f-strings (f"name") with no
    # placeholders; plain string literals are equivalent.
    # '...' (Ellipsis) presumably marks criteria that form_query should
    # drop — verify against form_query's implementation.
    db_key = [
        ("name", {"$ne": None}),
        ("name",
         Regex(f".*{escape(flow_name)}.*", "i") if flow_name else ...),
        ("triggered_count", {
            "$gte": triggered_counts[0],
            "$lte": triggered_counts[1]
        } if triggered_counts else ...),
        ("is_active", True),
        ("updated_at", {
            "$gte": make_timezone_aware(updated_at_start),
            "$lte": make_timezone_aware(updated_at_end)
        } if updated_at else ...),
    ]
    query = form_query(db_key)

    flows = await get_flows_db(current_page=current_page,
                               page_size=page_size,
                               sorter=sorter,
                               query=query)
    total = await get_flows_count_db(query=query)
    return flows, total
Beispiel #10
0
def gen_regex_search_query(search_query):
    """Turn a raw search string into a bson.Regex for a MongoDB query.

    NOTE(review): ``^=`` TOGGLES flag bits rather than clearing/setting
    them.  A str pattern compiled on Python 3 always carries re.UNICODE,
    so the first toggle clears it; IGNORECASE starts clear, so the second
    toggle sets it.  If the starting flags could ever differ, these
    toggles would invert the intent — confirm the assumption.
    """
    # TODO sanitize this user input before querying the DB with it
    pattern = re.compile(search_query)
    regex = Regex.from_native(pattern)
    regex.flags ^= re.UNICODE
    regex.flags ^= re.IGNORECASE
    return regex
Beispiel #11
0
    def test_exception_wrapping(self):
        """Decode errors surface as InvalidBSON while preserving the
        original error message and traceback."""
        # No matter what exception is raised while trying to decode BSON,
        # the final exception always matches InvalidBSON and the original
        # traceback is preserved.

        # Invalid Python regex, though valid PCRE.
        # Causes an error in re.compile().
        bad_doc = BSON.encode({'r': Regex(r'[\w-\.]')})

        try:
            decode_all(bad_doc)
        except InvalidBSON:
            exc_type, exc_value, exc_tb = sys.exc_info()
            # Original re error was captured and wrapped in InvalidBSON.
            self.assertEqual(exc_value.args[0], 'bad character range')

            # Traceback includes bson module's call into re module.
            for filename, lineno, fname, text in traceback.extract_tb(exc_tb):
                if filename.endswith('re.py') and fname == 'compile':
                    # Traceback was correctly preserved.
                    break
            else:
                # for/else: no frame matched re.compile — traceback lost.
                self.fail('Traceback not captured')
        else:
            self.fail('InvalidBSON not raised')
Beispiel #12
0
    def get_response_statements(self):
        """
        Return only statements that are in response to another statement.
        A statement must exist which lists the closest matching statement in the
        in_response_to field. Otherwise, the logic adapter may find a closest
        matching statement that does not have a known response.
        """
        # (author '程序员涤生') The original logic used the in_response_to
        # field to decide whether a sentence is a question: any sentence that
        # appeared in another sentence's in_response_to field could serve as
        # a question.  That required first querying the texts listed in
        # in_response_to and then the sentences within that text set, which
        # was very slow.  By tagging sentences with Q and A markers we can
        # match question sentences directly with a regex, and we return only
        # the text field, which greatly improves query efficiency.
        pattern = re.compile('^Q ')
        regex = Regex.from_native(pattern)
        # response_query = self.statements.find({'text': 'Q 今天天气怎么样?'}, {'text': 1})
        response_query = self.statements.find({'text': {'$regex': regex}}, {'text': 1})

        statement_objects = []
        statement_vec = []
        import datetime as dt
        starttime2 = dt.datetime.now()
        for r in response_query:
            try:
                # Use the precomputed vector for this text, taken from the
                # text->vector index built at system startup.
                text_vec_indx = IntentClassifier().text_vec_indx
                vec = text_vec_indx.get(r['text'],None)
                if vec is not None: # NOTE: these two lists must stay the same length; otherwise looking up the original text by index during similarity scoring would be offset and the correct answer could not be retrieved!
                    statement_vec.append(vec)
                    statement_objects.append(self.mongo_to_object({'text': r['text']}))
            except Exception as e:
                logging.warning("出现异常%s,问题句子为:%s", str(e), r['text'])
        endtime2 = dt.datetime.now()
        logging.debug("===========get_response_statements的for循环构造向量耗时: %s秒", (endtime2 - starttime2).seconds)
        return statement_objects, statement_vec
Beispiel #13
0
 def query_nlike(self, value):
     """WILDCARD NOT CONTAINS: the field must not match *value* anywhere
     (case-insensitive)."""
     if isinstance(value, list):
         value = value[0]
     contains_anywhere = Regex('.*' + value + '.*', 'i')
     return MongoQuery({self.field: {'$not': contains_anywhere}})
Beispiel #14
0
 def query_nend(self, value):
     """DOESN'T END WITH: the field must not end with *value*
     (case-insensitive)."""
     if isinstance(value, list):
         value = value[0]
     # BUG FIX: the pattern previously began with '*.' — an invalid regex
     # (the '*' quantifier has nothing to repeat).  The intended pattern,
     # mirroring query_nstart's '^value.*', is '.*value$'.
     return MongoQuery(
         {self.field: {
             '$not': Regex('.*' + value + '$', 'i')
         }})
async def get_portal_user(username: str):
    """
    Retrieve the first active portal user whose username matches exactly
    (case-insensitive).
    :return: the user document, or None when no active user matches
    """
    from re import escape  # local import: harden user input before regex use

    # SECURITY: escape the username so regex metacharacters in user input
    # cannot alter (or break) the anchored pattern — prevents regex
    # injection into the query.
    query = {"username": Regex(f"^{escape(username)}$", "i"), "is_active": True}
    async for user in portal_user_collection.find(query):
        return user
Beispiel #16
0
 def query_nstart(self, value):
     """DOESN'T START WITH: the field must not begin with *value*
     (case-insensitive)."""
     if isinstance(value, list):
         value = value[0]
     starts_with = Regex('^' + value + '.*', 'i')
     return MongoQuery({self.field: {'$not': starts_with}})
    def test_jsonify_Regex(self):
        """jsonify must serialize bson.Regex the same way Flask serializes
        the equivalent extended-JSON dict."""
        payload = {'a': 1, 'regex': Regex("bb|[^b]{2}")}
        expected = {'a': 1, 'regex': {'$regex': "bb|[^b]{2}", "$options": ""}}

        rendered_bson = jsonify(payload).response
        rendered_plain = flask_jsonify(expected).response

        assert rendered_bson == rendered_plain
Beispiel #18
0
 async def find_files_in_directory(self, path):
     """Return the ids (as strings) of every record whose absolute_path
     starts with *path*."""
     # Escape the literal path, anchor at the start, and toggle off the
     # UNICODE flag bit before handing the regex to the server.
     prefix_regex = Regex.from_native(re.compile('^' + re.escape(path)))
     prefix_regex.flags ^= re.UNICODE
     query = {'absolute_path': {'$regex': prefix_regex}}
     return [str(record['_id']) async for record in self._db.find(query)]
Beispiel #19
0
 def extract_condition(val):
     """Map a shorthand query value onto a dlx Condition.

     NOTE(review): ``key`` is a free variable from the enclosing scope.
     In the dict case, a subfield value written as '/…/' is treated as a
     regex literal and one character is stripped from each end — this
     assumes a closing '/' is present; an unterminated literal would lose
     its last real character.  Values other than a dict, 1, or 0 fall
     through and return None implicitly — confirm that is intended.
     """
     if isinstance(val, dict):
         for code, sval in val.items():
             if sval[0] == '/':
                 val[code] = Regex(sval[1:-1])
         return Condition(key, val)
     elif val == 1:
         # 1 => the tag must exist.
         return Condition(key, modifier='exists')
     elif val == 0:
         # 0 => the tag must not exist.
         return Condition(key, modifier='not_exists')
 def test_regex_comparison(self):
     """Regex equality depends on both the pattern and the flags."""
     # Different patterns are never equal.
     self.assertNotEqual(Regex('a'), Regex('b'))
     # Same pattern with different flags is not equal either.
     self.assertNotEqual(Regex('a', re.I), Regex('a', re.M))
     # Identical pattern and flags compare equal.
     self.assertEqual(Regex('a', re.I), Regex('a', re.I))
    def do_query(self, tag_part, min_sentiment, min_relevance):
        """Find documents holding a tag that contains *tag_part* with at
        least the given sentiment and relevance, returned as
        pretty-printed JSON strings."""
        tag_regex = Regex.from_native(re.compile('.*{0}.*'.format(tag_part)))
        matches = self.db.find({
            'tags': {
                '$elemMatch': {
                    'tag': tag_regex,
                    'sentiment': {'$gte': min_sentiment},
                    'relevance': {'$gte': min_relevance},
                }
            }
        })
        return [bson.json_util.dumps(doc, sort_keys=True, indent=4)
                for doc in matches]
Beispiel #22
0
def remove_attachment_id_from_flow(flow_collection: Collection,
                                   url: str) -> None:
    """
    Remove attachment_id from old collection and update url with new bucket
    """
    filename = os.path.split(url)[-1]
    # Case-insensitive match on documents whose flow urls end in filename.
    name_query = {"flow.data.url": Regex(f".*{re.escape(filename)}$", "i")}
    for document in flow_collection.find(name_query):
        for step in document['flow']:
            is_media = step['type'] in ('image', 'video')
            if is_media and step['data']['url'].endswith(filename):
                step['data']['url'] = url
                step['data'].pop('attachment_id', None)
        flow_collection.replace_one({"_id": document['_id']}, document)
Beispiel #23
0
def getQueryCondition(queryConditions):
    """Build a MongoDB ``$or`` filter from UI query-condition descriptors.

    Args:
        queryConditions: iterable of dicts with keys "type", "name" and
            "query" describing one condition each.

    Returns:
        dict: ``{"$or": [...]}`` with one sub-filter per condition.
    """
    conditions = []
    for fieldInfo in queryConditions:
        if fieldInfo["type"] in ('text', 'array', 'datetime'):
            # Case-insensitive substring match; ^= toggles off the UNICODE
            # bit that Python 3 sets on str patterns before the regex is
            # serialized for the server.
            pattern = re.compile(r'.*' + fieldInfo['query'] + r'.*', re.I)
            regex = Regex.from_native(pattern)
            regex.flags ^= re.UNICODE
            conditions.append({fieldInfo["name"]: regex})
        elif fieldInfo["type"] == 'number':
            if fieldInfo["query"] != "":
                try:
                    num = float(fieldInfo["query"])
                    conditions.append({fieldInfo["name"]: {"$eq": num}})
                # Was a bare except:, which also swallowed KeyboardInterrupt
                # and SystemExit; float() only raises these two.
                except (TypeError, ValueError):
                    # Non-numeric input: use the sentinel that matches nothing.
                    conditions.append({fieldInfo["name"]: {"$eq": -99999}})
            else:
                conditions.append({fieldInfo["name"]: {"$gte": -99999}})
    return {"$or": conditions}
Beispiel #24
0
async def update_message_db(message_item: UpdateMessageResponse,
                            current_user: CurrentUserSchema,
                            language: str = 'EN') -> str:
    """Grade a chat message with a newly selected answer and detach the
    message text from the matching question's variations.

    NOTE(review): ``result3`` is assigned but never used here and no final
    return statement is visible — this looks like an excerpt of a longer
    function; confirm against the complete source before relying on it.
    """
    query = {"_id": ObjectId(message_item.id)}
    result1 = result2 = result3 = 0

    # add selected answer to message if it's not same with original response/graded response
    message_from_db = await message_collection.find_one(query)
    graded_response = message_from_db.get('adminportal', {}).get('answer')
    original_response = message_from_db.get('chatbot', {}).get('qnid')

    # Prefer an existing admin grade over the chatbot's original answer.
    response = graded_response or original_response
    if not graded_response and response == message_item.new_response:
        return 'No questions updated'

    updated_info_query = {
        "updated_at": get_local_datetime_now(),
        "updated_by": ObjectId(current_user.userId),
    }
    # add graded response to message
    set_message_query = updated_info_query | {
        "adminportal.graded": True,
        "adminportal.answer": ObjectId(message_item.new_response)
    }
    result1 = await message_collection.update_one(query,
                                                  {'$set': set_message_query})

    # delete variation from main question and add variation to new question
    # (exact text match, case-insensitive, via an anchored escaped regex)
    query = {
        "_id": ObjectId(response),
        "alternate_questions.text": Regex(f"^{escape(message_item.text)}$",
                                          "i"),
        "is_active": True
    }
    if question_db := await question_collection.find_one(
            query):  # remove variation if found match
        for idx, v in enumerate(question_db['alternate_questions']):
            if v['text'].lower() == message_item.text.lower():
                question_db['alternate_questions'].pop(idx)
                question_db |= updated_info_query
                result2 = await question_collection.replace_one(
                    {"_id": question_db['_id']}, question_db)
                break
 def find(self, page_size, page_index, keyword=None):
     """Page through worklog entries, newest first, optionally filtering
     title/content by *keyword*.  Returns (cursor, total)."""
     offset = (page_index - 1) * page_size
     if keyword is None:
         cursor = self.db.worklog.find()\
             .sort("creationdate", -1)\
             .skip(offset)\
             .limit(page_size)
         total = self.db.worklog.count()
     else:
         keyword_regex = Regex(r'%s' % keyword)
         criteria = {'$or': [{'title': keyword_regex},
                             {'content': keyword_regex}]}
         cursor = self.db.worklog\
             .find(criteria)\
             .sort("creationdate", -1)\
             .skip(offset)\
             .limit(page_size)
         total = self.db.worklog.find(criteria).count()
     return cursor, total
Beispiel #26
0
    def references(self, object_: CmdbObject, filter: dict, limit: int, skip: int, sort: str, order: int,
                   user: UserModel = None, permission: AccessControlPermission = None, *args, **kwargs) \
            -> IterationResult[CmdbObject]:
        """Iterate all objects that reference *object_* through a 'ref'
        field or a 'ref-section' of their type definition.

        Args:
            object_: object whose inbound references are searched.
            filter: extra match stage(s) prepended to the pipeline.
            limit, skip, sort, order: pagination/ordering passthrough.
            user, permission: access-control context for iterate().
        """
        query = []
        if isinstance(filter, dict):
            query.append(filter)
        elif isinstance(filter, list):
            query += filter

        # Join each object with its type document so the type's field
        # definitions can be inspected below.
        query.append(
            Builder.lookup_(_from='framework.types',
                            _local='type_id',
                            _foreign='public_id',
                            _as='type'))
        query.append(Builder.unwind_({'path': '$type'}))

        # 'ref' fields may store the referenced type id embedded in a
        # string (regex match) or as a plain value.
        field_ref_query = {
            'type.fields.type':
            'ref',
            '$or': [{
                'type.fields.ref_types':
                Regex(f'.*{object_.type_id}.*', 'i')
            }, {
                'type.fields.ref_types': object_.type_id
            }]
        }
        section_ref_query = {
            'type.render_meta.sections.type': 'ref-section',
            'type.render_meta.sections.reference.type_id': object_.type_id
        }
        query.append(
            Builder.match_(Builder.or_([field_ref_query, section_ref_query])))
        # Finally keep only documents that actually point at this object.
        query.append(Builder.match_({'fields.value': object_.public_id}))
        return self.iterate(filter=query,
                            limit=limit,
                            skip=skip,
                            sort=sort,
                            order=order,
                            user=user,
                            permission=permission)
    def do_query(self, tag_part, min_sentiment, min_relevance):
        """Return JSON dumps of documents holding a tag that contains
        *tag_part* with sentiment and relevance above the thresholds."""
        results = self.db.find({
            'tags': {
                '$elemMatch': {
                    # Substring match on the tag name.
                    'tag':
                    Regex.from_native(re.compile('.*{0}.*'.format(tag_part))),
                    'sentiment': {
                        '$gte': min_sentiment
                    },
                    'relevance': {
                        '$gte': min_relevance
                    },
                }
            }
        })

        bson_results = []
        for r in results:
            bson_results.append(
                bson.json_util.dumps(r, sort_keys=True, indent=4))

        return bson_results
Beispiel #28
0
    def choose_user_job_recommend(self, conditon):
        """Return jobs per language whose responsibility matches *conditon*.

        Args:
            conditon: raw regex string to match against the responsibility
                field.  NOTE(review): the misspelled parameter name is kept
                because callers may pass it as a keyword argument.

        Returns:
            dict mapping language name -> list of matching job documents.
        """
        # Fixed misspelled locals (lauage_name -> collection_by_language,
        # paremt -> pattern); behavior is unchanged.
        collection_by_language = {
            'python': 'zhilian_python_BJ',
            'java': 'zhilian_java_BJ'
        }
        user_chose_recommed_jobs = {}
        pattern = re.compile(conditon)
        regex = Regex.from_native(pattern)
        # Assignment (not ^=): flags are overwritten so only UNICODE remains.
        regex.flags = re.UNICODE

        for language, collection_name in collection_by_language.items():
            cursor = self.connect()[collection_name].find(
                {"responsibility": {
                    "$regex": regex,
                    "$options": "i"
                }})
            user_chose_recommed_jobs[language] = list(cursor)
        print(user_chose_recommed_jobs)

        return user_chose_recommed_jobs
Beispiel #29
0
def show_symbols(path):
    """Return (as JSON) the 191$a symbols of all bibs whose 191$a starts
    with *path*, sorted by the number embedded in each symbol.

    NOTE(review): the sort key assumes every symbol contains at least one
    digit; a digit-free symbol would raise ValueError — confirm inputs.
    """
    path = re.escape(path)
    query = QueryDocument(
        Condition(
            tag='191',
            subfields={'a': Regex('^' + path)},
        ), )
    print(f" the query is  -- {query.to_json()}")
    bibset = BibSet.from_query(query,
                               projection={'191': True},
                               skip=0,
                               limit=0)
    # Removed the dead locals (data, the initial return_data) and the
    # duplicated get_value call whose first result was never used.
    a_res_en = [bib.get_value('191', 'a') for bib in bibset.records]
    return_data = sorted([quote(doc) for doc in a_res_en],
                         key=lambda x: int(''.join(c for c in x
                                                   if c.isdigit())))
    return (jsonify(return_data))
Beispiel #30
0
 def references(self, object_: CmdbObject, filter: dict, limit: int, skip: int, sort: str, order: int,
                user: UserModel = None, permission: AccessControlPermission = None, *args, **kwargs) \
         -> IterationResult[CmdbObject]:
     """Iterate all objects whose 'ref' fields point at *object_*.

     Builds an aggregation pipeline: the caller's filter, a $lookup of
     each object's type, a match on the type's ref fields, then a match
     on the actual field value.
     """
     query = []
     if isinstance(filter, dict):
         query.append(filter)
     elif isinstance(filter, list):
         query += filter
     # Join each object with its type definition.
     query.append({
         '$lookup': {
             'from': 'framework.types',
             'localField': 'type_id',
             'foreignField': 'public_id',
             'as': 'type'
         }
     })
     query.append({'$unwind': {'path': '$type'}})
     # Keep objects whose type declares a 'ref' field that can point at
     # this object's type (id embedded in a string, or stored plainly).
     query.append({
         '$match': {
             'type.fields.type':
             'ref',
             '$or': [{
                 'type.fields.ref_types':
                 Regex(f'.*{object_.type_id}.*', 'i')
             }, {
                 'type.fields.ref_types': object_.type_id
             }]
         }
     })
     # Only documents that actually hold this object's id.
     query.append({'$match': {'fields.value': object_.public_id}})
     return self.iterate(filter=query,
                         limit=limit,
                         skip=skip,
                         sort=sort,
                         order=order,
                         user=user,
                         permission=permission)
Beispiel #31
0
# Example script: query DLX bibliographic records (tag 191 present and
# 269$a starting with "1975") and print their ids, symbols, and XML.
from bson import Regex
from dlx import DB
from dlx.marc import BibSet, QueryDocument, Condition
from config import Config
DB.connect(Config.connect_string)

query = QueryDocument(Condition(tag='191', modifier='exists'),
                      Condition(tag='269', subfields={'a': Regex('^1975')}))

print(query.to_json())

bibset = BibSet.from_query(query, projection={'191': True}, skip=0, limit=0)
print('There are {} results'.format(bibset.count))

# Cache the records so iterating below does not hit the server again.
bibset.cache()

for bib in bibset.records:
    print('id: {}, symbol: {}'.format(bib.id, bib.get_value('191', 'a')))

print(bibset.to_xml())
 def setUp(self):
     """Set up function.

     Converts the native compiled pattern prepared by the parent setUp
     into a bson.Regex so the flag-less BSON regex path is exercised.
     """
     from bson import Regex
     super(BSONRegexWithoutFlagTest, self).setUp()
     self.regex = Regex.from_native(self.regex)
 def setUp(self):
     """Set up class.

     Wraps the parent's compiled pattern in a bson.Regex (locale-flag
     variant of the BSON regex tests).
     """
     super(BSONRegexLocaleTest, self).setUp()
     from bson import Regex
     self.regex = Regex.from_native(self.regex)
 def test_regex_hash(self):
     """Regex objects are unhashable: hash() must raise TypeError."""
     with self.assertRaises(TypeError):
         hash(Regex('hello'))