def test_regex_from_native(self):
        """Check that ``Regex.from_native`` carries over pattern text and flags."""
        # The pattern text of a str regex must come through unchanged.
        wildcard = Regex.from_native(re.compile('.*'))
        self.assertEqual('.*', wildcard.pattern)

        # A bytes pattern compiled without flags is expected to report 0.
        flagless = Regex.from_native(re.compile(b''))
        self.assertEqual(0, flagless.flags)

        # Every flag explicitly requested on a bytes pattern should be kept.
        all_flags = re.I | re.L | re.M | re.S | re.X
        converted = Regex.from_native(re.compile(b'', all_flags))
        self.assertEqual(all_flags, converted.flags)

        # A str pattern compiled with re.U is expected to report exactly re.U.
        converted = Regex.from_native(re.compile('', re.U))
        self.assertEqual(re.U, converted.flags)
    def test_regex_from_native(self):
        """Verify pattern and flag round-tripping through ``Regex.from_native``."""
        str_pattern = re.compile('.*')
        self.assertEqual('.*', Regex.from_native(str_pattern).pattern)

        empty_bytes_pattern = re.compile(b'')
        self.assertEqual(0, Regex.from_native(empty_bytes_pattern).flags)

        # Bytes pattern carrying the full set of flags under test.
        expected = re.I | re.L | re.M | re.S | re.X
        flagged_pattern = re.compile(b'', re.I | re.L | re.M | re.S | re.X)
        actual = Regex.from_native(flagged_pattern).flags
        self.assertEqual(expected, actual)

        # Str pattern compiled with an explicit UNICODE flag.
        explicit_unicode = re.compile('', re.U)
        actual = Regex.from_native(explicit_unicode).flags
        self.assertEqual(re.U, actual)
Exemple #3
0
def gen_regex_search_query(search_query):
    """Build a case-insensitive BSON ``Regex`` from a search string.

    Args:
        search_query: Regular-expression source text. It is compiled as-is,
            without escaping.

    Returns:
        The object produced by ``Regex.from_native`` with ``re.UNICODE``
        cleared and ``re.IGNORECASE`` set in its ``flags``.

    Raises:
        re.error: If ``search_query`` is not a valid regular expression.
    """
    # TODO sanitize this user input before querying the DB with it
    pattern = re.compile(search_query)
    regex = Regex.from_native(pattern)
    # Clear/set the bits explicitly rather than toggling them with XOR: a
    # toggle would switch IGNORECASE *off* when the compiled pattern already
    # carries it (e.g. via an inline ``(?i)``) and would switch UNICODE *on*
    # for a pattern that was compiled without it.
    regex.flags &= ~re.UNICODE
    regex.flags |= re.IGNORECASE
    return regex
Exemple #4
0
    def get_response_statements(self):
        """
        Return the question statements together with their precomputed vectors.

        Documents whose ``text`` matches the regex ``^Q `` are fetched from
        ``self.statements`` (only the ``text`` field is projected). For every
        text that has an entry in ``IntentClassifier().text_vec_indx`` a
        statement object and its vector are collected; texts without a vector
        are skipped, and any exception raised for a row is logged and the row
        is dropped.

        Returns:
            tuple: ``(statement_objects, statement_vec)`` -- two lists kept
            index-aligned, i.e. ``statement_vec[i]`` belongs to
            ``statement_objects[i]``.
        """
        import datetime as dt

        # Author note ('程序员涤生'): the original logic decided whether a
        # sentence is a question via the in_response_to field -- a sentence
        # that appears in another sentence's in_response_to can act as a
        # question. That required first collecting the texts found in
        # in_response_to and then looking up the sentences in that set, which
        # was very slow. With Q/A markers added to the sentences, a regex can
        # match the question sentences directly, and only the text field is
        # returned, which makes the query much cheaper.
        pattern = re.compile('^Q ')
        regex = Regex.from_native(pattern)
        response_query = self.statements.find({'text': {'$regex': regex}}, {'text': 1})

        statement_objects = []
        statement_vec = []
        starttime2 = dt.datetime.now()
        for r in response_query:
            try:
                # Use the vector stored for this text in the text -> vector
                # index (per the original note, built once at system start-up).
                text_vec_indx = IntentClassifier().text_vec_indx
                vec = text_vec_indx.get(r['text'], None)
                if vec is not None:
                    # The two lists must always have the same length, otherwise
                    # looking up the original text by index after the
                    # similarity computation is shifted and the wrong answer is
                    # returned. Build the statement first so that a failure in
                    # mongo_to_object cannot leave a vector without its
                    # statement.
                    statement = self.mongo_to_object({'text': r['text']})
                    statement_objects.append(statement)
                    statement_vec.append(vec)
            except Exception as e:
                # Best effort: a bad row is reported and skipped.
                logging.warning("出现异常%s,问题句子为:%s", str(e), r['text'])
        endtime2 = dt.datetime.now()
        # total_seconds() keeps the sub-second part; timedelta.seconds would
        # report 0 for any loop that finishes in under a second.
        logging.debug("===========get_response_statements的for循环构造向量耗时: %s秒",
                      (endtime2 - starttime2).total_seconds())
        return statement_objects, statement_vec
Exemple #5
0
 async def find_files_in_directory(self, path):
     """Return the stringified ``_id`` of each record under ``path``.

     Records are selected from ``self._db`` by matching ``absolute_path``
     against a regex anchored at the start and followed by the escaped
     ``path``.
     """
     prefix_regex = Regex.from_native(re.compile('^' + re.escape(path)))
     # Toggle the UNICODE bit on the converted regex before querying.
     prefix_regex.flags = prefix_regex.flags ^ re.UNICODE
     query = {'absolute_path': {'$regex': prefix_regex}}
     return [str(doc['_id']) async for doc in self._db.find(query)]
    def do_query(self, tag_part, min_sentiment, min_relevance):
        """Find documents with a matching tag and return them as JSON strings.

        A document matches when one element of its ``tags`` array has a
        ``tag`` matching ``.*<tag_part>.*`` and ``sentiment`` / ``relevance``
        values of at least ``min_sentiment`` / ``min_relevance``.

        Returns a list with one pretty-printed JSON string per document.
        """
        tag_regex = Regex.from_native(re.compile('.*{0}.*'.format(tag_part)))
        element_filter = {
            'tag': tag_regex,
            'sentiment': {'$gte': min_sentiment},
            'relevance': {'$gte': min_relevance},
        }
        cursor = self.db.find({'tags': {'$elemMatch': element_filter}})

        return [
            bson.json_util.dumps(doc, sort_keys=True, indent=4)
            for doc in cursor
        ]
def getQueryCondition(queryConditions):
    """Translate field descriptors into a MongoDB ``$or`` filter.

    Args:
        queryConditions: Iterable of dicts with the keys ``type``, ``name``
            and ``query``.

            * ``text`` / ``array`` / ``datetime`` fields become a
              case-insensitive "contains" regex on ``name``. The query text
              is inserted into the pattern unescaped.
            * ``number`` fields become ``{"$eq": float(query)}``. A query that
              cannot be converted becomes ``{"$eq": -99999}``, and an empty
              query becomes ``{"$gte": -99999}``.
            * Any other type contributes no condition.

    Returns:
        dict: ``{"$or": [...]}`` with one entry per contributing field. The
        list is empty when no field contributes.
    """
    conditions = []
    for fieldInfo in queryConditions:
        fieldType = fieldInfo["type"]
        if fieldType in ('text', 'array', 'datetime'):
            pattern = re.compile(r'.*' + fieldInfo['query'] + '.*', re.I)
            regex = Regex.from_native(pattern)
            # Clear the UNICODE flag that Python 3 adds to str patterns, so
            # only IGNORECASE remains on the converted regex.
            regex.flags &= ~re.UNICODE
            conditions.append({fieldInfo["name"]: regex})
        elif fieldType == 'number':
            if fieldInfo["query"] != "":
                try:
                    num = float(fieldInfo["query"])
                except (TypeError, ValueError, OverflowError):
                    # Not a number: fall back to the -99999 sentinel used by
                    # the original code. Only conversion failures are caught,
                    # so KeyboardInterrupt/SystemExit are no longer swallowed.
                    num = -99999
                conditions.append({fieldInfo["name"]: {"$eq": num}})
            else:
                conditions.append({fieldInfo["name"]: {"$gte": -99999}})
    return {"$or": conditions}
    def do_query(self, tag_part, min_sentiment, min_relevance):
        """Query ``self.db`` by tag substring and minimum scores.

        Selects documents having a ``tags`` element whose ``tag`` matches
        ``.*<tag_part>.*`` and whose ``sentiment`` and ``relevance`` are
        greater than or equal to the given minimums. Each hit is serialised
        with ``bson.json_util.dumps`` (sorted keys, indent 4).
        """
        tag_pattern = re.compile('.*{0}.*'.format(tag_part))
        query = {
            'tags': {
                '$elemMatch': {
                    'tag': Regex.from_native(tag_pattern),
                    'sentiment': {'$gte': min_sentiment},
                    'relevance': {'$gte': min_relevance},
                }
            }
        }

        serialised = []
        for document in self.db.find(query):
            as_json = bson.json_util.dumps(document, sort_keys=True, indent=4)
            serialised.append(as_json)
        return serialised
Exemple #9
0
    def choose_user_job_recommend(self, conditon):
        """Collect jobs whose ``responsibility`` matches ``conditon``.

        ``conditon`` is compiled as a regular expression and converted to a
        BSON ``Regex`` whose flags are replaced with ``re.UNICODE``. Each
        collection listed below is queried with that regex plus the ``"i"``
        option. The resulting mapping of language name to list of matching
        documents is printed and returned.
        """
        collection_by_language = {
            'python': 'zhilian_python_BJ',
            'java': 'zhilian_java_BJ'
        }
        bson_regex = Regex.from_native(re.compile(conditon))
        bson_regex.flags = re.UNICODE

        # One query per collection; a connection is obtained for each lookup.
        jobs_by_language = {
            language: list(self.connect()[collection_name].find(
                {"responsibility": {
                    "$regex": bson_regex,
                    "$options": "i"
                }}))
            for language, collection_name in collection_by_language.items()
        }
        print(jobs_by_language)

        return jobs_by_language
def queryTable(table,
               request,
               additionalColumns={"_id": 0},
               additionalConditions=None,
               aggregationConditions=False):
    """Run a paged, sorted query against ``table`` from POSTed parameters.

    Args:
        table: Collection-like object providing ``find`` and ``aggregate``.
        request: Object whose ``POST`` mapping supplies ``pageNum``,
            ``pageSize``, ``fields`` (JSON), ``sortProp`` and ``order``.
            When ``advance`` is ``'1'`` it also supplies ``queryFields`` and
            ``multiConditions`` (both JSON).
        additionalColumns: Projection passed to ``table.find``. This function
            only forwards it and never modifies it.
        additionalConditions: Optional list of extra filter documents that
            are AND-ed with the generated conditions. The list is copied, so
            the caller's object is left untouched.
        aggregationConditions: Optional list of aggregation stages. When
            truthy the query runs through ``table.aggregate``, otherwise
            through ``table.find``.

    Returns:
        tuple: ``(result, total)`` -- the documents of the requested page and
        the number of documents matching the filter.
    """
    # Work on a private copy: conditions are appended below, and the caller's
    # list must not grow from one call to the next.
    if additionalConditions is None:
        additionalConditions = []
    else:
        additionalConditions = list(additionalConditions)
    pageNum = int(request.POST["pageNum"])
    pageSize = int(request.POST["pageSize"])
    queryConditions = json.loads(request.POST["fields"])
    conditions = getQueryCondition(queryConditions)
    if 'advance' in request.POST and request.POST['advance'] == '1':
        queryFields = json.loads(request.POST["queryFields"])
        multiConditions = json.loads(request.POST["multiConditions"])
        for field in queryFields:
            if field['type'] == 'datetime':
                # Range bounds arrive as "<date>T<time>"; the "T" is replaced
                # with a space before comparing.
                datetime_from = multiConditions[field['value'] + "_from"]
                if datetime_from != '':
                    additionalConditions.append({
                        field['value']: {
                            "$gte": datetime_from.replace("T", " ")
                        }
                    })
                datetime_to = multiConditions[field['value'] + "_to"]
                if datetime_to != '':
                    additionalConditions.append({
                        field['value']: {
                            "$lte": datetime_to.replace("T", " ")
                        }
                    })
            elif field['type'] == 'number':
                # A missing or non-numeric bound simply means "no bound".
                # Only lookup/conversion failures are ignored, so unrelated
                # errors are no longer swallowed.
                try:
                    num_from = float(multiConditions[field['value'] + "_from"])
                    additionalConditions.append(
                        {field['value']: {
                             "$gte": num_from
                         }})
                except (KeyError, TypeError, ValueError, OverflowError):
                    pass
                try:
                    num_to = float(multiConditions[field['value'] + "_to"])
                    additionalConditions.append(
                        {field['value']: {
                             "$lte": num_to
                         }})
                except (KeyError, TypeError, ValueError, OverflowError):
                    pass
            else:
                query_t = multiConditions[field['value']]
                if query_t == '':
                    continue
                # Case-insensitive "contains" match. The text is inserted
                # into the pattern unescaped.
                pattern_t = re.compile(r'.*' + query_t + '.*', re.I)
                regex_t = Regex.from_native(pattern_t)
                # Clear the UNICODE flag that Python 3 adds to str patterns.
                regex_t.flags &= ~re.UNICODE
                additionalConditions.append({field['value']: regex_t})
    if aggregationConditions:
        aggregationConditionsT = aggregationConditions.copy()
        if additionalConditions:
            additionalConditions.append(conditions)
            conditions = {"$and": additionalConditions}
        aggregationConditionsT.append({'$match': conditions})
        # Let the server count the matches. Reading the cursor's private
        # ``_CommandCursor__data`` buffer only sees the first batch, which
        # under-reports the total for larger result sets.
        countRows = list(
            table.aggregate(aggregationConditionsT + [{
                '$group': {
                    '_id': None,
                    'total': {
                        '$sum': 1
                    }
                }
            }]))
        total = countRows[0]['total'] if countRows else 0
        # Mind the order: sort first, then skip and limit.
        aggregationConditionsT.append(
            {"$sort": {
                request.POST["sortProp"]: int(request.POST["order"])
            }})
        aggregationConditionsT.append({'$skip': pageSize * (pageNum - 1)})
        aggregationConditionsT.append({"$limit": pageSize})
        result = list(table.aggregate(aggregationConditionsT))
    else:
        # With extra conditions, combine everything with $and.
        if additionalConditions:
            additionalConditions.append(conditions)
            query = table.find({"$and": additionalConditions},
                               additionalColumns)
        # Without extra conditions, query with the generated filter directly.
        else:
            query = table.find(conditions, additionalColumns)
        sortCondition = [(request.POST["sortProp"], int(request.POST["order"]))
                         ]
        # Original note: the "en" collation is meant to make the ordering
        # ignore case.
        query.sort(sortCondition).skip(
            pageSize * (pageNum - 1)).limit(pageSize).collation(
                {"locale": "en"})
        result = list(query)
        # NOTE(review): Cursor.count() is deprecated in newer PyMongo
        # releases in favour of Collection.count_documents -- confirm the
        # driver version before upgrading.
        total = query.count()
    return result, total
Exemple #11
0
 def setUp(self):
     """Run the parent set-up, then replace ``self.regex`` with its BSON form."""
     super(BSONRegexLocaleTest, self).setUp()
     from bson import Regex
     # presumably self.regex is a compiled pattern provided by the parent
     # set-up or the class -- verify against the base test case
     native_pattern = self.regex
     self.regex = Regex.from_native(native_pattern)
Exemple #12
0
 def setUp(self):
     """Convert the fixture's ``self.regex`` to a BSON ``Regex`` after parent set-up."""
     from bson import Regex
     super(BSONRegexWithoutFlagTest, self).setUp()
     # presumably self.regex is a compiled pattern at this point -- verify
     # against the base test case
     compiled_pattern = self.regex
     self.regex = Regex.from_native(compiled_pattern)
Exemple #13
0
 def _convert_regex(self, regex):
     """Compile ``regex`` and return it as a BSON ``Regex`` flagged ``re.UNICODE`` only."""
     compiled = re.compile(regex)
     converted = Regex.from_native(compiled)
     # Overwrite whatever flags the conversion produced.
     converted.flags = re.UNICODE
     return converted
 def setUp(self):
     """Call the parent ``setUp`` and wrap ``self.regex`` with ``Regex.from_native``."""
     super(BSONRegexLocaleTest, self).setUp()
     from bson import Regex
     # presumably self.regex holds a compiled pattern here -- verify against
     # the base test case
     source_pattern = self.regex
     self.regex = Regex.from_native(source_pattern)
 def setUp(self):
     """Prepare the fixture: parent set-up first, then a BSON ``Regex`` in ``self.regex``."""
     from bson import Regex
     super(BSONRegexWithoutFlagTest, self).setUp()
     # presumably self.regex holds a compiled pattern here -- verify against
     # the base test case
     source_pattern = self.regex
     self.regex = Regex.from_native(source_pattern)