Ejemplo n.º 1
0
    def test_bson_regex(self):
        """Round-trip bson.Regex values through BSON encode/decode.

        Covers a PCRE-only pattern (invalid in Python's re module) and a
        Python-compilable pattern carrying every supported flag.
        """
        # Invalid Python regex, though valid PCRE.
        bson_re1 = Regex(r'[\w-\.]')
        self.assertEqual(r'[\w-\.]', bson_re1.pattern)
        self.assertEqual(0, bson_re1.flags)

        doc1 = {'r': bson_re1}
        doc1_bson = b('\x11\x00\x00\x00'  # document length
                      '\x0br\x00[\\w-\\.]\x00\x00'  # r: regex
                      '\x00')  # document terminator

        self.assertEqual(doc1_bson, BSON.encode(doc1))
        # compile_re=False keeps the value as a bson.Regex instead of
        # calling re.compile(), which would fail on this pattern.
        self.assertEqual(doc1, BSON(doc1_bson).decode(compile_re=False))

        # Valid Python regex, with flags.
        re2 = re.compile('.*', re.I | re.L | re.M | re.S | re.U | re.X)
        bson_re2 = Regex('.*', re.I | re.L | re.M | re.S | re.U | re.X)

        doc2_with_re = {'r': re2}
        doc2_with_bson_re = {'r': bson_re2}
        doc2_bson = b("\x12\x00\x00\x00"  # document length
                      "\x0br\x00.*\x00ilmsux\x00"  # r: regex
                      "\x00")  # document terminator

        # A compiled re pattern and a bson.Regex with the same
        # pattern/flags must encode to identical bytes.
        self.assertEqual(doc2_bson, BSON.encode(doc2_with_re))
        self.assertEqual(doc2_bson, BSON.encode(doc2_with_bson_re))

        # Built-in re objects don't support ==. Compare pattern and flags.
        self.assertEqual(re2.pattern, BSON(doc2_bson).decode()['r'].pattern)
        self.assertEqual(re2.flags, BSON(doc2_bson).decode()['r'].flags)

        self.assertEqual(doc2_with_bson_re,
                         BSON(doc2_bson).decode(compile_re=False))
Ejemplo n.º 2
0
    def test_bson_regex(self):
        """Round-trip bson.Regex values through BSON encode/decode.

        Covers a PCRE-only pattern (invalid in Python's re module) and a
        Python-compilable pattern carrying several flags.
        """
        # Invalid Python regex, though valid PCRE.
        bson_re1 = Regex(r'[\w-\.]')
        self.assertEqual(r'[\w-\.]', bson_re1.pattern)
        self.assertEqual(0, bson_re1.flags)

        doc1 = {'r': bson_re1}
        doc1_bson = (
            b'\x11\x00\x00\x00'  # document length
            b'\x0br\x00[\\w-\\.]\x00\x00'  # r: regex
            b'\x00')  # document terminator

        self.assertEqual(doc1_bson, BSON.encode(doc1))
        self.assertEqual(doc1, BSON(doc1_bson).decode())

        # Valid Python regex, with flags.
        re2 = re.compile(u'.*', re.I | re.M | re.S | re.U | re.X)
        bson_re2 = Regex(u'.*', re.I | re.M | re.S | re.U | re.X)

        doc2_with_re = {'r': re2}
        doc2_with_bson_re = {'r': bson_re2}
        doc2_bson = (
            b"\x11\x00\x00\x00"  # document length
            b"\x0br\x00.*\x00imsux\x00"  # r: regex
            b"\x00")  # document terminator

        # A compiled re pattern and a bson.Regex with the same
        # pattern/flags must encode to identical bytes.
        self.assertEqual(doc2_bson, BSON.encode(doc2_with_re))
        self.assertEqual(doc2_bson, BSON.encode(doc2_with_bson_re))

        # Built-in re objects don't support ==; compare pattern and flags.
        self.assertEqual(re2.pattern, BSON(doc2_bson).decode()['r'].pattern)
        self.assertEqual(re2.flags, BSON(doc2_bson).decode()['r'].flags)
Ejemplo n.º 3
0
    def test_bson_classes(self):
        """Matcher equality for each supported BSON class.

        Each (a, b) pair holds two equal instances; Matcher must match a
        spec against itself and against the equal PyMongo-side instance,
        in both the positional and keyword Command positions.
        """
        _id = '5a918f9fa08bff9c7688d3e1'

        for a, b in [
            (Binary(b'foo'), Binary(b'foo')),
            (Code('foo'), Code('foo')),
            (Code('foo', {'x': 1}), Code('foo', {'x': 1})),
            (DBRef('coll', 1), DBRef('coll', 1)),
            (DBRef('coll', 1, 'db'), DBRef('coll', 1, 'db')),
            (Decimal128('1'), Decimal128('1')),
            (MaxKey(), MaxKey()),
            (MinKey(), MinKey()),
            (ObjectId(_id), ObjectId(_id)),
            (Regex('foo', 'i'), Regex('foo', 'i')),
            (Timestamp(1, 2), Timestamp(1, 2)),
        ]:
            # Basic case.
            self.assertTrue(
                Matcher(Command(y=b)).matches(Command(y=b)),
                "MockupDB %r doesn't equal itself" % (b, ))

            # First Command argument is special, try comparing the second also.
            self.assertTrue(
                Matcher(Command('x', y=b)).matches(Command('x', y=b)),
                "MockupDB %r doesn't equal itself" % (b, ))

            # In practice, users pass PyMongo classes in message specs.
            self.assertTrue(
                Matcher(Command(y=b)).matches(Command(y=a)),
                "PyMongo %r != MockupDB %r" % (a, b))

            self.assertTrue(
                Matcher(Command('x', y=b)).matches(Command('x', y=a)),
                "PyMongo %r != MockupDB %r" % (a, b))
Ejemplo n.º 4
0
 def inner_match_fields(_fields, _matched_fields, _reference=None):
     """
     Recursively collect fields whose value matches any pipeline regex.

     NOTE(review): ``possible_regex_list`` is a free variable taken from
     the enclosing scope, not a parameter — confirm the closure supplies it.
     Args:
         _fields: list of referenced fields
         _matched_fields: accumulator list; matched fields (or their
             enclosing reference objects) are appended in place
         _reference: reference object to which the reference field refers
     Returns:
         None — results are accumulated into ``_matched_fields``
     """
     for regex_ in possible_regex_list:
         try:
             # 'ims' flags: case-insensitive, multiline, dot-matches-all.
             runtime_regex = Regex(regex_, 'ims').try_compile()
         except Exception:
             # PCRE-only pattern: keep the raw string; findall on a str
             # raises below and the field is simply skipped.
             runtime_regex = regex_
         for field in _fields:
             try:
                 res = runtime_regex.findall(str(field.get('value')))
                 if len(res) > 0:
                     inner_value = _reference if _reference else field
                     # removing duplicated from list
                     if inner_value not in _matched_fields:
                         _matched_fields.append(inner_value)
                 # Recurse into nested reference structures.
                 if field['type'] == 'ref':
                     inner_match_fields(field['reference']['summaries'],
                                        _matched_fields, field)
                 if field['type'] == 'ref-section-field':
                     inner_match_fields(field['references']['fields'],
                                        _matched_fields, field)
             except Exception:
                 continue
Ejemplo n.º 5
0
    def test_exception_wrapping(self):
        """decode_all wraps decode errors in InvalidBSON while keeping the
        original error message and traceback."""
        # No matter what exception is raised while trying to decode BSON,
        # the final exception always matches InvalidBSON and the original
        # traceback is preserved.

        # Invalid Python regex, though valid PCRE.
        # Causes an error in re.compile().
        bad_doc = BSON.encode({'r': Regex(r'[\w-\.]')})

        try:
            decode_all(bad_doc)
        except InvalidBSON:
            exc_type, exc_value, exc_tb = sys.exc_info()
            # Original re error was captured and wrapped in InvalidBSON.
            self.assertEqual(exc_value.args[0], 'bad character range')

            # Traceback includes bson module's call into re module.
            for filename, lineno, fname, text in traceback.extract_tb(exc_tb):
                if filename.endswith('re.py') and fname == 'compile':
                    # Traceback was correctly preserved.
                    break
            else:
                self.fail('Traceback not captured')
        else:
            self.fail('InvalidBSON not raised')
Ejemplo n.º 6
0
    def find_match_fields(result: R, possible_regex_list=None):
        """Collect fields of *result* whose value matches any given regex.

        Args:
            result: Generic search result
            possible_regex_list: list of text regex from the pipeline builder

        Returns:
            list of fields where a regex matched, or None when there are
            no patterns or no matches
        """
        hits = []
        fields = result.fields
        if not possible_regex_list:
            return None
        for pattern in possible_regex_list:
            # Prefer a compiled Python regex; a PCRE-only pattern falls
            # back to the raw string (its fields are then skipped below).
            try:
                matcher = Regex(pattern, 'imsx').try_compile()
            except Exception:
                matcher = pattern
            for candidate in fields:
                try:
                    if matcher.findall(str(candidate.get('value'))):
                        hits.append(candidate)
                except Exception:
                    continue
        return hits if hits else None
Ejemplo n.º 7
0
async def get_flows_and_count_db(
        *,
        current_page: int,
        page_size: int,
        sorter: str = None,
        flow_name: str,
        language: str,
        updated_at: list[date],
        triggered_counts: list[int]) -> (list[FlowSchemaDb], int):
    """Fetch one page of flows plus the total count matching the filters.

    A filter whose value is Ellipsis is passed through to form_query
    unchanged (used here to mark "no filter").
    """
    if updated_at:
        updated_at_start, updated_at_end = updated_at

    # Build each optional filter up front; Ellipsis marks "not filtered".
    name_filter = (Regex(f".*{escape(flow_name)}.*", "i")
                   if flow_name else ...)
    count_filter = ({
        "$gte": triggered_counts[0],
        "$lte": triggered_counts[1]
    } if triggered_counts else ...)
    date_filter = ({
        "$gte": make_timezone_aware(updated_at_start),
        "$lte": make_timezone_aware(updated_at_end)
    } if updated_at else ...)

    db_key = [
        ("name", {"$ne": None}),
        ("name", name_filter),
        ("triggered_count", count_filter),
        ("is_active", True),
        ("updated_at", date_filter),
    ]
    query = form_query(db_key)

    flows = await get_flows_db(current_page=current_page,
                               page_size=page_size,
                               sorter=sorter,
                               query=query)
    total = await get_flows_count_db(query=query)
    return flows, total
Ejemplo n.º 8
0
 def test_regex_pickling(self):
     """Regex must round-trip through pickle with a stable payload."""
     reg = Regex(".?")
     # Exact bytes produced by pickling Regex('.?') under Python 3
     # (pickle protocol 4).
     pickled_with_3 = (b'\x80\x04\x959\x00\x00\x00\x00\x00\x00\x00\x8c\n'
                       b'bson.regex\x94\x8c\x05Regex\x94\x93\x94)\x81\x94}'
                       b'\x94(\x8c\x07pattern\x94\x8c\x02.?\x94\x8c\x05flag'
                       b's\x94K\x00ub.')
     self.round_trip_pickle(reg, pickled_with_3)
Ejemplo n.º 9
0
 def query_nstart(self, value):
     """DOESN'T START WITH: negate a case-insensitive prefix match."""
     if isinstance(value, list):
         value = value[0]
     # NOTE(review): value is interpolated unescaped, so regex
     # metacharacters in it alter the match — confirm that is intended.
     prefix_pattern = Regex('^' + value + '.*', 'i')
     return MongoQuery({self.field: {'$not': prefix_pattern}})
Ejemplo n.º 10
0
 def query_nlike(self, value):
     """WILDCARD NOT CONTAINS: negate a case-insensitive substring match."""
     if isinstance(value, list):
         value = value[0]
     # NOTE(review): value is interpolated unescaped, so regex
     # metacharacters in it alter the match — confirm that is intended.
     contains_pattern = Regex('.*' + value + '.*', 'i')
     return MongoQuery({self.field: {'$not': contains_pattern}})
Ejemplo n.º 11
0
 def query_nend(self, value):
     """DOESN'T END WITH: negate a case-insensitive suffix match.

     Args:
         value: suffix text; when a list is given its first element is used.

     Returns:
         MongoQuery excluding documents whose field ends with *value*.
     """
     if isinstance(value, list):
         value = value[0]
     # Bug fix: the pattern previously started with '*.', an invalid
     # "nothing to repeat" quantifier; a suffix match is '.*value$'.
     return MongoQuery(
         {self.field: {
             '$not': Regex('.*' + value + '$', 'i')
         }})
Ejemplo n.º 12
0
async def get_portal_user(username: str):
    """Retrieve the active portal user whose username matches exactly,
    case-insensitively.

    Args:
        username: login name to look up; treated as literal text.

    Returns:
        The first matching user document, or None when nothing matches.
    """
    import re  # local import: only needed here to escape the username
    # Escape the username so regex metacharacters cannot widen the match
    # (previously f"^{username}$" was injectable, e.g. username='.*').
    query = {"username": Regex(f"^{re.escape(username)}$", "i"),
             "is_active": True}
    async for user in portal_user_collection.find(query):
        return user
Ejemplo n.º 13
0
    def test_jsonify_Regex(self):
        """jsonify must serialize bson.Regex as {'$regex', '$options'}."""
        pattern_text = "bb|[^b]{2}"
        payload = {'a': 1, 'regex': Regex(pattern_text)}
        expected = {'a': 1,
                    'regex': {'$regex': pattern_text, "$options": ""}}

        jsonified_bson = jsonify(payload).response
        jsonified = flask_jsonify(expected).response

        assert jsonified_bson == jsonified
Ejemplo n.º 14
0
 def extract_condition(val):
     """Build a Condition for *val* against the enclosing *key*.

     A dict maps subfield codes to values; a value delimited like
     '/.../'  is converted to a bson Regex with the delimiters stripped.
     val == 1 / 0 mean the field must / must not exist.
     NOTE(review): ``key`` is a free variable from the enclosing scope,
     and any other scalar value falls through returning None — confirm
     callers expect that.
     """
     if isinstance(val, dict):
         for code, sval in val.items():
             if sval[0] == '/':
                 val[code] = Regex(sval[1:-1])
         return Condition(key, val)
     elif val == 1:
         return Condition(key, modifier='exists')
     elif val == 0:
         return Condition(key, modifier='not_exists')
Ejemplo n.º 15
0
 def test_regex_comparison(self):
     """Regex equality compares both pattern and flags."""
     # Different patterns are unequal.
     self.assertNotEqual(Regex('a'), Regex('b'))
     # Same pattern, different flags: unequal.
     self.assertNotEqual(Regex('a', re.I), Regex('a', re.M))
     # Same pattern and flags: equal.
     self.assertEqual(Regex('a', re.I), Regex('a', re.I))
Ejemplo n.º 16
0
def remove_attachment_id_from_flow(flow_collection: Collection,
                                   url: str) -> None:
    """
    Remove attachment_id from old collection and update url with new bucket
    """
    filename = os.path.split(url)[-1]
    # Match any flow whose media url ends with this filename,
    # case-insensitively; the filename itself is regex-escaped.
    url_pattern = Regex(f".*{re.escape(filename)}$", "i")
    for doc in flow_collection.find({"flow.data.url": url_pattern}):
        for step in doc['flow']:
            is_media = step['type'] in ('image', 'video')
            if is_media and step['data']['url'].endswith(filename):
                step['data']['url'] = url
                step['data'].pop('attachment_id', None)
        flow_collection.replace_one({"_id": doc['_id']}, doc)
 def find(self, page_size, page_index, keyword=None):
     """Page through worklog documents, newest first.

     Args:
         page_size: number of documents per page
         page_index: 1-based page number
         keyword: optional text; when given, title OR content must match
             it as a regex (unescaped — metacharacters are interpreted)
     Returns:
         (cursor, total) — cursor over the requested page and the total
         number of matching documents
     NOTE(review): cursor.count() and collection.count() were removed in
     PyMongo 4 — confirm the pinned driver version supports them.
     """
     reg = None
     cursor = None
     total = 0
     if keyword is not None:
         reg = Regex(r'%s' % keyword)
         cursor = self.db.worklog\
             .find({'$or': [{'title': reg}, {'content': reg}]})\
             .sort("creationdate", -1)\
             .skip((page_index - 1) * page_size)\
             .limit(page_size)
         total = self.db.worklog\
             .find({'$or': [{'title': reg}, {'content': reg}]})\
             .count()
     else:
         cursor = self.db.worklog.find().sort("creationdate", -1).skip(
             (page_index - 1) * page_size).limit(page_size)
         total = self.db.worklog.count()
     return cursor, total
Ejemplo n.º 18
0
async def update_message_db(message_item: UpdateMessageResponse,
                            current_user: CurrentUserSchema,
                            language: str = 'EN') -> str:
    """Grade a message: store the selected answer on the message and move
    the matching question variation away from the old response.

    NOTE(review): result3 is initialized but never used here and no final
    return is visible after the loop — this excerpt may be truncated.
    """
    query = {"_id": ObjectId(message_item.id)}
    result1 = result2 = result3 = 0

    # add selected answer to message if it's not same with original response/graded response
    message_from_db = await message_collection.find_one(query)
    graded_response = message_from_db.get('adminportal', {}).get('answer')
    original_response = message_from_db.get('chatbot', {}).get('qnid')

    response = graded_response or original_response
    if not graded_response and response == message_item.new_response:
        return 'No questions updated'

    updated_info_query = {
        "updated_at": get_local_datetime_now(),
        "updated_by": ObjectId(current_user.userId),
    }
    # add graded response to message
    set_message_query = updated_info_query | {
        "adminportal.graded": True,
        "adminportal.answer": ObjectId(message_item.new_response)
    }
    result1 = await message_collection.update_one(query,
                                                  {'$set': set_message_query})

    # delete variation from main question and add variation to new question
    # (exact case-insensitive match; escape() guards regex metacharacters)
    query = {
        "_id": ObjectId(response),
        "alternate_questions.text": Regex(f"^{escape(message_item.text)}$",
                                          "i"),
        "is_active": True
    }
    if question_db := await question_collection.find_one(
            query):  # remove variation if found match
        for idx, v in enumerate(question_db['alternate_questions']):
            if v['text'].lower() == message_item.text.lower():
                question_db['alternate_questions'].pop(idx)
                question_db |= updated_info_query
                result2 = await question_collection.replace_one(
                    {"_id": question_db['_id']}, question_db)
                break
Ejemplo n.º 19
0
    def references(self, object_: CmdbObject, filter: dict, limit: int, skip: int, sort: str, order: int,
                   user: UserModel = None, permission: AccessControlPermission = None, *args, **kwargs) \
            -> IterationResult[CmdbObject]:
        """Iterate objects that reference *object_* via a 'ref' field or a
        'ref-section'.

        Pipeline: caller filter -> join each object's type definition ->
        keep types whose ref fields may target object_.type_id (or whose
        ref-section does) -> keep docs actually holding its public_id.
        """
        query = []
        if isinstance(filter, dict):
            query.append(filter)
        elif isinstance(filter, list):
            query += filter

        # Join the type document so field definitions can be inspected.
        query.append(
            Builder.lookup_(_from='framework.types',
                            _local='type_id',
                            _foreign='public_id',
                            _as='type'))
        query.append(Builder.unwind_({'path': '$type'}))

        # ref_types is matched both as text containing the id (regex)
        # and as the plain integer id.
        field_ref_query = {
            'type.fields.type':
            'ref',
            '$or': [{
                'type.fields.ref_types':
                Regex(f'.*{object_.type_id}.*', 'i')
            }, {
                'type.fields.ref_types': object_.type_id
            }]
        }
        section_ref_query = {
            'type.render_meta.sections.type': 'ref-section',
            'type.render_meta.sections.reference.type_id': object_.type_id
        }
        query.append(
            Builder.match_(Builder.or_([field_ref_query, section_ref_query])))
        # Finally require that the document actually stores the id.
        query.append(Builder.match_({'fields.value': object_.public_id}))
        return self.iterate(filter=query,
                            limit=limit,
                            skip=skip,
                            sort=sort,
                            order=order,
                            user=user,
                            permission=permission)
Ejemplo n.º 20
0
def show_symbols(path):
    """Return a JSON list of document symbols (MARC 191$a) starting with
    *path*, sorted by the integer formed from each symbol's digits.

    Args:
        path: document-symbol prefix; regex metacharacters are escaped.
    """
    prefix = re.escape(path)
    query = QueryDocument(
        Condition(tag='191', subfields={'a': Regex('^' + prefix)}))
    print(f" the query is  -- {query.to_json()}")
    bibset = BibSet.from_query(query,
                               projection={'191': True},
                               skip=0,
                               limit=0)
    # Fetch each record's symbol once (the original called get_value twice
    # per record and kept unused locals).
    symbols = [bib.get_value('191', 'a') for bib in bibset.records]
    # URL-quote each symbol, then sort numerically by its digits.
    return_data = sorted((quote(sym) for sym in symbols),
                         key=lambda s: int(''.join(c for c in s
                                                   if c.isdigit())))
    return jsonify(return_data)
Ejemplo n.º 21
0
 def references(self, object_: CmdbObject, filter: dict, limit: int, skip: int, sort: str, order: int,
                user: UserModel = None, permission: AccessControlPermission = None, *args, **kwargs) \
         -> IterationResult[CmdbObject]:
     """Iterate objects that reference *object_* via a 'ref' field.

     Pipeline: caller filter -> join each object's type definition ->
     keep types whose ref fields may target object_.type_id -> keep
     documents actually holding its public_id.
     """
     query = []
     if isinstance(filter, dict):
         query.append(filter)
     elif isinstance(filter, list):
         query += filter
     # Join the type document so field definitions can be inspected.
     query.append({
         '$lookup': {
             'from': 'framework.types',
             'localField': 'type_id',
             'foreignField': 'public_id',
             'as': 'type'
         }
     })
     query.append({'$unwind': {'path': '$type'}})
     # ref_types is matched both as text containing the id (regex) and
     # as the plain integer id.
     query.append({
         '$match': {
             'type.fields.type':
             'ref',
             '$or': [{
                 'type.fields.ref_types':
                 Regex(f'.*{object_.type_id}.*', 'i')
             }, {
                 'type.fields.ref_types': object_.type_id
             }]
         }
     })
     # Finally require that the document actually stores the id.
     query.append({'$match': {'fields.value': object_.public_id}})
     return self.iterate(filter=query,
                         limit=limit,
                         skip=skip,
                         sort=sort,
                         order=order,
                         user=user,
                         permission=permission)
Ejemplo n.º 22
0
 def test_regex_hash(self):
     """Regex instances are unhashable: hash() must raise TypeError."""
     with self.assertRaises(TypeError):
         hash(Regex('hello'))
Ejemplo n.º 23
0
async def get_conversations_and_count_db(*,
                                         current_page: int,
                                         page_size: int,
                                         tags: list[str] = None,
                                         search_query: str = ''):
    """Return one page of bot-user conversations and the total count.

    Filters: the user must carry all *tags*, and *search_query*
    (regex-escaped, case-insensitive) must occur in the concatenated
    "first_name last_name".  The count is taken from the filter stages
    alone; sort/skip/limit/lookup stages are appended afterwards.
    """
    conversations = []
    # Filter stages only; a stage whose value is Ellipsis marks "no
    # filter" (presumably dropped by form_pipeline — TODO confirm).
    db_key = [
        ("$addFields", {
            "fullname": {
                "$concat": ["$first_name", " ", "$last_name"]
            }
        }),
        ("$match", {
            "tags": {
                "$all": tags
            }
        } if tags else ...),
        ("$match", {
            "fullname": Regex(f".*{escape(search_query)}.*", "i")
        } if search_query else ...),
    ]

    pipeline = form_pipeline(db_key)
    # Count on a copy of the filter-only pipeline before paging stages.
    total = await bot_user_pipeline_count(pipeline=pipeline[:])
    extra_stages = [
        {
            "$sort": SON([("last_active.received_at", -1)])
        },
        {
            "$skip": (current_page - 1) * page_size
        },
        {
            "$limit": page_size
        },
        # Pull in the user's last received message document.
        {
            "$lookup": {
                "from": "message",
                "localField": "last_active.received_message_id",
                "foreignField": "_id",
                "as": "last_message"
            }
        },
        {
            "$unwind": {
                "path": "$last_message",
                "preserveNullAndEmptyArrays": False
            }
        },
        # {"$lookup": {
        #     "from": "message",
        #     "localField": "message.chatbot.convo_id",
        #     "foreignField": "chatbot.convo_id",
        #     "as": "conversations"
        # }},
        # {"$addFields": {
        #     "last_message": {
        #         "$last": "$conversations"
        #     }
        # }},
        {
            "$project": {
                "_id": 1,
                "fullname": 1,
                "first_name": 1,
                "last_name": 1,
                "email": 1,
                "gender": 1,
                "profile_pic_url": 1,
                "last_active": 1,
                "created_at": 1,
                "chatbot": 1,
                "tags": 1,
                "platforms": 1,
                "last_message": 1
            }
        }
    ]

    pipeline.extend(extra_stages)

    cursor = bot_user_collection.aggregate(pipeline)

    async for conversation in cursor:
        entry = ConversationBotUserSchema(**bot_user_helper(conversation))
        if entry.last_message:
            entry.last_message = format_message_to_display(entry.last_message)
        conversations.append(entry)
    return conversations, total
Ejemplo n.º 24
0
 def query_end(self, value):
     """ENDS WITH: case-insensitive suffix match.

     Args:
         value: suffix text; when a list is given its first element is used.

     Returns:
         MongoQuery matching documents whose field ends with *value*.
     """
     if isinstance(value, list):
         value = value[0]
     # Bug fix: the pattern previously started with '*.', an invalid
     # "nothing to repeat" quantifier; a suffix match is '.*value$'.
     return MongoQuery({self.field: Regex('.*' + value + '$', 'i')})
Ejemplo n.º 25
0
 def query_start(self, value):
     """STARTS WITH: case-insensitive prefix match."""
     if isinstance(value, list):
         value = value[0]
     # NOTE(review): value is interpolated unescaped, so regex
     # metacharacters in it alter the match — confirm that is intended.
     prefix_pattern = Regex('^' + value + '.*', 'i')
     return MongoQuery({self.field: prefix_pattern})
Ejemplo n.º 26
0
def show_txt(path):
    '''Display the text of the document whose symbol is *path*.

    Looks the text up in txts_coll; on a cache miss it extracts the text
    from the record's PDF (PDFExtract + tika parser), stores it, then
    serves the cached copy.  Multiple matches return a sorted symbol list.
    '''
    data = ""
    return_data = ""
    doc_list = []
    #path=quote(path)
    path = re.escape(path)
    '''
 i2 = urllib.parse.quote(i.encode("utf-8"))  #need to deal with special characters in each url
        uu2 = urllib.parse.urljoin(uu, i2)         #create url
    '''
    print(f" this is compiled path -- {'^' + str(path)+'$'}")
    # Exact-match lookup of the cached text by document symbol.
    doc_list = list(
        txts_coll.find({"doc_sym": {
            "$regex": "^" + str(path) + "$"
        }}))
    if len(doc_list) == 0 and path != 'favicon.ico':
        # Cache miss: extract the text from the bib record's PDF.
        print(f"no exact DS {str(path)} - generating one")
        bib_value = ''
        #doc_list=list(txts_coll.find({"doc_sym":{"$regex":path}}))
        ''' extract text from DB'''
        #build list of tuples (striped_doc_sum, url to the pdf in s3)
        query = QueryDocument(
            Condition(tag='191', subfields={'a': Regex('^' + path + '$')}))
        #)
        print(f" the imp query is  -- {query.to_json()}")
        bibset = BibSet.from_query(query, skip=0, limit=3)
        a_res_en = []
        # Only proceed when the symbol identifies exactly one record.
        if bibset.count == 1:
            for bib in bibset.records:
                bib_value = bib.get_value('191', 'a')
                a_res_en.append(
                    (bib.get_value('191',
                                   'a'), 'http://' + ''.join(bib.files('EN'))))
                print(a_res_en)
                for url in a_res_en:
                    #txt_name = url.split('/')[-1]
                    #url is a tuple ; url[0] is a DS; url[1] is a s3 link to the pdf
                    txt_name = url[0]  # e.g. ARES721
                    #txt_name = txt_name.split('.')[0] +'.txt'
                    #txt_name = txt_name +'.txt'
                    #txt_loc='\\txts\\'+txt_name
                    # Heuristic: a usable link is longer than 'http://' + a
                    # couple of characters.
                    if len(url[1]) > 10:
                        print(f" - - the {url[0]} is {url[1]} - -")
                        pdf = PDFExtract(url[1])
                        parsed = parser.from_buffer(
                            pdf.get_txt_from_url(url[1]))
                        print(f"0----PDFExtract----0")
                        txt = Txt(bib.get_value('191', 'a'))
                        print(txt.set_txt(parsed["content"]))
                        txt.title = bib.get_value('245', 'a')
                        #txt.title=bib.get_value('239','a')
                        ''' load text into txts'''
                        # Upsert so a re-extraction replaces stale text.
                        if txt.txt is not None:
                            query = {"doc_sym": txt.symbol}
                            txts_coll.replace_one(query,
                                                  txt.to_bson(),
                                                  upsert=True)

    # Re-query the cache (it may have just been populated above).
    doc_list = []
    doc_list = list(
        txts_coll.find({"doc_sym": {
            "$regex": "^" + str(path) + "$"
        }}))
    print(f" this is compiled path -- {'^' + str(path)+'$'}")
    if len(doc_list) == 1:
        print(f"-- it's a hit- 1")
        if doc_list[0]['doc_sym'][0] != 'S':
            return_data = doc_list[0]['raw_txt']
        else:
            #for SC docs - temporary measure
            doc_1 = doc_list[0].pop('_id')
            return_data = doc_list[0]
    elif len(doc_list) > 1:
        # Several cached docs matched: return their symbols sorted by the
        # integer formed from each symbol's digits.
        print(f"-- it's a hit- many")
        return_data = sorted([doc['doc_sym'] for doc in doc_list],
                             key=lambda x: int(''.join(c for c in x
                                                       if c.isdigit())))
        #return_data=sorted(["<a href="+doc['doc_sym']+">" for doc in doc_list])
        #return_data=sorted([url_for('/'+doc_list[0]['raw_txt']) for doc in doc_list])

    if return_data == "":
        return jsonify('text with document symbol:%s was not found' % path)
    #return(render_template('ds.html', data=return_data))
    #print(return_data)
    return jsonify(return_data)
Ejemplo n.º 27
0
 def get_emitente(self):
     """Fetch the single record whose type marker (_t) mentions 'Emitente'."""
     emitente_filter = {"_t": Regex(u".*Emitente.*", "i")}
     self.set_query(emitente_filter)
     return self.return_one()
Ejemplo n.º 28
0
    TypeTestCase(Int64, "long", Int64(13)),
    TypeTestCase(str, "string", "foo"),
    TypeTestCase(float, "double", 3.14),
    TypeTestCase(Decimal, "decimal", Decimal("3.14159265359")),
    TypeTestCase(
        Decimal, "decimal", "3.14159265359"
    ),  # TODO split tests for  odmantic type inference
    TypeTestCase(Decimal128, "decimal", Decimal128(Decimal("3.14159265359"))),
    TypeTestCase(Dict, "object", {"foo": "bar", "fizz": {"foo": "bar"}}),
    TypeTestCase(bool, "bool", False),
    TypeTestCase(Pattern, "regex", re.compile(r"^.*$")),
    TypeTestCase(Pattern, "regex", re.compile(r"^.*$", flags=re.IGNORECASE)),
    TypeTestCase(
        Pattern, "regex", re.compile(r"^.*$", flags=re.IGNORECASE | re.MULTILINE)
    ),
    TypeTestCase(Regex, "regex", Regex(r"^.*$", flags=32)),
    TypeTestCase(ObjectId, "objectId", ObjectId()),
    TypeTestCase(bytes, "binData", b"\xf0\xf1\xf2"),
    TypeTestCase(Binary, "binData", Binary(b"\xf0\xf1\xf2")),
    TypeTestCase(datetime, "date", sample_datetime),
    TypeTestCase(List, "array", ["one"]),
    # Compound Types
    TypeTestCase(Tuple[str, ...], "array", ("one",)),  # type: ignore
    TypeTestCase(List[ObjectId], "array", [ObjectId() for _ in range(5)]),
    TypeTestCase(
        Union[Tuple[ObjectId, ...], None],  # type: ignore
        "array",
        tuple(ObjectId() for _ in range(5)),
    ),
]
Ejemplo n.º 29
0
# Report script: list every bib record that has a 191 field (document
# symbol) and a 269$a (date) starting with '1975', then dump them as XML.
from bson import Regex
from dlx import DB
from dlx.marc import BibSet, QueryDocument, Condition
from config import Config
DB.connect(Config.connect_string)

query = QueryDocument(Condition(tag='191', modifier='exists'),
                      Condition(tag='269', subfields={'a': Regex('^1975')}))

print(query.to_json())

# Only the 191 field is projected; skip=0/limit=0 fetches everything.
bibset = BibSet.from_query(query, projection={'191': True}, skip=0, limit=0)
print('There are {} results'.format(bibset.count))

# Cache records locally so count/iteration/to_xml don't re-query.
bibset.cache()

for bib in bibset.records:
    print('id: {}, symbol: {}'.format(bib.id, bib.get_value('191', 'a')))

print(bibset.to_xml())
# Production connection; claims are stored in one collection per year.
database = Client(config.PROD).connect()
# Year used to select which claims collection to search.
year = 2018

collection = database[f"claims_{year}"]
# NOTE(review): file is opened without a context manager — confirm it is
# closed later in the script.
result_file = open(f'IPGU01001_{year}.csv', "w+")
pipeline = [
    {
        u"$match": {
            u"service": {
                u"$exists": True
            },
            u"service.srguServicePassportId": {
                u"$not": Regex(u".*_444$", "i")
            },
            u"senderCode": u"IPGU01001"
        }
    },
    # {
    #     u"$limit": 100.0
    # },
    {
        u"$group": {
            u"_id": u"$service.srguServicePassportId",
            u"count": {
                u"$sum": 1.0
            }
        }
    }