Esempio n. 1
0
    def __init__(self, db_name, coll_name):
        """Create a mocked collection whose canned answers depend on ``coll_name``.

        Depending on the collection name, one or more of ``options``, ``find``,
        ``aggregate``, ``count_documents`` and ``find_one`` are replaced with
        ``MagicMock`` objects fed from JSON files under ``HERE/fixtures``.

        :param db_name: name of the owning database (stored only).
        :param coll_name: name of the collection being mocked.
        """
        def load_fixture(file_name):
            # Decode one fixture file, routing values through json_util.object_hook.
            with open(os.path.join(HERE, "fixtures", file_name), 'r') as handle:
                return json.load(handle, object_hook=json_util.object_hook)

        self._coll_name = coll_name
        self._db_name = db_name

        if coll_name in ("oplog.rs", "oplog.$main"):
            self.options = MagicMock(
                return_value=load_fixture("oplog_rs_options"))

            # find(...).sort(...).limit(...) alternates forever between these
            # two single-entry results.
            alternating_results = itertools.cycle([
                [{'ts': Timestamp(1600262019, 1)}],
                [{'ts': Timestamp(1600327624, 8)}],
            ])
            limited = MagicMock(limit=MagicMock(side_effect=alternating_results))
            sortable = MagicMock(sort=MagicMock(return_value=limited))
            self.find = MagicMock(return_value=sortable)
        elif coll_name == 'system.sessions':
            sessions = load_fixture("system.sessions")
            self.aggregate = MagicMock(return_value=iter([sessions]))
        elif coll_name == 'chunks':
            # Successive calls answer 100, then 5.
            self.count_documents = MagicMock(side_effect=[100, 5])
        elif coll_name == 'system.replset':
            self.find_one = MagicMock(
                return_value=load_fixture("system.replset"))
        else:
            # Any other collection is served from its indexStats fixture.
            self.aggregate = MagicMock(
                return_value=load_fixture("indexStats-{}".format(coll_name)))
    def cluster_time_conversation(self, callback, replies):
        """Check that each request echoes the most recent ``$clusterTime``.

        A MockupDB server answers ``ismaster`` with wire version 6 and an
        initial cluster time. ``callback(client)`` runs in the background
        while, for every entry of ``replies``, the next request is checked to
        carry the expected cluster time; the reply is then sent back with the
        cluster time's increment advanced by one.

        :param callback: callable receiving the MongoClient under test.
        :param replies: reply documents, one per expected request; each one is
            mutated to include the advanced ``$clusterTime``.
        """
        expected_time = Timestamp(0, 0)
        server = MockupDB()

        # First test all commands include $clusterTime with wire version 6.
        handshake = {
            'minWireVersion': 0,
            'maxWireVersion': 6,
            '$clusterTime': {'clusterTime': expected_time},
        }
        server.autoresponds('ismaster', handshake)

        server.run()
        self.addCleanup(server.stop)

        client = MongoClient(server.uri)
        self.addCleanup(client.close)

        with going(callback, client):
            for reply in replies:
                request = server.receives()
                self.assertIn('$clusterTime', request)
                self.assertEqual(
                    request['$clusterTime']['clusterTime'], expected_time)

                # Advance the time so the next request must echo the new value.
                expected_time = Timestamp(
                    expected_time.time, expected_time.inc + 1)
                reply['$clusterTime'] = {'clusterTime': expected_time}
                request.reply(reply)
Esempio n. 3
0
    def test_bson_classes(self):
        """Matcher must treat equal BSON values inside commands as matching."""
        _id = '5a918f9fa08bff9c7688d3e1'

        def matches(spec, actual):
            # True when a Matcher built from ``spec`` accepts ``actual``.
            return Matcher(spec).matches(actual)

        pairs = [
            (Binary(b'foo'), Binary(b'foo')),
            (Code('foo'), Code('foo')),
            (Code('foo', {'x': 1}), Code('foo', {'x': 1})),
            (DBRef('coll', 1), DBRef('coll', 1)),
            (DBRef('coll', 1, 'db'), DBRef('coll', 1, 'db')),
            (Decimal128('1'), Decimal128('1')),
            (MaxKey(), MaxKey()),
            (MinKey(), MinKey()),
            (ObjectId(_id), ObjectId(_id)),
            (Regex('foo', 'i'), Regex('foo', 'i')),
            (Timestamp(1, 2), Timestamp(1, 2)),
        ]

        for a, b in pairs:
            self_msg = "MockupDB %r doesn't equal itself" % (b, )
            cross_msg = "PyMongo %r != MockupDB %r" % (a, b)

            # Basic case.
            self.assertTrue(matches(Command(y=b), Command(y=b)), self_msg)

            # First Command argument is special, try comparing the second also.
            self.assertTrue(
                matches(Command('x', y=b), Command('x', y=b)), self_msg)

            # In practice, users pass PyMongo classes in message specs.
            self.assertTrue(matches(Command(y=b), Command(y=a)), cross_msg)
            self.assertTrue(
                matches(Command('x', y=b), Command('x', y=a)), cross_msg)
Esempio n. 4
0
def importStandInstallFee(excel_path="C:/Users/zhu/Desktop/导入导出/采购价/安装费-标准-模板.xlsx"):
    """Import the "standard" group installation fees from an Excel sheet.

    Every sheet row becomes one document in the ``priceManage.purchaseFee``
    collection obtained through ``mongo_client.get_col_op_test``. The row is
    read positionally: column 0 is the fee name, columns 3/4 the product id
    and name, column 5 the unit install price and column 6 the minimum charge.

    :param excel_path: workbook to read; defaults to the path that used to be
        hard-coded so existing callers are unaffected.
    """
    fee_op = mongo_client.get_col_op_test("priceManage", "purchaseFee")

    df = pd.read_excel(excel_path)
    addList = []
    for data in np.array(df).tolist():
        print(data)
        addList.append({
            "_id": str(objectid.ObjectId()),
            "groupId": "5ce3bfcaeeeb414fbc90831e",
            "groupName": "标准",
            "priority": 1,
            "feeType": "installFee",
            "name": data[0],
            "template": "费用=max(安装件数*$安装单价,$最低一票);返回 费用;",
            "keys": {
                "products": [{
                    "productId": data[3],
                    "productName": data[4],
                    "productType": "standardProduct",
                }],
            },
            "vars": [
                {"varName": "$安装单价", "varValue": data[5]},
                {"varName": "$最低一票", "varValue": data[6]},
            ],
            "createTime": Timestamp(1559098387, 253),
            "lastUpdateTime": Timestamp(1559785785, 272),
            "version": 1,
            "creator": "15000000000",
            "creatorName": "测试九江管理员",
            "modifier": "15000000000",
            "modifierName": "测试九江管理员",
        })

    # pymongo's insert_many refuses an empty document list, so an empty sheet
    # must not reach it.
    # NOTE(review): assumes fee_op behaves like a pymongo Collection - confirm.
    if addList:
        fee_op.insert_many(addList)
Esempio n. 5
0
def importStandComboInstallFee(excel_path="C:/Users/zhu/Desktop/导入导出/采购价/安装费-标准-模板.xlsx"):
    """Import the "standard combo" group installation fees from an Excel sheet.

    Every sheet row becomes one document in the ``priceManage.purchaseFee``
    collection obtained through ``mongo_client.get_col_op_test``. The row is
    read positionally: column 0 is the fee name, columns 3/4 the combo id and
    name, and column 5 the unit install price.

    :param excel_path: workbook to read; defaults to the path that used to be
        hard-coded so existing callers are unaffected.
    """
    fee_op = mongo_client.get_col_op_test("priceManage", "purchaseFee")

    df = pd.read_excel(excel_path)
    addList = []
    for data in np.array(df).tolist():
        print(data)
        addList.append({
            "_id": str(objectid.ObjectId()),
            "groupId": "5cf77b79dd77c768c8103f90",
            "groupName": "标准套餐",
            "priority": 2,
            "feeType": "installFee",
            "name": data[0],
            "template": "费用 = $安装单价*安装件数;返回 费用;",
            "keys": {
                "combos": [{"comboId": data[3], "comboName": data[4]}],
            },
            "vars": [{"varName": "$安装单价", "varValue": data[5]}],
            "createTime": Timestamp(1559098387, 253),
            "lastUpdateTime": Timestamp(1559785785, 272),
            "version": 1,
            "creator": "15000000000",
            "creatorName": "测试九江管理员",
            "modifier": "15000000000",
            "modifierName": "测试九江管理员",
        })

    # pymongo's insert_many refuses an empty document list, so an empty sheet
    # must not reach it.
    # NOTE(review): assumes fee_op behaves like a pymongo Collection - confirm.
    if addList:
        fee_op.insert_many(addList)
Esempio n. 6
0
    def get_last_saved_ts(self):
        """Return the Timestamp encoded in the newest saved slice name.

        Slice names come from ``self.list_names()`` and are parsed as
        ``<seconds>_<increment>``. When there are no slices, a Timestamp for
        1970-01-02 (interpreted in local time) with increment 0 is returned.

        The newest slice is chosen by numeric comparison of the two parts.
        Comparing the raw strings would rank ``"..._9"`` above ``"..._10"``.

        :raises ValueError: if a slice name is not two integers joined by one
            underscore.
        """
        slices = self.list_names()
        if not slices:
            return Timestamp(int(datetime.datetime(1970, 1, 2).timestamp()), 0)

        def as_pair(name):
            # "<seconds>_<increment>" -> (seconds, increment) as integers.
            seconds_text, increment_text = name.split('_')
            return int(seconds_text), int(increment_text)

        seconds, increment = max(as_pair(name) for name in slices)
        return Timestamp(seconds, increment)
Esempio n. 7
0
    def get_last_saved_ts(self):
        """Return the Timestamp encoded in the newest slice name in the store.

        Slice names are listed from a ``MongoStore`` opened on ``store_url`` /
        ``oplog_store_db`` and are parsed as ``<seconds>_<increment>``. When
        the store lists nothing, a Timestamp for 1970-01-02 (interpreted in
        local time) with increment 0 is returned.

        The newest slice is chosen by numeric comparison of the two parts.
        Comparing the raw strings would rank ``"..._9"`` above ``"..._10"``.

        :raises ValueError: if a slice name is not two integers joined by one
            underscore.
        """
        def as_pair(name):
            # "<seconds>_<increment>" -> (seconds, increment) as integers.
            seconds_text, increment_text = name.split('_')
            return int(seconds_text), int(increment_text)

        with MongoStore(store_url, oplog_store_db) as store:
            slices = store.list()
            if not slices:
                return Timestamp(
                    int(datetime.datetime(1970, 1, 2).timestamp()), 0)

            seconds, increment = max(as_pair(name) for name in slices)
            last_ts = Timestamp(seconds, increment)

        return last_ts
Esempio n. 8
0
def logging(request):
    """
    Render the log page with the entries of the last three days.

    Entries whose ``timestamp`` is newer than "now minus three days" are read
    from ``db.logs`` in descending timestamp order. Each entry gains a
    ``datetime`` key holding ``timestamp.as_datetime()``, or None when the
    entry has no timestamp.

    History (translated from the original notes):
    log display - by: 范俊伟 at:2015-04-15
    logs listed in ascending order - by: 范俊伟 at:2015-04-16
    logs switched to descending order - by: 王健 at:2015-04-19

    :param request: incoming request (not otherwise used here).
    :return: the rendered 'needserver/logging.html' response.
    """
    three_days_ago = time.time() - 60 * 60 * 24 * 3
    newer_than = {"timestamp": {"$gt": Timestamp(int(three_days_ago), 0)}}
    cursor = db.logs.find(newer_than).sort("timestamp", pymongo.DESCENDING)

    entries = []
    for entry in cursor:
        stamp = entry.get('timestamp')
        entry.update({"datetime": stamp.as_datetime() if stamp else None})
        entries.append(entry)
    return render_to_response('needserver/logging.html', {"list": entries})
Esempio n. 9
0
 def parseTimestamp(self, tsString):
     """Parse a ``<time>:<inc>`` string into a Timestamp.

     Only the first two colon-separated fields are used; any further fields
     are ignored.

     :param tsString: text such as ``1553406314:1``.
     :raises ValueError: if the text has fewer than two fields, or a used
         field is not an integer.
     """
     pieces = tsString.split(':')
     if len(pieces) >= 2:
         seconds, increment = pieces[:2]
         return Timestamp(int(seconds), int(increment))
     raise ValueError(
         "Invalid Message found, a Timestamp should like [1553406314:1], but [{}] is found."
         .format(tsString))
Esempio n. 10
0
    def execute(self):
        """Run one oplog synchronisation pass, then schedule the next one.

        Steps:
        1. Acquire the Kafka consumer/producer for ``self.ts_topic`` and read
           the timestamp window via ``get_current_ts``. Without a window the
           next run is scheduled and the method returns.
        2. Read the matching ``local.oplog.rs`` entries for the configured
           namespace (currently they are only printed).
        3. Compute the end of the next window as the largest oplog ``ts``
           after the current window (falling back to "now") and publish
           ``start~end`` to Kafka, committing the consumer.

        Errors are logged rather than raised so that the next run is still
        scheduled. KeyboardInterrupt and SystemExit are deliberately not
        swallowed. The Kafka handles are released in all cases.
        """
        interval = self.config.get('shedule', 'interval')

        # Bound up-front so the ``finally`` clause can never hit an unbound
        # name when acquiring the consumer or producer itself fails.
        ts_consumer = None
        ts_producer = None
        try:
            ts_consumer = self.kafkaFacade.get_consumer(self.ts_topic)
            ts_producer = self.kafkaFacade.get_prducer(self.ts_topic)

            next_ts = self.get_current_ts(ts_consumer, ts_producer)
            if not next_ts:
                self.schedule.enter(int(interval), 0, self.execute)
                return

            ns = self.config.get('condition', 'namespace')
            queryDict = {'ns': ns}
            next_start_ts = None
            if len(next_ts) == 1:
                # Only a start marker is known: take everything before it.
                queryDict['ts'] = {'$lt': next_ts[0]}
                next_start_ts = next_ts[0]
            elif len(next_ts) == 2:
                # Half-open window [start, end).
                queryDict['ts'] = {'$gte': next_ts[0], '$lt': next_ts[1]}
                next_start_ts = next_ts[1]

            dbUrl = self.config.get('mongo', 'mongodb.url')
            # The context manager closes the client even when a query fails.
            with MongoClient(dbUrl) as conn:
                mongo_local = conn["local"]
                logger.info('synchronizing data for %s...' % (queryDict))
                for x in mongo_local.oplog.rs.find(queryDict).sort([('ts', 1)]):
                    print(x)  # TODO: handle every oplogs row - add sync logic here
                logger.info('data synchronized for %s...' % (queryDict))

                logger.info('figuring out next synchronztion ts scope ...')
                next_end_ts = Timestamp(datetime.utcnow(), 1)
                newest_after_start = [
                    {'$match': {'ts': {'$gt': next_start_ts}}},
                    {'$group': {'_id': '1', 'next_end_ts': {'$max': '$ts'}}},
                ]
                for x in mongo_local.oplog.rs.aggregate(newest_after_start):
                    next_end_ts = x['next_end_ts']

            # post next synch message
            message = '%s:%s~%s:%s' % (next_start_ts.time, next_start_ts.inc,
                                       next_end_ts.time, next_end_ts.inc)
            self.kafkaFacade.produce_message(ts_producer, message)
            self.kafkaFacade.commit(ts_consumer)
        except Exception:
            logger.error("Exception Occurs: ", exc_info=True)
        finally:
            # Nothing to release when neither handle was acquired.
            if ts_consumer is not None or ts_producer is not None:
                self.kafkaFacade.release(ts_consumer, ts_producer)

        self.schedule.enter(int(interval), 0, self.execute)
Esempio n. 11
0
def importStandComboDeliverFee(excel_path="C:/Users/zhu/Desktop/导入导出/采购价/送货费-标准套餐-模板.xlsx",
                               error_path="error_scd.xlsx"):
    """Import the "standard combo" basic delivery fees from an Excel sheet.

    Every sheet row whose destination can be resolved becomes one document in
    the ``priceManage.purchaseFee`` collection. The destination is looked up
    in ``baseConfig.area`` by ``mergerName``, built by concatenating columns
    3, 4 and 5. Rows without a matching area are not imported; they are
    written to ``error_path`` instead.

    Other columns used: 0 is the fee name, 6/7 the combo id and name, and 8
    the basic delivery fee.

    :param excel_path: workbook to read; defaults to the path that used to be
        hard-coded.
    :param error_path: workbook receiving the rejected rows; defaults to the
        file name that used to be hard-coded.
    """
    fee_op = mongo_client.get_col_op_test("priceManage", "purchaseFee")
    area_op = mongo_client.get_col_op_prod("baseConfig", "area")

    df = pd.read_excel(excel_path)
    addList = []
    errorList = []
    for data in np.array(df).tolist():
        print(data)

        area = area_op.find_one({"mergerName": data[3] + data[4] + data[5]})
        if area is None:
            # Unknown destination: report the row instead of importing it.
            errorList.append(data)
            continue

        addList.append({
            "_id": str(objectid.ObjectId()),
            "groupId": "5ce3c643eeeb414fbc908323",
            "groupName": "标准套餐",
            "priority": 2,
            "feeType": "basicDeliveryFee",
            "name": data[0],
            "template": "费用 = $基础送货费;返回 费用;",
            "keys": {
                "combos": [{"comboId": data[6], "comboName": data[7]}],
                "destinations": [{
                    "destinationId": area.get("code"),
                    "destinationName": area.get("mergerName"),
                }],
            },
            "vars": [{"varName": "$基础送货费", "varValue": data[8]}],
            "createTime": Timestamp(1559098387, 253),
            "lastUpdateTime": Timestamp(1559785785, 272),
            "version": 1,
            "creator": "15000000000",
            "creatorName": "测试九江管理员",
            "modifier": "15000000000",
            "modifierName": "测试九江管理员",
        })

    # pymongo's insert_many refuses an empty document list, which happens
    # here whenever every row was rejected (or the sheet is empty).
    # NOTE(review): assumes fee_op behaves like a pymongo Collection - confirm.
    if addList:
        fee_op.insert_many(addList)

    el = pd.DataFrame(errorList)
    el.to_excel(error_path, index=False)
    def test_replicate_mongodb_to_pg(self):
        """Replicate mongodb to Postgres"""
        schema = 'ppw_e2e_tap_mongodb'

        def assert_columns_exist(table):
            """Check that the target table exposes every expected column"""
            expected_columns = [
                '_id', 'document', '_sdc_extracted_at',
                '_sdc_batched_at', '_sdc_deleted_at',
            ]
            assertions.assert_cols_in_table(
                self.run_query_target_postgres, schema, table, expected_columns)

        def assert_row_counts_equal(target_schema, table, count_in_source):
            """Check that the target table holds as many rows as the source"""
            rows = self.run_query_target_postgres(
                f'select count(_id) from {target_schema}.{table}')
            assert count_in_source == rows[0][0]

        # Run tap first time - fastsync and singer should be triggered
        assertions.assert_run_tap_success(
            TAP_MONGODB_ID, TARGET_ID, ['fastsync', 'singer'])
        for table in ('listings', 'my_collection'):
            assert_columns_exist(table)

        listing_count = self.mongodb_con['listings'].count_documents({})
        my_coll_count = self.mongodb_con['my_collection'].count_documents({})

        assert_row_counts_equal(schema, 'listings', listing_count)
        assert_row_counts_equal(schema, 'my_collection', my_coll_count)

        # Change the source: three inserts, one delete, then a blanket update.
        with_timestamp = {
            'age': randint(10, 30),
            'id': 1001,
            'uuid': uuid.uuid4(),
            'ts': Timestamp(12030, 500)
        }
        with_regex = {
            'date': datetime.utcnow(),
            'id': 1002,
            'uuid': uuid.uuid4(),
            'regex': bson.Regex(r'^[A-Z]\\w\\d{2,6}.*$')
        }
        with_nested = {
            'uuid': uuid.uuid4(),
            'id': 1003,
            'nested_json': {
                'a': 1,
                'b': 3,
                'c': {'key': bson.datetime.datetime(2020, 5, 3, 10, 0, 0)}
            }
        }
        result_insert = self.mongodb_con.my_collection.insert_many(
            [with_timestamp, with_regex, with_nested])
        my_coll_count += len(result_insert.inserted_ids)

        first_inserted = {'_id': result_insert.inserted_ids[0]}
        result_del = self.mongodb_con.my_collection.delete_one(first_inserted)
        my_coll_count -= result_del.deleted_count

        result_update = self.mongodb_con.my_collection.update_many(
            {}, {'$set': {'id': 0}})

        # Second run must carry the changes over to the target.
        assertions.assert_run_tap_success(
            TAP_MONGODB_ID, TARGET_ID, ['fastsync', 'singer'])

        updated_rows = self.run_query_target_postgres(
            "select count(_id) from ppw_e2e_tap_mongodb.my_collection where cast(document->>'id' as int) = 0")
        assert result_update.modified_count == updated_rows[0][0]

        assert_row_counts_equal(schema, 'my_collection', my_coll_count)
Esempio n. 13
0
 def get_oplog_ts(self, source, dest):
     """Load the stored oplog Timestamp for a source/destination pair.

     The ``oplog_ts`` column of ``self.STATE_TABLE`` is read for the row
     whose ``source`` and ``dest`` equal the string forms of the arguments
     (as produced by ``_mongo_dict_to_str``). The column holds JSON with
     ``time`` and ``inc`` members.

     :raises TypeError: if no matching row exists, because the missing row
         is subscripted.
     """
     params = (_mongo_dict_to_str(source), _mongo_dict_to_str(dest))
     query = "".join([
         "SELECT oplog_ts ",
         "FROM %s " % self.STATE_TABLE,
         "WHERE source = ? AND dest = ?",
     ])
     with self._conn:
         cursor = self._conn.cursor()
         cursor.execute(query, params)
         row = cursor.fetchone()
         stored = json.loads(row[0])
         return Timestamp(time=stored['time'], inc=stored['inc'])
Esempio n. 14
0
def get_current_timestamp():
    """Get the current timestamp as a bson Timestamp object.

    The module-level ``_LAST_TIMESTAMP_INC`` list remembers the last second
    served and its increment. A further call within the same second bumps
    the increment; a call in a new second restarts it at 1.

    :raises NotImplementedError: if ``Timestamp`` is falsy.
    """
    if not Timestamp:
        raise NotImplementedError('timestamp is not supported. Import pymongo to use it.')

    now = int(time.time())
    same_second = bool(_LAST_TIMESTAMP_INC) and _LAST_TIMESTAMP_INC[0] == now
    if not same_second:
        # New second (or first call ever): restart the increment.
        _LAST_TIMESTAMP_INC[:] = [now, 1]
    else:
        _LAST_TIMESTAMP_INC[1] += 1
    return Timestamp(now, _LAST_TIMESTAMP_INC[1])
Esempio n. 15
0
    def read_tag_file(self):
        """Read the resume marker stored in the ``tag_file`` file.

        :return: None when the file does not exist, -1 when its content is
            exactly ``-1``, otherwise a Timestamp parsed from the
            ``<time>_<inc>`` text that precedes the first ``=``.
        """
        fname = 'tag_file'
        if not os.path.exists(fname):
            return None

        with open(fname, 'r') as handle:
            tag = handle.read()
        if tag == '-1':
            return -1

        stamp_text = tag.split('=')[0]
        seconds, increment = stamp_text.split('_')
        return Timestamp(int(seconds), int(increment))
Esempio n. 16
0
        def send_cluster_time(time, inc, should_update):
            """Feed a hello reply carrying a cluster time and check the outcome.

            After the reply, ``t.max_cluster_time()`` must equal the cluster
            time just sent when ``should_update`` is true, and the previous
            value otherwise.
            """
            before = t.max_cluster_time()
            sent = {'clusterTime': Timestamp(time, inc)}
            hello_reply = {
                'ok': 1,
                'minWireVersion': 0,
                'maxWireVersion': 6,
                '$clusterTime': sent,
            }
            got_hello(t, ('host', 27017), hello_reply)

            expected = sent if should_update else before
            self.assertEqual(t.max_cluster_time(), expected)
Esempio n. 17
0
    def setUp(self):
        """Prepare the fixture before every test method.

        Records the server url and a current timestamp (as a long), builds the
        DocManager and keeps its arango connection, then creates the test
        database and the test collection.
        """
        self.url = "localhost:8529"
        now_ts = Timestamp(int(time.time()), 1)
        self.timestamp = bson_ts_to_long(now_ts)

        doc_manager = DocManager(self.url)
        self.arango_doc_manager_obj = doc_manager
        self.arango_connection = doc_manager.arango

        self.database_name = "test_db"
        self.collection_name = "test_collection"
        self.namespace = '.'.join((self.database_name, self.collection_name))

        self.db = self.arango_connection.create_database(self.database_name)
        self.collection = self.db.create_collection(self.collection_name)
 def watcher(self):
     """Check the oplog for recent entries on ``self.ns_filter`` and react.

     Entries whose ``ts`` is at or after "now minus ``self.roll_time``" and
     whose ``ns`` equals ``self.ns_filter`` are fetched. If there are any,
     ``collection_handle`` is called when the namespace contains ``$cmd``,
     otherwise ``doc_handle``.
     """
     # Watch the given ns object within the scheduling interval.
     self.logger.info('mongodb-oplog-watcher is running')
     window_start = time2timestamp(time.time()) - self.roll_time
     query = {
         "$and": [
             {"ts": {"$gte": Timestamp(window_start, 1)}},
             {"ns": {"$eq": self.ns_filter}},
         ]
     }
     matches = list(self.oplog.find(query))
     if not matches:
         return

     # todo business logic
     # On updates, decide by _id: collection operations carry no _id,
     # document operations do.
     if '$cmd' in self.ns_filter:
         self.collection_handle()
     else:
         self.doc_handle()
Esempio n. 19
0
    def get_current_ts(self, ts_consumer, ts_producer):
        """Fetch the timestamp window to synchronise from the Kafka topic.

        For a new consumer with nothing pending, an initial ``<time>:<inc>``
        message based on the current UTC time is produced and False is
        returned. Otherwise the pending message, if any, is split on ``~``
        and each part is parsed with ``parseTimestamp``.

        :return: a list of Timestamps, or False when there is nothing to
            process yet.
        """
        facade = self.kafkaFacade
        pending = facade.has_pendding_message(self.ts_topic, ts_consumer)

        if facade.is_new_consumer(ts_consumer) and not pending:
            # Seed the topic so the next run has a starting point.
            seed = Timestamp(datetime.utcnow(), 1)
            facade.produce_message(
                ts_producer, '%s:%s' % (seed.time, seed.inc))
            return False

        if not pending:
            return False

        raw = facade.get_message(self.ts_topic, ts_consumer)
        if not raw:
            return False
        return [self.parseTimestamp(part) for part in raw.split('~')]
Esempio n. 20
0
    def _bootstrap(self, db_name, coll_name):
        """Replay the collection's existing documents as bootstrap events.

        Yields a ``bootstrap-start`` marker first, then one
        ``bootstrap-insert`` event per document whose ``_id`` is not greater
        than the largest ``_id`` seen when the replay started. An empty
        collection yields only the start marker.

        :param db_name: database to read from.
        :param coll_name: collection to read from.
        """
        topic = f'{db_name}-{coll_name}'

        # Emit the start marker.
        yield {
            'operationType': 'bootstrap-start',
            'ns': {
                'db': db_name,
                'coll': coll_name
            },
            'topic': topic,
        }

        db = self.conn.client[db_name]
        coll = db[coll_name]

        # Open a session and fetch the document with the largest _id.
        # NOTE(review): find_one is not given session=session, so it looks
        # like the read does not take part in the snapshot transaction - confirm.
        with self.conn.client.start_session() as session:
            with session.start_transaction(
                    read_concern=ReadConcern('snapshot')):
                max_obj = coll.find_one(sort=[('_id', DESCENDING)])

        # find_one returns None for an empty collection: nothing to replay.
        if max_obj is None:
            return

        max_id = max_obj.get('_id')
        # Test against None rather than truthiness so that a legitimate but
        # falsy _id (for example 0) still bounds the replay.
        if max_id is None:
            return

        values = coll.find({'_id': {'$lte': max_id}})
        # The counter is one ahead of the number of documents yielded, as in
        # the original sampling logic.
        for index, val in enumerate(values, start=2):
            data = {
                'operationType': 'bootstrap-insert',
                'fullDocument': val,
                'documentKey': val['_id'],
                'ns': {
                    'db': db_name,
                    'coll': coll_name
                },
                'topic': topic,
            }
            yield data
            # Log a sample of the data every 50,000 steps.
            if index % 50_000 == 0:
                logger.info(f"MongoDB index = {index}, data={data}")
Esempio n. 21
0
    async def init_async(cls,
                         oplog,
                         operation_handler: OperationHandler,
                         namespace_filter: str=None,
                         starting_timestamp: Timestamp=None,
                         on_nothing_to_fetch_on_cursor=None):
        """Asynchronously build an instance, resolving the start timestamp.

        When ``starting_timestamp`` is not given, it is taken from the ``ts``
        of the last oplog document in natural order. If that lookup yields a
        non-subscriptable result, ``Timestamp(0, 1)`` is used instead.

        :return: ``cls(...)`` built from the arguments and the resolved
            starting timestamp.
        """
        resume_from = starting_timestamp
        if resume_from is None:
            newest = await oplog.find_one(sort=[('$natural', DESCENDING)])
            try:
                resume_from = newest['ts']
            except TypeError:
                # Subscripting failed (e.g. find_one gave back None).
                resume_from = Timestamp(0, 1)

        return cls(oplog, operation_handler, namespace_filter, resume_from,
                   on_nothing_to_fetch_on_cursor)
Esempio n. 22
0
    def test_row_to_singer_record_successful_transformation_with_deleted(self):
        """A row with a deletion time becomes a RECORD carrying SDC_DELETED_AT."""
        stream = {
            'stream': 'myStream',
            'metadata': [{'breadcrumb': [], 'metadata': {}}],
        }
        row = {
            '_id': ObjectId('0123456789ab0123456789ab'),
            'key1': 10,
            'key2': Timestamp(1589379991, 4696183),
            'key3': 1.5
        }
        extracted_at = datetime(2020, 5, 13, 14, 10, 10, tzinfo=tzutc())
        deleted_at = datetime(2020, 5, 20, 15, 0, 0, 0, tzinfo=tzutc())

        result = common.row_to_singer_record(
            stream, row, extracted_at, deleted_at, 100)

        expected_document = {
            '_id': '0123456789ab0123456789ab',
            'key1': 10,
            'key2': '2020-05-13T14:26:31.000000Z',
            'key3': 1.5
        }
        expected_message = {
            'type': 'RECORD',
            'stream': 'myStream',
            'record': {
                '_id': '0123456789ab0123456789ab',
                'document': expected_document,
                common.SDC_DELETED_AT: '2020-05-20T15:00:00.000000Z',
            },
            'version': 100,
            'time_extracted': '2020-05-13T14:10:10.000000Z',
        }
        self.assertEqual(expected_message, result.asdict())
Esempio n. 23
0
def slice_name_to_ts(name):
    """Turn a ``<time>_<inc>`` slice name into a Timestamp.

    :raises ValueError: if the name does not split into exactly two parts on
        ``_`` or a part is not an integer.
    """
    seconds_text, increment_text = name.split('_')
    return Timestamp(time=int(seconds_text), inc=int(increment_text))
Esempio n. 24
0
                'type_cm': 'jsonb',
                'value': {"one": "two"}},
    'extraProps': {'name_cm': '_extra_props',
                   'type_cm': 'jsonb',
                   'value': {"three": "four",
                             "five": "six"}
                   },
    'signupCode': {'name_cm': 'signup_code',
                   'type_cm': 'text',
                   'value': "I am a text"}
}


oplog_entries_update = [
    {
        'ts': Timestamp(1556029671, 1),
        't': 48,
        'h': -4473962510602026742,
        'v': 2,
        'op': 'u',
        'operationType': 'update',
        'ns': {'db': 'test_purrito', 'coll': 'Employee'},
        'o2': {'_id': '1'},
        'updateDescription' : {'updatedFields': {'firstName': 'Janos',
                'lastName': None, 'hair': None}},
        'documentKey': {'_id': '1'},
        'o': {
            '$set': {
                'firstName': 'Janos',
                'lastName': None
            },
Esempio n. 25
0
def dt2ts(dt):
    """Convert a datetime into a Timestamp with increment 0.

    The time part is ``dt.timestamp()`` truncated to an integer.
    """
    epoch_seconds = int(dt.timestamp())
    return Timestamp(epoch_seconds, 0)
Esempio n. 26
0
    def test_copy_table_with_collection_found_success(self):
        """
        copy_table on a collection that exists in the db must run mongodump
        once, decode the dump once and remove both dump files.
        """
        self.mongo.database = Mock(spec_set=Database).return_value
        self.mongo.database.list_collection_names.return_value = [
            'col1', 'col2', 'col3', 'my_col'
        ]

        with patch('pipelinewise.fastsync.commons.tap_mongodb.subprocess.call') as call_mock, \
                patch('pipelinewise.fastsync.commons.tap_mongodb.os.remove') as os_remove_mock, \
                patch('pipelinewise.fastsync.commons.tap_mongodb.gzip') as gzip_mock, \
                patch('pipelinewise.fastsync.commons.tap_mongodb.bson.decode_file_iter') as \
                bson_decode_iter_mock:
            # Arrange: mongodump succeeds, file removal succeeds, and the
            # decoded dump holds three documents.
            call_mock.return_value = 0
            os_remove_mock.return_value = True
            bson_decode_iter_mock.return_value = [
                {
                    '_id': ObjectId('0123456789ab0123456789aa'),
                    'key1': 1,
                    'key2': time.time()
                },
                {
                    '_id': ObjectId('0123456789ab0123456789ab'),
                    'key1': 2
                },
                {
                    '_id': ObjectId('0123456789ab0123456789ac'),
                    'key3': Timestamp(10000, 50)
                },
            ]

            mock_enter = Mock()
            mock_enter.return_value.open.return_value = Mock()
            gzip_mock.return_value.__enter__ = mock_enter
            gzip_mock.return_value.__exit__ = Mock()

            # Act
            self.mongo.copy_table('my_col', 'file.csv.gzip', 'tmp')

            # Assert
            expected_command = [
                'mongodump', '--uri',
                '"mongodb://*****:*****@foo.com:3306/my_db'
                '?authSource=admin&readPreference=secondaryPreferred&ssl=true"',
                '--forceTableScan', '--gzip', '-c', 'my_col', '-o',
                'tmp'
            ]
            call_mock.assert_called_once_with(expected_command)

            os_remove_mock.assert_has_calls([
                call('tmp/my_db/my_col.metadata.json.gz'),
                call('tmp/my_db/my_col.bson.gz')
            ])
            self.assertEqual(os_remove_mock.call_count, 2)
            self.assertEqual(bson_decode_iter_mock.call_count, 1)
Esempio n. 27
0
def int_to_bson_timestamp(ts: int):
    """Split a packed integer into a Timestamp.

    The bits above the low 32 become ``time`` and the low 32 bits become
    ``inc``.
    """
    # https://docs.mongodb.com/manual/reference/bson-types/#timestamps
    low_32_bits = 0xFFFFFFFF
    return Timestamp(time=ts >> 32, inc=ts & low_32_bits)
Esempio n. 28
0
    collection_white_list = ast.literal_eval(
        config_parser.config_section_map("source_db_connection")
        ['collection_list'])
    log.info("<=====Collection List Processes by Opslog ======>" +
             ', '.join(collection_white_list))
    source_db = MongoDatabase(dbtype=source_db_config.get("dbtype"),
                              host=source_db_config.get("host"),
                              port=int(source_db_config.get("port")),
                              user=source_db_config.get("user"),
                              password=source_db_config.get("pass"),
                              database=source_db_config.get("database"))
    mongo_db_connection = source_db.connect()

    # get the latest timestamp in the database
    # last_ts = db.oplog.rs.find().sort('$natural', -1)[0]['ts'];
    # print(last_ts.as_datetime())
    sqlite_db_connection = SqliteQueue(sqlite_db)
    ts_value = sqlite_db_connection.select_max()

    if not ts_value:
        last_ts = Timestamp(int(time.time()), 1)
        #last_ts = Timestamp(1533198932, 1)
    else:
        (key, value) = ts_value
        last_ts = Timestamp(key, value)

    print(last_ts.as_datetime())

    oplog_watcher(mongo_db_connection, sqlite_db_connection,
                  collection_white_list, last_ts)
Esempio n. 29
0
    def test_replicate_mongodb_to_sf(self):
        """Replicate mongodb to Snowflake"""
        schema = 'ppw_e2e_tap_mongodb'

        def assert_columns_exist(table):
            """Check that the target table exposes every expected column"""
            expected_columns = [
                '_ID',
                'DOCUMENT',
                '_SDC_EXTRACTED_AT',
                '_SDC_BATCHED_AT',
                '_SDC_DELETED_AT',
            ]
            assertions.assert_cols_in_table(
                self.run_query_target_snowflake, schema, table,
                expected_columns)

        def assert_row_counts_equal(target_schema, table, count_in_source):
            """Check that the target table holds as many rows as the source"""
            rows = self.run_query_target_snowflake(
                f'select count(_id) from {target_schema}.{table}')
            assert count_in_source == rows[0][0]

        # Run tap first time - fastsync and singer should be triggered
        assertions.assert_run_tap_success(TAP_MONGODB_ID, TARGET_ID,
                                          ['fastsync', 'singer'])
        for table in ('listings', 'my_collection', 'all_datatypes'):
            assert_columns_exist(table)

        listing_count = self.mongodb_con['listings'].count_documents({})
        my_coll_count = self.mongodb_con['my_collection'].count_documents({})
        all_datatypes_count = self.mongodb_con[
            'all_datatypes'].count_documents({})

        assert_row_counts_equal(schema, 'listings', listing_count)
        assert_row_counts_equal(schema, 'my_collection', my_coll_count)
        assert_row_counts_equal(schema, 'all_datatypes', all_datatypes_count)

        # Change the source: three inserts, one delete, then a blanket update.
        with_timestamp = {
            'age': randint(10, 30),
            'id': 1001,
            'uuid': uuid.uuid4(),
            'ts': Timestamp(12030, 500),
        }
        with_regex = {
            'date': datetime.utcnow(),
            'id': 1002,
            'uuid': uuid.uuid4(),
            'regex': bson.Regex(r'^[A-Z]\\w\\d{2,6}.*$'),
        }
        with_decimal = {
            'uuid': uuid.uuid4(),
            'id': 1003,
            'decimal': bson.Decimal128(
                decimal.Decimal('5.64547548425446546546644')),
            'nested_json': {
                'a': 1,
                'b': 3,
                'c': {'key': bson.datetime.datetime(2020, 5, 3, 10, 0, 0)},
            },
        }
        result_insert = self.mongodb_con.my_collection.insert_many(
            [with_timestamp, with_regex, with_decimal])
        my_coll_count += len(result_insert.inserted_ids)

        first_inserted = {'_id': result_insert.inserted_ids[0]}
        result_del = self.mongodb_con.my_collection.delete_one(first_inserted)
        my_coll_count -= result_del.deleted_count

        result_update = self.mongodb_con.my_collection.update_many(
            {}, {'$set': {'id': 0}})

        # Second run: only singer is expected this time.
        assertions.assert_run_tap_success(TAP_MONGODB_ID, TARGET_ID,
                                          ['singer'])

        updated_rows = self.run_query_target_snowflake(
            'select count(_id) from ppw_e2e_tap_mongodb.my_collection where document:id = 0'
        )
        assert result_update.modified_count == updated_rows[0][0]

        assert_row_counts_equal(schema, 'my_collection', my_coll_count)
class CurrentDateOperationTests(LiveCollectionFieldUpdateOperatorsTests,
                                asynctest.TestCase):
    """
    Tests for the $currentDate update operator.

    $currentDate sets a field to the current date, either as a Date (the
    default) or as a timestamp. A field that does not exist yet is added to
    the document.

    Each test applies an update to ``self.doc``, lets the observer pick up
    the change and inspects the copy kept by ``self.handler``.
    """
    template_doc = {
        "status": "a",
        "modified_at": Timestamp(datetime(2006, 6, 6, 6, 6, 6), 0)
    }

    async def test_it_updates_existing_date_field(self):
        doc_id = self.doc['_id']
        update = {
            "$currentDate": {
                "modified_at": True,
            },
            "$set": {
                "status": "D",
                "cancellation.reason": "user request"
            }
        }
        await self.collection.update_one({'_id': doc_id}, update)

        await self.observer.observe_changes()
        affected_doc = self.handler.collection[doc_id]
        expected_doc = {
            '_id': doc_id,
            'status': 'D',
            'modified_at': ANY,
            'cancellation': {
                'reason': 'user request'
            }
        }
        self.assertEqual(affected_doc, expected_doc)
        self.assertIsInstance(affected_doc['modified_at'], datetime)
        self.assertEqual(affected_doc['modified_at'].date(),
                         datetime.today().date())

    async def test_it_creates_new_datetime_fields(self):
        doc_id = self.doc['_id']
        update = {
            "$currentDate": {
                "cancellation.date": {
                    "$type": "date"
                }
            },
            "$set": {
                "status": "D",
                "cancellation.reason": "user request"
            }
        }
        await self.collection.update_one({'_id': doc_id}, update)

        await self.observer.observe_changes()
        cancelled_on = self.handler.collection[doc_id]['cancellation']['date']
        self.assertIsInstance(cancelled_on, datetime)
        self.assertEqual(cancelled_on.date(), datetime.today().date())

    async def test_it_creates_new_timestamp_fields(self):
        doc_id = self.doc['_id']
        update = {
            "$currentDate": {
                "cancellation.date": {
                    "$type": "timestamp"
                }
            },
            "$set": {
                "status": "D",
                "cancellation.reason": "user request"
            }
        }
        await self.collection.update_one({'_id': doc_id}, update)

        await self.observer.observe_changes()
        cancelled_on = self.handler.collection[doc_id]['cancellation']['date']
        self.assertIsInstance(cancelled_on, Timestamp)
        self.assertEqual(cancelled_on.as_datetime().date(),
                         datetime.today().date())