def __init__(self, db_name, coll_name):
    """Create a mocked collection whose canned replies depend on its name.

    Depending on ``coll_name`` one of ``options``/``find``, ``aggregate``,
    ``count_documents`` or ``find_one`` is replaced by a ``MagicMock`` that
    returns data loaded from the JSON files under ``HERE/fixtures``.

    :param db_name: name of the database the collection belongs to.
    :param coll_name: name of the collection; selects the mocked behaviour.
    """
    def _read_fixture(fixture_name):
        # Fixtures are JSON documents decoded with bson's json_util hook.
        fixture_path = os.path.join(HERE, "fixtures", fixture_name)
        with open(fixture_path, 'r') as handle:
            return json.load(handle, object_hook=json_util.object_hook)

    self._db_name = db_name
    self._coll_name = coll_name

    if coll_name in ("oplog.rs", "oplog.$main"):
        self.options = MagicMock(
            return_value=_read_fixture("oplog_rs_options"))
        # find().sort().limit() alternates between these two results forever.
        oplog_results = itertools.cycle([
            [{'ts': Timestamp(1600262019, 1)}],
            [{'ts': Timestamp(1600327624, 8)}],
        ])
        limited = MagicMock(limit=MagicMock(side_effect=oplog_results))
        sortable = MagicMock(sort=MagicMock(return_value=limited))
        self.find = MagicMock(return_value=sortable)
    elif coll_name == 'system.sessions':
        sessions = _read_fixture("system.sessions")
        self.aggregate = MagicMock(return_value=iter([sessions]))
    elif coll_name == 'chunks':
        self.count_documents = MagicMock(side_effect=[100, 5])
    elif coll_name == 'system.replset':
        self.find_one = MagicMock(
            return_value=_read_fixture("system.replset"))
    else:
        # Any other collection answers aggregate() with its index stats.
        index_stats = _read_fixture("indexStats-{}".format(coll_name))
        self.aggregate = MagicMock(return_value=index_stats)
def cluster_time_conversation(self, callback, replies):
    """Drive ``callback`` against a MockupDB server and check $clusterTime.

    For every entry of ``replies`` the next request received by the server
    must carry the cluster time sent with the previous reply; the reply is
    then sent back with the cluster time's ``inc`` advanced by one.

    :param callback: callable run in the background with the client.
    :param replies: reply documents, one per expected request (mutated:
        ``$clusterTime`` is added to each).
    """
    expected_time = Timestamp(0, 0)
    server = MockupDB()

    # First test all commands include $clusterTime with wire version 6.
    hello_reply = {
        'minWireVersion': 0,
        'maxWireVersion': 6,
        '$clusterTime': {'clusterTime': expected_time},
    }
    server.autoresponds('ismaster', hello_reply)
    server.run()
    self.addCleanup(server.stop)

    client = MongoClient(server.uri)
    self.addCleanup(client.close)

    with going(callback, client):
        for reply in replies:
            request = server.receives()
            self.assertIn('$clusterTime', request)
            self.assertEqual(request['$clusterTime']['clusterTime'],
                             expected_time)

            # Gossip a newer cluster time with each reply.
            expected_time = Timestamp(expected_time.time,
                                      expected_time.inc + 1)
            reply['$clusterTime'] = {'clusterTime': expected_time}
            request.reply(reply)
def test_bson_classes(self):
    """Matcher must treat equal BSON values as matching.

    Each pair holds two equal values ``(a, b)``; ``b`` is matched against
    itself and against ``a``, both as the only keyword of a Command and
    behind a leading command name.
    """
    _id = '5a918f9fa08bff9c7688d3e1'
    pairs = [
        (Binary(b'foo'), Binary(b'foo')),
        (Code('foo'), Code('foo')),
        (Code('foo', {'x': 1}), Code('foo', {'x': 1})),
        (DBRef('coll', 1), DBRef('coll', 1)),
        (DBRef('coll', 1, 'db'), DBRef('coll', 1, 'db')),
        (Decimal128('1'), Decimal128('1')),
        (MaxKey(), MaxKey()),
        (MinKey(), MinKey()),
        (ObjectId(_id), ObjectId(_id)),
        (Regex('foo', 'i'), Regex('foo', 'i')),
        (Timestamp(1, 2), Timestamp(1, 2)),
    ]
    for a, b in pairs:
        self_msg = "MockupDB %r doesn't equal itself" % (b, )
        cross_msg = "PyMongo %r != MockupDB %r" % (a, b)

        # Basic case.
        matched = Matcher(Command(y=b)).matches(Command(y=b))
        self.assertTrue(matched, self_msg)

        # First Command argument is special, try comparing the second also.
        matched = Matcher(Command('x', y=b)).matches(Command('x', y=b))
        self.assertTrue(matched, self_msg)

        # In practice, users pass PyMongo classes in message specs.
        matched = Matcher(Command(y=b)).matches(Command(y=a))
        self.assertTrue(matched, cross_msg)

        matched = Matcher(Command('x', y=b)).matches(Command('x', y=a))
        self.assertTrue(matched, cross_msg)
def importStandInstallFee():
    """Import install fees of the "标准" group from a spreadsheet.

    Every spreadsheet row becomes one document in the
    ``priceManage.purchaseFee`` collection obtained through
    ``mongo_client.get_col_op_test``.  Columns used per row: 0 = fee name,
    3/4 = product id/name, 5 = unit install price, 6 = minimum charge.

    Nothing is inserted when the spreadsheet has no data rows.
    """
    fee_op = mongo_client.get_col_op_test("priceManage", "purchaseFee")
    df = pd.read_excel("C:/Users/zhu/Desktop/导入导出/采购价/安装费-标准-模板.xlsx")
    dataList = np.array(df).tolist()

    addList = []
    for data in dataList:
        print(data)
        addList.append({
            "_id": str(objectid.ObjectId()),
            "groupId": "5ce3bfcaeeeb414fbc90831e",
            "groupName": "标准",
            "priority": 1,
            "feeType": "installFee",
            "name": data[0],
            "template": "费用=max(安装件数*$安装单价,$最低一票);返回 费用;",
            "keys": {
                "products": [{
                    "productId": data[3],
                    "productName": data[4],
                    "productType": "standardProduct",
                }],
            },
            "vars": [
                {"varName": "$安装单价", "varValue": data[5]},
                {"varName": "$最低一票", "varValue": data[6]},
            ],
            "createTime": Timestamp(1559098387, 253),
            "lastUpdateTime": Timestamp(1559785785, 272),
            "version": 1,
            "creator": "15000000000",
            "creatorName": "测试九江管理员",
            "modifier": "15000000000",
            "modifierName": "测试九江管理员",
        })

    # PyMongo's insert_many() raises on an empty document list, so skip the
    # call when the spreadsheet produced no rows.
    if addList:
        fee_op.insert_many(addList)
def importStandComboInstallFee():
    """Import install fees of the "标准套餐" (combo) group from a spreadsheet.

    Every spreadsheet row becomes one document in the
    ``priceManage.purchaseFee`` collection obtained through
    ``mongo_client.get_col_op_test``.  Columns used per row: 0 = fee name,
    3/4 = combo id/name, 5 = unit install price.

    Nothing is inserted when the spreadsheet has no data rows.
    """
    fee_op = mongo_client.get_col_op_test("priceManage", "purchaseFee")
    # NOTE(review): this is the same file name importStandInstallFee reads
    # ("标准", not "标准套餐") -- confirm it is the intended template.
    df = pd.read_excel("C:/Users/zhu/Desktop/导入导出/采购价/安装费-标准-模板.xlsx")
    dataList = np.array(df).tolist()

    addList = []
    for data in dataList:
        print(data)
        addList.append({
            "_id": str(objectid.ObjectId()),
            "groupId": "5cf77b79dd77c768c8103f90",
            "groupName": "标准套餐",
            "priority": 2,
            "feeType": "installFee",
            "name": data[0],
            "template": "费用 = $安装单价*安装件数;返回 费用;",
            "keys": {
                "combos": [{"comboId": data[3], "comboName": data[4]}],
            },
            "vars": [{"varName": "$安装单价", "varValue": data[5]}],
            "createTime": Timestamp(1559098387, 253),
            "lastUpdateTime": Timestamp(1559785785, 272),
            "version": 1,
            "creator": "15000000000",
            "creatorName": "测试九江管理员",
            "modifier": "15000000000",
            "modifierName": "测试九江管理员",
        })

    # PyMongo's insert_many() raises on an empty document list, so skip the
    # call when the spreadsheet produced no rows.
    if addList:
        fee_op.insert_many(addList)
def get_last_saved_ts(self):
    """Return the Timestamp encoded in the greatest stored slice name.

    Slice names have the form ``<time>_<inc>``.  When ``list_names()``
    yields nothing, a Timestamp for 1970-01-02 (local time) with ``inc`` 0
    is returned instead.
    """
    slices = self.list_names()
    if slices:
        seconds, counter = max(slices).split('_')
        return Timestamp(int(seconds), int(counter))

    # No slice saved yet: fall back to a very early point in time.
    fallback_seconds = int(datetime.datetime(1970, 1, 2).timestamp())
    return Timestamp(fallback_seconds, 0)
def get_last_saved_ts(self):
    """Return the Timestamp encoded in the greatest slice name of the store.

    Opens a ``MongoStore`` for ``store_url``/``oplog_store_db`` and lists
    its slices, whose names have the form ``<time>_<inc>``.  When the store
    lists nothing, a Timestamp for 1970-01-02 (local time) with ``inc`` 0 is
    returned instead.
    """
    with MongoStore(store_url, oplog_store_db) as store:
        slices = store.list()
        if slices:
            seconds, counter = max(slices).split('_')
            return Timestamp(int(seconds), int(counter))

        # Nothing stored yet: fall back to a very early point in time.
        fallback_seconds = int(datetime.datetime(1970, 1, 2).timestamp())
        return Timestamp(fallback_seconds, 0)
def logging(request):
    """
    Log display view.
    by: Fan Junwei at:2015-04-15
    Logs sorted in ascending order.
    by: Fan Junwei at:2015-04-16
    Logs changed to descending order.
    by: Wang Jian at:2015-04-19
    :param request:
    :return: rendered 'needserver/logging.html' listing the log entries of
        the last three days, each extended with a "datetime" key.
    """
    three_days_ago = time.time() - 60 * 60 * 24 * 3
    since = Timestamp(int(three_days_ago), 0)
    cursor = db.logs.find({
        "timestamp": {
            "$gt": since
        }
    }).sort("timestamp", pymongo.DESCENDING)

    res = []
    for entry in cursor:
        stamp = entry.get('timestamp')
        # Entries without a timestamp get an explicit None datetime.
        entry.update({"datetime": stamp.as_datetime() if stamp else None})
        res.append(entry)
    return render_to_response('needserver/logging.html', {"list": res})
def parseTimestamp(self, tsString):
    """Parse a ``"<time>:<inc>"`` string into a bson Timestamp.

    :param tsString: text such as ``"1553406314:1"``.
    :return: ``Timestamp(time, inc)`` built from the first two ':' parts.
    :raises ValueError: if ``tsString`` contains no ':' separator (or a
        part is not an integer).
    """
    parts = tsString.split(':')
    if len(parts) >= 2:
        return Timestamp(int(parts[0]), int(parts[1]))
    raise ValueError(
        "Invalid Message found, a Timestamp should like [1553406314:1], but [{}] is found."
        .format(tsString))
def execute(self):
    """Run one oplog synchronization pass and schedule the next one.

    The pending timestamp window is read from the Kafka timestamp topic,
    ``local.oplog.rs`` is queried for entries of the configured namespace
    inside that window, and the next window (``start~end``) is published
    back to the topic before the consumer is committed.  Errors raised
    during the pass are logged and swallowed; afterwards the method
    re-enters itself into ``self.schedule`` after ``interval`` seconds.
    """
    # 'shedule' (sic) is the section name this code reads from the config.
    interval = self.config.get('shedule', 'interval')
    # Tracks whether both Kafka handles exist, so the cleanup below never
    # touches an unbound name when acquisition itself failed.
    kafka_acquired = False
    try:
        ts_consumer = self.kafkaFacade.get_consumer(self.ts_topic)
        ts_producer = self.kafkaFacade.get_prducer(self.ts_topic)
        kafka_acquired = True

        next_ts = self.get_current_ts(ts_consumer, ts_producer)
        if not next_ts:
            # Nothing to process yet; try again after the interval.
            self.schedule.enter(int(interval), 0, self.execute)
            return

        ns = self.config.get('condition', 'namespace')
        queryDict = {'ns': ns}
        next_start_ts = None
        if next_ts and len(next_ts) == 1:
            # Only an upper bound is known.
            queryDict['ts'] = {'$lt': next_ts[0]}
            next_start_ts = next_ts[0]
        elif len(next_ts) == 2:
            # Half-open window [start, end).
            queryDict['ts'] = {'$gte': next_ts[0], '$lt': next_ts[1]}
            next_start_ts = next_ts[1]

        dbUrl = self.config.get('mongo', 'mongodb.url')
        conn = MongoClient(dbUrl)
        try:
            mongo_local = conn["local"]
            logger.info('synchronizing data for %s...' % (queryDict))
            for x in mongo_local.oplog.rs.find(queryDict).sort([('ts', 1)]):
                print(x)  # TODO: handle every oplogs row - add sync logic here
            logger.info('data synchronized for %s...' % (queryDict))

            logger.info('figuring out next synchronztion ts scope ...')
            # Default end of the next window is "now"; it is replaced by the
            # newest oplog ts after next_start_ts when one exists.
            next_end_ts = Timestamp(datetime.utcnow(), 1)
            for x in mongo_local.oplog.rs.aggregate([{
                    '$match': {
                        'ts': {
                            '$gt': next_start_ts
                        }
                    }
            }, {
                    '$group': {
                        '_id': '1',
                        'next_end_ts': {
                            '$max': '$ts'
                        }
                    }
            }]):
                next_end_ts = x['next_end_ts']
        finally:
            # A client is created on every run; close it so connections do
            # not pile up across scheduled executions.
            conn.close()

        # post next synch message
        message = '%s:%s~%s:%s' % (next_start_ts.time, next_start_ts.inc,
                                   next_end_ts.time, next_end_ts.inc)
        self.kafkaFacade.produce_message(ts_producer, message)
        self.kafkaFacade.commit(ts_consumer)
    except BaseException:
        # Deliberately broad: a failed pass must not stop the scheduler.
        logger.error("Exception Occurs: ", exc_info=True)
    finally:
        if kafka_acquired:
            self.kafkaFacade.release(ts_consumer, ts_producer)
    # Outside ``finally`` so the early-return path above is not scheduled twice.
    self.schedule.enter(int(interval), 0, self.execute)
def importStandComboDeliverFee():
    """Import basic delivery fees of the "标准套餐" group from a spreadsheet.

    Every row whose destination can be resolved becomes one document in the
    ``priceManage.purchaseFee`` collection.  The destination is looked up in
    ``baseConfig.area`` by ``mergerName`` = columns 3 + 4 + 5; rows without
    a matching area are collected and written to ``error_scd.xlsx``.
    Other columns used: 0 = fee name, 6/7 = combo id/name, 8 = basic
    delivery fee.

    Nothing is inserted when no row could be resolved; the error report is
    written in every case.
    """
    fee_op = mongo_client.get_col_op_test("priceManage", "purchaseFee")
    area_op = mongo_client.get_col_op_prod("baseConfig", "area")
    df = pd.read_excel("C:/Users/zhu/Desktop/导入导出/采购价/送货费-标准套餐-模板.xlsx")
    dataList = np.array(df).tolist()

    addList = []
    errorList = []
    for data in dataList:
        print(data)
        area = area_op.find_one({"mergerName": data[3] + data[4] + data[5]})
        if area is None:
            # Unknown destination: report the row instead of importing it.
            errorList.append(data)
            continue
        addList.append({
            "_id": str(objectid.ObjectId()),
            "groupId": "5ce3c643eeeb414fbc908323",
            "groupName": "标准套餐",
            "priority": 2,
            "feeType": "basicDeliveryFee",
            "name": data[0],
            "template": "费用 = $基础送货费;返回 费用;",
            "keys": {
                "combos": [{"comboId": data[6], "comboName": data[7]}],
                "destinations": [{
                    "destinationId": area.get("code"),
                    "destinationName": area.get("mergerName"),
                }],
            },
            "vars": [{"varName": "$基础送货费", "varValue": data[8]}],
            "createTime": Timestamp(1559098387, 253),
            "lastUpdateTime": Timestamp(1559785785, 272),
            "version": 1,
            "creator": "15000000000",
            "creatorName": "测试九江管理员",
            "modifier": "15000000000",
            "modifierName": "测试九江管理员",
        })

    # PyMongo's insert_many() raises on an empty document list; without this
    # guard a run where every row failed would crash before the report below
    # is written.
    if addList:
        fee_op.insert_many(addList)

    el = pd.DataFrame(errorList)
    el.to_excel("error_scd.xlsx", index=False)
def test_replicate_mongodb_to_pg(self):
    """Replicate mongodb to Postgres"""

    def assert_columns_exist(table):
        """Check that the target table exists with all expected columns"""
        expected_columns = ['_id', 'document', '_sdc_extracted_at',
                            '_sdc_batched_at', '_sdc_deleted_at']
        assertions.assert_cols_in_table(self.run_query_target_postgres,
                                        'ppw_e2e_tap_mongodb',
                                        table,
                                        expected_columns)

    def assert_row_counts_equal(target_schema, table, count_in_source):
        """Check that the target table holds as many rows as the source"""
        rows = self.run_query_target_postgres(
            f'select count(_id) from {target_schema}.{table}')
        assert count_in_source == rows[0][0]

    # Run tap first time - fastsync and singer should be triggered
    assertions.assert_run_tap_success(TAP_MONGODB_ID, TARGET_ID, ['fastsync', 'singer'])
    for table in ('listings', 'my_collection'):
        assert_columns_exist(table)

    listing_count = self.mongodb_con['listings'].count_documents({})
    my_coll_count = self.mongodb_con['my_collection'].count_documents({})

    assert_row_counts_equal('ppw_e2e_tap_mongodb', 'listings', listing_count)
    assert_row_counts_equal('ppw_e2e_tap_mongodb', 'my_collection', my_coll_count)

    # Change the source: insert three documents, delete the first of them,
    # then set 'id' to 0 on everything that is left.
    new_documents = [
        {
            'age': randint(10, 30),
            'id': 1001,
            'uuid': uuid.uuid4(),
            'ts': Timestamp(12030, 500)
        },
        {
            'date': datetime.utcnow(),
            'id': 1002,
            'uuid': uuid.uuid4(),
            'regex': bson.Regex(r'^[A-Z]\\w\\d{2,6}.*$')
        },
        {
            'uuid': uuid.uuid4(),
            'id': 1003,
            'nested_json': {'a': 1, 'b': 3, 'c': {'key': bson.datetime.datetime(2020, 5, 3, 10, 0, 0)}}
        }
    ]
    result_insert = self.mongodb_con.my_collection.insert_many(new_documents)
    my_coll_count += len(result_insert.inserted_ids)

    first_inserted_id = result_insert.inserted_ids[0]
    result_del = self.mongodb_con.my_collection.delete_one({'_id': first_inserted_id})
    my_coll_count -= result_del.deleted_count

    result_update = self.mongodb_con.my_collection.update_many({}, {'$set': {'id': 0}})

    # Second run must carry the changes over to the target.
    assertions.assert_run_tap_success(TAP_MONGODB_ID, TARGET_ID, ['fastsync', 'singer'])

    updated_rows = self.run_query_target_postgres(
        'select count(_id) from ppw_e2e_tap_mongodb.my_collection where cast(document->>\'id\' as int) = 0')
    assert result_update.modified_count == updated_rows[0][0]

    assert_row_counts_equal('ppw_e2e_tap_mongodb', 'my_collection', my_coll_count)
def get_oplog_ts(self, source, dest):
    """Return the oplog Timestamp stored for a source/destination pair.

    The ``oplog_ts`` column of ``STATE_TABLE`` holds JSON with ``time`` and
    ``inc`` keys; the row is selected by the string forms of ``source`` and
    ``dest`` produced by ``_mongo_dict_to_str``.

    :raises TypeError: if no row matches (``fetchone()`` returns None).
    """
    query = ("SELECT oplog_ts FROM %s WHERE source = ? AND dest = ?"
             % self.STATE_TABLE)
    params = (_mongo_dict_to_str(source), _mongo_dict_to_str(dest))

    with self._conn:
        cursor = self._conn.cursor()
        cursor.execute(query, params)
        row = cursor.fetchone()
        stored = json.loads(row[0])
    return Timestamp(time=stored['time'], inc=stored['inc'])
def get_current_timestamp():
    """Get the current timestamp as a bson Timestamp object.

    The module-level ``_LAST_TIMESTAMP_INC`` list remembers the last second
    and increment handed out, so calls within the same second receive
    increasing ``inc`` values starting at 1.

    :raises NotImplementedError: if ``Timestamp`` is falsy (bson missing).
    """
    if not Timestamp:
        raise NotImplementedError('timestamp is not supported. Import pymongo to use it.')

    now = int(time.time())
    same_second = bool(_LAST_TIMESTAMP_INC) and _LAST_TIMESTAMP_INC[0] == now
    if same_second:
        _LAST_TIMESTAMP_INC[1] += 1
    else:
        # New second (or first call): restart the increment at 1, in place.
        _LAST_TIMESTAMP_INC[:] = [now, 1]
    return Timestamp(now, _LAST_TIMESTAMP_INC[1])
def read_tag_file(self):
    """Read the position tag stored in the file ``tag_file``.

    :return: ``None`` when the file does not exist, ``-1`` when its content
        is exactly ``'-1'``, otherwise the Timestamp parsed from the
        ``<time>_<inc>`` text found before the first ``'='``.
    """
    fname = 'tag_file'
    if not os.path.exists(fname):
        return None

    with open(fname, 'r') as handle:
        tag = handle.read()

    if tag == '-1':
        return -1

    position = tag.split('=')[0]
    _time, _inc = position.split('_')
    return Timestamp(int(_time), int(_inc))
def send_cluster_time(time, inc, should_update):
    """Feed a hello reply carrying a cluster time and check the outcome.

    After the reply is processed, ``t.max_cluster_time()`` must equal the
    new ``$clusterTime`` document when ``should_update`` is true and the
    previous value otherwise.
    """
    old = t.max_cluster_time()
    new = {'clusterTime': Timestamp(time, inc)}
    hello_reply = {
        'ok': 1,
        'minWireVersion': 0,
        'maxWireVersion': 6,
        '$clusterTime': new,
    }
    got_hello(t, ('host', 27017), hello_reply)

    expected = new if should_update else old
    self.assertEqual(t.max_cluster_time(), expected)
def setUp(self):
    """Prepare the fixtures shared by every test method.

    Records the connection url, namespace names and a timestamp for "now",
    builds the DocManager for the url, and creates the test database and
    the test collection through the DocManager's arango connection.
    """
    self.url = "localhost:8529"
    now_ts = Timestamp(int(time.time()), 1)
    self.timestamp = bson_ts_to_long(now_ts)

    self.arango_doc_manager_obj = DocManager(self.url)
    self.arango_connection = self.arango_doc_manager_obj.arango

    self.database_name = "test_db"
    self.collection_name = "test_collection"
    self.namespace = self.database_name + '.' + self.collection_name

    self.db = self.arango_connection.create_database(self.database_name)
    self.collection = self.db.create_collection(self.collection_name)
def watcher(self):
    """Check the oplog for recent entries of the watched namespace.

    Looks for oplog entries of ``self.ns_filter`` whose ``ts`` lies within
    the last ``self.roll_time`` (the scheduling interval) and, if there are
    any, dispatches to ``collection_handle`` or ``doc_handle``.
    """
    # Watch the configured ns object within the scheduling interval.
    self.logger.info('mongodb-oplog-watcher is running')

    now_timestamp = time2timestamp(time.time())
    window_start = Timestamp(now_timestamp - self.roll_time, 1)
    query = {
        "$and": [
            {"ts": {"$gte": window_start}},
            {"ns": {"$eq": self.ns_filter}},
        ]
    }
    matches = list(self.oplog.find(query))
    # print(matches)
    if not matches:
        return

    # todo business logic
    # If data was updated, act on its _id; collection-level operations
    # carry no _id, document-level operations do.
    if '$cmd' in self.ns_filter:
        self.collection_handle()
    else:
        self.doc_handle()
def get_current_ts(self, ts_consumer, ts_producer):
    """Fetch the timestamp window to synchronize from the timestamp topic.

    For a new consumer with nothing pending, an initial ``time:inc`` message
    for "now" (UTC) is produced and ``False`` is returned.  Otherwise the
    pending message, if any, is split on ``'~'`` and each part is parsed
    with ``parseTimestamp``.

    :return: list of Timestamps, or ``False`` when there is nothing to use.
    """
    pending = self.kafkaFacade.has_pendding_message(self.ts_topic,
                                                    ts_consumer)
    is_new = self.kafkaFacade.is_new_consumer(ts_consumer)

    if is_new and not pending:
        # Seed the topic so that a later call finds a starting point.
        initStamp = Timestamp(datetime.utcnow(), 1)
        initMessage = '%s:%s' % (initStamp.time, initStamp.inc)
        self.kafkaFacade.produce_message(ts_producer, initMessage)
        return False

    if not pending:
        return False

    raw_message = self.kafkaFacade.get_message(self.ts_topic, ts_consumer)
    if not raw_message:
        return False
    return [self.parseTimestamp(part) for part in raw_message.split('~')]
def _bootstrap(self, db_name, coll_name):
    """Yield the existing documents of a collection as bootstrap events.

    First a ``bootstrap-start`` marker is yielded, then one
    ``bootstrap-insert`` event for every document whose ``_id`` is not
    greater than the largest ``_id`` found when the scan begins.  An empty
    collection yields only the start marker.

    :param db_name: database to read from.
    :param coll_name: collection to read from.
    """
    # Emit the start marker.
    yield {
        'operationType': 'bootstrap-start',
        'ns': {
            'db': db_name,
            'coll': coll_name
        },
        'topic': f'{db_name}-{coll_name}',
    }

    db = self.conn.client[db_name]
    coll = db[coll_name]

    # Open a session and determine the largest _id at the current time
    # (start_operation_time).
    # NOTE(review): the reads below are not given ``session=session``, so
    # they presumably run outside this snapshot transaction -- confirm.
    with self.conn.client.start_session() as session:
        with session.start_transaction(
                read_concern=ReadConcern('snapshot')):
            # NOTE(review): ``ts`` is not read anywhere in the visible code.
            ts = Timestamp(datetime.utcnow(), 0)
            max_obj = coll.find_one(sort=[('_id', DESCENDING)])
            # find_one() returns None for an empty collection; treat that
            # as "nothing to bootstrap" instead of raising AttributeError.
            max_id = max_obj.get('_id') if max_obj is not None else None

    if max_id:
        domain = {'_id': {'$lte': max_id}}
        values = coll.find(domain)
        index = 1
        for val in values:
            data = {
                'operationType': 'bootstrap-insert',
                'fullDocument': val,
                'documentKey': val['_id'],
                'ns': {
                    'db': db_name,
                    'coll': coll_name
                },
                'topic': f'{db_name}-{coll_name}',
            }
            yield data

            # Log a sample of the data every 50,000 documents.
            index += 1
            if index % 50_000 == 0:
                logger.info(f"MongoDB index = {index}, data={data}")
async def init_async(cls, oplog, operation_handler: OperationHandler, namespace_filter: str=None, starting_timestamp: Timestamp=None, on_nothing_to_fetch_on_cursor=None):
    """Asynchronously build an instance, resolving the starting timestamp.

    When ``starting_timestamp`` is not given, the ``ts`` of the last oplog
    document in natural order is used; if that lookup yields nothing
    subscriptable (``TypeError``), ``Timestamp(0, 1)`` is used instead.

    :return: ``cls(oplog, operation_handler, namespace_filter,
        starting_timestamp, on_nothing_to_fetch_on_cursor)``.
    """
    resolved_timestamp = starting_timestamp
    if resolved_timestamp is None:
        newest_entry = await oplog.find_one(sort=[('$natural', DESCENDING)])
        try:
            resolved_timestamp = newest_entry['ts']
        except TypeError:
            # No usable newest entry: start from the beginning.
            resolved_timestamp = Timestamp(0, 1)

    return cls(oplog,
               operation_handler,
               namespace_filter,
               resolved_timestamp,
               on_nothing_to_fetch_on_cursor)
def test_row_to_singer_record_successful_transformation_with_deleted(self):
    """A row with a deletion time becomes a RECORD carrying SDC_DELETED_AT.

    The ObjectId is rendered as its hex string and the bson Timestamp as a
    UTC datetime string inside the ``document`` of the record.
    """
    stream = {
        'stream': 'myStream',
        'metadata': [{
            'breadcrumb': [],
            'metadata': {}
        }]
    }
    row = {
        '_id': ObjectId('0123456789ab0123456789ab'),
        'key1': 10,
        'key2': Timestamp(1589379991, 4696183),
        'key3': 1.5
    }
    time_extracted = datetime(2020, 5, 13, 14, 10, 10, tzinfo=tzutc())
    time_deleted = datetime(2020, 5, 20, 15, 0, 0, 0, tzinfo=tzutc())

    expected = {
        'type': 'RECORD',
        'stream': 'myStream',
        'record': {
            '_id': '0123456789ab0123456789ab',
            'document': {
                '_id': '0123456789ab0123456789ab',
                'key1': 10,
                'key2': '2020-05-13T14:26:31.000000Z',
                'key3': 1.5
            },
            common.SDC_DELETED_AT: '2020-05-20T15:00:00.000000Z',
        },
        'version': 100,
        'time_extracted': '2020-05-13T14:10:10.000000Z',
    }

    result = common.row_to_singer_record(stream, row, time_extracted,
                                         time_deleted, 100)

    self.assertEqual(expected, result.asdict())
def slice_name_to_ts(name):
    """Convert a slice name of the form ``<time>_<inc>`` to a Timestamp.

    :raises ValueError: if ``name`` does not consist of exactly two
        '_'-separated integer parts.
    """
    seconds, counter = name.split('_')
    return Timestamp(time=int(seconds), inc=int(counter))
'type_cm': 'jsonb', 'value': {"one": "two"}}, 'extraProps': {'name_cm': '_extra_props', 'type_cm': 'jsonb', 'value': {"three": "four", "five": "six"} }, 'signupCode': {'name_cm': 'signup_code', 'type_cm': 'text', 'value': "I am a text"} } oplog_entries_update = [ { 'ts': Timestamp(1556029671, 1), 't': 48, 'h': -4473962510602026742, 'v': 2, 'op': 'u', 'operationType': 'update', 'ns': {'db': 'test_purrito', 'coll': 'Employee'}, 'o2': {'_id': '1'}, 'updateDescription' : {'updatedFields': {'firstName': 'Janos', 'lastName': None, 'hair': None}}, 'documentKey': {'_id': '1'}, 'o': { '$set': { 'firstName': 'Janos', 'lastName': None },
def dt2ts(dt):
    """Convert a datetime to a bson Timestamp with ``inc`` 0.

    The seconds are ``dt.timestamp()`` truncated to an integer.
    """
    epoch_seconds = int(dt.timestamp())
    return Timestamp(epoch_seconds, 0)
def test_copy_table_with_collection_found_success(self):
    """
    copy_table with a collection name that exists in the db: mongodump is
    invoked once, the dumped documents are decoded once, and both dump
    files are removed afterwards.
    """
    self.mongo.database = Mock(spec_set=Database).return_value
    self.mongo.database.list_collection_names.return_value = [
        'col1', 'col2', 'col3', 'my_col'
    ]

    # Documents "read back" from the dump file.
    dumped_documents = [
        {
            '_id': ObjectId('0123456789ab0123456789aa'),
            'key1': 1,
            'key2': time.time()
        },
        {
            '_id': ObjectId('0123456789ab0123456789ab'),
            'key1': 2
        },
        {
            '_id': ObjectId('0123456789ab0123456789ac'),
            'key3': Timestamp(10000, 50)
        },
    ]

    with patch('pipelinewise.fastsync.commons.tap_mongodb.subprocess.call') as call_mock, \
            patch('pipelinewise.fastsync.commons.tap_mongodb.os.remove') as os_remove_mock, \
            patch('pipelinewise.fastsync.commons.tap_mongodb.gzip') as gzip_mock, \
            patch('pipelinewise.fastsync.commons.tap_mongodb.bson.decode_file_iter') as \
            bson_decode_iter_mock:
        call_mock.return_value = 0
        os_remove_mock.return_value = True
        bson_decode_iter_mock.return_value = dumped_documents

        # Make the patched gzip module usable as a context manager.
        mock_enter = Mock()
        mock_enter.return_value.open.return_value = Mock()
        gzip_mock.return_value.__enter__ = mock_enter
        gzip_mock.return_value.__exit__ = Mock()

        self.mongo.copy_table('my_col', 'file.csv.gzip', 'tmp')

        call_mock.assert_called_once_with([
            'mongodump', '--uri',
            '"mongodb://*****:*****@foo.com:3306/my_db'
            '?authSource=admin&readPreference=secondaryPreferred&ssl=true"',
            '--forceTableScan', '--gzip', '-c', 'my_col', '-o', 'tmp'
        ])

        os_remove_mock.assert_has_calls([
            call('tmp/my_db/my_col.metadata.json.gz'),
            call('tmp/my_db/my_col.bson.gz')
        ])
        self.assertEqual(os_remove_mock.call_count, 2)
        self.assertEqual(bson_decode_iter_mock.call_count, 1)
def int_to_bson_timestamp(ts: int):
    """Split a 64-bit integer into a bson Timestamp.

    The bits above the low 32 become ``time`` and the low 32 bits become
    ``inc``.
    """
    # https://docs.mongodb.com/manual/reference/bson-types/#timestamps
    seconds = ts >> 32
    increment = ts & 0xFFFFFFFF
    return Timestamp(time=seconds, inc=increment)
# Collections to process, parsed from the 'collection_list' entry of the
# "source_db_connection" config section (a Python literal).
collection_white_list = ast.literal_eval(
    config_parser.config_section_map("source_db_connection")
    ['collection_list'])
log.info("<=====Collection List Processes by Opslog ======>" +
         ', '.join(collection_white_list))

# Connect to the source MongoDB described by source_db_config.
source_db = MongoDatabase(dbtype=source_db_config.get("dbtype"),
                          host=source_db_config.get("host"),
                          port=int(source_db_config.get("port")),
                          user=source_db_config.get("user"),
                          password=source_db_config.get("pass"),
                          database=source_db_config.get("database"))
mongo_db_connection = source_db.connect()

# get the latest timestamp in the database
# last_ts = db.oplog.rs.find().sort('$natural', -1)[0]['ts'];
# print(last_ts.as_datetime())
sqlite_db_connection = SqliteQueue(sqlite_db)

# Start from the greatest timestamp recorded in the sqlite queue, or from
# "now" when select_max() returns nothing.
ts_value = sqlite_db_connection.select_max()
if not ts_value:
    last_ts = Timestamp(int(time.time()), 1)
    #last_ts = Timestamp(1533198932, 1)
else:
    (key, value) = ts_value
    last_ts = Timestamp(key, value)
# NOTE(review): the original indentation was lost; this print is assumed to
# follow the if/else rather than belong to the else branch -- confirm.
print(last_ts.as_datetime())

oplog_watcher(mongo_db_connection, sqlite_db_connection,
              collection_white_list, last_ts)
def test_replicate_mongodb_to_sf(self):
    """Replicate mongodb to Snowflake"""

    def assert_columns_exist(table):
        """Check that the target table exists with all expected columns"""
        expected_columns = [
            '_ID',
            'DOCUMENT',
            '_SDC_EXTRACTED_AT',
            '_SDC_BATCHED_AT',
            '_SDC_DELETED_AT',
        ]
        assertions.assert_cols_in_table(
            self.run_query_target_snowflake,
            'ppw_e2e_tap_mongodb',
            table,
            expected_columns,
        )

    def assert_row_counts_equal(target_schema, table, count_in_source):
        """Check that the target table holds as many rows as the source"""
        rows = self.run_query_target_snowflake(
            f'select count(_id) from {target_schema}.{table}')
        assert (count_in_source == rows[0][0])

    # Run tap first time - fastsync and singer should be triggered
    assertions.assert_run_tap_success(TAP_MONGODB_ID, TARGET_ID,
                                      ['fastsync', 'singer'])
    for table in ('listings', 'my_collection', 'all_datatypes'):
        assert_columns_exist(table)

    listing_count = self.mongodb_con['listings'].count_documents({})
    my_coll_count = self.mongodb_con['my_collection'].count_documents({})
    all_datatypes_count = self.mongodb_con[
        'all_datatypes'].count_documents({})

    assert_row_counts_equal('ppw_e2e_tap_mongodb', 'listings',
                            listing_count)
    assert_row_counts_equal('ppw_e2e_tap_mongodb', 'my_collection',
                            my_coll_count)
    assert_row_counts_equal('ppw_e2e_tap_mongodb', 'all_datatypes',
                            all_datatypes_count)

    # Change the source: insert three documents, delete the first of them,
    # then set 'id' to 0 on everything that is left.
    new_documents = [
        {
            'age': randint(10, 30),
            'id': 1001,
            'uuid': uuid.uuid4(),
            'ts': Timestamp(12030, 500),
        },
        {
            'date': datetime.utcnow(),
            'id': 1002,
            'uuid': uuid.uuid4(),
            'regex': bson.Regex(r'^[A-Z]\\w\\d{2,6}.*$'),
        },
        {
            'uuid': uuid.uuid4(),
            'id': 1003,
            'decimal': bson.Decimal128(decimal.Decimal('5.64547548425446546546644')),
            'nested_json': {
                'a': 1,
                'b': 3,
                'c': {
                    'key': bson.datetime.datetime(2020, 5, 3, 10, 0, 0)
                },
            },
        },
    ]
    result_insert = self.mongodb_con.my_collection.insert_many(new_documents)
    my_coll_count += len(result_insert.inserted_ids)

    first_inserted_id = result_insert.inserted_ids[0]
    result_del = self.mongodb_con.my_collection.delete_one(
        {'_id': first_inserted_id})
    my_coll_count -= result_del.deleted_count

    result_update = self.mongodb_con.my_collection.update_many(
        {}, {'$set': {
            'id': 0
        }})

    # Second run: only singer is expected this time.
    assertions.assert_run_tap_success(TAP_MONGODB_ID, TARGET_ID,
                                      ['singer'])

    updated_rows = self.run_query_target_snowflake(
        'select count(_id) from ppw_e2e_tap_mongodb.my_collection where document:id = 0'
    )
    assert (result_update.modified_count == updated_rows[0][0])

    assert_row_counts_equal('ppw_e2e_tap_mongodb', 'my_collection',
                            my_coll_count)
class CurrentDateOperationTests(LiveCollectionFieldUpdateOperatorsTests,
                                asynctest.TestCase):
    """
    Tests for the $currentDate update operator.

    $currentDate sets the value of a field to the current date, either as a
    Date or as a timestamp; Date is the default type.

    Behavior
    If the field does not exist, $currentDate adds it to the document.
    """
    template_doc = {
        "status": "a",
        "modified_at": Timestamp(datetime(2006, 6, 6, 6, 6, 6), 0)
    }

    async def _cancel_with_current_date(self, current_date_spec):
        """Apply the cancellation update, observe it, return the handled doc."""
        doc_id = self.doc['_id']
        await self.collection.update_one({'_id': doc_id}, {
            "$currentDate": current_date_spec,
            "$set": {
                "status": "D",
                "cancellation.reason": "user request"
            }
        })
        await self.observer.observe_changes()
        return self.handler.collection[doc_id]

    async def test_it_updates_existing_date_field(self):
        affected_doc = await self._cancel_with_current_date({
            "modified_at": True,
        })

        expected_doc = {
            '_id': self.doc['_id'],
            'status': 'D',
            'modified_at': ANY,
            'cancellation': {
                'reason': 'user request'
            }
        }
        self.assertEqual(affected_doc, expected_doc)

        self.assertIsInstance(affected_doc['modified_at'], datetime)
        self.assertEqual(affected_doc['modified_at'].date(),
                         datetime.today().date())

    async def test_it_creates_new_datetime_fields(self):
        affected_doc = await self._cancel_with_current_date({
            "cancellation.date": {
                "$type": "date"
            }
        })

        created = affected_doc['cancellation']['date']
        self.assertIsInstance(created, datetime)
        self.assertEqual(created.date(), datetime.today().date())

    async def test_it_creates_new_timestamp_fields(self):
        affected_doc = await self._cancel_with_current_date({
            "cancellation.date": {
                "$type": "timestamp"
            }
        })

        created = affected_doc['cancellation']['date']
        self.assertIsInstance(created, Timestamp)
        self.assertEqual(created.as_datetime().date(),
                         datetime.today().date())