def test_bson_ts_to_long(self):
    """Test bson_ts_to_long and long_to_bson_ts"""
    tstamp = timestamp.Timestamp(0x12345678, 0x90abcdef)
    self.assertEqual(0x1234567890abcdef, bson_ts_to_long(tstamp))
    self.assertEqual(long_to_bson_ts(0x1234567890abcdef), tstamp)
def test_Timestamps(self):
    """Tests mongo operations with Timestamps"""
    conn = yield txmongo.MongoConnection(mongo_host, mongo_port)
    test = conn.foo.test_ts
    test.drop()

    # insert with specific timestamp
    doc1 = {'_id': objectid.ObjectId(), 'ts': timestamp.Timestamp(1, 2)}
    yield test.insert(doc1, safe=True)

    result = yield test.find_one(doc1)
    self.assertEqual(result.get('ts').time, 1)
    self.assertEqual(result.get('ts').inc, 2)

    # insert with specific timestamp
    doc2 = {'_id': objectid.ObjectId(), 'ts': timestamp.Timestamp(2, 1)}
    yield test.insert(doc2, safe=True)

    # the objects come back sorted by ts correctly.
    # (test that we stored inc/time in the right fields)
    result = yield test.find(filter=qf.sort(qf.ASCENDING('ts')))
    self.assertEqual(len(result), 2)
    self.assertEqual(result[0]['_id'], doc1['_id'])
    self.assertEqual(result[1]['_id'], doc2['_id'])

    # insert with null timestamp
    doc3 = {'_id': objectid.ObjectId(), 'ts': timestamp.Timestamp(0, 0)}
    yield test.insert(doc3, safe=True)

    # time field loaded correctly
    result = yield test.find_one(doc3['_id'])
    now = time.time()
    self.assertTrue(now - 2 <= result['ts'].time <= now)

    # delete
    yield test.remove(doc1["_id"], safe=True)
    yield test.remove(doc2["_id"], safe=True)
    yield test.remove(doc3["_id"], safe=True)

    # disconnect
    yield conn.disconnect()
def load_request_attr(question, request):
    question_attrs = [
        'description', 'answer', 'dismissed', 'category', 'date'
    ]
    for attr_name in question_attrs:
        attr = request.json.get(attr_name)
        if attr_name == 'date':
            attr = timestamp.Timestamp(int(attr) // 1000, 1)
        question[attr_name] = attr
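# Illustrative only: load_request_attr above expects the JSON body to carry
# 'date' as epoch milliseconds (as a JavaScript Date.now() would produce),
# which it truncates to whole seconds for the BSON Timestamp. The payload
# below is a made-up example, not part of the original application.
example_payload = {
    'description': 'mixed up the sign in step 2',
    'answer': 'B',
    'dismissed': False,
    'category': 'algebra',
    'date': 1672531200000,  # 2023-01-01T00:00:00Z in milliseconds
}
# timestamp.Timestamp(1672531200000 // 1000, 1) == Timestamp(1672531200, 1)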
def test_bson_ts_to_long(self):
    """Test bson_ts_to_long and long_to_bson_ts"""
    ts = timestamp.Timestamp(0x12345678, 0x90abcdef)
    self.assertEqual(0x1234567890abcdef, bson_ts_to_long(ts))
    self.assertEqual(long_to_bson_ts(0x1234567890abcdef), ts)
    print("PASSED BSON TS TO LONG")
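# The two tests above exercise bson_ts_to_long and long_to_bson_ts without
# showing them. A minimal sketch consistent with the asserted constants
# (time in the high 32 bits, increment in the low 32 bits) could look like
# this; the packing scheme is inferred from the test values, not taken from
# the snippets themselves.
from bson import timestamp

def bson_ts_to_long(ts):
    # Pack a bson Timestamp into a single 64-bit integer.
    return (ts.time << 32) + ts.inc

def long_to_bson_ts(val):
    # Inverse of bson_ts_to_long: split the integer back into (time, inc).
    return timestamp.Timestamp(val >> 32, val & 0xffffffff)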
def oplog_has_aged_out(client, state, tap_stream_id):
    earliest_ts_row = client.local.oplog.rs.find_one(
        sort=[('$natural', pymongo.ASCENDING)])
    earliest_ts = earliest_ts_row.get('ts')

    stream_state = state.get('bookmarks', {}).get(tap_stream_id)
    if not stream_state or not stream_state.get('oplog_ts_time'):
        return False

    bookmarked_ts = timestamp.Timestamp(stream_state['oplog_ts_time'],
                                        stream_state['oplog_ts_inc'])

    return bookmarked_ts < earliest_ts
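# oplog_has_aged_out reads the bookmarked oplog position from the singer
# state as two integers, oplog_ts_time and oplog_ts_inc. The state below is
# a hypothetical sketch (stream id and values are invented) showing the
# shape it expects and how the check might be used.
state = {
    'bookmarks': {
        'mydb-mycollection': {
            'oplog_ts_time': 1546510000,  # Timestamp.time of the last synced oplog row
            'oplog_ts_inc': 5,            # Timestamp.inc of the last synced oplog row
        }
    }
}

# True when the bookmark is older than the earliest entry still present in
# local.oplog.rs, i.e. the oplog has rolled past the bookmarked position.
if oplog_has_aged_out(client, state, 'mydb-mycollection'):
    LOGGER.info('Oplog has aged out since the last sync; a full resync is required')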
def sync_collection(client, stream, state, stream_projection):
    tap_stream_id = stream['tap_stream_id']
    LOGGER.info('Starting oplog sync for %s', tap_stream_id)

    md_map = metadata.to_map(stream['metadata'])
    database_name = metadata.get(md_map, (), 'database-name')
    collection_name = stream.get("table_name")
    stream_state = state.get('bookmarks', {}).get(tap_stream_id)

    oplog_ts = timestamp.Timestamp(stream_state['oplog_ts_time'],
                                   stream_state['oplog_ts_inc'])

    # Write activate version message
    version = common.get_stream_version(tap_stream_id, state)
    activate_version_message = singer.ActivateVersionMessage(
        stream=common.calculate_destination_stream_name(stream),
        version=version)
    singer.write_message(activate_version_message)

    time_extracted = utils.now()
    rows_saved = 0
    start_time = time.time()

    oplog_query = {'ts': {'$gte': oplog_ts}}

    projection = transform_projection(stream_projection)

    oplog_replay = stream_projection is None

    LOGGER.info(
        'Querying %s with:\n\tFind Parameters: %s\n\tProjection: %s\n\toplog_replay: %s',
        tap_stream_id, oplog_query, projection, oplog_replay)

    update_buffer = set()

    # consider adding oplog_replay, but this would require removing the projection
    # default behavior is a non_tailable cursor but we might want a tailable one
    # regardless of whether it's long-lived or not.
    with client.local.oplog.rs.find(oplog_query,
                                    projection,
                                    sort=[('$natural', pymongo.ASCENDING)],
                                    oplog_replay=oplog_replay) as cursor:
        for row in cursor:
            # assertions that mongo is respecting the ts query and sort order
            if row.get('ts') and row.get('ts') < oplog_ts:
                raise common.MongoAssertionException(
                    "Mongo is not honoring the query param")
            if row.get('ts') and row.get('ts') < timestamp.Timestamp(
                    stream_state['oplog_ts_time'], stream_state['oplog_ts_inc']):
                raise common.MongoAssertionException(
                    "Mongo is not honoring the sort ascending param")

            if row.get('ns') != '{}.{}'.format(database_name, collection_name):
                if row.get('ts'):
                    state = update_bookmarks(state, tap_stream_id, row['ts'])
                continue

            row_op = row['op']
            if row_op == 'i':
                record_message = common.row_to_singer_record(
                    stream, row['o'], version, time_extracted)
                singer.write_message(record_message)
                rows_saved += 1
            elif row_op == 'u':
                update_buffer.add(row['o2']['_id'])
            elif row_op == 'd':
                # remove update from buffer if that document has been deleted
                if row['o']['_id'] in update_buffer:
                    update_buffer.remove(row['o']['_id'])

                # Delete ops only contain the _id of the row deleted
                row['o'][SDC_DELETED_AT] = row['ts']

                record_message = common.row_to_singer_record(
                    stream, row['o'], version, time_extracted)
                singer.write_message(record_message)
                rows_saved += 1

            state = update_bookmarks(state, tap_stream_id, row['ts'])

            # flush buffer if it has filled up
            if len(update_buffer) >= MAX_UPDATE_BUFFER_LENGTH:
                for buffered_row in flush_buffer(client, update_buffer,
                                                 stream_projection, database_name,
                                                 collection_name):
                    record_message = common.row_to_singer_record(
                        stream, buffered_row, version, time_extracted)
                    singer.write_message(record_message)
                    rows_saved += 1
                update_buffer = set()

            # write state every UPDATE_BOOKMARK_PERIOD messages
            if rows_saved % common.UPDATE_BOOKMARK_PERIOD == 0:
                # flush buffer before writing state
                for buffered_row in flush_buffer(client, update_buffer,
                                                 stream_projection, database_name,
                                                 collection_name):
                    record_message = common.row_to_singer_record(
                        stream, buffered_row, version, time_extracted)
                    singer.write_message(record_message)
                    rows_saved += 1
                update_buffer = set()

                # write state
                singer.write_message(
                    singer.StateMessage(value=copy.deepcopy(state)))

        # flush buffer if finished with oplog
        for buffered_row in flush_buffer(client, update_buffer,
                                         stream_projection, database_name,
                                         collection_name):
            record_message = common.row_to_singer_record(
                stream, buffered_row, version, time_extracted)
            singer.write_message(record_message)
            rows_saved += 1

    common.COUNTS[tap_stream_id] += rows_saved
    common.TIMES[tap_stream_id] += time.time() - start_time
    LOGGER.info('Synced %s records for %s', rows_saved, tap_stream_id)
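# update_bookmarks is called throughout sync_collection but is not shown in
# this snippet. A plausible minimal version, assuming the same
# oplog_ts_time/oplog_ts_inc bookmark keys used elsewhere in these examples
# and singer-python's write_bookmark helper, would persist the two halves of
# the bson Timestamp so it can be rebuilt on the next run.
def update_bookmarks(state, tap_stream_id, ts):
    # Sketch only: store Timestamp.time and Timestamp.inc separately so that
    # timestamp.Timestamp(oplog_ts_time, oplog_ts_inc) can recreate the bookmark.
    state = singer.write_bookmark(state, tap_stream_id, 'oplog_ts_time', ts.time)
    state = singer.write_bookmark(state, tap_stream_id, 'oplog_ts_inc', ts.inc)
    return state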
def update_wrong_questions_file():
    import os
    from werkzeug.utils import secure_filename
    from .utils import get_upload_path

    uid = current_user.get_id()
    db = get_db()
    resp = {}
    f = request.files['file']

    import uuid
    hashname = str(uuid.uuid4()) + os.path.splitext(f.filename)[-1]

    question = {'uid': uid}
    question['description'] = request.form.get('description')
    question['date'] = timestamp.Timestamp(
        int(request.form.get('date')) // 1000, 1)
    question['fname'] = f.filename
    question['hashname'] = hashname
    question['dismissed'] = boolean(request.form.get('dismissed')) is True
    question['category'] = request.form.get('category')
    question['answer'] = request.form.get('answer')
    question['url'] = 'https://netwx.c-leon.top/api/uploads/' + hashname

    if request.method == 'PUT':
        _id = request.form.get('_id')
        condition = {'uid': uid, '_id': ObjectId(_id)}
        ori_question = db.question.find_one(condition)
        if not ori_question:
            resp['success'] = False
            resp['message'] = '_id not found'
            return jsonify(resp)
        else:
            try:
                os.remove(
                    os.path.join(
                        get_upload_path(),
                        str(ori_question['hashname'])))  # secure_filename(name)
            except FileNotFoundError:
                resp['success'] = False
                resp['message'] = 'original file not found.'
                return jsonify(resp)

        ori_question.update(question)
        update_result = db.question.update_one(condition,
                                               {'$set': ori_question})
        upload_path = os.path.join(
            get_upload_path(), str(question['hashname']))  # secure_filename(name)
        f.save(upload_path)
        # print(result.raw_result)
        resp['success'] = True
        resp['matched_count'] = update_result.matched_count
        resp['modified_count'] = update_result.modified_count
        return jsonify(resp)

    upload_path = os.path.join(get_upload_path(),
                               str(question['hashname']))  # secure_filename(name)
    f.save(upload_path)
    result = db.question.insert_one(question)
    resp['_id'] = str(result.inserted_id)
    resp['success'] = True
    resp['message'] = 'upload success'
    return jsonify(resp)
def sync_oplog_stream(client, streams, state):
    streams_map = generate_streams_map(streams)

    # for tap_stream_id in streams_map.keys():
    #     common.whitelist_bookmark_keys(BOOKMARK_KEYS, tap_stream_id, state)

    for tap_stream_id, bookmark in state.get('bookmarks', {}).items():
        oplog_ts = min([timestamp.Timestamp(v['oplog_ts_time'], v['oplog_ts_inc'])
                        for k, v in state.get('bookmarks', {}).items()
                        if streams_map.get(k)])

    LOGGER.info("Starting oplog replication with ts=%s", oplog_ts)

    time_extracted = utils.now()
    rows_saved = 0
    ops_skipped = 0

    with client.local.oplog.rs.find({'ts': {'$gt': oplog_ts}},
                                    oplog_replay=True) as cursor:
        while cursor.alive:
            try:
                row = next(cursor)
                if row['op'] == 'n':
                    LOGGER.debug('Skipping noop op')
                elif not streams_map.get(generate_tap_stream_id_for_row(row)):
                    ops_skipped = ops_skipped + 1
                    if ops_skipped % UPDATE_BOOKMARK_PERIOD == 0:
                        LOGGER.info("Skipped %s ops so far as they were not for selected tables; %s rows extracted",
                                    ops_skipped, rows_saved)
                else:
                    rows_saved += 1
                    row_op = row['op']
                    if row_op in ['i']:
                        tap_stream_id = generate_tap_stream_id_for_row(row)
                        stream_map_entry = streams_map[tap_stream_id]
                        whitelisted_row = {k: v for k, v in row['o'].items()
                                           if k not in stream_map_entry['blacklist']}
                        record_message = common.row_to_singer_record(
                            stream_map_entry['stream'],
                            whitelisted_row,
                            common.get_stream_version(tap_stream_id, state),
                            time_extracted)
                        singer.write_message(record_message)
                    elif row_op in ['u']:
                        tap_stream_id = generate_tap_stream_id_for_row(row)
                        stream_map_entry = streams_map[tap_stream_id]
                        # if '$set' in row['o'].keys():
                        #     obj = dict(row['o2'], **row['o']['$set'])
                        # else:
                        #     obj = row['o']
                        whitelisted_row = {k: v for k, v in row['o'].items()
                                           if k not in stream_map_entry['blacklist']}
                        record_message = common.row_to_singer_record(
                            stream_map_entry['stream'],
                            whitelisted_row,
                            common.get_stream_version(tap_stream_id, state),
                            time_extracted)
                        singer.write_message(record_message)
                    elif row_op == 'd':
                        tap_stream_id = generate_tap_stream_id_for_row(row)
                        stream_map_entry = streams_map[tap_stream_id]

                        # Delete ops only contain the _id of the row deleted,
                        # so build a minimal dict carrying just that key.
                        whitelisted_row = {}
                        whitelisted_row['_id'] = row['o']['_id']
                        whitelisted_row[SDC_DELETED_AT] = row['ts']

                        record_message = common.row_to_singer_record(
                            stream_map_entry['stream'],
                            whitelisted_row,
                            common.get_stream_version(tap_stream_id, state),
                            time_extracted)
                        singer.write_message(record_message)
                    else:
                        LOGGER.info("Skipping op for table %s as it is not an INSERT, UPDATE, or DELETE", row['ns'])

                state = update_bookmarks(state, streams_map, row['ts'])
            except InvalidBSON as e:
                LOGGER.info(e)
                continue

            if rows_saved % 1000 == 0:
                singer.write_state(state)

    # Send state message at the end
    singer.write_state(state)
def sync_oplog_stream(client, streams, state):
    streams_map = generate_streams_map(streams)

    for tap_stream_id in streams_map.keys():
        common.whitelist_bookmark_keys(BOOKMARK_KEYS, tap_stream_id, state)

    for tap_stream_id, bookmark in state.get('bookmarks', {}).items():
        columns = streams_map.get(tap_stream_id)
        if not columns:
            continue
        oplog_ts = min([timestamp.Timestamp(v['oplog_ts_time'], v['oplog_ts_inc'])
                        for k, v in state.get('bookmarks', {}).items()
                        if streams_map.get(k)])

    LOGGER.info("Starting oplog replication with ts=%s", oplog_ts)

    time_extracted = utils.now()
    rows_saved = 0
    ops_skipped = 0

    with client.local.oplog.rs.find({'ts': {'$gt': oplog_ts}},
                                    oplog_replay=True) as cursor:
        for row in cursor:
            if row['op'] == 'n':
                LOGGER.info('Skipping noop op')
            elif not streams_map.get(generate_tap_stream_id_for_row(row)):
                ops_skipped = ops_skipped + 1
                if ops_skipped % UPDATE_BOOKMARK_PERIOD == 0:
                    LOGGER.info("Skipped %s ops so far as they were not for selected tables; %s rows extracted",
                                ops_skipped, rows_saved)
            else:
                row_op = row['op']
                if row_op in ['i', 'u']:
                    tap_stream_id = generate_tap_stream_id_for_row(row)
                    stream_map_entry = streams_map[tap_stream_id]
                    whitelisted_row = {k: v for k, v in row['o'].items()
                                       if k in stream_map_entry['columns']}
                    record_message = common.row_to_singer_record(
                        stream_map_entry['stream'],
                        whitelisted_row,
                        common.get_stream_version(tap_stream_id, state),
                        time_extracted)
                    singer.write_message(record_message)
                elif row_op == 'd':
                    tap_stream_id = generate_tap_stream_id_for_row(row)
                    stream_map_entry = streams_map[tap_stream_id]

                    # Delete ops only contain the _id of the row deleted
                    whitelisted_row = {column_name: None
                                       for column_name in stream_map_entry['columns']}
                    whitelisted_row['_id'] = row['o']['_id']
                    whitelisted_row[SDC_DELETED_AT] = row['ts']

                    record_message = common.row_to_singer_record(
                        stream_map_entry['stream'],
                        whitelisted_row,
                        common.get_stream_version(tap_stream_id, state),
                        time_extracted)
                    singer.write_message(record_message)
                else:
                    LOGGER.info("Skipping op for table %s as it is not an INSERT, UPDATE, or DELETE", row['ns'])

            state = update_bookmarks(state, streams_map, row['ts'])