def test_write_oplog_progress(self):
    """Test write_oplog_progress under several circumstances
    """
    os.system('touch temp_config.txt')
    config_file_path = os.getcwd() + '/temp_config.txt'
    c = Connector(main_address, config_file_path, None, ['test.test'],
                  '_id', None, None)

    #test that None is returned if there is no config file specified.
    self.assertEqual(c.write_oplog_progress(), None)

    c.oplog_progress.get_dict()[1] = Timestamp(12, 34)
    #pretend to insert a thread/timestamp pair
    c.write_oplog_progress()

    data = json.load(open(config_file_path, 'r'))
    self.assertEqual(1, int(data[0]))
    self.assertEqual(long_to_bson_ts(int(data[1])), Timestamp(12, 34))

    #ensure the temp file was deleted
    self.assertFalse(os.path.exists(config_file_path + '~'))

    #ensure that updates work properly
    c.oplog_progress.get_dict()[1] = Timestamp(44, 22)
    c.write_oplog_progress()

    config_file = open(config_file_path, 'r')
    data = json.load(config_file)
    self.assertEqual(1, int(data[0]))
    self.assertEqual(long_to_bson_ts(int(data[1])), Timestamp(44, 22))

    os.system('rm ' + config_file_path)
    config_file.close()
    print("PASSED TEST WRITE OPLOG PROGRESS")
def test_write_oplog_progress(self):
    """Test write_oplog_progress under several circumstances
    """
    os.system('touch temp_config.txt')
    config_file_path = os.getcwd() + '/temp_config.txt'
    conn = Connector(MAIN_ADDRESS, config_file_path, None, ['test.test'],
                     '_id', None, None)

    #test that None is returned if there is no config file specified.
    self.assertEqual(conn.write_oplog_progress(), None)

    conn.oplog_progress.get_dict()[1] = Timestamp(12, 34)
    #pretend to insert a thread/timestamp pair
    conn.write_oplog_progress()

    data = json.load(open(config_file_path, 'r'))
    self.assertEqual(1, int(data[0]))
    self.assertEqual(long_to_bson_ts(int(data[1])), Timestamp(12, 34))

    #ensure the temp file was deleted
    self.assertFalse(os.path.exists(config_file_path + '~'))

    #ensure that updates work properly
    conn.oplog_progress.get_dict()[1] = Timestamp(44, 22)
    conn.write_oplog_progress()

    config_file = open(config_file_path, 'r')
    data = json.load(config_file)
    self.assertEqual(1, int(data[0]))
    self.assertEqual(long_to_bson_ts(int(data[1])), Timestamp(44, 22))

    os.system('rm ' + config_file_path)
    config_file.close()
def read_oplog_progress(self):
    """Reads oplog progress from file provided by user.
    This method is only called once before any threads are spawned.
    """

    if self.oplog_checkpoint is None:
        return None

    # Check for empty file
    try:
        if os.stat(self.oplog_checkpoint).st_size == 0:
            logging.info("MongoConnector: Empty oplog progress file.")
            return None
    except OSError:
        return None

    source = open(self.oplog_checkpoint, 'r')
    try:
        data = json.load(source)
    except ValueError:  # empty or corrupt file
        err_msg = "MongoConnector: Can't read oplog progress file."
        reason = "It may be empty or corrupt."
        logging.info("%s %s" % (err_msg, reason))
        source.close()
        return None

    source.close()

    count = 0
    oplog_dict = self.oplog_progress.get_dict()
    for count in range(0, len(data), 2):
        oplog_str = data[count]
        ts = data[count + 1]
        oplog_dict[oplog_str] = util.long_to_bson_ts(ts)
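Taken together, the write tests and this reader imply a simple checkpoint layout: a flat JSON list that alternates an oplog identifier with its timestamp packed into a 64-bit long. A minimal, self-contained sketch of that round trip follows; the file name, key name, and helper structure are illustrative assumptions, not library code.

# Assumed sketch of the checkpoint layout read_oplog_progress expects:
# a flat JSON list [oplog_key, packed_timestamp, oplog_key, packed_timestamp, ...]
import json

progress = {"shard01": 0x1234567890abcdef}  # hypothetical oplog name -> packed ts

# write: flatten the dict into alternating key/value entries
with open("temp_config.txt", "w") as dest:
    flat = []
    for name, packed_ts in progress.items():
        flat.extend([name, packed_ts])
    json.dump(flat, dest)

# read: rebuild the dict, mirroring the loop in read_oplog_progress above
with open("temp_config.txt", "r") as source:
    data = json.load(source)

restored = {}
for i in range(0, len(data), 2):
    restored[data[i]] = data[i + 1]
assert restored == progress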
def test_bson_ts_to_long(self):
    """Test bson_ts_to_long and long_to_bson_ts
    """
    ts = timestamp.Timestamp(0x12345678, 0x90abcdef)
    self.assertEqual(0x1234567890abcdef, bson_ts_to_long(ts))
    self.assertEqual(long_to_bson_ts(0x1234567890abcdef), ts)
    print("PASSED BSON TS TO LONG")
def test_bson_ts_to_long(self):
    """Test bson_ts_to_long and long_to_bson_ts
    """
    tstamp = timestamp.Timestamp(0x12345678, 0x90abcdef)
    self.assertEqual(0x1234567890abcdef, bson_ts_to_long(tstamp))
    self.assertEqual(long_to_bson_ts(0x1234567890abcdef), tstamp)
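The constants in this test pin down the packing: the Timestamp's time field occupies the high 32 bits of the long and its inc field the low 32 bits. Below is a minimal sketch of conversion helpers consistent with those assertions; it is an assumed implementation, the project ships its own versions in its util module.

# Sketch of the timestamp <-> long conversions implied by the test above.
from bson.timestamp import Timestamp

def bson_ts_to_long(ts):
    # high 32 bits: seconds since epoch; low 32 bits: increment
    return (ts.time << 32) + ts.inc

def long_to_bson_ts(val):
    return Timestamp(val >> 32, val & 0xFFFFFFFF)

assert bson_ts_to_long(Timestamp(0x12345678, 0x90abcdef)) == 0x1234567890abcdef
assert long_to_bson_ts(0x1234567890abcdef) == Timestamp(0x12345678, 0x90abcdef)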
def test_dump_collection(self):
    """Test dump_collection in oplog_manager. Assertion failure if it
    doesn't pass
    """
    test_oplog, primary_conn, search_ts = self.get_oplog_thread()
    solr = DocManager()
    test_oplog.doc_manager = solr

    #with documents
    primary_conn['test']['test'].insert({'name': 'paulie'})
    search_ts = test_oplog.get_last_oplog_timestamp()
    test_oplog.dump_collection()

    test_oplog.doc_manager.commit()
    solr_results = solr._search()
    self.assertEqual(len(solr_results), 1)
    solr_doc = solr_results[0]
    self.assertEqual(long_to_bson_ts(solr_doc['_ts']), search_ts)
    self.assertEqual(solr_doc['name'], 'paulie')
    self.assertEqual(solr_doc['ns'], 'test.test')
def test_dump_collection(self):
    """Test dump_collection in oplog_manager. Assertion failure if it
    doesn't pass
    """
    test_oplog, primary_conn, oplog_coll = self.get_oplog_thread()
    solr = DocManager()
    test_oplog.doc_manager = solr

    #with documents
    primary_conn['test']['test'].insert({'name': 'paulie'})
    search_ts = test_oplog.get_last_oplog_timestamp()
    test_oplog.dump_collection()

    test_oplog.doc_manager.commit()
    solr_results = solr._search()
    self.assertEqual(len(solr_results), 1)
    solr_doc = solr_results[0]
    self.assertEqual(long_to_bson_ts(solr_doc['_ts']), search_ts)
    self.assertEqual(solr_doc['name'], 'paulie')
    self.assertEqual(solr_doc['ns'], 'test.test')

    #test_oplog.join()
    print("PASSED TEST DUMP COLLECTION")
def test_dump_collection(self):
    """Test dump_collection in oplog_manager. Assertion failure if it
    doesn't pass
    """
    test_oplog, primary_conn, oplog_coll, mongos = self.get_oplog_thread()
    solr = DocManager()
    test_oplog.doc_manager = solr

    # with documents
    safe_mongo_op(mongos['alpha']['foo'].insert, {'name': 'paulie'})
    search_ts = test_oplog.get_last_oplog_timestamp()
    test_oplog.dump_collection()

    test_oplog.doc_manager.commit()
    solr_results = solr._search()
    assert (len(solr_results) == 1)
    solr_doc = solr_results[0]
    assert (long_to_bson_ts(solr_doc['_ts']) == search_ts)
    assert (solr_doc['name'] == 'paulie')
    assert (solr_doc['ns'] == 'alpha.foo')

    print("PASSED TEST DUMP COLLECTION")
def test_dump_collection(self):
    """Test dump_collection in oplog_manager. Assertion failure if it
    doesn't pass
    """
    test_oplog, primary_conn, oplog_coll = self.get_oplog_thread()
    solr = DocManager()
    test_oplog.doc_manager = solr

    # with documents
    primary_conn["test"]["test"].insert({"name": "paulie"})
    search_ts = test_oplog.get_last_oplog_timestamp()
    test_oplog.dump_collection()

    test_oplog.doc_manager.commit()
    solr_results = solr._search()
    self.assertEqual(len(solr_results), 1)
    solr_doc = solr_results[0]
    self.assertEqual(long_to_bson_ts(solr_doc["_ts"]), search_ts)
    self.assertEqual(solr_doc["name"], "paulie")
    self.assertEqual(solr_doc["ns"], "test.test")

    # test_oplog.join()
    print("PASSED TEST DUMP COLLECTION")
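These dump_collection tests exercise only a narrow doc-manager surface: upsert, commit, and _search here, plus search, remove, and get_last_doc in the rollback code below. The following is a hypothetical in-memory stand-in with that surface, useful for following the tests without a real Solr backend; the class name and storage choice are assumptions, not the shipped simulator.

# Hypothetical in-memory stand-in exposing the doc-manager surface the tests use.
# Documents are assumed to carry '_id', 'ns', and a packed '_ts' field, as set
# by dump_collection and rollback above.
class InMemoryDocManager(object):
    def __init__(self):
        self.docs = {}

    def upsert(self, doc):
        self.docs[doc['_id']] = doc          # keyed by _id

    def remove(self, doc):
        self.docs.pop(doc['_id'], None)

    def commit(self):
        pass                                 # nothing is buffered in memory

    def _search(self):
        return list(self.docs.values())      # everything, as the dump tests expect

    def search(self, start_ts, end_ts):
        # docs whose packed '_ts' falls inside the rollback window
        return [d for d in self.docs.values() if start_ts <= d['_ts'] <= end_ts]

    def get_last_doc(self):
        if not self.docs:
            return None
        return max(self.docs.values(), key=lambda d: d['_ts'])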
def rollback(self):
    """Rollback target system to consistent state. The strategy is to find
    the latest timestamp in the target system and the largest timestamp in
    the oplog less than the latest target system timestamp. This defines
    the rollback window and we just roll these back until the oplog and
    target system are in consistent states.
    """
    self.doc_manager.commit()
    last_inserted_doc = self.doc_manager.get_last_doc()

    if last_inserted_doc is None:
        return None

    target_ts = util.long_to_bson_ts(last_inserted_doc['_ts'])
    last_oplog_entry = self.oplog.find_one(
        {'ts': {'$lte': target_ts}},
        sort=[('$natural', pymongo.DESCENDING)])

    if last_oplog_entry is None:
        return None

    rollback_cutoff_ts = last_oplog_entry['ts']
    start_ts = util.bson_ts_to_long(rollback_cutoff_ts)
    end_ts = last_inserted_doc['_ts']

    rollback_set = {}  # this is a dictionary of ns:list of docs
    for doc in self.doc_manager.search(start_ts, end_ts):
        if doc['ns'] in rollback_set:
            rollback_set[doc['ns']].append(doc)
        else:
            rollback_set[doc['ns']] = [doc]

    for namespace, doc_list in rollback_set.items():
        database, coll = namespace.split('.', 1)
        obj_id = bson.objectid.ObjectId
        bson_obj_id_list = [obj_id(doc['_id']) for doc in doc_list]

        to_update = util.retry_until_ok(
            self.main_connection[database][coll].find,
            {'_id': {'$in': bson_obj_id_list}})
        #doc list are docs in target system, to_update are docs in mongo
        doc_hash = {}  # hash by _id
        for doc in doc_list:
            doc_hash[bson.objectid.ObjectId(doc['_id'])] = doc

        to_index = []
        count = 0
        while True:
            try:
                for doc in to_update:
                    if doc['_id'] in doc_hash:
                        del doc_hash[doc['_id']]
                        to_index.append(doc)
                break
            except (pymongo.errors.OperationFailure,
                    pymongo.errors.AutoReconnect):
                count += 1
                if count > 60:
                    sys.exit(1)
                time.sleep(1)

        #delete the inconsistent documents
        for doc in doc_hash.values():
            self.doc_manager.remove(doc)

        #insert the ones from mongo
        for doc in to_index:
            doc['_ts'] = util.bson_ts_to_long(rollback_cutoff_ts)
            doc['ns'] = namespace
            try:
                self.doc_manager.upsert(doc)
            except SystemError:
                logging.error("Unable to insert %s" % (doc))

    return rollback_cutoff_ts
def dump_collection(self):
    """Dumps collection into the target system.

    This method is called when we're initializing the cursor and have no
    configs i.e. when we're starting for the first time.
    """

    dump_set = self.namespace_set

    #no namespaces specified
    if not self.namespace_set:
        db_list = self.main_connection.database_names()
        for db in db_list:
            if db == "config" or db == "local":
                continue
            coll_list = self.main_connection[db].collection_names()
            for coll in coll_list:
                if coll.startswith("system"):
                    continue
                namespace = str(db) + "." + str(coll)
                dump_set.append(namespace)

    long_ts = None
    for namespace in dump_set:
        db, coll = namespace.split('.', 1)
        target_coll = self.main_connection[db][coll]
        cursor = util.retry_until_ok(target_coll.find)
        cursor = cursor.sort('$natural', pymongo.DESCENDING)

        oplog_cursor = util.retry_until_ok(self.oplog.find)
        oplog_cursor = oplog_cursor.sort('$natural', pymongo.DESCENDING)

        for entry in oplog_cursor:
            if entry['op'] != 'i':
                continue
            #The 'o' field represents the document
            search_doc = entry['o']
            cursor.rewind()
            for doc in cursor:
                if search_doc == doc:
                    long_ts = util.bson_ts_to_long(entry['ts'])
                    break
            if long_ts:
                break

        cursor.rewind()
        try:
            for doc in cursor:
                doc['ns'] = namespace
                doc['_ts'] = long_ts
                self.doc_manager.upsert(doc)
        except (pymongo.errors.AutoReconnect,
                pymongo.errors.OperationFailure):
            err_msg = "OplogManager: Failed during dump collection"
            effect = "cannot recover!"
            logging.error('%s %s %s' % (err_msg, effect, self.oplog))
            self.running = False
            return

    if long_ts:
        long_ts = util.long_to_bson_ts(long_ts)
    else:
        # Implies that we are just initiating the set
        long_ts = self.get_last_oplog_timestamp()

    return long_ts
def rollback(self):
    """Rollback target system to consistent state. The strategy is to find
    the latest timestamp in the target system and the largest timestamp in
    the oplog less than the latest target system timestamp. This defines
    the rollback window and we just roll these back until the oplog and
    target system are in consistent states.
    """
    self.doc_manager.commit()
    last_inserted_doc = self.doc_manager.get_last_doc()

    if last_inserted_doc is None:
        return None

    target_ts = util.long_to_bson_ts(last_inserted_doc['_ts'])
    last_oplog_entry = self.oplog.find_one(
        {'ts': {'$lte': target_ts}},
        sort=[('$natural', pymongo.DESCENDING)])

    if last_oplog_entry is None:
        return None

    rollback_cutoff_ts = last_oplog_entry['ts']
    start_ts = util.bson_ts_to_long(rollback_cutoff_ts)
    end_ts = last_inserted_doc['_ts']

    rollback_set = {}  # this is a dictionary of ns:list of docs
    for doc in self.doc_manager.search(start_ts, end_ts):
        if doc['ns'] in rollback_set:
            rollback_set[doc['ns']].append(doc)
        else:
            rollback_set[doc['ns']] = [doc]

    for namespace, doc_list in rollback_set.items():
        database, coll = namespace.split('.', 1)
        obj_id = bson.objectid.ObjectId
        bson_obj_id_list = [obj_id(doc['_id']) for doc in doc_list]

        to_update = util.retry_until_ok(
            self.main_connection[database][coll].find,
            {'_id': {'$in': bson_obj_id_list}})
        #doc list are docs in target system, to_update are docs in mongo
        doc_hash = {}  # hash by _id
        for doc in doc_list:
            doc_hash[bson.objectid.ObjectId(doc['_id'])] = doc

        to_index = []
        count = 0
        while True:
            try:
                for doc in to_update:
                    if doc['_id'] in doc_hash:
                        del doc_hash[doc['_id']]
                        to_index.append(doc)
                break
            except (pymongo.errors.OperationFailure,
                    pymongo.errors.AutoReconnect):
                count += 1
                if count > 60:
                    sys.exit(1)
                time.sleep(1)

        #delete the inconsistent documents
        for doc in doc_hash.values():
            self.doc_manager.remove(doc)

        #insert the ones from mongo
        for doc in to_index:
            doc['_ts'] = util.bson_ts_to_long(rollback_cutoff_ts)
            doc['ns'] = namespace
            try:
                self.doc_manager.upsert(doc)
            except SystemError:
                logging.error("Unable to insert %s" % (doc))

    return rollback_cutoff_ts
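Both dump_collection and rollback lean on util.retry_until_ok to ride out transient failovers, and rollback hand-rolls the same pattern in its while True loop. The following is a minimal sketch of such a helper under those assumptions (retry on AutoReconnect or OperationFailure, sleep between attempts, give up after a bounded number of tries); it is not necessarily the library's exact implementation.

# Sketch of a retry helper matching the pattern used above (assumed, not the
# library's exact implementation).
import time
import pymongo.errors

def retry_until_ok(func, *args, **kwargs):
    attempts = 0
    while True:
        try:
            return func(*args, **kwargs)
        except (pymongo.errors.AutoReconnect,
                pymongo.errors.OperationFailure):
            attempts += 1
            if attempts > 60:                # mirror the 60-try cap in rollback()
                raise
            time.sleep(1)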