def test_rollback(self):
    """Tests rollback.

    We force a rollback by adding a doc, killing the primary,
    adding another doc, killing the new primary, and then
    restarting both.
    """
    primary_conn = Connection('localhost', int(PORTS_ONE['PRIMARY']))

    # Seed one doc and wait until mongo and the target system see it.
    conn['test']['test'].insert({'name': 'paul'}, safe=True)
    while conn['test']['test'].find({'name': 'paul'}).count() != 1:
        time.sleep(1)
    while len(s._search()) != 1:
        time.sleep(1)

    # Kill the primary and wait for the secondary to take over.
    killMongoProc('localhost', PORTS_ONE['PRIMARY'])
    new_primary_conn = Connection('localhost', int(PORTS_ONE['SECONDARY']))
    admin = new_primary_conn['admin']
    while admin.command("isMaster")['ismaster'] is False:
        time.sleep(1)
    time.sleep(5)

    # Insert against the new primary; retry while the replica set
    # settles, but fail this test instead of killing the whole
    # test process with sys.exit(1).
    count = 0
    while True:
        try:
            conn['test']['test'].insert({'name': 'pauline'}, safe=True)
            break
        except Exception:  # inserts fail until the new primary is ready
            time.sleep(1)
            count += 1
            if count >= 60:
                self.fail('Call to insert failed too many times '
                          'in test_rollback')

    while len(s._search()) != 2:
        time.sleep(1)
    result_set = s._search()
    pauline_doc = conn['test']['test'].find_one({'name': 'pauline'})
    self.assertEqual(len(result_set), 2)
    # make sure pauline is there
    for doc in result_set:
        if doc['name'] == 'pauline':
            self.assertEqual(doc['_id'], str(pauline_doc['_id']))

    # Kill the new primary and restart both members; the 'pauline'
    # write should be rolled back out of the target system.
    killMongoProc('localhost', PORTS_ONE['SECONDARY'])
    startMongoProc(PORTS_ONE['PRIMARY'], "demo-repl", "/replset1a",
                   "/replset1a.log", None)
    while primary_conn['admin'].command("isMaster")['ismaster'] is False:
        time.sleep(1)
    startMongoProc(PORTS_ONE['SECONDARY'], "demo-repl", "/replset1b",
                   "/replset1b.log", None)
    time.sleep(2)

    result_set = s._search()
    self.assertEqual(len(result_set), 1)
    for doc in result_set:
        self.assertEqual(doc['name'], 'paul')
    find_cursor = retry_until_ok(conn['test']['test'].find)
    self.assertEqual(retry_until_ok(find_cursor.count), 1)
    print("PASSED TEST ROLLBACK")
def test_rollback(self):
    """Tests rollback.

    We force a rollback by adding a doc, killing the primary,
    adding another doc, killing the new primary, and then
    restarting both.
    """
    primary_conn = Connection('localhost', int(PORTS_ONE['PRIMARY']))

    # Seed one doc; wait for mongo and the elastic target to see it.
    self.conn['test']['test'].insert({'name': 'paul'}, safe=True)
    while self.conn['test']['test'].find({'name': 'paul'}).count() != 1:
        time.sleep(1)
    while len(self.elastic_doc._search()) != 1:
        time.sleep(1)

    kill_mongo_proc('localhost', PORTS_ONE['PRIMARY'])

    new_primary_conn = Connection('localhost', int(PORTS_ONE['SECONDARY']))
    admin = new_primary_conn['admin']
    while admin.command("isMaster")['ismaster'] is False:
        time.sleep(1)
    time.sleep(5)

    # Retry the insert while the new primary settles; fail this test
    # instead of killing the whole test process with sys.exit(1).
    count = 0
    while True:
        try:
            self.conn['test']['test'].insert(
                {'name': 'pauline'}, safe=True)
            break
        except OperationFailure:
            time.sleep(1)
            count += 1
            if count >= 60:
                self.fail('Call to insert failed too many times '
                          'in test_rollback')

    while (len(self.elastic_doc._search()) != 2):
        time.sleep(1)

    result_set_1 = self.elastic_doc._search()
    result_set_2 = self.conn['test']['test'].find_one({'name': 'pauline'})
    self.assertEqual(len(result_set_1), 2)
    # make sure pauline is there
    for item in result_set_1:
        if item['name'] == 'pauline':
            self.assertEqual(item['_id'], str(result_set_2['_id']))

    # Kill the new primary and restart both; 'pauline' is rolled back.
    kill_mongo_proc('localhost', PORTS_ONE['SECONDARY'])
    start_mongo_proc(PORTS_ONE['PRIMARY'], "demo-repl", "/replset1a",
                     "/replset1a.log", None)
    while primary_conn['admin'].command("isMaster")['ismaster'] is False:
        time.sleep(1)
    start_mongo_proc(PORTS_ONE['SECONDARY'], "demo-repl", "/replset1b",
                     "/replset1b.log", None)
    time.sleep(2)

    result_set_1 = self.elastic_doc._search()
    self.assertEqual(len(result_set_1), 1)
    for item in result_set_1:
        self.assertEqual(item['name'], 'paul')
    find_cursor = retry_until_ok(self.conn['test']['test'].find)
    self.assertEqual(retry_until_ok(find_cursor.count), 1)
def test_rollback(self):
    """Tests rollback.

    Rollback is performed by inserting one document, killing primary,
    inserting another doc, killing secondary, and then restarting both.
    """
    primary_conn = Connection('localhost', int(PORTS_ONE['PRIMARY']))

    self.conn if False else None  # placeholder removed
def test_rollback(self):
    """Tests rollback.

    We force a rollback by adding a doc, killing the primary,
    adding another doc, killing the new primary, and then
    restarting both.
    """
    primary_conn = Connection('localhost', int(PORTS_ONE['PRIMARY']))

    # Seed one doc; wait for mongo and the doc manager to see it.
    conn['test']['test'].insert({'name': 'paul'}, safe=True)
    while conn['test']['test'].find({'name': 'paul'}).count() != 1:
        time.sleep(1)
    while len(doc_manager._search()) != 1:
        time.sleep(1)

    killMongoProc('localhost', PORTS_ONE['PRIMARY'])

    new_primary_conn = Connection('localhost', int(PORTS_ONE['SECONDARY']))
    admin = new_primary_conn['admin']
    while admin.command("isMaster")['ismaster'] is False:
        time.sleep(1)
    time.sleep(5)

    # Retry the insert while the new primary settles; fail this test
    # instead of killing the whole test process with sys.exit(1).
    count = 0
    while True:
        try:
            conn['test']['test'].insert({'name': 'pauline'}, safe=True)
            break
        except Exception:  # inserts fail until the new primary is ready
            time.sleep(1)
            count += 1
            if count >= 60:
                self.fail('Call to insert failed too many times '
                          'in test_rollback')

    while (len(doc_manager._search()) != 2):
        time.sleep(1)
    result_set = doc_manager._search()
    pauline_doc = conn['test']['test'].find_one({'name': 'pauline'})
    self.assertEqual(len(result_set), 2)
    # make sure pauline is there
    for item in result_set:
        if item['name'] == 'pauline':
            self.assertEqual(item['_id'], pauline_doc['_id'])

    # Kill the new primary and restart both; 'pauline' is rolled back.
    killMongoProc('localhost', PORTS_ONE['SECONDARY'])
    startMongoProc(PORTS_ONE['PRIMARY'], "demo-repl", "/replset1a",
                   "/replset1a.log", None)
    while primary_conn['admin'].command("isMaster")['ismaster'] is False:
        time.sleep(1)
    startMongoProc(PORTS_ONE['SECONDARY'], "demo-repl", "/replset1b",
                   "/replset1b.log", None)
    time.sleep(2)

    result_set = doc_manager._search()
    self.assertEqual(len(result_set), 1)
    for item in result_set:
        self.assertEqual(item['name'], 'paul')
    find_cursor = retry_until_ok(conn['test']['test'].find)
    self.assertEqual(retry_until_ok(find_cursor.count), 1)
    print("PASSED TEST ROLLBACK")
def test_stressed_rollback(self):
    """Stress-test rollback with NUMBER_OF_DOC_DIRS documents.

    Strategy for rollback is the same as before, just with many docs.
    """
    # Wait for the target to be empty before seeding.
    while len(self.elastic_doc._search()) != 0:
        time.sleep(1)

    for seq in range(NUMBER_OF_DOC_DIRS):
        self.conn['test']['test'].insert({'name': 'Paul ' + str(seq)},
                                         safe=True)
    while len(self.elastic_doc._search()) != NUMBER_OF_DOC_DIRS:
        time.sleep(1)

    primary_conn = Connection('localhost', int(PORTS_ONE['PRIMARY']))
    kill_mongo_proc('localhost', PORTS_ONE['PRIMARY'])

    new_primary_conn = Connection('localhost', int(PORTS_ONE['SECONDARY']))
    admin = new_primary_conn['admin']
    while admin.command("isMaster")['ismaster'] is False:
        time.sleep(1)
    time.sleep(5)

    # Insert a second batch against the new primary; note the counter
    # is bumped before the insert, so a failed write skips that number.
    count = -1
    while count + 1 < NUMBER_OF_DOC_DIRS:
        try:
            count += 1
            self.conn['test']['test'].insert(
                {'name': 'Pauline ' + str(count)}, safe=True)
        except (OperationFailure, AutoReconnect):
            time.sleep(1)

    while (len(self.elastic_doc._search()) !=
           self.conn['test']['test'].find().count()):
        time.sleep(1)

    for doc in self.elastic_doc._search():
        if 'Pauline' in doc['name']:
            mongo_doc = self.conn['test']['test'].find_one(
                {'name': doc['name']})
            self.assertEqual(doc['_id'], str(mongo_doc['_id']))

    # Kill the new primary and restart both members.
    kill_mongo_proc('localhost', PORTS_ONE['SECONDARY'])
    start_mongo_proc(PORTS_ONE['PRIMARY'], "demo-repl", "/replset1a",
                     "/replset1a.log", None)
    while primary_conn['admin'].command("isMaster")['ismaster'] is False:
        time.sleep(1)
    time.sleep(1)
    start_mongo_proc(PORTS_ONE['SECONDARY'], "demo-repl", "/replset1b",
                     "/replset1b.log", None)

    # After the rollback only the 'Paul' batch should remain.
    while len(self.elastic_doc._search()) != NUMBER_OF_DOC_DIRS:
        time.sleep(5)

    remaining = self.elastic_doc._search()
    self.assertEqual(len(remaining), NUMBER_OF_DOC_DIRS)
    for doc in remaining:
        self.assertTrue('Paul' in doc['name'])
    find_cursor = retry_until_ok(self.conn['test']['test'].find)
    self.assertEqual(retry_until_ok(find_cursor.count),
                     NUMBER_OF_DOC_DIRS)
def test_stressed_rollback(self):
    """Test stressed rollback with number of documents equal to specified
    in global variable. Rollback is performed like before, but with more
    documents.
    """
    while len(doc_manager._search()) != 0:
        time.sleep(1)
    for i in range(0, NUMBER_OF_DOCS):
        conn['test']['test'].insert({'name': 'Paul ' + str(i)},
                                    safe=True)

    while len(doc_manager._search()) != NUMBER_OF_DOCS:
        time.sleep(1)
    primary_conn = Connection('localhost', int(PORTS_ONE['PRIMARY']))
    killMongoProc('localhost', PORTS_ONE['PRIMARY'])

    new_primary_conn = Connection('localhost', int(PORTS_ONE['SECONDARY']))
    admin_db = new_primary_conn['admin']
    while admin_db.command("isMaster")['ismaster'] is False:
        time.sleep(1)
    time.sleep(5)

    # Insert a second batch against the new primary; the counter is
    # bumped before the insert, so a failed write skips that number.
    count = -1
    while count + 1 < NUMBER_OF_DOCS:
        try:
            count += 1
            conn['test']['test'].insert(
                {'name': 'Pauline ' + str(count)}, safe=True)
        except (OperationFailure, AutoReconnect):
            time.sleep(1)
    while (len(doc_manager._search()) !=
           conn['test']['test'].find().count()):
        time.sleep(1)

    result_set = doc_manager._search()
    for item in result_set:
        if 'Pauline' in item['name']:
            mongo_doc = conn['test']['test'].find_one(
                {'name': item['name']})
            self.assertEqual(item['_id'], mongo_doc['_id'])

    # Kill the new primary and restart both members.
    killMongoProc('localhost', PORTS_ONE['SECONDARY'])
    startMongoProc(PORTS_ONE['PRIMARY'], "demo-repl", "/replset1a",
                   "/replset1a.log", None)
    while primary_conn['admin'].command("isMaster")['ismaster'] is False:
        time.sleep(1)
    startMongoProc(PORTS_ONE['SECONDARY'], "demo-repl", "/replset1b",
                   "/replset1b.log", None)

    # After the rollback only the 'Paul' batch should remain.
    while (len(doc_manager._search()) != NUMBER_OF_DOCS):
        time.sleep(5)

    result_set = doc_manager._search()
    self.assertEqual(len(result_set), NUMBER_OF_DOCS)
    for item in result_set:
        self.assertTrue('Paul' in item['name'])
    find_cursor = retry_until_ok(conn['test']['test'].find)
    self.assertEqual(retry_until_ok(find_cursor.count), NUMBER_OF_DOCS)
    print("PASSED TEST STRESSED ROLLBACK")
def test_rollback(self):
    """Tests rollback.

    Rollback is performed by inserting one document, killing primary,
    inserting another doc, killing secondary, and then restarting both.
    """
    primary_conn = Connection('localhost', int(PORTS_ONE['PRIMARY']))

    self.conn['test']['test'].insert({'name': 'paul'}, safe=True)
    while self.conn['test']['test'].find({'name': 'paul'}).count() != 1:
        time.sleep(1)

    kill_mongo_proc('localhost', PORTS_ONE['PRIMARY'])

    new_primary_conn = Connection('localhost', int(PORTS_ONE['SECONDARY']))
    admin_db = new_primary_conn['admin']
    while admin_db.command("isMaster")['ismaster'] is False:
        time.sleep(1)
    time.sleep(5)

    # Keep retrying the insert until the new primary accepts writes;
    # give up (and fail the test) after 60 failed attempts.
    attempts = 0
    while True:
        try:
            self.conn['test']['test'].insert({'name': 'pauline'},
                                             safe=True)
            break
        except OperationFailure:
            attempts += 1
            if attempts > 60:
                self.fail('Call to insert failed too'
                          ' many times in test_rollback')
            time.sleep(1)

    while len(self.synchronizer._search()) != 2:
        time.sleep(1)

    synced_docs = self.synchronizer._search()
    pauline_doc = self.conn['test']['test'].find_one({'name': 'pauline'})
    self.assertEqual(len(synced_docs), 2)
    for doc in synced_docs:
        if doc['name'] == 'pauline':
            self.assertEqual(doc['_id'], pauline_doc['_id'])

    # Kill the new primary and restart both members; the 'pauline'
    # write should be rolled back.
    kill_mongo_proc('localhost', PORTS_ONE['SECONDARY'])
    start_mongo_proc(PORTS_ONE['PRIMARY'], "demo-repl", "/replset1a",
                     "/replset1a.log", None)
    while primary_conn['admin'].command("isMaster")['ismaster'] is False:
        time.sleep(1)
    start_mongo_proc(PORTS_ONE['SECONDARY'], "demo-repl", "/replset1b",
                     "/replset1b.log", None)
    time.sleep(2)

    synced_docs = self.synchronizer._search()
    self.assertEqual(len(synced_docs), 1)
    for doc in synced_docs:
        self.assertEqual(doc['name'], 'paul')
    find_cursor = retry_until_ok(self.conn['test']['test'].find)
    self.assertEqual(retry_until_ok(find_cursor.count), 1)
def dump_collection(self):
    """Dumps collection into the target system.

    Returns the timestamp of the last oplog entry right before it
    starts dumping. This method is called when we're initializing the
    cursor and have no configs i.e. when we're starting for the first
    time.
    """
    # Work on a copy: the original aliased self.namespace_set, so the
    # appends below permanently mutated that attribute.
    dump_set = list(self.namespace_set)

    # No namespaces specified: discover every user collection.
    if not self.namespace_set:
        db_list = self.main_connection.database_names()
        for db in db_list:
            if db == "config" or db == "local":
                continue
            coll_list = self.main_connection[db].collection_names()
            for coll in coll_list:
                if coll.startswith("system"):
                    continue
                namespace = str(db) + "." + str(coll)
                dump_set.append(namespace)

    timestamp = util.retry_until_ok(self.get_last_oplog_timestamp)
    if timestamp is None:
        return None
    long_ts = util.bson_ts_to_long(timestamp)

    for namespace in dump_set:
        db, coll = namespace.split('.', 1)
        target_coll = self.main_connection[db][coll]
        cursor = util.retry_until_ok(target_coll.find)
        try:
            for doc in cursor:
                # Tag each document with its namespace and dump ts.
                doc['ns'] = namespace
                doc['_ts'] = long_ts
                self.doc_manager.upsert(doc)
        except pymongo.errors.AutoReconnect as e:
            err_msg = "OplogManager: Failed during dump collection. "
            err_msg += "AutoReconnect error: %s." % e
            effect = " Cannot recover!"
            logging.error('%s %s %s' % (err_msg, effect, self.oplog))
            self.running = False
            return
        except pymongo.errors.OperationFailure as e:
            err_msg = "OplogManager: Failed during dump collection"
            err_msg += "OperationFailure error: %s." % e
            effect = " Cannot recover!"
            logging.error('%s %s %s' % (err_msg, effect, self.oplog))
            self.running = False
            return
    return timestamp
def dump_collection(self):
    """Dumps collection into the target system.

    This method is called when we're initializing the cursor and have
    no configs i.e. when we're starting for the first time.
    """
    # Work on a copy: the original aliased self.namespace_set, so the
    # appends below permanently mutated that attribute.
    dump_set = list(self.namespace_set)

    # No namespaces specified: discover every user collection.
    if not self.namespace_set:
        db_list = self.main_connection.database_names()
        for database in db_list:
            if database == "config" or database == "local":
                continue
            coll_list = self.main_connection[database].collection_names()
            for coll in coll_list:
                if coll.startswith("system"):
                    continue
                namespace = str(database) + "." + str(coll)
                dump_set.append(namespace)

    timestamp = util.retry_until_ok(self.get_last_oplog_timestamp)
    if timestamp is None:
        return None
    # Loop-invariant: compute the long form of the timestamp once.
    long_ts = util.bson_ts_to_long(timestamp)

    for namespace in dump_set:
        database, coll = namespace.split('.', 1)
        target_coll = self.main_connection[database][coll]
        cursor = util.retry_until_ok(target_coll.find)
        try:
            for doc in cursor:
                doc['ns'] = namespace
                doc['_ts'] = long_ts
                try:
                    self.doc_manager.upsert(doc)
                except SystemError:
                    # Skip individual docs the target refuses.
                    logging.error("Unable to insert %s" % (doc))
        except (pymongo.errors.AutoReconnect,
                pymongo.errors.OperationFailure):
            err_msg = "OplogManager: Failed during dump collection"
            effect = "cannot recover!"
            logging.error('%s %s %s' % (err_msg, effect, self.oplog))
            self.running = False
            return
    return timestamp
def retrieve_doc(self, entry):
    """Given the doc ID's, retrieve those documents from the mongos.
    """
    if not entry:
        return None

    namespace = entry['ns']

    # Updates carry only the changes in 'o' and the target doc id in
    # 'o2'; other operations carry the full document in 'o'.
    doc_field = 'o2' if 'o2' in entry else 'o'
    doc_id = entry[doc_field]['_id']

    db_name, coll_name = namespace.split('.', 1)
    collection = self.main_connection[db_name][coll_name]
    return util.retry_until_ok(collection.find_one, {'_id': doc_id})
def run(self):
    """Start the oplog worker.

    Tails the oplog forever (while self.running), applying each entry
    to the doc manager and advancing the checkpoint.
    """
    while self.running is True:
        cursor = self.init_cursor()
        # we've fallen too far behind
        if cursor is None and self.checkpoint is not None:
            err_msg = "OplogManager: Last entry no longer in oplog"
            effect = "cannot recover!"
            logging.error('%s %s %s' % (err_msg, effect, self.oplog))
            self.running = False
            continue
        #The only entry is the last one we processed
        if util.retry_until_ok(cursor.count) == 1:
            time.sleep(1)
            continue
        last_ts = None
        err = False
        try:
            for entry in cursor:
                # Skip entries produced by chunk migration, not by
                # client writes.
                if 'fromMigrate' in entry and entry['fromMigrate'] is True:
                    continue
                #sync the current oplog operation
                operation = entry['op']
                ns = entry['ns']
                #check if ns is excluded or not.
                #also ensure non-empty namespace set.
                if ns not in self.namespace_set and self.namespace_set:
                    continue
                #delete
                if operation == 'd':
                    entry['_id'] = entry['o']['_id']
                    self.doc_manager.remove(entry)
                #insert/update. They are equal because of lack of support
                #for partial update
                elif operation == 'i' or operation == 'u':
                    doc = self.retrieve_doc(entry)
                    if doc is not None:
                        doc['_ts'] = util.bson_ts_to_long(entry['ts'])
                        doc['ns'] = ns
                        self.doc_manager.upsert(doc)
                last_ts = entry['ts']
        except (pymongo.errors.AutoReconnect,
                pymongo.errors.OperationFailure):
            # Connection dropped mid-batch; re-auth below (if needed)
            # and retry from the checkpoint on the next iteration.
            err = True
            pass
        # NOTE(review): `primary_conn` is not defined in this method or
        # visible scope — this branch likely raises NameError when a
        # reconnect happens with auth enabled. Probably should be an
        # attribute (e.g. self.primary_connection) — TODO confirm.
        if err is True and self.auth_key is not None:
            primary_conn['admin'].authenticate(self.auth_username,
                                               self.auth_key)
            err = False
        # Persist progress only if at least one entry was applied.
        if last_ts is not None:
            self.checkpoint = last_ts
            self.update_checkpoint()
def test_retry_until_ok(self):
    """Verify that retry_until_ok keeps calling err_func until it
    succeeds."""
    outcome = retry_until_ok(err_func)
    self.assertTrue(outcome)
    # err_func's call counter should read 3 after the retries.
    self.assertEqual(err_func.counter, 3)
    print("PASSED RETRY UNTIL OK")
def get_oplog_cursor(self, timestamp):
    """Move cursor to the proper place in the oplog.

    Returns a tailable oplog cursor positioned at `timestamp`, or None
    when `timestamp` is None, when no oplog entry is <= timestamp, or
    when the entry found does not match the requested timestamp.
    """
    if timestamp is None:
        return None
    # If nothing in the oplog is at or before the timestamp, bail out.
    cursor = util.retry_until_ok(self.oplog.find, {'ts': {
        '$lte': timestamp
    }})
    if (util.retry_until_ok(cursor.count)) == 0:
        return None
    # Check to see if cursor is too stale
    while (True):
        try:
            cursor = self.oplog.find({'ts': {
                '$gte': timestamp
            }}, tailable=True, await_data=True)
            cursor = cursor.sort('$natural', pymongo.ASCENDING)
            cursor_len = cursor.count()
            break
        except (pymongo.errors.AutoReconnect,
                pymongo.errors.OperationFailure):
            # Transient server trouble: rebuild the cursor and retry.
            pass
    if cursor_len == 1:
        # means we are the end of the oplog
        self.checkpoint = timestamp  # to commit new TS after rollbacks
        return cursor
    elif cursor_len > 1:
        doc = next(cursor)
        if timestamp == doc['ts']:
            return cursor
        else:
            # error condition
            logging.error('%s Bad timestamp in config file' % self.oplog)
            return None
    else:
        # rollback, we are past the last element in the oplog
        timestamp = self.rollback()
        logging.info('Finished rollback')
        return self.get_oplog_cursor(timestamp)
def get_oplog_cursor(self, timestamp):
    """Move cursor to the proper place in the oplog.

    Returns a tailable cursor positioned at `timestamp`, or None when
    the timestamp cannot be located in the oplog.
    """
    if timestamp is None:
        return None

    # Nothing at or before the timestamp means we cannot position.
    probe = util.retry_until_ok(self.oplog.find,
                                {'ts': {'$lte': timestamp}})
    if util.retry_until_ok(probe.count) == 0:
        return None

    # Check to see if the cursor is too stale; retry building the
    # tailable cursor through transient server errors.
    while True:
        try:
            cursor = self.oplog.find(
                {'ts': {'$gte': timestamp}},
                tailable=True, await_data=True)
            cursor = cursor.sort('$natural', pymongo.ASCENDING)
            entry_count = cursor.count()
            break
        except (pymongo.errors.AutoReconnect,
                pymongo.errors.OperationFailure):
            pass

    if entry_count == 1:
        # We are at the end of the oplog.
        self.checkpoint = timestamp  # to commit new TS after rollbacks
        return cursor
    if entry_count > 1:
        first_entry = next(cursor)
        if first_entry['ts'] == timestamp:
            return cursor
        # Error condition: the saved timestamp is not in the oplog.
        logging.error('%s Bad timestamp in config file' % self.oplog)
        return None

    # We are past the last element in the oplog: roll back and retry.
    timestamp = self.rollback()
    logging.info('Finished rollback')
    return self.get_oplog_cursor(timestamp)
def dump_collection(self):
    """Dumps collection into the target system.

    This method is called when we're initializing the cursor and have
    no configs i.e. when we're starting for the first time.
    """
    # Work on a copy: the original aliased self.namespace_set, so the
    # appends below permanently mutated that attribute.
    dump_set = list(self.namespace_set)

    # No namespaces specified: discover every user collection.
    if not self.namespace_set:
        db_list = self.main_connection.database_names()
        for db in db_list:
            if db == "config" or db == "local":
                continue
            coll_list = self.main_connection[db].collection_names()
            for coll in coll_list:
                if coll.startswith("system"):
                    continue
                namespace = str(db) + "." + str(coll)
                dump_set.append(namespace)

    long_ts = None
    for namespace in dump_set:
        db, coll = namespace.split('.', 1)
        target_coll = self.main_connection[db][coll]
        cursor = util.retry_until_ok(target_coll.find)
        cursor = cursor.sort('$natural', pymongo.DESCENDING)

        # Walk the oplog backwards looking for the newest insert whose
        # document is present in the collection; that entry's ts is
        # used to stamp the dumped docs.
        oplog_cursor = util.retry_until_ok(self.oplog.find)
        oplog_cursor = oplog_cursor.sort('$natural', pymongo.DESCENDING)

        for entry in oplog_cursor:
            if entry['op'] != 'i':
                continue
            # The 'o' field represents the document
            search_doc = entry['o']
            cursor.rewind()
            for doc in cursor:
                if search_doc == doc:
                    long_ts = util.bson_ts_to_long(entry['ts'])
                    break
            if long_ts:
                break

        cursor.rewind()
        try:
            for doc in cursor:
                doc['ns'] = namespace
                doc['_ts'] = long_ts
                self.doc_manager.upsert(doc)
        except (pymongo.errors.AutoReconnect,
                pymongo.errors.OperationFailure):
            err_msg = "OplogManager: Failed during dump collection"
            effect = "cannot recover!"
            logging.error('%s %s %s' % (err_msg, effect, self.oplog))
            self.running = False
            return

    if long_ts:
        long_ts = util.long_to_bson_ts(long_ts)
    else:
        # Implies that we are just initiating the set
        long_ts = self.get_last_oplog_timestamp()
    return long_ts
def commit(self):
    """Force a refresh/commit on the Elastic index."""
    refresh_call = self.elastic.refresh
    retry_until_ok(refresh_call)
def test_rollback(self):
    """Test rollback in oplog_manager. Assertion failure if it doesn't pass
    We force a rollback by inserting a doc, killing the primary,
    inserting another doc, killing the new primary, and then
    restarting both.
    """
    os.system('rm config.txt; touch config.txt')
    start_cluster()
    test_oplog, primary_conn, mongos, oplog_coll = self.get_new_oplog()

    solr = DocManager()
    test_oplog.doc_manager = solr
    solr._delete()          # equivalent to solr.delete(q='*: *')

    obj1 = ObjectId('4ff74db3f646462b38000001')

    mongos['test']['test'].remove({})
    mongos['test']['test'].insert({'_id': obj1, 'name': 'paulie'}, safe=1)
    while (mongos['test']['test'].find().count() != 1):
        time.sleep(1)
    cutoff_ts = test_oplog.get_last_oplog_timestamp()

    obj2 = ObjectId('4ff74db3f646462b38000002')
    first_doc = {'name': 'paulie', '_ts': bson_ts_to_long(cutoff_ts),
                 'ns': 'test.test', '_id': obj1}

    #try kill one, try restarting
    killMongoProc(primary_conn.host, PORTS_ONE['PRIMARY'])

    new_primary_conn = Connection('localhost', int(PORTS_ONE['SECONDARY']))
    admin = new_primary_conn['admin']
    while admin.command("isMaster")['ismaster'] is False:
        time.sleep(1)
    time.sleep(5)

    # Insert against the new primary; retry while the set settles but
    # fail this test (instead of sys.exit) if it never succeeds.
    count = 0
    while True:
        try:
            mongos['test']['test'].insert(
                {'_id': obj2, 'name': 'paul'}, safe=1)
            break
        except Exception:  # inserts fail until the new primary is ready
            count += 1
            if count > 60:
                self.fail('Call to insert doc failed too many times')
            time.sleep(1)
    while (mongos['test']['test'].find().count() != 2):
        time.sleep(1)

    killMongoProc(primary_conn.host, PORTS_ONE['SECONDARY'])

    startMongoProc(PORTS_ONE['PRIMARY'], "demo-repl", "/replset1a",
                   "/replset1a.log", None)

    #wait for master to be established
    while primary_conn['admin'].command("isMaster")['ismaster'] is False:
        time.sleep(1)

    startMongoProc(PORTS_ONE['SECONDARY'], "demo-repl", "/replset1b",
                   "/replset1b.log", None)

    #wait for secondary to be established
    admin = new_primary_conn['admin']
    while admin.command("replSetGetStatus")['myState'] != 2:
        time.sleep(1)
    while retry_until_ok(mongos['test']['test'].find().count) != 1:
        time.sleep(1)

    self.assertEqual(str(new_primary_conn.port), PORTS_ONE['SECONDARY'])
    self.assertEqual(str(primary_conn.port), PORTS_ONE['PRIMARY'])

    last_ts = test_oplog.get_last_oplog_timestamp()
    second_doc = {'name': 'paul', '_ts': bson_ts_to_long(last_ts),
                  'ns': 'test.test', '_id': obj2}

    # Seed the target with both docs, then roll back: only the doc
    # from before the cutoff should survive.
    test_oplog.doc_manager.upsert(first_doc)
    test_oplog.doc_manager.upsert(second_doc)
    test_oplog.rollback()
    test_oplog.doc_manager.commit()
    results = solr._search()

    self.assertEqual(len(results), 1)
    results_doc = results[0]
    self.assertEqual(results_doc['name'], 'paulie')
    self.assertTrue(results_doc['_ts'] <= bson_ts_to_long(cutoff_ts))
    print("PASSED TEST ROLLBACK")
def rollback(self):
    """Rollback target system to consistent state.

    The strategy is to find the latest timestamp in the target system
    and the largest timestamp in the oplog less than the latest target
    system timestamp. This defines the rollback window and we just
    roll these back until the oplog and target system are in
    consistent states.

    Returns the oplog timestamp rolled back to, or None when the
    target is empty or no suitable oplog entry exists.
    """
    self.doc_manager.commit()
    last_inserted_doc = self.doc_manager.get_last_doc()
    if last_inserted_doc is None:
        return None

    target_ts = util.long_to_bson_ts(last_inserted_doc['_ts'])
    # Newest oplog entry at or before the target system's newest doc.
    last_oplog_entry = self.oplog.find_one(
        {'ts': {'$lte': target_ts}},
        sort=[('$natural', pymongo.DESCENDING)])
    if last_oplog_entry is None:
        return None

    rollback_cutoff_ts = last_oplog_entry['ts']
    start_ts = util.bson_ts_to_long(rollback_cutoff_ts)
    end_ts = last_inserted_doc['_ts']

    rollback_set = {}   # this is a dictionary of ns:list of docs
    # Group every target-system doc inside the rollback window by ns.
    for doc in self.doc_manager.search(start_ts, end_ts):
        if doc['ns'] in rollback_set:
            rollback_set[doc['ns']].append(doc)
        else:
            rollback_set[doc['ns']] = [doc]

    for namespace, doc_list in rollback_set.items():
        database, coll = namespace.split('.', 1)
        obj_id = bson.objectid.ObjectId
        bson_obj_id_list = [obj_id(doc['_id']) for doc in doc_list]

        to_update = util.retry_until_ok(
            self.main_connection[database][coll].find,
            {'_id': {'$in': bson_obj_id_list}})
        #doc list are docs in target system, to_update are docs in mongo
        doc_hash = {}  # hash by _id
        for doc in doc_list:
            doc_hash[bson.objectid.ObjectId(doc['_id'])] = doc

        to_index = []
        count = 0
        while True:
            try:
                # Docs still present in mongo stay; anything left in
                # doc_hash afterwards exists only in the target system.
                for doc in to_update:
                    if doc['_id'] in doc_hash:
                        del doc_hash[doc['_id']]
                        to_index.append(doc)
                break
            except (pymongo.errors.OperationFailure,
                    pymongo.errors.AutoReconnect):
                count += 1
                # NOTE(review): sys.exit(1) kills the entire process
                # after 60 failed retries — consider raising instead.
                if count > 60:
                    sys.exit(1)
                time.sleep(1)

        #delete the inconsistent documents
        for doc in doc_hash.values():
            self.doc_manager.remove(doc)

        #insert the ones from mongo
        for doc in to_index:
            doc['_ts'] = util.bson_ts_to_long(rollback_cutoff_ts)
            doc['ns'] = namespace
            try:
                self.doc_manager.upsert(doc)
            except SystemError:
                logging.error("Unable to insert %s" % (doc))

    return rollback_cutoff_ts
def start_cluster(sharded=False, key_file=None, use_mongos=True):
    """Sets up cluster with 1 shard, replica set with 3 members.

    Exits the process if a shard cannot be added to the mongos.
    """
    # Kill all spawned mongods
    killAllMongoProc('localhost', PORTS_ONE)
    killAllMongoProc('localhost', PORTS_TWO)
    # Kill all spawned mongos
    killMongosProc()

    # reset data dirs
    remove_dir(DEMO_SERVER_LOG)
    remove_dir(DEMO_SERVER_DATA)
    create_dir(DEMO_SERVER_DATA + "/standalone/journal")
    create_dir(DEMO_SERVER_DATA + "/replset1a/journal")
    create_dir(DEMO_SERVER_DATA + "/replset1b/journal")
    create_dir(DEMO_SERVER_DATA + "/replset1c/journal")
    if sharded:
        create_dir(DEMO_SERVER_DATA + "/replset2a/journal")
        create_dir(DEMO_SERVER_DATA + "/replset2b/journal")
        create_dir(DEMO_SERVER_DATA + "/replset2c/journal")
    create_dir(DEMO_SERVER_DATA + "/shard1a/journal")
    create_dir(DEMO_SERVER_DATA + "/shard1b/journal")
    create_dir(DEMO_SERVER_DATA + "/config1/journal")
    create_dir(DEMO_SERVER_LOG)

    # Create the replica set
    startMongoProc(PORTS_ONE["PRIMARY"], "demo-repl", "/replset1a",
                   "/replset1a.log", key_file)
    startMongoProc(PORTS_ONE["SECONDARY"], "demo-repl", "/replset1b",
                   "/replset1b.log", key_file)
    startMongoProc(PORTS_ONE["ARBITER"], "demo-repl", "/replset1c",
                   "/replset1c.log", key_file)
    if sharded:
        startMongoProc(PORTS_TWO["PRIMARY"], "demo-repl-2", "/replset2a",
                       "/replset2a.log", key_file)
        startMongoProc(PORTS_TWO["SECONDARY"], "demo-repl-2", "/replset2b",
                       "/replset2b.log", key_file)
        startMongoProc(PORTS_TWO["ARBITER"], "demo-repl-2", "/replset2c",
                       "/replset2c.log", key_file)

    # Setup config server
    CMD = ["mongod --oplogSize 500 --fork --configsvr --noprealloc --port "
           + PORTS_ONE["CONFIG"] + " --dbpath " + DEMO_SERVER_DATA
           + "/config1 --rest --logpath " + DEMO_SERVER_LOG
           + "/config1.log --logappend"]
    if key_file is not None:
        CMD[0] += " --keyFile " + key_file
    CMD[0] += " &"
    executeCommand(CMD)
    checkStarted(int(PORTS_ONE["CONFIG"]))

    # Setup the mongos, same mongos for both shards
    CMD = ["mongos --port " + PORTS_ONE["MONGOS"]
           + " --fork --configdb localhost:" + PORTS_ONE["CONFIG"]
           + " --chunkSize 1 --logpath " + DEMO_SERVER_LOG
           + "/mongos1.log --logappend"]
    if key_file is not None:
        CMD[0] += " --keyFile " + key_file
    CMD[0] += " &"
    if use_mongos:
        executeCommand(CMD)
        checkStarted(int(PORTS_ONE["MONGOS"]))

    # configuration for replSet 1
    config = {'_id': "demo-repl", 'members': [
        {'_id': 0, 'host': "localhost:27117"},
        {'_id': 1, 'host': "localhost:27118"},
        {'_id': 2, 'host': "localhost:27119", 'arbiterOnly': 'true'}]}

    # configuration for replSet 2, not always used
    config2 = {'_id': "demo-repl-2", 'members': [
        {'_id': 0, 'host': "localhost:27317"},
        {'_id': 1, 'host': "localhost:27318"},
        {'_id': 2, 'host': "localhost:27319", 'arbiterOnly': 'true'}]}

    primary = Connection('localhost:27117')
    if use_mongos:
        mongos = Connection('localhost:27217')
    primary.admin.command("replSetInitiate", config)

    # ensure that the replSet is properly configured
    while retry_until_ok(primary.admin.command,
                         "replSetGetStatus")['myState'] == 0:
        time.sleep(1)

    if use_mongos:
        counter = 100
        while counter > 0:
            try:
                mongos.admin.command("addShard",
                                     "demo-repl/localhost:27117")
                break
            except OperationFailure:            # replSet not ready yet
                counter -= 1
                time.sleep(1)
        if counter == 0:
            print("Could not add shard to mongos")
            sys.exit(1)

    if sharded:
        primary2 = Connection('localhost:27317')
        primary2.admin.command("replSetInitiate", config2)
        while retry_until_ok(primary2.admin.command,
                             "replSetGetStatus")['myState'] == 0:
            time.sleep(1)
        counter = 100
        while counter > 0:
            try:
                admin_db = mongos.admin
                admin_db.command("addShard",
                                 "demo-repl-2/localhost:27317", maxSize=1)
                break
            except OperationFailure:            # replSet not ready yet
                counter -= 1
                time.sleep(1)
        if counter == 0:
            print("Could not add shard to mongos")
            sys.exit(1)

        # shard on the alpha.foo collection
        admin_db = mongos.admin
        admin_db.command("enableSharding", "alpha")
        admin_db.command("shardCollection", "alpha.foo", key={"_id": 1})

    primary = Connection('localhost:27117')
    admin = primary['admin']
    while admin.command("isMaster")['ismaster'] is False:
        time.sleep(1)
    secondary = Connection('localhost:27118')
    # Fixed: was `is not 2` — identity comparison against an int
    # literal, which is not guaranteed to work; compare values instead.
    while secondary.admin.command("replSetGetStatus")['myState'] != 2:
        time.sleep(1)
def commit(self):
    """Force a commit against the Solr target."""
    commit_call = self.solr.commit
    retry_until_ok(commit_call)
def start_cluster(sharded=False, key_file=None, use_mongos=True):
    """Set up a test cluster: one replica set of three members
    (primary, secondary, arbiter), plus a config server and a mongos.

    Args:
        sharded: when True, also start a second replica set and add both
            sets as shards, sharding the ``alpha.foo`` collection.
        key_file: optional ``--keyFile`` path passed to every mongod and
            to the mongos for intra-cluster auth.
        use_mongos: when False, the mongos router is not started and no
            shards are added.

    Returns:
        True once a primary is elected and the secondary reaches state 2;
        False if a shard could not be added to the mongos in time.
    """
    # Kill all spawned mongods
    kill_all_mongo_proc('localhost', PORTS_ONE)
    kill_all_mongo_proc('localhost', PORTS_TWO)

    # Kill all spawned mongos
    kill_mongos_proc()

    # reset data dirs
    remove_dir(DEMO_SERVER_LOG)
    remove_dir(DEMO_SERVER_DATA)

    create_dir(DEMO_SERVER_DATA + "/standalone/journal")
    create_dir(DEMO_SERVER_DATA + "/replset1a/journal")
    create_dir(DEMO_SERVER_DATA + "/replset1b/journal")
    create_dir(DEMO_SERVER_DATA + "/replset1c/journal")

    if sharded:
        create_dir(DEMO_SERVER_DATA + "/replset2a/journal")
        create_dir(DEMO_SERVER_DATA + "/replset2b/journal")
        create_dir(DEMO_SERVER_DATA + "/replset2c/journal")

    create_dir(DEMO_SERVER_DATA + "/shard1a/journal")
    create_dir(DEMO_SERVER_DATA + "/shard1b/journal")
    create_dir(DEMO_SERVER_DATA + "/config1/journal")
    create_dir(DEMO_SERVER_LOG)

    # Create the replica set
    start_mongo_proc(PORTS_ONE["PRIMARY"], "demo-repl", "/replset1a",
                     "/replset1a.log", key_file)
    start_mongo_proc(PORTS_ONE["SECONDARY"], "demo-repl", "/replset1b",
                     "/replset1b.log", key_file)
    start_mongo_proc(PORTS_ONE["ARBITER"], "demo-repl", "/replset1c",
                     "/replset1c.log", key_file)

    if sharded:
        start_mongo_proc(PORTS_TWO["PRIMARY"], "demo-repl-2", "/replset2a",
                         "/replset2a.log", key_file)
        start_mongo_proc(PORTS_TWO["SECONDARY"], "demo-repl-2", "/replset2b",
                         "/replset2b.log", key_file)
        start_mongo_proc(PORTS_TWO["ARBITER"], "demo-repl-2", "/replset2c",
                         "/replset2c.log", key_file)

    # Setup config server
    cmd = ("mongod --oplogSize 500 --fork --configsvr --noprealloc --port "
           + PORTS_ONE["CONFIG"] + " --dbpath " + DEMO_SERVER_DATA
           + "/config1 --rest --logpath " + DEMO_SERVER_LOG
           + "/config1.log --logappend")

    if key_file is not None:
        cmd += " --keyFile " + key_file

    cmd += " &"
    execute_command(cmd)
    check_started(int(PORTS_ONE["CONFIG"]))

    # Setup the mongos, same mongos for both shards.
    # BUG FIX: this command used to be built as a one-element list and then
    # extended with `cmd += " --keyFile " + key_file`, which appends each
    # *character* of the string to the list (list += str iterates the
    # string). Build it as a plain string, consistent with the
    # config-server command above.
    cmd = ("mongos --port " + PORTS_ONE["MONGOS"]
           + " --fork --configdb localhost:" + PORTS_ONE["CONFIG"]
           + " --chunkSize 1 --logpath " + DEMO_SERVER_LOG
           + "/mongos1.log --logappend")

    if key_file is not None:
        cmd += " --keyFile " + key_file

    cmd += " &"
    if use_mongos:
        execute_command(cmd)
        check_started(int(PORTS_ONE["MONGOS"]))

    # configuration for replSet 1
    config = {'_id': "demo-repl", 'members': [
        {'_id': 0, 'host': "localhost:27117"},
        {'_id': 1, 'host': "localhost:27118"},
        {'_id': 2, 'host': "localhost:27119", 'arbiterOnly': 'true'}]}

    # configuration for replSet 2, not always used
    config2 = {'_id': "demo-repl-2", 'members': [
        {'_id': 0, 'host': "localhost:27317"},
        {'_id': 1, 'host': "localhost:27318"},
        {'_id': 2, 'host': "localhost:27319", 'arbiterOnly': 'true'}]}

    primary = Connection('localhost:27117')
    if use_mongos:
        mongos = Connection('localhost:27217')
    primary.admin.command("replSetInitiate", config)

    # ensure that the replSet is properly configured
    while retry_until_ok(primary.admin.command,
                         "replSetGetStatus")['myState'] == 0:
        time.sleep(1)

    if use_mongos:
        counter = 100
        while counter > 0:
            try:
                mongos.admin.command("addShard", "demo-repl/localhost:27117")
                break
            except OperationFailure:
                # replSet not ready yet
                counter -= 1
                time.sleep(1)

        if counter == 0:
            return False

    if sharded:
        primary2 = Connection('localhost:27317')
        primary2.admin.command("replSetInitiate", config2)

        while retry_until_ok(primary2.admin.command,
                             "replSetGetStatus")['myState'] == 0:
            time.sleep(1)

        counter = 100
        while counter > 0:
            try:
                admin_db = mongos.admin
                admin_db.command("addShard",
                                 "demo-repl-2/localhost:27317", maxSize=1)
                break
            except OperationFailure:
                # replSet not ready yet
                counter -= 1
                time.sleep(1)

        if counter == 0:
            return False

        # shard on the alpha.foo collection
        admin_db = mongos.admin
        admin_db.command("enableSharding", "alpha")
        admin_db.command("shardCollection", "alpha.foo", key={"_id": 1})

    primary = Connection('localhost:27117')
    admin = primary['admin']
    while admin.command("isMaster")['ismaster'] is False:
        time.sleep(1)

    secondary = Connection('localhost:27118')
    # BUG FIX: was `is not 2` — identity comparison on an int only works by
    # accident of CPython's small-int caching; use equality.
    while secondary.admin.command("replSetGetStatus")['myState'] != 2:
        time.sleep(1)
    return True
def test_rollback(self):
    """Test rollback in oplog_manager. Assertion failure if it doesn't pass

    We force a rollback by inserting a doc, killing the primary,
    inserting another doc, killing the new primary, and then
    restarting both.
    """
    os.system('rm config.txt; touch config.txt')
    start_cluster()

    test_oplog, primary_conn, mongos, oplog_coll = self.get_new_oplog()

    solr = DocManager()
    test_oplog.doc_manager = solr
    solr._delete()          # equivalent to solr.delete(q='*:*')

    obj1 = ObjectId('4ff74db3f646462b38000001')

    mongos['test']['test'].remove({})
    mongos['test']['test'].insert({'_id': obj1, 'name': 'paulie'},
                                  safe=True)
    while (mongos['test']['test'].find().count() != 1):
        time.sleep(1)
    cutoff_ts = test_oplog.get_last_oplog_timestamp()

    obj2 = ObjectId('4ff74db3f646462b38000002')
    first_doc = {'name': 'paulie', '_ts': bson_ts_to_long(cutoff_ts),
                 'ns': 'test.test', '_id': obj1}

    # try kill one, try restarting
    kill_mongo_proc(primary_conn.host, PORTS_ONE['PRIMARY'])

    new_primary_conn = Connection('localhost', int(PORTS_ONE['SECONDARY']))
    admin = new_primary_conn['admin']
    while admin.command("isMaster")['ismaster'] is False:
        time.sleep(1)
    time.sleep(5)

    # the new primary may not accept writes immediately after election,
    # so retry the insert for up to a minute
    count = 0
    while True:
        try:
            current_conn = mongos['test']['test']
            current_conn.insert({'_id': obj2, 'name': 'paul'}, safe=True)
            break
        except OperationFailure:
            count += 1
            if count > 60:
                logging.error('Call to insert doc failed too many times')
                sys.exit(1)
            time.sleep(1)
            continue

    while (mongos['test']['test'].find().count() != 2):
        time.sleep(1)

    kill_mongo_proc(primary_conn.host, PORTS_ONE['SECONDARY'])
    start_mongo_proc(PORTS_ONE['PRIMARY'], "demo-repl", "/replset1a",
                     "/replset1a.log", None)

    # wait for master to be established
    while primary_conn['admin'].command("isMaster")['ismaster'] is False:
        time.sleep(1)

    start_mongo_proc(PORTS_ONE['SECONDARY'], "demo-repl", "/replset1b",
                     "/replset1b.log", None)

    # wait for secondary to be established
    admin = new_primary_conn['admin']
    while admin.command("replSetGetStatus")['myState'] != 2:
        time.sleep(1)

    # after the rollback, only the first document should survive
    while retry_until_ok(mongos['test']['test'].find().count) != 1:
        time.sleep(1)

    self.assertEqual(str(new_primary_conn.port), PORTS_ONE['SECONDARY'])
    self.assertEqual(str(primary_conn.port), PORTS_ONE['PRIMARY'])

    last_ts = test_oplog.get_last_oplog_timestamp()
    second_doc = {'name': 'paul', '_ts': bson_ts_to_long(last_ts),
                  'ns': 'test.test', '_id': obj2}

    test_oplog.doc_manager.upsert(first_doc)
    test_oplog.doc_manager.upsert(second_doc)

    test_oplog.rollback()
    test_oplog.doc_manager.commit()
    results = solr._search()

    self.assertEqual(len(results), 1)

    results_doc = results[0]
    self.assertEqual(results_doc['name'], 'paulie')
    self.assertTrue(results_doc['_ts'] <= bson_ts_to_long(cutoff_ts))
def rollback(self):
    """Rollback target system to consistent state.

    The strategy is to find the latest timestamp in the target system
    and the largest timestamp in the oplog less than the latest target
    system timestamp. This defines the rollback window and we just roll
    these back until the oplog and target system are in consistent
    states.

    Returns the oplog timestamp that the target system was rolled back
    to, or None when there is nothing to roll back (empty target system
    or no matching oplog entry).
    """
    # Flush pending writes so get_last_doc() sees the true latest doc.
    self.doc_manager.commit()
    last_inserted_doc = self.doc_manager.get_last_doc()

    if last_inserted_doc is None:
        # Target system is empty: nothing to roll back.
        return None

    target_ts = util.long_to_bson_ts(last_inserted_doc['_ts'])
    # Newest oplog entry at or before the target system's latest
    # timestamp; $natural descending walks the oplog newest-first.
    last_oplog_entry = self.oplog.find_one({'ts': {'$lte': target_ts}},
                                           sort=[('$natural',
                                           pymongo.DESCENDING)])
    if last_oplog_entry is None:
        # No oplog entry precedes the target ts: cannot define a window.
        return None

    rollback_cutoff_ts = last_oplog_entry['ts']
    start_ts = util.bson_ts_to_long(rollback_cutoff_ts)
    end_ts = last_inserted_doc['_ts']

    rollback_set = {}   # this is a dictionary of ns:list of docs
    # Group every target-system doc in the rollback window by namespace.
    for doc in self.doc_manager.search(start_ts, end_ts):
        if doc['ns'] in rollback_set:
            rollback_set[doc['ns']].append(doc)
        else:
            rollback_set[doc['ns']] = [doc]

    for namespace, doc_list in rollback_set.items():
        # namespace is "database.collection"; split only on the first dot
        # since collection names may themselves contain dots.
        database, coll = namespace.split('.', 1)
        obj_id = bson.objectid.ObjectId
        bson_obj_id_list = [obj_id(doc['_id']) for doc in doc_list]

        to_update = util.retry_until_ok(
            self.main_connection[database][coll].find,
            {'_id': {'$in': bson_obj_id_list}})
        #doc list are docs in target system, to_update are docs in mongo
        doc_hash = {}   # hash by _id
        for doc in doc_list:
            doc_hash[bson.objectid.ObjectId(doc['_id'])] = doc

        to_index = []
        count = 0
        # Iterating the cursor can fail mid-stream during a failover;
        # retry the whole pass for up to ~60s before giving up.
        while True:
            try:
                for doc in to_update:
                    if doc['_id'] in doc_hash:
                        # Doc exists in mongo: keep it, re-index below.
                        del doc_hash[doc['_id']]
                        to_index.append(doc)
                break
            except (pymongo.errors.OperationFailure,
                    pymongo.errors.AutoReconnect):
                count += 1
                if count > 60:
                    sys.exit(1)
                time.sleep(1)

        #delete the inconsistent documents
        # (whatever remains in doc_hash exists only in the target system)
        for doc in doc_hash.values():
            self.doc_manager.remove(doc)

        #insert the ones from mongo
        for doc in to_index:
            # Stamp with the cutoff ts so these docs fall outside any
            # future rollback window for the same cutoff.
            doc['_ts'] = util.bson_ts_to_long(rollback_cutoff_ts)
            doc['ns'] = namespace
            try:
                self.doc_manager.upsert(doc)
            except SystemError:
                logging.error("Unable to insert %s" % (doc))

    return rollback_cutoff_ts
def test_retry_until_ok(self):
    """Verify retry_until_ok returns err_func's eventual success value
    and that err_func was attempted exactly three times.
    """
    outcome = retry_until_ok(err_func)
    self.assertTrue(outcome)
    self.assertEqual(err_func.counter, 3)