Beispiel #1
0
    def test_write_oplog_progress(self):
        """Test write_oplog_progress under several circumstances
        """
        os.system('touch temp_config.txt')
        config_file_path = os.getcwd() + '/temp_config.txt'
        c = Connector(main_address, config_file_path, None, ['test.test'],
                      '_id', None, None)

        #test that None is returned if there is no config file specified.
        self.assertEqual(c.write_oplog_progress(), None)

        c.oplog_progress.get_dict()[1] = Timestamp(12, 34)
        #pretend to insert a thread/timestamp pair
        c.write_oplog_progress()

        data = json.load(open(config_file_path, 'r'))
        self.assertEqual(1, int(data[0]))
        self.assertEqual(long_to_bson_ts(int(data[1])), Timestamp(12, 34))

        #ensure the temp file was deleted
        self.assertFalse(os.path.exists(config_file_path + '~'))

        #ensure that updates work properly
        c.oplog_progress.get_dict()[1] = Timestamp(44, 22)
        c.write_oplog_progress()

        config_file = open(config_file_path, 'r')
        data = json.load(config_file)
        self.assertEqual(1, int(data[0]))
        self.assertEqual(long_to_bson_ts(int(data[1])), Timestamp(44, 22))

        os.system('rm ' + config_file_path)
        config_file.close()
        print("PASSED TEST WRITE OPLOG PROGRESS")
    def test_write_oplog_progress(self):
        """Test write_oplog_progress under several circumstances
        """
        os.system('touch temp_config.txt')
        config_file_path = os.getcwd() + '/temp_config.txt'
        conn = Connector(MAIN_ADDRESS, config_file_path, None, ['test.test'],
                      '_id', None, None)

        #test that None is returned if there is no config file specified.
        self.assertEqual(conn.write_oplog_progress(), None)

        conn.oplog_progress.get_dict()[1] = Timestamp(12, 34)
        #pretend to insert a thread/timestamp pair
        conn.write_oplog_progress()

        data = json.load(open(config_file_path, 'r'))
        self.assertEqual(1, int(data[0]))
        self.assertEqual(long_to_bson_ts(int(data[1])), Timestamp(12, 34))

        #ensure the temp file was deleted
        self.assertFalse(os.path.exists(config_file_path + '~'))

        #ensure that updates work properly
        conn.oplog_progress.get_dict()[1] = Timestamp(44, 22)
        conn.write_oplog_progress()

        config_file = open(config_file_path, 'r')
        data = json.load(config_file)
        self.assertEqual(1, int(data[0]))
        self.assertEqual(long_to_bson_ts(int(data[1])), Timestamp(44, 22))

        os.system('rm ' + config_file_path)
        config_file.close()
    def read_oplog_progress(self):
        """Reads oplog progress from file provided by user.
        This method is only called once before any threads are spanwed.
        """

        if self.oplog_checkpoint is None:
            return None

        # Check for empty file
        try:
            if os.stat(self.oplog_checkpoint).st_size == 0:
                logging.info("MongoConnector: Empty oplog progress file.")
                return None
        except OSError:
            return None

        source = open(self.oplog_checkpoint, 'r')
        try:
            data = json.load(source)
        except ValueError:       # empty file
            err_msg = "MongoConnector: Can't read oplog progress file."
            reason = "It may be empty or corrupt."
            logging.info("%s %s" % (err_msg, reason))
            source.close()
            return None

        source.close()

        count = 0
        oplog_dict = self.oplog_progress.get_dict()
        for count in range(0, len(data), 2):
            oplog_str = data[count]
            ts = data[count + 1]
            oplog_dict[oplog_str] = util.long_to_bson_ts(ts)
    def read_oplog_progress(self):
        """Reads oplog progress from file provided by user.
        This method is only called once before any threads are spanwed.
        """

        if self.oplog_checkpoint is None:
            return None

        # Check for empty file
        try:
            if os.stat(self.oplog_checkpoint).st_size == 0:
                logging.info("MongoConnector: Empty oplog progress file.")
                return None
        except OSError:
            return None

        source = open(self.oplog_checkpoint, 'r')
        try:
            data = json.load(source)
        except ValueError:  # empty file
            err_msg = "MongoConnector: Can't read oplog progress file."
            reason = "It may be empty or corrupt."
            logging.info("%s %s" % (err_msg, reason))
            source.close()
            return None

        source.close()

        count = 0
        oplog_dict = self.oplog_progress.get_dict()
        for count in range(0, len(data), 2):
            oplog_str = data[count]
            ts = data[count + 1]
            oplog_dict[oplog_str] = util.long_to_bson_ts(ts)
Beispiel #5
0
    def test_bson_ts_to_long(self):
        """Test bson_ts_to_long and long_to_bson_ts
        """

        ts = timestamp.Timestamp(0x12345678, 0x90abcdef)

        self.assertEqual(0x1234567890abcdef, bson_ts_to_long(ts))
        self.assertEqual(long_to_bson_ts(0x1234567890abcdef), ts)
        print("PASSED BSON TS TO LONG")
Beispiel #6
0
    def test_bson_ts_to_long(self):
        """Test bson_ts_to_long and long_to_bson_ts
        """

        ts = timestamp.Timestamp(0x12345678, 0x90abcdef)

        self.assertEqual(0x1234567890abcdef, bson_ts_to_long(ts))
        self.assertEqual(long_to_bson_ts(0x1234567890abcdef), ts)
        print("PASSED BSON TS TO LONG")
    def test_bson_ts_to_long(self):
        """Test bson_ts_to_long and long_to_bson_ts
        """

        tstamp = timestamp.Timestamp(0x12345678, 0x90abcdef)

        self.assertEqual(0x1234567890abcdef, 
            bson_ts_to_long(tstamp))
        self.assertEqual(long_to_bson_ts(0x1234567890abcdef),
            tstamp)
    def test_dump_collection(self):
        """Test dump_collection in oplog_manager. Assertion failure if it
            doesn't pass
        """

        test_oplog, primary_conn, search_ts = self.get_oplog_thread()
        solr = DocManager()
        test_oplog.doc_manager = solr

        #with documents
        primary_conn['test']['test'].insert({'name': 'paulie'})
        search_ts = test_oplog.get_last_oplog_timestamp()
        test_oplog.dump_collection()

        test_oplog.doc_manager.commit()
        solr_results = solr._search()
        self.assertEqual(len(solr_results), 1)
        solr_doc = solr_results[0]
        self.assertEqual(long_to_bson_ts(solr_doc['_ts']), search_ts)
        self.assertEqual(solr_doc['name'], 'paulie')
        self.assertEqual(solr_doc['ns'], 'test.test')
    def test_dump_collection(self):
        """Test dump_collection in oplog_manager. Assertion failure if it
            doesn't pass
        """

        test_oplog, primary_conn, oplog_coll = self.get_oplog_thread()
        solr = DocManager()
        test_oplog.doc_manager = solr

        #with documents
        primary_conn['test']['test'].insert({'name': 'paulie'})
        search_ts = test_oplog.get_last_oplog_timestamp()
        test_oplog.dump_collection()

        test_oplog.doc_manager.commit()
        solr_results = solr._search()
        self.assertEqual(len(solr_results), 1)
        solr_doc = solr_results[0]
        self.assertEqual(long_to_bson_ts(solr_doc['_ts']), search_ts)
        self.assertEqual(solr_doc['name'], 'paulie')
        self.assertEqual(solr_doc['ns'], 'test.test')

        #test_oplog.join()
        print("PASSED TEST DUMP COLLECTION")
    def test_dump_collection(self):
        """Test dump_collection in oplog_manager.

        Assertion failure if it doesn't pass
        """

        test_oplog, primary_conn, oplog_coll, mongos = self.get_oplog_thread()
        solr = DocManager()
        test_oplog.doc_manager = solr

        # with documents
        safe_mongo_op(mongos['alpha']['foo'].insert, {'name': 'paulie'})
        search_ts = test_oplog.get_last_oplog_timestamp()
        test_oplog.dump_collection()

        test_oplog.doc_manager.commit()
        solr_results = solr._search()
        assert (len(solr_results) == 1)
        solr_doc = solr_results[0]
        assert (long_to_bson_ts(solr_doc['_ts']) == search_ts)
        assert (solr_doc['name'] == 'paulie')
        assert (solr_doc['ns'] == 'alpha.foo')

        print("PASSED TEST DUMP COLLECTION")
    def test_dump_collection(self):
        """Test dump_collection in oplog_manager. Assertion failure if it
            doesn't pass
        """

        test_oplog, primary_conn, oplog_coll = self.get_oplog_thread()
        solr = DocManager()
        test_oplog.doc_manager = solr

        # with documents
        primary_conn["test"]["test"].insert({"name": "paulie"})
        search_ts = test_oplog.get_last_oplog_timestamp()
        test_oplog.dump_collection()

        test_oplog.doc_manager.commit()
        solr_results = solr._search()
        self.assertEqual(len(solr_results), 1)
        solr_doc = solr_results[0]
        self.assertEqual(long_to_bson_ts(solr_doc["_ts"]), search_ts)
        self.assertEqual(solr_doc["name"], "paulie")
        self.assertEqual(solr_doc["ns"], "test.test")

        # test_oplog.join()
        print("PASSED TEST DUMP COLLECTION")
    def test_dump_collection(self):
        """Test dump_collection in oplog_manager.

        Assertion failure if it doesn't pass
        """

        test_oplog, primary_conn, oplog_coll, mongos = self.get_oplog_thread()
        solr = DocManager()
        test_oplog.doc_manager = solr

        # with documents
        safe_mongo_op(mongos['alpha']['foo'].insert, {'name': 'paulie'})
        search_ts = test_oplog.get_last_oplog_timestamp()
        test_oplog.dump_collection()

        test_oplog.doc_manager.commit()
        solr_results = solr._search()
        assert (len(solr_results) == 1)
        solr_doc = solr_results[0]
        assert (long_to_bson_ts(solr_doc['_ts']) == search_ts)
        assert (solr_doc['name'] == 'paulie')
        assert (solr_doc['ns'] == 'alpha.foo')

        print("PASSED TEST DUMP COLLECTION")
Beispiel #13
0
    def rollback(self):
        """Rollback target system to consistent state.

        The strategy is to find the latest timestamp in the target system and
        the largest timestamp in the oplog less than the latest target system
        timestamp. This defines the rollback window and we just roll these
        back until the oplog and target system are in consistent states.
        """
        self.doc_manager.commit()
        last_inserted_doc = self.doc_manager.get_last_doc()

        if last_inserted_doc is None:
            return None

        target_ts = util.long_to_bson_ts(last_inserted_doc['_ts'])
        last_oplog_entry = self.oplog.find_one({'ts': {'$lte': target_ts}},
                                               sort=[('$natural',
                                               pymongo.DESCENDING)])
        if last_oplog_entry is None:
            return None

        rollback_cutoff_ts = last_oplog_entry['ts']
        start_ts = util.bson_ts_to_long(rollback_cutoff_ts)
        end_ts = last_inserted_doc['_ts']

        rollback_set = {}   # this is a dictionary of ns:list of docs
        for doc in self.doc_manager.search(start_ts, end_ts):
            if doc['ns'] in rollback_set:
                rollback_set[doc['ns']].append(doc)
            else:
                rollback_set[doc['ns']] = [doc]

        for namespace, doc_list in rollback_set.items():
            database, coll = namespace.split('.', 1)
            obj_id = bson.objectid.ObjectId
            bson_obj_id_list = [obj_id(doc['_id']) for doc in doc_list]

            to_update = util.retry_until_ok(
                self.main_connection[database][coll].find,
                {'_id': {'$in': bson_obj_id_list}})
            #doc list are docs in  target system, to_update are docs in mongo
            doc_hash = {}  # hash by _id
            for doc in doc_list:
                doc_hash[bson.objectid.ObjectId(doc['_id'])] = doc

            to_index = []
            count = 0
            while True:
                try:
                    for doc in to_update:
                        if doc['_id'] in doc_hash:
                            del doc_hash[doc['_id']]
                            to_index.append(doc)
                    break
                except (pymongo.errors.OperationFailure,
                        pymongo.errors.AutoReconnect):
                    count += 1
                    if count > 60:
                        sys.exit(1)
                    time.sleep(1)

            #delete the inconsistent documents
            for doc in doc_hash.values():
                self.doc_manager.remove(doc)

            #insert the ones from mongo
            for doc in to_index:
                doc['_ts'] = util.bson_ts_to_long(rollback_cutoff_ts)
                doc['ns'] = namespace
                try:
                    self.doc_manager.upsert(doc)
                except SystemError:
                    logging.error("Unable to insert %s" % (doc))

        return rollback_cutoff_ts
    def dump_collection(self):
        """Dumps collection into the target system.

        This method is called when we're initializing the cursor and have no
        configs i.e. when we're starting for the first time.
        """

        dump_set = self.namespace_set

        #no namespaces specified
        if not self.namespace_set:
            db_list = self.main_connection.database_names()
            for db in db_list:
                if db == "config" or db == "local":
                    continue
                coll_list = self.main_connection[db].collection_names()
                for coll in coll_list:
                    if coll.startswith("system"):
                        continue
                    namespace = str(db) + "." + str(coll)
                    dump_set.append(namespace)

        long_ts = None

        for namespace in dump_set:
            db, coll = namespace.split('.', 1)
            target_coll = self.main_connection[db][coll]
            cursor = util.retry_until_ok(target_coll.find)
            cursor = cursor.sort('$natural', pymongo.DESCENDING)
            oplog_cursor = util.retry_until_ok(self.oplog.find)
            oplog_cursor = oplog_cursor.sort('$natural', pymongo.DESCENDING)

            for entry in oplog_cursor:

                if entry['op'] != 'i':
                    continue
                #The 'o' field represents the document
                search_doc = entry['o']
                cursor.rewind()
                for doc in cursor:
                    if search_doc == doc:
                        long_ts = util.bson_ts_to_long(entry['ts'])
                        break

                if long_ts:
                    break

            cursor.rewind()

            try:
                for doc in cursor:
                    doc['ns'] = namespace
                    doc['_ts'] = long_ts
                    self.doc_manager.upsert(doc)
            except (pymongo.errors.AutoReconnect,
                    pymongo.errors.OperationFailure):

                err_msg = "OplogManager: Failed during dump collection"
                effect = "cannot recover!"
                logging.error('%s %s %s' % (err_msg, effect, self.oplog))
                self.running = False
                return

        if long_ts:
            long_ts = util.long_to_bson_ts(long_ts)
        else:  # Implies that we are just initiating the set
            long_ts = self.get_last_oplog_timestamp()

        return long_ts
    def rollback(self):
        """Rollback target system to consistent state.

        The strategy is to find the latest timestamp in the target system and
        the largest timestamp in the oplog less than the latest target system
        timestamp. This defines the rollback window and we just roll these
        back until the oplog and target system are in consistent states.
        """
        self.doc_manager.commit()
        last_inserted_doc = self.doc_manager.get_last_doc()

        if last_inserted_doc is None:
            return None

        target_ts = util.long_to_bson_ts(last_inserted_doc['_ts'])
        last_oplog_entry = self.oplog.find_one({'ts': {
            '$lte': target_ts
        }},
                                               sort=[('$natural',
                                                      pymongo.DESCENDING)])
        if last_oplog_entry is None:
            return None

        rollback_cutoff_ts = last_oplog_entry['ts']
        start_ts = util.bson_ts_to_long(rollback_cutoff_ts)
        end_ts = last_inserted_doc['_ts']

        rollback_set = {}  # this is a dictionary of ns:list of docs
        for doc in self.doc_manager.search(start_ts, end_ts):
            if doc['ns'] in rollback_set:
                rollback_set[doc['ns']].append(doc)
            else:
                rollback_set[doc['ns']] = [doc]

        for namespace, doc_list in rollback_set.items():
            database, coll = namespace.split('.', 1)
            obj_id = bson.objectid.ObjectId
            bson_obj_id_list = [obj_id(doc['_id']) for doc in doc_list]

            to_update = util.retry_until_ok(
                self.main_connection[database][coll].find,
                {'_id': {
                    '$in': bson_obj_id_list
                }})
            #doc list are docs in  target system, to_update are docs in mongo
            doc_hash = {}  # hash by _id
            for doc in doc_list:
                doc_hash[bson.objectid.ObjectId(doc['_id'])] = doc

            to_index = []
            count = 0
            while True:
                try:
                    for doc in to_update:
                        if doc['_id'] in doc_hash:
                            del doc_hash[doc['_id']]
                            to_index.append(doc)
                    break
                except (pymongo.errors.OperationFailure,
                        pymongo.errors.AutoReconnect):
                    count += 1
                    if count > 60:
                        sys.exit(1)
                    time.sleep(1)

            #delete the inconsistent documents
            for doc in doc_hash.values():
                self.doc_manager.remove(doc)

            #insert the ones from mongo
            for doc in to_index:
                doc['_ts'] = util.bson_ts_to_long(rollback_cutoff_ts)
                doc['ns'] = namespace
                try:
                    self.doc_manager.upsert(doc)
                except SystemError:
                    logging.error("Unable to insert %s" % (doc))

        return rollback_cutoff_ts