Example #1
        def do_dump(dm, error_queue):
            try:
                LOG.debug(
                    "OplogThread: Using bulk upsert function for "
                    "collection dump"
                )
                upsert_all(dm)

                if gridfs_dump_set:
                    LOG.info(
                        "OplogThread: dumping GridFS collections: %s", gridfs_dump_set
                    )

                # Dump GridFS files
                for gridfs_ns in gridfs_dump_set:
                    mongo_coll = self.get_collection(gridfs_ns)
                    from_coll = self.get_collection(gridfs_ns + ".files")
                    dest_ns = self.namespace_config.map_namespace(gridfs_ns)
                    for doc in docs_to_dump(from_coll):
                        gridfile = GridFSFile(mongo_coll, doc)
                        dm.insert_file(gridfile, dest_ns, long_ts)
            except Exception:
                # Likely exceptions:
                # pymongo.errors.OperationFailure,
                # mongo_connector.errors.ConnectionFailed
                # mongo_connector.errors.OperationFailed
                error_queue.put(sys.exc_info())
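
do_dump never raises; it funnels sys.exc_info() into error_queue so the coordinating thread can re-raise in one place. A minimal sketch of that worker pattern, assuming one dump thread per DocManager (run_dump_workers and its layout are illustrative, not the connector's actual code):

    import queue
    import sys
    import threading

    def run_dump_workers(doc_managers):
        # Each worker pushes sys.exc_info() into the queue instead of
        # raising, so the coordinating thread can surface the first error.
        error_queue = queue.Queue()

        def do_dump(dm, error_queue):
            try:
                dm.bulk_upsert([], "db.coll", 0)  # placeholder for the real dump
            except Exception:
                error_queue.put(sys.exc_info())

        threads = [
            threading.Thread(target=do_dump, args=(dm, error_queue))
            for dm in doc_managers
        ]
        for t in threads:
            t.start()
        for t in threads:
            t.join()

        if not error_queue.empty():
            # Re-raise the first captured exception in the caller's thread.
            exc_type, exc_value, exc_tb = error_queue.get()
            raise exc_value.with_traceback(exc_tb)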
Example #2
        def do_dump(dm, error_queue):
            try:
                # Dump the documents, bulk upsert if possible
                if hasattr(dm, "bulk_upsert"):
                    LOG.debug("OplogThread: Using bulk upsert function for "
                              "collection dump")
                    upsert_all(dm)
                else:
                    LOG.debug(
                        "OplogThread: DocManager %s has no "
                        "bulk_upsert method.  Upserting documents "
                        "serially for collection dump." % str(dm))
                    upsert_each(dm)

                # Dump GridFS files
                for gridfs_ns in self.gridfs_set:
                    db, coll = gridfs_ns.split('.', 1)
                    mongo_coll = self.primary_client[db][coll]
                    dest_ns = self.dest_mapping.get(gridfs_ns, gridfs_ns)
                    for doc in docs_to_dump(gridfs_ns + '.files'):
                        gridfile = GridFSFile(mongo_coll, doc)
                        dm.insert_file(gridfile, dest_ns, long_ts)
            except Exception:
                # Likely exceptions:
                # pymongo.errors.OperationFailure,
                # mongo_connector.errors.ConnectionFailed
                # mongo_connector.errors.OperationFailed
                error_queue.put(sys.exc_info())
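
Example #2 picks the dump strategy by duck-typing the DocManager: hasattr decides between the bulk and serial paths. A toy sketch of that dispatch (SerialDocManager is hypothetical):

    def upsert_documents(dm, docs, namespace, timestamp):
        # Prefer the bulk path when the DocManager provides one; otherwise
        # upsert one document at a time, as in Example #2.
        if hasattr(dm, "bulk_upsert"):
            dm.bulk_upsert(docs, namespace, timestamp)
        else:
            for doc in docs:
                dm.upsert(doc, namespace, timestamp)

    class SerialDocManager:
        # No bulk_upsert attribute, so the serial branch runs.
        def upsert(self, doc, namespace, timestamp):
            print("upserted", doc["_id"], "into", namespace)

    upsert_documents(SerialDocManager(), [{"_id": 1}, {"_id": 2}], "db.coll", 0)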
Example #3
    def _process_with_doc_managers(self, entry, timestamp, is_gridfs_file):
        operation = entry["op"]
        ns = entry["ns"]
        for docman in self.doc_managers:
            try:
                LOG.debug(
                    "OplogThread: Operation for this "
                    "entry is %s" % str(operation)
                )

                # Remove
                if operation == "d":
                    docman.remove(entry["o"]["_id"], ns, timestamp)
                    self.remove_inc += 1

                # Insert
                elif operation == "i":  # Insert
                    # Retrieve inserted document from
                    # 'o' field in oplog record
                    doc = entry.get("o")
                    # Extract timestamp and namespace
                    if is_gridfs_file:
                        db, coll = ns.split(".", 1)
                        gridfile = GridFSFile(
                            self.primary_client[db][coll], doc
                        )
                        docman.insert_file(gridfile, ns, timestamp)
                    else:
                        docman.upsert(doc, ns, timestamp)
                    self.upsert_inc += 1

                # Update
                elif operation == "u":
                    docman.update(
                        entry["o2"]["_id"], entry["o"], ns, timestamp
                    )
                    self.update_inc += 1

                # Command
                elif operation == "c":
                    # use unmapped namespace
                    doc = entry.get("o")
                    docman.handle_command(doc, entry["ns"], timestamp)

            except errors.OperationFailed:
                LOG.exception(
                    "Unable to process oplog document %r" % entry
                )
            except errors.ConnectionFailed:
                LOG.exception(
                    "Connection failed while processing oplog "
                    "document %r" % entry
                )
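
Every code path above converts the oplog entry's BSON Timestamp with util.bson_ts_to_long before handing it to a DocManager. A sketch consistent with mongo-connector's utility pair, which packs the timestamp into one sortable 64-bit integer:

    from bson.timestamp import Timestamp

    def bson_ts_to_long(timestamp):
        # Pack the 32-bit seconds value and the 32-bit increment into
        # one 64-bit integer, preserving oplog ordering.
        return (timestamp.time << 32) + timestamp.inc

    def long_to_bson_ts(val):
        # Inverse conversion, useful when restoring a checkpoint.
        return Timestamp(val >> 32, val & 0xFFFFFFFF)

    ts = Timestamp(1500000000, 7)
    assert long_to_bson_ts(bson_ts_to_long(ts)) == ts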
Example #4
        def do_dump(dm, error_queue):
            try:
                LOG.debug("OplogThread: Using bulk upsert function for "
                          "collection dump")
                upsert_all(dm)

                # Dump GridFS files
                for gridfs_ns in self.gridfs_set:
                    mongo_coll = self.get_collection(gridfs_ns)
                    from_coll = self.get_collection(gridfs_ns + '.files')
                    dest_ns = self.dest_mapping_stru.get(gridfs_ns, gridfs_ns)
                    for doc in docs_to_dump(from_coll):
                        gridfile = GridFSFile(mongo_coll, doc)
                        dm.insert_file(gridfile, dest_ns, long_ts)
            except Exception:
                # Likely exceptions:
                # pymongo.errors.OperationFailure,
                # mongo_connector.errors.ConnectionFailed
                # mongo_connector.errors.OperationFailed
                error_queue.put(sys.exc_info())
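
Example #4 resolves the destination namespace with self.dest_mapping_stru.get(gridfs_ns, gridfs_ns): a dict lookup that falls back to the source namespace when no mapping exists. A minimal illustration of that identity-fallback lookup (the mapping contents are made up):

    dest_mapping = {"app.photos": "archive.photos"}

    def map_namespace(ns, mapping):
        # Identity fallback: unmapped namespaces replicate under
        # their original name.
        return mapping.get(ns, ns)

    assert map_namespace("app.photos", dest_mapping) == "archive.photos"
    assert map_namespace("app.users", dest_mapping) == "app.users"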
Example #5
    def run(self):
        """Start the oplog worker.
        """
        ReplicationLagLogger(self, 30).start()
        LOG.debug("OplogThread: Run thread started")
        while self.running is True:
            LOG.debug("OplogThread: Getting cursor")
            cursor, cursor_empty = retry_until_ok(self.init_cursor)
            # we've fallen too far behind
            if cursor is None and self.checkpoint is not None:
                err_msg = "OplogThread: Last entry no longer in oplog"
                effect = "cannot recover!"
                LOG.error("%s %s %s" % (err_msg, effect, self.oplog))
                self.running = False
                continue

            if cursor_empty:
                LOG.debug(
                    "OplogThread: Last entry is the one we "
                    "already processed.  Up to date.  Sleeping."
                )
                time.sleep(1)
                continue

            last_ts = None
            remove_inc = 0
            upsert_inc = 0
            update_inc = 0
            try:
                LOG.debug("OplogThread: about to process new oplog entries")
                while cursor.alive and self.running:
                    LOG.debug(
                        "OplogThread: Cursor is still"
                        " alive and thread is still running."
                    )
                    for n, entry in enumerate(cursor):
                        # Break out if this thread should stop
                        if not self.running:
                            break

                        LOG.debug(
                            "OplogThread: Iterating through cursor,"
                            " document number in this cursor is %d" % n
                        )

                        skip, is_gridfs_file = self._should_skip_entry(entry)
                        if skip:
                            # update the last_ts on skipped entries to ensure
                            # our checkpoint does not fall off the oplog. This
                            # also prevents reprocessing skipped entries.
                            last_ts = entry["ts"]
                            continue

                        # Sync the current oplog operation
                        operation = entry["op"]
                        ns = entry["ns"]
                        timestamp = util.bson_ts_to_long(entry["ts"])
                        for docman in self.doc_managers:
                            try:
                                LOG.debug(
                                    "OplogThread: Operation for this "
                                    "entry is %s" % str(operation)
                                )

                                # Remove
                                if operation == "d":
                                    docman.remove(entry["o"]["_id"], ns, timestamp)
                                    remove_inc += 1

                                # Insert
                                elif operation == "i":  # Insert
                                    # Retrieve inserted document from
                                    # 'o' field in oplog record
                                    doc = entry.get("o")
                                    # Extract timestamp and namespace
                                    if is_gridfs_file:
                                        db, coll = ns.split(".", 1)
                                        gridfile = GridFSFile(
                                            self.primary_client[db][coll], doc
                                        )
                                        docman.insert_file(gridfile, ns, timestamp)
                                    else:
                                        docman.upsert(doc, ns, timestamp)
                                    upsert_inc += 1

                                # Update
                                elif operation == "u":
                                    docman.update(
                                        entry["o2"]["_id"], entry["o"], ns, timestamp
                                    )
                                    update_inc += 1

                                # Command
                                elif operation == "c":
                                    # use unmapped namespace
                                    doc = entry.get("o")
                                    docman.handle_command(doc, entry["ns"], timestamp)

                            except errors.OperationFailed:
                                LOG.exception(
                                    "Unable to process oplog document %r" % entry
                                )
                            except errors.ConnectionFailed:
                                LOG.exception(
                                    "Connection failed while processing oplog "
                                    "document %r" % entry
                                )

                        if (remove_inc + upsert_inc + update_inc) % 1000 == 0:
                            LOG.debug(
                                "OplogThread: Documents removed: %d, "
                                "inserted: %d, updated: %d so far"
                                % (remove_inc, upsert_inc, update_inc)
                            )

                        LOG.debug("OplogThread: Doc is processed.")

                        last_ts = entry["ts"]

                        # update the checkpoint once per batch; with the
                        # default batch_size of -1, n % -1 == 0 for every n,
                        # so this per-batch update never fires
                        if n % self.batch_size == 1:
                            self.update_checkpoint(last_ts)
                            last_ts = None

                    # update timestamp after running through oplog
                    if last_ts is not None:
                        LOG.debug(
                            "OplogThread: updating checkpoint after "
                            "processing new oplog entries"
                        )
                        self.update_checkpoint(last_ts)

            except (
                pymongo.errors.AutoReconnect,
                pymongo.errors.OperationFailure,
                pymongo.errors.ConfigurationError,
            ):
                LOG.exception(
                    "Cursor closed due to an exception. "
                    "Will attempt to reconnect."
                )

            # update timestamp before attempting to reconnect to MongoDB,
            # after being join()'ed, or if the cursor closes
            if last_ts is not None:
                LOG.debug(
                    "OplogThread: updating checkpoint after an "
                    "Exception, cursor closing, or join() on this "
                    "thread."
                )
                self.update_checkpoint(last_ts)

            LOG.debug(
                "OplogThread: Sleeping. Documents removed: %d, "
                "upserted: %d, updated: %d" % (remove_inc, upsert_inc, update_inc)
            )
            time.sleep(2)
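
Examples #5 and #8 delegate per-entry filtering to self._should_skip_entry, which Example #7 below still performs inline. A simplified sketch of the factored-out method based on that inline logic (the real method also applies the oplog field filter and namespace mapping):

    def _should_skip_entry(self, entry):
        # Returns (skip, is_gridfs_file), mirroring the inline checks
        # in Example #7.
        if entry.get("fromMigrate"):
            # Don't replicate entries resulting from chunk moves.
            return True, False
        ns = entry["ns"]
        if "." not in ns:
            return True, False
        coll = ns.split(".", 1)[1]
        # Ignore system collections and raw GridFS chunks.
        if coll.startswith("system.") or coll.endswith(".chunks"):
            return True, False
        if coll.endswith(".files"):
            # Replicate .files entries only for tracked GridFS namespaces.
            if ns in self.gridfs_files_set:
                return False, True
            return True, False
        return False, False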
Example #6
    def get_file(self, doc):
        return GridFSFile(self.collection, doc)
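
GridFSFile pairs a .files metadata document with the collection its chunks live in. For comparison, reading the same data through PyMongo's own gridfs package looks roughly like this (database, prefix, and file names are illustrative):

    import gridfs
    from pymongo import MongoClient

    client = MongoClient()
    db = client["app"]

    # 'photos' is the GridFS prefix, i.e. the 'photos.files' /
    # 'photos.chunks' collection pair.
    fs = gridfs.GridFS(db, collection="photos")

    for grid_out in fs.find({"filename": "logo.png"}):
        data = grid_out.read()  # reassembles the chunks
        print(grid_out._id, grid_out.length, len(data))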
Example #7
    def run(self):
        """Start the oplog worker.
        """
        LOG.debug("OplogThread: Run thread started")
        while self.running is True:
            LOG.debug("OplogThread: Getting cursor")
            cursor, cursor_len = self.init_cursor()

            # we've fallen too far behind
            if cursor is None and self.checkpoint is not None:
                err_msg = "OplogThread: Last entry no longer in oplog"
                effect = "cannot recover!"
                LOG.error('%s %s %s' % (err_msg, effect, self.oplog))
                self.running = False
                continue

            if cursor_len == 0:
                LOG.debug("OplogThread: Last entry is the one we "
                          "already processed.  Up to date.  Sleeping.")
                time.sleep(1)
                continue

            LOG.debug("OplogThread: Got the cursor, count is %d"
                      % cursor_len)

            last_ts = None
            remove_inc = 0
            upsert_inc = 0
            update_inc = 0
            try:
                LOG.debug("OplogThread: about to process new oplog "
                          "entries")
                while cursor.alive and self.running:
                    LOG.debug("OplogThread: Cursor is still"
                              " alive and thread is still running.")
                    for n, entry in enumerate(cursor):

                        LOG.debug("OplogThread: Iterating through cursor,"
                                  " document number in this cursor is %d"
                                  % n)
                        # Break out if this thread should stop
                        if not self.running:
                            break

                        # Don't replicate entries resulting from chunk moves
                        if entry.get("fromMigrate"):
                            continue

                        # Take fields out of the oplog entry that
                        # shouldn't be replicated. This may nullify
                        # the document if there's nothing to do.
                        if not self.filter_oplog_entry(entry):
                            continue

                        # Sync the current oplog operation
                        operation = entry['op']
                        ns = entry['ns']

                        if '.' not in ns:
                            continue
                        coll = ns.split('.', 1)[1]

                        # Ignore system collections
                        if coll.startswith("system."):
                            continue

                        # Ignore GridFS chunks
                        if coll.endswith('.chunks'):
                            continue

                        is_gridfs_file = False
                        if coll.endswith(".files"):
                            if ns in self.gridfs_files_set:
                                ns = ns[:-len(".files")]
                                is_gridfs_file = True
                            else:
                                continue

                        # use namespace mapping if one exists
                        ns = self.dest_mapping.get(ns, ns)
                        timestamp = util.bson_ts_to_long(entry['ts'])
                        for docman in self.doc_managers:
                            try:
                                LOG.debug("OplogThread: Operation for this "
                                          "entry is %s" % str(operation))

                                # Remove
                                if operation == 'd':
                                    docman.remove(
                                        entry['o']['_id'], ns, timestamp)
                                    remove_inc += 1

                                # Insert
                                elif operation == 'i':  # Insert
                                    # Retrieve inserted document from
                                    # 'o' field in oplog record
                                    doc = entry.get('o')
                                    # Extract timestamp and namespace
                                    if is_gridfs_file:
                                        db, coll = ns.split('.', 1)
                                        gridfile = GridFSFile(
                                            self.primary_client[db][coll],
                                            doc)
                                        docman.insert_file(
                                            gridfile, ns, timestamp)
                                    else:
                                        docman.upsert(doc, ns, timestamp)
                                    upsert_inc += 1

                                # Update
                                elif operation == 'u':
                                    docman.update(entry['o2']['_id'],
                                                  entry['o'],
                                                  ns, timestamp)
                                    update_inc += 1

                                # Command
                                elif operation == 'c':
                                    # use unmapped namespace
                                    doc = entry.get('o')
                                    docman.handle_command(doc,
                                                          entry['ns'],
                                                          timestamp)

                            except errors.OperationFailed:
                                LOG.exception(
                                    "Unable to process oplog document %r"
                                    % entry)
                            except errors.ConnectionFailed:
                                LOG.exception(
                                    "Connection failed while processing oplog "
                                    "document %r" % entry)

                        if (remove_inc + upsert_inc + update_inc) % 1000 == 0:
                            LOG.debug(
                                "OplogThread: Documents removed: %d, "
                                "inserted: %d, updated: %d so far" % (
                                    remove_inc, upsert_inc, update_inc))

                        LOG.debug("OplogThread: Doc is processed.")

                        last_ts = entry['ts']

                        # update the checkpoint once per batch; with the
                        # default batch_size of -1, n % -1 == 0 for every n,
                        # so this per-batch update never fires
                        if n % self.batch_size == 1 and last_ts is not None:
                            self.checkpoint = last_ts
                            self.update_checkpoint()

                    # update timestamp after running through oplog
                    if last_ts is not None:
                        LOG.debug("OplogThread: updating checkpoint after "
                                  "processing new oplog entries")
                        self.checkpoint = last_ts
                        self.update_checkpoint()

            except (pymongo.errors.AutoReconnect,
                    pymongo.errors.OperationFailure,
                    pymongo.errors.ConfigurationError):
                LOG.exception(
                    "Cursor closed due to an exception. "
                    "Will attempt to reconnect.")

            # update timestamp before attempting to reconnect to MongoDB,
            # after being join()'ed, or if the cursor closes
            if last_ts is not None:
                LOG.debug("OplogThread: updating checkpoint after an "
                          "Exception, cursor closing, or join() on this "
                          "thread.")
                self.checkpoint = last_ts
                self.update_checkpoint()

            LOG.debug("OplogThread: Sleeping. Documents removed: %d, "
                      "upserted: %d, updated: %d"
                      % (remove_inc, upsert_inc, update_inc))
            time.sleep(2)
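
Example #7 calls self.init_cursor directly, while Examples #5 and #8 wrap it in retry_until_ok. A sketch of such a retry helper, assuming a fixed attempt budget and a one-second backoff (mongo-connector's actual limits may differ):

    import time

    import pymongo.errors

    def retry_until_ok(func, *args, **kwargs):
        # Retry transient failures a bounded number of times before
        # giving up and re-raising the last error.
        attempts = 60
        for attempt in range(attempts):
            try:
                return func(*args, **kwargs)
            except (pymongo.errors.AutoReconnect,
                    pymongo.errors.OperationFailure):
                if attempt == attempts - 1:
                    raise
                time.sleep(1)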
Example #8
    def run(self):
        """Start the oplog worker.
        """
        ReplicationLagLogger(self, 30).start()
        LOG.debug("OplogThread: Run thread started")

        while self.running is True:
            LOG.debug("OplogThread: Getting cursor")
            cursor, cursor_empty = retry_until_ok(self.init_cursor)
            # we've fallen too far behind
            if cursor is None and self.checkpoint is not None:
                err_msg = "OplogThread: Last entry no longer in oplog"
                effect = "cannot recover!"
                LOG.error('%s %s %s' % (err_msg, effect, self.oplog))
                self.running = False
                continue

            if cursor_empty:
                LOG.debug("OplogThread: Last entry is the one we "
                          "already processed.  Up to date.  Sleeping.")
                time.sleep(1)
                continue

            last_ts = None
            remove_inc = 0
            upsert_inc = 0
            update_inc = 0
            try:
                LOG.debug("OplogThread: about to process new oplog entries")
                while cursor.alive and self.running:
                    LOG.debug("OplogThread: Cursor is still"
                              " alive and thread is still running.")
                    for n, entry in enumerate(cursor):
                        # Break out if this thread should stop
                        if not self.running:
                            break

                        LOG.debug("OplogThread: Iterating through cursor,"
                                  " document number in this cursor is %d" % n)

                        skip, is_gridfs_file = self._should_skip_entry(entry)
                        if skip:
                            # update the last_ts on skipped entries to ensure
                            # our checkpoint does not fall off the oplog. This
                            # also prevents reprocessing skipped entries.
                            last_ts = entry['ts']
                            continue

                        op_add = 0
                        op_remove = 0
                        op_update = 0

                        # Sync the current oplog operation
                        operation = entry['op']
                        ns = entry['ns']
                        timestamp = util.bson_ts_to_long(entry['ts'])
                        for docman in self.doc_managers:

                            @self.ERROR_TIME.time()
                            def process_exception(metric):
                                metric.inc()

                            try:
                                LOG.debug("OplogThread: Operation for this "
                                          "entry is %s" % str(operation))

                                # Remove
                                if operation == 'd':
                                    docman.remove(entry['o']['_id'], ns,
                                                  timestamp)
                                    remove_inc += 1
                                    op_remove += 1

                                # Insert
                                elif operation == 'i':  # Insert
                                    # Retrieve inserted document from
                                    # 'o' field in oplog record
                                    doc = entry.get('o')
                                    # Extract timestamp and namespace
                                    if is_gridfs_file:
                                        db, coll = ns.split('.', 1)
                                        gridfile = GridFSFile(
                                            self.primary_client[db][coll], doc)
                                        docman.insert_file(
                                            gridfile, ns, timestamp)
                                    else:
                                        docman.upsert(doc, ns, timestamp)
                                    upsert_inc += 1
                                    op_add += 1

                                # Update
                                elif operation == 'u':
                                    docman.update(entry['o2']['_id'],
                                                  entry['o'], ns, timestamp)
                                    update_inc += 1
                                    op_update += 1

                                # Command
                                elif operation == 'c':
                                    # use unmapped namespace
                                    doc = entry.get('o')
                                    docman.handle_command(
                                        doc, entry['ns'], timestamp)

                            except errors.OperationFailed:
                                # Remove
                                if operation == 'd':
                                    if op_remove > 0:
                                        op_remove -= 1
                                # Insert
                                elif operation == 'i':
                                    if op_add > 0:
                                        op_add -= 1
                                # Update
                                elif operation == 'u':
                                    if op_update > 0:
                                        op_update -= 1

                                process_exception(
                                    self.error_caught.labels(
                                        'cannot_process_doc',
                                        errors.OperationFailed))

                                LOG.exception(
                                    "Unable to process oplog document %r" %
                                    entry)
                            except errors.ConnectionFailed:
                                # Remove
                                if operation == 'd':
                                    if op_remove > 0:
                                        op_remove -= 1
                                # Insert
                                elif operation == 'i':
                                    if op_add > 0:
                                        op_add -= 1
                                # Update
                                elif operation == 'u':
                                    if op_update > 0:
                                        op_update -= 1

                                process_exception(
                                    self.error_caught.labels(
                                        'connection_failed',
                                        errors.ConnectionFailed))

                                LOG.exception(
                                    "Connection failed while processing oplog "
                                    "document %r" % entry)

                        if (remove_inc + upsert_inc + update_inc) % 1000 == 0:
                            LOG.debug("OplogThread: Documents removed: %d, "
                                      "inserted: %d, updated: %d so far" %
                                      (remove_inc, upsert_inc, update_inc))

                        LOG.debug("OplogThread: Doc is processed.")

                        last_ts = entry['ts']

                        # update the checkpoint once per batch; with the
                        # default batch_size of -1, n % -1 == 0 for every n,
                        # so this per-batch update never fires
                        if n % self.batch_size == 1:
                            self.update_checkpoint(last_ts)
                            last_ts = None

                        LOG.always("Counter: Documents removed: %d, "
                                   "inserted: %d, updated: %d so far" %
                                   (op_remove, op_add, op_update))

                        # TODO: Add collection name as label
                        @self.REQUEST_TIME.time()
                        def process_request(add, remove, update):
                            self.doc_operation_count.labels('add').inc(add)
                            self.doc_operation_count.labels('remove').inc(
                                remove)
                            self.doc_operation_count.labels('update').inc(
                                update)

                        process_request(op_add, op_remove, op_update)

                    # update timestamp after running through oplog
                    if last_ts is not None:
                        LOG.debug("OplogThread: updating checkpoint after "
                                  "processing new oplog entries")
                        self.update_checkpoint(last_ts)

            except (pymongo.errors.AutoReconnect,
                    pymongo.errors.OperationFailure,
                    pymongo.errors.ConfigurationError):
                LOG.exception("Cursor closed due to an exception. "
                              "Will attempt to reconnect.")

            # update timestamp before attempting to reconnect to MongoDB,
            # after being join()'ed, or if the cursor closes
            if last_ts is not None:
                LOG.debug("OplogThread: updating checkpoint after an "
                          "Exception, cursor closing, or join() on this "
                          "thread.")
                self.update_checkpoint(last_ts)

            LOG.debug("OplogThread: Sleeping. Documents removed: %d, "
                      "upserted: %d, updated: %d" %
                      (remove_inc, upsert_inc, update_inc))
            time.sleep(2)
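
Example #8 layers Prometheus instrumentation onto the loop: REQUEST_TIME and ERROR_TIME time the metric updates, while doc_operation_count and error_caught are labelled counters. A sketch of how such collectors could be declared with prometheus_client (the collector names match the excerpt; help strings and the port are assumptions):

    from prometheus_client import Counter, Summary, start_http_server

    REQUEST_TIME = Summary(
        "request_processing_seconds", "Time spent recording doc operations")
    ERROR_TIME = Summary(
        "error_processing_seconds", "Time spent recording caught errors")

    doc_operation_count = Counter(
        "doc_operation_count", "Replicated document operations", ["op"])
    error_caught = Counter(
        "error_caught", "Errors caught while replicating", ["kind", "exc"])

    # Expose the metrics for scraping, e.g. on port 8000.
    start_http_server(8000)

    doc_operation_count.labels("add").inc(3)
    error_caught.labels("connection_failed", "ConnectionFailed").inc()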