Example #1
    def _generate_workers(self, files, state, start_utc_secs, end_utc_secs):
        """Generate the threads that tails the data sources and put the fetched
        entries to the files"""
        # Create working threads to handle to track/dump mongodb activities
        workers_info = []
        doc_queue = Queue.Queue()

        # Writer thread; we only have one writer since we assume all files
        # will be written to the same device (disk or SSD), so multiple
        # writers would yield little benefit.
        workers_info.append({
            "name": "write-all-docs-to-file",
            "thread": Thread(
                target=MongoQueryRecorder._process_doc_queue,
                args=(doc_queue, files, state))
        })
        tailer = utils.get_oplog_tailer(self.oplog_client,
                                        # we are only interested in "insert"
                                        ["i"],
                                        self.config["target_databases"],
                                        self.config["target_collections"],
                                        Timestamp(start_utc_secs, 0))
        oplog_cursor_id = tailer.cursor_id
        workers_info.append({
            "name": "tailing-oplogs",
            "on_close":
            lambda: self.oplog_client.kill_cursors([oplog_cursor_id]),
            "thread": Thread(
                target=tail_to_queue,
                args=(tailer, "oplog", doc_queue, state,
                      Timestamp(end_utc_secs, 0)))
        })

        start_datetime = datetime.utcfromtimestamp(start_utc_secs)
        end_datetime = datetime.utcfromtimestamp(end_utc_secs)
        for profiler_name, client in self.profiler_clients.items():
            # create a profile collection tailer for each db
            for db in self.config["target_databases"]:
                tailer = utils.get_profiler_tailer(
                    client, db, self.config["target_collections"],
                    start_datetime)
                tailer_id = "%s_%s" % (db, profiler_name)
                profiler_cursor_id = tailer.cursor_id
                workers_info.append({
                    "name": "tailing-profiler for %s on %s" % (db, profiler_name),
                    "on_close":
                    lambda: self.profiler_client.kill_cursors([profiler_cursor_id]),
                    "thread": Thread(
                        target=tail_to_queue,
                        args=(tailer, tailer_id, doc_queue, state,
                              end_datetime))
                })

        for worker_info in workers_info:
            utils.LOG.info("Starting thread: %s", worker_info["name"])
            worker_info["thread"].setDaemon(True)
            worker_info["thread"].start()

        return workers_info
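
None of the examples show how the returned workers_info list is consumed. Below is a minimal shutdown sketch, assuming the caller fires each worker's "on_close" hook (where present) and then joins the daemon threads; the _join_workers helper and its timeout are hypothetical, not part of the original recorder:

    def _join_workers(workers_info, timeout_secs=5):
        """Hypothetical shutdown helper for the list returned above."""
        for worker_info in workers_info:
            # The writer thread registers no "on_close" hook, so fall
            # back to a no-op for workers without one.
            on_close = worker_info.get("on_close")
            if on_close is not None:
                on_close()
        for worker_info in workers_info:
            # Daemon threads die with the process anyway; joining with a
            # timeout gives each one a chance to drain cleanly.
            worker_info["thread"].join(timeout_secs)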
Example #2
    def _generate_workers(self, files, state, start_utc_secs, end_utc_secs):
        """Generate the threads that tail the data sources and put the fetched
        entries to the files"""

        # Initialize a thread-safe queue that we'll put the docs into
        doc_queue = Queue.Queue()

        # Initialize a list that will keep track of all the worker threads that
        # handle tracking/dumping of mongodb activities
        workers_info = []

        # Writer thread; we only have one writer since we assume all files
        # will be written to the same device (disk or SSD), so multiple
        # writers would yield little benefit.
        workers_info.append({
            "name":
            WRITER_THREAD_NAME,
            "thread":
            Thread(target=MongoQueryRecorder._process_doc_queue,
                   args=(doc_queue, files, state))
        })

        # For each server in the "oplog_servers" config...
        for server_string, mongo_client in self.oplog_clients.items():

            # Create a tailing cursor (aka a tailer) on an oplog collection.
            # "i" stands for the only op type we care about, which is an insert
            tailer = utils.get_oplog_tailer(mongo_client, ["i"],
                                            self.config["target_databases"],
                                            self.config["target_collections"],
                                            Timestamp(start_utc_secs, 0))

            # Create a new thread and add some metadata to it
            workers_info.append({
                "name":
                "tailing-oplogs on %s" % server_string,
                "on_close":
                lambda: self.oplog_client.kill_cursors([tailer.cursor_id]),
                "thread":
                Thread(target=tail_to_queue,
                       args=(tailer, "oplog", doc_queue, state,
                             Timestamp(end_utc_secs, 0)))
            })

        start_datetime = datetime.utcfromtimestamp(start_utc_secs)
        end_datetime = datetime.utcfromtimestamp(end_utc_secs)

        # For each server in the "profiler_servers" config...
        for server_string, mongo_client in self.profiler_clients.items():

            # For each database in the "target_databases" config...
            for db in self.config["target_databases"]:

                # Create a tailing cursor (aka a tailer) on a profile collection
                tailer = utils.get_profiler_tailer(
                    mongo_client, db, self.config["target_collections"],
                    start_datetime)

                # Create a new thread and add some metadata to it
                tailer_id = "%s_%s" % (db, server_string)
                workers_info.append({
                    "name":
                    "tailing-profiler for %s on %s" % (db, server_string),
                    "on_close":
                    lambda: self.profiler_client.kill_cursors(
                        [tailer.cursor_id]),
                    "thread":
                    Thread(target=tail_to_queue,
                           args=(tailer, tailer_id, doc_queue, state,
                                 end_datetime))
                })

        # Daemonize each thread and start it
        for worker_info in workers_info:
            utils.LOG.info("Starting thread: %s", worker_info["name"])
            worker_info["thread"].setDaemon(True)
            worker_info["thread"].start()

        # Return the list of all the started threads
        return workers_info
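
The "on_close" lambdas in the loops above use default-argument binding because a bare lambda inside a for loop captures the loop variables, not their current values: every callback would otherwise kill only the last iteration's cursor. A standalone demonstration of the pitfall and the fix:

    callbacks = [lambda: i for i in range(3)]
    print([cb() for cb in callbacks])      # [2, 2, 2]: all share the final i

    callbacks = [lambda i=i: i for i in range(3)]
    print([cb() for cb in callbacks])      # [0, 1, 2]: i bound per iteration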
Example #3
    def _generate_workers(self, files, state, start_utc_secs, end_utc_secs):
        """Generate the threads that tail the data sources and put the fetched
        entries to the files"""

        # Initialize a thread-safe queue that we'll put the docs into
        doc_queue = Queue.Queue()

        # Initialize a list that will keep track of all the worker threads that
        # handle tracking/dumping of mongodb activities
        workers_info = []

        # Writer thread; we only have one writer since we assume all files
        # will be written to the same device (disk or SSD), so multiple
        # writers would yield little benefit.
        workers_info.append({
            "name": "write-all-docs-to-file",
            "thread": Thread(
                target=MongoQueryRecorder._process_doc_queue,
                args=(doc_queue, files, state))
        })

        # For each server in the "oplog_servers" config...
        for server_string, mongo_client in self.oplog_clients.items():

            # Create a tailing cursor (aka a tailer) on an oplog collection.
            # "i" stands for the only op type we care about, which is an insert
            tailer = utils.get_oplog_tailer(mongo_client, ["i"],
                                            self.config["target_databases"],
                                            self.config["target_collections"],
                                            Timestamp(start_utc_secs, 0))

            # Create a new thread and add some metadata to it
            workers_info.append({
                "name": "tailing-oplogs on %s" % server_string,
                "on_close":
                    lambda: self.oplog_client.kill_cursors([tailer.cursor_id]),
                "thread": Thread(
                    target=tail_to_queue,
                    args=(tailer, "oplog", doc_queue, state,
                          Timestamp(end_utc_secs, 0)))
            })

        start_datetime = datetime.utcfromtimestamp(start_utc_secs)
        end_datetime = datetime.utcfromtimestamp(end_utc_secs)

        # For each server in the "profiler_servers" config...
        for server_string, mongo_client in self.profiler_clients.items():

            # For each database in the "target_databases" config...
            for db in self.config["target_databases"]:

                # Create a tailing cursor (aka a tailer) on a profile collection
                tailer = utils.get_profiler_tailer(mongo_client, db,
                                                   self.config["target_collections"],
                                                   start_datetime)

                # Create a new thread and add some metadata to it
                tailer_id = "%s_%s" % (db, server_string)
                workers_info.append({
                    "name": "tailing-profiler for %s on %s" % (db, server_string),
                    "on_close":
                        lambda: self.profiler_client.kill_cursors([tailer.cursor_id]),
                    "thread": Thread(
                        target=tail_to_queue,
                        args=(tailer, tailer_id, doc_queue, state,
                              end_datetime))
                })

        # Daemonize each thread and start it
        for worker_info in workers_info:
            utils.LOG.info("Starting thread: %s", worker_info["name"])
            worker_info["thread"].setDaemon(True)
            worker_info["thread"].start()

        # Return the list of all the started threads
        return workers_info
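
All four snippets target Python 2 (Queue.Queue, Thread.setDaemon). On Python 3 the queue module is lowercase and daemon status is a constructor argument; a minimal port sketch, with a placeholder consumer standing in for _process_doc_queue:

    import queue                     # was Queue in Python 2
    from threading import Thread

    def drain(doc_queue):
        # Placeholder consumer; stops on a None sentinel.
        while True:
            if doc_queue.get() is None:
                break

    doc_queue = queue.Queue()
    worker = Thread(target=drain, args=(doc_queue,),
                    daemon=True)     # replaces setDaemon(True)
    worker.start()
    doc_queue.put(None)
    worker.join()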
Example #4
    def _generate_workers(self, files, state, start_utc_secs, end_utc_secs):
        """Generate the threads that tails the data sources and put the fetched
        entries to the files"""
        # Create working threads to handle to track/dump mongodb activities
        workers_info = []
        doc_queue = Queue.Queue()

        # Writer thread; we only have one writer since we assume all files
        # will be written to the same device (disk or SSD), so multiple
        # writers would yield little benefit.
        workers_info.append({
            "name":
            "write-all-docs-to-file",
            "thread":
            Thread(target=MongoQueryRecorder._process_doc_queue,
                   args=(doc_queue, files, state))
        })
        tailer = utils.get_oplog_tailer(
            self.oplog_client,
            # we are only interested in "insert"
            ["i"],
            self.config["target_databases"],
            self.config["target_collections"],
            Timestamp(start_utc_secs, 0))
        oplog_cursor_id = tailer.cursor_id
        workers_info.append({
            "name":
            "tailing-oplogs",
            "on_close":
            lambda: self.oplog_client.kill_cursors([oplog_cursor_id]),
            "thread":
            Thread(target=tail_to_queue,
                   args=(tailer, "oplog", doc_queue, state,
                         Timestamp(end_utc_secs, 0)))
        })

        start_datetime = datetime.utcfromtimestamp(start_utc_secs)
        end_datetime = datetime.utcfromtimestamp(end_utc_secs)
        for profiler_name, client in self.profiler_clients.items():
            # create a profile collection tailer for each db
            for db in self.config["target_databases"]:
                tailer = utils.get_profiler_tailer(
                    client, db, self.config["target_collections"],
                    start_datetime)
                tailer_id = "%s_%s" % (db, profiler_name)
                profiler_cursor_id = tailer.cursor_id
                workers_info.append({
                    "name":
                    "tailing-profiler for %s on %s" % (db, profiler_name),
                    "on_close":
                    lambda: self.profiler_client.kill_cursors(
                        [profiler_cursor_id]),
                    "thread":
                    Thread(target=tail_to_queue,
                           args=(tailer, tailer_id, doc_queue, state,
                                 end_datetime))
                })

        for worker_info in workers_info:
            utils.LOG.info("Starting thread: %s", worker_info["name"])
            worker_info["thread"].setDaemon(True)
            worker_info["thread"].start()

        return workers_info
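
Every variant hands doc_queue to MongoQueryRecorder._process_doc_queue without showing it. A plausible consumer is sketched below; the (source, doc) tuple shape, the state.timeout stop flag, and pickle as the on-disk format are assumptions for illustration, not the recorder's actual implementation:

    import Queue    # Python 2 name; "queue" on Python 3
    import pickle

    def _process_doc_queue(doc_queue, files, state):
        """Hypothetical writer loop: append each fetched doc to the file
        registered for its source until asked to stop."""
        while not getattr(state, "timeout", False):
            try:
                source, doc = doc_queue.get(timeout=1)
            except Queue.Empty:
                continue
            pickle.dump(doc, files[source])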