Example #1
0
 def close(self, post_process=True):
     """
     Shut the TaskManager down, optionally running post-processing.

     <post_process> when true, post_processing.run is called with
     self.manager_params after the manager has been shut down.

     If self.closing is already set, an error is logged and nothing
     else is done.
     """
     if not self.closing:
         self._shutdown_manager()
         if post_process:
             # launch post-crawl processing
             post_processing.run(self.manager_params)
     else:
         self.logger.error("TaskManager already closed")
 def close(self, post_process=True):
     """
     Run the TaskManager shutdown procedure.

     <post_process> when true, post_processing.run is called with
     self.db_path once the manager has been shut down.

     A call made while self.closing is already set only logs an error.
     """
     already_closed = self.closing
     if already_closed:
         self.logger.error("TaskManager already closed")
         return None

     self._shutdown_manager()
     if not post_process:
         return None
     # launch post-crawl processing
     post_processing.run(self.db_path)
Example #3
0
    def close(self):
        """
        Wait for all child processes to finish executing commands and close everything.

        Sets self.closing, then for each browser in self.browsers:
          - joins its command thread, if one exists
          - kills its browser manager
          - deletes its current profile directory, if one is set
          - sends an UPDATE over self.sock marking the browser's crawl_id
            as finished in the crawl table
        Afterwards the db connection and the socket to the data aggregator
        are closed, the data aggregator is killed, and post-processing is
        run on self.db_path.
        """
        # Imported locally so this method does not depend on the module's
        # import section already providing shutil.
        import shutil

        self.closing = True
        for browser in self.browsers:
            if browser.command_thread is not None:
                browser.command_thread.join()
            browser.kill_browser_manager()
            if browser.current_profile_path is not None:
                # Delete the profile in-process instead of spawning `rm -r`:
                # portable and avoids one fork/exec per browser.
                # ignore_errors keeps the cleanup best-effort, as the
                # unchecked `rm` exit status was before.
                shutil.rmtree(browser.current_profile_path, ignore_errors=True)
            self.sock.send(("UPDATE crawl SET finished = 1 WHERE crawl_id = ?",
                            (browser.crawl_id,)))
        self.db.close()  # close db connection
        self.sock.close()  # close socket to data aggregator
        self.kill_data_aggregator()

        post_processing.run(self.db_path)  # launch post-crawl processing
Example #4
0
    def close(self):
        """
        Wait for all child processes to finish executing commands and close everything.

        For each browser in self.browsers:
          - joins its command thread, if one exists
          - kills its browser manager
          - deletes its current profile directory, if one is set
          - sends an UPDATE over self.sock marking the browser's crawl_id
            as finished in the crawl table
        Afterwards the db connection and the socket to the data aggregator
        are closed, the data aggregator is killed, and post-processing is
        run on self.db_path.
        """
        # Imported locally so this method does not depend on the module's
        # import section already providing shutil.
        import shutil

        for browser in self.browsers:
            if browser.command_thread is not None:
                browser.command_thread.join()
            browser.kill_browser_manager()
            if browser.current_profile_path is not None:
                # Delete the profile in-process instead of spawning `rm -r`:
                # portable and avoids one fork/exec per browser.
                # ignore_errors keeps the cleanup best-effort, as the
                # unchecked `rm` exit status was before.
                shutil.rmtree(browser.current_profile_path, ignore_errors=True)
            self.sock.send(("UPDATE crawl SET finished = 1 WHERE crawl_id = ?",
                            (browser.crawl_id,)))
        self.db.close()  # close db connection
        self.sock.close()  # close socket to data aggregator
        self.kill_data_aggregator()

        post_processing.run(self.db_path)  # launch post-crawl processing