def close(self, post_process=True):
    """
    Shut down the TaskManager, optionally followed by post-processing.

    When self.closing is already set, an error is logged and nothing else
    is done. Otherwise self._shutdown_manager() is called and, if
    <post_process> is truthy, the post_processing pipeline is run with
    self.manager_params.
    """
    if not self.closing:
        self._shutdown_manager()
        if post_process:
            # post-crawl processing is launched only after shutdown returns
            post_processing.run(self.manager_params)
        return

    # closing flag already set: report it instead of shutting down again
    self.logger.error("TaskManager already closed")
def close(self, post_process=True):
    """
    Run the TaskManager shutdown procedure once.

    If self.closing is already set, the call only logs an error. Otherwise
    self._shutdown_manager() is invoked, and when <post_process> is truthy
    the post_processing pipeline is then run on self.db_path.
    """
    already_closing = self.closing
    if already_closing:
        self.logger.error("TaskManager already closed")
    else:
        self._shutdown_manager()
        if post_process:
            # post-crawl processing is launched only after shutdown returns
            post_processing.run(self.db_path)
def close(self):
    """
    Wait for every browser to finish, tear it down, then close everything.

    Sets self.closing, then for each browser in self.browsers:
      - joins its command thread, if it has one
      - kills its browser manager
      - deletes its current profile directory, if it has one
      - sends an UPDATE over self.sock marking the browser's crawl_id as
        finished in the crawl table

    Afterwards the db connection and the data-aggregator socket are closed,
    the data aggregator is killed, and post-crawl processing is run on
    self.db_path.

    The db connection, socket and data aggregator are released even when a
    browser teardown raises; the exception still propagates and
    post-processing is skipped in that case.
    """
    # function-scope import keeps this block self-contained
    import shutil

    self.closing = True
    try:
        for browser in self.browsers:
            if browser.command_thread is not None:
                browser.command_thread.join()
            browser.kill_browser_manager()
            if browser.current_profile_path is not None:
                # Portable replacement for spawning `rm -r`. The old call
                # ignored a failing exit status, so deletion stays
                # best-effort via ignore_errors.
                shutil.rmtree(browser.current_profile_path,
                              ignore_errors=True)
            self.sock.send((
                "UPDATE crawl SET finished = 1 WHERE crawl_id = ?",
                (browser.crawl_id,),
            ))
    finally:
        self.db.close()  # close db connection
        self.sock.close()  # close socket to data aggregator
        self.kill_data_aggregator()

    post_processing.run(self.db_path)  # launch post-crawl processing
def close(self):
    """
    Wait for every browser to finish, tear it down, then close everything.

    For each browser in self.browsers:
      - joins its command thread, if it has one
      - kills its browser manager
      - deletes its current profile directory, if it has one
      - sends an UPDATE over self.sock marking the browser's crawl_id as
        finished in the crawl table

    Afterwards the db connection and the data-aggregator socket are closed,
    the data aggregator is killed, and post-crawl processing is run on
    self.db_path.

    The db connection, socket and data aggregator are released even when a
    browser teardown raises; the exception still propagates and
    post-processing is skipped in that case.
    """
    # function-scope import keeps this block self-contained
    import shutil

    try:
        for browser in self.browsers:
            if browser.command_thread is not None:
                browser.command_thread.join()
            browser.kill_browser_manager()
            if browser.current_profile_path is not None:
                # Portable replacement for spawning `rm -r`. The old call
                # ignored a failing exit status, so deletion stays
                # best-effort via ignore_errors.
                shutil.rmtree(browser.current_profile_path,
                              ignore_errors=True)
            self.sock.send((
                "UPDATE crawl SET finished = 1 WHERE crawl_id = ?",
                (browser.crawl_id,),
            ))
    finally:
        self.db.close()  # close db connection
        self.sock.close()  # close socket to data aggregator
        self.kill_data_aggregator()

    post_processing.run(self.db_path)  # launch post-crawl processing