def shutdown_browser(self, during_init):
    """
    Run the shutdown sequence for this Browser and its BrowserManager.

    In order: join the command thread (if one exists), kill the
    BrowserManager through kill_browser_manager(), archive the profile when
    browser_params['profile_archive_dir'] is set and <during_init> is falsy,
    and finally delete the current profile directory.

    <during_init> when truthy, the profile archive step is skipped
    """
    # Give the command thread a bounded amount of time to finish
    worker = self.command_thread
    if worker is not None:
        self.logger.debug(
            "BROWSER %i: Joining command thread" % self.crawl_id)
        join_started = time.time()
        # 10 seconds of slack over the running command's timeout, or a flat
        # 60 seconds when no timeout is recorded
        join_limit = (60 if self.current_timeout is None
                      else self.current_timeout + 10)
        worker.join(join_limit)
        self.logger.debug(
            "BROWSER %i: %f seconds to join command thread"
            % (self.crawl_id, time.time() - join_started))

    # Take down the BrowserManager process and its children
    self.logger.debug(
        "BROWSER %i: Killing browser manager..." % self.crawl_id)
    self.kill_browser_manager()

    # Optionally archive the browser profile
    archive_dir = self.browser_params['profile_archive_dir']
    self.logger.debug(
        "BROWSER %i: during_init=%s | profile_archive_dir=%s"
        % (self.crawl_id, str(during_init), archive_dir))
    wants_archive = not (during_init or archive_dir is None)
    if wants_archive:
        self.logger.debug(
            "BROWSER %i: Archiving browser profile directory to %s"
            % (self.crawl_id, archive_dir))
        # save_flash is requested only when 'disable_flash' is exactly False
        keep_flash = self.browser_params['disable_flash'] is False
        profile_commands.dump_profile(
            self.current_profile_path,
            self.manager_params,
            self.browser_params,
            archive_dir,
            close_webdriver=False,
            browser_settings=self.browser_settings,
            compress=True,
            save_flash=keep_flash)

    # Remove the temporary profile directory, ignoring removal errors
    profile_dir = self.current_profile_path
    if profile_dir is not None:
        shutil.rmtree(profile_dir, ignore_errors=True)
def launch_browser_manager(self, spawn_timeout=30):
    """
    Set up the BrowserManager process and record the ids it reports.

    If a profile path is already recorded (restart after a crash), the
    crashed profile is first dumped to a temporary directory and
    browser_params is updated so the new browser loads it.

    The BrowserManager is (re)spawned until one of them reports success on
    the status queue within <spawn_timeout>; there is no upper bound on the
    number of attempts.

    <spawn_timeout> is the timeout, in seconds of wall-clock time, for a
        single BrowserManager spawn attempt

    Returns the successfully started BrowserManager Process.
    """
    # Local import: this block previously shelled out to `rm -r`, so the
    # module may not import shutil at file level.
    import shutil

    # if this is restarting from a crash, update the tar location
    # to be a tar of the crashed browser's history
    crashed_profile_path = self.current_profile_path
    if crashed_profile_path is not None:
        # tar contents of crashed profile to a temp dir
        tempdir = tempfile.mkdtemp() + "/"
        profile_commands.dump_profile(crashed_profile_path, tempdir,
                                      close_webdriver=False,
                                      browser_settings=self.browser_settings,
                                      full_profile=True)
        # make sure browser loads crashed profile
        self.browser_params['profile_tar'] = tempdir
        # don't re-randomize attributes
        self.browser_params['random_attributes'] = False
        crash_recovery = True
    else:
        tempdir = None
        crash_recovery = False

    # keep trying to spawn a BrowserManager until we have a successful
    # launch within the timeout limit
    browser_manager = None
    successful_spawn = False
    while not successful_spawn:
        # Resets the command/status queues
        (self.command_queue, self.status_queue) = (Queue(), Queue())

        # builds and launches the browser_manager
        args = (self.command_queue, self.status_queue,
                self.browser_params, crash_recovery)
        browser_manager = Process(target=BrowserManager, args=args)
        browser_manager.start()

        # waits for BrowserManager to send the success tuple, i.e.
        # (current_profile_path, browser pid, display pid, browser settings).
        # The deadline is wall-clock based: counting 1 ms sleeps overshoots
        # the requested timeout because each iteration takes longer than
        # the sleep itself.
        deadline = time.time() + spawn_timeout
        while time.time() < deadline:
            # no status for now -> sleep to avoid pegging CPU
            if self.status_queue.empty():
                time.sleep(0.001)
                continue
            (self.current_profile_path, self.browser_pid,
             self.display_pid, self.browser_settings) = \
                self.status_queue.get()
            successful_spawn = True
            break

        # kill the BrowserManager if it failed to start up the browser
        if not successful_spawn:
            os.kill(browser_manager.pid, signal.SIGKILL)

    # if recovering from a crash, new browser has a new profile dir
    # so the crashed dir and temporary tar dump can be cleaned up
    if tempdir is not None:
        shutil.rmtree(tempdir, ignore_errors=True)
    if crashed_profile_path is not None:
        shutil.rmtree(crashed_profile_path, ignore_errors=True)

    # browser is fresh iff it starts from a blank profile
    self.is_fresh = crashed_profile_path is None
    return browser_manager
def launch_browser_manager(self):
    """
    Set up the BrowserManager process and record the ids it reports.

    When ENABLE_CRASH_RECOVERY is set and a profile path is already
    recorded (restart after a crash), the crashed profile is first dumped
    to a temporary directory and browser_params is updated so the new
    browser loads it.

    Up to self._UNSUCCESSFUL_SPAWN_LIMIT spawn attempts are made. Each
    attempt reads seven messages from the status queue, waiting at most
    self._SPAWN_TIMEOUT for each one. After any failed attempt the
    BrowserManager is killed and the profile directory it created (if it
    got that far) is removed before retrying.

    Returns True if a browser was spawned successfully, False otherwise.
    Exceptions forwarded by the BrowserManager as 'CRITICAL' messages are
    re-raised here.
    """
    # if this is restarting from a crash, update the tar location
    # to be a tar of the crashed browser's history
    if ENABLE_CRASH_RECOVERY and self.current_profile_path is not None:
        # tar contents of crashed profile to a temp dir
        tempdir = tempfile.mkdtemp() + "/"
        profile_commands.dump_profile(
            self.current_profile_path, self.manager_params,
            self.browser_params, tempdir, close_webdriver=False,
            browser_settings=self.browser_settings)
        # make sure browser loads crashed profile
        self.browser_params['profile_tar'] = tempdir
        # don't re-randomize attributes
        self.browser_params['random_attributes'] = False
        crash_recovery = True
    else:
        tempdir = None
        crash_recovery = False

    self.is_fresh = not crash_recovery

    # Try to spawn the browser within the timelimit
    unsuccessful_spawns = 0
    success = False

    def check_queue(launch_status):
        """
        Read one message from the status queue.

        'STATUS' messages mark their step as done in <launch_status> and
        return the payload; 'CRITICAL' re-raises the unpickled exception
        info; 'FAILED' raises BrowserCrashError.
        """
        result = self.status_queue.get(True, self._SPAWN_TIMEOUT)
        if result[0] == 'STATUS':
            launch_status[result[1]] = True
            return result[2]
        elif result[0] == 'CRITICAL':
            # NOTE(review): unpickles data received from the child
            # process; only safe because the sender is trusted.
            reraise(*cPickle.loads(result[1]))
        elif result[0] == 'FAILED':
            raise BrowserCrashError(
                'Browser spawn returned failure status')

    status_strings = [
        'Proxy Ready', 'Profile Created', 'Profile Tar', 'Display',
        'Launch Attempted', 'Browser Launched', 'Browser Ready'
    ]

    while (not success and
           unsuccessful_spawns < self._UNSUCCESSFUL_SPAWN_LIMIT):
        self.logger.debug("BROWSER %i: Spawn attempt %i " %
                          (self.crawl_id, unsuccessful_spawns))
        # Resets the command/status queues
        (self.command_queue, self.status_queue) = (Queue(), Queue())

        # builds and launches the browser_manager
        args = (self.command_queue, self.status_queue,
                self.browser_params, self.manager_params, crash_recovery)
        self.browser_manager = Process(target=BrowserManager, args=args)
        self.browser_manager.daemon = True
        self.browser_manager.start()

        # Read success status of browser manager
        launch_status = dict()
        try:
            # proxy enabled (if necessary)
            check_queue(launch_status)
            # selenium profile created
            spawned_profile_path = check_queue(launch_status)
            # profile tar loaded (if necessary)
            check_queue(launch_status)
            # Display launched
            (self.display_pid, self.display_port) = check_queue(
                launch_status)
            # browser launch attempted
            check_queue(launch_status)
            # Browser launched
            (self.browser_pid, self.browser_settings) = check_queue(
                launch_status)
            ready_token = check_queue(launch_status)
        except (EmptyQueue, BrowserCrashError):
            unsuccessful_spawns += 1
            error_string = "".join(
                " | %s: %s " % (step, launch_status.get(step, False))
                for step in status_strings)
            self.logger.error("BROWSER %i: Spawn unsuccessful %s" %
                              (self.crawl_id, error_string))
        else:
            if ready_token == 'READY':
                success = True
            else:
                unsuccessful_spawns += 1
                self.logger.error(
                    "BROWSER %i: Mismatch of status queue return values, trying again..."
                    % self.crawl_id)

        # Every failed attempt, including a READY mismatch, must tear down
        # the spawned process and its profile; otherwise each retry leaks
        # a BrowserManager and a profile directory.
        if not success:
            self.kill_browser_manager()
            if 'Profile Created' in launch_status:
                shutil.rmtree(spawned_profile_path, ignore_errors=True)

    # If the browser spawned successfully, we should update the
    # current profile path class variable and clean up the tempdir
    # and previous profile path.
    if success:
        self.logger.debug("BROWSER %i: Browser spawn sucessful!" %
                          self.crawl_id)
        previous_profile_path = self.current_profile_path
        self.current_profile_path = spawned_profile_path
        if previous_profile_path is not None:
            shutil.rmtree(previous_profile_path, ignore_errors=True)
        if tempdir is not None:
            shutil.rmtree(tempdir, ignore_errors=True)

    return success
def launch_browser_manager(self, spawn_timeout=30):
    """
    Set up the BrowserManager process and record the ids it reports.

    If a profile path is already recorded (restart after a crash), the
    crashed profile is first dumped to a temporary directory and
    browser_params is updated so the new browser loads it.

    The BrowserManager is (re)spawned until one of them reports success on
    the status queue within <spawn_timeout>; there is no upper bound on the
    number of attempts.

    <spawn_timeout> is the timeout, in seconds of wall-clock time, for a
        single BrowserManager spawn attempt

    Returns the successfully started BrowserManager Process.
    """
    # Local import: this block previously shelled out to `rm -r`, so the
    # module may not import shutil at file level.
    import shutil

    # if this is restarting from a crash, update the tar location
    # to be a tar of the crashed browser's history
    crashed_profile_path = self.current_profile_path
    if crashed_profile_path is not None:
        # tar contents of crashed profile to a temp dir
        tempdir = tempfile.mkdtemp() + "/"
        profile_commands.dump_profile(
            crashed_profile_path,
            tempdir,
            close_webdriver=False,
            browser_settings=self.browser_settings,
            full_profile=True)
        # make sure browser loads crashed profile
        self.browser_params['profile_tar'] = tempdir
        # don't re-randomize attributes
        self.browser_params['random_attributes'] = False
        crash_recovery = True
    else:
        tempdir = None
        crash_recovery = False

    # keep trying to spawn a BrowserManager until we have a successful
    # launch within the timeout limit
    browser_manager = None
    successful_spawn = False
    while not successful_spawn:
        # Resets the command/status queues
        (self.command_queue, self.status_queue) = (Queue(), Queue())

        # builds and launches the browser_manager
        args = (self.command_queue, self.status_queue,
                self.browser_params, crash_recovery)
        browser_manager = Process(target=BrowserManager, args=args)
        browser_manager.start()

        # waits for BrowserManager to send the success tuple, i.e.
        # (current_profile_path, browser pid, display pid, browser settings).
        # The deadline is wall-clock based: counting 1 ms sleeps overshoots
        # the requested timeout because each iteration takes longer than
        # the sleep itself.
        deadline = time.time() + spawn_timeout
        while time.time() < deadline:
            # no status for now -> sleep to avoid pegging CPU
            if self.status_queue.empty():
                time.sleep(0.001)
                continue
            (self.current_profile_path, self.browser_pid,
             self.display_pid, self.browser_settings) = \
                self.status_queue.get()
            successful_spawn = True
            break

        # kill the BrowserManager if it failed to start up the browser
        if not successful_spawn:
            os.kill(browser_manager.pid, signal.SIGKILL)

    # if recovering from a crash, new browser has a new profile dir
    # so the crashed dir and temporary tar dump can be cleaned up
    if tempdir is not None:
        shutil.rmtree(tempdir, ignore_errors=True)
    if crashed_profile_path is not None:
        shutil.rmtree(crashed_profile_path, ignore_errors=True)

    # browser is fresh iff it starts from a blank profile
    self.is_fresh = crashed_profile_path is None
    return browser_manager
def launch_browser_manager(self):
    """
    Set up the BrowserManager process and record the ids it reports.

    If a profile path is already recorded (restart after a crash), the
    crashed profile is first dumped to a temporary directory and
    browser_params is updated so the new browser loads it.

    Up to self._UNSUCCESSFUL_SPAWN_LIMIT spawn attempts are made. Each
    attempt reads seven messages from the status queue, waiting at most
    self._SPAWN_TIMEOUT for each one. After any failed attempt the
    BrowserManager is killed and the profile directory it created (if it
    got that far) is removed before retrying.

    Returns True if a browser was spawned successfully, False otherwise.
    Exceptions forwarded by the BrowserManager as 'CRITICAL' messages are
    re-raised here.
    """
    # if this is restarting from a crash, update the tar location
    # to be a tar of the crashed browser's history
    if self.current_profile_path is not None:
        # tar contents of crashed profile to a temp dir
        tempdir = tempfile.mkdtemp() + "/"
        profile_commands.dump_profile(self.current_profile_path,
                                      self.manager_params,
                                      self.browser_params,
                                      tempdir,
                                      close_webdriver=False,
                                      browser_settings=self.browser_settings)
        # make sure browser loads crashed profile
        self.browser_params['profile_tar'] = tempdir
        # don't re-randomize attributes
        self.browser_params['random_attributes'] = False
        crash_recovery = True
    else:
        tempdir = None
        crash_recovery = False

    self.is_fresh = not crash_recovery

    # Try to spawn the browser within the timelimit
    unsuccessful_spawns = 0
    success = False

    def check_queue(launch_status):
        """
        Read one message from the status queue.

        'STATUS' messages mark their step as done in <launch_status> and
        return the payload; 'CRITICAL' re-raises the unpickled exception
        info; 'FAILED' raises BrowserCrashError.
        """
        result = self.status_queue.get(True, self._SPAWN_TIMEOUT)
        if result[0] == 'STATUS':
            launch_status[result[1]] = True
            return result[2]
        elif result[0] == 'CRITICAL':
            # NOTE(review): unpickles data received from the child
            # process; only safe because the sender is trusted.
            reraise(*cPickle.loads(result[1]))
        elif result[0] == 'FAILED':
            raise BrowserCrashError('Browser spawn returned failure status')

    status_strings = ['Proxy Ready', 'Profile Created', 'Profile Tar',
                      'Display', 'Launch Attempted', 'Browser Launched',
                      'Browser Ready']

    while (not success and
           unsuccessful_spawns < self._UNSUCCESSFUL_SPAWN_LIMIT):
        self.logger.debug("BROWSER %i: Spawn attempt %i " %
                          (self.crawl_id, unsuccessful_spawns))
        # Resets the command/status queues
        (self.command_queue, self.status_queue) = (Queue(), Queue())

        # builds and launches the browser_manager
        args = (self.command_queue, self.status_queue,
                self.browser_params, self.manager_params, crash_recovery)
        self.browser_manager = Process(target=BrowserManager, args=args)
        self.browser_manager.daemon = True
        self.browser_manager.start()

        # Read success status of browser manager
        launch_status = dict()
        try:
            # proxy enabled (if necessary)
            check_queue(launch_status)
            # selenium profile created
            spawned_profile_path = check_queue(launch_status)
            # profile tar loaded (if necessary)
            check_queue(launch_status)
            # Display launched
            (self.display_pid, self.display_port) = \
                check_queue(launch_status)
            # browser launch attempted
            check_queue(launch_status)
            # Browser launched
            (self.browser_pid, self.browser_settings) = \
                check_queue(launch_status)
            ready_token = check_queue(launch_status)
        except (EmptyQueue, BrowserCrashError):
            unsuccessful_spawns += 1
            error_string = "".join(
                " | %s: %s " % (step, launch_status.get(step, False))
                for step in status_strings)
            self.logger.error("BROWSER %i: Spawn unsuccessful %s" %
                              (self.crawl_id, error_string))
        else:
            if ready_token == 'READY':
                success = True
            else:
                unsuccessful_spawns += 1
                self.logger.error(
                    "BROWSER %i: Mismatch of status queue return values, trying again..."
                    % self.crawl_id)

        # Every failed attempt, including a READY mismatch, must tear down
        # the spawned process and its profile; otherwise each retry leaks
        # a BrowserManager and a profile directory.
        if not success:
            self.kill_browser_manager()
            if 'Profile Created' in launch_status:
                shutil.rmtree(spawned_profile_path, ignore_errors=True)

    # If the browser spawned successfully, we should update the
    # current profile path class variable and clean up the tempdir
    # and previous profile path.
    if success:
        self.logger.debug("BROWSER %i: Browser spawn sucessful!" %
                          self.crawl_id)
        previous_profile_path = self.current_profile_path
        self.current_profile_path = spawned_profile_path
        if previous_profile_path is not None:
            shutil.rmtree(previous_profile_path, ignore_errors=True)
        if tempdir is not None:
            shutil.rmtree(tempdir, ignore_errors=True)

    return success
def launch_browser_manager(self, spawn_timeout=120):
    """
    Set up the BrowserManager process and record the ids it reports.

    If a profile path is already recorded (restart after a crash), the
    crashed profile is first dumped to a temporary directory and
    browser_params is updated so the new browser loads it.

    Up to four spawn attempts are made. Each attempt reads five messages
    from the status queue: profile path, (display pid, display port), a
    launch-attempted marker, (browser pid, browser settings) and the
    literal 'READY'. After any failed attempt the BrowserManager is killed
    and the profile directory it reported (if any) is removed.

    self.current_profile_path is only updated once a spawn has fully
    succeeded, so after a failed launch it still refers to the previous
    (crashed) profile rather than to a directory that was just deleted.

    <spawn_timeout> is the timeout, in seconds, for each status message

    Returns True if a browser was spawned successfully, False otherwise.
    """
    # if this is restarting from a crash, update the tar location
    # to be a tar of the crashed browser's history
    crashed_profile_path = self.current_profile_path
    if crashed_profile_path is not None:
        # tar contents of crashed profile to a temp dir
        tempdir = tempfile.mkdtemp() + "/"
        profile_commands.dump_profile(
            crashed_profile_path,
            tempdir,
            close_webdriver=False,
            browser_settings=self.browser_settings,
            full_profile=True)
        # make sure browser loads crashed profile
        self.browser_params['profile_tar'] = tempdir
        # don't re-randomize attributes
        self.browser_params['random_attributes'] = False
        crash_recovery = True
    else:
        tempdir = None
        crash_recovery = False

    # Try to spawn the browser within the timelimit
    max_spawn_attempts = 4
    unsuccessful_spawns = 0
    success = False
    spawned_profile_path = None
    while not success and unsuccessful_spawns < max_spawn_attempts:
        self.logger.debug("BROWSER %i: Spawn attempt %i " %
                          (self.crawl_id, unsuccessful_spawns))
        # Resets the command/status queues
        (self.command_queue, self.status_queue) = (Queue(), Queue())

        # builds and launches the browser_manager
        args = (self.command_queue, self.status_queue,
                self.browser_params, crash_recovery)
        self.browser_manager = Process(target=BrowserManager, args=args)
        self.browser_manager.daemon = True
        self.browser_manager.start()

        # Read success status of browser manager
        prof_done = disp_done = launch_attempted = browser_done = False
        spawned_profile_path = None
        try:
            spawned_profile_path = self.status_queue.get(
                True, spawn_timeout)
            prof_done = True
            (self.display_pid, self.display_port) = self.status_queue.get(
                True, spawn_timeout)
            disp_done = True
            # launch-attempted marker; its payload is not used
            self.status_queue.get(True, spawn_timeout)
            launch_attempted = True
            (self.browser_pid, self.browser_settings) = \
                self.status_queue.get(True, spawn_timeout)
            browser_done = True
            ready_token = self.status_queue.get(True, spawn_timeout)
        except EmptyQueue:
            unsuccessful_spawns += 1
            self.logger.error(
                "BROWSER %i: Spawn unsuccessful | Profile: %s | Display: %s | Launch attempted: %s | Browser: %s"
                % (self.crawl_id, str(prof_done), str(disp_done),
                   str(launch_attempted), str(browser_done)))
        else:
            if ready_token == 'READY':
                success = True
            else:
                unsuccessful_spawns += 1
                self.logger.error(
                    "BROWSER %i: Mismatch of status queue return values, trying again..."
                    % self.crawl_id)

        # Every failed attempt, including a READY mismatch, must tear down
        # the spawned process and the profile it created; only that
        # profile is removed, never the crashed one being recovered.
        if not success:
            self.kill_browser_manager()
            if spawned_profile_path is not None:
                shutil.rmtree(spawned_profile_path, ignore_errors=True)

    # if recovering from a crash, new browser has a new profile dir
    # so the crashed dir and temporary tar dump can be cleaned up
    if success:
        self.logger.debug("BROWSER %i: Browser spawn sucessful!" %
                          self.crawl_id)
        self.current_profile_path = spawned_profile_path
        if tempdir is not None:
            shutil.rmtree(tempdir, ignore_errors=True)
        if crashed_profile_path is not None:
            shutil.rmtree(crashed_profile_path, ignore_errors=True)

    # browser is fresh iff it starts from a blank profile
    self.is_fresh = crashed_profile_path is None
    return success
def launch_browser_manager(self, spawn_timeout=120):
    """
    Set up the BrowserManager process and record the ids it reports.

    If a profile path is already recorded (restart after a crash), the
    crashed profile is first dumped to a temporary directory and
    browser_params is updated so the new browser loads it.

    Up to four spawn attempts are made. Each attempt reads six messages
    from the status queue: profile path, a profile-tar marker,
    (display pid, display port), a launch-attempted marker,
    (browser pid, browser settings) and the literal 'READY'. After any
    failed attempt the BrowserManager is killed and the profile directory
    it reported (if any) is removed.

    self.current_profile_path is only updated once a spawn has fully
    succeeded, so after a failed launch it still refers to the previous
    (crashed) profile rather than to a directory that was just deleted.

    <spawn_timeout> is the timeout, in seconds, for each status message

    Returns True if a browser was spawned successfully, False otherwise.
    """
    # if this is restarting from a crash, update the tar location
    # to be a tar of the crashed browser's history
    crashed_profile_path = self.current_profile_path
    if crashed_profile_path is not None:
        # tar contents of crashed profile to a temp dir
        tempdir = tempfile.mkdtemp() + "/"
        profile_commands.dump_profile(crashed_profile_path,
                                      self.manager_params,
                                      self.browser_params,
                                      tempdir,
                                      close_webdriver=False,
                                      browser_settings=self.browser_settings)
        # make sure browser loads crashed profile
        self.browser_params['profile_tar'] = tempdir
        # don't re-randomize attributes
        self.browser_params['random_attributes'] = False
        crash_recovery = True
    else:
        tempdir = None
        crash_recovery = False

    # Try to spawn the browser within the timelimit
    max_spawn_attempts = 4
    unsuccessful_spawns = 0
    success = False
    spawned_profile_path = None
    while not success and unsuccessful_spawns < max_spawn_attempts:
        self.logger.debug("BROWSER %i: Spawn attempt %i " %
                          (self.crawl_id, unsuccessful_spawns))
        # Resets the command/status queues
        (self.command_queue, self.status_queue) = (Queue(), Queue())

        # builds and launches the browser_manager
        args = (self.command_queue, self.status_queue,
                self.browser_params, self.manager_params, crash_recovery)
        self.browser_manager = Process(target=BrowserManager, args=args)
        self.browser_manager.daemon = True
        self.browser_manager.start()

        # Read success status of browser manager
        prof_done = prof_tar_done = disp_done = False
        launch_attempted = browser_done = False
        spawned_profile_path = None
        try:
            spawned_profile_path = self.status_queue.get(
                True, spawn_timeout)
            prof_done = True
            # profile-tar marker; its payload is not used
            self.status_queue.get(True, spawn_timeout)
            prof_tar_done = True
            (self.display_pid, self.display_port) = \
                self.status_queue.get(True, spawn_timeout)
            disp_done = True
            # launch-attempted marker; its payload is not used
            self.status_queue.get(True, spawn_timeout)
            launch_attempted = True
            (self.browser_pid, self.browser_settings) = \
                self.status_queue.get(True, spawn_timeout)
            browser_done = True
            ready_token = self.status_queue.get(True, spawn_timeout)
        except EmptyQueue:
            unsuccessful_spawns += 1
            self.logger.error(
                "BROWSER %i: Spawn unsuccessful | Profile Created: %s | Profile Tar: %s | Display: %s | Launch attempted: %s | Browser: %s"
                % (self.crawl_id, str(prof_done), str(prof_tar_done),
                   str(disp_done), str(launch_attempted),
                   str(browser_done)))
        else:
            if ready_token == 'READY':
                success = True
            else:
                unsuccessful_spawns += 1
                self.logger.error(
                    "BROWSER %i: Mismatch of status queue return values, trying again..."
                    % self.crawl_id)

        # Every failed attempt, including a READY mismatch, must tear down
        # the spawned process and the profile it created; only that
        # profile is removed, never the crashed one being recovered.
        if not success:
            self.kill_browser_manager()
            if spawned_profile_path is not None:
                shutil.rmtree(spawned_profile_path, ignore_errors=True)

    # if recovering from a crash, new browser has a new profile dir
    # so the crashed dir and temporary tar dump can be cleaned up
    if success:
        self.logger.debug("BROWSER %i: Browser spawn sucessful!" %
                          self.crawl_id)
        self.current_profile_path = spawned_profile_path
        if tempdir is not None:
            shutil.rmtree(tempdir, ignore_errors=True)
        if crashed_profile_path is not None:
            shutil.rmtree(crashed_profile_path, ignore_errors=True)

    # browser is fresh iff it starts from a blank profile
    self.is_fresh = crashed_profile_path is None
    return success