def test_tp_cookies_opt():
    """Check validation of the ``tp_cookies`` browser parameter.

    An unsupported value is expected to make ``validate_browser_params``
    raise ``ConfigError``; the value ``"never"`` is expected to pass.
    """
    params = BrowserParams()

    # Rejected value.
    params.tp_cookies = "something unsupported"
    with pytest.raises(ConfigError):
        validate_browser_params(params)

    # Accepted value: validation must complete without raising.
    params.tp_cookies = "never"
    validate_browser_params(params)
def test_browser_type():
    """Check validation of the ``browser`` browser parameter.

    An unsupported value is expected to make ``validate_browser_params``
    raise ``ConfigError``; the value ``"firefox"`` is expected to pass.
    """
    params = BrowserParams()

    # Rejected value.
    params.browser = "something unsupported"
    with pytest.raises(ConfigError):
        validate_browser_params(params)

    # Accepted value: validation must complete without raising.
    params.browser = "firefox"
    validate_browser_params(params)
def test_save_content_type():
    """Check validation of the ``save_content`` browser parameter.

    An empty list and an unsupported string are each expected to make
    ``validate_browser_params`` raise ``ConfigError``; ``False`` and
    ``"script"`` are expected to pass.
    """
    params = BrowserParams()

    # Values that must be rejected, in the order they are tried.
    for rejected in ([], "something unsupported"):
        params.save_content = rejected
        with pytest.raises(ConfigError):
            validate_browser_params(params)

    # Values that must be accepted: validation completes without raising.
    for accepted in (False, "script"):
        params.save_content = accepted
        validate_browser_params(params)
def test_display_mode():
    """Check validation of the ``display_mode`` browser parameter.

    An unsupported string and an empty list are each expected to make
    ``validate_browser_params`` raise ``ConfigError``; ``"native"`` is
    expected to pass.
    """
    params = BrowserParams()

    # Values that must be rejected, in the order they are tried.
    for rejected in ("something unsupported", []):
        params.display_mode = rejected
        with pytest.raises(ConfigError):
            validate_browser_params(params)

    # Accepted value: validation must complete without raising.
    params.display_mode = "native"
    validate_browser_params(params)
def __init__(
    self,
    manager_params_temp: ManagerParams,
    browser_params_temp: List[BrowserParams],
    logger_kwargs: Optional[Dict[Any, Any]] = None,
) -> None:
    """Initialize the TaskManager with browser and manager config params

    The given parameters are validated, converted to their internal
    counterparts, and then used to create the output directories, start
    the logging server, the data aggregators, the browsers, the watchdog
    thread and the completion-handler thread.

    Parameters
    ----------
    manager_params_temp : ManagerParams
        TaskManager configuration parameters
    browser_params_temp : list of BrowserParams
        Browser configuration parameters. It is a list which
        includes individual configurations for each browser.
    logger_kwargs : dict, optional
        Keyword arguments to pass to MPLogger on initialization.
        Defaults to no extra keyword arguments.
    """
    # Validate every piece of configuration before doing any work.
    validate_manager_params(manager_params_temp)
    for bp in browser_params_temp:
        validate_browser_params(bp)
    validate_crawl_configs(manager_params_temp, browser_params_temp)

    # Convert the user-facing parameter objects to their internal forms.
    manager_params = ManagerParamsInternal(**manager_params_temp.to_dict())
    browser_params = [
        BrowserParamsInternal(**bp.to_dict()) for bp in browser_params_temp
    ]

    # Expand a leading "~" in the configured directories.
    # NOTE: expanduser does not make a relative path absolute.
    if manager_params.data_directory:
        manager_params.data_directory = os.path.expanduser(
            manager_params.data_directory
        )

    if manager_params.log_directory:
        manager_params.log_directory = os.path.expanduser(
            manager_params.log_directory
        )

    # Derive the file and directory locations from the base directories.
    manager_params.database_name = os.path.join(
        manager_params.data_directory, manager_params.database_name
    )
    manager_params.log_file = os.path.join(
        manager_params.log_directory, manager_params.log_file
    )
    manager_params.screenshot_path = os.path.join(
        manager_params.data_directory, "screenshots"
    )
    manager_params.source_dump_path = os.path.join(
        manager_params.data_directory, "sources"
    )

    self.manager_params = manager_params
    self.browser_params = browser_params
    # A None sentinel replaces the former mutable ``{}`` default so that no
    # dict object is shared between TaskManager instances.
    self._logger_kwargs = {} if logger_kwargs is None else logger_kwargs

    # Create data directories if they do not exist. exist_ok avoids the
    # race between checking for the directory and creating it.
    os.makedirs(manager_params.screenshot_path, exist_ok=True)
    os.makedirs(manager_params.source_dump_path, exist_ok=True)

    # Number of browsers as given in the manager parameters
    self.num_browsers = manager_params.num_browsers

    # Parse and flesh out js_instrument_settings
    for a_browsers_params in self.browser_params:
        js_settings = a_browsers_params.js_instrument_settings
        cleaned_js_settings = clean_js_instrumentation_settings(js_settings)
        a_browsers_params.js_instrument_settings = cleaned_js_settings

    # Flow control
    self.closing = False
    self.failure_status: Optional[Dict[str, Any]] = None
    self.threadlock = threading.Lock()
    self.failurecount = 0
    # A falsy failure_limit (e.g. None or 0) selects the computed default.
    if manager_params.failure_limit:
        self.failure_limit = manager_params.failure_limit
    else:
        self.failure_limit = self.num_browsers * 2 + 10

    # Start logging server thread
    self.logging_server = MPLogger(
        self.manager_params.log_file, self.manager_params, **self._logger_kwargs
    )
    self.manager_params.logger_address = self.logging_server.logger_address
    self.logger = logging.getLogger("openwpm")

    # Initialize the data aggregators
    self._launch_aggregators()

    # Sets up the BrowserManager(s) + associated queues
    self.browsers = self._initialize_browsers(browser_params)
    self._launch_browsers()

    # Start the manager watchdog
    thread = threading.Thread(target=self._manager_watchdog, args=())
    thread.daemon = True
    thread.name = "OpenWPM-watchdog"
    thread.start()

    # Save crawl config information to database
    openwpm_v, browser_v = get_version()
    self.data_aggregator.save_configuration(openwpm_v, browser_v)
    self.logger.info(
        get_configuration_string(
            self.manager_params, browser_params, (openwpm_v, browser_v)
        )
    )

    # Command sequences that have not yet been marked complete, keyed by int.
    self.unsaved_command_sequences: Dict[int, CommandSequence] = dict()
    self.callback_thread = threading.Thread(
        target=self._mark_command_sequences_complete, args=()
    )
    self.callback_thread.name = "OpenWPM-completion_handler"
    self.callback_thread.start()