def __init__(self, args, unit_test=False):
    '''Remember the options and create the factory with its class map.

    Args:
        args: Options object; stored as ``_args``.
        unit_test (bool): Stored as ``_unit_test``.
    '''
    self._args = args

    # Factory name -> class registered under that name. Note that
    # 'HTMLParser' is mapped to NotImplemented rather than to a class.
    class_map = {
        'Application': Application,
        'BatchDocumentConverter': BatchDocumentConverter,
        'BandwidthLimiter': BandwidthLimiter,
        'HTTPClient': HTTPClient,
        'CookieJar': CookieJar,
        'CookieJarWrapper': CookieJarWrapper,
        'CookiePolicy': DeFactoCookiePolicy,
        'ConnectionPool': ConnectionPool,
        'CSSScraper': CSSScraper,
        'DemuxDocumentScraper': DemuxDocumentScraper,
        'DemuxURLFilter': DemuxURLFilter,
        'FTPProcessor': FTPProcessor,
        'ElementWalker': ElementWalker,
        'FetchRule': FetchRule,
        'FileWriter': NullWriter,
        'FTPClient': FTPClient,
        'FTPProcessorFetchParams': FTPProcessorFetchParams,
        'HTTPProxyServer': HTTPProxyServer,
        'HTMLParser': NotImplemented,
        'HTMLScraper': HTMLScraper,
        'JavaScriptScraper': JavaScriptScraper,
        'PathNamer': PathNamer,
        'PhantomJSDriver': PhantomJSDriver,
        'PhantomJSCoprocessor': PhantomJSCoprocessor,
        'PipelineSeries': PipelineSeries,
        'ProcessingRule': ProcessingRule,
        'Processor': DelegateProcessor,
        'ProxyCoprocessor': ProxyCoprocessor,
        'ProxyHostFilter': ProxyHostFilter,
        'RedirectTracker': RedirectTracker,
        'Request': Request,
        'Resolver': Resolver,
        'ResourceMonitor': ResourceMonitor,
        'ResultRule': ResultRule,
        'RobotsTxtChecker': RobotsTxtChecker,
        'RobotsTxtPool': RobotsTxtPool,
        'SitemapScraper': SitemapScraper,
        'Statistics': Statistics,
        'URLInfo': URLInfo,
        'URLTable': URLTableHookWrapper,
        'URLTableImplementation': SQLURLTable,
        'URLRewriter': URLRewriter,
        'Waiter': LinearWaiter,
        'WARCRecorder': WARCRecorder,
        'WebClient': WebClient,
        'WebProcessor': WebProcessor,
        'WebProcessorFetchParams': WebProcessorFetchParams,
        'YoutubeDlCoprocessor': YoutubeDlCoprocessor,
    }
    self._factory = Factory(class_map)

    self._unit_test = unit_test
def test_factory(self):
    '''Check Factory registration, instantiation and container access.'''
    registry = Factory()
    registry.set('dict', dict)

    # Registering a class alone must not make the name count as present.
    self.assertNotIn('dict', registry)
    self.assertFalse(registry.is_all_initialized())

    created = registry.new('dict', [('hi', 'hello')])

    # After new() the instance is reachable through every accessor.
    self.assertIn('dict', registry)
    self.assertEqual(created, registry['dict'])
    self.assertTrue(registry.is_all_initialized())
    self.assertEqual(1, len(registry))
    self.assertEqual(['dict'], list(iter(registry)))
    self.assertEqual(created, registry.instance_map['dict'])

    # A second new() for the same name is expected to be rejected.
    with self.assertRaises(ValueError):
        registry.new('dict', [('hi', 'hello')])
class Builder(object):
    '''Assembles the application from a factory and a series of pipelines.

    Args:
        args: Options from :class:`argparse.ArgumentParser`
        unit_test (bool): When true, :meth:`get_stderr` hands back
            ``sys.stdout`` in place of ``sys.stderr``.
    '''

    def __init__(self, args, unit_test=False):
        self._args = args

        # Factory name -> class registered under that name. Note that
        # 'HTMLParser' is mapped to NotImplemented rather than to a class.
        class_map = {
            'Application': Application,
            'BandwidthLimiter': BandwidthLimiter,
            'HTTPClient': HTTPClient,
            'CookieJar': CookieJar,
            'CookieJarWrapper': CookieJarWrapper,
            'CookiePolicy': DeFactoCookiePolicy,
            'ConnectionPool': ConnectionPool,
            'CSSScraper': CSSScraper,
            'DemuxDocumentScraper': DemuxDocumentScraper,
            'DemuxURLFilter': DemuxURLFilter,
            'FTPProcessor': FTPProcessor,
            'ElementWalker': ElementWalker,
            'FetchRule': FetchRule,
            'FileWriter': NullWriter,
            'FTPClient': FTPClient,
            'FTPProcessorFetchParams': FTPProcessorFetchParams,
            'HTTPProxyServer': HTTPProxyServer,
            'HTMLParser': NotImplemented,
            'HTMLScraper': HTMLScraper,
            'JavaScriptScraper': JavaScriptScraper,
            'PathNamer': PathNamer,
            'PipelineSeries': PipelineSeries,
            'ProcessingRule': ProcessingRule,
            'Processor': DelegateProcessor,
            'ProxyCoprocessor': ProxyCoprocessor,
            'ProxyHostFilter': ProxyHostFilter,
            'RedirectTracker': RedirectTracker,
            'Request': Request,
            'Resolver': Resolver,
            'ResourceMonitor': ResourceMonitor,
            'ResultRule': ResultRule,
            'RobotsTxtChecker': RobotsTxtChecker,
            'RobotsTxtPool': RobotsTxtPool,
            'SitemapScraper': SitemapScraper,
            'Statistics': Statistics,
            'URLInfo': URLInfo,
            'URLTable': URLTableHookWrapper,
            'URLTableImplementation': SQLURLTable,
            'URLRewriter': URLRewriter,
            'Waiter': LinearWaiter,
            'WARCRecorder': WARCRecorder,
            'WebClient': WebClient,
            'WebProcessor': WebProcessor,
            'WebProcessorFetchParams': WebProcessorFetchParams,
            'YoutubeDlCoprocessor': YoutubeDlCoprocessor,
        }
        self._factory = Factory(class_map)

        self._unit_test = unit_test

    @property
    def factory(self):
        '''The factory created in the constructor.

        Returns:
            Factory: An :class:`.factory.Factory` instance.
        '''
        return self._factory

    def build(self) -> Application:
        '''Create the application around a freshly built pipeline series.

        Returns:
            Application: The instance the factory holds under
            ``'Application'`` after it has been created.
        '''
        series = self._build_pipelines()
        self._factory.new('Application', series)
        return self._factory['Application']

    def _build_pipelines(self) -> PipelineSeries:
        '''Create the start, download, download-stop and stop pipelines.

        All pipelines share a single :class:`AppSession`. The
        download-stop pipeline is flagged skippable, and the download
        pipeline is added to the series' ``concurrency_pipelines``.
        '''
        session = AppSession(self._factory, self._args, self.get_stderr())

        # Application start-up: setup tasks in the order they are listed.
        startup_source = AppSource(session)
        startup_tasks = [
            LoggingSetupTask(),
            PluginSetupTask(),
            DatabaseSetupTask(),
            ParserSetupTask(),
            WARCVisitsTask(),
            SSLContextTask(),
            ResmonSetupTask(),
            StatsStartTask(),
            URLFiltersSetupTask(),
            NetworkSetupTask(),
            ClientSetupTask(),
            WARCRecorderSetupTask(),
            FileWriterSetupTask(),
            ProcessorSetupTask(),
            ProxyServerSetupTask(),
            CoprocessorSetupTask(),
            InputURLTask(),
            URLFiltersPostURLImportSetupTask(),
        ]
        startup = Pipeline(startup_source, startup_tasks)

        # Downloading: fed by URL items instead of the application source.
        item_source = URLItemSource(session)
        download_tasks = [
            ProcessTask(),
            ResmonSleepTask(),
            BackgroundAsyncTask(),
            CheckQuotaTask(),
        ]
        download = Pipeline(item_source, download_tasks)

        # Runs after downloading; may be skipped.
        download_stop = Pipeline(AppSource(session), [StatsStopTask()])
        download_stop.skippable = True

        # Application shutdown.
        shutdown_source = AppSource(session)
        shutdown_tasks = [
            BackgroundAsyncCleanupTask(),
            AppStopTask(),
            WARCRecorderTeardownTask(),
            CookieJarTeardownTask(),
            LoggingShutdownTask(),
        ]
        shutdown = Pipeline(shutdown_source, shutdown_tasks)

        ordered = (startup, download, download_stop, shutdown)
        series = self._factory.new('PipelineSeries', ordered)
        series.concurrency_pipelines.add(download)
        return series

    def build_and_run(self):
        '''Build the application and run it synchronously.

        Returns:
            int: The exit status.
        '''
        return self.build().run_sync()

    def get_stderr(self):
        '''Return stderr, or stdout when running under unit testing.'''
        return sys.stdout if self._unit_test else sys.stderr
class Builder(object):
    '''Assembles the application from a factory and a series of pipelines.

    Args:
        args: Options from :class:`argparse.ArgumentParser`
        unit_test (bool): When true, :meth:`get_stderr` hands back
            ``sys.stdout`` in place of ``sys.stderr``.
    '''

    def __init__(self, args, unit_test=False):
        self._args = args

        # Factory name -> class registered under that name. Note that
        # 'HTMLParser' is mapped to NotImplemented rather than to a class.
        class_map = {
            'Application': Application,
            'BatchDocumentConverter': BatchDocumentConverter,
            'BandwidthLimiter': BandwidthLimiter,
            'HTTPClient': HTTPClient,
            'CookieJar': CookieJar,
            'CookieJarWrapper': CookieJarWrapper,
            'CookiePolicy': DeFactoCookiePolicy,
            'ConnectionPool': ConnectionPool,
            'CSSScraper': CSSScraper,
            'DemuxDocumentScraper': DemuxDocumentScraper,
            'DemuxURLFilter': DemuxURLFilter,
            'FTPProcessor': FTPProcessor,
            'ElementWalker': ElementWalker,
            'FetchRule': FetchRule,
            'FileWriter': NullWriter,
            'FTPClient': FTPClient,
            'FTPProcessorFetchParams': FTPProcessorFetchParams,
            'HTTPProxyServer': HTTPProxyServer,
            'HTMLParser': NotImplemented,
            'HTMLScraper': HTMLScraper,
            'JavaScriptScraper': JavaScriptScraper,
            'PathNamer': PathNamer,
            'PhantomJSDriver': PhantomJSDriver,
            'PhantomJSCoprocessor': PhantomJSCoprocessor,
            'PipelineSeries': PipelineSeries,
            'ProcessingRule': ProcessingRule,
            'Processor': DelegateProcessor,
            'ProxyCoprocessor': ProxyCoprocessor,
            'ProxyHostFilter': ProxyHostFilter,
            'RedirectTracker': RedirectTracker,
            'Request': Request,
            'Resolver': Resolver,
            'ResourceMonitor': ResourceMonitor,
            'ResultRule': ResultRule,
            'RobotsTxtChecker': RobotsTxtChecker,
            'RobotsTxtPool': RobotsTxtPool,
            'SitemapScraper': SitemapScraper,
            'Statistics': Statistics,
            'URLInfo': URLInfo,
            'URLTable': URLTableHookWrapper,
            'URLTableImplementation': SQLURLTable,
            'URLRewriter': URLRewriter,
            'Waiter': LinearWaiter,
            'WARCRecorder': WARCRecorder,
            'WebClient': WebClient,
            'WebProcessor': WebProcessor,
            'WebProcessorFetchParams': WebProcessorFetchParams,
            'YoutubeDlCoprocessor': YoutubeDlCoprocessor,
        }
        self._factory = Factory(class_map)

        self._unit_test = unit_test

    @property
    def factory(self):
        '''The factory created in the constructor.

        Returns:
            Factory: An :class:`.factory.Factory` instance.
        '''
        return self._factory

    def build(self) -> Application:
        '''Create the application around a freshly built pipeline series.

        Returns:
            Application: The instance the factory holds under
            ``'Application'`` after it has been created.
        '''
        series = self._build_pipelines()
        self._factory.new('Application', series)
        return self._factory['Application']

    def _build_pipelines(self) -> PipelineSeries:
        '''Create the five pipelines and wrap them in a pipeline series.

        The order is start, download, download-stop, conversion, stop.
        All pipelines share a single :class:`AppSession`. The
        download-stop and conversion pipelines are flagged skippable,
        and the download pipeline is added to the series'
        ``concurrency_pipelines``.
        '''
        session = AppSession(self._factory, self._args, self.get_stderr())

        # Application start-up: setup tasks in the order they are listed.
        startup_source = AppSource(session)
        startup_tasks = [
            LoggingSetupTask(),
            DatabaseSetupTask(),
            ParserSetupTask(),
            WARCVisitsTask(),
            SSLContextTask(),
            ResmonSetupTask(),
            StatsStartTask(),
            URLFiltersSetupTask(),
            NetworkSetupTask(),
            ClientSetupTask(),
            WARCRecorderSetupTask(),
            FileWriterSetupTask(),
            ProcessorSetupTask(),
            ProxyServerSetupTask(),
            CoprocessorSetupTask(),
            LinkConversionSetupTask(),
            PluginSetupTask(),
            InputURLTask(),
            URLFiltersPostURLImportSetupTask(),
        ]
        startup = Pipeline(startup_source, startup_tasks)

        # Downloading: fed by URL items instead of the application source.
        item_source = URLItemSource(session)
        download_tasks = [
            ProcessTask(),
            ResmonSleepTask(),
            BackgroundAsyncTask(),
            CheckQuotaTask(),
        ]
        download = Pipeline(item_source, download_tasks)

        # Runs after downloading; may be skipped.
        download_stop = Pipeline(AppSource(session), [StatsStopTask()])
        download_stop.skippable = True

        # Link conversion: fed by queued files; may be skipped.
        file_source = QueuedFileSource(session)
        conversion = Pipeline(file_source, [LinkConversionTask()])
        conversion.skippable = True

        # Application shutdown.
        shutdown_source = AppSource(session)
        shutdown_tasks = [
            BackgroundAsyncCleanupTask(),
            AppStopTask(),
            WARCRecorderTeardownTask(),
            CookieJarTeardownTask(),
            LoggingShutdownTask(),
        ]
        shutdown = Pipeline(shutdown_source, shutdown_tasks)

        ordered = (startup, download, download_stop, conversion, shutdown)
        series = self._factory.new('PipelineSeries', ordered)
        series.concurrency_pipelines.add(download)
        return series

    def build_and_run(self):
        '''Build the application and run it synchronously.

        Returns:
            int: The exit status.
        '''
        return self.build().run_sync()

    def get_stderr(self):
        '''Return stderr, or stdout when running under unit testing.'''
        return sys.stdout if self._unit_test else sys.stderr