def _build_pipelines(self) -> PipelineSeries:
    """Assemble the pipelines that make up one application run.

    Five pipelines are created around a single shared ``AppSession`` and
    handed to the factory, in this order: application start, download,
    download stop, link conversion, application stop.

    Returns:
        The ``PipelineSeries`` produced by ``self._factory``, with the
        download pipeline added to its ``concurrency_pipelines``.
    """
    session = AppSession(self._factory, self._args, self.get_stderr())

    # Application start: one-time setup tasks, listed in execution order.
    startup_source = AppSource(session)
    startup_tasks = [
        LoggingSetupTask(),
        DatabaseSetupTask(),
        ParserSetupTask(),
        WARCVisitsTask(),
        SSLContextTask(),
        ResmonSetupTask(),
        StatsStartTask(),
        URLFiltersSetupTask(),
        NetworkSetupTask(),
        ClientSetupTask(),
        WARCRecorderSetupTask(),
        FileWriterSetupTask(),
        ProcessorSetupTask(),
        ProxyServerSetupTask(),
        CoprocessorSetupTask(),
        LinkConversionSetupTask(),
        PluginSetupTask(),
        InputURLTask(),
        URLFiltersPostURLImportSetupTask(),
    ]
    startup = Pipeline(startup_source, startup_tasks)

    # Download: fed by URL items rather than by the application source.
    url_items = URLItemSource(session)
    download_tasks = [
        ProcessTask(),
        ResmonSleepTask(),
        BackgroundAsyncTask(),
        CheckQuotaTask(),
    ]
    download = Pipeline(url_items, download_tasks)

    # Download stop: flagged as skippable.
    stats_stop = Pipeline(AppSource(session), [StatsStopTask()])
    stats_stop.skippable = True

    # Link conversion over queued files: flagged as skippable.
    queued_files = QueuedFileSource(session)
    conversion = Pipeline(queued_files, [LinkConversionTask()])
    conversion.skippable = True

    # Application stop: cleanup and teardown tasks.
    shutdown_source = AppSource(session)
    shutdown_tasks = [
        BackgroundAsyncCleanupTask(),
        AppStopTask(),
        WARCRecorderTeardownTask(),
        CookieJarTeardownTask(),
        LoggingShutdownTask(),
    ]
    shutdown = Pipeline(shutdown_source, shutdown_tasks)

    stages = (startup, download, stats_stop, conversion, shutdown)
    series = self._factory.new('PipelineSeries', stages)

    # Only the download pipeline is registered as a concurrency pipeline.
    series.concurrency_pipelines.add(download)
    return series
def test_pipeline_skipping(self):
    """Check which pipelines still run after the application is stopped.

    Three pipelines are chained and the middle one is marked skippable.
    The first pipeline's task stops the application from its callback.
    The test then expects the first source to still hold values, the
    skippable pipeline's source to still hold values, and the last
    pipeline's source to be empty.

    NOTE(review): this is a generator function (``yield from``); it
    presumably relies on a coroutine-aware test decorator or runner that
    is not visible here -- confirm.
    """
    first_source = MyItemSource([1, 2, 3])
    middle_source = MyItemSource([4, 5, 6])
    last_source = MyItemSource([7, 8, 9])

    stopping_task = MyItemTask()
    first_pipeline = Pipeline(first_source, [stopping_task])
    middle_pipeline = Pipeline(middle_source, [MyItemTask()])
    last_pipeline = Pipeline(last_source, [MyItemTask()])
    middle_pipeline.skippable = True

    series = PipelineSeries([first_pipeline, middle_pipeline, last_pipeline])
    app = Application(series)

    def stop_application(work_item):
        # The work item itself is irrelevant; any callback invocation
        # requests the stop.
        app.stop()

    stopping_task.callback = stop_application

    yield from app.run()

    self.assertTrue(first_source.values, 'unprocessed')
    self.assertTrue(middle_source.values, 'skipped')
    self.assertFalse(last_source.values, 'processed')
def _build_pipelines(self) -> PipelineSeries:
    """Create the ordered pipeline series for the application.

    Every pipeline shares one ``AppSession`` built from this object's
    factory, arguments and stderr stream. The stages are collected in
    execution order and passed to the factory as a tuple.

    Returns:
        The ``PipelineSeries`` created through ``self._factory``; the
        download pipeline is added to its ``concurrency_pipelines``.
    """
    app_session = AppSession(self._factory, self._args, self.get_stderr())
    stages = []

    # Stage 1 -- application start.
    start_source = AppSource(app_session)
    start_tasks = [
        LoggingSetupTask(),
        DatabaseSetupTask(),
        ParserSetupTask(),
        WARCVisitsTask(),
        SSLContextTask(),
        ResmonSetupTask(),
        StatsStartTask(),
        URLFiltersSetupTask(),
        NetworkSetupTask(),
        ClientSetupTask(),
        WARCRecorderSetupTask(),
        FileWriterSetupTask(),
        ProcessorSetupTask(),
        ProxyServerSetupTask(),
        CoprocessorSetupTask(),
        LinkConversionSetupTask(),
        PluginSetupTask(),
        InputURLTask(),
        URLFiltersPostURLImportSetupTask(),
    ]
    stages.append(Pipeline(start_source, start_tasks))

    # Stage 2 -- download, driven by URL items.
    item_source = URLItemSource(app_session)
    fetch_tasks = [
        ProcessTask(),
        ResmonSleepTask(),
        BackgroundAsyncTask(),
        CheckQuotaTask(),
    ]
    fetch_stage = Pipeline(item_source, fetch_tasks)
    stages.append(fetch_stage)

    # Stage 3 -- download stop; marked skippable.
    fetch_stop_stage = Pipeline(AppSource(app_session), [StatsStopTask()])
    fetch_stop_stage.skippable = True
    stages.append(fetch_stop_stage)

    # Stage 4 -- link conversion over queued files; marked skippable.
    file_source = QueuedFileSource(app_session)
    convert_stage = Pipeline(file_source, [LinkConversionTask()])
    convert_stage.skippable = True
    stages.append(convert_stage)

    # Stage 5 -- application stop.
    stop_source = AppSource(app_session)
    stop_tasks = [
        BackgroundAsyncCleanupTask(),
        AppStopTask(),
        WARCRecorderTeardownTask(),
        CookieJarTeardownTask(),
        LoggingShutdownTask(),
    ]
    stages.append(Pipeline(stop_source, stop_tasks))

    # The factory receives the stages as a tuple.
    result = self._factory.new('PipelineSeries', tuple(stages))
    result.concurrency_pipelines.add(fetch_stage)
    return result