Ejemplo n.º 1
0
    def _build_pipelines(self) -> PipelineSeries:
        """Assemble the application's pipelines and wrap them in a series.

        Five pipelines are created, all sharing a single ``AppSession``:
        start-up, download, download-stop, link conversion and shutdown.
        The download-stop and conversion pipelines are flagged
        ``skippable``, and the download pipeline is added to the series'
        ``concurrency_pipelines``.

        Returns:
            The object the factory produces for ``'PipelineSeries'``.
        """
        session = AppSession(self._factory, self._args, self.get_stderr())

        # Start-up pipeline. Each source is bound before its task list so
        # that objects are still constructed source-first, then tasks.
        startup_source = AppSource(session)
        startup_tasks = [
            LoggingSetupTask(),
            DatabaseSetupTask(),
            ParserSetupTask(),
            WARCVisitsTask(),
            SSLContextTask(),
            ResmonSetupTask(),
            StatsStartTask(),
            URLFiltersSetupTask(),
            NetworkSetupTask(),
            ClientSetupTask(),
            WARCRecorderSetupTask(),
            FileWriterSetupTask(),
            ProcessorSetupTask(),
            ProxyServerSetupTask(),
            CoprocessorSetupTask(),
            LinkConversionSetupTask(),
            PluginSetupTask(),
            InputURLTask(),
            URLFiltersPostURLImportSetupTask(),
        ]
        startup = Pipeline(startup_source, startup_tasks)

        # Download pipeline, fed by URL items.
        url_items = URLItemSource(session)
        download_tasks = [
            ProcessTask(),
            ResmonSleepTask(),
            BackgroundAsyncTask(),
            CheckQuotaTask(),
        ]
        download = Pipeline(url_items, download_tasks)

        # Statistics are stopped in their own pipeline after downloading.
        download_stop = Pipeline(AppSource(session), [StatsStopTask()])
        download_stop.skippable = True

        # Link conversion pipeline, fed by queued files.
        queued_files = QueuedFileSource(session)
        conversion = Pipeline(queued_files, [LinkConversionTask()])
        conversion.skippable = True

        # Shutdown pipeline.
        shutdown_source = AppSource(session)
        shutdown_tasks = [
            BackgroundAsyncCleanupTask(),
            AppStopTask(),
            WARCRecorderTeardownTask(),
            CookieJarTeardownTask(),
            LoggingShutdownTask(),
        ]
        shutdown = Pipeline(shutdown_source, shutdown_tasks)

        ordered_pipelines = (
            startup, download, download_stop, conversion, shutdown,
        )
        series = self._factory.new('PipelineSeries', ordered_pipelines)
        series.concurrency_pipelines.add(download)

        return series
Ejemplo n.º 2
0
    def test_pipeline_skipping(self):
        """Run three pipelines, stopping the app from the first one's task.

        The middle pipeline is flagged ``skippable``. After the run, the
        first and second sources must still hold values and the third
        source must hold none.
        """
        first_source, skippable_source, last_source = (
            MyItemSource(values)
            for values in ([1, 2, 3], [4, 5, 6], [7, 8, 9])
        )

        stopping_task = MyItemTask()

        first_pipeline = Pipeline(first_source, [stopping_task])
        skippable_pipeline = Pipeline(skippable_source, [MyItemTask()])
        last_pipeline = Pipeline(last_source, [MyItemTask()])

        skippable_pipeline.skippable = True

        series = PipelineSeries(
            [first_pipeline, skippable_pipeline, last_pipeline])
        app = Application(series)

        def stop_app(work_item):
            # The work item itself is ignored; the hook only requests a stop.
            app.stop()

        stopping_task.callback = stop_app

        yield from app.run()

        self.assertTrue(first_source.values, 'unprocessed')
        self.assertTrue(skippable_source.values, 'skipped')
        self.assertFalse(last_source.values, 'processed')
Ejemplo n.º 3
0
    def _build_pipelines(self) -> PipelineSeries:
        """Create every pipeline stage and return them as one series.

        A single ``AppSession`` is shared by all stages. The stages are
        passed to the factory in this order: set-up, fetch, stats-stop,
        link conversion, teardown. The stats-stop and conversion stages
        are marked ``skippable``; the fetch stage is added to the series'
        ``concurrency_pipelines``.

        Returns:
            The ``'PipelineSeries'`` instance obtained from the factory.
        """
        session = AppSession(self._factory, self._args, self.get_stderr())

        setup_stage = Pipeline(AppSource(session), [
            LoggingSetupTask(),
            DatabaseSetupTask(),
            ParserSetupTask(),
            WARCVisitsTask(),
            SSLContextTask(),
            ResmonSetupTask(),
            StatsStartTask(),
            URLFiltersSetupTask(),
            NetworkSetupTask(),
            ClientSetupTask(),
            WARCRecorderSetupTask(),
            FileWriterSetupTask(),
            ProcessorSetupTask(),
            ProxyServerSetupTask(),
            CoprocessorSetupTask(),
            LinkConversionSetupTask(),
            PluginSetupTask(),
            InputURLTask(),
            URLFiltersPostURLImportSetupTask(),
        ])

        fetch_stage = Pipeline(URLItemSource(session), [
            ProcessTask(),
            ResmonSleepTask(),
            BackgroundAsyncTask(),
            CheckQuotaTask(),
        ])

        stats_stop_stage = Pipeline(AppSource(session), [StatsStopTask()])
        stats_stop_stage.skippable = True

        convert_stage = Pipeline(
            QueuedFileSource(session), [LinkConversionTask()])
        convert_stage.skippable = True

        teardown_stage = Pipeline(AppSource(session), [
            BackgroundAsyncCleanupTask(),
            AppStopTask(),
            WARCRecorderTeardownTask(),
            CookieJarTeardownTask(),
            LoggingShutdownTask(),
        ])

        stages = (
            setup_stage,
            fetch_stage,
            stats_stop_stage,
            convert_stage,
            teardown_stage,
        )
        series = self._factory.new('PipelineSeries', stages)
        series.concurrency_pipelines.add(fetch_stage)

        return series