Example #1
0
    def process(self, session: AppSession):
        '''Collect plugins and activate the ones that ask to be activated.

        A plugin manager is created on ``session.plugin_manager`` that
        searches the package's ``application/plugins`` directory plus the
        optional ``session.args.plugin_script`` file. Every plugin found is
        logged, given a reference to the session through ``app_session``,
        and, if its ``should_activate()`` returns a true value, activated
        and connected to the hooks.

        Args:
            session: The application session being set up.
        '''
        self._debug_log_registered_hooks(session)

        builtin_dir = get_package_filename(
            os.path.join('application', 'plugins'))

        script_files = []
        if session.args.plugin_script:
            script_files.append(session.args.plugin_script)

        manager = PluginManager(
            plugin_locator=PluginLocator([builtin_dir], script_files))
        session.plugin_manager = manager
        manager.collectPlugins()

        for info in manager.getAllPlugins():
            # Plugins located under the internal plugin directory are only
            # reported at debug level; anything else is reported at info.
            is_builtin = info.path.startswith(builtin_dir)
            emit = _logger.debug if is_builtin else _logger.info
            emit(__(
                _('Found plugin {name} from {filename}.'),
                filename=info.path,
                name=info.name
            ))

            plugin = info.plugin_object
            plugin.app_session = session

            if not plugin.should_activate():
                continue

            manager.activatePluginByName(info.name)
            self._connect_plugin_hooks(session, plugin)
Example #2
0
    def process(self, session: AppSession):
        '''Build MITM proxy server.

        Returns immediately unless at least one of the ``phantomjs``,
        ``youtube_dl`` or ``proxy_server`` arguments is set. Otherwise a
        listening socket is bound to the configured address and port, the
        proxy server is started on it, the resulting asyncio server is
        appended to ``session.async_servers`` and the port actually bound
        is stored in ``session.proxy_server_port``.

        Args:
            session: The application session being set up.
        '''
        options = session.args
        if (not options.phantomjs and not options.youtube_dl
                and not options.proxy_server):
            return

        factory = session.factory
        http_proxy = factory.new('HTTPProxyServer', factory['HTTPClient'])

        # Neither object is used again in this method. They stay bound to
        # locals, as in the original, so their lifetime is unchanged.
        # NOTE(review): presumably requested for the factory's side
        # effects - confirm.
        jar_wrapper = factory.get('CookieJarWrapper')
        coprocessor = factory.new('ProxyCoprocessor', session)

        # bind_sockets() returns a list; only the first socket is served.
        listen_socket = tornado.netutil.bind_sockets(
            options.proxy_server_port,
            address=options.proxy_server_address
        )[0]
        # Read the port back from the socket rather than from the arguments.
        bound_port = listen_socket.getsockname()[1]

        async_server = yield from asyncio.start_server(
            http_proxy, sock=listen_socket)

        session.async_servers.append(async_server)
        session.proxy_server_port = bound_port
Example #3
0
    def process(self, session: AppSession):
        '''Locate plugins, log each one, and activate those that opt in.

        The plugin manager stored on ``session.plugin_manager`` looks in
        the package's ``application/plugins`` directory and, when given,
        at the file named by ``session.args.plugin_script``. Each plugin
        object receives the session as ``app_session`` before
        ``should_activate()`` is consulted.

        Args:
            session: The application session being set up.
        '''
        self._debug_log_registered_hooks(session)

        internal_dir = get_package_filename(
            os.path.join('application', 'plugins'))
        search_dirs = [internal_dir]
        extra_files = []

        user_script = session.args.plugin_script
        if user_script:
            extra_files.append(user_script)

        plugin_manager = PluginManager(
            plugin_locator=PluginLocator(search_dirs, extra_files))
        session.plugin_manager = plugin_manager
        plugin_manager.collectPlugins()

        for found in plugin_manager.getAllPlugins():
            found_message = __(
                _('Found plugin {name} from {filename}.'),
                filename=found.path,
                name=found.name)

            # Plugins under the internal directory are logged at debug
            # level; all other plugins are logged at info level.
            if found.path.startswith(internal_dir):
                _logger.debug(found_message)
            else:
                _logger.info(found_message)

            plugin_object = found.plugin_object
            plugin_object.app_session = session

            if plugin_object.should_activate():
                plugin_manager.activatePluginByName(found.name)
                self._connect_plugin_hooks(session, plugin_object)
Example #4
0
    def _build_pipelines(self) -> PipelineSeries:
        '''Assemble every pipeline of the application into one series.

        A single ``AppSession`` is created and shared by all pipeline
        sources. The series holds five pipelines in this order: application
        start, download, download stop, link conversion, application stop.
        The download-stop and conversion pipelines are flagged as
        skippable, and the download pipeline is added to the series'
        ``concurrency_pipelines``.

        Returns:
            The ``PipelineSeries`` created through the factory.
        '''
        session = AppSession(self._factory, self._args, self.get_stderr())

        # Application start-up: tasks are listed in the order they run.
        startup_source = AppSource(session)
        startup_tasks = [
            LoggingSetupTask(),
            DatabaseSetupTask(),
            ParserSetupTask(),
            WARCVisitsTask(),
            SSLContextTask(),
            ResmonSetupTask(),
            StatsStartTask(),
            URLFiltersSetupTask(),
            NetworkSetupTask(),
            ClientSetupTask(),
            WARCRecorderSetupTask(),
            FileWriterSetupTask(),
            ProcessorSetupTask(),
            ProxyServerSetupTask(),
            CoprocessorSetupTask(),
            LinkConversionSetupTask(),
            PluginSetupTask(),
            InputURLTask(),
            URLFiltersPostURLImportSetupTask(),
        ]
        startup = Pipeline(startup_source, startup_tasks)

        # Per-URL download work.
        download = Pipeline(URLItemSource(session), [
            ProcessTask(),
            ResmonSleepTask(),
            BackgroundAsyncTask(),
            CheckQuotaTask(),
        ])

        download_stop = Pipeline(AppSource(session), [StatsStopTask()])
        download_stop.skippable = True

        conversion = Pipeline(
            QueuedFileSource(session), [LinkConversionTask()])
        conversion.skippable = True

        # Application shutdown.
        shutdown = Pipeline(AppSource(session), [
            BackgroundAsyncCleanupTask(),
            AppStopTask(),
            WARCRecorderTeardownTask(),
            CookieJarTeardownTask(),
            LoggingShutdownTask(),
        ])

        ordered_pipelines = (
            startup, download, download_stop, conversion, shutdown)
        series = self._factory.new('PipelineSeries', ordered_pipelines)
        series.concurrency_pipelines.add(download)

        return series
Example #5
0
def new_mock_item_session():
    '''Return an ItemSession populated with fixed mock values.

    The application session is created without a factory or stderr and
    with arguments containing only ``directory_prefix='/tmp/'``. The item
    session's request is a ``BaseRequest`` whose URL is
    ``http://example.com``.
    '''
    mock_args = argparse.Namespace(directory_prefix='/tmp/')
    session = ItemSession(
        AppSession(None, mock_args, None),
        new_mock_url_record(),
    )

    # Attach the request first, then set its URL through the session.
    session.request = BaseRequest()
    session.request.url = 'http://example.com'

    return session
Example #6
0
    def process(self, session: AppSession):
        '''Build MITM proxy server.

        Nothing is done when none of the ``phantomjs``, ``youtube_dl`` and
        ``proxy_server`` arguments is set. Otherwise the proxy server is
        started on a socket bound to ``proxy_server_address`` and
        ``proxy_server_port``; the asyncio server is recorded in
        ``session.async_servers`` and the bound port in
        ``session.proxy_server_port``.

        Args:
            session: The application session being set up.
        '''
        cli_args = session.args
        proxy_wanted = (
            cli_args.phantomjs or cli_args.youtube_dl
            or cli_args.proxy_server)
        if not proxy_wanted:
            return

        upstream_client = session.factory['HTTPClient']
        server_handler = session.factory.new(
            'HTTPProxyServer', upstream_client)

        # These two results are not used below. They are still assigned,
        # as in the original, so the objects stay referenced for the rest
        # of this method.
        # NOTE(review): presumably needed for factory side effects -
        # confirm.
        cookie_wrapper = session.factory.get('CookieJarWrapper')
        coprocessor = session.factory.new('ProxyCoprocessor', session)

        # Only the first socket returned by bind_sockets() is used.
        server_socket = tornado.netutil.bind_sockets(
            cli_args.proxy_server_port,
            address=cli_args.proxy_server_address)[0]
        # The bound port is taken from the socket itself.
        actual_port = server_socket.getsockname()[1]

        running_server = yield from asyncio.start_server(
            server_handler, sock=server_socket)

        session.async_servers.append(running_server)
        session.proxy_server_port = actual_port
Example #7
0
 def _close_file_logger(cls, session: AppSession):
     '''Detach the session's file log handler from the root logger.

     Does nothing when ``session.file_log_handler`` is unset or falsy.
     Afterwards ``session.file_log_handler`` is reset to ``None``.
     '''
     handler = session.file_log_handler
     if not handler:
         return

     logging.getLogger().removeHandler(handler)
     session.file_log_handler = None
Example #8
0
 def process(self, session: AppSession):
     '''Store the result of ``_build_ssl_context(session)`` on the session.

     The value is assigned to ``session.ssl_context``.
     '''
     context = self._build_ssl_context(session)
     session.ssl_context = context
Example #9
0
 def _close_file_logger(cls, session: AppSession):
     '''Remove the session's file log handler from the root logger.

     When ``session.file_log_handler`` is set, it is removed from the
     root logger and the attribute is cleared to ``None``; otherwise
     nothing happens.
     '''
     file_handler = session.file_log_handler
     if not file_handler:
         return

     root_logger = logging.getLogger()
     root_logger.removeHandler(file_handler)
     session.file_log_handler = None