Example #1
0
    def process(self, item_session: ItemSession, request, response, file_writer_session):
        '''Run the PhantomJS driver for a fetched HTML response.

        Nothing happens unless the response status is 200 and
        ``HTMLReader`` reports the request/response pair as supported.

        The driver is started up to ``WPULL_PHANTOMJS_TRIES`` times
        (environment variable, default 5). A ``PhantomJSCrashed`` error
        triggers another try; a timeout stops processing immediately.
        If every try crashes, a warning naming the URL is logged.

        Coroutine.
        '''
        if response.status_code != 200 or not HTMLReader.is_supported(
                request=request, response=response):
            return

        _logger.debug('Starting PhantomJS processing.')

        self._file_writer_session = file_writer_session

        # FIXME: this is a quick hack for crashes. See #137.
        max_tries = int(os.environ.get('WPULL_PHANTOMJS_TRIES', 5))

        # The loop variable is deliberately not named ``_``: that name is
        # the translation function used below.
        for unused_try in range(max_tries):
            try:
                yield from self._run_driver(item_session, request, response)
            except asyncio.TimeoutError:
                # A timeout is not retried.
                _logger.warning(_('Waiting for page load timed out.'))
                return
            except PhantomJSCrashed as error:
                # Log the crash and fall through to the next try.
                _logger.exception(__('PhantomJS crashed: {}', error))
            else:
                # The driver finished without error.
                return

        # Reached only when no try completed (all crashed, or zero tries).
        _logger.warning(__(
            _('PhantomJS failed to fetch ‘{url}’. I am sorry.'),
            url=request.url_info.url
        ))
Example #2
0
    def process(self, url_item, request, response, file_writer_session):
        '''Run the PhantomJS driver for a fetched HTML response.

        Responses with a status other than 200, or that ``HTMLReader``
        does not support, are skipped.

        The number of tries comes from the ``WPULL_PHANTOMJS_TRIES``
        environment variable (default 5). Only a ``PhantomJSCrashed``
        error leads to another try; a timeout ends processing at once.
        A warning naming the URL is logged when no try completes.

        Coroutine.
        '''
        if response.status_code != 200:
            return

        if not HTMLReader.is_supported(request=request, response=response):
            return

        _logger.debug('Starting PhantomJS processing.')

        self._file_writer_session = file_writer_session

        # FIXME: this is a quick hack for crashes. See #137.
        tries_left = int(os.environ.get('WPULL_PHANTOMJS_TRIES', 5))

        while tries_left > 0:
            tries_left -= 1

            try:
                yield From(self._run_driver(url_item, request, response))
            except trollius.TimeoutError:
                # Timeouts are reported and never retried.
                _logger.warning(_('Waiting for page load timed out.'))
                return
            except PhantomJSCrashed as error:
                # Crash: record it and go around again if tries remain.
                _logger.exception(__('PhantomJS crashed: {}', error))
            else:
                # Driver ran to completion; nothing more to do.
                return

        # Every try crashed (or no tries were allowed).
        failure_message = __(
            _('PhantomJS failed to fetch ‘{url}’. I am sorry.'),
            url=request.url_info.url
        )
        _logger.warning(failure_message)
Example #3
0
    def process(self, url_item, request, response, file_writer_session):
        '''Run a youtube-dl session for the item's URL.

        Nothing happens unless the response status is 200 and
        ``HTMLReader`` reports the request/response pair as supported.
        The session is always closed once it has run, including when
        running it raises. An info message is logged before the run
        and another after it completes.

        Coroutine.
        '''
        eligible = (
            response.status_code == 200
            and HTMLReader.is_supported(request=request, response=response)
        )

        if not eligible:
            return

        ytdl_session = Session(
            self._proxy_address,
            self._youtube_dl_path,
            self._root_path,
            url_item,
            file_writer_session,
            self._user_agent,
            self._warc_recorder,
            self._inet_family,
            self._check_certificate,
        )

        target_url = url_item.url_info.url
        _logger.info(__(_('youtube-dl fetching ‘{url}’.'), url=target_url))

        # Close the session whether or not the run succeeds.
        try:
            yield From(ytdl_session.run())
        finally:
            ytdl_session.close()

        _logger.info(__(_('youtube-dl fetched ‘{url}’.'), url=target_url))
Example #4
0
    def process(self, item_session: ItemSession, request, response, file_writer_session):
        '''Run a youtube-dl session for the item's URL.

        Responses with a status other than 200, or that ``HTMLReader``
        does not support, are skipped. Otherwise a ``Session`` is built
        from this object's settings, run, and closed afterwards even if
        the run raises. Info messages are logged before the run and
        after it completes.

        Coroutine.
        '''
        if response.status_code != 200 or not HTMLReader.is_supported(
                request=request, response=response):
            return

        ytdl_session = Session(
            self._proxy_address,
            self._youtube_dl_path,
            self._root_path,
            item_session,
            file_writer_session,
            self._user_agent,
            self._warc_recorder,
            self._inet_family,
            self._check_certificate,
        )

        target_url = item_session.url_record.url
        _logger.info(__(_('youtube-dl fetching ‘{url}’.'), url=target_url))

        # Guarantee the session is closed on both success and failure.
        try:
            yield from ytdl_session.run()
        finally:
            ytdl_session.close()

        _logger.info(__(_('youtube-dl fetched ‘{url}’.'), url=target_url))