def process(self, item_session: ItemSession, request, response, file_writer_session):
    '''Run the PhantomJS driver against a response.

    Responses whose status code is not 200, or that ``HTMLReader`` reports
    as unsupported, are skipped. Otherwise the driver is started up to
    ``WPULL_PHANTOMJS_TRIES`` times (environment variable, default 5).
    A ``PhantomJSCrashed`` error is logged and the driver is tried again;
    a page load timeout is logged and ends processing immediately.

    Coroutine.
    '''
    if (response.status_code != 200
            or not HTMLReader.is_supported(request=request, response=response)):
        return

    _logger.debug('Starting PhantomJS processing.')

    self._file_writer_session = file_writer_session

    # FIXME: this is a quick hack for crashes. See #137.
    tries_left = int(os.environ.get('WPULL_PHANTOMJS_TRIES', 5))

    while tries_left > 0:
        tries_left -= 1

        try:
            yield from self._run_driver(item_session, request, response)
        except asyncio.TimeoutError:
            # A timeout is final: do not retry and do not report a failure.
            _logger.warning(_('Waiting for page load timed out.'))
            return
        except PhantomJSCrashed as crash:
            _logger.exception(__('PhantomJS crashed: {}', crash))
            continue

        # The driver finished without crashing; nothing left to do.
        return

    # Only reached when every attempt crashed (or no attempt was allowed).
    failure_template = _('PhantomJS failed to fetch ‘{url}’. I am sorry.')
    _logger.warning(__(failure_template, url=request.url_info.url))
def process(self, url_item, request, response, file_writer_session):
    '''Run the PhantomJS driver against a response.

    Responses whose status code is not 200, or that ``HTMLReader`` reports
    as unsupported, are skipped. Otherwise the driver is started up to
    ``WPULL_PHANTOMJS_TRIES`` times (environment variable, default 5).
    A ``PhantomJSCrashed`` error is logged and the driver is tried again;
    a page load timeout is logged and ends processing immediately.

    Coroutine.
    '''
    if (response.status_code != 200
            or not HTMLReader.is_supported(request=request, response=response)):
        return

    _logger.debug('Starting PhantomJS processing.')

    self._file_writer_session = file_writer_session

    # FIXME: this is a quick hack for crashes. See #137.
    tries_left = int(os.environ.get('WPULL_PHANTOMJS_TRIES', 5))

    while tries_left > 0:
        tries_left -= 1

        try:
            yield From(self._run_driver(url_item, request, response))
        except trollius.TimeoutError:
            # A timeout is final: do not retry and do not report a failure.
            _logger.warning(_('Waiting for page load timed out.'))
            return
        except PhantomJSCrashed as crash:
            _logger.exception(__('PhantomJS crashed: {}', crash))
            continue

        # The driver finished without crashing; nothing left to do.
        return

    # Only reached when every attempt crashed (or no attempt was allowed).
    failure_template = _('PhantomJS failed to fetch ‘{url}’. I am sorry.')
    _logger.warning(__(failure_template, url=request.url_info.url))
def process(self, url_item, request, response, file_writer_session):
    '''Run a youtube-dl session for a response.

    Responses whose status code is not 200, or that ``HTMLReader`` reports
    as unsupported, are skipped. Otherwise a ``Session`` is built from this
    object's settings, run to completion, and closed afterwards even if
    running it raises.

    Coroutine.
    '''
    if (response.status_code != 200
            or not HTMLReader.is_supported(request=request, response=response)):
        return

    downloader = Session(
        self._proxy_address,
        self._youtube_dl_path,
        self._root_path,
        url_item,
        file_writer_session,
        self._user_agent,
        self._warc_recorder,
        self._inet_family,
        self._check_certificate,
    )

    target_url = url_item.url_info.url
    _logger.info(__(_('youtube-dl fetching ‘{url}’.'), url=target_url))

    try:
        yield From(downloader.run())
    finally:
        # Always release the session, whether or not the run succeeded.
        downloader.close()

    _logger.info(__(_('youtube-dl fetched ‘{url}’.'), url=target_url))
def process(self, item_session: ItemSession, request, response, file_writer_session):
    '''Run a youtube-dl session for a response.

    Responses whose status code is not 200, or that ``HTMLReader`` reports
    as unsupported, are skipped. Otherwise a ``Session`` is built from this
    object's settings, run to completion, and closed afterwards even if
    running it raises.

    Coroutine.
    '''
    if (response.status_code != 200
            or not HTMLReader.is_supported(request=request, response=response)):
        return

    downloader = Session(
        self._proxy_address,
        self._youtube_dl_path,
        self._root_path,
        item_session,
        file_writer_session,
        self._user_agent,
        self._warc_recorder,
        self._inet_family,
        self._check_certificate,
    )

    target_url = item_session.url_record.url
    _logger.info(__(_('youtube-dl fetching ‘{url}’.'), url=target_url))

    # closing() hands back the same session and calls close() on exit.
    with contextlib.closing(downloader) as active_session:
        yield from active_session.run()

    _logger.info(__(_('youtube-dl fetched ‘{url}’.'), url=target_url))