async def wrapped(self, request, context):
    metadata = dict(context.invocation_metadata())
    try:
        self.statbox(
            action='enter',
            mode=func.__name__,
            request_id=metadata['request-id'],
        )
        async for item in func(self, request, context, metadata):
            yield item
        self.statbox(
            action='exit',
            mode=func.__name__,
            request_id=metadata['request-id'],
        )
    except aio.AbortError:
        # Deliberate aborts are not errors; let them propagate untouched.
        raise
    except Exception as e:
        serialized_request = MessageToDict(
            request,
            preserving_proto_field_name=True,
        )
        error_log(e, request=serialized_request, request_id=metadata['request-id'])
        # Translate known exceptions into gRPC status codes.
        if e.__class__ in self.error_mapping:
            await context.abort(*self.error_mapping[e.__class__])
        # Re-raise bare to preserve the original traceback.
        raise
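
# The coroutine above closes over `func`, so it is presumably the inner
# function of a decorator factory. A minimal sketch of such a factory, under
# that assumption; the name `logged_stream` and the example mapping in the
# usage comment are assumptions, not taken from the original.
import functools

def logged_stream(func):
    @functools.wraps(func)
    async def wrapped(self, request, context):
        # Full body as in `wrapped` above; trimmed here to the passthrough.
        metadata = dict(context.invocation_metadata())
        async for item in func(self, request, context, metadata):
            yield item
    return wrapped

# Hypothetical usage on a streaming servicer method; `error_mapping` turns
# known exceptions into (status, details) pairs for context.abort():
#
# class SearchService(SearchServicer):
#     error_mapping = {
#         ValueError: (grpc.StatusCode.INVALID_ARGUMENT, 'invalid_request'),
#     }
#
#     @logged_stream
#     async def search(self, request, context, metadata):
#         yield ...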
async def resolve(self) -> AsyncIterable[PreparedRequest]:
    async with self.get_resolve_session() as session:
        url = f'{self.base_url}/main/{self.md5.upper()}'
        async with session.get(url, timeout=self.resolve_timeout) as resp:
            downloaded_page = await resp.text()
        # Probe each known IPFS gateway; every match becomes an alternative
        # request for the caller to try in order.
        gateway_patterns = (
            r'https://ipfs\.io/ipfs/[A-Za-z0-9]+',
            r'https://cloudflare-ipfs\.com/ipfs/[A-Za-z0-9]+',
            r'https://ipfs\.infura\.io/ipfs/[A-Za-z0-9]+',
        )
        found_any = False
        for pattern in gateway_patterns:
            match = re.search(pattern, downloaded_page, re.IGNORECASE)
            if match:
                found_any = True
                yield PreparedRequest(method='get', url=match.group(), ssl=self.ssl)
        # Only the complete absence of gateway links is an error.
        if not found_any:
            error_log(RegexNotFoundError(url=url))
async def download(
    self,
    sources: Iterable[BaseSource],
) -> AsyncIterable[FileResponsePb]:
    for source in sources:
        try:
            await source.start()
            async for resp in self.download_source(source):
                yield resp
            # The first source that streams successfully wins.
            return
        except DownloadError as e:
            # A failed source is logged and the next one is tried.
            error_log(e)
            continue
        finally:
            await source.stop()
    raise NotFoundError()
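
# Hedged usage sketch for download(): sources act as ordered fallbacks, and
# NotFoundError surfaces only when every source has failed. The consumer and
# handler names below are hypothetical, for illustration only.
async def fetch(downloader, sources):
    try:
        async for file_response in downloader.download(sources):
            process(file_response)  # hypothetical consumer
    except NotFoundError:
        report_missing()  # hypothetical handler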
async def resolve(self) -> AsyncIterable[PreparedRequest]:
    async with self.get_resolve_session() as session:
        url = f'{self.base_url}/scimag/ads.php?doi={self.doi}'
        async with session.get(url, timeout=self.resolve_timeout) as resp:
            downloaded_page_bytes = await resp.read()
        downloaded_page = downloaded_page_bytes.decode('utf-8', 'backslashreplace')
        match = re.search(
            r'https?://.*/scimag/get\.php\?doi=.*&key=[A-Za-z0-9]+',
            downloaded_page,
            re.IGNORECASE,
        )
        if match:
            yield PreparedRequest(method='get', url=match.group())
        else:
            error_log(RegexNotFoundError(url=url))
async def _process_chat(self, event: events.ChatAction, request_id: str):
    try:
        chat = await self._put_chat(event, request_id=request_id)
    except (AioRpcError, BaseError) as e:
        # If storing the chat fails, log and fall back to an in-memory
        # default so processing can continue.
        error_log(e)
        event_chat = await event.get_chat()
        username = get_username(event, event_chat)
        chat = ChatPb(
            chat_id=event.chat_id,
            is_system_messaging_enabled=True,
            is_discovery_enabled=True,
            language='en',
            username=username,
            is_admin=False,
            is_subscribed=True,
        )
    return chat
async def run_job(name, **kwargs):
    config = get_config()
    configure_logging(config)
    job_config = config['jobs'][name]
    job_class = import_object(job_config['class'])
    # Caller-supplied kwargs override the kwargs from the config file.
    real_kwargs = job_config['kwargs'].copy()
    real_kwargs.update(kwargs)
    job = job_class(**real_kwargs)
    try:
        await job.start_and_wait()
    except Exception as e:
        error_log(e)
        raise
    finally:
        await job.stop()
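
# Hedged usage sketch: the job name 'cleaner' and the batch_size override are
# assumptions; any name under config['jobs'] works the same way, with keyword
# arguments passed here taking precedence over the config-file kwargs.
import asyncio

if __name__ == '__main__':
    asyncio.run(run_job('cleaner', batch_size=128))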
async def resolve(self, timeout=None) -> AsyncIterable[PreparedRequest]:
    async with self.get_resolve_session() as session:
        url = f'{self.base_url}/{self.doi}'
        async with session.get(url, timeout=timeout or self.timeout) as resp:
            # Sometimes sci-hub returns the file itself instead of an HTML
            # page; in that case there is nothing left to scrape.
            if resp.headers.get('Content-Type') == 'application/pdf':
                yield PreparedRequest(method='get', url=url)
                return
            downloaded_page_bytes = await resp.read()
        downloaded_page = downloaded_page_bytes.decode('utf-8', 'backslashreplace')
        match = re.search(
            r'(?:https?:)?//.*\?download=true',
            downloaded_page,
            re.IGNORECASE,
        )
        if match:
            url = match.group()
            # The scraped link may be protocol-relative.
            if url.startswith('//'):
                url = 'http:' + url
            yield PreparedRequest(method='get', url=url)
        else:
            error_log(RegexNotFoundError(url=url))
def error_log(self, e, level=logging.ERROR, **fields):
    # Merge instance-wide default fields with per-call fields; per-call wins.
    all_fields = {**self.default_fields, **fields}
    error_log(e, level=level, **all_fields)