async def handler(self, event: events.ChatAction, request_context: RequestContext):
    """Entry point for user-submitted files: forward PDFs to the hub, reject everything else."""
    session_id = self.generate_session_id()
    request_context.add_default_fields(session_id=session_id)
    request_context.statbox(action='show', mode='submit')

    if event.document.mime_type != 'application/pdf':
        # Unsupported format: log it, tell the user, and drop their message.
        request_context.statbox(action='unknown_file_format')
        request_context.error_log(
            UnknownFileFormatError(format=event.document.mime_type))
        reply_coro = event.reply(
            t('UNKNOWN_FILE_FORMAT_ERROR', language=request_context.chat.language),
            buttons=[close_button()],
        )
        return await asyncio.gather(reply_coro, event.delete())

    # Hand the raw document over to the hub and remove the original message.
    submit_coro = self.application.hub_client.submit(
        telegram_document=bytes(event.document),
        telegram_file_id=event.file.id,
        chat=request_context.chat,
        request_id=request_context.request_id,
        session_id=session_id,
    )
    return await asyncio.gather(submit_coro, event.delete())
async def _on_fail():
    """Failure callback: notify the chat that the service is under maintenance.

    Captures `self` and `request_context` from the enclosing scope.
    """
    maintenance_text = t(
        'MAINTENANCE',
        language=request_context.chat.language,
    ).format(maintenance_picture_url=self.delivery_service.maintenance_picture_url)
    await self.delivery_service.telegram_client.send_message(
        request_context.chat.chat_id,
        maintenance_text,
        buttons=[close_button()],
    )
async def respond_not_found(self, request_context: RequestContext, document_view):
    """Tell the user that no sources are available for the requested document."""
    message = t(
        "SOURCES_UNAVAILABLE",
        language=request_context.chat.language,
    ).format(document=document_view.get_robust_title())
    return await self.delivery_service.telegram_client.send_message(
        request_context.chat.chat_id,
        message,
        buttons=[close_button()],
    )
async def render(self) -> tuple[str, Optional[list]]:
    """Render the current page of typed documents into a SERP string plus buttons.

    Returns a "nothing found" message with a close button when the page is empty.
    """
    if not self.typed_documents:
        return (
            t('COULD_NOT_FIND_ANYTHING', language=self.chat.language),
            [close_button(self.session_id)],
        )

    # One snippet per document, followed by a promotional line.
    serp_elements = [
        parse_typed_document_to_view(typed_document).get_snippet(
            language=self.chat.language,
            limit=512 + 128,
        )
        for typed_document in self.typed_documents
    ]
    promo = self.application.promotioner.choose_promotion(
        language=self.chat.language,
    ).format(
        related_channel=self.application.config['telegram']['related_channel'],
    )
    serp_elements.append(promo)
    serp = '\n\n'.join(serp_elements)

    buttons = []
    if self.has_next or self.page > 0:
        # Pagination row: first page / previous / next; blanks map to /noop.
        prefix = f'/{self.cmd}_{self.session_id}_{self.message_id}'
        buttons = [
            Button.inline(
                text='<<1' if self.page > 1 else ' ',
                data=f'{prefix}_0' if self.page > 1 else '/noop',
            ),
            Button.inline(
                text=f'<{self.page}' if self.page > 0 else ' ',
                data=f'{prefix}_{self.page - 1}' if self.page > 0 else '/noop',
            ),
            Button.inline(
                text=f'{self.page + 2}>' if self.has_next else ' ',
                data=f'{prefix}_{self.page + 1}' if self.has_next else '/noop',
            ),
        ]
    buttons.append(close_button(self.session_id))
    return serp, buttons
async def _send_fail_response(self, event: events.ChatAction, request_context: RequestContext):
    """Best-effort maintenance reply; transient Telegram errors are only logged."""
    text = t(
        'MAINTENANCE',
        language=request_context.chat.language,
    ).format(
        maintenance_picture_url=self.application.config['application']['maintenance_picture_url'],
    )
    try:
        await event.reply(text, buttons=[close_button()])
    except (ConnectionError, QueryIdInvalidError) as e:
        # Nothing sensible to do if even the failure notice fails — just log.
        request_context.error_log(e)
async def external_cancel(self):
    """Cancel the in-flight download task and notify the user about it."""
    self.task.cancel()
    self.request_context.statbox(
        action='externally_canceled',
        document_id=self.document_view.id,
        schema=self.document_view.schema,
    )
    cancel_text = t(
        "DOWNLOAD_CANCELED",
        language=self.request_context.chat.language,
    ).format(document=self.document_view.get_robust_title())
    await self.delivery_service.telegram_client.send_message(
        self.request_context.chat.chat_id,
        cancel_text,
        buttons=[close_button()],
    )
async def submit(
    self,
    request: SubmitRequestPb,
    context: ServicerContext,
    metadata: dict,
) -> SubmitResponsePb:
    """Process a user-submitted PDF end to end.

    Downloads the file, extracts fulltext via GROBID, resolves its DOI through
    the metadata search API, re-uploads the file to the user and records a
    sharience update operation. Every user-visible failure path sends an
    explanatory message and returns an empty response.
    """
    session_id = metadata.get('session-id')
    request_context = RequestContext(
        bot_name=self.service_name,
        chat=request.chat,
        request_id=metadata.get('request-id'),
    )
    request_context.add_default_fields(
        mode='submit',
        session_id=session_id,
        **self.get_default_service_fields(),
    )
    document = BinaryReader(request.telegram_document).tgread_object()

    # 20MB is the hard ceiling enforced here for downloadable documents.
    if document.size > 20 * 1024 * 1024:
        request_context.error_log(FileTooBigError(size=document.size))
        request_context.statbox(action='file_too_big')
        await self.telegram_client.send_message(
            request_context.chat.chat_id,
            t('FILE_TOO_BIG_ERROR', language=request_context.chat.language),
            buttons=[close_button()],
        )
        return SubmitResponsePb()

    # NOTE(review): assumes the first document attribute carries the filename — confirm.
    processing_message = await self.telegram_client.send_message(
        request_context.chat.chat_id,
        t("PROCESSING_PAPER", language=request_context.chat.language).format(
            filename=document.attributes[0].file_name,
        ),
    )
    try:
        file = await self.telegram_client.download_document(
            document=document, file=bytes)
        try:
            processed_document = await self.grobid_client.process_fulltext_document(
                pdf_file=file)
        except BadRequestError as e:
            # GROBID could not parse the PDF at all.
            request_context.statbox(action='unparsable_document')
            request_context.error_log(e)
            await self.telegram_client.send_message(
                request_context.chat.chat_id,
                t('UNPARSABLE_DOCUMENT_ERROR',
                  language=request_context.chat.language).format(
                    filename=document.attributes[0].file_name,
                ),
                buttons=[close_button()],
            )
            return SubmitResponsePb()
        if not processed_document.get('doi'):
            # Parsed, but no DOI could be extracted — we cannot attach metadata.
            request_context.statbox(action='unparsable_doi')
            request_context.error_log(UnparsableDoiError())
            await self.telegram_client.send_message(
                request_context.chat.chat_id,
                t('UNPARSABLE_DOI_ERROR',
                  language=request_context.chat.language).format(
                    filename=document.attributes[0].file_name,
                ),
                buttons=[close_button()],
            )
            return SubmitResponsePb()
        search_response_pb = await self.meta_api_client.search(
            schemas=('scimag', ),
            query=processed_document['doi'],
            page=0,
            page_size=1,
            request_id=request_context.request_id,
            session_id=session_id,
            user_id=str(request_context.chat.chat_id),
            language=request_context.chat.language,
        )
        if len(search_response_pb.scored_documents) == 0:
            # DOI known but absent from the metadata index.
            request_context.statbox(action='unavailable_metadata')
            request_context.error_log(
                UnavailableMetadataError(doi=processed_document['doi']))
            await self.telegram_client.send_message(
                request_context.chat.chat_id,
                t('UNAVAILABLE_METADATA_ERROR',
                  language=request_context.chat.language).format(
                    doi=processed_document['doi']),
                buttons=[close_button()],
            )
            return SubmitResponsePb()
        document_view = ScimagView(
            search_response_pb.scored_documents[0].typed_document.scimag)
        uploaded_message = await self.send_file(
            document_view=document_view,
            file=file,
            request_context=request_context,
            session_id=session_id,
            voting=False,
        )
    finally:
        # The "processing" placeholder must disappear on every path.
        await processing_message.delete()

    document_operation_pb = DocumentOperationPb(
        update_document=UpdateDocumentPb(
            typed_document=TypedDocumentPb(sharience=ShariencePb(
                parent_id=document_view.id,
                uploader_id=request_context.chat.chat_id,
                updated_at=int(time.time()),
                md5=hashlib.md5(file).hexdigest(),
                filesize=document.size,
                ipfs_multihashes=await self.get_ipfs_hashes(file=file),
                telegram_file_id=uploaded_message.file.id,
            )),
        ),
    )
    request_context.statbox(
        action='success',
        document_id=document_view.id,
        schema='scimag',
    )
    await operation_log(document_operation_pb)
    return SubmitResponsePb()
def get_view(
    self,
    language: str,
    session_id: str,
    bot_external_name: str,
    position: int = 0,
    back_command: Optional[str] = None,
    with_buttons: bool = True,
) -> Tuple[str, Optional[List[List[Button]]]]:
    """Build the markdown card (capped at 4096 chars) and inline keyboard.

    Without duplicates the keyboard is a single row; with duplicates it is a
    two-per-row grid of download buttons followed by a navigation row.
    """
    parts = [f'**{self.get_robust_title()}**\n']
    cover_url = self.get_cover_url()
    if cover_url:
        # There is an invisible character inside []!
        parts[-1] = f'[]({cover_url})' + parts[-1]
    if self.authors:
        parts.append(
            f'**{t("AUTHORS", language=language)}**: '
            f'{escape_format(self.get_first_authors(first_n_authors=3))}')
    dt = self.get_issued_datetime()
    if dt:
        parts.append(f'**{t("YEAR", language=language)}**: {dt.year}')
    if self.edition:
        parts.append(f'**{t("EDITION", language=language)}**: '
                     f'{escape_format(self.edition)}')
    parts.append(
        f'**Links**: {" - ".join(self.generate_links(bot_external_name))}')
    if self.description:
        parts.append(
            f'\n**{t("DESCRIPTION", language=language)}**:\n'
            f'{escape_format(despace(self.description))}',
        )
    if self.tags:
        parts.append(f'\n__{escape_format(", ".join(self.tags))}__')

    buttons = None
    if with_buttons:
        if not self.duplicates:
            # Plain layout: optional back, one download, close — all in one row.
            row = []
            if back_command:
                row.append(Button.inline(text='⬅️', data=back_command))
            # ⬇️ is a mark, Find+F over sources before replacing
            row.append(Button.inline(
                text=f'⬇️ {self.get_formatted_filedata(show_language=False)}',
                data=self.get_download_command(session_id=session_id,
                                               position=position),
            ))
            row.append(close_button(session_id))
            buttons = [row]
        else:
            # One download button per duplicate, laid out two per row.
            buttons = [[]]
            for view in [self] + self.duplicates:
                filedata = view.get_formatted_filedata(show_language=False,
                                                       show_filesize=True)
                if len(buttons[-1]) >= 2:
                    buttons.append([])
                # ⬇️ is a mark, Find+F over sources before replacing
                buttons[-1].append(Button.inline(
                    text=f'⬇️ {filedata}',
                    data=view.get_download_command(session_id=session_id,
                                                   position=position),
                ))
            if len(buttons[-1]) == 1:
                # Pad an odd row so the grid stays rectangular.
                buttons[-1].append(Button.inline(text=' ', data='/noop'))
            # Trailing navigation row: optional back plus close.
            buttons.append([])
            if back_command:
                buttons[-1].append(Button.inline(text='⬅️', data=back_command))
            buttons[-1].append(close_button(session_id))
    return '\n'.join(parts).strip()[:4096], buttons
def get_view(
    self,
    language: str,
    session_id: str,
    bot_external_name: str,
    position: int = 0,
    back_command: Optional[str] = None,
    with_buttons: bool = True,
) -> Tuple[str, Optional[List[List[Button]]]]:
    """Build the markdown card (capped at 4096 chars) and a single-row keyboard."""
    parts = [f'**{self.get_robust_title()}**\n']
    if self.authors:
        parts.append(
            f'**{t("AUTHORS", language=language)}**: '
            f'{escape_format(self.get_first_authors(first_n_authors=3))}')
    journal = self.get_robust_journal()
    if journal:
        parts.append(f'**{t("JOURNAL", language=language)}**: {journal}')
    dt = self.get_formatted_datetime()
    if dt:
        parts.append(f'**{t("YEAR", language=language)}**: {dt}')
    if self.downloads_count:
        # log1p compresses raw download counts into a small popularity score.
        parts.append(f'**NRank**: {math.log1p(self.downloads_count):.1f}')
    parts.append(
        f'**Links**: {" - ".join(self.generate_links(bot_external_name))}')
    if self.abstract:
        parts.append(
            f'\n**{t("ABSTRACT", language=language)}**: {escape_format(self.abstract)}',
        )
    if self.tags:
        parts.append(f'\n__{escape_format(", ".join(self.tags))}__')

    buttons = None
    if with_buttons:
        row = []
        if back_command:
            row.append(Button.inline(text='⬅️', data=back_command))
        # ⬇️ is a mark, Find+F over sources before replacing
        row.append(Button.inline(
            text=f'⬇️ {self.get_formatted_filedata()}',
            data=self.get_download_command(session_id=session_id,
                                           position=position),
        ))
        if self.ref_by_count:
            # Inline-query button listing papers that reference this DOI.
            row.append(Button.switch_inline(
                text=f'🔗 {self.ref_by_count or ""}',
                query=f'references:"{self.doi}"',
                same_peer=True,
            ))
        row.append(close_button(session_id))
        buttons = [row]
    return '\n'.join(parts).strip()[:4096], buttons
async def render(self) -> tuple[str, Optional[list]]:
    """Render scored documents into a SERP message.

    Group chats get deep links back to the bot and no buttons; private chats
    get view commands, a promo line and (when applicable) pagination buttons.
    """
    if not self.scored_documents:
        return (
            t('COULD_NOT_FIND_ANYTHING', language=self.chat.language),
            [close_button(self.session_id)],
        )

    bot_external_name = self.application.config['telegram']['bot_external_name']
    serp_elements = []
    for scored_document in self.scored_documents:
        view = parse_typed_document_to_view(scored_document.typed_document)
        if self.is_group_mode:
            view_command = view.get_deep_link(bot_external_name, text='⬇️')
        else:
            view_command = view.get_view_command(
                session_id=self.session_id,
                message_id=self.message_id,
                position=scored_document.position,
            )
        serp_elements.append(view.get_snippet(
            language=self.chat.language,
            view_command=view_command,
            limit=512 + 128,
        ))
    serp = '\n\n'.join(serp_elements)

    if self.is_group_mode:
        # Append a deep link carrying the query; fall back to a bare bot link
        # when the query is too long to fit into a deep link payload.
        try:
            link_target = encode_query_to_deep_link(
                self.query,
                bot_external_name,
            )
        except TooLongQueryError:
            link_target = f'https://t.me/{bot_external_name}'
        serp = (
            f"{serp}\n\n**{t('DOWNLOAD_AND_SEARCH_MORE', language=self.chat.language)}: **"
            f'[@{bot_external_name}]'
            f'({link_target})')
    else:
        promo = self.application.promotioner.choose_promotion(
            language=self.chat.language,
        ).format(
            related_channel=self.application.config['telegram']['related_channel'],
        )
        serp = f'{serp}\n\n{promo}\n'

    buttons = None
    if not self.is_group_mode:
        buttons = []
        if self.has_next or self.page > 0:
            # Pagination row: first page / previous / next; blanks map to /noop.
            prefix = f'/search_{self.session_id}_{self.message_id}'
            buttons = [
                Button.inline(
                    text='<<1' if self.page > 1 else ' ',
                    data=f'{prefix}_0' if self.page > 1 else '/noop',
                ),
                Button.inline(
                    text=f'<{self.page}' if self.page > 0 else ' ',
                    data=f'{prefix}_{self.page - 1}' if self.page > 0 else '/noop',
                ),
                Button.inline(
                    text=f'{self.page + 2}>' if self.has_next else ' ',
                    data=f'{prefix}_{self.page + 1}' if self.has_next else '/noop',
                ),
            ]
        buttons.append(close_button(self.session_id))
    return serp, buttons
async def do_search(
    self,
    event: events.ChatAction,
    request_context: RequestContext,
    prefetch_message,
    query: str,
    is_group_mode: bool = False,
    is_shortpath_enabled: bool = False,
):
    """Execute a search query and edit `prefetch_message` with the result.

    When exactly one document matches and `is_shortpath_enabled` is set,
    the document card is shown directly instead of a result list. Known
    gRPC failures (invalid query, maintenance) are converted into
    user-facing replies; unknown AioRpcError codes are re-raised after
    the prefetch message is deleted.
    """
    session_id = self.generate_session_id()
    message_id = prefetch_message.id
    request_context.add_default_fields(is_group_mode=is_group_mode,
                                       mode='search',
                                       session_id=session_id)
    start_time = time.time()
    language = request_context.chat.language
    try:
        search_widget = await SearchWidget.create(
            application=self.application,
            chat=request_context.chat,
            session_id=session_id,
            message_id=message_id,
            request_id=request_context.request_id,
            query=query,
            is_group_mode=is_group_mode,
        )
    except AioRpcError as e:
        # The prefetch message is stale on every failure path — always delete it.
        actions = [
            self.application.telegram_client.delete_messages(
                request_context.chat.chat_id,
                [message_id],
            )
        ]
        if e.code() == StatusCode.INVALID_ARGUMENT:
            too_difficult_picture_url = self.application.config[
                'application'].get('too_difficult_picture_url', '')
            if e.details() == 'url_query_error':
                actions.append(event.reply(
                    t('INVALID_QUERY_ERROR', language=language).format(
                        too_difficult_picture_url=too_difficult_picture_url,
                    ),
                    buttons=[close_button()],
                ))
            elif e.details() == 'invalid_query_error':
                actions.append(event.reply(
                    t('INVALID_SYNTAX_ERROR', language=language).format(
                        too_difficult_picture_url=too_difficult_picture_url,
                    ),
                    buttons=[close_button()],
                ))
            return await asyncio.gather(*actions)
        elif e.code() == StatusCode.CANCELLED:
            maintenance_picture_url = self.application.config[
                'application'].get('maintenance_picture_url', '')
            request_context.error_log(e)
            actions.append(event.reply(
                t('MAINTENANCE', language=language).format(
                    maintenance_picture_url=maintenance_picture_url,
                ),
                buttons=[close_button()],
            ))
            return await asyncio.gather(*actions)
        await asyncio.gather(*actions)
        # Bare raise re-raises the active exception with its traceback intact
        # (instead of `raise e`, which appends a new frame).
        raise

    action = ('documents_found' if search_widget.scored_documents
              else 'documents_not_found')
    request_context.statbox(
        action=action,
        duration=time.time() - start_time,
        query=f'page:0 query:{query}',
    )

    if len(search_widget.scored_documents) == 1 and is_shortpath_enabled:
        scored_document = search_widget.scored_documents[0]
        document_view = parse_typed_document_to_view(
            scored_document.typed_document)
        # Second (re-)fetching is required to retrieve duplicates
        document_view = await self.resolve_document(
            schema=scored_document.typed_document.WhichOneof('document'),
            document_id=document_view.id,
            position=0,
            session_id=session_id,
            request_context=request_context,
        )
        view, buttons = document_view.get_view(
            language=language,
            session_id=session_id,
            bot_external_name=self.application.config['telegram']
            ['bot_external_name'],
            with_buttons=not is_group_mode,
        )
        return await asyncio.gather(
            self.application.telegram_client.edit_message(
                request_context.chat.chat_id,
                message_id,
                view,
                buttons=buttons,
            ),
        )

    serp, buttons = await search_widget.render()
    return await self.application.telegram_client.edit_message(
        request_context.chat.chat_id,
        message_id,
        serp,
        buttons=buttons,
        link_preview=False,
    )