Ejemplo n.º 1
0
    async def handler(self, event: events.ChatAction,
                      request_context: RequestContext):
        """Process an incoming document: submit PDFs, reject anything else.

        A fresh session id is generated and attached to ``request_context``
        before anything else is logged. In both outcomes the incoming
        message is deleted concurrently with the main action.

        Returns:
            The list produced by ``asyncio.gather`` for the two concurrent
            actions (submit-or-reply first, message deletion second).
        """
        session_id = self.generate_session_id()
        request_context.add_default_fields(session_id=session_id)
        request_context.statbox(action='show', mode='submit')

        mime_type = event.document.mime_type
        if mime_type == 'application/pdf':
            # Happy path: hand the document over to the hub.
            submission = self.application.hub_client.submit(
                telegram_document=bytes(event.document),
                telegram_file_id=event.file.id,
                chat=request_context.chat,
                request_id=request_context.request_id,
                session_id=session_id,
            )
            return await asyncio.gather(submission, event.delete())

        # Anything but a PDF is logged and answered with an error message.
        request_context.statbox(action='unknown_file_format')
        request_context.error_log(UnknownFileFormatError(format=mime_type))
        rejection = event.reply(
            t('UNKNOWN_FILE_FORMAT_ERROR',
              language=request_context.chat.language),
            buttons=[close_button()],
        )
        return await asyncio.gather(rejection, event.delete())
Ejemplo n.º 2
0
 async def _on_fail():
     """Send the localized 'MAINTENANCE' message to the requesting chat.

     Relies on ``self`` and ``request_context`` from the enclosing scope.
     """
     chat = request_context.chat
     template = t('MAINTENANCE', language=chat.language)
     text = template.format(
         maintenance_picture_url=self.delivery_service.maintenance_picture_url,
     )
     await self.delivery_service.telegram_client.send_message(
         chat.chat_id,
         text,
         buttons=[close_button()],
     )
Ejemplo n.º 3
0
 async def respond_not_found(self, request_context: RequestContext,
                             document_view):
     """Tell the requesting chat that no sources are available for a document.

     The localized 'SOURCES_UNAVAILABLE' template is filled with the
     document's robust title and sent with a single close button.

     Returns:
         Whatever ``telegram_client.send_message`` returns.
     """
     chat = request_context.chat
     template = t("SOURCES_UNAVAILABLE", language=chat.language)
     text = template.format(document=document_view.get_robust_title())
     return await self.delivery_service.telegram_client.send_message(
         chat.chat_id,
         text,
         buttons=[close_button()],
     )
Ejemplo n.º 4
0
    async def render(self) -> tuple[str, Optional[list]]:
        """Render the result page text together with its inline buttons.

        Returns:
            A ``(text, buttons)`` pair. With no documents the text is the
            localized 'COULD_NOT_FIND_ANYTHING' message and the only button
            is the close button. Otherwise the text is the document snippets
            followed by a promotion, joined by blank lines, and the buttons
            are an optional three-button pager followed by the close button.
        """
        if len(self.typed_documents) == 0:
            nothing_found = t('COULD_NOT_FIND_ANYTHING',
                              language=self.chat.language)
            return nothing_found, [close_button(self.session_id)]

        blocks = [
            parse_typed_document_to_view(typed_document).get_snippet(
                language=self.chat.language,
                limit=512 + 128,
            )
            for typed_document in self.typed_documents
        ]

        promo_template = self.application.promotioner.choose_promotion(
            language=self.chat.language)
        blocks.append(
            promo_template.format(
                related_channel=self.application.config['telegram']
                ['related_channel'],
            ))
        serp = '\n\n'.join(blocks)

        def page_command(page_number) -> str:
            # Callback payload that switches this message to the given page.
            return f'/{self.cmd}_{self.session_id}_{self.message_id}_{page_number}'

        buttons = []
        if self.has_next or self.page > 0:
            # Each pager slot is always present; an unavailable slot is a
            # blank button wired to '/noop'.
            if self.page > 1:
                first = Button.inline(text='<<1', data=page_command(0))
            else:
                first = Button.inline(text=' ', data='/noop')

            if self.page > 0:
                previous = Button.inline(text=f'<{self.page}',
                                         data=page_command(self.page - 1))
            else:
                previous = Button.inline(text=' ', data='/noop')

            if self.has_next:
                following = Button.inline(text=f'{self.page + 2}>',
                                          data=page_command(self.page + 1))
            else:
                following = Button.inline(text=' ', data='/noop')

            buttons = [first, previous, following]
        buttons.append(close_button(self.session_id))

        return serp, buttons
Ejemplo n.º 5
0
 async def _send_fail_response(self, event: events.ChatAction,
                               request_context: RequestContext):
     """Reply to ``event`` with the localized 'MAINTENANCE' message.

     ``ConnectionError`` and ``QueryIdInvalidError`` raised while building or
     sending the reply are written to the request's error log instead of
     propagating.
     """
     try:
         template = t('MAINTENANCE', language=request_context.chat.language)
         picture_url = (
             self.application.config['application']['maintenance_picture_url'])
         await event.reply(
             template.format(maintenance_picture_url=picture_url),
             buttons=[close_button()],
         )
     except (ConnectionError, QueryIdInvalidError) as error:
         request_context.error_log(error)
Ejemplo n.º 6
0
 async def external_cancel(self):
     """Cancel the running task and notify the chat that the download stopped.

     Steps, in order: cancel ``self.task``, record an
     'externally_canceled' statbox entry for the document, then send the
     localized 'DOWNLOAD_CANCELED' message with a close button.
     """
     self.task.cancel()

     context = self.request_context
     view = self.document_view
     context.statbox(
         action='externally_canceled',
         document_id=view.id,
         schema=view.schema,
     )

     template = t("DOWNLOAD_CANCELED", language=context.chat.language)
     text = template.format(document=view.get_robust_title())
     await self.delivery_service.telegram_client.send_message(
         context.chat.chat_id,
         text,
         buttons=[close_button()],
     )
Ejemplo n.º 7
0
    async def submit(
        self,
        request: SubmitRequestPb,
        context: ServicerContext,
        metadata: dict,
    ) -> SubmitResponsePb:
        """Process a document submitted by a user and register it on success.

        Flow, as visible in this method:
          1. Decode the Telegram document from the request and reject it if
             it is larger than 20 MiB.
          2. Send a temporary "processing" message, download the file and
             pass it to ``grobid_client.process_fulltext_document``.
          3. Require a DOI in the processing result and look it up through
             ``meta_api_client.search`` in the 'scimag' schema.
          4. Send the file back via ``self.send_file`` and write a
             ``DocumentOperationPb`` to the operation log.

        Every failure branch logs to statbox and the error log, sends a
        localized error message with a close button and returns an empty
        response.

        Args:
            request: Carries ``chat`` and the serialized ``telegram_document``.
            context: Servicer context; not used inside this method.
            metadata: Read for the 'session-id' and 'request-id' keys.

        Returns:
            An empty ``SubmitResponsePb`` in every non-raising path.
        """
        session_id = metadata.get('session-id')
        request_context = RequestContext(
            bot_name=self.service_name,
            chat=request.chat,
            request_id=metadata.get('request-id'),
        )
        request_context.add_default_fields(
            mode='submit',
            session_id=metadata.get('session-id'),
            **self.get_default_service_fields(),
        )

        # Deserialize the document object from its byte representation.
        # NOTE(review): presumably Telethon's BinaryReader — confirm.
        document = BinaryReader(request.telegram_document).tgread_object()
        # Hard cap on accepted file size: 20 MiB.
        if document.size > 20 * 1024 * 1024:
            request_context.error_log(FileTooBigError(size=document.size))
            request_context.statbox(action='file_too_big')
            await self.telegram_client.send_message(
                request_context.chat.chat_id,
                t('FILE_TOO_BIG_ERROR',
                  language=request_context.chat.language),
                buttons=[close_button()],
            )
            return SubmitResponsePb()
        # Temporary progress message; it is removed in the `finally` below.
        # NOTE(review): assumes the first document attribute carries
        # `file_name` — confirm against the attribute types that can occur.
        processing_message = await self.telegram_client.send_message(
            request_context.chat.chat_id,
            t("PROCESSING_PAPER",
              language=request_context.chat.language).format(
                  filename=document.attributes[0].file_name, ),
        )
        try:
            # `file=bytes` — presumably makes the client return the content
            # in memory; `file` is later hashed and re-sent as-is.
            file = await self.telegram_client.download_document(
                document=document, file=bytes)
            try:
                processed_document = await self.grobid_client.process_fulltext_document(
                    pdf_file=file)
            except BadRequestError as e:
                # The processing service rejected the file.
                request_context.statbox(action='unparsable_document')
                request_context.error_log(e)
                await self.telegram_client.send_message(
                    request_context.chat.chat_id,
                    t('UNPARSABLE_DOCUMENT_ERROR',
                      language=request_context.chat.language).format(
                          filename=document.attributes[0].file_name, ),
                    buttons=[close_button()],
                )
                return SubmitResponsePb()

            # Without a DOI the document cannot be matched to metadata.
            if not processed_document.get('doi'):
                request_context.statbox(action='unparsable_doi')
                request_context.error_log(UnparsableDoiError())
                await self.telegram_client.send_message(
                    request_context.chat.chat_id,
                    t('UNPARSABLE_DOI_ERROR',
                      language=request_context.chat.language).format(
                          filename=document.attributes[0].file_name, ),
                    buttons=[close_button()],
                )
                return SubmitResponsePb()

            # Look the DOI up; only the top hit is requested.
            search_response_pb = await self.meta_api_client.search(
                schemas=('scimag', ),
                query=processed_document['doi'],
                page=0,
                page_size=1,
                request_id=request_context.request_id,
                session_id=session_id,
                user_id=str(request_context.chat.chat_id),
                language=request_context.chat.language,
            )

            if len(search_response_pb.scored_documents) == 0:
                request_context.statbox(action='unavailable_metadata')
                request_context.error_log(
                    UnavailableMetadataError(doi=processed_document['doi']))
                await self.telegram_client.send_message(
                    request_context.chat.chat_id,
                    t('UNAVAILABLE_METADATA_ERROR',
                      language=request_context.chat.language).format(
                          doi=processed_document['doi']),
                    buttons=[close_button()],
                )
                return SubmitResponsePb()

            document_view = ScimagView(
                search_response_pb.scored_documents[0].typed_document.scimag)
            uploaded_message = await self.send_file(
                document_view=document_view,
                file=file,
                request_context=request_context,
                session_id=session_id,
                voting=False,
            )
        finally:
            # Runs on success, on every early return above and on exceptions.
            await processing_message.delete()

        # Only reached on the success path, so `document_view`, `file` and
        # `uploaded_message` are all bound here.
        document_operation_pb = DocumentOperationPb(
            update_document=UpdateDocumentPb(
                typed_document=TypedDocumentPb(sharience=ShariencePb(
                    parent_id=document_view.id,
                    uploader_id=request_context.chat.chat_id,
                    updated_at=int(time.time()),
                    md5=hashlib.md5(file).hexdigest(),
                    filesize=document.size,
                    ipfs_multihashes=await self.get_ipfs_hashes(file=file),
                    telegram_file_id=uploaded_message.file.id,
                )), ), )
        request_context.statbox(
            action='success',
            document_id=document_view.id,
            schema='scimag',
        )
        await operation_log(document_operation_pb)
        return SubmitResponsePb()
Ejemplo n.º 8
0
    def get_view(
        self,
        language: str,
        session_id: str,
        bot_external_name: str,
        position: int = 0,
        back_command: Optional[str] = None,
        with_buttons: bool = True,
    ) -> Tuple[str, Optional[List[List[Button]]]]:
        """Build the detailed message text and button rows for this document.

        Args:
            language: Language passed to ``t`` for the field labels.
            session_id: Embedded into the download and close buttons.
            bot_external_name: Passed to ``generate_links``.
            position: Forwarded to ``get_download_command``.
            back_command: When truthy, a '⬅️' button carrying this callback
                data is added.
            with_buttons: When false, no buttons are built and ``None`` is
                returned in their place.

        Returns:
            ``(text, buttons)`` where the text is cut to 4096 characters.
            Without duplicates the buttons are a single row
            (back, download, close). With duplicates there are rows of up to
            two download buttons (the last one padded with a blank '/noop'
            button when it holds a single button) followed by a
            (back, close) row.
        """
        header = f'**{self.get_robust_title()}**\n'
        cover_url = self.get_cover_url()
        if cover_url:
            # There is an invisible character inside []!
            header = f'[​]({cover_url})' + header
        lines = [header]

        if self.authors:
            authors = escape_format(self.get_first_authors(first_n_authors=3))
            lines.append(f'**{t("AUTHORS", language=language)}**: {authors}')
        issued_at = self.get_issued_datetime()
        if issued_at:
            lines.append(
                f'**{t("YEAR", language=language)}**: {issued_at.year}')
        if self.edition:
            lines.append(f'**{t("EDITION", language=language)}**: '
                         f'{escape_format(self.edition)}')

        links = " - ".join(self.generate_links(bot_external_name))
        lines.append(f'**Links**: {links}')

        if self.description:
            description = escape_format(despace(self.description))
            lines.append(
                f'\n**{t("DESCRIPTION", language=language)}**:\n'
                f'{description}')

        if self.tags:
            lines.append(f'\n__{escape_format(", ".join(self.tags))}__')

        if not with_buttons:
            return '\n'.join(lines).strip()[:4096], None

        if not self.duplicates:
            # Plain layout: everything fits in a single row.
            row = []
            if back_command:
                row.append(Button.inline(text='⬅️', data=back_command))
            filedata = self.get_formatted_filedata(show_language=False)
            row.append(
                Button.inline(
                    text=f'⬇️ {filedata}',
                    data=self.get_download_command(session_id=session_id,
                                                   position=position),
                ))
            row.append(close_button(session_id))
            return '\n'.join(lines).strip()[:4096], [row]

        # Duplicates layout: one download button per file, two per row.
        downloads = []
        for view in [self] + self.duplicates:
            filedata = view.get_formatted_filedata(show_language=False,
                                                   show_filesize=True)
            # ⬇️ is a mark, Find+F over sources before replacing
            downloads.append(
                Button.inline(
                    text=f'⬇️ {filedata}',
                    data=view.get_download_command(session_id=session_id,
                                                   position=position),
                ))
        rows = [
            downloads[start:start + 2]
            for start in range(0, len(downloads), 2)
        ]
        if len(rows[-1]) == 1:
            # Keep the grid even with a blank placeholder button.
            rows[-1].append(Button.inline(text=' ', data='/noop'))

        navigation = []
        if back_command:
            navigation.append(Button.inline(text='⬅️', data=back_command))
        navigation.append(close_button(session_id))
        rows.append(navigation)

        return '\n'.join(lines).strip()[:4096], rows
Ejemplo n.º 9
0
    def get_view(
        self,
        language: str,
        session_id: str,
        bot_external_name: str,
        position: int = 0,
        back_command: Optional[str] = None,
        with_buttons: bool = True,
    ) -> Tuple[str, Optional[List[List[Button]]]]:
        """Build the detailed message text and button row for this document.

        Args:
            language: Language passed to ``t`` for the field labels.
            session_id: Embedded into the download and close buttons.
            bot_external_name: Passed to ``generate_links``.
            position: Forwarded to ``get_download_command``.
            back_command: When truthy, a '⬅️' button carrying this callback
                data is put first in the row.
            with_buttons: When false, no buttons are built and ``None`` is
                returned in their place.

        Returns:
            ``(text, buttons)`` where the text is cut to 4096 characters and
            the buttons, when requested, are one row: optional back button,
            download button, optional references button (only when
            ``ref_by_count`` is truthy) and the close button.
        """
        lines = [f'**{self.get_robust_title()}**\n']

        if self.authors:
            authors = escape_format(self.get_first_authors(first_n_authors=3))
            lines.append(f'**{t("AUTHORS", language=language)}**: {authors}')

        journal = self.get_robust_journal()
        if journal:
            lines.append(f'**{t("JOURNAL", language=language)}**: {journal}')

        issued = self.get_formatted_datetime()
        if issued:
            lines.append(f'**{t("YEAR", language=language)}**: {issued}')

        if self.downloads_count:
            rank = math.log1p(self.downloads_count)
            lines.append(f'**NRank**: {rank:.1f}')

        links = " - ".join(self.generate_links(bot_external_name))
        lines.append(f'**Links**: {links}')

        if self.abstract:
            lines.append(
                f'\n**{t("ABSTRACT", language=language)}**: '
                f'{escape_format(self.abstract)}')

        if self.tags:
            lines.append(f'\n__{escape_format(", ".join(self.tags))}__')

        if not with_buttons:
            return '\n'.join(lines).strip()[:4096], None

        row = []
        if back_command:
            row.append(Button.inline(text='⬅️', data=back_command))

        # ⬇️ is a mark, Find+F over sources before replacing
        row.append(
            Button.inline(
                text=f'⬇️ {self.get_formatted_filedata()}',
                data=self.get_download_command(session_id=session_id,
                                               position=position),
            ))
        if self.ref_by_count:
            row.append(
                Button.switch_inline(
                    text=f'🔗 {self.ref_by_count or ""}',
                    query=f'references:"{self.doi}"',
                    same_peer=True,
                ))
        row.append(close_button(session_id))

        return '\n'.join(lines).strip()[:4096], [row]
Ejemplo n.º 10
0
    async def render(self) -> tuple[str, Optional[list]]:
        """Render the search result page text together with its buttons.

        Returns:
            A ``(text, buttons)`` pair.

            * No documents: the localized 'COULD_NOT_FIND_ANYTHING' message
              and a lone close button.
            * Group mode: snippets followed by a "download and search more"
              link to the bot; ``buttons`` is ``None``.
            * Otherwise: snippets followed by a promotion, with an optional
              three-button pager and the close button.
        """
        if len(self.scored_documents) == 0:
            nothing_found = t('COULD_NOT_FIND_ANYTHING',
                              language=self.chat.language)
            return nothing_found, [close_button(self.session_id)]

        bot_external_name = self.application.config['telegram'][
            'bot_external_name']

        snippets = []
        for scored_document in self.scored_documents:
            view = parse_typed_document_to_view(scored_document.typed_document)
            if self.is_group_mode:
                # In groups a snippet links out to the bot via a deep link.
                view_command = view.get_deep_link(bot_external_name, text='⬇️')
            else:
                view_command = view.get_view_command(
                    session_id=self.session_id,
                    message_id=self.message_id,
                    position=scored_document.position,
                )
            snippets.append(
                view.get_snippet(
                    language=self.chat.language,
                    view_command=view_command,
                    limit=512 + 128,
                ))
        serp = '\n\n'.join(snippets)

        if self.is_group_mode:
            try:
                more_link = encode_query_to_deep_link(
                    self.query,
                    bot_external_name,
                )
            except TooLongQueryError:
                # The query does not fit: link to the bot itself instead.
                more_link = f'https://t.me/{bot_external_name}'
            more_label = t('DOWNLOAD_AND_SEARCH_MORE',
                           language=self.chat.language)
            serp = (f'{serp}\n\n**{more_label}: **'
                    f'[@{bot_external_name}]'
                    f'({more_link})')
            return serp, None

        promo_template = self.application.promotioner.choose_promotion(
            language=self.chat.language)
        promo = promo_template.format(
            related_channel=self.application.config['telegram']
            ['related_channel'],
        )
        serp = f'{serp}\n\n{promo}\n'

        def page_command(page_number) -> str:
            # Callback payload that switches this message to the given page.
            return f'/search_{self.session_id}_{self.message_id}_{page_number}'

        buttons = []
        if self.has_next or self.page > 0:
            # Each pager slot is always present; an unavailable slot is a
            # blank button wired to '/noop'.
            if self.page > 1:
                first = Button.inline(text='<<1', data=page_command(0))
            else:
                first = Button.inline(text=' ', data='/noop')

            if self.page > 0:
                previous = Button.inline(text=f'<{self.page}',
                                         data=page_command(self.page - 1))
            else:
                previous = Button.inline(text=' ', data='/noop')

            if self.has_next:
                following = Button.inline(text=f'{self.page + 2}>',
                                          data=page_command(self.page + 1))
            else:
                following = Button.inline(text=' ', data='/noop')

            buttons = [first, previous, following]
        buttons.append(close_button(self.session_id))

        return serp, buttons
Ejemplo n.º 11
0
    async def do_search(
        self,
        event: events.ChatAction,
        request_context: RequestContext,
        prefetch_message,
        query: str,
        is_group_mode: bool = False,
        is_shortpath_enabled: bool = False,
    ):
        """Run a search and put the outcome into the prefetch message.

        Args:
            event: Incoming event; used to reply with error messages.
            request_context: Logging context; search fields are added to it.
            prefetch_message: Already sent message whose ``id`` is edited
                with the result (or deleted on an RPC error).
            query: The search query.
            is_group_mode: Forwarded to the widget; also disables buttons on
                the short path.
            is_shortpath_enabled: When true and exactly one document is
                found, the document view is shown instead of the result list.

        Returns:
            On an ``INVALID_ARGUMENT`` or ``CANCELLED`` RPC error, the
            ``asyncio.gather`` result of the clean-up actions; on the short
            path, a one-element ``asyncio.gather`` result; otherwise the
            result of ``edit_message``.

        Raises:
            AioRpcError: Re-raised, after the prefetch message is deleted,
                for any status code other than the two handled ones.
        """
        session_id = self.generate_session_id()
        message_id = prefetch_message.id
        request_context.add_default_fields(
            is_group_mode=is_group_mode,
            mode='search',
            session_id=session_id,
        )
        started_at = time.time()

        try:
            search_widget = await SearchWidget.create(
                application=self.application,
                chat=request_context.chat,
                session_id=session_id,
                message_id=message_id,
                request_id=request_context.request_id,
                query=query,
                is_group_mode=is_group_mode,
            )
        except AioRpcError as rpc_error:
            # The prefetch message is removed whatever the error is.
            pending = [
                self.application.telegram_client.delete_messages(
                    request_context.chat.chat_id,
                    [message_id],
                ),
            ]
            status = rpc_error.code()

            if status == StatusCode.INVALID_ARGUMENT:
                too_difficult_picture_url = self.application.config[
                    'application'].get('too_difficult_picture_url', '')
                translation_keys = {
                    'url_query_error': 'INVALID_QUERY_ERROR',
                    'invalid_query_error': 'INVALID_SYNTAX_ERROR',
                }
                translation_key = translation_keys.get(rpc_error.details())
                # Unrecognized details get no reply, only the deletion.
                if translation_key is not None:
                    text = t(
                        translation_key,
                        language=request_context.chat.language,
                    ).format(
                        too_difficult_picture_url=too_difficult_picture_url)
                    pending.append(
                        event.reply(text, buttons=[close_button()]))
                return await asyncio.gather(*pending)

            if status == StatusCode.CANCELLED:
                maintenance_picture_url = self.application.config[
                    'application'].get('maintenance_picture_url', '')
                request_context.error_log(rpc_error)
                text = t(
                    'MAINTENANCE',
                    language=request_context.chat.language,
                ).format(maintenance_picture_url=maintenance_picture_url)
                pending.append(event.reply(text, buttons=[close_button()]))
                return await asyncio.gather(*pending)

            await asyncio.gather(*pending)
            raise rpc_error

        if len(search_widget.scored_documents) == 0:
            action = 'documents_not_found'
        else:
            action = 'documents_found'
        request_context.statbox(
            action=action,
            duration=time.time() - started_at,
            query=f'page:0 query:{query}',
        )

        if is_shortpath_enabled and len(search_widget.scored_documents) == 1:
            only_hit = search_widget.scored_documents[0]
            parsed_view = parse_typed_document_to_view(only_hit.typed_document)
            # Second (re-)fetching is required to retrieve duplicates
            document_view = await self.resolve_document(
                schema=only_hit.typed_document.WhichOneof('document'),
                document_id=parsed_view.id,
                position=0,
                session_id=session_id,
                request_context=request_context,
            )
            view, buttons = document_view.get_view(
                language=request_context.chat.language,
                session_id=session_id,
                bot_external_name=self.application.config['telegram']
                ['bot_external_name'],
                with_buttons=not is_group_mode,
            )
            edit = self.application.telegram_client.edit_message(
                request_context.chat.chat_id,
                message_id,
                view,
                buttons=buttons,
            )
            # Kept as a single-item gather so the return value stays a list.
            return await asyncio.gather(edit)

        serp, buttons = await search_widget.render()
        return await self.application.telegram_client.edit_message(
            request_context.chat.chat_id,
            message_id,
            serp,
            buttons=buttons,
            link_preview=False,
        )