Ejemplo n.º 1
0
    async def handler(self, event: events.ChatAction,
                      request_context: RequestContext):
        session_id = self.generate_session_id()

        request_context.add_default_fields(session_id=session_id)
        request_context.statbox(action='show', mode='submit')

        if event.document.mime_type != 'application/pdf':
            request_context.statbox(action='unknown_file_format')
            request_context.error_log(
                UnknownFileFormatError(format=event.document.mime_type))
            return await asyncio.gather(
                event.reply(
                    t('UNKNOWN_FILE_FORMAT_ERROR',
                      language=request_context.chat.language),
                    buttons=[close_button()],
                ),
                event.delete(),
            )

        return await asyncio.gather(
            self.application.hub_client.submit(
                telegram_document=bytes(event.document),
                telegram_file_id=event.file.id,
                chat=request_context.chat,
                request_id=request_context.request_id,
                session_id=session_id,
            ),
            event.delete(),
        )
Ejemplo n.º 2
0
    async def handler(self, event: events.ChatAction, request_context: RequestContext):
        raw_query = event.pattern_match.group(1)
        query = None

        request_context.statbox(action='start', mode='start')

        try:
            query = decode_deep_query(raw_query)
        except DecodeDeepQueryError as e:
            request_context.error_log(e, mode='start', raw_query=raw_query)

        if query:
            request_context.statbox(action='query', mode='start', query=query)
            request_message = await self.application.telegram_client.send_message(event.chat, query)
            prefetch_message = await request_message.reply(
                t("SEARCHING", language=request_context.chat.language),
            )
            self.application.user_manager.last_widget[request_context.chat.chat_id] = prefetch_message.id
            await asyncio.gather(
                event.delete(),
                self.do_search(event, request_context, prefetch_message, query=query,
                               is_shortpath_enabled=True),
            )
        else:
            request_context.statbox(action='show', mode='start')
            await event.reply(t('HELP', language=request_context.chat.language))
Ejemplo n.º 3
0
    async def handler(self, event: events.ChatAction,
                      request_context: RequestContext):
        search_prefix, query, is_group_mode = self.parse_pattern(event)
        request_context.add_default_fields(mode='search_edit')

        if is_group_mode and not search_prefix:
            return
        if not is_group_mode and search_prefix:
            query = event.raw_text

        last_messages = await self.get_last_messages_in_chat(event)
        try:
            if not last_messages:
                raise MessageHasBeenDeletedError()
            for next_message in last_messages.messages:
                if next_message.is_reply and event.id == next_message.reply_to_msg_id:
                    request_context.statbox(action='resolved')
                    return await self.do_search(
                        event=event,
                        request_context=request_context,
                        prefetch_message=next_message,
                        query=query,
                        is_group_mode=is_group_mode,
                    )
            raise MessageHasBeenDeletedError()
        except MessageHasBeenDeletedError as e:
            request_context.error_log(e)
            return await event.reply(
                t('REPLY_MESSAGE_HAS_BEEN_DELETED',
                  language=request_context.chat.language), )
Ejemplo n.º 4
0
 async def _send_fail_response(self, event,
                               request_context: RequestContext):
     try:
         await event.answer(
             t('MAINTENANCE_WO_PIC',
               language=request_context.chat.language))
     except (ConnectionError, QueryIdInvalidError) as e:
         request_context.error_log(e)
Ejemplo n.º 5
0
 async def _send_fail_response(self, event: events.ChatAction,
                               request_context: RequestContext):
     try:
         await event.reply(t(
             'MAINTENANCE', language=request_context.chat.language).format(
                 maintenance_picture_url=self.application.
                 config['application']['maintenance_picture_url'], ),
                           buttons=[close_button()])
     except (ConnectionError, QueryIdInvalidError) as e:
         request_context.error_log(e)
Ejemplo n.º 6
0
    async def handler(self, event: events.ChatAction,
                      request_context: RequestContext):
        session_id, message_id, page = self.parse_pattern(event)

        request_context.add_default_fields(mode='search_paging',
                                           session_id=session_id)
        start_time = time.time()

        message = await event.get_message()
        if not message:
            return await event.answer()

        reply_message = await message.get_reply_message()
        try:
            if not reply_message:
                raise MessageHasBeenDeletedError()
            query = self.preprocess_query(reply_message.raw_text)
            search_widget = await SearchWidget.create(
                application=self.application,
                chat=request_context.chat,
                session_id=session_id,
                message_id=message_id,
                request_id=request_context.request_id,
                query=query,
                page=page,
            )
        except MessageHasBeenDeletedError:
            return await event.respond(
                t('REPLY_MESSAGE_HAS_BEEN_DELETED',
                  language=request_context.chat.language), )
        except AioRpcError as e:
            if e.code() == StatusCode.INVALID_ARGUMENT or e.code(
            ) == StatusCode.CANCELLED:
                request_context.error_log(e)
                return await event.answer(
                    t('MAINTENANCE_WO_PIC',
                      language=request_context.chat.language), )
            raise e

        action = 'documents_found'
        if len(search_widget.scored_documents) == 0:
            action = 'documents_not_found'

        request_context.statbox(
            action=action,
            duration=time.time() - start_time,
            query=f'page:{page} query:{query}',
        )
        serp, buttons = await search_widget.render()
        return await asyncio.gather(
            event.answer(),
            message.edit(serp, buttons=buttons, link_preview=False))
Ejemplo n.º 7
0
    async def handler(self, event: events.ChatAction,
                      request_context: RequestContext):
        try:
            self.check_search_ban_timeout(
                user_id=str(request_context.chat.chat_id))
        except BannedUserError as e:
            request_context.error_log(e)
            return await event.reply(
                t('BANNED_FOR_SECONDS',
                  language=request_context.chat.language).format(
                      seconds=e.ban_timeout,
                      reason=t('BAN_MESSAGE_TOO_MANY_REQUESTS',
                               language=request_context.chat.language),
                  ))
        search_prefix, query, is_group_mode = self.parse_pattern(event)

        if is_group_mode and not search_prefix:
            return
        if not is_group_mode and search_prefix:
            query = event.raw_text

        prefetch_message = await event.reply(
            t("SEARCHING", language=request_context.chat.language), )
        self.application.user_manager.last_widget[
            request_context.chat.chat_id] = prefetch_message.id
        try:
            await self.do_search(
                event=event,
                request_context=request_context,
                prefetch_message=prefetch_message,
                query=query,
                is_group_mode=is_group_mode,
                is_shortpath_enabled=True,
            )
        except (AioRpcError, asyncio.CancelledError) as e:
            await asyncio.gather(
                event.delete(),
                prefetch_message.delete(),
            )
            raise e
Ejemplo n.º 8
0
    async def submit(
        self,
        request: SubmitRequestPb,
        context: ServicerContext,
        metadata: dict,
    ) -> SubmitResponsePb:
        session_id = metadata.get('session-id')
        request_context = RequestContext(
            bot_name=self.service_name,
            chat=request.chat,
            request_id=metadata.get('request-id'),
        )
        request_context.add_default_fields(
            mode='submit',
            session_id=metadata.get('session-id'),
            **self.get_default_service_fields(),
        )

        document = BinaryReader(request.telegram_document).tgread_object()
        if document.size > 20 * 1024 * 1024:
            request_context.error_log(FileTooBigError(size=document.size))
            request_context.statbox(action='file_too_big')
            await self.telegram_client.send_message(
                request_context.chat.chat_id,
                t('FILE_TOO_BIG_ERROR',
                  language=request_context.chat.language),
                buttons=[close_button()],
            )
            return SubmitResponsePb()
        processing_message = await self.telegram_client.send_message(
            request_context.chat.chat_id,
            t("PROCESSING_PAPER",
              language=request_context.chat.language).format(
                  filename=document.attributes[0].file_name, ),
        )
        try:
            file = await self.telegram_client.download_document(
                document=document, file=bytes)
            try:
                processed_document = await self.grobid_client.process_fulltext_document(
                    pdf_file=file)
            except BadRequestError as e:
                request_context.statbox(action='unparsable_document')
                request_context.error_log(e)
                await self.telegram_client.send_message(
                    request_context.chat.chat_id,
                    t('UNPARSABLE_DOCUMENT_ERROR',
                      language=request_context.chat.language).format(
                          filename=document.attributes[0].file_name, ),
                    buttons=[close_button()],
                )
                return SubmitResponsePb()

            if not processed_document.get('doi'):
                request_context.statbox(action='unparsable_doi')
                request_context.error_log(UnparsableDoiError())
                await self.telegram_client.send_message(
                    request_context.chat.chat_id,
                    t('UNPARSABLE_DOI_ERROR',
                      language=request_context.chat.language).format(
                          filename=document.attributes[0].file_name, ),
                    buttons=[close_button()],
                )
                return SubmitResponsePb()

            search_response_pb = await self.meta_api_client.search(
                schemas=('scimag', ),
                query=processed_document['doi'],
                page=0,
                page_size=1,
                request_id=request_context.request_id,
                session_id=session_id,
                user_id=str(request_context.chat.chat_id),
                language=request_context.chat.language,
            )

            if len(search_response_pb.scored_documents) == 0:
                request_context.statbox(action='unavailable_metadata')
                request_context.error_log(
                    UnavailableMetadataError(doi=processed_document['doi']))
                await self.telegram_client.send_message(
                    request_context.chat.chat_id,
                    t('UNAVAILABLE_METADATA_ERROR',
                      language=request_context.chat.language).format(
                          doi=processed_document['doi']),
                    buttons=[close_button()],
                )
                return SubmitResponsePb()

            document_view = ScimagView(
                search_response_pb.scored_documents[0].typed_document.scimag)
            uploaded_message = await self.send_file(
                document_view=document_view,
                file=file,
                request_context=request_context,
                session_id=session_id,
                voting=False,
            )
        finally:
            await processing_message.delete()

        document_operation_pb = DocumentOperationPb(
            update_document=UpdateDocumentPb(
                typed_document=TypedDocumentPb(sharience=ShariencePb(
                    parent_id=document_view.id,
                    uploader_id=request_context.chat.chat_id,
                    updated_at=int(time.time()),
                    md5=hashlib.md5(file).hexdigest(),
                    filesize=document.size,
                    ipfs_multihashes=await self.get_ipfs_hashes(file=file),
                    telegram_file_id=uploaded_message.file.id,
                )), ), )
        request_context.statbox(
            action='success',
            document_id=document_view.id,
            schema='scimag',
        )
        await operation_log(document_operation_pb)
        return SubmitResponsePb()
Ejemplo n.º 9
0
    async def do_search(
        self,
        event: events.ChatAction,
        request_context: RequestContext,
        prefetch_message,
        query: str,
        is_group_mode: bool = False,
        is_shortpath_enabled: bool = False,
    ):
        session_id = self.generate_session_id()
        message_id = prefetch_message.id
        request_context.add_default_fields(is_group_mode=is_group_mode,
                                           mode='search',
                                           session_id=session_id)
        start_time = time.time()

        try:
            search_widget = await SearchWidget.create(
                application=self.application,
                chat=request_context.chat,
                session_id=session_id,
                message_id=message_id,
                request_id=request_context.request_id,
                query=query,
                is_group_mode=is_group_mode,
            )
        except AioRpcError as e:
            actions = [
                self.application.telegram_client.delete_messages(
                    request_context.chat.chat_id,
                    [message_id],
                )
            ]
            if e.code() == StatusCode.INVALID_ARGUMENT:
                too_difficult_picture_url = self.application.config[
                    'application'].get('too_difficult_picture_url', '')
                if e.details() == 'url_query_error':
                    actions.append(
                        event.reply(
                            t('INVALID_QUERY_ERROR',
                              language=request_context.chat.language).format(
                                  too_difficult_picture_url=
                                  too_difficult_picture_url, ),
                            buttons=[close_button()],
                        ))
                elif e.details() == 'invalid_query_error':
                    actions.append(
                        event.reply(
                            t('INVALID_SYNTAX_ERROR',
                              language=request_context.chat.language).format(
                                  too_difficult_picture_url=
                                  too_difficult_picture_url, ),
                            buttons=[close_button()],
                        ))
                return await asyncio.gather(*actions)
            elif e.code() == StatusCode.CANCELLED:
                maintenance_picture_url = self.application.config[
                    'application'].get('maintenance_picture_url', '')
                request_context.error_log(e)
                actions.append(
                    event.reply(
                        t('MAINTENANCE',
                          language=request_context.chat.language).format(
                              maintenance_picture_url=maintenance_picture_url,
                          ),
                        buttons=[close_button()],
                    ))
                return await asyncio.gather(*actions)
            await asyncio.gather(*actions)
            raise e

        action = 'documents_found'
        if len(search_widget.scored_documents) == 0:
            action = 'documents_not_found'

        request_context.statbox(
            action=action,
            duration=time.time() - start_time,
            query=f'page:0 query:{query}',
        )

        if len(search_widget.scored_documents) == 1 and is_shortpath_enabled:
            scored_document = search_widget.scored_documents[0]
            document_view = parse_typed_document_to_view(
                scored_document.typed_document)
            # Second (re-)fetching is required to retrieve duplicates
            document_view = await self.resolve_document(
                schema=scored_document.typed_document.WhichOneof('document'),
                document_id=document_view.id,
                position=0,
                session_id=session_id,
                request_context=request_context,
            )
            view, buttons = document_view.get_view(
                language=request_context.chat.language,
                session_id=session_id,
                bot_external_name=self.application.config['telegram']
                ['bot_external_name'],
                with_buttons=not is_group_mode,
            )
            return await asyncio.gather(
                self.application.telegram_client.edit_message(
                    request_context.chat.chat_id,
                    message_id,
                    view,
                    buttons=buttons,
                ), )

        serp, buttons = await search_widget.render()
        return await self.application.telegram_client.edit_message(
            request_context.chat.chat_id,
            message_id,
            serp,
            buttons=buttons,
            link_preview=False,
        )