Exemple #1
0
def extract_article_callback(update: Update, context: CallbackContext):
    """Handle the "extract article" callback query by sending a document.

    Reads the current query and section name from ``context.chat_data``.
    If a Telegram file id is already cached for this query under
    ``'extract_article_file'``, that file is re-sent. Otherwise the article
    info is fetched through ``Toolkit.extract_article_info``, rendered with
    ``Toolkit.create_document``, uploaded, and the resulting file id is
    cached for the next request.

    Args:
        update: Incoming update; must carry a ``callback_query``.
        context: Callback context providing ``bot`` and ``chat_data``.
    """
    callback = update.callback_query
    cid = callback.message.chat.id
    mid = callback.message.message_id
    q = context.chat_data['query']
    section_name = context.chat_data['current_section']
    section = context.chat_data[section_name]
    storage_article = section.get(q)

    # Swap in the "muting" keyboard while the request is being processed.
    context.bot.edit_message_reply_markup(message_id=mid,
                                          chat_id=cid,
                                          reply_markup=muting_kb)

    context.bot.answerCallbackQuery(callback_query_id=callback.id,
                                    text=answer_query_text)

    toolkit = Toolkit()

    if storage_article is not None and storage_article.get(
            'extract_article_file'):
        # Cached: re-send by Telegram file id instead of rebuilding the file.
        context.bot.edit_message_reply_markup(message_id=mid,
                                              chat_id=cid,
                                              reply_markup=None)

        sent = context.bot.send_document(
            chat_id=cid,
            document=storage_article.get('extract_article_file'),
            reply_markup=kb_dict[section_name])

        # NOTE(review): this branch records the id under 'messages_ids'
        # while the fresh-upload path below uses 'file_ids' -- confirm which
        # key the readers of chat_data expect.
        context.chat_data.update({'messages_ids': [sent.message_id]})
        return

    # Reuse the existing entry for this query instead of replacing it, so
    # results cached under other keys for the same query are not discarded.
    entry = section.setdefault(q, {})
    entry['url'] = q

    article_info = toolkit.extract_article_info(q)
    current_article = {
        'title': article_info.get('title', 'unknown'),
        'text': article_info.get('article', 'unknown'),
        'author': article_info.get('author', 'unknown'),
        'date': article_info.get('publishDate', 'unknown'),
        'tags': article_info.get('tags', 'unknown'),
    }

    path = toolkit.create_document(current_article,
                                   f'url_extract_article_{cid}')
    try:
        context.bot.edit_message_reply_markup(message_id=mid,
                                              chat_id=cid,
                                              reply_markup=None)

        # Close the handle before deleting the file; an open handle leaks
        # and blocks removal on some platforms.
        with open(path, 'rb') as document:
            sent = context.bot.send_document(
                chat_id=cid,
                document=document,
                reply_markup=kb_dict[section_name])

        context.chat_data.update({'file_ids': [sent.message_id]})
        entry.update({'extract_article_file': sent.document.file_id})
    finally:
        # Always drop the temporary document, even when sending failed.
        os.remove(path)
Exemple #2
0
def extract_entity_callback(update: Update, context: CallbackContext):
    """Handle the "extract entities" callback query for a URL or plain text.

    Reads the current query and section name from ``context.chat_data``.
    If a Telegram file id is cached for this query under ``'entity_file'``,
    that file is re-sent. Otherwise the query is passed to
    ``Toolkit.extract_entity`` as ``{'url': q}`` when ``validators.url``
    accepts it and as ``{'text': q}`` otherwise; the ``'entities'`` part of
    the result is rendered with ``Toolkit.create_document``, uploaded, and
    the resulting file id is cached.

    Args:
        update: Incoming update; must carry a ``callback_query``.
        context: Callback context providing ``bot`` and ``chat_data``.
    """
    callback = update.callback_query
    cid = callback.message.chat.id
    mid = callback.message.message_id
    q = context.chat_data['query']
    section_name = context.chat_data['current_section']
    section = context.chat_data[section_name]
    storage_article = section.get(q)
    toolkit = Toolkit()

    # Swap in the "muting" keyboard while the request is being processed.
    context.bot.edit_message_reply_markup(message_id=mid,
                                          chat_id=cid,
                                          reply_markup=muting_kb)

    context.bot.answerCallbackQuery(callback_query_id=callback.id,
                                    text=answer_query_text)

    if storage_article is not None and storage_article.get('entity_file'):
        # Cached: re-send by Telegram file id instead of rebuilding the file.
        context.bot.edit_message_reply_markup(message_id=mid,
                                              chat_id=cid,
                                              reply_markup=None)

        sent = context.bot.send_document(
            chat_id=cid,
            document=storage_article.get('entity_file'),
            reply_markup=kb_dict[section_name])

        # NOTE(review): this branch records the id under 'messages_ids'
        # while the fresh-upload path below uses 'file_ids' -- confirm which
        # key the readers of chat_data expect.
        context.chat_data.update({'messages_ids': [sent.message_id]})
        return

    is_url = validators.url(q)
    # Reuse the existing entry for this query instead of replacing it, so
    # results cached under other keys for the same query are not discarded.
    entry = section.setdefault(q, {})

    if is_url:
        entry['url'] = q
        entities = toolkit.extract_entity({'url': q})['entities']
    else:
        entry['text'] = q
        try:
            entities = toolkit.extract_entity({'text': q})['entities']
        except Exception:
            # Best effort for free text: tell the user nothing was found.
            # ``Exception`` (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            callback.message.reply_text(nothing_text)
            return

    path = toolkit.create_document(entities, f'url_extract_entity_{cid}')
    try:
        context.bot.edit_message_reply_markup(message_id=mid,
                                              chat_id=cid,
                                              reply_markup=None)

        # Close the handle before deleting the file; an open handle leaks
        # and blocks removal on some platforms.
        with open(path, 'rb') as document:
            sent = context.bot.send_document(
                chat_id=cid,
                document=document,
                reply_markup=kb_dict[section_name])

        context.chat_data.update({'file_ids': [sent.message_id]})
        entry.update({'entity_file': sent.document.file_id})
    finally:
        # Always drop the temporary document, even when sending failed.
        os.remove(path)
Exemple #3
0
def summarize_file_callback(update: Update, context: CallbackContext):
    """Handle the "summarize file" callback query for an uploaded document.

    ``context.chat_data['query']`` is expected to be a mapping with
    ``'file_name'`` and ``'file_id'``. When sentences are already cached for
    that file name they are reused; otherwise the file is downloaded into
    ``doc_storage`` under the current working directory, its text is
    extracted with ``textract``, split with ``get_parts_of_text`` and each
    part is summarised through ``Toolkit.summarize``. The sentences are
    rendered with ``Toolkit.create_document`` and sent to the chat.

    Args:
        update: Incoming update; must carry a ``callback_query``.
        context: Callback context providing ``bot`` and ``chat_data``.
    """
    callback = update.callback_query
    cid = callback.message.chat.id
    mid = callback.message.message_id
    q = context.chat_data['query']
    file_name = q['file_name']
    file_id = q['file_id']
    section_name = context.chat_data['current_section']
    section = context.chat_data[section_name]
    storage_article = section.get(file_name)
    toolkit = Toolkit()

    # Swap in the "muting" keyboard while the request is being processed.
    context.bot.edit_message_reply_markup(message_id=mid,
                                          chat_id=cid,
                                          reply_markup=muting_kb)

    context.bot.answerCallbackQuery(callback_query_id=callback.id,
                                    text=answer_query_text)

    if storage_article is not None and storage_article.get('sentences'):
        current_article = storage_article
    else:
        # The file name comes from the user's upload: keep only its final
        # component so it cannot point outside doc_storage.
        local_name = os.path.basename(file_name)
        download_path = f'{os.getcwd()}/doc_storage/{local_name}'

        try:
            with open(download_path, 'wb') as f:
                context.bot.get_file(file_id).download(out=f)
            # NOTE(review): str() on the extractor output followed by [2:]
            # presumably strips the "b'" prefix of a bytes repr, which keeps
            # escape sequences and the trailing quote. Left unchanged because
            # get_parts_of_text is not visible here -- confirm whether a
            # proper decode is intended.
            content = str(textract.process(download_path))[2:]
        finally:
            # Remove the download even when fetching or extraction failed.
            if os.path.exists(download_path):
                os.remove(download_path)

        # Reuse the existing entry for this file instead of replacing it, so
        # results cached under other keys are not discarded.
        section.setdefault(file_name, {})['file_id'] = file_id
        current_article = {'text': content}

    summarized_sentences = current_article.get('sentences', [])

    if not summarized_sentences:
        text_parts, title = get_parts_of_text(current_article['text'])

        for part in text_parts:
            try:
                summary = toolkit.summarize({'text': part, 'title': title})
                summarized_sentences.extend(summary['sentences'])
            except Exception:
                # Best effort: report the failed part and keep going.
                # ``Exception`` (not a bare except) lets KeyboardInterrupt
                # and SystemExit propagate.
                callback.message.reply_text(error_text)

        section[file_name].update({'sentences': summarized_sentences})

    path = toolkit.create_document({'sentences': summarized_sentences},
                                   f'file_summarize_{cid}')
    try:
        context.bot.edit_message_reply_markup(message_id=mid,
                                              chat_id=cid,
                                              reply_markup=None)

        # Close the handle before deleting the file; an open handle leaks
        # and blocks removal on some platforms.
        with open(path, 'rb') as document:
            sent = context.bot.send_document(
                chat_id=cid,
                document=document,
                reply_markup=kb_dict[section_name])

        context.chat_data.update({'file_ids': [sent.message_id]})
        section[file_name].update({'summarize_file': sent.document.file_id})
    finally:
        # Always drop the temporary document, even when sending failed.
        os.remove(path)
Exemple #4
0
def extract_entity_file_callback(update: Update, context: CallbackContext):
    """Handle the "extract entities" callback query for an uploaded document.

    ``context.chat_data['query']`` is expected to be a mapping with
    ``'file_name'`` and ``'file_id'``. If a Telegram file id is cached for
    that file name under ``'entity_file'``, that file is re-sent. Otherwise
    the file is downloaded into ``doc_storage`` under the current working
    directory, its text is extracted with ``textract``, split with
    ``get_parts_of_text``, and the entities returned by
    ``Toolkit.extract_entity`` for every part are merged per entity name.
    The merged result is rendered with ``Toolkit.create_document``, sent to
    the chat, and the resulting file id is cached.

    Args:
        update: Incoming update; must carry a ``callback_query``.
        context: Callback context providing ``bot`` and ``chat_data``.
    """
    callback = update.callback_query
    cid = callback.message.chat.id
    mid = callback.message.message_id
    q = context.chat_data['query']
    file_name = q['file_name']
    file_id = q['file_id']
    section_name = context.chat_data['current_section']
    section = context.chat_data[section_name]
    storage_article = section.get(file_name)
    toolkit = Toolkit()

    # Swap in the "muting" keyboard while the request is being processed.
    context.bot.edit_message_reply_markup(message_id=mid,
                                          chat_id=cid,
                                          reply_markup=muting_kb)

    context.bot.answerCallbackQuery(callback_query_id=callback.id,
                                    text=answer_query_text)

    if storage_article is not None and storage_article.get('entity_file'):
        # Cached: re-send by Telegram file id instead of rebuilding the file.
        # Only the keyboard is edited here, so no ``text`` argument is
        # passed (the other handlers in this file do the same).
        context.bot.edit_message_reply_markup(message_id=mid,
                                              chat_id=cid,
                                              reply_markup=None)

        sent = context.bot.send_document(
            chat_id=cid,
            document=storage_article.get('entity_file'),
            reply_markup=kb_dict[section_name])

        context.chat_data.update({'file_ids': [sent.message_id]})
        return

    # The file name comes from the user's upload: keep only its final
    # component so it cannot point outside doc_storage.
    local_name = os.path.basename(file_name)
    download_path = f'{os.getcwd()}/doc_storage/{local_name}'

    try:
        with open(download_path, 'wb') as f:
            context.bot.get_file(file_id).download(out=f)
        # NOTE(review): str() on the extractor output followed by [2:]
        # presumably strips the "b'" prefix of a bytes repr, which keeps
        # escape sequences and the trailing quote. Left unchanged because
        # get_parts_of_text is not visible here -- confirm whether a proper
        # decode is intended.
        content = str(textract.process(download_path))[2:]
    finally:
        # Remove the download even when fetching or extraction failed.
        if os.path.exists(download_path):
            os.remove(download_path)

    # Reuse the existing entry for this file instead of replacing it, so
    # results cached under other keys are not discarded.
    entry = section.setdefault(file_name, {})
    entry['file_id'] = file_id

    text_parts, title = get_parts_of_text(content)
    summarize_entities = {}
    for part in text_parts:
        try:
            part_entity = toolkit.extract_entity({
                'text': part,
                'title': title
            })['entities']
        except Exception:
            # Any failed part aborts the whole request with an error reply.
            # ``Exception`` (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            callback.message.reply_text(error_text)
            return

        for entity_name, values in part_entity.items():
            known = summarize_entities.get(entity_name)
            if known:
                # De-duplicate while keeping first-seen order so the
                # generated document is deterministic.
                summarize_entities[entity_name] = list(
                    dict.fromkeys(known + values))
            else:
                summarize_entities[entity_name] = values

    path = toolkit.create_document(summarize_entities,
                                   f'file_extract_entity_{cid}')
    try:
        context.bot.edit_message_reply_markup(message_id=mid,
                                              chat_id=cid,
                                              reply_markup=None)

        # Close the handle before deleting the file; an open handle leaks
        # and blocks removal on some platforms.
        with open(path, 'rb') as document:
            sent = context.bot.send_document(
                chat_id=cid,
                document=document,
                reply_markup=kb_dict[section_name])

        context.chat_data.update({'file_ids': [sent.message_id]})
        entry.update({'entity_file': sent.document.file_id})
    finally:
        # Always drop the temporary document, even when sending failed.
        os.remove(path)