def crop_pdf(update, context, percent=None, offset=None):
    """Crop the margins of the user's PDF file and send back the result.

    The PDF referenced by ``context.user_data[PDF_INFO]`` is downloaded to a
    temporary file and handed to the ``pdf-crop-margins`` command line tool
    through ``run_cmd``.

    Args:
        update: Incoming update; replies are sent to its effective message.
        context: Callback context providing ``bot`` and ``user_data``.
        percent: Value appended as ``-p <percent>``. Used whenever it is not
            ``None``.
        offset: Value appended as ``-a <offset>`` when ``percent`` is
            ``None``.

    Returns:
        ``ConversationHandler.END``.
    """
    import shlex  # function-scope import keeps this block self-contained

    _ = set_lang(update, context)
    update.effective_message.reply_text(_("Cropping your PDF file"),
                                        reply_markup=ReplyKeyboardRemove())

    with tempfile.NamedTemporaryFile(suffix=".pdf") as tf:
        user_data = context.user_data
        file_id, file_name = user_data[PDF_INFO]
        pdf_file = context.bot.get_file(file_id)
        pdf_file.download(custom_path=tf.name)

        with tempfile.TemporaryDirectory() as dir_name:
            out_fn = os.path.join(dir_name, f"Cropped_{file_name}")
            # file_name comes from the user, so plain double quotes are not
            # enough (a name containing '"' would break the command).
            # shlex.quote output is valid for a POSIX shell and for
            # shlex.split alike.
            # NOTE(review): run_cmd is defined elsewhere - confirm it parses
            # the command with one of those.
            command = ("pdf-crop-margins "
                       f"-o {shlex.quote(out_fn)} {shlex.quote(tf.name)}")

            if percent is not None:
                command += f" -p {percent}"
            else:
                command += f" -a {offset}"

            if run_cmd(command):
                send_result_file(update, context, out_fn, "crop")
            else:
                update.effective_message.reply_text(
                    _("Something went wrong, try again"))

    # Clean up memory. PDF_INFO holds a (file_id, file_name) pair, so the
    # stored id is its first element; comparing the whole pair with file_id
    # never matched and the entry was never removed.
    pdf_info = user_data.get(PDF_INFO)
    if pdf_info is not None and pdf_info[0] == file_id:
        del user_data[PDF_INFO]

    return ConversationHandler.END
def compare_pdf(update, context):
    """Compare two PDF files and reply with an image of their differences.

    The first file is the one whose id is stored under
    ``context.user_data[COMPARE_ID]``; the second is the document attached to
    the current message. Both are downloaded to temporary files and passed to
    ``pdf_diff.main``, whose output image is sent back to the user. When
    ``NoDifferenceError`` is raised the user is told the files do not differ.

    Args:
        update: Incoming update carrying the second PDF document.
        context: Callback context providing ``bot`` and ``user_data``.

    Returns:
        ``ConversationHandler.END``.
    """
    _ = set_lang(update, context)
    msg = update.effective_message
    msg.reply_text(_("Comparing your PDF files"),
                   reply_markup=ReplyKeyboardRemove())

    with tempfile.NamedTemporaryFile() as first_tmp:
        with tempfile.NamedTemporaryFile() as second_tmp:
            # Fetch both PDF files into the temporary files
            user_data = context.user_data
            stored_id = user_data[COMPARE_ID]
            context.bot.get_file(stored_id).download(
                custom_path=first_tmp.name)
            msg.document.get_file().download(custom_path=second_tmp.name)

            try:
                with tempfile.TemporaryDirectory() as work_dir:
                    diff_path = os.path.join(work_dir, "Differences.png")
                    pdf_diff.main(files=[first_tmp.name, second_tmp.name],
                                  out_file=diff_path)
                    send_result_file(update, context, diff_path, "compare")
            except NoDifferenceError:
                msg.reply_text(
                    _("There are no differences in text between your PDF files"))

    # Drop the stored id only if it still refers to the file just compared
    if user_data[COMPARE_ID] == stored_id:
        del user_data[COMPARE_ID]

    return ConversationHandler.END
def url_to_pdf(update: Update, context: CallbackContext):
    """Render the web page whose URL is the message text into a PDF file.

    URLs currently being converted are tracked in ``user_data[URLS]`` so that
    a repeated request for the same page is answered with a notice instead of
    starting a second conversion.

    Args:
        update: Incoming update whose message text is the URL.
        context: Callback context providing ``user_data``.

    Returns:
        None.
    """
    _ = set_lang(update, context)
    message = update.effective_message
    url = message.text
    user_data = context.user_data

    # The set of in-progress URLs; None when there is no user_data to track in
    in_progress = None
    if user_data is not None:
        in_progress = user_data.setdefault(URLS, set())

    if in_progress is not None and url in in_progress:
        message.reply_text(
            _("You've sent me this web page already and I'm still converting it")
        )
        return

    message.reply_text(_("Converting your web page into a PDF file"))
    if in_progress is not None:
        in_progress.add(url)

    try:
        with tempfile.TemporaryDirectory() as dir_name:
            out_fn = os.path.join(dir_name, f"{urlparse(url).netloc}.pdf")
            try:
                HTML(url=url).write_pdf(out_fn)
                send_result_file(update, context, out_fn, "url")
            except URLFetchingError:
                message.reply_text(_("Unable to reach your web page"))
    finally:
        # Always release the URL, even when rendering or sending raises;
        # otherwise the user would be told "still converting" forever.
        if in_progress is not None:
            in_progress.discard(url)
def send_result_photos(update, context, dir_name, task):
    """Send the photos in ``dir_name`` one by one or as a single zip archive.

    When the message text equals the translated ``PHOTOS`` label, every file
    in ``dir_name`` whose size is at most ``MAX_FILESIZE_UPLOAD`` is sent
    individually, in sorted name order; larger files are skipped. Otherwise
    the directory is zipped and sent through ``send_result_file``.

    Args:
        update: Incoming update; replies are sent to its effective message.
        context: Callback context, forwarded to the helpers.
        dir_name: Directory containing the photos to send.
        task: Task name forwarded to ``update_stats`` / ``send_result_file``.
    """
    _ = set_lang(update, context)
    message = update.effective_message

    if message.text == _(PHOTOS):
        for photo_name in sorted(os.listdir(dir_name)):
            photo_path = os.path.join(dir_name, photo_name)
            if os.path.getsize(photo_path) <= MAX_FILESIZE_UPLOAD:
                try:
                    message.chat.send_action(ChatAction.UPLOAD_PHOTO)
                    # Context manager so the handle is closed after upload
                    with open(photo_path, "rb") as photo:
                        message.reply_photo(photo)
                except BadRequest:
                    # Fall back to sending the file as a document, using a
                    # fresh handle so the upload starts from the beginning
                    message.chat.send_action(ChatAction.UPLOAD_DOCUMENT)
                    with open(photo_path, "rb") as photo:
                        message.reply_document(photo)

        message.reply_text(
            _("See above for all your photos"),
            reply_markup=get_support_markup(update, context),
        )
        update_stats(update, task)
    else:
        # Compress the directory of photos
        shutil.make_archive(dir_name, "zip", dir_name)

        # Send result file
        send_result_file(update, context, f"{dir_name}.zip", task)
Exemple #5
0
def add_ocr_to_pdf(update, context):
    """Add an OCR text layer to the user's PDF file and send back the result.

    The PDF referenced by ``context.user_data[PDF_INFO]`` is downloaded to a
    temporary file and processed with ``ocrmypdf.ocr``. If the file already
    has a text layer (``PriorOcrFoundError``) the user is told so instead.

    Args:
        update: Incoming update; replies are sent to its effective message.
        context: Callback context providing ``bot`` and ``user_data``.

    Returns:
        ``ConversationHandler.END``.
    """
    if not check_user_data(update, context, PDF_INFO):
        return ConversationHandler.END

    _ = set_lang(update, context)
    update.effective_message.reply_text(
        _("Adding an OCR text layer to your PDF file"),
        reply_markup=ReplyKeyboardRemove(),
    )

    with tempfile.NamedTemporaryFile() as tf:
        user_data = context.user_data
        file_id, file_name = user_data[PDF_INFO]
        pdf_file = context.bot.get_file(file_id)
        pdf_file.download(custom_path=tf.name)

        with tempfile.TemporaryDirectory() as dir_name:
            out_fn = os.path.join(dir_name,
                                  f"OCR_{os.path.splitext(file_name)[0]}.pdf")
            try:
                ocrmypdf.ocr(tf.name, out_fn, deskew=True, progress_bar=False)
                send_result_file(update, context, out_fn, "ocr")
            except PriorOcrFoundError:
                update.effective_message.reply_text(
                    _("Your PDF file already has a text layer"))

    # Clean up memory. PDF_INFO holds a (file_id, file_name) pair, so the
    # stored id is its first element; comparing the whole pair with file_id
    # never matched and the entry was never removed.
    pdf_info = user_data.get(PDF_INFO)
    if pdf_info is not None and pdf_info[0] == file_id:
        del user_data[PDF_INFO]

    return ConversationHandler.END
Exemple #6
0
def send_pdf_text(update, context, pdf_texts, is_file, out_fn):
    """Send the text extracted from a PDF, as a file or as chat messages.

    Args:
        update: Incoming update; replies are sent to its effective message.
        context: Callback context, forwarded to the helpers.
        pdf_texts: Sequence of text fragments extracted from the PDF. When it
            is empty the user is told no text was found.
        is_file: If true, the fragments are joined with newlines, written to
            ``out_fn`` and sent with ``send_result_file``; otherwise they are
            sent as one or more chat messages.
        out_fn: Path of the text file to write when ``is_file`` is true.
    """
    _ = set_lang(update, context)
    message = update.effective_message

    if not pdf_texts:
        message.reply_text(_("I couldn't find any text in your PDF file"))
        return

    if is_file:
        # Explicit UTF-8: extracted text may contain characters that the
        # platform default encoding cannot represent
        with open(out_fn, "w", encoding="utf-8") as f:
            f.write("\n".join(pdf_texts))

        send_result_file(update, context, out_fn, "get_text")
        return

    for chunk in _split_pdf_texts(pdf_texts, MAX_MESSAGE_LENGTH):
        message.reply_text(chunk)

    message.reply_text(
        _("*See above for all the text in your PDF file*"),
        parse_mode=ParseMode.MARKDOWN,
    )


def _split_pdf_texts(pdf_texts, limit):
    """Yield space-joined, stripped chunks of ``pdf_texts`` within ``limit``.

    Fragments are accumulated with a single space between them until adding
    the next one would exceed ``limit`` characters. Empty or whitespace-only
    chunks are never yielded, and a single fragment longer than ``limit`` is
    cut into ``limit``-sized pieces. ``limit`` must be at least 1.
    """
    pending = ""
    for pdf_text in pdf_texts:
        if len(pending) + len(pdf_text) + 1 > limit:
            flushed = pending.strip()
            if flushed:
                yield flushed
            pending = ""

            # An oversized fragment cannot fit in any message as a whole
            while len(pdf_text) + 1 > limit:
                head, pdf_text = pdf_text[:limit], pdf_text[limit:]
                head = head.strip()
                if head:
                    yield head

        pending += f" {pdf_text}"

    remainder = pending.strip()
    if remainder:
        yield remainder
Exemple #7
0
def compress_pdf(update, context):
    """Compress the user's PDF file with Ghostscript and send back the result.

    The PDF referenced by ``context.user_data[PDF_INFO]`` is downloaded to a
    temporary file and rewritten by the ``gs`` command. On success the user is
    told how much the size changed and receives the new file; on failure the
    command's output is logged and an error message is sent.

    Args:
        update: Incoming update; replies are sent to its effective message.
        context: Callback context providing ``bot`` and ``user_data``.

    Returns:
        ``ConversationHandler.END``.
    """
    if not check_user_data(update, context, PDF_INFO):
        return ConversationHandler.END

    _ = set_lang(update, context)
    update.effective_message.reply_text(
        _("Compressing your PDF file"),
        reply_markup=ReplyKeyboardRemove(),
    )

    with tempfile.NamedTemporaryFile() as tf:
        user_data = context.user_data
        file_id, file_name = user_data[PDF_INFO]
        pdf_file = context.bot.get_file(file_id)
        pdf_file.download(custom_path=tf.name)

        with tempfile.TemporaryDirectory() as dir_name:
            out_fn = os.path.join(
                dir_name, f"Compressed_{os.path.splitext(file_name)[0]}.pdf")
            # Pass the arguments as a list: the output path contains the
            # user-supplied file name, and splitting a formatted string broke
            # on names containing spaces or quotes.
            cmd = [
                "gs",
                "-sDEVICE=pdfwrite",
                "-dCompatibilityLevel=1.4",
                "-dPDFSETTINGS=/default",
                "-dNOPAUSE",
                "-dQUIET",
                "-dBATCH",
                f"-sOutputFile={out_fn}",
                tf.name,
            ]
            proc = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=False)
            out, err = proc.communicate()

            if proc.returncode != 0:
                log = Logger()
                stdout_text = out.decode("utf-8", errors="replace")
                stderr_text = err.decode("utf-8", errors="replace")
                log.error(
                    f"Stdout:\n{stdout_text}\n\nStderr:\n{stderr_text}")
                update.effective_message.reply_text(
                    _("Something went wrong, try again"))
            else:
                old_size = os.path.getsize(tf.name)
                new_size = os.path.getsize(out_fn)
                # Guard against an empty download to avoid dividing by zero
                reduction = (1 - new_size / old_size) if old_size else 0
                # Translate the template first, then fill in the values;
                # formatting inside _() made the lookup key never match.
                update.effective_message.reply_text(
                    _("File size reduced by <b>{:.0%}</b>, "
                      "from <b>{}</b> to <b>{}</b>").format(
                          reduction,
                          humanize.naturalsize(old_size),
                          humanize.naturalsize(new_size),
                      ),
                    parse_mode=ParseMode.HTML,
                )
                send_result_file(update, context, out_fn, "compress")

    # Clean up memory. PDF_INFO holds a (file_id, file_name) pair, so the
    # stored id is its first element; comparing the whole pair with file_id
    # never matched and the entry was never removed.
    pdf_info = user_data.get(PDF_INFO)
    if pdf_info is not None and pdf_info[0] == file_id:
        del user_data[PDF_INFO]

    return ConversationHandler.END
def rename_pdf(update, context):
    """Send the user's PDF file back under the name given in the message.

    A trailing ``.pdf`` in the message text is dropped and re-added, so the
    user may type the name with or without the extension. Names containing
    any character from ``invalid_chars`` are rejected and the user is asked
    for another name.

    Args:
        update: Incoming update whose message text is the new file name.
        context: Callback context providing ``bot`` and ``user_data``.

    Returns:
        The value of ``check_back_user_data`` when it is not ``None``,
        ``WAIT_FILE_NAME`` when the name is invalid, otherwise
        ``ConversationHandler.END``.
    """
    result = check_back_user_data(update, context)
    if result is not None:
        return result

    _ = set_lang(update, context)
    message = update.effective_message
    text = re.sub(r"\.pdf$", "", message.text)
    invalid_chars = r"\/*?:\'<>|"

    if set(text) & set(invalid_chars):
        message.reply_text(
            _(
                "File names can't contain any of the following characters:\n{}\n"
                "Send me another file name"
            ).format(invalid_chars)
        )

        return WAIT_FILE_NAME

    new_fn = "{}.pdf".format(text)
    message.reply_text(
        _("Renaming your PDF file into <b>{}</b>").format(new_fn),
        parse_mode=ParseMode.HTML,
        reply_markup=ReplyKeyboardRemove(),
    )

    user_data = context.user_data
    # Use a named throwaway instead of `_`, which holds the translator here
    file_id, _old_name = user_data[PDF_INFO]
    pdf_file = context.bot.get_file(file_id)

    # Download straight to the new name inside a temporary directory; this
    # avoids a separate temporary file that leaked when a later step raised
    with tempfile.TemporaryDirectory() as dir_name:
        out_fn = os.path.join(dir_name, new_fn)
        pdf_file.download(custom_path=out_fn)
        send_result_file(update, context, out_fn, "rename")

    # Clean up memory. PDF_INFO holds a (file_id, file_name) pair, so the
    # stored id is its first element; comparing the whole pair with file_id
    # never matched and the entry was never removed.
    pdf_info = user_data.get(PDF_INFO)
    if pdf_info is not None and pdf_info[0] == file_id:
        del user_data[PDF_INFO]

    return ConversationHandler.END
def compress_pdf(update, context):
    """Compress the user's PDF file with Ghostscript and send back the result.

    The PDF referenced by ``context.user_data[PDF_INFO]`` is downloaded to a
    temporary file and rewritten by the ``gs`` command, run through
    ``run_cmd``. On success the user is told how much the size changed and
    receives the new file; otherwise an error message is sent.

    Args:
        update: Incoming update; replies are sent to its effective message.
        context: Callback context providing ``bot`` and ``user_data``.

    Returns:
        ``ConversationHandler.END``.
    """
    import shlex  # function-scope import keeps this block self-contained

    if not check_user_data(update, context, PDF_INFO):
        return ConversationHandler.END

    _ = set_lang(update, context)
    update.effective_message.reply_text(
        _("Compressing your PDF file"),
        reply_markup=ReplyKeyboardRemove(),
    )

    with tempfile.NamedTemporaryFile() as tf:
        user_data = context.user_data
        file_id, file_name = user_data[PDF_INFO]
        pdf_file = context.bot.get_file(file_id)
        pdf_file.download(custom_path=tf.name)

        with tempfile.TemporaryDirectory() as dir_name:
            out_fn = os.path.join(
                dir_name, f"Compressed_{os.path.splitext(file_name)[0]}.pdf")
            # The output path contains the user-supplied file name, so plain
            # double quotes are not enough (a name containing '"' would break
            # the command). shlex.quote output is valid for a POSIX shell and
            # for shlex.split alike.
            # NOTE(review): run_cmd is defined elsewhere - confirm it parses
            # the command with one of those.
            command = ("gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 "
                       "-dPDFSETTINGS=/default -dNOPAUSE -dQUIET -dBATCH "
                       f"-sOutputFile={shlex.quote(out_fn)} "
                       f"{shlex.quote(tf.name)}")

            if run_cmd(command):
                old_size = os.path.getsize(tf.name)
                new_size = os.path.getsize(out_fn)
                # Guard against an empty download to avoid dividing by zero
                reduction = (1 - new_size / old_size) if old_size else 0
                # Translate the template first, then fill in the values;
                # formatting inside _() made the lookup key never match.
                update.effective_message.reply_text(
                    _("File size reduced by <b>{:.0%}</b>, "
                      "from <b>{}</b> to <b>{}</b>").format(
                          reduction,
                          humanize.naturalsize(old_size),
                          humanize.naturalsize(new_size),
                      ),
                    parse_mode=ParseMode.HTML,
                )
                send_result_file(update, context, out_fn, "compress")

            else:
                update.effective_message.reply_text(
                    _("Something went wrong, try again"))

    # Clean up memory. PDF_INFO holds a (file_id, file_name) pair, so the
    # stored id is its first element; comparing the whole pair with file_id
    # never matched and the entry was never removed.
    pdf_info = user_data.get(PDF_INFO)
    if pdf_info is not None and pdf_info[0] == file_id:
        del user_data[PDF_INFO]

    return ConversationHandler.END
Exemple #10
0
def text_to_pdf(update, context):
    """Turn the text of the incoming message into a PDF file and send it.

    If the text equals the translated ``CANCEL`` label the conversation is
    cancelled instead. Otherwise the text is placed into ``BASE_HTML`` with
    line breaks converted to ``<br/>`` and rendered to ``Text.pdf``.

    Args:
        update: Incoming update whose message text is the content.
        context: Callback context, forwarded to the helpers.

    Returns:
        The result of ``cancel`` when cancelled, otherwise
        ``ConversationHandler.END``.
    """
    # Function-scope import; `html` itself is used as a local name below
    from html import escape

    _ = set_lang(update, context)
    message = update.effective_message
    text = message.text

    if text == _(CANCEL):
        return cancel(update, context)

    message.reply_text(_("Creating your PDF file"),
                       reply_markup=ReplyKeyboardRemove())
    # Escape first so characters such as "<" and "&" in the user's text are
    # rendered literally instead of being parsed as markup, then add breaks
    body = escape(text).replace("\n", "<br/>")
    html = HTML(string=BASE_HTML.format(body))

    with tempfile.TemporaryDirectory() as dir_name:
        out_fn = os.path.join(dir_name, "Text.pdf")
        html.write_pdf(out_fn)
        send_result_file(update, context, out_fn, "text")

    return ConversationHandler.END
def process_photo(update: Update, context: CallbackContext,
                  file_ids: List[str], is_beautify: bool) -> None:
    """Download the given photos and send them back as a single PDF file.

    Args:
        update: Incoming update; replies are sent to its effective message.
        context: Callback context providing ``bot``.
        file_ids: Ids of the photos to download, in page order.
        is_beautify: If true the photos are processed with
            ``noteshrink.notescan_main`` into ``Beautified.pdf``; otherwise
            they are combined with ``img2pdf.convert`` into ``Converted.pdf``.
    """
    from contextlib import ExitStack  # function-scope import, self-contained

    _ = set_lang(update, context)
    if is_beautify:
        update.effective_message.reply_text(
            _("Beautifying and converting your photos"),
            reply_markup=ReplyKeyboardRemove(),
        )
    else:
        update.effective_message.reply_text(
            _("Converting your photos into PDF"),
            reply_markup=ReplyKeyboardRemove())

    # ExitStack closes (and thereby removes) every temporary file even when a
    # download or the conversion raises
    with ExitStack() as stack:
        photo_files = []

        # Download all photos
        for file_id in file_ids:
            tf = stack.enter_context(tempfile.NamedTemporaryFile())
            photo_file = context.bot.get_file(file_id)
            photo_file.download(custom_path=tf.name)
            photo_files.append(tf.name)

        with tempfile.TemporaryDirectory() as dir_name:
            if is_beautify:
                out_fn = os.path.join(dir_name, "Beautified.pdf")
                noteshrink.notescan_main(photo_files,
                                         basename=f"{dir_name}/page",
                                         pdfname=out_fn)
                send_result_file(update, context, out_fn, "beautify")
            else:
                out_fn = os.path.join(dir_name, "Converted.pdf")
                with open(out_fn, "wb") as f:
                    f.write(img2pdf.convert(photo_files))

                send_result_file(update, context, out_fn, "to_pdf")
def get_pdf_preview(update, context):
    """Send a PNG image of the first page of the user's PDF file.

    The PDF referenced by ``context.user_data[PDF_INFO]`` is opened with
    ``open_pdf``. When that yields a reader, its first page is written to a
    one-page PDF, converted to PNG with ``pdf2image`` and sent to the user.

    Args:
        update: Incoming update; replies are sent to its effective message.
        context: Callback context providing ``user_data``.

    Returns:
        The value of ``check_back_user_data`` when it is not ``None``,
        otherwise ``ConversationHandler.END``.
    """
    result = check_back_user_data(update, context)
    if result is not None:
        return result

    _ = set_lang(update, context)
    update.effective_message.reply_text(
        _("Extracting a preview for your PDF file"),
        reply_markup=ReplyKeyboardRemove())

    with tempfile.NamedTemporaryFile() as tf1:
        user_data = context.user_data
        file_id, file_name = user_data[PDF_INFO]
        pdf_reader = open_pdf(update, context, file_id, tf1.name)

        if pdf_reader:
            # Get first page of PDF file
            pdf_writer = PdfFileWriter()
            pdf_writer.addPage(pdf_reader.getPage(0))

            with tempfile.NamedTemporaryFile() as tf2:
                # Write cover preview PDF file
                with open(tf2.name, "wb") as f:
                    pdf_writer.write(f)

                with tempfile.TemporaryDirectory() as dir_name:
                    # Convert cover preview to PNG
                    out_fn = os.path.join(
                        dir_name,
                        f"Preview_{os.path.splitext(file_name)[0]}.png")
                    imgs = pdf2image.convert_from_path(tf2.name, fmt="png")
                    imgs[0].save(out_fn)

                    # Send result file
                    send_result_file(update, context, out_fn, "preview")

    # Clean up memory. PDF_INFO holds a (file_id, file_name) pair, so the
    # stored id is its first element; comparing the whole pair with file_id
    # never matched and the entry was never removed.
    pdf_info = user_data.get(PDF_INFO)
    if pdf_info is not None and pdf_info[0] == file_id:
        del user_data[PDF_INFO]

    return ConversationHandler.END
Exemple #13
0
def crop_pdf(update, context, percent=None, offset=None):
    """Crop the margins of the user's PDF file and send back the result.

    The PDF referenced by ``context.user_data[PDF_INFO]`` is downloaded to a
    temporary file and processed by the ``pdf-crop-margins`` command. On
    failure the command's output is logged and an error message is sent.

    Args:
        update: Incoming update; replies are sent to its effective message.
        context: Callback context providing ``bot`` and ``user_data``.
        percent: Value passed after ``-p``. Used whenever it is not ``None``.
        offset: Value passed after ``-a`` when ``percent`` is ``None``.

    Returns:
        ``ConversationHandler.END``.
    """
    _ = set_lang(update, context)
    update.effective_message.reply_text(_("Cropping your PDF file"),
                                        reply_markup=ReplyKeyboardRemove())

    with tempfile.NamedTemporaryFile(suffix=".pdf") as tf:
        user_data = context.user_data
        file_id, file_name = user_data[PDF_INFO]
        pdf_file = context.bot.get_file(file_id)
        pdf_file.download(custom_path=tf.name)

        with tempfile.TemporaryDirectory() as dir_name:
            out_fn = os.path.join(dir_name, f"Cropped_{file_name}")
            # Option values are still tokenised with shlex.split, exactly as
            # before, but the paths are passed as single list items: the
            # output path contains the user-supplied file name, which broke
            # the command when it held spaces or quotes.
            if percent is not None:
                option = ["-p", *shlex.split(str(percent))]
            else:
                option = ["-a", *shlex.split(str(offset))]
            cmd = ["pdf-crop-margins", *option, "-o", out_fn, tf.name]

            proc = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=False)
            out, err = proc.communicate()

            if proc.returncode != 0:
                log = Logger()
                stdout_text = out.decode("utf-8", errors="replace")
                stderr_text = err.decode("utf-8", errors="replace")
                log.error(
                    f"Stdout:\n{stdout_text}\n\nStderr:\n{stderr_text}")
                update.effective_message.reply_text(
                    _("Something went wrong, try again"))
            else:
                send_result_file(update, context, out_fn, "crop")

    # Clean up memory. PDF_INFO holds a (file_id, file_name) pair, so the
    # stored id is its first element; comparing the whole pair with file_id
    # never matched and the entry was never removed.
    pdf_info = user_data.get(PDF_INFO)
    if pdf_info is not None and pdf_info[0] == file_id:
        del user_data[PDF_INFO]

    return ConversationHandler.END
Exemple #14
0
def text_to_pdf(update: Update, context: CallbackContext, font_family: str,
                font_url: str):
    """Render the text stored in ``user_data[TEXT]`` into a PDF file.

    The text is wrapped in a ``<p>`` element with line breaks converted to
    ``<br/>``. Unless ``font_family`` equals ``DEFAULT_FONT``, a stylesheet
    declaring that font (loaded from ``font_url``) is applied to the
    paragraph.

    Args:
        update: Incoming update; replies are sent to its effective message.
        context: Callback context providing ``user_data``.
        font_family: Name of the font family to use.
        font_url: URL the font is loaded from in the ``@font-face`` rule.

    Returns:
        ``ConversationHandler.END``.
    """
    # Function-scope import; `html` itself is used as a local name below
    from html import escape

    if not check_user_data(update, context, TEXT):
        return ConversationHandler.END

    _ = set_lang(update, context)
    text = context.user_data[TEXT]
    update.effective_message.reply_text(_("Creating your PDF file"),
                                        reply_markup=ReplyKeyboardRemove())

    # Escape first so characters such as "<" and "&" in the user's text are
    # rendered literally instead of being parsed as markup, then add breaks
    body = escape(text).replace("\n", "<br/>")
    html = HTML(string="<p>{}</p>".format(body))
    font_config = FontConfiguration()

    # None means "no extra stylesheets" for the default font
    stylesheets = None
    if font_family != DEFAULT_FONT:
        stylesheets = [
            CSS(
                string=("@font-face {"
                        f"font-family: {font_family};"
                        f"src: url({font_url});"
                        "}"
                        "p {"
                        f"font-family: {font_family};"
                        "}"),
                font_config=font_config,
            )
        ]

    with tempfile.TemporaryDirectory() as dir_name:
        out_fn = os.path.join(dir_name, "Text.pdf")
        html.write_pdf(out_fn,
                       stylesheets=stylesheets,
                       font_config=font_config)
        send_result_file(update, context, out_fn, "text")

    return ConversationHandler.END