Example #1
0
def open_file():
    """Let the user pick a PDF and show the text of its first page.

    The browse button label is switched to "loading..." while the dialog is
    open and the file is processed, and is always restored to "Browse"
    afterwards -- also when the dialog is dismissed without a selection or
    when reading the PDF raises.
    """
    browse_text.set("loading...")
    try:
        file = askopenfile(parent=root,
                           mode='rb',
                           filetypes=[("Pdf file", "*.pdf")])
        if not file:
            # nothing was selected; leave the window contents untouched
            return

        # Only the extracted string is used after this point, so the handle
        # can be released as soon as the page has been read.
        with file:
            read_pdf = PyPDF2.PdfFileReader(file)
            page = read_pdf.getPage(0)
            page_content = page.extractText()

        # swap the trademark sign (U+2122) for an apostrophe
        # presumably works around a mis-decoded character in the extracted
        # text -- TODO confirm against the PDFs this is used with
        page_content = page_content.replace('\u2122', "'")

        #show text box on row 2 col 0
        display_textbox(page_content, 2, 0, root)
    finally:
        #reset the button text back to Browse, whatever happened above
        browse_text.set("Browse")
Example #2
0
def open_file():
    """Let the user pick a PDF and show the text of its first page.

    The browse button label reads "loading..." while the dialog is open and
    the file is processed. It is always set back to "Browse" when the
    function leaves -- including when no file was selected or an exception
    is raised while reading the PDF.
    """
    browse_text.set("loading...")
    try:
        # ``filetypes`` is the documented spelling of the dialog option
        file = askopenfile(parent=root,
                           mode='rb',
                           filetypes=[("Pdf file", "*.pdf")])
        if not file:
            # dialog dismissed without a selection
            return

        # The page text is kept as a plain string, so the file handle is
        # closed right after extraction instead of being leaked.
        with file:
            read_pdf = PyPDF2.PdfFileReader(file)
            page = read_pdf.getPage(0)
            page_content = page.extractText()

        # swap the trademark sign (U+2122) for an apostrophe
        # presumably works around a mis-decoded character in the extracted
        # text -- TODO confirm against the PDFs this is used with
        page_content = page_content.replace('\u2122', "'")

        # show text box on row 2 col 0
        display_textbox(page_content, 2, 0, root)
    finally:
        browse_text.set("Browse")
Example #3
0
def open_file():
    """Load a user-selected PDF and show the text and images of its first page.

    Module-level state touched once a file has been selected:
      * ``img_idx``       -- emptied, then set to ``[0]``
      * ``all_content``   -- emptied, then filled with the new page text
      * ``all_images``    -- emptied, then filled with the new page's images
      * ``displayed_img`` -- previous widget hidden and replaced by the new one

    The function also builds the image navigation row (grid row 2) and the
    action button row (grid row 3).

    The browse button label is always restored to "Browse" on exit, also when
    the dialog is dismissed without a selection or an exception is raised. In
    the former case none of the state above is modified, so the index keeps
    matching the image that is still on screen.
    """
    browse_text.set("loading...")
    try:
        #load a PDF file
        file = askopenfile(parent=root,
                           mode='rb',
                           filetypes=[("Pdf file", "*.pdf")])
        if not file:
            return

        # NOTE(review): the handle returned by askopenfile is never closed.
        # Left as-is because extract_images() is not visible here and may
        # still read from the stream -- confirm before wrapping in ``with``.
        read_pdf = PyPDF2.PdfFileReader(file)
        #select a page
        page = read_pdf.getPage(0)
        #extract text content from page
        page_content = page.extractText()

        #SET A SPECIAL ENCODING OR REPLACE CHARACTERS
        #swap the trademark sign (U+2122) for an apostrophe
        page_content = page_content.replace('\u2122', "'")

        #CLEARING GLOBAL VARIABLES ONCE A NEW PDF FILE IS SELECTED
        # clear() empties each list completely; popping inside a ``for`` loop
        # over the same list stops about halfway and leaves stale entries.
        img_idx.clear()
        img_idx.append(0)  #set global index to 0
        all_content.clear()
        all_images.clear()

        #hide the displayed image from the previous PDF file and remove it
        if displayed_img:
            displayed_img[-1].grid_forget()
            displayed_img.pop()

        #BEGIN EXTRACTING
        #extract text
        all_content.append(page_content)
        #extract images
        images = extract_images(page)
        all_images.extend(images)

        #BEGIN DISPLAYING
        #display the first image that was detected
        # NOTE(review): raises IndexError when the page holds no images;
        # the arrow/save helpers are not visible here, so no fallback is
        # invented -- the ``finally`` below at least restores the button.
        selected_image = display_images(images[img_idx[-1]])
        displayed_img.append(selected_image)

        #display the text found on the page
        display_textbox(all_content, 4, 0, root)

        #BEGIN MENUES AND MENU WIDGETS
        #1.image menu on row 2
        img_menu = Frame(root, width=800, height=60)
        img_menu.grid(columnspan=3, rowspan=1, row=2)

        #counter label between the arrows, e.g. "image 1 out of 3"
        what_text = StringVar()
        what_img = Label(root, textvariable=what_text, font=("shanti", 10))
        what_text.set("image " + str(img_idx[-1] + 1) + " out of " +
                      str(len(all_images)))
        what_img.grid(row=2, column=1)

        #arrow buttons
        display_icon('arrow_l.png', 2, 0, E,
                     lambda: left_arrow(all_images, selected_image, what_text))
        display_icon(
            'arrow_r.png', 2, 2, W,
            lambda: right_arrow(all_images, selected_image, what_text))

        #2.save image menu on row 3
        save_img_menu = Frame(root, width=800, height=60, bg="#c8c8c8")
        save_img_menu.grid(columnspan=3, rowspan=1, row=3)

        #create action buttons
        copyText_btn = Button(root,
                              text="copy text",
                              command=lambda: copy_text(all_content, root),
                              font=("shanti", 10),
                              height=1,
                              width=15)
        saveAll_btn = Button(root,
                             text="save all images",
                             command=lambda: save_all(all_images),
                             font=("shanti", 10),
                             height=1,
                             width=15)
        #img_idx is read when the button is clicked, not when it is created
        save_btn = Button(root,
                          text="save image",
                          command=lambda: save_image(all_images[img_idx[-1]]),
                          font=("shanti", 10),
                          height=1,
                          width=15)

        #place buttons on grid
        copyText_btn.grid(row=3, column=0)
        saveAll_btn.grid(row=3, column=1)
        save_btn.grid(row=3, column=2)
    finally:
        #reset the button text back to Browse
        browse_text.set("Browse")
Example #4
0
def open_file():
    """Load a user-selected PDF and show the text and images of its first page.

    Once a file has been selected the module-level lists are refreshed:
    ``img_idx`` becomes ``[0]``, ``all_content`` holds only the new page
    text, ``all_images`` holds only the new page's images and the previously
    displayed image widget in ``displayed_img`` is hidden and replaced.
    The image navigation row (grid row 2) and the action button row
    (grid row 3) are then built.

    The browse button label is always set back to "Navegar" on exit, also
    when the dialog is dismissed without a selection or an exception is
    raised. In the former case no state is modified, so the index keeps
    matching the image that is still on screen.
    """
    browse_text.set("cargando....")
    try:
        file = askopenfile(parent=root,
                           mode="rb",
                           filetypes=[("PDF file", "*.pdf")])
        if not file:
            return

        # NOTE(review): the handle returned by askopenfile is never closed.
        # Left as-is because extract_images() is not visible here and may
        # still read from the stream -- confirm before wrapping in ``with``.
        read_pdf = PyPDF2.PdfFileReader(file)
        page = read_pdf.getPage(0)
        page_content = page.extractText()

        # swap the trademark sign (U+2122) for an apostrophe
        page_content = page_content.replace('\u2122', "'")

        # Drop everything that belongs to the previously opened PDF.
        # clear() empties each list completely; popping inside a ``for`` loop
        # over the same list stops about halfway and leaves stale entries.
        img_idx.clear()
        img_idx.append(0)
        all_content.clear()
        all_images.clear()

        # hide the image widget of the previous PDF and forget it
        if displayed_img:
            displayed_img[-1].grid_forget()
            displayed_img.pop()

        all_content.append(page_content)

        images = extract_images(page)
        all_images.extend(images)

        # NOTE(review): raises IndexError when the page holds no images;
        # the arrow/save helpers are not visible here, so no fallback is
        # invented -- the ``finally`` below at least restores the button.
        selected_image = display_images(images[img_idx[-1]])
        displayed_img.append(selected_image)

        display_textbox(page_content, 4, 0, root)

        # image navigation row (row 2): left arrow, counter label, right arrow
        img_menu = Frame(root, width=800, height=60)
        img_menu.grid(columnspan=3, rowspan=1, row=2)

        que_text = StringVar()
        que_img = Label(root, textvariable=que_text)
        que_text.set("Imagen   " + str(img_idx[-1] + 1) + "  de  " +
                     str(len(all_images)))
        que_img.grid(row=2, column=1)

        display_icon('img/arrow_l.png', 2, 0, E,
                     lambda: left_arrow(all_images, selected_image, que_text))
        display_icon('img/arrow_r.png', 2, 2, W,
                     lambda: right_arrow(all_images, selected_image, que_text))

        # action button row (row 3): copy text, save all images, save image
        save_img = Frame(root, width=800, height=60, bg="#c8c8c8")
        save_img.grid(columnspan=3, rowspan=1, row=3)

        copyText_btn = Button(root,
                              text="Copiar texto",
                              command=lambda: copy_text(all_content),
                              height=1,
                              width=15)
        saveAll_btn = Button(root,
                             text="Todas las imagenes",
                             command=lambda: save_all(all_images),
                             height=1,
                             width=15)
        # img_idx is read when the button is clicked, not when it is created
        save_btn = Button(root,
                          text="Guardar imagen",
                          command=lambda: save_image(all_images[img_idx[-1]]),
                          height=1,
                          width=15)

        copyText_btn.grid(row=3, column=0)
        saveAll_btn.grid(row=3, column=1)
        save_btn.grid(row=3, column=2)
    finally:
        # restore the browse button label, whatever happened above
        browse_text.set("Navegar")
Example #5
0
def open_file():
    """Load a user-selected PDF and show the text and images of its first page.

    Once a file has been selected: ``img_idx`` becomes ``[0]``, the page text
    is appended to ``page_contents``, ``all_images`` is replaced with the new
    page's images and the previously displayed image widget in
    ``displayed_img`` is hidden and replaced. The image navigation row
    (grid row 2) and the action button row (grid row 3) are then built.

    The browse button label is always set back to "Browse" on exit, also when
    the dialog is dismissed without a selection or an exception is raised.
    In the former case no state is modified, so the index keeps matching the
    image that is still on screen.
    """
    browse_text.set("loading...")
    try:
        # ``filetypes`` is the documented spelling of the dialog option
        file = askopenfile(parent=root,
                           mode='rb',
                           filetypes=[("Pdf file", "*.pdf")])
        if not file:
            return

        # NOTE(review): the handle returned by askopenfile is never closed.
        # Left as-is because extract_images() is not visible here and may
        # still read from the stream -- confirm before wrapping in ``with``.
        read_pdf = PyPDF2.PdfFileReader(file)
        page = read_pdf.getPage(0)
        page_content = page.extractText()
        # swap the trademark sign (U+2122) for an apostrophe
        page_content = page_content.replace('\u2122', "'")
        # page_contents is only ever appended to here, so it accumulates the
        # text of every file opened so far; presumably copy_text() uses the
        # newest entry -- TODO confirm
        page_contents.append(page_content)

        # clear() empties the list completely; popping inside a ``for`` loop
        # over the same list stops about halfway and leaves stale entries.
        img_idx.clear()
        img_idx.append(0)

        # hide the image widget of the previous PDF and forget it
        if displayed_img:
            displayed_img[-1].grid_forget()
            displayed_img.pop()

        all_images.clear()

        images = extract_images(page)
        all_images.extend(images)

        # NOTE(review): raises IndexError when the page holds no images;
        # the arrow/save helpers are not visible here, so no fallback is
        # invented -- the ``finally`` below at least restores the button.
        img = images[img_idx[-1]]

        current_image = display_images(img)
        displayed_img.append(current_image)

        #show text box on row 4 col 0
        display_textbox(page_content, 4, 0, root)

        # image navigation row (row 2): left arrow, counter label, right arrow
        img_menu = Frame(root, width=800, height=60)
        img_menu.grid(columnspan=3, rowspan=1, row=2)

        what_text = StringVar()
        what_img = Label(root, textvariable=what_text, font=("Shanti", 10))
        what_text.set("image " + str(img_idx[-1] + 1) + " of " +
                      str(len(all_images)))
        what_img.grid(row=2, column=1)

        display_icon('arrow_l.png', 2, 0, E,
                     lambda: left_arrow(all_images, current_image, what_text))
        display_icon('arrow_r.png', 2, 2, W,
                     lambda: right_arrow(all_images, current_image, what_text))

        # action button row (row 3): copy text, save all images, save image
        save_img = Frame(root, width=800, height=60, bg="#c8c8c8")
        save_img.grid(columnspan=3, rowspan=1, row=3)

        copyText_btn = Button(root,
                              text="copy text",
                              command=lambda: copy_text(page_contents),
                              font=("Shanti", 10),
                              height=1,
                              width=15)
        saveAll_btn = Button(root,
                             text="save all images",
                             command=lambda: save_all(all_images),
                             font=("Shanti", 10),
                             height=1,
                             width=15)
        # img_idx is read when the button is clicked, not when it is created
        save_btn = Button(root,
                          text="save image",
                          command=lambda: save_image(all_images[img_idx[-1]]),
                          font=("Shanti", 10),
                          height=1,
                          width=15)

        copyText_btn.grid(row=3, column=0)
        saveAll_btn.grid(row=3, column=1)
        save_btn.grid(row=3, column=2)
    finally:
        #reset the button text back to Browse
        browse_text.set("Browse")
Example #6
0
def open_file():
    """Load a user-selected PDF and show the text and one image of its first page.

    Once a file has been selected the page text is appended to
    ``page_contents`` and ``all_images`` is replaced with the images of the
    new page, so that ``all_images[img_idx[-1]]`` (used by the save button)
    is the same image as the one being displayed. The image row (grid row 2)
    and the action button row (grid row 3) are then built.

    The browse button label is always set back to "Browse" on exit, also when
    the dialog is dismissed without a selection or an exception is raised.
    """
    browse_text.set("loading...")
    try:
        file = askopenfile(parent=root,
                           mode='rb',
                           filetypes=[("Pdf file", "*.pdf")])
        if not file:
            return

        # NOTE(review): the handle returned by askopenfile is never closed.
        # Left as-is because extract_images() is not visible here and may
        # still read from the stream -- confirm before wrapping in ``with``.
        read_pdf = PyPDF2.PdfFileReader(file)
        page = read_pdf.getPage(0)
        page_content = page.extractText()
        # swap the trademark sign (U+2122) for an apostrophe
        page_content = page_content.replace('\u2122', "'")
        # page_contents is only ever appended to here, so it accumulates the
        # text of every file opened so far; presumably copy_text() uses the
        # newest entry -- TODO confirm
        page_contents.append(page_content)

        images = extract_images(page)

        # Drop the images of any previously opened PDF first; otherwise the
        # save buttons would index into (and "save all" would include) images
        # that are no longer the ones on screen.
        all_images.clear()
        all_images.extend(images)

        # NOTE(review): raises IndexError when the page holds no images;
        # the ``finally`` below at least restores the button label.
        img = images[img_idx[-1]]

        display_images(img)
        #show text box on row 4 col 0
        display_textbox(page_content, 4, 0, root)

        # image row (row 2): left arrow, counter label, right arrow
        img_menu = Frame(root, width=800, height=60)
        img_menu.grid(columnspan=3, rowspan=1, row=2)

        # counter reflects the actual position and image count instead of a
        # fixed placeholder
        what_img = Label(root,
                         text="Image " + str(img_idx[-1] + 1) + " of " +
                         str(len(all_images)),
                         font=("shanti", 10))
        what_img.grid(row=2, column=1)

        display_icon("arrow_l.png", 2, 0, E)
        display_icon("arrow_r.png", 2, 2, W)

        # action button row (row 3): copy text, save all images, save image
        save_img = Frame(root, width=800, height=60, bg="#c8c8c8")
        save_img.grid(columnspan=3, rowspan=1, row=3)

        copyText_btn = Button(root,
                              text="copy text",
                              command=lambda: copy_text(page_contents),
                              font=("shanti", 10),
                              height=1,
                              width=15)
        saveAll_btn = Button(root,
                             text="save all images",
                             command=lambda: save_all(all_images),
                             font=("shanti", 10),
                             height=1,
                             width=15)
        # img_idx is read when the button is clicked, not when it is created
        save_btn = Button(
            root,
            text="save image",
            command=lambda: save_image(all_images[img_idx[-1]], img_idx[-1]),
            font=("shanti", 10),
            height=1,
            width=15)

        copyText_btn.grid(row=3, column=0)
        saveAll_btn.grid(row=3, column=1)
        save_btn.grid(row=3, column=2)
    finally:
        #reset the button text back to Browse
        browse_text.set("Browse")