def open_file():
    """Let the user pick a PDF and show the text of its first page.

    The browse button reads "loading..." while this runs and is switched
    back to "Browse" on every exit path: a cancelled dialog, a failure
    while reading the PDF, or success. Nothing is displayed when the
    dialog is cancelled.
    """
    browse_text.set("loading...")
    try:
        file = askopenfile(parent=root, mode='rb',
                           filetypes=[("Pdf file", "*.pdf")])
        if not file:
            # askopenfile returns None when the dialog is cancelled
            return

        # Only the extracted string leaves this block, so the handle can be
        # closed as soon as the text has been pulled out.
        with file:
            read_pdf = PyPDF2.PdfFileReader(file)
            page = read_pdf.getPage(0)
            page_content = page.extractText()

        # U+2122 (trade mark sign) is swapped for an apostrophe --
        # presumably an extraction artifact of the PDFs this was written
        # for; verify against real input.
        page_content = page_content.replace('\u2122', "'")

        # show text box on row 2 col 0
        display_textbox(page_content, 2, 0, root)
    finally:
        # reset the button text back to Browse
        browse_text.set("Browse")
def open_file():
    """Ask for a PDF file and display the text of its first page.

    The browse button shows "loading..." for the duration of the call and
    is always restored to "Browse" afterwards, whether the dialog was
    cancelled, reading the PDF failed, or the text was displayed.
    """
    browse_text.set("loading...")
    try:
        # `filetypes` is the documented option name for the file dialog
        file = askopenfile(parent=root, mode='rb',
                           filetypes=[("Pdf file", "*.pdf")])
        if not file:
            # askopenfile returns None when the dialog is cancelled
            return

        # Only the extracted string is used below, so the handle is closed
        # right after extraction.
        with file:
            read_pdf = PyPDF2.PdfFileReader(file)
            page = read_pdf.getPage(0)
            page_content = page.extractText()

        # U+2122 (trade mark sign) is swapped for an apostrophe --
        # presumably an extraction artifact; verify against real input.
        page_content = page_content.replace('\u2122', "'")

        # show text box on row 2 col 0
        display_textbox(page_content, 2, 0, root)
    finally:
        browse_text.set("Browse")
def open_file():
    """Ask for a PDF and show the text and images of its first page.

    The browse button reads "loading..." while this runs and is set back
    to "Browse" on every exit path (cancelled dialog, read error, success).

    Once a file has been read, the module-level state left by the previous
    PDF (img_idx, all_content, all_images, displayed_img) is replaced, the
    first image (if any) and the text are displayed, and the image
    navigation row (grid row 2) and the action button row (grid row 3) are
    rebuilt.

    A page without images is supported: the text is still shown, the
    navigation widgets are omitted and the two image-saving buttons are
    created disabled.
    """
    browse_text.set("loading...")
    try:
        # load a PDF file
        file = askopenfile(parent=root, mode='rb',
                           filetypes=[("Pdf file", "*.pdf")])
        if not file:
            # Cancelled: leave the state of the currently shown PDF alone.
            return

        # NOTE(review): the file handle is never closed. Closing it after
        # extraction is only safe if extract_images() returns fully decoded
        # images -- confirm before wrapping this in a `with`.
        read_pdf = PyPDF2.PdfFileReader(file)
        # select a page
        page = read_pdf.getPage(0)
        # extract text content from page
        page_content = page.extractText()
        # U+2122 (trade mark sign) is swapped for an apostrophe --
        # presumably an extraction artifact; verify against real input.
        page_content = page_content.replace('\u2122', "'")

        # CLEARING GLOBAL VARIABLES ONCE A NEW PDF FILE IS SELECTED
        # clear() empties the lists completely; popping while iterating
        # over the same list skips elements.
        img_idx.clear()
        img_idx.append(0)  # set global index to 0
        all_content.clear()
        all_images.clear()
        # hide the displayed image from the previous PDF file and remove it
        if displayed_img:
            displayed_img[-1].grid_forget()
            displayed_img.pop()

        # BEGIN EXTRACTING
        all_content.append(page_content)
        images = extract_images(page)
        all_images.extend(images)

        # BEGIN DISPLAYING
        # display the first image that was detected, if there is one
        if all_images:
            selected_image = display_images(all_images[img_idx[-1]])
            displayed_img.append(selected_image)
        # display the text found on the page
        display_textbox(all_content, 4, 0, root)

        # BEGIN MENUS AND MENU WIDGETS
        # 1. image menu on row 2
        img_menu = Frame(root, width=800, height=60)
        img_menu.grid(columnspan=3, rowspan=1, row=2)
        if all_images:
            # Navigation only makes sense with at least one image.
            what_text = StringVar()
            what_img = Label(root, textvariable=what_text,
                             font=("shanti", 10))
            what_text.set("image " + str(img_idx[-1] + 1)
                          + " out of " + str(len(all_images)))
            what_img.grid(row=2, column=1)
            # arrow buttons
            display_icon(
                'arrow_l.png', 2, 0, E,
                lambda: left_arrow(all_images, selected_image, what_text))
            display_icon(
                'arrow_r.png', 2, 2, W,
                lambda: right_arrow(all_images, selected_image, what_text))

        # 2. save image menu on row 3
        save_img_menu = Frame(root, width=800, height=60, bg="#c8c8c8")
        save_img_menu.grid(columnspan=3, rowspan=1, row=3)

        # With nothing to save, the image buttons are disabled so a click
        # cannot index into an empty all_images.
        image_btn_state = "normal" if all_images else "disabled"

        # create action buttons
        copyText_btn = Button(root, text="copy text",
                              command=lambda: copy_text(all_content, root),
                              font=("shanti", 10), height=1, width=15)
        saveAll_btn = Button(root, text="save all images",
                             command=lambda: save_all(all_images),
                             font=("shanti", 10), height=1, width=15,
                             state=image_btn_state)
        save_btn = Button(
            root, text="save image",
            command=lambda: save_image(all_images[img_idx[-1]]),
            font=("shanti", 10), height=1, width=15,
            state=image_btn_state)

        # place buttons on grid
        copyText_btn.grid(row=3, column=0)
        saveAll_btn.grid(row=3, column=1)
        save_btn.grid(row=3, column=2)
    finally:
        # reset the button text back to Browse
        browse_text.set("Browse")
def open_file():
    """Ask for a PDF and show the text and images of its first page.

    The browse button reads "cargando...." while this runs and is set back
    to "Navegar" on every exit path (cancelled dialog, read error,
    success).

    Once a file has been read, the module-level state left by the previous
    PDF (img_idx, all_content, all_images, displayed_img) is replaced, the
    first image (if any) and the text are displayed, and the image
    navigation row (grid row 2) and the action button row (grid row 3) are
    rebuilt.

    A page without images is supported: the text is still shown, the
    navigation widgets are omitted and the two image-saving buttons are
    created disabled.
    """
    browse_text.set("cargando....")
    try:
        file = askopenfile(parent=root, mode="rb",
                           filetypes=[("PDF file", "*.pdf")])
        if not file:
            # Cancelled: leave the state of the currently shown PDF alone.
            return

        # NOTE(review): the file handle is never closed. Closing it after
        # extraction is only safe if extract_images() returns fully decoded
        # images -- confirm before wrapping this in a `with`.
        read_pdf = PyPDF2.PdfFileReader(file)
        page = read_pdf.getPage(0)
        page_content = page.extractText()
        # U+2122 (trade mark sign) is swapped for an apostrophe --
        # presumably an extraction artifact; verify against real input.
        page_content = page_content.replace('\u2122', "'")

        # Drop everything that belonged to the previous PDF. clear()
        # empties the lists completely; popping while iterating over the
        # same list skips elements.
        img_idx.clear()
        img_idx.append(0)
        all_content.clear()
        all_images.clear()
        if displayed_img:
            displayed_img[-1].grid_forget()
            displayed_img.pop()

        all_content.append(page_content)
        images = extract_images(page)
        all_images.extend(images)

        # Show the first image only when the page actually has one.
        if all_images:
            selected_image = display_images(all_images[img_idx[-1]])
            displayed_img.append(selected_image)
        display_textbox(page_content, 4, 0, root)

        # Image navigation on row 2.
        img_menu = Frame(root, width=800, height=60)
        img_menu.grid(columnspan=3, rowspan=1, row=2)
        if all_images:
            que_text = StringVar()
            que_img = Label(root, textvariable=que_text)
            que_text.set("Imagen " + str(img_idx[-1] + 1)
                         + " de " + str(len(all_images)))
            que_img.grid(row=2, column=1)
            display_icon(
                'img/arrow_l.png', 2, 0, E,
                lambda: left_arrow(all_images, selected_image, que_text))
            display_icon(
                'img/arrow_r.png', 2, 2, W,
                lambda: right_arrow(all_images, selected_image, que_text))

        # Action buttons on row 3.
        save_img = Frame(root, width=800, height=60, bg="#c8c8c8")
        save_img.grid(columnspan=3, rowspan=1, row=3)

        # With nothing to save, the image buttons are disabled so a click
        # cannot index into an empty all_images.
        image_btn_state = "normal" if all_images else "disabled"

        copyText_btn = Button(root, text="Copiar texto",
                              command=lambda: copy_text(all_content),
                              height=1, width=15)
        saveAll_btn = Button(root, text="Todas las imagenes",
                             command=lambda: save_all(all_images),
                             height=1, width=15, state=image_btn_state)
        save_btn = Button(
            root, text="Guardar imagen",
            command=lambda: save_image(all_images[img_idx[-1]]),
            height=1, width=15, state=image_btn_state)

        copyText_btn.grid(row=3, column=0)
        saveAll_btn.grid(row=3, column=1)
        save_btn.grid(row=3, column=2)
    finally:
        browse_text.set("Navegar")
def open_file():
    """Ask for a PDF and show the text and images of its first page.

    The browse button reads "loading..." while this runs and is set back
    to "Browse" on every exit path (cancelled dialog, read error, success).

    Once a file has been read, the image state left by the previous PDF
    (img_idx, all_images, displayed_img) is replaced, the page text is
    appended to page_contents, the first image (if any) and the text are
    displayed, and the image navigation row (grid row 2) and the action
    button row (grid row 3) are rebuilt.

    A page without images is supported: the text is still shown, the
    navigation widgets are omitted and the two image-saving buttons are
    created disabled.
    """
    browse_text.set("loading...")
    try:
        # `filetypes` is the documented option name for the file dialog
        file = askopenfile(parent=root, mode='rb',
                           filetypes=[("Pdf file", "*.pdf")])
        if not file:
            # Cancelled: leave the state of the currently shown PDF alone.
            return

        # NOTE(review): the file handle is never closed. Closing it after
        # extraction is only safe if extract_images() returns fully decoded
        # images -- confirm before wrapping this in a `with`.
        read_pdf = PyPDF2.PdfFileReader(file)
        page = read_pdf.getPage(0)
        page_content = page.extractText()
        # U+2122 (trade mark sign) is swapped for an apostrophe --
        # presumably an extraction artifact; verify against real input.
        page_content = page_content.replace('\u2122', "'")

        # NOTE(review): page_contents is only ever appended to here, so it
        # keeps the text of every PDF opened so far -- confirm that
        # copy_text() expects that.
        page_contents.append(page_content)

        # Reset the image index. clear() empties the list completely;
        # popping while iterating over the same list skips elements.
        img_idx.clear()
        img_idx.append(0)

        # hide and forget the image of the previous PDF
        if displayed_img:
            displayed_img[-1].grid_forget()
            displayed_img.pop()

        all_images.clear()
        images = extract_images(page)
        all_images.extend(images)

        # Show the first image only when the page actually has one.
        if all_images:
            current_image = display_images(all_images[img_idx[-1]])
            displayed_img.append(current_image)

        # show text box on row 4 col 0
        display_textbox(page_content, 4, 0, root)

        # Image navigation on row 2.
        img_menu = Frame(root, width=800, height=60)
        img_menu.grid(columnspan=3, rowspan=1, row=2)
        if all_images:
            what_text = StringVar()
            what_img = Label(root, textvariable=what_text,
                             font=("Shanti", 10))
            what_text.set("image " + str(img_idx[-1] + 1)
                          + " of " + str(len(all_images)))
            what_img.grid(row=2, column=1)
            display_icon(
                'arrow_l.png', 2, 0, E,
                lambda: left_arrow(all_images, current_image, what_text))
            display_icon(
                'arrow_r.png', 2, 2, W,
                lambda: right_arrow(all_images, current_image, what_text))

        # Action buttons on row 3.
        save_img = Frame(root, width=800, height=60, bg="#c8c8c8")
        save_img.grid(columnspan=3, rowspan=1, row=3)

        # With nothing to save, the image buttons are disabled so a click
        # cannot index into an empty all_images.
        image_btn_state = "normal" if all_images else "disabled"

        copyText_btn = Button(root, text="copy text",
                              command=lambda: copy_text(page_contents),
                              font=("Shanti", 10), height=1, width=15)
        saveAll_btn = Button(root, text="save all images",
                             command=lambda: save_all(all_images),
                             font=("Shanti", 10), height=1, width=15,
                             state=image_btn_state)
        save_btn = Button(
            root, text="save image",
            command=lambda: save_image(all_images[img_idx[-1]]),
            font=("Shanti", 10), height=1, width=15,
            state=image_btn_state)

        copyText_btn.grid(row=3, column=0)
        saveAll_btn.grid(row=3, column=1)
        save_btn.grid(row=3, column=2)
    finally:
        # reset the button text back to Browse
        browse_text.set("Browse")
def open_file():
    """Ask for a PDF and show the text and images of its first page.

    The browse button reads "loading..." while this runs and is set back
    to "Browse" on every exit path (cancelled dialog, read error, success).

    Once a file has been read, img_idx and all_images are reset so they
    describe only the new PDF, the page text is appended to page_contents,
    the first image (if any) and the text are displayed, and the image row
    (grid row 2) and the action button row (grid row 3) are rebuilt. The
    image label reports the real position and count rather than a fixed
    placeholder.

    A page without images is supported: the text is still shown, the image
    row stays empty and the two image-saving buttons are created disabled.
    """
    browse_text.set("loading...")
    try:
        file = askopenfile(parent=root, mode='rb',
                           filetypes=[("Pdf file", "*.pdf")])
        if not file:
            # Cancelled: leave the state of the currently shown PDF alone.
            return

        # NOTE(review): the file handle is never closed. Closing it after
        # extraction is only safe if extract_images() returns fully decoded
        # images -- confirm before wrapping this in a `with`.
        read_pdf = PyPDF2.PdfFileReader(file)
        page = read_pdf.getPage(0)
        page_content = page.extractText()
        # U+2122 (trade mark sign) is swapped for an apostrophe --
        # presumably an extraction artifact; verify against real input.
        page_content = page_content.replace('\u2122', "'")

        # NOTE(review): page_contents is only ever appended to here, so it
        # keeps the text of every PDF opened so far -- confirm that
        # copy_text() expects that.
        page_contents.append(page_content)

        # Start from a clean image state. Without this, all_images keeps
        # the images of earlier PDFs and the save buttons index into the
        # wrong file's images.
        img_idx.clear()
        img_idx.append(0)
        all_images.clear()
        images = extract_images(page)
        all_images.extend(images)

        # Show the first image only when the page actually has one.
        if all_images:
            display_images(all_images[img_idx[-1]])

        # show text box on row 4 col 0
        display_textbox(page_content, 4, 0, root)

        # Image row on grid row 2.
        img_menu = Frame(root, width=800, height=60)
        img_menu.grid(columnspan=3, rowspan=1, row=2)
        if all_images:
            what_img = Label(
                root,
                text=("Image " + str(img_idx[-1] + 1)
                      + " of " + str(len(all_images))),
                font=("shanti", 10))
            what_img.grid(row=2, column=1)
            display_icon("arrow_l.png", 2, 0, E)
            display_icon("arrow_r.png", 2, 2, W)

        # Action buttons on grid row 3.
        save_img = Frame(root, width=800, height=60, bg="#c8c8c8")
        save_img.grid(columnspan=3, rowspan=1, row=3)

        # With nothing to save, the image buttons are disabled so a click
        # cannot index into an empty all_images.
        image_btn_state = "normal" if all_images else "disabled"

        copyText_btn = Button(root, text="copy text",
                              command=lambda: copy_text(page_contents),
                              font=("shanti", 10), height=1, width=15)
        saveAll_btn = Button(root, text="save all images",
                             command=lambda: save_all(all_images),
                             font=("shanti", 10), height=1, width=15,
                             state=image_btn_state)
        save_btn = Button(
            root, text="save image",
            command=lambda: save_image(all_images[img_idx[-1]],
                                       img_idx[-1]),
            font=("shanti", 10), height=1, width=15,
            state=image_btn_state)

        copyText_btn.grid(row=3, column=0)
        saveAll_btn.grid(row=3, column=1)
        save_btn.grid(row=3, column=2)
    finally:
        # reset the button text back to Browse
        browse_text.set("Browse")