def update_page(index):
    """Load page ``index`` of the current document into the editor widgets.

    Reads the module-level ``dir_path`` and returns immediately when it is
    the empty string.  Otherwise it:

    * copies ``text_pages/pg_NNNN.txt`` to ``pg_NNNN.txt.orig`` if no
      ``.orig`` file exists yet,
    * fills ``text_text`` with the current page text and ``ocr_text`` with
      the ``.orig`` contents (``ocr_text`` is re-disabled afterwards),
    * shows ``tiff_pages/pg_NNNN.tiff`` on ``tiff_canvas``, and
    * renders ``pdf_pages/pg_NNNN.pdf`` to an image file, shows it on
      ``pdf_canvas`` and removes that image file again.

    :param index: page number; formatted as a zero-padded 4-digit number
        when building the file names.
    """
    if dir_path == "":
        return

    orig_file = "%s/text_pages/pg_%04d.txt.orig" % (dir_path, index)
    filename = "%s/text_pages/pg_%04d.txt" % (dir_path, index)
    if not os.path.isfile(orig_file):
        shutil.copyfile(filename, orig_file)

    # ocr_text is kept disabled, so it has to be enabled while it is refilled.
    ocr_text.config(state=Tkinter.NORMAL)
    ocr_text.delete('1.0', Tkinter.END)
    text_text.delete('1.0', Tkinter.END)
    # Context managers close both files; the original leaked the handles.
    with open(filename, 'r') as page_fh:
        text_text.insert('1.0', page_fh.read())
    with open(orig_file, 'r') as ocr_fh:
        ocr_text.insert('1.0', ocr_fh.read())
    ocr_text.config(state=Tkinter.DISABLED)

    tiff_file = "%s/tiff_pages/pg_%04d.tiff" % (dir_path, index)
    # Single-argument print(...) produces the same output on Python 2.
    print("Loading TIFF: " + tiff_file)
    tiff_img = Image.open(tiff_file)
    tiff_im = ImageTk.PhotoImage(tiff_img)
    tiff_canvas.config(image=tiff_im)
    # Tk does not keep the PhotoImage alive; without a Python-side reference
    # the picture is garbage collected as soon as this function returns.
    tiff_canvas.image = tiff_im

    pdf_file = "%s/pdf_pages/pg_%04d.pdf" % (dir_path, index)
    print("Loading PDF: " + pdf_file)
    # Element 1 of make_img_from_pdf's result is used as a glob pattern;
    # the first match is the rendered image file.
    pdf_fh = glob.glob(pdfImg.PyGs({}).make_img_from_pdf(pdf_file)[1])[0]
    print("PDF_FH " + pdf_fh)
    pdf_img = Image.open(pdf_fh)
    try:
        tk_pdf_img = ImageTk.PhotoImage(pdf_img)
        pdf_canvas.itemconfig(pdf_image, image=tk_pdf_img)
        pdf_canvas.image = tk_pdf_img  # same keep-alive reason as above
    finally:
        # Always release and delete the rendered file, even if display fails.
        pdf_img.close()
        os.remove(pdf_fh)
def test_gs_set_nt(self, mock_subprocess, mock_os_name):
    """With the OS name mock reporting 'nt', the chosen binary contains 'gswin'."""
    mock_os_name.__str__.return_value = 'nt'
    gs = P.PyGs({})
    chosen_binary = gs.binary
    assert 'gswin' in chosen_binary
def test_gs_run_nt(self, mock_subprocess, mock_os_name, capsys):
    """A failing ``_run_gs`` call exits and prints the GS_FAILED message.

    The OS name mock reports 'nt' and the subprocess mock returns -1; the
    binary is overridden with a dummy executable name before the call.
    """
    mock_os_name.__str__.return_value = 'nt'
    gs = P.PyGs({})

    mock_subprocess.return_value = -1
    gs.binary = 'gsblah.exe'

    with pytest.raises(SystemExit):
        gs._run_gs("", "", "")

    captured = capsys.readouterr()
    stdout_text = captured[0]
    assert gs.msgs['GS_FAILED'] in stdout_text
def __init__(self, master=None):
    """Build the frame, size the top-level window and show a rendered PDF page.

    Renders ``Tkinter.pdf`` to an image file via ``PyGs``, displays it in a
    ``Label`` packed into this frame, and removes the rendered file again.

    :param master: parent widget passed on to ``Frame.__init__``.
    """
    import os  # local import: only needed for the temp-file cleanup below

    Frame.__init__(self, master)
    self.pack()

    w = 1500
    h = 1000
    # NOTE(review): x/y are half the screen size, so the window's top-left
    # corner (not its centre) is placed at the screen centre -- confirm intent.
    x = (self.master.winfo_screenwidth()) / 2
    y = (self.master.winfo_screenheight()) / 2
    self.master.geometry("%dx%d+%d+%d" % (w, h, x, y))

    # The empty dict passed to PyGs is needed for a "default" config.
    # Element 1 of make_img_from_pdf's result is used as a glob pattern.
    rendered_path = glob.glob(
        pdfImg.PyGs({}).make_img_from_pdf("Tkinter.pdf")[1])[0]
    img = Image.open(rendered_path)
    try:
        pic = ImageTk.PhotoImage(img)
    finally:
        # The sibling viewers close the image and delete the rendered file
        # once the PhotoImage exists; the original left both behind here.
        img.close()
        os.remove(rendered_path)

    label = Label(self, image=pic)
    label.image = pic  # keep a reference so the picture is not collected
    label.pack()
def update_page(self, index):
    """Triggers when the page is changed; displays its images and text.

    Does nothing when ``self.dir_path`` is the empty string.  Otherwise it:

    * sets ``self.edit_page_path`` to ``text_pages/pg_NNNN.txt`` and copies
      that file to ``pg_NNNN.txt.orig`` if no ``.orig`` file exists yet,
    * refills ``self.text_text`` (current text) and ``self.ocr_text``
      (``.orig`` contents, re-disabled afterwards),
    * gives list-box entry ``index - 1`` a blue background,
    * calls ``self.text_render_engine.update()``,
    * shows the page's TIFF and the rendered PDF, both resized to 500x600,
      on ``self.tiff_label`` / ``self.pdf_label``, removing the rendered
      PDF image file afterwards, and
    * stores ``index`` in ``self.page``.

    :param index: page number; formatted as a zero-padded 4-digit number
        when building the file names.
    """
    if self.dir_path == "":
        return

    orig_file = "%s/text_pages/pg_%04d.txt.orig" % (self.dir_path, index)
    self.edit_page_path = "%s/text_pages/pg_%04d.txt" % (self.dir_path, index)
    if not os.path.isfile(orig_file):
        shutil.copyfile(self.edit_page_path, orig_file)

    # ocr_text is kept disabled, so enable it while its content is replaced.
    self.ocr_text.config(state=tk.NORMAL)
    self.ocr_text.delete('1.0', tk.END)
    self.text_text.delete('1.0', tk.END)

    # Context managers close both files; the original never closed ocr_file.
    with open(self.edit_page_path, 'r') as txt_file:
        page_text = txt_file.read()
    self.page_list_box.itemconfig(index - 1, {'bg': 'blue'})
    self.text_text.insert('1.0', page_text)
    with open(orig_file, 'r') as ocr_file:
        self.ocr_text.insert('1.0', ocr_file.read())
    self.ocr_text.config(state=tk.DISABLED)
    self.text_render_engine.update()

    tiff_file = "%s/tiff_pages/pg_%04d.tiff" % (self.dir_path, index)
    tiff_img = Image.open(tiff_file)
    tiff_img = tiff_img.resize((500, 600), Image.ANTIALIAS)
    tiff_im = ImageTk.PhotoImage(tiff_img)
    self.tiff_label.config(image=tiff_im)
    self.tiff_label.image = tiff_im  # keep a reference for Tk

    pdf_file = "%s/pdf_pages/pg_%04d.pdf" % (self.dir_path, index)
    # Element 1 of make_img_from_pdf's result is used as a glob pattern;
    # the first match is the rendered image file.
    pdf_fh = glob.glob(pdfImg.PyGs({}).make_img_from_pdf(pdf_file)[1])[0]
    pdf_img = Image.open(pdf_fh)
    try:
        resized_pdf = pdf_img.resize((500, 600), Image.ANTIALIAS)
        tk_pdf_img = ImageTk.PhotoImage(resized_pdf)
    finally:
        # Close the image that actually owns the file (the original closed
        # the resized copy instead) and always delete the rendered file.
        pdf_img.close()
        os.remove(pdf_fh)
    self.pdf_label.config(image=tk_pdf_img)
    self.pdf_label.image = tk_pdf_img  # keep a reference for Tk

    self.page = index
import pypdfocr.pypdfocr_gs as pdfImg import ttk import glob import os from pdfui import fname import pickle root = Tk() canv = Canvas(root, height=1500, width=1000) canv.pack(side=LEFT, fill=BOTH) # Generating and processing initial image from PDF my_img = glob.glob(pdfImg.PyGs({}).make_img_from_pdf(fname)[1])[0] im = Image.open(my_img) width1, height1 = im.size resized = im.resize((1000, height1 / 2), Image.ANTIALIAS) width, height = resized.size im2 = ImageTk.PhotoImage(resized) a1 = canv.create_image(0, 0, anchor="nw", image=im2) # Declaring array filled with images imglen = len(glob.glob(pdfImg.PyGs({}).make_img_from_pdf(fname)[1])) imgarr = [] for i in range(imglen): imgarr.append(glob.glob(pdfImg.PyGs({}).make_img_from_pdf(fname)[1])[i]) imarr = [] for i in imgarr:
import pypdfocr.pypdfocr_gs as pdfImg
from PIL import Image, ImageTk
import Tkinter as tk
import ttk
import glob
import os

root = tk.Tk()

# Render the PDF to an image file.  The empty dict passed to PyGs is needed
# for a "default" config.  Element 1 of make_img_from_pdf's result is used
# as a glob pattern; the first match is the rendered file.
# The path is a raw string: "\D" and "\h" are not valid escape sequences, so
# the plain literal only worked by accident (and warns on newer Pythons).
__f_tmp = glob.glob(
    pdfImg.PyGs({}).make_img_from_pdf(r"\DME\hell0.pdf")[1])[0]

__img = Image.open(__f_tmp)
__tk_img = ImageTk.PhotoImage(__img)
ttk.Label(root, image=__tk_img).grid()

# The rendered file is no longer needed once the PhotoImage exists.
__img.close()
os.remove(__f_tmp)

root.mainloop()
import pypdfocr.pypdfocr_gs as pdfImg
from PIL import Image, ImageTk
import Tkinter as tk
import ttk
import glob
import os

root = tk.Tk()

# The empty dict handed to PyGs is needed for a "default" config.
gs = pdfImg.PyGs({})
# Element 1 of the result is used as a glob pattern for the rendered image.
image_glob = gs.make_img_from_pdf("test.pdf")[1]
__f_tmp = glob.glob(image_glob)[0]

__img = Image.open(__f_tmp)
__tk_img = ImageTk.PhotoImage(__img)
preview = ttk.Label(root, image=__tk_img)
preview.grid()

# Release and delete the rendered file before entering the event loop.
__img.close()
os.remove(__f_tmp)

root.mainloop()
def test_set_gs_binary(self):
    """A 'binary' entry in the config dict becomes ``PyGs.binary``."""
    configured_path = "C:\\tools\\foo\\gs.exe"
    pygs = P.PyGs({'binary': configured_path})
    expected_path = os.path.join("C:\\", "tools", "foo", "gs.exe")
    assert pygs.binary == expected_path
def pygs(self):
    """Return a ``PyGs`` instance built from an empty config dict."""
    empty_config = {}
    return pypdfocr_gs.PyGs(empty_config)
def test_set_gs_binary(self):
    """A 'binary' entry in the config dict becomes ``PyGs.binary`` unchanged."""
    configured_path = "/foo/bar/bin/gs"
    config = {'binary': configured_path}
    pygs = pypdfocr_gs.PyGs(config)
    assert pygs.binary == '/foo/bar/bin/gs'
def test_multiple_gs(self, monkeypatch, caplog):
    """When several versions are found, the most recent one is selected."""
    candidates = ['gs1.00c.exe', 'gs4.32c.exe', 'gs4.31c.exe']
    fake_filter = mock.Mock(return_value=candidates)
    monkeypatch.setattr('fnmatch.filter', fake_filter)

    pygs = P.PyGs({})
    _, binary_name = os.path.split(pygs.binary)
    # The expected name ends in a double quote -- presumably PyGs wraps the
    # found path in quotes; verify against PyGs before "fixing" it.
    assert binary_name == 'gs4.32c.exe"'
def test_gs_binary_missing(self, monkeypatch, caplog):
    """Constructing PyGs exits and logs an ERROR when no binary is found."""
    empty_filter = mock.Mock(return_value=[])
    monkeypatch.setattr('fnmatch.filter', empty_filter)

    with pytest.raises(SystemExit):
        P.PyGs({})

    error_records = [rec for rec in caplog.records
                     if rec.levelname == "ERROR"]
    assert any(error_records)
def test_set_gs_binary_posix_format(self):
    """A posix-style 'binary' path is expected to come back as a C:\\ path."""
    posix_style_path = "/tools/foo/gs.exe"
    pygs = P.PyGs({'binary': posix_style_path})
    expected_path = os.path.join("C:\\", "tools", "foo", "gs.exe")
    assert pygs.binary == expected_path
def test_find_gs_binary(self):
    """With an empty config, the discovered binary name contains 'gswin'."""
    discovered = P.PyGs({}).binary
    assert "gswin" in discovered
def test_gs_pdf_missing(self, capsys):
    """Rendering a missing PDF exits and prints the GS_MISSING_PDF message."""
    gs = P.PyGs({})

    with pytest.raises(SystemExit):
        gs.make_img_from_pdf("missing123.pdf")

    captured = capsys.readouterr()
    stdout_text = captured[0]
    assert gs.msgs['GS_MISSING_PDF'] in stdout_text
def test_gs_missing(self, tmpdir, caplog):
    """Running gs with a binary path under tmpdir exits and logs an ERROR."""
    bogus_binary = str(tmpdir.join('gsblah'))
    pygs = P.PyGs({'binary': bogus_binary})

    with pytest.raises(SystemExit):
        pygs._run_gs("", "", "")

    error_records = [rec for rec in caplog.records
                     if rec.levelname == "ERROR"]
    assert any(error_records)