Example #1
0
def update_page(index):
    """Load page ``index`` into the text, OCR, TIFF and PDF widgets.

    Does nothing when the module-level ``dir_path`` is the empty string.
    Otherwise, for the given page number, it:

    * copies ``text_pages/pg_NNNN.txt`` to ``pg_NNNN.txt.orig`` if that
      ``.orig`` file does not exist yet,
    * replaces the content of ``text_text`` with the ``.txt`` file and the
      content of ``ocr_text`` with the ``.orig`` file (``ocr_text`` is
      switched to NORMAL for the update and back to DISABLED afterwards),
    * shows ``tiff_pages/pg_NNNN.tiff`` on ``tiff_canvas``,
    * calls ``make_img_from_pdf`` on ``pdf_pages/pg_NNNN.pdf``, shows the
      first file matching the returned glob pattern on ``pdf_canvas`` and
      then deletes that file.

    :param index: page number, formatted as a zero-padded 4-digit value
        in every file name.
    """
    if dir_path == "":
        return

    orig_file = "%s/text_pages/pg_%04d.txt.orig" % (dir_path, index)
    filename = "%s/text_pages/pg_%04d.txt" % (dir_path, index)
    if not os.path.isfile(orig_file):
        shutil.copyfile(filename, orig_file)

    ocr_text.config(state=Tkinter.NORMAL)
    ocr_text.delete('1.0', Tkinter.END)
    text_text.delete('1.0', Tkinter.END)
    # ``with`` guarantees both file handles are closed again.
    with open(filename, 'r') as page_fh:
        text_text.insert('1.0', page_fh.read())
    with open(orig_file, 'r') as ocr_fh:
        ocr_text.insert('1.0', ocr_fh.read())
    ocr_text.config(state=Tkinter.DISABLED)

    tiff_file = "%s/tiff_pages/pg_%04d.tiff" % (dir_path, index)
    print("Loading TIFF: " + tiff_file)
    tiff_img = Image.open(tiff_file)
    tiff_im = ImageTk.PhotoImage(tiff_img)
    tiff_img.close()
    tiff_canvas.config(image=tiff_im)
    # Tk does not hold a reference to the image; without one on the Python
    # side it is deleted as soon as this function returns.
    tiff_canvas.image = tiff_im

    pdf_file = "%s/pdf_pages/pg_%04d.pdf" % (dir_path, index)
    print("Loading PDF: " + pdf_file)
    pdf_fh = glob.glob(pdfImg.PyGs({}).make_img_from_pdf(pdf_file)[1])[0]
    print("PDF_FH " + pdf_fh)
    pdf_img = Image.open(pdf_fh)
    tk_pdf_img = ImageTk.PhotoImage(pdf_img)
    pdf_canvas.itemconfig(pdf_image, image=tk_pdf_img)
    # Same reference-keeping as for the TIFF image above.
    pdf_canvas.image = tk_pdf_img
    pdf_img.close()
    os.remove(pdf_fh)
Example #2
0
    def test_gs_set_nt(self, mock_subprocess, mock_os_name):
        """
            With the os-name mock stringifying to 'nt', the binary picked
            by PyGs must contain 'gswin'
        """
        mock_os_name.__str__.return_value = 'nt'
        gs_wrapper = P.PyGs({})
        selected_binary = gs_wrapper.binary

        assert 'gswin' in selected_binary
Example #3
0
    def test_gs_run_nt(self, mock_subprocess, mock_os_name, capsys):
        """
            With the subprocess mock returning -1, _run_gs must raise
            SystemExit and the GS_FAILED message must show up on stdout
        """
        mock_os_name.__str__.return_value = 'nt'
        gs_wrapper = P.PyGs({})

        # Simulate a failing run of a made-up binary.
        mock_subprocess.return_value = -1
        gs_wrapper.binary = 'gsblah.exe'
        with pytest.raises(SystemExit):
            gs_wrapper._run_gs("", "", "")

        captured_out, _captured_err = capsys.readouterr()
        expected_msg = gs_wrapper.msgs['GS_FAILED']
        assert expected_msg in captured_out
Example #4
0
    def __init__(self, master = None):
        """Set up the frame and display an image rendered from "Tkinter.pdf".

        Packs the frame, sizes the master window to 1500x1000 with its
        top-left corner at half the screen width/height, runs
        ``make_img_from_pdf`` on "Tkinter.pdf" and shows the first file
        matching the returned glob pattern in a ``Label``. The image file
        is closed and deleted once it has been handed to Tk.

        :param master: parent widget, passed through to ``Frame.__init__``.
        """
        Frame.__init__(self, master)
        self.pack()

        w = 1500
        h = 1000
        x = self.master.winfo_screenwidth() // 2
        y = self.master.winfo_screenheight() // 2
        self.master.geometry("%dx%d+%d+%d" % (w, h, x, y))

        # The empty dict passed to PyGs is needed for a "default" config.
        rendered_path = glob.glob(
            pdfImg.PyGs({}).make_img_from_pdf("Tkinter.pdf")[1])[0]
        img = Image.open(rendered_path)
        pic = ImageTk.PhotoImage(img)
        # The PhotoImage holds its own copy of the pixels, so the source
        # handle and the rendered file are no longer needed.
        img.close()
        os.remove(rendered_path)

        label = Label(self, image = pic)
        # Keep a Python-side reference so the image is not deleted.
        label.image = pic
        label.pack()
    def update_page(self, index):
        """Triggers when page is changed, displays images and text, renders output

        Does nothing when ``self.dir_path`` is the empty string. Otherwise:

        * stores the editable text path in ``self.edit_page_path`` and
          copies it to ``pg_NNNN.txt.orig`` if that file does not exist,
        * refreshes ``self.text_text`` (editable text) and
          ``self.ocr_text`` (``.orig`` text, left DISABLED afterwards),
        * sets the background of list-box entry ``index - 1`` to blue,
        * calls ``self.text_render_engine.update()``,
        * shows the page's TIFF and the image produced by
          ``make_img_from_pdf`` from the page's PDF, both resized to
          500x600, and deletes that produced image file,
        * records ``index`` in ``self.page``.

        :param index: page number, formatted as a zero-padded 4-digit
            value in every file name.
        """
        if self.dir_path == "":
            return

        orig_file = "%s/text_pages/pg_%04d.txt.orig" % (self.dir_path,
                                                        index)
        self.edit_page_path = ("%s/text_pages/pg_%04d.txt" %
                               (self.dir_path, index))
        if not os.path.isfile(orig_file):
            shutil.copyfile(self.edit_page_path, orig_file)

        self.ocr_text.config(state=tk.NORMAL)
        self.ocr_text.delete('1.0', tk.END)
        self.text_text.delete('1.0', tk.END)
        # ``with`` closes both files, including the .orig handle.
        with open(self.edit_page_path, 'r') as txt_file:
            self.page_list_box.itemconfig(index - 1, {'bg': 'blue'})
            page_text = txt_file.read()
        self.text_text.insert('1.0', page_text)
        with open(orig_file, 'r') as ocr_file:
            self.ocr_text.insert('1.0', ocr_file.read())
        self.ocr_text.config(state=tk.DISABLED)

        self.text_render_engine.update()

        tiff_file = "%s/tiff_pages/pg_%04d.tiff" % (self.dir_path, index)
        tiff_src = Image.open(tiff_file)
        tiff_img = tiff_src.resize((500, 600), Image.ANTIALIAS)
        # resize() returned a new image; release the handle on the file.
        tiff_src.close()
        tiff_im = ImageTk.PhotoImage(tiff_img)
        self.tiff_label.config(image=tiff_im)
        # Keep a reference so the displayed image is not deleted.
        self.tiff_label.image = tiff_im

        pdf_file = "%s/pdf_pages/pg_%04d.pdf" % (self.dir_path, index)
        pdf_fh = glob.glob(pdfImg.PyGs(
            {}).make_img_from_pdf(pdf_file)[1])[0]
        pdf_src = Image.open(pdf_fh)
        pdf_img = pdf_src.resize((500, 600), Image.ANTIALIAS)
        # Close the image that actually owns the file handle, so the file
        # is no longer open when it is removed below.
        pdf_src.close()
        tk_pdf_img = ImageTk.PhotoImage(pdf_img)
        self.pdf_label.config(image=tk_pdf_img)
        self.pdf_label.image = tk_pdf_img
        pdf_img.close()
        os.remove(pdf_fh)
        self.page = index
Example #6
0
import pypdfocr.pypdfocr_gs as pdfImg
import ttk
import glob
import os
from pdfui import fname
import pickle




root = Tk()
canv = Canvas(root, height=1500, width=1000)
canv.pack(side=LEFT, fill=BOTH)

# Declaring array filled with images
# make_img_from_pdf() is run a single time; element [1] of its result is
# used as a glob pattern and every matching path is collected in one go
# instead of re-running the conversion for each list entry.
imgarr = glob.glob(pdfImg.PyGs({}).make_img_from_pdf(fname)[1])
imglen = len(imgarr)

# Generating and processing initial image from PDF
my_img = imgarr[0]
im = Image.open(my_img)
width1, height1 = im.size
# Target size: fixed width of 1000, half the original height (floor
# division keeps the height an integer).
resized = im.resize((1000, height1 // 2), Image.ANTIALIAS)
width, height = resized.size
# im2 stays a module-level name, which keeps the canvas image alive.
im2 = ImageTk.PhotoImage(resized)
a1 = canv.create_image(0, 0, anchor="nw", image=im2)

imarr = []
for i in imgarr:
Example #7
0
import pypdfocr.pypdfocr_gs as pdfImg
from PIL import Image, ImageTk
import Tkinter as tk
import ttk

import glob, os

root = tk.Tk()

# The empty dict passed to PyGs is needed for a "default" config.
# The path is a raw string: "\D" and "\h" are not valid escape sequences,
# so the plain literal only worked because unknown escapes are left as-is.
# The resulting value is unchanged.
__f_tmp = glob.glob(
    pdfImg.PyGs({}).make_img_from_pdf(r"\DME\hell0.pdf")[1])[0]
__img = Image.open(__f_tmp)

# Module-level name, so the image stays referenced while the label shows it.
__tk_img = ImageTk.PhotoImage(__img)

ttk.Label(root, image=__tk_img).grid()

# The PhotoImage has its own copy of the pixels; drop the source file.
__img.close()
os.remove(__f_tmp)

root.mainloop()
Example #8
0
import pypdfocr.pypdfocr_gs as pdfImg
from PIL import Image, ImageTk
import Tkinter as tk
import ttk
import glob, os

root = tk.Tk()

# PyGs is handed an empty dict; this is needed for a "default" config.
# Element [1] of the make_img_from_pdf() result is used as a glob pattern
# and the first matching file is the image shown below.
__gs_result = pdfImg.PyGs({}).make_img_from_pdf("test.pdf")
__f_tmp = glob.glob(__gs_result[1])[0]

__img = Image.open(__f_tmp)
__tk_img = ImageTk.PhotoImage(__img)

__label = ttk.Label(root, image=__tk_img)
__label.grid()

# Release the source image and delete the generated file before the
# event loop starts.
__img.close()
os.remove(__f_tmp)

root.mainloop()
Example #9
0
 def test_set_gs_binary(self):
     """A 'binary' entry in the config dict ends up in ``pygs.binary``."""
     config = {'binary': "C:\\tools\\foo\\gs.exe"}
     expected_path = os.path.join("C:\\", "tools", "foo", "gs.exe")
     pygs = P.PyGs(config)
     assert pygs.binary == expected_path
Example #10
0
 def pygs(self):
     """Return a ``PyGs`` instance created from an empty config dict."""
     empty_config = {}
     return pypdfocr_gs.PyGs(empty_config)
Example #11
0
 def test_set_gs_binary(self):
     """A 'binary' entry in the config dict is stored unchanged."""
     configured_binary = "/foo/bar/bin/gs"
     pygs = pypdfocr_gs.PyGs({'binary': configured_binary})
     assert pygs.binary == '/foo/bar/bin/gs'
Example #12
0
 def test_multiple_gs(self, monkeypatch, caplog):
     """With several candidates from fnmatch.filter, 'gs4.32c.exe' wins."""
     candidates = ['gs1.00c.exe', 'gs4.32c.exe', 'gs4.31c.exe']
     fake_filter = mock.Mock(return_value=candidates)
     monkeypatch.setattr('fnmatch.filter', fake_filter)

     pygs = P.PyGs({})
     _head, binary_name = os.path.split(pygs.binary)
     # NOTE(review): the trailing double quote in the expected value is
     # kept as-is; presumably the binary path is stored quoted - confirm.
     assert binary_name == 'gs4.32c.exe"'
Example #13
0
 def test_gs_binary_missing(self, monkeypatch, caplog):
     """No match from fnmatch.filter: SystemExit plus an ERROR log record."""
     empty_filter = mock.Mock(return_value=[])
     monkeypatch.setattr('fnmatch.filter', empty_filter)

     with pytest.raises(SystemExit):
         P.PyGs({})

     error_records = [rec for rec in caplog.records
                      if rec.levelname == "ERROR"]
     assert any(error_records)
Example #14
0
 def test_set_gs_binary_posix_format(self):
     """A posix-style 'binary' value must equal the joined C:\\ path."""
     posix_style = "/tools/foo/gs.exe"
     expected_path = os.path.join("C:\\", "tools", "foo", "gs.exe")
     pygs = P.PyGs({'binary': posix_style})
     assert pygs.binary == expected_path
Example #15
0
 def test_find_gs_binary(self):
     """With an empty config, the binary found must contain 'gswin'."""
     found_binary = P.PyGs({}).binary
     assert "gswin" in found_binary
Example #16
0
 def test_gs_pdf_missing(self, capsys):
     """make_img_from_pdf on "missing123.pdf" exits and prints GS_MISSING_PDF."""
     gs_wrapper = P.PyGs({})
     with pytest.raises(SystemExit):
         gs_wrapper.make_img_from_pdf("missing123.pdf")

     captured_out, _captured_err = capsys.readouterr()
     expected_msg = gs_wrapper.msgs['GS_MISSING_PDF']
     assert expected_msg in captured_out
Example #17
0
 def test_gs_missing(self, tmpdir, caplog):
     """_run_gs with a 'gsblah' binary under tmpdir exits and logs an ERROR."""
     bogus_binary = str(tmpdir.join('gsblah'))
     pygs = P.PyGs({'binary': bogus_binary})

     with pytest.raises(SystemExit):
         pygs._run_gs("", "", "")

     error_records = [rec for rec in caplog.records
                      if rec.levelname == "ERROR"]
     assert any(error_records)