Ejemplo n.º 1
0
 def bmp_or_jpg_to_pdf(list_of_file_paths, destination_directory, delete_source=False):
     """Convert each existing image file to a PDF in *destination_directory*.

     :param list_of_file_paths: paths of the image files to convert. The list
         is not modified.
     :param destination_directory: directory that receives ``<name>.pdf`` for
         every converted image, where ``<name>`` is the source file's base
         name without its extension.
     :param delete_source: when ``True``, every converted source file is
         removed after all PDFs have been written.
     :return: ``{"code": 0, "invalid_file_paths": "None"}`` when every path
         exists; otherwise ``{"code": 1, "invalid_file_paths": "<a>,<b>"}``
         with the comma-joined paths that do not exist.
     """
     # Partition into two new lists. Removing items from the list that is
     # being iterated skips the element following each removal.
     valid_file_paths = []
     invalid_file_paths = []
     for file_path in list_of_file_paths:
         if os.path.exists(file_path):
             valid_file_paths.append(file_path)
         else:
             invalid_file_paths.append(file_path)

     for file_path in valid_file_paths:
         image = PyImage()
         # Density is set before read() so it is in effect while loading.
         image.density("600")
         image.read(file_path)
         name, _ = os.path.splitext(os.path.basename(file_path))
         image.write(os.path.join(destination_directory, name + ".pdf"))

     if delete_source is True:
         for file_path in valid_file_paths:
             os.remove(file_path)

     if not invalid_file_paths:
         return {"code": 0, "invalid_file_paths": "None"}
     # Report the paths that were rejected, not the ones that were converted.
     return {"code": 1, "invalid_file_paths": ",".join(invalid_file_paths)}
Ejemplo n.º 2
0
def NumpytoIM(img, usm=None, verbose=False):
    """Convert a numpy image array to a ``PMImage`` object.

    :param img: array whose ``shape`` unpacks to ``(height, width, channels)``;
        its raw bytes are loaded as 'RGB' data. A ``uint16`` dtype selects a
        16-bit depth, anything else 8-bit.
    :param usm: optional 4-item sequence ``(r, s, a, t)`` passed straight to
        ``unsharpmask``; ``None`` disables sharpening.
    :param verbose: when true, print progress messages.
    :return: the resulting image with its magick set to 'PNG'.
    """
    if verbose:
        print("Converting numpy array to ImageMagick")

    out_img = PMImage()
    out_img.depth(16 if img.dtype == 'uint16' else 8)
    out_img.magick('RGB')
    # The unpacking also enforces that the array is three-dimensional.
    h, w, _ = img.shape
    out_img.size(str(w) + 'x' + str(h))

    b = Blob()
    # tobytes() is the supported spelling; tostring() was removed in NumPy 2.0.
    b.data = img.tobytes()
    out_img.read(b)
    out_img.magick('PNG')

    # Identity test: `usm != None` is elementwise (and ambiguous in an `if`)
    # when usm is itself an array.
    if usm is not None:
        if verbose:
            print("Running unsharp mask filter")
        r, s, a, t = usm
        out_img.unsharpmask(r, s, a, t)

    return out_img
Ejemplo n.º 3
0
def main():
    """Render every page of every PDF in a directory to a JPEG file.

    The directory is ``sys.argv[1]`` or ``'.'`` when no argument is given.
    Each page is first written as a one-page PDF into a ``.temp``
    subdirectory, rendered to ``<pdf name>_<page index>.jpeg`` next to the
    source PDF, and the one-page PDF is then removed. The ``.temp`` directory
    is removed once all files have been processed.
    """
    # If ImageMagick is not found, set MAGICK_HOME first, e.g.:
    # os.environ["MAGICK_HOME"] = r"path_to_ImageMagick"

    if len(sys.argv) == 1:
        dirpath = '.'
    else:
        dirpath = sys.argv[1]

    temp_dir = os.path.join(dirpath, '.temp')
    if not os.path.exists(temp_dir):
        os.mkdir(temp_dir)

    suffix = '.pdf'
    try:
        for file_name in os.listdir(dirpath):
            in_path = os.path.join(dirpath, file_name)
            # Test the joined path: a bare name is resolved against the
            # current working directory, not against dirpath.
            if not os.path.isfile(in_path) or not file_name.endswith(suffix):
                continue
            print('Converting file {} ...'.format(file_name))
            # Strip only the trailing extension, not every '.pdf' occurrence.
            base_name = file_name[:-len(suffix)]
            with open(in_path, 'rb') as handle:
                inputpdf = PdfFileReader(handle)
                for i in range(inputpdf.numPages):
                    outputpdf = PdfFileWriter()
                    outputpdf.addPage(inputpdf.getPage(i))
                    page_pdf_path = os.path.join(
                        temp_dir, '{}_{}.pdf'.format(base_name, i))
                    output_path = os.path.join(
                        dirpath, '{}_{}.jpeg'.format(base_name, i))
                    try:
                        # Distinct name so the input handle is not shadowed.
                        with open(page_pdf_path, 'wb') as page_handle:
                            outputpdf.write(page_handle)

                        p = Image()
                        p.density('1080')
                        p.read(os.path.abspath(page_pdf_path))
                        p.write(os.path.abspath(output_path))
                    finally:
                        # Keep .temp empty so the final rmdir can succeed.
                        if os.path.exists(page_pdf_path):
                            os.remove(page_pdf_path)
    finally:
        # Remove the scratch directory once, after all files are done;
        # removing it inside the loop breaks every file after the first.
        os.rmdir(temp_dir)
def pdf2bmp(path):
    """Render the PDF at *path* to BMP file(s) next to it.

    A one-page PDF is written to ``<base>.bmp``. Otherwise each page is first
    extracted to ``<base>_page(<n>).pdf`` (n starting at 1) and rendered to
    ``<base>_page(<n>).bmp``; the extracted one-page PDFs are left on disk.
    ``<base>`` is *path* without its extension.

    :param path: path of the source PDF.
    :return: ``(output_bmp, page_num)`` where ``output_bmp`` is the path of
        the last BMP written (``None`` when the PDF has no pages) and
        ``page_num`` is the page count.
    """
    img = Image()
    # Stays None for a zero-page document instead of being unbound at return.
    output_bmp = None
    # splitext only touches the extension; str.replace(".pdf", ...) rewrites
    # every occurrence and returns the input path unchanged when the
    # extension differs in case, which would overwrite the source file.
    base_path = os.path.splitext(path)[0]

    # The context manager closes the file even if reading or rendering fails.
    with open(path, "rb") as f:
        opened_pdf = PdfFileReader(f)
        page_num = opened_pdf.getNumPages()

        if page_num == 1:
            img.read(path)
            output_bmp = base_path + ".bmp"
            img.write(output_bmp)
        else:
            for i in range(page_num):
                pdfw = PdfFileWriter()
                pdfw.addPage(opened_pdf.getPage(i))

                page_base = base_path + "_page(" + str(i + 1) + ")"
                output_pdf = page_base + ".pdf"
                with open(output_pdf, "wb") as output:
                    pdfw.write(output)

                img.read(output_pdf)
                output_bmp = page_base + ".bmp"
                img.write(output_bmp)

    return output_bmp, page_num
Ejemplo n.º 5
0
Archivo: utils.py Proyecto: unanono/VCR
def pdf2images(name):
    """Write one image file per page of the PDF *name*.

    The page count comes from ``getPdfNumPages(name)``. Page ``p`` is read
    as ``name[p]`` with density '200', quality 100 and depth 24, and written
    to ``name + str(p) + defaultImageExtension``.
    """
    page_total = getPdfNumPages(name)
    page_index = 0
    while page_index < page_total:
        index_text = str(page_index)
        page_image = Image()
        # Rendering settings are applied before the page is read.
        page_image.density('200')
        page_image.quality(100)
        page_image.depth(24)
        page_image.read(name + '[' + index_text + ']')
        page_image.write(name + index_text + defaultImageExtension)
        page_index += 1
Ejemplo n.º 6
0
def readPdf_page_by_page(filepath):
    """Write each page of the PDF at *filepath* to its own JPEG file.

    Page index ``n`` (0-based) is read as ``filepath[n]`` and written to
    ``filepath + str(n + 1) + ".jpg"``, so output files are numbered from 1.

    :param filepath: path of the source PDF.
    """
    # Open the PDF only long enough to learn its page count.
    with open(filepath, 'rb') as pdf_file:
        page_count = PdfFileReader(pdf_file).getNumPages()

    for n in range(page_count):
        im = Image()
        # im.density("300")
        # Read page n; a constant index would write the same page every time.
        im.read(filepath + '[' + str(n) + ']')
        im.magick("jpg")
        im.write(filepath + str(n + 1) + ".jpg")
Ejemplo n.º 7
0
def main():
    """Render every page of the PDF named on the command line to an image.

    Expects the PDF path as ``sys.argv[1]``; prints 'invalid usage!' and
    returns when it is missing. Page ``p`` is written to
    ``/dev/shm/<file name>_<p><out_format>``, where ``<file name>`` is the
    part of the path after the last '/'.
    """
    if len(sys.argv) < 2:
        print('invalid usage!')
        return
    pdf_filename = sys.argv[1]
    # open() replaces the Python-2-only file() builtin; the context manager
    # closes the handle as soon as the page count is known.
    with open(pdf_filename, "rb") as pdf_file:
        npage = PdfFileReader(pdf_file).getNumPages()
    fname = pdf_filename.split('/')[-1]
    tmppath = '/dev/shm/'
    for p in range(npage):
        im = Image()
        im.density('300')
        im.read(pdf_filename + '[' + str(p) + ']')
        im.write(tmppath + fname + '_' + str(p) + out_format)
Ejemplo n.º 8
0
class Answers:
    """One rendered page, held as a pygame surface and drawn by region."""

    def __init__(self, filename, i):
        """Read ``'<filename>[<i>]'`` at DENSITY and load it as a surface.

        The image goes through a temporary '.png' file because it is written
        by ``self.image`` and then loaded with ``pygame.image.load``.
        """
        self.image = Image()
        self.image.density('%d' % DENSITY)
        self.image.read('%s[%d]' % (filename, i))

        # The context manager closes (and so deletes) the temporary file even
        # if writing or loading raises.
        with tempfile.NamedTemporaryFile(suffix='.png') as temp:
            self.image.write(temp.name)
            self.pimage = pygame.image.load(temp.name)

    def draw(self, screen, C):
        """Clear *screen* to white and blit region *C* of the page at (0, 0).

        *C* is indexed as ``(left, top, right, bottom)``: the blitted area is
        ``C[2] - C[0]`` wide and ``C[3] - C[1]`` high.
        """
        pygame.draw.rect(screen, (255, 255, 255), (0, 0, W, H), 0)
        screen.blit(self.pimage, (0, 0), (C[0], C[1], C[2] - C[0], C[3] - C[1]))
        pygame.display.flip()
Ejemplo n.º 9
0
class Answers:
    """One rendered page, held as a pygame surface and drawn by region."""

    def __init__(self, filename, i):
        """Read ``'<filename>[<i>]'`` at DENSITY and load it as a surface.

        The image goes through a temporary '.png' file because it is written
        by ``self.image`` and then loaded with ``pygame.image.load``.
        """
        self.image = Image()
        self.image.density('%d' % DENSITY)
        self.image.read('%s[%d]' % (filename, i))

        # The context manager closes (and so deletes) the temporary file even
        # if writing or loading raises.
        with tempfile.NamedTemporaryFile(suffix='.png') as temp:
            self.image.write(temp.name)
            self.pimage = pygame.image.load(temp.name)

    def draw(self, screen, C):
        """Clear *screen* to white and blit region *C* of the page at (0, 0).

        *C* is indexed as ``(left, top, right, bottom)``: the blitted area is
        ``C[2] - C[0]`` wide and ``C[3] - C[1]`` high.
        """
        pygame.draw.rect(screen, (255, 255, 255), (0, 0, W, H), 0)
        screen.blit(self.pimage, (0, 0), (C[0], C[1], C[2] - C[0], C[3] - C[1]))
        pygame.display.flip()
def walk_menu(entry):
    """Recursively send every shown menu entry over ``conn``.

    Submenus (``xdg.Menu.Menu``) are descended into. For each
    ``xdg.Menu.MenuEntry`` the following is sent, in order:

    * the byte ``'\\x01'`` announcing another entry,
    * the icon as a length (``struct.pack('i', n)``) followed by ``n`` bytes
      of ICO data; length 0 and no data when there is no icon file or the
      conversion fails,
    * the entry name as length + data,
    * the command line as length + data.

    Entries whose ``Show`` attribute is not ``True`` are ignored.
    """
    if isinstance(entry, xdg.Menu.Menu) and entry.Show is True:
        # Explicit loop: map() is lazy on Python 3 and would never recurse.
        for child in entry.getEntries():
            walk_menu(child)
    elif isinstance(entry, xdg.Menu.MenuEntry) and entry.Show is True:
        # byte 1 signals another entry
        conn.sendall('\x01')
        img_path = icon_attr(entry.DesktopEntry).encode('utf-8')
        icon_data = None
        if img_path and os.path.isfile(img_path):
            # Only the conversion is guarded. Nothing is sent until it has
            # fully succeeded, so a failure cannot leave a half-written icon
            # record on the connection.
            try:
                # Create an empty image and set the background color to
                # transparent. This is important to have transparent background
                # when converting from SVG
                img = Image()
                img.backgroundColor(Color(0, 0, 0, 0xffff))
                img.read(img_path)
                # scale the image to 48x48 pixels
                img.scale(Geometry(48, 48))
                # ensure the image is converted to ICO
                img.magick('ICO')
                b = Blob()
                img.write(b)
                icon_data = b.data
            except Exception:
                # Best effort: an unreadable icon is reported as "no icon".
                icon_data = None

        # icon length plus data
        if icon_data:
            conn.sendall(struct.pack('i', len(icon_data)))
            conn.sendall(icon_data)
        else:
            conn.sendall(struct.pack('i', 0))

        name = entry.DesktopEntry.getName()
        # name length plus data
        conn.sendall(struct.pack('i', len(name)))
        conn.sendall(name)

        # Substitute the caption placeholder with the entry name, then drop
        # any remaining argument that contains a %-field code.
        command = re.sub(' -caption "%c"| -caption %c',
                ' -caption "%s"' % name, entry.DesktopEntry.getExec())
        command = re.sub(' [^ ]*%[fFuUdDnNickvm]', '', command)
        if entry.DesktopEntry.getTerminal():
            command = 'xterm -title "%s" -e %s' % (name, command)

        # command length plus data
        conn.sendall(struct.pack('i', len(command)))
        conn.sendall(command)
Ejemplo n.º 11
0
def pdf2jpg(pdf,temp):
    """Render *pdf* to a JPEG and return the JPEG's path.

    :param pdf: path of the source PDF (converted with ``str``).
    :param temp: prefix for the output path; it is concatenated directly with
        the file name, so a directory must include its trailing separator.
        A separate location is used in case the PDF's own directory is
        read only.
    :return: ``temp + <pdf base name without extension> + ".jpg"``.
    """
    pdf = str(pdf)
    base_name = os.path.splitext(os.path.basename(pdf))[0]
    jpg = str(temp + base_name + ".jpg")

    img = PMImage()
    img.density('300')
    img.depth(24)
    img.read(pdf)
    img.write(jpg)

    # Re-save through PIL as plain RGB. JPEG has no alpha channel, and recent
    # Pillow releases refuse to save an "RGBA" image as JPEG.
    rendered = Image.open(jpg)
    rgbimg = Image.new("RGB", rendered.size)
    rgbimg.paste(rendered)
    rgbimg.save(jpg)
    return jpg
Ejemplo n.º 12
0
def pdftojpg(fname):
    """Render every page of the PDF *fname* to JPEGs in a fresh directory.

    The output directory is named after the base name of *fname* and is
    created relative to the current working directory; an existing one is
    deleted first. Page ``n`` (1-based) is written to
    ``<directory>/some<n>.jpg``.

    :param fname: path of the source PDF.
    :return: the output directory name.
    """
    # NOTE(review): the directory has the same name as the PDF file itself,
    # so this cannot work when fname lives in the current directory - confirm
    # with callers before changing the name (it is the return value).
    directory = os.path.basename(fname)
    if os.path.exists(directory):
        shutil.rmtree(directory)
        # Pause kept from the original; presumably it lets the removal
        # settle before the directory is recreated.
        time.sleep(2)
    os.makedirs(directory)

    # The context manager closes the source PDF even if a page fails.
    with open(fname, "rb") as pdf_file:
        reader = PdfFileReader(pdf_file)
        for page_num in range(reader.getNumPages()):
            writer = PdfFileWriter()
            writer.addPage(reader.getPage(page_num))
            temp = NamedTemporaryFile(prefix=str(page_num), suffix=".pdf", delete=False)
            try:
                writer.write(temp)
                # Close before reading the file back by name.
                temp.close()

                im = Magick_Image()
                im.density("300") # DPI, for better quality
                im.read(temp.name)
                im.write(os.path.join(directory, "some%d.jpg" % (int(page_num)+1)))
            finally:
                # delete=False means the temp file is ours to clean up, on
                # failure as well as success.
                temp.close()
                os.remove(temp.name)
    return directory
Ejemplo n.º 13
0
def pdf_to_image():
    """Render page index 1 of every '.pdf' file in ``pdf_dir`` to a JPEG.

    Each page is read at density '300', composited onto a canvas of the same
    size filled with ``bg_colour``, and written as JPG (quality 75) next to
    the PDF as ``<pdf name without extension>.jpg``.
    """
    for pdf in os.listdir(pdf_dir):
        if not pdf.endswith(".pdf"):
            continue

        pdf_path = os.path.join(pdf_dir, pdf)
        # "[1]" selects a single page of the document when reading.
        input_pdf = pdf_path + "[1]"
        img = Image()
        img.density('300')
        print(input_pdf)
        img.read(input_pdf)

        size = "%sx%s" % (img.columns(), img.rows())

        output_img = Image(size, bg_colour)
        output_img.type = img.type
        output_img.composite(img, 0, 0,
                             PythonMagick.CompositeOperator.SrcOverCompositeOp)
        output_img.resize(str(img.rows()))
        output_img.magick('JPG')
        output_img.quality(75)

        # Build the output name from the plain PDF path; deriving it from the
        # read spec would carry the "[1]" page selector into the file name.
        output_jpg = os.path.splitext(pdf_path)[0] + ".jpg"
        output_img.write(output_jpg)
Ejemplo n.º 14
0
def to_imagemagick(img, bits=16):
    '''Convert numpy array to Imagemagick format.

    An argument that already is a ``PMImage`` is returned unchanged.
    Otherwise the array is passed through ``_scale`` and loaded as raw 'RGB'
    data; two-dimensional (single channel) arrays are expanded to three
    identical channels first. The result has its magick set to 'PNG'.

    :param img: image to convert
    :type img: Numpy ndarray
    :param bits: bit depth handed to ``_scale``
    :rtype: PythonMagick.Image
    '''

    if isinstance(img, PMImage):
        return img

    img = _scale(img, bits=bits)

    LOGGER.debug("Converting from Numpy to ImageMagick.")
    out_img = PMImage()
    out_img.depth(8 if img.dtype == np.uint8 else 16)

    shape = img.shape
    # Convert also B&W images to 3-channel arrays
    if len(shape) == 2:
        tmp = np.empty((shape[0], shape[1], 3), dtype=img.dtype)
        tmp[:, :, 0] = img
        tmp[:, :, 1] = img
        tmp[:, :, 2] = img
        img = tmp
    out_img.magick('RGB')
    # Geometry is "<width>x<height>", i.e. columns first.
    out_img.size(str(shape[1]) + 'x' + str(shape[0]))
    blob = Blob()
    # tobytes() is the supported spelling; tostring() was removed in NumPy 2.0.
    blob.data = img.tobytes()

    out_img.read(blob)
    out_img.magick('PNG')

    return out_img
def convert(args):
    """Render every page of ``args.ifile`` to ``<ofile base>_<page>.jpg``.

    Relative ``args.ifile`` / ``args.ofile`` paths are resolved against the
    current working directory. Each page is written to a scratch
    ``temp.pdf`` in the working directory, rendered at density "300" with a
    white background and fill colour, and the scratch file is removed again.

    :param args: object with ``ifile`` (input PDF) and ``ofile`` (output
        name; its extension is replaced by ``_<page index>.jpg``).
    """
    dirname = getcwd()
    ifilename = args.ifile if path.isabs(args.ifile) else path.join(dirname, args.ifile)
    ofilename = args.ofile if path.isabs(args.ofile) else path.join(dirname, args.ofile)
    ofilename_n_ext = path.splitext(ofilename)[0]
    temp_path = path.join(dirname, 'temp.pdf')

    # The context manager closes the input PDF even if a page fails.
    with open(ifilename, "rb") as pdf_file:
        reader = PdfFileReader(pdf_file)
        for page_num in range(reader.getNumPages()):
            writer = PdfFileWriter()
            writer.addPage(reader.getPage(page_num))
            try:
                with open(temp_path, 'wb') as temp:
                    writer.write(temp)

                im = Image()
                im.density("300")  # DPI, for better quality
                im.backgroundColor('white')
                im.fillColor('white')
                im.read(temp_path)
                im.write("%s_%d.jpg" % (ofilename_n_ext, page_num))
            finally:
                # Never leave the scratch file behind, even on failure.
                if path.exists(temp_path):
                    remove(temp_path)
Ejemplo n.º 16
0
        dir, fname = os.path.split(_pdf_path)
        base, ext = os.path.splitext(fname)
        out_base, _ = os.path.splitext(_pdf_path)

        # convert the pdf file to the jpg images
        command = 'pdftoppm %s %s -jpeg' % (_pdf_path.replace(
            ' ', '\ '), out_base.replace(' ', '\ '))
        os.system(command)

        paths = []
        # convert the jpg files to the list of cv image
        for f in os.listdir(dir):
            path = os.path.join(dir, f)
            if os.path.exists(path) and f.find(
                    base) != -1 and os.path.splitext(f)[1].find('jpg') != -1:
                paths.append(path)

        return paths


if __name__ == '__main__':
    # Script entry point: load a PDF into a PythonMagick image at density 300.
    import os, PythonMagick
    from PythonMagick import Image
    from datetime import datetime

    # Background colour as a hex string; not used within the lines shown here.
    bg_colour = "#ffffff"
    # NOTE(review): pdf_dir and pdf are not defined anywhere in the visible
    # part of this script - confirm where they are supposed to come from.
    # The path is joined with a literal backslash separator.
    input_pdf = pdf_dir + "\\" + pdf
    img = Image()
    # Density is set before read().
    img.density('300')
    img.read(input_pdf)
Ejemplo n.º 17
0
 def process(self):
     """Load the previous artifact's file, run ``do_magick`` on it and write
     the result to this artifact's file path."""
     # str() because the reader complains when handed a unicode path.
     source_path = str(self.artifact.previous_artifact_filepath)
     picture = Image()
     picture.read(source_path)
     self.do_magick(picture)
     target_path = self.artifact.filepath()
     picture.write(target_path)
Ejemplo n.º 18
0
    def tiff_to_pdf(list_of_file_paths, destination_directory):
        """
        This method converts individual TIFF image files to PDF, one PDF per TIFF.

        Every frame of a TIFF is saved as its own TIFF, re-saved as PNG and rendered to a
        one-page PDF inside a private temporary directory. Several page PDFs are merged
        with ``ImageOpr.merge_pdf``; a single page PDF is moved to the destination as
        ``<tiff name>.pdf``.
        :param list_of_file_paths: This argument is the list of absolute file paths for example
        ['C:/User/Documents/Images/Image1.tiff', 'C:/User/Documents/Images/Image2.tiff' ]
        The list is not modified.
        :param destination_directory: Pass in the absolute path to the directory you want the
        converted files to be saved to.
        :return: ``{"code": 0, "invalid_file_paths": "None"}`` when every path exists, otherwise
        ``{"code": 1, "invalid_file_paths": "<a>,<b>"}`` with the comma-joined paths that do
        not exist.
        """
        import tempfile  # local import: only this function needs it

        # Partition into two new lists. Removing items from the list that is being
        # iterated skips the element following each removal.
        valid_file_paths = []
        invalid_file_paths = []
        for file_path in list_of_file_paths:
            if os.path.exists(file_path):
                valid_file_paths.append(file_path)
            else:
                invalid_file_paths.append(file_path)

        # A unique scratch directory: a fixed "temp_dir" under the current working
        # directory could collide with (and later delete) unrelated user data.
        path_to_temp = tempfile.mkdtemp(prefix="tiff_to_pdf_")
        try:
            for tif_path in valid_file_paths:
                tif_file_name, _ = os.path.splitext(os.path.basename(tif_path))

                # Split the TIFF into one file per frame; seek() raises EOFError
                # past the last frame, so there is no fixed page limit.
                img = Image.open(tif_path)
                page_tifs = []
                page_index = 0
                while True:
                    try:
                        img.seek(page_index)
                    except EOFError:
                        break
                    page_path = os.path.join(path_to_temp, 'page' + str(page_index + 1) + ".tif")
                    img.save(page_path)
                    page_tifs.append(page_path)
                    page_index += 1

                # Re-save each page as PNG.
                page_pngs = []
                for page_tif in page_tifs:
                    page_name, _ = os.path.splitext(os.path.basename(page_tif))
                    png_path = os.path.join(path_to_temp, page_name + ".png")
                    Image.open(page_tif).save(png_path)
                    page_pngs.append(png_path)
                    os.remove(page_tif)

                # Render each PNG to a one-page PDF.
                page_pdfs = []
                for page_png in page_pngs:
                    page_name, _ = os.path.splitext(os.path.basename(page_png))
                    pdf_path = os.path.join(path_to_temp, page_name + ".pdf")
                    renderer = PyImage()
                    renderer.density("600")
                    renderer.read(page_png)
                    renderer.write(pdf_path)
                    page_pdfs.append(pdf_path)
                    os.remove(page_png)

                if len(page_pdfs) > 1:
                    ImageOpr.merge_pdf(page_pdfs, destination_directory, tif_file_name, delete_source=False)
                    for page_pdf in page_pdfs:
                        os.remove(page_pdf)
                elif page_pdfs:
                    shutil.move(page_pdfs[0], os.path.join(destination_directory, tif_file_name + ".pdf"))
        finally:
            shutil.rmtree(path_to_temp, ignore_errors=True)

        if not invalid_file_paths:
            return {"code": 0, "invalid_file_paths": "None"}
        # Report the paths that were rejected, not the ones that were converted.
        return {"code": 1, "invalid_file_paths": ",".join(invalid_file_paths)}
Ejemplo n.º 19
0
        out_file_pdf = os.path.join(outDir, "P" + str(i) + ".pdf")
        out_file_jpg = out_file_pdf.replace(".pdf", ".jpg")

        if not os.path.exists(out_file_pdf):

            # Read page i from pdfFile
            pageObj = in_file_pdf.getPage(i)
            pdfOut = PdfFileWriter()
            pdfOut.addPage(pageObj)
            print("Page %d" % i)

            # Write page i to a separate pdf file
            outputStream = file(out_file_pdf, "wb")
            pdfOut.write(outputStream)
            outputStream.close()

        if not os.path.exists(out_file_jpg):

            img.read(out_file_pdf)
            size = "%sx%s" % (img.columns(), img.rows())

            output_img = Image(size, bg_colour)
            output_img.type = img.type
            output_img.composite(
                img, 0, 0, PythonMagick.CompositeOperator.SrcOverCompositeOp)
            output_img.resize(str(img.rows()))
            output_img.magick('JPG')
            output_img.quality(75)
            output_img.write(out_file_jpg)
            print("Save page %d to %s" % (i, out_file_jpg))
Ejemplo n.º 20
0
import os
from pyPdf import PdfFileReader, PdfFileWriter
from tempfile import NamedTemporaryFile
from PythonMagick import Image
import sys

import shutil

pdfname = sys.argv[1]

# Copy /opt/<pdfname> to the working location. shutil.copy is used instead of
# a shell "cp" string so that spaces or shell metacharacters in the argument
# cannot break or hijack the command. /tmp/test2 must already exist.
shutil.copy("/opt/%s" % (pdfname), "/tmp/test2/ocr.pdf")

# Render each page to /tmp/test2/some_<page index>.jpg.
with open("/tmp/test2/ocr.pdf", "rb") as pdf_file:
    reader = PdfFileReader(pdf_file)
    for page_num in range(reader.getNumPages()):
        writer = PdfFileWriter()
        writer.addPage(reader.getPage(page_num))
        temp = NamedTemporaryFile(prefix=str(page_num), suffix=".pdf", delete=False)
        try:
            writer.write(temp)
            # Close before reading the file back by name.
            temp.close()

            im = Image()
            im.density("300") # DPI, for better quality
            im.read(temp.name)
            im.write("/tmp/test2/some_%d.jpg" % (page_num))
        finally:
            # delete=False means the temp file is ours to clean up, on
            # failure as well as success.
            temp.close()
            os.remove(temp.name)
Ejemplo n.º 21
0
class Map:
    """Reads the coordinates printed around a map document using OCR.

    The document at ``map_path`` is rendered at ``72 * scale_factor`` DPI.
    Small boxes next to the map area are cropped out and handed to
    ``OCR.GetDecimalDegrees``; the results are cached in ``_x``, ``_y``,
    ``_width`` and ``_height``, where 0 means "not read yet".

    The page layout is read from ``self.WIDTH``, ``self.HEIGHT`` and
    ``self.MARGIN_LEFT/RIGHT/TOP/BOTTOM``, which this class does not define
    itself; they must be provided elsewhere (e.g. by a subclass).
    """

    def __init__(self, map_path):
        """Remember *map_path* and try to read the coordinates right away."""
        self._map_path = map_path
        self._map_image = None

        self._ocr = OCR()

        self._x = 0.0
        self._y = 0.0
        self._width = 0.0
        self._height = 0.0

        # The OCR gets a bit buggy for scale factors
        # smaller than 3 and bigger than 5.
        self._scale_factor = 5

        self._RefreshCoordinates()

    def IsValid(self):
        """Return True when the cached coordinates look plausible.

        Requires all four values to be non-zero, the width within
        [0.001, 0.04], the height within [-0.04, -0.001], and the
        degrees-per-pixel ratios of both axes to agree within 0.0001.
        """
        # Compare by value: the fields hold floats, and `0.0 is 0` is never
        # true, so an identity test would let unset coordinates through.
        if self._x == 0 or self._y == 0:
            return False

        if self._width == 0 or self._height == 0:
            return False

        if self._width < 0.001 or self._width > 0.04:
            return False

        if self._height > -0.001 or self._height < -0.04:
            return False

        map_geometry = self._GetMapGeometry()

        height_pixel_ratio = self.GetHeight() / map_geometry.height()
        width_pixel_ratio = self.GetWidth() / map_geometry.width()

        # The ratio should not be very different from each other, otherwise
        # we OCR'ed one of the coordinates wrong. The difference is taken in
        # absolute terms so a mismatch in either direction is caught.
        if abs(abs(height_pixel_ratio) - abs(width_pixel_ratio)) > 0.0001:
            return False

        return True

    def SetScaleFactor(self, factor):
        """Change the rendering scale; re-render and re-read when it differs."""
        # Value comparison: equal numbers are not guaranteed to be identical
        # objects.
        if factor == self._scale_factor:
            return

        self._scale_factor = factor

        if self._map_image:
            self._GenerateImage()

        self._RefreshCoordinates()

    def GetMapImage(self):
        """Return the rendered document cropped to the map area."""
        return self._CropGeometry(self._GetMapGeometry())

    def GetX(self):
        """Return the cached x coordinate, reading it via OCR if unset.

        The box read sits above the map area's top-left corner.
        """
        if self._x:
            return self._x

        map_geometry = self._GetMapGeometry()

        offset = _MAGIC_COORDINATE_OFFSET_ * self._scale_factor
        width = _MAGIC_COORDINATE_BBOX_WIDTH_ * self._scale_factor
        height = _MAGIC_COORDINATE_BBOX_HEIGHT_ * self._scale_factor

        coordinate_geometry = Geometry(width, height, map_geometry.xOff(), map_geometry.yOff() - offset - height)

        image = self._CropGeometry(coordinate_geometry)
        self._x = self._ocr.GetDecimalDegrees(image)

        return self._x

    def GetY(self):
        """Return the cached y coordinate, reading it via OCR if unset.

        The box read sits left of the map area's top-left corner; it uses
        swapped box dimensions and is rotated by 90 before OCR.
        """
        if self._y:
            return self._y

        map_geometry = self._GetMapGeometry()

        offset = _MAGIC_COORDINATE_OFFSET_ * self._scale_factor
        width = _MAGIC_COORDINATE_BBOX_HEIGHT_ * self._scale_factor
        height = _MAGIC_COORDINATE_BBOX_WIDTH_ * self._scale_factor

        coordinate_geometry = Geometry(width, height, map_geometry.xOff() - offset - width, map_geometry.yOff())

        image = self._CropGeometry(coordinate_geometry)
        image.rotate(90)
        self._y = self._ocr.GetDecimalDegrees(image)

        return self._y

    def GetWidth(self):
        """Return the cached width, computing it via OCR if unset.

        The value read below the map area's bottom-right corner minus
        ``GetX()``.
        """
        if self._width:
            return self._width

        map_geometry = self._GetMapGeometry()

        offset = _MAGIC_COORDINATE_OFFSET_ * self._scale_factor
        width = _MAGIC_COORDINATE_BBOX_WIDTH_ * self._scale_factor
        height = _MAGIC_COORDINATE_BBOX_HEIGHT_ * self._scale_factor

        x_offset = map_geometry.xOff() + map_geometry.width()
        y_offset = map_geometry.yOff() + map_geometry.height()

        coordinate_geometry = Geometry(width, height, x_offset - width, y_offset + offset)

        image = self._CropGeometry(coordinate_geometry)
        self._width = self._ocr.GetDecimalDegrees(image) - self.GetX()

        return self._width

    def GetHeight(self):
        """Return the cached height, computing it via OCR if unset.

        The value read right of the map area's bottom-right corner (box
        rotated by 90 before OCR) minus ``GetY()``.
        """
        if self._height:
            return self._height

        map_geometry = self._GetMapGeometry()

        offset = _MAGIC_COORDINATE_OFFSET_ * self._scale_factor
        width = _MAGIC_COORDINATE_BBOX_HEIGHT_ * self._scale_factor
        height = _MAGIC_COORDINATE_BBOX_WIDTH_ * self._scale_factor

        x_offset = map_geometry.xOff() + map_geometry.width()
        y_offset = map_geometry.yOff() + map_geometry.height()

        coordinate_geometry = Geometry(width, height, x_offset + offset, y_offset - height)

        image = self._CropGeometry(coordinate_geometry)
        image.rotate(90)
        self._height = self._ocr.GetDecimalDegrees(image) - self.GetY()

        return self._height

    def _RefreshCoordinates(self):
        """Re-read all coordinates unless the cached ones are already valid."""
        if self.IsValid():
            return

        self._x = 0.0
        self._y = 0.0
        self._width = 0.0
        self._height = 0.0

        # GetWidth/GetHeight read x and y as part of their computation.
        self.GetWidth()
        self.GetHeight()

    def _CropGeometry(self, geometry):
        """Return a cropped copy of the rendered document (rendering lazily)."""
        if not self._map_image:
            self._GenerateImage()

        image = Image(self._map_image)
        image.crop(geometry)

        return image

    def _GetMapGeometry(self):
        """Return the map area (page minus margins) scaled by the scale factor."""
        width = self.WIDTH - self.MARGIN_LEFT - self.MARGIN_RIGHT
        height = self.HEIGHT - self.MARGIN_TOP - self.MARGIN_BOTTOM

        width *= self._scale_factor
        height *= self._scale_factor
        margin_left = self.MARGIN_LEFT * self._scale_factor
        margin_top = self.MARGIN_TOP * self._scale_factor

        return Geometry(width, height, margin_left, margin_top)

    def _GenerateImage(self):
        """Render the document at 72 DPI times the scale factor."""
        scaled_density = 72 * self._scale_factor

        self._map_image = Image()
        self._map_image.density("%dx%d" % (scaled_density, scaled_density))
        self._map_image.read(self._map_path)
Ejemplo n.º 22
0
__author__ = 'davidhalldor'
# Setting up the libs
# http://stackoverflow.com/questions/13984357/pythonmagick-cant-find-my-pdf-files

import os

from PythonMagick import Image

im = Image()

# Walk the "Snidagerd" tree, convert every PDF to a JPEG with the same base
# name and delete the PDF afterwards.
for dirpath, dirnames, filenames in os.walk("Snidagerd"):
    print(dirpath)
    for fn in filenames:
        print("    %s" % fn)
        base, ext = os.path.splitext(fn)
        # Only touch PDFs. Treating every file without ".jpg" in its name as
        # a PDF would read, overwrite and then delete unrelated files.
        if ext.lower() != ".pdf":
            continue
        new_fn = base + ".jpg"
        print("New file name:  %s" % new_fn)
        source_path = os.path.abspath(os.path.join(dirpath, fn))
        target_path = os.path.abspath(os.path.join(dirpath, new_fn))
        im.read(source_path)
        im.write(target_path)
        # The source is removed only after the JPEG has been written.
        os.remove(source_path)

Ejemplo n.º 23
0
# Credentials passed to the AipOcr client below; '??' are placeholders.
APP_ID='??'
API_KEY='??'
SECRET_KEY='??'
# Path of the wkhtmltopdf executable handed to pdfkit. The literal is
# placeholder text (Chinese for "set the pdfkit install location") and has to
# be replaced with a real path.
path_wk=r'pdfkit安装位置设置'
pdfkit_config=pdfkit.configuration(wkhtmltopdf=path_wk)
pdfkit_options={'encoding':'UTF-8',}

# NOTE(review): path, pdfname and DPI are not defined in the visible part of
# this script - confirm where they come from.
os.chdir(path)
# NOTE(review): this file handle is never closed explicitly.
pdf_input=PdfFileReader(open(pdfname, 'rb'))
page_count=pdf_input.getNumPages()
page_range=range(page_count)

# Render every page of the PDF to "<page index>.jpg" in the current directory
# (which is `path` after the chdir above).
for page_num in page_range:
	im=Image()
	im.density(DPI)
	im.read(pdfname + '[' + str(page_num) +']')
	im.write(str(page_num)+ '.jpg')

client=AipOcr(APP_ID, API_KEY, SECRET_KEY)
def get_file_content(filePath):
	"""Return the complete contents of the file at *filePath* as bytes."""
	with open(filePath, 'rb') as stream:
		payload = stream.read()
	return payload

# Recognition options as string values; presumably passed to the OCR client
# by code outside the visible part of this script - TODO confirm.
options={
	"language_type":"CHN_ENG",
	"detect_direction":"false",
	"detect_language":"false",
	"probability":"false",
}
allteststr=[]
# Load the bytes of each page image rendered earlier ("<path>\<page>.jpg").
for page_num in page_range:
	image=get_file_content(r'%s\%s.jpg' % (path, page_num))
Ejemplo n.º 24
0
def upload():
    """Handle an uploaded PDF or image and return its detected text as JSON.

    For a POST request the file in ``request.files['file']`` is saved under
    ``basedir/UPLOAD_FOLDER``:

    * ``.pdf``: every page is rendered to a JPEG under ``./images/<name>/``
      and sent for text detection; the response maps ``"file_name"`` to the
      upload's name and ``"page_no-<image file>"`` to each page's text.
    * ``.png`` / ``.jpg`` / ``.jpeg``: the image itself is sent; the response
      holds ``"file_name"`` and ``"text"``.
    * anything else: ``{'file format is wrong': <file name>}``.

    The saved upload and any rendered page images are removed again, also
    when processing fails. For non-POST requests the result is
    ``{'file proess completed': null}``.
    """
    # Bound up front so the fall-through return below cannot hit an unbound
    # name on non-POST requests.
    f_name = None
    if request.method == 'POST':
        uploaded = request.files['file']
        # The file name comes from the client: keep only its last path
        # component so it cannot point outside the upload/image directories.
        safe_name = os.path.basename(uploaded.filename)
        name, extension = os.path.splitext(safe_name)
        f_name = name + extension
        if extension == ".pdf":
            # Read back from exactly where the upload was saved.
            saved_path = os.path.join(basedir, UPLOAD_FOLDER, f_name)
            uploaded.save(saved_path)
            image_dir = os.path.join('./images', name)
            if not os.path.isdir(image_dir):
                os.makedirs(image_dir)
            result = {"file_name": f_name}
            try:
                with open(saved_path, "rb") as pdf_file:
                    reader = PdfFileReader(pdf_file)
                    for page_num in range(reader.getNumPages()):
                        writer = PdfFileWriter()
                        writer.addPage(reader.getPage(page_num))
                        temp = NamedTemporaryFile(prefix=str(page_num), suffix=".pdf", delete=False)
                        try:
                            writer.write(temp)
                            # Close before reading the file back by name.
                            temp.close()
                            im = Image()
                            im.density("300")  # DPI, for better quality
                            im.read(temp.name)
                            # NOTE(review): the space before %d ends up in the
                            # image file name and therefore in the response
                            # keys; kept because clients may rely on them.
                            im.write("images/%s/ %d.jpg" % (str(name), page_num))
                        finally:
                            temp.close()
                            os.remove(temp.name)
                for page_file in os.listdir(image_dir):
                    key = "page_no-" + str(page_file)
                    result[key] = _detect_text(os.path.join(image_dir, page_file))
            finally:
                os.remove(saved_path)
                shutil.rmtree(image_dir)
            return json.dumps(result)
        elif extension == '.png' or extension == '.jpg' or extension == '.jpeg':
            result = {"file_name": f_name}
            saved_path = os.path.join(basedir, UPLOAD_FOLDER, f_name)
            uploaded.save(saved_path)
            try:
                result["text"] = _detect_text(saved_path)
            finally:
                os.remove(saved_path)
            return json.dumps(result)
        else:
            return json.dumps({'file format is wrong': f_name})

    return json.dumps({'file proess completed': f_name})


def _detect_text(image_path):
    """POST the image at *image_path* to ``url`` with a TEXT_DETECTION
    feature and return the first text annotation's description."""
    with io.open(image_path, 'rb') as image_file:
        # Decode to text so the payload is JSON-serialisable on Python 3 too.
        content = base64.b64encode(image_file.read()).decode('ascii')
    payload = {
        "requests": [
            {
                "image": {
                    "content": content
                },
                "features": [
                    {
                        "type": "TEXT_DETECTION"
                    }
                ]
            }
        ]
    }
    response = json.loads(requests.post(url, data=json.dumps(payload)).text)
    # NOTE(review): raises KeyError/IndexError when the response carries no
    # text annotation - confirm how such images should be reported.
    return response['responses'][0]['textAnnotations'][0]['description']