def bmp_or_jpg_to_pdf(list_of_file_paths, destination_directory, delete_source=False):
    """
    Convert individual BMP/JPG image files to PDF.

    Every path that exists is read with ``PyImage`` at a density of "600" and
    written to ``destination_directory`` as ``<source base name>.pdf``.

    :param list_of_file_paths: Iterable of image file paths. The argument is
        not modified.
    :param destination_directory: Directory the PDF files are written to.
    :param delete_source: When exactly ``True``, every converted source file
        is removed after all conversions have finished.
    :return: A dictionary with ``"code"`` (0 when every path existed, 1
        otherwise) and ``"invalid_file_paths"`` (the string ``"None"`` or the
        comma-joined paths that did not exist).
    """
    valid_file_paths = []
    invalid_file_paths = []
    # Partition into two new lists. Removing entries from the input while
    # iterating over it skips the element after each removal and silently
    # alters the caller's list.
    for file_path in list_of_file_paths:
        if os.path.exists(file_path):
            valid_file_paths.append(file_path)
        else:
            invalid_file_paths.append(file_path)

    for file_path in valid_file_paths:
        image = PyImage()
        image.density("600")
        image.read(file_path)
        name, _ext = os.path.splitext(os.path.basename(file_path))
        image.write(os.path.join(destination_directory, name + ".pdf"))

    if delete_source is True:
        for file_path in valid_file_paths:
            os.remove(file_path)

    if not invalid_file_paths:
        return {"code": 0, "invalid_file_paths": "None"}
    # Report the paths that were rejected, not the ones that were converted.
    return {"code": 1, "invalid_file_paths": ",".join(invalid_file_paths)}
def NumpytoIM(img, usm=None, verbose=False):
    """Convert a numpy image array to an ImageMagick image.

    The array is handed to ImageMagick as raw 'RGB' data and the resulting
    image is then tagged as 'PNG'.

    :param img: numpy array with a three-element shape, unpacked as
        (height, width, channels). A 'uint16' dtype selects a depth of 16,
        anything else a depth of 8.
    :param usm: optional sequence of four values forwarded to
        ``unsharpmask`` (presumably radius, sigma, amount, threshold --
        confirm against the PythonMagick API). ``None`` skips sharpening.
    :param verbose: print progress messages when true.
    :return: the ``PMImage`` built from the array.
    """
    if verbose:
        print("Converting numpy array to ImageMagick")

    out_img = PMImage()
    out_img.depth(16 if img.dtype == 'uint16' else 8)
    out_img.magick('RGB')

    # Unpacking three values keeps the original requirement of a 3-D array.
    height, width, _channels = img.shape
    out_img.size(str(width) + 'x' + str(height))

    blob = Blob()
    # tobytes() is the supported spelling; tostring() was removed in NumPy 2.0.
    blob.data = img.tobytes()
    out_img.read(blob)
    out_img.magick('PNG')

    # Check if USM sharpening should be used. An identity test is required:
    # "usm != None" is evaluated element-wise when usm is a numpy array.
    if usm is not None:
        if verbose:
            print("Running unsharp mask filter")
        r, s, a, t = usm
        out_img.unsharpmask(r, s, a, t)

    return out_img
def main():
    """Convert every page of every PDF in a directory to a JPEG image.

    The directory is ``sys.argv[1]`` (default: the current directory). Each
    PDF is split into single-page PDFs inside a ``.temp`` sub-directory; each
    of those is rendered at a density of '1080' to
    ``<dirpath>/<pdf name>_<page index>.jpeg`` and then deleted. ``.temp`` is
    removed at the end.
    """
    # os.environ["MAGICK_HOME"] = r"path_to_ImageMagick"
    dirpath = '.' if len(sys.argv) == 1 else sys.argv[1]

    temp_dir = os.path.join(dirpath, '.temp')
    if not os.path.exists(temp_dir):
        os.mkdir(temp_dir)

    for file_name in os.listdir(dirpath):
        in_path = os.path.join(dirpath, file_name)
        # os.listdir() yields bare names, so the file test has to use the
        # joined path; testing the bare name only works when dirpath is the
        # current working directory.
        if not os.path.isfile(in_path) or not file_name.endswith('.pdf'):
            continue

        print('Converting file {} ...'.format(file_name))
        base_name = file_name.replace('.pdf', '')
        with open(in_path, 'rb') as in_handle:
            inputpdf = PdfFileReader(in_handle)
            for i in range(inputpdf.numPages):
                outputpdf = PdfFileWriter()
                outputpdf.addPage(inputpdf.getPage(i))

                new_file_name = base_name + '_{}.pdf'.format(i)
                new_in_path = os.path.join(temp_dir, new_file_name)
                # A distinct name keeps the input handle from being shadowed.
                with open(new_in_path, 'wb') as out_handle:
                    outputpdf.write(out_handle)

                output_file_name = new_file_name.replace('.pdf', '.jpeg')
                output_path = os.path.join(dirpath, output_file_name)

                p = Image()
                p.density('1080')
                p.read(os.path.abspath(new_in_path))
                p.write(os.path.abspath(output_path))

                os.remove(new_in_path)

    os.rmdir(temp_dir)
def pdf2bmp(path):
    """Convert a PDF file to BMP image(s).

    A single-page PDF is written to ``<path without extension>.bmp``. A
    multi-page PDF is first split into ``<base>_page(<n>).pdf`` files (page
    numbers starting at 1, files left on disk) and each of those is written
    to the matching ``.bmp``.

    :param path: path of the PDF to convert.
    :return: tuple ``(output_bmp, page_num)`` -- the path of the last BMP
        written (``None`` when the PDF has no pages) and the page count.
    """
    img = Image()
    # splitext() only swaps the real extension; str.replace(".pdf", ...)
    # would also rewrite ".pdf" inside directory names and leave an
    # upper-case ".PDF" untouched.
    base_path = os.path.splitext(path)[0]
    output_bmp = None  # stays None for a PDF without pages

    # The context manager closes the source even when a page fails.
    with open(path, "rb") as pdf_file:
        opened_pdf = PdfFileReader(pdf_file)
        page_num = opened_pdf.getNumPages()
        if page_num == 1:
            img.read(path)
            output_bmp = base_path + ".bmp"
            img.write(output_bmp)
        else:
            for i in range(page_num):
                pdfw = PdfFileWriter()
                pdfw.addPage(opened_pdf.getPage(i))
                page_base = base_path + "_page(" + str(i + 1) + ")"
                output_pdf = page_base + ".pdf"
                with open(output_pdf, "wb") as output:
                    pdfw.write(output)
                img.read(output_pdf)
                output_bmp = page_base + ".bmp"
                img.write(output_bmp)

    return output_bmp, page_num
def pdf2images(name):
    """Render each page of the PDF ``name`` to its own image file.

    The page count comes from ``getPdfNumPages``. Page ``p`` is read through
    the ``name[p]`` selector at density '200', quality 100 and depth 24, then
    written to ``name + str(p) + defaultImageExtension``.
    """
    page_total = getPdfNumPages(name)
    for page_index in range(page_total):
        page_image = Image()
        page_image.density('200')
        page_image.quality(100)
        page_image.depth(24)
        page_selector = '[' + str(page_index) + ']'
        page_image.read(name + page_selector)
        target = name + str(page_index) + defaultImageExtension
        page_image.write(target)
def readPdf_page_by_page(filepath):
    """Render every page of a PDF to a JPEG file.

    Page ``n`` (zero-based) is written to ``filepath + str(n + 1) + ".jpg"``,
    so the output files are numbered from 1.

    :param filepath: path of the PDF to render.
    """
    # Open the PDF only to find out how many pages it has; the context
    # manager closes the handle again.
    with open(filepath, 'rb') as pdf_file:
        page_count = PdfFileReader(pdf_file).getNumPages()

    # Render the content of page n of the PDF.
    for n in range(page_count):
        im = Image()
        # im.density("300")
        # Select frame n. A hard-coded "[1]" renders the second page on every
        # iteration and fails outright for single-page documents.
        im.read(filepath + '[' + str(n) + ']')
        im.magick("jpg")
        im.write(filepath + str(n + 1) + ".jpg")
def main():
    """Render each page of the PDF named in ``sys.argv[1]`` to an image.

    Prints 'invalid usage!' and returns when no argument is given. Otherwise
    page ``p`` is read at density '300' and written to
    ``/dev/shm/<file name>_<p><out_format>``.
    """
    if len(sys.argv) < 2:
        print('invalid usage!')
        return

    pdf_filename = sys.argv[1]
    # open() inside a context manager replaces the Python-2-only file()
    # builtin and makes sure the handle is closed.
    with open(pdf_filename, "rb") as pdf_file:
        npage = PdfFileReader(pdf_file).getNumPages()

    fname = pdf_filename.split('/')[-1]
    tmppath = '/dev/shm/'
    for p in range(npage):
        im = Image()
        im.density('300')
        im.read(pdf_filename + '[' + str(p) + ']')
        im.write(tmppath + fname + '_' + str(p) + out_format)
class Answers:
    """One rendered page of a document that can be drawn with pygame."""

    def __init__(self, filename, i):
        """Render frame ``i`` of ``filename`` and load it as a pygame image.

        The frame is read at ``DENSITY``, written to a temporary '.png' file
        and loaded from there with ``pygame.image.load``.
        """
        density_text = '%d' % DENSITY
        frame_selector = '%s[%d]' % (filename, i)

        self.image = Image()
        self.image.density(density_text)
        self.image.read(frame_selector)

        scratch = tempfile.NamedTemporaryFile(suffix='.png')
        self.image.write(scratch.name)
        self.pimage = pygame.image.load(scratch.name)
        scratch.close()

    def draw(self, screen, C):
        """Clear ``screen`` to white and blit the region ``C`` of the page.

        ``C`` is indexed as (left, top, right, bottom); the region is drawn
        at the top-left corner of ``screen`` and the display is flipped.
        """
        white = (255, 255, 255)
        pygame.draw.rect(screen, white, (0, 0, W, H), 0)
        source_area = (C[0], C[1], C[2] - C[0], C[3] - C[1])
        screen.blit(self.pimage, (0, 0), source_area)
        pygame.display.flip()
def walk_menu(entry):
    """Recursively send the visible entries of an XDG menu over ``conn``.

    Sub-menus with ``Show is True`` are walked recursively. For every
    visible menu entry the following is sent, in this order:

    * the byte ``'\\x01'`` (signals another entry),
    * the icon length packed with ``struct`` format ``'i'`` followed by the
      ICO data, or a packed ``0`` when there is no icon file or the
      conversion fails,
    * the name length packed as ``'i'`` followed by the name,
    * the command length packed as ``'i'`` followed by the command.

    :param entry: an ``xdg.Menu.Menu`` or ``xdg.Menu.MenuEntry``; any other
        object, or one whose ``Show`` is not ``True``, is ignored.
    """
    if isinstance(entry, xdg.Menu.Menu) and entry.Show is True:
        # An explicit loop: map() is lazy on Python 3, so using it purely
        # for its side effects would never descend into the sub-menu.
        for child in entry.getEntries():
            walk_menu(child)
    elif isinstance(entry, xdg.Menu.MenuEntry) and entry.Show is True:
        # byte 1 signals another entry
        conn.sendall('\x01')
        img_path = icon_attr(entry.DesktopEntry).encode('utf-8')
        if img_path and os.path.isfile(img_path):
            try:
                # Create an empty image and set the background color to
                # transparent. This is important to have transparent background
                # when converting from SVG
                img = Image()
                img.backgroundColor(Color(0, 0, 0, 0xffff))
                img.read(img_path)
                # scale the image to 48x48 pixels
                img.scale(Geometry(48, 48))
                # ensure the image is converted to ICO
                img.magick('ICO')
                b = Blob()
                img.write(b)
                # icon length plus data
                conn.sendall(struct.pack('i', len(b.data)))
                conn.sendall(b.data)
            except Exception:
                # Best effort: an unreadable icon is reported as "no icon".
                conn.sendall(struct.pack('i', 0))
        else:
            conn.sendall(struct.pack('i', 0))

        name = entry.DesktopEntry.getName()
        # name length plus data
        conn.sendall(struct.pack('i', len(name)))
        conn.sendall(name)

        caption = ' -caption "%s"' % name
        # A callable replacement inserts the caption verbatim; as a plain
        # string it would be parsed as a template, so a backslash in the
        # menu name would be treated as an escape sequence.
        command = re.sub(' -caption "%c"| -caption %c',
                         lambda _match: caption,
                         entry.DesktopEntry.getExec())
        # Drop the remaining desktop-entry field codes (%f, %U, ...).
        command = re.sub(' [^ ]*%[fFuUdDnNickvm]', '', command)
        if entry.DesktopEntry.getTerminal():
            command = 'xterm -title "%s" -e %s' % (name, command)
        # command length plus data
        conn.sendall(struct.pack('i', len(command)))
        conn.sendall(command)
def pdf2jpg(pdf, temp):
    """Render a PDF to a JPEG file and return the JPEG's path.

    The JPEG is written to ``temp + <pdf base name> + ".jpg"``. A temp
    location is used in case the PDF's own location is read only.

    :param pdf: path of the PDF (converted with ``str``).
    :param temp: path prefix for the output. It is concatenated, not joined,
        so a directory must be passed with its trailing separator.
    :return: path of the JPEG file, as ``str``.
    """
    pdf = str(pdf)
    base_name = os.path.splitext(os.path.basename(pdf))[0]
    jpg = str(temp + base_name + ".jpg")

    img = PMImage()
    img.density('300')
    img.depth(24)
    img.read(pdf)
    img.write(jpg)

    # Re-save through PIL on an opaque canvas. The canvas must be "RGB":
    # JPEG has no alpha channel and current Pillow refuses to save "RGBA"
    # images as JPEG.
    rendered = Image.open(jpg)
    rgbimg = Image.new("RGB", rendered.size)
    rgbimg.paste(rendered)
    rgbimg.save(jpg)
    return jpg
def pdftojpg(fname):
    """Render every page of a PDF to a JPEG inside a fresh directory.

    The directory is named after the base name of ``fname`` and is created
    relative to the current working directory; an existing one is deleted
    first. Page ``n`` (zero-based) is written to ``<directory>/some<n+1>.jpg``
    at 300 DPI.

    :param fname: path of the PDF to convert.
    :return: the name of the output directory.
    """
    directory = os.path.basename(fname)
    # The context manager closes the source PDF even if a page fails.
    with open(fname, "rb") as pdf_file:
        reader = PdfFileReader(pdf_file)

        # NOTE(review): the directory has the same name as the PDF itself, so
        # converting a PDF that sits in the current working directory makes
        # rmtree() target the file -- confirm the intended directory name.
        if not os.path.exists(directory):
            os.makedirs(directory)
        else:
            shutil.rmtree(directory)
            # Presumably gives the file system time to finish the removal
            # before the directory is recreated -- TODO confirm.
            time.sleep(2)
            os.makedirs(directory)

        for page_num in range(reader.getNumPages()):
            writer = PdfFileWriter()
            writer.addPage(reader.getPage(page_num))
            temp = NamedTemporaryFile(prefix=str(page_num), suffix=".pdf", delete=False)
            try:
                writer.write(temp)
                temp.close()

                im = Magick_Image()
                im.density("300")  # DPI, for better quality
                im.read(temp.name)
                im.write(directory + "/some%d.jpg" % (int(page_num) + 1))
            finally:
                # delete=False means nobody else cleans up this file.
                temp.close()
                os.remove(temp.name)
    return directory
def pdf_to_image():
    """Convert frame ``[1]`` of every '.pdf' file in ``pdf_dir`` to a JPEG.

    The frame is read at density '300', composited onto a canvas filled with
    ``bg_colour`` and written next to the PDF as ``<name>.jpg`` with quality
    75.
    """
    for pdf in os.listdir(pdf_dir):
        if not pdf.endswith(".pdf"):
            continue

        # os.path.join() instead of a hard-coded "\\" keeps this portable.
        pdf_path = os.path.join(pdf_dir, pdf)
        # "[1]" is an ImageMagick frame selector (zero-based, i.e. the second
        # page). NOTE(review): confirm that the second page is intended.
        input_pdf = pdf_path + "[1]"

        img = Image()
        img.density('300')
        print(input_pdf)
        img.read(input_pdf)

        size = "%sx%s" % (img.columns(), img.rows())
        output_img = Image(size, bg_colour)
        output_img.type = img.type
        output_img.composite(img, 0, 0, PythonMagick.CompositeOperator.SrcOverCompositeOp)
        output_img.resize(str(img.rows()))
        output_img.magick('JPG')
        output_img.quality(75)

        # Derive the output name from the plain PDF path; deriving it from
        # input_pdf would carry the "[1]" selector into the file name
        # ("x.jpg[1]").
        output_jpg = os.path.splitext(pdf_path)[0] + ".jpg"
        output_img.write(output_jpg)
def to_imagemagick(img, bits=16):
    '''Convert numpy array to Imagemagick format.

    An image that already is a ``PMImage`` is returned unchanged. Otherwise
    the array is passed through ``_scale`` with the requested bit depth,
    expanded to three channels if it is two-dimensional, and loaded into a
    new image as raw 'RGB' data that is then tagged as 'PNG'.

    :param img: image to convert
    :type img: Numpy ndarray
    :param bits: bit depth handed to ``_scale``
    :rtype: PythonMagick.Image
    '''
    if isinstance(img, PMImage):
        return img

    img = _scale(img, bits=bits)
    LOGGER.debug("Converting from Numpy to ImageMagick.")

    out_img = PMImage()
    out_img.depth(8 if img.dtype == np.uint8 else 16)

    shape = img.shape
    # Convert also B&W images to 3-channel arrays
    if len(shape) == 2:
        tmp = np.empty((shape[0], shape[1], 3), dtype=img.dtype)
        for channel in range(3):
            tmp[:, :, channel] = img
        img = tmp

    out_img.magick('RGB')
    out_img.size(str(shape[1]) + 'x' + str(shape[0]))

    blob = Blob()
    # tobytes() is the supported spelling; tostring() was removed in NumPy 2.0.
    blob.data = img.tobytes()
    out_img.read(blob)
    out_img.magick('PNG')

    return out_img
def convert(args):
    """Render every page of a PDF to ``<output base>_<page>.jpg``.

    Relative ``args.ifile`` / ``args.ofile`` are resolved against the current
    working directory. Each page is written to a scratch ``temp.pdf`` in that
    directory, rendered at 300 DPI with a white background and fill colour,
    and saved as ``<ofile without extension>_<page index>.jpg``.

    :param args: object with ``ifile`` (input PDF) and ``ofile`` (output
        name) attributes.
    """
    dirname = getcwd()
    ifilename = path.join(
        dirname, args.ifile) if not path.isabs(args.ifile) else args.ifile
    ofilename = path.join(
        dirname, args.ofile) if not path.isabs(args.ofile) else args.ofile
    ofilename_n_ext = path.splitext(ofilename)[0]
    temp_pdf = path.join(dirname, 'temp.pdf')

    try:
        # The context manager closes the source PDF even if a page fails.
        with open(ifilename, "rb") as source:
            reader = PdfFileReader(source)
            for page_num in range(reader.getNumPages()):
                writer = PdfFileWriter()
                writer.addPage(reader.getPage(page_num))
                with open(temp_pdf, 'wb') as temp:
                    writer.write(temp)

                im = Image()
                im.density("300")  # DPI, for better quality
                im.backgroundColor('white')
                im.fillColor('white')
                im.read(temp_pdf)
                im.write("%s_%d.jpg" % (ofilename_n_ext, page_num))
    finally:
        # Do not leave the scratch file behind when a page fails to render.
        if path.exists(temp_pdf):
            remove(temp_pdf)
dir, fname = os.path.split(_pdf_path) base, ext = os.path.splitext(fname) out_base, _ = os.path.splitext(_pdf_path) # convert the pdf file to the jpg images command = 'pdftoppm %s %s -jpeg' % (_pdf_path.replace( ' ', '\ '), out_base.replace(' ', '\ ')) os.system(command) paths = [] # convert the jpg files to the list of cv image for f in os.listdir(dir): path = os.path.join(dir, f) if os.path.exists(path) and f.find( base) != -1 and os.path.splitext(f)[1].find('jpg') != -1: paths.append(path) return paths if __name__ == '__main__': import os, PythonMagick from PythonMagick import Image from datetime import datetime bg_colour = "#ffffff" input_pdf = pdf_dir + "\\" + pdf img = Image() img.density('300') img.read(input_pdf)
def process(self):
    """Run ``do_magick`` on the previous artifact and save the result.

    The previous artifact's file is read into a new image, handed to
    ``self.do_magick`` and written to ``self.artifact.filepath()``.
    """
    # The path is coerced with str() because the reader complains when it is
    # given unicode.
    source_path = str(self.artifact.previous_artifact_filepath)
    canvas = Image()
    canvas.read(source_path)
    self.do_magick(canvas)
    target_path = self.artifact.filepath()
    canvas.write(target_path)
def tiff_to_pdf(list_of_file_paths, destination_directory):
    """
    This method converts individual TIFF image files to PDF.

    Each TIFF is split into its frames, every frame is converted to PNG and
    then to a single-page PDF (density "600"). Multi-page results are merged
    with ``ImageOpr.merge_pdf``; a single page is moved to the destination
    directly. The output is ``<destination_directory>/<tiff base name>.pdf``.

    :param list_of_file_paths: This argument is the list of absolute file paths for example
     ['C:/User/Documents/Images/Image1.tiff', 'C:/User/Documents/Images/Image2.tiff' ]
     The list is not modified.
    :param destination_directory: Pass in the absolute path to the directory you want the converted files to be
    saved to.
    :return: This method returns a dictionary with "code" (0 when every path existed, 1 otherwise) and
    "invalid_file_paths" (the string "None" or the comma-joined paths that did not exist).
    """
    # Local import so the block does not rely on a module-level import.
    import tempfile

    valid_file_paths = []
    invalid_file_paths = []
    # Partition into two new lists. Removing entries from the input while
    # iterating over it skips the element after each removal and silently
    # alters the caller's list.
    for file_path in list_of_file_paths:
        if os.path.exists(file_path):
            valid_file_paths.append(file_path)
        else:
            invalid_file_paths.append(file_path)

    # A private scratch directory: cleaning up a fixed "temp_dir" under the
    # current working directory could delete a user's own directory of that
    # name.
    path_to_temp = tempfile.mkdtemp(prefix="tiff_to_pdf_")
    try:
        for tiff_path in valid_file_paths:
            tif_file_name, _tif_ext = os.path.splitext(os.path.basename(tiff_path))
            img = Image.open(tiff_path)

            # Step 1: write every frame of the TIFF to its own file. seek()
            # raises EOFError past the last frame, so no page limit is needed.
            temp_file_list = []
            page_index = 0
            while True:
                page_path = os.path.join(path_to_temp, 'page' + str(page_index + 1) + ".tif")
                try:
                    img.seek(page_index)
                    img.save(page_path)
                except EOFError:
                    break
                temp_file_list.append(page_path)
                page_index += 1

            # Step 2: convert each frame to PNG.
            png_temp_array = []
            for tif_img in temp_file_list:
                name, _ext = os.path.splitext(os.path.basename(tif_img))
                png_path = os.path.join(path_to_temp, name + ".png")
                png_temp_array.append(png_path)
                Image.open(tif_img).save(png_path)
                os.remove(tif_img)

            # Step 3: convert each PNG to a single-page PDF.
            pdf_temp_array = []
            for png_img in png_temp_array:
                page_image = PyImage()
                page_image.density("600")
                page_image.read(png_img)
                png_name, _png_ext = os.path.splitext(os.path.basename(png_img))
                pdf_path = os.path.join(path_to_temp, png_name + ".pdf")
                pdf_temp_array.append(pdf_path)
                page_image.write(pdf_path)
                os.remove(png_img)

            # Step 4: deliver the result.
            if len(pdf_temp_array) > 1:
                ImageOpr.merge_pdf(pdf_temp_array, destination_directory, tif_file_name, delete_source=False)
                for pdfs in pdf_temp_array:
                    os.remove(pdfs)
            else:
                shutil.move(pdf_temp_array[0],
                            os.path.join(destination_directory, tif_file_name + ".pdf"))
    finally:
        shutil.rmtree(path_to_temp, ignore_errors=True)

    if not invalid_file_paths:
        return {"code": 0, "invalid_file_paths": "None"}
    # Report the paths that were rejected, not the ones that were converted.
    return {"code": 1, "invalid_file_paths": ",".join(invalid_file_paths)}
# Export page i of an already opened PDF (in_file_pdf) as a single-page PDF
# and as a JPEG inside outDir. Both steps are skipped when their output file
# already exists.
# NOTE(review): i, in_file_pdf, outDir, img and bg_colour are defined outside
# this fragment -- presumably by an enclosing loop over the pages; confirm.
out_file_pdf = os.path.join(outDir, "P" + str(i) + ".pdf")
out_file_jpg = out_file_pdf.replace(".pdf", ".jpg")
if not os.path.exists(out_file_pdf):
    # Read page i from pdfFile
    pageObj = in_file_pdf.getPage(i)
    pdfOut = PdfFileWriter()
    pdfOut.addPage(pageObj)
    print("Page %d" % i)
    # Write page i to a separate pdf file
    # NOTE(review): file() is the Python 2 builtin; it does not exist on Python 3.
    outputStream = file(out_file_pdf, "wb")
    pdfOut.write(outputStream)
    outputStream.close()
if not os.path.exists(out_file_jpg):
    img.read(out_file_pdf)
    # Canvas of the same pixel size as the rendered page, filled with bg_colour.
    size = "%sx%s" % (img.columns(), img.rows())
    output_img = Image(size, bg_colour)
    output_img.type = img.type
    # Draw the page over the canvas.
    output_img.composite(
        img, 0, 0, PythonMagick.CompositeOperator.SrcOverCompositeOp)
    output_img.resize(str(img.rows()))
    output_img.magick('JPG')
    output_img.quality(75)
    output_img.write(out_file_jpg)
    print("Save page %d to %s" % (i, out_file_jpg))
# Copy /opt/<argv[1]> to /tmp/test2/ocr.pdf and render each of its pages to
# /tmp/test2/some_<page index>.jpg at 300 DPI.
import os
import shutil
import sys
from tempfile import NamedTemporaryFile

from pyPdf import PdfFileReader, PdfFileWriter
from PythonMagick import Image

pdfname = sys.argv[1]

# NOTE: /tmp/test2 is not created here; it must already exist.
# shutil.copy() instead of a shell "cp" string: the name comes from the
# command line, so it must not be interpreted by a shell, and a failed copy
# now raises instead of being silently ignored.
shutil.copy("/opt/%s" % (pdfname), "/tmp/test2/ocr.pdf")

# The context manager closes the PDF once every page has been rendered.
with open("/tmp/test2/ocr.pdf", "rb") as pdf_file:
    reader = PdfFileReader(pdf_file)
    for page_num in range(reader.getNumPages()):
        writer = PdfFileWriter()
        writer.addPage(reader.getPage(page_num))
        temp = NamedTemporaryFile(prefix=str(page_num), suffix=".pdf", delete=False)
        try:
            writer.write(temp)
            temp.close()

            im = Image()
            im.density("300")  # DPI, for better quality
            im.read(temp.name)
            im.write("/tmp/test2/some_%d.jpg" % (page_num))
        finally:
            # delete=False means nobody else cleans up this file.
            temp.close()
            os.remove(temp.name)
class Map:
    """A map document whose geographic extent is read from its margins by OCR.

    The map is rendered from ``map_path`` at ``72 * scale_factor`` DPI. Small
    boxes next to the map frame are cropped and passed to
    ``OCR.GetDecimalDegrees`` to obtain the origin (x, y) and the extent
    (width, height) of the map.

    The layout attributes ``WIDTH``, ``HEIGHT``, ``MARGIN_LEFT``,
    ``MARGIN_RIGHT``, ``MARGIN_TOP`` and ``MARGIN_BOTTOM`` are read from
    ``self`` but are not defined in this class -- presumably a subclass
    provides them (confirm).
    """

    def __init__(self, map_path):
        """Remember ``map_path`` and read the coordinates for the first time."""
        self._map_path = map_path
        self._map_image = None  # rendered lazily by _GenerateImage()
        self._ocr = OCR()

        # 0.0 means "not read yet" for all four values.
        self._x = 0.0
        self._y = 0.0
        self._width = 0.0
        self._height = 0.0

        # The OCR gets a bit buggy for scale factors
        # smaller than 3 and bigger than 5.
        self._scale_factor = 5

        self._RefreshCoordinates()

    def IsValid(self):
        """Return True when the stored coordinates look plausible.

        The origin and extent must be non-zero, the width must lie within
        [0.001, 0.04], the height within [-0.04, -0.001], and the
        per-pixel ratios of height and width must agree to within 0.0001.
        """
        # Compare by value: an identity test ("is 0") is never true for the
        # float 0.0 these attributes are initialised with.
        if self._x == 0 or self._y == 0:
            return False

        if self._width == 0 or self._height == 0:
            return False

        if self._width < 0.001 or self._width > 0.04:
            return False

        if self._height > -0.001 or self._height < -0.04:
            return False

        map_geometry = self._GetMapGeometry()
        height_pixel_ratio = self.GetHeight() / map_geometry.height()
        width_pixel_ratio = self.GetWidth() / map_geometry.width()

        # The ratio should not be very different from each other, otherwise
        # we OCR'ed one of the coordinates wrong. The outer abs() makes the
        # test symmetric, so a too-large width ratio is caught as well.
        if abs(abs(height_pixel_ratio) - abs(width_pixel_ratio)) > 0.0001:
            return False

        return True

    def SetScaleFactor(self, factor):
        """Change the render scale and re-read the coordinates if needed."""
        # Value comparison; "is" only happens to work for small cached ints.
        if factor == self._scale_factor:
            return

        self._scale_factor = factor
        if self._map_image:
            self._GenerateImage()

        self._RefreshCoordinates()

    def GetMapImage(self):
        """Return a copy of the rendered page cropped to the map area."""
        return self._CropGeometry(self._GetMapGeometry())

    def GetX(self):
        """Return the x origin, OCR'ing the box above the map's top-left
        corner on first use. A non-zero result is cached."""
        if self._x:
            return self._x

        map_geometry = self._GetMapGeometry()

        offset = _MAGIC_COORDINATE_OFFSET_ * self._scale_factor
        width = _MAGIC_COORDINATE_BBOX_WIDTH_ * self._scale_factor
        height = _MAGIC_COORDINATE_BBOX_HEIGHT_ * self._scale_factor

        coordinate_geometry = Geometry(width, height,
                                       map_geometry.xOff(),
                                       map_geometry.yOff() - offset - height)

        image = self._CropGeometry(coordinate_geometry)
        self._x = self._ocr.GetDecimalDegrees(image)

        return self._x

    def GetY(self):
        """Return the y origin, OCR'ing the box left of the map's top-left
        corner (rotated by 90 degrees) on first use. A non-zero result is
        cached."""
        if self._y:
            return self._y

        map_geometry = self._GetMapGeometry()

        offset = _MAGIC_COORDINATE_OFFSET_ * self._scale_factor
        # Width and height are swapped here: the box is cropped from the
        # side margin and rotated before it is OCR'ed.
        width = _MAGIC_COORDINATE_BBOX_HEIGHT_ * self._scale_factor
        height = _MAGIC_COORDINATE_BBOX_WIDTH_ * self._scale_factor

        coordinate_geometry = Geometry(width, height,
                                       map_geometry.xOff() - offset - width,
                                       map_geometry.yOff())

        image = self._CropGeometry(coordinate_geometry)
        image.rotate(90)
        self._y = self._ocr.GetDecimalDegrees(image)

        return self._y

    def GetWidth(self):
        """Return the map width: the value OCR'ed below the map's
        bottom-right corner minus ``GetX()``. A non-zero result is cached."""
        if self._width:
            return self._width

        map_geometry = self._GetMapGeometry()

        offset = _MAGIC_COORDINATE_OFFSET_ * self._scale_factor
        width = _MAGIC_COORDINATE_BBOX_WIDTH_ * self._scale_factor
        height = _MAGIC_COORDINATE_BBOX_HEIGHT_ * self._scale_factor

        x_offset = map_geometry.xOff() + map_geometry.width()
        y_offset = map_geometry.yOff() + map_geometry.height()

        coordinate_geometry = Geometry(width, height,
                                       x_offset - width,
                                       y_offset + offset)

        image = self._CropGeometry(coordinate_geometry)
        self._width = self._ocr.GetDecimalDegrees(image) - self.GetX()

        return self._width

    def GetHeight(self):
        """Return the map height: the value OCR'ed right of the map's
        bottom-right corner (rotated by 90 degrees) minus ``GetY()``. A
        non-zero result is cached."""
        if self._height:
            return self._height

        map_geometry = self._GetMapGeometry()

        offset = _MAGIC_COORDINATE_OFFSET_ * self._scale_factor
        # Width and height are swapped here: the box is cropped from the
        # side margin and rotated before it is OCR'ed.
        width = _MAGIC_COORDINATE_BBOX_HEIGHT_ * self._scale_factor
        height = _MAGIC_COORDINATE_BBOX_WIDTH_ * self._scale_factor

        x_offset = map_geometry.xOff() + map_geometry.width()
        y_offset = map_geometry.yOff() + map_geometry.height()

        coordinate_geometry = Geometry(width, height,
                                       x_offset + offset,
                                       y_offset - height)

        image = self._CropGeometry(coordinate_geometry)
        image.rotate(90)
        self._height = self._ocr.GetDecimalDegrees(image) - self.GetY()

        return self._height

    def _RefreshCoordinates(self):
        """Re-read all four coordinates unless the current ones are valid."""
        if self.IsValid():
            return

        self._x = 0.0
        self._y = 0.0
        self._width = 0.0
        self._height = 0.0

        # GetWidth()/GetHeight() call GetX()/GetY() themselves.
        self.GetWidth()
        self.GetHeight()

    def _CropGeometry(self, geometry):
        """Return a copy of the rendered page cropped to ``geometry``."""
        if not self._map_image:
            self._GenerateImage()

        image = Image(self._map_image)
        image.crop(geometry)

        return image

    def _GetMapGeometry(self):
        """Return the geometry of the map area (page minus margins), scaled
        by the current scale factor."""
        width = self.WIDTH - self.MARGIN_LEFT - self.MARGIN_RIGHT
        height = self.HEIGHT - self.MARGIN_TOP - self.MARGIN_BOTTOM

        width *= self._scale_factor
        height *= self._scale_factor

        margin_left = self.MARGIN_LEFT * self._scale_factor
        margin_top = self.MARGIN_TOP * self._scale_factor

        return Geometry(width, height, margin_left, margin_top)

    def _GenerateImage(self):
        """Render ``self._map_path`` at 72 DPI times the scale factor."""
        scaled_density = 72 * self._scale_factor

        self._map_image = Image()
        self._map_image.density("%dx%d" % (scaled_density, scaled_density))
        self._map_image.read(self._map_path)
__author__ = 'davidhalldor'

# Convert every PDF below the "Snidagerd" directory to a JPEG with the same
# base name and delete the PDF afterwards.

# Setting up the libs
# http://stackoverflow.com/questions/13984357/pythonmagick-cant-find-my-pdf-files
import os

from PythonMagick import Image

im = Image()

for dirpath, dirnames, filenames in os.walk("Snidagerd"):
    print(dirpath)
    for fn in filenames:
        print("  " + fn)
        new_fn = fn.replace(".pdf", ".jpg")
        print("New file name:  " + new_fn)
        # Only touch PDFs. The earlier "is not a .jpg" test also matched
        # other files, whose output name equals their own name, so they were
        # written over themselves and then deleted.
        if fn.endswith(".pdf"):
            source_path = os.path.abspath(dirpath + os.sep + fn)
            im.read(source_path)
            im.write(os.path.abspath(dirpath + os.sep + new_fn))
            os.remove(source_path)
# Script: render each page of a PDF to "<page index>.jpg" and read the images
# back for OCR through an AipOcr client.
# NOTE(review): path, pdfname and DPI are defined outside this fragment, and
# the final loop appears to continue beyond it -- confirm against the full file.

# Credentials passed to AipOcr below; '??' are placeholders.
APP_ID='??'
API_KEY='??'
SECRET_KEY='??'
# Placeholder that is passed to pdfkit as the wkhtmltopdf location.
path_wk=r'pdfkit安装位置设置'
pdfkit_config=pdfkit.configuration(wkhtmltopdf=path_wk)
pdfkit_options={'encoding':'UTF-8',}
# Work inside `path`: the page images below are written with relative names.
os.chdir(path)
# NOTE(review): this file handle is never closed.
pdf_input=PdfFileReader(open(pdfname, 'rb'))
page_count=pdf_input.getNumPages()
page_range=range(page_count)
# Render page `page_num` (selected with the "[n]" suffix) to "<page_num>.jpg".
for page_num in page_range:
    im=Image()
    im.density(DPI)
    im.read(pdfname + '[' + str(page_num) +']')
    im.write(str(page_num)+ '.jpg')
client=AipOcr(APP_ID, API_KEY, SECRET_KEY)
def get_file_content(filePath):
    """Return the raw bytes of the file at ``filePath``."""
    with open(filePath, 'rb') as fp:
        return fp.read()
# Options dictionary, presumably for the OCR request -- it is not used within
# this fragment.
options={}
options["language_type"]="CHN_ENG"
options["detect_direction"]="false"
options["detect_language"]="false"
options["probability"]="false"
allteststr=[]
# Read each rendered page image back from `path`.
# NOTE(review): the path is built with a literal backslash separator.
for page_num in page_range:
    image=get_file_content(r'%s\%s.jpg' % (path, page_num))
def _detect_text(image_path):
    """Send one image file to the text-detection endpoint ``url`` and return
    the detected text ('' when the response has no text annotations)."""
    with io.open(image_path, 'rb') as image_file:
        # Decode to text: base64 bytes are not JSON-serialisable on Python 3.
        content = base64.b64encode(image_file.read()).decode('ascii')
    payload = {
        "requests": [
            {
                "image": {"content": content},
                "features": [{"type": "TEXT_DETECTION"}],
            }
        ]
    }
    response = requests.post(url, data=json.dumps(payload))
    annotations = json.loads(response.text)['responses'][0].get('textAnnotations')
    return annotations[0]['description'] if annotations else ''


def upload():
    """Handle an uploaded PDF or image and return its detected text as JSON.

    Only POST requests are processed; for any other method nothing is
    returned. The upload is read from ``request.files['file']``.

    * '.pdf': every page is rendered to a JPEG at 300 DPI and OCR'ed. The
      result maps "file_name" to the upload's name and
      "page_no-<image file name>" to the text of that page.
    * '.png' / '.jpg' / '.jpeg': the image is OCR'ed; the result holds
      "file_name" and "text".
    * anything else: ``{'file format is wrong': <name>}``.

    The uploaded file and all intermediate images are removed again, also
    when processing fails.

    :return: a JSON string.
    """
    if request.method != 'POST':
        return None

    uploaded = request.files['file']
    # The file name is client-controlled: keep only its last component so it
    # cannot point outside the upload and image directories.
    f_name = os.path.basename(uploaded.filename.replace('\\', '/'))
    name, extension = os.path.splitext(f_name)
    # Read the upload back from the path it is saved to, instead of assuming
    # that "./upload" is the same location.
    upload_path = os.path.join(basedir, UPLOAD_FOLDER, f_name)

    if extension == ".pdf":
        uploaded.save(upload_path)
        pages_dir = os.path.join('./images', name)
        try:
            os.makedirs(pages_dir)
            with open(upload_path, "rb") as pdf_file:
                reader = PdfFileReader(pdf_file)
                for page_num in range(reader.getNumPages()):
                    writer = PdfFileWriter()
                    writer.addPage(reader.getPage(page_num))
                    temp = NamedTemporaryFile(prefix=str(page_num), suffix=".pdf", delete=False)
                    try:
                        writer.write(temp)
                        temp.close()
                        im = Image()
                        im.density("300")  # DPI, for better quality
                        im.read(temp.name)
                        # The leading space in the image name is kept on
                        # purpose: it is part of the response keys below.
                        im.write("images/%s/ %d.jpg" % (str(name), page_num))
                    finally:
                        temp.close()
                        os.remove(temp.name)

            result = {"file_name": f_name}
            for page_file in os.listdir(pages_dir):
                key = "page_no-" + str(page_file)
                result[key] = _detect_text(os.path.join(pages_dir, page_file))
        finally:
            shutil.rmtree(pages_dir, ignore_errors=True)
            if os.path.exists(upload_path):
                os.remove(upload_path)
        return json.dumps(result)

    if extension == '.png' or extension == '.jpg' or extension == '.jpeg':
        result = {"file_name": f_name}
        uploaded.save(upload_path)
        try:
            result["text"] = _detect_text(upload_path)
        finally:
            os.remove(upload_path)
        return json.dumps(result)

    return json.dumps({'file format is wrong': f_name})