def convert_to_pdf(os_dir, chapter, file_names):
    """
    Converts a collection of images to PDF format

    The PDF is written to ``<os_dir>/<chapter>.pdf``.

    :param os_dir: Directory to save PDF in.
    :param chapter: Title of the PDF.
    :param file_names: Images to construct the PDF from.
    :return: None
    """
    print("Converting chapter %s to pdf..." % chapter)
    try:
        pdf_bytes = img2pdf.convert(*[download_image(path) for path in file_names])
    except img2pdf.PdfTooLargeError:
        # Sometimes the images are registered as having a dpi of 1.
        # Because PDF has a limitation of 200 inches max per side, a
        # special layout_fun has to be used, as to prevent an exception.
        # default manga size 5"x7"
        layout_fun = img2pdf.get_layout_fun(
            pagesize=(None, img2pdf.in_to_pt(7)),
            imgsize=None,
            border=None,
            fit=img2pdf.FitMode.into,
            auto_orient=False)
        # The images are fetched again on purpose: whether the objects
        # returned by download_image can be consumed twice is not visible here.
        pdf_bytes = img2pdf.convert(
            *[download_image(path) for path in file_names],
            layout_fun=layout_fun)

    # The context manager guarantees the file is flushed and closed.
    with open("%s/%s.pdf" % (os_dir, chapter), "wb") as pdf_file:
        pdf_file.write(pdf_bytes)
    print("Conversion completed!")
def select_image_layer(infiles, output_file, log, context):
    """Choose what becomes the image layer of the output page.

    With lossless reconstruction enabled the orientation-corrected input
    page is linked to ``output_file``; otherwise the page image is wrapped
    into a single page PDF written to ``output_file``.
    """
    options = context.get_options()
    page_pdf = next(
        name for name in infiles if name.endswith('.ocr.oriented.pdf')
    )
    image = next(name for name in infiles if name.endswith('.image'))

    if not options.lossless_reconstruction:
        pageinfo = get_pageinfo(image, context)

        # Pages are rasterized at a square DPI because most image processing
        # tools cannot handle rectangular DPI, and the square value describes
        # the image accurately. Resampling back to non-square DPI here would
        # resemble the input more closely, but the hocr renderer does not
        # understand non-square DPI (the sandwich renderer would be fine).
        square_dpi = get_page_square_dpi(pageinfo, options)
        layout_fun = img2pdf.get_fixed_dpi_layout_fun((square_dpi, square_dpi))

        # This creates a single page PDF
        with open(image, 'rb') as imfile, open(output_file, 'wb') as pdf:
            log.debug(f'{page_number(page_pdf):4d}: convert')
            img2pdf.convert(
                imfile,
                with_pdfrw=False,
                layout_fun=layout_fun,
                outputstream=pdf,
            )
            log.debug(f'{page_number(page_pdf):4d}: convert done')
    else:
        log.debug(
            f"{page_number(page_pdf):4d}: page eligible for lossless reconstruction"
        )
        # Still points to multipage
        re_symlink(page_pdf, output_file, log)
def select_image_layer(
        infiles,
        output_file,
        log,
        pdfinfo,
        pdfinfo_lock):
    """Link the oriented page or build a one-image PDF as the image layer.

    ``lossless_reconstruction`` and ``options`` are read from the enclosing
    scope.
    """
    page_pdf = next(
        name for name in infiles if name.endswith('.ocr.oriented.pdf'))
    image = next(name for name in infiles if name.endswith('.image'))

    if lossless_reconstruction:
        message = "{:4d}: page eligible for lossless reconstruction".format(
            page_number(page_pdf))
        log.debug(message)
        re_symlink(page_pdf, output_file)
        return

    pageinfo = get_pageinfo(image, pdfinfo, pdfinfo_lock)

    # One DPI value is used for both axes: the largest of the page's own
    # resolutions and the requested oversampling.
    dpi = round(max(pageinfo['xres'], pageinfo['yres'], options.oversample))
    axis_size = (img2pdf.ImgSize.dpi, dpi)
    layout_fun = img2pdf.get_layout_fun(
        None, (axis_size, axis_size), None, None, None)

    with open(image, 'rb') as imfile, open(output_file, 'wb') as pdf:
        rawdata = imfile.read()
        converted = img2pdf.convert(
            rawdata,
            producer="img2pdf",
            with_pdfrw=False,
            layout_fun=layout_fun)
        pdf.write(converted)
def triage_image_file(input_file, output_file, log, options):
    """Check that ``input_file`` is a usable image and convert it to a PDF.

    :param input_file: path of the candidate image
    :param output_file: path the converted PDF is written to
    :param log: logger used for progress and error messages
    :param options: object providing ``image_dpi``
    :raises UnsupportedImageFormatError: the file cannot be opened as an
        image, is CMYK without an ICC profile entry, or img2pdf cannot
        open it
    :raises DpiError: the image has no DPI, or a DPI of at most (96, 96),
        and ``options.image_dpi`` is not set
    """
    try:
        log.info("Input file is not a PDF, checking if it is an image...")
        im = Image.open(input_file)
    except EnvironmentError as e:
        msg = str(e)

        # Recover the original filename
        realpath = ''
        if os.path.islink(input_file):
            realpath = os.path.realpath(input_file)
        elif os.path.isfile(input_file):
            realpath = '<stdin>'
        msg = msg.replace(input_file, realpath)
        log.error(msg)
        raise UnsupportedImageFormatError() from e

    # try/finally so the image is closed on the error paths as well;
    # previously it was only closed when every check passed.
    try:
        log.info("Input file is an image")
        if 'dpi' in im.info:
            if im.info['dpi'] <= (96, 96) and not options.image_dpi:
                log.info("Image size: (%d, %d)" % im.size)
                log.info("Image resolution: (%d, %d)" % im.info['dpi'])
                log.error(
                    "Input file is an image, but the resolution (DPI) is "
                    "not credible.  Estimate the resolution at which the "
                    "image was scanned and specify it using --image-dpi.")
                raise DpiError()
        elif not options.image_dpi:
            log.info("Image size: (%d, %d)" % im.size)
            log.error(
                "Input file is an image, but has no resolution (DPI) "
                "in its metadata.  Estimate the resolution at which "
                "image was scanned and specify it using --image-dpi.")
            raise DpiError()

        # NOTE(review): Pillow appears to store embedded profiles under
        # 'icc_profile', not 'iccprofile' - confirm which key is intended.
        # Left unchanged because fixing it would start accepting CMYK input.
        if 'iccprofile' not in im.info:
            if im.mode == 'RGB':
                log.info('Input image has no ICC profile, assuming sRGB')
            elif im.mode == 'CMYK':
                log.info('Input CMYK image has no ICC profile, not usable')
                raise UnsupportedImageFormatError()
    finally:
        im.close()

    try:
        log.info("Image seems valid. Try converting to PDF...")
        layout_fun = img2pdf.default_layout_fun
        if options.image_dpi:
            layout_fun = img2pdf.get_fixed_dpi_layout_fun(
                (options.image_dpi, options.image_dpi))
        with open(output_file, 'wb') as outf:
            img2pdf.convert(
                input_file,
                layout_fun=layout_fun,
                with_pdfrw=False,
                outputstream=outf)
        log.info("Successfully converted to PDF, processing...")
    except img2pdf.ImageOpenError as e:
        log.error(e)
        raise UnsupportedImageFormatError() from e
def select_image_layer(
        infiles,
        output_file,
        log,
        pdfinfo,
        pdfinfo_lock):
    """Link the oriented page or convert the page image at its own DPI.

    ``lossless_reconstruction`` is read from the enclosing scope.
    """
    page_pdf = next(
        name for name in infiles if name.endswith('.ocr.oriented.pdf'))
    image = next(name for name in infiles if name.endswith('.image'))

    if lossless_reconstruction:
        message = "{:4d}: page eligible for lossless reconstruction".format(
            page_number(page_pdf))
        log.debug(message)
        re_symlink(page_pdf, output_file)
        return

    pageinfo = get_pageinfo(image, pdfinfo, pdfinfo_lock)
    xres, yres = get_page_dpi(pageinfo)[:2]
    layout_fun = img2pdf.get_fixed_dpi_layout_fun((float(xres), float(yres)))

    with open(image, 'rb') as imfile, open(output_file, 'wb') as pdf:
        rawdata = imfile.read()
        # The PDF is streamed straight into the output file.
        img2pdf.convert(
            rawdata,
            with_pdfrw=False,
            layout_fun=layout_fun,
            outputstream=pdf)
def triage_image_file(input_file, output_file, log):
    """Check that ``input_file`` is a usable image and convert it to a PDF.

    Every failure is logged and ends the process through
    ``sys.exit(ExitCode.input_file)``. ``options`` is read from the
    enclosing scope.

    :param input_file: path of the candidate image
    :param output_file: path the converted PDF is written to
    :param log: logger used for progress and error messages
    """
    try:
        log.info("Input file is not a PDF, checking if it is an image...")
        im = Image.open(input_file)
    except EnvironmentError as e:
        log.error(e)
        # sys.exit raises SystemExit, so nothing after it would run.
        sys.exit(ExitCode.input_file)

    # try/finally so the image is also closed when a check exits early.
    try:
        log.info("Input file is an image")
        if 'dpi' in im.info:
            if im.info['dpi'] <= (96, 96) and not options.image_dpi:
                log.info("Image size: (%d, %d)" % im.size)
                log.info("Image resolution: (%d, %d)" % im.info['dpi'])
                log.error(
                    "Input file is an image, but the resolution (DPI) is "
                    "not credible.  Estimate the resolution at which the "
                    "image was scanned and specify it using --image-dpi.")
                sys.exit(ExitCode.input_file)
        elif not options.image_dpi:
            log.info("Image size: (%d, %d)" % im.size)
            log.error(
                "Input file is an image, but has no resolution (DPI) "
                "in its metadata.  Estimate the resolution at which "
                "image was scanned and specify it using --image-dpi.")
            sys.exit(ExitCode.input_file)

        # NOTE(review): Pillow appears to store embedded profiles under
        # 'icc_profile', not 'iccprofile' - confirm which key is intended.
        if 'iccprofile' not in im.info:
            if im.mode == 'RGB':
                log.info('Input image has no ICC profile, assuming sRGB')
            elif im.mode == 'CMYK':
                log.info('Input CMYK image has no ICC profile, not usable')
                sys.exit(ExitCode.input_file)
    finally:
        im.close()

    try:
        log.info("Image seems valid. Try converting to PDF...")
        layout_fun = img2pdf.default_layout_fun
        if options.image_dpi:
            layout_fun = img2pdf.get_fixed_dpi_layout_fun(
                (options.image_dpi, options.image_dpi))
        with open(output_file, 'wb') as outf:
            img2pdf.convert(
                input_file,
                layout_fun=layout_fun,
                with_pdfrw=False,
                outputstream=outf)
        log.info("Successfully converted to PDF, processing...")
    except img2pdf.ImageOpenError as e:
        log.error(e)
        sys.exit(ExitCode.input_file)
def test_jpg2pdf(self):
    """Compare the conversion of test.jpg with the contents of test.pdf."""
    # JPEG and PDF are binary formats: text mode would try to decode them
    # (Python 3) or translate line endings (Windows).
    with open(os.path.join(HERE, 'test.jpg'), 'rb') as img_fp:
        with open(os.path.join(HERE, 'test.pdf'), 'rb') as pdf_fp:
            self.assertEqual(
                img2pdf.convert([img_fp], 150,
                                creationdate=moddate, moddate=moddate),
                pdf_fp.read())
def main():
    """Process every page of the input PDF and write the results as a PDF.

    Command line: ``PATH [-o out] [-s n]``. Each page is passed to
    ``process_page`` and ``./temp/<index>.jpg`` is collected for the
    final document.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('input_pdf_path', metavar='PATH')
    parser.add_argument('-o', '--output', metavar='out',
                        type=argparse.FileType('wb'),
                        help='Output PDF file')
    parser.add_argument('-s', '--skip', type=int, default=0,
                        help='Skip over the first n page(s).')
    args = parser.parse_args()

    logger = logging.getLogger(__name__)
    logging.basicConfig(level='INFO',
                        format='%(asctime)s - %(levelname)s - %(message)s')

    directory = './temp/'
    if not os.path.exists(directory):
        os.makedirs(directory)

    images_path = []
    try:
        # The reader needs the file open while pages are processed, so the
        # whole loop runs inside the context manager.
        with open(args.input_pdf_path, "rb") as input_file:
            pdf = PdfFileReader(input_file)
            num_pages = pdf.getNumPages()
            for i in range(num_pages):
                logger.info("Processing page {}/{}".format(i + 1, num_pages))
                # presumably process_page writes this file - confirm
                images_path.append("./temp/{}.jpg".format(i))
                process_page(pdf, i, i < args.skip)

        logger.info('Writing to output PDF file')
        args.output.write(
            img2pdf.convert(*list(map(img2pdf.input_images, images_path))))
        logger.info('Done')
    finally:
        # Remove the scratch directory even when a step above failed.
        shutil.rmtree(directory, True)
def test_single_page_image():
    """An 8x8 mono PNG converted at 8 dpi yields one text-free page."""
    filename = os.path.join(TEST_OUTPUT, 'image-mono.pdf')

    with NamedTemporaryFile() as im_tmp:
        # Black 8x8 bitmap with a white diagonal
        im = Image.new('1', (8, 8), 0)
        for diag in range(8):
            im.putpixel((diag, diag), 1)
        im.save(im_tmp.name, format='PNG')

        pdf_bytes = img2pdf.convert([im_tmp.name], dpi=8)

        with open(filename, 'wb') as pdf:
            pdf.write(pdf_bytes)

    pdfinfo = pageinfo.pdf_get_all_pageinfo(filename)
    assert len(pdfinfo) == 1

    page = pdfinfo[0]
    assert not page['has_text']
    assert len(page['images']) == 1

    pdfimage = page['images'][0]
    assert pdfimage['width'] == 8
    # While unexpected, this is correct
    assert pdfimage['color'] == 'gray'

    # PDF spec says /FlateDecode image must have /BitsPerComponent 8
    # So mono images get upgraded to 8-bit
    assert pdfimage['bpc'] == 8

    # DPI in a 1"x1" is the image width
    assert pdfimage['dpi_w'] == 8
    assert pdfimage['dpi_h'] == 8
def main():
    """Spoof entry point that dispatches on ``sys.argv``.

    Handles ``--version``, ``--list-langs`` and ``--print-parameters`` as
    the first argument, ``hocr`` / ``pdf`` as the second-to-last argument
    and ``stdout`` as the last one. Exits with status 1 when the arguments
    are not understood and with status 0 otherwise.
    """
    first = sys.argv[1]
    if first == '--version':
        print(VERSION_STRING, file=sys.stderr)
        sys.exit(0)
    if first == '--list-langs':
        print('List of available languages (1):\neng', file=sys.stderr)
        sys.exit(0)
    if first == '--print-parameters':
        print("Some parameters", file=sys.stderr)
        print("textonly_pdf\t1\tSome help text")
        sys.exit(0)

    mode = sys.argv[-2]
    if mode == 'hocr':
        inputf, output = sys.argv[-4], sys.argv[-3]
        with Image.open(inputf) as im, open(
            output + '.hocr', 'w', encoding='utf-8'
        ) as hocr_file:
            width, height = im.size
            hocr_file.write(HOCR_TEMPLATE.format(str(width), str(height)))
        with open(output + '.txt', 'w') as txt_file:
            txt_file.write('')
    elif mode == 'pdf':
        text_only = 'textonly_pdf=1' in sys.argv
        inputf, output = sys.argv[-4], sys.argv[-3]
        if text_only:
            # Blank page with the same physical size as the image
            with Image.open(inputf) as im:
                dpi = im.info['dpi']
                pagesize = im.size[0] / dpi[0], im.size[1] / dpi[1]
                ptsize = pagesize[0] * 72, pagesize[1] * 72

            pdf_out = pypdf.PdfFileWriter()
            pdf_out.addBlankPage(ptsize[0], ptsize[1])
            with open(output + '.pdf', 'wb') as pdf_file:
                pdf_out.write(pdf_file)
        else:
            pdf_bytes = img2pdf.convert([inputf], dpi=300)
            with open(output + '.pdf', 'wb') as pdf_file:
                pdf_file.write(pdf_bytes)
        with open(output + '.txt', 'w') as txt_file:
            txt_file.write('')
    elif sys.argv[-1] == 'stdout':
        inputf = sys.argv[-2]
        report = """Orientation: 0 Orientation in degrees: 0 Orientation confidence: 100.00 Script: 1 Script confidence: 100.00"""
        print(report, file=sys.stderr)
    else:
        print("Spoof doesn't understand arguments", file=sys.stderr)
        print(sys.argv, file=sys.stderr)
        sys.exit(1)

    sys.exit(0)
def main():
    """Spoof entry point that dispatches on ``sys.argv``.

    Handles ``--version`` and ``--list-langs`` as the first argument and
    ``hocr`` / ``pdf`` / ``stdout`` as the last one. Exits with status 1
    when the arguments are not understood and with status 0 otherwise.
    """
    first = sys.argv[1]
    if first == '--version':
        print(VERSION_STRING, file=sys.stderr)
        sys.exit(0)
    if first == '--list-langs':
        print('List of available languages (1):\neng', file=sys.stderr)
        sys.exit(0)

    mode = sys.argv[-1]
    if mode == 'hocr':
        inputf, output = sys.argv[-3], sys.argv[-2]
        with Image.open(inputf) as im, \
                open(output + '.hocr', 'w', encoding='utf-8') as hocr_file:
            width, height = im.size
            hocr_file.write(HOCR_TEMPLATE.format(str(width), str(height)))
    elif mode == 'pdf':
        inputf, output = sys.argv[-3], sys.argv[-2]
        pdf_bytes = img2pdf.convert([inputf], dpi=300)
        with open(output + '.pdf', 'wb') as pdf_file:
            pdf_file.write(pdf_bytes)
    elif mode == 'stdout':
        inputf = sys.argv[-2]
        report = """Orientation: 0 Orientation in degrees: 0 Orientation confidence: 100.00 Script: 1 Script confidence: 100.00"""
        print(report, file=sys.stderr)
    else:
        print("Spoof doesn't understand arguments", file=sys.stderr)
        print(sys.argv, file=sys.stderr)
        sys.exit(1)

    sys.exit(0)
def test_single_page_image():
    """An 8x8 mono PNG converted at 8 dpi yields one text-free page."""
    filename = os.path.join(TEST_OUTPUT, "image-mono.pdf")

    with NamedTemporaryFile() as im_tmp:
        # Black 8x8 bitmap with a white diagonal
        bitmap = Image.new("1", (8, 8), 0)
        for diag in range(8):
            bitmap.putpixel((diag, diag), 1)
        bitmap.save(im_tmp.name, format="PNG")

        pdf_bytes = img2pdf.convert([im_tmp.name], dpi=8)

        with open(filename, "wb") as pdf:
            pdf.write(pdf_bytes)

    pdfinfo = pageinfo.pdf_get_all_pageinfo(filename)
    assert len(pdfinfo) == 1

    page = pdfinfo[0]
    assert not page["has_text"]
    assert len(page["images"]) == 1

    pdfimage = page["images"][0]
    assert pdfimage["width"] == 8
    # While unexpected, this is correct
    assert pdfimage["color"] == "gray"

    # PDF spec says /FlateDecode image must have /BitsPerComponent 8
    # So mono images get upgraded to 8-bit
    assert pdfimage["bpc"] == 8

    # DPI in a 1"x1" is the image width
    assert pdfimage["dpi_w"] == 8
    assert pdfimage["dpi_h"] == 8
def test_single_page_image(outdir):
    """An 8x8 mono PNG laid out at 8 dpi yields one text-free page."""
    png_path = outdir / 'tmp.png'
    filename = outdir / 'image-mono.pdf'

    # Black 8x8 bitmap with a white diagonal
    bitmap = Image.new('1', (8, 8), 0)
    for diag in range(8):
        bitmap.putpixel((diag, diag), 1)
    bitmap.save(str(png_path), format='PNG')

    axis_size = (img2pdf.ImgSize.dpi, 8)
    layout_fun = img2pdf.get_layout_fun(
        None, (axis_size, axis_size), None, None, None)

    pdf_bytes = img2pdf.convert(
        png_path.read_bytes(),
        producer="img2pdf",
        with_pdfrw=False,
        layout_fun=layout_fun)
    filename.write_bytes(pdf_bytes)

    info = pdfinfo.PdfInfo(filename)
    assert len(info) == 1

    page = info[0]
    assert not page.has_text
    assert len(page.images) == 1

    pdfimage = page.images[0]
    assert pdfimage.width == 8
    assert pdfimage.color == Colorspace.gray

    # DPI in a 1"x1" is the image width
    assert isclose(pdfimage.xres, 8)
    assert isclose(pdfimage.yres, 8)
def generatePdfCommand(options, args):
    """Build a PDF doc with the images.

    The images come from ``inputFiles(options, args)`` and the document is
    written to ``out.pdf`` in the current directory.
    """
    imgfiles = inputFiles(options, args)

    # Generate the PDF doc
    pdf_bytes = img2pdf.convert(imgfiles, dpi=25)

    # The context manager closes the file even if the write fails.
    with open("out.pdf", "wb") as pdf_file:
        pdf_file.write(pdf_bytes)
def convert_to_pdf(img_list, output_folder, pdf_file_name):
    '''
    Build one PDF from the images in ``img_list`` (at 100 dpi) and store it
    as ``pdf_file_name`` inside ``output_folder``
    '''
    document = img2pdf.convert(img_list, dpi=100)
    destination = os.path.join(output_folder, pdf_file_name)
    with open(destination, 'wb') as out:
        out.write(document)
def make_rotate_test(prefix, image_angle, page_angle):
    """Create a one-page PDF of typewriter.png with the given rotations.

    The bitmap is transposed for ``image_angle`` and the page's ``Rotate``
    entry is set to ``page_angle``. Returns the path of the saved PDF,
    which is placed in ``outdir`` (read from the enclosing scope).
    """
    im = Image.open(fspath(resources / 'typewriter.png'))
    if image_angle != 0:
        # Image.ROTATE_* constants are counter-clockwise
        ccw_angle = -image_angle % 360
        transposition = getattr(Image, f'ROTATE_{ccw_angle}')
        im = im.transpose(transposition)

    png_buffer = BytesIO()
    im.save(png_buffer, format='PNG')
    png_bytes = png_buffer.getvalue()

    mempdf = BytesIO()
    layout_fun = img2pdf.get_fixed_dpi_layout_fun((200, 200))
    img2pdf.convert(png_bytes, layout_fun=layout_fun, outputstream=mempdf)
    mempdf.seek(0)

    pike = pikepdf.open(mempdf)
    pike.pages[0].Rotate = page_angle
    target = outdir / f'{prefix}_{image_angle}_{page_angle}.pdf'
    pike.save(target)
    return target
def convert_pdf(url):
    """Convert the files directly inside ``CURRENT/url`` into ``result.pdf``.

    :param url: directory name, relative to ``CURRENT``, holding the images
    """
    names = []
    # Only the top level of the directory is wanted, hence the break.
    for (dirpath, dirnames, filenames) in walk(join(CURRENT, url)):
        names.extend(filenames)
        break

    # NOTE(review): the files keep the order os.walk reports them in, and the
    # paths are relative to ``url`` rather than ``CURRENT`` - confirm both.
    f = ["%s/%s" % (url, x) for x in names]
    # Single-argument call form prints the same under Python 2 and 3.
    print(f)

    pdf_bytes = img2pdf.convert(f, dpi=300, x=None, y=None)
    # The context manager closes the file even if the write fails.
    with open('result.pdf', 'wb') as doc:
        doc.write(pdf_bytes)
def convert_pdf(title):
    """Build ``<title>/<title>.pdf`` under ``MEDIA_DIR`` from the title's files.

    Returns the PDF path relative to ``MEDIA_DIR``.
    """
    from img2pdf import convert
    from natsort import natsorted

    # Natural sort keeps numbered names in numeric order,
    # ex) [1.jpg, 10.jpg, 2.jpg ...] --> [1.jpg, 2.jpg, ... 10.jpg]
    files = natsorted(get_dir_files(title))
    print(files)

    pdf_bytes = convert(files, dpi=300, x=None, y=None)

    filepath = '{}.pdf'.format(join(title, title))
    destination = join(MEDIA_DIR, filepath)
    with open(destination, 'wb') as doc:
        doc.write(pdf_bytes)
    return filepath
def chapters2pdf(self, from_chapter, merge=20, image_ext='jpg'):
    """Merge the images of ``merge`` consecutive chapters into one PDF.

    Chapter folders are ``<comic>_<NNN>`` in the current directory and the
    PDF is written there as ``<comic>_<first>_<last>.pdf``.

    :param from_chapter: number of the first chapter to include
    :param merge: how many chapters to include
    :param image_ext: file extension of the page images
    """
    import re

    def natural_key(image_path):
        # re.split with a capturing group alternates text and digit runs, so
        # odd positions are always digits; comparing them as numbers makes
        # 2.jpg sort before 10.jpg.
        parts = re.split(r'(\d+)', os.path.basename(image_path))
        return [int(part) if position % 2 else part
                for position, part in enumerate(parts)]

    to_chapter = from_chapter + merge - 1
    pdf_name = (self.comic + '_' + str(from_chapter).zfill(3)
                + '_' + str(to_chapter).zfill(3) + '.pdf')
    pdf_file = os.path.join(os.getcwd(), pdf_name)

    image_list = []
    for index in range(merge):
        image_folder = self.comic + '_' + str(from_chapter + index).zfill(3)
        image_path = os.path.join(os.getcwd(), image_folder, '*.' + image_ext)
        # glob returns matches in arbitrary order; sort each chapter so the
        # pages land in the PDF in a stable, numeric order.
        image_list += sorted(glob.glob(image_path), key=natural_key)

    pdf_bytes = img2pdf.convert(image_list)
    with open(pdf_file, 'wb') as pdf:
        pdf.write(pdf_bytes)
    print('PDF Completed - ' + pdf_file)
def open_file(self):
    """Ask for image files and convert them into ``PDF.pdf``.

    The selected names are sorted before conversion. An error dialog is
    shown when the conversion fails and an info dialog when it succeeds.
    """
    # Select multiple files
    # Convert them to a single pdf
    self.fileName = tkFileDialog.askopenfilename(parent=self.master, multiple=True)
    if not self.fileName:
        return

    try:
        self.pdf = img2pdf.convert(sorted(self.fileName))
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not reported as a conversion failure.
        tkMessageBox.showerror("转换失败", "选择的文件不是图像文件,请重新选择", parent=self.master)
        return

    with open("PDF.pdf", "wb") as f:
        f.write(self.pdf)
    tkMessageBox.showinfo("转换成功", "已成功转换!转换后的pdf文件就在当前目录,文件名为PDF", parent=self.master)
def run():
    """Download every slide, convert each to a PDF and merge them.

    Slide URLs come from ``parser(url)``; images go to ``/tmp/img``, the
    per-slide PDFs to ``/tmp/pdf`` and the merged document to ``filename``
    (``url`` and ``filename`` are read from the enclosing scope).
    """
    import subprocess

    directoryimg = "/tmp/img"
    directorypdf = "/tmp/pdf"
    directoryworking = os.getcwd()
    (listurl, lenlisturl) = parser(url)
    print(str(lenlisturl) + " slides found")

    # Start from empty working directories.
    for directory in (directoryimg, directorypdf):
        if os.path.exists(directory):
            shutil.rmtree(directory)
        os.makedirs(directory)

    print("Begin download slides : Please wait ...")
    devnull = open(os.devnull, 'w')
    try:
        for item in listurl:
            # Argument list instead of a shell string: characters such as
            # '&' or ';' in a URL can no longer alter the command.
            subprocess.call(['wget', '-q', '-P', directoryimg, item],
                            stdout=devnull)
    finally:
        devnull.close()
    print("End Download")

    print("Begin convert slides to pdf file")
    files = listoffiles(directoryimg)
    for item in files:
        pdf_bytes = img2pdf.convert([directoryimg + "/" + item])
        (base, ext) = item.split('.', 1)
        itempdf = directorypdf + "/" + base + ".pdf"
        # PDF data is binary; "wb" replaces the former text-mode append.
        with open(itempdf, "wb") as pdf_file:
            pdf_file.write(pdf_bytes)

    merger = PdfFileMerger()
    files = [x for x in os.listdir(directorypdf) if x.endswith('.pdf')]
    handles = []
    try:
        for fname in sorted(files):
            # Kept open until the merged document has been written.
            handle = open(os.path.join(directorypdf, fname), 'rb')
            handles.append(handle)
            merger.append(PdfFileReader(handle))
        merger.write(filename)
    finally:
        for handle in handles:
            handle.close()

    print("End convert to pdf")
    print("File saved at " + directoryworking + "/" + filename)
def test_single_page_image():
    """An 8x8 mono PNG laid out at 8 dpi yields one text-free page."""
    filename = os.path.join(TEST_OUTPUT, 'image-mono.pdf')

    with NamedTemporaryFile(mode='wb+', suffix='.png') as im_tmp:
        # Black 8x8 bitmap with a white diagonal
        bitmap = Image.new('1', (8, 8), 0)
        for diag in range(8):
            bitmap.putpixel((diag, diag), 1)
        bitmap.save(im_tmp.name, format='PNG')

        axis_size = (img2pdf.ImgSize.dpi, 8)
        layout_fun = img2pdf.get_layout_fun(
            None, (axis_size, axis_size), None, None, None)

        # Read the PNG back through the temporary file's own handle
        im_tmp.seek(0)
        im_bytes = im_tmp.read()
        pdf_bytes = img2pdf.convert(
            im_bytes,
            producer="img2pdf",
            with_pdfrw=False,
            layout_fun=layout_fun)

        with open(filename, 'wb') as pdf:
            pdf.write(pdf_bytes)

    pdfinfo = pageinfo.pdf_get_all_pageinfo(filename)
    assert len(pdfinfo) == 1

    page = pdfinfo[0]
    assert not page['has_text']
    assert len(page['images']) == 1

    pdfimage = page['images'][0]
    assert pdfimage['width'] == 8
    # While unexpected, this is correct
    assert pdfimage['color'] == 'gray'

    # PDF spec says /FlateDecode image must have /BitsPerComponent 8
    # So mono images get upgraded to 8-bit
    assert pdfimage['bpc'] == 8

    # DPI in a 1"x1" is the image width
    assert pdfimage['dpi_w'] == 8
    assert pdfimage['dpi_h'] == 8
def topdf(self):
    """Store the base64 image as ``<fileExt>.jpg`` and as ``<fileExt>.pdf``.

    The payload is everything after the first comma of the URL-unquoted
    ``self.base64img``. The file name stem is recorded in the
    ``pdfexsit`` collection.

    :return: name of the PDF file that was written
    """
    self.f_pdf = self.fileExt + '.pdf'
    self.filename = self.fileExt + '.jpg'
    self.codes = "".join(unquote(self.base64img).split(',')[1:])

    # Decode once and reuse the bytes for both the image and the PDF.
    image_bytes = base64.b64decode(self.codes)

    # static file (CDN)
    with open(self.filename, 'wb') as img:
        img.write(image_bytes)

    sd = img2pdf.convert([StringIO.StringIO(image_bytes)], 150, x=620, y="")
    with open(self.f_pdf, 'wb') as hand:
        hand.write(sd)

    pdfexsit = db.mdb('topdf', 'pdfexsit').perform()
    pdfexsit.insert({'pdf': self.fileExt})
    return self.f_pdf
def imgmem_to_pdf(sourcedata, targetfile):
    """
    Use the img2pdf library to create a lossless PDF from an image.

    img2pdf supports:

    | Format                | Colorspace                     | Result       |
    | --------------------- | ------------------------------ | ------------ |
    | JPEG                  | any                            | direct       |
    | JPEG2000              | any                            | direct       |
    | PNG (non-interlaced)  | any                            | direct       |
    | TIFF (CCITT Group 4)  | monochrome                     | direct       |
    | any                   | any except CMYK and monochrome | PNG Paeth    |
    | any                   | monochrome                     | CCITT Group4 |
    | any                   | CMYK                           | flate        |

    :param sourcedata: A memory blob that contains the data from the image.
        (eg. The image file was read into memory).
    :type sourcedata: Binary blob
    :param targetfile: The FQPN filename of the file to create
    :type targetfile: String
    :return: True if converted successfully, otherwise False
    :rtype: Boolean

    Dependency - img2pdf, which has a dependency on Pillow.

    https://gitlab.mister-muffin.de/josch/img2pdf
    https://pypi.org/project/img2pdf/
    """
    try:
        with open(targetfile, "wb") as target:
            target.write(img2pdf.convert(sourcedata))
        return True
    except Exception:
        # Any failure is reported as False, but KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        return False
import os
import img2pdf

# Convert every "*.png.png" image below ``directory`` into a PDF placed
# next to it, with the double extension replaced by ".pdf".
directory = '/mnt/Cargo_2/Sync/Πανεπιστημιο/Diploma Thesis/Python Scripts/vis-results'
suffix = '.png.png'

images = []
for root, _, files in os.walk(directory):
    for file in files:
        # endswith rather than "in": the suffix is sliced off the end below,
        # which is only right when the name really ends with it.
        if file.endswith(suffix) and '.pdf' not in file:
            images.append(os.path.join(root, file))

for image in images:
    print(image)
    with open(image[:-len(suffix)] + '.pdf', 'wb') as fp:
        fp.write(img2pdf.convert(image))
def run(self):
    """Run ``self.predict`` on every page of ``self.dir_pdf`` and build a PDF.

    Each page is written out on its own, rendered to a JPEG, passed through
    ``self.predict`` and saved as a numbered image. The images are then
    combined into ``<dir_out>/output.pdf`` and the image directory is
    removed.
    """
    import shutil

    self.start_new_session_and_model()
    self.load_model()

    pdf_reader = PdfFileReader(self.dir_pdf)
    num_pages = pdf_reader.getNumPages()

    dir_to_write_single_page = self.dir_out + '/single_page.pdf'
    dir_to_write_single_page_image = self.dir_out + '/single_page_image.jpg'
    dir_imgs_enhanced = os.path.join(self.dir_out, 'images')

    # Start from an empty image directory. shutil.rmtree replaces the former
    # "rm -rf" shell command, which broke on paths containing spaces.
    if os.path.isdir(dir_imgs_enhanced):
        shutil.rmtree(dir_imgs_enhanced)
    os.makedirs(dir_imgs_enhanced)

    indexer = 0
    for num_page in range(num_pages):
        # Write the page out alone so it can be rendered separately.
        pdf_writer = PdfFileWriter()
        page_single = pdf_reader.getPage(num_page)
        pdf_writer.addPage(page_single)
        with open(dir_to_write_single_page, 'wb') as out:
            pdf_writer.write(out)

        pages = convert_from_path(dir_to_write_single_page, '500')
        for page in pages:
            page.save(dir_to_write_single_page_image, 'JPEG')
            img = cv2.imread(dir_to_write_single_page_image)

            bin_scales = [1]
            img_last = 0
            for bin_s in bin_scales:
                res = self.predict(img, bin_s)

                img_fin = np.zeros((res.shape[0], res.shape[1], 3))
                res[:, :][res[:, :] == 0] = 2
                res = res - 1
                res = res * 255
                img_fin[:, :, 0] = res
                img_fin[:, :, 1] = res
                img_fin[:, :, 2] = res

                img_fin = img_fin.astype(np.uint8)
                # NOTE(review): this rebinds img_fin, so the three-channel
                # array built above is never used - confirm it can go.
                img_fin = (res[:, :] == 0) * 255
                img_last = img_last + img_fin

            img_last[:, :][img_last[:, :] > 0] = 255
            img_last = (img_last[:, :] == 0) * 255

            cv2.imwrite(
                os.path.join(dir_imgs_enhanced,
                             '{0:04}'.format(indexer) + '.jpg'),
                img_last)
            indexer = indexer + 1

    # os.listdir returns names in arbitrary order; the zero-padded names
    # sort into page order.
    enhanced = sorted(
        i for i in os.listdir(dir_imgs_enhanced) if i.endswith(".jpg"))
    with open(self.dir_out + "/output.pdf", "wb") as f:
        f.write(
            img2pdf.convert(
                [dir_imgs_enhanced + '/' + i for i in enhanced]))

    shutil.rmtree(dir_imgs_enhanced, ignore_errors=True)
import os
from PIL import Image
from img2pdf import convert
from pdf2image import convert_from_path

# Render each page of the source PDF to a numbered JPEG, crop and resize
# every image in the current directory in place, then bundle them into
# out.pdf.
images = convert_from_path('/home/belval/example.pdf')
for idx, img in enumerate(images):
    img.save('pdf_' + str(idx).zfill(len(str(len(images)))) + '.jpg', 'JPEG')

# Sorted so the pages end up in name order (os.listdir order is arbitrary).
# The script itself and the output of an earlier run are skipped.
dl = sorted(
    name for name in os.listdir(".") if name not in ("asdf.py", "out.pdf"))

pdf_list = []
for f in dl:
    with Image.open(f) as img:
        # Odd and even page numbers use different horizontal crop windows.
        if (int(f[-7:-4]) % 2) == 1:
            area = (125, 140, 1028 + 195, 1763)
        else:
            area = (231, 140, 1028 + 306, 1763)
        crop_img = img.crop(area)
        sizechange = crop_img.resize((1080, 2316))
    sizechange.save(f)
    pdf_list.append(f)

pdf = convert(pdf_list)
# The context manager closes the output even if the write fails.
with open("out.pdf", "wb") as file_name:
    file_name.write(pdf)
# Convert images to PDF via direct JPEG inclusion.
# PyPi: https://pypi.org/project/img2pdf/
# pip install img2pdf
import img2pdf

# opening from filename
with open("name.pdf", "wb") as f:
    f.write(img2pdf.convert('test.jpg'))

# opening from file handle (binary mode, since the image data is not text)
with open("name.pdf", "wb") as f1, open("test.jpg", "rb") as f2:
    f1.write(img2pdf.convert(f2))

# using in-memory image data (the literal is a placeholder for real data)
with open("name.pdf", "wb") as f:
    f.write(img2pdf.convert("\x89PNG..."))

# multiple inputs (variant 1)
with open("name.pdf", "wb") as f:
    f.write(img2pdf.convert("test1.jpg", "test2.png"))

# multiple inputs (variant 2)
with open("name.pdf", "wb") as f:
    f.write(img2pdf.convert(["test1.jpg", "test2.png"]))

# convert all files ending in .jpg inside a directory
dirname = "/path/to/images"
def handle(self, f=inputf, out=outputf, with_pdfrw=with_pdfrw):
    """Convert image ``f`` with img2pdf and check the resulting PDF.

    The PDF is parsed with pdfrw, every page is checked against the
    matching frame of the input image, and the document is finally
    compared with the PDF at ``out``.
    """
    with open(f, "rb") as inf:
        orig_imgdata = inf.read()
    output = img2pdf.convert(orig_imgdata, nodate=True,
                             with_pdfrw=with_pdfrw)
    from pdfrw import PdfReader, PdfName, PdfWriter
    from pdfrw.py23_diffs import convert_load, convert_store
    x = PdfReader(PdfReaderIO(convert_load(output)))
    self.assertEqual(sorted(x.keys()), [PdfName.Info, PdfName.Root,
                     PdfName.Size])
    self.assertIn(x.Root.Pages.Count, ('1', '2'))
    # NOTE(review): len() returns an int, so comparing it with the strings
    # '1' and '2' is never true and neither branch below runs - confirm
    # whether x.Root.Pages.Count was meant here.
    if len(x.Root.Pages.Kids) == '1':
        self.assertEqual(x.Size, '7')
        self.assertEqual(len(x.Root.Pages.Kids), 1)
    elif len(x.Root.Pages.Kids) == '2':
        self.assertEqual(x.Size, '10')
        self.assertEqual(len(x.Root.Pages.Kids), 2)
    self.assertEqual(x.Info, {})
    self.assertEqual(sorted(x.Root.keys()), [PdfName.Pages,
                                             PdfName.Type])
    self.assertEqual(x.Root.Type, PdfName.Catalog)
    self.assertEqual(sorted(x.Root.Pages.keys()),
                     [PdfName.Count, PdfName.Kids, PdfName.Type])
    self.assertEqual(x.Root.Pages.Type, PdfName.Pages)

    orig_img = Image.open(f)
    for pagenum in range(len(x.Root.Pages.Kids)):
        # retrieve the original image frame that this page was
        # generated from
        orig_img.seek(pagenum)
        cur_page = x.Root.Pages.Kids[pagenum]

        ndpi = orig_img.info.get("dpi", (96.0, 96.0))
        # In python3, the returned dpi value for some tiff images will
        # not be an integer but a float. To make the behaviour of
        # img2pdf the same between python2 and python3, we convert that
        # float into an integer by rounding.
        # Search online for the 72.009 dpi problem for more info.
        ndpi = (int(round(ndpi[0])), int(round(ndpi[1])))
        imgwidthpx, imgheightpx = orig_img.size
        # expected page size in PDF points (72 per inch)
        pagewidth = 72.0*imgwidthpx/ndpi[0]
        pageheight = 72.0*imgheightpx/ndpi[1]

        # whole numbers print without a fraction, everything else with
        # four decimals and trailing zeros stripped
        def format_float(f):
            if int(f) == f:
                return str(int(f))
            else:
                return ("%.4f" % f).rstrip("0")

        self.assertEqual(sorted(cur_page.keys()),
                         [PdfName.Contents, PdfName.MediaBox,
                          PdfName.Parent, PdfName.Resources,
                          PdfName.Type])
        self.assertEqual(cur_page.MediaBox,
                         ['0', '0', format_float(pagewidth),
                          format_float(pageheight)])
        self.assertEqual(cur_page.Parent, x.Root.Pages)
        self.assertEqual(cur_page.Type, PdfName.Page)
        self.assertEqual(cur_page.Resources.keys(),
                         [PdfName.XObject])
        self.assertEqual(cur_page.Resources.XObject.keys(),
                         [PdfName.Im0])
        self.assertEqual(cur_page.Contents.keys(),
                         [PdfName.Length])
        self.assertEqual(cur_page.Contents.Length,
                         str(len(cur_page.Contents.stream)))
        self.assertEqual(cur_page.Contents.stream,
                         "q\n%.4f 0 0 %.4f 0.0000 0.0000 cm\n"
                         "/Im0 Do\nQ" % (pagewidth, pageheight))

        imgprops = cur_page.Resources.XObject.Im0

        # test if the filter is valid:
        self.assertIn(
            imgprops.Filter, [PdfName.DCTDecode, PdfName.JPXDecode,
                              PdfName.FlateDecode,
                              [PdfName.CCITTFaxDecode]])

        # test if the image has correct size
        self.assertEqual(imgprops.Width, str(orig_img.size[0]))
        self.assertEqual(imgprops.Height, str(orig_img.size[1]))
        # if the input file is a jpeg then it should've been copied
        # verbatim into the PDF
        if imgprops.Filter in [PdfName.DCTDecode, PdfName.JPXDecode]:
            self.assertEqual(
                cur_page.Resources.XObject.Im0.stream,
                convert_load(orig_imgdata))
        elif imgprops.Filter == [PdfName.CCITTFaxDecode]:
            # prepend a TIFF header to the stream so that PIL can open it
            # and its pixels can be compared with the input frame
            tiff_header = tiff_header_for_ccitt(
                int(imgprops.Width), int(imgprops.Height),
                int(imgprops.Length), 4)
            imgio = BytesIO()
            imgio.write(tiff_header)
            imgio.write(convert_store(
                cur_page.Resources.XObject.Im0.stream))
            imgio.seek(0)
            im = Image.open(imgio)
            self.assertEqual(im.tobytes(), orig_img.tobytes())
            try:
                im.close()
            except AttributeError:
                pass

        elif imgprops.Filter == PdfName.FlateDecode:
            # otherwise, the data is flate encoded and has to be equal
            # to the pixel data of the input image
            imgdata = zlib.decompress(
                convert_store(cur_page.Resources.XObject.Im0.stream))
            if imgprops.DecodeParms:
                if orig_img.format == 'PNG':
                    pngidat, palette = img2pdf.parse_png(orig_imgdata)
                elif orig_img.format == 'TIFF' \
                        and orig_img.info['compression'] == "group4":
                    offset, length = \
                            img2pdf.ccitt_payload_location_from_pil(
                                    orig_img)
                    pngidat = orig_imgdata[offset:offset+length]
                else:
                    pngbuffer = BytesIO()
                    orig_img.save(pngbuffer, format="png")
                    pngidat, palette = img2pdf.parse_png(
                            pngbuffer.getvalue())
                self.assertEqual(zlib.decompress(pngidat), imgdata)
            else:
                # map the PDF colorspace name to the PIL mode used to
                # rebuild the image from the raw stream
                colorspace = imgprops.ColorSpace
                if colorspace == PdfName.DeviceGray:
                    colorspace = 'L'
                elif colorspace == PdfName.DeviceRGB:
                    colorspace = 'RGB'
                elif colorspace == PdfName.DeviceCMYK:
                    colorspace = 'CMYK'
                else:
                    raise Exception("invalid colorspace")
                im = Image.frombytes(colorspace,
                                     (int(imgprops.Width),
                                      int(imgprops.Height)),
                                     imgdata)
                if orig_img.mode == '1':
                    self.assertEqual(im.tobytes(),
                                     orig_img.convert("L").tobytes())
                elif orig_img.mode not in ("RGB", "L", "CMYK",
                                           "CMYK;I"):
                    self.assertEqual(im.tobytes(),
                                     orig_img.convert("RGB").tobytes())
                # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does
                # not have the close() method
                try:
                    im.close()
                except AttributeError:
                    pass
    # now use pdfrw to parse and then write out both pdfs and check the
    # result for equality
    y = PdfReader(out)
    outx = BytesIO()
    outy = BytesIO()
    xwriter = PdfWriter()
    ywriter = PdfWriter()
    xwriter.trailer = x
    ywriter.trailer = y
    xwriter.write(outx)
    ywriter.write(outy)
    self.assertEqual(compare_pdf(outx.getvalue(), outy.getvalue()), True)
    # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the
    # close() method
    try:
        orig_img.close()
    except AttributeError:
        pass
import img2pdf
import os
from PyPDF2 import PdfFileWriter, PdfFileReader

# Convert each numbered JPEG of a folder into its own PDF, then merge the
# per-page PDFs into a single document named after the folder.
foldername = "Wingding Orgy: Hot Tails Extreme #1 [English]"
img_group = "1160388"
number_of_page = 34

if not os.path.exists(foldername+"/pdf"):
    os.makedirs(foldername+"/pdf")

# NOTE(review): range(1, number_of_page) stops at number_of_page - 1, so
# page 34 is never converted although the message below reports 34 pages -
# confirm which is intended.
for x in range(1, number_of_page):
    pdf_bytes = img2pdf.convert([foldername+"/"+img_group+"_"+str(x)+".jpg"])
    # Closed right away so the data is flushed before the merge step
    # re-opens the file.
    with open(foldername+"/pdf/"+img_group+"_"+str(x)+".pdf", "wb") as file:
        file.write(pdf_bytes)

print("Done convert : "+str(number_of_page)+" pages of "+foldername+" to pdf")
print("EachPDF location : "+foldername+"/pdf/")


# Creating a routine that appends files to the output file
def append_pdf(input, output):
    for page_num in range(input.numPages):
        output.addPage(input.getPage(page_num))


# Creating an object where pdf pages are appended to
output = PdfFileWriter()

# Appending the pages of every per-page PDF; the source files stay open
# until the merged document has been written.
sources = []
try:
    for x in range(1, number_of_page):
        source = open(foldername+"/pdf/"+img_group+"_"+str(x)+".pdf", "rb")
        sources.append(source)
        append_pdf(PdfFileReader(source), output)

    # Writing all the collected pages to a file
    with open(foldername+"/"+foldername+".pdf", "wb") as merged:
        output.write(merged)
finally:
    for source in sources:
        source.close()
def run(self):
    """Download today's pages, convert them to PDFs, merge and optionally mail them.

    Steps, as performed below: create the image folder, scrape the
    ``img[data-big]`` elements of the site's front page, download each image
    while emitting progress signals, convert every downloaded image to a PDF,
    merge the PDFs through ``merge_pdf_in_folder``, remove both working
    folders and, when ``self.recipient_address_list`` is not ``['']``, send
    the merged PDF through ``send_email_pdf``.

    A ``FileExistsError`` is reported through ``error_signal_kr`` with its
    message. Any other exception is reported with a generic message and its
    traceback is written to ``error_info_kr.txt``.
    """
    try:
        folder_path_img = self.directory_path + r'/Karavali Munjavu ' + self.date_today
        folder_path_pdf = self.directory_path + r'/Karavali Munjavu pdf ' + self.date_today
        makedirs(folder_path_img)  # Folder named after today's date.

        url = 'http://www.karavalimunjavu.com/'
        res = get(url)
        res.raise_for_status()
        soup = BeautifulSoup(res.text, 'lxml')
        self.start_signal_kr.emit(True)

        # Downloading images.
        for images in soup.select('img[data-big]'):
            page_no = str(path.basename(images.get('data-big'))[15:])
            self.page_no_signal_kr.emit(page_no)
            img_download = get(url + images.get('data-big'), stream=True)
            # The header may be absent; without a known size the download
            # still proceeds, only the percentage updates are skipped.
            content_length = img_download.headers.get("Content-Length")
            file_size = int(content_length) if content_length else 0
            progress = 0
            with open(path.join(folder_path_img, page_no), 'wb') as f:
                for data in img_download:
                    f.write(data)
                    progress += len(data)
                    if file_size > 0:
                        percentage = (progress / file_size) * 100
                        self.percentage_signal_kr.emit(round(percentage, 0))
            sleep(0.2)
            self.percentage_signal_kr.emit(0)

        makedirs(folder_path_pdf)

        # Converting images to pdf.
        print('Converting images to pdf')
        for page in listdir(folder_path_img):
            # Opening with PIL first rejects files that are not images; the
            # handle is closed again before img2pdf reads the file by name.
            with Image.open(folder_path_img + '//' + page) as img:
                image_name = img.filename
            pdf_bytes = convert(image_name)
            with open(path.join(folder_path_pdf, page[:-4] + '.pdf'), 'wb') as file:
                file.write(pdf_bytes)
        print('Images converted to pdf.')

        rmtree(folder_path_img)  # Deleting folder containing images.

        self.pdf_progress_signal_kr.emit('Creating combined PDF')
        merge_pdf_in_folder(folder_path_pdf, self.directory_path,
                            'Karavali Munjavu ' + str(self.date_today))  # Merging PDFs
        self.pdf_progress_signal_kr.emit('Combined PDF created')

        rmtree(folder_path_pdf)  # Deleting folder containing pdfs.

        # Signal to let the main gui know that the process has completed.
        self.done_signal_kr.emit(True)

        if self.recipient_address_list != ['']:
            self.email_progress_signal_kr.emit('Sending email')
            send_email_pdf(self.recipient_address_list, [
                self.directory_path + r'/Karavali Munjavu ' + self.date_today + '.pdf'
            ], subject='Karavali Munjavu Newspaper ' + self.date_today)
            self.email_progress_signal_kr.emit('Mail sent successfully')
    except FileExistsError as file_error:
        self.error_signal_kr.emit(str(file_error))
    except Exception:
        # Top-level boundary of the worker: report and persist the traceback.
        self.error_signal_kr.emit(
            'Error occured. Written to error_info_kr.txt.')
        with open('error_info_kr.txt', 'w') as error_file:
            error_file.write(traceback.format_exc())
import img2pdf
import argparse
import os
import io

# Script: bundle every .jpg file of a directory into a single PDF.

ap = argparse.ArgumentParser()
ap.add_argument('-d', '--directory', required=True, help='path to folder contains images')
ap.add_argument('-o', '--output', required=True, help='output pdf file')
args = vars(ap.parse_args())

# os.listdir() returns names in arbitrary order; sort them so the page order
# of the PDF is deterministic.
image_data = []
for img in sorted(os.listdir(args['directory'])):
    if img.endswith('.jpg'):
        with open(os.path.join(args['directory'], img), 'rb') as f:
            image_data.append(f.read())

if not image_data:
    ap.error('no .jpg files found in ' + args['directory'])

# Convert before opening the output so a failed conversion does not leave a
# truncated PDF behind.
pdf_bytes = img2pdf.convert(image_data)
with open(args['output'], 'wb') as o:
    o.write(pdf_bytes)
# Download every page image referenced in `data`, convert each image to a
# one-page PDF inside the cache directory and merge them into "<title>.pdf".
matches = re.findall('":"(.*?)"', data)
# enumerate() gives the real position even when the same link occurs twice
# (list.index() would always report the first occurrence).
for page_index, match in enumerate(matches, 1):
    sys.stdout.write("\rDownloading page " + str(page_index) + "/" + str(npages) + "...")
    sys.stdout.flush()
    link = match.replace("\\u0026", "&").encode('utf-8')
    filename = hashlib.md5(link).hexdigest()
    with open(cache + filename + ".jpg", "wb") as f:
        f.write(requests.get(link).content)
    pages.append(filename)

print("\nConverting images to pdf format...")
for filename in pages:
    image_path = cache + filename + ".jpg"
    try:
        # python 2.x
        pdf_bytes = img2pdf.convert([image_path])
    except Exception:
        # python 3.x
        with open(image_path, "rb") as image_file:
            pdf_bytes = img2pdf.convert(image_file.read())
    with open(cache + filename + ".pdf", "wb") as f:
        f.write(pdf_bytes)
    sys.stdout.write("\r'" + filename + ".pdf' created!")
    sys.stdout.flush()

merger = PyPDF2.PdfFileMerger()
print("\nMerging everything...")
# The inputs stay open until the merged file has been written and are then
# closed explicitly.
open_pdfs = []
try:
    for filename in pages:
        pdf_file = open(cache + filename + '.pdf', 'rb')
        open_pdfs.append(pdf_file)
        merger.append(PyPDF2.PdfFileReader(pdf_file))
    merger.write(title + ".pdf")
finally:
    for pdf_file in open_pdfs:
        pdf_file.close()
print("Well done!")
def conversion(self, directory_path, conversion, delete_files, comic_name, chapter_number):
    """Convert the downloaded ``*.jpg`` pages of a chapter to PDF or CBZ.

    :param directory_path: Directory holding the numerically named ``.jpg`` pages.
    :param conversion: ``'pdf'`` or ``'cbz'`` (case-insensitive); the string
        ``"None"`` skips conversion, anything else prints a notice.
    :param delete_files: The page images are removed afterwards when this value,
        lower-cased and stripped, is one of ``'no'``, ``'false'`` or ``'delete'``.
    :param comic_name: Used to build the output file name.
    :param chapter_number: Used to build the output file name.
    :return: None

    The result is handed to ``self.conversion_cleaner``. When the conversion
    itself fails, the page images are always kept.
    """
    conversion_kind = str(conversion).lower().strip()
    # Set when writing the PDF/CBZ fails, so the source images are never
    # deleted after a failed conversion.
    conversion_failed = False

    if conversion_kind in ['pdf']:
        # Sort pages by the number before the first dot of the *file name*, so
        # dots in the directory part of the path cannot break the ordering.
        im_files = sorted(
            glob.glob(str(directory_path) + "/" + "*.jpg"),
            key=lambda x: int(os.path.basename(x).split('.')[0]))
        pdf_file_name = "{0} - Ch {1}.pdf".format(comic_name, chapter_number)
        pdf_file_path = str(directory_path) + "/" + str(pdf_file_name)
        try:
            # Convert first: a failing conversion must not leave an empty PDF.
            pdf_bytes = img2pdf.convert(im_files)
            with open(pdf_file_path, "wb") as f:
                f.write(pdf_bytes)
            print("Converted the file to pdf...")
        except Exception as FileWriteError:
            print("Coudn't write the pdf file...")
            print(FileWriteError)
            # Let's not delete the files if the conversion failed...
            conversion_failed = True

        try:
            self.conversion_cleaner(file_path=pdf_file_path)
        except Exception as FileMoveError:
            print("Could not move the pdf file.")
            print(FileMoveError)

    elif conversion_kind in ['cbz']:
        # The archive is first created next to the chapter directory.
        main_directory = str(directory_path).split(os.sep)
        main_directory.pop()
        cbz_directory = str(os.sep.join(main_directory)) + os.sep
        cbz_file_name = str(cbz_directory) + "{0} - Ch {1}".format(
            comic_name, chapter_number)
        cbz_archive_path = str(cbz_file_name) + ".cbz"

        try:
            shutil.make_archive(cbz_file_name, 'zip', directory_path, directory_path)
            # Swap only the extension; a ".zip" elsewhere in the path is untouched.
            os.rename(str(cbz_file_name) + ".zip", cbz_archive_path)
        except Exception as CBZError:
            print("Coudn't write the cbz file...")
            print(CBZError)
            # Let's not delete the files if the conversion failed...
            conversion_failed = True

        generatedFilePath = os.path.join(
            directory_path, cbz_file_name.split(os.sep).pop() + ".cbz")
        try:
            # cbz_archive_path already contains cbz_directory.
            shutil.move(cbz_archive_path, generatedFilePath)
        except Exception as FileDeleteError:
            print("Couldn't move the file or delete the directory.")
            print(FileDeleteError)

        try:
            self.conversion_cleaner(file_path=str(generatedFilePath))
        except Exception as FileMoveError:
            print("Could not move the cbz file.")
            print(FileMoveError)

    elif str(conversion) == "None":
        pass
    else:
        print(
            "Seems like that conversion isn't supported yet. Please report it on the repository..."
        )

    if not conversion_failed and str(delete_files).lower().strip() in ['no', 'false', 'delete']:
        for image_files in glob.glob(str(directory_path) + "/" + "*.jpg"):
            try:
                os.remove(image_files)
            except Exception as FileDeleteError:
                print("Couldn't delete the image file...")
                print(FileDeleteError)
        print("Deleted the files...")
# Resolve the working paths for this yearbook: the yearbook PDF itself and the
# directory whose entries are listed as collage candidates.
current_dir = os.path.join(ROOT_DIR, current_dir)
yearbook_pdf = os.path.join(current_dir, 'yearbook.pdf')
collage_dir = os.path.join(current_dir, 'collages')
collage_pdfs = os.listdir(collage_dir)
# frontpage_pdf = os.path.join(ROOT_DIR, 'frontpage.pdf')
a4inpt = (img2pdf.mm_to_pt(210), img2pdf.mm_to_pt(297))  # specify page size (A4)
layout_fun = img2pdf.get_layout_fun(a4inpt)
# convert department pic 1 to pdf (only when the JPEG exists)
department_pic1_pdf = os.path.join(current_dir, 'department_pic1.pdf')
department_pic1 = os.path.join(current_dir, 'department_pic1.jpg')
if os.path.exists(department_pic1):
    with open(department_pic1_pdf, "wb") as f:
        f.write(img2pdf.convert(department_pic1, layout_fun=layout_fun))
# convert department pic 2 to pdf (only when the JPEG exists)
department_pic2_pdf = os.path.join(current_dir, 'department_pic2.pdf')
department_pic2 = os.path.join(current_dir, 'department_pic2.jpg')
if os.path.exists(department_pic2):
    with open(department_pic2_pdf, "wb") as f:
        f.write(img2pdf.convert(department_pic2, layout_fun=layout_fun))
# Start the merged document with the yearbook PDF.
merger = PdfFileMerger()
# merger.append(frontpage_pdf)
merger.append(yearbook_pdf)
# NOTE(review): the visible code ends right after `pdf` is computed; whatever
# is done with each collage PDF (and with the department PDFs written above)
# is outside this view — confirm against the full file.
for collage in collage_pdfs:
    if collage.endswith('.pdf'):
        pdf = os.path.join(collage_dir, collage)
print ("Compilando em pdf...\n")
# The PDF name is the last path component of the URL found in the final
# script tag, without the two characters that trail it in the match.
namePDF = re.search(r"(?P<url>https?://[^\s]+(}))", script_tags[-1].text).group("url")
namePDF = namePDF[namePDF.rfind('/') + 1:-2]

# list the names of the downloaded images in arq
os.chdir('img/')
arq = os.listdir(os.getcwd())
arq.sort()

if len(arq) != num_pages:
    print("ERRO 1.2 tente novamente")
    # Remove the partial download without going through a shell.
    for leftover in arq:
        if leftover.endswith('.jpg') and not leftover.startswith('.'):
            os.remove(leftover)
    sys.exit()

with open("../" + namePDF + ".pdf", "wb") as f:
    f.write(img2pdf.convert([i for i in arq if i.endswith(".jpg")]))

# Re-open the result to count its pages; the handle is closed right after.
with open("../" + namePDF + ".pdf", 'rb') as pdf_file:
    reader = PyPDF2.PdfFileReader(pdf_file)
    pdfpages = reader.getNumPages()

os.chdir('../')

if num_pages != pdfpages:
    # namePDF comes from page content, so it is never passed to a shell.
    os.remove(namePDF + ".pdf")
    print("ERRO 1.3 tente novamente")

# remove the downloaded images
for leftover in os.listdir('img'):
    if leftover.endswith('.jpg') and not leftover.startswith('.'):
        os.remove(os.path.join('img', leftover))
def triage_image_file(input_file, output_file, log, options):
    """Check that a non-PDF input is a usable image and convert it to PDF.

    The image is opened with PIL and rejected when its resolution is missing
    or not above (96, 96) and ``options.image_dpi`` is unset, when it has an
    alpha channel, or when it is CMYK without an ICC profile. A usable image
    is written to ``output_file`` through ``img2pdf``, at a fixed DPI when
    ``options.image_dpi`` is set.

    :raises UnsupportedImageFormatError: the file cannot be opened as an image,
        has an alpha channel, is CMYK without an ICC profile, or img2pdf
        cannot open it.
    :raises DpiError: the resolution is missing or not credible and no
        ``--image-dpi`` was given.
    """
    try:
        log.info("Input file is not a PDF, checking if it is an image...")
        im = Image.open(input_file)
    except EnvironmentError as e:
        msg = str(e)

        # Recover the original filename
        realpath = ''
        if os.path.islink(input_file):
            realpath = os.path.realpath(input_file)
        elif os.path.isfile(input_file):
            realpath = '<stdin>'
        msg = msg.replace(input_file, realpath)
        log.error(msg)
        raise UnsupportedImageFormatError() from e
    else:
        # Close the image on every path, including the rejections below.
        try:
            log.info("Input file is an image")

            if 'dpi' in im.info:
                if im.info['dpi'] <= (96, 96) and not options.image_dpi:
                    log.info("Image size: (%d, %d)" % im.size)
                    log.info("Image resolution: (%d, %d)" % im.info['dpi'])
                    log.error(
                        "Input file is an image, but the resolution (DPI) is "
                        "not credible.  Estimate the resolution at which the "
                        "image was scanned and specify it using --image-dpi.")
                    raise DpiError()
            elif not options.image_dpi:
                log.info("Image size: (%d, %d)" % im.size)
                log.error("Input file is an image, but has no resolution (DPI) "
                          "in its metadata.  Estimate the resolution at which "
                          "image was scanned and specify it using --image-dpi.")
                raise DpiError()

            if im.mode in ('RGBA', 'LA'):
                log.error("The input image has an alpha channel.  Remove the alpha "
                          "channel first.")
                raise UnsupportedImageFormatError()

            # Pillow stores an embedded profile under 'icc_profile'; the
            # previously checked spelling is still honoured.
            has_icc_profile = 'icc_profile' in im.info or 'iccprofile' in im.info
            if not has_icc_profile:
                if im.mode == 'RGB':
                    log.info('Input image has no ICC profile, assuming sRGB')
                elif im.mode == 'CMYK':
                    log.info('Input CMYK image has no ICC profile, not usable')
                    raise UnsupportedImageFormatError()
        finally:
            im.close()

    try:
        log.info("Image seems valid. Try converting to PDF...")
        layout_fun = img2pdf.default_layout_fun
        if options.image_dpi:
            layout_fun = img2pdf.get_fixed_dpi_layout_fun(
                (options.image_dpi, options.image_dpi))
        with open(output_file, 'wb') as outf:
            img2pdf.convert(input_file, layout_fun=layout_fun,
                            with_pdfrw=False, outputstream=outf)
        log.info("Successfully converted to PDF, processing...")
    except img2pdf.ImageOpenError as e:
        log.error(e)
        raise UnsupportedImageFormatError() from e
def conversion(self, directory_path, conversion, keep_files, comic_name, chapter_number):
    """Convert the downloaded ``*.jpg`` pages of a chapter to PDF or CBZ.

    The converted file is written next to ``directory_path`` (in its parent
    directory). An already existing output file is left untouched.

    :param directory_path: Directory holding the numerically named ``.jpg`` pages.
    :param conversion: ``'pdf'`` or ``'cbz'`` (case-insensitive); the string
        ``"None"`` skips conversion, anything else prints a notice.
    :param keep_files: ``directory_path`` is removed afterwards when this value,
        lower-cased and stripped, is one of ``'no'``, ``'false'`` or ``'delete'``.
    :param comic_name: Used to build the output file name.
    :param chapter_number: Used to build the output file name.
    :return: None

    When the conversion fails the page images are always kept.
    """
    main_directory = str(directory_path).split(os.sep)
    main_directory.pop()
    converted_file_directory = str(os.sep.join(main_directory)) + os.sep
    conversion_kind = str(conversion).lower().strip()

    if conversion_kind in ['pdf']:
        # Sort pages by the number before the first dot of the *file name*, so
        # dots in the directory part of the path cannot break the ordering.
        im_files = sorted(
            glob.glob(str(directory_path) + "/" + "*.jpg"),
            key=lambda x: int(os.path.basename(x).split('.')[0]))
        pdf_file_name = str(converted_file_directory) + "{0} - Ch {1}.pdf".format(
            comic_name, chapter_number)
        try:
            # Same existence check as in the "cbz" section below.
            if os.path.isfile(pdf_file_name):
                print('[Comic-dl] PDF File Exist! Skipping : {0}\n'.format(pdf_file_name))
            else:
                # Convert first: a failed conversion must not leave an empty
                # PDF behind that would make later runs skip this chapter.
                pdf_bytes = img2pdf.convert(im_files)
                with open(pdf_file_name, "wb") as f:
                    f.write(pdf_bytes)
                print("Converted the file to pdf...")
        except Exception as FileWriteError:
            print("Couldn't write the pdf file...")
            print(FileWriteError)
            # Let's not delete the files if the conversion failed...
            keep_files = "True"

    elif conversion_kind in ['cbz']:
        cbz_file_name = str(converted_file_directory) + "{0} - Ch {1}".format(
            comic_name, chapter_number)
        print("CBZ File : {0}".format(cbz_file_name))

        try:
            # If the .cbz file exists, we don't need to make it again. Making
            # it again would create the .zip file and then hit an exception
            # about the file existing already (this raised #105).
            if os.path.isfile(str(cbz_file_name) + ".cbz"):
                print('[Comic-dl] CBZ File Exist! Skipping : {0}\n'.format(cbz_file_name))
            else:
                shutil.make_archive(cbz_file_name, 'zip', directory_path, directory_path)
                # Swap only the extension; a ".zip" elsewhere in the path is untouched.
                os.rename(str(cbz_file_name) + ".zip", str(cbz_file_name) + ".cbz")
        except Exception as CBZError:
            print("Couldn't write the cbz file...")
            print(CBZError)
            # Let's not delete the files if the conversion failed...
            keep_files = "True"

    elif str(conversion) == "None":
        pass
    else:
        print("Seems like that conversion isn't supported yet. Please report it on the repository...")

    if str(keep_files).lower().strip() in ['no', 'false', 'delete']:
        try:
            shutil.rmtree(path=directory_path, ignore_errors=True)
        except Exception as DirectoryDeleteError:
            print("Couldn't move the file or delete the directory.")
            print(DirectoryDeleteError)
        print("Deleted the files...")
def merge(all_files):
    """Ask for an output name, write ``all_files`` into ``<name>.pdf`` and delete the sources.

    :param all_files: Images handed to ``img2pdf.convert``; each entry is
        removed with ``os.remove`` once the PDF has been written.
    """
    output_name = input("\n\nEnter a name for file: ")
    pdf_path = output_name + ".pdf"

    with open(pdf_path, "wb") as pdf_file:
        pdf_bytes = img2pdf.convert(all_files)
        pdf_file.write(pdf_bytes)

    for source_path in all_files:
        os.remove(source_path)
""" Convertendo as imagens """ for img in ja_existem: im = Image.open("comics\\{}".format(img)) rgb_im = im.convert('RGB') if gray_scale: rgb_im = rgb_im.convert('L') rgb_im.save('comics_convertidos\\{}.jpg'.format(img.split(".")[0])) """ Buscando o caminho das imagens """ ja_existem = os.listdir("comics_convertidos") lista_imagens = [] for img in ja_existem: lista_imagens.append("comics_convertidos\{}".format(img)) """ Convertendo em pdf """ nome_manga = url_raiz.split("/")[3] nome_arquivo = "comics_fate_{}".format(nome_manga) if gray_scale: nome_arquivo = "{}_cinza.pdf".format(nome_arquivo) else: nome_arquivo = "{}_colorido.pdf".format(nome_arquivo) with open(nome_arquivo,"wb") as f: f.write(img2pdf.convert(lista_imagens))
def convert_to_pdf(img_list, output_folder, pdf_file_name):
    """Convert ``img_list`` with img2pdf and store the result in ``output_folder``.

    :param img_list: Images passed to ``img2pdf.convert`` (called with ``dpi=100``).
    :param output_folder: Directory that receives the PDF.
    :param pdf_file_name: File name of the PDF inside ``output_folder``.
    """
    document = img2pdf.convert(img_list, dpi=100)
    destination = os.path.join(output_folder, pdf_file_name)
    with open(destination, 'wb') as pdf_out:
        pdf_out.write(document)
def getPdfBytes(self):
    """Return the result of ``img2pdf.convert`` applied to ``self.outfiles``."""
    return img2pdf.convert(self.outfiles)
# NOTE(review): the first statements are the tail of a function whose `def`
# line is outside this view (presumably the `mail_on` called in the loop
# below — confirm); their indentation is reconstructed.
    # Send the prepared message and close the SMTP connection.
    text = msg.as_string()
    server.sendmail(fromaddr, toaddr, text)
    server.quit()
    print "process over check your email"
    return 0


# Pin setup (BCM numbering): pin 21 is an input with pull-up, pins 20 and 16
# are outputs.
GPIO.setmode(GPIO.BCM)
GPIO.setup(21, GPIO.IN, pull_up_down=GPIO.PUD_UP)
GPIO.setup(20, GPIO.OUT)
GPIO.setup(16, GPIO.OUT)

# Poll pin 21 forever; a low level triggers capture, PDF conversion and mail.
while 1:
    GPIO.output(20, 0)
    GPIO.output(16, 0)
    cam_on = GPIO.input(21)
    if (cam_on == 0):
        GPIO.output(20, 1)
        print "image capturing"
        os.system("sudo fswebcam -r 640x480 -s 15 image.jpg")
        print "image captured "
        # NOTE(review): fswebcam writes "image.jpg" relative to the current
        # directory while the file read here is "/home/pi/image.jpg", and it is
        # opened in text mode — confirm the working directory and consider "rb".
        with open("name.pdf", "wb") as f1, open("/home/pi/image.jpg") as f2:
            f1.write(img2pdf.convert(f2))
        mail_on()
        GPIO.output(20, 1)
        GPIO.output(16, 1)
        time.sleep(7)
        cam_on = 1
def dir_to_pdf(path, save_path):
    """Bundle every image found in ``path`` into ``<save_path>/<name of path>.pdf``.

    Entries are processed in ``numericalSort`` order. Entries PIL cannot open
    are reported and skipped. Images with transparency or in RGBA mode are
    flattened onto a white background and stored as temporary JPEG files,
    which are removed again once the PDF step has finished. Errors while
    writing the PDF are logged through ``logger``.

    :param path: Directory whose entries are tried as images.
    :param save_path: Directory that receives the PDF.
    """
    entries = sorted(os.listdir(path), key=numericalSort)
    file_path_list = []  # paths handed to img2pdf, in page order
    temp_files = []      # flattened copies that must be cleaned up

    try:
        for directory in entries:
            file_path = os.path.join(path, directory)
            try:
                # open image using PIL library
                source_img = Image.open(file_path)
            except Exception as ex:
                # if any error occurs skip the file
                print('[ERROR] [%s] Cant open %s as image!' %
                      (type(ex).__name__.upper(), os.path.join(path, directory)))
                continue

            # The source handle is closed as soon as the entry is processed.
            with source_img:
                new_img = source_img
                # if image has transparency, convert to RGBA mode
                if 'transparency' in new_img.info:
                    new_img = new_img.convert('RGBA')
                if new_img.mode == 'RGBA':
                    print('[CONVERT] [%s] RGBA to RGB' %
                          os.path.basename(os.path.normpath(directory)).upper())
                    # white background of the same size, pasted using alpha as mask
                    rgb = Image.new('RGB', new_img.size, (255, 255, 255))
                    rgb.paste(new_img, new_img.split()[3])
                    # mkstemp creates the file itself, so the name cannot be
                    # taken by someone else before it is written.
                    handle, temp = tempfile.mkstemp(suffix='.jpg')
                    os.close(handle)
                    temp_files.append(temp)
                    rgb.save(temp, 'JPEG')
                    # the flattened copy replaces the original in the PDF
                    file_path = temp
            file_path_list.append(file_path)

        # if no images exit
        if not file_path_list:
            return

        try:
            # save as pdf using img2pdf
            with open(
                    os.path.join(save_path,
                                 os.path.basename(os.path.normpath(path)) + '.pdf'),
                    'wb') as f:
                f.write(img2pdf.convert(file_path_list))
        except Exception as e:
            logger.error('[%s] [%s] %s' % (type(e).__name__.upper(), path, e))
    finally:
        # Temporary JPEGs are only needed until the PDF has been produced.
        for temp in temp_files:
            try:
                os.remove(temp)
            except OSError:
                pass
def img_pdf(files: list) -> None:
    """Write ``files`` as one PDF named after today's date.

    The target is ``<directory>/<year><month><day>_21jingji.pdf``, where the
    directory comes from ``get_today_directory()`` and the date parts from
    ``get_now_day()``.
    """
    directory = get_today_directory()
    date_parts = get_now_day()
    year, month, day = date_parts
    output_file = f"{directory}/{year}{month}{day}_21jingji.pdf"

    with open(output_file, "wb") as pdf_file:
        pdf_data = img2pdf.convert(files)
        pdf_file.write(pdf_data)
def inboxScan(driver):
    """Walk through the inbox rows and act on each mail by subject and sender.

    For every row id collected from the fourth ``<table>`` of the page, the
    mail is opened and classified:

    * subject contains "Receipt:" and the sender matches ``validEmails``:
      the attachment is saved through keyboard automation as
      ``<text after the first ':' of the subject>.jpg`` and the mail is
      labelled 'Pertinent';
    * subject contains "Generate Report" and the sender matches: the ``.jpg``
      names listed in the bot folder are converted to ``report.pdf``, a reply
      with that file attached is sent and the mail is labelled 'Pertinent';
    * anything else is labelled 'NonPertinent'.

    Relies on the module-level ``actionChains`` and on fixed ``time.sleep``
    pauses between UI actions.
    """
    #refresh
    driver.refresh()
    #identify email ids in inbox
    idList = []
    time.sleep(5)
    html = driver.page_source
    soup = bs(html, 'lxml')
    table = soup.findAll('table')[3]
    for row in table.findAll('tr'):
        idList.append(str(row.get('id')))
    #perform scan for each email in inbox
    i = 0
    while i < len(idList):
        emailRow = driver.find_element_by_xpath('//*[@id="' + idList[i] + '"]')
        emailRow.click()
        time.sleep(2)
        #identify email body for bs4
        html = driver.page_source
        page_body = driver.find_element_by_tag_name('body')
        soup = bs(html, 'lxml')
        emailContent = soup.findAll('table', {'role': 'presentation'})
        #find sender
        # NOTE(review): `sender` (and `subject` below) are only assigned when a
        # matching element is found; otherwise they keep the value of an earlier
        # mail or are unbound on the first one.
        for row in emailContent:
            for line in row.findAll():
                if line.has_attr('email'):
                    if not(line.get('email') == "*****@gmail.com"):
                        sender = line
        #find subject (the last <h2> found wins)
        for row in emailContent:
            for line in row.findAll('h2'):
                subject = line
        #check if subject.text contains "Receipt" and if sender.get('email') is from "*****@gmail.com"
        validEmails = ["*****@gmail.com", "*****@mms.att.net", "*****@dawson8a.com"]
        if "Receipt:" in subject.text and any(x in sender.get('email') for x in validEmails):
            #get file name from subject
            emailSubject = subject.text
            fileName = emailSubject.split(":")[1]
            fileName = fileName.strip()
            #LOCATE and download and name attachment to path
            time.sleep(1)
            page_body = driver.find_element_by_tag_name('body')
            # seven TABs, then the context menu (SHIFT+F10) and its fourth entry
            page_body.send_keys(Keys.TAB)
            page_body.send_keys(Keys.TAB)
            page_body.send_keys(Keys.TAB)
            page_body.send_keys(Keys.TAB)
            page_body.send_keys(Keys.TAB)
            page_body.send_keys(Keys.TAB)
            page_body.send_keys(Keys.TAB)
            time.sleep(2)
            actionChains.send_keys(Keys.SHIFT + Keys.F10).perform()
            pyautogui.typewrite(['down','down','down','down', 'enter'])
            time.sleep(5)
            pyautogui.typewrite(r"C:\Users\m4k04\Desktop\gmailBot\{}.jpg".format(fileName))
            pyautogui.typewrite(['enter'])
            time.sleep(2)
            #label pertinent
            page_body = driver.find_element_by_tag_name('body')
            time.sleep(1)
            page_body.send_keys('v')
            time.sleep(2)
            driver.find_element_by_xpath("//div[@title='Pertinent']").click()
        elif "Generate Report" in subject.text and any(x in sender.get('email') for x in validEmails):
            #generate report
            # NOTE(review): os.listdir() yields bare file names, so img2pdf
            # resolves them against the current working directory — presumably
            # the script runs from the gmailBot folder; confirm.
            with open("report.pdf","wb") as f:
                f.write(img2pdf.convert([i for i in os.listdir(r'C:/Users/m4k04/Desktop/gmailBot') if i.endswith(".jpg")]))
            #create reply
            page_body.send_keys('r')
            time.sleep(3)
            attachButton = driver.find_element_by_xpath('//*[@data-tooltip = "Attach files"]')
            attachButton.click()
            time.sleep(3)
            pyautogui.typewrite(r"C:\Users\m4k04\Desktop\gmailBot\report.pdf")
            pyautogui.typewrite(['enter'])
            time.sleep(3)
            sendButton = driver.find_element_by_xpath('//*[@data-tooltip="Send (Ctrl-Enter)"]')
            sendButton.click()
            time.sleep(3)
            #label pertinent
            page_body = driver.find_element_by_tag_name('body')
            time.sleep(1)
            page_body.send_keys('v')
            time.sleep(2)
            driver.find_element_by_xpath("//div[@title='Pertinent']").click()
        else:
            #label nonpertinent
            page_body = driver.find_element_by_tag_name('body')
            time.sleep(1)
            page_body.send_keys('v')
            time.sleep(2)
            driver.find_element_by_xpath("//div[@title='NonPertinent']").click()
        # NOTE(review): placement of this pause at loop level (rather than
        # inside the else branch) is reconstructed — confirm.
        time.sleep(2)
        i +=1
    timeNow = datetime.datetime.now().strftime("%H:%M:%S")
    print("Gmail Scanned at: " + str(timeNow))
def write_pdf(image_list, pdf_name='name_removed.pdf'):
    """Convert ``image_list`` with img2pdf and write the result to ``pdf_name``.

    :param image_list: Images passed to ``img2pdf.convert``.
    :param pdf_name: Path of the PDF to write. This argument used to be
        ignored in favour of a hard-coded ``'aaa.pdf'``.
    """
    # Convert first so a failed conversion does not leave an empty file.
    pdf_bytes = img2pdf.convert(image_list)
    with open(pdf_name, 'wb') as f:
        f.write(pdf_bytes)
# Convert every image of the zip archive (suffix listed in Extends) to its own
# PDF inside a directory named after the archive; images that fail are
# extracted into the 'Faild' sub-directory instead.
image_list = [str(p) for p in path_list if p.suffix in Extends]
Sucsess_list = []
Error_list = []
Filename = Path(ZIP_FILE.stem)
Filename.mkdir()
i = 0
for image in image_list:
    pdf_path = Filename / Path(Path(image).name).with_suffix('.pdf')
    try:
        with zipfilepointer.open(image) as img:
            with open(str(pdf_path), 'wb') as op:
                op.write(img2pdf.convert(img))
    except Exception:
        print('Error:', image)
        # Do not leave the empty/partial PDF of a failed conversion behind.
        if pdf_path.exists():
            pdf_path.unlink()
        zipfilepointer.extract(image, Filename / 'Faild')
        Error_list.append(image)
    else:
        print('Sucsess:', image)
        Sucsess_list.append(image)
    # NOTE(review): whether this counter advances for every image or only for
    # successful ones cannot be told from the flattened source — confirm.
    i += 1
print('\nConvert Sucsess:')
pprint.pprint(Sucsess_list)
print('\nConvert Faild:')
pprint.pprint(Error_list)
import img2pdf
import os
import numpy as np

# Script: merge the numbered .jpg files of ./images into output.pdf (written
# inside that directory) and delete the .jpg files afterwards.

path = os.getcwd() + "/images"
os.chdir(path)
myimages = []
dirFiles = os.listdir(os.getcwd())


def _page_number(fname):
    """Return the integer formed by the last whitespace-separated token of the file stem."""
    stem = fname.rsplit(os.path.extsep, 1)[0]
    return int(stem.rsplit(None, 1)[-1])


fnames = sorted(
    (name for name in os.listdir(os.getcwd()) if name.endswith('.jpg')),
    key=_page_number)

with open("output.pdf", "wb") as f:
    jpg_pages = [name for name in fnames if name.endswith(".jpg")]
    f.write(img2pdf.convert(jpg_pages))

# Remove the source images once the PDF exists.
imgDir = os.listdir(path)
for image in (entry for entry in imgDir if entry.endswith(".jpg")):
    os.remove(os.path.join(path, image))
def handle(self, f=inputf, out=outputf):
    """Convert the image file ``f`` and compare the PDF with the expected file ``out``.

    The generated PDF is scanned line by line to pull out the image object's
    width, height, length, filter and colorspace plus the raw stream data.
    Those values are checked against the input image, and finally the whole
    PDF is compared byte for byte with the content of ``out``.
    """
    with open(f, "rb") as inf:
        orig_imgdata = inf.read()
    pdf = img2pdf.convert([f], nodate=True)
    imgdata = b""
    instream = False
    imgobj = False
    colorspace = None
    imgfilter = None
    width = None
    height = None
    length = None
    # ugly workaround to parse the created pdf
    for line in pdf.split(b'\n'):
        if instream:
            # everything up to "endstream" belongs to the image stream
            if line == b"endstream":
                break
            else:
                imgdata += line + b'\n'
        else:
            # a "stream" line only counts once the image object was seen
            if imgobj and line == b"stream":
                instream = True
            elif b"/Subtype /Image" in line:
                imgobj = True
            elif b"/Width" in line:
                width = int(line.split()[-1])
            elif b"/Height" in line:
                height = int(line.split()[-1])
            elif b"/Length" in line:
                length = int(line.split()[-1])
            elif b"/Filter" in line:
                imgfilter = line.split()[-2]
            elif b"/ColorSpace" in line:
                colorspace = line.split()[-1]
    # remove trailing \n
    imgdata = imgdata[:-1]
    # test if the length field is correct
    self.assertEqual(len(imgdata), length)
    # test if the filter is valid:
    self.assertIn(imgfilter, [b"/DCTDecode", b"/JPXDecode", b"/FlateDecode"])
    # test if the colorspace is valid
    self.assertIn(colorspace, [b"/DeviceGray", b"/DeviceRGB", b"/DeviceCMYK"])
    # test if the image has correct size
    orig_img = Image.open(f)
    self.assertEqual(width, orig_img.size[0])
    self.assertEqual(height, orig_img.size[1])
    # if the input file is a jpeg then it should've been copied
    # verbatim into the PDF
    if imgfilter in [b"/DCTDecode", b"/JPXDecode"]:
        self.assertEqual(imgdata, orig_imgdata)
    elif imgfilter == b"/FlateDecode":
        # otherwise, the data is flate encoded and has to be equal to
        # the pixel data of the input image
        imgdata = zlib.decompress(imgdata)
        # map the PDF colorspace name to the matching PIL mode
        if colorspace == b"/DeviceGray":
            colorspace = 'L'
        elif colorspace == b"/DeviceRGB":
            colorspace = 'RGB'
        elif colorspace == b"/DeviceCMYK":
            colorspace = 'CMYK'
        else:
            raise Exception("invalid colorspace")
        im = Image.frombytes(colorspace, (width, height), imgdata)
        # bring the original into the mode used for the comparison below
        if orig_img.mode == '1':
            orig_img = orig_img.convert("L")
        elif orig_img.mode not in ("RGB", "L", "CMYK", "CMYK;I"):
            orig_img = orig_img.convert("RGB")
        self.assertEqual(im.tobytes(), orig_img.tobytes())
        # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the close() method
        try:
            im.close()
        except AttributeError:
            pass
    # lastly, make sure that the generated pdf matches bit by bit the
    # expected pdf
    with open(out, "rb") as outf:
        out = outf.read()
    self.assertEqual(pdf, out)
    # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the close() method
    try:
        orig_img.close()
    except AttributeError:
        pass
import img2pdf

# Convert name.jpg, addressed by its file name, and store the result as name.pdf.
with open("name.pdf", "wb") as pdf_file:
    pdf_bytes = img2pdf.convert('name.jpg')
    pdf_file.write(pdf_bytes)
# Resolve the per-student working directory and the files taken from it.
current_dir = os.path.join(ROOT_DIR, current_dir)
collage_dir = os.path.join(current_dir, 'collages')
yearbook_pdf = os.path.join(current_dir, 'yearbook.pdf')
collage_pdfs = os.listdir(collage_dir)
# frontpage_pdf = os.path.join(ROOT_DIR, 'frontpage.pdf')

# A4 page size in points, used as the layout for the converted picture.
a4inpt = tuple(img2pdf.mm_to_pt(side_mm) for side_mm in (210, 297))
layout_fun = img2pdf.get_layout_fun(a4inpt)

# The close friends picture becomes a PDF only when the JPEG is present.
close_friends_pic, close_friends_pic_pdf = (
    os.path.join(current_dir, 'closeFriendsPic' + extension)
    for extension in ('.jpg', '.pdf'))
if os.path.exists(close_friends_pic):
    with open(close_friends_pic_pdf, "wb") as f:
        f.write(img2pdf.convert(close_friends_pic, layout_fun=layout_fun))

# Front page and yearbook first; the close friends PDF, when it exists, is
# merged in at position 1; every collage PDF follows.
merger = PdfFileMerger()
for leading_pdf in (FRONT_PAGE, yearbook_pdf):
    merger.append(leading_pdf)
if os.path.exists(close_friends_pic_pdf):
    merger.merge(1, close_friends_pic_pdf)
for collage in collage_pdfs:
    if not collage.endswith('.pdf'):
        continue
    pdf = os.path.join(collage_dir, collage)
    merger.append(pdf)

final_pdf_path = os.path.join(current_dir, 'final_yearbook_' + student_name + '.pdf')
merger.write(final_pdf_path)
# NOTE(review): this fragment starts inside a loop/if whose header lines are
# outside this view; the indentation of the first statements is reconstructed.
        break
    else:
        # The folder already exists: ask whether to continue inside it.
        print(folder, " klasörü önceden oluşturulmu.Burdan devam etmek ister misin?")
        print("\n")
        ans = input("Cevap: ")
        if ans == 'e' or ans == 'E':
            os.chdir(folder)
            break
        else:
            continue

# Ask which part of the URL path is replaced by "-<page number>".
nu = input("Değiştirmek istediğin karakter hangisi?: ")
print("\n\n")
try:
    # Download pages 1..page_no as deneme<i>.jpg into the current directory.
    for i in range(1, int(page_no) + 1):
        path_array[i] = u_path.replace(nu, "-" + str(i))
        new_url = url.scheme + "://" + url.netloc + path_array[i]
        urllib.request.urlretrieve(new_url, "deneme" + str(i) + ".jpg")
        print("*" * i + " " + str(i) + ".sayfa indirildi")
# NOTE(review): only ModuleNotFoundError is handled here; download errors
# raised by urlretrieve propagate — confirm this is intended.
except ModuleNotFoundError as e:
    print(e)
print("\nPDF oluşturuluyor...")
print("\n\n")
pdf_name = input("PDF ismi: ")
print("\n\n")
# NOTE(review): os.listdir() does not guarantee an order, so the pages may not
# end up in numeric order in the PDF — verify.
with open(pdf_name + ".pdf", "wb") as f:
    f.write(img2pdf.convert([i for i in os.listdir('.') if i.endswith(".jpg")]))
print("\nPDF oluşturuldu.")
def save_pdf(self):
    """Convert the image at ``self.filename`` to a PDF stored at ``self.f_pdf``."""
    # Image data is binary; text mode would decode (and possibly corrupt) it.
    with open(self.filename, 'rb') as img_fp:
        sd = img2pdf.convert([img_fp], 150, x=620, y="")
    with open(self.f_pdf, 'wb') as hand:
        hand.write(sd)
def pngToPDF(fromFile, toFile):
    """Convert ``fromFile`` with img2pdf and write the resulting PDF to ``toFile``."""
    with open(toFile, "wb") as pdf_out:
        pdf_data = img2pdf.convert(fromFile)
        pdf_out.write(pdf_data)
# Download the images of one chapter into "<topic>/<title>/", build a PDF from
# them and upload it to the storage bucket under "chapter-pdf/".
imgs = []
for i, j in enumerate(imgUrls):
    print("downloading %s" % (j))
    # Create "<topic>/<title>" on demand (checked again for every image).
    if not os.path.exists(topic + '/' + title):
        if not os.path.exists(topic):
            os.mkdir(topic)
        os.mkdir(topic + '/' + title)
        print("Directory ", topic + '/' + title, " Created ")
    else:
        print("Directory ", topic + '/' + title, " already exists")
    fullfilename = os.path.join(f'{topic}/{title}', f'{i}.jpeg')
    # NOTE(review): the flattened source does not show which statements belong
    # to this `if`; download + watermark are assumed to be skipped for files
    # already on disk while the path is always collected — confirm.
    if not os.path.exists(fullfilename):
        urlretrieve(j, f"{fullfilename}")
        addWatermark(fullfilename)
    imgs.append(fullfilename)
    # if not os.path.exists(f"{topic}/{title} {item.id}.pdf"):
    # Files of exactly 72032 bytes are left out of the PDF.
    # NOTE(review): presumably the size of a placeholder image — verify.
    if os.path.getsize(fullfilename) == 72032:
        imgs.remove(fullfilename)
        print('Ignored: ', fullfilename)
with open(f"{topic}/{title} {item.id}.pdf", "wb") as f:
    f.write(img2pdf.convert(imgs))
filePath = f"{topic}/{title} {item.id}.pdf"
# Upload the PDF and make the uploaded blob public.
newBlob = bucket.blob(f'chapter-pdf/{filePath}')
newBlob.upload_from_filename(filePath, content_type='application/pdf')
newBlob.make_public()
def gen_report(username, password, elabx, level):
    """Fetch the eLab report images of one level and bundle them into a PDF.

    Logs in, replays the page requests of the student UI and then, for
    question indexes 0..99, re-evaluates the stored code. Whenever the
    evaluation reports '100', the report image is saved as
    ``<username>-<index zero-padded to 3>.png``. Afterwards every ``.png``
    file in the current directory (sorted by name) is written to
    ``<username>-<ELABX>-Level-<level>.pdf`` and this user's report images
    are removed.

    :param username: eLab user name.
    :param password: eLab password.
    :param elabx: One of 'java1', 'java2', 'ada', 'it_java', 'it_ada'; any
        other value makes the function return immediately.
    :param level: Level id as a string; it is concatenated into URLs.
    :return: None
    """
    elabs = {
        'java1': {
            'url': 'http://care.srmuniv.ac.in/ktrcsejava1/',
            'code': 'java/java.code.php',
            'key': 'java',
        },
        'java2': {
            'url': 'http://care.srmuniv.ac.in/ktrcsejava2/',
            'code': 'java/java.code.php',
            'key': 'java',
        },
        'ada': {
            'url': 'http://care.srmuniv.ac.in/ktrcseada/',
            'code': 'daa/daa.code.php',
            'key': 'daa',
        },
        'it_ada': {
            'url': 'http://care1.srmuniv.ac.in/ktritada/',
            'code': 'daa/daa.code.php',
            'key': 'daa',
        },
        'it_java': {
            'url': 'http://care1.srmuniv.ac.in/ktritjava/',
            'code': 'java/java.code.php',
            'key': 'java',
        },
    }
    elab = elabs.get(elabx)
    if elab is None:
        return

    base_url = elab['url']
    login_page = base_url + 'login_check.php'
    home_page = base_url + 'login/student/home.php'
    question_page = (base_url + 'login/student/code/' + elab['code'] +
                     '?id=' + level + '&value=')
    evaluate_url = (base_url + 'login/student/code/' + elab['key'] +
                    '/code.evaluate.elab.php')
    payload = {'uname': username, 'pass': password}

    def report_image_name(index):
        # One naming scheme shared by download and cleanup.
        return payload['uname'] + '-' + str(index).zfill(3) + '.png'

    print('eLab Report Generator : ' + payload['uname'])

    with requests.Session() as s:
        # login page
        s.post(login_page, data=payload)
        # home page
        s.get(home_page)

        # question page requests & responses
        s.get(base_url + 'login/student/question.php')
        s.post(base_url + 'login/student/home.helper.php',
               data={'text': elab['key'].upper()})
        s.get(base_url + 'login/student/question.php')
        s.get(base_url + 'login/student/question.list.js')
        s.post(base_url + 'login/student/course.get.php', data={'q': 'SESSION'})
        s.post(base_url + 'login/student/course.get.php', data={'q': 'VALUES'})

        # individual question -> code page
        s.get(base_url + 'login/student/code/' + elab['code'] + '?id=' + level + '&value=0')
        s.get(base_url + 'Code-mirror/lib/codemirror.js')
        s.get(base_url + 'Code-mirror/mode/clike/clike.js')
        s.get(base_url + 'login/student/code/' + elab['key'] + '/code.elab.js')
        s.post(base_url + 'login/student/code/code.get.php')
        s.post(base_url + 'login/student/code/flag.checker.php')

        # get the code, evaluate it and download the report (if 100%)
        for i in range(0, 100):
            present_question = question_page + str(i)
            s.get(present_question)
            if s.get(present_question).text.find('NOT ALLOCATED') != -1:
                print(str(i + 1) + ' : Question not allocated')
                continue

            code = s.get(base_url + 'login/student/code/code.get.php')
            if code.text == '':
                print(str(i + 1) + ' : No code written')
                continue

            if elab['key'] == 'daa':
                # The language is not known here, so every supported one is
                # tried; a single '100' is enough.
                results = [
                    s.post(evaluate_url, data={
                        'code': code.text,
                        'input': '',
                        'language': language
                    }).text[-4:-1]
                    for language in ('c', 'cpp', 'java', 'python')
                ]
                complete_percent = '100' if '100' in results else '0'
            else:
                evaluate_payload = s.post(evaluate_url, data={
                    'code': code.text,
                    'input': ''
                })
                complete_percent = evaluate_payload.text[-4:-1]

            if complete_percent == '100':
                print(str(i + 1) + ' : getting report')
                file = s.get(base_url + 'login/student/code/getReport.php')
                with open(report_image_name(i), 'wb') as f:
                    f.write(file.content)
            else:
                print(str(i + 1) + ' : evaluation error : Couldn\'t get report')

    # put all the images to PDF
    filename = payload['uname'] + '-' + elabx.upper() + '-Level-' + level + '.pdf'
    png_files = [i for i in sorted(os.listdir('.')) if i.endswith('.png')]
    if png_files:
        pdf_bytes = img2pdf.convert(png_files)
        # "wb": appending to a PDF left over from an earlier run would
        # produce a corrupt file.
        with open(filename, "wb") as f:
            f.write(pdf_bytes)
        print('PDF file named ' + filename + ' generated')
    else:
        print('No report images found, PDF not generated')

    # remove the image files (same zero-padded names they were saved under)
    for i in range(0, 100):
        image_name = report_image_name(i)
        if os.path.isfile(image_name):
            os.remove(image_name)
    print('Image files cleared')
def handle(self, f=inputf, out=outputf, with_pdfrw=with_pdfrw):
    """Convert the image ``f`` with img2pdf and verify the produced PDF.

    The check runs in three stages:

    1. the PDF bytes returned by ``img2pdf.convert`` are parsed with pdfrw
       and the object structure (trailer, catalog, page tree, the single
       page and its content stream) is compared with fixed expectations;
    2. the embedded image is compared with the input: DCT/JPX streams must
       equal the input bytes, Flate streams must decompress to the same
       pixel data as the input image;
    3. the parsed document and the reference PDF ``out`` are both written
       back out by pdfrw and the two serialisations must be equal.

    :param f: Path of the input image.
    :param out: Reference PDF, handed to ``pdfrw.PdfReader``.
    :param with_pdfrw: Forwarded unchanged to ``img2pdf.convert``.
    :raises AssertionError: When any of the checks fails.
    :raises Exception: When the embedded image reports a colorspace other
        than DeviceGray, DeviceRGB or DeviceCMYK.
    """
    with open(f, "rb") as inf:
        orig_imgdata = inf.read()
    output = img2pdf.convert(orig_imgdata, nodate=True,
                             with_pdfrw=with_pdfrw)
    from io import StringIO, BytesIO
    from pdfrw import PdfReader, PdfName, PdfWriter
    from pdfrw.py23_diffs import convert_load, convert_store
    x = PdfReader(StringIO(convert_load(output)))

    # trailer and catalog
    self.assertEqual(sorted(x.keys()),
                     [PdfName.Info, PdfName.Root, PdfName.Size])
    self.assertEqual(x.Size, '7')
    self.assertEqual(x.Info, {})
    self.assertEqual(sorted(x.Root.keys()), [PdfName.Pages, PdfName.Type])
    self.assertEqual(x.Root.Type, PdfName.Catalog)

    # page tree: exactly one page is expected
    self.assertEqual(sorted(x.Root.Pages.keys()),
                     [PdfName.Count, PdfName.Kids, PdfName.Type])
    self.assertEqual(x.Root.Pages.Count, '1')
    self.assertEqual(x.Root.Pages.Type, PdfName.Pages)
    self.assertEqual(len(x.Root.Pages.Kids), 1)

    # the page object itself
    page = x.Root.Pages.Kids[0]
    self.assertEqual(sorted(page.keys()),
                     [PdfName.Contents, PdfName.MediaBox, PdfName.Parent,
                      PdfName.Resources, PdfName.Type])
    self.assertEqual(page.MediaBox, ['0', '0', '115', '48'])
    self.assertEqual(page.Parent, x.Root.Pages)
    self.assertEqual(page.Type, PdfName.Page)
    self.assertEqual(page.Resources.keys(), [PdfName.XObject])
    self.assertEqual(page.Resources.XObject.keys(), [PdfName.Im0])
    self.assertEqual(page.Contents.keys(), [PdfName.Length])
    self.assertEqual(page.Contents.Length, str(len(page.Contents.stream)))
    self.assertEqual(page.Contents.stream,
                     "q\n115.0000 0 0 48.0000 0.0000 0.0000 cm\n/Im0 "
                     "Do\nQ")

    imgprops = page.Resources.XObject.Im0

    # test if the filter is valid:
    self.assertIn(
        imgprops.Filter, [[PdfName.DCTDecode], [PdfName.JPXDecode],
                          [PdfName.FlateDecode]])
    # test if the colorspace is valid
    self.assertIn(
        imgprops.ColorSpace, [PdfName.DeviceGray, PdfName.DeviceRGB,
                              PdfName.DeviceCMYK])

    # Every PIL image created below is recorded here and closed in the
    # ``finally`` block, so that neither a failing assertion nor the
    # rebinding of ``orig_img`` by convert() leaks an image handle.
    images = []
    try:
        # test if the image has correct size
        orig_img = Image.open(f)
        images.append(orig_img)
        self.assertEqual(imgprops.Width, str(orig_img.size[0]))
        self.assertEqual(imgprops.Height, str(orig_img.size[1]))

        # if the input file is a jpeg then it should've been copied
        # verbatim into the PDF
        if imgprops.Filter in [[PdfName.DCTDecode], [PdfName.JPXDecode]]:
            self.assertEqual(imgprops.stream, convert_load(orig_imgdata))
        elif imgprops.Filter == [PdfName.FlateDecode]:
            # otherwise, the data is flate encoded and has to be equal to
            # the pixel data of the input image
            imgdata = zlib.decompress(convert_store(imgprops.stream))
            colorspace = imgprops.ColorSpace
            if colorspace == PdfName.DeviceGray:
                colorspace = 'L'
            elif colorspace == PdfName.DeviceRGB:
                colorspace = 'RGB'
            elif colorspace == PdfName.DeviceCMYK:
                colorspace = 'CMYK'
            else:
                raise Exception("invalid colorspace")
            im = Image.frombytes(colorspace, (int(imgprops.Width),
                                              int(imgprops.Height)),
                                 imgdata)
            images.append(im)
            # bring the input into a mode comparable with the PDF data
            if orig_img.mode == '1':
                orig_img = orig_img.convert("L")
                images.append(orig_img)
            elif orig_img.mode not in ("RGB", "L", "CMYK", "CMYK;I"):
                orig_img = orig_img.convert("RGB")
                images.append(orig_img)
            self.assertEqual(im.tobytes(), orig_img.tobytes())

        # now use pdfrw to parse and then write out both pdfs and check the
        # result for equality
        y = PdfReader(out)
        outx = BytesIO()
        outy = BytesIO()
        xwriter = PdfWriter()
        ywriter = PdfWriter()
        xwriter.trailer = x
        ywriter.trailer = y
        xwriter.write(outx)
        ywriter.write(outy)
        self.assertEqual(outx.getvalue(), outy.getvalue())
    finally:
        for image in images:
            # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have
            # the close() method
            try:
                image.close()
            except AttributeError:
                pass
# Write the position and the quote onto the ID-card image.
draw.text((71, 1310), pos, (0, 0, 0), font=font_pos)
draw.text((71, 1500), quote, (0, 0, 0), font=font_quote)

# A transparent circular image of required size is created for ID-card picture
size = (770, 770)
mask = Image.new('L', size, 0)
dp = ImageDraw.Draw(mask)
dp.ellipse((0, 0) + size, fill=255)
im = Image.open('dataset/images/' + name + '.jpg')
output = ImageOps.fit(im, mask.size, centering=(0.5, 0.5))
output.putalpha(mask)

# Profile image is superimposed on the ID-card image with itself as a filter
img.paste(output, (180, 230), output)

# Image is resized (with respect to 92dpi) and saved to output/png/
# NOTE(review): Image.ANTIALIAS was removed in Pillow 10 (Image.LANCZOS is
# the replacement) - confirm which Pillow version this script targets.
img = img.resize((322, 482), Image.ANTIALIAS)
img.save('output/png/' + name + '.png')

# To convert to pdf the image is temporarily stored as jpeg
rgb_img = img.convert('RGB')
rgb_img.save("temp.jpg")

# The jpeg file is converted to pdf. The temporary file is not re-opened with
# PIL here: the resulting image object was never used and kept a handle on
# temp.jpg open while it was being removed below.
pdf_bytes = img2pdf.convert("temp.jpg")
# The context manager guarantees the PDF is flushed and closed.
with open("output/pdf/" + name + ".pdf", "wb") as pdf_file:
    pdf_file.write(pdf_bytes)
os.remove("temp.jpg")

csvFile.close()