def convert_to_pdf(os_dir, chapter, file_names):
    """
    Converts a collection of images to PDF format

    :param os_dir: Directory to save PDF in.
    :param chapter: Title of the PDF; also used as the output file name.
    :param file_names: Images to construct the PDF from. Each entry is
        handed to ``download_image`` and the result is passed to img2pdf.
    :return: None. The PDF is written to ``<os_dir>/<chapter>.pdf``.
    """
    print("Converting chapter %s to pdf..." % chapter)
    try:
        pdf_bytes = img2pdf.convert(*[download_image(path) for path in file_names])
    except img2pdf.PdfTooLargeError:
        # Sometimes the images are registered as having a dpi of 1.
        # Because PDF has a limitation of 200 inches max per side, a
        # special layout_fun has to be used, as to prevent an exception.
        # default manga size 5"x7"
        layout_fun = img2pdf.get_layout_fun(pagesize=(None, img2pdf.in_to_pt(7)),
                                            imgsize=None, border=None,
                                            fit=img2pdf.FitMode.into,
                                            auto_orient=False)
        # The images are fetched again on purpose: the first conversion
        # attempt may have consumed whatever download_image returned.
        pdf_bytes = img2pdf.convert(*[download_image(path) for path in file_names],
                                    layout_fun=layout_fun)

    # Use a context manager so the handle is flushed and closed even if the
    # write fails (the original left the file open).
    with open("%s/%s.pdf" % (os_dir, chapter), "wb") as pdf_file:
        pdf_file.write(pdf_bytes)
    print("Conversion completed!")
def select_image_layer(
        infiles,
        output_file,
        log,
        pdfinfo,
        pdfinfo_lock):
    """Produce the image layer PDF for one page at ``output_file``.

    ``infiles`` must contain one name ending in ``.ocr.oriented.pdf`` and one
    ending in ``.image``. When ``lossless_reconstruction`` is set, the
    oriented page PDF is symlinked to ``output_file`` unchanged; otherwise the
    image is wrapped in a new PDF via img2pdf, at a resolution that is the
    largest of the image's x/y resolution and ``options.oversample``.
    """
    ocr_page = next(
        name for name in infiles if name.endswith('.ocr.oriented.pdf'))
    page_image = next(name for name in infiles if name.endswith('.image'))

    if lossless_reconstruction:
        message = "{:4d}: page eligible for lossless reconstruction".format(
            page_number(ocr_page))
        log.debug(message)
        re_symlink(ocr_page, output_file)
        return

    info = get_pageinfo(page_image, pdfinfo, pdfinfo_lock)
    dpi = round(max(info['xres'], info['yres'], options.oversample))
    # The same DPI-based size specification is used for both axes.
    axis_spec = (img2pdf.ImgSize.dpi, dpi)
    layout_fun = img2pdf.get_layout_fun(
        None, (axis_spec, axis_spec), None, None, None)

    with open(page_image, 'rb') as source, open(output_file, 'wb') as target:
        image_data = source.read()
        converted = img2pdf.convert(
            image_data,
            producer="img2pdf",
            with_pdfrw=False,
            layout_fun=layout_fun)
        target.write(converted)
def layout_handler(
        self, psopt, isopt, border, fit, ao, pspdf, ispdf, im):
    """Check one layout computation against its expected result.

    Builds a layout function from ``psopt``/``isopt``/``border``/``fit``/``ao``
    and applies it to the image dimensions ``im[0]`` x ``im[1]`` at the
    default DPI. The resulting page size must equal ``pspdf`` and the
    resulting image size must equal ``ispdf``. If img2pdf reports a negative
    dimension instead, both expectations must be ``None``.
    """
    compute_layout = img2pdf.get_layout_fun(psopt, isopt, border, fit, ao)
    try:
        page_w, page_h, image_w, image_h = compute_layout(
            im[0], im[1], (img2pdf.default_dpi, img2pdf.default_dpi))
    except img2pdf.NegativeDimensionError:
        # An invalid layout is only acceptable when no result was expected.
        self.assertEqual(None, pspdf)
        self.assertEqual(None, ispdf)
        return

    self.assertEqual((page_w, page_h), pspdf)
    self.assertEqual((image_w, image_h), ispdf)
def test_single_page_image(outdir):
    """A 1-bit 8x8 PNG wrapped by img2pdf yields one text-free page whose
    single image is 8 px wide, gray, and 8 DPI on both axes."""
    pdf_path = outdir / 'image-mono.pdf'
    png_path = outdir / 'tmp.png'

    # Black 8x8 bitmap with the main diagonal set.
    bitmap = Image.new('1', (8, 8), 0)
    for k in range(8):
        bitmap.putpixel((k, k), 1)
    bitmap.save(str(png_path), format='PNG')

    dpi_spec = (img2pdf.ImgSize.dpi, 8)
    layout_fun = img2pdf.get_layout_fun(
        None, (dpi_spec, dpi_spec), None, None, None)

    png_data = png_path.read_bytes()
    pdf_data = img2pdf.convert(
        png_data,
        producer="img2pdf",
        with_pdfrw=False,
        layout_fun=layout_fun)
    pdf_path.write_bytes(pdf_data)

    pages = pdfinfo.PdfInfo(pdf_path)
    assert len(pages) == 1

    first_page = pages[0]
    assert not first_page.has_text
    assert len(first_page.images) == 1

    embedded = first_page.images[0]
    assert embedded.width == 8
    assert embedded.color == Colorspace.gray

    # DPI in a 1"x1" is the image width
    assert isclose(embedded.xres, 8)
    assert isclose(embedded.yres, 8)
def test_single_page_image():
    """A 1-bit 8x8 PNG wrapped by img2pdf yields one text-free page whose
    single image is 8 px wide, gray, 8 bits per component, and 8 DPI."""
    pdf_path = os.path.join(TEST_OUTPUT, 'image-mono.pdf')

    with NamedTemporaryFile(mode='wb+', suffix='.png') as png_tmp:
        # Black 8x8 bitmap with the main diagonal set.
        bitmap = Image.new('1', (8, 8), 0)
        for k in range(8):
            bitmap.putpixel((k, k), 1)
        bitmap.save(png_tmp.name, format='PNG')

        dpi_spec = (img2pdf.ImgSize.dpi, 8)
        layout_fun = img2pdf.get_layout_fun(
            None, (dpi_spec, dpi_spec), None, None, None)

        # The PNG was written through the file's name, so rewind the handle
        # before reading the bytes back.
        png_tmp.seek(0)
        png_data = png_tmp.read()
        pdf_data = img2pdf.convert(
            png_data,
            producer="img2pdf",
            with_pdfrw=False,
            layout_fun=layout_fun)

        with open(pdf_path, 'wb') as out:
            out.write(pdf_data)

    pages = pageinfo.pdf_get_all_pageinfo(pdf_path)
    assert len(pages) == 1

    first_page = pages[0]
    assert not first_page['has_text']
    assert len(first_page['images']) == 1

    embedded = first_page['images'][0]
    assert embedded['width'] == 8
    assert embedded['color'] == 'gray'

    # While unexpected, this is correct
    # PDF spec says /FlateDecode image must have /BitsPerComponent 8
    # So mono images get upgraded to 8-bit
    assert embedded['bpc'] == 8

    # DPI in a 1"x1" is the image width
    assert embedded['dpi_w'] == 8
    assert embedded['dpi_h'] == 8
def go_img2pdf(file, to_folder, new_name):
    """Convert ``file`` to a PDF and write it to ``to_folder/new_name``.

    The page size is fixed at 8.3 x 11.7 inches (A4). ``file`` is passed to
    ``img2pdf.convert`` unchanged.
    """
    page_width = img2pdf.in_to_pt(8.3)
    page_height = img2pdf.in_to_pt(11.7)
    a4_layout = img2pdf.get_layout_fun([page_width, page_height])

    pdf_data = img2pdf.convert(file, layout_fun=a4_layout)

    destination = to_folder + "/" + new_name
    with open(destination, 'wb') as out:
        out.write(pdf_data)
import os
import os.path
import sys

import img2pdf

# Usage: <script> INPUT_DIR OUTPUT_FILE
# Collects every *.jpg in INPUT_DIR into a single A4 PDF at OUTPUT_FILE.
input_dir = sys.argv[1]
output_file = sys.argv[2]

# Use the argument as given: encoding it with the filesystem encoding and
# decoding it as UTF-8 only round-trips when the two encodings agree.
directory = input_dir
print(directory)

a4inpt = (img2pdf.mm_to_pt(210), img2pdf.mm_to_pt(297))
layout_fun = img2pdf.get_layout_fun(a4inpt)

# os.listdir returns entries in arbitrary order; sort so the page order is
# deterministic.
image_paths = []
for filename in sorted(os.listdir(directory)):
    if filename.endswith(".jpg"):
        print("Adding " + filename)
        image_paths.append(os.path.join(directory, filename))

if not image_paths:
    sys.exit("No .jpg files found in " + directory)

# Convert all images in ONE call. Writing the result of a separate convert()
# per image into the same file just concatenates independent PDF documents,
# which is not a valid multi-page PDF.
with open(output_file, "wb") as f:
    f.write(img2pdf.convert(*image_paths, layout_fun=layout_fun))
async def download(self, url, filetype='pdf'):
    '''
    Download all images of a chapter and bundle them into a single file
    under ``<cwd>/downloads``.

    url : Manhwa chapter url from the topmanhwa.net web
    filetype : Type of file, available type is `pdf` and `zip`

    Returns the path of the resulting file. If that file already exists,
    nothing is downloaded and the existing path is returned.

    Raises AttributeError for an unsupported ``filetype`` and ValueError
    when the chapter yields no image URLs.
    '''
    if filetype not in ('pdf', 'zip'):
        raise AttributeError(
            "Invalid filetype, available filetype is pdf and zip")

    async def fetch_pages(page_urls):
        """Fetch every URL; return (url, bytes) pairs for HTTP 200 replies."""
        total = len(page_urls)
        if total == 0:
            # The progress percentage below would otherwise divide by zero.
            raise ValueError(f"No images found for {url}")

        fetched = []
        print(f"Downloading, {(len(fetched)/total)*100:.1f}%")
        async with aiohttp.ClientSession() as ses:
            # Iterate over the URLs directly: zipping them with
            # range(1, len(...)) silently dropped the last image.
            for page_url in page_urls:
                async with ses.get(page_url) as r:
                    if r.status == 200:
                        # Keep the URL next to its bytes so a failed download
                        # cannot shift file extensions onto the wrong image.
                        fetched.append((page_url, await r.content.read()))
                        print(
                            f"Downloading, {(len(fetched)/total)*100:.1f}%"
                        )
        return fetched

    # Generate the filename
    filename = url.replace(f"{self.manga}/",
                           "").split("/")[0].replace("-", " ").title()

    # Create downloads directory if it's not exists in the local
    downloads_dir = f"{os.getcwd()}/downloads"
    if not os.path.exists(downloads_dir):
        os.mkdir(downloads_dir)
        print("Downloads dir created.")

    if filetype == 'zip':
        # Download chapter as a ZIP file
        path = f"{downloads_dir}/{filename}.zip"

        # Proceed download if the same file not in the local
        if not os.path.exists(path):
            # NOTE(review): this branch calls self._images while the PDF
            # branch calls self.images -- confirm which one is intended.
            manhwa = await self._images(url)  # Get the chapter images
            pages = await fetch_pages(manhwa)

            # Archiving the image bytes; the context manager closes the
            # archive even if a write fails.
            with zipfile.ZipFile(path, 'w') as z:
                for i, (page_url, payload) in enumerate(pages, start=1):
                    z.writestr(f"{filename}_{i}.{page_url[-3:]}",
                               payload,
                               compress_type=zipfile.ZIP_DEFLATED)

            print(
                f"Download complete, {(len(pages)/len(manhwa))*100:.1f}%")
        else:
            print("The file is already in the local, and using cached.")
    else:
        # Download chapter as a PDF file
        path = f"{downloads_dir}/{filename}.pdf"

        # Proceed download if the same file not in the local
        if not os.path.exists(path):
            manhwa = await self.images(url)  # Get the chapter images
            pages = await fetch_pages(manhwa)

            # Custom size of the layout
            page_size = (img2pdf.mm_to_pt(200), img2pdf.mm_to_pt(300))
            layout_fun = img2pdf.get_layout_fun(page_size)

            # Merging the image bytes to PDF file
            with open(path, "wb") as f:
                f.write(img2pdf.convert([payload for _, payload in pages],
                                        layout_fun=layout_fun))
        else:
            print("The file is already in the local, and using cached.")

    return path
#for printing in console console_handler = logging.StreamHandler(sys.stdout) console_handler.setFormatter(FORMATTER) logger_obj.addHandler(console_handler) #for creating a log file file_handler = TimedRotatingFileHandler("myloggerfile.log",when="midnight",encoding="utf-8") file_handler.setFormatter(FORMATTER) logger_obj.addHandler(file_handler) import os import img2pdf #A4 Size paper dimensions 210mm x 297mm #img2pdf.mm_to_pt() converts millimeter to point a4inpt = (img2pdf.mm_to_pt(210),img2pdf.mm_to_pt(297)) layout_fun = img2pdf.get_layout_fun(pagesize=a4inpt) #Input directory name DIR_PATH="../Documents" file_name=DIR_PATH.split("/")[-1]+".pdf" with open(file_name,"wb") as f: f.write(img2pdf.convert([i for i in os.listdir(DIR_PATH) if i.endswith(".jpg")], layout_fun=layout_fun)) #api response !pip install requests import requests params={'a':'b'} data={'x':'y'} url="http:/127.0.0.1/" #for get request response=requests.get(url,params)
a4Height = 841.88976378

outputPdf = PyPDF2.PdfFileWriter()
page = None
# One iteration per calendar month: build a single-page PDF from that
# month's JPEG, then render the matching calendar page for next year.
for month in range(1, 13):
    monthString = "%02d" % month

    # Handle the image part.
    # Landscape A4 for the image.
    pageSize = (a4Height, a4Width)
    # TOP_OF_CAL_BOXES_PTS in pcal's pcaldefs.h.
    margin = 85
    imageSize = ((img2pdf.ImgSize.abs, a4Height - margin), (img2pdf.ImgSize.abs, a4Width - margin))
    layoutFun = img2pdf.get_layout_fun(pageSize, imageSize, border=None, fit=None, auto_orient=False)
    # Close the JPEG as soon as it is converted; the original left one file
    # handle open per month.
    with open("images/" + monthString + ".jpg", "rb") as imageJpg:
        imageBytes = img2pdf.convert(imageJpg, layout_fun=layoutFun)
    # In-memory PDF, positioned at the start for the reader below.
    imageBuf = io.BytesIO(imageBytes)

    # Handle the calendar part.
    imagePdf = PyPDF2.PdfFileReader(imageBuf)
    imagePage = imagePdf.getPage(0)
    nextYear = str(time.localtime().tm_year + 1)
    # NOTE(review): locale.getlocale()[0] can be None when no locale is set,
    # which would make .split fail here -- confirm the expected environment.
    lang = locale.getlocale()[0].split("_")[0]
    calPdf = PyPDF2.PdfFileReader(ps2Pdf(pcal(["-f", "calendar_" + lang + ".txt", monthString, nextYear])))
    calPage = calPdf.getPage(0)

    # Portrait A4 page: upper half contains first calendar and the first image,
    # lower half contains the second calendar and the second image.
# importing necessary libraries
import img2pdf
import os
import argparse

# Page layout shared by every conversion below: 11 in x 8.5 in.
letter = (img2pdf.in_to_pt(11), img2pdf.in_to_pt(8.5))
layout = img2pdf.get_layout_fun(letter)


def process_images(min_range, max_range, prefix, suffix, out_file, layout):
    """Write a PDF built from a numbered series of images to ``out_file``.

    The image names are ``prefix + str(i) + suffix`` for every ``i`` from
    ``min_range`` to ``max_range`` inclusive, in ascending order. ``layout``
    is passed to img2pdf as the layout function and ``out_file`` must be a
    writable binary file object.
    """
    numbered_names = [
        prefix + str(index) + suffix
        for index in range(min_range, max_range + 1)
    ]
    pdf_data = img2pdf.convert(numbered_names, layout_fun=layout)
    out_file.write(pdf_data)


with open('tile/img2pdf-2-2.pdf', "wb") as out_file:
    process_images(0, 31, 'tile/', '_2.png', out_file, layout)
# Sorted so the pages are processed in a deterministic order
# (os.listdir returns entries in arbitrary order).
imagelist = sorted(i for i in os.listdir(os.getcwd()) if i.endswith(".jpg"))

# Shrink the JPEGs with noteshrink.
# The command is passed as an argument list: joining the file names into one
# string and re-splitting it breaks on names containing spaces or quotes.
cmd = ['python', './noteshrink.py', '-w', '-q', '-b', 'N'] + imagelist
try:
    result = subprocess.call(cmd)
except OSError:
    # The interpreter could not be started at all.
    result = -1
if result == 0:
    print('图片处理成功')
else:
    print('图片处理失败')

# Combine the processed PNGs found in ./tmp into one PDF.
path = './tmp'
os.chdir(path)
imagelist = sorted(i for i in os.listdir(os.getcwd()) if i.endswith(".png"))
a4inpt = (img2pdf.mm_to_pt(210), img2pdf.mm_to_pt(297))
border = (50, 30)
layout_fun = img2pdf.get_layout_fun(pagesize=a4inpt, border=border)
with open("output.pdf", "wb") as f:
    f.write(img2pdf.convert(imagelist, layout_fun=layout_fun))
print('生成pdf成功: output.pdf')

# Convert the PDF to a Word document.
print('开始转换pdf到docx')
parse("output.pdf", "output.docx")
print('docx转换成功: output.docx')