コード例 #1
4
ファイル: main.py プロジェクト: tohyongcheng/mangapark-dl
def convert_to_pdf(os_dir, chapter, file_names):
    """
    Converts a collection of images to PDF format and writes it to disk.

    The result is stored as ``<os_dir>/<chapter>.pdf``.

    :param os_dir: Directory to save PDF in.
    :param chapter: Title of the PDF (also used as the file name).
    :param file_names: Images to construct the PDF from. Each entry is passed
        to ``download_image`` and the result is handed to ``img2pdf.convert``.
    :return: None
    """

    print("Converting chapter %s to pdf..." % chapter)

    try:
        pdf_bytes = img2pdf.convert(*[download_image(path) for path in file_names])
    except img2pdf.PdfTooLargeError:
        # Sometimes the images are registered as having a dpi of 1.
        # Because PDF has a limitation of 200 inches max per side, a
        # special layout_fun has to be used, as to prevent an exception.
        # default manga size 5"x7"

        layout_fun = img2pdf.get_layout_fun(pagesize=(None, img2pdf.in_to_pt(7)),
                                            imgsize=None, border=None,
                                            fit=img2pdf.FitMode.into,
                                            auto_orient=False)
        # The images are fetched again rather than reused, because the
        # objects returned by download_image may have been consumed by the
        # first conversion attempt.
        pdf_bytes = img2pdf.convert(*[download_image(path) for path in file_names],
                                    layout_fun=layout_fun)

    # The context manager guarantees the PDF is flushed and the handle is
    # closed, even if the write fails.
    with open("%s/%s.pdf" % (os_dir, chapter), "wb") as pdf_file:
        pdf_file.write(pdf_bytes)
    print("Conversion completed!")
コード例 #2
2
ファイル: _pipeline.py プロジェクト: jbarlow83/OCRmyPDF
def select_image_layer(infiles, output_file, log, context):
    """Pick the image layer that will back the output page.

    With lossless reconstruction enabled, the orientation-corrected input page
    is linked to ``output_file``. Otherwise the rasterized page image is
    converted into a single-page PDF written to ``output_file``.
    """

    def _first_with_suffix(suffix):
        return next(name for name in infiles if name.endswith(suffix))

    options = context.get_options()
    page_pdf = _first_with_suffix('.ocr.oriented.pdf')
    image = _first_with_suffix('.image')

    if not options.lossless_reconstruction:
        pageinfo = get_pageinfo(image, context)

        # Each page is rasterized at a square DPI, since most image processing
        # tools cannot cope with rectangular DPI, and the square value
        # describes the image accurately. Resampling back to a non-square DPI
        # here would resemble the input more closely, but the hocr renderer
        # does not understand non-square DPI (the sandwich renderer would be
        # fine with it).
        square_dpi = get_page_square_dpi(pageinfo, options)
        fixed_layout = img2pdf.get_fixed_dpi_layout_fun((square_dpi, square_dpi))

        # Produces a single page PDF
        with open(image, 'rb') as imfile:
            with open(output_file, 'wb') as pdf:
                log.debug(f'{page_number(page_pdf):4d}: convert')
                img2pdf.convert(
                    imfile,
                    with_pdfrw=False,
                    layout_fun=fixed_layout,
                    outputstream=pdf,
                )
                log.debug(f'{page_number(page_pdf):4d}: convert done')
    else:
        log.debug(
            f"{page_number(page_pdf):4d}: page eligible for lossless reconstruction"
        )
        # The link target is still the multipage file
        re_symlink(page_pdf, output_file, log)
コード例 #3
2
ファイル: main.py プロジェクト: balu-/OCRmyPDF
def select_image_layer(
        infiles,
        output_file,
        log,
        pdfinfo,
        pdfinfo_lock):
    """Produce the image layer for one page at ``output_file``.

    Either links the oriented page PDF (lossless reconstruction) or converts
    the page image to a PDF at a square DPI taken from the largest of the
    page's x/y resolution and ``options.oversample``.
    """
    page_pdf = next(name for name in infiles
                    if name.endswith('.ocr.oriented.pdf'))
    image = next(name for name in infiles if name.endswith('.image'))

    if lossless_reconstruction:
        message = "{:4d}: page eligible for lossless reconstruction".format(
            page_number(page_pdf))
        log.debug(message)
        re_symlink(page_pdf, output_file)
        return

    pageinfo = get_pageinfo(image, pdfinfo, pdfinfo_lock)
    candidates = (pageinfo['xres'], pageinfo['yres'], options.oversample)
    dpi = round(max(candidates))
    axis_size = (img2pdf.ImgSize.dpi, dpi)
    imgsize = (axis_size, axis_size)

    layout_fun = img2pdf.get_layout_fun(None, imgsize, None, None, None)

    with open(image, 'rb') as imfile:
        with open(output_file, 'wb') as pdf:
            rawdata = imfile.read()
            pdf_bytes = img2pdf.convert(
                rawdata, producer="img2pdf", with_pdfrw=False,
                layout_fun=layout_fun)
            pdf.write(pdf_bytes)
コード例 #4
1
ファイル: pipeline.py プロジェクト: stweil/OCRmyPDF
def triage_image_file(input_file, output_file, log, options):
    """Check that *input_file* is a usable image and convert it to a PDF.

    :param input_file: Path of the candidate image.
    :param output_file: Path the converted PDF is written to.
    :param log: Logger used for progress and error messages.
    :param options: Object whose ``image_dpi`` attribute, when set, overrides
        the resolution recorded in the image.
    :raises UnsupportedImageFormatError: if the file cannot be opened as an
        image, is a CMYK image without an ICC profile, or img2pdf cannot
        open it.
    :raises DpiError: if the image has no DPI metadata, or a DPI at or below
        (96, 96), and ``options.image_dpi`` is not set.
    """
    try:
        log.info("Input file is not a PDF, checking if it is an image...")
        im = Image.open(input_file)
    except EnvironmentError as e:
        msg = str(e)

        # Recover the original filename
        realpath = ''
        if os.path.islink(input_file):
            realpath = os.path.realpath(input_file)
        elif os.path.isfile(input_file):
            realpath = '<stdin>'
        msg = msg.replace(input_file, realpath)
        log.error(msg)
        raise UnsupportedImageFormatError() from e

    # The context manager closes the image on every exit path, including the
    # DpiError / UnsupportedImageFormatError raises below.
    with im:
        log.info("Input file is an image")

        if 'dpi' in im.info:
            # NOTE(review): if dpi is a tuple this is a lexicographic
            # comparison, so e.g. (96, 300) is not rejected but (72, 300) is.
            if im.info['dpi'] <= (96, 96) and not options.image_dpi:
                log.info("Image size: (%d, %d)" % im.size)
                log.info("Image resolution: (%d, %d)" % im.info['dpi'])
                log.error(
                    "Input file is an image, but the resolution (DPI) is "
                    "not credible.  Estimate the resolution at which the "
                    "image was scanned and specify it using --image-dpi.")
                raise DpiError()
        elif not options.image_dpi:
            log.info("Image size: (%d, %d)" % im.size)
            log.error(
                "Input file is an image, but has no resolution (DPI) "
                "in its metadata.  Estimate the resolution at which "
                "image was scanned and specify it using --image-dpi.")
            raise DpiError()

        # Pillow exposes an embedded profile under the key 'icc_profile'.
        # The previous key 'iccprofile' never matched, so every CMYK image
        # was rejected even when it carried a profile.
        if 'icc_profile' not in im.info:
            if im.mode == 'RGB':
                log.info('Input image has no ICC profile, assuming sRGB')
            elif im.mode == 'CMYK':
                log.info('Input CMYK image has no ICC profile, not usable')
                raise UnsupportedImageFormatError()

    try:
        log.info("Image seems valid. Try converting to PDF...")
        layout_fun = img2pdf.default_layout_fun
        if options.image_dpi:
            layout_fun = img2pdf.get_fixed_dpi_layout_fun(
                (options.image_dpi, options.image_dpi))
        with open(output_file, 'wb') as outf:
            img2pdf.convert(
                input_file,
                layout_fun=layout_fun,
                with_pdfrw=False,
                outputstream=outf)
        log.info("Successfully converted to PDF, processing...")
    except img2pdf.ImageOpenError as e:
        log.error(e)
        raise UnsupportedImageFormatError() from e
コード例 #5
0
ファイル: main.py プロジェクト: bhohbaum/OCRmyPDF
def select_image_layer(
        infiles,
        output_file,
        log,
        pdfinfo,
        pdfinfo_lock):
    """Create the image layer for one page at ``output_file``.

    Links the oriented page PDF when lossless reconstruction applies;
    otherwise converts the page image to PDF at the DPI reported by
    ``get_page_dpi``.
    """
    page_pdf = next(name for name in infiles
                    if name.endswith('.ocr.oriented.pdf'))
    image = next(name for name in infiles if name.endswith('.image'))

    if lossless_reconstruction:
        message = "{:4d}: page eligible for lossless reconstruction".format(
            page_number(page_pdf))
        log.debug(message)
        re_symlink(page_pdf, output_file)
        return

    pageinfo = get_pageinfo(image, pdfinfo, pdfinfo_lock)
    raw_dpi = get_page_dpi(pageinfo)
    # img2pdf is handed plain floats for both axes
    dpi = (float(raw_dpi[0]), float(raw_dpi[1]))
    layout_fun = img2pdf.get_fixed_dpi_layout_fun(dpi)

    with open(image, 'rb') as imfile:
        with open(output_file, 'wb') as pdf:
            rawdata = imfile.read()
            img2pdf.convert(
                rawdata,
                with_pdfrw=False,
                layout_fun=layout_fun,
                outputstream=pdf)
コード例 #6
0
ファイル: main.py プロジェクト: bhohbaum/OCRmyPDF
def triage_image_file(input_file, output_file, log):
    """Check that *input_file* is a usable image and convert it to a PDF.

    Any problem is logged and terminates the process through
    ``sys.exit(ExitCode.input_file)``: the file cannot be opened as an image,
    its DPI is missing or at/below (96, 96) without ``options.image_dpi``,
    it is CMYK without an ICC profile, or img2pdf cannot open it.

    :param input_file: Path of the candidate image.
    :param output_file: Path the converted PDF is written to.
    :param log: Logger used for progress and error messages.
    """
    try:
        log.info("Input file is not a PDF, checking if it is an image...")
        im = Image.open(input_file)
    except EnvironmentError as e:
        log.error(e)
        sys.exit(ExitCode.input_file)

    # The context manager closes the image on every exit path, including the
    # SystemExit raised by the sys.exit calls below.
    with im:
        log.info("Input file is an image")

        if 'dpi' in im.info:
            # NOTE(review): if dpi is a tuple this is a lexicographic
            # comparison, so e.g. (96, 300) is not rejected but (72, 300) is.
            if im.info['dpi'] <= (96, 96) and not options.image_dpi:
                log.info("Image size: (%d, %d)" % im.size)
                log.info("Image resolution: (%d, %d)" % im.info['dpi'])
                log.error(
                    "Input file is an image, but the resolution (DPI) is "
                    "not credible.  Estimate the resolution at which the "
                    "image was scanned and specify it using --image-dpi.")
                sys.exit(ExitCode.input_file)
        elif not options.image_dpi:
            log.info("Image size: (%d, %d)" % im.size)
            log.error(
                "Input file is an image, but has no resolution (DPI) "
                "in its metadata.  Estimate the resolution at which "
                "image was scanned and specify it using --image-dpi.")
            sys.exit(ExitCode.input_file)

        # Pillow exposes an embedded profile under the key 'icc_profile'.
        # The previous key 'iccprofile' never matched, so every CMYK image
        # was rejected even when it carried a profile.
        if 'icc_profile' not in im.info:
            if im.mode == 'RGB':
                log.info('Input image has no ICC profile, assuming sRGB')
            elif im.mode == 'CMYK':
                log.info('Input CMYK image has no ICC profile, not usable')
                sys.exit(ExitCode.input_file)

    try:
        log.info("Image seems valid. Try converting to PDF...")
        layout_fun = img2pdf.default_layout_fun
        if options.image_dpi:
            layout_fun = img2pdf.get_fixed_dpi_layout_fun(
                (options.image_dpi, options.image_dpi))
        with open(output_file, 'wb') as outf:
            img2pdf.convert(
                input_file,
                layout_fun=layout_fun,
                with_pdfrw=False,
                outputstream=outf)
        log.info("Successfully converted to PDF, processing...")
    except img2pdf.ImageOpenError as e:
        log.error(e)
        sys.exit(ExitCode.input_file)
コード例 #7
0
 def test_jpg2pdf(self):
     """Converting test.jpg must yield exactly the bytes stored in test.pdf."""
     # Both fixtures are binary files: read them in 'rb' mode so no newline
     # translation or text decoding is applied to the data being compared.
     with open(os.path.join(HERE, 'test.jpg'), 'rb') as img_fp:
         with open(os.path.join(HERE, 'test.pdf'), 'rb') as pdf_fp:
             self.assertEqual(
                 img2pdf.convert([img_fp], 150,
                                 creationdate=moddate, moddate=moddate),
                 pdf_fp.read())
コード例 #8
0
def main():
    """Process every page of a PDF and write the results as a new PDF.

    Each page is handed to ``process_page`` (the first ``--skip`` pages are
    flagged as skipped). The per-page JPEGs expected under ``./temp/`` are
    then combined into the output file with img2pdf. The temporary directory
    is removed afterwards, whether or not processing succeeded.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('input_pdf_path', metavar='PATH')
    # The output is written unconditionally at the end, so require it up
    # front instead of failing with an AttributeError after all the work.
    parser.add_argument('-o', '--output', metavar='out', type=argparse.FileType('wb'),
                        required=True, help='Output PDF file')
    parser.add_argument('-s', '--skip', type=int, default=0,
                        help='Skip over the first n page(s).')
    args = parser.parse_args()

    logger = logging.getLogger(__name__)
    logging.basicConfig(level='INFO', format='%(asctime)s - %(levelname)s - %(message)s')

    directory = './temp/'
    if not os.path.exists(directory):
        os.makedirs(directory)

    try:
        images_path = []
        # Keep the input open only while the reader needs it
        with open(args.input_pdf_path, "rb") as input_pdf:
            pdf = PdfFileReader(input_pdf)
            num_pages = pdf.getNumPages()
            for i in range(num_pages):
                logger.info("Processing page {}/{}".format(i + 1, num_pages))
                images_path.append(os.path.join(directory, "{}.jpg".format(i)))
                process_page(pdf, i, i < args.skip)

        logger.info('Writing to output PDF file')
        args.output.write(
            img2pdf.convert(*[img2pdf.input_images(path) for path in images_path]))
        logger.info('Done')
    finally:
        # argparse opened the output file for us; make sure it is flushed
        # and closed, and never leave the scratch directory behind.
        args.output.close()
        shutil.rmtree(directory, True)
コード例 #9
0
ファイル: test_pageinfo.py プロジェクト: mawekuwe/OCRmyPDF
def test_single_page_image():
    """Convert an 8x8 monochrome PNG to PDF at 8 dpi and inspect the page info."""
    filename = os.path.join(TEST_OUTPUT, 'image-mono.pdf')

    with NamedTemporaryFile() as im_tmp:
        im = Image.new('1', (8, 8), 0)
        # Set the pixels on the main diagonal
        for xy in zip(range(8), range(8)):
            im.putpixel(xy, 1)
        im.save(im_tmp.name, format='PNG')

        pdf_data = img2pdf.convert([im_tmp.name], dpi=8)
        with open(filename, 'wb') as out:
            out.write(pdf_data)

    all_pages = pageinfo.pdf_get_all_pageinfo(filename)
    assert len(all_pages) == 1

    first_page = all_pages[0]
    assert not first_page['has_text']
    assert len(first_page['images']) == 1

    embedded = first_page['images'][0]
    assert embedded['width'] == 8
    assert embedded['color'] == 'gray'

    # Surprising but right: the PDF spec requires /BitsPerComponent 8 for a
    # /FlateDecode image, so the mono image is stored as 8-bit.
    assert embedded['bpc'] == 8

    # On a 1"x1" page the DPI equals the image width
    for axis in ('dpi_w', 'dpi_h'):
        assert embedded[axis] == 8
コード例 #10
0
ファイル: tesseract_noop.py プロジェクト: jbarlow83/OCRmyPDF
def main():
    """Stand-in for the tesseract command line that performs no OCR.

    Dispatches on ``sys.argv`` and either prints canned text or writes
    placeholder ``.hocr`` / ``.pdf`` / ``.txt`` output files, then exits.
    """
    argv = sys.argv

    def _write_empty_txt(stem):
        with open(stem + '.txt', 'w') as txt:
            txt.write('')

    first = argv[1]
    if first == '--version':
        print(VERSION_STRING, file=sys.stderr)
        sys.exit(0)
    elif first == '--list-langs':
        print('List of available languages (1):\neng', file=sys.stderr)
        sys.exit(0)
    elif first == '--print-parameters':
        print("Some parameters", file=sys.stderr)
        print("textonly_pdf\t1\tSome help text")
        sys.exit(0)
    elif argv[-2] == 'hocr':
        inputf, output = argv[-4], argv[-3]
        with Image.open(inputf) as im:
            with open(output + '.hocr', 'w', encoding='utf-8') as hocr:
                width, height = im.size
                hocr.write(HOCR_TEMPLATE.format(str(width), str(height)))
        _write_empty_txt(output)
    elif argv[-2] == 'pdf':
        inputf, output = argv[-4], argv[-3]
        if 'textonly_pdf=1' in argv:
            # Blank page with the same physical size as the input image
            with Image.open(inputf) as im:
                dpi = im.info['dpi']
                width_pt = im.size[0] / dpi[0] * 72
                height_pt = im.size[1] / dpi[1] * 72

            pdf_out = pypdf.PdfFileWriter()
            pdf_out.addBlankPage(width_pt, height_pt)
            with open(output + '.pdf', 'wb') as pdf:
                pdf_out.write(pdf)
        else:
            pdf_bytes = img2pdf.convert([inputf], dpi=300)
            with open(output + '.pdf', 'wb') as pdf:
                pdf.write(pdf_bytes)
        _write_empty_txt(output)
    elif argv[-1] == 'stdout':
        report = (
            "Orientation: 0\n"
            "Orientation in degrees: 0\n"
            "Orientation confidence: 100.00\n"
            "Script: 1\n"
            "Script confidence: 100.00"
        )
        print(report, file=sys.stderr)
    else:
        print("Spoof doesn't understand arguments", file=sys.stderr)
        print(argv, file=sys.stderr)
        sys.exit(1)

    sys.exit(0)
コード例 #11
0
ファイル: tesseract_noop.py プロジェクト: balu-/OCRmyPDF
def main():
    """Stand-in for the tesseract command line that performs no OCR.

    Dispatches on ``sys.argv`` and either prints canned text or writes a
    placeholder ``.hocr`` / ``.pdf`` output file, then exits.
    """
    argv = sys.argv
    first = argv[1]
    last = argv[-1]

    if first == '--version':
        print(VERSION_STRING, file=sys.stderr)
        sys.exit(0)
    elif first == '--list-langs':
        print('List of available languages (1):\neng', file=sys.stderr)
        sys.exit(0)
    elif last == 'hocr':
        inputf, output = argv[-3], argv[-2]
        with Image.open(inputf) as im:
            with open(output + '.hocr', 'w', encoding='utf-8') as hocr:
                width, height = im.size
                hocr.write(HOCR_TEMPLATE.format(str(width), str(height)))
    elif last == 'pdf':
        inputf, output = argv[-3], argv[-2]
        pdf_bytes = img2pdf.convert([inputf], dpi=300)
        with open(output + '.pdf', 'wb') as pdf:
            pdf.write(pdf_bytes)
    elif last == 'stdout':
        report = (
            "Orientation: 0\n"
            "Orientation in degrees: 0\n"
            "Orientation confidence: 100.00\n"
            "Script: 1\n"
            "Script confidence: 100.00"
        )
        print(report, file=sys.stderr)
    else:
        print("Spoof doesn't understand arguments", file=sys.stderr)
        print(argv, file=sys.stderr)
        sys.exit(1)

    sys.exit(0)
コード例 #12
0
def test_single_page_image():
    """Convert an 8x8 monochrome PNG to PDF at 8 dpi and inspect the page info."""
    filename = os.path.join(TEST_OUTPUT, "image-mono.pdf")

    with NamedTemporaryFile() as im_tmp:
        im = Image.new("1", (8, 8), 0)
        # Set the pixels on the main diagonal
        diagonal = [(n, n) for n in range(8)]
        for point in diagonal:
            im.putpixel(point, 1)
        im.save(im_tmp.name, format="PNG")

        pdf_data = img2pdf.convert([im_tmp.name], dpi=8)
        with open(filename, "wb") as out:
            out.write(pdf_data)

    all_pages = pageinfo.pdf_get_all_pageinfo(filename)
    assert len(all_pages) == 1

    first_page = all_pages[0]
    assert not first_page["has_text"]

    page_images = first_page["images"]
    assert len(page_images) == 1

    embedded = page_images[0]
    assert embedded["width"] == 8
    assert embedded["color"] == "gray"

    # Surprising but right: the PDF spec requires /BitsPerComponent 8 for a
    # /FlateDecode image, so the mono image is stored as 8-bit.
    assert embedded["bpc"] == 8

    # On a 1"x1" page the DPI equals the image width
    assert embedded["dpi_w"] == 8
    assert embedded["dpi_h"] == 8
コード例 #13
0
ファイル: test_pageinfo.py プロジェクト: stweil/OCRmyPDF
def test_single_page_image(outdir):
    """Convert an 8x8 monochrome PNG to PDF at 8 dpi and inspect its PdfInfo."""
    filename = outdir / 'image-mono.pdf'
    im_tmp = outdir / 'tmp.png'

    im = Image.new('1', (8, 8), 0)
    # Set the pixels on the main diagonal
    for xy in zip(range(8), range(8)):
        im.putpixel(xy, 1)
    im.save(str(im_tmp), format='PNG')

    per_axis = (img2pdf.ImgSize.dpi, 8)
    imgsize = (per_axis, (img2pdf.ImgSize.dpi, 8))
    layout_fun = img2pdf.get_layout_fun(None, imgsize, None, None, None)

    pdf_bytes = img2pdf.convert(
        im_tmp.read_bytes(),
        producer="img2pdf",
        with_pdfrw=False,
        layout_fun=layout_fun,
    )
    filename.write_bytes(pdf_bytes)

    info = pdfinfo.PdfInfo(filename)
    assert len(info) == 1

    first_page = info[0]
    assert not first_page.has_text
    assert len(first_page.images) == 1

    embedded = first_page.images[0]
    assert embedded.width == 8
    assert embedded.color == Colorspace.gray

    # On a 1"x1" page the DPI equals the image width
    assert isclose(embedded.xres, 8)
    assert isclose(embedded.yres, 8)
コード例 #14
0
ファイル: bscan.py プロジェクト: SylvainRoy/bscan
def generatePdfCommand(options, args):
    """Build a PDF doc with the images and write it to ``out.pdf``.

    The image list comes from ``inputFiles(options, args)``.
    """
    imgfiles = inputFiles(options, args)
    # Generate the PDF doc
    pdf_bytes = img2pdf.convert(imgfiles, dpi=25)
    # The context manager closes the file even if the write fails, and the
    # local no longer shadows the builtin name `file`.
    with open("out.pdf", "wb") as pdf_file:
        pdf_file.write(pdf_bytes)
コード例 #15
0
def convert_to_pdf(img_list, output_folder, pdf_file_name):
    '''
        Builds a single pdf from a list of image files and stores it
        as pdf_file_name inside output_folder
    '''

    document = img2pdf.convert(img_list, dpi=100)

    destination = os.path.join(output_folder, pdf_file_name)
    with open(destination, 'wb') as handle:
        handle.write(document)
コード例 #16
0
ファイル: test_rotation.py プロジェクト: jbarlow83/OCRmyPDF
 def make_rotate_test(prefix, image_angle, page_angle):
     """Write a test PDF whose image and page carry the given rotations.

     The typewriter image is transposed by ``image_angle`` (when non-zero),
     converted to a 200 dpi PDF in memory, and the page's Rotate entry is
     set to ``page_angle``. Returns the path of the saved PDF.
     """
     source = Image.open(fspath(resources / 'typewriter.png'))
     if image_angle != 0:
         ccw_angle = (-image_angle) % 360
         transpose_op = getattr(Image, f'ROTATE_{ccw_angle}')
         source = source.transpose(transpose_op)

     png_buffer = BytesIO()
     source.save(png_buffer, format='PNG')
     png_buffer.seek(0)
     png_bytes = png_buffer.read()

     pdf_buffer = BytesIO()
     layout = img2pdf.get_fixed_dpi_layout_fun((200, 200))
     img2pdf.convert(png_bytes, layout_fun=layout, outputstream=pdf_buffer)
     pdf_buffer.seek(0)

     pike = pikepdf.open(pdf_buffer)
     pike.pages[0].Rotate = page_angle
     target = outdir / f'{prefix}_{image_angle}_{page_angle}.pdf'
     pike.save(target)
     return target
コード例 #17
0
def convert_pdf(url):
    """Combine the files directly inside ``CURRENT/url`` into ``result.pdf``.

    Only the top level of the directory is listed; sub-directories are not
    descended into.

    NOTE(review): the directory is listed under ``join(CURRENT, url)`` but the
    paths handed to img2pdf are built relative to ``url`` alone -- confirm the
    working directory is expected to be ``CURRENT``.
    """
    # Only the first walk() entry (the top-level listing) is needed; the
    # default keeps the result empty when the directory cannot be listed.
    _, _, filenames = next(walk(join(CURRENT, url)), (None, None, []))
    f = ["%s/%s" % (url, x) for x in filenames]
    # Parenthesised single-argument form prints identically on Python 2 and 3
    print(f)

    pdf_bytes = img2pdf.convert(f, dpi=300, x=None, y=None)
    # The context manager closes the file even if the write fails
    with open('result.pdf', 'wb') as doc:
        doc.write(pdf_bytes)
コード例 #18
0
ファイル: core.py プロジェクト: nerogit/slideshare-download
def convert_pdf(title):
    """Merge the files of ``title`` into a PDF under MEDIA_DIR.

    Returns the PDF path relative to MEDIA_DIR (``<title>/<title>.pdf``).
    """
    from img2pdf import convert as images_to_pdf
    from natsort import natsorted

    # Natural sort keeps numbered files in numeric order,
    # e.g. [1.jpg, 10.jpg, 2.jpg ...] --> [1.jpg, 2.jpg, ... 10.jpg]
    ordered = natsorted(get_dir_files(title))
    print(ordered)

    document = images_to_pdf(ordered, dpi=300, x=None, y=None)
    filepath = '{}.pdf'.format(join(title, title))
    destination = join(MEDIA_DIR, filepath)

    with open(destination, 'wb') as out:
        out.write(document)
    return filepath
コード例 #19
0
    def chapters2pdf(self, from_chapter, merge=20, image_ext='jpg'):
        """Merge the images of several consecutive chapters into one PDF.

        Chapter folders are looked up in the current working directory as
        ``<comic>_<NNN>`` (chapter number zero-padded to 3 digits). The PDF
        is written there as ``<comic>_<from>_<to>.pdf``.

        :param from_chapter: Number of the first chapter to include.
        :param merge: How many consecutive chapters to include.
        :param image_ext: File extension of the page images (without dot).
        """
        to_chapter = from_chapter + merge - 1
        pdf_name = self.comic + '_' + str(from_chapter).zfill(3) + '_' + str(to_chapter).zfill(3) + '.pdf'
        pdf_file = os.path.join(os.getcwd(), pdf_name)

        image_list = []
        for index in range(merge):
            image_folder = self.comic + '_' + str(from_chapter + index).zfill(3)
            image_path = os.path.join(os.getcwd(), image_folder, '*.' + image_ext)
            # glob returns matches in arbitrary order; sort them so the pages
            # of each chapter land in the PDF in file-name order.
            # NOTE(review): this is a plain string sort, so it assumes page
            # file names are zero-padded -- confirm against the downloader.
            image_list += sorted(glob.glob(image_path))

        pdf_bytes = img2pdf.convert(image_list)
        with open(pdf_file, 'wb') as pdf:
            pdf.write(pdf_bytes)
            print('PDF Completed - ' + pdf_file)
コード例 #20
0
ファイル: convert_img_to_pdf.py プロジェクト: jl-yang/JPG_PDF
	def open_file(self):
		"""Let the user pick image files and merge them into ``PDF.pdf``.

		The selected paths are sorted before conversion. An error dialog is
		shown if the conversion fails, and a confirmation dialog once the
		PDF has been written to the current directory.
		"""
		# Select multiple files
		self.fileName = tkFileDialog.askopenfilename(parent=self.master, multiple=True)

		# Nothing selected (dialog cancelled)
		if not self.fileName:
			return

		# Convert them to a single pdf
		try:
			self.pdf = img2pdf.convert(sorted(self.fileName))
		except Exception:
			# `Exception` rather than a bare except, so KeyboardInterrupt
			# and SystemExit are not swallowed by the error dialog.
			tkMessageBox.showerror("转换失败", "选择的文件不是图像文件,请重新选择", parent=self.master)
			return

		with open("PDF.pdf", "wb") as f:
			f.write(self.pdf)

		tkMessageBox.showinfo("转换成功", "已成功转换!转换后的pdf文件就在当前目录,文件名为PDF", parent=self.master)
コード例 #21
0
def run():
	"""Download every slide image and merge them into a single PDF.

	Slide URLs come from ``parser(url)``. The images are fetched with wget
	into /tmp/img, each one is converted to a one-page PDF in /tmp/pdf, and
	the pages are merged (in sorted file-name order) into ``filename``.
	``url`` and ``filename`` are not defined here and are resolved from the
	enclosing scope.
	"""
	import subprocess

	directoryimg="/tmp/img"
	directorypdf="/tmp/pdf"
	directoryworking= os.getcwd()

	(listurl,lenlisturl)=parser(url)
	print(str(lenlisturl)+" slides found")

	# Always start from empty working directories
	for directory in (directoryimg, directorypdf):
		if os.path.exists(directory):
			shutil.rmtree(directory)
		os.makedirs(directory)

	print("Begin download slides : Please wait ...")

	with open(os.devnull, 'w') as devnull:
		for item in listurl:
			# The URLs are scraped from a web page. Passing them as an
			# argument list (no shell) keeps a crafted URL from injecting
			# shell commands.
			subprocess.call(['wget', '-q', '-P', directoryimg, item], stdout=devnull)

	print("End Download")
	print("Begin convert slides to pdf file")
	files = listoffiles(directoryimg)

	for item in files:
		pdf_bytes = img2pdf.convert([directoryimg+"/"+item])
		base = item.split('.',1)[0]
		itempdf=directorypdf+"/"+base+".pdf"
		# PDF data is binary: write it in "wb" mode, not text append mode
		with open(itempdf,"wb") as pdf_file:
			pdf_file.write(pdf_bytes)

	merger = PdfFileMerger()
	files = [x for x in os.listdir(directorypdf) if x.endswith('.pdf')]
	# The merger reads its inputs when write() is called, so the handles
	# stay open until then and are closed afterwards.
	handles = []
	try:
		for fname in sorted(files):
			handle = open(os.path.join(directorypdf, fname), 'rb')
			handles.append(handle)
			merger.append(PdfFileReader(handle))

		merger.write(filename)
	finally:
		for handle in handles:
			handle.close()
	print("End  convert to pdf")
	print("File saved at "+directoryworking+"/"+filename)
コード例 #22
0
ファイル: test_pageinfo.py プロジェクト: balu-/OCRmyPDF
def test_single_page_image():
    """Convert an 8x8 monochrome PNG to PDF at 8 dpi and inspect the page info."""
    filename = os.path.join(TEST_OUTPUT, 'image-mono.pdf')

    with NamedTemporaryFile(mode='wb+', suffix='.png') as im_tmp:
        im = Image.new('1', (8, 8), 0)
        # Set the pixels on the main diagonal
        for xy in zip(range(8), range(8)):
            im.putpixel(xy, 1)
        im.save(im_tmp.name, format='PNG')

        per_axis = (img2pdf.ImgSize.dpi, 8)
        imgsize = (per_axis, (img2pdf.ImgSize.dpi, 8))
        layout_fun = img2pdf.get_layout_fun(None, imgsize, None, None, None)

        # Read the PNG back through the temporary file's own handle
        im_tmp.seek(0)
        png_bytes = im_tmp.read()
        pdf_data = img2pdf.convert(
            png_bytes,
            producer="img2pdf",
            with_pdfrw=False,
            layout_fun=layout_fun,
        )

        with open(filename, 'wb') as out:
            out.write(pdf_data)

    all_pages = pageinfo.pdf_get_all_pageinfo(filename)
    assert len(all_pages) == 1

    first_page = all_pages[0]
    assert not first_page['has_text']
    assert len(first_page['images']) == 1

    embedded = first_page['images'][0]
    assert embedded['width'] == 8
    assert embedded['color'] == 'gray'

    # Surprising but right: the PDF spec requires /BitsPerComponent 8 for a
    # /FlateDecode image, so the mono image is stored as 8-bit.
    assert embedded['bpc'] == 8

    # On a 1"x1" page the DPI equals the image width
    for axis in ('dpi_w', 'dpi_h'):
        assert embedded[axis] == 8
コード例 #23
0
ファイル: toPDF.py プロジェクト: CoderCai/toPDF
    def topdf(self):
        """Decode the uploaded base64 image and store it as a JPEG and a PDF.

        The payload is everything after the first comma of the URL-unquoted
        ``self.base64img``. The decoded image is written to
        ``<fileExt>.jpg``, converted with img2pdf and written to
        ``<fileExt>.pdf``, and the name is recorded through
        ``db.mdb('topdf', 'pdfexsit')``.

        :return: Name of the generated PDF file.
        """
        self.f_pdf = self.fileExt+'.pdf'
        self.filename = self.fileExt + '.jpg'
        self.codes = "".join(unquote(self.base64img).split(',')[1:])

        # Decode once, before any file is created, and reuse the bytes for
        # both the image file and the PDF conversion.
        image_bytes = base64.b64decode(self.codes)

        # static file (CDN)
        with open(self.filename, 'wb') as img:
            img.write(image_bytes)

        sd = img2pdf.convert([StringIO.StringIO(image_bytes)], 150, x=620, y="")
        with open(self.f_pdf, 'wb') as hand:
            hand.write(sd)

        pdfexsit = db.mdb('topdf', 'pdfexsit').perform()
        pdfexsit.insert({'pdf':self.fileExt})
        return self.f_pdf
コード例 #24
0
ファイル: pdf_utilities.py プロジェクト: bschollnick/QuickBBS
def imgmem_to_pdf(sourcedata, targetfile):
    """
     Use the img2pdf library to create a lossless PDF from an image.

     img2pdf supports:
    | Format                | Colorspace                     |    Result    |
    | --------------------- | ------------------------------ | ------------ |
    | JPEG                  | any                            | direct       |
    | JPEG2000              | any                            | direct       |
    | PNG (non-interlaced)  | any                            | direct       |
    | TIFF (CCITT Group 4)  | monochrome                     | direct       |
    | any                   | any except CMYK and monochrome | PNG Paeth    |
    | any                   | monochrome                     | CCITT Group4 |
    | any                   | CMYK                           | flate        |


    :param sourcedata: A memory blob that contains the data from the image. (eg.
      The image file was read into memory).
    :type sourcedata: Binary blob

    :param targetfile: The FQPN filename of the PDF file to create
    :type targetfile: String

    :return: True if converted and written successfully, otherwise False.
      When the conversion itself fails, the target file is not created.
    :rtype: Boolean

    Dependency - img2pdf, which has a dependency on Pillow.

    https://gitlab.mister-muffin.de/josch/img2pdf
    https://pypi.org/project/img2pdf/
    """
    try:
        # Convert before opening the target, so a failed conversion does not
        # leave an empty (or truncated) file behind.
        pdf_bytes = img2pdf.convert(sourcedata)
        with open(targetfile, "wb") as target:
            target.write(pdf_bytes)
    except Exception:
        # Best-effort contract: any failure is reported as False. `Exception`
        # (not a bare except) lets KeyboardInterrupt/SystemExit propagate.
        return False
    return True
コード例 #25
0
import os
import img2pdf

directory = '/mnt/Cargo_2/Sync/Πανεπιστημιο/Diploma Thesis/Python Scripts/vis-results'
# Doubled extension carried by the images to convert
DOUBLE_EXT = '.png.png'

# Collect every doubled-extension PNG below `directory`
images = []
for root, _, files in os.walk(directory):
    for file in files:
        # The output name is derived by stripping the suffix, so the name
        # must actually END with it (a substring match is not enough).
        if file.endswith(DOUBLE_EXT) and '.pdf' not in file:
            images.append(os.path.join(root, file))

# Write one PDF next to each image, named without the doubled extension
for image in images:
    print(image)
    # Convert first so a failed conversion does not leave an empty PDF
    pdf_bytes = img2pdf.convert(image)
    with open(image[:-len(DOUBLE_EXT)] + '.pdf', 'wb') as fp:
        fp.write(pdf_bytes)
コード例 #26
0
    def run(self):
        """Run the model on every page of the input PDF and write output.pdf.

        Each page of ``self.dir_pdf`` is written out as a single-page PDF,
        rendered to a JPEG, passed through ``self.predict`` and saved as a
        numbered JPEG in ``<dir_out>/images``. These JPEGs are finally
        combined, in page order, into ``<dir_out>/output.pdf`` and the
        images directory is removed.
        """
        # Function-scope import: this block cannot see the file's import list
        import shutil

        self.start_new_session_and_model()
        self.load_model()

        pdf_reader = PdfFileReader(self.dir_pdf)
        num_pages = pdf_reader.getNumPages()

        dir_to_write_single_page = self.dir_out + '/single_page.pdf'
        dir_to_write_single_page_image = self.dir_out + '/single_page_image.jpg'

        dir_imgs_enhanced = os.path.join(self.dir_out, 'images')

        # Start from an empty images directory. shutil.rmtree replaces the
        # former `rm -rf` shell call, which broke on paths containing spaces
        # or shell metacharacters.
        if os.path.isdir(dir_imgs_enhanced):
            shutil.rmtree(dir_imgs_enhanced)
        os.makedirs(dir_imgs_enhanced)

        indexer = 0
        for num_page in range(num_pages):
            pdf_writer = PdfFileWriter()

            page_single = pdf_reader.getPage(num_page)

            pdf_writer.addPage(page_single)

            with open(dir_to_write_single_page, 'wb') as out:
                pdf_writer.write(out)
            pages = convert_from_path(dir_to_write_single_page, '500')

            for page in pages:
                page.save(dir_to_write_single_page_image, 'JPEG')

            img = cv2.imread(dir_to_write_single_page_image)

            bin_scales = [1]
            img_last = 0

            for bin_s in bin_scales:
                res = self.predict(img, bin_s)

                # NOTE(review): the 3-channel img_fin built here is replaced
                # by the `(res == 0) * 255` mask a few lines below, so only
                # the transformations applied to `res` affect the result.
                img_fin = np.zeros((res.shape[0], res.shape[1], 3))
                res[:, :][res[:, :] == 0] = 2
                res = res - 1
                res = res * 255
                img_fin[:, :, 0] = res
                img_fin[:, :, 1] = res
                img_fin[:, :, 2] = res

                img_fin = img_fin.astype(np.uint8)
                img_fin = (res[:, :] == 0) * 255
                img_last = img_last + img_fin

            img_last[:, :][img_last[:, :] > 0] = 255
            img_last = (img_last[:, :] == 0) * 255

            cv2.imwrite(
                os.path.join(dir_imgs_enhanced,
                             '{0:04}'.format(indexer) + '.jpg'), img_last)
            indexer = indexer + 1

        with open(self.dir_out + "/output.pdf", "wb") as f:
            f.write(
                img2pdf.convert([
                    dir_imgs_enhanced + '/' + i
                    # os.listdir order is arbitrary; the zero-padded names
                    # sort into page order.
                    for i in sorted(os.listdir(dir_imgs_enhanced))
                    if i.endswith(".jpg")
                ]))

        # Best-effort cleanup, like the `rm -rf` it replaces
        shutil.rmtree(dir_imgs_enhanced, ignore_errors=True)
コード例 #27
0
import os
from PIL import Image
from img2pdf import convert
from pdf2image import convert_from_path

# Render every page of the source PDF to a numbered JPEG in the current
# directory.
images = convert_from_path('/home/belval/example.pdf')

for idx, img in enumerate(images):
    img.save('pdf_' + str(idx).zfill(len(str(len(images)))) + '.jpg', 'JPEG')

# os.listdir returns names in arbitrary order, so sort to keep the pages in
# sequence. The script itself is left out (filtering instead of
# `del dl[dl.index(...)]` avoids a ValueError when it is not in this
# directory).
dl = sorted(name for name in os.listdir(".") if name != "asdf.py")

pdf_list = []

for f in dl:
    img = Image.open(f)
    # Page number = trailing digits of the file stem. This works for any
    # zero-padding width, unlike slicing a fixed three characters.
    stem = os.path.splitext(f)[0]
    page_number = int(stem[len(stem.rstrip('0123456789')):])
    # Odd and even pages are cropped with different horizontal offsets
    if (page_number % 2) == 1:
        area = (125, 140, 1028 + 195, 1763)
    else:
        area = (231, 140, 1028 + 306, 1763)
    crop_img = img.crop(area)
    sizechange = crop_img.resize((1080, 2316))
    sizechange.save(f)
    pdf_list.append(f)

pdf = convert(pdf_list)
# The output is opened only once the data is ready, and always closed
with open("out.pdf", "wb") as file_name:
    file_name.write(pdf)
コード例 #28
0
ファイル: img2pdf.py プロジェクト: rocketpy/tricks
#  Convert images to PDF via direct JPEG inclusion.

# PyPi: https://pypi.org/project/img2pdf/
# pip install img2pdf

import img2pdf

# opening from filename
with open("name.pdf","wb") as f:
	f.write(img2pdf.convert('test.jpg'))

# opening from file handle (image data is binary, so open in "rb" mode)
with open("name.pdf","wb") as f1, open("test.jpg", "rb") as f2:
	f1.write(img2pdf.convert(f2))

# using in-memory image data (must be bytes; a str is treated as a file name)
with open("name.pdf","wb") as f:
	f.write(img2pdf.convert(b"\x89PNG..."))


# multiple inputs (variant 1)
with open("name.pdf","wb") as f:
	f.write(img2pdf.convert("test1.jpg", "test2.png"))

# multiple inputs (variant 2)
with open("name.pdf","wb") as f:
	f.write(img2pdf.convert(["test1.jpg", "test2.png"]))


# convert all files ending in .jpg inside a directory
dirname = "/path/to/images"
コード例 #29
0
ファイル: __init__.py プロジェクト: Shoeboxed/img2pdf
        def handle(self, f=inputf, out=outputf, with_pdfrw=with_pdfrw):
            """Convert image *f* with img2pdf and verify the resulting PDF.

            The generated PDF is parsed with pdfrw and its structure
            (trailer, catalog, page tree, each page and each embedded image)
            is checked against the original image opened with PIL.  Finally
            the generated PDF and the reference PDF *out* are both rewritten
            through pdfrw and compared with ``compare_pdf``.

            :param f: path of the input image file.
            :param out: path of the reference PDF to compare against.
            :param with_pdfrw: forwarded to ``img2pdf.convert``.
            """
            with open(f, "rb") as inf:
                orig_imgdata = inf.read()
            output = img2pdf.convert(orig_imgdata, nodate=True,
                                     with_pdfrw=with_pdfrw)
            from pdfrw import PdfReader, PdfName, PdfWriter
            from pdfrw.py23_diffs import convert_load, convert_store
            x = PdfReader(PdfReaderIO(convert_load(output)))
            self.assertEqual(sorted(x.keys()), [PdfName.Info, PdfName.Root,
                             PdfName.Size])
            self.assertIn(x.Root.Pages.Count, ('1', '2'))
            # NOTE(review): len() returns an int, so comparing it with the
            # strings '1' / '2' is never true and neither branch below ever
            # runs.  Presumably int comparisons were intended -- confirm the
            # expected Size values before enabling them.
            if len(x.Root.Pages.Kids) == '1':
                self.assertEqual(x.Size, '7')
                self.assertEqual(len(x.Root.Pages.Kids), 1)
            elif len(x.Root.Pages.Kids) == '2':
                self.assertEqual(x.Size, '10')
                self.assertEqual(len(x.Root.Pages.Kids), 2)
            self.assertEqual(x.Info, {})
            self.assertEqual(sorted(x.Root.keys()), [PdfName.Pages,
                                                     PdfName.Type])
            self.assertEqual(x.Root.Type, PdfName.Catalog)
            self.assertEqual(sorted(x.Root.Pages.keys()),
                             [PdfName.Count, PdfName.Kids, PdfName.Type])
            self.assertEqual(x.Root.Pages.Type, PdfName.Pages)
            orig_img = Image.open(f)
            for pagenum in range(len(x.Root.Pages.Kids)):
                # retrieve the original image frame that this page was
                # generated from
                orig_img.seek(pagenum)
                cur_page = x.Root.Pages.Kids[pagenum]

                # fall back to 96 dpi when the image carries no dpi metadata
                ndpi = orig_img.info.get("dpi", (96.0, 96.0))
                # In python3, the returned dpi value for some tiff images will
                # not be an integer but a float. To make the behaviour of
                # img2pdf the same between python2 and python3, we convert that
                # float into an integer by rounding.
                # Search online for the 72.009 dpi problem for more info.
                ndpi = (int(round(ndpi[0])), int(round(ndpi[1])))
                imgwidthpx, imgheightpx = orig_img.size
                # expected page size: pixels / dpi gives inches, times 72
                pagewidth = 72.0*imgwidthpx/ndpi[0]
                pageheight = 72.0*imgheightpx/ndpi[1]

                def format_float(f):
                    # Render whole numbers without a decimal part, everything
                    # else with up to four decimals and trailing zeros removed.
                    if int(f) == f:
                        return str(int(f))
                    else:
                        return ("%.4f" % f).rstrip("0")

                self.assertEqual(sorted(cur_page.keys()),
                                 [PdfName.Contents, PdfName.MediaBox,
                                  PdfName.Parent, PdfName.Resources,
                                  PdfName.Type])
                self.assertEqual(cur_page.MediaBox,
                                 ['0', '0', format_float(pagewidth),
                                  format_float(pageheight)])
                self.assertEqual(cur_page.Parent, x.Root.Pages)
                self.assertEqual(cur_page.Type, PdfName.Page)
                self.assertEqual(cur_page.Resources.keys(),
                                 [PdfName.XObject])
                self.assertEqual(cur_page.Resources.XObject.keys(),
                                 [PdfName.Im0])
                self.assertEqual(cur_page.Contents.keys(),
                                 [PdfName.Length])
                self.assertEqual(cur_page.Contents.Length,
                                 str(len(cur_page.Contents.stream)))
                # the content stream only scales and draws the single image
                self.assertEqual(cur_page.Contents.stream,
                                 "q\n%.4f 0 0 %.4f 0.0000 0.0000 cm\n"
                                 "/Im0 Do\nQ" % (pagewidth, pageheight))

                imgprops = cur_page.Resources.XObject.Im0

                # test if the filter is valid:
                self.assertIn(
                    imgprops.Filter, [PdfName.DCTDecode, PdfName.JPXDecode,
                                      PdfName.FlateDecode,
                                      [PdfName.CCITTFaxDecode]])

                # test if the image has correct size
                self.assertEqual(imgprops.Width, str(orig_img.size[0]))
                self.assertEqual(imgprops.Height, str(orig_img.size[1]))
                # if the input file is a jpeg then it should've been copied
                # verbatim into the PDF
                if imgprops.Filter in [PdfName.DCTDecode,
                                       PdfName.JPXDecode]:
                    self.assertEqual(
                        cur_page.Resources.XObject.Im0.stream,
                        convert_load(orig_imgdata))
                elif imgprops.Filter == [PdfName.CCITTFaxDecode]:
                    # wrap the embedded stream in a TIFF header so PIL can
                    # open it, then compare the pixels with the original
                    tiff_header = tiff_header_for_ccitt(
                        int(imgprops.Width), int(imgprops.Height),
                        int(imgprops.Length), 4)
                    imgio = BytesIO()
                    imgio.write(tiff_header)
                    imgio.write(convert_store(
                        cur_page.Resources.XObject.Im0.stream))
                    imgio.seek(0)
                    im = Image.open(imgio)
                    self.assertEqual(im.tobytes(), orig_img.tobytes())
                    try:
                        im.close()
                    except AttributeError:
                        pass

                elif imgprops.Filter == PdfName.FlateDecode:
                    # otherwise, the data is flate encoded and has to be equal
                    # to the pixel data of the input image
                    imgdata = zlib.decompress(
                        convert_store(cur_page.Resources.XObject.Im0.stream))
                    if imgprops.DecodeParms:
                        # with DecodeParms set, compare against the
                        # decompressed payload taken from the source image
                        if orig_img.format == 'PNG':
                            pngidat, palette = img2pdf.parse_png(orig_imgdata)
                        elif orig_img.format == 'TIFF' \
                                and orig_img.info['compression'] == "group4":
                            offset, length = \
                                    img2pdf.ccitt_payload_location_from_pil(
                                            orig_img)
                            pngidat = orig_imgdata[offset:offset+length]
                        else:
                            pngbuffer = BytesIO()
                            orig_img.save(pngbuffer, format="png")
                            pngidat, palette = img2pdf.parse_png(
                                    pngbuffer.getvalue())
                        self.assertEqual(zlib.decompress(pngidat), imgdata)
                    else:
                        # map the PDF colorspace name to the PIL mode needed
                        # to rebuild an image from the raw pixel data
                        colorspace = imgprops.ColorSpace
                        if colorspace == PdfName.DeviceGray:
                            colorspace = 'L'
                        elif colorspace == PdfName.DeviceRGB:
                            colorspace = 'RGB'
                        elif colorspace == PdfName.DeviceCMYK:
                            colorspace = 'CMYK'
                        else:
                            raise Exception("invalid colorspace")
                        im = Image.frombytes(colorspace,
                                             (int(imgprops.Width),
                                              int(imgprops.Height)),
                                             imgdata)
                        # NOTE(review): images whose mode is already RGB, L,
                        # CMYK or CMYK;I get no pixel comparison here.
                        if orig_img.mode == '1':
                            self.assertEqual(im.tobytes(),
                                             orig_img.convert("L").tobytes())
                        elif orig_img.mode not in ("RGB", "L", "CMYK",
                                                   "CMYK;I"):
                            self.assertEqual(im.tobytes(),
                                             orig_img.convert("RGB").tobytes())
                        # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does
                        # not have the close() method
                        try:
                            im.close()
                        except AttributeError:
                            pass
            # now use pdfrw to parse and then write out both pdfs and check the
            # result for equality
            y = PdfReader(out)
            outx = BytesIO()
            outy = BytesIO()
            xwriter = PdfWriter()
            ywriter = PdfWriter()
            xwriter.trailer = x
            ywriter.trailer = y
            xwriter.write(outx)
            ywriter.write(outy)
            self.assertEqual(compare_pdf(outx.getvalue(), outy.getvalue()), True)
            # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the
            # close() method
            try:
                orig_img.close()
            except AttributeError:
                pass
コード例 #30
0
ファイル: imgtopdf.py プロジェクト: darknexxa/scrapperv2
import img2pdf
import os
from PyPDF2 import PdfFileWriter, PdfFileReader

foldername = "Wingding Orgy: Hot Tails Extreme #1 [English]"
img_group = "1160388"
number_of_page = 34

if not os.path.exists(foldername+"/pdf"):
	os.makedirs(foldername+"/pdf")
# NOTE(review): range(1, number_of_page) stops at number_of_page - 1, while
# the message below reports number_of_page pages -- confirm the intended bound.
for x in range(1,number_of_page):
	pdf_bytes = img2pdf.convert([foldername+"/"+img_group+"_"+str(x)+".jpg"])
	# Use a context manager so every per-page PDF is flushed and closed
	# before it is reopened for merging further down.
	with open(foldername+"/pdf/"+img_group+"_"+str(x)+".pdf","wb") as file:
		file.write(pdf_bytes)

print("Done convert : "+str(number_of_page)+" pages of "+foldername+" to pdf")
print("EachPDF location : "+foldername+"/pdf/")

# Routine that copies every page of one PDF reader into the output writer
def append_pdf(input,output):
    """Add each page of *input* (in page order) to *output*."""
    page_num = 0
    while page_num < input.numPages:
        output.addPage(input.getPage(page_num))
        page_num += 1

# Creating an object where pdf pages are appended to
output = PdfFileWriter()

# Appending the pages of every per-page PDF. The source files are kept open
# until the merged document has been written (the reader is handed the open
# stream), and are then closed explicitly instead of being leaked.
open_sources = []
try:
	for x in range(1,number_of_page):
		source = open(foldername+"/pdf/"+img_group+"_"+str(x)+".pdf","rb")
		open_sources.append(source)
		append_pdf(PdfFileReader(source),output)

	# Writing all the collected pages to a file
	with open(foldername+"/"+foldername+".pdf","wb") as merged:
		output.write(merged)
finally:
	for source in open_sources:
		source.close()
コード例 #31
0
    def run(self):
        """Download the newspaper page images and build one combined PDF.

        Steps: create the image folder under ``self.directory_path``, download
        every ``img[data-big]`` image from the site while emitting progress
        signals, convert each image to its own PDF, merge those PDFs, remove
        the temporary folders and, when recipients are configured, e-mail the
        combined PDF.

        Errors are reported through ``self.error_signal_kr``; anything other
        than ``FileExistsError`` is also written to ``error_info_kr.txt``.
        """

        try:
            folder_path_img = self.directory_path + r'/Karavali Munjavu ' + self.date_today
            folder_path_pdf = self.directory_path + r'/Karavali Munjavu pdf ' + self.date_today
            # Raises FileExistsError when the folder for this date exists.
            makedirs(folder_path_img)

            url = 'http://www.karavalimunjavu.com/'
            res = get(url)
            res.raise_for_status()
            soup = BeautifulSoup(res.text, 'lxml')

            self.start_signal_kr.emit(True)

            # Downloading images.
            for images in soup.select('img[data-big]'):
                page_no = str(path.basename(images.get('data-big'))[15:])
                self.page_no_signal_kr.emit(page_no)
                img_download = get(url + images.get('data-big'), stream=True)
                # The header may be missing; int(None) used to abort the
                # whole run. Without a known size we simply skip the
                # percentage updates (this also avoids dividing by zero).
                content_length = img_download.headers.get("Content-Length")
                file_size = int(content_length) if content_length else 0
                progress = 0
                with open(path.join(folder_path_img, page_no), 'wb') as f:
                    for data in img_download:
                        f.write(data)
                        progress += len(data)
                        if file_size:
                            percentage = (progress / file_size) * 100
                            self.percentage_signal_kr.emit(
                                round(percentage, 0))
                sleep(0.2)
                self.percentage_signal_kr.emit(0)

            makedirs(folder_path_pdf)

            # Converting images to pdf.
            print('Converting images to pdf')
            for page in listdir(folder_path_img):
                # Opening with PIL validates the file is an image; close the
                # handle right away instead of leaking it.
                with Image.open(folder_path_img + '//' + page) as opened:
                    image_path = opened.filename
                pdf_bytes = convert(image_path)
                with open(path.join(folder_path_pdf, page[:-4] + '.pdf'),
                          'wb') as pdf_file:
                    pdf_file.write(pdf_bytes)
            print('Images converted to pdf.')

            rmtree(folder_path_img)  # Deleting folder containing images.

            self.pdf_progress_signal_kr.emit('Creating combined PDF')
            merge_pdf_in_folder(folder_path_pdf, self.directory_path,
                                'Karavali Munjavu ' +
                                str(self.date_today))  # Merging PDFs
            self.pdf_progress_signal_kr.emit('Combined PDF created')

            rmtree(folder_path_pdf)  # Deleting folder containing pdfs.

            # Signal to let the main gui know that the process has completed.
            self.done_signal_kr.emit(True)

            if self.recipient_address_list != ['']:

                self.email_progress_signal_kr.emit('Sending email')
                send_email_pdf(self.recipient_address_list, [
                    self.directory_path + r'/Karavali Munjavu ' +
                    self.date_today + '.pdf'
                ],
                               subject='Karavali Munjavu Newspaper ' +
                               self.date_today)
                self.email_progress_signal_kr.emit('Mail sent successfully')

        except FileExistsError as file_error:

            self.error_signal_kr.emit(str(file_error))

        except Exception:
            # Catch Exception rather than everything so that SystemExit and
            # KeyboardInterrupt are not swallowed.
            self.error_signal_kr.emit(
                'Error occured. Written to error_info_kr.txt.')
            with open('error_info_kr.txt', 'w') as error_file:
                error_file.write(traceback.format_exc())
コード例 #32
0
import img2pdf
import argparse
import os
import io

ap = argparse.ArgumentParser()
ap.add_argument('-d', '--directory', required=True, help='path to folder contains images')
ap.add_argument('-o', '--output', required=True, help='output pdf file')
args = vars(ap.parse_args())

# os.listdir() returns entries in arbitrary order; sort so the pages of the
# resulting PDF come out in a deterministic (file name) order.
image_blobs = []
for img in sorted(os.listdir(args['directory'])):
  if img.endswith('.jpg'):
    with open(os.path.join(args['directory'], img), 'rb') as f:
      # Keep the raw bytes; img2pdf accepts them directly.
      image_blobs.append(f.read())

with open(args['output'], 'wb') as o:
  o.write(img2pdf.convert(image_blobs))

コード例 #33
0
matches = re.findall('":"(.*?)"', data)

# enumerate() gives the running page number directly; matches.index(match)
# rescanned the list on every iteration and reported the wrong number when
# the same link appeared more than once.
for page_index, match in enumerate(matches, 1):
    sys.stdout.write("\rDownloading page " + str(page_index) + "/" + str(npages) + "...")
    sys.stdout.flush()
    link = match.replace("\\u0026", "&").encode('utf-8')
    # The cached file name is the MD5 hex digest of the (encoded) link.
    filename = hashlib.md5(link).hexdigest()
    with open(cache + filename + ".jpg", "wb") as f:
        f.write(requests.get(link).content)
    pages.append(filename)

print("\nConverting images to pdf format...")
for filename in pages:
    try: #python 2.x
        pdf_bytes = img2pdf.convert([cache + filename + ".jpg"])
    except Exception: #python 3.x
        # Fall back to handing img2pdf the raw image bytes.
        with open(cache + filename + ".jpg", "rb") as image_file:
            pdf_bytes = img2pdf.convert(image_file.read())
    with open(cache + filename + ".pdf", "wb") as f:
        f.write(pdf_bytes)
    sys.stdout.write("\r'" + filename + ".pdf' created!")
    sys.stdout.flush()

merger = PyPDF2.PdfFileMerger()
print("\nMerging everything...")
# Keep the per-page PDFs open until the merged file is written, then close
# them explicitly instead of leaking the handles.
open_pdfs = []
try:
    for filename in pages:
        pdf_handle = open(cache + filename + '.pdf', 'rb')
        open_pdfs.append(pdf_handle)
        merger.append(PyPDF2.PdfFileReader(pdf_handle))
    merger.write(title + ".pdf")
finally:
    for pdf_handle in open_pdfs:
        pdf_handle.close()
print("Well done!")
コード例 #34
0
ファイル: globalFunctions.py プロジェクト: sanyipapp/comic-dl
    def conversion(self, directory_path, conversion, delete_files, comic_name,
                   chapter_number):
        """Convert a chapter's ``*.jpg`` pages to PDF or CBZ, then clean up.

        :param directory_path: directory containing the downloaded pages.
        :param conversion: ``pdf`` or ``cbz`` (case-insensitive), or the
            string ``None`` to skip conversion.
        :param delete_files: the page images are removed when the lowercased,
            stripped string form of this value is ``no``, ``false`` or
            ``delete``.
        :param comic_name: used in the output file name.
        :param chapter_number: used in the output file name.

        If the conversion itself fails, the page images are always kept.
        Previously the failure path set ``delete_files = "No"``, which is one
        of the values that *triggers* deletion, so the images were removed
        exactly when the conversion had not produced a usable file.
        """
        conversion_kind = str(conversion).lower().strip()
        conversion_failed = False

        if conversion_kind in ['pdf']:
            # Pages are ordered by the integer value of the file name; a
            # non-numeric name (or a '.' in the directory path) breaks this.
            im_files = [
                image_files for image_files in sorted(
                    glob.glob(str(directory_path) + "/" + "*.jpg"),
                    key=lambda x: int(
                        str((x.split('.')[0])).split(os.sep)[-1]))
            ]
            pdf_file_name = "{0} - Ch {1}.pdf".format(comic_name,
                                                      chapter_number)
            try:
                with open(
                        str(directory_path) + "/" + str(pdf_file_name),
                        "wb") as f:
                    f.write(img2pdf.convert(im_files))
                    print("Converted the file to pdf...")
            except Exception as FileWriteError:
                print("Coudn't write the pdf file...")
                print(FileWriteError)
                # Let's not delete the files if the conversion failed...
                conversion_failed = True
            try:
                self.conversion_cleaner(file_path=str(directory_path) + "/" +
                                        str(pdf_file_name))
            except Exception as FileMoveError:
                print("Could not move the pdf file.")
                print(FileMoveError)

        elif conversion_kind in ['cbz']:
            # The archive is first created next to the chapter directory and
            # then moved into it.
            main_directory = str(directory_path).split(os.sep)
            main_directory.pop()
            cbz_directory = str(os.sep.join(main_directory)) + os.sep

            cbz_file_name = str(cbz_directory) + "{0} - Ch {1}".format(
                comic_name, chapter_number)

            try:
                shutil.make_archive(cbz_file_name, 'zip', directory_path,
                                    directory_path)
                os.rename(
                    str(cbz_file_name) + ".zip",
                    (str(cbz_file_name) + ".zip").replace(".zip", ".cbz"))
            except Exception as CBZError:
                print("Coudn't write the cbz file...")
                print(CBZError)
                # Let's not delete the files if the conversion failed...
                conversion_failed = True
            generatedFilePath = os.path.join(
                directory_path,
                cbz_file_name.split(os.sep).pop() + ".cbz")
            try:
                shutil.move(
                    os.path.join(cbz_directory, cbz_file_name + ".cbz"),
                    generatedFilePath)
            except Exception as FileDeleteError:
                print("Couldn't move the file or delete the directory.")
                print(FileDeleteError)
            try:
                self.conversion_cleaner(file_path=str(generatedFilePath))
            except Exception as FileMoveError:
                print("Could not move the cbz file.")
                print(FileMoveError)

        elif str(conversion) == "None":
            pass
        else:
            print(
                "Seems like that conversion isn't supported yet. Please report it on the repository..."
            )

        delete_requested = str(delete_files).lower().strip() in [
            'no', 'false', 'delete'
        ]
        if delete_requested and not conversion_failed:
            for image_files in glob.glob(str(directory_path) + "/" + "*.jpg"):
                try:
                    os.remove(image_files)
                except Exception as FileDeleteError:
                    print("Couldn't delete the image file...")
                    print(FileDeleteError)
            print("Deleted the files...")
コード例 #35
0
    current_dir = os.path.join(ROOT_DIR, current_dir)
    yearbook_pdf = os.path.join(current_dir, 'yearbook.pdf')
    collage_dir = os.path.join(current_dir, 'collages')
    collage_pdfs = os.listdir(collage_dir)
    # frontpage_pdf = os.path.join(ROOT_DIR, 'frontpage.pdf')

    a4inpt = (img2pdf.mm_to_pt(210), img2pdf.mm_to_pt(297)
              )  # specify page size (A4)
    layout_fun = img2pdf.get_layout_fun(a4inpt)

    # convert department pic 1 to pdf
    department_pic1_pdf = os.path.join(current_dir, 'department_pic1.pdf')
    department_pic1 = os.path.join(current_dir, 'department_pic1.jpg')
    if os.path.exists(department_pic1):
        with open(department_pic1_pdf, "wb") as f:
            f.write(img2pdf.convert(department_pic1, layout_fun=layout_fun))

    # convert department pic 2 to pdf
    department_pic2_pdf = os.path.join(current_dir, 'department_pic2.pdf')
    department_pic2 = os.path.join(current_dir, 'department_pic2.jpg')
    if os.path.exists(department_pic2):
        with open(department_pic2_pdf, "wb") as f:
            f.write(img2pdf.convert(department_pic2, layout_fun=layout_fun))

    merger = PdfFileMerger()
    # merger.append(frontpage_pdf)
    merger.append(yearbook_pdf)

    for collage in collage_pdfs:
        if collage.endswith('.pdf'):
            pdf = os.path.join(collage_dir, collage)
コード例 #36
0
print ("Compilando em pdf...\n")
# The PDF name is taken from the URL found in the last script tag: everything
# after the final '/', minus the two trailing characters.
# (Raw string so that \s is a regex escape, not a string escape.)
namePDF = re.search(r"(?P<url>https?://[^\s]+(}))",script_tags[len(script_tags)-1].text).group("url")
namePDF = namePDF[namePDF.rfind('/')+1:-2]

# list the names of the downloaded images in arq
os.chdir('img/')
arq = os.listdir(os.getcwd())
arq.sort()

if ( len(arq) != num_pages):
	print("ERRO 1.2 tente novamente")
	# Remove the partial download without going through a shell.
	for leftover in arq:
		if leftover.endswith('.jpg'):
			os.remove(leftover)
	sys.exit()

with open("../"+namePDF+".pdf", "wb") as f:
	f.write(img2pdf.convert([i for i in arq if i.endswith(".jpg")]))


# Re-open the result to check that it has the expected number of pages.
with open("../"+namePDF+".pdf",'rb') as pdf_file:
	reader = PyPDF2.PdfFileReader(pdf_file)
	pdfpages = reader.getNumPages()

os.chdir('../')
if (num_pages != pdfpages):
	# namePDF comes from scraped page content, so it must never be pasted
	# into a shell command line; delete the file directly instead.
	os.remove(namePDF+".pdf")
	print("ERRO 1.3 tente novamente")

# remove the downloaded images
for leftover in os.listdir('img'):
	if leftover.endswith('.jpg'):
		os.remove(os.path.join('img', leftover))
コード例 #37
0
def triage_image_file(input_file, output_file, log, options):
    """Validate that *input_file* is a usable image and convert it to PDF.

    :param input_file: path of the candidate image.
    :param output_file: path the converted PDF is written to.
    :param log: logger used for progress and error messages.
    :param options: object whose ``image_dpi`` attribute, when set, is used
        as the resolution for both axes.
    :raises UnsupportedImageFormatError: the file cannot be opened as an
        image, has an alpha channel, is CMYK without an ICC profile, or is
        rejected by img2pdf.
    :raises DpiError: the image has no DPI metadata, or a DPI that compares
        ``<= (96, 96)``, and ``options.image_dpi`` is not set.
    """
    try:
        log.info("Input file is not a PDF, checking if it is an image...")
        im = Image.open(input_file)
    except EnvironmentError as e:
        msg = str(e)

        # Recover the original filename
        realpath = ''
        if os.path.islink(input_file):
            realpath = os.path.realpath(input_file)
        elif os.path.isfile(input_file):
            realpath = '<stdin>'
        msg = msg.replace(input_file, realpath)
        log.error(msg)
        raise UnsupportedImageFormatError() from e

    # Close the image on every exit path; previously it was only closed when
    # all checks passed and leaked whenever one of them raised.
    try:
        log.info("Input file is an image")

        if 'dpi' in im.info:
            # NOTE(review): this is a lexicographic tuple comparison, so the
            # second axis only matters when the first equals 96 -- confirm
            # that is the intended test.
            if im.info['dpi'] <= (96, 96) and not options.image_dpi:
                log.info("Image size: (%d, %d)" % im.size)
                log.info("Image resolution: (%d, %d)" % im.info['dpi'])
                log.error(
                    "Input file is an image, but the resolution (DPI) is "
                    "not credible.  Estimate the resolution at which the "
                    "image was scanned and specify it using --image-dpi.")
                raise DpiError()
        elif not options.image_dpi:
            log.info("Image size: (%d, %d)" % im.size)
            log.error("Input file is an image, but has no resolution (DPI) "
                      "in its metadata.  Estimate the resolution at which "
                      "image was scanned and specify it using --image-dpi.")
            raise DpiError()

        if im.mode in ('RGBA', 'LA'):
            log.error("The input image has an alpha channel. Remove the alpha "
                      "channel first.")
            raise UnsupportedImageFormatError()

        if 'iccprofile' not in im.info:
            if im.mode == 'RGB':
                log.info('Input image has no ICC profile, assuming sRGB')
            elif im.mode == 'CMYK':
                log.info('Input CMYK image has no ICC profile, not usable')
                raise UnsupportedImageFormatError()
    finally:
        im.close()

    try:
        log.info("Image seems valid. Try converting to PDF...")
        layout_fun = img2pdf.default_layout_fun
        if options.image_dpi:
            layout_fun = img2pdf.get_fixed_dpi_layout_fun(
                (options.image_dpi, options.image_dpi))
        with open(output_file, 'wb') as outf:
            img2pdf.convert(input_file,
                            layout_fun=layout_fun,
                            with_pdfrw=False,
                            outputstream=outf)
        log.info("Successfully converted to PDF, processing...")
    except img2pdf.ImageOpenError as e:
        log.error(e)
        raise UnsupportedImageFormatError() from e
コード例 #38
0
    def conversion(self, directory_path, conversion, keep_files, comic_name, chapter_number):
        """Convert a chapter directory to PDF or CBZ and optionally delete it.

        The output file is written next to *directory_path* (in its parent
        directory) and is named ``"<comic_name> - Ch <chapter_number>"`` plus
        the extension.  An existing output file is left untouched.

        :param directory_path: directory containing the downloaded pages.
        :param conversion: ``pdf`` or ``cbz`` (case-insensitive), or the
            string ``None`` to skip conversion.
        :param keep_files: the chapter directory is removed when the
            lowercased, stripped string form of this value is ``no``,
            ``false`` or ``delete``.
        :param comic_name: used in the output file name.
        :param chapter_number: used in the output file name.

        A failed conversion always keeps the source directory.  The PDF
        branch used to set ``keep_files = "False"`` on failure, which removed
        the images precisely when no PDF had been produced.
        """
        main_directory = str(directory_path).split(os.sep)
        main_directory.pop()
        converted_file_directory = str(os.sep.join(main_directory)) + os.sep

        if str(conversion).lower().strip() in ['pdf']:
            # Pages are ordered by the integer value of the file name; a
            # non-numeric name (or a '.' in the directory path) breaks this.
            im_files = [image_files for image_files in sorted(glob.glob(str(directory_path) + "/" + "*.jpg"),
                                                              key=lambda x: int(
                                                                  str((x.split('.')[0])).split(os.sep)[-1]))]
            pdf_file_name = str(converted_file_directory) + "{0} - Ch {1}.pdf".format(comic_name, chapter_number)
            try:
                # Same "skip if it already exists" guard as the cbz branch.
                if os.path.isfile(pdf_file_name):
                    print('[Comic-dl] PDF File Exist! Skipping : {0}\n'.format(pdf_file_name))
                else:
                    with open(pdf_file_name, "wb") as f:
                        f.write(img2pdf.convert(im_files))
                        print("Converted the file to pdf...")
            except Exception as FileWriteError:
                print("Couldn't write the pdf file...")
                print(FileWriteError)
                # Let's not delete the files if the conversion failed...
                keep_files = "True"

        elif str(conversion).lower().strip() in ['cbz']:

            cbz_file_name = str(converted_file_directory) + "{0} - Ch {1}".format(comic_name, chapter_number)
            print("CBZ File : {0}".format(cbz_file_name))

            try:
                # If the .cbz file exists, we don't need to make it again.
                # Making it again would create the .zip file and then hit an
                # exception about the file existing already (issue #105).
                if os.path.isfile(str(cbz_file_name) + ".cbz"):
                    print('[Comic-dl] CBZ File Exist! Skipping : {0}\n'.format(cbz_file_name))
                else:
                    shutil.make_archive(cbz_file_name, 'zip', directory_path, directory_path)
                    os.rename(str(cbz_file_name) + ".zip", (str(cbz_file_name)+".zip").replace(".zip", ".cbz"))
            except Exception as CBZError:
                print("Couldn't write the cbz file...")
                print(CBZError)
                # Let's not delete the files if the conversion failed...
                keep_files = "True"

        elif str(conversion) == "None":
            pass

        else:
            print("Seems like that conversion isn't supported yet. Please report it on the repository...")

        if str(keep_files).lower().strip() in ['no', 'false', 'delete']:
            try:
                shutil.rmtree(path=directory_path, ignore_errors=True)
            except Exception as DirectoryDeleteError:
                print("Couldn't move the file or delete the directory.")
                print(DirectoryDeleteError)
            print("Deleted the files...")
コード例 #39
0
ファイル: slidedown.py プロジェクト: NitishGadangi/SlideDown
def merge(all_files):
    """Prompt for a name, write *all_files* into ``<name>.pdf``, then delete them."""
    pdf_path = input("\n\nEnter a name for file: ") + ".pdf"
    with open(pdf_path, "wb") as pdf_out:
        pdf_out.write(img2pdf.convert(all_files))
    # The source images are no longer needed once the PDF is on disk.
    for image_path in all_files:
        os.remove(image_path)
コード例 #40
0
# Converting the images: re-encode every downloaded image as JPEG, optionally
# in grayscale. os.path.join replaces the hard-coded backslash separators, so
# the paths are unchanged on Windows and also work elsewhere.
for img in ja_existem:
    with Image.open(os.path.join("comics", img)) as im:
        rgb_im = im.convert('RGB')
    if gray_scale:
        rgb_im = rgb_im.convert('L')
    # The output name is the part of the source name before its first '.'.
    rgb_im.save(os.path.join('comics_convertidos',
                             '{}.jpg'.format(img.split(".")[0])))

# Collecting the image paths. os.listdir() has no guaranteed order, so sort
# to get a deterministic page order in the PDF.
ja_existem = sorted(os.listdir("comics_convertidos"))
lista_imagens = []
for img in ja_existem:
    lista_imagens.append(os.path.join("comics_convertidos", img))

# Converting to pdf: the file name is built from the fourth '/'-separated
# piece of url_raiz plus a color/gray suffix.
nome_manga = url_raiz.split("/")[3]
nome_arquivo = "comics_fate_{}".format(nome_manga)
if gray_scale:
    nome_arquivo = "{}_cinza.pdf".format(nome_arquivo)
else:
    nome_arquivo = "{}_colorido.pdf".format(nome_arquivo)

with open(nome_arquivo,"wb") as f:
    f.write(img2pdf.convert(lista_imagens))
コード例 #41
0
ファイル: scribd_downloader.py プロジェクト: kenorb/scribd_dl
def convert_to_pdf(img_list, output_folder, pdf_file_name):
    """Convert *img_list* to a PDF at 100 dpi and save it in *output_folder*."""
    target_path = os.path.join(output_folder, pdf_file_name)
    # Build the whole document before the output file is opened.
    document = img2pdf.convert(img_list, dpi=100)
    with open(target_path, 'wb') as pdf_out:
        pdf_out.write(document)
コード例 #42
0
ファイル: pdfer.py プロジェクト: urakozz/python-pdfer
 def getPdfBytes(self):
     """Return the PDF that img2pdf produces from ``self.outfiles``."""
     return img2pdf.convert(self.outfiles)
コード例 #43
0
    text = msg.as_string()
    server.sendmail(fromaddr, toaddr, text)
    server.quit()
    print "process over check your email"
    return 0


# Pin setup (BCM numbering): pin 21 is an input with the internal pull-up
# enabled; pins 20 and 16 are outputs.
GPIO.setmode(GPIO.BCM)
GPIO.setup(21, GPIO.IN, pull_up_down=GPIO.PUD_UP)
GPIO.setup(20, GPIO.OUT)
GPIO.setup(16, GPIO.OUT)

# Main loop: runs forever, polling pin 21 on every pass.
while 1:

    # Drive both outputs low at the start of each pass.
    GPIO.output(20, 0)
    GPIO.output(16, 0)
    cam_on = GPIO.input(21)
    # With the pull-up enabled the input reads 0 only while it is pulled low
    # (presumably a button press -- confirm against the wiring).
    if (cam_on == 0):
        GPIO.output(20, 1)
        print "image capturing"
        # Capture a 640x480 frame with fswebcam into image.jpg.
        os.system("sudo fswebcam -r 640x480 -s 15 image.jpg")
        print "image captured "
        # NOTE(review): fswebcam writes image.jpg relative to the current
        # directory while the file is read from /home/pi/image.jpg; these
        # only match when the script runs from /home/pi.
        # NOTE(review): the JPEG is opened without "b"; binary mode would be
        # safer.
        with open("name.pdf", "wb") as f1, open("/home/pi/image.jpg") as f2:
            f1.write(img2pdf.convert(f2))

        mail_on()
        # Set both outputs high for 7 seconds before the next pass.
        GPIO.output(20, 1)
        GPIO.output(16, 1)
        time.sleep(7)
        # No lasting effect: cam_on is re-read from the pin on the next pass.
        cam_on = 1
コード例 #44
0
ファイル: compostion.py プロジェクト: mHaisham/MangaK-UI
def dir_to_pdf(path, save_path):
    """Combine the images found in *path* into a single PDF in *save_path*.

    Entries of *path* are processed in ``numericalSort`` order.  Entries that
    PIL cannot open are reported and skipped.  Images with transparency are
    flattened onto a white background and converted through a temporary JPEG.
    The PDF is named after the last component of *path*.  Nothing is written
    when no entry could be opened.

    :param path: directory containing the page images.
    :param save_path: directory the PDF is written to.
    """
    # get all entry names in page order
    entries = sorted(os.listdir(path), key=numericalSort)

    file_path_list = []
    temp_files = []

    try:
        for entry in entries:
            file_path = os.path.join(path, entry)
            try:
                # open image using PIL library
                source = Image.open(file_path)
            except Exception as ex:
                # if any error occurs skip the file
                print('[ERROR] [%s] Cant open %s as image!' %
                      (type(ex).__name__.upper(), os.path.join(path, entry)))
                continue

            # Close the source image once it has been inspected; the handles
            # used to stay open for every page.
            with source:
                page = source

                # if image has transparency, convert to RGBA mode
                if 'transparency' in page.info:
                    page = page.convert('RGBA')

                if page.mode == 'RGBA':
                    print('[CONVERT] [%s] RGBA to RGB' %
                          os.path.basename(os.path.normpath(entry)).upper())

                    # create RGB image with white background of same size
                    rgb = Image.new('RGB', page.size, (255, 255, 255))

                    # paste using the alpha channel as mask
                    rgb.paste(page, mask=page.split()[3])

                    # mkstemp reserves a real, unique file; the previous code
                    # derived a name from a NamedTemporaryFile that had
                    # already been deleted.
                    handle, temp = tempfile.mkstemp(suffix='.jpg')
                    os.close(handle)
                    temp_files.append(temp)

                    # save the flattened image and use it instead
                    rgb.save(temp, 'JPEG')
                    file_path = temp

            file_path_list.append(file_path)

        # if no images exit
        if not file_path_list:
            return

        try:
            # save as pdf using img2pdf
            with open(
                    os.path.join(save_path,
                                 os.path.basename(os.path.normpath(path)) +
                                 '.pdf'), 'wb') as f:
                f.write(img2pdf.convert(file_path_list))
        except Exception as e:
            logger.error('[%s] [%s] %s' % (type(e).__name__.upper(), path, e))
    finally:
        # The temporary JPEGs are only needed while the PDF is being built.
        for temp in temp_files:
            try:
                os.remove(temp)
            except OSError:
                pass
コード例 #45
0
ファイル: 21jingji.py プロジェクト: pinsily/ZaoBao
def img_pdf(files: list) -> None:
    """Combine *files* into a single dated PDF in today's directory.

    The target path is built from the value returned by
    ``get_today_directory()`` and the (year, month, day) triple returned by
    ``get_now_day()``, with the fixed suffix ``_21jingji.pdf``.

    :param files: images handed to ``img2pdf.convert`` as one list.
    """
    directory = get_today_directory()
    date_parts = get_now_day()
    year, month, day = date_parts
    output_file = f"{directory}/{year}{month}{day}_21jingji.pdf"

    # The output file is opened first, then the conversion result is
    # written into it in binary mode.
    with open(output_file, "wb") as pdf_out:
        pdf_bytes = img2pdf.convert(files)
        pdf_out.write(pdf_bytes)
コード例 #46
0
def _label_open_email(driver, label_title):
    """Apply the label called *label_title* to the email currently open.

    Sends the 'v' keyboard shortcut to the page body, waits for the label
    menu, then clicks the ``div`` whose ``title`` equals *label_title*.
    """
    page_body = driver.find_element_by_tag_name('body')
    time.sleep(1)
    page_body.send_keys('v')
    time.sleep(2)
    driver.find_element_by_xpath("//div[@title='" + label_title + "']").click()


def inboxScan(driver):
    """Walk through every email row of the inbox and act on its subject.

    For each row id found in the inbox table the email is opened and its
    sender and subject are read from the page source:

    * subject contains "Receipt:" and the sender is in the allow-list:
      the attachment is saved as ``<name after the colon>.jpg`` through the
      browser context menu, and the email is labelled 'Pertinent';
    * subject contains "Generate Report" and the sender is in the
      allow-list: all ``.jpg`` files of the gmailBot folder are merged into
      ``report.pdf``, the report is sent back as a reply attachment, and the
      email is labelled 'Pertinent';
    * anything else (including emails whose sender or subject could not be
      identified) is labelled 'NonPertinent'.

    :param driver: browser driver showing the inbox; it is refreshed first.
    :return: None. The scan time is printed when the loop has finished.
    """
    #refresh
    driver.refresh()

    #identify email ids in inbox
    idList = []
    time.sleep(5)
    html = driver.page_source
    soup = bs(html, 'lxml')
    # NOTE(review): the fourth table of the page is presumably the inbox
    # listing - confirm against the current page layout.
    table = soup.findAll('table')[3]
    for row in table.findAll('tr'):
        idList.append(str(row.get('id')))

    validEmails = ["*****@gmail.com", "*****@mms.att.net", "*****@dawson8a.com"]

    #perform scan for each email in inbox
    for emailId in idList:
        emailRow = driver.find_element_by_xpath('//*[@id="' + emailId + '"]')
        emailRow.click()
        time.sleep(2)

        #identify email body for bs4
        html = driver.page_source
        page_body = driver.find_element_by_tag_name('body')
        soup = bs(html, 'lxml')
        emailContent = soup.findAll('table', {'role': 'presentation'})

        # Reset per email: without this an email that has no matching
        # element would either raise NameError (first iteration) or be
        # judged by the sender/subject of the previously opened email.
        sender = None
        subject = None

        #find sender (last element carrying an 'email' attribute that is
        #not the excluded address)
        for row in emailContent:
            for line in row.findAll():
                if line.has_attr('email') and line.get('email') != "*****@gmail.com":
                    sender = line

        #find subject (last <h2> inside the presentation tables)
        for row in emailContent:
            for line in row.findAll('h2'):
                subject = line

        subjectText = subject.text if subject is not None else ""
        fromValidSender = sender is not None and any(
            x in sender.get('email') for x in validEmails)

        if fromValidSender and "Receipt:" in subjectText:

            #get file name from subject
            fileName = subjectText.split(":")[1].strip()

            #LOCATE and download and name attachment to path
            time.sleep(1)
            page_body = driver.find_element_by_tag_name('body')
            # Seven TAB presses move the focus before the context menu is
            # opened with Shift+F10.
            for _ in range(7):
                page_body.send_keys(Keys.TAB)
            time.sleep(2)
            actionChains.send_keys(Keys.SHIFT + Keys.F10).perform()
            pyautogui.typewrite(['down','down','down','down', 'enter'])
            time.sleep(5)
            pyautogui.typewrite(r"C:\Users\m4k04\Desktop\gmailBot\{}.jpg".format(fileName))
            pyautogui.typewrite(['enter'])
            time.sleep(2)

            #label pertinent
            _label_open_email(driver, 'Pertinent')

        elif fromValidSender and "Generate Report" in subjectText:
            #generate report
            # os.listdir returns names in arbitrary order, so they are
            # sorted to give the report a deterministic page order.
            # NOTE(review): bare file names are passed on, which only works
            # while the working directory is the gmailBot folder - confirm.
            with open("report.pdf","wb") as f:
                f.write(img2pdf.convert(sorted(
                    name for name in os.listdir(r'C:/Users/m4k04/Desktop/gmailBot')
                    if name.endswith(".jpg"))))

            #create reply
            page_body.send_keys('r')
            time.sleep(3)

            attachButton = driver.find_element_by_xpath('//*[@data-tooltip = "Attach files"]')
            attachButton.click()

            time.sleep(3)

            pyautogui.typewrite(r"C:\Users\m4k04\Desktop\gmailBot\report.pdf")
            pyautogui.typewrite(['enter'])

            time.sleep(3)

            # The tooltip literal contains invisible directional-formatting
            # characters around "(Ctrl-Enter)"; keep them when editing.
            sendButton = driver.find_element_by_xpath('//*[@data-tooltip="Send ‪(Ctrl-Enter)‬"]')
            sendButton.click()

            time.sleep(3)

            #label pertinent
            _label_open_email(driver, 'Pertinent')

        else:
            #label nonpertinent
            _label_open_email(driver, 'NonPertinent')

        time.sleep(2)

    timeNow = datetime.datetime.now().strftime("%H:%M:%S")
    print("Gmail Scanned at: " + str(timeNow))
コード例 #47
0
def write_pdf(image_list,pdf_name='name_removed.pdf'):
    """Convert *image_list* to a PDF and write it to *pdf_name*.

    :param image_list: images passed unchanged to ``img2pdf.convert``.
    :param pdf_name: path of the PDF file to create. Previously this
        argument was ignored and the output always went to 'aaa.pdf'.
    """
    with open(pdf_name, 'wb') as f:
        f.write(img2pdf.convert(image_list))
コード例 #48
0
ファイル: test.py プロジェクト: nyamel/zip2pdf
# Keep only the entries of path_list whose suffix is listed in Extends.
image_list = [str(p) for p in path_list if p.suffix in Extends]

Sucsess_list = []
Error_list = []

# Output folder named after the stem of ZIP_FILE; mkdir() raises
# FileExistsError when the folder is already there.
Filename = Path(ZIP_FILE.stem)
Filename.mkdir()

i = 0
for image in image_list:
    try:
        # Convert first and open the output afterwards, so a failed
        # conversion does not leave an empty .pdf behind.
        with zipfilepointer.open(image) as img:
            pdf_bytes = img2pdf.convert(img)
        with open(
                str(Filename / Path(Path(image).name).with_suffix('.pdf')),
                'wb') as op:
            op.write(pdf_bytes)
    except Exception:
        # Catch Exception rather than everything, so Ctrl-C / SystemExit
        # still stop the script. The failing member is extracted into the
        # 'Faild' sub-folder for inspection.
        print('Error:', image)
        zipfilepointer.extract(image, Filename / 'Faild')
        Error_list.append(image)
    else:
        print('Sucsess:', image)
        Sucsess_list.append(image)
        i += 1

print('\nConvert Sucsess:')
pprint.pprint(Sucsess_list)
print('\nConvert Faild:')
pprint.pprint(Error_list)
コード例 #49
0
import img2pdf
import os
import numpy as np

# Switch into the "images" folder below the starting directory; every
# relative path used afterwards resolves inside it.
path = os.getcwd() + "/images"
os.chdir(path)
myimages = []
dirFiles = os.listdir(os.getcwd())


def _trailing_number(fname):
    """Return the last whitespace-separated token of the file stem as int."""
    stem = fname.rsplit(os.path.extsep, 1)[0]
    return int(stem.rsplit(None, 1)[-1])


# Collect the .jpg files of the folder and order them by that number.
fnames = [fname for fname in os.listdir(os.getcwd()) if fname.endswith('.jpg')]
fnames.sort(key=_trailing_number)

# Write all pages into output.pdf (created inside the images folder).
with open("output.pdf", "wb") as f:
    jpg_pages = []
    for name in fnames:
        if name.endswith(".jpg"):
            jpg_pages.append(name)
    f.write(img2pdf.convert(jpg_pages))

# Delete every .jpg that is in the folder after the conversion.
imgDir = os.listdir(path)
for image in imgDir:
    if not image.endswith(".jpg"):
        continue
    os.remove(os.path.join(path, image))
コード例 #50
0
ファイル: __init__.py プロジェクト: MrSwiss/img2pdf
 def handle(self, f=inputf, out=outputf):
     """Convert image file *f* to PDF and verify the embedded image.

     The generated PDF is scanned line by line to pull out the image
     stream and its /Width, /Height, /Length, /Filter and /ColorSpace
     entries. These are checked against the original file, and the whole
     PDF is finally compared byte for byte with the expected file *out*.

     :param f: path of the input image (default bound when the function
         is defined).
     :param out: path of the expected PDF (default bound when the
         function is defined).
     """
     with open(f, "rb") as inf:
         orig_imgdata = inf.read()
     # NOTE(review): nodate=True presumably keeps timestamps out of the PDF
     # so the byte-for-byte comparison at the end is reproducible - confirm.
     pdf = img2pdf.convert([f], nodate=True)
     imgdata = b""
     # Parser state: instream is set once the image object's "stream"
     # keyword was seen, imgobj once the image dictionary was seen.
     instream = False
     imgobj = False
     colorspace = None
     imgfilter = None
     width = None
     height = None
     length = None
     # ugly workaround to parse the created pdf
     for line in pdf.split(b'\n'):
         if instream:
             # collect raw stream lines until the first "endstream"
             if line == b"endstream":
                 break
             else:
                 imgdata += line + b'\n'
         else:
             # The order of these checks matters: only one branch runs
             # per line.
             if imgobj and line == b"stream":
                 instream = True
             elif b"/Subtype /Image" in line:
                 imgobj = True
             elif b"/Width" in line:
                 width = int(line.split()[-1])
             elif b"/Height" in line:
                 height = int(line.split()[-1])
             elif b"/Length" in line:
                 length = int(line.split()[-1])
             elif b"/Filter" in line:
                 # the filter name is the second-to-last token of the line
                 imgfilter = line.split()[-2]
             elif b"/ColorSpace" in line:
                 colorspace = line.split()[-1]
     # remove trailing \n
     imgdata = imgdata[:-1]
     # test if the length field is correct
     self.assertEqual(len(imgdata), length)
     # test if the filter is valid:
     self.assertIn(imgfilter, [b"/DCTDecode", b"/JPXDecode", b"/FlateDecode"])
     # test if the colorspace is valid
     self.assertIn(colorspace, [b"/DeviceGray", b"/DeviceRGB", b"/DeviceCMYK"])
     # test if the image has correct size
     orig_img = Image.open(f)
     self.assertEqual(width, orig_img.size[0])
     self.assertEqual(height, orig_img.size[1])
     # if the input file is a jpeg then it should've been copied
     # verbatim into the PDF
     if imgfilter in [b"/DCTDecode", b"/JPXDecode"]:
         self.assertEqual(imgdata, orig_imgdata)
     elif imgfilter == b"/FlateDecode":
         # otherwise, the data is flate encoded and has to be equal to
         # the pixel data of the input image
         imgdata = zlib.decompress(imgdata)
         # map the PDF colorspace name to the matching PIL mode string
         if colorspace == b"/DeviceGray":
             colorspace = 'L'
         elif colorspace == b"/DeviceRGB":
             colorspace = 'RGB'
         elif colorspace == b"/DeviceCMYK":
             colorspace = 'CMYK'
         else:
             raise Exception("invalid colorspace")
         im = Image.frombytes(colorspace, (width, height), imgdata)
         # bring the original into a mode comparable with the decoded data:
         # mode '1' becomes 'L', any other unlisted mode becomes 'RGB'
         if orig_img.mode == '1':
             orig_img = orig_img.convert("L")
         elif orig_img.mode not in ("RGB", "L", "CMYK", "CMYK;I"):
             orig_img = orig_img.convert("RGB")
         self.assertEqual(im.tobytes(), orig_img.tobytes())
         # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the close() method
         try:
             im.close()
         except AttributeError:
             pass
     # lastly, make sure that the generated pdf matches bit by bit the
     # expected pdf
     with open(out, "rb") as outf:
         out = outf.read()
     self.assertEqual(pdf, out)
     # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the close() method
     try:
         orig_img.close()
     except AttributeError:
         pass
コード例 #51
0
import img2pdf

# opening from filename: the image path itself is handed to img2pdf and
# the resulting bytes are stored in name.pdf
with open("name.pdf", "wb") as pdf_file:
    pdf_bytes = img2pdf.convert('name.jpg')
    pdf_file.write(pdf_bytes)
コード例 #52
0
    current_dir = os.path.join(ROOT_DIR, current_dir)
    yearbook_pdf = os.path.join(current_dir, 'yearbook.pdf')
    collage_dir = os.path.join(current_dir, 'collages')
    collage_pdfs = os.listdir(collage_dir)
    # frontpage_pdf = os.path.join(ROOT_DIR, 'frontpage.pdf')

    a4inpt = (img2pdf.mm_to_pt(210), img2pdf.mm_to_pt(297)
              )  # specify page size (A4)
    layout_fun = img2pdf.get_layout_fun(a4inpt)

    # convert close friends pic to pdf
    close_friends_pic_pdf = os.path.join(current_dir, 'closeFriendsPic.pdf')
    close_friends_pic = os.path.join(current_dir, 'closeFriendsPic.jpg')
    if os.path.exists(close_friends_pic):
        with open(close_friends_pic_pdf, "wb") as f:
            f.write(img2pdf.convert(close_friends_pic, layout_fun=layout_fun))

    merger = PdfFileMerger()
    merger.append(FRONT_PAGE)
    merger.append(yearbook_pdf)

    if os.path.exists(close_friends_pic_pdf):
        merger.merge(1, close_friends_pic_pdf)

    for collage in collage_pdfs:
        if collage.endswith('.pdf'):
            pdf = os.path.join(collage_dir, collage)
            merger.append(pdf)

    merger.write(
        os.path.join(current_dir, 'final_yearbook_' + student_name + '.pdf'))
コード例 #53
0
        break
    else:
        print(folder,
              " klasörü önceden oluşturulmu.Burdan devam etmek ister misin?")
        print("\n")
        ans = input("Cevap: ")
        if ans == 'e' or ans == 'E':
            os.chdir(folder)
            break
        else:
            continue
nu = input("Değiştirmek istediğin karakter hangisi?: ")
print("\n\n")
# Download every page: the chosen text in the URL path is replaced by
# "-<page number>" and the result is stored as deneme<page number>.jpg.
# NOTE(review): only ModuleNotFoundError is caught here; download errors
# raised by urlretrieve are of other types and still propagate - confirm
# which exception was meant.
try:
    for i in range(1, int(page_no) + 1):
        path_array[i] = u_path.replace(nu, "-" + str(i))
        new_url = url.scheme + "://" + url.netloc + path_array[i]
        urllib.request.urlretrieve(new_url, "deneme" + str(i) + ".jpg")
        print("*" * i + "  " + str(i) + ".sayfa indirildi")
except ModuleNotFoundError as e:
    print(e)

print("\nPDF oluşturuluyor...")
print("\n\n")
pdf_name = input("PDF ismi: ")
print("\n\n")


def _page_order(name):
    """Sort key: the digits of *name* read as one number, then the name."""
    digits = "".join(ch for ch in name if ch in "0123456789")
    return (int(digits) if digits else 0, name)


# os.listdir returns names in arbitrary order; sorting by page number
# keeps the PDF pages in download order (deneme2 before deneme10).
with open(pdf_name + ".pdf", "wb") as f:
    f.write(img2pdf.convert(sorted(
        (i for i in os.listdir('.') if i.endswith(".jpg")),
        key=_page_order)))

print("\nPDF oluşturuldu.")
コード例 #54
0
ファイル: toPDF.py プロジェクト: CoderCai/toPDF
 def save_pdf(self):
     """Convert the image at ``self.filename`` and write it to ``self.f_pdf``.

     The image is opened in binary mode: image data is not text, and a
     text-mode handle would decode or newline-translate the bytes before
     img2pdf sees them.
     """
     with open(self.filename, 'rb') as img_fp:
         # NOTE(review): 150 is passed as the second positional argument
         # (presumably the dpi of the img2pdf version in use) - confirm.
         sd = img2pdf.convert([img_fp], 150, x=620, y="")
     # The context manager closes the output even if the write fails.
     with open(self.f_pdf, 'wb') as hand:
         hand.write(sd)
コード例 #55
0
def pngToPDF(fromFile, toFile):
    """Write the PDF that img2pdf produces for *fromFile* into *toFile*.

    :param fromFile: image input passed unchanged to ``img2pdf.convert``.
    :param toFile: path of the PDF file, opened for binary writing.
    """
    with open(toFile, "wb") as pdf_out:
        pdf_bytes = img2pdf.convert(fromFile)
        pdf_out.write(pdf_bytes)
コード例 #56
0
    imgs = []
    for i, j in enumerate(imgUrls):
        print("downloading %s" % (j))
        if not os.path.exists(topic + '/' + title):
            if not os.path.exists(topic):
                os.mkdir(topic)
            os.mkdir(topic + '/' + title)
            print("Directory ", topic + '/' + title, " Created ")
        else:
            print("Directory ", topic + '/' + title, " already exists")
        fullfilename = os.path.join(f'{topic}/{title}', f'{i}.jpeg')
        if not os.path.exists(fullfilename):
            urlretrieve(j, f"{fullfilename}")
            addWatermark(fullfilename)

        imgs.append(fullfilename)

        # if not os.path.exists(f"{topic}/{title} {item.id}.pdf"):
        if os.path.getsize(fullfilename) == 72032:
            imgs.remove(fullfilename)
            print('Ignored: ', fullfilename)

    with open(f"{topic}/{title} {item.id}.pdf", "wb") as f:
        f.write(img2pdf.convert(imgs))

    filePath = f"{topic}/{title} {item.id}.pdf"

    newBlob = bucket.blob(f'chapter-pdf/{filePath}')
    newBlob.upload_from_filename(filePath, content_type='application/pdf')
    newBlob.make_public()
コード例 #57
0
def _elab_completion(s, elab, source):
    """Submit *source* for evaluation and return the completion score.

    For the 'daa' course the code is posted once per language (c, cpp,
    java, python); the result is '100' if any of the replies scored 100
    and '0' otherwise. For every other course the code is posted once and
    the score slice of the reply (``text[-4:-1]``) is returned unchanged.
    """
    evaluate_url = (elab['url'] + 'login/student/code/' + elab['key'] +
                    '/code.evaluate.elab.php')

    if elab['key'] != 'daa':
        evaluate_payload = s.post(evaluate_url,
                                  data={
                                      'code': source,
                                      'input': ''
                                  })
        return evaluate_payload.text[-4:-1]

    scores = []
    for language in ('c', 'cpp', 'java', 'python'):
        evaluate_payload = s.post(evaluate_url,
                                  data={
                                      'code': source,
                                      'input': '',
                                      'language': language
                                  })
        scores.append(evaluate_payload.text[-4:-1])

    return '100' if '100' in scores else '0'


def gen_report(username, password, elabx, level):
    """Download the reports of all fully solved questions into one PDF.

    Logs in to the eLab server selected by *elabx*, walks through question
    indexes 0..99 of *level*, re-evaluates the stored code of each
    allocated question and, when the evaluation reports 100, downloads the
    report image. The images downloaded in this run are then merged into
    ``<username>-<ELABX>-Level-<level>.pdf`` and deleted.

    :param username: login name; also the prefix of all generated files.
    :param password: login password.
    :param elabx: server key, one of 'java1', 'java2', 'ada', 'it_java',
        'it_ada'. Any other value makes the function return immediately.
    :param level: level id as a string (it is concatenated into URLs and
        into the PDF file name).
    :return: None.
    """
    servers = {
        'java1': {
            'url': 'http://care.srmuniv.ac.in/ktrcsejava1/',
            'code': 'java/java.code.php',
            'key': 'java'
        },
        'java2': {
            'url': 'http://care.srmuniv.ac.in/ktrcsejava2/',
            'code': 'java/java.code.php',
            'key': 'java'
        },
        'ada': {
            'url': 'http://care.srmuniv.ac.in/ktrcseada/',
            'code': 'daa/daa.code.php',
            'key': 'daa'
        },
        'it_ada': {
            'url': 'http://care1.srmuniv.ac.in/ktritada/',
            'code': 'daa/daa.code.php',
            'key': 'daa'
        },
        'it_java': {
            'url': 'http://care1.srmuniv.ac.in/ktritjava/',
            'code': 'java/java.code.php',
            'key': 'java'
        },
    }

    elab = servers.get(elabx)
    if elab is None:
        return

    login_page = elab['url'] + 'login_check.php'
    home_page = elab['url'] + 'login/student/home.php'
    question_page = elab['url'] + 'login/student/code/' + elab[
        'code'] + '?id=' + level + '&value='

    payload = {'uname': username, 'pass': password}

    print('eLab Report Generator : ' + payload['uname'])

    # Report images written during this run, in question order.
    report_images = []

    with requests.Session() as s:

        # login page

        s.post(login_page, data=payload)

        # home page

        s.get(home_page)

        # question page requests & responses

        s.get(elab['url'] + 'login/student/question.php')
        s.post(elab['url'] + 'login/student/home.helper.php',
               data={'text': elab['key'].upper()})
        s.get(elab['url'] + 'login/student/question.php')
        s.get(elab['url'] + 'login/student/question.list.js')
        s.post(elab['url'] + 'login/student/course.get.php',
               data={'q': 'SESSION'})
        s.post(elab['url'] + 'login/student/course.get.php',
               data={'q': 'VALUES'})

        # individual question -> code page

        s.get(elab['url'] + 'login/student/code/' + elab['code'] + '?id=' +
              level + '&value=0')
        s.get(elab['url'] + 'Code-mirror/lib/codemirror.js')
        s.get(elab['url'] + 'Code-mirror/mode/clike/clike.js')
        s.get(elab['url'] + 'login/student/code/' + elab['key'] +
              '/code.elab.js')
        s.post(elab['url'] + 'login/student/code/code.get.php')
        s.post(elab['url'] + 'login/student/code/flag.checker.php')

        # get the code, evaluate it and download the report (if 100%)

        for i in range(0, 100):

            present_question = question_page + str(i)
            # NOTE(review): the question page is requested twice, exactly as
            # before; the follow-up requests carry no question id, so the
            # server presumably tracks the current question per session -
            # confirm whether the first request can be dropped.
            s.get(present_question)

            if s.get(present_question).text.find('NOT ALLOCATED') != -1:
                print(str(i + 1) + ' : Question not allocated')
                continue

            code = s.get(elab['url'] + 'login/student/code/code.get.php')

            if code.text == '':
                print(str(i + 1) + ' : No code written')
                continue

            if _elab_completion(s, elab, code.text) != '100':
                print(
                    str(i + 1) +
                    ' : evaluation error : Couldn\'t get report')
                continue

            print(str(i + 1) + ' : getting report')
            report = s.get(elab['url'] +
                           'login/student/code/getReport.php')

            # Zero-padded index keeps the files in question order.
            image_name = payload['uname'] + '-' + str(i).zfill(3) + '.png'
            with open(image_name, 'wb') as f:
                f.write(report.content)
            report_images.append(image_name)

    # put all the images to PDF

    if not report_images:
        # Nothing was downloaded, so there is nothing to convert or remove.
        print('No report images downloaded : PDF not generated')
        return

    filename = payload['uname'] + '-' + elabx.upper(
    ) + '-Level-' + level + '.pdf'
    # "wb" instead of "ab": appending a second document to an existing
    # file would not yield a valid PDF. Only the images of this run are
    # used, not every .png that happens to be in the working directory.
    with open(filename, "wb") as f:
        f.write(img2pdf.convert(report_images))

    print('PDF file named ' + filename + ' generated')

    # remove the image files (the exact, zero-padded names written above;
    # the previous un-padded names never matched an existing file)

    for image_name in report_images:
        if os.path.isfile(image_name):
            os.remove(image_name)

    print('Image files cleared')
コード例 #58
0
ファイル: __init__.py プロジェクト: xErik/img2pdf
        def handle(self, f=inputf, out=outputf, with_pdfrw=with_pdfrw):
            """Convert image file *f* and verify the PDF structure via pdfrw.

            The raw bytes of *f* are converted with img2pdf, the result is
            parsed with ``pdfrw.PdfReader`` and its trailer, page tree and
            image object are checked. Finally both the generated PDF and
            the expected file *out* are re-serialised with
            ``pdfrw.PdfWriter`` and compared.

            :param f: path of the input image (default bound at definition).
            :param out: path of the expected PDF (default bound at
                definition).
            :param with_pdfrw: forwarded unchanged to ``img2pdf.convert``.
            """
            with open(f, "rb") as inf:
                orig_imgdata = inf.read()
            output = img2pdf.convert(orig_imgdata, nodate=True,
                                     with_pdfrw=with_pdfrw)
            from io import StringIO, BytesIO
            from pdfrw import PdfReader, PdfName, PdfWriter
            from pdfrw.py23_diffs import convert_load, convert_store
            # parse the generated PDF; convert_load turns the bytes into the
            # string form that is fed to PdfReader through StringIO
            x = PdfReader(StringIO(convert_load(output)))
            # trailer: exactly Info, Root and Size, with an empty Info dict
            self.assertEqual(sorted(x.keys()), [PdfName.Info, PdfName.Root,
                             PdfName.Size])
            self.assertEqual(x.Size, '7')
            self.assertEqual(x.Info, {})
            # catalog and page tree: a single page is expected
            self.assertEqual(sorted(x.Root.keys()), [PdfName.Pages,
                                                     PdfName.Type])
            self.assertEqual(x.Root.Type, PdfName.Catalog)
            self.assertEqual(sorted(x.Root.Pages.keys()),
                             [PdfName.Count, PdfName.Kids, PdfName.Type])
            self.assertEqual(x.Root.Pages.Count, '1')
            self.assertEqual(x.Root.Pages.Type, PdfName.Pages)
            self.assertEqual(len(x.Root.Pages.Kids), 1)
            # the page object itself: keys, fixed 115x48 media box, parent
            self.assertEqual(sorted(x.Root.Pages.Kids[0].keys()),
                             [PdfName.Contents, PdfName.MediaBox,
                              PdfName.Parent, PdfName.Resources, PdfName.Type])
            self.assertEqual(x.Root.Pages.Kids[0].MediaBox,
                             ['0', '0', '115', '48'])
            self.assertEqual(x.Root.Pages.Kids[0].Parent, x.Root.Pages)
            self.assertEqual(x.Root.Pages.Kids[0].Type, PdfName.Page)
            self.assertEqual(x.Root.Pages.Kids[0].Resources.keys(),
                             [PdfName.XObject])
            self.assertEqual(x.Root.Pages.Kids[0].Resources.XObject.keys(),
                             [PdfName.Im0])
            # content stream: declared length matches, and it draws Im0
            # scaled to 115x48
            self.assertEqual(x.Root.Pages.Kids[0].Contents.keys(),
                             [PdfName.Length])
            self.assertEqual(x.Root.Pages.Kids[0].Contents.Length,
                             str(len(x.Root.Pages.Kids[0].Contents.stream)))
            self.assertEqual(x.Root.Pages.Kids[0].Contents.stream,
                             "q\n115.0000 0 0 48.0000 0.0000 0.0000 cm\n/Im0 "
                             "Do\nQ")

            imgprops = x.Root.Pages.Kids[0].Resources.XObject.Im0

            # test if the filter is valid:
            self.assertIn(
                imgprops.Filter, [[PdfName.DCTDecode], [PdfName.JPXDecode],
                                  [PdfName.FlateDecode]])
            # test if the colorspace is valid
            self.assertIn(
                imgprops.ColorSpace, [PdfName.DeviceGray, PdfName.DeviceRGB,
                                      PdfName.DeviceCMYK])
            # test if the image has correct size
            orig_img = Image.open(f)
            self.assertEqual(imgprops.Width, str(orig_img.size[0]))
            self.assertEqual(imgprops.Height, str(orig_img.size[1]))
            # if the input file is a jpeg then it should've been copied
            # verbatim into the PDF
            if imgprops.Filter in [[PdfName.DCTDecode], [PdfName.JPXDecode]]:
                self.assertEqual(
                    x.Root.Pages.Kids[0].Resources.XObject.Im0.stream,
                    convert_load(orig_imgdata))
            elif imgprops.Filter == [PdfName.FlateDecode]:
                # otherwise, the data is flate encoded and has to be equal to
                # the pixel data of the input image
                imgdata = zlib.decompress(
                    convert_store(
                        x.Root.Pages.Kids[0].Resources.XObject.Im0.stream))
                # map the PDF colorspace name to the matching PIL mode string
                colorspace = imgprops.ColorSpace
                if colorspace == PdfName.DeviceGray:
                    colorspace = 'L'
                elif colorspace == PdfName.DeviceRGB:
                    colorspace = 'RGB'
                elif colorspace == PdfName.DeviceCMYK:
                    colorspace = 'CMYK'
                else:
                    raise Exception("invalid colorspace")
                im = Image.frombytes(colorspace, (int(imgprops.Width),
                                                  int(imgprops.Height)),
                                     imgdata)
                # bring the original into a comparable mode: '1' becomes
                # 'L', any other unlisted mode becomes 'RGB'
                if orig_img.mode == '1':
                    orig_img = orig_img.convert("L")
                elif orig_img.mode not in ("RGB", "L", "CMYK", "CMYK;I"):
                    orig_img = orig_img.convert("RGB")
                self.assertEqual(im.tobytes(), orig_img.tobytes())
                # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have
                # the close() method
                try:
                    im.close()
                except AttributeError:
                    pass
            # now use pdfrw to parse and then write out both pdfs and check the
            # result for equality
            y = PdfReader(out)
            outx = BytesIO()
            outy = BytesIO()
            xwriter = PdfWriter()
            ywriter = PdfWriter()
            xwriter.trailer = x
            ywriter.trailer = y
            xwriter.write(outx)
            ywriter.write(outy)
            self.assertEqual(outx.getvalue(), outy.getvalue())
            # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the
            # close() method
            try:
                orig_img.close()
            except AttributeError:
                pass
コード例 #59
0
    draw.text((71, 1310),pos,(0,0,0),font=font_pos)
    draw.text((71, 1500),quote,(0,0,0),font=font_quote)

    #A transparent circular image of required size is created for ID-card picture
    size = (770, 770)
    mask = Image.new('L', size, 0)
    dp = ImageDraw.Draw(mask) 
    dp.ellipse((0, 0) + size, fill=255)
    im = Image.open('dataset/images/'+name+'.jpg')
    output = ImageOps.fit(im, mask.size, centering=(0.5, 0.5))
    output.putalpha(mask)

    #Profile image is superimposed on the ID-card image with itself as a filter
    img.paste(output,(180,230),output)

    #Image is resized (with respect to 92dpi) and saved to output/png/
    img = img.resize((322,482), Image.ANTIALIAS)
    img.save('output/png/'+name+'.png')

    #To convert to pdf the image is temporarily stored as jpeg
    rgb_img=img.convert('RGB')
    rgb_img.save("temp.jpg")
    temp=Image.open("temp.jpg")

    #The jpeg file is converted to pdf.
    pdf_bytes = img2pdf.convert("temp.jpg")  
    file = open("output/pdf/"+name+".pdf", "wb") 
    file.write(pdf_bytes) 

os.remove("temp.jpg")
csvFile.close()