コード例 #1
4
ファイル: main.py プロジェクト: tohyongcheng/mangapark-dl
def convert_to_pdf(os_dir, chapter, file_names):
    """
    Convert a collection of images into a single PDF file.

    Every entry of ``file_names`` is passed through ``download_image`` and
    the results are handed to ``img2pdf.convert``. The PDF is written to
    ``<os_dir>/<chapter>.pdf``.

    :param os_dir: Directory to save the PDF in.
    :param chapter: Title of the chapter; also used as the PDF file name.
    :param file_names: Images to construct the PDF from.
    :return: None
    """

    print("Converting chapter %s to pdf..." % chapter)

    try:
        pdf_bytes = img2pdf.convert(*[download_image(path) for path in file_names])
    except img2pdf.PdfTooLargeError:
        # Sometimes the images are registered as having a dpi of 1.
        # Because PDF has a limitation of 200 inches max per side, a
        # special layout_fun has to be used, as to prevent an exception.
        # default manga size 5"x7"

        layout_fun = img2pdf.get_layout_fun(pagesize=(None, img2pdf.in_to_pt(7)),
                                            imgsize=None, border=None,
                                            fit=img2pdf.FitMode.into,
                                            auto_orient=False)
        # The images are fetched again on purpose: what download_image
        # returns is not visible here and may not be reusable after the
        # first conversion attempt consumed it.
        pdf_bytes = img2pdf.convert(*[download_image(path) for path in file_names],
                                    layout_fun=layout_fun)

    # The context manager closes (and flushes) the file even if the write fails.
    with open("%s/%s.pdf" % (os_dir, chapter), "wb") as pdf_file:
        pdf_file.write(pdf_bytes)
    print("Conversion completed!")
コード例 #2
2
ファイル: main.py プロジェクト: balu-/OCRmyPDF
def select_image_layer(
        infiles,
        output_file,
        log,
        pdfinfo,
        pdfinfo_lock):
    """Create ``output_file``, the image layer for a single page.

    ``infiles`` must contain one name ending in ``.ocr.oriented.pdf`` and one
    ending in ``.image``. When the module-level ``lossless_reconstruction``
    flag is set, the oriented page PDF is passed to ``re_symlink`` with
    ``output_file`` as the target. Otherwise the image is converted to a PDF
    with img2pdf at a resolution of max(xres, yres, options.oversample),
    rounded.

    :param infiles: Candidate input file names for this page.
    :param output_file: Path of the file to produce.
    :param log: Logger used for the debug message.
    :param pdfinfo: Passed through to ``get_pageinfo``.
    :param pdfinfo_lock: Passed through to ``get_pageinfo``.
    """
    page_pdf = next(ii for ii in infiles if ii.endswith('.ocr.oriented.pdf'))
    image = next(ii for ii in infiles if ii.endswith('.image'))

    if lossless_reconstruction:
        pageno = page_number(page_pdf)
        log.debug(
            "{:4d}: page eligible for lossless reconstruction".format(pageno))
        re_symlink(page_pdf, output_file)
        return

    pageinfo = get_pageinfo(image, pdfinfo, pdfinfo_lock)
    dpi = round(max(pageinfo['xres'], pageinfo['yres'],
                    options.oversample))
    # Same resolution on both axes.
    dpi_spec = (img2pdf.ImgSize.dpi, dpi)
    imgsize = (dpi_spec, dpi_spec)

    layout_fun = img2pdf.get_layout_fun(None, imgsize, None, None, None)

    with open(image, 'rb') as imfile, open(output_file, 'wb') as pdf:
        rawdata = imfile.read()
        pdf_bytes = img2pdf.convert(
            rawdata, producer="img2pdf", with_pdfrw=False,
            layout_fun=layout_fun)
        pdf.write(pdf_bytes)
コード例 #3
2
ファイル: __init__.py プロジェクト: xErik/img2pdf
 def layout_handler(
         self, psopt, isopt, border, fit, ao, pspdf, ispdf, im):
     """Check one layout computation against its expected result.

     A layout function is built from ``psopt``, ``isopt``, ``border``,
     ``fit`` and ``ao`` and called with ``im[0]``, ``im[1]`` and the
     default dpi for both axes. The returned page size must equal
     ``pspdf`` and the returned image size must equal ``ispdf``. If the
     layout function raises ``NegativeDimensionError``, both expected
     values must be ``None``.
     """
     layout_fun = img2pdf.get_layout_fun(psopt, isopt, border, fit, ao)
     dpi = (img2pdf.default_dpi, img2pdf.default_dpi)
     try:
         page_w, page_h, img_w, img_h = layout_fun(im[0], im[1], dpi)
     except img2pdf.NegativeDimensionError:
         self.assertEqual(None, pspdf)
         self.assertEqual(None, ispdf)
     else:
         self.assertEqual((page_w, page_h), pspdf)
         self.assertEqual((img_w, img_h), ispdf)
コード例 #4
0
ファイル: test_pageinfo.py プロジェクト: stweil/OCRmyPDF
def test_single_page_image(outdir):
    """A one-image PDF made by img2pdf is reported correctly by PdfInfo.

    An 8x8 mode-'1' PNG is converted at 8 dpi on both axes, and the page
    information read back from the resulting PDF is checked.
    """
    png_path = outdir / 'tmp.png'
    pdf_path = outdir / 'image-mono.pdf'

    # 8x8 image filled with 0, pixels on the main diagonal set to 1.
    im = Image.new('1', (8, 8), 0)
    for n in range(8):
        im.putpixel((n, n), 1)
    im.save(str(png_path), format='PNG')

    dpi_spec = (img2pdf.ImgSize.dpi, 8)
    layout_fun = img2pdf.get_layout_fun(
        None, (dpi_spec, dpi_spec), None, None, None)

    png_bytes = png_path.read_bytes()
    pdf_path.write_bytes(img2pdf.convert(
        png_bytes, producer="img2pdf", with_pdfrw=False,
        layout_fun=layout_fun))

    info = pdfinfo.PdfInfo(pdf_path)

    # Exactly one page, holding exactly one image and no text.
    assert len(info) == 1
    page = info[0]

    assert not page.has_text
    assert len(page.images) == 1

    pdfimage = page.images[0]
    assert pdfimage.width == 8
    assert pdfimage.color == Colorspace.gray

    # DPI in a 1"x1" is the image width
    assert isclose(pdfimage.xres, 8)
    assert isclose(pdfimage.yres, 8)
コード例 #5
0
ファイル: test_pageinfo.py プロジェクト: balu-/OCRmyPDF
def test_single_page_image():
    """A one-image PDF made by img2pdf is reported correctly by pageinfo.

    An 8x8 mode-'1' PNG is written to a temporary file, converted at 8 dpi
    on both axes into TEST_OUTPUT/image-mono.pdf, and the page information
    read back from that PDF is checked.
    """
    pdf_path = os.path.join(TEST_OUTPUT, 'image-mono.pdf')

    with NamedTemporaryFile(mode='wb+', suffix='.png') as png_tmp:
        # 8x8 image filled with 0, pixels on the main diagonal set to 1.
        im = Image.new('1', (8, 8), 0)
        for n in range(8):
            im.putpixel((n, n), 1)
        im.save(png_tmp.name, format='PNG')

        dpi_spec = (img2pdf.ImgSize.dpi, 8)
        layout_fun = img2pdf.get_layout_fun(
            None, (dpi_spec, dpi_spec), None, None, None)

        # The image was saved through the file name, so rewind the handle
        # before reading the PNG back.
        png_tmp.seek(0)
        png_bytes = png_tmp.read()
        pdf_bytes = img2pdf.convert(
            png_bytes, producer="img2pdf", with_pdfrw=False,
            layout_fun=layout_fun)

        with open(pdf_path, 'wb') as pdf:
            pdf.write(pdf_bytes)

    pages = pageinfo.pdf_get_all_pageinfo(pdf_path)

    # Exactly one page, holding exactly one image and no text.
    assert len(pages) == 1
    page = pages[0]

    assert not page['has_text']
    assert len(page['images']) == 1

    pdfimage = page['images'][0]
    assert pdfimage['width'] == 8
    assert pdfimage['color'] == 'gray'

    # While unexpected, this is correct
    # PDF spec says /FlateDecode image must have /BitsPerComponent 8
    # So mono images get upgraded to 8-bit
    assert pdfimage['bpc'] == 8

    # DPI in a 1"x1" is the image width
    assert pdfimage['dpi_w'] == 8
    assert pdfimage['dpi_h'] == 8
コード例 #6
0
def go_img2pdf(file, to_folder, new_name):
    """Convert ``file`` to a PDF and save it as ``to_folder/new_name``.

    The page size given to img2pdf is 8.3 in x 11.7 in, converted to points.

    :param file: Image input handed directly to ``img2pdf.convert``.
    :param to_folder: Destination directory.
    :param new_name: File name of the PDF inside ``to_folder``.
    """
    page_width = img2pdf.in_to_pt(8.3)
    page_height = img2pdf.in_to_pt(11.7)
    layout_function = img2pdf.get_layout_fun([page_width, page_height])

    # Convert first; the destination is only opened once the PDF exists.
    pdf = img2pdf.convert(file, layout_fun=layout_function)
    destination = to_folder + "/" + new_name
    with open(destination, 'wb') as f:
        f.write(pdf)
コード例 #7
0
ファイル: images2pdf.py プロジェクト: LarsAC/img2pdf_docker
import img2pdf
import sys, os, os.path

# Usage: images2pdf.py <input_dir> <output_file>
# Collects every *.jpg in <input_dir> into one A4 PDF written to <output_file>.
input_dir = sys.argv[1]
output_file = sys.argv[2]

# The command-line argument is already a usable str path; round-tripping it
# through os.fsencode() and a strict UTF-8 decode could fail or garble the
# name when the filesystem encoding is not UTF-8.
directory = input_dir
print(directory)

# A4 page size (210 mm x 297 mm) in points.
a4inpt = (img2pdf.mm_to_pt(210), img2pdf.mm_to_pt(297))
layout_fun = img2pdf.get_layout_fun(a4inpt)

# os.listdir() returns names in arbitrary order; sort for a stable page order.
jpg_paths = []
for filename in sorted(os.listdir(directory)):
    if filename.endswith(".jpg"):
        print("Adding " + filename)
        jpg_paths.append(os.path.join(directory, filename))

if not jpg_paths:
    sys.exit("No .jpg files found in " + directory)

# Each img2pdf.convert() call returns a complete PDF document, so appending
# one conversion per image to the same file does not produce a valid
# multi-page PDF. Convert all images in a single call instead.
pdf_bytes = img2pdf.convert(jpg_paths, layout_fun=layout_fun)
with open(output_file, "wb") as f:
    f.write(pdf_bytes)
コード例 #8
0
    async def download(self, url, filetype='pdf'):
        """
        Download a manhwa chapter into ``<cwd>/downloads``.

        If the target file already exists it is reused and nothing is
        downloaded. Images whose HTTP status is not 200 are skipped.

        :param url: Manhwa chapter url from the topmanhwa.net web.
        :param filetype: Type of file to produce, either ``pdf`` or ``zip``.
        :return: Path of the chapter file.
        :raises AttributeError: If ``filetype`` is neither ``pdf`` nor ``zip``.
        :raises ValueError: If the chapter has no images.
        """
        if filetype not in ['pdf', 'zip']:
            raise AttributeError(
                "Invalid filetype, available filetype is pdf and zip")
        # Generate the filename
        filename = url.replace(f"{self.manga}/",
                               "").split("/")[0].replace("-", " ").title()

        # Create downloads directory if it's not exists in the local
        downloads_dir = f"{os.getcwd()}/downloads"
        if not os.path.exists(downloads_dir):
            # exist_ok guards against the directory appearing between the
            # check above and the creation.
            os.makedirs(downloads_dir, exist_ok=True)
            print("Downloads dir created.")

        # filetype was validated above, so it doubles as the extension.
        path = f"{downloads_dir}/{filename}.{filetype}"

        # Proceed download only if the same file is not in the local
        if os.path.exists(path):
            print("The file is already in the local, and using cached.")
            return path

        # Get the chapter images
        # NOTE(review): the ZIP branch has always used self._images and the
        # PDF branch self.images; both calls are kept as found, but one of
        # them may be a typo - confirm against the class definition.
        if filetype == 'zip':
            manhwa = await self._images(url)
        else:
            manhwa = await self.images(url)

        total = len(manhwa)
        if total == 0:
            # Previously this surfaced as a ZeroDivisionError from the
            # progress computation.
            raise ValueError(f"No images found for {url}")

        # (image url, image bytes) of every successfully downloaded page.
        # Keeping the url next to its bytes ensures the archive entry gets
        # the right extension even when an earlier download was skipped.
        pages = []
        print(f"Downloading, {(len(pages)/total)*100:.1f}%")

        # Download the images. Iterate over every url: pairing the list with
        # range(1, len(manhwa)) used to drop the last image of the chapter.
        async with aiohttp.ClientSession() as ses:
            for img in manhwa:
                async with ses.get(img) as r:
                    if r.status != 200:
                        continue
                    pages.append((img, await r.content.read()))
                    print(f"Downloading, {(len(pages)/total)*100:.1f}%")

        if filetype == 'zip':
            # Archiving the image bytes; the context manager closes the
            # archive even if writing an entry fails.
            with zipfile.ZipFile(path, 'w') as z:
                for i, (img, data) in enumerate(pages, start=1):
                    z.writestr(f"{filename}_{i}.{img[-3:]}",
                               data,
                               compress_type=zipfile.ZIP_DEFLATED)

            print(f"Download complete, {(len(pages)/total)*100:.1f}%")
        else:
            # Custom size of the layout
            page_size = (img2pdf.mm_to_pt(200), img2pdf.mm_to_pt(300))
            layout_fun = img2pdf.get_layout_fun(page_size)

            # Merging the image bytes to PDF file. Convert before opening
            # the target so that a failed conversion does not leave an empty
            # file behind that the next call would treat as cached.
            pdf_bytes = img2pdf.convert([data for _, data in pages],
                                        layout_fun=layout_fun)
            with open(path, "wb") as f:
                f.write(pdf_bytes)

        return path
コード例 #9
0
def _attach_handler(handler):
    """Apply FORMATTER to ``handler``, register it on logger_obj, return it."""
    handler.setFormatter(FORMATTER)
    logger_obj.addHandler(handler)
    return handler


# Console output: log records are written to stdout.
console_handler = _attach_handler(logging.StreamHandler(sys.stdout))
# File output: "myloggerfile.log", rotated with when="midnight".
file_handler = _attach_handler(
    TimedRotatingFileHandler("myloggerfile.log", when="midnight", encoding="utf-8"))


import os
import img2pdf
# A4 paper is 210 mm x 297 mm; img2pdf.mm_to_pt() converts millimetres to points.
a4inpt = (img2pdf.mm_to_pt(210), img2pdf.mm_to_pt(297))
layout_fun = img2pdf.get_layout_fun(pagesize=a4inpt)
# Input directory name
DIR_PATH = "../Documents"
# The PDF is named after the last component of DIR_PATH and written to the
# current working directory.
file_name = DIR_PATH.split("/")[-1] + ".pdf"
# os.listdir() returns bare file names in arbitrary order. Join them with
# DIR_PATH so img2pdf can open them regardless of the working directory, and
# sort them so the page order is stable.
image_paths = sorted(
    os.path.join(DIR_PATH, i) for i in os.listdir(DIR_PATH) if i.endswith(".jpg"))
# Convert before opening the output so a failed conversion leaves no empty file.
pdf_bytes = img2pdf.convert(image_paths, layout_fun=layout_fun)
with open(file_name, "wb") as f:
    f.write(pdf_bytes)


#api response
!pip install requests
import requests
# Query-string parameters for the GET request.
params = {'a': 'b'}
# Example payload; not used by the GET request below.
data = {'x': 'y'}
# The scheme must be followed by "//": with a single slash the URL has no
# host part and cannot be requested.
url = "http://127.0.0.1/"
# for get request
response = requests.get(url, params=params)
コード例 #10
0
ファイル: pdfcal.py プロジェクト: vmiklos/vmexam
a4Height = 841.88976378

outputPdf = PyPDF2.PdfFileWriter()

page = None
# One iteration per calendar month (1..12).
for month in range(1, 13):
    monthString = "%02d" % month

    # Handle the image part.
    # Landscape A4 for the image.
    pageSize = (a4Height, a4Width)
    # TOP_OF_CAL_BOXES_PTS in pcal's pcaldefs.h.
    margin = 85
    imageSize = ((img2pdf.ImgSize.abs, a4Height - margin), (img2pdf.ImgSize.abs, a4Width - margin))
    layoutFun = img2pdf.get_layout_fun(pageSize, imageSize, border=None, fit=None, auto_orient=False)
    # Close each month's JPEG as soon as it has been converted instead of
    # leaving twelve file handles open.
    with open("images/" + monthString + ".jpg", "rb") as imageJpg:
        imageBytes = img2pdf.convert(imageJpg, layout_fun=layoutFun)
    # BytesIO initialised with the data starts at position 0, ready to read.
    imageBuf = io.BytesIO(imageBytes)

    # Handle the calendar part.
    imagePdf = PyPDF2.PdfFileReader(imageBuf)
    imagePage = imagePdf.getPage(0)
    # The calendar is generated for the year after the current one.
    nextYear = str(time.localtime().tm_year + 1)
    # Language part of the current locale name selects the pcal input file.
    lang = locale.getlocale()[0].split("_")[0]
    calPdf = PyPDF2.PdfFileReader(ps2Pdf(pcal(["-f", "calendar_" + lang + ".txt", monthString, nextYear])))
    calPage = calPdf.getPage(0)

    # Portrait A4 page: upper half contains first calendar and the first image,
    # lower half contains the second calendar and the second image.
コード例 #11
0
# importing necessary libraries
import img2pdf
import os
import argparse

# Page layout: 11 in x 8.5 in, converted to points.
letter = tuple(img2pdf.in_to_pt(inches) for inches in (11, 8.5))
layout = img2pdf.get_layout_fun(letter)

# with open('tile/test.pdf', "wb") as out_file:
#     images = []
#     for i in range(0, 31 + 1):
#         fname = str(i) + '_1.png'
#         images.append(fname)
#         print(fname)
#     out_file.write(img2pdf.convert(images))


def process_images(min_range, max_range, prefix, suffix, out_file, layout):
    """Write one PDF built from a numbered series of image files.

    The file names are ``prefix + str(i) + suffix`` for every ``i`` from
    ``min_range`` to ``max_range`` inclusive.

    :param min_range: First image number.
    :param max_range: Last image number (inclusive).
    :param prefix: Text placed before the number in each file name.
    :param suffix: Text placed after the number in each file name.
    :param out_file: Writable binary file object that receives the PDF.
    :param layout: Layout function passed to ``img2pdf.convert``.
    """
    numbers = range(min_range, max_range + 1)
    images = [prefix + str(number) + suffix for number in numbers]
    pdf_bytes = img2pdf.convert(images, layout_fun=layout)
    out_file.write(pdf_bytes)


# Build tile/img2pdf-2-2.pdf from tile/0_2.png ... tile/31_2.png.
with open('tile/img2pdf-2-2.pdf', "wb") as out_file:
    process_images(
        min_range=0,
        max_range=31,
        prefix='tile/',
        suffix='_2.png',
        out_file=out_file,
        layout=layout,
    )

# directory = 'tile/'
コード例 #12
0
ファイル: all.py プロジェクト: rench/pyutils
# os.listdir() returns names in arbitrary order; sort for a stable input order.
imagelist = sorted(i for i in os.listdir(os.getcwd()) if i.endswith(".jpg"))

# shrink jpg
# Pass the arguments as a list. Joining the file names into one string and
# splitting it again breaks any name that contains spaces or quotes.
cmd = ['python', './noteshrink.py', '-w', '-q', '-b', 'N'] + imagelist
try:
    result = subprocess.call(cmd)
except OSError:
    # The interpreter could not be started at all.
    result = -1

if result == 0:
    print('图片处理成功')
else:
    print('图片处理失败')

# compact pdf
# The PNG files to merge are read from ./tmp; the script works inside that
# directory from here on.
path = './tmp'
os.chdir(path)
# Sorted so the PDF page order does not depend on directory listing order.
imagelist = sorted(i for i in os.listdir(os.getcwd()) if i.endswith(".png"))
# A4 page size (210 mm x 297 mm) in points.
a4inpt = (img2pdf.mm_to_pt(210), img2pdf.mm_to_pt(297))
border = (50, 30)
layout_fun = img2pdf.get_layout_fun(pagesize=a4inpt, border=border)

with open("output.pdf", "wb") as f:
    f.write(img2pdf.convert(imagelist, layout_fun=layout_fun))
    print('生成pdf成功: output.pdf')

# pdf2word
print('开始转换pdf到docx')
parse("output.pdf", "output.docx")
print('docx转换成功: output.docx')
print('docx转换成功: output.docx')