コード例 #1
0
ファイル: pdfMastery.py プロジェクト: NeXX451/PDFMastery
def printBT():
    """Split every user-selected PDF into an odd-page and an even-page file.

    Opens a file dialog, then for each chosen PDF writes two files next to
    the first selected file: ``<stem>xSplit1.pdf`` holding pages 1, 3, 5, ...
    and ``<stem>xSplit2.pdf`` holding pages 2, 4, 6, ... .  The button label
    ``lblprintBTN`` shows "Working!" while running and "Done!" afterwards.
    """
    lblprintBTN.config(text="Working!")
    selected = filedialog.askopenfiles()

    # askopenfiles() returns *open* file objects; only their paths are needed,
    # so release the handles right away instead of leaking them.
    paths = []
    for handle in selected or ():
        paths.append(os.path.abspath(handle.name))
        handle.close()

    if not paths:
        # Dialog was cancelled; the original crashed on files[0] here.
        lblprintBTN.config(text="Done!")
        return

    # Relative output names below resolve against the first file's folder.
    os.chdir(os.path.dirname(paths[0]))
    for path in paths:
        print(path)
        # splitext handles any extension casing; str.replace(".pdf", ...) would
        # leave e.g. "A.PDF" unchanged and make the output collide with the input.
        stem = os.path.splitext(os.path.basename(path))[0]
        with Pdf.open(path) as source, Pdf.new() as odd_pages, Pdf.new() as even_pages:
            for index, page in enumerate(source.pages):
                # index is 0-based, so even indexes are the odd-numbered pages.
                target = odd_pages if index % 2 == 0 else even_pages
                target.pages.append(page)
            # Save while the source is still open.
            odd_pages.save(stem + "xSplit1.pdf")
            even_pages.save(stem + "xSplit2.pdf")
    lblprintBTN.config(text="Done!")
コード例 #2
0
def test_page_labels():
    """Check that ``Page.label`` follows the rules stored in /PageLabels."""
    p = Pdf.new()
    template = Dictionary(
        Type=Name.Page, MediaBox=[0, 0, 612, 792], Resources=Dictionary()
    )
    for index in range(5):
        p.pages.append(template)
        page_number = str(index).encode()
        p.pages[index].Contents = Stream(p, b"BT (Page %s) Tj ET" % page_number)

    # The Nums array alternates "first page index" and "labelling rule".
    roman_rule = Dictionary(S=Name.r)  # lowercase roman numerals from index 0
    prefixed_rule = Dictionary(S=Name.D, St=42, P='Prefix-')  # 'Prefix-42', ... from index 2
    number_tree = Array([0, roman_rule, 2, prefixed_rule])
    p.Root.PageLabels = p.make_indirect(Dictionary(Nums=number_tree))

    expected_labels = ['i', 'ii', 'Prefix-42', 'Prefix-43', 'Prefix-44']
    for index, expected in enumerate(expected_labels):
        assert Page(p.pages[index]).label == expected
コード例 #3
0
def break_to_small_pdf_paths_original(pdf_path,
                                      output_directory=None,
                                      start_page=1,
                                      end_page=None,
                                      small_pdf_pages=25):
    """Split a PDF into consecutive smaller PDFs and return their paths.

    :param pdf_path: path of the PDF to split.
    :param output_directory: where the parts are written; defaults to a
        ``<stem>_small_originals`` folder next to ``pdf_path``.
    :param start_page: first page to include (1-based).
    :param end_page: last page to include (1-based, inclusive); defaults to
        the last page of the document.
    :param small_pdf_pages: chunk size handed to ``list_helper.divide_chunks``.
    :return: list of destination paths, one per chunk, including parts that
        already existed and were therefore left untouched.
    """
    pdf_name_stem = Path(pdf_path).stem
    if output_directory is None:
        output_directory = os.path.join(
            os.path.dirname(pdf_path), pdf_name_stem + "_small_originals")

    dest_pdfs = []
    with Pdf.open(pdf_path) as pdf:
        if end_page is None:
            end_page = len(pdf.pages)
        page_numbers = range(start_page, end_page + 1)
        page_sets = list_helper.divide_chunks(
            list_in=page_numbers, n=small_pdf_pages)
        for page_set in page_sets:
            # Page numbers are 1-based, pdf.pages is 0-based.
            chunk_pages = [pdf.pages[i - 1] for i in page_set]
            dest_pdf_path = os.path.join(
                output_directory,
                "%s_%04d-%04d.pdf" %
                (pdf_name_stem, page_set[0], page_set[-1]),
            )
            if not os.path.exists(dest_pdf_path):
                os.makedirs(os.path.dirname(dest_pdf_path), exist_ok=True)
                # Close each part once written instead of leaking it.
                with Pdf.new() as dest_pdf:
                    dest_pdf.pages.extend(chunk_pages)
                    # Positional: the keyword name differs between pikepdf releases.
                    dest_pdf.save(dest_pdf_path)
            else:
                logging.warning("%s exists", dest_pdf_path)
            dest_pdfs.append(dest_pdf_path)
    return dest_pdfs
コード例 #4
0
def split_into_small_pdfs(pdf_path,
                          output_directory=None,
                          start_page=1,
                          end_page=None,
                          small_pdf_pages=25):
    """Split a PDF into consecutive smaller PDFs inside ``output_directory``.

    :param pdf_path: path of the PDF to split.
    :param output_directory: where the parts are written; defaults to
        ``_get_ocr_dir(pdf_path)``.
    :param start_page: first page to include (1-based).
    :param end_page: last page to include (1-based, inclusive); defaults to
        the last page of the document.
    :param small_pdf_pages: chunk size handed to ``list_helper.divide_chunks``.

    Parts whose destination file already exists are skipped with a warning.
    """
    pdf_name_stem = Path(pdf_path).stem
    if output_directory is None:
        output_directory = _get_ocr_dir(pdf_path)

    with Pdf.open(pdf_path) as pdf:
        if end_page is None:
            end_page = len(pdf.pages)
        page_numbers = range(start_page, end_page + 1)
        page_sets = list_helper.divide_chunks(
            list_in=page_numbers, n=small_pdf_pages)
        for page_set in page_sets:
            # Page numbers are 1-based, pdf.pages is 0-based.
            chunk_pages = [pdf.pages[i - 1] for i in page_set]
            dest_pdf_path = os.path.join(
                output_directory, "%s_%04d-%04d.pdf" %
                (pdf_name_stem, page_set[0], page_set[-1]))
            if not os.path.exists(dest_pdf_path):
                os.makedirs(os.path.dirname(dest_pdf_path), exist_ok=True)
                # Close each part once written instead of leaking it.
                with Pdf.new() as dest_pdf:
                    dest_pdf.pages.extend(chunk_pages)
                    # Positional: ``filename=`` is not the parameter name in
                    # current pikepdf (``filename_or_stream``).
                    dest_pdf.save(dest_pdf_path)
            else:
                logging.warning("%s exists", dest_pdf_path)
コード例 #5
0
def unlockPdf(filepath):
    """Write an unencrypted copy of ``filepath`` as ``decrypted.pdf``.

    The password is taken from the file name itself: the base name with its
    last four characters (the ``.pdf`` suffix) removed.  The copy is written
    to the same directory as ``filepath``.
    """
    password = os.path.basename(filepath)[0:-4]
    # os.path.join keeps a bare file name relative; the previous
    # ``dirname + '/decrypted.pdf'`` pointed at the filesystem root when
    # ``filepath`` had no directory component.
    output_path = os.path.join(os.path.dirname(filepath), 'decrypted.pdf')
    with Pdf.open(filepath, password=password) as locked, Pdf.new() as unlocked:
        unlocked.pages.extend(locked.pages)
        # Save while the source is still open.
        unlocked.save(output_path)
コード例 #6
0
def split_pdf(stream, opcode, data):
    """Extract pages from an already opened PDF, then close it.

    :param stream: an open PDF object exposing ``pages`` and ``close()``.
    :param opcode: ``0`` writes the single page ``data["number"]`` to
        ``<number>.pdf``; ``1`` writes pages ``data["start"]`` through
        ``data["end"]`` (0-based, inclusive) to ``output.pdf``; any other
        value only closes ``stream``.
    :param data: dict carrying the keys required by the chosen ``opcode``.

    ``stream`` is closed in every case, including when extraction fails.
    """
    pdf = stream
    try:
        if opcode == 0:
            number = data["number"]
            new_pdf = Pdf.new()
            new_pdf.pages.append(pdf.pages[number])
            new_pdf.save(str(number) + '.pdf')
        elif opcode == 1:
            new_pdf = Pdf.new()
            for n, page in enumerate(pdf.pages):
                if n >= data["start"]:
                    new_pdf.pages.append(page)
                if n == data["end"]:
                    break
            # The former ``new_pdf.author()`` call was dropped: it is not part
            # of pikepdf's documented Pdf API and prevented the save below.
            new_pdf.save('output.pdf')
    finally:
        # Release the caller's document even if a step above raised.
        pdf.close()
コード例 #7
0
ファイル: test_pages.py プロジェクト: mara004/pikepdf
 def pdfs():
     """Yield paths of consecutive parts of content-stream-errors.pdf.

     Each part holds at most two pages and is saved as ``part-<n>.pdf`` under
     ``tmp_path``.  ``resources`` and ``tmp_path`` come from the enclosing
     scope, which is not visible in this block.
     """
     def _write_part(document, number):
         # Persist one part and hand back where it went.
         destination = tmp_path / f"part-{number}.pdf"
         document.save(destination)
         return destination

     source_path = resources / "content-stream-errors.pdf"
     with Pdf.open(source_path) as pdf, Pdf.new() as output:
         part = 1
         for page in pdf.pages:
             if len(output.pages) == 2:
                 # Current part is full: flush it and start a fresh one.
                 yield _write_part(output, part)
                 output = Pdf.new()
                 part += 1
             output.pages.append(page)
         if len(output.pages) > 0:
             # Flush the trailing, possibly shorter, part.
             yield _write_part(output, part)
コード例 #8
0
 def concatenate(n):
     """Append the pages of pal.pdf ``n`` times and save as ``<n>.pdf``.

     ``resources`` and ``outdir`` come from the enclosing scope, which is not
     visible in this block.
     """
     merged = Pdf.new()
     for iteration in range(n):
         print(iteration)
         # The source is re-opened on every pass rather than reused.
         single = Pdf.open(resources / 'pal.pdf')
         merged.pages.extend(single.pages)
     destination = outdir / f'{n}.pdf'
     merged.save(destination)
コード例 #9
0
def to_image_unlock(filepath, pw):
    """Replace an encrypted PDF with an unencrypted copy, then convert it.

    :param filepath: path of the password-protected PDF; it is overwritten
        in place with the decrypted pages.
    :param pw: password used to open the PDF.
    :return: whatever ``to_image(filepath)`` returns.
    """
    import os
    import shutil
    import tempfile

    # Write to a scratch file in the same directory and swap it in afterwards:
    # saving straight onto ``filepath`` while it is still open as the source
    # risks a truncated file (and fails outright on Windows).
    target_dir = os.path.dirname(os.path.abspath(filepath))
    handle, scratch_path = tempfile.mkstemp(suffix=".pdf", dir=target_dir)
    os.close(handle)
    try:
        with Pdf.open(filepath, password=pw) as data, Pdf.new() as newPdf:
            newPdf.pages.extend(data.pages)
            newPdf.save(scratch_path)
        # mkstemp creates an owner-only file; keep the original's permissions.
        shutil.copymode(filepath, scratch_path)
        os.replace(scratch_path, filepath)
    except BaseException:
        # Never leave the scratch file behind on failure.
        if os.path.exists(scratch_path):
            os.remove(scratch_path)
        raise

    return to_image(filepath)
コード例 #10
0
def test_split_pdf(fourpages, outdir):
    """Save every page of ``fourpages`` as its own PDF and count the files."""
    for number, single_page in enumerate(fourpages.pages, start=1):
        target = Pdf.new()
        target.pages.append(single_page)
        target.save(outdir / f"page{number}.pdf")

    produced = [
        entry for entry in outdir.iterdir() if entry.name.startswith('page')
    ]
    assert len(produced) == 4
コード例 #11
0
ファイル: test_page.py プロジェクト: mara004/pikepdf
def test_push_stack(fourpages, outpdf):
    """Compare ``add_overlay`` with and without ``push_stack``.

    A blank 1000x1000 page receives a content stream that ends with a
    mirroring ``cm`` operator; two pages are then overlaid on it, one with
    ``push_stack=False`` and one with ``push_stack=True``.
    """
    pdf = Pdf.new()
    pdf.add_blank_page(page_size=(1000, 1000))
    canvas = pdf.pages[0]

    pdf.pages.extend(fourpages.pages)

    drawing_ops = [
        b"0.4 G",
        b"0 500 500 1000 re s",
        b"500 500 1000 1000 re s",
        b"-1 0 0 1 500 0 cm",
    ]
    canvas.Contents = pdf.make_stream(b"\n".join(drawing_ops) + b"\n")

    left_rect = Rectangle(0, 500, 500, 1000)
    right_rect = Rectangle(500, 500, 1000, 1000)
    unpushed = canvas.add_overlay(pdf.pages[1], left_rect, push_stack=False)
    pushed = canvas.add_overlay(pdf.pages[2], right_rect, push_stack=True)

    events = _simple_interpret_content_stream(canvas)

    # First overlay: stack was not pushed, so the mirroring matrix still applies.
    first_xobj, first_ctm = next(events)
    assert first_xobj == unpushed
    assert first_ctm.a < 0 and first_ctm.d > 0, "Not horizontally mirrored as expected"

    # Second overlay: expected in the upper right corner, as in a 4-up layout.
    second_xobj, second_ctm = next(events)
    assert second_xobj == pushed
    assert second_ctm.e >= 500 and second_ctm.f >= 500

    # Keep only the composed page; the saved file is meant for visual inspection.
    del pdf.pages[1:]
    pdf.save(outpdf)
コード例 #12
0
def test_add_foreign_twice(graph, outpdf):
    """Copy the same foreign page into a new PDF twice, then save it."""
    out = Pdf.new()
    for expected_count in (1, 2):
        copied = out.copy_foreign(graph.pages[0])
        out.pages.append(copied)
        assert len(out.pages) == expected_count
    out.save(outpdf)
コード例 #13
0
ファイル: pdfMastery.py プロジェクト: NeXX451/PDFMastery
def img2pdfBT():
    """Build one PDF per image folder below a user-chosen directory.

    Asks for a directory and a name, walks the directory tree, and for every
    folder that contains ``.jpg``/``.png`` files writes
    ``<folder path>_<name>.pdf`` with one page per image, in file-name order.
    The button label ``lblimgtopdfBTN`` shows "Working!" while running and
    "Done!" afterwards.
    """
    from contextlib import ExitStack

    lblimgtopdfBTN.config(text="Working!")
    path = filedialog.askdirectory()
    mname = simpledialog.askstring(title="Name", prompt="Enter prefix name.")
    for root, dirs, files in os.walk(path):
        # Sort for a reproducible page order; os.walk gives no ordering guarantee.
        image_paths = [
            os.path.join(root, file_name)
            for file_name in sorted(files)
            if file_name.lower().endswith((".jpg", ".png"))
        ]
        if not image_paths:
            # Folders without images no longer produce an empty PDF.
            continue

        with ExitStack() as stack:
            pdf = stack.enter_context(Pdf.new())
            for image_path in image_paths:
                print(image_path)
                # img2pdf reads the file itself; opening it with PIL first
                # only to read back its name was redundant.
                page_stream = io.BytesIO(img2pdf.convert(image_path))
                source = stack.enter_context(Pdf.open(page_stream))
                pdf.pages.extend(source.pages)
            # Sources stay open until the merged file is written.
            pdf.save(os.path.normpath(root) + "_" + str(mname) + ".pdf")
    lblimgtopdfBTN.config(text="Done!")
コード例 #14
0
 def pdfs():
     """Yield paths of consecutive parts of content-stream-errors.pdf.

     Each part holds at most two pages and is saved as ``part-<n>.pdf`` under
     ``tmp_path``.  ``resources`` and ``tmp_path`` come from the enclosing
     scope, which is not visible in this block.
     """
     # The context manager closes the source even if the consumer stops early.
     with Pdf.open(resources / "content-stream-errors.pdf") as pdf:
         output = Pdf.new()
         part = 1
         for page in pdf.pages:
             if len(output.pages) == 2:
                 part_file = tmp_path / "part-{0}.pdf".format(part)
                 output.save(part_file)
                 # A saved part is finished; close it before handing out its path.
                 output.close()
                 yield part_file
                 output = Pdf.new()
                 part += 1
             output.pages.append(page)
         if len(output.pages) > 0:
             part_file = tmp_path / "part-{0}.pdf".format(part)
             output.save(part_file)
             yield part_file
         output.close()
コード例 #15
0
 def concatenate(n):
     """Append the pages of pal.pdf ``n`` times and save as ``<n>.pdf``.

     ``resources`` and ``outdir`` come from the enclosing scope, which is not
     visible in this block.
     """
     print('concatenating same page', n, 'times')
     merged = Pdf.new()
     for iteration in range(n):
         print(iteration)
         # The source is re-opened on every pass rather than reused.
         single = Pdf.open(resources / 'pal.pdf')
         merged.pages.extend(single.pages)
     destination = outdir / '{}.pdf'.format(n)
     merged.save(destination)
コード例 #16
0
def mege_pdf(files_m):
    """Concatenate the given PDFs into ``<first file stem>Combined.pdf``.

    :param files_m: non-empty sequence of PDF paths, merged in the given
        order.  The output is written next to the first file.
    :raises IndexError: if ``files_m`` is empty.
    """
    import os
    from contextlib import ExitStack

    # splitext only touches the real extension; str.replace(".pdf", ...) left
    # names such as "A.PDF" unchanged (so the output overwrote the input) and
    # also rewrote ".pdf" inside directory names.
    out_p_name = os.path.splitext(files_m[0])[0] + "Combined.pdf"
    with ExitStack() as stack:
        pdf_out = stack.enter_context(Pdf.new())
        for name in files_m:
            src = stack.enter_context(Pdf.open(name))
            pdf_out.pages.extend(src.pages)
        # Sources stay open until the merged file is written, then all close.
        pdf_out.save(out_p_name)
    print("Combined pdfs saved to : " + out_p_name)
コード例 #17
0
def extract_pages(from_s, to_s):
    """Print each index in ``range(from_s, to_s)`` and save Extracted_PAGES.pdf.

    NOTE(review): no page is ever added to the output.  The original carried
    a commented-out ``out_pdf.pages.append(sample_pdf.pages[s])`` line;
    ``sample_pdf`` is not defined in the visible code, so the saved PDF is
    currently empty.  Confirm the intended source before enabling it.
    """
    out_pdf = Pdf.new()

    for page_index in range(from_s, to_s):
        print(page_index)

    destination = "Extracted_PAGES.pdf"
    out_pdf.save(destination)
コード例 #18
0
ファイル: AufgabenK.py プロジェクト: niha1020/AufgabenK
    def get_path_value(self):
        """Convert and merge the documents of every sub-folder into one PDF each.

        Reads the root directory from ``self.path_value``.  For each
        sub-folder, images (.jpg/.jpeg/.png) and .docx files are converted to
        PDF next to their source, every PDF in the folder is re-saved through
        pikepdf, and all of them are merged (in file-name order) into
        ``<root>\\<folder stem> - <root stem>.pdf``.

        Sets ``self.path``, ``self.folder_name`` and ``self.file_name`` as it goes.
        """
        image_suffixes = (".jpg", ".jpeg", ".png")
        known_suffixes = image_suffixes + (".docx", ".pdf")

        self.path = self.path_value.get()
        # [-1] instead of [1]: a path without a backslash no longer raises IndexError.
        self.folder_name = self.path.rsplit("\\", 1)[-1]

        # Only directories are processed.  The merged PDFs are written into the
        # root itself, so a second run used to crash when scandir() hit them.
        with os.scandir(self.path) as root_entries:
            folder_entries = [entry for entry in root_entries if entry.is_dir()]

        for folder_entry in folder_entries:
            self.folder_name = Path(folder_entry).stem
            mergedObject = PdfFileMerger()
            try:
                # Pass 1: convert images and Word documents to PDF.  The listing
                # is taken up front because this pass adds files to the folder.
                with os.scandir(folder_entry) as entries:
                    candidates = list(entries)
                for file_entry in candidates:
                    self.file_name = Path(file_entry).stem
                    print("Filenname", self.file_name)

                    if not file_entry.is_file():
                        continue
                    if not file_entry.path.endswith(known_suffixes):
                        print("Was ist das? Konvertier ich nicht  " + file_entry.path)
                    elif file_entry.path.endswith(image_suffixes):
                        with Image.open(file_entry.path) as image:
                            rgb_image = image.convert('RGB')
                            rgb_image.save(os.path.splitext(file_entry.path)[0] + ".pdf")
                    elif file_entry.path.endswith(".docx"):
                        convert(file_entry)

                # Pass 2: rescan so freshly converted PDFs are included; sorting
                # by name makes the merge order reproducible.
                with os.scandir(folder_entry) as entries:
                    pdf_entries = sorted(
                        (entry for entry in entries
                         if entry.is_file() and entry.path.endswith(".pdf")),
                        key=lambda entry: entry.name,
                    )
                for file_entry in pdf_entries:
                    # Re-save each PDF in place through pikepdf before merging.
                    with Pdf.open(file_entry.path, allow_overwriting_input=True) as pdf:
                        pdf.save(file_entry.path)
                    # The old positional "rb" landed in ``strict``; being
                    # truthy it meant strict=True, now spelled out.
                    mergedObject.append(PdfFileReader(file_entry.path, strict=True))

                mergedObject.write(self.path + "\\" + Path(folder_entry).stem + " - " + Path(self.path).stem + ".pdf")
            finally:
                # Release the file handles held by the merger.
                mergedObject.close()
コード例 #19
0
def test_image_roundtrip(outdir, w, h, pixeldata, cs, bpc):
    """Embed a raw image in a one-page PDF, save it, and read the image back.

    The image stream is ``pixeldata`` repeated ``w * h`` times with colour
    space ``cs`` and ``bpc`` bits per component.
    """
    pdf = Pdf.new()

    raw_pixels = pixeldata * (w * h)

    image = Stream(pdf, raw_pixels)
    image.Type = Name('/XObject')
    image.Subtype = Name('/Image')
    image.ColorSpace = Name(cs)
    image.BitsPerComponent = bpc
    image.Width = w
    image.Height = h

    # One page that paints /Im1 scaled to fill the 100x100 media box.
    page_dict = {
        '/Type': Name('/Page'),
        '/MediaBox': [0, 0, 100, 100],
        '/Contents': Stream(pdf, b'q 100 0 0 100 0 0 cm /Im1 Do Q'),
        '/Resources': {'/XObject': {'/Im1': image}},
    }
    pdf.pages.append(pdf.make_indirect(page_dict))

    outfile = outdir / f'test{w}{h}{cs[1:]}{bpc}.pdf'
    # Keep streams uncompressed and undecoded so the stored bytes stay raw.
    pdf.save(
        outfile,
        compress_streams=False,
        stream_decode_level=StreamDecodeLevel.none,
    )

    with Pdf.open(outfile) as p2:
        pim = PdfImage(p2.pages[0].Resources.XObject['/Im1'])

        assert pim.bits_per_component == bpc
        assert pim.colorspace == cs
        assert pim.width == w
        assert pim.height == h
        if cs == '/DeviceRGB':
            assert pim.mode == 'RGB'
        elif cs == '/DeviceGray' and bpc == 8:
            assert pim.mode == 'L'
        elif cs == '/DeviceCMYK':
            assert pim.mode == 'CMYK'
        elif bpc == 1:
            assert pim.mode == '1'
        assert not pim.palette

        assert pim.filters == []
        assert pim.read_bytes() == pixeldata

        # The extracted image must decode to the same mode.
        extracted = BytesIO()
        pim.extract_to(stream=extracted)
        extracted.seek(0)
        reloaded = Image.open(extracted)
        assert pim.mode == reloaded.mode
コード例 #20
0
ファイル: test_pages.py プロジェクト: ghaddarAbs/pikepdf
def test_split_pdf(resources, outdir):
    """Save every page of fourpages.pdf as its own PDF and count the files."""
    q = Pdf.open(resources / "fourpages.pdf")

    for number, single_page in enumerate(q.pages, start=1):
        target = Pdf.new()
        target.pages.append(single_page)
        target.save(outdir / "page{}.pdf".format(number))

    produced = [
        entry for entry in outdir.iterdir() if entry.name.startswith('page')
    ]
    assert len(produced) == 4
コード例 #21
0
 def export_page(self, page_idx):
     """Write the page at ``page_idx`` of ``self.reader`` to ./page.pdf.

     Returns the path of the written file, which is the same fixed name on
     every call.
     """
     selected = self.reader.pages[page_idx]
     out_path = "./page.pdf"
     single = Pdf.new()
     single.pages.append(selected)
     single.save(out_path)
     single.close()
     return out_path
コード例 #22
0
ファイル: test_sanity.py プロジェクト: EdwardBetts/pikepdf
def test_create_pdf(outdir):
    """Build a one-page PDF with a text line and an image, then save hi.pdf."""
    pdf = Pdf.new()

    # Font resource: Helvetica as a Type1 font registered under /F1.
    font_spec = b"""
            <<
                /Type /Font
                /Subtype /Type1
                /Name /F1
                /BaseFont /Helvetica
                /Encoding /WinAnsiEncoding
            >>"""
    font = pdf.make_indirect(Object.parse(font_spec))

    # Image resource: 100x100 pixels, three bytes per pixel, one repeated colour.
    side = 100
    image = Stream(pdf, b"\xff\x7f\x00" * (side * side))
    image.stream_dict = Object.parse(b"""
            <<
                /Type /XObject
                /Subtype /Image
                /ColorSpace /DeviceRGB
                /BitsPerComponent 8
                /Width 100
                /Height 100
            >>""")

    # Content stream: draw the text, then paint the image.
    contents = Stream(pdf, b"""
        BT /F1 24 Tf 72 720 Td (Hi there) Tj ET
        q 144 0 0 144 234 324 cm /Im1 Do Q
        """)

    page = pdf.make_indirect({
        '/Type': Name('/Page'),
        '/MediaBox': [0, 0, 612, 792],
        '/Contents': contents,
        '/Resources': {
            '/Font': {'/F1': font},
            '/XObject': {'/Im1': image},
        },
    })

    pdf.pages.append(page)
    pdf.save(outdir / 'hi.pdf')
コード例 #23
0
def test_issue_271():
    """A page appended to another PDF must be independent of the original.

    Editing the first appended copy must affect neither the source page nor
    a second copy appended afterwards.
    """
    source_pdf = Pdf.new()
    target_pdf = Pdf.new()
    original = source_pdf.add_blank_page()

    # First copy: append, then modify its media box and rotation.
    target_pdf.pages.append(original)
    modified = target_pdf.pages[0]
    modified.MediaBox[0] = 1
    modified.Rotate = 1

    # Second copy: appended after the first one was edited.
    target_pdf.pages.append(original)
    untouched = target_pdf.pages[1]

    assert modified.MediaBox[0] != original.MediaBox[0]
    assert Name.Rotate in modified
    assert Name.Rotate not in original

    assert untouched.MediaBox[0] == original.MediaBox[0]
    assert Name.Rotate not in untouched
コード例 #24
0
def test_save_bytesio(resources, outpdf):
    """Saving to a BytesIO and to a file must yield the same bytes."""
    with Pdf.open(resources / 'fourpages.pdf') as source:
        duplicate = Pdf.new()
        for page in source.pages:
            duplicate.pages.append(page)

        # First save goes to memory...
        buffer = BytesIO()
        duplicate.save(buffer)
        in_memory = buffer.getvalue()
        assert in_memory != b''

        # ...the second to disk; both must match byte for byte.
        duplicate.save(outpdf)
        assert outpdf.read_bytes() == in_memory
コード例 #25
0
def pdfConverter():
    """Convert a chain of javatpoint pages to PDF and merge them.

    Reads ``url`` (first page) and ``end`` (last page) from the submitted
    form.  The first page is converted to ``static/pdf/1.pdf``; the "next"
    link of each page is then followed and converted to ``2.pdf``,
    ``3.pdf``, ... until the link equals ``end`` or no link is found.  All
    PDFs in ``static/pdf/`` plus ``static/final_pdf/blank.pdf`` are merged
    into ``static/final_pdf/merged.pdf`` and the per-page files are deleted.

    Any error is printed and the download page is rendered regardless.
    """
    from contextlib import ExitStack

    base_url = "https://www.javatpoint.com/"
    pdf_dir = 'static/pdf/'

    def _page_order(file_name):
        # Numeric names sort by value (2 before 10); anything else goes last.
        stem = file_name[:-len('.pdf')]
        if stem.isdecimal():
            return (0, int(stem), '')
        return (1, 0, stem)

    url = request.form['url']
    endurl = request.form['end']
    print(endurl)
    end = endurl.replace(base_url, "")
    print(end)
    print(url)
    print("Check")
    try:
        # NOTE(review): API credentials are hard-coded here; consider moving
        # them to configuration.
        client = pdfcrowd.HtmlToPdfClient('demo',
                                          'ce544b6ea52a5621fb9d55f8b542d14d')
        client.convertUrlToFile(url, pdf_dir + '1.pdf')
        print("1")
        counter = 2
        while True:
            res = requests.get(url)
            soup = bs4.BeautifulSoup(res.text, "lxml")
            data = soup.select(".next", href=True)
            if not data:
                # No "next" link: stop crawling and merge what was collected
                # instead of failing with IndexError.
                print("No next link found on " + url)
                break
            nextpoint = data[0]['href']
            print(nextpoint)
            url = base_url + nextpoint
            if nextpoint == end:
                break
            try:
                client.convertUrlToFile(url, pdf_dir + f'{counter}.pdf')
                print(counter)
                counter += 1
            except pdfcrowd.Error as why:
                sys.stderr.write('Pdfcrowd Error: {}\n'.format(why))
                raise

        filepath_list = os.listdir(pdf_dir)
        print(filepath_list)
        # os.listdir order is arbitrary; sort so pages merge as 1, 2, ..., 10.
        pdf_names = sorted(
            (name for name in filepath_list if name.endswith('.pdf')),
            key=_page_order,
        )
        with ExitStack() as stack:
            pdf = stack.enter_context(Pdf.new())
            for file in pdf_names:
                print(file)
                src = stack.enter_context(Pdf.open(pdf_dir + file))
                print("@#$")
                pdf.pages.extend(src.pages)
            blank = stack.enter_context(Pdf.open('static/final_pdf/blank.pdf'))
            pdf.pages.extend(blank.pages)
            pdf.save('static/final_pdf/merged.pdf')

        # Every source is closed by now, so removal also works on Windows.
        for file in pdf_names:
            os.remove(pdf_dir + file)
    except Exception as e:
        # Top-level boundary of the view: report and still render the page.
        print(e)

    return render_template('download.html')
コード例 #26
0
def test_foreign_copied_pages_are_true_copies(graph, outpdf):
    """Rotating some foreign-copied pages must not rotate the other copies."""
    out = Pdf.new()
    for _ in range(4):
        duplicate = out.copy_foreign(graph.pages[0])
        out.pages.append(duplicate)

    # Rotate the first and third copies only.
    for rotated_index in (0, 2):
        out.pages[rotated_index].Rotate = 180

    out.save(outpdf)
    reopened = Pdf.open(outpdf)
    first_page = reopened.pages[0]
    second_page = reopened.pages[1]
    assert first_page.Rotate == 180
    assert second_page.get(Name.Rotate, 0) == 0
コード例 #27
0
def test_repeat_using_intermediate(graph, outpdf):
    """Repeat one page three times, routing each copy through a temporary PDF."""

    def _repeat_page(source, page_index, times, destination):
        # Each copy travels via its own throwaway single-page PDF.
        for _ in range(times):
            intermediate = Pdf.new()
            intermediate.pages.append(source.pages[page_index])
            destination.pages.extend(intermediate.pages)
        return destination

    with Pdf.new() as out:
        _repeat_page(graph, 0, 3, out)
        assert len(out.pages) == 3
        out.save(outpdf)
コード例 #28
0
def preprocess_pdf(fname='temp.pdf'):
    """Rewrite ``fname`` through pikepdf, keeping the original as a backup.

    The pages are copied into a fresh PDF that is saved as ``fname + '.tmp'``;
    the original is then moved to ``fname + '.orig'`` and the rewritten file
    takes over the name ``fname``.
    """
    import os

    from pikepdf import Pdf

    tmp_output_file_path = fname + '.tmp'
    # Must differ from the temp path.  Both used to be ``fname + '.tmp'``, so
    # the first rename overwrote the rewritten file with the original and the
    # second rename moved the original back, leaving the PDF unchanged.
    final_input_file_path = fname + '.orig'

    with Pdf.open(fname) as pdf, Pdf.new() as new_pdf:
        for page_obj in pdf.pages:
            new_pdf.pages.append(page_obj)
        new_pdf.save(tmp_output_file_path)

    # Both documents are closed before the swap; os.replace also overwrites a
    # backup left behind by an earlier run.
    os.replace(fname, final_input_file_path)
    os.replace(tmp_output_file_path, fname)
    print(f"Fixed {fname}")
コード例 #29
0
def juntar_merge_dois_pdfs():
    """Merge PDF_Exemplo2.pdf and PDF_Exemplo3.pdf into pdf_combinado.pdf."""
    combinado = Pdf.new()
    fontes = [
        Pdf.open(nome) for nome in ('PDF_Exemplo2.pdf', 'PDF_Exemplo3.pdf')
    ]

    for fonte in fontes:
        combinado.pages.extend(fonte.pages)

    combinado.save('pdf_combinado.pdf')

    # Close the sources first, then the merged document.
    for aberto in fontes + [combinado]:
        aberto.close()
コード例 #30
0
def merge_files(input_path, list_files, output_file):
    """Merge several PDF files into one.

    :param input_path: string prefix prepended to every file name (plain
        concatenation, so include the trailing path separator).
    :param list_files: file names to merge, in order.
    :param output_file: path of the merged PDF to write.
    :return: None
    """
    from contextlib import ExitStack

    with ExitStack() as stack:
        pdf = stack.enter_context(Pdf.new())
        version = pdf.pdf_version
        for _file in list_files:
            src = stack.enter_context(Pdf.open(input_path + _file))
            # The output must declare at least the highest version among its
            # sources; this was never updated before.
            version = max(version, src.pdf_version)
            pdf.pages.extend(src.pages)
        # Sources stay open until the merged file is written, then all close.
        pdf.save(output_file, min_version=version)