def genBook(self, outputFile=None):
    ''' generate booklet

    Runs the processing steps selected in ``self.__inArgs`` in a fixed
    order and writes ``self.objs`` out with ``PdfWriter``:

    1. ``scanRotateAngle`` -- replace ``self.objs`` with the result of
       ``page_per_xobj(self.objs)`` and call ``self.rotate`` with the
       angle converted to ``int``.
    2. ``organizePages``   -- ``self.organize(...)``.
    3. ``insertPage``      -- ``self.attach(..., self.__blank)``.
    4. ``vCutPage``        -- ``self.modify(..., self.splitPage)``.
    5. ``self.flatten()`` when step 3 or 4 (or earlier state) set
       ``self.__needFlatten``.
    6. ``bookletSize``     -- ``self.booklet()``.

    Every step that runs sets ``self.__gen`` to 1.

    outputFile: path of the PDF to write.  When it is ``None`` and
        ``self.__gen`` is truthy, the name is derived from
        ``args.inputFilePath`` by replacing its extension with
        ``_booklet.pdf``.

    NOTE(review): the original indentation of this method was lost.  The
    final ``PdfWriter(...)`` call is reconstructed here as unconditional;
    it may instead have been nested inside the preceding ``if``.  Under
    the reading used here, ``outputFile`` stays ``None`` when no step ran
    and no name was supplied -- confirm how ``PdfWriter`` handles that.
    '''
    args = self.__inArgs
    if args.scanRotateAngle:
        # Split the input into one page per XObject before rotating.
        self.objs = list(page_per_xobj(self.objs))
        self.rotate(int(args.scanRotateAngle))
        # Disabled experiment, kept from the original:
        # self.modify('0-', self.splitPage)
        # self.__needFlatten = 1
        # obj = self.objs.pop(0)
        # self.objs.append(obj)
        self.__gen = 1
    if args.organizePages:
        self.organize(args.organizePages)
        self.__gen = 1
    if args.insertPage:
        self.attach(args.insertPage, self.__blank)
        # attach() output has to go through flatten() below.
        self.__needFlatten = 1
        self.__gen = 1
    if args.vCutPage:
        self.modify(args.vCutPage, self.splitPage)
        # Same as above: modify() output is flattened before the booklet step.
        self.__needFlatten = 1
        self.__gen = 1
    if self.__needFlatten:
        self.flatten()
    # Disabled step, kept from the original:
    # if args.group:
    #     self.group(args.group)
    if args.bookletSize:
        self.booklet()
        self.__gen = 1
    # Only invent a default output name when something was actually produced
    # and the caller did not choose a name.
    if self.__gen and outputFile is None:
        outputFile = os.path.splitext(
            args.inputFilePath)[0] + '_booklet' + '.pdf'
    PdfWriter(outputFile).addpages(self.objs).write()
def retrieve_images_into_pdf():
    '''Copy the XObjects of the selected PDF into a new PDF, one per page.

    The input path is read from the enclosing-scope name ``pfile``.  When
    it is the empty string an error dialog is shown and nothing else
    happens.  Otherwise the output is written to ``extract.<basename of
    pfile>`` (a relative path) and an info dialog reports the file name.

    Raises:
        IndexError: if ``page_per_xobj`` yields nothing for the input.
    '''
    # Guard clause: no file has been chosen yet.
    if pfile == "":
        messagebox.showerror("File Error-> Not PDF","Please select PDF file")
        return

    # pdfrw is imported lazily, only once there is a file to process.
    from pdfrw import PdfReader, PdfWriter
    from pdfrw.findobjs import page_per_xobj

    target = 'extract.' + os.path.basename(pfile)
    source_pages = PdfReader(pfile).pages
    extracted = list(page_per_xobj(source_pages, margin=0.5*72))
    if not extracted:
        raise IndexError("No XObjects found")

    out = PdfWriter(target)
    out.addpages(extracted)
    out.write()
    messagebox.showinfo("Images retrieved","All images saved in '{0}'".format(target))
def extract_first_page(fname):
    '''Write the XObjects of ``fname`` to a PDF in the temp directory.

    Despite the name, no "first page" selection happens here: every page
    yielded by ``page_per_xobj`` for the input is written, one XObject
    per page.

    fname: path of the PDF to read.

    Returns the path of the written file, which is the system temp
    directory joined with the base name of ``fname``.

    NOTE(review): the output name is not unique -- presumably an existing
    file of the same name in the temp directory is overwritten; confirm.
    '''
    # Destination: <tempdir>/<basename of the input>
    tmp_dir = tempfile.gettempdir()
    out_path = os.path.join(tmp_dir, os.path.basename(fname))

    # One output page per XObject found in the input document.
    source_pages = PdfReader(fname).pages
    xobj_pages = list(page_per_xobj(source_pages))

    out = PdfWriter(out_path)
    out.addpages(xobj_pages)
    out.write()

    return out_path
#!/usr/bin/env python

'''
usage:   extract.py <some.pdf>

Locates Form XObjects and Image XObjects within the PDF,
and creates a new PDF containing every second one of them
(the 2nd, 4th, ... in the order they are found) -- one per page.

Resulting file will be named pdf_<some.pdf> and is written
relative to the current working directory.
'''

import sys
import os

from pdfrw import PdfReader, PdfWriter
from pdfrw.findobjs import page_per_xobj

# Exactly one argument (the input PDF) is required.  Report a usage line
# instead of failing with an opaque tuple-unpacking ValueError.
if len(sys.argv) != 2:
    sys.exit('usage: {0} <some.pdf>'.format(os.path.basename(sys.argv[0])))
inpfn = sys.argv[1]
outfn = 'pdf_' + os.path.basename(inpfn)

pages = list(page_per_xobj(PdfReader(inpfn).pages, margin=0.5*72))
# Keep only the entries at odd indices (1, 3, 5, ...).
pages = pages[1::2]
# Checked after the filtering above, so this also triggers when the input
# holds a single XObject.
if not pages:
    raise IndexError("No XObjects found")
writer = PdfWriter(outfn)
writer.addpages(pages)
writer.write()
#!/usr/bin/env python

'''
usage:   extract.py <some.pdf>

Locates Form XObjects and Image XObjects within the PDF,
and creates a new PDF containing these -- one per page.

Resulting file will be named extract.<some.pdf>
'''

import sys
import os

from pdfrw import PdfReader, PdfWriter
from pdfrw.findobjs import page_per_xobj

# Exactly one argument (the input PDF) is required.  Report a usage line
# instead of failing with an opaque tuple-unpacking ValueError.
if len(sys.argv) != 2:
    sys.exit('usage: {0} <some.pdf>'.format(os.path.basename(sys.argv[0])))
inpfn = sys.argv[1]
# The output name is built from the base name only, so the result is
# written relative to the current working directory.
outfn = 'extract.' + os.path.basename(inpfn)

pages = list(page_per_xobj(PdfReader(inpfn).pages, margin=0.5*72))
if not pages:
    raise IndexError("No XObjects found")

# The file name is handed to write() rather than to the constructor;
# this call form is kept exactly as the script had it.
writer = PdfWriter()
writer.addpages(pages)
writer.write(outfn)
Locates Form XObjects and Image XObjects within the PDF, and creates a new PDF containing these -- one per page. Resulting file will be named extract.<some.pdf> ''' import sys import os # --- Add pdfrw module to Python path --- currentdir = os.path.dirname(os.path.realpath(__file__)) parentdir = os.path.dirname(currentdir) sys.path.insert(0, parentdir) # --- Import pdrfw stuff --- from pdfrw import PdfReader, PdfWriter from pdfrw.findobjs import page_per_xobj # --- Main --- inpfn, = sys.argv[1:] outfn = 'extract.' + os.path.basename(inpfn) # pages = list(page_per_xobj(PdfReader(inpfn).pages, margin=0.5*72)) pages = list(page_per_xobj(PdfReader(inpfn).pages)) if not pages: raise IndexError("No XObjects found") writer = PdfWriter(outfn) writer.addpages(pages) writer.write()