def saveAs(self, fname):
    """Write this document's pages, Info dictionary and outlines to *fname*."""
    writer = PdfWriter()
    source = self.pdf
    writer.addpages(source.pages)
    # Carry the metadata and the outline tree over to the new trailer.
    trailer = writer.trailer
    trailer.Info = source.Info
    trailer.Root.Outlines = source.Root.Outlines
    writer.write(fname)
def go(inpfn, outfn):
    """Adjust the single page of *inpfn* and write it, with its metadata, to *outfn*."""
    source = PdfReader(inpfn, decompress=False)
    # Tuple unpacking fails unless the input has exactly one page.
    (only_page,) = source.pages
    out = PdfWriter()
    out.addpage(adjust(only_page))
    out.trailer.Info = IndirectPdfDict(source.Info)
    out.write(outfn)
def splitting(filenameOut="out", *varargs):
    """Interleave the pages of several PDF files into ``filenameOut + ".pdf"``.

    Page 0 of every input is written first (in argument order), then page 1
    of every input, and so on, stopping at the page count of the shortest
    input.

    Args:
        filenameOut: Output file name without the ``.pdf`` extension.
        *varargs: Paths of the input PDF files.

    Raises:
        ValueError: If an input path or ``filenameOut`` is not a ``str``,
            or if no input file is given.
    """
    for path in varargs:
        if not isinstance(path, str):
            raise ValueError("Errore: i file devono essere pdf")
    if not isinstance(filenameOut, str):
        raise ValueError("Errore: il nome del file deve essere di tipo str")
    if not varargs:
        # Previously this fell through to range(float("inf")) and died with
        # an unrelated TypeError.
        raise ValueError("Errore: inserire almeno un file.")

    # Parse every input exactly once instead of once per output page.
    readers = [PdfReader(path) for path in varargs]
    shared_pages = min(len(reader.pages) for reader in readers)

    writer = PdfWriter()
    for index in range(shared_pages):
        for reader in readers:
            writer.addPage(reader.getPage(index))
    writer.write(filenameOut + ".pdf")
def two_up(data):
    """Put the two pages of a PDF side by side on one sheet and emit it on stdout.

    ``data`` is the raw content of a PDF that must contain exactly two pages.
    Both pages are rotated by 270 degrees and scaled by sqrt(0.5); the result
    is written to ``sys.stdout`` after a ``Content-Type`` header.
    """
    source = PdfReader(fdata=data)
    merged = PageMerge() + source.pages
    assert len(merged) == 2
    left, right = merged

    factor = 0.7071067811865476  # sqrt(0.5)
    rotation = 270
    # Horizontal offset of the second page, taken before any transform.
    shift = factor * merged.xobj_box[2]

    for half in (left, right):
        half.Rotate = rotation
        half.scale(factor)
    right.x = shift

    out = PdfWriter()
    out.addpage(merged.render())

    # Retain the source metadata, updating only the Creator entry.
    source.Info.Creator = 'modulo-nic.py %s' % __version__
    out.trailer.Info = source.Info

    sys.stdout.write('Content-Type: application/x-pdf\n\n')
    out.write(sys.stdout)
def splitting(*varargs, filenameOut="out"):
    """Interleave the pages of two or more PDF files into a single PDF.

    Page 0 of every input is written first (in argument order), then page 1
    of every input, and so on, stopping at the page count of the shortest
    input.

    Args:
        *varargs: Paths of the input PDF files (at least two).
        filenameOut: Output file name; ``.pdf`` is appended when missing.

    Raises:
        IndexError: If fewer than two input files are given.
        ValueError: If an input path or ``filenameOut`` is not a ``str``.
    """
    if len(varargs) <= 1:
        raise IndexError("Errore: inserire almeno due file.")
    for path in varargs:
        if not isinstance(path, str):
            raise ValueError("Errore: i file devono essere pdf")
    if not isinstance(filenameOut, str):
        raise ValueError("Errore: il nome del file deve essere di tipo str")

    # Parse every input exactly once instead of once per output page.
    readers = [PdfReader(path) for path in varargs]
    shared_pages = min(len(reader.pages) for reader in readers)

    writer = PdfWriter()
    for index in range(shared_pages):
        for reader in readers:
            writer.addPage(reader.getPage(index))

    if not filenameOut.endswith('.pdf'):
        filenameOut = filenameOut + '.pdf'
    writer.write(filenameOut)
def combine_match_sheets(match_sheets):
    """Concatenate the given match-sheet PDFs and return the combined file's path.

    The result is always written to ``combined_match_sheets.pdf`` inside
    ``match_sheet_dir``.
    """
    destination = os.path.join(match_sheet_dir, "combined_match_sheets.pdf")
    merged = PdfWriter()
    for sheet_path in match_sheets:
        sheet = PdfReader(sheet_path)
        merged.addpages(sheet.pages)
    merged.write(destination)
    return destination
def save_pdf(infile, outpages):
    """Write *outpages* to the file name derived from *infile*.

    The Info dictionary of *infile* is reused for the output, with its
    Producer entry replaced by the impositioner project URL.
    """
    source = PdfReader(infile)
    target = create_filename(infile)

    out = PdfWriter()
    out.addpages(outpages)

    out_trailer = out.trailer
    out_trailer.Info = source.Info
    out_trailer.Info.Producer = "https://github.com/sgelb/impositioner"

    out.write(target)
def main():
    """Strip ResearchGate additions from the PDF named on the command line.

    Reads the input file given as the first positional argument, neutralises
    the ResearchGate underlinings in the page content streams, removes every
    page's annotations, drops the first page and writes the remaining pages
    to the file given as the second positional argument.
    """
    parser = argparse.ArgumentParser(description="Strip ResearchGate additions from a PDF")
    parser.add_argument("infile", metavar="input-filename", type=str, nargs=1,
                        help="PDF file to process")
    parser.add_argument("outfile", metavar="output-filename", type=str, nargs=1,
                        help="name for processed output file")
    args = parser.parse_args()

    # This regular expression matches the form of the ResearchGate
    # underlinings in the content streams. We match against a truncated form
    # of the distinctive RGB triplet because it's not always given with
    # the same accuracy.
    # "0.3333333333 0.6941176471 0.9607843137"
    # NOTE(review): the whitespace inside this pattern is reproduced as it
    # appears in this view of the file — confirm against the original layout.
    regex = re.compile(r"""(0\.33333[0-9]+ 0\.694117[0-9]+ 0\.960784[0-9]+ RG \d+\.?\d* w \d+\.?\d* \d+\.?\d* m \d+\.?\d* \d+\.?\d* )l S""")

    # nargs=1 makes each argument a one-element list, hence the [0].
    dict_pages = PdfReader(args.infile[0]).pages

    def fix_stream(contents):
        """Rewrite matching underline commands in ``contents.stream`` in place."""
        # Look for underlinings and make them invisible.
        if not hasattr(contents, "stream"):
            return
        s = contents.stream
        # We identify RG underlinings by their (hopefully unique)
        # RGB colour triplet.
        if s is not None and regex.search(s):
            # Minimal change: change the line draw commands to
            # moves, so no line is drawn. It would be more
            # satisfying to remove the stream entirely, but it's
            # simpler and safer to preserve the file structure
            # (in particular, the stream length) wherever possible.
            contents.stream = regex.sub("\\1m\nS", s)

    for page in dict_pages:
        if "/Annots" in page:
            # Remove all annotations. This may of course cause some
            # collateral damage, but PDFs of articles don't usually have
            # annotations so probably this will just strip ResearchGate
            # links. If this becomes a problem, it should be easy to
            # identify RG annotations and remove only them.
            page.pop("/Annots")

        # There may be a stream in the Contents object and/or in its
        # children, so we check for both.
        fix_stream(page.Contents)
        # NOTE(review): this iteration assumes page.Contents is iterable;
        # a page without a Contents entry would presumably fail here — verify.
        for contents in page.Contents:
            fix_stream(contents)

    writer = PdfWriter()
    # Start at the second page to remove the ResearchGate cover sheet.
    for page in dict_pages[1:]:
        writer.addpage(page)
    writer.write(args.outfile[0])
def combine(inpfn, outfn, x, y, gap):
    """Build *outfn* from *inpfn* by adding one ``getPages`` result at a time.

    The loop runs until the page list is empty.
    """
    # Read all pages from the input file.
    remaining = PdfReader(inpfn).pages
    # Object that collects the output PDF.
    out = PdfWriter()
    # NOTE(review): presumably getPages removes the pages it uses from
    # `remaining`; otherwise this loop would never terminate — confirm.
    while True:
        if not remaining:
            break
        out.addpage(getPages(remaining, x, y, gap))
    out.write(outfn)
def test_pdf(pdfname):
    """Round-trip *pdfname* through pdfrw into ``outdir`` (Python 2 code).

    The output is named after the MD5 digest of the input file name, and the
    original name is recorded in the document as ``OriginalFileName``.
    """
    digest = hashlib.md5(pdfname).hexdigest()
    outfn = os.path.join(outdir, digest + '.pdf')
    print >> stderr, ' ->', outfn

    document = PdfReader(pdfname, decompress=False)
    try:
        document.Info.OriginalFileName = pdfname
    except AttributeError:
        # No usable Info dictionary: record the name on the trailer itself.
        document.OriginalFileName = pdfname

    out = PdfWriter()
    out.trailer = document
    out.write(outfn)
def writepdf():
    """Copy the module-level ``pdf`` file to ``pwat.<name>`` with blanked metadata."""
    outfn = "pwat." + os.path.basename(pdf)
    document = PdfReader(pdf)

    # Overwrite each identifying Info entry with a placeholder value.
    replacements = (
        ("Creator", "NOT"),
        ("Author", "NOT"),
        ("Title", "NOT"),
        ("Producer", "NOT"),
        ("CreationDate", "6/6/6"),
        ("ModDate", "6/6/6"),
    )
    for field, value in replacements:
        setattr(document.Info, field, value)

    out = PdfWriter()
    out.trailer = document
    out.write(outfn)
def makeOnePagers(filename='GPO-CONAN-REV-2014.pdf', path='pdf/'):
    """Split *filename* into one single-page PDF per page.

    Each non-empty page ``i`` is written to ``<path>/pageindex-<i>.pdf``.
    A page that cannot be written is reported and skipped, so one bad page
    does not abort the whole run.

    Args:
        filename: Path of the PDF to split.
        path: Directory that receives the one-page files. It was previously
            ignored in favour of a hard-coded ``'pdf/'``; the default keeps
            that location.
    """
    import os

    pages = PdfReader(filename).pages
    print(len(pages))
    for i, page in enumerate(pages):
        if page and len(page) > 0:
            outfile = PdfWriter()
            outfile.addPage(page)
            target = os.path.join(path, 'pageindex-%s.pdf' % i)
            try:
                outfile.write(target)
            except Exception as exc:
                # Best effort, but no longer silent.
                print('could not write %s: %s' % (target, exc))
        print(i)
def merge(*varargs, merge_file):
    """Concatenate PDF files from the ``pdf_file`` directory into *merge_file*.

    The inputs are merged in sorted name order.

    Args:
        *varargs: File names (ending in ``.pdf``) located in ``pdf_file``.
        merge_file: Output path; ``.pdf`` is appended when missing.

    Raises:
        Exception: If an input is not a string or does not end in ``.pdf``.
    """
    # The original test was inverted and appended ".pdf" only to names that
    # already had it.
    if not merge_file.endswith('.pdf'):
        merge_file = merge_file + ".pdf"

    # Validate everything before any file is read.
    for name in varargs:
        if not isinstance(name, str):
            raise Exception("Errore: Tutti i parametri devono essere stringhe.")
    for name in varargs:
        if not name.endswith('.pdf'):
            raise Exception("Errore tutti i parametri devono terminare con .pdf")

    writer = PdfWriter()
    for fname in sorted(varargs):
        writer.addpages(PdfReader(os.path.join('pdf_file', fname)).pages)
    # Write to the requested file rather than a hard-coded "output.pdf".
    writer.write(merge_file)
def get(self, id):
    """Extract a page or page range of ``teste.pdf`` into ``teste.pdffrag``.

    Args:
        id: A 1-based page number (``"3"`` or ``3``) or an inclusive range
            written as ``"first-last"``.

    Raises:
        ValueError: If *id* cannot be parsed as integers.
        IndexError: If the range goes past the end of the document.
    """
    inpfn = 'teste.pdf'
    outfn = '%sfrag' % os.path.basename(inpfn)

    # The range was previously read from `onerange` before it was ever
    # assigned; derive it from the request parameter instead.
    onerange = [int(part) for part in str(id).split('-')]
    # A single page "N" becomes the range N..N.
    onerange = (onerange + onerange[-1:])[:2]

    pages = PdfReader(inpfn).pages
    outdata = PdfWriter()
    for pagenum in range(onerange[0], onerange[1] + 1):
        outdata.addpage(pages[pagenum - 1])
    outdata.write(outfn)
def consolidateAllSheets(self, subDir=None):
    """Merge every PDF in the print directory into ``output.pdf``.

    The sheets are merged in sorted file-name order. Afterwards every other
    entry in the directory is removed, leaving only ``output.pdf``.

    Args:
        subDir: Optional suffix appended to ``self.printdirectory`` to select
            the directory to consolidate.
    """
    if subDir is not None:
        directory = self.printdirectory + subDir
    else:
        directory = self.printdirectory
    output_name = 'output.pdf'

    # Skip an output.pdf left by an earlier run so that it is not merged
    # into its own replacement.
    files = [x for x in os.listdir(directory)
             if x.endswith('.pdf') and x != output_name]

    writer = PdfWriter()
    for fname in sorted(files):
        writer.addpages(PdfReader(os.path.join(directory, fname)).pages)
    # Join paths the same way for reading, writing and removing, so a
    # directory without a trailing separator also works.
    writer.write(os.path.join(directory, output_name))

    for x in os.listdir(directory):
        if x != output_name:
            os.remove(os.path.join(directory, x))
def merge(*varargs, filenameOut='merge_file'):
    """Concatenate two or more PDF files into *filenameOut*.

    ``.pdf`` is appended to *filenameOut* when missing. Inputs are checked
    one at a time, immediately before each is read.

    Raises:
        Exception: Fewer than two inputs, a non-string output name, or an
            input that does not end in ``.pdf``.
        ValueError: An input that is not a string.
    """
    if len(varargs) < 2:
        raise Exception('Errore: utilizzare almeno due file.')
    if not isinstance(filenameOut, str):
        raise Exception('Errore: filenameOut deve essere una stringa.')

    target = filenameOut if filenameOut.endswith('.pdf') else filenameOut + ".pdf"

    out = PdfWriter()
    for source_name in varargs:
        if not isinstance(source_name, str):
            raise ValueError("Errore: Tutti i parametri devono essere stringhe.")
        if not source_name.endswith('.pdf'):
            raise Exception("Errore: tutti i parametri devono terminare con .pdf")
        out.addpages(PdfReader(source_name).pages)
    out.write(target)
def cleanPdf(srcPath, destPath=''): if os.path.exists(srcPath): if len(destPath) < 1: f, e = os.path.splitext(srcPath) destPath = f+'.cln.pdf' x = PdfReader(srcPath) for i, page in enumerate(x.pages): print 'page %05d: ' % i, xobjs = page.Resources.XObject for okey in xobjs.keys(): xobj = xobjs[okey] if DuoPdf.isDel(xobj): # xobj.pop('/SMask') xobjs.pop(okey) # print xobj # print xobj.SMask print '.', else: print '[%sx%s#%s]' % (xobj.Width, xobj.Height, xobj.Length), print 'done' print '[%s] -> [%s]' % (srcPath, destPath) PdfWriter().write(destPath, x)
def extract_sub_pdf(self, pI, pF, filename):
    """
    Copy pages pI through pF (inclusive) into the file 'filename'.

    @param pI,pF : integers, the page numbers
    @param filename : string

    'pI' and 'pF' are the page numbers as shown in the document (starting
    at 1), not positions in Python's zero-based list of pages.
    """
    writer = PdfWriter(filename)
    all_pages = self.pdf_reader.pages
    # Convert the 1-based inclusive bounds to a zero-based half-open range.
    first_index = pI - 1
    for index in range(first_index, pF):
        writer.addpage(all_pages[index])
    writer.write()
def write_link(self, file_path, uri):
    '''Write the "uri" into the Subject attribute of PDF file "file_path".

    With ``self.overwrite`` set, the Subject field is always replaced.
    Otherwise:

    * if the URI is already in the Subject, nothing is written;
    * if the Subject starts with a Zotero select link, that link is replaced;
    * if the Subject holds any other non-empty value, it is left alone;
    * if the Subject is empty or missing, the URI is written.

    The file is only rewritten when ``self.dry_run`` is false.
    '''
    fp = antiformat(file_path)
    if __debug__: log(f'reading PDF file {fp}')
    trailer = PdfReader(file_path)
    file = antiformat(f'[steel_blue3]{file_path}[/]')

    if self.overwrite:
        inform(f'Overwriting PDF "Subject" field of {file}')
        trailer.Info.Subject = uri
    else:
        subject = trailer.Info.Subject or ''
        if __debug__: log(f'found PDF Subject value {subject} on {fp}')
        if uri in subject:
            inform(f'Zotero link already present in PDF "Subject" field of {file}')
            return
        elif subject.startswith('zotero://select'):
            inform(f'Replacing existing Zotero link in PDF "Subject" field of {file}')
            subject = re.sub(r'(zotero://\S+)', uri, subject)
            trailer.Info.Subject = subject
        elif subject:
            # `subject` is never None here (it falls back to ''), so the old
            # `is not None` test always fired and the branch below could
            # never run.
            warn(f'Not overwriting existing PDF "Subject" value in {file}')
            return
        else:
            if __debug__: log(f'no prior PDF Subject field found on {fp}')
            inform(f'Writing Zotero link into PDF "Subject" field of {file}')
            trailer.Info.Subject = uri

    if not self.dry_run:
        if __debug__: log(f'writing PDF file with new "Subject" field: {fp}')
        PdfWriter(file_path, trailer=trailer).write()
def get(self, request, format=None):
    """Merge the PDFs of all books and return the result as a download.

    The merged document is written to ``media/mergedfile.pdf`` and streamed
    back as an ``application/octet-stream`` attachment.
    """
    merged_path = os.path.join('media', 'mergedfile.pdf')

    writer = PdfWriter()
    for book in Book.objects.all():
        if book:
            writer.addpages(PdfReader(book.pdf).pages)
    with open(merged_path, 'wb') as pdfOutputFile:
        writer.write(pdfOutputFile)

    # Pass the content type to the response itself. Assigning
    # response['content_type'] only added a header literally named
    # "content_type" and left the real Content-Type unchanged.
    # The response object takes over the open file handle.
    response = FileResponse(open(merged_path, 'rb'),
                            content_type="application/octet-stream")
    response['Content-Disposition'] = 'attachment; filename="mergedfile.pdf"'
    return response
def upload():
    """Stamp an uploaded PDF with ``stamp.pdf`` and return the result.

    Exactly two files must be uploaded, and exactly one of them must be
    named ``stamp.pdf``. The first page of the stamp is drawn on top of
    every page of the other file. The stamped PDF is saved under ``tmp/``
    with a timestamp name and sent back as an attachment. When the upload
    is not valid, a plain-text message is returned instead.
    """
    uploadedFiles = request.files.getlist('upload_files')
    if len(uploadedFiles) != 2:
        return 'Must be 2 files.'

    first, second = uploadedFiles
    first_is_stamp = first.filename == 'stamp.pdf'
    second_is_stamp = second.filename == 'stamp.pdf'
    if first_is_stamp and second_is_stamp:
        return 'stamp.pdf must be 1 file.'
    if first_is_stamp:
        stampFile, inFile = first.stream, second.stream
    elif second_is_stamp:
        inFile, stampFile = first.stream, second.stream
    else:
        return 'stamp.pdf must be provided.'

    outputPdf = PageMerge().add(PdfReader(stampFile).pages[0])[0]
    inputPdf = PdfReader(inFile)
    for page in inputPdf.pages:
        PageMerge(page).add(outputPdf, prepend=False).render()

    sendFileName = datetime.now().strftime('%Y%m%d%H%M%S') + '.pdf'
    sendFilePath = 'tmp/' + sendFileName
    PdfWriter(sendFilePath, trailer=inputPdf).write()

    # Read the result back through a context manager so the handle is
    # closed; it was previously left open for the life of the process.
    with open(sendFilePath, 'rb') as fileObj:
        payload = fileObj.read()
    return send_file(io.BytesIO(payload),
                     as_attachment=True,
                     attachment_filename=sendFileName,
                     mimetype='application/pdf')
def build(self):
    """Stamp signature and date marks onto each page, then write the output.

    For every page, ``self.signature.locations[pagenum]`` supplies a
    signature location (index 0) and a date location (index 1). A mark is
    added only when the first element of its location is truthy.
    """
    self.pdffile.init_input()
    print('in build')
    for pagenum, page in enumerate(self.pdffile.trailer.pages):
        left, bottom, right, top = (float(value) for value in page.MediaBox)
        page_w = right - left
        page_h = top - bottom

        spots = self.signature.locations[pagenum]
        xy_sign = spots[0]
        xy_date = spots[1]
        if xy_sign[0]:
            self.add_signature(xy_sign, page, page_w, page_h)
        if xy_date[0]:
            self.add_date(xy_date, page, page_w, page_h)

    # Write out the destination file.
    out = PdfWriter(self.pdffile.outfn, trailer=self.pdffile.trailer)
    out.write()
def addWatermarkFile(self, fileInPath, watermarkPath, fileOutPath, underneath=True):
    """Add the first page of watermarkPath to every page of fileInPath.

    The result is written to fileOutPath. ``underneath`` is passed on as
    the ``prepend`` flag of the page merge.

    Returns True on success. Will raise exceptions from base on errors.

    Code based on the example at
    https://github.com/pmaupin/pdfrw/blob/master/examples/watermark.py
    The same assumptions apply: pages are the same size.
    """
    stamp = PageMerge()
    stamp.add(PdfReader(watermarkPath).pages[0])
    wmark = stamp[0]

    document = PdfReader(fileInPath)
    for page in document.pages:
        merger = PageMerge(page)
        merger.add(wmark, prepend=underneath)
        merger.render()

    PdfWriter(fileOutPath, trailer=document).write()
    return True
def addWaterMark(pdfpath, watermarkcontent=None):
    """Overlay a generated watermark on every page of *pdfpath*, in place.

    A temporary watermark PDF and a temporary output PDF are created next
    to the source; the output then replaces the source. Any failure is
    printed and appended, with its traceback, to the day's file under
    ``APILOG_PATH['excptionlogpath']``. The function never raises for a
    processing error.

    Args:
        pdfpath: Path of the PDF to watermark.
        watermarkcontent: Content handed to ``create_watermark``.

    Returns:
        *pdfpath*, whether or not the watermark was applied.
    """
    now = datetime.datetime.now()
    # Bound before the try so the error message can always be formatted.
    out_path = None
    try:
        # splitext only strips the final extension. The old split('.')[0]
        # cut the path at the first dot, so './docs/a.pdf' became ''.
        base = os.path.splitext(pdfpath)[0]
        watermarkpath = base + '-water' + '.pdf'
        out_path = base + '-out' + '.pdf'
        watermark = create_watermark(watermarkpath, watermarkcontent)
        # Get our files ready.
        input_file = PdfReader(pdfpath)
        for page in input_file.pages:
            PageMerge(page).add(watermark, prepend=False).render()
        PdfWriter(out_path, trailer=input_file).write()
        os.remove(pdfpath)
        os.rename(out_path, pdfpath)
        os.remove(watermarkpath)
        print('覆盖水印pdf--%s--源文件%s' % (now, out_path))
    except Exception:
        print('覆盖水印pdf出错--%s--源文件%s' % (now, out_path))
        filepath = APILOG_PATH['excptionlogpath'] + '/' + now.strftime('%Y-%m-%d')
        # The context manager closes the log file even if the write fails.
        with open(filepath, 'a') as f:
            f.writelines(now.strftime('%H:%M:%S') + '\n' + traceback.format_exc() + '\n\n')
    return pdfpath
def merge_file(self):
    """Merge the PDFs listed in the widget into the chosen output file.

    Without an output file name the user is asked for one and nothing is
    merged. On success the list is cleared and a confirmation is shown;
    any error is passed to the message dialog.
    """
    if not self.output_file.text():
        self.populate_file_name()
        return

    if not self.pdf_list_widget.count() > 0:
        self.dialog_message('No hay archivos para unificar.')
        return

    pdf_merger = PdfWriter()
    try:
        for row in range(self.pdf_list_widget.count()):
            source_path = self.pdf_list_widget.item(row).text()
            pdf_merger.addpages(PdfReader(source_path).pages)
        pdf_merger.write(self.output_file.text())
        self.pdf_list_widget.clear()
        self.dialog_message('Unificación completada!')
    except Exception as e:
        self.dialog_message(e)
def outimagesfunction(f1, f2, f3, f4, f5):
    """Combine up to five uploaded images into one PDF and return its URL.

    Each argument is an image file name relative to ``./pdf4/media/``, or
    ``None`` when that slot was left empty. Every image becomes one page.
    An image with an alpha channel is first re-saved, in place, as RGB.

    Returns:
        The public URL of the resulting PDF.
    """
    media_root = './pdf4/media/'
    # Random 12-character token used in the temporary and result file names.
    psw = ''
    for _ in range(12):
        psw = psw + random.choice(list('123456789qwertyuiopasdfghjklzxcvbnm'))

    # Storage handle used to delete the temporary per-page PDFs.
    fs = FileSystemStorage(location=media_root)
    # pdfrw writer that collects the pages of the final document.
    writer = PdfWriter()

    i = 0  # page number in the resulting file (1, 2, 3, ...)
    for name in (f1, f2, f3, f4, f5):
        if name is None:  # the user did not upload a file for this slot
            continue
        i = i + 1
        # The view passes bare names, without the media directory.
        image_path = media_root + name
        temp_name = str(i) + '_' + psw + '.pdf'
        temp_path = media_root + temp_name

        # Remove the alpha channel if there is one. The context manager
        # closes the image handle, which was previously leaked.
        with Image.open(image_path) as im:
            bands = im.getbands()
            if ('A' in bands) or ('a' in bands):
                im.convert('RGB').save(image_path)

        # Convert before opening the output, so that a failed conversion
        # leaves no open handle behind.
        pdf_bytes = img2pdf.convert(image_path)
        with open(temp_path, 'wb') as pdf1:
            pdf1.write(pdf_bytes)

        # From here on the work is done with pdfrw instead of img2pdf.
        writer.addpages(PdfReader(temp_path, decompress=False).pages)
        fs.delete(temp_name)  # remove the temporary per-page PDF

    writer.write('./pdf4/media/pdfresult/' + psw + '.pdf')
    url = 'https://pdf4you.ru/media/pdfresult/' + psw + '.pdf'
    return url
def update_and_move(self, targetdir: str, doctitle: str, tags: List[str], date: str):
    """Update metadata of pdf and move to target directory.

    The file is written to ``<targetdir>/<date> <doctitle>.pdf``. If that
    name is taken, ``-1``, ``-2``, ... is appended until a free name is
    found. The source file is deleted afterwards; a failed deletion is
    only reported.

    Arguments:
        targetdir {str} -- Target directory where pdf shall be placed.
        doctitle {str} -- New document title of pdf.
        tags {List[str]} -- Keywords/tags which shall be added to pdf.
        date {str} -- Date which will be entered into pdf filename.
    """
    pdf = PdfReader(self.filepath)

    # Normalise the title so the extension is added exactly once.
    if doctitle[-4:] == ".pdf":
        doctitle = doctitle[0:-4]
    stem = date + " " + doctitle
    filename = stem + ".pdf"

    # Find a unique filename. Building each candidate from the stem avoids
    # the earlier regex substitution, whose unescaped "." and replace-all
    # behaviour could rewrite an unrelated part of the name.
    n = 0
    while os.path.isfile(os.path.join(targetdir, filename)):
        n = n + 1
        filename = stem + "-" + str(n) + ".pdf"

    # Write data.
    writer = PdfWriter()
    writer.addpages(pdf.pages)
    writer.trailer.Info = IndirectPdfDict(Title=doctitle, Keywords=tags)
    writer.write(os.path.join(targetdir, filename))

    # Try to delete the source file.
    try:
        os.remove(self.filepath)
    except OSError as e:
        # If that failed, report it back to the user.
        print("Error: %s - %s." % (e.filename, e.strerror))
def PDF2Text(pdf):
    """OCR *pdf* with tesseract; return the text lines and write ``searchable.pdf``.

    Every page image is converted to mode ``'LA'`` and saved as
    ``cur_img.png``. Tesseract is run on it twice: once for text
    (``cur_txt.txt``) and once for a one-page PDF (``cur_pdf.pdf``). For
    each page the last text line read is replaced by a single newline.
    """
    page_images = convert_from_path(pdf)
    lines = []
    writer = PdfWriter()
    for page_image in page_images:
        page_image.convert('LA').save('cur_img.png')

        subprocess.run(['tesseract', 'cur_img.png', 'cur_txt', '--dpi', '125'])
        with open('cur_txt.txt', 'r', encoding='utf-8') as text_file:
            lines.extend(text_file.readlines())
        # Replace the trailing line of this page with a plain newline.
        del lines[-1]
        lines.append('\n')

        subprocess.run(
            ['tesseract', 'cur_img.png', 'cur_pdf', '--dpi', '125', 'pdf'])
        writer.addpages(PdfReader('cur_pdf.pdf').pages)

    writer.write('searchable.pdf')
    return lines
def __init__(self, pages, item_nu, raw_pages):
    """Create a paper record and write its pages to a separate PDF.

    Args:
        pages: Indices into ``Paper.pages_readpdf`` of the pages that make
            up this paper.
        item_nu: Item number; also used as the output file name.
        raw_pages: Raw page data; the first entry is passed to
            ``find_paper_name`` to get the paper's name.
    """
    self.item_nu = item_nu
    self.date = 1
    self.name = find_paper_name(raw_pages[0])
    self.authors = "author1"

    # Write the PDF file.
    outdata = PdfWriter()
    for page in pages:
        outdata.addpage(Paper.pages_readpdf[page])
    outdata.write(FILE_PATH + "output\\%s.pdf" % item_nu)
    logger.info("Created file operations_%s, with pages: %s", item_nu, pages)

    # Allows groups to be skipped.
    if DEFAULT_GROUP:
        self.group = "default_group"
    else:
        # The prompt previously used a bare `name`, which is not defined in
        # this scope; the paper's name lives on the instance.
        self.group = raw_input("What group is item %s with name %s" % (item_nu, self.name))
class Worker(QRunnable):
    """Runnable that inserts the pages of one PDF into another at intervals.

    The merged document is written next to the target file with a
    ``_MERGED.pdf`` suffix. ``started`` and ``finished`` signals carry the
    worker's job id.
    """

    def __init__(self, fileCopy, fileInsInto, pageStart, pageIteration):
        """Store the job parameters and create the writer and signals.

        Args:
            fileCopy: Path of the PDF whose pages are inserted.
            fileInsInto: Path of the PDF that receives the inserted pages.
            pageStart: The first insertion happens once ``pageStart - 1``
                output pages have been written.
            pageIteration: Amount added to the insertion position after
                each insertion.
        """
        super().__init__()
        self.pageStart = pageStart
        self.fileCopy = fileCopy
        # Output page count at which the next insertion happens.
        self.page = self.pageStart - 1
        # Number of pages written to the output so far.
        self.pages = 0
        self.pageIterate = pageIteration
        # Index of the next page to take from the insert-into file.
        self.currentPage = 0
        self.writer = PdfWriter()
        self.fileInsInto = fileInsInto

        # create unique identifier for each worker
        self.jobID = str(uuid.uuid4().hex)
        self.signals = WorkerSignals()

    def run(self):
        """Build the merged PDF and write it to ``<fileInsInto stem>_MERGED.pdf``."""
        self.signals.started.emit(self.jobID)

        self.fPath = PdfReader(self.fileInsInto)
        outfn = os.path.splitext(self.fileInsInto)[0] + "_MERGED.pdf"

        copyFrom_totalPages = len(PdfReader(self.fileCopy).pages)
        # Calculate the total pages for the current fileInsertInto file
        insInto_totalPages = len(self.fPath.pages)

        # Calculate total number of pages upon merging...round up to nearest
        # whole number
        # NOTE(review): a pageIterate of 0 would divide by zero here — confirm
        # that callers never pass it.
        self.totalPages = math.ceil(
            (insInto_totalPages +
             ((insInto_totalPages - self.pageStart) / self.pageIterate) *
             copyFrom_totalPages))

        while self.pages < self.totalPages:
            if self.pages == self.page:
                # Insertion point reached: add the whole copy-from document.
                self.writer.addpages(PdfReader(self.fileCopy).pages)
                self.page += self.pageIterate
                self.pages += copyFrom_totalPages
            else:
                # NOTE(review): presumably totalPages never lets currentPage
                # run past the end of fPath.pages — verify for edge cases.
                self.writer.addpage(self.fPath.pages[self.currentPage])
                self.currentPage += 1
                self.pages += 1

        # Write all the files into a file which is named as shown below
        # File directory is that of the last insert into file chosen
        self.writer.write(outfn)

        self.signals.finished.emit(self.jobID)
def concatenate(input_paths, output_path, details=None):
    """Concatenate pdf files, in order, into one pdf with the given metadata.

    Args:
        input_paths: An ordered sequence of paths to pdf files.
        output_path: The desired path for the concatenated pdf.
        details: A dictionary of metadata values desired for the final pdf;
            each key becomes a name in the Info dictionary.
    """
    out = PdfWriter()
    for source_path in input_paths:
        out.addpages(PdfReader(source_path).pages)

    # Start from an empty Info dictionary and fill in the requested entries.
    out.trailer.Info = IndirectPdfDict()
    if details is not None:
        info = out.trailer.Info
        for key in details:
            info[PdfName(key)] = details[key]

    out.write(output_path)
def consolidateAllSheets(self, subDir=None):
    """Merge every PDF sheet in the print directory into ``output.pdf``.

    Sheets are merged in sorted file-name order. Afterwards every entry in
    the directory other than ``output.pdf`` is removed.

    Args:
        subDir: Optional suffix appended to ``self.printdirectory`` to select
            the directory to consolidate.
    """
    directory = self.printdirectory
    if subDir is not None:
        directory = directory + subDir
    merged_name = 'output.pdf'
    merged_path = os.path.join(directory, merged_name)

    # An output.pdf left by an earlier run must not be merged into its own
    # replacement.
    sheets = sorted(
        entry for entry in os.listdir(directory)
        if entry.endswith('.pdf') and entry != merged_name
    )

    writer = PdfWriter()
    for fname in sheets:
        writer.addpages(PdfReader(os.path.join(directory, fname)).pages)
    # Paths are joined the same way for reading, writing and removing, so a
    # directory without a trailing separator also works.
    writer.write(merged_path)

    for entry in os.listdir(directory):
        if entry == merged_name:
            continue
        os.remove(os.path.join(directory, entry))
def generate_pdf():
    """Concatenate the per-page PDFs under ``targetDir`` into ``outputFile``.

    For every directory that ``os.walk`` visits, each name in the
    module-level ``pages`` list that ends in ``svg`` is mapped to its
    ``pdf`` counterpart in that directory and appended.
    """
    print("generating final pdf...")
    writer = PdfWriter()
    for subdir, _dirs, _files in os.walk(targetDir):
        for file in pages:
            print(file)
            if not file.endswith("svg"):
                continue
            filepath = subdir + os.sep + file.replace("svg", "pdf")
            if filepath.endswith(".pdf"):
                writer.addpages(PdfReader(filepath).pages)
    writer.write(outputFile)
    print("done!")
def merge_pdfs(file_path, sup_url):
    """Merge a paper's ``_bak`` and ``_sup`` PDFs into ``<file_path>.pdf``.

    If *sup_url* is non-empty and the merged file does not exist yet, the
    two parts are concatenated and then deleted. Otherwise an existing
    backup is simply renamed to the final name. A part that cannot be read
    is printed with its traceback and the process exits with status -1.
    """
    merged_path = file_path + '.pdf'
    backup_path = file_path + '_bak.pdf'

    needs_merge = len(sup_url) != 0 and not os.path.exists(merged_path)
    if not needs_merge:
        if os.path.exists(backup_path):
            os.rename(backup_path, merged_path)
        return

    supplement_path = file_path + '_sup.pdf'
    writer = PdfWriter()
    for part in (backup_path, supplement_path):
        try:
            writer.addpages(PdfReader(part).pages)
        except Exception:
            print(part)
            traceback.print_exc()
            exit(-1)
    writer.write(merged_path)
    os.remove(backup_path)
    os.remove(supplement_path)
def LoopDevices(self, looplist=None, Plot=False, plotdir=None, plotting=False):
    """Process every device in looplist and optionally merge the plots.

    Each device is run through ``self.PT1``; with ``Plot`` set, its status
    is plotted as well. When ``plotdir`` is given, the files collected in
    ``self.pdf_file_list`` are merged into ``<plotdir>alldevices.pdf``.

    Returns 1 on completion, or None (after ``ErrorCode(101)``) when no
    looplist is supplied.
    """
    # Reset the list of pdf file names; it is needed to join the pdf files.
    self.pdf_file_list = []

    if looplist is None:
        self.ErrorCode(101)
        return

    for device in looplist:
        print(self.prompt, " currently working on ", device)
        self.PT1(device)
        print(type(device))
        if Plot:
            self.PlotDeviceStatus(device,
                                  symbol='d',
                                  color1='b',
                                  plotdir=plotdir,
                                  plotting=plotting)

    # Done looping: merge all plot files into one large file.
    if plotdir is not None:
        masterfile = plotdir + 'alldevices.pdf'
        merged = PdfWriter()
        for plot_path in self.pdf_file_list:
            merged.addpages(PdfReader(plot_path).pages)
        merged.write(masterfile)
        print(self.prompt, 'figure saved in ', masterfile)

    return 1
def append_js_to_pdf(file_name):
    """Attach a JavaScript action to every annotation and write ``test.pdf``.

    The script is read from the file named by ``sys.argv[1]``. When that
    argument is missing or the file cannot be read, a small alert script is
    used. Each annotation of each page gets an ``AA`` dictionary whose ``V``
    entry is the action built by ``make_js_action``.

    Args:
        file_name: Path of the PDF whose annotations are updated.
    """
    pdf_writer = PdfWriter()
    pdf_reader = PdfReader(file_name)

    try:
        # The context manager closes the script file, which used to leak.
        with open(sys.argv[1]) as js_file:
            js = js_file.read()
    except (IndexError, IOError, OSError, UnicodeError):
        js = "app.alert('hi')"

    for page in pdf_reader.pages:
        page.Type = PdfName.Page
        try:
            print(page.Annots)
            # A page without annotations has nothing to update.
            for field in page.Annots or ():
                field.update(PdfDict(AA=PdfDict(V=make_js_action(js))))
        except Exception as exc:
            # Still best effort per page, but the failure is now reported
            # instead of being swallowed by a bare except.
            print('could not update annotations: %s' % exc)
        pdf_writer.addpage(page)

    pdf_writer.write('test.pdf')
def reverse(self, out_path: str = None) -> None:
    '''
    Reverse the page order (from last to first) of the PDF.

    Note:
        With the default settings this overwrites this object's PDF file.

    Args:
        out_path: Optional string, default=None. If supplied, the output is
            saved to this path instead of overwriting this PDF object's path.

    Returns:
        None

    Raises:
        No exceptions raised

    Examples:
        Invoke like this to overwrite this PDF's file:

        ```>>> my_pdf.reverse()```

        Pass in a path to save as a new file.

        ```>>> my_pdf.reverse('/path/to/new/file.pdf')```
    '''
    target = out_path if out_path else self.path
    outdata = PdfWriter(target)
    pages = PdfReader(self.path).pages

    # Walk the page list from the last index down to zero.
    index = len(pages) - 1
    while index >= 0:
        outdata.addpage(pages[index])
        index -= 1
    outdata.write()
def main(self, folder: cli.ExistingDirectory):
    """Convert the images in *folder* to PDF pages and join them.

    Files whose suffix is in ``self.ALLOWED_SUFFIXES`` are converted, in
    sorted order, to one-page PDFs in a temporary directory. Those PDFs are
    then concatenated into ``output.pdf`` inside *folder*.
    """
    image_paths = sorted(
        p for p in folder.list() if p.suffix in self.ALLOWED_SUFFIXES)

    with tempfile.TemporaryDirectory() as temp_dir:
        temp_dir_path = local.path(temp_dir)

        print("Converting images to pdfs")
        for path in image_paths:
            print(f"Converting {path}")
            rgb_image = Image.open(path).convert("RGB")
            rgb_image.save(temp_dir_path / path.basename + ".pdf")

        page_files = sorted(
            p for p in temp_dir_path.list() if p.suffix == ".pdf")

        print("Concatenating PDFs")
        writer = PdfWriter()
        for page_file in page_files:
            writer.addpages(PdfReader(str(page_file)).pages)
        writer.write(str(folder / "output.pdf"))
def post(self, request):
    """Extract a page or page range from an uploaded PDF and save it as a book.

    The ``page`` field is either a single 1-based page number (``"5"``) or
    an inclusive range (``"3-7"``). The extracted pages are written to
    ``media/extracted_page_<start>-<end>.pdf`` and that file name replaces
    the uploaded PDF on the saved record.

    Returns:
        201 with the serialized record on success, 400 with the validation
        errors otherwise.
    """
    serializer = BookSerializer(data=request.data)
    if not serializer.is_valid():
        return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)

    inpfn = serializer.validated_data['pdf']
    page_range = [int(y) for y in serializer.validated_data['page'].split('-')]
    # Normalise before reading the end page: a single page "N" becomes N..N.
    # Indexing page_range[1] first raised IndexError for single-page requests.
    page_range = (page_range + page_range[-1:])[:2]
    page_start, page_end = page_range

    out_name = 'extracted_page_{}-{}.pdf'.format(page_start, page_end)
    outfn = os.path.join('media', out_name)

    pages = PdfReader(inpfn).pages
    outdata = PdfWriter(outfn)
    for pagenum in range(page_start, page_end + 1):
        outdata.addpage(pages[pagenum - 1])
    outdata.write()

    serializer.validated_data['pdf'] = out_name
    serializer.save()
    return Response(serializer.data, status=status.HTTP_201_CREATED)
def book_list(request):
    """List all books, or on POST return all their PDFs merged into one file.

    A POST writes the merged document to ``media/mergedfile.pdf`` and
    returns it as an ``application/octet-stream`` attachment. Any other
    method renders ``book_list.html`` with the books.
    """
    books = Book.objects.all()
    if request.method != 'POST':
        return render(request, 'book_list.html', {'books': books})

    merged_path = os.path.join('media', 'mergedfile.pdf')
    writer = PdfWriter()
    for book in books:
        if book:
            writer.addpages(PdfReader(book.pdf).pages)
    with open(merged_path, 'wb') as pdfOutputFile:
        writer.write(pdfOutputFile)

    # Set the content type on the response itself. Assigning
    # response['content_type'] only added a header literally named
    # "content_type". The response object takes over the open file handle.
    response = FileResponse(open(merged_path, 'rb'),
                            content_type="application/octet-stream")
    response['Content-Disposition'] = 'attachment; filename="mergedfile.pdf"'
    return response
#!/usr/bin/env python # -*- coding: utf-8 -*- ''' usage: alter.py my.pdf Creates alter.my.pdf Demonstrates making a slight alteration to a preexisting PDF file. Also demonstrates Unicode support. ''' import sys import os from pdfrw import PdfReader, PdfWriter inpfn, = sys.argv[1:] outfn = 'alter.' + os.path.basename(inpfn) trailer = PdfReader(inpfn) trailer.Info.Title = 'My New Title Goes Here - 我的新名称在这儿' writer = PdfWriter() writer.trailer = trailer writer.write(outfn)
def handle(self, f=inputf, out=outputf, with_pdfrw=with_pdfrw):
    """Convert image *f* with img2pdf and check the PDF that comes out.

    The test verifies the structure of the generated PDF, the image
    properties and pixel data against the input image, and finally that the
    generated PDF and the reference file *out* serialise identically after
    a round trip through pdfrw.

    The defaults bind the enclosing scope's ``inputf``, ``outputf`` and
    ``with_pdfrw`` at definition time.
    """
    with open(f, "rb") as inf:
        orig_imgdata = inf.read()
    output = img2pdf.convert(orig_imgdata, nodate=True,
                             with_pdfrw=with_pdfrw)
    from io import StringIO, BytesIO
    from pdfrw import PdfReader, PdfName, PdfWriter
    from pdfrw.py23_diffs import convert_load, convert_store
    x = PdfReader(StringIO(convert_load(output)))

    # Trailer and catalog structure.
    self.assertEqual(sorted(x.keys()), [PdfName.Info, PdfName.Root,
                                        PdfName.Size])
    self.assertEqual(x.Size, '7')
    self.assertEqual(x.Info, {})
    self.assertEqual(sorted(x.Root.keys()), [PdfName.Pages,
                                             PdfName.Type])
    self.assertEqual(x.Root.Type, PdfName.Catalog)

    # Page tree: exactly one page.
    self.assertEqual(sorted(x.Root.Pages.keys()),
                     [PdfName.Count, PdfName.Kids, PdfName.Type])
    self.assertEqual(x.Root.Pages.Count, '1')
    self.assertEqual(x.Root.Pages.Type, PdfName.Pages)
    self.assertEqual(len(x.Root.Pages.Kids), 1)

    # The single page object and its content stream.
    self.assertEqual(sorted(x.Root.Pages.Kids[0].keys()),
                     [PdfName.Contents, PdfName.MediaBox,
                      PdfName.Parent, PdfName.Resources, PdfName.Type])
    self.assertEqual(x.Root.Pages.Kids[0].MediaBox,
                     ['0', '0', '115', '48'])
    self.assertEqual(x.Root.Pages.Kids[0].Parent, x.Root.Pages)
    self.assertEqual(x.Root.Pages.Kids[0].Type, PdfName.Page)
    self.assertEqual(x.Root.Pages.Kids[0].Resources.keys(),
                     [PdfName.XObject])
    self.assertEqual(x.Root.Pages.Kids[0].Resources.XObject.keys(),
                     [PdfName.Im0])
    self.assertEqual(x.Root.Pages.Kids[0].Contents.keys(),
                     [PdfName.Length])
    self.assertEqual(x.Root.Pages.Kids[0].Contents.Length,
                     str(len(x.Root.Pages.Kids[0].Contents.stream)))
    self.assertEqual(x.Root.Pages.Kids[0].Contents.stream,
                     "q\n115.0000 0 0 48.0000 0.0000 0.0000 cm\n/Im0 "
                     "Do\nQ")

    imgprops = x.Root.Pages.Kids[0].Resources.XObject.Im0

    # test if the filter is valid:
    self.assertIn(
        imgprops.Filter, [[PdfName.DCTDecode], [PdfName.JPXDecode],
                          [PdfName.FlateDecode]])
    # test if the colorspace is valid
    self.assertIn(
        imgprops.ColorSpace, [PdfName.DeviceGray, PdfName.DeviceRGB,
                              PdfName.DeviceCMYK])

    # test if the image has correct size
    orig_img = Image.open(f)
    self.assertEqual(imgprops.Width, str(orig_img.size[0]))
    self.assertEqual(imgprops.Height, str(orig_img.size[1]))
    # if the input file is a jpeg then it should've been copied
    # verbatim into the PDF
    if imgprops.Filter in [[PdfName.DCTDecode], [PdfName.JPXDecode]]:
        self.assertEqual(
            x.Root.Pages.Kids[0].Resources.XObject.Im0.stream,
            convert_load(orig_imgdata))
    elif imgprops.Filter == [PdfName.FlateDecode]:
        # otherwise, the data is flate encoded and has to be equal to
        # the pixel data of the input image
        imgdata = zlib.decompress(
            convert_store(
                x.Root.Pages.Kids[0].Resources.XObject.Im0.stream))
        # Map the PDF colour space name to the matching PIL mode.
        colorspace = imgprops.ColorSpace
        if colorspace == PdfName.DeviceGray:
            colorspace = 'L'
        elif colorspace == PdfName.DeviceRGB:
            colorspace = 'RGB'
        elif colorspace == PdfName.DeviceCMYK:
            colorspace = 'CMYK'
        else:
            raise Exception("invalid colorspace")
        im = Image.frombytes(colorspace, (int(imgprops.Width),
                                          int(imgprops.Height)),
                             imgdata)
        # Bring the original into a mode comparable with the decoded data.
        if orig_img.mode == '1':
            orig_img = orig_img.convert("L")
        elif orig_img.mode not in ("RGB", "L", "CMYK", "CMYK;I"):
            orig_img = orig_img.convert("RGB")
        self.assertEqual(im.tobytes(), orig_img.tobytes())
        # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have
        # the close() method
        try:
            im.close()
        except AttributeError:
            pass

    # now use pdfrw to parse and then write out both pdfs and check the
    # result for equality
    y = PdfReader(out)
    outx = BytesIO()
    outy = BytesIO()
    xwriter = PdfWriter()
    ywriter = PdfWriter()
    xwriter.trailer = x
    ywriter.trailer = y
    xwriter.write(outx)
    ywriter.write(outy)
    self.assertEqual(outx.getvalue(), outy.getvalue())
    # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the
    # close() method
    try:
        orig_img.close()
    except AttributeError:
        pass
1) Concatenating multiple input PDFs. 2) adding metadata to the PDF. If you do not need to add metadata, look at subset.py, which has a simpler interface to PdfWriter. ''' import sys import os import find_pdfrw from pdfrw import PdfReader, PdfWriter, IndirectPdfDict inputs = sys.argv[1:] assert inputs outfn = 'output.pdf' writer = PdfWriter() for inpfn in inputs: writer.addpages(PdfReader(inpfn.pages) writer.trailer.Info = IndirectPdfDict( Title = 'your title goes here', Author = 'your name goes here', Subject = 'what is it all about?', Creator = 'some script goes here', ) writer.write(outfn)
def notebook(path, uuid, path_annotated_pdf, is_landscape, path_templates=None):
    """Render a notebook's ``.rm`` pages over its templates into one PDF.

    The files ``<path>/<uuid>/0.rm``, ``1.rm``, ... are rendered until the
    first missing index. A base PDF of blank or template pages is written
    to *path_annotated_pdf*. It is then read back, each rendered overlay is
    merged onto the page with the same index, and the result is written to
    the same path.
    """
    rm_files_path = "%s/%s" % (path, uuid)

    # Render consecutive .rm files until one is missing.
    annotations_pdf = []
    page_index = 0
    rm_file_name = "%s/%d" % (rm_files_path, page_index)
    while os.path.exists("%s.rm" % rm_file_name):
        layout = PDFPageLayout(is_landscape=is_landscape)
        annotations_pdf.append(_render_rm_file(rm_file_name, layout))
        page_index += 1
        rm_file_name = "%s/%d" % (rm_files_path, page_index)

    # Write the empty notebook: blank pages or templates.
    base_writer = PdfWriter()
    for template in _get_templates_per_page(path, uuid, path_templates):
        base_page = _blank_page() if template is None else template.pages[0]
        base_writer.addpage(base_page)
    base_writer.write(path_annotated_pdf)

    # Overlay the empty notebook with the annotations.
    templates_pdf = PdfReader(path_annotated_pdf)
    for index, overlay in enumerate(annotations_pdf):
        templates_pdf.pages[index].Rotate = 90 if is_landscape else 0
        if len(overlay.pages) <= 0:
            # Nothing was drawn on this page.
            continue
        annotated_page = overlay.pages[0]
        annotated_page.Rotate = -90 if is_landscape else 0
        merger = PageMerge(templates_pdf.pages[index])
        merger.add(annotated_page).render()

    PdfWriter().write(path_annotated_pdf, templates_pdf)
def write_async(self, outfile, process_semaphore, progress_cb=None):
    """Assemble all pages into a PDF with pdfrw, then post-process with qpdf.

    Generator-based coroutine. One page dictionary is created per entry of
    ``self._pages``; all pages are filled concurrently via ``asyncio.gather``.
    The intermediate PDF is written as ``temp.pdf`` in a temporary directory
    and ``QPDF_CMD`` is run on it to produce ``outfile``.

    Args:
        outfile: Path of the final PDF handed to the qpdf command.
        process_semaphore: Passed through to every image/thumbnail/mask
            helper and to ``run_command_async``.
        progress_cb: Optional callable; called after each finished page with
            ``finished_pages / len(self._pages)``.

    NOTE(review): block nesting below was reconstructed from a
    whitespace-flattened source -- in particular, confirm that the link
    annotation branch sits at loop level and not under ``if t.text``.
    """
    pdf_writer = PdfWriter(version="1.5")
    # Group dictionary shared by every page (assigned to pdf_page.Group).
    pdf_group = PdfDict()
    pdf_group.indirect = True
    pdf_group.CS = PdfName.DeviceRGB
    pdf_group.I = PdfBool(True)
    pdf_group.S = PdfName.Transparency
    # Font resource dictionary; attached only to pages that emit text.
    pdf_font_mapping = PdfDict()
    pdf_font_mapping.indirect = True
    pdf_font_mapping.F1 = self._build_font()
    # Register one empty page dictionary per source page up front so that
    # internal links can reference any page, including later ones.
    for _ in self._pages:
        pdf_page = PdfDict()
        pdf_page.Type = PdfName.Page
        pdf_writer.addpage(pdf_page)
    # pdfrw makes an internal copy of the pages;
    # use the copy so that references to pages in links are correct
    pdf_pages = list(pdf_writer.pagearray)

    # Handle all pages in parallel
    @asyncio.coroutine
    def make_page(page, pdf_page, psem):
        # Prepare everything in parallel
        @asyncio.coroutine
        def get_pdf_thumbnail(psem):
            if page.thumbnail is None:
                return None
            return (yield from page.thumbnail.pdf_thumbnail(psem))

        @asyncio.coroutine
        def get_pdf_background(psem):
            if page.background is None:
                return None
            return (yield from page.background.pdf_image(psem))

        @asyncio.coroutine
        def get_pdf_mask(foreground, psem):
            # A mask is only requested for foregrounds without a color.
            if foreground.color is not None:
                return None
            return (yield from foreground.pdf_mask(psem))

        # pdf_foregrounds / pdf_masks are lists in page.foreground order.
        pdf_thumbnail, pdf_background, pdf_foregrounds, pdf_masks = (
            yield from asyncio.gather(
                get_pdf_thumbnail(psem),
                get_pdf_background(psem),
                asyncio.gather(*[fg.pdf_image(psem)
                                 for fg in page.foreground]),
                asyncio.gather(*[get_pdf_mask(fg, psem)
                                 for fg in page.foreground])))
        pdf_page.MediaBox = PdfArray([0, 0,
                                      PdfNumber(page.width),
                                      PdfNumber(page.height)])
        pdf_page.Group = pdf_group
        pdf_resources = PdfDict()
        pdf_xobject = PdfDict()
        if pdf_thumbnail is not None:
            pdf_page.Thumb = pdf_thumbnail
        # Running counter for the /Im<n> XObject names on this page.
        im_index = 0
        # Save graphics state and scale unity rectangle to page size
        matrix = TransformationMatrix()
        matrix.scale(page.width, page.height)
        before_graphics = ("q\n" +
                           "%s cm\n" % matrix.to_pdf())
        after_graphics = "\nQ\n"
        contents = ""
        graphics = ""
        # Tracks the last fill color written, to avoid repeating "rg".
        current_color = None
        # A non-white page color is painted as a filled unit rectangle.
        if page.color != self._factory.WHITE:
            if current_color != page.color:
                current_color = page.color
                graphics += page.color.to_pdf() + " rg "
            graphics += ("0 0 1 1 re " +
                         "f\n")
        if pdf_background is not None:
            pdf_xobject[PdfName("Im%d" % im_index)] = pdf_background
            graphics += "/Im%d Do\n" % im_index
            im_index += 1
        for foreground, pdf_foreground, pdf_mask in zip(
                page.foreground, pdf_foregrounds, pdf_masks):
            # A mask gets its own /Im name but no "Do" operator is
            # emitted for it here; only the foreground is drawn.
            if pdf_mask is not None:
                pdf_xobject[PdfName("Im%d" % im_index)] = pdf_mask
                im_index += 1
            pdf_xobject[PdfName("Im%d" % im_index)] = pdf_foreground
            if (foreground.color is not None and
                    current_color != foreground.color):
                current_color = foreground.color
                graphics += foreground.color.to_pdf() + " rg "
            graphics += "/Im%d Do\n" % im_index
            im_index += 1
        if graphics:
            contents += (before_graphics +
                         graphics.rstrip(" \n") +
                         after_graphics)
        current_color = None
        # Text block: font F1 at size 1, text rendering mode 3.
        before_text = ("BT\n" +
                       "/F1 1 Tf 3 Tr\n")
        after_text = "\nET\n"
        text = ""
        pdf_annots = []
        for t in page.text:
            if t.text:
                matrix = TransformationMatrix()
                # Glyph size is 0.5 x 1
                matrix.scale(2 / len(t.text), 1)
                # Move to a centered origin, apply direction and rotation,
                # move back, then scale/translate to the text rectangle.
                matrix.translate(-0.5, -0.5)
                if t.direction == "ltr":
                    pass
                elif t.direction == "rtl":
                    matrix.translate(0, -1)
                elif t.direction == "ttb":
                    matrix.rotate(90)
                matrix.rotate(-t.rotation)
                matrix.translate(0.5, 0.5)
                matrix.scale(t.width, t.height)
                matrix.translate(t.x, t.y)
                # The string is written hex-encoded from UTF-16-BE bytes.
                text += "%s Tm %s Tj\n" % (
                    matrix.to_pdf(),
                    PdfString().from_bytes(
                        t.text.encode("utf-16-be"), bytes_encoding="hex"))
            if t.external_link is not None or t.internal_link is not None:
                # Link annotation covering the text rectangle, no border.
                pdf_annot = PdfDict()
                pdf_annots.append(pdf_annot)
                pdf_annot.Type = PdfName.Annot
                pdf_annot.Subtype = PdfName.Link
                pdf_annot.Border = [0, 0, 0]
                pdf_annot.Rect = [PdfNumber(t.x),
                                  PdfNumber(t.y),
                                  PdfNumber(t.x + t.width),
                                  PdfNumber(t.y + t.height)]
                if t.external_link is not None:
                    # external_link is decoded as latin-1, so it is
                    # expected to be bytes.
                    pdf_a = PdfDict()
                    pdf_annot.A = pdf_a
                    pdf_a.Type = PdfName.Action
                    pdf_a.S = PdfName.URI
                    pdf_a.URI = t.external_link.decode("latin-1")
                if t.internal_link is not None:
                    # internal_link is (page index, (x, y)).
                    pdf_target_page = pdf_pages[t.internal_link[0]]
                    target_x, target_y = t.internal_link[1]
                    pdf_annot.Dest = [
                        pdf_target_page,
                        PdfName.XYZ,
                        PdfNumber(target_x),
                        PdfNumber(target_y),
                        0]
        text = text.rstrip(" \n")
        if text:
            pdf_resources.Font = pdf_font_mapping
            contents += (before_text +
                         text +
                         after_text)
        contents = contents.rstrip(" \n")
        if contents:
            pdf_contents = PdfDict()
            pdf_contents.indirect = True
            pdf_page.Contents = pdf_contents
            if COMPRESS_PAGE_CONTENTS:
                pdf_contents.Filter = [PdfName.FlateDecode]
                # Compressed bytes are mapped back to str via latin-1,
                # the same codec used to encode the content string.
                pdf_contents.stream = zlib.compress(
                    contents.encode("latin-1"), 9).decode("latin-1")
            else:
                pdf_contents.stream = contents
        # Optional entries are only attached when they are non-empty.
        if pdf_annots:
            pdf_page.Annots = pdf_annots
        if pdf_xobject:
            pdf_resources.XObject = pdf_xobject
        if pdf_resources:
            pdf_page.Resources = pdf_resources
        # Report progress
        nonlocal finished_pages
        finished_pages += 1
        if progress_cb:
            progress_cb(finished_pages / len(self._pages))

    # Shared counter updated by every make_page() via nonlocal.
    finished_pages = 0
    yield from asyncio.gather(
        *[make_page(page, pdf_page, process_semaphore)
          for page, pdf_page in zip(self._pages, pdf_pages)])

    # Write the pdfrw result to a temporary file and let qpdf produce the
    # final output from it.
    with TemporaryDirectory(prefix="djpdf-") as temp_dir:
        pdf_writer.write(path.join(temp_dir, "temp.pdf"))
        cmd = [QPDF_CMD,
               "--stream-data=preserve",
               "--object-streams=preserve",
               "--normalize-content=n"]
        if LINEARIZE_PDF:
            cmd.extend(["--linearize"])
        cmd.extend([path.abspath(path.join(temp_dir, "temp.pdf")),
                    path.abspath(outfile)])
        yield from run_command_async(cmd, process_semaphore)
import sys
import argparse
import itertools

from pdfrw import PdfWriter, PdfReader

# Command line: two required input PDFs and an optional output path.
parser = argparse.ArgumentParser(
    description='Interlaces two pdf to make one complete pdf.')
parser.add_argument(
    'front_pdf_loc', type=str,
    help="PDF of fronts of pages")
parser.add_argument(
    'back_pdf_loc', type=str,
    help="PDF of backs of pages")
parser.add_argument(
    'output_loc', type=str, nargs='?', default="output.pdf",
    help="Output location for interlaced PDF")
args = parser.parse_args()

output = PdfWriter()
front_pdf = PdfReader(args.front_pdf_loc)
back_pdf = PdfReader(args.back_pdf_loc)

front_pages = front_pdf.pages
back_pages = back_pdf.pages
if len(front_pages) != len(back_pages):
    print("PDFs must have the same number of pages")
    sys.exit(1)

# Pair each front page with a back page, taking the backs in reverse order,
# and emit the pairs flattened: front, back, front, back, ...
page_pairs = zip(front_pages, back_pages[::-1])
for page in itertools.chain.from_iterable(page_pairs):
    output.addpage(page)
output.write(args.output_loc)
changes = [] for (srcpath, _, filenames) in os.walk('ramdisk/reference'): for name in filenames: if not name.endswith('.pdf'): continue src = os.path.join(srcpath, name) dst = src.replace('/reference/', '/tmp_results/') if not os.path.exists(dst): continue src_digest = get_digest(src) if not src_digest or src_digest not in expected: continue print src count += 1 trailer = make_canonical(PdfReader(src)) out = PdfWriter(tmp) out.write(trailer=trailer) match_digest = get_digest(tmp) if not match_digest: continue trailer = make_canonical(PdfReader(dst)) out = PdfWriter(tmp) out.write(trailer=trailer) if get_digest(tmp) != match_digest: continue goodcount += 1 print "OK" changes.append((src_digest, get_digest(dst))) print count, goodcount
def pdf(rm_files_path, path_highlighter, pages, path_original_pdf,
        path_annotated_pdf, path_oap_pdf):
    """
    Render pdf with annotations. The path_oap_pdf defines the pdf
    which includes only annotated pages.

    Args:
        rm_files_path: Directory holding one ``<page_nr>.rm`` file per
            annotated page.
        path_highlighter: Directory holding ``<pages[page_nr]>.json`` files
            that are passed to the renderer as ``page_file``.
        pages: Sequence indexed by page number; its entries name the
            highlighter JSON files.
        path_original_pdf: Path of the PDF that was annotated.
        path_annotated_pdf: Output path for the full document (annotated
            pages where available, original pages otherwise).
        path_oap_pdf: Output path for the document with annotated pages only.
    """
    # PdfReader consumes a file-like object in its constructor, so the
    # handle can be released immediately instead of being leaked.
    with open(path_original_pdf, "rb") as original_file:
        base_pdf = PdfReader(original_file)

    # The document-level MediaBox does not depend on the page, so it is
    # looked up once rather than on every loop iteration.
    if (hasattr(base_pdf, "Root")
            and hasattr(base_pdf.Root, "Pages")
            and hasattr(base_pdf.Root.Pages, "MediaBox")):
        default_layout = base_pdf.Root.Pages.MediaBox
    else:
        default_layout = None

    # Parse remarkable files and write into pdf.
    # Both lists get exactly one entry per page so they stay index-aligned;
    # None marks a page without a usable annotation.
    annotations_pdf = []
    offsets = []
    for page_nr in range(base_pdf.numPages):
        rm_file_name = "%s/%d" % (rm_files_path, page_nr)
        rm_file = "%s.rm" % rm_file_name
        if not os.path.exists(rm_file):
            annotations_pdf.append(None)
            offsets.append(None)
            continue

        page_layout = PDFPageLayout(base_pdf.pages[page_nr],
                                    default_layout=default_layout)
        if page_layout.layout is None:
            annotations_pdf.append(None)
            offsets.append(None)
            continue

        page_file = os.path.join(path_highlighter,
                                 f"{pages[page_nr]}.json")
        annotated_page, offset = _render_rm_file(
            rm_file_name,
            page_layout=page_layout,
            page_file=page_file,
        )
        if len(annotated_page.pages) <= 0:
            annotations_pdf.append(None)
        else:
            annotations_pdf.append(annotated_page.pages[0])
        offsets.append(offset)

    # Merge annotations pdf and original pdf
    writer_full = PdfWriter()
    writer_oap = PdfWriter()
    for i, (annotations_page, offset) in enumerate(
            zip(annotations_pdf, offsets)):
        if annotations_page is None:
            writer_full.addpage(base_pdf.pages[i])
            continue

        # The annotations page is at least as large as the base PDF page,
        # so we merge the base PDF page under the annotations page.
        merger = PageMerge(annotations_page)
        base_layer = merger.add(base_pdf.pages[i], prepend=True)[0]
        # Shift the base page by the offset the renderer reported.
        base_layer.x -= offset[0]
        base_layer.y -= offset[1]
        merger.render()
        writer_oap.addpage(annotations_page)
        writer_full.addpage(annotations_page)

    writer_full.write(path_annotated_pdf)
    writer_oap.write(path_oap_pdf)
page.AA = PdfDict() # You probably should just wrap each JS action with a try/catch, # because Chrome does no error reporting or even logging otherwise; # you just get a silent failure. page.AA.O = make_js_action(""" try { %s } catch (e) { app.alert(e.message); } """ % (script)) page.Annots = PdfArray(annots) return page if len(sys.argv) > 1: js_file = open(sys.argv[1], 'r') fields = [] for line in js_file: if not line.startswith('/// '): break pieces = line.split() params = [pieces[1]] + [float(token) for token in pieces[2:]] fields.append(make_field(*params)) js_file.seek(0) out = PdfWriter() out.addpage(make_page(fields, js_file.read())) out.write('result.pdf')
usage: 4up.py my.pdf Creates 4up.my.pdf with a single output page for every 4 input pages. """ import sys import os from pdfrw import PdfReader, PdfWriter, PageMerge def get4(srcpages): scale = 0.5 srcpages = PageMerge() + srcpages x_increment, y_increment = (scale * i for i in srcpages.xobj_box[2:]) for i, page in enumerate(srcpages): page.scale(scale) page.x = x_increment if i & 1 else 0 page.y = 0 if i & 2 else y_increment return srcpages.render() inpfn, = sys.argv[1:] outfn = "4up." + os.path.basename(inpfn) pages = PdfReader(inpfn).pages writer = PdfWriter() for index in range(0, len(pages), 4): writer.addpage(get4(pages[index : index + 4])) writer.write(outfn)
parser.add_argument('--evenrev', dest='evenrev', action='store_const', const=True, default=False, help='reverses the even pages before shuffling') args = parser.parse_args() # The shuffling magic even = PdfReader(args.evenFile[0]) odd = PdfReader(args.oddFile[0]) isEvenReversed = args.evenrev; isOddReversed = args.oddrev; all = PdfWriter() blank = PageMerge() blank.mbox = [0, 0, 612, 792] # 8.5 x 11 blank = blank.render() if isEvenReversed and not isOddReversed: for i in range(0, len(odd.pages)): all.addpage(odd.pages[i]) all.addpage(even.pages[len(even.pages)-1-i]) elif isOddReversed and not isEvenReversed: for i in range(0, len(odd.pages)): all.addpage(odd.pages[len(odd.pages)-1-i]) all.addpage(even.pages[i]) elif isEvenReversed and isOddReversed: for i in range(0, len(odd.pages)): all.addpage(odd.pages[len(odd.pages)-1-i])
def save_to_file(pdf_obj, file_path):
    """Write ``pdf_obj`` to ``file_path`` with a fresh PdfWriter.

    Debug messages are logged before and after the write; only the last
    three '/'-separated components of the path appear in the log.
    """
    tail_components = file_path.rsplit('/', 3)[-3:]
    logger.debug("Saving to file: %s" % '/'.join(tail_components))
    PdfWriter().write(file_path, pdf_obj)
    logger.debug("Done")
'''
usage:   unspread.py my.pdf

Creates unspread.my.pdf

Cuts every page of the input vertically down the middle and writes the
left and right halves as separate pages, e.g. to recover the individual
pages of a source that was laid out in booklet form.
'''

import sys
import os

from pdfrw import PdfReader, PdfWriter, PageMerge


def splitpage(src):
    ''' Yield the left half, then the right half, of a page '''
    # viewrect is (x, y, width, height); each half is 0.5 wide, 1 high.
    left_half = (0, 0, 0.5, 1)
    right_half = (0.5, 0, 0.5, 1)
    for viewrect in (left_half, right_half):
        merged = PageMerge().add(src, viewrect=viewrect)
        yield merged.render()


inpfn, = sys.argv[1:]
outfn = 'unspread.' + os.path.basename(inpfn)

writer = PdfWriter()
for page in PdfReader(inpfn).pages:
    for half in splitpage(page):
        writer.addpage(half)
writer.write(outfn)
def go(inpfn, outfn):
    """Read ``inpfn`` and write ``outfn`` from repeated ``get4`` results.

    The input is opened with ``decompress=False``. ``get4`` is called on the
    same page list until that list is empty, so it is presumably expected to
    remove the pages it uses -- verify against the ``get4`` in this module.
    """
    remaining = PdfReader(inpfn, decompress=False).pages
    out = PdfWriter()
    while remaining:
        combined = get4(remaining)
        out.addpage(combined)
    out.write(outfn)
So she did an 8.5x11" output with 0.5" margin all around (actual size of useful area 7.5x10") and we scaled it up by 4.8. We also copy the Info dict to the new PDF. ''' import sys import os from pdfrw import PdfReader, PdfWriter, PageMerge, IndirectPdfDict def adjust(page, margin=36, scale=4.8): info = PageMerge().add(page) x1, y1, x2, y2 = info.xobj_box viewrect = (margin, margin, x2 - x1 - 2 * margin, y2 - y1 - 2 * margin) page = PageMerge().add(page, viewrect=viewrect) page[0].scale(scale) return page.render() inpfn, = sys.argv[1:] outfn = 'poster.' + os.path.basename(inpfn) reader = PdfReader(inpfn) writer = PdfWriter(outfn) writer.addpage(adjust(reader.pages[0])) writer.trailer.Info = IndirectPdfDict(reader.Info or {}) writer.write()
#!/usr/bin/env python

'''
usage:   subset.py my.pdf page[range] [page[range]] ...
         eg. subset.py my.pdf 1-3 5 7-9

Creates subset.my.pdf containing the listed pages, in the order given.
Page numbers are 1-based; a range "a-b" includes both ends.
'''

import sys
import os

from pdfrw import PdfReader, PdfWriter

inpfn = sys.argv[1]
ranges = sys.argv[2:]
assert ranges, "Expected at least one range"

outfn = 'subset.%s' % os.path.basename(inpfn)
pages = PdfReader(inpfn).pages
outdata = PdfWriter(outfn)

# Each argument is parsed only when its turn comes, after the input has
# been opened.
for spec in ranges:
    bounds = [int(part) for part in spec.split('-')]
    first = bounds[0]
    # A single number stands for a one-page range; extra parts are ignored.
    last = bounds[1] if len(bounds) > 1 else bounds[0]
    for pagenum in range(first, last + 1):
        outdata.addpage(pages[pagenum - 1])
outdata.write()
import sys
import os

import find_pdfrw
from pdfrw import PdfReader, PdfWriter

# Arguments: input file, rotation in degrees, then optional page ranges.
inpfn = sys.argv[1]
rotate = int(sys.argv[2])
assert rotate % 90 == 0

ranges = []
for spec in sys.argv[3:]:
    ranges.append([int(part) for part in spec.split('-')])

outfn = 'rotate.%s' % os.path.basename(inpfn)
trailer = PdfReader(inpfn)
pages = trailer.pages

# Without explicit ranges, every page is rotated.
if not ranges:
    ranges = [[1, len(pages)]]

for bounds in ranges:
    # A single number stands for a one-page range; extra parts are ignored.
    first = bounds[0]
    last = bounds[1] if len(bounds) > 1 else bounds[0]
    for pagenum in range(first - 1, last):
        page = pages[pagenum]
        # Add to the inherited rotation (0 when unset), wrapped to 0..359.
        current = int(page.inheritable.Rotate or 0)
        page.Rotate = (current + rotate) % 360

# Reuse the whole parsed trailer so the rest of the document is kept.
outdata = PdfWriter()
outdata.trailer = trailer
outdata.write(outfn)
def pdf(rm_files_path, path_highlighter, path_original_pdf,
        path_annotated_pdf, path_oap_pdf):
    """
    Render pdf with annotations. The path_oap_pdf defines the pdf
    which includes only annotated pages.

    Args:
        rm_files_path: Directory holding one ``<page_nr>.rm`` file per
            annotated page.
        path_highlighter: Forwarded to ``_render_rm_file``.
        path_original_pdf: Path of the PDF that was annotated.
        path_annotated_pdf: Output path for the full document.
        path_oap_pdf: Output path for the document with annotated pages only.
    """
    # PdfReader consumes a file-like object in its constructor, so the
    # handle can be released immediately instead of being leaked.
    with open(path_original_pdf, "rb") as original_file:
        base_pdf = PdfReader(original_file)

    # Parse remarkable files and write into pdf.
    # One entry per page; None marks a page without a usable annotation.
    annotations_pdf = []
    for page_nr in range(base_pdf.numPages):
        rm_file_name = "%s/%d" % (rm_files_path, page_nr)
        rm_file = "%s.rm" % rm_file_name
        if not os.path.exists(rm_file):
            annotations_pdf.append(None)
            continue

        page_layout = PDFPageLayout(base_pdf.pages[page_nr])
        if page_layout.layout is None:
            annotations_pdf.append(None)
            continue

        annotated_page = _render_rm_file(rm_file_name,
                                         page_layout=page_layout,
                                         path_highlighter=path_highlighter)
        if len(annotated_page.pages) <= 0:
            annotations_pdf.append(None)
        else:
            annotations_pdf.append(annotated_page.pages[0])

    # Merge annotations pdf and original pdf
    writer_full = PdfWriter()
    writer_oap = PdfWriter()
    for i, annotations_page in enumerate(annotations_pdf):
        base_page = base_pdf.pages[i]
        if annotations_page is not None:
            # Annotations are merged onto the base page itself, so the
            # same (now modified) page object goes into both outputs.
            PageMerge(base_page).add(annotations_page).render()
            writer_oap.addpage(base_page)
        writer_full.addpage(base_page)

    writer_full.write(path_annotated_pdf)
    writer_oap.write(path_oap_pdf)
try: from pdfrw import PdfReader, PdfWriter except ImportError: print("Instale em seu sistema a biblioteca pdfrw!\n\n") print("sudo apt install python3-pdfrw\n") quit() # Limpa o \n do final da linha na lista def remove_quebra_de_linha(linha): return linha.replace('\n', '') # Vai ser o responsável em escrever o PDFao writer = PdfWriter() # Lista contendo arquivos pdf, linha a linha, com o caminho completo do sistema de arquivos # Deve estar algo como: # /home/meu_usuario/arquivos_pdf/arquivo1.pdf # /home/meu_usuario/arquivos_pdf/arquivo2.pdf pdf_list = open("my_pdfs.txt") # caminho completo do arquivo de saída. Dessa forma abaixo, gera na pasta do script pdefao = 'super.pdf' # Lê linha a linha da lista de pdfs e adiciona ao arquivao for arquivo in pdf_list: arquivo = remove_quebra_de_linha(arquivo) writer.addpages(PdfReader(arquivo).pages)