def merge_watermark(pdf_file, save_dir, owner_pwd, p_value, wm_attrs):
    """Stamp a watermark onto every page of ``pdf_file`` and save the result.

    The output is written to ``save_dir`` under the same base name as the
    input.  If the input cannot be parsed, it is re-saved through ``fitz``
    (overwriting the original file) and parsed again.

    Args:
        pdf_file: Path of the PDF to watermark.
        save_dir: Directory that receives the watermarked copy.
        owner_pwd: Owner password for the output.  The values ``-1``, ``no``,
            ``none`` and ``null`` (case-insensitive) disable encryption.
        p_value: Permission flags forwarded to ``encrypt`` as ``P``.
        wm_attrs: Keyword arguments for ``create_watermark``; must contain
            ``out_dir`` when encryption is enabled.  The mapping is not
            modified; ``pagesize`` is added to a private copy.
    """
    out_file = os.path.join(save_dir, os.path.basename(pdf_file))
    try:
        pdf_reader = PdfFileReader(pdf_file)
    except Exception:
        # Best-effort repair: round-trip the document through fitz and retry.
        print('try to repair %s' % pdf_file)
        import fitz
        # splitext only touches the real extension; str.replace('.pdf', ...)
        # also rewrote directory names and did nothing for '.PDF'.
        root, ext = os.path.splitext(pdf_file)
        repair_pdf_file = root + '_repaired' + ext
        pdf_doc = fitz.open(pdf_file)
        try:
            pdf_doc.save(repair_pdf_file)
        finally:
            pdf_doc.close()
        shutil.move(repair_pdf_file, pdf_file)
        pdf_reader = PdfFileReader(pdf_file)
    if pdf_reader.isEncrypted:
        pdf_reader.decrypt('')
    pdf_writer = PdfFileWriter(out_file)

    # The watermark is generated once, sized after the first page.
    first_page = pdf_reader.getPage(0)
    page_width = first_page.mediaBox.getWidth()
    page_height = first_page.mediaBox.getHeight()
    wm_kwargs = dict(wm_attrs, pagesize=(page_width, page_height))
    wm_file = create_watermark(**wm_kwargs)  # for Portrait
    try:
        wm_page = PdfFileReader(wm_file).getPage(0)
        for page_num in range(pdf_reader.numPages):
            current_page = pdf_reader.getPage(page_num)
            current_page.mergePage(wm_page)
            pdf_writer.addPage(current_page)

        if owner_pwd.lower() not in ('-1', 'no', 'none', 'null'):
            pdf_writer.encrypt('', ownerPwd=owner_pwd, P=p_value)
            # NOTE(review): the owner password is appended to this file in
            # plain text -- confirm that this is acceptable.
            key_file = os.path.join(wm_attrs['out_dir'], '..', 'permission_key')
            old_keys = []
            if os.path.exists(key_file):
                with open(key_file, 'r', encoding='utf-8') as old_key_file:
                    old_keys = old_key_file.readlines()
            with open(key_file, 'w', encoding='utf-8') as f_log:
                # Only the newest 1000 earlier entries are kept.
                f_log.writelines(old_keys[-1000:])
                f_log.write('%s %s %s\n' % (time.strftime('%Y-%m-%d %H:%M:%S'),
                                            os.path.relpath(out_file),
                                            owner_pwd))
        pdf_writer.write()
        pdf_writer.close()
    finally:
        # Remove the temporary watermark even when merging or writing fails.
        if os.path.exists(wm_file):
            os.remove(wm_file)
def decrypt(query, pdfs):
    """Write a decrypted copy of every encrypted PDF in ``pdfs``.

    ``query`` is the password.  Each copy is saved next to its source as
    ``<name> (decrypted).pdf``.  A notification is posted per file, and a
    ``PdfReadError`` ends the run with an invalid-password notification.
    """
    try:
        for pdf in pdfs:
            reader = PdfFileReader(pdf, strict=False)

            if not reader.isEncrypted:
                notify.notify('Alfred PDF Tools',
                              'The PDF file is not encrypted.')
                continue

            reader.decrypt(query)

            # Copy every page into a fresh writer.
            writer = PdfFileWriter()
            for page_index in xrange(reader.numPages):
                writer.addPage(reader.getPage(page_index))

            base_path = os.path.splitext(pdf)[0]
            target = "{} (decrypted).pdf".format(base_path)
            with open(target, 'wb') as stream:
                writer.write(stream)

            notify.notify('Alfred PDF Tools',
                          'Decryption successfully completed.')

    except PdfReadError:
        notify.notify('Alfred PDF Tools',
                      'The entered password is not valid.')
def main():
    """Hand every image XObject of the PDF named on the command line to
    ``_handle_filter``.

    Expects exactly one argument, the PDF path.  For each page, every entry
    of ``/Resources/XObject`` whose ``/Subtype`` is ``/Image`` is passed on
    with its size, decoded data and a colour mode (``"RGB"`` for
    ``/DeviceRGB``, otherwise ``"P"``).  Pages without an ``/XObject``
    entry print ``No image found.``.

    Returns:
        1 when the command line is malformed, otherwise None.
    """
    if len(sys.argv) != 2:
        print("{}: <filepath>".format(sys.argv[0]))
        return 1

    filepath = sys.argv[1].strip()
    # Keep the stream open while pages are read, and close it afterwards
    # (the handle used to be leaked).
    with open(filepath, "rb") as pdf_stream:
        reader = PdfFileReader(pdf_stream)
        for page_number in range(reader.numPages):
            page = reader.getPage(page_number)
            if "/XObject" not in page["/Resources"]:
                print("No image found.")
                continue

            x_object = page["/Resources"]["/XObject"].getObject()
            for obj in x_object:
                image = x_object[obj]
                if image["/Subtype"] != "/Image":
                    continue
                size = (image["/Width"], image["/Height"])
                data = image.getData()
                if image["/ColorSpace"] == "/DeviceRGB":
                    mode = "RGB"
                else:
                    mode = "P"
                _handle_filter(x_object, obj, mode, size, data)
class Booklet:
    """Arrange the pages of a PDF side by side, two per sheet, for a booklet.

    Sheets are built from the middle of the document outwards; ``bind``
    selects which page of each pair goes on the left.
    """

    def __init__(self, args: "Args", str="booklet.pdf", bind: str = "left") -> None:
        """Open the source document and prepare the output writer.

        Args:
            args: Object providing ``docPath`` (source PDF) and ``outPath``
                (destination, may be empty).
            str: Output path used when ``args.outPath`` is empty.  The name
                shadows the builtin but is kept for backward compatibility.
            bind: ``"right"`` flips the left/right order of every sheet
                relative to any other value.

        Raises:
            AssertionError: If the page count is not a multiple of four.
        """
        self.doc = PdfFileReader(args.docPath)
        self.numPages = self.doc.numPages
        # Raised explicitly so the check is not stripped under ``python -O``.
        if self.numPages % 4:
            raise AssertionError(
                "your source pdf must have number of pages divisible by 4")
        # The ``str`` argument used to be ignored; it is now the fallback.
        outPath = args.outPath or str or "booklet.pdf"
        self.writer = PdfFileWriter(outPath)
        self.bind = bind

    def yieldSequence(self) -> Generator[List[int], None, None]:
        """Yield ``[left, right]`` zero-based page indices, one pair per sheet."""
        half = self.numPages // 2
        flipOnEven = self.bind == "right"
        for i in range(half):
            spread = [half - i - 1, half + i]
            # Right binding flips even sheets; any other binding flips odd ones.
            if (i % 2 == 0) == flipOnEven:
                spread.reverse()
            yield spread

    def makeBooklet(self) -> None:
        """Merge each page pair onto one blank sheet and queue it for writing."""
        for spread in self.yieldSequence():
            print(f"processing {spread}")
            # Fetch each page once instead of once per measurement.
            pages = [self.doc.getPage(pageNum) for pageNum in spread]
            width = [page.mediaBox.getWidth() for page in pages]
            height = [page.mediaBox.getHeight() for page in pages]
            sheet = PageObject.createBlankPage(None, sum(width), max(height))
            for i, page in enumerate(pages):
                # The second page starts where the first one ends.
                shift = 0 if i == 0 else width[i - 1]
                sheet.mergeScaledTranslatedPage(page, 1, shift, 0)
            self.writer.addPage(sheet)

    def writeBooklet(self) -> None:
        """Write the queued sheets through the writer."""
        self.writer.write()
        print("booklet is done")
def main():
    """Create a 2-up version of a PDF: ``2-up.py input_file output_file``.

    Consecutive pages are merged in pairs, the second page translated to
    the right edge of the first.  When the page count is odd, the last
    page is appended on its own instead of being dropped.

    Exits with status 1 when the argument count is wrong.
    """
    if len(sys.argv) != 3:
        print("usage: python 2-up.py input_file output_file")
        sys.exit(1)
    print("2-up input " + sys.argv[1])

    # The input must stay open until the output is written.
    with open(sys.argv[1], "rb") as input_stream:
        input1 = PdfFileReader(input_stream)
        output = PdfFileWriter()
        num_pages = input1.numPages
        for i in range(0, num_pages - 1, 2):
            lhs = input1.getPage(i)
            rhs = input1.getPage(i + 1)
            lhs.mergeTranslatedPage(rhs, lhs.mediaBox.getUpperRight_x(), 0, True)
            output.addPage(lhs)
            # Progress on one line; replaces the Python 2 ``print(...),`` leftover.
            sys.stdout.write(str(i) + " ")
            sys.stdout.flush()
        if num_pages % 2:
            # Unpaired last page: keep it rather than lose it.
            output.addPage(input1.getPage(num_pages - 1))

        print("writing " + sys.argv[2])
        # The stream was opened but never passed to write() nor closed.
        with open(sys.argv[2], "wb") as output_stream:
            output.write(output_stream)
    print("done.")
def main():
    """Extract the images of the PDF named on the command line.

    Every ``/Image`` XObject on every page is saved in the current
    directory, named after the XObject key without its leading slash:

    * no ``/Filter`` or ``/FlateDecode`` -> ``.png`` built with
      ``Image.frombytes``
    * ``/DCTDecode`` -> ``.jpg``, ``/JPXDecode`` -> ``.jp2``,
      ``/CCITTFaxDecode`` -> ``.tiff`` (data written as-is)
    * any other filter -> nothing is written

    Pages without an ``/XObject`` entry print ``No image found.``.

    Returns:
        1 when the command line is malformed, otherwise None.
    """
    if len(sys.argv) != 2:
        print("{}: <filepath>".format(sys.argv[0]))
        return 1

    filepath = sys.argv[1].strip()
    # Filters whose data is dumped unchanged.  Compared with ``==`` rather
    # than a dict lookup because the filter value may be unhashable.
    rawExtensions = (
        ('/DCTDecode', ".jpg"),
        ('/JPXDecode', ".jp2"),
        ('/CCITTFaxDecode', ".tiff"),
    )

    # Context managers close the input and every raw image file.
    with open(filepath, "rb") as pdfStream:
        r = PdfFileReader(pdfStream)
        for pageNo in range(r.numPages):
            page = r.getPage(pageNo)
            if '/XObject' not in page['/Resources']:
                print("No image found.")
                continue

            xObject = page['/Resources']['/XObject'].getObject()
            for obj in xObject:
                image = xObject[obj]
                if image['/Subtype'] != '/Image':
                    continue

                size = (image['/Width'], image['/Height'])
                data = image.getData()
                if image['/ColorSpace'] == '/DeviceRGB':
                    mode = "RGB"
                else:
                    mode = "P"

                if '/Filter' not in image or image['/Filter'] == '/FlateDecode':
                    img = Image.frombytes(mode, size, data)
                    img.save(obj[1:] + ".png")
                    continue

                for filterName, extension in rawExtensions:
                    if image['/Filter'] == filterName:
                        with open(obj[1:] + extension, "wb") as img:
                            img.write(data)
                        break
def scale(query, pdfs):
    """Fit every page of each PDF onto a blank page of the requested size.

    ``query`` has the form ``"<width>x<height>"``; both numbers are
    multiplied by 72 (presumably inches to PDF points).  The result is
    saved next to the source as ``<name> (scaled).pdf``.  Encrypted or
    unreadable files end the run with a notification.
    """
    try:
        for pdf in pdfs:
            reader = PdfFileReader(pdf, strict=False)
            if reader.isEncrypted:
                raise FileEncryptedError

            writer = PdfFileWriter()
            target_w, target_h = [float(dim) * 72 for dim in query.split('x')]

            for page_index in xrange(reader.numPages):
                src_page = reader.getPage(page_index)
                src_w = float(src_page.mediaBox[2])
                src_h = float(src_page.mediaBox[3])
                # The smaller ratio keeps the whole page inside the target.
                factor = min(target_w / src_w, target_h / src_h)

                blank = PageObject.createBlankPage(None, target_w, target_h)
                blank.mergeScaledTranslatedPage(src_page, factor, 0, 0)
                writer.addPage(blank)

            base_path = os.path.splitext(pdf)[0]
            target = '{} (scaled).pdf'.format(base_path)
            with open(target, 'wb') as stream:
                writer.write(stream)

    except FileEncryptedError:
        notify.notify('Alfred PDF Tools',
                      'Scale action cannot handle an encrypted PDF file.')

    except PdfReadError:
        notify.notify('Alfred PDF Tools',
                      'Cannot scale a malformed PDF file.')
def encrypt(query, pdfs):
    """Write a password-protected copy of every unencrypted PDF in ``pdfs``.

    ``query`` is the password.  Each copy is saved next to its source as
    ``<name> (encrypted).pdf``.  Files that are already encrypted are
    skipped with a notification.
    """
    for pdf in pdfs:
        reader = PdfFileReader(pdf, strict=False)

        if reader.isEncrypted:
            notify.notify('Alfred PDF Tools',
                          'The PDF file is already encrypted.')
            continue

        # Copy all pages, then protect the copy.
        writer = PdfFileWriter()
        for page_index in xrange(reader.numPages):
            writer.addPage(reader.getPage(page_index))
        writer.encrypt(query)

        base_path = os.path.splitext(pdf)[0]
        target = "{} (encrypted).pdf".format(base_path)
        with open(target, 'wb') as stream:
            writer.write(stream)

        notify.notify('Alfred PDF Tools',
                      'Encryption successfully completed.')
def crop(pdfs):
    """Split every page of each PDF into two half pages.

    A page wider than tall is cut into a left and a right half (left added
    first).  Otherwise it is cut into a lower and an upper half (lower
    added first).  The result is saved next to the source as
    ``<name> (cropped).pdf``.  Encrypted or unreadable files end the run
    with a notification.
    """
    try:
        for pdf in pdfs:
            reader = PdfFileReader(pdf, strict=False)
            if reader.isEncrypted:
                raise FileEncryptedError

            writer = PdfFileWriter()

            for page_index in xrange(reader.numPages):
                # Two independent copies of the source page, one per half.
                source = reader.getPage(page_index)
                second = copy(source)
                first = copy(source)

                # Both halves start from the original crop box.
                second.mediaBox = copy(second.cropBox)
                first.mediaBox = copy(second.cropBox)

                left, bottom = second.mediaBox.lowerLeft
                right, top = second.mediaBox.upperRight
                left, bottom = floor(left), floor(bottom)
                right, top = floor(right), floor(top)
                mid_x = left + floor((right - left) / 2)
                mid_y = bottom + floor((top - bottom) / 2)

                if (right - left) > (top - bottom):
                    # Wide page: ``first`` is the left half, ``second`` the right.
                    first.mediaBox.upperRight = (mid_x, top)
                    first.mediaBox.lowerLeft = (left, bottom)
                    second.mediaBox.upperRight = (right, top)
                    second.mediaBox.lowerLeft = (mid_x, bottom)
                else:
                    # Tall page: ``second`` is the upper half, ``first`` the lower.
                    second.mediaBox.upperRight = (right, top)
                    second.mediaBox.lowerLeft = (left, mid_y)
                    first.mediaBox.upperRight = (right, mid_y)
                    first.mediaBox.lowerLeft = (left, bottom)

                # Every other box follows the new media box.
                second.artBox = second.mediaBox
                second.bleedBox = second.mediaBox
                second.cropBox = second.mediaBox
                first.artBox = first.mediaBox
                first.bleedBox = first.mediaBox
                first.cropBox = first.mediaBox

                writer.addPage(first)
                writer.addPage(second)

            base_path = os.path.splitext(pdf)[0]
            target = '{} (cropped).pdf'.format(base_path)
            with open(target, 'wb') as stream:
                writer.write(stream)

    except FileEncryptedError:
        notify.notify('Alfred PDF Tools',
                      'Crop action cannot handle an encrypted PDF file.')

    except PdfReadError:
        notify.notify('Alfred PDF Tools',
                      'Cannot crop a malformed PDF file.')
def _slice_bounds(spec, num_pages):
    """Turn one page spec ("N", "N-M" or "N-") into a 0-based (start, stop) pair."""
    if "-" in spec:
        parts = spec.split('-')
        start = int(parts[0]) - 1
        # An open-ended range ("N-") runs to the last page.
        stop = int(parts[1]) if parts[1] else num_pages
    else:
        start = int(spec) - 1
        stop = int(spec)
    return start, stop


def slice_(query, abs_path, single, suffix):
    """Extract page ranges from a PDF.

    Args:
        query: Comma-separated 1-based page specs: ``N``, ``N-M`` or ``N-``
            (to the end), e.g. ``"1, 3-5, 8-"``.
        abs_path: Path of the source PDF.
        single: When true, all ranges go into ``<name> (sliced).pdf``;
            otherwise each range gets ``<name> (<suffix> <k>).pdf``.
        suffix: Label used in the per-range file names.

    Every spec is validated before any output is written.  Problems are
    reported through ``notify`` rather than raised: invalid syntax, page
    out of range, page number zero, reversed range, encrypted file and
    malformed file.
    """
    tmp_file = None
    try:
        reader = PdfFileReader(abs_path)
        if reader.isEncrypted:
            raise FileEncryptedError

        # Work on a link-free copy of the document held in a temp file.
        writer = PdfFileWriter()
        for i in xrange(reader.numPages):
            writer.addPage(reader.getPage(i))
        tmp_file = tempfile.NamedTemporaryFile()
        writer.removeLinks()
        writer.write(tmp_file)
        reader = PdfFileReader(tmp_file)
        num_pages = reader.numPages

        specs = [x.strip() for x in query.split(',')]
        for spec in specs:
            # A leading '-' (no start page) used to crash in int('').
            if not spec.replace('-', '').isdigit() or spec.startswith('-'):
                raise SyntaxError

        bounds = [_slice_bounds(spec, num_pages) for spec in specs]
        for start, stop in bounds:
            if stop > num_pages:
                raise IndexError
        for start, stop in bounds:
            # Page 0 (alone or as a range start) would otherwise become
            # index -1, i.e. silently select the last page.
            if start == -1:
                raise StartValueZeroError
            if start >= stop:
                raise StartValueReverseError

        noextpath = os.path.splitext(abs_path)[0]
        if single:
            merger = PdfFileMerger(strict=False)
            for start, stop in bounds:
                merger.append(reader, pages=(start, stop))
            merger.write(noextpath + ' (sliced).pdf')
        else:
            for part_no, (start, stop) in enumerate(bounds, 1):
                merger = PdfFileMerger(strict=False)
                out_file = '{} ({} {}).pdf'.format(noextpath, suffix, part_no)
                merger.append(reader, pages=(start, stop))
                merger.write(out_file)

    except SyntaxError:
        notify.notify('Alfred PDF Tools',
                      'The input syntax is not valid.')

    except IndexError:
        notify.notify('Alfred PDF Tools',
                      'Page number out of range.')

    except StartValueZeroError:
        notify.notify('Alfred PDF Tools',
                      'Page number cannot be zero.')

    except StartValueReverseError:
        notify.notify('Alfred PDF Tools',
                      'You cannot set a page range in reverse order.')

    except FileEncryptedError:
        notify.notify('Alfred PDF Tools',
                      'Slice action cannot handle an encrypted PDF file.')

    except PdfReadError:
        notify.notify('Alfred PDF Tools',
                      'Cannot slice a malformed PDF file.')

    finally:
        # Close (and thereby delete) the temp copy on every path.
        if tmp_file is not None:
            tmp_file.close()
def split_size(query, abs_path, suffix):
    """Split a PDF into consecutive parts limited by file size.

    ``query`` is the size limit; it is multiplied by 1,000,000 (presumably
    megabytes to bytes).  ``abs_path`` is the source PDF and ``suffix`` the
    label used in the output names ``<name> (<suffix> <k>).pdf``.

    Page sizes are estimated by writing each page to its own temp file.
    Pages are then accumulated until the estimate reaches the limit.
    Invalid limits, encrypted files and malformed files are reported
    through ``notify`` instead of being raised.
    """
    try:
        # Negative limits are rejected; a non-numeric query makes float()
        # raise ValueError, which is handled the same way below.
        if float(query) < 0:
            raise ValueError

        max_part_size = float(query) * 1000000
        noextpath = os.path.splitext(abs_path)[0]
        reader = PdfFileReader(abs_path, strict=False)
        # NOTE(review): numPages is read before the isEncrypted check --
        # confirm that this cannot fail first for encrypted files.
        pg_cnt = reader.numPages

        if reader.isEncrypted:
            raise FileEncryptedError

        # Estimate each page's size by writing it alone to a temp file.
        pg_sizes = []

        for i in xrange(pg_cnt):
            writer = PdfFileWriter()
            writer.addPage(reader.getPage(i))
            writer.removeLinks()
            tmp_file = tempfile.NamedTemporaryFile()
            writer.write(tmp_file)
            # NOTE(review): the temp file is not flushed before getsize();
            # buffered bytes may be missing from the measurement -- confirm.
            file_size = os.path.getsize(tmp_file.name)
            pg_sizes.append(file_size)
            tmp_file.close()

        # Full copy of the document; all parts are cut from this temp file.
        writer = PdfFileWriter()

        for i in xrange(pg_cnt):
            writer.addPage(reader.getPage(i))

        inp_file = tempfile.NamedTemporaryFile()
        writer.write(inp_file)
        # Ratio (smaller / larger) between the source file size and the sum
        # of the single-page sizes; it selects the strategy used below.
        inp_file_size = os.path.getsize(abs_path)
        sum_pg_sizes = sum(pg_sizes)
        dividend = min(inp_file_size, sum_pg_sizes)
        divisor = max(inp_file_size, sum_pg_sizes)
        # NOTE(review): both operands are integers; this needs true
        # division to produce a fraction -- confirm for Python 2.
        quotient = dividend / divisor

        # [start, stop) is the candidate page window of the current part.
        start = 0
        stop = 1
        pg_no = 0

        while not stop > pg_cnt:
            out_file = '{} ({} {}).pdf'.format(noextpath, suffix, pg_no + 1)

            if quotient > 0.95:
                # Sizes nearly match: the per-page estimates are used as-is.
                part = pg_sizes[start:stop]
                part_size = sum(part)
                part_pg_cnt = len(part)

                if part_size < max_part_size:
                    if stop != pg_cnt:
                        # Still under the limit: try one more page.
                        stop += 1

                    else:
                        # Last page reached: write the final part and stop.
                        merger = PdfFileMerger(strict=False)
                        merger.append(inp_file, pages=(start, stop))
                        merger.write(out_file)
                        break

                else:
                    if part_pg_cnt == 1:
                        # One page alone reaches the limit: it becomes a part.
                        merger = PdfFileMerger(strict=False)
                        merger.append(inp_file, pages=(start, stop))
                        merger.write(out_file)
                        start = stop
                        stop += 1
                        pg_no += 1

                    else:
                        # Drop the page that reached the limit and write the rest.
                        stop -= 1
                        merger = PdfFileMerger(strict=False)
                        merger.append(inp_file, pages=(start, stop))
                        merger.write(out_file)
                        start = stop
                        stop += 1
                        pg_no += 1

            else:
                # Estimates differ from the real size: written parts are
                # measured and the estimates corrected where needed.
                part = pg_sizes[start:stop]
                part_size = sum(part)
                part_pg_cnt = len(part)

                if part_size < max_part_size:
                    if stop != pg_cnt:
                        stop += 1

                    else:
                        merger = PdfFileMerger(strict=False)
                        merger.append(inp_file, pages=(start, stop))
                        merger.write(out_file)
                        break

                else:
                    if part_pg_cnt == 1:
                        merger = PdfFileMerger(strict=False)
                        merger.append(inp_file, pages=(start, stop))
                        merger.write(out_file)
                        start = stop
                        stop += 1
                        pg_no += 1

                    else:
                        stop -= 1
                        merger = PdfFileMerger(strict=False)
                        merger.append(inp_file, pages=(start, stop))
                        merger.write(out_file)
                        # Measure the part that was actually written.
                        part_size = os.path.getsize(out_file)
                        next_page = pg_sizes[stop:stop + 1][0]

                        if part_size + next_page < max_part_size:
                            # The next page would still fit: discard this
                            # part, replace the estimates inside the window
                            # by the measured average, and grow the window.
                            os.remove(out_file)
                            part_size_real = part_size / (stop - start)
                            pg_sizes_real = []

                            for i in xrange(pg_cnt):
                                if i >= start and i < stop:
                                    pg_sizes_real.append(part_size_real)
                                else:
                                    pg_sizes_real.append(pg_sizes[i])

                            pg_sizes = pg_sizes_real
                            stop += 1

                        else:
                            # Part is final: start the next one.
                            start = stop
                            stop += 1
                            pg_no += 1

        # NOTE(review): not reached when an exception is raised above.
        inp_file.close()

    except ValueError:
        notify.notify('Alfred PDF Tools',
                      'The argument must be a positive numeric value.')

    except FileEncryptedError:
        notify.notify('Alfred PDF Tools',
                      'Split action cannot handle an encrypted PDF file.')

    except PdfReadError:
        notify.notify('Alfred PDF Tools',
                      'Cannot split a malformed PDF file.')
def split_count(query, abs_path, suffix):
    """Split a PDF into consecutive parts of ``query`` pages each.

    Args:
        query: Pages per part, as a string holding a positive integer.
        abs_path: Path of the source PDF.
        suffix: Label used in the output names ``<name> (<suffix> <k>).pdf``.

    The last part holds the remaining pages when the page count is not a
    multiple of ``query``.  A ``query`` larger than the document yields a
    single part with all pages.  Problems are reported through ``notify``
    rather than raised: non-integer, negative or zero argument, encrypted
    file and malformed file.
    """
    tmp_file = None
    try:
        if not query.lstrip('+-').isdigit():
            raise NotIntegerError
        try:
            pg_cnt = int(query)
        except ValueError:
            # Mixed signs such as "+-5" pass the digit check but are not
            # integers; this used to escape as an uncaught ValueError.
            raise NotIntegerError

        if pg_cnt < 0:
            raise NegativeValueError

        reader = PdfFileReader(abs_path)

        if reader.isEncrypted:
            raise FileEncryptedError

        # Work on a link-free copy of the document held in a temp file.
        writer = PdfFileWriter()

        for i in xrange(reader.numPages):
            writer.addPage(reader.getPage(i))

        tmp_file = tempfile.NamedTemporaryFile()
        writer.removeLinks()
        writer.write(tmp_file)
        reader = PdfFileReader(tmp_file)
        num_pages = int(reader.numPages)

        # divmod raises ZeroDivisionError for a zero page count (reported
        # below) and does not depend on true division being enabled.
        full_parts, leftover = divmod(num_pages, pg_cnt)
        part_cnt = full_parts + (1 if leftover else 0)
        noextpath = os.path.splitext(abs_path)[0]

        for part_no in xrange(part_cnt):
            start = part_no * pg_cnt
            # Clamp the last part so no page past the end is requested.
            stop = min(start + pg_cnt, num_pages)
            merger = PdfFileMerger(strict=False)
            merger.append(tmp_file, pages=(start, stop))
            out_file = "{} ({} {}).pdf".format(noextpath, suffix, part_no + 1)
            merger.write(out_file)

    except NotIntegerError:
        notify.notify('Alfred PDF Tools',
                      'The argument is not an integer.')

    except NegativeValueError:
        notify.notify('Alfred PDF Tools',
                      'Negative integer is not a valid argument.')

    except ZeroDivisionError:
        notify.notify('Alfred PDF Tools',
                      'Zero is not a valid argument.')

    except FileEncryptedError:
        notify.notify('Alfred PDF Tools',
                      'Split action cannot handle an encrypted PDF file.')

    except PdfReadError:
        notify.notify('Alfred PDF Tools',
                      'Cannot split a malformed PDF file.')

    finally:
        # Close (and thereby delete) the temp copy on every path.
        if tmp_file is not None:
            tmp_file.close()