def highlights(files, options):
    """Print notes from highlighted text.

    All is printed to output or error stream (usually stdout and stderr).

    Options:
    * options.recursive    Handle directories
    * options.use_title    Print filename/document title instead of full path
    * options.valid_types  PDF annotation types to process
    * options.filter_keys  Only print stated keys.
    * options.remove_key   Don't print key tags
    * options.with_path    Print the file path with each line
    * options.with_page    Print the page number with each line
    * options.buffered     Buffer output
    * options.list_keys    Print key only
    * options.stdout       Output stream
    * options.stderr       Error stream
    """
    for entry in files:
        if os.path.isdir(entry):
            # Directories are only descended into when requested;
            # either way they are never opened as PDFs themselves.
            if options.recursive:
                children = [os.path.join(entry, child)
                            for child in os.listdir(entry)]
                highlights(children, options)
            continue
        document = Pdf(entry, options, pgm=sys.argv[0])
        if not options.list_keys:
            for item in document.annotations(options):
                print_note(item.note, item.page, options)
        else:
            # Key-only listing never strips key tags.
            options.remove_key = False
            for item in document.annotations(options):
                list_keys(item.note, options)
def test_to_pdf(self):
    """to_pdf() output matches the recorded fixture byte stream."""
    first_html = "<h1>Hola Mundo</h1>"
    pdf = Pdf(first_html)
    # Context manager guarantees the fixture file is closed even if
    # the read or the assertion below raises.
    with open('tests/fixtures/hola_mundo.pdf', 'rb') as fh:
        data = fh.read()
    output = pdf.to_pdf(first_html)
    self.assertPdfEquals(output, data)
def test_get_stream(self):
    """get_stream() for two appended pages matches the recorded fixture."""
    first_html = "<h1>Hola Mundo</h1>"
    second_html = "<h2>This is sparta</h2>"
    pdf = Pdf(first_html)
    pdf.append(second_html)
    # Context manager guarantees the fixture file is closed even if
    # the read or the assertion below raises.
    with open('tests/fixtures/stream.pdf', 'rb') as fh:
        data = fh.read()
    output = pdf.get_stream()
    self.assertPdfEquals(output, data)
def execute(self):
    """Load the input PDF, run it through the effector chain, save the result.

    The document is always closed, even when loading, an effector, or
    saving raises.
    """
    document = Pdf()
    try:
        document.load(self.config.getInputFilePath())
        for step in self.effectorChain:
            # Each effector may return a new Pdf object; keep the latest.
            document = step.apply(document)
        document.save(self.config.getOutputFilePath())
    finally:
        # Closes whichever Pdf object is current when we leave the try block.
        document.close()
def main():
    """Crop cover.pdf according to config.json and render width variants.

    Reads the cover type from the command line, applies the configured
    crop box, then converts the cropped cover to images at every
    configured output width.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('cover_type', help='Define the type of cover')
    args = parser.parse_args()

    config = Config(path.abspath('config.json'), cover_type=args.cover_type)

    document = Pdf(path.abspath('cover.pdf'))
    document.set_cropbox(config.get_cover_geometry())

    image = Img(document.cover.name, output_folder=path.abspath('out'))
    for width in config.get_output_width():
        image.convert(width)
def confirmEmailSend(self):
    """Ask for confirmation, then mail today's records as a PDF report.

    On 'yes': fetches today's rows, builds the PDF, sends the email and
    closes the window. Otherwise closes the window and shows an error box.
    """
    answer = messagebox.askquestion('Send Email',
                                    'Are you sure to send email',
                                    icon='warning')
    if answer != 'yes':
        self.emailWin.quit()
        self.emailWin.destroy()
        messagebox.showerror('Cancelled', 'Your email is not forwarded')
        return

    recipient = self.email_entry.get().strip()
    print(recipient)

    db = Database()
    records = db.fetch_today()
    db.db_close()

    report = Pdf()
    report.get_pdf(records)

    EmailSend(recipient).send_email()
    messagebox.showinfo('Done', 'Your email has been sent')
    self.emailWin.quit()
    self.emailWin.destroy()
def generar_zip_contenedor(): archivos_pdf_generados = [] id_retiros = request.form.to_dict(False).get('recibo') if id_retiros: retiros = models.Retiro.select().where(id__in=id_retiros) to_text = Traductor().to_text for retiro in retiros: pdf = Pdf() html = render_template("recibo.html", cooperativa=retiro.socio.cooperativa, retiro=retiro, monto_como_cadena=to_text(retiro.monto)) pdf.append(html) titulo = models.Retiro.obtener_nombre_por_id(retiro.id) nombre_archivo = os.path.join(app.config['UPLOAD_FOLDER'], titulo + ".pdf") archivos_pdf_generados.append(nombre_archivo) archivo_temporal = open(nombre_archivo, 'wb') archivo_temporal.write(pdf.get_stream()) archivo_temporal.close() import zipfile nombre = "recibos_agrupados_%s.zip" % (retiro.fecha_como_string()) nombre_archivo = os.path.join(app.config['UPLOAD_FOLDER'], nombre) zip = zipfile.ZipFile(nombre_archivo, mode='w') for nombre_pdf in archivos_pdf_generados: print "zipeando", [nombre_pdf] zip.write(nombre_pdf) zip.close() return jsonify(name=nombre) else: abort(404)
def generar_pdf_concatenado():
    """Render all selected withdrawal receipts into one concatenated PDF.

    Reads the posted 'recibo' id list, appends one rendered receipt page
    per withdrawal to a single Pdf, writes it to the upload folder, and
    returns JSON with the file name. Aborts with 404 when no ids posted.
    """
    id_retiros = request.form.to_dict(False).get('recibo')
    if id_retiros:
        retiros = models.Retiro.select().where(id__in=id_retiros)
        to_text = Traductor().to_text
        pdf = Pdf()
        for retiro in retiros:
            html = render_template("recibo.html",
                                   cooperativa=retiro.socio.cooperativa,
                                   retiro=retiro,
                                   monto_como_cadena=to_text(retiro.monto))
            pdf.append(html)
        # NOTE(review): 'retiro' leaks out of the loop; an empty result set
        # would raise NameError here — confirm ids always match rows.
        titulo = "recibos_agrupados_%s.pdf" % (retiro.fecha_como_string())
        nombre_archivo = os.path.join(app.config['UPLOAD_FOLDER'], titulo)
        # 'with' guarantees the file is closed even if the write fails.
        with open(nombre_archivo, 'wb') as archivo_temporal:
            archivo_temporal.write(pdf.get_stream())
        return jsonify(name=titulo)
    else:
        abort(404)
def test_append(self):
    """append() accumulates HTML fragments in insertion order."""
    first_html = "<h1>Hola Mundo</h1>"
    second_html = "<h2>This is sparta</h2>"
    pdf = Pdf(first_html)
    pdf.append(second_html)
    # assertEqual: assertEquals is a deprecated alias removed in Python 3.12.
    self.assertEqual(pdf.htmls, [first_html, second_html])
def get_json_from_file(file_path):
    """Load and return the JSON document stored at *file_path*."""
    with open(file_path) as f:
        return json.load(f)


if __name__ == '__main__':
    # Usage: script <pdf> <old-json> <new-json-output>
    if len(sys.argv) != 4:
        sys.stderr.write('Argument count is invalid!\n')
        sys.exit(1)  # sys.exit: the bare exit() is an interactive-shell helper
    current_pdf_file_path = sys.argv[1]
    old_generated_JSON_file_path = sys.argv[2]
    new_generated_JSON_file_path = sys.argv[3]

    new_pdf = Pdf(current_pdf_file_path)
    old_json = get_json_from_file(old_generated_JSON_file_path)

    # Exit if PDF file is the same as the last time.
    new_pdf_hash = new_pdf.get_pdf_hash()
    old_pdf_hash = old_json["referenced_pdf_hash"]
    if new_pdf_hash == old_pdf_hash:
        print("PDF file hasn't changed since the last time. Exiting.")
        sys.exit(1)

    new_addresses = new_pdf.get_addresses()
    old_addresses = {Address(**address) for address in old_json["addresses"]}

    # Google Geocoding calls are pretty expensive, so we don't really want
    # to query every address every time.
    addresses_to_be_removed = old_addresses - new_addresses
def pdf_watermark():
    """Open the GUI for adding a watermark to a PDF document."""
    Pdf(mode='watermark', title='PDF 文档加水印').construct_gui()
def pdf_encrypt():
    """Open the GUI for encrypting a PDF document."""
    Pdf(mode='encrypt', title='PDF 文档加密').construct_gui()
def pdf_merge():
    """Open the GUI for merging PDF documents."""
    Pdf(mode='merge', title='PDF 文档合并').construct_gui()
# Templatify-form tuning flags (parser is defined earlier in the file).
parser.add_argument('--templatify-forms-whitelist', '--tfw', default='')
parser.add_argument('--templatify-forms-uniquifier', '--tfu')
parser.add_argument('--templatify-forms-padding', '--tfp', type=int, default=80)
parser.add_argument('--templatify-forms-custom-padding', '--tfcp', default='{}')
parser.add_argument('--templatify-forms-remove-dv', '--tfrd',
                    action='store_true')
parser.add_argument('--save', '-s')
args = parser.parse_args()

pdf = Pdf().load(args.pdf)
if args.compare:
    # Diff the object tables of the two PDFs in both directions.
    other = Pdf().load(args.compare)
    for k, v in pdf.objects.items():
        if k not in other.objects:
            print('===== {} ===== missing from other'.format(k))
        # 'elif' fixes a KeyError: the original fell through and indexed
        # other.objects[k] even when k was just reported missing.
        elif other.objects[k] != v:
            print(('===== {} =====\n'
                   '{}\n'
                   '\n'
                   '===== other =====\n'
                   '{}').format(k, v, other.objects[k]))
    for k in other.objects:
        if k not in pdf.objects:
            print('===== {} ===== missing'.format(k))
def anedit(path, target, options):
    """Edit notes from highlights in PDF files.

    All is printed to standard input or standard error.

    Options:
    * options.valid_types  PDF annotation types to process
    * options.use_title    Print filename/document title instead of filename
    * options.filter_keys  Only print stated keys.
    * options.remove_key   Don't print key tags
    * options.verbose      Print warnings
    """
    # FIXME: Who guarantees this method is only executed on valid files?
    # Also check for pusher, hillieo, hilliep, ...
    # wordlist for normalization
    wordlist = Dictionary()
    # open document
    document = Pdf(path, options, pgm=sys.argv[0])
    # fetch notes: pair each annotation with its auto-corrected suggestion
    notes = []
    for n_annot, item in enumerate(document.annotations(options)):
        sugg = annotation_fixes(item.note, wordlist, options.verbose)
        notes.append((item, sugg))
    # Walk through notes interactively; queue semantics let 'i' (ignore)
    # push an item to the back and 'e'/'c' re-prompt it at the front.
    has_changes = False
    while len(notes) > 0:
        item, sugg = notes.pop(0)
        print ""
        # ANSI blue header: location of the note plus how many remain.
        print "\033[94m> {}: page {}, ETA {}\033[0m".format(
            item.page[0], item.page[1], len(notes))
        print "Original: ", item.note
        if item.note != sugg:
            print "Suggested:", sugg
        elif options.diffs:
            # NOTE(review): options.diffs is not in the docstring's option
            # list; it appears to mean "only prompt for changed notes".
            continue
        valid_answers = 'nyecisq?'
        prompt = '[{}]: '.format('/'.join(valid_answers.title()))
        # 'NEIN' is a sentinel that is never in valid_answers, forcing at
        # least one pass through the prompt loop.
        ans = 'NEIN'
        while ans not in valid_answers:
            # Normalize long-form answers down to their one-letter codes.
            ans = raw_input(prompt) \
                .strip() \
                .lower() \
                .replace('yes', 'y') \
                .replace('no', 'n') \
                .replace('quit', 'q') \
                .replace('ignore', 'i') \
                .replace('ign', 'i') \
                .replace('edit', 'e') \
                .replace('correct', 'c') \
                .replace('skip', 's')
            if ans == '':
                ans = valid_answers[0]  # default is 'n'
            if ans == '?':
                # Show help, then keep prompting (sentinel re-arms the loop).
                ans = 'NEIN'
                print '''Usage:
 n  no      Stick with the original text (the default)
 y  yes     Accept the suggested text
 e  edit    Edit the original text
 c  change  Edit the suggested text
 i  ignore  Ignore for now (again prompted later)
 s  skip    Save and exit
 q  quit    Abort and exit (changes are lost)
'''
        if ans == 'y':
            # Use suggestion
            has_changes = True
            if item.key is None:
                item.set_content(sugg)
            else:
                # Re-wrap the accepted text in its original key tags.
                item.set_content('<{}>{}</{}>'.format(item.key, sugg,
                                                      item.key))
        elif ans == 'n':
            # Use original
            pass
        elif ans in ('e', 'c'):
            # Edit manually: prefill the readline buffer with the current
            # text ('e' = original, 'c' = suggestion) so the user edits
            # in place; newlines are escaped for single-line editing.
            def hook():
                # NOTE(review): old-style and/or idiom — picks sugg when
                # item.note is falsy even for 'e'; confirm intended.
                curr = ans == 'e' and item.note or sugg
                curr = curr.replace('\n', '\\n')
                readline.insert_text(curr)
                readline.redisplay()
            readline.set_pre_input_hook(hook)
            sugg = raw_input().strip().replace('\\n', '\n')
            readline.set_pre_input_hook(None)
            # Re-queue at the front so the edited text is confirmed next.
            notes.insert(0, (item, sugg))
        elif ans == 'i':
            # Ignore note for now
            notes.append((item, sugg))
        elif ans == 'q':
            # Quit immediately, don't save
            return
        elif ans == 's':
            # Skip the rest
            break
    # save changes
    if has_changes:
        document.save(target, options)
def pdf_split():
    """Open the GUI for splitting a PDF document."""
    Pdf(mode='split', title='PDF 文档分割').construct_gui()
def __init__(
        self, app, network_manager, wait_timeout=20, wait_callback=None,
        viewport_size=(800, 600),
        user_agent='Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.121 Safari/535.2',
        log_level=30, download_images=True, plugins_enabled=False,
        java_enabled=False, create_page_callback=None, is_popup=False,
        max_resource_queued=None, *args, **kargs):
    """Configure a QtWebKit web page wrapped for headless browsing.

    :param app: parent Qt application object.
    :param network_manager: QNetworkAccessManager handling all requests.
    :param wait_timeout: seconds to wait for page events (used by waiters).
    :param wait_callback: optional callable polled while waiting.
    :param viewport_size: (width, height) of the rendering viewport.
    :param user_agent: User-Agent string sent with every request.
    :param log_level: logging level applied to the module logger (30=WARNING).
    :param download_images: toggle QWebSettings.AutoLoadImages.
    :param plugins_enabled: toggle QWebSettings.PluginsEnabled.
    :param java_enabled: toggle QWebSettings.JavaEnabled.
    :param create_page_callback: callable used when a popup page is needed.
    :param is_popup: whether this page itself was opened as a popup.
    :param max_resource_queued: cap on tracked HTTP resources (None = no cap).
    """
    super(GhostWebPage, self).__init__(parent=app)
    self._app = app
    # PDF rendering engine for this page.
    self.pdf_engine = Pdf()
    # Resources collected as network replies finish (_request_ended).
    self.http_resources = []
    self.http_resource_page = None
    self.max_resource_queued = max_resource_queued
    self.wait_timeout = wait_timeout
    self.wait_callback = wait_callback
    self.loaded = True
    self.create_page_callback = create_page_callback
    self.is_popup = is_popup
    # Internal library object
    self.ghostInit = GhostInit()
    # Forward content QtWebKit can't render to _unsupported_content.
    self.setForwardUnsupportedContent(True)
    # JavaScript is always on; images/plugins/Java follow the parameters.
    self.settings().setAttribute(QtWebKit.QWebSettings.AutoLoadImages,
        download_images)
    self.settings().setAttribute(QtWebKit.QWebSettings.JavascriptEnabled,
        True)
    self.settings().setAttribute(
        QtWebKit.QWebSettings.JavascriptCanOpenWindows, True)
    self.settings().setAttribute(QtWebKit.QWebSettings.PluginsEnabled,
        plugins_enabled)
    self.settings().setAttribute(QtWebKit.QWebSettings.JavaEnabled,
        java_enabled)
    self.set_viewport_size(*viewport_size)
    # Page signals
    self.loadFinished.connect(self._page_loaded)
    self.loadStarted.connect(self._page_load_started)
    self.loadProgress.connect(self._page_load_progress)
    self.unsupportedContent.connect(self._unsupported_content)
    # Install the shared network manager and observe finished replies.
    self.network_manager = network_manager
    self.setNetworkAccessManager(self.network_manager)
    self.network_manager.finished.connect(self._request_ended)
    # User Agent
    self.setUserAgent(user_agent)
    self.main_frame = self.mainFrame()
    # Maps unsupported-content replies awaiting download handling.
    self._unsupported_files = {}
    self.windowCloseRequested.connect(self._closeWindow)
    # NOTE(review): *args/**kargs are accepted but never used here —
    # presumably for subclass compatibility; confirm.
    logger.setLevel(log_level)
def main():
    """Render a Pivotal Tracker CSV export as printable story cards.

    Lays out four story cards per landscape page (2x2 grid on A4,
    297x210 mm), optionally collated for easier post-cut sorting, writes
    the PDF next to the input CSV (or to --output), opens it, and returns
    the output file path.
    """
    arg_parser = argparse.ArgumentParser(
        description=
        'Create a pdf document from a exported csv of Pivotal Tracker',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    arg_parser.add_argument('csv', help='the file path to the csv file')
    arg_parser.add_argument('-m', '--margin', type=int, default=5,
                            help='margin of the page in mm')
    arg_parser.add_argument('-o', '--output',
                            help='file path to the generated pdf')
    arg_parser.add_argument('-n', '--show-number', action='store_true',
                            help='shows the story number on the bottom left')
    arg_parser.add_argument('-t', '--show-tasks', action='store_true',
                            help='shows the tasks for each story')
    arg_parser.add_argument(
        '-c', '--collate', action='store_true',
        help='collate stories for easier sorting after cutting all pages')
    arg_parser.add_argument(
        '-s', '--strict', action='store_true',
        help='fails if the csv file does not contain all required columns')
    args = arg_parser.parse_args()

    output_file = args.output if args.output is not None \
        else os.path.splitext(args.csv)[0] + '.pdf'

    # Card geometry: a 2x2 grid inside the page margins (A4 landscape).
    page_margin = args.margin
    story_width = (297 - (page_margin * 2)) / 2
    story_height = (210 - (page_margin * 2)) / 2

    stories = []
    # newline='' is required by the csv module so quoted fields containing
    # line breaks are parsed correctly.
    with open(args.csv, 'r', newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        data = list(reader)
        if args.strict:
            validate_columns(data[0])
        # Row 0 is the header; remaining rows become stories numbered from 1.
        stories = map(partial(make_pivotal_story, data[0]),
                      enumerate(data[1:], 1))

    pdf = Pdf()
    pdf.set_auto_page_break(False)
    # Top-left corner of each of the four cards on a page.
    positions = [(page_margin, page_margin),
                 (page_margin + story_width, page_margin),
                 (page_margin, page_margin + story_height),
                 (page_margin + story_width, page_margin + story_height)]
    stories = list(iterstories(stories, include_tasks=args.show_tasks))
    chunk_function = stacked_chunks if args.collate else chunks
    for story_chunk in chunk_function(stories, 4):
        pdf.add_page('Landscape')
        for story, position in zip(story_chunk, positions):
            story.draw(
                pdf,
                position[0],
                position[1],
                story_width,
                story_height,
                args.show_number,
            )
    pdf.output(output_file)
    open_file(output_file)
    return output_file