コード例 #1
0
def highlights(files, options):
    """Print the notes attached to highlighted text in the given files.

    Each entry of *files* is handled in turn. Directories are never read as
    documents: they are expanded (one level per recursive call) when
    ``options.recursive`` is set and skipped otherwise. Every other path is
    opened as a ``Pdf`` and its annotations are passed either to
    ``list_keys`` or to ``print_note``.

    Options:
    * options.recursive     Handle directories
    * options.use_title     Print filename/document title instead of full path
    * options.valid_types   PDF annotation types to process
    * options.filter_keys   Only print stated keys.
    * options.remove_key    Don't print key tags
    * options.with_path     Print the file path with each line
    * options.with_page     Print the page number with each line
    * options.buffered      Buffer output
    * options.list_keys     Print key only
    * options.stdout        Output stream
    * options.stderr        Error stream

    Note that ``options.remove_key`` is reset to ``False`` on the passed-in
    object whenever a document is processed in ``list_keys`` mode.

    """
    for path in files:
        if os.path.isdir(path):
            if options.recursive:
                children = [os.path.join(path, name)
                            for name in os.listdir(path)]
                highlights(children, options)
            # A directory itself is never opened as a document.
            continue

        document = Pdf(path, options, pgm=sys.argv[0])

        if not options.list_keys:
            for annotation in document.annotations(options):
                print_note(annotation.note, annotation.page, options)
            continue

        # Key listing: the flag is cleared before the annotations are read.
        options.remove_key = False
        for annotation in document.annotations(options):
            list_keys(annotation.note, options)
コード例 #2
0
ファイル: pdf_test.py プロジェクト: ehx/RecibosCoop
 def test_to_pdf(self):
     """Check ``Pdf.to_pdf`` output for one HTML snippet against the stored fixture."""
     first_html = "<h1>Hola Mundo</h1>"
     pdf = Pdf(first_html)
     # The context manager releases the fixture handle even if read() raises.
     with open('tests/fixtures/hola_mundo.pdf', 'rb') as fh:
         data = fh.read()
     output = pdf.to_pdf(first_html)
     self.assertPdfEquals(output, data)
コード例 #3
0
ファイル: pdf_test.py プロジェクト: ehx/RecibosCoop
 def test_get_stream(self):
     """Check ``Pdf.get_stream`` for two appended HTML snippets against the stored fixture."""
     first_html = "<h1>Hola Mundo</h1>"
     second_html = "<h2>This is sparta</h2>"
     pdf = Pdf(first_html)
     pdf.append(second_html)
     # The context manager releases the fixture handle even if read() raises.
     with open('tests/fixtures/stream.pdf', 'rb') as fh:
         data = fh.read()
     output = pdf.get_stream()
     self.assertPdfEquals(output, data)
コード例 #4
0
ファイル: pdf_modifier.py プロジェクト: hrb23m/pydifier
    def execute(self):
        """Load the configured input file, apply every effector, and save the result.

        Each effector's ``apply`` return value replaces the working document,
        so the object closed at the end is whatever the last completed step
        produced.
        """
        document = Pdf()

        try:
            source_path = self.config.getInputFilePath()
            document.load(source_path)

            for step in self.effectorChain:
                document = step.apply(document)

            document.save(self.config.getOutputFilePath())
        finally:
            # Runs whether or not loading, an effector, or saving raised.
            document.close()
コード例 #5
0
def main():
    """Crop ``cover.pdf`` for the requested cover type and convert it at each configured width.

    The cover type is the single positional command-line argument. The
    geometry and the list of output widths both come from ``config.json``;
    the ``Img`` converter is pointed at the ``out`` folder. All three paths
    are resolved against the current working directory.
    """
    cover_pdf = path.abspath('cover.pdf')
    output_dir = path.abspath('out')
    settings_file = path.abspath('config.json')

    cli = argparse.ArgumentParser()
    cli.add_argument('cover_type', help='Define the type of cover')
    cover_type = cli.parse_args().cover_type

    settings = Config(settings_file, cover_type=cover_type)

    document = Pdf(cover_pdf)
    geometry = settings.get_cover_geometry()
    document.set_cropbox(geometry)

    converter = Img(document.cover.name, output_folder=output_dir)
    for target_width in settings.get_output_width():
        converter.convert(target_width)
コード例 #6
0
 def confirmEmailSend(self):
     """Ask for confirmation, then build today's PDF and send it by email.

     On any answer other than 'yes' the email window is closed and a
     cancellation dialog is shown. On 'yes' the address is read from the
     entry field, today's rows are fetched from the database and passed to
     ``Pdf.get_pdf``, the mail is sent, and the window is closed after a
     confirmation dialog.
     """
     answer = messagebox.askquestion('Send Email', 'Are you sure to send email', icon='warning')

     if answer != 'yes':
         self.emailWin.quit()
         self.emailWin.destroy()
         messagebox.showerror('Cancelled','Your email is not forwarded')
         return

     recipient = self.email_entry.get().strip()
     print(recipient)

     database = Database()
     rows = database.fetch_today()
     database.db_close()

     report = Pdf()
     report.get_pdf(rows)

     mailer = EmailSend(recipient)
     mailer.send_email()

     messagebox.showinfo('Done','Your email has been sent')
     self.emailWin.quit()
     self.emailWin.destroy()
コード例 #7
0
ファイル: app.py プロジェクト: ehx/RecibosCoop
def generar_zip_contenedor():
    archivos_pdf_generados = []
    id_retiros = request.form.to_dict(False).get('recibo')

    if id_retiros:
        retiros = models.Retiro.select().where(id__in=id_retiros)
        to_text = Traductor().to_text

        for retiro in retiros:
            pdf = Pdf()
            html = render_template("recibo.html",
                                   cooperativa=retiro.socio.cooperativa,
                                   retiro=retiro,
                                   monto_como_cadena=to_text(retiro.monto))
            pdf.append(html)

            titulo = models.Retiro.obtener_nombre_por_id(retiro.id)
            nombre_archivo = os.path.join(app.config['UPLOAD_FOLDER'],
                                          titulo + ".pdf")

            archivos_pdf_generados.append(nombre_archivo)
            archivo_temporal = open(nombre_archivo, 'wb')
            archivo_temporal.write(pdf.get_stream())
            archivo_temporal.close()

        import zipfile

        nombre = "recibos_agrupados_%s.zip" % (retiro.fecha_como_string())
        nombre_archivo = os.path.join(app.config['UPLOAD_FOLDER'], nombre)
        zip = zipfile.ZipFile(nombre_archivo, mode='w')

        for nombre_pdf in archivos_pdf_generados:
            print "zipeando", [nombre_pdf]
            zip.write(nombre_pdf)

        zip.close()

        return jsonify(name=nombre)
    else:
        abort(404)
コード例 #8
0
ファイル: app.py プロジェクト: ehx/RecibosCoop
def generar_pdf_concatenado():
    """Render all requested withdrawal receipts into one concatenated PDF.

    The withdrawal ids are read from the ``recibo`` field of the submitted
    form. Every matching ``Retiro`` is rendered through ``recibo.html`` and
    appended to a single ``Pdf``, which is written to ``UPLOAD_FOLDER``
    under a name carrying the date string of the last withdrawal processed.

    Returns:
        A JSON response ``{"name": <pdf file name>}``.

    Aborts with 404 when no ids were submitted or when none of them matches
    a stored withdrawal.
    """
    id_retiros = request.form.to_dict(False).get('recibo')
    if id_retiros:
        retiros = models.Retiro.select().where(id__in=id_retiros)
        to_text = Traductor().to_text
        pdf = Pdf()

        # Stays None when the query yields no rows, so the title below is
        # never built from an unbound loop variable.
        retiro = None

        for retiro in retiros:
            html = render_template("recibo.html",
                                   cooperativa=retiro.socio.cooperativa,
                                   retiro=retiro,
                                   monto_como_cadena=to_text(retiro.monto))
            pdf.append(html)

        if retiro is None:
            # Ids were submitted but none matched: nothing to render.
            abort(404)

        titulo = "recibos_agrupados_%s.pdf" % (retiro.fecha_como_string())

        nombre_archivo = os.path.join(app.config['UPLOAD_FOLDER'], titulo)
        # Context manager closes the file even if get_stream()/write() fails.
        with open(nombre_archivo, 'wb') as archivo_temporal:
            archivo_temporal.write(pdf.get_stream())

        return jsonify(name=titulo)
    else:
        abort(404)
コード例 #9
0
ファイル: pdf_test.py プロジェクト: ehx/RecibosCoop
 def test_append(self):
     """Check that ``append`` keeps the constructor HTML and adds the new one after it."""
     first_html = "<h1>Hola Mundo</h1>"
     second_html = "<h2>This is sparta</h2>"
     pdf = Pdf(first_html)
     pdf.append(second_html)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(pdf.htmls, [first_html, second_html])
コード例 #10
0
ファイル: parser.py プロジェクト: dpint/PostniNabiralniki
def get_json_from_file(file_path):
    """Return the object decoded from the JSON document stored at *file_path*."""
    with open(file_path) as source:
        raw_text = source.read()
        parsed = json.loads(raw_text)
    return parsed


if __name__ == '__main__':
    # Usage: <script> <current pdf> <old generated JSON> <new generated JSON>
    if len(sys.argv) != 4:
        sys.stderr.write('Argument count is invalid!\n')
        # sys.exit instead of the site-provided exit(): the latter is meant
        # for interactive use and is absent when Python runs without `site`.
        sys.exit(1)

    current_pdf_file_path = sys.argv[1]
    old_generated_JSON_file_path = sys.argv[2]
    new_generated_JSON_file_path = sys.argv[3]

    new_pdf = Pdf(current_pdf_file_path)
    old_json = get_json_from_file(old_generated_JSON_file_path)

    # Exit if PDF file is the same as the last time.
    new_pdf_hash = new_pdf.get_pdf_hash()
    old_pdf_hash = old_json["referenced_pdf_hash"]

    if new_pdf_hash == old_pdf_hash:
        print("PDF file hasn't changed since the last time. Exiting.")
        # NOTE(review): status 1 is kept from the original even though this
        # is not an error; confirm whether callers rely on it.
        sys.exit(1)

    new_addresses = new_pdf.get_addresses()
    # Rebuild Address objects from the stored JSON so the two collections can
    # be compared with set arithmetic.
    old_addresses = {Address(**address) for address in old_json["addresses"]}

    # Google Geocoding calls are pretty expensive, so we don't really want to query every address every time.
    addresses_to_be_removed = old_addresses - new_addresses
コード例 #11
0
ファイル: main.py プロジェクト: MTleen/202005-tf_idf
def pdf_watermark():
    """Create the ``Pdf`` tool in ``watermark`` mode and build its GUI."""
    Pdf(title='PDF 文档加水印', mode='watermark').construct_gui()
コード例 #12
0
ファイル: main.py プロジェクト: MTleen/202005-tf_idf
def pdf_encrypt():
    """Create the ``Pdf`` tool in ``encrypt`` mode and build its GUI."""
    Pdf(title='PDF 文档加密', mode='encrypt').construct_gui()
コード例 #13
0
ファイル: main.py プロジェクト: MTleen/202005-tf_idf
def pdf_merge():
    """Create the ``Pdf`` tool in ``merge`` mode and build its GUI."""
    Pdf(title='PDF 文档合并', mode='merge').construct_gui()
コード例 #14
0
# Register the --templatify-forms-* options (each with a short alias) and the
# --save option. `parser` is defined above this excerpt.
parser.add_argument('--templatify-forms-whitelist', '--tfw', default='')
parser.add_argument('--templatify-forms-uniquifier', '--tfu')
parser.add_argument('--templatify-forms-padding',
                    '--tfp',
                    type=int,
                    default=80)
# Default is the string '{}', not a dict.
parser.add_argument('--templatify-forms-custom-padding',
                    '--tfcp',
                    default='{}')
parser.add_argument('--templatify-forms-remove-dv',
                    '--tfrd',
                    action='store_true')
parser.add_argument('--save', '-s')
# Parse the command line; the code below reads the result as `args`.
args = parser.parse_args()

# Load the document named on the command line.
pdf = Pdf().load(args.pdf)

if args.compare:
    # Report every object that is missing from, or differs between, the two
    # documents.
    other = Pdf().load(args.compare)
    for k, v in pdf.objects.items():
        if k not in other.objects:
            print('===== {} ===== missing from other'.format(k))
        # elif, not if: a key absent from `other` has nothing to compare
        # against, and looking it up would raise KeyError on a plain dict.
        elif other.objects[k] != v:
            print(('===== {} =====\n'
                   '{}\n'
                   '\n'
                   '===== other =====\n'
                   '{}').format(k, v, other.objects[k]))
    # Second pass: objects that exist only in the other document.
    for k in other.objects.keys():
        if k not in pdf.objects:
            print('===== {} ===== missing'.format(k))
コード例 #15
0
ファイル: anedit.py プロジェクト: igsor/hillie
def anedit(path, target, options):
    """Edit notes from highlights in PDF files.

    All is printed to standard input or standard error.

    Options:
    * options.valid_types   PDF annotation types to process
    * options.use_title     Print filename/document title instead of filename
    * options.filter_keys   Only print stated keys.
    * options.remove_key    Don't print key tags
    * options.verbose       Print varnings

    """
    # FIXME: Who guarantees this method is only executed on valid files?
    # Also check for pusher, hillieo, hilliep, ...

    # wordlist for normalization
    wordlist = Dictionary()

    # open document
    document = Pdf(path, options, pgm=sys.argv[0])

    # fetch notes
    notes = []
    for n_annot, item in enumerate(document.annotations(options)):
        sugg = annotation_fixes(item.note, wordlist, options.verbose)
        notes.append((item, sugg))

    # Walk through notes
    has_changes = False
    while len(notes) > 0:
        item, sugg = notes.pop(0)

        print ""
        print "\033[94m> {}: page {}, ETA {}\033[0m".format(
            item.page[0], item.page[1], len(notes))
        print "Original: ", item.note
        if item.note != sugg:
            print "Suggested:", sugg
        elif options.diffs:
            continue

        valid_answers = 'nyecisq?'
        prompt = '[{}]: '.format('/'.join(valid_answers.title()))
        ans = 'NEIN'
        while ans not in valid_answers:
            ans = raw_input(prompt) \
                .strip() \
                .lower() \
                .replace('yes', 'y') \
                .replace('no', 'n') \
                .replace('quit', 'q') \
                .replace('ignore', 'i') \
                .replace('ign', 'i') \
                .replace('edit', 'e') \
                .replace('correct', 'c') \
                .replace('skip', 's')

            if ans == '':
                ans = valid_answers[0]  # default is 'n'

            if ans == '?':
                ans = 'NEIN'
                print '''Usage:

    n   no      Stick with the original text (the default)
    y   yes     Accept the suggested text
    e   edit    Edit the original text
    c   change  Edit the suggested text
    i   ignore  Ignore for now (again prompted later)
    s   skip    Save and exit
    q   quit    Abort and exit (changes are lost)

                '''

        if ans == 'y':  # Use suggestion
            has_changes = True
            if item.key is None:
                item.set_content(sugg)
            else:
                item.set_content('<{}>{}</{}>'.format(item.key, sugg,
                                                      item.key))
        elif ans == 'n':  # Use original
            pass
        elif ans in ('e', 'c'):  # Edit manually

            def hook():
                curr = ans == 'e' and item.note or sugg
                curr = curr.replace('\n', '\\n')
                readline.insert_text(curr)
                readline.redisplay()

            readline.set_pre_input_hook(hook)
            sugg = raw_input().strip().replace('\\n', '\n')
            readline.set_pre_input_hook(None)
            notes.insert(0, (item, sugg))
        elif ans == 'i':  # Ignore note for now
            notes.append((item, sugg))
        elif ans == 'q':  # Quit immediately, don't save
            return
        elif ans == 's':  # Skip the rest
            break

    # save changes
    if has_changes:
        document.save(target, options)
コード例 #16
0
ファイル: main.py プロジェクト: MTleen/202005-tf_idf
def pdf_split():
    """Create the ``Pdf`` tool in ``split`` mode and build its GUI."""
    Pdf(title='PDF 文档分割', mode='split').construct_gui()
コード例 #17
0
ファイル: ghost.py プロジェクト: vmlellis/Ghost.py
    def __init__(
            self,
            app,
            network_manager,
            wait_timeout=20,
            wait_callback=None,
            viewport_size=(800, 600),
            user_agent='Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.121 Safari/535.2',
            log_level=30,
            download_images=True,
            plugins_enabled=False,
            java_enabled=False,
            create_page_callback=None,
            is_popup=False,
            max_resource_queued=None,
            *args,
            **kargs):
        """Set up the page: state, web settings, signals, network manager and user agent.

        ``app`` is used as the Qt parent. JavaScript and JavaScript-opened
        windows are always enabled; image loading, plugins and Java follow
        the corresponding arguments. ``*args`` and ``**kargs`` are accepted
        but not used here. The module logger's level is set to ``log_level``.
        """
        super(GhostWebPage, self).__init__(parent=app)

        # Plain state
        self._app = app
        self.pdf_engine = Pdf()
        self.http_resources = []
        self.http_resource_page = None
        self.max_resource_queued = max_resource_queued
        self.wait_timeout = wait_timeout
        self.wait_callback = wait_callback
        self.loaded = True
        self.create_page_callback = create_page_callback
        self.is_popup = is_popup
        # Internal library object
        self.ghostInit = GhostInit()

        self.setForwardUnsupportedContent(True)

        # Web settings, applied in this order
        web_settings = QtWebKit.QWebSettings
        attribute_values = (
            (web_settings.AutoLoadImages, download_images),
            (web_settings.JavascriptEnabled, True),
            (web_settings.JavascriptCanOpenWindows, True),
            (web_settings.PluginsEnabled, plugins_enabled),
            (web_settings.JavaEnabled, java_enabled),
        )
        for attribute, enabled in attribute_values:
            self.settings().setAttribute(attribute, enabled)

        width, height = viewport_size
        self.set_viewport_size(width, height)

        # Page signals
        page_signals = (
            (self.loadFinished, self._page_loaded),
            (self.loadStarted, self._page_load_started),
            (self.loadProgress, self._page_load_progress),
            (self.unsupportedContent, self._unsupported_content),
        )
        for signal, handler in page_signals:
            signal.connect(handler)

        # Network access
        self.network_manager = network_manager
        self.setNetworkAccessManager(self.network_manager)
        self.network_manager.finished.connect(self._request_ended)

        # User Agent
        self.setUserAgent(user_agent)

        self.main_frame = self.mainFrame()
        self._unsupported_files = {}
        self.windowCloseRequested.connect(self._closeWindow)

        logger.setLevel(log_level)
コード例 #18
0
def main():
    """Create a PDF of story cards from an exported Pivotal Tracker CSV.

    Parses the command line, reads the CSV (first row is the header), turns
    the remaining rows into stories, draws up to four stories per landscape
    page, writes the PDF and hands it to ``open_file``.

    Returns:
        The path of the written PDF: ``--output`` if given, otherwise the
        CSV path with its extension replaced by ``.pdf``.
    """
    cli = argparse.ArgumentParser(
        description=
        'Create a pdf document from a exported csv of Pivotal Tracker',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    cli.add_argument('csv', help='the file path to the csv file')
    cli.add_argument('-m', '--margin', type=int, default=5,
                     help='margin of the page in mm')
    cli.add_argument('-o', '--output',
                     help='file path to the generated pdf')
    cli.add_argument('-n', '--show-number', action='store_true',
                     help='shows the story number on the bottom left')
    cli.add_argument('-t', '--show-tasks', action='store_true',
                     help='shows the tasks for each story')
    cli.add_argument(
        '-c', '--collate', action='store_true',
        help='collate stories for easier sorting after cutting all pages')
    cli.add_argument(
        '-s', '--strict', action='store_true',
        help='fails if the csv file does not contain all required columns')

    args = cli.parse_args()

    if args.output is None:
        output_file = os.path.splitext(args.csv)[0] + '.pdf'
    else:
        output_file = args.output

    # Each card takes half of the page area left inside the margins.
    # 297 x 210 is presumably A4 landscape in mm (confirm against Pdf).
    margin = args.margin
    card_width = (297 - (margin * 2)) / 2
    card_height = (210 - (margin * 2)) / 2

    with open(args.csv, 'r') as csvfile:
        rows = list(csv.reader(csvfile, delimiter=','))
        header = rows[0]
        if args.strict:
            validate_columns(header)
        # Rows are numbered from 1, skipping the header.
        build_story = partial(make_pivotal_story, header)
        stories = map(build_story, enumerate(rows[1:], 1))

    pdf = Pdf()
    pdf.set_auto_page_break(False)

    # Top-left corner of each of the four cards: row by row, left to right.
    corners = [(margin + dx, margin + dy)
               for dy in (0, card_height)
               for dx in (0, card_width)]

    stories = list(iterstories(stories, include_tasks=args.show_tasks))

    if args.collate:
        split_into_pages = stacked_chunks
    else:
        split_into_pages = chunks

    for page_stories in split_into_pages(stories, 4):
        pdf.add_page('Landscape')
        for story, (left, top) in zip(page_stories, corners):
            story.draw(pdf, left, top, card_width, card_height,
                       args.show_number)

    pdf.output(output_file)

    open_file(output_file)

    return output_file