Python PdfFileWriter.appendPagesFromReader 예제들, PyPDF4.PdfFileWriter.appendPagesFromReader Python 예제들

예제 #1

0

파일 보기

    def decrypt_pdf(filename, password, decrypted_filename=None):
        """
        Decrypt an encrypted PDF and write a copy that needs no password.

        :param filename: path of the encrypted source PDF
        :param password: password passed to ``PdfManager.get_reader``
        :param decrypted_filename: path of the decrypted output file; when
            None, the last four characters of ``filename`` are replaced by
            ``_decrypted.pdf``
        :return: None. Nothing is written when ``PdfManager.get_reader``
            returns None or when the source is not encrypted.
        """
        print('run  jiemi')
        # NOTE(review): get_reader presumably returns None when the file
        # cannot be opened or the password is wrong -- confirm in PdfManager.
        pdf_reader = PdfManager.get_reader(filename, password)
        if pdf_reader is None:
            return
        if not pdf_reader.isEncrypted:
            print('文件没有被加密，无需操作！')
            return

        pdf_writer = PdfFileWriter()
        pdf_writer.appendPagesFromReader(pdf_reader)

        if decrypted_filename is None:
            # Strips a four-character suffix such as ".pdf".
            decrypted_filename = filename[:-4] + '_decrypted.pdf'

        # Write the new file; the context manager guarantees the handle is
        # flushed and closed even if the write fails.
        with open(decrypted_filename, 'wb') as decrypted_file:
            pdf_writer.write(decrypted_file)

예제 #2

0

파일 보기

def decrypt_pdf(filename, password, decrypted_filename=None):
    """Decrypt an encrypted PDF and write a password-free copy.

    :param filename: path of the encrypted source PDF
    :param password: password passed to ``get_reader``
    :param decrypted_filename: path of the output file; when None, the
        extension of ``filename`` is replaced by ``_decrypted.pdf``
    :return: None. Nothing is written when ``get_reader`` returns None or
        when the source is not encrypted.
    """
    import os

    print("解密第1阶段")
    pdf_reader = get_reader(filename, password)
    if pdf_reader is None:
        return
    if not pdf_reader.isEncrypted:
        print("文件没有被加密")
        return
    pdf_writer = PdfFileWriter()
    pdf_writer.appendPagesFromReader(pdf_reader)
    if decrypted_filename is None:
        # splitext keeps any dots inside the base name; the previous
        # list + str concatenation raised TypeError before writing anything.
        decrypted_filename = os.path.splitext(filename)[0] + '_decrypted.pdf'
    # Write for both the default and the caller-supplied output name, and
    # close the handle deterministically.
    with open(decrypted_filename, 'wb') as decrypted_file:
        pdf_writer.write(decrypted_file)

예제 #3

0

파일 보기

파일: PDFutils.py 프로젝트: j-benoit/Prayon

def pdf_add_metadata(path, file, key, value, out_file, out_path=''):
    """Copy a PDF and add one document-information entry to the copy.

    The existing document info of the source is printed before the copy is
    written.

    :param path: directory containing the source PDF
    :param file: file name of the source PDF inside ``path``
    :param key: metadata key without the leading slash ('/' is prepended)
    :param value: value stored under that key
    :param out_file: file name of the PDF to write
    :param out_path: directory for the output; '' (default) means ``path``
    :return: None. A ``ValueError`` raised while reading or writing is
        printed instead of propagated.
    """
    if out_path == '':
        out_path = path
    with open(os.path.join(path, file), 'rb') as pdf:
        try:
            pdf_reader = PdfFileReader(pdf)
            metadata = pdf_reader.getDocumentInfo()
            print(metadata)
            pdf_writer = PdfFileWriter()
            pdf_writer.appendPagesFromReader(pdf_reader)
            pdf_writer.addMetadata({'/' + key: value})
            # The context manager closes the output even when write() raises.
            with open(os.path.join(out_path, out_file), 'wb') as file_out:
                pdf_writer.write(file_out)
        except ValueError as err:
            # Report the actual error rather than the exception class.
            print(err, 'rrrt')

예제 #4

0

파일 보기

파일: add_pages.py 프로젝트: Georgie-Statham/pdf_GUI

    def SaveButton(self, event):
        input_filename = Path(self.input_file)
        dlg = wx.FileDialog(self,
                            message="Save file as...",
                            defaultDir=os.getcwd(),
                            defaultFile=(f"{input_filename.with_suffix('')}+"
                                         f"{self.no_pages.GetValue()}"),
                            wildcard=pdfs,
                            style=wx.FD_SAVE | wx.FD_OVERWRITE_PROMPT)

        if not self.input_path:
            self.error_message("Please select a pdf")

        elif dlg.ShowModal() == wx.ID_OK:
            output_path = dlg.GetPath()
            pdf_writer = PdfFileWriter()
            pdf_reader = PdfFileReader(self.input_path)
            try:
                blank_pages = int(self.no_pages.GetValue())
            except ValueError:
                self.error_message(
                    "The number of pages added must be an integer")
            pdf_writer.appendPagesFromReader(pdf_reader)
            for _ in range(blank_pages):
                pdf_writer.addBlankPage()

            with open(output_path, 'wb') as output:
                pdf_writer.write(output)

            success_dlg = wx.MessageDialog(
                self, f"""You created {dlg.GetFilename()} and saved it at
{dlg.GetPath()}.""", "Success!", wx.OK | wx.ICON_INFORMATION)
            success_dlg.ShowModal()
            success_dlg.Destroy()
            dlg.Destroy()
            self.clear_func()

예제 #5

0

파일 보기

파일: libreoffice_facturx_macro.py 프로젝트: akretion/factur-x-libreoffice-extension

def generate_facturx_from_file(pdf_invoice,
                               facturx_xml,
                               facturx_level='autodetect',
                               check_xsd=True,
                               pdf_metadata=None,
                               output_pdf_file=None,
                               additional_attachments=None):
    """
    Build a Factur-X PDF invoice from a regular PDF invoice and a Factur-X
    XML payload, writing the result to disk.

    :param pdf_invoice: the regular PDF invoice; it is handed to
    PdfFileReader and, when output_pdf_file is not given, opened for
    writing as the destination path
    :type pdf_invoice: string
    :param facturx_xml: the Factur-X XML; this function asserts that it is
    a bytes object
    :type facturx_xml: bytes
    :param facturx_level: the level of the Factur-X XML file. Default value
    is 'autodetect'. It is lower-cased and forwarded to the embedding helper.
    Possible values: minimum, basicwl, basic, en16931.
    :type facturx_level: string
    :param check_xsd: accepted for signature compatibility; it is not read
    anywhere in this function body
    :type check_xsd: boolean
    :param pdf_metadata: metadata of the generated Factur-X PDF, forwarded
    unchanged to the embedding helper. Example:
    pdf_metadata = {
        'author': 'Akretion',
        'keywords': 'Factur-X, Invoice',
        'title': 'Akretion: Invoice I1242',
        'subject':
          'Factur-X invoice I1242 dated 2017-08-17 issued by Akretion',
        }
    :type pdf_metadata: dict
    :param output_pdf_file: file path of the output Factur-X PDF file
    :type output_pdf_file: string or unicode
    :param additional_attachments: other files to embed in the PDF file.
    It is a dict where keys are file paths and values are the description
    of each file (as unicode or string).
    :type additional_attachments: dict
    :return: Returns True. The input PDF invoice file is overwritten unless
    output_pdf_file is provided.
    :rtype: bool
    """
    assert isinstance(facturx_xml, bytes)
    xml_string = facturx_xml
    facturx_level = facturx_level.lower()

    # Load every extra attachment up front; the mapping is keyed by the raw
    # file content, with the display metadata as value.
    additional_attachments_read = {}
    for attach_filepath, attach_desc in (additional_attachments or {}).items():
        attach_name = os.path.basename(attach_filepath)
        attach_mtime = datetime.fromtimestamp(
            os.path.getmtime(attach_filepath))
        with open(attach_filepath, 'rb') as attach_file:
            attach_bytes = attach_file.read()
        additional_attachments_read[attach_bytes] = {
            'filename': attach_name,
            'desc': attach_desc,
            'mod_date': attach_mtime,
        }

    source_reader = PdfFileReader(pdf_invoice)
    # The /OutputIntents object of the original invoice is carried over.
    output_intents = _get_original_output_intents(source_reader)
    writer = PdfFileWriter()
    writer._header = b_("%PDF-1.6")
    writer.appendPagesFromReader(source_reader)

    # Reuse the original document /ID when the source has one.
    source_id = source_reader.trailer.get('/ID')
    if source_id:
        writer._ID = source_id
    _facturx_update_metadata_add_attachment(
        writer,
        xml_string,
        pdf_metadata,
        facturx_level,
        output_intents=output_intents,
        additional_attachments=additional_attachments_read)

    # Write to the explicit output path when given, else over the input.
    destination = output_pdf_file if output_pdf_file else pdf_invoice
    with open(destination, 'wb') as destination_file:
        writer.write(destination_file)
    return True

예제 #6

0

파일 보기

def generate_facturx_from_file(pdf_invoice,
                               facturx_xml,
                               facturx_level='autodetect',
                               check_xsd=True,
                               pdf_metadata=None,
                               output_pdf_file=None,
                               additional_attachments=None,
                               attachments=None):
    """
    Generate a Factur-X invoice from a regular PDF invoice and a factur-X XML
    file. The method uses a file as input (regular PDF invoice) and re-writes
    the file (Factur-X PDF invoice).
    :param pdf_invoice: the regular PDF invoice as file path
    (type string) or as file object
    :type pdf_invoice: string or file
    :param facturx_xml: the Factur-X XML
    :type facturx_xml: bytes, string, file or etree object
    :param facturx_level: the level of the Factur-X XML file. Default value
    is 'autodetect'. The only advantage of specifying a particular value
    instead of using the autodetection is a very small perf improvement.
    Possible values: minimum, basicwl, basic, en16931.
    :type facturx_level: string
    :param check_xsd: if enabled, checks the Factur-X XML file against the XSD
    (XML Schema Definition). If this step has already been performed
    beforehand, you should disable this feature to avoid a double check
    and get a small performance improvement.
    :type check_xsd: boolean
    :param pdf_metadata: Specify the metadata of the generated Factur-X PDF.
    If pdf_metadata is None (default value), this lib will generate some
    metadata in English by extracting relevant info from the Factur-X XML.
    Here is an example for the pdf_metadata argument:
    pdf_metadata = {
        'author': 'Akretion',
        'keywords': 'Factur-X, Invoice',
        'title': 'Akretion: Invoice I1242',
        'subject':
          'Factur-X invoice I1242 dated 2017-08-17 issued by Akretion',
        }
    If you pass the pdf_metadata argument, you will not use the automatic
    generation based on the extraction of the Factur-X XML file, which will
    bring a very small perf improvement. Values that are not strings are
    replaced by '' in the dict you pass.
    :type pdf_metadata: dict
    :param output_pdf_file: File Path to the output Factur-X PDF file
    :type output_pdf_file: string or unicode
    :param attachments: Specify the other files that you want to
    embed in the PDF file. It is a dict where key is the filename and value
    is a dict. In this dict, keys are 'filepath' (value is the full file path)
    or 'filedata' (value is the encoded file),
    'description' (text description, optional) and
    'modification_datetime' (modification date and time as datetime object,
    optional).
    'creation_datetime' (creation date and time as datetime object, optional).
    The dict you pass is modified in place: reserved file names are removed
    and 'filedata' / 'modification_datetime' are filled in from 'filepath'.
    :type attachments: dict
    :param additional_attachments: DEPRECATED. Use attachments instead.
    Only used when attachments is empty. It is a dict where keys are file
    paths and values are the descriptions.
    :return: Returns True. This method re-writes the input PDF invoice file,
    unless the output_pdf_file is provided.
    :rtype: bool
    :raises ValueError: if a required argument is missing or an optional
    argument has the wrong type
    :raises TypeError: if facturx_xml is of an unsupported type
    """
    start_chrono = datetime.now()
    logger.debug('generate_facturx_from_file with factur-x lib %s',
                 __version__)
    logger.debug('1st arg pdf_invoice type=%s', type(pdf_invoice))
    logger.debug('2nd arg facturx_xml type=%s', type(facturx_xml))
    logger.debug('optional arg facturx_level=%s', facturx_level)
    logger.debug('optional arg check_xsd=%s', check_xsd)
    logger.debug('optional arg pdf_metadata=%s', pdf_metadata)
    logger.debug('optional arg additional_attachments=%s',
                 additional_attachments)
    if not pdf_invoice:
        raise ValueError('Missing pdf_invoice argument')
    if not facturx_xml:
        raise ValueError('Missing facturx_xml argument')
    if not isinstance(facturx_level, (str, unicode)):
        raise ValueError('Wrong facturx_level argument')
    if not isinstance(check_xsd, bool):
        raise ValueError('check_xsd argument must be a boolean')
    if not isinstance(pdf_metadata, (type(None), dict)):
        raise ValueError('pdf_metadata argument must be a dict or None')
    if not isinstance(additional_attachments, (dict, type(None))):
        raise ValueError(
            'additional_attachments argument must be a dict or None')
    if not isinstance(output_pdf_file, (type(None), str, unicode)):
        raise ValueError('output_pdf_file argument must be a string or None')
    if isinstance(pdf_invoice, (str, unicode)):
        file_type = 'path'
    else:
        file_type = 'file'
    xml_root = None
    # in Python3, xml_string is a byte
    if isinstance(facturx_xml, (str, bytes)):
        xml_string = facturx_xml
    elif isinstance(facturx_xml, unicode):
        xml_string = facturx_xml.encode('utf8')
    elif isinstance(facturx_xml, type(etree.Element('pouet'))):
        xml_root = facturx_xml
        xml_string = etree.tostring(xml_root,
                                    pretty_print=True,
                                    encoding='UTF-8',
                                    xml_declaration=True)
    elif isinstance(facturx_xml, file):
        facturx_xml.seek(0)
        xml_string = facturx_xml.read()
        facturx_xml.close()
    else:
        raise TypeError(
            "The second argument of the method generate_facturx must be "
            "either a string, an etree.Element() object or a file "
            "(it is a %s)." % type(facturx_xml))
    # The additional_attachments arg is deprecated
    if attachments is None:
        attachments = {}
    if additional_attachments and not attachments:
        logger.warning(
            'The argument additional_attachments is deprecated. '
            'It will be removed in future versions. Use the argument '
            'attachments instead.')
        # Convert the deprecated {filepath: description} mapping into the
        # {filename: {...}} structure used by the attachments argument.
        for attach_filepath, attach_desc in additional_attachments.items():
            filename = os.path.basename(attach_filepath)
            mod_timestamp = os.path.getmtime(attach_filepath)
            mod_dt = datetime.fromtimestamp(mod_timestamp)
            with open(attach_filepath, 'rb') as fa:
                attachments[filename] = {
                    'filedata': fa.read(),
                    'description': attach_desc,
                    'modification_datetime': mod_dt,
                }
    if attachments:
        # Iterate over a snapshot: entries may be removed inside the loop,
        # and removing from a dict while iterating its live items() view
        # raises RuntimeError on Python 3.
        for filename, fadict in list(attachments.items()):
            if filename in [FACTURX_FILENAME] + ZUGFERD_FILENAMES:
                logger.warning(
                    'You cannot provide as attachment a file named %s. '
                    'This file will NOT be attached.', filename)
                attachments.pop(filename)
                continue
            if fadict.get('filepath') and not fadict.get('filedata'):
                with open(fadict['filepath'], 'rb') as fa:
                    fadict['filedata'] = fa.read()

                # As explained here
                # https://stackoverflow.com/questions/237079/how-to-get-file-creation-modification-date-times-in-python
                # creation date is not easy to get.
                # So we only implement getting the modification date
                if not fadict.get('modification_datetime'):
                    mod_timestamp = os.path.getmtime(fadict['filepath'])
                    fadict['modification_datetime'] = datetime.fromtimestamp(
                        mod_timestamp)
    if pdf_metadata is None:
        if xml_root is None:
            xml_root = etree.fromstring(xml_string)
        base_info = _extract_base_info(xml_root)
        pdf_metadata = _base_info2pdf_metadata(base_info)
    else:
        # clean-up pdf_metadata dict (only existing keys are reassigned, so
        # the dict size does not change during iteration)
        for key, value in pdf_metadata.items():
            if not isinstance(value, (str, unicode)):
                pdf_metadata[key] = ''
    facturx_level = facturx_level.lower()
    if facturx_level not in FACTURX_LEVEL2xsd:
        if xml_root is None:
            xml_root = etree.fromstring(xml_string)
        logger.debug('Factur-X level will be autodetected')
        facturx_level = get_facturx_level(xml_root)
    if check_xsd:
        check_facturx_xsd(xml_string,
                          flavor='factur-x',
                          facturx_level=facturx_level)
    original_pdf = PdfFileReader(pdf_invoice)
    # Extract /OutputIntents obj from original invoice
    output_intents = _get_original_output_intents(original_pdf)
    new_pdf_filestream = PdfFileWriter()
    new_pdf_filestream._header = b_("%PDF-1.6")
    new_pdf_filestream.appendPagesFromReader(original_pdf)

    original_pdf_id = original_pdf.trailer.get('/ID')
    logger.debug('original_pdf_id=%s', original_pdf_id)
    if original_pdf_id:
        new_pdf_filestream._ID = original_pdf_id
        # else : generate some ?
    _facturx_update_metadata_add_attachment(new_pdf_filestream,
                                            xml_string,
                                            pdf_metadata,
                                            facturx_level,
                                            output_intents=output_intents,
                                            additional_attachments=attachments)
    if output_pdf_file:
        with open(output_pdf_file, 'wb') as output_f:
            new_pdf_filestream.write(output_f)
    else:
        if file_type == 'path':
            with open(pdf_invoice, 'wb') as f:
                new_pdf_filestream.write(f)
        elif file_type == 'file':
            # The caller owns the file object; it is written to, not closed.
            new_pdf_filestream.write(pdf_invoice)
    logger.info('%s file added to PDF invoice', FACTURX_FILENAME)
    end_chrono = datetime.now()
    logger.info('Factur-X invoice generated in %s seconds',
                (end_chrono - start_chrono).total_seconds())
    return True

예제 #7

0

파일 보기

파일: change_date.py 프로젝트: protoss97/pdfmetamodify

# -*- coding:utf-8 -*-
# Author:Liu Hongliang

from PyPDF4 import PdfFileReader, PdfFileWriter

# Copy f:\c1.pdf to f:\c2.pdf with /ModDate set to the source's /CreationDate.
# Context managers close both files even if reading or writing raises.
with open("f:\\c1.pdf", "rb") as source_file:
    fin = PdfFileReader(source_file)
    info = dict(fin.getDocumentInfo())
    info['/ModDate'] = info['/CreationDate']
    print(info)
    fout = PdfFileWriter()
    fout.addMetadata(info)
    fout.appendPagesFromReader(fin)
    # The writer pulls page data from the reader, so the source must still
    # be open while the destination is written.
    with open("f:\\c2.pdf", "wb") as dest_file:
        fout.write(dest_file)

예제 #8

0

파일 보기

        embeddedFilesDictionary.update(
            {NameObject("/EmbeddedFiles"): embeddedFilesNamesDictionary})
        myPdfFileWriterObj._root_object.update(
            {NameObject("/Names"): embeddedFilesDictionary})
    else:
        myPdfFileWriterObj._root_object["/Names"]["/EmbeddedFiles"][
            "/Names"].append(createStringObject(fname))
        myPdfFileWriterObj._root_object["/Names"]["/EmbeddedFiles"][
            "/Names"].append(filespec)


if __name__ == "__main__":
    # Command-line entry point: read a JSON config, copy the PDF named by
    # config['dstFileName'], add the configured metadata and attachments,
    # and write the result next to it as '<dstFileName>.finalized.pdf'.
    parser = argparse.ArgumentParser()
    parser.add_argument('config', help='Path to config file.')
    args = parser.parse_args()
    with open(args.config) as f:
        config = json.load(f)
    # PdfFileReader's second positional parameter is ``strict``, not a file
    # mode, so the stray 'rb' only acted as a truthy flag; the default is
    # used instead.
    fr = PdfFileReader(config['dstFileName'])
    fw = PdfFileWriter()
    fw.appendPagesFromReader(fr)
    for key, value in config['pdfInfo'].items():
        fw.addMetadata({key: value})

    for oneAttachment in config['pdfAttachments']:
        with open(oneAttachment['srcFileName'], 'rb') as oneAttachmentFile:
            attachmentData = oneAttachmentFile.read()
            appendAttachment(fw, oneAttachment['attFileName'], attachmentData)

    # Named so it does not shadow the ``file`` builtin.
    with open(config['dstFileName'] + '.finalized.pdf', 'wb') as finalized_pdf:
        fw.write(finalized_pdf)

예제 #9

0

파일 보기

파일: PdfToBookSignaturesGUIv2.py 프로젝트: Fraser-D/PdfToBookbindingSignatures

def process_the_pdf():
    """Reorder the selected PDF into signatures and save it 2-up.

    After the user confirms, the input PDF is padded with the configured
    number of blank pages at the start and end, its pages are grouped with
    ``chunker`` and reordered with ``shufoutput``, and the result is written
    to the output path. That file is then read back, passed through
    ``frasers2up`` and written to the same path again. A missing input or
    output file is reported in an error dialog.
    """
    confirm = tk.messagebox.askyesno("Doing the thing",
                                     "Are you sure you want to do the thing?")
    if not confirm:
        return

    try:
        pdf = Pdfread(pdf_path.get())
        wf = Pdfwrite()
        wf.appendPagesFromReader(pdf)

        for _ in range(blank_start.get()):
            wf.insertBlankPage(0)

        for _ in range(end_blank_pages.get()):
            wf.addBlankPage()

        # Group the page indices into signatures, then compute the new order.
        list_pages = list(range(wf.getNumPages()))
        list_sigs_pages = list(chunker(list_pages, sig_thickness_x4.get()))
        newpageorder = shufoutput(list_sigs_pages)

        newpdf = Pdfwrite()
        for page in newpageorder:
            newpdf.addPage(wf.getPage(page))

        destpath = pdf_output_path.get()

        # Intermediate save of the reordered document.
        with open(destpath, 'wb+') as out:
            newpdf.write(out)

        # Re-read the reordered file and build the 2-up version from it.
        up2in = Pdfread(destpath)
        up2 = Pdfwrite()
        for i in range(up2in.getNumPages()):
            up2.addPage(up2in.getPage(i))

        up2 = frasers2up(up2)

        with open(destpath, "wb+") as up2out:
            up2.write(up2out)

        tk.messagebox.showinfo(
            "Finished", "Finished, and maybe it even worked this time!")

    except FileNotFoundError:
        tk.messagebox.showerror(
            "File not found",
            "Either the input file or the output files is wrong.\nOr both of them. Who knows?"
        )