Ejemplo n.º 1
0
 def set_up_meta(self, control_sum):
     """Rewrite the PDF at ``self.path`` with a new trailer Info dictionary.

     All pages of the existing file are copied into a fresh writer, the
     writer's trailer ``Info`` is set to a dictionary holding ``Owner``
     (``self.user_id``), ``ControlSum`` (``control_sum``) and an empty
     ``SignedBy``, and the result is written back to ``self.path``.

     Args:
         control_sum: Value stored under the ``ControlSum`` key.
     """
     output = pdfrw.PdfWriter()
     source = pdfrw.PdfReader(self.path)
     for pdf_page in source.pages:
         output.addPage(pdf_page)

     info_fields = {
         'Owner': self.user_id,
         'ControlSum': control_sum,
         'SignedBy': '',
     }
     output.trailer.Info = pdfrw.IndirectPdfDict(**info_fields)

     # The output overwrites the file the pages were read from.
     output.write(self.path)
Ejemplo n.º 2
0
    def make_output(self):
        """Return the serialized metadata wrapped in a PDF metadata stream.

        ``self.md`` is serialized to a string, byte-order marks are removed
        and trailing whitespace is stripped. The text is then stored as the
        stream of an indirect dictionary with ``Type`` ``Metadata`` and
        ``Subtype`` ``XML``.

        Returns:
            The ``pdfrw.IndirectPdfDict`` carrying the metadata stream.
        """
        serialized = self.md.serialize_to_str()
        without_bom = serialized.replace("\ufeff", "")
        xmp_text = libxmp.core._remove_trailing_whitespace(without_bom)

        metadata = pdfrw.IndirectPdfDict(
            Type=pdfrw.PdfName("Metadata"),
            Subtype=pdfrw.PdfName("XML"),
        )
        # Re-express the UTF-8 bytes as latin-1 text, so every character of
        # the stream corresponds to exactly one encoded byte.
        utf8_bytes = xmp_text.encode("utf-8")
        metadata.stream = utf8_bytes.decode("latin-1")
        return metadata
Ejemplo n.º 3
0
 def pdfInfo(self):
     """Return an indirect PDF dictionary with the document information.

     The entries are taken from the instance attributes ``title``,
     ``author``, ``subject``, ``keywords``, ``creator`` and ``producer``.
     ``CreationDate`` and ``ModDate`` each come from their own call to
     ``self.pdftime()``.
     """
     info_fields = {
         'Title': self.title,
         'Author': self.author,
         'Subject': self.subject,
         'Keywords': self.keywords,
         'Creator': self.creator,
         'Producer': self.producer,
         'CreationDate': self.pdftime(),
         'ModDate': self.pdftime(),
     }
     return pdfrw.IndirectPdfDict(**info_fields)
Ejemplo n.º 4
0
    def __init__(self):
        """Build the output-intent array from the bundled sRGB2014 profile.

        The ICC profile is read from ``icc/sRGB2014.icc`` next to this
        module, embedded as a stream dictionary, and referenced from a
        single ``OutputIntent`` dictionary that is stored, wrapped in a
        list, in ``self.output_intent``.
        """
        # Locate and open the sRGB2014 ICC color profile.
        module_dir = pathlib.Path(__file__).absolute().parent
        profile_path = module_dir.joinpath("icc", "sRGB2014.icc")
        profile = ImageCms.getOpenProfile(str(profile_path))

        # Output profile stream; N=3 is required for RGB colorspaces.
        profile_stream = pdfrw.IndirectPdfDict(
            N=3,
            Alternate=pdfrw.PdfName("DeviceRGB"),
        )
        profile_stream.stream = profile.tobytes().decode("latin-1")

        # The output intent that points at the embedded profile.
        intent_fields = {
            "Type": pdfrw.PdfName("OutputIntent"),
            "S": pdfrw.PdfName("GTS_PDFA1"),
            "OutputConditionIdentifier": "sRGB",
            "DestOutputProfile": profile_stream,
            "Info": profile.profile.profile_description,
            # NOTE(review): the original author was unsure whether this
            # registry name is correct; it is only known not to fail.
            "RegistryName": "http://color.org/srgbprofiles.xalter",
        }
        self.output_intent = [pdfrw.IndirectPdfDict(**intent_fields)]
Ejemplo n.º 5
0
    return template_pdf


def build_datadict(in_file, date="January 25th, 2020"):
    """Read a CSV file and return one form-data dict per non-empty row.

    For every row, the columns named in ``FIELDS`` are copied when their
    value is neither empty nor whitespace-only. Rows that contribute no
    value at all are dropped; every kept row additionally gets a ``Date``
    entry.

    Args:
        in_file: Path of the comma-separated input file; its header row
            must contain every name in ``FIELDS``.
        date: Text stored under the ``Date`` key of each returned dict.

    Returns:
        A list of dicts mapping field names to cell text, in file order.

    Raises:
        KeyError: If a name in ``FIELDS`` is not a column of the file.
    """
    records = []
    # newline="" leaves newline handling to the csv module, as its
    # documentation requires; otherwise newlines embedded in quoted
    # fields may be misinterpreted.
    with open(in_file, newline="") as file:
        for row in csv.DictReader(file, delimiter=','):
            # Truthiness already rules out None (short rows) and "", so
            # only whitespace-only cells need an explicit check.
            record = {
                field: row[field]
                for field in FIELDS
                if row[field] and not row[field].isspace()
            }
            if record:
                record['Date'] = date
                records.append(record)
    return records


if __name__ == '__main__':
    # Script entry point: fill the template once per CSV record and merge
    # all filled pages into one PDF named after the input file.
    # Local import: only this entry point needs it.
    import os.path

    data = build_datadict(IN_FILE)
    writer = pdfrw.PdfWriter()
    writer.trailer.Info = pdfrw.IndirectPdfDict(Title='Combined PDF')
    # Iterate array of 'data_dict's
    for d in data:
        this_pages = modify_form(TEMPLATE_FILE, d)  # fill the form
        this_pages.Root.AcroForm.update(
            pdfrw.PdfDict(NeedAppearances=pdfrw.PdfObject(
                'true')))  # maintain appearances
        writer.addpages(this_pages.pages)  # merge into single pdf
    # splitext removes only the final extension. Splitting on the first "."
    # would truncate paths such as "./in.csv" (to "") or "my.data.csv".
    writer.write(os.path.splitext(IN_FILE)[0] + ".pdf")
Ejemplo n.º 6
0
# The first command-line argument selects the operation to run.
flag = sys.argv[1]

if flag == "-c":  # combine
    # Merge every PDF in the data directory, in natural sort order, into
    # a single combined.pdf written to that same directory.
    import pdfrw
    import natsort

    data_dir = f"{script_root}/data"
    output_name = "combined.pdf"
    writer = pdfrw.PdfWriter()

    for file in natsort.natsorted(os.listdir(data_dir)):
        # Skip the output of a previous run; otherwise the old combined
        # file would be merged into the new one.
        if file.endswith(".pdf") and file != output_name:
            print(file)
            pdf_path = f"{data_dir}/{file}"
            writer.addpages(pdfrw.PdfReader(pdf_path).pages)

    writer.trailer.Info = pdfrw.IndirectPdfDict(Title="")
    writer.write(f"{data_dir}/{output_name}")
elif flag == "-o":  # ocr
    import ocrmypdf

    for file in os.listdir(f"{script_root}/data"):
        if file.endswith(".pdf"):
            print(file)
            pdf_path = f"{script_root}/data/{file}"

            pid = os.fork()
            if pid > 0:  # parent process
                os.waitpid(pid, 0)  # wait for child process to end
            elif pid == 0:  # child process
                ocrmypdf.ocr(
                    input_file=pdf_path,