Ejemplo n.º 1
0
 def _encode_image(cls, image_filename, lossless):
     """Read an image file and wrap its data in a PDFImage.

     When ``lossless`` is true the file is parsed with PnmPicture and the
     pixel data is stored through EncodedObject.create().  Otherwise the raw
     file bytes are embedded unchanged, tagged with Filter.DCTDecode, and
     the geometry is obtained from cls._get_image_geometry().

     Raises KeyError when the picture format (lossless) or the reported
     colorspace name (lossy) has no entry in the lookup tables below.
     """
     if not lossless:
         # Lossy path: the file content itself becomes the stream data.
         with open(image_filename, "rb") as f:
             imgdata = EncodedObject(encoded_data=f.read(),
                                     filtering=Filter.DCTDecode)
         geometry = cls._get_image_geometry(image_filename)
         (width, height, colorspace_name, bits_per_component) = geometry
         known_colorspaces = {
             "Gray": PDFImageColorSpace.DeviceGray,
             "sRGB": PDFImageColorSpace.DeviceRGB,
         }
         colorspace = known_colorspaces[colorspace_name]
     else:
         picture = PnmPicture.read_file(image_filename)
         if picture.img_format == PnmPictureFormat.Bitmap:
             # PDFs use exactly inverted syntax for 1-bit images
             picture.invert()
         imgdata = EncodedObject.create(picture.data)
         format_properties = {
             PnmPictureFormat.Bitmap: (PDFImageColorSpace.DeviceGray, 1),
             PnmPictureFormat.Graymap: (PDFImageColorSpace.DeviceGray, 8),
             PnmPictureFormat.Pixmap: (PDFImageColorSpace.DeviceRGB, 8),
         }
         (colorspace,
          bits_per_component) = format_properties[picture.img_format]
         (width, height) = (picture.width, picture.height)

     return PDFImage(width=width,
                     height=height,
                     colorspace=colorspace,
                     bits_per_component=bits_per_component,
                     imgdata=imgdata,
                     inverted=False)
	def _test_png_predictors(self, columns, encoded_data, pixel_data):
		"""Decode encoded_data with the PNG optimum predictor and compare it to pixel_data.

		The comparison is done on the total length first, then row by row
		(each row being `columns` bytes wide) so that a failure names the
		offending row, and finally on the complete buffer.
		"""
		encoded = EncodedObject(encoded_data, Filter.Uncompressed, columns = columns, predictor = Predictor.PNGPredictionOptimum)
		decoded = encoded.decode()
		self.assertEqual(len(decoded), len(pixel_data))
		for (row_number, row_start) in enumerate(range(0, len(pixel_data), columns)):
			row_end = row_start + columns
			self.assertEqual(decoded[row_start : row_end], pixel_data[row_start : row_end], "Row %d" % (row_number))
		self.assertEqual(decoded, pixel_data)
Ejemplo n.º 3
0
    def _generate_form(self):
        """Merge the seal and signature form templates into the PDF.

        The signature form's /BBox is set from self._get_signature_bbox()
        and the ${WIDTH}, ${HEIGHT} and ${TEXT} placeholders inside its
        stream are substituted before the stream is stored compressed.

        Returns the xref of the signature form object.
        """
        font_xref = self._get_font_reference()

        seal_source = pkgutil.get_data("llpdf.resources", "seal.pdft")
        seal_objects = PDFTemplate(seal_source).merge_into_pdf(self._pdf)
        seal_xref = seal_objects["SealObject"]

        sign_source = pkgutil.get_data("llpdf.resources", "sign_form.pdft")
        sign_template = PDFTemplate(sign_source)
        sign_template["FontXRef"] = font_xref
        sign_template["SealFormXRef"] = seal_xref
        sign_objects = sign_template.merge_into_pdf(self._pdf)
        signform_xref = sign_objects["SignFormObject"]

        signform = self._pdf.lookup(signform_xref)
        signform.content[PDFName("/BBox")] = self._get_signature_bbox()
        form_stream = signform.stream.decode()

        (posx, posy, width, height) = self._get_signature_bbox()
        # Substitution order is WIDTH, HEIGHT, TEXT.
        substitutions = (
            ("WIDTH", b"%.0f" % (width - 1)),
            ("HEIGHT", b"%.0f" % (height - 1)),
            ("TEXT", self._get_signing_text()),
        )
        for (varname, replacement) in substitutions:
            placeholder = ("${" + varname + "}").encode("ascii")
            form_stream = form_stream.replace(placeholder, replacement)

        signform.set_stream(EncodedObject.create(form_stream, compress=True))
        return signform_xref
Ejemplo n.º 4
0
	def _add_xmp_metadata(self):
		"""Create an XMP metadata stream object and return its xref.

		The modification and creation dates are taken from the /ModDate and
		/CreationDate entries of the document's /Info dictionary; a missing
		entry falls back to the current local time. The remaining fields
		come from self._pdf.get_info() and are XML-escaped before they are
		substituted into the "xmp_metadata.xml" template, so that characters
		such as "&" or "<" in a title cannot produce malformed XML.

		The resulting stream is stored uncompressed in a freshly allocated
		object of /Type /Metadata, /Subtype /XML.

		Raises KeyError when the trailer has no /Info entry.
		"""
		from xml.sax.saxutils import escape

		info_node_xref = self._pdf.trailer[PDFName("/Info")]
		info_node = self._pdf.lookup(info_node_xref)

		metadata_date = Timestamp.localnow()

		def info_timestamp(key):
			# Use the timestamp stored in the info dictionary when present,
			# otherwise the time at which this metadata is generated.
			if key in info_node.content:
				return Timestamp.frompdf(info_node.content[key].decode("ascii"))
			return metadata_date

		def info_text(key):
			# Only string values are escaped; anything else (e.g. a missing
			# entry) is passed through and formatted exactly as before.
			value = self._pdf.get_info(key)
			return escape(value) if isinstance(value, str) else value

		modify_date = info_timestamp(PDFName("/ModDate"))
		create_date = info_timestamp(PDFName("/CreationDate"))
		xmp_metadata = {
			"creator_tool":			info_text("Creator"),
			"producer":				info_text("Producer"),
			"modify_date":			modify_date.format_xml(),
			"create_date":			create_date.format_xml(),
			"metadata_date":		metadata_date.format_xml(),
			"description":			info_text("Subject"),
			"title":				info_text("Title"),
			"creator":				info_text("Author"),
			"keywords":				info_text("Keywords"),
			"document_uuid":		str(uuid.uuid4()),
			"instance_uuid":		str(uuid.uuid4()),
			"llpdf_version":	"llpdf " + llpdf.VERSION,
		}

		xmp_metadata_template = pkgutil.get_data("llpdf.resources", "xmp_metadata.xml").decode("utf-8")
		stream = (xmp_metadata_template % xmp_metadata).encode("utf-8")
		content = {
			PDFName("/Type"):			PDFName("/Metadata"),
			PDFName("/Subtype"):		PDFName("/XML"),
		}
		objid = self._pdf.get_free_objid()
		pdf_object = PDFObject.create(objid, gennum = 0, content = content, stream = EncodedObject.create(stream, compress = False))
		self._pdf.replace_object(pdf_object)
		return pdf_object.xref
Ejemplo n.º 5
0
 def _create_object(self, content, raw_stream=None):
     """Add a new generation-0 object with the given content to the PDF.

     When ``raw_stream`` is not None it is wrapped with
     EncodedObject.create() and attached as the object's stream.

     Returns a PDFXRef pointing at the newly allocated object id.
     """
     new_objid = self._pdf.get_free_objid()
     new_object = PDFObject.create(objid=new_objid, gennum=0, content=content)
     wants_stream = raw_stream is not None
     if wants_stream:
         encoded_stream = EncodedObject.create(raw_stream)
         new_object.set_stream(encoded_stream)
     self._pdf.replace_object(new_object)
     return PDFXRef(new_objid, 0)
Ejemplo n.º 6
0
	def get_fontfile_object(self, objid):
		"""Build a generation-0 PDFObject holding this font's data.

		The stream is the concatenation of the clear, cipher and trailer
		data, stored compressed; /Length1, /Length2 and /Length3 record
		the length of each of those three parts in that order.
		"""
		(clear_part, cipher_part, trailer_part) = (self._cleardata, self._cipherdata, self._trailerdata)
		content = { }
		content[PDFName("/Length1")] = len(clear_part)
		content[PDFName("/Length2")] = len(cipher_part)
		content[PDFName("/Length3")] = len(trailer_part)
		font_stream = EncodedObject.create(clear_part + cipher_part + trailer_part, compress = True)
		return PDFObject.create(objid, 0, content, font_stream)
Ejemplo n.º 7
0
	def serialize_xref_object(self, trailer_dict, objid):
		"""Build a /Type /XRef stream object based on a copy of trailer_dict.

		The copy is extended (or overridden) with /Type, /Index, /Size and
		/W; the caller's trailer_dict is not modified. The stream data comes
		from self._serialize_xref_data() using the same offset width that
		is announced in /W.
		"""
		offset_width = self._get_offset_width()
		entry_count = self._max_objid + 1

		content = dict(trailer_dict)
		content[PDFName("/Type")] = PDFName("/XRef")
		content[PDFName("/Index")] = [ 0, entry_count ]
		content[PDFName("/Size")] = entry_count
		content[PDFName("/W")] = [ 1, offset_width, 1 ]

		xref_stream = EncodedObject.create(self._serialize_xref_data(offset_width))
		return PDFObject.create(objid = objid, gennum = 0, content = content, stream = xref_stream)
Ejemplo n.º 8
0
	def _add_color_profile(self):
		"""Store a color profile as a new stream object and return its xref.

		The profile is read from the file named by self._args.color_profile;
		when that argument is None, the bundled
		"sRGB_IEC61966-2-1_black_scaled.icc" resource is used instead.
		"""
		profile_filename = self._args.color_profile
		if profile_filename is not None:
			with open(profile_filename, "rb") as f:
				profile_data = f.read()
		else:
			profile_data = pkgutil.get_data("llpdf.resources", "sRGB_IEC61966-2-1_black_scaled.icc")

		content = {
			PDFName("/N"):			3,
			PDFName("/Range"):		[ 0, 1, 0, 1, 0, 1 ],
		}
		profile_stream = EncodedObject.create(profile_data)
		pdf_object = PDFObject.create(self._pdf.get_free_objid(), gennum = 0, content = content, stream = profile_stream)
		self._pdf.replace_object(pdf_object)
		return pdf_object.xref
Ejemplo n.º 9
0
    def run(self):
        """Embed the file named by self._args.embed_payload into the PDF.

        The file content is stored as the uncompressed stream of a newly
        allocated object whose dictionary records the original base
        filename, the file's modification time (UTC, formatted as
        "%Y-%m-%dT%H:%M:%SZ") and the llpdf version.
        """
        with open(self._args.embed_payload, "rb") as f:
            payload = f.read()

        objid = self._pdf.get_free_objid()
        self._log.debug(
            "Embedding %d bytes payload from file \"%s\" into PDF file as objid %d",
            len(payload), self._args.embed_payload, objid)

        mtime = os.stat(self._args.embed_payload).st_mtime
        # datetime.utcfromtimestamp() is deprecated since Python 3.12; the
        # timezone-aware conversion yields the same formatted UTC string.
        mtime_utc = datetime.datetime.fromtimestamp(
            mtime, tz=datetime.timezone.utc)
        mtime_str = mtime_utc.strftime("%Y-%m-%dT%H:%M:%SZ")
        content = {
            PDFName("/PDFMinify.OriginalFilename"):
            os.path.basename(self._args.embed_payload).encode(),
            PDFName("/PDFMinify.MTime"):
            mtime_str.encode(),
            PDFName("/PDFMinify.Version"):
            llpdf.VERSION.encode(),
        }
        obj = PDFObject.create(objid=objid, gennum=0, content=content)
        obj.set_stream(EncodedObject.create(payload, compress=False))
        self._pdf.replace_object(obj)
Ejemplo n.º 10
0
 def stream(self):
     """Return this object's stream as an EncodedObject, or None if it has no stream."""
     if self.has_stream:
         return EncodedObject.from_object(self)
     return None
Ejemplo n.º 11
0
 def test_flate_decompress(self):
     """A FlateDecode-filtered EncodedObject decodes to the original bytes."""
     deflated = bytes.fromhex("78 9c 73 cb cf 4f 4a 2c  02 00 07 eb 02 5a")
     decoded = EncodedObject(deflated, Filter.FlateDecode).decode()
     self.assertEqual(decoded, b"Foobar")
Ejemplo n.º 12
0
 def test_no_decompress(self):
     """An EncodedObject with Filter.Uncompressed decodes to its data unchanged."""
     decoded = EncodedObject(b"Foobar", Filter.Uncompressed).decode()
     self.assertEqual(decoded, b"Foobar")
Ejemplo n.º 13
0
 def append_stream(self, text):
     """Append UTF-8 encoded text to the stream of the contents object.

     If the contents object already has a stream, its decoded data is kept
     and the new text follows after a newline; otherwise the text becomes
     the whole stream.
     """
     appended = text.encode("utf-8")
     existing_stream = self.contents_obj.stream
     if existing_stream is None:
         combined = appended
     else:
         combined = existing_stream.decode() + b"\n" + appended
     self.contents_obj.set_stream(EncodedObject.create(combined))
Ejemplo n.º 14
0
	def serialize(self, serializer):
		"""Serialize the contained objects into one /Type /ObjStm object.

		The stream starts with a header line of space-separated
		"objid offset" pairs (offsets relative to the end of the header),
		followed by the serialized content of each contained object, each
		terminated by a newline. /First is the length of the header line.
		"""
		index_values = [ ]
		body = bytearray()
		for contained in self._contained_objects:
			serialized = serializer.serialize(contained.content)
			# The offset is where this object starts inside the body.
			index_values += [ contained.objid, len(body) ]
			body += serialized + b"\n"

		index_line = " ".join(map(str, index_values)).encode("utf-8") + b"\n"
		content = {
			PDFName("/Type"):	PDFName("/ObjStm"),
			PDFName("/N"):		self.objects_inside_count,
			PDFName("/First"):	len(index_line),
		}
		objstm_stream = EncodedObject.create(index_line + body)
		return PDFObject.create(objid = self.objid, gennum = 0, content = content, stream = objstm_stream)
Ejemplo n.º 15
0
 def run(self):
     """Replace every compressed, decompressible stream with its uncompressed data."""
     for pdf_obj in self._pdf.stream_objects:
         if not (pdf_obj.stream.compressed and pdf_obj.stream.decompressible):
             continue
         plain_data = pdf_obj.stream.decode()
         pdf_obj.set_stream(EncodedObject.create(plain_data, compress=False))
Ejemplo n.º 16
0
    def run(self):
        """Apply a fixed sequence of in-place fix-ups to self._pdf.

        In order: set a random trailer /ID, disable image interpolation,
        remove /Group entries from pages and Form XObjects, add a color
        profile, color intent and XMP metadata object and reference the
        latter two from every catalog, set the annotation flags of all
        annotations to 4, and finally patch font objects and their font
        descriptors (/CIDToGIDMap, /CharSet, /CIDSet).

        NOTE(review): these steps look like PDF/A conformance fixes --
        confirm against the enclosing class.
        """
        # Put an ID into the PDF
        self._pdf.trailer[PDFName("/ID")] = [os.urandom(16), os.urandom(16)]

        # Do not interpolate any image objects
        for image_obj in self._pdf.image_objects:
            image_obj.content[PDFName("/Interpolate")] = False

        # No pages may be transparency groups
        for page in self._pdf.pages:
            if PDFName("/Group") in page.content:
                del page.content[PDFName("/Group")]

        # No transparency groups in Form XObjects
        for obj in self._pdf:
            if (obj.getattr(PDFName("/Type"))
                    == PDFName("/XObject")) and (obj.getattr(
                        PDFName("/Subtype")) == PDFName("/Form")) and (
                            obj.getattr(PDFName("/Group")) is not None):
                del obj.content[PDFName("/Group")]

        # Add color profile data
        color_profile_xref = self._add_color_profile()

        # Add color intent object
        color_intent_xref = self._add_color_intent(color_profile_xref)

        # Add XMP metadata
        metadata_xref = self._add_xmp_metadata()

        # Set output intent and metadata reference for all catalogs
        for obj in self._pdf:
            if obj.getattr(PDFName("/Type")) == PDFName("/Catalog"):
                obj.content[PDFName("/OutputIntents")] = color_intent_xref
                obj.content[PDFName("/Metadata")] = metadata_xref

        # Set all annotations with annotation flag "printable" (4)
        # Note that this overwrites any /F value that was present before.
        for obj in self._pdf:
            if obj.getattr(PDFName("/Type")) == PDFName("/Annot"):
                obj.content[PDFName("/F")] = 4

        # Font descriptors that were already handled; a descriptor that is
        # referenced by several fonts is only patched once.
        fixed_descriptors = set()
        # Iterate over a list copy: new CIDSet objects are inserted into
        # the document via replace_object() inside this loop.
        for obj in list(self._pdf):
            if obj.getattr(PDFName("/Type")) == PDFName("/Font"):
                font_obj = obj

                if font_obj.getattr(
                        PDFName("/Subtype")) == PDFName("/CIDFontType2"):
                    # Type2 fonts need to have a CIDtoGIDMap
                    font_obj.content[PDFName("/CIDToGIDMap")] = PDFName(
                        "/Identity")

                if PDFName("/FontDescriptor") in font_obj.content:
                    font_descriptor_xref = font_obj.content[PDFName(
                        "/FontDescriptor")]
                    if font_descriptor_xref in fixed_descriptors:
                        continue
                    fixed_descriptors.add(font_descriptor_xref)

                    font_descriptor_obj = self._pdf.lookup(
                        font_descriptor_xref)
                    if font_obj.getattr(
                            PDFName("/Subtype")) == PDFName("/Type1"):
                        # Update Type1 font descriptors with missing CharSet entries
                        # (raises KeyError if the descriptor has no /FontFile entry)
                        font_file_obj = self._pdf.lookup(
                            font_descriptor_obj.content[PDFName("/FontFile")])
                        t1_font = T1Font.from_fontfile_obj(font_file_obj)
                        font_descriptor_obj.content[PDFName(
                            "/CharSet")] = t1_font.charset_string
                    elif font_obj.getattr(
                            PDFName("/Subtype")) == PDFName("/CIDFontType2"):
                        # Type2 font descriptors need to have a CIDSet
                        glyph_count = self.type2_font_glyph_count(
                            font_obj.content[PDFName("/W")])

                        # One bit per glyph: full 0xff bytes for every eight
                        # glyphs, then a final byte whose set_bits
                        # highest-order bits are set. When glyph_count is a
                        # multiple of eight, that final byte is 0x00 and is
                        # still appended.
                        full_bytes = glyph_count // 8
                        set_bits = glyph_count % 8
                        last_byte = ((1 << set_bits) - 1) << (8 - set_bits)
                        self._log.debug(
                            "Assuming CIDSet for %d glyphs of %d full 0xff bytes and a final value of 0x%x.",
                            glyph_count, full_bytes, last_byte)

                        cidset_objid = self._pdf.get_free_objid()
                        stream = (bytes([0xff]) * full_bytes) + bytes(
                            [last_byte])
                        pdf_object = PDFObject.create(
                            cidset_objid,
                            gennum=0,
                            content={},
                            stream=EncodedObject.create(stream))
                        self._pdf.replace_object(pdf_object)

                        font_descriptor_obj.content[PDFName(
                            "/CIDSet")] = pdf_object.xref
Ejemplo n.º 17
0
	def put_image(self, pdfimage):
		"""Place an image centered on the current page and register it as /Img.

		A new /XObject /Image object is created for pdfimage: JPEG data is
		embedded as-is with Filter.DCTDecode, RGB and GRAY data is stored
		compressed. The image is scaled down to fit the printable area if
		necessary (it is never enlarged), a drawing command is appended to
		the page's content stream and the image is entered into the page's
		/Resources /XObject dictionary under the name /Img.

		Raises UnsupportedError for image formats other than JPEG, RGB or
		GRAY and NotImplementedError for an unknown pixel format.
		"""
		if pdfimage.image_format not in [ "JPEG", "RGB", "GRAY" ]:
			raise UnsupportedError("PDF can only handle JPEG, RGB or GRAY image formats, but %s was supplied." % (pdfimage.image_format))

		custom_metadata = {
			"resolution_dpi":	list(pdfimage.resolution_dpi),
			"comment":			pdfimage.comment,
		}
		image = self.pdf.new_object({
			PDFName("/Type"):				PDFName("/XObject"),
			PDFName("/Subtype"):			PDFName("/Image"),
			PDFName("/Interpolate"):		True,
			PDFName("/Width"):				pdfimage.dimensions.width,
			PDFName("/Height"):				pdfimage.dimensions.height,
			PDFName("/CustomMetadata"):		PDFString(json.dumps(custom_metadata)),
		})

		# Choose how the pixel data is stored in the image stream.
		if pdfimage.image_format == "JPEG":
			image_stream = EncodedObject(encoded_data = pdfimage.data, filtering = Filter.DCTDecode)
		elif pdfimage.image_format in [ "RGB", "GRAY" ]:
			image_stream = EncodedObject.create(pdfimage.data, compress = True)
		else:
			raise NotImplementedError(pdfimage.image_format)
		image.set_stream(image_stream)

		# Map the pixel format to a color space name and bit depth.
		if pdfimage.pixel_format == PixelFormat.RGB:
			(colorspace_name, bits_per_component) = ("/DeviceRGB", 8)
		elif pdfimage.pixel_format == PixelFormat.Grayscale:
			(colorspace_name, bits_per_component) = ("/DeviceGray", 8)
		elif pdfimage.pixel_format == PixelFormat.BlackWhite:
			(colorspace_name, bits_per_component) = ("/DeviceGray", 1)
		else:
			raise NotImplementedError(pdfimage.pixel_format)
		image.content[PDFName("/ColorSpace")] = PDFName(colorspace_name)
		image.content[PDFName("/BitsPerComponent")] = bits_per_component

		image_extents_mm = pdfimage.extents_mm
		fit_x = self.printable_area_mm.width / image_extents_mm.width
		fit_y = self.printable_area_mm.height / image_extents_mm.height
		fit = min(fit_x, fit_y)
		# Never enlarge
		image_scalar = 1 if fit > 1 else fit

		printed_size_mm = image_extents_mm * image_scalar
		offset_mm = (self._hl_page.extents_mm - printed_size_mm) / 2
		# Convert millimeters to 1/72 inch units.
		offset_dots = offset_mm * 72 / 25.4
		printed_size_dots = printed_size_mm * 72 / 25.4
		placement = {
			"xoffset":	offset_dots.width,
			"yoffset":	offset_dots.height,
			"xscalar":	printed_size_dots.width,
			"yscalar":	printed_size_dots.height,
		}

		draw_commands = textwrap.dedent("""\
		%(xscalar)f 0 0 %(yscalar)f %(xoffset)f %(yoffset)f cm
		/Img Do
		""" % (placement))
		self._hl_page.append_stream(draw_commands)

		page_content = self._hl_page.page_obj.content
		resources_key = PDFName("/Resources")
		xobject_key = PDFName("/XObject")
		if resources_key not in page_content:
			page_content[resources_key] = { }
		if xobject_key not in page_content[resources_key]:
			page_content[resources_key][xobject_key] = { }
		page_content[resources_key][xobject_key][PDFName("/Img")] = image.xref