def as_pdf_object(self, transform, page):
    """Build the PdfDict for this annotation, ready to be inserted as is
    into the PDF document.

    :param list transform: Transformation matrix taking the annotation's
        coords from client-specified space to PDF user space.
    :param PdfDict page: The pdfrw page object from the PDF document
    :returns PdfDict: the annotation object to be inserted into the PDF
    """
    rect = transform_rect(self.make_rect(), transform)
    ap_dict = self._make_appearance_stream_dict(rect, transform)

    annotation = PdfDict(
        Type=PdfName('Annot'),
        Subtype=PdfName(self.subtype),
        Rect=rect,
        AP=ap_dict,
    )

    if self._page_as_parent:
        annotation.P = page

    # Related annotations are rendered with the same transform/page and
    # attached under their own key.
    for key in self._related:
        related = self._related[key]
        setattr(annotation, key, related.as_pdf_object(transform, page))

    self._add_metadata(annotation, self._metadata)
    self.add_additional_pdf_object_data(annotation)

    annotation.indirect = True
    return annotation
def _make_border_dict(width, style, dash_array=None):
    """Build the border dictionary for an annotation.

    :param width: border width, stored under W
    :param str style: border style name, stored under S
    :param dash_array: optional dash pattern, stored under D; only accepted
        when style is 'D'
    :returns PdfDict: the border dictionary
    :raises ValueError: if a dash array is given with a non-'D' style
    """
    border_dict = PdfDict(Type=PdfName('Border'), W=width, S=PdfName(style))
    if not dash_array:
        return border_dict
    if style != 'D':
        raise ValueError('Dash array only applies to dashed borders!')
    border_dict.D = dash_array
    return border_dict
def _add_graphics_state_resources(resources, A):
    """Populate the ExtGState entry of the Resources dict, which is what
    turns on transparency in the graphics state.

    For example, if both stroke and fill were transparent, this would add:
        <<
            /ExtGState /PdfAnnotatorGS <<
                /CA 0.5 /ca 0.75 /Type /ExtGState
            >>
        >>
    to the Resources dict.

    Externally specified graphics states, for use in explicit content
    streams, are taken from the `graphics_states` property on the
    appearance object. Nothing is added when there are no states at all.
    """
    ext_states = []

    builtin_state = Annotation._get_internal_graphics_state(resources, A)
    if builtin_state is not None:
        ext_states.append((GRAPHICS_STATE_NAME, builtin_state))

    if A.graphics_states:
        ext_states.extend(
            (key, custom.as_pdf_dict())
            for key, custom in A.graphics_states.items()
        )

    if not ext_states:
        return

    resources.ExtGState = PdfDict()
    for key, pdf_state in ext_states:
        resources.ExtGState[PdfName(key)] = pdf_state
def _add_xobject_resources(resources, A):
    """Copy explicitly provided XObjects into the appearance stream's
    Resources dict.

    This covers the case where the user supplies their own appearance
    stream and wants to include, say, an image. Nothing is added when the
    appearance has no XObjects.
    """
    if not A.xobjects:
        return

    resources.XObject = PdfDict()
    for key, value in A.xobjects.items():
        resources.XObject[PdfName(key)] = value
def _make_ap_resources(self):
    """Build the Resources entry for the appearance stream dictionary.

    Graphics states, XObjects and fonts from the appearance are added, in
    that order. Implement add_additional_resources to add further entries
    to the Resources dictionary.
    """
    res = PdfDict(ProcSet=PdfName('PDF'))

    adders = (
        self._add_graphics_state_resources,
        self._add_xobject_resources,
        self._add_font_resources,
    )
    for add_resources in adders:
        add_resources(res, self._appearance)

    self.add_additional_resources(res)
    return res
def _make_appearance_stream_dict(self, bounding_box, transform):
    """Build the appearance dictionary (normal appearance only).

    :param bounding_box: annotation rectangle, used as the form's BBox; its
        first two entries are negated to build the form Matrix
    :param list transform: matrix applied to the content stream
    :returns PdfDict: dictionary whose N entry is the form XObject
    """
    ap_resources = self._make_ap_resources()

    # A user-specified content stream wins; otherwise one is generated
    # based on the annotation type.
    content = self._appearance.appearance_stream
    if content is None:
        content = self.make_appearance_stream()

    # Transform the content stream and resolve it
    resolved = content.transform(transform).resolve()

    origin_shift = translate(-bounding_box[0], -bounding_box[1])
    form_xobject = PdfDict(
        stream=resolved,
        BBox=bounding_box,
        Resources=ap_resources,
        Matrix=origin_shift,
        Type=PdfName('XObject'),
        Subtype=PdfName('Form'),
        FormType=1,
    )
    return PdfDict(N=form_xobject)
def _add_metadata(self, obj, metadata):
    """Write each metadata entry onto obj, keyed by its PDF name.

    :param PdfDict obj: the object receiving the entries
    :param metadata: object whose iter() yields (name, value) pairs, or
        None to add nothing
    """
    if metadata is not None:
        for key, raw in metadata.iter():
            obj[PdfName(key)] = serialize_value(raw)
def _add_font_resources(resources, A):
    """Copy the appearance's fonts into the Font entry of the Resources
    dict. Nothing is added when the appearance has no fonts.
    """
    if not A.fonts:
        return

    resources.Font = PdfDict()
    for key, value in A.fonts.items():
        resources.Font[PdfName(key)] = value
def update_metadata(trailer, options):
    """Run the configured metadata filters over the PDF's Document
    Information Dictionary.

    The dictionary contains keys like Title, Author, Subject, Keywords,
    Creator, Producer, CreationDate, and ModDate (the latter two containing
    Date values, the rest strings).

    For each field, the filter list registered under the field's name is
    used, falling back to the "DEFAULT" list when the field has no entry;
    the "ALL" list is then appended. Each filter receives the current value
    (PdfString values are decoded to Python strings first) and must return
    a string, a datetime, or None (which deletes the field).

    :param trailer: PDF trailer whose ``Info`` dictionary is updated in
        place (created if missing).
    :param options: object with a ``metadata_filters`` mapping of field
        name (or "DEFAULT" / "ALL") to a list of callables. The mapping
        and its lists are not modified.
    :raises ValueError: if a filter returns an unsupported type.
    """
    from pdfrw.objects import PdfString, PdfName

    # Create the metadata dict if it doesn't exist, since the caller may be adding fields.
    if not trailer.Info:
        trailer.Info = PdfDict()

    filters = options.metadata_filters

    # Get a list of all metadata fields that exist in the PDF plus any fields
    # that there are metadata filters for (since they may insert field values).
    keys = set(str(k)[1:] for k in trailer.Info.keys()) \
        | set(k for k in filters.keys() if k not in ("DEFAULT", "ALL"))

    # Update each metadata field.
    for key in keys:
        # Get the functions to apply to this field.
        functions = filters.get(key)
        if functions is None:
            # If nothing is defined for this field, use the DEFAULT functions.
            functions = filters.get("DEFAULT", [])

        # Append the ALL functions. Build a new list rather than using
        # "+=": in-place extension would grow the caller's own list (and
        # the shared DEFAULT list once per field), making the ALL filters
        # run repeatedly.
        functions = list(functions) + list(filters.get("ALL", []))

        # Run the functions on any existing values.
        value = trailer.Info[PdfName(key)]
        for f in functions:
            # Before passing to the function, convert from a PdfString to a Python string.
            if isinstance(value, PdfString):
                # decode from PDF's "(...)" syntax.
                value = value.decode()

            # Filter the value.
            value = f(value)

            # Convert Python data type to PdfString.
            if isinstance(value, str) or (sys.version_info < (3, ) and isinstance(value, unicode)):
                # Convert string to a PdfString instance.
                value = PdfString.from_unicode(value)

            elif isinstance(value, datetime):
                # Convert datetime into a PDF "D" string format.
                value = value.strftime("%Y%m%d%H%M%S%z")
                if len(value) == 19:
                    # If TZ info was included, add an apostrophe between the hour/minutes offsets.
                    value = value[:17] + "'" + value[17:]
                value = PdfString("(D:%s)" % value)

            elif value is None:
                # delete the metadata value
                pass

            else:
                # Not every callable has __name__ (e.g. functools.partial),
                # so look it up defensively when building the message.
                raise ValueError(
                    "Invalid type of value returned by metadata_filter function. %s was returned by %s." %
                    (repr(value), getattr(f, "__name__", None) or "anonymous function"))

            # Replace value.
            trailer.Info[PdfName(key)] = value
def make_page(page, pdf_page, psem):
    """Fill in ``pdf_page`` from ``page`` and report progress.

    The thumbnail, background, foreground images and foreground masks are
    gathered with ``asyncio.gather``; afterwards the page's MediaBox,
    Group, Thumb, Contents, Annots and Resources entries are set.

    This function uses ``yield from`` and is therefore a generator-based
    coroutine. It is also a closure: ``self``, ``pdf_group``,
    ``default_rgb_colorspace``, ``pdf_font_mapping``, ``pdf_pages``,
    ``progress_cb`` and the ``finished_pages`` counter are taken from the
    enclosing scope.

    :param page: source page; its ``thumbnail``, ``background``,
        ``foreground``, ``text``, ``color``, ``width`` and ``height`` are read.
    :param pdf_page: PDF page dictionary, populated in place.
    :param psem: passed through unchanged to the image/mask producers
        (presumably a semaphore bounding parallel work -- TODO confirm).
    """
    # Prepare everything in parallel
    @asyncio.coroutine
    def get_pdf_thumbnail(psem):
        # A page without a thumbnail contributes None to the gathered result.
        if page.thumbnail is None:
            return None
        return (yield from page.thumbnail.pdf_thumbnail(psem))

    @asyncio.coroutine
    def get_pdf_background(psem):
        # A page without a background contributes None to the gathered result.
        if page.background is None:
            return None
        return (yield from page.background.pdf_image(psem))

    @asyncio.coroutine
    def get_pdf_mask(foreground, psem):
        # Only foregrounds without a color get a mask; colored ones yield None.
        if foreground.color is not None:
            return None
        return (yield from foreground.pdf_mask(psem))

    # The two inner gathers keep one result per foreground, in the order of
    # page.foreground, so they can be zipped with it below.
    pdf_thumbnail, pdf_background, pdf_foregrounds, pdf_masks = (
        yield from asyncio.gather(
            get_pdf_thumbnail(psem),
            get_pdf_background(psem),
            asyncio.gather(
                *[fg.pdf_image(psem) for fg in page.foreground]),
            asyncio.gather(
                *[get_pdf_mask(fg, psem) for fg in page.foreground])))

    pdf_page.MediaBox = PdfArray(
        [0, 0, PdfNumber(page.width), PdfNumber(page.height)])
    pdf_page.Group = pdf_group
    pdf_resources = PdfDict()
    pdf_colorspace = PdfDict()
    pdf_colorspace.DefaultRGB = default_rgb_colorspace
    pdf_resources.ColorSpace = pdf_colorspace
    pdf_xobject = PdfDict()
    if pdf_thumbnail is not None:
        pdf_page.Thumb = pdf_thumbnail

    # Running counter used to name XObjects Im0, Im1, ...
    im_index = 0
    # Save graphics state and scale unity rectangle to page size
    matrix = TransformationMatrix()
    matrix.scale(page.width, page.height)
    before_graphics = ("q\n" +
                       "%s cm\n" % matrix.to_pdf())
    after_graphics = "\nQ\n"
    contents = ""
    graphics = ""
    # Last fill color written to the stream, so "rg" is only emitted on change.
    current_color = None
    if page.color != self._factory.WHITE:
        if current_color != page.color:
            current_color = page.color
            graphics += page.color.to_pdf() + " rg "
        # Fill the unit rectangle (scaled to the page by the matrix above).
        graphics += ("0 0 1 1 re " +
                     "f\n")
    if pdf_background is not None:
        pdf_xobject[PdfName("Im%d" % im_index)] = pdf_background
        graphics += "/Im%d Do\n" % im_index
        im_index += 1
    for foreground, pdf_foreground, pdf_mask in zip(
            page.foreground, pdf_foregrounds, pdf_masks):
        if pdf_mask is not None:
            # The mask is registered as an XObject but no "Do" is emitted
            # for it; only the foreground itself is drawn below.
            pdf_xobject[PdfName("Im%d" % im_index)] = pdf_mask
            im_index += 1
        pdf_xobject[PdfName("Im%d" % im_index)] = pdf_foreground
        if (foreground.color is not None and
                current_color != foreground.color):
            current_color = foreground.color
            graphics += foreground.color.to_pdf() + " rg "
        graphics += "/Im%d Do\n" % im_index
        im_index += 1
    if graphics:
        contents += (before_graphics +
                     graphics.rstrip(" \n") +
                     after_graphics)
    current_color = None

    # Text layer: font /F1 at size 1, text rendering mode 3.
    before_text = ("BT\n" +
                   "/F1 1 Tf 3 Tr\n")
    after_text = "\nET\n"
    text = ""
    pdf_annots = []
    for t in page.text:
        if t.text:
            matrix = TransformationMatrix()
            # Glyph size is 0.5 x 1
            matrix.scale(2 / len(t.text), 1)
            matrix.translate(-0.5, -0.5)
            if t.direction == "ltr":
                pass
            elif t.direction == "rtl":
                matrix.translate(0, -1)
            elif t.direction == "ttb":
                matrix.rotate(90)
            matrix.rotate(-t.rotation)
            matrix.translate(0.5, 0.5)
            matrix.scale(t.width, t.height)
            matrix.translate(t.x, t.y)
            # The string is written as hex-encoded UTF-16-BE bytes.
            text += "%s Tm %s Tj\n" % (
                matrix.to_pdf(),
                PdfString().from_bytes(
                    t.text.encode("utf-16-be"), bytes_encoding="hex"))
        # NOTE(review): link annotations are emitted here independently of
        # whether the entry has text -- confirm this nesting is intended.
        if t.external_link is not None or t.internal_link is not None:
            pdf_annot = PdfDict()
            pdf_annots.append(pdf_annot)
            pdf_annot.Type = PdfName.Annot
            pdf_annot.Subtype = PdfName.Link
            pdf_annot.Border = [0, 0, 0]
            pdf_annot.Rect = [
                PdfNumber(t.x),
                PdfNumber(t.y),
                PdfNumber(t.x + t.width),
                PdfNumber(t.y + t.height)
            ]
            if t.external_link is not None:
                # External links become a URI action.
                pdf_a = PdfDict()
                pdf_annot.A = pdf_a
                pdf_a.Type = PdfName.Action
                pdf_a.S = PdfName.URI
                pdf_a.URI = t.external_link.decode("latin-1")
            if t.internal_link is not None:
                # internal_link is unpacked as (page index, (x, y)).
                pdf_target_page = pdf_pages[t.internal_link[0]]
                target_x, target_y = t.internal_link[1]
                pdf_annot.Dest = [
                    pdf_target_page,
                    PdfName.XYZ,
                    PdfNumber(target_x),
                    PdfNumber(target_y),
                    0
                ]
    text = text.rstrip(" \n")
    if text:
        # The font resource is only attached when text is actually written.
        pdf_resources.Font = pdf_font_mapping
        contents += (before_text +
                     text +
                     after_text)
    contents = contents.rstrip(" \n")
    if contents:
        pdf_contents = PdfDict()
        pdf_contents.indirect = True
        pdf_page.Contents = pdf_contents
        if COMPRESS_PAGE_CONTENTS:
            pdf_contents.Filter = [PdfName.FlateDecode]
            # The compressed bytes are carried as a str; latin-1 maps each
            # byte to exactly one code point.
            pdf_contents.stream = zlib.compress(
                contents.encode("latin-1"), 9).decode("latin-1")
        else:
            pdf_contents.stream = contents
    if pdf_annots:
        pdf_page.Annots = pdf_annots
    if pdf_xobject:
        pdf_resources.XObject = pdf_xobject
    if pdf_resources:
        pdf_page.Resources = pdf_resources
    # Report progress
    nonlocal finished_pages
    finished_pages += 1
    if progress_cb:
        # Callback receives the fraction of pages finished so far.
        progress_cb(finished_pages / len(self._pages))
def _build_font():
    """Build and return the composite (Type0) font dictionary.

    The embedded font program is read from FONT_FILENAME and the ToUnicode
    CMap from UNICODE_CMAP_FILENAME; both, like the CID-to-GID map, are
    stored as Flate-compressed indirect streams.

    :returns PdfDict: the indirect Type0 font object
    """
    def deflated_stream(raw):
        # Indirect stream object holding `raw` Flate-compressed; latin-1
        # maps each compressed byte to exactly one character.
        holder = PdfDict()
        holder.indirect = True
        holder.Filter = [PdfName.FlateDecode]
        holder.stream = zlib.compress(raw, 9).decode("latin-1")
        return holder

    with open(FONT_FILENAME, "rb") as font_file:
        font_program = font_file.read()
    font_file_obj = deflated_stream(font_program)
    font_file_obj.Length1 = len(font_program)

    descriptor = PdfDict()
    descriptor.indirect = True
    descriptor.Ascent = 1000
    descriptor.CapHeight = 1000
    descriptor.Descent = -1
    descriptor.Flags = 5  # FixedPitch + Symbolic
    descriptor.FontBBox = PdfArray([0, 0, 1000, 500])
    descriptor.FontFile2 = font_file_obj
    descriptor.FontName = PdfName.GlyphLessFont
    descriptor.ItalicAngle = 0
    descriptor.StemV = 80
    descriptor.Type = PdfName.FontDescriptor

    # Map everything to glyph 1
    gid_table = b"\0\1" * (1 << 16)
    gid_map = deflated_stream(gid_table)
    gid_map.Length1 = len(gid_table)

    system_info = PdfDict()
    system_info.Ordering = PdfString.from_unicode("Identity")
    system_info.Registry = PdfString.from_unicode("Adobe")
    system_info.Supplement = 0

    descendant = PdfDict()
    descendant.indirect = True
    descendant.CIDToGIDMap = gid_map
    descendant.BaseFont = PdfName.GlyphLessFont
    descendant.CIDSystemInfo = system_info
    descendant.FontDescriptor = descriptor
    descendant.Subtype = PdfName.CIDFontType2
    descendant.Type = PdfName.Font
    descendant.DW = 500

    with open(UNICODE_CMAP_FILENAME, "rb") as cmap_file:
        cmap_bytes = cmap_file.read()
    to_unicode = deflated_stream(cmap_bytes)

    type0_font = PdfDict()
    type0_font.indirect = True
    type0_font.BaseFont = PdfName.GlyphLessFont
    type0_font.DescendantFonts = PdfArray([descendant])
    type0_font.Encoding = PdfName("Identity-H")
    type0_font.Subtype = PdfName.Type0
    type0_font.ToUnicode = to_unicode
    type0_font.Type = PdfName.Font
    return type0_font