def update_annotation(annotation, options):
    """Run the content filters over an annotation's text fields.

    Each populated text field is decoded, passed through every
    ``(pattern, function)`` pair in ``options.content_filters`` via
    ``pattern.sub(function, value)``, and written back to the annotation.
    Any ``A`` / ``PA`` action is handed to ``update_annotation_action`` and
    a ``Popup`` annotation is processed recursively.

    Args:
        annotation: The annotation object to update in place.
        options: Object providing ``content_filters`` (and whatever
            ``update_annotation_action`` reads from it).

    Raises:
        ValueError: If the annotation has an ``RC`` entry, which is not
            supported. This is raised before any field is modified.
    """
    # A rich-text stream. Not implemented. Bail so that we don't
    # accidentally leak something that should be redacted. This check runs
    # first so that the annotation is never left half-rewritten and so
    # that an RC value without to_unicode() still produces this error
    # rather than failing inside the field loop below.
    if annotation.RC:
        raise ValueError(
            "Annotation rich-text streams (Annot/RC) are not supported.")

    from pdfrw.objects import PdfString

    # Contents holds a plain-text representation of the annotation
    # content, such as for accessibility. All annotation types may
    # have a Contents. NM holds the "annotation name" which also
    # could have redactable text, I suppose. Markup annotations have
    # "T" fields that hold a title / text label. Subj holds a
    # comment subject. CA and AC are used in widget annotations.
    # RC is not listed because it has already been rejected above.
    for string_field in ("Contents", "NM", "T", "Subj", "CA", "AC"):
        field_value = getattr(annotation, string_field)
        if not field_value:
            continue
        value = field_value.to_unicode()
        for pattern, function in options.content_filters:
            value = pattern.sub(function, value)
        setattr(annotation, string_field, PdfString.from_unicode(value))

    # An action, usually used for links.
    if annotation.A:
        update_annotation_action(annotation, annotation.A, options)
    if annotation.PA:
        update_annotation_action(annotation, annotation.PA, options)

    # If set, another annotation.
    if annotation.Popup:
        update_annotation(annotation.Popup, options)
def encode(self, value):
    """Encode ``value`` with ``PdfString.encode`` and cross-check the result.

    The encoded value is compared (via ``self.assertEqual``) against
    ``PdfString.from_unicode(value)`` when ``value`` is a text string, or
    ``PdfString.from_bytes(value)`` otherwise.

    Returns:
        The result of ``PdfString.encode(value)``.
    """
    encoded = PdfString.encode(value)
    # type(u'') is the text type on both Python 2 and Python 3.
    is_text = isinstance(value, type(u''))
    build = PdfString.from_unicode if is_text else PdfString.from_bytes
    self.assertEqual(encoded, build(value))
    return encoded
def fix_metadata(doc, title=None, creation_date=None):
    """Replace the document's metadata with a fixed set of Info entries.

    Sets ``doc.Root.Metadata`` to ``None`` and then writes Creator,
    Keywords and ModDate (the current time) into ``doc.Info``. Title and
    CreationDate are written only when the matching argument is given.

    Args:
        doc: Document object exposing ``Root`` and ``Info``.
        title: Optional title; skipped when ``None``.
        creation_date: Optional creation date passed to ``make_pdf_date``;
            skipped when ``None``.
    """
    # Clear any existing XMP meta data
    doc.Root.Metadata = None

    info_entries = {
        'Creator': 'OffeneGesetze.de',
        'Keywords': 'Amtliches Werk nach §5 UrhG https://offenegesetze.de',
        'ModDate': make_pdf_date(datetime.now()),
    }
    if title is not None:
        info_entries['Title'] = title
    if creation_date is not None:
        info_entries['CreationDate'] = make_pdf_date(creation_date)

    for name, raw in info_entries.items():
        # Entries whose key contains 'Date' were already produced by
        # make_pdf_date and are stored as-is; everything else is wrapped
        # with PdfString.from_unicode.
        is_date = 'Date' in name
        doc.Info[PdfName(name)] = raw if is_date else PdfString.from_unicode(raw)
def update_annotation_action(annotation, action, options):
    """Apply the link filters to an action's URI and to its Next actions.

    When the action has a ``URI`` and ``options.link_filters`` is non-empty,
    the URI is decoded and passed through each filter in turn as
    ``func(value, annotation)``. A filter returning ``None`` means the link
    should be removed: the chain stops there and ``action.URI`` is set to
    ``None``. Otherwise the filtered value is stored back on the action.
    Any ``Next`` action (a single action or a sequence of actions) is
    processed recursively.

    Args:
        annotation: The annotation owning the action; passed to each filter.
        action: The action object to update in place.
        options: Object providing ``link_filters``, an iterable of
            callables taking ``(value, annotation)``.
    """
    if action.URI and options.link_filters:
        value = action.URI.to_unicode()
        for func in options.link_filters:
            value = func(value, annotation)
            if value is None:
                # A filter asked for the link to be dropped. Stop here so
                # later filters are not called with None in place of a URI.
                break
        if value is None:
            # Remove annotation by suppressing the action.
            action.URI = None
        else:
            # Imported here because this is the only branch that uses it.
            from pdfrw.objects import PdfString
            action.URI = PdfString.from_unicode(value)

    if action.Next:
        # May be an Action or array of Actions to execute next.
        next_actions = action.Next
        if isinstance(next_actions, dict):
            next_actions = [next_actions]
        for next_action in next_actions:
            update_annotation_action(annotation, next_action, options)