Esempio n. 1
0
def update_annotation(annotation, options):
    """Run the content filters over an annotation's text fields, in place.

    Args:
        annotation: Annotation object whose entries are read and written as
            attributes (presumably a pdfrw dictionary -- confirm with caller).
        options: Object providing ``content_filters``, an iterable of
            ``(pattern, function)`` pairs applied as
            ``pattern.sub(function, value)``. It is also forwarded unchanged
            to ``update_annotation_action``.

    Raises:
        ValueError: If the annotation has an ``RC`` (rich text) entry, which
            is not supported. Raised before any field is modified.
    """
    # Rich text is not implemented. Bail first -- before touching anything --
    # so that we don't accidentally leak something that should be redacted.
    # Doing this up front also guarantees that this is the error the caller
    # sees: previously RC went through the string loop below first, so an RC
    # value that could not be decoded failed there with an unrelated error.
    if annotation.RC:
        raise ValueError(
            "Annotation rich-text streams (Annot/RC) are not supported.")

    from pdfrw.objects import PdfString

    # Contents holds a plain-text representation of the annotation
    # content, such as for accessibility. All annotation types may
    # have a Contents. NM holds the "annotation name" which also
    # could have redactable text, I suppose. Markup annotations have
    # "T" fields that hold a title / text label. Subj holds a
    # comment subject. CA and AC are used in widget annotations.
    # (RC is rejected above, so it is not listed here.)
    for string_field in ("Contents", "NM", "T", "Subj", "CA", "AC"):
        current = getattr(annotation, string_field)
        if not current:
            continue
        value = current.to_unicode()
        for pattern, function in options.content_filters:
            value = pattern.sub(function, value)
        setattr(annotation, string_field, PdfString.from_unicode(value))

    # An action, usually used for links. PA is handled the same way as A.
    if annotation.A:
        update_annotation_action(annotation, annotation.A, options)
    if annotation.PA:
        update_annotation_action(annotation, annotation.PA, options)

    # If set, another annotation; filter it recursively.
    if annotation.Popup:
        update_annotation(annotation.Popup, options)
Esempio n. 2
0
 def encode(self, value):
     """Encode *value* with ``PdfString.encode`` and cross-check the result.

     The generic encoder's output is compared (via ``assertEqual``) with the
     explicit constructor matching the input type: ``from_unicode`` for
     text, ``from_bytes`` for anything else. Returns the encoded value.
     """
     encoded = PdfString.encode(value)
     # ``type(u'')`` is the text type on both Python 2 and Python 3.
     is_text = isinstance(value, type(u''))
     explicit = PdfString.from_unicode if is_text else PdfString.from_bytes
     expected = explicit(value)
     self.assertEqual(encoded, expected)
     return encoded
def fix_metadata(doc, title=None, creation_date=None):
    """Reset a document's metadata to the OffeneGesetze.de defaults.

    Args:
        doc: Document object exposing ``Root`` and ``Info``; both are
            modified in place (presumably a pdfrw document -- confirm).
        title: If not ``None``, stored as the ``Title`` entry.
        creation_date: If not ``None``, converted with ``make_pdf_date`` and
            stored as the ``CreationDate`` entry.
    """
    # Clear any existing XMP metadata.
    doc.Root.Metadata = None

    entries = {
        'Creator': 'OffeneGesetze.de',
        'Keywords': 'Amtliches Werk nach §5 UrhG https://offenegesetze.de',
        'ModDate': make_pdf_date(datetime.now()),
    }
    if title is not None:
        entries['Title'] = title
    if creation_date is not None:
        entries['CreationDate'] = make_pdf_date(creation_date)

    for name, raw in entries.items():
        # Date entries are stored exactly as make_pdf_date returned them;
        # every other entry is wrapped as a PdfString.
        stored = raw if 'Date' in name else PdfString.from_unicode(raw)
        doc.Info[PdfName(name)] = stored
Esempio n. 4
0
def update_annotation_action(annotation, action, options):
    """Run the link filters over an action's URI and over its Next actions.

    Args:
        annotation: The annotation owning the action; passed to each filter
            as its second argument.
        action: Action object with ``URI`` and ``Next`` attributes; modified
            in place.
        options: Object providing ``link_filters``, an iterable of callables
            ``func(value, annotation)`` returning the new URI text, or
            ``None`` to remove the link.
    """
    if action.URI and options.link_filters:
        value = action.URI.to_unicode()
        for func in options.link_filters:
            value = func(value, annotation)
            if value is None:
                # A filter asked for the link to be removed. Stop here so
                # later filters are not called with None instead of text.
                break
        if value is None:
            # Remove annotation by suppressing the action.
            action.URI = None
        else:
            # Imported here because only this branch needs it.
            from pdfrw.objects import PdfString
            action.URI = PdfString.from_unicode(value)

    if action.Next:
        # May be an Action or array of Actions to execute next.
        next_actions = action.Next
        if isinstance(next_actions, dict):
            next_actions = [next_actions]
        for next_action in next_actions:
            update_annotation_action(annotation, next_action, options)