Beispiel #1
0
def test_update_hybrid_twice(fname):
    with open(os.path.join(PDF_DATA_DIR, fname), 'rb') as inf:
        w = IncrementalPdfFileWriter(inf)
        t_obj = w.trailer['/Info'].raw_get('/Title')
        assert isinstance(t_obj, generic.IndirectObject)
        w.objects[(t_obj.generation, t_obj.idnum)] \
            = generic.pdf_string('Updated')
        out = BytesIO()
        w.write(out)

    r = PdfFileReader(out)
    assert r.trailer['/Info']['/Title'] == 'Updated'
    container_info = r.xrefs.get_xref_container_info(1)
    assert container_info.xref_section_type == XRefSectionType.HYBRID_MAIN
    container_info = r.xrefs.get_xref_container_info(2)
    assert container_info.xref_section_type == XRefSectionType.STANDARD

    w = IncrementalPdfFileWriter(out)
    w.add_object(generic.pdf_string('This is an object'))
    w.write_in_place()

    r = PdfFileReader(out)
    assert '/XRefStm' not in r.trailer
    assert '/XRefStm' not in r.trailer_view
    assert r.trailer['/Info']['/Title'] == 'Updated'
    container_info = r.xrefs.get_xref_container_info(1)
    assert container_info.xref_section_type == XRefSectionType.HYBRID_MAIN
    container_info = r.xrefs.get_xref_container_info(2)
    assert container_info.xref_section_type == XRefSectionType.STANDARD
    container_info = r.xrefs.get_xref_container_info(3)
    assert container_info.xref_section_type == XRefSectionType.STANDARD
Beispiel #2
0
def test_code128_render():
    writer = IncrementalPdfFileWriter(BytesIO(MINIMAL))
    bb = barcodes.BarcodeBox("code128", "this is a test")
    xobj_ref = writer.add_object(bb.as_form_xobject())

    stamp_wrapper_stream = generic.StreamObject(
        stream_data=b'q 1 0 0 1 50 50 cm /Barcode Do Q')
    resources = generic.DictionaryObject({
        pdf_name('/XObject'):
        generic.DictionaryObject({pdf_name('/Barcode'): xobj_ref})
    })
    writer.add_stream_to_page(0, writer.add_object(stamp_wrapper_stream),
                              resources)
Beispiel #3
0
def test_double_sig_add_field_annots_indirect():
    w = IncrementalPdfFileWriter(BytesIO(MINIMAL_ONE_FIELD))
    out = signers.sign_pdf(
        w,
        signers.PdfSignatureMetadata(
            field_name='Sig1',
            certify=True,
            docmdp_permissions=fields.MDPPerm.FILL_FORMS),
        signer=FROM_CA,
    )

    # create a new signature field after signing
    w = IncrementalPdfFileWriter(out)
    # ... but first make the /Annots entry of the first page an indirect one
    first_page = w.root['/Pages']['/Kids'][0]
    annots_copy = generic.ArrayObject(first_page['/Annots'])
    first_page['/Annots'] = annots_ref = w.add_object(annots_copy)
    annots_copy.container_ref = annots_ref
    w.update_container(first_page)
    out = signers.sign_pdf(w,
                           signers.PdfSignatureMetadata(field_name='SigNew'),
                           signer=FROM_CA,
                           new_field_spec=fields.SigFieldSpec(
                               sig_field_name='SigNew', box=(10, 10, 10, 10)))

    r = PdfFileReader(out)
    s = r.embedded_signatures[0]
    assert s.field_name == 'Sig1'
    status = val_trusted(s, extd=True)
    assert status.modification_level == ModificationLevel.FORM_FILLING
    assert status.docmdp_ok

    s = r.embedded_signatures[1]
    assert s.field_name == 'SigNew'
    val_trusted(s)
Beispiel #4
0
def test_pades_dss_object_clobber(requests_mock):
    w = IncrementalPdfFileWriter(BytesIO(MINIMAL_TWO_FIELDS))
    meta1 = signers.PdfSignatureMetadata(
        field_name='Sig1',
        validation_context=live_testing_vc(requests_mock),
        subfilter=PADES,
        embed_validation_info=True,
    )
    dummy_ref = w.add_object(generic.pdf_string("Hi there")).reference

    out = signers.sign_pdf(w, meta1, signer=FROM_CA, timestamper=DUMMY_TS)
    w = IncrementalPdfFileWriter(out)
    # We're going to reassign the DSS object to another object ID, namely
    #  one that clobbers the dummy_ref object. This should be ample cause
    #  for suspicion.
    dss = w.root['/DSS']
    w.objects[(dummy_ref.generation, dummy_ref.idnum)] = dss
    w.root['/DSS'] = generic.IndirectObject(idnum=dummy_ref.idnum,
                                            generation=dummy_ref.generation,
                                            pdf=w)
    w.update_root()
    out = BytesIO()
    w.write(out)

    r = PdfFileReader(out)
    s = r.embedded_signatures[0]
    assert s.field_name == 'Sig1'
    val_trusted_but_modified(s)
Beispiel #5
0
def test_write_embedded_string():
    w = IncrementalPdfFileWriter(BytesIO(MINIMAL))
    with open(NOTO_SERIF_JP, 'rb') as ffile:
        ga = GlyphAccumulator(w, ffile, font_size=10)
    # shape the string, just to register the glyphs as used
    ga.shape('テスト')
    # ... but we're not going to use the result

    # hardcoded CIDs
    cid_hx = '0637062a0639'
    stream = generic.StreamObject(
        stream_data=f'BT /FEmb 18 Tf 0 100 Td <{cid_hx}> Tj ET'.encode('ascii')
    )
    stream_ref = w.add_object(stream)
    w.add_stream_to_page(
        0, stream_ref, resources=generic.DictionaryObject({
            pdf_name('/Font'): generic.DictionaryObject({
                pdf_name('/FEmb'): ga.as_resource()
            })
        })
    )
    out = BytesIO()
    w.write(out)
    out.seek(0)
    r = PdfFileReader(out)
    page_obj = r.root['/Pages']['/Kids'][0].get_object()
    conts = page_obj['/Contents']
    assert len(conts) == 2
    assert stream_ref.idnum in (c.idnum for c in conts)
Beispiel #6
0
def test_add_stream_to_direct_arr():
    w = writer.PdfFileWriter()
    w.insert_page(simple_page(w, 'Test Test', extra_stream=True))
    out = BytesIO()
    w.write(out)
    out.seek(0)
    w = IncrementalPdfFileWriter(out)

    new_stream = 'BT /F1 18 Tf 0 50 Td (Test2 Test2) Tj ET'.encode('ascii')
    stream = generic.StreamObject(stream_data=new_stream)
    stream_ref = w.add_object(stream)
    w.add_stream_to_page(0, stream_ref)

    out = BytesIO()
    w.write(out)
    out.seek(0)
    r = PdfFileReader(out)
    # check if the content stream was added
    page_obj_ref = r.root['/Pages']['/Kids'].raw_get(0)
    assert isinstance(page_obj_ref, generic.IndirectObject)
    page_obj = page_obj_ref.get_object()
    conts = page_obj['/Contents']
    assert len(conts) == 3
    assert stream_ref.idnum in (c.idnum for c in conts)
    # check if resource dictionary is still OK
    assert '/F1' in page_obj['/Resources']['/Font']
Beispiel #7
0
def test_write_embedded_string_objstream():
    ffile = ttLib.TTFont(NOTO_SERIF_JP)
    ga = GlyphAccumulator(ffile)
    cid_hx, _ = ga.feed_string('テスト')
    assert cid_hx == '0637062a0639'
    w = IncrementalPdfFileWriter(BytesIO(MINIMAL_XREF))
    obj_stream = w.prepare_object_stream()
    font_ref = ga.embed_subset(w, obj_stream=obj_stream)
    stream = generic.StreamObject(
        stream_data=f'BT /FEmb 18 Tf 0 100 Td <{cid_hx}> Tj ET'.encode(
            'ascii'))
    stream_ref = w.add_object(stream)
    w.add_stream_to_page(0,
                         stream_ref,
                         resources=generic.DictionaryObject({
                             pdf_name('/Font'):
                             generic.DictionaryObject(
                                 {pdf_name('/FEmb'): font_ref})
                         }))
    out = BytesIO()
    w.write(out)
    out.seek(0)
    r = PdfFileReader(out)
    page_obj = r.root['/Pages']['/Kids'][0].get_object()
    conts = page_obj['/Contents']
    assert len(conts) == 2
    assert stream_ref.idnum in (c.idnum for c in conts)
    assert font_ref.idnum in r.xrefs.in_obj_stream
    out.seek(0)

    # attempt to grab the font from the object stream
    font_ref.pdf = r
    font = font_ref.get_object()
    assert font['/Type'] == pdf_name('/Font')
Beispiel #8
0
def test_not_all_paths_cleared():

    w = IncrementalPdfFileWriter(BytesIO(MINIMAL_ONE_FIELD))
    # make /Fields indirect
    fields_arr = w.root['/AcroForm'].raw_get('/Fields')
    # just in case we ever end up declaring /Fields as indirect in the example
    assert isinstance(fields_arr, generic.ArrayObject)
    w.root['/AcroForm']['/Fields'] = w.root['/Blah'] = w.add_object(fields_arr)
    w.update_root()
    w.update_container(w.root['/AcroForm'])
    out = signers.sign_pdf(
        w,
        signature_meta=signers.PdfSignatureMetadata(field_name='Sig1'),
        signer=FROM_CA)

    # create a new signature field after signing
    w = IncrementalPdfFileWriter(out)
    out = signers.sign_pdf(
        w,
        signers.PdfSignatureMetadata(field_name='SigNew'),
        signer=FROM_CA,
    )

    r = PdfFileReader(out)
    val_trusted_but_modified(embedded_sig=r.embedded_signatures[0])
Beispiel #9
0
def test_bogus_metadata_manipulation():
    # test using a double signature created using Adobe Reader
    # (uses object streams, XMP metadata updates and all the fun stuff)

    infile = BytesIO(
        read_all(PDF_DATA_DIR + '/minimal-two-fields-signed-twice.pdf'))

    bogus = b'This is bogus data, yay!'

    def do_check():
        r = PdfFileReader(out)
        print(r.get_object(generic.Reference(2, 0, r), revision=3).data)
        s = r.embedded_signatures[0]
        status = validate_pdf_signature(s)
        assert status.modification_level == ModificationLevel.OTHER

    w = IncrementalPdfFileWriter(infile)
    w.root['/Metadata'] = w.add_object(generic.StreamObject(stream_data=bogus))
    w.update_root()
    out = BytesIO()
    w.write(out)
    do_check()

    w = IncrementalPdfFileWriter(infile)
    metadata_ref = w.root.raw_get('/Metadata')
    metadata_stream: generic.StreamObject = metadata_ref.get_object()
    metadata_stream.strip_filters()
    metadata_stream._data = bogus
    metadata_stream._encoded_data = None
    w.mark_update(metadata_ref)
    out = BytesIO()
    w.write(out)
    do_check()

    w = IncrementalPdfFileWriter(infile)
    w.root['/Metadata'] = generic.NullObject()
    w.update_root()
    out = BytesIO()
    w.write(out)
    do_check()

    w = IncrementalPdfFileWriter(infile)
    w.root['/Metadata'] = w.add_object(generic.NullObject())
    w.update_root()
    out = BytesIO()
    w.write(out)
    do_check()
Beispiel #10
0
def test_add_stream():
    w = IncrementalPdfFileWriter(BytesIO(MINIMAL))

    def stream_data(y):
        return f'BT /F1 18 Tf 0 {y} Td (Test Test) Tj ET'.encode('ascii')

    stream = generic.StreamObject(stream_data=stream_data(50))

    stream_ref = w.add_object(stream)
    w.add_stream_to_page(0, stream_ref)

    out = BytesIO()
    w.write(out)
    out.seek(0)
    r = PdfFileReader(out)
    # check if the content stream was added
    page_obj_ref = r.root['/Pages']['/Kids'].raw_get(0)
    assert isinstance(page_obj_ref, generic.IndirectObject)
    page_obj = page_obj_ref.get_object()
    conts = page_obj['/Contents']
    assert len(conts) == 2
    assert stream_ref.idnum in (c.idnum for c in conts)
    # check if resource dictionary is still OK
    assert '/F1' in page_obj['/Resources']['/Font']

    # let's try adding a third
    out.seek(0)
    w = IncrementalPdfFileWriter(out)

    stream = generic.StreamObject(stream_data=stream_data(100))
    new_stream_ref = w.add_object(stream)
    w.add_stream_to_page(0, new_stream_ref)

    out = BytesIO()
    w.write(out)
    out.seek(0)
    r = PdfFileReader(out)
    # check if the content stream was added
    page_obj_ref = r.root['/Pages']['/Kids'].raw_get(0)
    assert isinstance(page_obj_ref, generic.IndirectObject)
    page_obj = page_obj_ref.get_object()
    conts = page_obj['/Contents']
    assert len(conts) == 3
    ids = [c.idnum for c in conts]
    assert stream_ref.idnum in ids and new_stream_ref.idnum in ids
Beispiel #11
0
def test_code128_render():
    writer = IncrementalPdfFileWriter(BytesIO(MINIMAL))
    bb = barcodes.BarcodeBox("code128", "this is a test")
    xobj_ref = writer.add_object(bb.as_form_xobject())

    stamp_wrapper_stream = generic.StreamObject(
        stream_data=b'q 1 0 0 1 50 50 cm /Barcode Do Q')
    resources = generic.DictionaryObject({
        pdf_name('/XObject'):
        generic.DictionaryObject({pdf_name('/Barcode'): xobj_ref})
    })
    writer.add_stream_to_page(0, writer.add_object(stamp_wrapper_stream),
                              resources)

    # TODO try to read back the code using some kind of barcode scanning
    #  library, perhaps.

    compare_output(writer, f'{EXPECTED_OUTPUT_DIR}/code128-test.pdf')
Beispiel #12
0
def test_trailer_update():
    w = IncrementalPdfFileWriter(BytesIO(MINIMAL_ONE_FIELD))
    dt = generic.pdf_date(datetime.datetime(2020, 10, 10, tzinfo=pytz.utc))

    info = generic.DictionaryObject({pdf_name('/CreationDate'): dt})
    w.trailer['/Info'] = w.add_object(info)
    out = BytesIO()
    w.write(out)
    r = PdfFileReader(out)
    assert r.trailer['/Info']['/CreationDate'] == dt
Beispiel #13
0
def append_signature_field(pdf_out: IncrementalPdfFileWriter,
                           sig_field_spec: SigFieldSpec):
    """
    Append signature fields to a PDF file.

    :param pdf_out:
        Incremental writer to house the objects.
    :param sig_field_spec:
        A :class:`.SigFieldSpec` object describing the signature field
        to add.
    """
    root = pdf_out.root

    page_ref = pdf_out.find_page_for_modification(sig_field_spec.on_page)[0]
    # use default appearance
    field_created, sig_field_ref = _prepare_sig_field(
        sig_field_spec.sig_field_name,
        root,
        update_writer=pdf_out,
        existing_fields_only=False,
        box=sig_field_spec.box,
        include_on_page=page_ref,
        lock_sig_flags=False)
    if not field_created:
        raise PdfWriteError('Signature field with name %s already exists.' %
                            sig_field_spec.sig_field_name)

    sig_field = sig_field_ref.get_object()
    if sig_field_spec.seed_value_dict is not None:
        # /SV must be an indirect reference as per the spec
        sv_ref = pdf_out.add_object(
            sig_field_spec.seed_value_dict.as_pdf_object())
        sig_field[pdf_name('/SV')] = sv_ref

    lock = sig_field_spec.format_lock_dictionary()
    if lock is not None:
        sig_field[pdf_name('/Lock')] = pdf_out.add_object(lock)
Beispiel #14
0
def test_sign_reject_freed(forbid_freeing):

    w = IncrementalPdfFileWriter(BytesIO(MINIMAL_ONE_FIELD))
    w.root['/Blah'] = freed = w.add_object(generic.pdf_string('Hi there!'))
    out = signers.sign_pdf(
        w,
        signature_meta=signers.PdfSignatureMetadata(field_name='Sig1'),
        signer=FROM_CA)

    # free the dummy object we created before signing
    # since we don't have support for freeing objects in the writer (yet),
    # do it manually
    r = PdfFileReader(out)
    last_startxref = r.last_startxref

    len_out = out.seek(0, os.SEEK_END)
    out.write(b'\n'.join([
        b'xref', b'0 1', b'0000000000 65535 f ',
        b'%d 1' % freed.idnum, b'0000000000 00001 f ',
        b'trailer<</Prev %d>>' % last_startxref, b'startxref',
        b'%d' % len_out, b'%%EOF'
    ]))
    r = PdfFileReader(out)
    last_rev = r.xrefs.xref_sections - 1

    assert freed.reference in r.xrefs.refs_freed_in_revision(last_rev)

    sig = r.embedded_signatures[0]
    assert sig.signed_revision == 2

    # make a dummy rule that whitelists our freed object ref

    class AdHocRule(QualifiedWhitelistRule):
        def apply_qualified(self, old: HistoricalResolver,
                            new: HistoricalResolver):
            yield ModificationLevel.LTA_UPDATES, ReferenceUpdate(
                freed.reference, paths_checked=RawPdfPath('/Root', '/Pages'))

    val_status = validate_pdf_signature(
        sig,
        SIMPLE_V_CONTEXT(),
        diff_policy=StandardDiffPolicy(DEFAULT_DIFF_POLICY.global_rules +
                                       [AdHocRule()],
                                       DEFAULT_DIFF_POLICY.form_rule,
                                       reject_object_freeing=forbid_freeing))
    if forbid_freeing:
        assert val_status.modification_level == ModificationLevel.OTHER
    else:
        assert val_status.modification_level == ModificationLevel.LTA_UPDATES
Beispiel #15
0
def test_form_field_structure_modification():
    w = IncrementalPdfFileWriter(BytesIO(SIMPLE_FORM))
    meta = signers.PdfSignatureMetadata(field_name='Sig1')

    out = signers.sign_pdf(w, meta, signer=FROM_CA, timestamper=DUMMY_TS)
    w = IncrementalPdfFileWriter(out)
    field_arr = w.root['/AcroForm']['/Fields']
    # shallow copy the text field
    tf = generic.DictionaryObject(field_arr[1].get_object())
    tf['/T'] = generic.pdf_string('OtherField')
    field_arr.append(w.add_object(tf))
    w.update_container(field_arr)
    out = BytesIO()
    w.write(out)

    r = PdfFileReader(out)
    s = r.embedded_signatures[0]
    assert s.field_name == 'Sig1'
    val_trusted_but_modified(s)
Beispiel #16
0
def test_write_embedded_string_objstream():
    w = IncrementalPdfFileWriter(BytesIO(MINIMAL_XREF))
    obj_stream = w.prepare_object_stream()
    with open(NOTO_SERIF_JP, 'rb') as ffile:
        ga = GlyphAccumulator(w, ffile, font_size=10, obj_stream=obj_stream)
    # shape the string, just to register the glyphs as used
    ga.shape('テスト')
    # ... but we're not going to use the result

    # hardcoded CIDs
    cid_hx = '0637062a0639'
    font_ref = ga.as_resource()
    stream = generic.StreamObject(
        stream_data=f'BT /FEmb 18 Tf 0 100 Td <{cid_hx}> Tj ET'.encode(
            'ascii'))
    stream_ref = w.add_object(stream)
    w.add_stream_to_page(0,
                         stream_ref,
                         resources=generic.DictionaryObject({
                             pdf_name('/Font'):
                             generic.DictionaryObject(
                                 {pdf_name('/FEmb'): font_ref})
                         }))
    out = BytesIO()
    w.write(out)
    out.seek(0)
    r = PdfFileReader(out)
    page_obj = r.root['/Pages']['/Kids'][0].get_object()
    conts = page_obj['/Contents']
    assert len(conts) == 2
    assert stream_ref.idnum in (c.idnum for c in conts)
    xref_sections = r.xrefs._xref_sections
    last = xref_sections[len(xref_sections) - 1]
    assert font_ref.idnum in last.xref_data.xrefs_in_objstm
    out.seek(0)

    # attempt to grab the font from the object stream
    font_ref.pdf = r
    font = font_ref.get_object()
    assert font['/Type'] == pdf_name('/Font')
Beispiel #17
0
def test_pades_dss_object_typing_tamper(requests_mock, bogus_dss):
    w = IncrementalPdfFileWriter(BytesIO(MINIMAL_TWO_FIELDS))
    meta1 = signers.PdfSignatureMetadata(
        field_name='Sig1',
        validation_context=live_testing_vc(requests_mock),
        subfilter=PADES,
        embed_validation_info=True,
    )
    out = signers.sign_pdf(w, meta1, signer=FROM_CA, timestamper=DUMMY_TS)
    out.seek(0)
    original_out = out.read()

    # write some bogus reference into the DSS
    w = IncrementalPdfFileWriter(BytesIO(original_out))

    w.root['/DSS'] = w.add_object(bogus_dss)
    w.update_root()
    out = BytesIO()
    w.write(out)

    r = PdfFileReader(out)
    s = r.embedded_signatures[0]
    assert s.field_name == 'Sig1'
    val_trusted_but_modified(s)
Beispiel #18
0
def _prepare_sig_field(sig_field_name,
                       root,
                       update_writer: IncrementalPdfFileWriter,
                       existing_fields_only=False,
                       lock_sig_flags=True,
                       **kwargs):
    """
    Returns a tuple of a boolean and a reference to a signature field.
    The boolean is ``True`` if the field was created, and ``False`` otherwise.
    """
    if sig_field_name is None:  # pragma: nocover
        raise ValueError

    try:
        form = root['/AcroForm']

        try:
            fields = form['/Fields']
        except KeyError:
            raise ValueError('/AcroForm has no /Fields')

        candidates = enumerate_sig_fields_in(fields, with_name=sig_field_name)
        sig_field_ref = None
        try:
            field_name, value, sig_field_ref = next(candidates)
            if value is not None:
                raise SigningError(
                    'Signature field with name %s appears to be filled already.'
                    % sig_field_name)
        except StopIteration:
            if existing_fields_only:
                raise SigningError(
                    'No empty signature field with name %s found.' %
                    sig_field_name)
        form_created = False
    except KeyError:
        # we have to create the form
        if existing_fields_only:
            raise SigningError('This file does not contain a form.')
        # no AcroForm present, so create one
        form = generic.DictionaryObject()
        root[pdf_name('/AcroForm')] = update_writer.add_object(form)
        fields = generic.ArrayObject()
        form[pdf_name('/Fields')] = fields
        # now we need to mark the root as updated
        update_writer.update_root()
        form_created = True
        sig_field_ref = None

    if sig_field_ref is not None:
        return False, sig_field_ref

    if '.' in sig_field_name:
        raise NotImplementedError(
            "Creating fields deep in the form hierarchy is not supported"
            "right now.")

    # no signature field exists, so create one
    # default: grab a reference to the first page
    page_ref = update_writer.find_page_for_modification(0)[0]
    sig_form_kwargs = {'include_on_page': page_ref}
    sig_form_kwargs.update(**kwargs)
    sig_field = SignatureFormField(sig_field_name,
                                   writer=update_writer,
                                   **sig_form_kwargs)
    sig_field_ref = sig_field.reference
    fields.append(sig_field_ref)

    # make sure /SigFlags is present. If not, create it
    sig_flags = 3 if lock_sig_flags else 1
    form.setdefault(pdf_name('/SigFlags'), generic.NumberObject(sig_flags))
    # if a field was added to an existing form, register an extra update
    if not form_created:
        update_writer.update_container(fields)
    return True, sig_field_ref