def test_update_hybrid_twice(fname):
    """
    Apply two successive incremental updates to a hybrid-reference file and
    check the type of every cross-reference section after each update.
    """

    def check_sections(reader, expected_types):
        # sections are queried starting from index 1, in order
        for section_ix, expected in enumerate(expected_types, start=1):
            info = reader.xrefs.get_xref_container_info(section_ix)
            assert info.xref_section_type == expected

    src_path = os.path.join(PDF_DATA_DIR, fname)
    with open(src_path, 'rb') as inf:
        w = IncrementalPdfFileWriter(inf)
        title_ref = w.trailer['/Info'].raw_get('/Title')
        assert isinstance(title_ref, generic.IndirectObject)
        # overwrite the title object directly in the writer's object table
        title_key = (title_ref.generation, title_ref.idnum)
        w.objects[title_key] = generic.pdf_string('Updated')
        out = BytesIO()
        w.write(out)

    # first update: one hybrid section followed by a standard one
    r = PdfFileReader(out)
    assert r.trailer['/Info']['/Title'] == 'Updated'
    check_sections(
        r, [XRefSectionType.HYBRID_MAIN, XRefSectionType.STANDARD]
    )

    # second update, written in place on top of the first one
    w = IncrementalPdfFileWriter(out)
    w.add_object(generic.pdf_string('This is an object'))
    w.write_in_place()

    r = PdfFileReader(out)
    assert '/XRefStm' not in r.trailer
    assert '/XRefStm' not in r.trailer_view
    assert r.trailer['/Info']['/Title'] == 'Updated'
    check_sections(
        r,
        [
            XRefSectionType.HYBRID_MAIN,
            XRefSectionType.STANDARD,
            XRefSectionType.STANDARD,
        ],
    )
def test_code128_render():
    """Stamp a code128 barcode form XObject onto the first page."""
    pdf_out = IncrementalPdfFileWriter(BytesIO(MINIMAL))
    barcode = barcodes.BarcodeBox("code128", "this is a test")
    barcode_ref = pdf_out.add_object(barcode.as_form_xobject())

    # content stream that translates by (50, 50) and paints the barcode
    wrapper = generic.StreamObject(
        stream_data=b'q 1 0 0 1 50 50 cm /Barcode Do Q'
    )
    xobjects = generic.DictionaryObject({pdf_name('/Barcode'): barcode_ref})
    resources = generic.DictionaryObject({pdf_name('/XObject'): xobjects})

    wrapper_ref = pdf_out.add_object(wrapper)
    pdf_out.add_stream_to_page(0, wrapper_ref, resources)
def test_double_sig_add_field_annots_indirect():
    """
    Certify with form-filling permissions, then add and sign a second field
    after turning the first page's /Annots array into an indirect object.
    """
    w = IncrementalPdfFileWriter(BytesIO(MINIMAL_ONE_FIELD))
    certify_meta = signers.PdfSignatureMetadata(
        field_name='Sig1',
        certify=True,
        docmdp_permissions=fields.MDPPerm.FILL_FORMS,
    )
    out = signers.sign_pdf(w, certify_meta, signer=FROM_CA)

    # create a new signature field after signing
    w = IncrementalPdfFileWriter(out)

    # ... but first make the /Annots entry of the first page an indirect one
    page = w.root['/Pages']['/Kids'][0]
    annots = generic.ArrayObject(page['/Annots'])
    annots_ref = w.add_object(annots)
    page['/Annots'] = annots_ref
    annots.container_ref = annots_ref
    w.update_container(page)

    new_field = fields.SigFieldSpec(
        sig_field_name='SigNew', box=(10, 10, 10, 10)
    )
    out = signers.sign_pdf(
        w,
        signers.PdfSignatureMetadata(field_name='SigNew'),
        signer=FROM_CA,
        new_field_spec=new_field,
    )

    r = PdfFileReader(out)
    first_sig = r.embedded_signatures[0]
    assert first_sig.field_name == 'Sig1'
    status = val_trusted(first_sig, extd=True)
    assert status.modification_level == ModificationLevel.FORM_FILLING
    assert status.docmdp_ok

    second_sig = r.embedded_signatures[1]
    assert second_sig.field_name == 'SigNew'
    val_trusted(second_sig)
def test_pades_dss_object_clobber(requests_mock):
    """
    Sign with embedded validation info, then move the DSS onto the object ID
    of an unrelated pre-existing object and check the signature is flagged
    as modified.
    """
    w = IncrementalPdfFileWriter(BytesIO(MINIMAL_TWO_FIELDS))
    meta1 = signers.PdfSignatureMetadata(
        field_name='Sig1',
        validation_context=live_testing_vc(requests_mock),
        subfilter=PADES,
        embed_validation_info=True,
    )
    victim_ref = w.add_object(generic.pdf_string("Hi there")).reference

    out = signers.sign_pdf(w, meta1, signer=FROM_CA, timestamper=DUMMY_TS)
    w = IncrementalPdfFileWriter(out)

    # We're going to reassign the DSS object to another object ID, namely
    # one that clobbers the victim_ref object. This should be ample cause
    # for suspicion.
    dss = w.root['/DSS']
    victim_key = (victim_ref.generation, victim_ref.idnum)
    w.objects[victim_key] = dss
    w.root['/DSS'] = generic.IndirectObject(
        idnum=victim_ref.idnum, generation=victim_ref.generation, pdf=w
    )
    w.update_root()

    out = BytesIO()
    w.write(out)

    r = PdfFileReader(out)
    sig = r.embedded_signatures[0]
    assert sig.field_name == 'Sig1'
    val_trusted_but_modified(sig)
def test_write_embedded_string():
    """
    Register glyphs with a GlyphAccumulator and reference the resulting font
    resource from a new content stream on the first page.
    """
    w = IncrementalPdfFileWriter(BytesIO(MINIMAL))
    with open(NOTO_SERIF_JP, 'rb') as font_file:
        ga = GlyphAccumulator(w, font_file, font_size=10)
        # shape the string, just to register the glyphs as used
        ga.shape('テスト')
        # ... but we're not going to use the result

    # hardcoded CIDs
    cid_hx = '0637062a0639'
    content = f'BT /FEmb 18 Tf 0 100 Td <{cid_hx}> Tj ET'.encode('ascii')
    stream_ref = w.add_object(generic.StreamObject(stream_data=content))

    font_dict = generic.DictionaryObject(
        {pdf_name('/FEmb'): ga.as_resource()}
    )
    page_resources = generic.DictionaryObject({pdf_name('/Font'): font_dict})
    w.add_stream_to_page(0, stream_ref, resources=page_resources)

    out = BytesIO()
    w.write(out)
    out.seek(0)

    r = PdfFileReader(out)
    page_obj = r.root['/Pages']['/Kids'][0].get_object()
    conts = page_obj['/Contents']
    assert len(conts) == 2
    assert stream_ref.idnum in (c.idnum for c in conts)
def test_add_stream_to_direct_arr():
    """
    Append a content stream to a page generated with ``extra_stream=True``
    and check that the page ends up with three content streams.
    """
    # produce the base document with a fresh (non-incremental) writer
    base_writer = writer.PdfFileWriter()
    base_writer.insert_page(
        simple_page(base_writer, 'Test Test', extra_stream=True)
    )
    out = BytesIO()
    base_writer.write(out)
    out.seek(0)

    w = IncrementalPdfFileWriter(out)
    content = 'BT /F1 18 Tf 0 50 Td (Test2 Test2) Tj ET'.encode('ascii')
    stream_ref = w.add_object(generic.StreamObject(stream_data=content))
    w.add_stream_to_page(0, stream_ref)

    out = BytesIO()
    w.write(out)
    out.seek(0)

    r = PdfFileReader(out)
    # check if the content stream was added
    page_ref = r.root['/Pages']['/Kids'].raw_get(0)
    assert isinstance(page_ref, generic.IndirectObject)
    page_obj = page_ref.get_object()
    conts = page_obj['/Contents']
    assert len(conts) == 3
    assert stream_ref.idnum in (c.idnum for c in conts)
    # check if resource dictionary is still OK
    assert '/F1' in page_obj['/Resources']['/Font']
def test_write_embedded_string_objstream():
    """
    Embed a font subset into an object stream and check that the font can be
    retrieved from that object stream after writing.
    """
    tt_font = ttLib.TTFont(NOTO_SERIF_JP)
    ga = GlyphAccumulator(tt_font)
    cid_hx, _ = ga.feed_string('テスト')
    assert cid_hx == '0637062a0639'

    w = IncrementalPdfFileWriter(BytesIO(MINIMAL_XREF))
    obj_stream = w.prepare_object_stream()
    font_ref = ga.embed_subset(w, obj_stream=obj_stream)

    content = f'BT /FEmb 18 Tf 0 100 Td <{cid_hx}> Tj ET'.encode('ascii')
    stream_ref = w.add_object(generic.StreamObject(stream_data=content))

    font_dict = generic.DictionaryObject({pdf_name('/FEmb'): font_ref})
    page_resources = generic.DictionaryObject({pdf_name('/Font'): font_dict})
    w.add_stream_to_page(0, stream_ref, resources=page_resources)

    out = BytesIO()
    w.write(out)
    out.seek(0)

    r = PdfFileReader(out)
    page_obj = r.root['/Pages']['/Kids'][0].get_object()
    conts = page_obj['/Contents']
    assert len(conts) == 2
    assert stream_ref.idnum in (c.idnum for c in conts)
    assert font_ref.idnum in r.xrefs.in_obj_stream

    out.seek(0)
    # attempt to grab the font from the object stream
    font_ref.pdf = r
    font = font_ref.get_object()
    assert font['/Type'] == pdf_name('/Font')
def test_not_all_paths_cleared():
    """
    Make /Fields reachable through a second path (/Root/Blah) before
    signing, then add another signed field and check that the first
    signature is reported as modified.
    """
    w = IncrementalPdfFileWriter(BytesIO(MINIMAL_ONE_FIELD))

    # make /Fields indirect
    fields_arr = w.root['/AcroForm'].raw_get('/Fields')
    # just in case we ever end up declaring /Fields as indirect in the example
    assert isinstance(fields_arr, generic.ArrayObject)
    fields_ref = w.add_object(fields_arr)
    w.root['/AcroForm']['/Fields'] = fields_ref
    w.root['/Blah'] = fields_ref
    w.update_root()
    w.update_container(w.root['/AcroForm'])

    first_meta = signers.PdfSignatureMetadata(field_name='Sig1')
    out = signers.sign_pdf(w, signature_meta=first_meta, signer=FROM_CA)

    # create a new signature field after signing
    w = IncrementalPdfFileWriter(out)
    second_meta = signers.PdfSignatureMetadata(field_name='SigNew')
    out = signers.sign_pdf(w, second_meta, signer=FROM_CA)

    r = PdfFileReader(out)
    val_trusted_but_modified(embedded_sig=r.embedded_signatures[0])
def test_bogus_metadata_manipulation():
    """
    Tamper with /Metadata of a doubly-signed file in four different ways;
    each variant must be classified as ``ModificationLevel.OTHER``.
    """
    # test using a double signature created using Adobe Reader
    # (uses object streams, XMP metadata updates and all the fun stuff)
    infile = BytesIO(
        read_all(PDF_DATA_DIR + '/minimal-two-fields-signed-twice.pdf')
    )
    bogus = b'This is bogus data, yay!'

    def write_and_check(pdf_writer):
        # serialise the tampered revision and validate the first signature
        out = BytesIO()
        pdf_writer.write(out)
        r = PdfFileReader(out)
        print(r.get_object(generic.Reference(2, 0, r), revision=3).data)
        sig = r.embedded_signatures[0]
        status = validate_pdf_signature(sig)
        assert status.modification_level == ModificationLevel.OTHER

    # variant 1: point /Metadata at a brand-new bogus stream
    w = IncrementalPdfFileWriter(infile)
    w.root['/Metadata'] = w.add_object(
        generic.StreamObject(stream_data=bogus)
    )
    w.update_root()
    write_and_check(w)

    # variant 2: rewrite the contents of the existing metadata stream
    w = IncrementalPdfFileWriter(infile)
    metadata_ref = w.root.raw_get('/Metadata')
    metadata_stream: generic.StreamObject = metadata_ref.get_object()
    metadata_stream.strip_filters()
    metadata_stream._data = bogus
    metadata_stream._encoded_data = None
    w.mark_update(metadata_ref)
    write_and_check(w)

    # variant 3: replace /Metadata with a direct null
    w = IncrementalPdfFileWriter(infile)
    w.root['/Metadata'] = generic.NullObject()
    w.update_root()
    write_and_check(w)

    # variant 4: replace /Metadata with a reference to a null object
    w = IncrementalPdfFileWriter(infile)
    w.root['/Metadata'] = w.add_object(generic.NullObject())
    w.update_root()
    write_and_check(w)
def test_add_stream():
    """
    Add a content stream to the first page twice, in two separate
    incremental updates, and check the /Contents array after each one.
    """

    def stream_data(y):
        return f'BT /F1 18 Tf 0 {y} Td (Test Test) Tj ET'.encode('ascii')

    def read_first_page(buf, expected_streams):
        # re-read the output and return (page dict, /Contents entries)
        reader = PdfFileReader(buf)
        page_ref = reader.root['/Pages']['/Kids'].raw_get(0)
        assert isinstance(page_ref, generic.IndirectObject)
        page = page_ref.get_object()
        contents = page['/Contents']
        assert len(contents) == expected_streams
        return page, contents

    w = IncrementalPdfFileWriter(BytesIO(MINIMAL))
    stream_ref = w.add_object(generic.StreamObject(stream_data=stream_data(50)))
    w.add_stream_to_page(0, stream_ref)

    out = BytesIO()
    w.write(out)
    out.seek(0)

    # check if the content stream was added
    page_obj, conts = read_first_page(out, 2)
    assert stream_ref.idnum in (c.idnum for c in conts)
    # check if resource dictionary is still OK
    assert '/F1' in page_obj['/Resources']['/Font']

    # let's try adding a third
    out.seek(0)
    w = IncrementalPdfFileWriter(out)
    new_stream_ref = w.add_object(
        generic.StreamObject(stream_data=stream_data(100))
    )
    w.add_stream_to_page(0, new_stream_ref)

    out = BytesIO()
    w.write(out)
    out.seek(0)

    # check if the content stream was added
    page_obj, conts = read_first_page(out, 3)
    ids = [c.idnum for c in conts]
    assert stream_ref.idnum in ids and new_stream_ref.idnum in ids
def test_code128_render():
    """
    Stamp a code128 barcode onto the first page and compare the result with
    the stored expected output.
    """
    pdf_out = IncrementalPdfFileWriter(BytesIO(MINIMAL))
    barcode = barcodes.BarcodeBox("code128", "this is a test")
    barcode_ref = pdf_out.add_object(barcode.as_form_xobject())

    # content stream that translates by (50, 50) and paints the barcode
    wrapper = generic.StreamObject(
        stream_data=b'q 1 0 0 1 50 50 cm /Barcode Do Q'
    )
    xobjects = generic.DictionaryObject({pdf_name('/Barcode'): barcode_ref})
    resources = generic.DictionaryObject({pdf_name('/XObject'): xobjects})

    wrapper_ref = pdf_out.add_object(wrapper)
    pdf_out.add_stream_to_page(0, wrapper_ref, resources)

    # TODO try to read back the code using some kind of barcode scanning
    #  library, perhaps.
    compare_output(pdf_out, f'{EXPECTED_OUTPUT_DIR}/code128-test.pdf')
def test_trailer_update():
    """Replace the trailer's /Info dictionary and read the new value back."""
    w = IncrementalPdfFileWriter(BytesIO(MINIMAL_ONE_FIELD))

    creation_time = datetime.datetime(2020, 10, 10, tzinfo=pytz.utc)
    dt = generic.pdf_date(creation_time)
    new_info = generic.DictionaryObject({pdf_name('/CreationDate'): dt})
    w.trailer['/Info'] = w.add_object(new_info)

    out = BytesIO()
    w.write(out)

    reader = PdfFileReader(out)
    assert reader.trailer['/Info']['/CreationDate'] == dt
def append_signature_field(pdf_out: IncrementalPdfFileWriter,
                           sig_field_spec: SigFieldSpec):
    """
    Add a new, empty signature field to a PDF file.

    :param pdf_out:
        Incremental writer that receives the newly created objects.
    :param sig_field_spec:
        A :class:`.SigFieldSpec` object describing the signature field to
        add (name, page, bounding box and, optionally, a seed value
        dictionary and a lock dictionary).
    :raises PdfWriteError:
        If a signature field with the requested name already exists.
    """
    root = pdf_out.root
    target_page = pdf_out.find_page_for_modification(
        sig_field_spec.on_page
    )[0]

    # use default appearance
    created, field_ref = _prepare_sig_field(
        sig_field_spec.sig_field_name,
        root,
        update_writer=pdf_out,
        existing_fields_only=False,
        box=sig_field_spec.box,
        include_on_page=target_page,
        lock_sig_flags=False,
    )
    if not created:
        raise PdfWriteError(
            'Signature field with name %s already exists.'
            % sig_field_spec.sig_field_name
        )

    field_dict = field_ref.get_object()

    seed_values = sig_field_spec.seed_value_dict
    if seed_values is not None:
        # /SV must be an indirect reference as per the spec
        field_dict[pdf_name('/SV')] = pdf_out.add_object(
            seed_values.as_pdf_object()
        )

    lock_dict = sig_field_spec.format_lock_dictionary()
    if lock_dict is not None:
        field_dict[pdf_name('/Lock')] = pdf_out.add_object(lock_dict)
def test_sign_reject_freed(forbid_freeing):
    """
    Free an object after signing by appending a hand-written xref section,
    and check that ``reject_object_freeing`` decides whether an otherwise
    whitelisted update is accepted.
    """
    w = IncrementalPdfFileWriter(BytesIO(MINIMAL_ONE_FIELD))
    freed = w.add_object(generic.pdf_string('Hi there!'))
    w.root['/Blah'] = freed
    out = signers.sign_pdf(
        w,
        signature_meta=signers.PdfSignatureMetadata(field_name='Sig1'),
        signer=FROM_CA,
    )

    # free the dummy object we created before signing
    # since we don't have support for freeing objects in the writer (yet),
    # do it manually
    r = PdfFileReader(out)
    last_startxref = r.last_startxref
    len_out = out.seek(0, os.SEEK_END)
    xref_update = [
        b'xref',
        b'0 1',
        b'0000000000 65535 f ',
        b'%d 1' % freed.idnum,
        b'0000000000 00001 f ',
        b'trailer<</Prev %d>>' % last_startxref,
        b'startxref',
        b'%d' % len_out,
        b'%%EOF',
    ]
    out.write(b'\n'.join(xref_update))

    r = PdfFileReader(out)
    last_rev = r.xrefs.xref_sections - 1
    assert freed.reference in r.xrefs.refs_freed_in_revision(last_rev)

    sig = r.embedded_signatures[0]
    assert sig.signed_revision == 2

    # make a dummy rule that whitelists our freed object ref
    class AdHocRule(QualifiedWhitelistRule):
        def apply_qualified(self, old: HistoricalResolver,
                            new: HistoricalResolver):
            yield ModificationLevel.LTA_UPDATES, ReferenceUpdate(
                freed.reference,
                paths_checked=RawPdfPath('/Root', '/Pages'),
            )

    policy = StandardDiffPolicy(
        DEFAULT_DIFF_POLICY.global_rules + [AdHocRule()],
        DEFAULT_DIFF_POLICY.form_rule,
        reject_object_freeing=forbid_freeing,
    )
    val_status = validate_pdf_signature(
        sig, SIMPLE_V_CONTEXT(), diff_policy=policy
    )

    expected_level = (
        ModificationLevel.OTHER if forbid_freeing
        else ModificationLevel.LTA_UPDATES
    )
    assert val_status.modification_level == expected_level
def test_form_field_structure_modification():
    """
    Append a renamed shallow copy of an existing form field after signing
    and check that the signature is reported as modified.
    """
    w = IncrementalPdfFileWriter(BytesIO(SIMPLE_FORM))
    meta = signers.PdfSignatureMetadata(field_name='Sig1')
    out = signers.sign_pdf(w, meta, signer=FROM_CA, timestamper=DUMMY_TS)

    w = IncrementalPdfFileWriter(out)
    field_arr = w.root['/AcroForm']['/Fields']
    # shallow copy the text field
    field_copy = generic.DictionaryObject(field_arr[1].get_object())
    field_copy['/T'] = generic.pdf_string('OtherField')
    copy_ref = w.add_object(field_copy)
    field_arr.append(copy_ref)
    w.update_container(field_arr)

    out = BytesIO()
    w.write(out)

    r = PdfFileReader(out)
    sig = r.embedded_signatures[0]
    assert sig.field_name == 'Sig1'
    val_trusted_but_modified(sig)
def test_write_embedded_string_objstream():
    """
    Have a GlyphAccumulator place its font in an object stream and check
    that the font is listed in the last xref section's object-stream
    entries and can be fetched after writing.
    """
    w = IncrementalPdfFileWriter(BytesIO(MINIMAL_XREF))
    obj_stream = w.prepare_object_stream()
    with open(NOTO_SERIF_JP, 'rb') as font_file:
        ga = GlyphAccumulator(
            w, font_file, font_size=10, obj_stream=obj_stream
        )
        # shape the string, just to register the glyphs as used
        ga.shape('テスト')
        # ... but we're not going to use the result

    # hardcoded CIDs
    cid_hx = '0637062a0639'
    font_ref = ga.as_resource()

    content = f'BT /FEmb 18 Tf 0 100 Td <{cid_hx}> Tj ET'.encode('ascii')
    stream_ref = w.add_object(generic.StreamObject(stream_data=content))

    font_dict = generic.DictionaryObject({pdf_name('/FEmb'): font_ref})
    page_resources = generic.DictionaryObject({pdf_name('/Font'): font_dict})
    w.add_stream_to_page(0, stream_ref, resources=page_resources)

    out = BytesIO()
    w.write(out)
    out.seek(0)

    r = PdfFileReader(out)
    page_obj = r.root['/Pages']['/Kids'][0].get_object()
    conts = page_obj['/Contents']
    assert len(conts) == 2
    assert stream_ref.idnum in (c.idnum for c in conts)

    # the font must be recorded in the most recent xref section
    xref_sections = r.xrefs._xref_sections
    newest_section = xref_sections[len(xref_sections) - 1]
    assert font_ref.idnum in newest_section.xref_data.xrefs_in_objstm

    out.seek(0)
    # attempt to grab the font from the object stream
    font_ref.pdf = r
    font = font_ref.get_object()
    assert font['/Type'] == pdf_name('/Font')
def test_pades_dss_object_typing_tamper(requests_mock, bogus_dss):
    """
    Replace the DSS with a bogus object after signing and check that the
    signature is reported as modified.
    """
    w = IncrementalPdfFileWriter(BytesIO(MINIMAL_TWO_FIELDS))
    meta1 = signers.PdfSignatureMetadata(
        field_name='Sig1',
        validation_context=live_testing_vc(requests_mock),
        subfilter=PADES,
        embed_validation_info=True,
    )
    signed = signers.sign_pdf(
        w, meta1, signer=FROM_CA, timestamper=DUMMY_TS
    )
    signed.seek(0)
    original_out = signed.read()

    # write some bogus reference into the DSS
    w = IncrementalPdfFileWriter(BytesIO(original_out))
    w.root['/DSS'] = w.add_object(bogus_dss)
    w.update_root()

    out = BytesIO()
    w.write(out)

    r = PdfFileReader(out)
    sig = r.embedded_signatures[0]
    assert sig.field_name == 'Sig1'
    val_trusted_but_modified(sig)
def _prepare_sig_field(sig_field_name, root,
                       update_writer: IncrementalPdfFileWriter,
                       existing_fields_only=False, lock_sig_flags=True,
                       **kwargs):
    """
    Look up the signature field with the given name, creating it (and the
    form, if necessary) when it does not exist yet.

    :param sig_field_name:
        Name of the signature field to find or create.
    :param root:
        The document catalog; its ``/AcroForm`` entry is read and, if
        absent, created.
    :param update_writer:
        Incremental writer used to add new objects and register updates.
    :param existing_fields_only:
        If ``True``, never create a form or a field; raise instead.
    :param lock_sig_flags:
        Selects the default ``/SigFlags`` value used when the form has none:
        ``3`` if ``True``, ``1`` otherwise.
    :param kwargs:
        Extra keyword arguments forwarded to :class:`SignatureFormField`.
        ``include_on_page`` defaults to the first page.
    :return:
        A tuple of a boolean and a reference to a signature field.
        The boolean is ``True`` if the field was created, and ``False``
        otherwise.
    :raises ValueError:
        If ``sig_field_name`` is ``None`` or ``/AcroForm`` has no
        ``/Fields`` entry.
    :raises SigningError:
        If the field is already filled, or if ``existing_fields_only`` is
        set and no suitable form/field exists.
    :raises NotImplementedError:
        If a new field would have to be created with a dotted
        (hierarchical) name.
    """
    if sig_field_name is None:  # pragma: nocover
        raise ValueError

    try:
        form = root['/AcroForm']

        try:
            fields = form['/Fields']
        except KeyError:
            raise ValueError('/AcroForm has no /Fields')

        candidates = enumerate_sig_fields_in(fields, with_name=sig_field_name)
        sig_field_ref = None
        try:
            # only the first match is considered
            _, value, sig_field_ref = next(candidates)
            if value is not None:
                raise SigningError(
                    'Signature field with name %s appears to be filled already.'
                    % sig_field_name
                )
        except StopIteration:
            if existing_fields_only:
                raise SigningError(
                    'No empty signature field with name %s found.'
                    % sig_field_name
                )
        form_created = False
    except KeyError:
        # we have to create the form
        if existing_fields_only:
            raise SigningError('This file does not contain a form.')
        # no AcroForm present, so create one
        form = generic.DictionaryObject()
        root[pdf_name('/AcroForm')] = update_writer.add_object(form)
        fields = generic.ArrayObject()
        form[pdf_name('/Fields')] = fields
        # now we need to mark the root as updated
        update_writer.update_root()
        form_created = True
        sig_field_ref = None

    if sig_field_ref is not None:
        return False, sig_field_ref

    if '.' in sig_field_name:
        # note the explicit space: adjacent literals are concatenated as-is
        raise NotImplementedError(
            "Creating fields deep in the form hierarchy is not supported "
            "right now."
        )

    # no signature field exists, so create one
    # default: grab a reference to the first page
    page_ref = update_writer.find_page_for_modification(0)[0]
    sig_form_kwargs = {'include_on_page': page_ref}
    # caller-supplied keyword arguments override the default page
    sig_form_kwargs.update(**kwargs)
    sig_field = SignatureFormField(
        sig_field_name, writer=update_writer, **sig_form_kwargs
    )
    sig_field_ref = sig_field.reference
    fields.append(sig_field_ref)

    # make sure /SigFlags is present. If not, create it
    # NOTE(review): 3 vs. 1 presumably maps to SignaturesExist|AppendOnly
    #  vs. SignaturesExist only -- confirm against the PDF specification.
    sig_flags = 3 if lock_sig_flags else 1
    form.setdefault(pdf_name('/SigFlags'), generic.NumberObject(sig_flags))
    # if a field was added to an existing form, register an extra update
    if not form_created:
        update_writer.update_container(fields)
    return True, sig_field_ref