def test_key_encipherment_requirement_override(version, keylen, use_aes,
                                               use_crypt_filters):
    """
    Encrypt a document for a certificate with ``ignore_key_usage=True``
    and check that the matching decrypter can open it and read the page.
    """
    source = PdfFileReader(BytesIO(VECTOR_IMAGE_PDF))
    pdf_writer = writer.PdfFileWriter()

    handler = PubKeySecurityHandler.build_from_certs(
        [PUBKEY_SELFSIGNED_DECRYPTER.cert],
        keylen_bytes=keylen, version=version, use_aes=use_aes,
        use_crypt_filters=use_crypt_filters, perms=-44,
        ignore_key_usage=True,
    )
    pdf_writer.security_handler = handler
    pdf_writer._encrypt = pdf_writer.add_object(handler.as_pdf_object())

    # graft the page tree of the source document onto the fresh writer
    pdf_writer.root['/Pages'] = pdf_writer.import_object(
        source.root.raw_get('/Pages'),
    )

    out = BytesIO()
    pdf_writer.write(out)

    encrypted = PdfFileReader(out)
    auth = encrypted.decrypt_pubkey(PUBKEY_SELFSIGNED_DECRYPTER)
    assert auth.status == AuthStatus.USER
    assert auth.permission_flags == -44

    first_page = encrypted.root['/Pages']['/Kids'][0].get_object()
    assert '/ExtGState' in first_page['/Resources']
    # marker bytes known to occur in the decoded content stream of the
    # (only) page in VECTOR_IMAGE_PDF
    assert b'0 1 0 rg /a0 gs' in first_page['/Contents'].data
def test_update_hybrid_twice(fname):
    """
    Apply two successive incremental updates to a hybrid-reference file
    and check the xref section types reported after each update.
    """
    with open(os.path.join(PDF_DATA_DIR, fname), 'rb') as inf:
        w = IncrementalPdfFileWriter(inf)
        title_ref = w.trailer['/Info'].raw_get('/Title')
        assert isinstance(title_ref, generic.IndirectObject)
        # overwrite the title object directly in the writer's object table
        obj_key = (title_ref.generation, title_ref.idnum)
        w.objects[obj_key] = generic.pdf_string('Updated')
        out = BytesIO()
        w.write(out)

    r = PdfFileReader(out)
    assert r.trailer['/Info']['/Title'] == 'Updated'
    after_first_update = (
        XRefSectionType.HYBRID_MAIN, XRefSectionType.STANDARD,
    )
    for section_ix, expected_type in enumerate(after_first_update, start=1):
        info = r.xrefs.get_xref_container_info(section_ix)
        assert info.xref_section_type == expected_type

    # second update, written in place this time
    w = IncrementalPdfFileWriter(out)
    w.add_object(generic.pdf_string('This is an object'))
    w.write_in_place()

    r = PdfFileReader(out)
    assert '/XRefStm' not in r.trailer
    assert '/XRefStm' not in r.trailer_view
    assert r.trailer['/Info']['/Title'] == 'Updated'
    after_second_update = (
        XRefSectionType.HYBRID_MAIN,
        XRefSectionType.STANDARD,
        XRefSectionType.STANDARD,
    )
    for section_ix, expected_type in enumerate(after_second_update, start=1):
        info = r.xrefs.get_xref_container_info(section_ix)
        assert info.xref_section_type == expected_type
def test_legacy_encryption(use_owner_pass, rev, keylen_bytes, use_aes):
    """
    Encrypt with a legacy password-based handler and verify that both the
    owner and the user password yield the expected authentication result.
    """
    source = PdfFileReader(BytesIO(VECTOR_IMAGE_PDF))
    pdf_writer = writer.PdfFileWriter()

    handler = StandardSecurityHandler.build_from_pw_legacy(
        rev, pdf_writer._document_id[0].original_bytes,
        "ownersecret", "usersecret",
        keylen_bytes=keylen_bytes, use_aes128=use_aes, perms=-44,
    )
    pdf_writer.security_handler = handler
    pdf_writer._encrypt = pdf_writer.add_object(handler.as_pdf_object())

    # graft the page tree of the source document onto the fresh writer
    pdf_writer.root['/Pages'] = pdf_writer.import_object(
        source.root.raw_get('/Pages'),
    )

    out = BytesIO()
    pdf_writer.write(out)

    encrypted = PdfFileReader(out)
    auth = encrypted.decrypt(
        "ownersecret" if use_owner_pass else "usersecret"
    )
    if not use_owner_pass:
        assert auth.status == AuthStatus.USER
        assert auth.permission_flags == -44
    else:
        assert auth.status == AuthStatus.OWNER
        assert auth.permission_flags is None

    first_page = encrypted.root['/Pages']['/Kids'][0].get_object()
    assert encrypted.trailer['/Encrypt']['/P'] == -44
    assert '/ExtGState' in first_page['/Resources']
    # marker bytes known to occur in the decoded content stream of the
    # (only) page in VECTOR_IMAGE_PDF
    assert b'0 1 0 rg /a0 gs' in first_page['/Contents'].data
def test_add_revinfo_without_timestamp(requests_mock): w = IncrementalPdfFileWriter(BytesIO(MINIMAL)) # create signature without revocation info with freeze_time('2020-11-01'): out = signers.sign_pdf( w, signers.PdfSignatureMetadata(field_name='Sig1'), signer=FROM_CA, in_place=True ) # fast forward 1 month with freeze_time('2020-12-01'): vc = live_testing_vc(requests_mock) r = PdfFileReader(out) emb_sig = r.embedded_signatures[0] out = add_validation_info(emb_sig, vc) r = PdfFileReader(out) emb_sig = r.embedded_signatures[0] # even with revinfo, this should fail for lack of a timestamp with pytest.raises(SignatureValidationError, match='.*trusted timestamp.*'): validate_pdf_ltv_signature( emb_sig, RevocationInfoValidationType.PADES_LT, {'trust_roots': TRUST_ROOTS, 'retroactive_revinfo': True} ) # ... and certainly for LTA with pytest.raises(SignatureValidationError, match='Purported.*LTA.*'): validate_pdf_ltv_signature( emb_sig, RevocationInfoValidationType.PADES_LTA, {'trust_roots': TRUST_ROOTS, 'retroactive_revinfo': True} )
def _decrypt_pubkey(sedk: crypt.SimpleEnvelopeKeyDecrypter, infile, outfile,
                    force):
    """
    Decrypt a public-key encrypted file and write an unencrypted copy.

    :param sedk:
        Decrypter passed to the reader's ``decrypt_pubkey`` method.
    :param infile:
        Path of the encrypted input file.
    :param outfile:
        Path the decrypted copy is written to.
    :param force:
        If true, proceed even when only user-level access was obtained.
    :raises click.ClickException:
        If the file is not encrypted, does not use a public-key security
        handler, cannot be decrypted, or only grants user access while
        ``force`` is not set.
    """
    with pyhanko_exception_manager(), open(infile, 'rb') as inf:
        reader = PdfFileReader(inf)
        handler = reader.security_handler
        if handler is None:
            raise click.ClickException("File is not encrypted.")
        if not isinstance(handler, crypt.PubKeySecurityHandler):
            raise click.ClickException(
                "File was not encrypted with a public-key security handler."
            )

        status = reader.decrypt_pubkey(sedk).status
        if status == crypt.AuthStatus.FAILED:
            raise click.ClickException("Failed to decrypt the file.")
        # TODO read 2nd bit of perms in CMS enveloped data
        #  is the one indicating that change of encryption is OK
        if status == crypt.AuthStatus.USER and not force:
            raise click.ClickException(
                "Change of encryption is typically not allowed with "
                "user access. Pass --force to decrypt the file anyway."
            )

        decrypted_copy = copy_into_new_writer(reader)
        with open(outfile, 'wb') as outf:
            decrypted_copy.write(outf)
def test_sign_with_later_revoked_nots(requests_mock):
    """
    Sign (without a timestamp) using a certificate that is revoked by the
    time of validation, and check that the signature is reported revoked.
    """
    pdf_writer = IncrementalPdfFileWriter(BytesIO(MINIMAL))
    with freeze_time('2020-01-20'):
        meta = signers.PdfSignatureMetadata(field_name='Sig1')
        out = signers.sign_pdf(pdf_writer, meta, signer=REVOKED_SIGNER)
        # read the signature back once at signing time
        signing_time_reader = PdfFileReader(out)
        signing_time_sig = signing_time_reader.embedded_signatures[0]

    # without a timestamp there is nothing to anchor the signing time to,
    # so the checker has to treat the signature as made after revocation
    with freeze_time('2020-12-05'):
        reader = PdfFileReader(out)
        emb_sig = reader.embedded_signatures[0]
        vc = live_testing_vc(requests_mock)
        status = validate_pdf_signature(emb_sig, vc)

        assert status.intact
        assert status.valid
        assert status.revoked
        assert not status.trusted

        summary = status.summary()
        assert 'INTACT' in summary
        assert 'REVOKED' in summary

        assert status.coverage == SignatureCoverageLevel.ENTIRE_FILE
        assert status.modification_level == ModificationLevel.NONE
        assert not status.bottom_line
def test_no_revinfo_to_be_added(requests_mock, in_place):
    """
    Re-adding validation info from the same validation context must not
    grow the DSS (nor the file, when operating in place).
    """
    buf = BytesIO(MINIMAL)
    pdf_writer = IncrementalPdfFileWriter(buf)
    vc = live_testing_vc(requests_mock)
    meta = signers.PdfSignatureMetadata(
        field_name='Sig1', embed_validation_info=True,
        validation_context=vc,
        subfilter=fields.SigSeedSubFilter.PADES,
    )
    signers.sign_pdf(
        pdf_writer, meta, signer=FROM_CA, timestamper=DUMMY_TS,
        in_place=True,
    )

    length_before = buf.seek(0, os.SEEK_END)
    reader = PdfFileReader(buf)
    emb_sig = reader.embedded_signatures[0]
    dss_before = DocumentSecurityStore.read_dss(reader)
    assert len(dss_before.ocsps) == 1
    assert len(dss_before.crls) == 1

    # Same validation context as before, so nothing new should be added.
    # VRI updates are disabled because those always trigger a write.
    output = add_validation_info(
        emb_sig, vc, in_place=in_place, add_vri_entry=False
    )
    if in_place:
        assert output is reader.stream
        length_after = output.seek(0, os.SEEK_END)
        assert length_before == length_after

    dss_after = DocumentSecurityStore.read_dss(PdfFileReader(output))
    assert len(dss_after.ocsps) == 1
    assert len(dss_after.crls) == 1
def test_identity_crypt_filter(use_alias, with_never_decrypt):
    """
    A stream using the ``/Identity`` crypt filter must be stored
    unencrypted; registering the identity filter under another name must
    make assigning the security handler fail.
    """
    pdf_writer = writer.PdfFileWriter()
    handler = StandardSecurityHandler.build_from_pw("secret")
    pdf_writer.security_handler = handler
    idf: IdentityCryptFilter = IdentityCryptFilter()
    assert handler.crypt_filter_config[pdf_name("/Identity")] is idf

    if use_alias:
        alias = pdf_name("/IdentityAlias")
        handler.crypt_filter_config._crypt_filters[alias] = idf
        assert handler.crypt_filter_config[pdf_name("/IdentityAlias")] is idf
        # identity filter can't be serialised, so this should throw an error
        with pytest.raises(misc.PdfError):
            pdf_writer._assign_security_handler(handler)
        return

    pdf_writer._assign_security_handler(handler)

    payload = b'This is some test data that should remain unencrypted.'
    stm = generic.StreamObject(stream_data=payload, handler=handler)
    stm.apply_filter(
        "/Crypt", params={pdf_name("/Name"): pdf_name("/Identity")}
    )
    stm_ref = pdf_writer.add_object(stm).reference

    out = BytesIO()
    pdf_writer.write(out)

    reader = PdfFileReader(out)
    reader.decrypt("secret")
    read_back = reader.get_object(stm_ref, never_decrypt=with_never_decrypt)
    assert read_back.encoded_data == payload
    assert read_back.data == payload
def test_no_changes_policy():
    """
    An ``/Info`` update after signing passes the default diff policy but
    is flagged as suspicious by ``NO_CHANGES_DIFF_POLICY``.
    """
    meta = signers.PdfSignatureMetadata(
        field_name='Sig1', certify=True,
        docmdp_permissions=fields.MDPPerm.FILL_FORMS,
    )
    out = signers.sign_pdf(
        IncrementalPdfFileWriter(BytesIO(MINIMAL_ONE_FIELD)), meta,
        signer=FROM_CA,
    )

    # follow up with an incremental /Info update
    updater = IncrementalPdfFileWriter(out)
    creation_date = generic.pdf_date(
        datetime(2020, 10, 10, tzinfo=pytz.utc)
    )
    updater.set_info(
        generic.DictionaryObject({pdf_name('/CreationDate'): creation_date})
    )
    updater.write_in_place()

    # the regular diff policy accepts the change
    emb_sig = PdfFileReader(out).embedded_signatures[0]
    assert emb_sig.field_name == 'Sig1'
    lenient_status = val_trusted(emb_sig, extd=True)
    assert lenient_status.modification_level == ModificationLevel.LTA_UPDATES
    assert lenient_status.docmdp_ok

    # the ultra-strict no-op policy does not
    emb_sig = PdfFileReader(out).embedded_signatures[0]
    strict_status = validate_pdf_signature(
        emb_sig, diff_policy=NO_CHANGES_DIFF_POLICY
    )
    assert isinstance(emb_sig.diff_result, SuspiciousModification)
    assert not strict_status.docmdp_ok
def test_embed_with_af(incremental):
    """
    Embed a file with an associated-file relationship and check the
    resulting name tree entry, file spec and ``/AF`` array.
    """
    if incremental:
        pdf_writer = IncrementalPdfFileWriter(BytesIO(MINIMAL))
    else:
        base_reader = PdfFileReader(BytesIO(MINIMAL))
        pdf_writer = writer.copy_into_new_writer(base_reader)

    modified = datetime.now(tz=tzlocal.get_localzone())
    created = modified - timedelta(days=1)
    ef_params = embed.EmbeddedFileParams(
        creation_date=created, modification_date=modified
    )
    ef_obj = embed.EmbeddedFileObject.from_file_data(
        pdf_writer, data=VECTOR_IMAGE_PDF, params=ef_params
    )

    file_spec = embed.FileSpec(
        file_spec_string='vector-test.pdf',
        embedded_data=ef_obj,
        description='Embedding test /w assoc file',
        af_relationship=generic.pdf_name('/Unspecified'),
    )
    embed.embed_file(pdf_writer, file_spec)

    out = BytesIO()
    pdf_writer.write(out)

    reader = PdfFileReader(out)
    assert reader.input_version == (2, 0)

    # the name tree leaf holds the file name followed by its file spec
    names = reader.root['/Names']['/EmbeddedFiles']['/Names']
    assert len(names) == 2
    assert names[0] == 'vector-test.pdf'

    spec_dict = names[1]
    assert '/UF' not in spec_dict
    assert spec_dict['/AFRelationship'] == '/Unspecified'

    embedded_stream = spec_dict['/EF']['/F']
    assert embedded_stream.data == VECTOR_IMAGE_PDF
    assert '/UF' not in spec_dict['/EF']

    assert reader.root['/AF'].raw_get(0).reference == spec_dict.container_ref
def test_broken_obj_stream_fallback(fname, obj_to_get, expect_null):
    """
    In non-strict mode, fetching an object from the given file yields
    either a null object or a dictionary, depending on the test case.
    """
    # every test case is set up to point at a dictionary unless a null
    # object is expected
    expected_type = (
        generic.NullObject if expect_null else generic.DictionaryObject
    )
    pdf_path = os.path.join(PDF_DATA_DIR, fname)
    with open(pdf_path, 'rb') as inf:
        reader = PdfFileReader(inf, strict=False)
        fetched = reader.get_object(generic.Reference(idnum=obj_to_get))
        assert isinstance(fetched, expected_type)
def validate_signatures(ctx, infile, executive_summary,
                        pretty_print, validation_context, trust, trust_replace,
                        other_certs, ltv_profile, force_revinfo,
                        soft_revocation_check, no_revocation_check, password,
                        retroactive_revinfo):
    """
    Validate the regular signatures embedded in ``infile`` and print one
    status report per signature.

    With ``pretty_print`` each report is printed under a framed
    ``Field N: name`` header; otherwise one ``name:fingerprint:status``
    line is printed per signature.

    If the file uses a password-based security handler, it is decrypted
    with ``password``; when no password was supplied, the user is prompted
    for one.

    :raises click.ClickException:
        If ``pretty_print`` and ``executive_summary`` are both set, if the
        password does not unlock the file, or if the file is encrypted
        with anything other than the standard (password-based) handler.
    """
    # skipping revocation checks altogether implies tolerating their absence
    if no_revocation_check:
        soft_revocation_check = True

    if pretty_print and executive_summary:
        raise click.ClickException(
            "--pretty-print is incompatible with --executive-summary."
        )

    if ltv_profile is not None:
        ltv_profile = RevocationInfoValidationType(ltv_profile)

    vc_kwargs = _build_vc_kwargs(
        ctx, validation_context, trust, trust_replace, other_certs,
        retroactive_revinfo,
        allow_fetching=False if no_revocation_check else None
    )

    key_usage_settings = _get_key_usage_settings(ctx, validation_context)
    with pyhanko_exception_manager():
        r = PdfFileReader(infile)
        sh = r.security_handler
        if isinstance(sh, crypt.StandardSecurityHandler):
            if password is None:
                password = getpass.getpass(prompt='File password: ')
            auth_result = r.decrypt(password)
            if auth_result.status == crypt.AuthStatus.FAILED:
                raise click.ClickException("Password didn't match.")
        elif sh is not None:
            raise click.ClickException(
                "The CLI supports only password-based encryption when "
                "validating (for now)"
            )

        for ix, embedded_sig in enumerate(r.embedded_regular_signatures):
            fingerprint: str = embedded_sig.signer_cert.sha256.hex()
            status_str = _signature_status(
                ltv_profile, force_revinfo, soft_revocation_check,
                pretty_print, vc_kwargs, key_usage_settings,
                executive_summary, embedded_sig
            )
            name = embedded_sig.field_name

            if pretty_print:
                header = f'Field {ix + 1}: {name}'
                line = '=' * len(header)
                print(line)
                print(header)
                print(line)
                print('\n\n' + status_str)
            else:
                print('%s:%s:%s' % (name, fingerprint, status_str))
def test_copy_file():
    """
    Copying a document into a new writer keeps the root reference, the
    form field and the page.
    """
    copied = writer.copy_into_new_writer(
        PdfFileReader(BytesIO(MINIMAL_ONE_FIELD))
    )
    root_ref_before = copied.root_ref

    out = BytesIO()
    copied.write(out)

    reread = PdfFileReader(out)
    assert reread.root_ref == root_ref_before
    assert len(reread.root['/AcroForm']['/Fields']) == 1
    assert len(reread.root['/Pages']['/Kids']) == 1
def test_simple_sign_fresh_doc():
    """Sign a document that was copied into a non-incremental writer."""
    fresh_writer = copy_into_new_writer(PdfFileReader(BytesIO(MINIMAL)))
    out = signers.sign_pdf(
        fresh_writer,
        signers.PdfSignatureMetadata(field_name='Sig1'),
        signer=SELF_SIGN,
    )

    signed_reader = PdfFileReader(out)
    emb_sig = signed_reader.embedded_signatures[0]
    assert emb_sig.field_name == 'Sig1'
    val_untrusted(emb_sig)
def test_decrypt_ef_without_explicit_crypt_filter():
    """
    An embedded file stream without an explicit crypt filter is still
    decrypted correctly.
    """
    # files like this one are not spec-compliant, but they can be handled
    # gracefully, so they should be
    with open(PDF_DATA_DIR + '/embedded-encrypted-nocf.pdf', 'rb') as inf:
        reader = PdfFileReader(inf)
        name_entries = reader.root['/Names']['/EmbeddedFiles']['/Names']
        stream_ref = name_entries[1]['/EF'].raw_get('/F')
        reader.decrypt('secret')
        assert not stream_ref.get_object()._has_crypt_filter
        assert stream_ref.get_object().data == VECTOR_IMAGE_PDF
def test_historical_nonexistent_xref_access_nonstrict():
    """
    In non-strict mode, the ``/Bleh`` reference found in the historical
    revision resolves to a null object.
    """
    out = BytesIO()
    with open(NONEXISTENT_XREF_PATH, 'rb') as inf:
        updater = IncrementalPdfFileWriter(inf)
        first_page = updater.root['/Pages']['/Kids'][0]
        # drop the entry in an update, so it is only visible historically
        del first_page['/Bleh']
        updater.update_container(first_page)
        updater.write(out)

    reader = PdfFileReader(out, strict=False)
    original_root = reader.get_historical_root(0)
    dangling = original_root['/Pages']['/Kids'][0].raw_get('/Bleh')
    assert isinstance(dangling.get_object(), generic.NullObject)
def test_tagged_path_count():
    """
    Exactly three distinct access paths should be recorded for object 7
    in the first revision of the tagged two-field document.
    """
    reader = PdfFileReader(BytesIO(MINIMAL_TWO_FIELDS_TAGGED))
    resolver = reader.get_historical_resolver(0)
    resolver._load_reverse_xref_cache()

    # After path simplification, all (pseudo-)duplicate references should
    # be gone, leaving one path each via:
    #  - the AcroForm hierarchy
    #  - the pages tree (through /Annots)
    #  - the structure tree
    target = generic.Reference(7, 0, resolver)
    access_paths = resolver._indirect_object_access_cache[target]
    assert len(access_paths) == 3
def test_sign_crypt_pubkey_rc4():
    """Sign the existing field of a public-key (RC4) encrypted document."""
    pdf_writer = IncrementalPdfFileWriter(
        BytesIO(MINIMAL_PUBKEY_ONE_FIELD_RC4)
    )
    pdf_writer.encrypt_pubkey(PUBKEY_SELFSIGNED_DECRYPTER)
    out = signers.sign_pdf(
        pdf_writer, signers.PdfSignatureMetadata(),
        signer=FROM_CA, existing_fields_only=True,
    )

    signed_reader = PdfFileReader(out)
    signed_reader.decrypt_pubkey(PUBKEY_SELFSIGNED_DECRYPTER)
    val_trusted(signed_reader.embedded_signatures[0])
def test_sign_crypt_aes256(password):
    """Sign the existing field of an AES-256 password-encrypted document."""
    pdf_writer = IncrementalPdfFileWriter(BytesIO(MINIMAL_ONE_FIELD_AES256))
    pdf_writer.encrypt(password)
    out = signers.sign_pdf(
        pdf_writer, signers.PdfSignatureMetadata(),
        signer=FROM_CA, existing_fields_only=True,
    )

    signed_reader = PdfFileReader(out)
    signed_reader.decrypt(password)
    val_trusted(signed_reader.embedded_signatures[0])
def review_file(self, reader: PdfFileReader,
                base_revision: Union[int, HistoricalResolver],
                field_mdp_spec: Optional[FieldMDPSpec] = None,
                doc_mdp: Optional[MDPPerm] = None) \
        -> Union[DiffResult, SuspiciousModification]:
    """
    Implementation of :meth:`.DiffPolicy.review_file` that compares every
    revision after the base revision against the base revision, one
    revision at a time, and aggregates the outcome.

    :param reader:
        Reader for the file under review.
    :param base_revision:
        Either the index of the base revision, or a resolver for it.
    :param field_mdp_spec:
        Passed through to :meth:`apply`.
    :param doc_mdp:
        Passed through to :meth:`apply`.
    :return:
        A :class:`DiffResult` with the highest modification level seen and
        the union of all changed form fields, or the
        :class:`SuspiciousModification` raised by the first failing diff
        (returned, not raised).
    """
    touched_fields = set()
    total_revisions = reader.xrefs.total_revisions
    highest_level = ModificationLevel.NONE

    if not isinstance(base_revision, int):
        base_resolver = base_revision
        base_revision = base_resolver.revision
    else:
        base_resolver = reader.get_historical_resolver(base_revision)

    # Why every later revision is diffed separately, rather than only
    # comparing the base revision with the latest one:
    #
    # An intermediate update may leave no trace in the most recent
    # revision. If all checks were folded into a single comparison, there
    # would be no way to judge whether objects that such an update created
    # (and that were later abandoned) were legitimate changes.
    # (see the test_pades_revinfo tests for examples where this applies)
    #
    # Short of a reference counter (which has performance problems of its
    # own), diffing each intermediate revision is the only practical
    # approach.
    for revision in range(base_revision + 1, total_revisions):
        try:
            step_result = self.apply(
                old=base_resolver,
                new=reader.get_historical_resolver(revision),
                field_mdp_spec=field_mdp_spec, doc_mdp=doc_mdp
            )
        except SuspiciousModification as e:
            logger.warning(
                'Error in diff operation between revision '
                f'{base_revision} and {revision}', exc_info=e
            )
            return e
        else:
            if step_result.modification_level > highest_level:
                highest_level = step_result.modification_level
            touched_fields |= step_result.changed_form_fields

    return DiffResult(highest_level, touched_fields)
def test_copy_to_encrypted_file():
    """
    Copy a document into a new writer, encrypt it, and check that the
    owner password unlocks an equivalent document.
    """
    copied = writer.copy_into_new_writer(
        PdfFileReader(BytesIO(MINIMAL_ONE_FIELD))
    )
    root_ref_before = copied.root_ref
    copied.encrypt("ownersecret", "usersecret")

    out = BytesIO()
    copied.write(out)

    reread = PdfFileReader(out)
    auth = reread.decrypt("ownersecret")
    assert auth.status == AuthStatus.OWNER
    assert reread.root_ref == root_ref_before
    assert len(reread.root['/AcroForm']['/Fields']) == 1
    assert len(reread.root['/Pages']['/Kids']) == 1
def test_empty_user_pass():
    """
    A document encrypted with an empty user password can be opened with
    the empty string, yielding user-level access.
    """
    copied = writer.copy_into_new_writer(
        PdfFileReader(BytesIO(MINIMAL_ONE_FIELD))
    )
    root_ref_before = copied.root_ref
    copied.encrypt('ownersecret', '')

    out = BytesIO()
    copied.write(out)

    reread = PdfFileReader(out)
    auth = reread.decrypt('')
    assert auth.status == AuthStatus.USER
    assert reread.root_ref == root_ref_before
    assert len(reread.root['/AcroForm']['/Fields']) == 1
    assert len(reread.root['/Pages']['/Kids']) == 1
def test_sign_reject_freed(forbid_freeing):
    """
    Append a hand-written xref section that frees object 2 after signing,
    and check that the diff policy's ``reject_object_freeing`` setting
    decides whether that is accepted.
    """
    w = IncrementalPdfFileWriter(BytesIO(MINIMAL_ONE_FIELD))
    out = signers.sign_pdf(
        w, signature_meta=signers.PdfSignatureMetadata(field_name='Sig1'),
        signer=FROM_CA
    )

    # free the ref containing the /Info dictionary
    # since we don't have support for freeing objects in the writer (yet),
    # do it manually
    r = PdfFileReader(out)
    last_startxref = r.last_startxref
    # NOTE the linked list offsets are dummied out, but our Xref parser
    #  doesn't care
    # seeking to the end yields the offset at which the new xref section
    # will start; that value is written after 'startxref' below
    len_out = out.seek(0, os.SEEK_END)
    out.write(b'\n'.join([
        b'xref',
        b'0 1',
        b'0000000000 65535 f ',
        b'2 1',
        b'0000000000 00001 f ',
        b'trailer<</Prev %d>>' % last_startxref,
        b'startxref',
        b'%d' % len_out,
        b'%%EOF'
    ]))
    r = PdfFileReader(out)
    last_rev = r.xrefs.xref_sections - 1
    some_ref = generic.Reference(2, 0)
    assert some_ref in r.xrefs.refs_freed_in_revision(last_rev)

    sig = r.embedded_signatures[0]
    assert sig.signed_revision == 2

    # make a dummy rule that whitelists our freed object ref

    class AdHocRule(QualifiedWhitelistRule):
        def apply_qualified(self, old: HistoricalResolver,
                            new: HistoricalResolver):
            # clear the freed reference at the LTA_UPDATES level
            yield ModificationLevel.LTA_UPDATES, ReferenceUpdate(
                some_ref, paths_checked=RawPdfPath('/Root', '/Pages')
            )

    val_status = validate_pdf_signature(
        sig, SIMPLE_V_CONTEXT(),
        diff_policy=StandardDiffPolicy(
            DEFAULT_DIFF_POLICY.global_rules + [AdHocRule()],
            DEFAULT_DIFF_POLICY.form_rule,
            reject_object_freeing=forbid_freeing
        )
    )
    if forbid_freeing:
        assert val_status.modification_level == ModificationLevel.OTHER
    else:
        assert val_status.modification_level == ModificationLevel.LTA_UPDATES
def test_sign_crypt_rc4_new(password, file):
    """Add and sign a new signature field in an RC4-encrypted document."""
    pdf_writer = IncrementalPdfFileWriter(
        BytesIO(sign_crypt_rc4_files[file])
    )
    pdf_writer.encrypt(password)
    meta = signers.PdfSignatureMetadata(field_name='SigNew')
    out = signers.sign_pdf(pdf_writer, meta, signer=FROM_CA)
    out.seek(0)

    signed_reader = PdfFileReader(out)
    signed_reader.decrypt(password)
    val_trusted(signed_reader.embedded_signatures[0])
def test_create_fresh(zip1, zip2):
    """
    Build a two-page document from scratch and check the page tree as
    well as page lookup by positive and negative index.
    """
    pdf_out = writer.PdfFileWriter()
    first = simple_page(pdf_out, 'Hello world', compress=zip1)
    second = simple_page(pdf_out, 'Hello Page 2', compress=zip2)
    first_ref = pdf_out.insert_page(first)
    second_ref = pdf_out.insert_page(second)

    out = BytesIO()
    pdf_out.write(out)
    out.seek(0)

    reader = PdfFileReader(out)
    page_tree = reader.root['/Pages']
    assert page_tree['/Count'] == 2
    kids = page_tree['/Kids']
    assert b'world' in kids[0].get_object()['/Contents'].data
    assert b'Page 2' in kids[1].get_object()['/Contents'].data

    # valid indices, counted from either end
    lookups = (
        (0, first_ref), (1, second_ref), (-1, second_ref), (-2, first_ref),
    )
    for page_ix, expected_ref in lookups:
        found = reader.find_page_for_modification(page_ix)[0]
        assert found.idnum == expected_ref.idnum

    # out-of-range indices, counted from either end
    for bad_ix in (2, -3):
        with pytest.raises(ValueError):
            reader.find_page_for_modification(bad_ix)
def test_write_embedded_string():
    """
    Add a content stream that uses an embedded font to the first page and
    check that the page ends up with two content streams.
    """
    pdf_writer = IncrementalPdfFileWriter(BytesIO(MINIMAL))
    with open(NOTO_SERIF_JP, 'rb') as ffile:
        glyphs = GlyphAccumulator(pdf_writer, ffile, font_size=10)
        # shaping is only done to register the glyphs as used;
        # the shaping result itself is discarded
        glyphs.shape('テスト')

        # hardcoded CIDs
        cid_hx = '0637062a0639'
        content = f'BT /FEmb 18 Tf 0 100 Td <{cid_hx}> Tj ET'
        stream_ref = pdf_writer.add_object(
            generic.StreamObject(stream_data=content.encode('ascii'))
        )
        font_resources = generic.DictionaryObject({
            pdf_name('/Font'): generic.DictionaryObject({
                pdf_name('/FEmb'): glyphs.as_resource()
            })
        })
        pdf_writer.add_stream_to_page(
            0, stream_ref, resources=font_resources
        )
        out = BytesIO()
        pdf_writer.write(out)

    out.seek(0)
    reader = PdfFileReader(out)
    first_page = reader.root['/Pages']['/Kids'][0].get_object()
    contents = first_page['/Contents']
    assert len(contents) == 2
    assert stream_ref.idnum in (c.idnum for c in contents)
def test_sign_pss_md_discrepancy():
    """
    Sign with RSASSA-PSS parameters whose MGF1 hash (sha512) differs from
    the message hash (sha256) and check that the signature validates.
    """
    # Acrobat rejects PSS signatures whose internal hash functions
    # disagree, even though that is not a problem mathematically.
    selfsigned_cert = CRYPTO_DATA_DIR + '/selfsigned.cert.pem'
    pss_params = RSASSAPSSParams({
        'mask_gen_algorithm': MaskGenAlgorithm({
            'algorithm': 'mgf1',
            'parameters': DigestAlgorithm({'algorithm': 'sha512'})
        }),
        'hash_algorithm': DigestAlgorithm({'algorithm': 'sha256'}),
        'salt_length': 478
    })
    signer = signers.SimpleSigner.load(
        CRYPTO_DATA_DIR + '/selfsigned.key.pem',
        selfsigned_cert,
        ca_chain_files=(selfsigned_cert,),
        key_passphrase=b'secret',
        signature_mechanism=SignedDigestAlgorithm({
            'algorithm': 'rsassa_pss',
            'parameters': pss_params
        }),
    )

    pdf_writer = IncrementalPdfFileWriter(BytesIO(MINIMAL))
    out = signers.sign_pdf(
        pdf_writer, signers.PdfSignatureMetadata(field_name='Sig1'),
        signer=signer,
    )

    emb_sig = PdfFileReader(out).embedded_signatures[0]
    assert emb_sig.field_name == 'Sig1'
    sda: SignedDigestAlgorithm = emb_sig.signer_info['signature_algorithm']
    assert sda.signature_algo == 'rsassa_pss'
    val_untrusted(emb_sig)
def test_old_style_signing_cert_attr_mismatch(with_issser):
    """
    Validating against a different certificate with the same key must
    fail because the old-style signing certificate attribute mismatches.
    """
    # the '-issser' variant has an old-style signing cert attr that
    # includes issuerSerial
    fname = (
        'pades-with-old-style-signing-cert-attr-issser.pdf'
        if with_issser else 'pades-with-old-style-signing-cert-attr.pdf'
    )
    with open(os.path.join(PDF_DATA_DIR, fname), 'rb') as f:
        emb_sig = PdfFileReader(f).embedded_signatures[0]
        signer_info = emb_sig.signer_info
        digest = emb_sig.compute_digest()

    # signer1-long has the same key as signer1
    alt_cert = TESTING_CA.get_cert(CertLabel('signer1-long'))
    # repoint the signer identifier at the alternative certificate
    alt_sid = cms.IssuerAndSerialNumber({
        'issuer': alt_cert.issuer,
        'serial_number': alt_cert.serial_number
    })
    signer_info['sid'] = {'issuer_and_serial_number': alt_sid}

    with pytest.raises(
            SignatureValidationError,
            match="Signing certificate attribute does not match ") as exc_info:
        validate_sig_integrity(
            signer_info, alt_cert, expected_content_type='data',
            actual_digest=digest
        )

    failure = exc_info.value
    assert failure.ades_status == AdESStatus.INDETERMINATE
    assert failure.ades_subindication \
        == AdESIndeterminate.NO_SIGNING_CERTIFICATE_FOUND
def test_overspecify_cms_digest_algo():
    """
    With a signature mechanism that fixes the digest (``sha256_rsa``),
    signing works for ``sha256`` and raises ``SigningError`` for ``sha512``.
    """
    # TODO this behaviour is not ideal, but at least this test documents it
    selfsigned_cert = CRYPTO_DATA_DIR + '/selfsigned.cert.pem'
    signer = signers.SimpleSigner.load(
        CRYPTO_DATA_DIR + '/selfsigned.key.pem',
        selfsigned_cert,
        ca_chain_files=(selfsigned_cert,),
        key_passphrase=b'secret',
        # this algorithm identifier also pins the message digest
        signature_mechanism=SignedDigestAlgorithm(
            {'algorithm': 'sha256_rsa'}
        ),
    )

    # matching digest algorithms: expected to work
    matching_meta = signers.PdfSignatureMetadata(
        field_name='Sig1', md_algorithm='sha256'
    )
    out = signers.sign_pdf(
        IncrementalPdfFileWriter(BytesIO(MINIMAL)), matching_meta,
        signer=signer,
    )
    val_untrusted(PdfFileReader(out).embedded_signatures[0])

    # conflicting digest algorithms: expected to be rejected
    conflicting_writer = IncrementalPdfFileWriter(BytesIO(MINIMAL))
    with pytest.raises(SigningError):
        signers.sign_pdf(
            conflicting_writer,
            signers.PdfSignatureMetadata(
                field_name='Sig1', md_algorithm='sha512'
            ),
            signer=signer,
        )
def test_write_embedded_string_objstream():
    """
    Embed a font subset into an object stream, use it from a new content
    stream, and check that the font can be read back from the output.
    """
    tt_font = ttLib.TTFont(NOTO_SERIF_JP)
    glyphs = GlyphAccumulator(tt_font)
    cid_hx, _ = glyphs.feed_string('テスト')
    assert cid_hx == '0637062a0639'

    pdf_writer = IncrementalPdfFileWriter(BytesIO(MINIMAL_XREF))
    obj_stream = pdf_writer.prepare_object_stream()
    font_ref = glyphs.embed_subset(pdf_writer, obj_stream=obj_stream)

    content = f'BT /FEmb 18 Tf 0 100 Td <{cid_hx}> Tj ET'
    stream_ref = pdf_writer.add_object(
        generic.StreamObject(stream_data=content.encode('ascii'))
    )
    font_resources = generic.DictionaryObject({
        pdf_name('/Font'): generic.DictionaryObject(
            {pdf_name('/FEmb'): font_ref}
        )
    })
    pdf_writer.add_stream_to_page(0, stream_ref, resources=font_resources)

    out = BytesIO()
    pdf_writer.write(out)
    out.seek(0)

    reader = PdfFileReader(out)
    first_page = reader.root['/Pages']['/Kids'][0].get_object()
    contents = first_page['/Contents']
    assert len(contents) == 2
    assert stream_ref.idnum in (c.idnum for c in contents)
    assert font_ref.idnum in reader.xrefs.in_obj_stream

    out.seek(0)
    # resolve the font reference against the reader, which fetches the
    # font from the object stream
    font_ref.pdf = reader
    font_dict = font_ref.get_object()
    assert font_dict['/Type'] == pdf_name('/Font')