def test_wrong_password(legacy):
    """Check that a wrong password leaves an encrypted document unreadable.

    A single text string object is written to a password-protected file,
    using either the legacy RC4/AES-128 handler (``legacy`` truthy) or the
    handler returned by ``build_from_pw``. Reading the object back must
    raise ``PdfReadError`` both before any decryption attempt and after an
    attempt with an incorrect password, and the failed attempt must be
    recorded on the security handler and on its string filter.
    """
    pdf_writer = writer.PdfFileWriter()
    text_ref = pdf_writer.add_object(generic.TextStringObject("Blah blah"))

    if not legacy:
        handler = StandardSecurityHandler.build_from_pw(
            "ownersecret", "usersecret"
        )
    else:
        handler = StandardSecurityHandler.build_from_pw_legacy(
            StandardSecuritySettingsRevision.RC4_OR_AES128,
            pdf_writer._document_id[0].original_bytes,
            "ownersecret",
            "usersecret",
            keylen_bytes=16,
            use_aes128=True,
        )

    # Install the handler by hand: set it on the writer and register its
    # dictionary as the /Encrypt object.
    pdf_writer.security_handler = handler
    pdf_writer._encrypt = pdf_writer.add_object(handler.as_pdf_object())

    buffer = BytesIO()
    pdf_writer.write(buffer)
    reader = PdfFileReader(buffer)

    def assert_unreadable():
        # The encrypted object must not be retrievable at this point.
        with pytest.raises(misc.PdfReadError):
            reader.get_object(text_ref.reference)

    # No decryption attempted yet.
    assert_unreadable()

    assert reader.decrypt("thispasswordiswrong") == AuthResult.FAILED
    assert reader.security_handler._auth_failed
    assert reader.security_handler.get_string_filter()._auth_failed

    # Still locked after the failed authentication attempt.
    assert_unreadable()
def test_custom_crypt_filter(with_hex_filter, main_unencrypted):
    """Round-trip a stream protected by a custom (non-default) crypt filter.

    Two streams holding the same payload are written:

    * one explicitly tagged with the ``/Custom`` RC4 crypt filter
      (optionally also ASCII-hex encoded when ``with_hex_filter`` is set);
    * one left to the document defaults.

    When ``main_unencrypted`` is truthy no default string/stream filter is
    configured, so the second stream is expected to be stored verbatim.
    Otherwise an AES filter is installed as the default and the second
    stream's encoded bytes must differ from the payload.
    """
    # Keep the passwords in named locals so that the values used to build
    # the handler and the value used to open the file cannot drift apart.
    owner_password = "ownersecret"
    user_password = "usersecret"

    w = writer.PdfFileWriter()
    custom = pdf_name('/Custom')
    crypt_filters = {
        custom: StandardRC4CryptFilter(keylen=16),
    }
    if main_unencrypted:
        # streams/strings are unencrypted by default
        cfc = CryptFilterConfiguration(crypt_filters=crypt_filters)
    else:
        crypt_filters[STD_CF] = StandardAESCryptFilter(keylen=16)
        cfc = CryptFilterConfiguration(
            crypt_filters=crypt_filters,
            default_string_filter=STD_CF,
            default_stream_filter=STD_CF,
        )
    sh = StandardSecurityHandler.build_from_pw_legacy(
        rev=StandardSecuritySettingsRevision.RC4_OR_AES128,
        id1=w.document_id[0],
        desired_user_pass=user_password,
        desired_owner_pass=owner_password,
        keylen_bytes=16,
        crypt_filter_config=cfc,
    )
    w._assign_security_handler(sh)

    test_data = b'This is test data!'
    # Stream routed through the custom crypt filter.
    dummy_stream = generic.StreamObject(stream_data=test_data)
    dummy_stream.add_crypt_filter(name=custom, handler=sh)
    ref = w.add_object(dummy_stream)
    # Stream that only receives the document's default treatment.
    dummy_stream2 = generic.StreamObject(stream_data=test_data)
    ref2 = w.add_object(dummy_stream2)

    if with_hex_filter:
        dummy_stream.apply_filter(pdf_name('/AHx'))

    out = BytesIO()
    w.write(out)
    r = PdfFileReader(out)
    r.decrypt(owner_password)

    obj: generic.StreamObject = r.get_object(ref.reference)
    assert obj.data == test_data
    if with_hex_filter:
        # With the hex filter applied, /DecodeParms is indexed and the
        # crypt filter parameters are read from position 1.
        cf_dict = obj['/DecodeParms'][1]
    else:
        cf_dict = obj['/DecodeParms']
    assert cf_dict['/Name'] == pdf_name('/Custom')

    # Inspect the second stream without transparent decryption, to see
    # what was actually stored.
    obj2: generic.DecryptedObjectProxy = r.get_object(
        ref2.reference, transparent_decrypt=False
    )
    raw = obj2.raw_object
    assert isinstance(raw, generic.StreamObject)
    if main_unencrypted:
        assert raw.encoded_data == test_data
    else:
        assert raw.encoded_data != test_data
def test_identity_crypt_filter(use_alias, with_never_decrypt):
    """Exercise the identity crypt filter on an encrypted document.

    With ``use_alias`` set, the identity filter is additionally registered
    under ``/IdentityAlias`` and assigning the handler to the writer is
    expected to raise ``PdfError``; the test ends there. Otherwise a stream
    tagged with ``/Crypt`` + ``/Identity`` is written and read back, and
    both its encoded and decoded contents must equal the original payload,
    whatever value ``with_never_decrypt`` has.
    """
    pdf_writer = writer.PdfFileWriter()
    handler = StandardSecurityHandler.build_from_pw("secret")
    pdf_writer.security_handler = handler

    identity: IdentityCryptFilter = IdentityCryptFilter()
    filter_config = handler.crypt_filter_config
    assert filter_config[pdf_name("/Identity")] is identity

    if use_alias:
        filter_config._crypt_filters[pdf_name("/IdentityAlias")] = identity
        assert handler.crypt_filter_config[pdf_name("/IdentityAlias")] \
            is identity
        # identity filter can't be serialised, so this should throw an error
        with pytest.raises(misc.PdfError):
            pdf_writer._assign_security_handler(handler)
        return

    pdf_writer._assign_security_handler(handler)

    payload = b'This is some test data that should remain unencrypted.'
    stream = generic.StreamObject(stream_data=payload, handler=handler)
    identity_params = {pdf_name("/Name"): pdf_name("/Identity")}
    stream.apply_filter("/Crypt", params=identity_params)
    stream_ref = pdf_writer.add_object(stream).reference

    buffer = BytesIO()
    pdf_writer.write(buffer)

    reader = PdfFileReader(buffer)
    reader.decrypt("secret")
    read_back = reader.get_object(
        stream_ref, never_decrypt=with_never_decrypt
    )
    assert read_back.encoded_data == payload
    assert read_back.data == payload
def test_broken_obj_stream_fallback(fname, obj_to_get, expect_null):
    """Read an object from a damaged file in non-strict mode.

    The file ``fname`` under ``PDF_DATA_DIR`` is opened with
    ``strict=False`` and object number ``obj_to_get`` is fetched. The
    result must be a ``NullObject`` when ``expect_null`` is truthy, and a
    ``DictionaryObject`` otherwise.
    """
    pdf_path = os.path.join(PDF_DATA_DIR, fname)
    with open(pdf_path, 'rb') as handle:
        reader = PdfFileReader(handle, strict=False)
        fetched = reader.get_object(generic.Reference(idnum=obj_to_get))
        # we set up the tests to always point to dictionaries, so that is
        # the expected type whenever a null object is not expected
        expected_type = (
            generic.NullObject if expect_null else generic.DictionaryObject
        )
        assert isinstance(fetched, expected_type)
def test_pubkey_wrong_cert():
    """Check that public-key decryption fails for a non-recipient.

    A document containing one stream is encrypted for the ``signer2``
    certificate only. Decrypting with ``PUBKEY_TEST_DECRYPTER`` is then
    expected to report ``AuthStatus.FAILED``, and fetching the stream
    afterwards must raise ``PdfError``.
    """
    # NOTE: an earlier version built a PdfFileReader over VECTOR_IMAGE_PDF
    # here and never used it before rebinding the name; that dead read has
    # been removed.
    w = writer.PdfFileWriter()

    recpt_cert = load_cert_from_pemder(
        TESTING_CA_DIR + '/intermediate/newcerts/signer2.cert.pem'
    )
    test_data = b'This is test data!'
    dummy_stream = generic.StreamObject(stream_data=test_data)
    ref = w.add_object(dummy_stream)
    w.encrypt_pubkey([recpt_cert])

    out = BytesIO()
    w.write(out)
    r = PdfFileReader(out)

    result = r.decrypt_pubkey(PUBKEY_TEST_DECRYPTER)
    assert result.status == AuthStatus.FAILED

    # The stream must stay inaccessible after the failed authentication.
    with pytest.raises(misc.PdfError):
        r.get_object(ref.reference)
def test_historical_read():
    """Read objects as they existed in earlier revisions of a file.

    ``MINIMAL_ONE_FIELD`` is expected to contain two revisions. The root
    fetched at revision 1 must equal ``reader.root``; at revision 0 the
    root must lack ``/AcroForm`` and fetching object 6 must raise
    ``PdfReadError``. The explicit cross-reference entries of each
    revision are checked as well.
    """
    reader = PdfFileReader(BytesIO(MINIMAL_ONE_FIELD))
    assert reader.total_revisions == 2

    # if this test file is ever replaced, the test will probably have to
    # be rewritten
    root = generic.IndirectObject(1, 0, reader)
    acroform = generic.IndirectObject(6, 0, reader)

    # current value
    latest_root = reader.get_object(root.reference, revision=1)
    assert latest_root == reader.root
    reader.get_object(acroform.reference, revision=1)

    # state of the document in the first revision
    original_root = reader.get_object(root.reference, revision=0)
    assert '/AcroForm' not in original_root
    with pytest.raises(misc.PdfReadError):
        reader.get_object(acroform.reference, revision=0)

    # explicit xref entries, per revision
    assert Reference(6, 0) in reader.xrefs.explicit_refs_in_revision(1)
    assert Reference(2, 0) in reader.xrefs.explicit_refs_in_revision(0)
    assert Reference(2, 0) not in reader.xrefs.explicit_refs_in_revision(1)
def test_custom_pubkey_crypt_filter(with_hex_filter, main_unencrypted):
    """Round-trip a stream protected by a custom public-key crypt filter.

    A ``/Custom`` RC4 public-key filter is always registered. Unless
    ``main_unencrypted`` is truthy, an AES public-key filter is also
    installed as the default for strings and streams. The test checks how
    recipients are serialised in the encryption dictionary, that the
    custom filter rejects a second set of recipients, that the custom
    stream only becomes readable once its own filter has been
    authenticated, and whether the second (default-treated) stream was
    stored verbatim.
    """
    pdf_writer = writer.PdfFileWriter()
    custom = pdf_name('/Custom')
    crypt_filters = {custom: PubKeyRC4CryptFilter(keylen=16)}

    # With no default filters named, streams/strings are unencrypted by
    # default; otherwise the AES filter becomes the default for both.
    config_kwargs = {'crypt_filters': crypt_filters}
    if not main_unencrypted:
        crypt_filters[DEFAULT_CRYPT_FILTER] = PubKeyAESCryptFilter(
            keylen=16, acts_as_default=True
        )
        config_kwargs['default_string_filter'] = DEFAULT_CRYPT_FILTER
        config_kwargs['default_stream_filter'] = DEFAULT_CRYPT_FILTER
    filter_config = CryptFilterConfiguration(**config_kwargs)

    handler = PubKeySecurityHandler(
        version=SecurityHandlerVersion.RC4_OR_AES128,
        pubkey_handler_subfilter=PubKeyAdbeSubFilter.S5,
        legacy_keylen=16,
        crypt_filter_config=filter_config,
    )

    # if main_unencrypted, these should be no-ops
    handler.add_recipients([PUBKEY_TEST_DECRYPTER.cert])
    # (this is always pointless, but it should be allowed)
    handler.add_recipients([PUBKEY_TEST_DECRYPTER.cert])

    crypt_filters[custom].add_recipients([PUBKEY_TEST_DECRYPTER.cert])
    pdf_writer._assign_security_handler(handler)

    encrypt_dict = pdf_writer._encrypt.get_object()
    filter_dicts = encrypt_dict['/CF']
    # no /Recipients in S5 mode
    assert '/Recipients' not in encrypt_dict
    assert isinstance(
        filter_dicts[custom]['/Recipients'], generic.ByteStringObject
    )
    if not main_unencrypted:
        default_recipients = filter_dicts[DEFAULT_CRYPT_FILTER]['/Recipients']
        assert isinstance(default_recipients, generic.ArrayObject)
        assert len(default_recipients) == 2
    else:
        assert DEFAULT_CRYPT_FILTER not in filter_dicts

    # custom crypt filters can only have one set of recipients
    with pytest.raises(misc.PdfError):
        crypt_filters[custom].add_recipients([PUBKEY_TEST_DECRYPTER.cert])

    test_data = b'This is test data!'
    custom_stream = generic.StreamObject(stream_data=test_data)
    custom_stream.add_crypt_filter(name=custom, handler=handler)
    custom_ref = pdf_writer.add_object(custom_stream)
    default_stream = generic.StreamObject(stream_data=test_data)
    default_ref = pdf_writer.add_object(default_stream)

    if with_hex_filter:
        custom_stream.apply_filter(pdf_name('/AHx'))

    buffer = BytesIO()
    pdf_writer.write(buffer)
    reader = PdfFileReader(buffer)
    reader.decrypt_pubkey(PUBKEY_TEST_DECRYPTER)

    # the custom test filter shouldn't have been decrypted yet
    # so attempting to decode the stream should cause the crypt filter
    # to throw an error
    obj: generic.StreamObject = reader.get_object(custom_ref.reference)
    with pytest.raises(misc.PdfError):
        # noinspection PyStatementEffect
        obj.data

    # Authenticate the custom filter separately; the same stream object
    # must then decode to the original payload.
    custom_filter = reader.security_handler.crypt_filter_config[custom]
    custom_filter.authenticate(PUBKEY_TEST_DECRYPTER)
    assert obj.data == test_data

    # With the hex filter applied, the crypt filter parameters are read
    # from position 1 of /DecodeParms.
    cf_dict = (
        obj['/DecodeParms'][1] if with_hex_filter else obj['/DecodeParms']
    )
    assert cf_dict['/Name'] == pdf_name('/Custom')

    # Look at the second stream without transparent decryption.
    proxy: generic.DecryptedObjectProxy = reader.get_object(
        default_ref.reference, transparent_decrypt=False
    )
    raw = proxy.raw_object
    assert isinstance(raw, generic.StreamObject)
    stored_verbatim = raw.encoded_data == test_data
    assert stored_verbatim if main_unencrypted else not stored_verbatim
def test_xref_access_no_decrypt():
    """Fetch object 7 of ``MINIMAL_AES256`` without transparent decryption.

    The object returned with ``transparent_decrypt=False`` must not be a
    ``DecryptedObjectProxy``.
    """
    reader = PdfFileReader(BytesIO(MINIMAL_AES256))
    # attempt to access xref stream, turn off transparent decryption
    xref_stream_ref = generic.Reference(7, 0)
    fetched = reader.get_object(
        ref=xref_stream_ref, transparent_decrypt=False
    )
    assert not isinstance(fetched, generic.DecryptedObjectProxy)
def test_broken_objstream(fname, err, obj_to_get):
    """Expect a ``PdfReadError`` matching ``err`` in strict mode.

    The file ``fname`` under ``PDF_DATA_DIR`` is opened with
    ``strict=True``; either constructing the reader or fetching object
    number ``obj_to_get`` must raise the error.
    """
    pdf_path = os.path.join(PDF_DATA_DIR, fname)
    with open(pdf_path, 'rb') as handle, \
            pytest.raises(misc.PdfReadError, match=err):
        reader = PdfFileReader(handle, strict=True)
        target = generic.Reference(idnum=obj_to_get)
        reader.get_object(target)
def do_check():
    """Validate the first embedded signature of the document in ``out``.

    Prints the data of object 2 as of revision 3, then asserts that the
    signature's modification level is ``ModificationLevel.OTHER``.
    ``out`` is not defined in this function; it is taken from the
    surrounding scope.
    """
    reader = PdfFileReader(out)
    target = generic.Reference(2, 0, reader)
    print(reader.get_object(target, revision=3).data)

    signature = reader.embedded_signatures[0]
    validation = validate_pdf_signature(signature)
    assert validation.modification_level == ModificationLevel.OTHER