Python PdfFileReaderの例、pyhanko.pdf_utils.reader.PdfFileReader Pythonの例

コード例 #1

0

ファイルを表示

ファイル: test_utils.py プロジェクト: slad99/pyHanko

def test_key_encipherment_requirement_override(version, keylen, use_aes,
                                               use_crypt_filters):
    """
    Encrypt the page tree of VECTOR_IMAGE_PDF with a public-key security
    handler built with ``ignore_key_usage=True``, then check that the
    result decrypts with user access and the expected permissions.
    """
    source = PdfFileReader(BytesIO(VECTOR_IMAGE_PDF))
    pdf_writer = writer.PdfFileWriter()

    handler = PubKeySecurityHandler.build_from_certs(
        [PUBKEY_SELFSIGNED_DECRYPTER.cert],
        keylen_bytes=keylen, version=version,
        use_aes=use_aes, use_crypt_filters=use_crypt_filters,
        perms=-44, ignore_key_usage=True
    )
    pdf_writer.security_handler = handler
    pdf_writer._encrypt = pdf_writer.add_object(handler.as_pdf_object())

    # transplant the page tree of the source document into the new file
    pages_ref = source.root.raw_get('/Pages')
    imported_pages = pdf_writer.import_object(pages_ref)
    pdf_writer.root['/Pages'] = imported_pages

    buf = BytesIO()
    pdf_writer.write(buf)

    encrypted = PdfFileReader(buf)
    auth = encrypted.decrypt_pubkey(PUBKEY_SELFSIGNED_DECRYPTER)
    assert auth.status == AuthStatus.USER
    assert auth.permission_flags == -44

    first_page = encrypted.root['/Pages']['/Kids'][0].get_object()
    assert '/ExtGState' in first_page['/Resources']
    # This byte sequence is known to occur in the decoded content stream
    # of the single page in VECTOR_IMAGE_PDF.
    assert b'0 1 0 rg /a0 gs' in first_page['/Contents'].data

コード例 #2

0

ファイルを表示

ファイル: test_xref.py プロジェクト: MatthiasValvekens/pyHanko

def test_update_hybrid_twice(fname):
    """
    Apply two successive incremental updates to a hybrid-reference file
    and check the xref section type reported for each container.
    """
    path = os.path.join(PDF_DATA_DIR, fname)
    with open(path, 'rb') as inf:
        first_update = IncrementalPdfFileWriter(inf)
        title_ref = first_update.trailer['/Info'].raw_get('/Title')
        assert isinstance(title_ref, generic.IndirectObject)
        # overwrite the title object directly in the writer's object table
        first_update.objects[(title_ref.generation, title_ref.idnum)] = \
            generic.pdf_string('Updated')
        out = BytesIO()
        first_update.write(out)

    reader = PdfFileReader(out)
    assert reader.trailer['/Info']['/Title'] == 'Updated'
    for section_ix, expected_type in (
            (1, XRefSectionType.HYBRID_MAIN),
            (2, XRefSectionType.STANDARD)):
        info = reader.xrefs.get_xref_container_info(section_ix)
        assert info.xref_section_type == expected_type

    second_update = IncrementalPdfFileWriter(out)
    second_update.add_object(generic.pdf_string('This is an object'))
    second_update.write_in_place()

    reader = PdfFileReader(out)
    assert '/XRefStm' not in reader.trailer
    assert '/XRefStm' not in reader.trailer_view
    assert reader.trailer['/Info']['/Title'] == 'Updated'
    for section_ix, expected_type in (
            (1, XRefSectionType.HYBRID_MAIN),
            (2, XRefSectionType.STANDARD),
            (3, XRefSectionType.STANDARD)):
        info = reader.xrefs.get_xref_container_info(section_ix)
        assert info.xref_section_type == expected_type

コード例 #3

0

ファイルを表示

ファイル: test_utils.py プロジェクト: slad99/pyHanko

def test_legacy_encryption(use_owner_pass, rev, keylen_bytes, use_aes):
    """
    Encrypt VECTOR_IMAGE_PDF's page tree with a legacy password-based
    handler and check decryption with either the owner or user password.
    """
    source = PdfFileReader(BytesIO(VECTOR_IMAGE_PDF))
    pdf_writer = writer.PdfFileWriter()
    handler = StandardSecurityHandler.build_from_pw_legacy(
        rev, pdf_writer._document_id[0].original_bytes,
        "ownersecret", "usersecret",
        keylen_bytes=keylen_bytes, use_aes128=use_aes, perms=-44
    )
    pdf_writer.security_handler = handler
    pdf_writer._encrypt = pdf_writer.add_object(handler.as_pdf_object())

    pages_ref = source.root.raw_get('/Pages')
    imported_pages = pdf_writer.import_object(pages_ref)
    pdf_writer.root['/Pages'] = imported_pages

    buf = BytesIO()
    pdf_writer.write(buf)

    encrypted = PdfFileReader(buf)
    password = "ownersecret" if use_owner_pass else "usersecret"
    auth = encrypted.decrypt(password)
    expected_status = AuthStatus.OWNER if use_owner_pass else AuthStatus.USER
    assert auth.status == expected_status
    if use_owner_pass:
        # no permission flags are reported for owner access
        assert auth.permission_flags is None
    else:
        assert auth.permission_flags == -44

    first_page = encrypted.root['/Pages']['/Kids'][0].get_object()
    assert encrypted.trailer['/Encrypt']['/P'] == -44
    assert '/ExtGState' in first_page['/Resources']
    # This byte sequence is known to occur in the decoded content stream
    # of the single page in VECTOR_IMAGE_PDF.
    assert b'0 1 0 rg /a0 gs' in first_page['/Contents'].data

コード例 #4

0

ファイルを表示

ファイル: test_signing.py プロジェクト: MatthiasValvekens/pyHanko

def test_add_revinfo_without_timestamp(requests_mock):
    """
    Sign without revocation info, add validation info a month later, and
    check that both PAdES-LT and PAdES-LTA validation are rejected.
    """
    pdf_writer = IncrementalPdfFileWriter(BytesIO(MINIMAL))

    # sign first, without embedding any revocation info
    with freeze_time('2020-11-01'):
        signed = signers.sign_pdf(
            pdf_writer,
            signers.PdfSignatureMetadata(field_name='Sig1'),
            signer=FROM_CA,
            in_place=True,
        )

    # one month later
    with freeze_time('2020-12-01'):
        vc = live_testing_vc(requests_mock)
        reader = PdfFileReader(signed)
        signature = reader.embedded_signatures[0]
        signed = add_validation_info(signature, vc)

        reader = PdfFileReader(signed)
        signature = reader.embedded_signatures[0]

        # Even with revinfo present, validation should fail for lack of a
        # timestamp; the same goes (a fortiori) for the LTA profile.
        expected_failures = (
            (RevocationInfoValidationType.PADES_LT,
             '.*trusted timestamp.*'),
            (RevocationInfoValidationType.PADES_LTA,
             'Purported.*LTA.*'),
        )
        for profile, message_pattern in expected_failures:
            with pytest.raises(SignatureValidationError,
                               match=message_pattern):
                validate_pdf_ltv_signature(
                    signature, profile,
                    {'trust_roots': TRUST_ROOTS,
                     'retroactive_revinfo': True}
                )

コード例 #5

0

ファイルを表示

def _decrypt_pubkey(sedk: crypt.SimpleEnvelopeKeyDecrypter, infile, outfile,
                    force):
    """
    Decrypt a public-key encrypted PDF and write a decrypted copy.

    :param sedk:
        Key decrypter passed to the reader's ``decrypt_pubkey`` method.
    :param infile:
        Path of the encrypted input file.
    :param outfile:
        Path to write the output to.
    :param force:
        If true, proceed even when decryption only yields user access.
    :raises click.ClickException:
        If the file is not encrypted, does not use a public-key security
        handler, cannot be decrypted, or yields only user access while
        ``force`` is false.
    """
    with pyhanko_exception_manager():
        with open(infile, 'rb') as inf:
            reader = PdfFileReader(inf)
            handler = reader.security_handler
            if handler is None:
                raise click.ClickException("File is not encrypted.")
            if not isinstance(handler, crypt.PubKeySecurityHandler):
                raise click.ClickException(
                    "File was not encrypted with a public-key security handler."
                )

            status = reader.decrypt_pubkey(sedk).status
            # TODO read 2nd bit of perms in CMS enveloped data
            #  is the one indicating that change of encryption is OK
            if status == crypt.AuthStatus.USER and not force:
                raise click.ClickException(
                    "Change of encryption is typically not allowed with "
                    "user access. Pass --force to decrypt the file anyway."
                )
            elif status == crypt.AuthStatus.FAILED:
                raise click.ClickException("Failed to decrypt the file.")

            decrypted_writer = copy_into_new_writer(reader)
            with open(outfile, 'wb') as outf:
                decrypted_writer.write(outf)

コード例 #6

0

ファイルを表示

ファイル: test_signing.py プロジェクト: MatthiasValvekens/pyHanko

def test_sign_with_later_revoked_nots(requests_mock):
    """
    Sign without a timestamp using REVOKED_SIGNER and validate at a later
    date; the signature must be reported as intact but revoked.
    """
    pdf_writer = IncrementalPdfFileWriter(BytesIO(MINIMAL))
    with freeze_time('2020-01-20'):
        signed = signers.sign_pdf(
            pdf_writer,
            signers.PdfSignatureMetadata(field_name='Sig1'),
            signer=REVOKED_SIGNER,
        )
        reader = PdfFileReader(signed)
        signature = reader.embedded_signatures[0]

    # No timestamp validation check is possible here, so the checker
    # should treat the timestamp as invalid.
    with freeze_time('2020-12-05'):
        reader = PdfFileReader(signed)
        signature = reader.embedded_signatures[0]
        vc = live_testing_vc(requests_mock)
        status = validate_pdf_signature(signature, vc)

        assert status.intact
        assert status.valid
        assert status.revoked
        assert not status.trusted

        summary = status.summary()
        assert 'INTACT' in summary
        assert 'REVOKED' in summary
        assert status.coverage == SignatureCoverageLevel.ENTIRE_FILE
        assert status.modification_level == ModificationLevel.NONE
        assert not status.bottom_line

コード例 #7

0

ファイルを表示

ファイル: test_signing.py プロジェクト: MatthiasValvekens/pyHanko

def test_no_revinfo_to_be_added(requests_mock, in_place):
    """
    Check that re-adding validation info with the same validation context
    (and VRI updates disabled) leaves the file length and DSS unchanged.
    """
    stream = BytesIO(MINIMAL)
    pdf_writer = IncrementalPdfFileWriter(stream)

    vc = live_testing_vc(requests_mock)
    signature_meta = signers.PdfSignatureMetadata(
        field_name='Sig1', embed_validation_info=True,
        validation_context=vc, subfilter=fields.SigSeedSubFilter.PADES
    )
    signers.sign_pdf(
        pdf_writer, signature_meta,
        signer=FROM_CA, timestamper=DUMMY_TS, in_place=True,
    )

    length_before = stream.seek(0, os.SEEK_END)
    reader = PdfFileReader(stream)
    signature = reader.embedded_signatures[0]
    dss_before = DocumentSecurityStore.read_dss(reader)
    assert len(dss_before.ocsps) == 1
    assert len(dss_before.crls) == 1

    # Same validation context, so nothing should change.
    # VRI updates always trigger a write, hence they are switched off.
    result = add_validation_info(
        signature, vc, in_place=in_place, add_vri_entry=False
    )
    if in_place:
        assert result is reader.stream

    length_after = result.seek(0, os.SEEK_END)
    assert length_before == length_after
    dss_after = DocumentSecurityStore.read_dss(PdfFileReader(result))
    assert len(dss_after.ocsps) == 1
    assert len(dss_after.crls) == 1

コード例 #8

0

ファイルを表示

ファイル: test_utils.py プロジェクト: jackii/pyHanko

def test_identity_crypt_filter(use_alias, with_never_decrypt):
    """
    Check that a stream using the ``/Identity`` crypt filter is stored
    unencrypted, and that registering the identity filter under an alias
    makes assigning the security handler fail.
    """
    pdf_writer = writer.PdfFileWriter()
    handler = StandardSecurityHandler.build_from_pw("secret")
    pdf_writer.security_handler = handler
    identity: IdentityCryptFilter = IdentityCryptFilter()
    assert handler.crypt_filter_config[pdf_name("/Identity")] is identity

    if use_alias:
        alias = pdf_name("/IdentityAlias")
        handler.crypt_filter_config._crypt_filters[alias] = identity
        assert handler.crypt_filter_config[pdf_name("/IdentityAlias")] \
            is identity
        # the identity filter can't be serialised, so an error is expected
        with pytest.raises(misc.PdfError):
            pdf_writer._assign_security_handler(handler)
        return

    pdf_writer._assign_security_handler(handler)
    payload = b'This is some test data that should remain unencrypted.'
    plain_stream = generic.StreamObject(stream_data=payload, handler=handler)
    plain_stream.apply_filter(
        "/Crypt", params={pdf_name("/Name"): pdf_name("/Identity")}
    )
    stream_ref = pdf_writer.add_object(plain_stream).reference
    buf = BytesIO()
    pdf_writer.write(buf)

    reader = PdfFileReader(buf)
    reader.decrypt("secret")
    recovered = reader.get_object(
        stream_ref, never_decrypt=with_never_decrypt
    )
    assert recovered.encoded_data == payload
    assert recovered.data == payload

コード例 #9

0

ファイルを表示

ファイル: test_diff_analysis.py プロジェクト: jackii/pyHanko

def test_no_changes_policy():
    """
    Append an ``/Info`` update after a certification signature and compare
    the verdict of the normal diff policy with NO_CHANGES_DIFF_POLICY.
    """
    signature_meta = signers.PdfSignatureMetadata(
        field_name='Sig1', certify=True,
        docmdp_permissions=fields.MDPPerm.FILL_FORMS
    )
    pdf_writer = IncrementalPdfFileWriter(BytesIO(MINIMAL_ONE_FIELD))
    signed = signers.sign_pdf(pdf_writer, signature_meta, signer=FROM_CA)

    # update the /Info dictionary in a new revision
    pdf_writer = IncrementalPdfFileWriter(signed)
    creation_date = generic.pdf_date(
        datetime(2020, 10, 10, tzinfo=pytz.utc)
    )
    new_info = generic.DictionaryObject(
        {pdf_name('/CreationDate'): creation_date}
    )
    pdf_writer.set_info(new_info)
    pdf_writer.write_in_place()

    # the regular diff policy accepts the change
    reader = PdfFileReader(signed)
    signature = reader.embedded_signatures[0]
    assert signature.field_name == 'Sig1'
    verdict = val_trusted(signature, extd=True)
    assert verdict.modification_level == ModificationLevel.LTA_UPDATES
    assert verdict.docmdp_ok

    # the ultra-strict no-op policy does not
    reader = PdfFileReader(signed)
    signature = reader.embedded_signatures[0]
    verdict = validate_pdf_signature(
        signature, diff_policy=NO_CHANGES_DIFF_POLICY
    )
    assert isinstance(signature.diff_result, SuspiciousModification)
    assert not verdict.docmdp_ok

コード例 #10

0

ファイルを表示

ファイル: test_embed.py プロジェクト: MatthiasValvekens/pyHanko

def test_embed_with_af(incremental):
    """
    Embed VECTOR_IMAGE_PDF as an associated file and check the resulting
    name tree entry, file specification and ``/AF`` array.
    """
    if not incremental:
        original = PdfFileReader(BytesIO(MINIMAL))
        pdf_writer = writer.copy_into_new_writer(original)
    else:
        pdf_writer = IncrementalPdfFileWriter(BytesIO(MINIMAL))

    modified = datetime.now(tz=tzlocal.get_localzone())
    created = modified - timedelta(days=1)
    file_params = embed.EmbeddedFileParams(
        creation_date=created, modification_date=modified
    )
    embedded_file = embed.EmbeddedFileObject.from_file_data(
        pdf_writer, data=VECTOR_IMAGE_PDF, params=file_params
    )

    file_spec = embed.FileSpec(
        file_spec_string='vector-test.pdf',
        embedded_data=embedded_file,
        description='Embedding test /w assoc file',
        af_relationship=generic.pdf_name('/Unspecified'),
    )
    embed.embed_file(pdf_writer, file_spec)
    buf = BytesIO()
    pdf_writer.write(buf)

    reader = PdfFileReader(buf)
    assert reader.input_version == (2, 0)
    # the name tree leaf holds a (name, file spec) pair
    name_entries = reader.root['/Names']['/EmbeddedFiles']['/Names']
    assert len(name_entries) == 2
    assert name_entries[0] == 'vector-test.pdf'
    spec_dict = name_entries[1]
    assert '/UF' not in spec_dict
    assert spec_dict['/AFRelationship'] == '/Unspecified'
    file_stream = spec_dict['/EF']['/F']
    assert file_stream.data == VECTOR_IMAGE_PDF
    assert '/UF' not in spec_dict['/EF']

    first_af_ref = reader.root['/AF'].raw_get(0).reference
    assert first_af_ref == spec_dict.container_ref

コード例 #11

0

ファイルを表示

ファイル: test_xref.py プロジェクト: MatthiasValvekens/pyHanko

def test_broken_obj_stream_fallback(fname, obj_to_get, expect_null):
    """
    Read an object from a file in non-strict mode and check whether it
    comes back as a null object or as a dictionary.
    """
    path = os.path.join(PDF_DATA_DIR, fname)
    with open(path, 'rb') as inf:
        reader = PdfFileReader(inf, strict=False)
        retrieved = reader.get_object(generic.Reference(idnum=obj_to_get))
        # the test cases are set up to always point to dictionaries
        expected_type = (
            generic.NullObject if expect_null else generic.DictionaryObject
        )
        assert isinstance(retrieved, expected_type)

コード例 #12

0

ファイルを表示

def validate_signatures(ctx, infile, executive_summary, pretty_print,
                        validation_context, trust, trust_replace, other_certs,
                        ltv_profile, force_revinfo, soft_revocation_check,
                        no_revocation_check, password, retroactive_revinfo):
    """
    Validate the regular signatures embedded in ``infile`` and print a
    status report for each of them.

    With ``pretty_print``, every signature gets a banner with its field
    index and name followed by the status text; otherwise one
    ``name:fingerprint:status`` line is printed per signature.

    If the file uses a standard (password-based) security handler, it is
    decrypted with ``password``; when no password was supplied, the user
    is prompted for one.

    :raises click.ClickException:
        If ``pretty_print`` and ``executive_summary`` are both set,
        if the password does not match, or if the file is encrypted with
        anything other than a password-based security handler.
    """

    # disabling revocation checks entirely implies soft-fail mode
    if no_revocation_check:
        soft_revocation_check = True

    if pretty_print and executive_summary:
        raise click.ClickException(
            "--pretty-print is incompatible with --executive-summary.")

    if ltv_profile is not None:
        ltv_profile = RevocationInfoValidationType(ltv_profile)

    vc_kwargs = _build_vc_kwargs(
        ctx,
        validation_context,
        trust,
        trust_replace,
        other_certs,
        retroactive_revinfo,
        allow_fetching=False if no_revocation_check else None)

    key_usage_settings = _get_key_usage_settings(ctx, validation_context)
    with pyhanko_exception_manager():
        r = PdfFileReader(infile)
        sh = r.security_handler
        if isinstance(sh, crypt.StandardSecurityHandler):
            if password is None:
                password = getpass.getpass(prompt='File password: ')
            # Authenticate before touching any signature data; a failed
            # decryption must abort instead of carrying on silently.
            auth = r.decrypt(password)
            if auth.status == crypt.AuthStatus.FAILED:
                raise click.ClickException("Password didn't match.")
        elif sh is not None:
            raise click.ClickException(
                "The CLI supports only password-based encryption when "
                "validating (for now)")

        for ix, embedded_sig in enumerate(r.embedded_regular_signatures):
            fingerprint: str = embedded_sig.signer_cert.sha256.hex()
            status_str = _signature_status(ltv_profile, force_revinfo,
                                           soft_revocation_check, pretty_print,
                                           vc_kwargs, key_usage_settings,
                                           executive_summary, embedded_sig)
            name = embedded_sig.field_name

            if pretty_print:
                header = f'Field {ix + 1}: {name}'
                line = '=' * len(header)
                print(line)
                print(header)
                print(line)
                print('\n\n' + status_str)
            else:
                print('%s:%s:%s' % (name, fingerprint, status_str))

コード例 #13

0

ファイルを表示

ファイル: test_utils.py プロジェクト: jackii/pyHanko

def test_copy_file():
    """
    Copy MINIMAL_ONE_FIELD into a fresh writer and check that the root
    reference, form field count and page count are preserved.
    """
    original = PdfFileReader(BytesIO(MINIMAL_ONE_FIELD))
    pdf_writer = writer.copy_into_new_writer(original)
    expected_root_ref = pdf_writer.root_ref

    buf = BytesIO()
    pdf_writer.write(buf)

    copied = PdfFileReader(buf)
    assert copied.root_ref == expected_root_ref
    form_fields = copied.root['/AcroForm']['/Fields']
    assert len(form_fields) == 1
    page_kids = copied.root['/Pages']['/Kids']
    assert len(page_kids) == 1

コード例 #14

0

ファイルを表示

ファイル: test_signing.py プロジェクト: MatthiasValvekens/pyHanko

def test_simple_sign_fresh_doc():
    """
    Sign a document that was copied into a fresh (non-incremental) writer
    and validate the resulting signature as untrusted.
    """
    original = PdfFileReader(BytesIO(MINIMAL))
    pdf_writer = copy_into_new_writer(original)
    signed = signers.sign_pdf(
        pdf_writer,
        signers.PdfSignatureMetadata(field_name='Sig1'),
        signer=SELF_SIGN,
    )

    signature = PdfFileReader(signed).embedded_signatures[0]
    assert signature.field_name == 'Sig1'
    val_untrusted(signature)

コード例 #15

0

ファイルを表示

ファイル: test_embed.py プロジェクト: MatthiasValvekens/pyHanko

def test_decrypt_ef_without_explicit_crypt_filter():
    """
    Check that an embedded file stream without an explicit crypt filter
    is still decrypted correctly.
    """
    # Files like this one violate the spec, but they can be handled
    # gracefully, so they should be.
    path = PDF_DATA_DIR + '/embedded-encrypted-nocf.pdf'
    with open(path, 'rb') as inf:
        reader = PdfFileReader(inf)
        name_entries = reader.root['/Names']['/EmbeddedFiles']['/Names']
        file_stream_ref = name_entries[1]['/EF'].raw_get('/F')
        reader.decrypt('secret')
        assert not file_stream_ref.get_object()._has_crypt_filter
        assert file_stream_ref.get_object().data == VECTOR_IMAGE_PDF

コード例 #16

0

ファイルを表示

ファイル: test_xref.py プロジェクト: MatthiasValvekens/pyHanko

def test_historical_nonexistent_xref_access_nonstrict():
    """
    In non-strict mode, dereferencing the ``/Bleh`` entry of the first
    page in revision 0 should yield a null object.
    """
    buf = BytesIO()
    with open(NONEXISTENT_XREF_PATH, 'rb') as inf:
        pdf_writer = IncrementalPdfFileWriter(inf)
        first_page = pdf_writer.root['/Pages']['/Kids'][0]
        # drop the /Bleh entry in a new revision
        del first_page['/Bleh']
        pdf_writer.update_container(first_page)
        pdf_writer.write(buf)

    reader = PdfFileReader(buf, strict=False)
    original_root = reader.get_historical_root(0)
    original_page = original_root['/Pages']['/Kids'][0]
    dangling_ref = original_page.raw_get('/Bleh')
    assert isinstance(dangling_ref.get_object(), generic.NullObject)

コード例 #17

0

ファイルを表示

ファイル: test_xref.py プロジェクト: MatthiasValvekens/pyHanko

def test_tagged_path_count():
    """
    Check the number of distinct access paths recorded for object 7 in
    revision 0 of MINIMAL_TWO_FIELDS_TAGGED.
    """
    reader = PdfFileReader(BytesIO(MINIMAL_TWO_FIELDS_TAGGED))
    resolver = reader.get_historical_resolver(0)
    resolver._load_reverse_xref_cache()

    # After path simplification, only three of the (pseudo-)duplicate
    # references should remain:
    #  - one via the AcroForm hierarchy
    #  - one via the page tree (through /Annots)
    #  - one via the structure tree
    target = generic.Reference(7, 0, resolver)
    recorded_paths = resolver._indirect_object_access_cache[target]
    assert len(recorded_paths) == 3

コード例 #18

0

ファイルを表示

ファイル: test_sign_encrypted.py プロジェクト: MatthiasValvekens/pyHanko

def test_sign_crypt_pubkey_rc4():
    """
    Sign the existing field of a public-key (RC4) encrypted file, then
    decrypt and validate the signature.
    """
    pdf_writer = IncrementalPdfFileWriter(
        BytesIO(MINIMAL_PUBKEY_ONE_FIELD_RC4)
    )
    pdf_writer.encrypt_pubkey(PUBKEY_SELFSIGNED_DECRYPTER)
    signed = signers.sign_pdf(
        pdf_writer, signers.PdfSignatureMetadata(),
        signer=FROM_CA, existing_fields_only=True
    )

    reader = PdfFileReader(signed)
    reader.decrypt_pubkey(PUBKEY_SELFSIGNED_DECRYPTER)
    signature = reader.embedded_signatures[0]
    val_trusted(signature)

コード例 #19

0

ファイルを表示

ファイル: test_sign_encrypted.py プロジェクト: MatthiasValvekens/pyHanko

def test_sign_crypt_aes256(password):
    """
    Sign the existing field of MINIMAL_ONE_FIELD_AES256 after unlocking it
    with ``password``, then decrypt and validate the signature.
    """
    pdf_writer = IncrementalPdfFileWriter(BytesIO(MINIMAL_ONE_FIELD_AES256))
    pdf_writer.encrypt(password)
    signed = signers.sign_pdf(
        pdf_writer, signers.PdfSignatureMetadata(),
        signer=FROM_CA, existing_fields_only=True
    )

    reader = PdfFileReader(signed)
    reader.decrypt(password)
    signature = reader.embedded_signatures[0]
    val_trusted(signature)

コード例 #20

0

ファイルを表示

ファイル: policies.py プロジェクト: MatthiasValvekens/pyHanko

    def review_file(self, reader: PdfFileReader,
                    base_revision: Union[int, HistoricalResolver],
                    field_mdp_spec: Optional[FieldMDPSpec] = None,
                    doc_mdp: Optional[MDPPerm] = None) \
            -> Union[DiffResult, SuspiciousModification]:
        """
        Implementation of :meth:`.DiffPolicy.review_file` that compares the
        base revision against every later revision, one at a time.

        :param reader:
            The reader for the file under review.
        :param base_revision:
            The base revision, as a revision index or as a resolver.
        :param field_mdp_spec:
            Passed through to :meth:`apply` unchanged.
        :param doc_mdp:
            Passed through to :meth:`apply` unchanged.
        :return:
            A :class:`DiffResult` carrying the highest modification level
            encountered and all changed form fields, or the
            :class:`SuspiciousModification` raised by the first failing
            comparison.
        """

        total_revisions = reader.xrefs.total_revisions
        highest_level = ModificationLevel.NONE
        if not isinstance(base_revision, int):
            # we were handed a resolver; recover its revision index
            base_resolver = base_revision
            base_revision = base_resolver.revision
        else:
            base_resolver = reader.get_historical_resolver(base_revision)

        all_changed_fields = set()

        # Why every revision is diffed separately, rather than asking for
        # all objects updated between the signed revision and the latest:
        #
        # An intermediate update may leave no detectable trace in the most
        # recent revision. A single consolidated check would then be unable
        # to tell whether objects created (and later forgotten) by those
        # intermediate revisions were legitimate changes.
        # (see the test_pades_revinfo tests for cases where this matters)
        #
        # Short of a reference counter (with performance problems of its
        # own, which may or may not be worse), diffing each intermediate
        # revision on its own is the only practical way to deal with this.
        for later_revision in range(base_revision + 1, total_revisions):
            later_resolver = reader.get_historical_resolver(later_revision)
            try:
                outcome = self.apply(
                    old=base_resolver, new=later_resolver,
                    field_mdp_spec=field_mdp_spec, doc_mdp=doc_mdp
                )
            except SuspiciousModification as e:
                logger.warning(
                    'Error in diff operation between revision '
                    f'{base_revision} and {later_revision}',
                    exc_info=e)
                return e
            highest_level = max(highest_level, outcome.modification_level)
            all_changed_fields |= outcome.changed_form_fields
        return DiffResult(highest_level, all_changed_fields)

コード例 #21

0

ファイルを表示

ファイル: test_utils.py プロジェクト: slad99/pyHanko

def test_copy_to_encrypted_file():
    """
    Copy MINIMAL_ONE_FIELD into a fresh writer, encrypt it, and check that
    the owner password unlocks a structurally identical document.
    """
    original = PdfFileReader(BytesIO(MINIMAL_ONE_FIELD))
    pdf_writer = writer.copy_into_new_writer(original)
    expected_root_ref = pdf_writer.root_ref
    pdf_writer.encrypt("ownersecret", "usersecret")

    buf = BytesIO()
    pdf_writer.write(buf)

    encrypted = PdfFileReader(buf)
    auth = encrypted.decrypt("ownersecret")
    assert auth.status == AuthStatus.OWNER
    assert encrypted.root_ref == expected_root_ref
    form_fields = encrypted.root['/AcroForm']['/Fields']
    assert len(form_fields) == 1
    page_kids = encrypted.root['/Pages']['/Kids']
    assert len(page_kids) == 1

コード例 #22

0

ファイルを表示

ファイル: test_utils.py プロジェクト: slad99/pyHanko

def test_empty_user_pass():
    """
    Encrypt a copy of MINIMAL_ONE_FIELD with an empty user password and
    check that the empty string grants user access.
    """
    original = PdfFileReader(BytesIO(MINIMAL_ONE_FIELD))
    pdf_writer = writer.copy_into_new_writer(original)
    expected_root_ref = pdf_writer.root_ref
    pdf_writer.encrypt('ownersecret', '')

    buf = BytesIO()
    pdf_writer.write(buf)

    encrypted = PdfFileReader(buf)
    auth = encrypted.decrypt('')
    assert auth.status == AuthStatus.USER
    assert encrypted.root_ref == expected_root_ref
    form_fields = encrypted.root['/AcroForm']['/Fields']
    assert len(form_fields) == 1
    page_kids = encrypted.root['/Pages']['/Kids']
    assert len(page_kids) == 1

コード例 #23

0

ファイルを表示

ファイル: test_diff_analysis.py プロジェクト: jackii/pyHanko

def test_sign_reject_freed(forbid_freeing):
    """
    Append a hand-written xref section that frees object 2 after signing,
    and check how ``reject_object_freeing`` affects the modification level.
    """

    pdf_writer = IncrementalPdfFileWriter(BytesIO(MINIMAL_ONE_FIELD))
    out = signers.sign_pdf(
        pdf_writer,
        signature_meta=signers.PdfSignatureMetadata(field_name='Sig1'),
        signer=FROM_CA,
    )

    # Free the reference containing the /Info dictionary. The writer has
    # no support for freeing objects (yet), so this is done by hand.
    reader = PdfFileReader(out)
    prev_startxref = reader.last_startxref

    # NOTE the linked list offsets are dummied out, but our Xref parser
    # doesn't care
    new_xref_offset = out.seek(0, os.SEEK_END)
    xref_lines = [
        b'xref',
        b'0 1',
        b'0000000000 65535 f ',
        b'2 1',
        b'0000000000 00001 f ',
        b'trailer<</Prev %d>>' % prev_startxref,
        b'startxref',
        b'%d' % new_xref_offset,
        b'%%EOF',
    ]
    out.write(b'\n'.join(xref_lines))

    reader = PdfFileReader(out)
    final_revision = reader.xrefs.xref_sections - 1
    some_ref = generic.Reference(2, 0)

    assert some_ref in reader.xrefs.refs_freed_in_revision(final_revision)

    signature = reader.embedded_signatures[0]
    assert signature.signed_revision == 2

    # dummy rule that whitelists the freed object reference

    class AdHocRule(QualifiedWhitelistRule):
        def apply_qualified(self, old: HistoricalResolver,
                            new: HistoricalResolver):
            yield ModificationLevel.LTA_UPDATES, ReferenceUpdate(
                some_ref, paths_checked=RawPdfPath('/Root', '/Pages'))

    policy = StandardDiffPolicy(
        DEFAULT_DIFF_POLICY.global_rules + [AdHocRule()],
        DEFAULT_DIFF_POLICY.form_rule,
        reject_object_freeing=forbid_freeing
    )
    val_status = validate_pdf_signature(
        signature, SIMPLE_V_CONTEXT(), diff_policy=policy
    )
    expected_level = (
        ModificationLevel.OTHER if forbid_freeing
        else ModificationLevel.LTA_UPDATES
    )
    assert val_status.modification_level == expected_level

コード例 #24

0

ファイルを表示

ファイル: test_sign_encrypted.py プロジェクト: MatthiasValvekens/pyHanko

def test_sign_crypt_rc4_new(password, file):
    """
    Add a new signature field ``SigNew`` to an RC4-encrypted file, then
    decrypt and validate the signature.
    """
    source_bytes = sign_crypt_rc4_files[file]
    pdf_writer = IncrementalPdfFileWriter(BytesIO(source_bytes))
    pdf_writer.encrypt(password)
    signature_meta = signers.PdfSignatureMetadata(field_name='SigNew')
    signed = signers.sign_pdf(pdf_writer, signature_meta, signer=FROM_CA)
    signed.seek(0)

    reader = PdfFileReader(signed)
    reader.decrypt(password)

    signature = reader.embedded_signatures[0]
    val_trusted(signature)

コード例 #25

0

ファイルを表示

ファイル: test_utils.py プロジェクト: jackii/pyHanko

def test_create_fresh(zip1, zip2):
    """
    Build a two-page document from scratch and check page contents and
    page lookup by positive, negative and out-of-range indexes.
    """
    pdf_out = writer.PdfFileWriter()
    first_page = simple_page(pdf_out, 'Hello world', compress=zip1)
    second_page = simple_page(pdf_out, 'Hello Page 2', compress=zip2)
    first_ref = pdf_out.insert_page(first_page)
    second_ref = pdf_out.insert_page(second_page)

    buf = BytesIO()
    pdf_out.write(buf)
    buf.seek(0)

    reader = PdfFileReader(buf)
    page_tree = reader.root['/Pages']
    assert page_tree['/Count'] == 2
    kids = page_tree['/Kids']
    assert b'world' in kids[0].get_object()['/Contents'].data
    assert b'Page 2' in kids[1].get_object()['/Contents'].data

    # negative indexes count from the end
    lookups = ((0, first_ref), (1, second_ref),
               (-1, second_ref), (-2, first_ref))
    for page_ix, expected_ref in lookups:
        found = reader.find_page_for_modification(page_ix)
        assert found[0].idnum == expected_ref.idnum

    for out_of_range in (2, -3):
        with pytest.raises(ValueError):
            reader.find_page_for_modification(out_of_range)

コード例 #26

0

ファイルを表示

ファイル: test_text.py プロジェクト: terminalkitten/pdf-stamp

def test_write_embedded_string():
    """
    Add a content stream that uses an embedded font to page 0 and check
    that the page ends up with two content streams.
    """
    pdf_writer = IncrementalPdfFileWriter(BytesIO(MINIMAL))
    with open(NOTO_SERIF_JP, 'rb') as font_file:
        glyphs = GlyphAccumulator(pdf_writer, font_file, font_size=10)
    # Shaping is only done to register the glyphs as used;
    # the shaping result itself is discarded.
    glyphs.shape('テスト')

    # hardcoded CIDs
    cid_hx = '0637062a0639'
    content = f'BT /FEmb 18 Tf 0 100 Td <{cid_hx}> Tj ET'.encode('ascii')
    stream_ref = pdf_writer.add_object(
        generic.StreamObject(stream_data=content)
    )
    font_dict = generic.DictionaryObject(
        {pdf_name('/FEmb'): glyphs.as_resource()}
    )
    page_resources = generic.DictionaryObject(
        {pdf_name('/Font'): font_dict}
    )
    pdf_writer.add_stream_to_page(0, stream_ref, resources=page_resources)

    buf = BytesIO()
    pdf_writer.write(buf)
    buf.seek(0)

    reader = PdfFileReader(buf)
    first_page = reader.root['/Pages']['/Kids'][0].get_object()
    contents = first_page['/Contents']
    assert len(contents) == 2
    assert stream_ref.idnum in (item.idnum for item in contents)

コード例 #27

0

ファイルを表示

def test_sign_pss_md_discrepancy():
    """
    Sign with RSASSA-PSS parameters whose MGF1 digest (sha512) differs
    from the hash algorithm (sha256), and validate the result.
    """
    # Acrobat refuses to validate PSS signatures whose internal hash
    # functions disagree, although mathematically that shouldn't matter.
    mask_gen = MaskGenAlgorithm({
        'algorithm': 'mgf1',
        'parameters': DigestAlgorithm({'algorithm': 'sha512'})
    })
    pss_params = RSASSAPSSParams({
        'mask_gen_algorithm': mask_gen,
        'hash_algorithm': DigestAlgorithm({'algorithm': 'sha256'}),
        'salt_length': 478
    })
    mechanism = SignedDigestAlgorithm({
        'algorithm': 'rsassa_pss',
        'parameters': pss_params
    })
    pss_signer = signers.SimpleSigner.load(
        CRYPTO_DATA_DIR + '/selfsigned.key.pem',
        CRYPTO_DATA_DIR + '/selfsigned.cert.pem',
        ca_chain_files=(CRYPTO_DATA_DIR + '/selfsigned.cert.pem',),
        key_passphrase=b'secret',
        signature_mechanism=mechanism
    )

    pdf_writer = IncrementalPdfFileWriter(BytesIO(MINIMAL))
    signature_meta = signers.PdfSignatureMetadata(field_name='Sig1')
    signed = signers.sign_pdf(pdf_writer, signature_meta, signer=pss_signer)

    signature = PdfFileReader(signed).embedded_signatures[0]
    assert signature.field_name == 'Sig1'
    sig_algo: SignedDigestAlgorithm = \
        signature.signer_info['signature_algorithm']
    assert sig_algo.signature_algo == 'rsassa_pss'
    val_untrusted(signature)

コード例 #28

0

ファイルを表示

def test_old_style_signing_cert_attr_mismatch(with_issser):
    """
    Swap the signer identifier for that of another certificate and check
    that integrity validation rejects the signing certificate attribute.
    """

    # the "-issser" variant has an old-style signing cert attribute
    # that includes issuerSerial
    fname = (
        'pades-with-old-style-signing-cert-attr-issser.pdf' if with_issser
        else 'pades-with-old-style-signing-cert-attr.pdf'
    )
    with open(os.path.join(PDF_DATA_DIR, fname), 'rb') as f:
        reader = PdfFileReader(f)
        signature = reader.embedded_signatures[0]
        signer_info = signature.signer_info
        digest = signature.compute_digest()

    # signer1-long has the same key as signer1
    alt_cert = TESTING_CA.get_cert(CertLabel('signer1-long'))
    replacement_sid = cms.IssuerAndSerialNumber({
        'issuer': alt_cert.issuer,
        'serial_number': alt_cert.serial_number
    })
    signer_info['sid'] = {'issuer_and_serial_number': replacement_sid}

    with pytest.raises(
            SignatureValidationError,
            match="Signing certificate attribute does not match ") as exc_info:
        validate_sig_integrity(
            signer_info, alt_cert,
            expected_content_type='data', actual_digest=digest
        )

    error = exc_info.value
    assert error.ades_status == AdESStatus.INDETERMINATE
    assert (error.ades_subindication
            == AdESIndeterminate.NO_SIGNING_CERTIFICATE_FOUND)

コード例 #29

0

ファイルを表示

def test_overspecify_cms_digest_algo():
    """
    Sign with a mechanism that fixes the message digest (``sha256_rsa``):
    a matching ``md_algorithm`` works, a conflicting one raises.
    """
    # TODO this behaviour is not ideal, but at least this test documents it

    # an algorithm object that also mandates a specific message digest
    mechanism = SignedDigestAlgorithm({'algorithm': 'sha256_rsa'})
    fixed_digest_signer = signers.SimpleSigner.load(
        CRYPTO_DATA_DIR + '/selfsigned.key.pem',
        CRYPTO_DATA_DIR + '/selfsigned.cert.pem',
        ca_chain_files=(CRYPTO_DATA_DIR + '/selfsigned.cert.pem',),
        key_passphrase=b'secret',
        signature_mechanism=mechanism
    )

    # the digest methods agree, so this should be fine
    pdf_writer = IncrementalPdfFileWriter(BytesIO(MINIMAL))
    matching_meta = signers.PdfSignatureMetadata(
        field_name='Sig1', md_algorithm='sha256'
    )
    signed = signers.sign_pdf(
        pdf_writer, matching_meta, signer=fixed_digest_signer
    )
    signature = PdfFileReader(signed).embedded_signatures[0]
    val_untrusted(signature)

    # sha512 conflicts with the digest mandated by the mechanism
    pdf_writer = IncrementalPdfFileWriter(BytesIO(MINIMAL))
    with pytest.raises(SigningError):
        signers.sign_pdf(
            pdf_writer,
            signers.PdfSignatureMetadata(
                field_name='Sig1', md_algorithm='sha512'
            ),
            signer=fixed_digest_signer
        )

コード例 #30

0

ファイルを表示

def test_write_embedded_string_objstream():
    """
    Embed a font subset into an object stream, use it from a new content
    stream on page 0, and check that the font can be read back.
    """
    font = ttLib.TTFont(NOTO_SERIF_JP)
    glyphs = GlyphAccumulator(font)
    cid_hx, _ = glyphs.feed_string('テスト')
    assert cid_hx == '0637062a0639'

    pdf_writer = IncrementalPdfFileWriter(BytesIO(MINIMAL_XREF))
    obj_stream = pdf_writer.prepare_object_stream()
    font_ref = glyphs.embed_subset(pdf_writer, obj_stream=obj_stream)

    content = f'BT /FEmb 18 Tf 0 100 Td <{cid_hx}> Tj ET'.encode('ascii')
    stream_ref = pdf_writer.add_object(
        generic.StreamObject(stream_data=content)
    )
    font_dict = generic.DictionaryObject({pdf_name('/FEmb'): font_ref})
    page_resources = generic.DictionaryObject(
        {pdf_name('/Font'): font_dict}
    )
    pdf_writer.add_stream_to_page(0, stream_ref, resources=page_resources)

    buf = BytesIO()
    pdf_writer.write(buf)
    buf.seek(0)

    reader = PdfFileReader(buf)
    first_page = reader.root['/Pages']['/Kids'][0].get_object()
    contents = first_page['/Contents']
    assert len(contents) == 2
    assert stream_ref.idnum in (item.idnum for item in contents)
    assert font_ref.idnum in reader.xrefs.in_obj_stream
    buf.seek(0)

    # try to retrieve the font from the object stream
    font_ref.pdf = reader
    embedded_font = font_ref.get_object()
    assert embedded_font['/Type'] == pdf_name('/Font')