Esempio n. 1
0
def test_object_free():
    """Free object 1 in an update, then reintroduce it as generation 1."""
    base = [
        b'0 3',
        b'0000000000 65535 f',
        b'0000000100 00000 n',
        b'0000000200 00000 n',
    ]
    # second section: object 1 becomes a free entry (next generation 1)
    freed = [b'0 2', b'0000000000 65535 f', b'0000000000 00001 f']
    # third section: object 1 is in use again, generation 1, offset 300
    revived = [b'0 2', b'0000000000 65535 f', b'0000000300 00001 n']

    pdf_bytes = fmt_dummy_xrefs([base, freed, revived])
    reader = PdfFileReader(BytesIO(pdf_bytes))
    xref_cache = reader.xrefs
    assert xref_cache.xref_sections == 3

    gen0_ref = generic.Reference(1, 0)
    # the generation-0 incarnation resolves to a null object ...
    assert xref_cache[gen0_ref] == generic.NullObject()
    # ... and is reported as freed by the second section
    assert gen0_ref in xref_cache.refs_freed_in_revision(1)
    # the generation-1 incarnation points at the offset from the last section
    assert xref_cache[generic.Reference(1, 1)] == 300
Esempio n. 2
0
def test_object_free_no_override():
    """Free object 1, revive it as generation 1, then free it again."""
    base = [
        b'0 3',
        b'0000000000 65535 f',
        b'0000000100 00000 n',
        b'0000000200 00000 n',
    ]
    freed_gen0 = [b'0 2', b'0000000000 65535 f', b'0000000000 00001 f']
    revived = [b'0 2', b'0000000000 65535 f', b'0000000300 00001 n']
    freed_gen1 = [b'0 2', b'0000000000 65535 f', b'0000000000 00002 f']

    pdf_bytes = fmt_dummy_xrefs([base, freed_gen0, revived, freed_gen1])
    reader = PdfFileReader(BytesIO(pdf_bytes))
    xref_cache = reader.xrefs
    assert xref_cache.xref_sections == 4

    gen0_ref = generic.Reference(1, 0)
    gen1_ref = generic.Reference(1, 1)
    # neither incarnation of object 1 resolves to anything
    assert xref_cache[gen0_ref] is None
    assert xref_cache[gen1_ref] is None
    # each freeing is attributed to the section that introduced it
    assert gen0_ref in xref_cache.refs_freed_in_revision(1)
    assert gen1_ref in xref_cache.refs_freed_in_revision(3)
Esempio n. 3
0
def test_deep_modify():
    """update_container() on a nested value registers object 3 for writing."""
    writer = IncrementalPdfFileWriter(BytesIO(MINIMAL))
    obj3_ref = generic.Reference(3, 0, writer)

    # walk down from object 3 to a value nested several dictionaries deep
    nested = writer.get_object(obj3_ref)
    for key in ('/Resources', '/Font', '/F1', '/Subtype'):
        nested = nested[key]
    assert nested.container_ref.idnum == obj3_ref.idnum

    writer.update_container(nested)
    # (0, 3): presumably a (generation, idnum) key -- confirm against writer
    assert (0, 3) in writer.objects
Esempio n. 4
0
def test_refree_dead_object():
    """
    Object 1 starts out as a free entry with generation 0 and is listed as
    free again (generation 1) in the next section.

    The original author reports having seen this pattern in Acrobat output.
    """
    first_section = [
        b'0 3',
        b'0000000000 65535 f',
        b'0000000000 00000 f',
        b'0000000200 00000 n',
    ]
    second_section = [b'0 2', b'0000000000 65535 f', b'0000000000 00001 f']

    pdf_bytes = fmt_dummy_xrefs([first_section, second_section])
    reader = PdfFileReader(BytesIO(pdf_bytes))
    xref_cache = reader.xrefs
    assert xref_cache.total_revisions == 2

    dead_ref = generic.Reference(1, 0)
    # the initial generation-0 free entry does not count as a freeing
    assert dead_ref not in xref_cache.refs_freed_in_revision(0)
    # the second section's entry does, and is also an explicit ref there
    assert dead_ref in xref_cache.refs_freed_in_revision(1)
    assert dead_ref in xref_cache.explicit_refs_in_revision(1)
Esempio n. 5
0
def test_broken_obj_stream_fallback(fname, obj_to_get, expect_null):
    """
    Open a test file in non-strict mode and check the type of one object.

    :param fname:
        File name, relative to ``PDF_DATA_DIR``.
    :param obj_to_get:
        Object number to retrieve.
    :param expect_null:
        If true, the object must come back as a null object; otherwise it
        must be a dictionary.
    """
    pdf_path = os.path.join(PDF_DATA_DIR, fname)
    with open(pdf_path, 'rb') as inf:
        reader = PdfFileReader(inf, strict=False)
        fetched = reader.get_object(generic.Reference(idnum=obj_to_get))
        # the non-null cases are set up to always point to dictionaries
        expected_type = (
            generic.NullObject if expect_null else generic.DictionaryObject
        )
        assert isinstance(fetched, expected_type)
Esempio n. 6
0
def test_refree_dead_object():
    """
    Object 1 starts out as a free entry with generation 0, is listed as free
    again (generation 1), and is finally reintroduced as generation 1.

    The original author reports having seen this pattern in Acrobat output,
    minus the final update.
    """
    first_section = [
        b'0 3',
        b'0000000000 65535 f',
        b'0000000000 00000 f',
        b'0000000200 00000 n',
    ]
    second_section = [b'0 2', b'0000000000 65535 f', b'0000000000 00001 f']
    # reintroduce object 1 as generation 1
    third_section = [b'0 2', b'0000000000 65535 f', b'0000000300 00001 n']

    pdf_bytes = fmt_dummy_xrefs([first_section, second_section, third_section])
    reader = PdfFileReader(BytesIO(pdf_bytes))
    xref_cache = reader.xrefs
    assert xref_cache.xref_sections == 3

    dead_ref = generic.Reference(1, 0)
    assert dead_ref not in xref_cache.refs_freed_in_revision(0)
    assert dead_ref not in xref_cache.refs_freed_in_revision(1)
    assert dead_ref not in xref_cache.explicit_refs_in_revision(1)
    assert generic.Reference(1, 1) in xref_cache.explicit_refs_in_revision(2)
Esempio n. 7
0
 def _read_xref_stream_object(self):
     """
     Read an object header followed by a stream object from ``self.stream``
     and check that the stream's ``/Type`` is ``/XRef``.

     :return:
         A ``(reference, stream object)`` tuple.
     """
     src = self.stream
     obj_id, obj_gen = read_object_header(src, strict=self.strict)
     ref = generic.Reference(obj_id, obj_gen, pdf=self.handler)
     xref_stm = generic.StreamObject.read_from_stream(src, ref)
     xref_stm.container_ref = ref
     # NOTE: this is a plain assert, so the check is skipped under ``-O``
     assert xref_stm.raw_get("/Type") == "/XRef"
     return ref, xref_stm
Esempio n. 8
0
def test_tagged_path_count():
    """Object 7 must be reachable through exactly three simplified paths."""
    reader = PdfFileReader(BytesIO(MINIMAL_TWO_FIELDS_TAGGED))
    resolver = reader.get_historical_resolver(0)
    resolver._load_reverse_xref_cache()

    # After path simplification, all (pseudo-)duplicate refs should be gone,
    # leaving only these three:
    #  - one from the AcroForm hierarchy
    #  - one from the pages tree (through /Annots)
    #  - one from the structure tree
    target = generic.Reference(7, 0, resolver)
    access_paths = resolver._indirect_object_access_cache[target]
    assert len(access_paths) == 3
Esempio n. 9
0
def test_sign_reject_freed(forbid_freeing):
    """
    Sign a document, append an update that frees object 2, and validate.

    :param forbid_freeing:
        Passed on as ``reject_object_freeing`` to the diff policy. When true,
        the modification level must be ``OTHER``; otherwise ``LTA_UPDATES``.
    """
    writer = IncrementalPdfFileWriter(BytesIO(MINIMAL_ONE_FIELD))
    sig_meta = signers.PdfSignatureMetadata(field_name='Sig1')
    out = signers.sign_pdf(writer, signature_meta=sig_meta, signer=FROM_CA)

    # Free the ref containing the /Info dictionary. The writer has no support
    # for freeing objects (yet), so the update is appended by hand.
    prev_startxref = PdfFileReader(out).last_startxref

    # NOTE the linked list offsets are dummied out, but the xref parser
    # doesn't care
    xref_start = out.seek(0, os.SEEK_END)
    freeing_update = [
        b'xref',
        b'0 1',
        b'0000000000 65535 f ',
        b'2 1',
        b'0000000000 00001 f ',
        b'trailer<</Prev %d>>' % prev_startxref,
        b'startxref',
        b'%d' % xref_start,
        b'%%EOF',
    ]
    out.write(b'\n'.join(freeing_update))

    reader = PdfFileReader(out)
    newest_rev = reader.xrefs.xref_sections - 1
    freed_ref = generic.Reference(2, 0)
    assert freed_ref in reader.xrefs.refs_freed_in_revision(newest_rev)

    sig = reader.embedded_signatures[0]
    assert sig.signed_revision == 2

    # dummy rule that whitelists the freed object ref
    class AdHocRule(QualifiedWhitelistRule):
        def apply_qualified(self, old: HistoricalResolver,
                            new: HistoricalResolver):
            update = ReferenceUpdate(
                freed_ref, paths_checked=RawPdfPath('/Root', '/Pages')
            )
            yield ModificationLevel.LTA_UPDATES, update

    v_context = SIMPLE_V_CONTEXT()
    policy = StandardDiffPolicy(
        DEFAULT_DIFF_POLICY.global_rules + [AdHocRule()],
        DEFAULT_DIFF_POLICY.form_rule,
        reject_object_freeing=forbid_freeing,
    )
    val_status = validate_pdf_signature(sig, v_context, diff_policy=policy)

    expected_level = (
        ModificationLevel.OTHER if forbid_freeing
        else ModificationLevel.LTA_UPDATES
    )
    assert val_status.modification_level == expected_level
Esempio n. 10
0
    def explicit_refs_in_revision(self, revision) -> Set[generic.Reference]:
        """
        Collect the references of every object that a given revision
        explicitly adds or overwrites, including those listed in the
        section's hybrid part (if it has one).

        :param revision:
            A revision number. The oldest revision is zero.
        :return:
            A set of Reference objects.
        """
        xref_data = self._xref_sections[revision].xref_data
        collected = set()
        for raw_ref in xref_data.explicit_refs_in_revision:
            collected.add(generic.Reference(*raw_ref, pdf=self.reader))

        hybrid_section = xref_data.hybrid
        if hybrid_section is None:
            return collected

        # refs declared in the hybrid section belong to this revision too
        for raw_ref in hybrid_section.xref_data.explicit_refs_in_revision:
            collected.add(generic.Reference(*raw_ref, pdf=self.reader))
        return collected
Esempio n. 11
0
    def refs_freed_in_revision(self, revision) -> Set[generic.Reference]:
        """
        Collect the references that a given revision explicitly frees.

        Each entry ``idnum -> gen`` in the section's ``freed`` mapping is
        reported as the reference ``(idnum, gen - 1)``. Entries with a
        non-positive ``gen`` are skipped.

        :param revision:
            A revision number. The oldest revision is zero.
        :return:
            A set of Reference objects.
        """
        freed_entries = self._xref_sections[revision].xref_data.freed
        freed_refs = set()
        for idnum, next_gen in freed_entries.items():
            # "dead" objects are not acknowledged as freeings
            if next_gen > 0:
                freed_refs.add(
                    generic.Reference(idnum, next_gen - 1, pdf=self.reader)
                )
        return freed_refs
Esempio n. 12
0
    def _write_objects(self, stream, object_position_dict):
        """
        Write every object in ``self.objects`` to ``stream`` and record
        position information for the cross-reference data.

        :param stream:
            Output stream; must support ``tell()`` and ``write()``.
        :param object_position_dict:
            Mapping that is filled in place. Objects held in an object stream
            are keyed by ``(0, idnum)`` and map to a tuple
            ``(object stream idnum, index within that stream)``. Objects
            written directly are keyed by their ``self.objects`` key and map
            to the value of ``stream.tell()`` just before they are written.
        """
        # objects that live in object streams are dealt with first
        for obj_stream in self.object_streams:
            # register the object stream object itself
            #  (will get written later)
            objstm_ref = self.add_object(obj_stream.as_pdf_object())
            # prepare the XRef data for each object in the stream
            for position, member_id in enumerate(obj_stream._obj_refs):
                object_position_dict[(0, member_id)] = (
                    objstm_ref.idnum, position
                )

        for key in sorted(self.objects):
            generation, idnum = key
            obj = self.objects[key]
            object_position_dict[key] = stream.tell()
            header = '%d %d obj\n' % (idnum, generation)
            stream.write(header.encode('ascii'))
            # the object referenced by self._encrypt gets no handler
            use_handler = (
                self.security_handler is not None
                and idnum != self._encrypt.idnum
            )
            handler = self.security_handler if use_handler else None
            container_ref = generic.Reference(idnum, generation, self)
            obj.write_to_stream(stream, handler, container_ref)
            stream.write(b'\nendobj\n')
Esempio n. 13
0
def test_xref_access_no_decrypt():
    """Fetching the xref stream without decryption yields no proxy object."""
    reader = PdfFileReader(BytesIO(MINIMAL_AES256))
    # try to reach the xref stream with transparent decryption switched off
    xref_stream_ref = generic.Reference(7, 0)
    fetched = reader.get_object(
        ref=xref_stream_ref, transparent_decrypt=False
    )
    assert not isinstance(fetched, generic.DecryptedObjectProxy)
Esempio n. 14
0
def test_broken_objstream(fname, err, obj_to_get):
    """
    Check that strict-mode reading of a test file fails with a read error.

    :param fname:
        File name, relative to ``PDF_DATA_DIR``.
    :param err:
        Pattern that the error message must match.
    :param obj_to_get:
        Object number to retrieve.
    """
    pdf_path = os.path.join(PDF_DATA_DIR, fname)
    # the error may come from opening the reader or from the object lookup
    with open(pdf_path, 'rb') as inf, \
            pytest.raises(misc.PdfReadError, match=err):
        reader = PdfFileReader(inf, strict=True)
        reader.get_object(generic.Reference(idnum=obj_to_get))
Esempio n. 15
0
 def object_streams_used_in(self, revision):
     """
     Collect references to the object streams used in a given revision.

     :param revision:
         Index into the list of xref sections.
     :return:
         A set of Reference objects, one per entry in the section's
         ``obj_streams_used``.
     """
     used_ids = self._xref_sections[revision].xref_data.obj_streams_used
     stream_refs = set()
     for stm_id in used_ids:
         stream_refs.add(generic.Reference(stm_id, pdf=self.reader))
     return stream_refs
Esempio n. 16
0
 def do_check():
     """
     Re-read ``out`` (taken from the surrounding scope) and check that the
     first embedded signature validates with modification level ``OTHER``.
     """
     reader = PdfFileReader(out)
     # print the data of object 2 as seen in revision 3
     obj2 = reader.get_object(generic.Reference(2, 0, reader), revision=3)
     print(obj2.data)
     sig = reader.embedded_signatures[0]
     result = validate_pdf_signature(sig)
     assert result.modification_level == ModificationLevel.OTHER