def __init__(self, stream_xrefs=True, init_page_tree=True):
    """
    Set up a new writer with a document catalog, an info dictionary and a
    randomly generated two-part document ID.

    :param stream_xrefs:
        Passed through unchanged to the parent initialiser.
    :param init_page_tree:
        If ``True``, register an empty ``/Pages`` node and attach it to the
        catalog.
    """
    # root object
    catalog = generic.DictionaryObject({
        pdf_name("/Type"): pdf_name("/Catalog"),
    })

    # the document ID is a pair of independent random 16-byte strings
    document_id = generic.ArrayObject(
        [generic.ByteStringObject(os.urandom(16)) for _ in range(2)]
    )

    # info object
    info_dict = generic.DictionaryObject({
        pdf_name('/Producer'): pdf_string(VENDOR)
    })

    super().__init__(
        catalog, info_dict, document_id, stream_xrefs=stream_xrefs
    )

    if not init_page_tree:
        return

    empty_page_tree = generic.DictionaryObject({
        pdf_name("/Type"): pdf_name("/Pages"),
        pdf_name("/Count"): generic.NumberObject(0),
        pdf_name("/Kids"): generic.ArrayObject(),
    })
    catalog[pdf_name('/Pages')] = self.add_object(empty_page_tree)
def simple_page(pdf_out, ascii_text, compress=False, extra_stream=False):
    """
    Build a 300x144 page object that draws ``ascii_text`` in font ``/F1``.

    :param pdf_out:
        Writer in which the content stream(s) are registered.
    :param ascii_text:
        Text to draw; it is encoded as ASCII.
    :param compress:
        If ``True``, call ``compress()`` on every content stream.
    :param extra_stream:
        If ``True``, add a second content stream drawing the same text at
        vertical offset 100, and pass the page contents as an array.
    :return:
        The resulting ``writer.PageObject``.
    """
    # based on the minimal pdf file of
    # https://brendanzagaeski.appspot.com/0004.html
    from pyhanko.pdf_utils import generic, writer
    from pyhanko.pdf_utils.generic import pdf_name
    from pyhanko.pdf_utils.misc import get_courier

    font_dict = generic.DictionaryObject({pdf_name('/F1'): get_courier()})
    resources = generic.DictionaryObject({pdf_name('/Font'): font_dict})
    media_box = generic.ArrayObject(
        [generic.NumberObject(coord) for coord in (0, 0, 300, 144)]
    )

    def _text_stream(y):
        # one content stream that draws the text at vertical offset y
        commands = f'BT /F1 18 Tf 0 {y} Td ({ascii_text}) Tj ET'
        result = generic.StreamObject(stream_data=commands.encode('ascii'))
        if compress:
            result.compress()
        return result

    # all streams are built first, and only then registered (in order)
    offsets = (0, 100) if extra_stream else (0,)
    streams = [_text_stream(y) for y in offsets]
    stream_refs = [pdf_out.add_object(strm) for strm in streams]

    if extra_stream:
        contents = generic.ArrayObject(stream_refs)
    else:
        contents = stream_refs[0]

    return writer.PageObject(
        contents=contents, media_box=media_box, resources=resources
    )
def __init__(self, contents, media_box, resources=None):
    """
    Initialise a ``/Page`` dictionary.

    :param contents:
        Either a single indirect reference, or a list consisting solely of
        indirect references.
    :param media_box:
        Sequence of 4 coordinates; each is wrapped in a
        :class:`~.generic.NumberObject`.
    :param resources:
        Resource dictionary. A new, empty one is used when this argument
        is omitted or falsy.
    :raises PdfWriteError:
        If ``contents`` is neither an indirect reference nor a list of
        indirect references.
    :raises ValueError:
        If ``media_box`` does not have exactly 4 entries.
    """
    resources = resources or generic.DictionaryObject()

    if isinstance(contents, list):
        is_reference = instance_test(generic.IndirectObject)
        if not all(is_reference(entry) for entry in contents):
            raise PdfWriteError(
                'Contents array must consist of indirect references'
            )
        # promote plain lists to PDF arrays
        if not isinstance(contents, generic.ArrayObject):
            contents = generic.ArrayObject(contents)
    elif not isinstance(contents, generic.IndirectObject):
        raise PdfWriteError(
            'Contents must be either an indirect reference or an array'
        )

    if len(media_box) != 4:
        raise ValueError('Media box must consist of 4 coordinates.')

    box_array = generic.ArrayObject(
        [generic.NumberObject(coord) for coord in media_box]
    )
    page_entries = {
        pdf_name('/Type'): pdf_name('/Page'),
        pdf_name('/MediaBox'): box_array,
        pdf_name('/Resources'): resources,
        pdf_name('/Contents'): contents
    }
    super().__init__(page_entries)
def as_pdf_object(self) -> generic.DictionaryObject:
    """
    Build the PDF dictionary for this VRI entry.

    ``/OCSP`` and ``/CRL`` are only written when the corresponding list is
    non-empty; ``/Cert`` is always written.

    :return:
        A PDF dictionary representing this VRI entry.
    """
    vri_dict = generic.DictionaryObject(
        {pdf_name('/Type'): pdf_name('/VRI')}
    )
    optional_entries = (
        (pdf_name('/OCSP'), self.ocsps),
        (pdf_name('/CRL'), self.crls),
    )
    for key, items in optional_entries:
        if items:
            vri_dict[key] = generic.ArrayObject(items)
    vri_dict[pdf_name('/Cert')] = generic.ArrayObject(self.certs)
    return vri_dict
def __init__(self, field_name, *, box=None, include_on_page=None,
             combine_annotation=True,
             # this sets the "print" and "lock" bits
             annot_flags=0b10000100):
    """
    Initialise a signature form field together with its widget annotation.

    :param field_name:
        Value of the field's ``/T`` entry.
    :param box:
        Coordinates of the widget's ``/Rect``; all zeroes when omitted.
    :param include_on_page:
        If not ``None``, stored as the annotation's ``/P`` entry. It is
        also kept in ``self.page_ref``.
    :param combine_annotation:
        If ``True``, the annotation entries are written into the field
        dictionary itself; otherwise they go into a separate dictionary.
    :param annot_flags:
        Value of the annotation's ``/F`` entry.
    """
    if box is None:
        rect = [generic.FloatObject(0)] * 4
    else:
        rect = [generic.FloatObject(coord) for coord in box]

    super().__init__({
        # Signature field properties
        pdf_name('/FT'): pdf_name('/Sig'),
        pdf_name('/T'): pdf_string(field_name),
    })

    # the widget either coincides with the field or lives on its own
    widget = self if combine_annotation else generic.DictionaryObject()

    # Annotation properties: bare minimum
    widget['/Type'] = pdf_name('/Annot')
    widget['/Subtype'] = pdf_name('/Widget')
    widget['/F'] = generic.NumberObject(annot_flags)
    widget['/Rect'] = generic.ArrayObject(rect)

    self.page_ref = include_on_page
    if include_on_page is not None:
        widget['/P'] = include_on_page
    self.annot_dict = widget
def test_pages_kids_tamper(bogus_kids, indirectify):
    """
    Sign a document, then replace (or delete) the ``/Kids`` entry of the
    page tree root in a later revision, and run
    ``val_trusted_but_modified`` on the signature.
    """
    # sign, then fill
    signed = signers.sign_pdf(
        IncrementalPdfFileWriter(BytesIO(MINIMAL)),
        signers.PdfSignatureMetadata(field_name='Sig1'),
        signer=FROM_CA,
    )

    w = IncrementalPdfFileWriter(signed)
    # add an empty sig field to trigger the annotation parsing logic
    # in the difference analysis tool
    extra_spec = fields.SigFieldSpec(sig_field_name="Extra")
    fields.append_signature_field(w, sig_field_spec=extra_spec)

    page_root = w.root['/Pages']
    if indirectify:
        bogus_kids = generic.ArrayObject(
            [w.add_object(kid) for kid in bogus_kids]
        )

    if bogus_kids is None:
        del page_root['/Kids']
    else:
        page_root['/Kids'] = bogus_kids
    w.update_container(page_root)

    tampered = BytesIO()
    w.write(tampered)

    first_sig = PdfFileReader(tampered).embedded_signatures[0]
    assert first_sig.field_name == 'Sig1'
    val_trusted_but_modified(first_sig)
def test_double_sig_add_field_annots_indirect():
    """
    Certify a document with form filling allowed, turn the first page's
    ``/Annots`` array into an indirect object, and then sign again in a
    newly created signature field.
    """
    certification_meta = signers.PdfSignatureMetadata(
        field_name='Sig1', certify=True,
        docmdp_permissions=fields.MDPPerm.FILL_FORMS
    )
    w = IncrementalPdfFileWriter(BytesIO(MINIMAL_ONE_FIELD))
    out = signers.sign_pdf(w, certification_meta, signer=FROM_CA)

    # create a new signature field after signing
    w = IncrementalPdfFileWriter(out)

    # ... but first make the /Annots entry of the first page an indirect one
    first_page = w.root['/Pages']['/Kids'][0]
    annots_copy = generic.ArrayObject(first_page['/Annots'])
    annots_ref = w.add_object(annots_copy)
    first_page['/Annots'] = annots_ref
    annots_copy.container_ref = annots_ref
    w.update_container(first_page)

    new_field = fields.SigFieldSpec(
        sig_field_name='SigNew', box=(10, 10, 10, 10)
    )
    out = signers.sign_pdf(
        w, signers.PdfSignatureMetadata(field_name='SigNew'),
        signer=FROM_CA, new_field_spec=new_field
    )

    r = PdfFileReader(out)
    original_sig, new_sig = r.embedded_signatures[0], None

    assert original_sig.field_name == 'Sig1'
    status = val_trusted(original_sig, extd=True)
    assert status.modification_level == ModificationLevel.FORM_FILLING
    assert status.docmdp_ok

    new_sig = r.embedded_signatures[1]
    assert new_sig.field_name == 'SigNew'
    val_trusted(new_sig)
def init_xobject_dictionary(command_stream: bytes, box_width, box_height,
                            resources: Optional[generic.DictionaryObject]
                            = None) -> generic.StreamObject:
    """
    Create a form XObject stream from raw appearance commands.

    .. note::
        For utilities to handle image XObjects, see :mod:`.images`.

    :param command_stream:
        Raw appearance stream of the XObject.
    :param box_width:
        Width of the bounding box.
    :param box_height:
        Height of the bounding box.
    :param resources:
        Resource dictionary to attach to the form object. A new, empty
        dictionary is used when this is omitted or falsy.
    :return:
        The form XObject as a :class:`~.generic.StreamObject`.
    """
    bbox = generic.ArrayObject([
        generic.FloatObject(coord)
        for coord in (0.0, box_height, box_width, 0.0)
    ])
    xobject_entries = {
        pdf_name('/BBox'): bbox,
        pdf_name('/Resources'): resources or generic.DictionaryObject(),
        pdf_name('/Type'): pdf_name('/XObject'),
        pdf_name('/Subtype'): pdf_name('/Form')
    }
    return generic.StreamObject(xobject_entries, stream_data=command_stream)
def empty_page(stream_xrefs=False):
    """
    Create a writer holding one page with an empty content stream and a
    ``[0, 0, 595, 842]`` media box.

    :param stream_xrefs:
        Passed through to ``writer.PdfFileWriter``.
    :return:
        The writer, with the page already inserted.
    """
    w = writer.PdfFileWriter(stream_xrefs=stream_xrefs)
    blank_contents = w.add_object(generic.StreamObject(stream_data=b''))
    media_box = generic.ArrayObject([0, 0, 595, 842])
    w.insert_page(
        writer.PageObject(contents=blank_contents, media_box=media_box)
    )
    return w
def __init__(self, field_name, *, box=None, include_on_page=None,
             is_annotation=True,
             # this sets the "print" bit
             annot_flags=0b100):
    """
    Initialise a signature form field, optionally doubling as its own
    widget annotation.

    :param field_name:
        Value of the field's ``/T`` entry.
    :param box:
        Coordinates of the widget's ``/Rect``; all zeroes when omitted.
    :param include_on_page:
        If not ``None`` (and ``is_annotation`` is set), stored as ``/P``.
    :param is_annotation:
        If ``True``, the widget annotation entries are added to this
        dictionary.
    :param annot_flags:
        Value of the annotation's ``/F`` entry.
    """
    # the box is converted up front, whether or not it ends up being used
    if box is None:
        rect = [generic.FloatObject(0)] * 4
    else:
        rect = [generic.FloatObject(coord) for coord in box]

    super().__init__({
        # Signature field properties
        pdf_name('/FT'): pdf_name('/Sig'),
        pdf_name('/T'): pdf_string(field_name),
    })

    if not is_annotation:
        return

    # Annotation properties: bare minimum
    self['/Type'] = pdf_name('/Annot')
    self['/Subtype'] = pdf_name('/Widget')
    self['/F'] = generic.NumberObject(annot_flags)
    self['/Rect'] = generic.ArrayObject(rect)
    if include_on_page is not None:
        self['/P'] = include_on_page
def register_annotation(self, page_ref, annot_ref):
    """
    Register an annotation to be added to a page.
    This convenience function takes care of calling :meth:`mark_update`
    where necessary.

    Which object is marked as updated depends on how the page stores its
    ``/Annots`` entry:

    * indirect array: only the array object is marked;
    * direct array: the page object is marked;
    * no ``/Annots`` entry: a new direct array is created on the page and
      the page object is marked.

    :param page_ref:
        Reference to the page object involved.
    :param annot_ref:
        Reference to the annotation object to be added.
    """
    page_obj = page_ref.get_object()
    try:
        annots_ref = page_obj.raw_get('/Annots')
        if isinstance(annots_ref, generic.IndirectObject):
            annots = annots_ref.get_object()
            # The object being modified is the /Annots array, so that is
            # the reference that must be flagged (not the annotation that
            # is being appended to it).
            self.mark_update(annots_ref)
        else:
            # we need to update the entire page object if the annots array
            # is a direct object
            annots = annots_ref
            self.mark_update(page_ref)
    except KeyError:
        annots = generic.ArrayObject()
        self.mark_update(page_ref)
        page_obj[pdf_name('/Annots')] = annots

    annots.append(annot_ref)
def test_xref_stream_parse_width_value_default_ix2():
    """
    Parse an xref stream whose ``/W`` array is ``[1, 4, 0]``, i.e. whose
    entries carry no third field.
    """
    # no tail part
    hex_entries = ("0000000000", "0100000011")
    xref_data = b''.join(map(binascii.unhexlify, hex_entries))

    field_widths = generic.ArrayObject(
        [generic.NumberObject(width) for width in (1, 4, 0)]
    )
    stream_obj = generic.StreamObject(
        dict_data={
            generic.pdf_name('/W'): field_widths,
            generic.pdf_name('/Size'): 2
        },
        stream_data=xref_data
    )

    free_entry = XRefEntry(
        xref_type=XRefType.FREE, location=None, idnum=0, generation=0
    )
    standard_entry = XRefEntry(
        xref_type=XRefType.STANDARD, location=0x11, idnum=1
    )

    assert list(parse_xref_stream(stream_obj)) == [free_entry, standard_entry]
def test_append_sig_field_acro_update():
    """
    Append a signature field to documents with differently shaped
    ``/AcroForm`` dictionaries: one with an empty ``/Fields`` array, and
    one without any ``/Fields`` entry.
    """
    # test different configurations of the AcroForm

    def _reopen_with_acroform(acroform):
        # write a one-page document carrying the given /AcroForm, and
        # open the result for incremental updates
        base = PdfFileWriter()
        base.root['/AcroForm'] = acroform
        base.insert_page(simple_page(base, 'Hello world'))
        buf = BytesIO()
        base.write(buf)
        buf.seek(0)
        return IncrementalPdfFileWriter(buf)

    w = _reopen_with_acroform(
        generic.DictionaryObject(
            {pdf_name('/Fields'): generic.ArrayObject()}
        )
    )
    fields.append_signature_field(w, fields.SigFieldSpec('InvisibleSig'))
    assert len(w.root['/AcroForm']['/Fields']) == 1

    # Technically, this is not standards-compliant, but our routine
    # shouldn't care
    w = _reopen_with_acroform(generic.DictionaryObject())
    sp = fields.SigFieldSpec('InvisibleSig')
    with pytest.raises(ValueError):
        fields.append_signature_field(w, sp)
def write_to_stream(self, stream, encryption_key):
    """
    Encode the position dictionary as cross-reference stream data, set
    the ``/Index`` entry accordingly, and write the stream out through the
    parent implementation.

    Every record is 11 bytes long: one type byte, an 8-byte big-endian
    unsigned integer and a 2-byte big-endian unsigned integer.

    :param stream:
        Output stream, passed on to the parent implementation.
    :param encryption_key:
        Must be ``None``.
    :raises ValueError:
        If ``encryption_key`` is not ``None``.
    """
    # the caller is responsible for making sure that the stream
    # is registered in the position dictionary
    if encryption_key is not None:
        raise ValueError('XRef streams cannot be encrypted')

    records = BytesIO()
    # write null object
    records.write(b'\x00' * 9 + b'\xff\xff')

    # /Index starts out with the subsection for the null object
    index = [0, 1]
    for first_idnum, subsection in \
            _contiguous_xref_chunks(self.position_dict):
        index.extend((first_idnum, len(subsection)))
        for position, generation in subsection:
            if isinstance(position, tuple):
                # reference to object in object stream
                assert generation == 0
                obj_stream_num, ix = position
                record = struct.pack('>BQH', 2, obj_stream_num, ix)
            else:
                record = struct.pack('>BQH', 1, position, generation)
            records.write(record)

    self[pdf_name('/Index')] = generic.ArrayObject(
        [generic.NumberObject(entry) for entry in index]
    )
    self._data = records.getbuffer()
    super().write_to_stream(stream, None)
def __init__(self, cf: CIDFont):
    """
    Populate a ``/FontDescriptor`` dictionary from the metrics of the
    font in ``cf.tt_font``.

    :param cf:
        The CID font to describe; its ``tt_font`` and ``name`` attributes
        are read.
    """
    tt = cf.tt_font

    # Some metrics
    hhea = tt['hhea']
    head = tt['head']
    font_bbox = generic.ArrayObject(
        [
            generic.NumberObject(extent)
            for extent in (head.xMin, head.yMin, head.xMax, head.yMax)
        ]
    )
    os2 = tt['OS/2']
    stemv = int(10 + 220 * (os2.usWeightClass - 50) / 900)

    # both of these fall back to a default if the attribute is absent
    italic_angle = getattr(tt['post'], 'italicAngle', 0)
    cap_height = getattr(os2, 'sCapHeight', 750)

    super().__init__({
        pdf_name('/Type'): pdf_name('/FontDescriptor'),
        pdf_name('/FontName'): pdf_name('/' + cf.name),
        pdf_name('/Ascent'): generic.NumberObject(hhea.ascent),
        pdf_name('/Descent'): generic.NumberObject(hhea.descent),
        pdf_name('/FontBBox'): font_bbox,
        # FIXME I'm setting the Serif and Symbolic flags here, but
        #  is there any way we can read/infer those from the TTF metadata?
        pdf_name('/Flags'): generic.NumberObject(0b110),
        pdf_name('/StemV'): generic.NumberObject(stemv),
        pdf_name('/ItalicAngle'): generic.FloatObject(italic_angle),
        pdf_name('/CapHeight'): generic.NumberObject(cap_height)
    })
def _widths():
    """
    Group the ``(cid, width)`` pairs from ``widths_by_cid_iter`` into runs
    of consecutive CIDs.

    For every run, this generator yields the first CID of the run followed
    by an array holding the widths in that run.
    """
    run = []
    last_cid = None
    (run_start, _), pairs = peek(widths_by_cid_iter)
    for cid, width in pairs:
        # a gap in the CID sequence closes the current run
        if run and cid != last_cid + 1:
            yield from (
                generic.NumberObject(run_start), generic.ArrayObject(run)
            )
            run, run_start = [], cid
        run.append(generic.NumberObject(width))
        last_cid = cid

    # emit whatever is left over
    if run:
        yield from (
            generic.NumberObject(run_start), generic.ArrayObject(run)
        )
def register_widget_annotation(self, writer: BasePdfFileWriter,
                               sig_field_ref):
    """
    Register this field's widget annotation on ``self.page_ref``.

    If the widget is a separate dictionary (i.e. ``self.annot_dict`` is
    not the field itself), it is first added to the writer and listed
    under the field's ``/Kids``.

    :param writer:
        The writer to register the annotation with.
    :param sig_field_ref:
        Reference to this field; used as the annotation reference when
        field and widget coincide.
    """
    widget = self.annot_dict
    if widget is self:
        widget_ref = sig_field_ref
    else:
        widget_ref = writer.add_object(widget)
        self['/Kids'] = generic.ArrayObject([widget_ref])
    writer.register_annotation(self.page_ref, widget_ref)
def as_pdf_object(self):
    """
    Serialise this :class:`.SigCertConstraints` object as an ``/SVCert``
    dictionary.

    Only ``/Type`` and ``/Ff`` are always present; every other entry is
    written only if the corresponding attribute is set.

    :return:
        A :class:`~.generic.DictionaryObject`.
    """

    def _dumped_certs(certs):
        # array of byte strings, one per result of cert.dump()
        return generic.ArrayObject(
            generic.ByteStringObject(cert.dump()) for cert in certs
        )

    sv_cert = generic.DictionaryObject({
        pdf_name('/Type'): pdf_name('/SVCert'),
        pdf_name('/Ff'): generic.NumberObject(self.flags.value),
    })

    if self.subjects is not None:
        sv_cert[pdf_name('/Subject')] = _dumped_certs(self.subjects)

    if self.subject_dn:
        # FIXME Adobe Reader seems to ignore this for some reason.
        #  Should try to figure out what I'm doing wrong
        name_pairs = x509_name_keyval_pairs(
            self.subject_dn, abbreviate_oids=True
        )
        dn_dict = generic.DictionaryObject({
            pdf_name('/' + key): pdf_string(value)
            for key, value in name_pairs
        })
        sv_cert[pdf_name('/SubjectDN')] = generic.ArrayObject([dn_dict])

    if self.issuers is not None:
        sv_cert[pdf_name('/Issuer')] = _dumped_certs(self.issuers)

    if self.info_url is not None:
        sv_cert[pdf_name('/URL')] = pdf_string(self.info_url)
        sv_cert[pdf_name('/URLType')] = self.url_type

    if self.key_usage is not None:
        sv_cert[pdf_name('/KeyUsage')] = generic.ArrayObject(
            pdf_string(ku.encode_to_sv_string()) for ku in self.key_usage
        )

    return sv_cert
def as_pdf_object(self):
    """
    Write the contents of this :class:`.DocumentSecurityStore` into its
    backing PDF dictionary.

    The backing dictionary is modified in place, so this method also
    handles DSS updates. ``/Certs`` is always (re)written; ``/VRI``,
    ``/OCSPs`` and ``/CRLs`` only when there is something to put in them.

    :return:
        The backing PDF dictionary representing this DSS.
    """
    dss_dict = self.backing_pdf_object
    dss_dict['/Certs'] = generic.ArrayObject(list(self.certs.values()))

    if self.vri_entries:
        dss_dict['/VRI'] = generic.DictionaryObject(self.vri_entries)

    revinfo_lists = (
        (pdf_name('/OCSPs'), self.ocsps),
        (pdf_name('/CRLs'), self.crls),
    )
    for key, items in revinfo_lists:
        if items:
            dss_dict[key] = generic.ArrayObject(items)

    return dss_dict
def _import_object(self, obj: generic.PdfObject, reference_map: dict,
                   obj_stream) -> generic.PdfObject:
    """
    Recursively copy ``obj`` into this writer.

    Indirect references are re-registered in this writer; dictionaries,
    streams and arrays are rebuilt from their imported members; any other
    object is returned as-is.

    :param obj:
        The object to import.
    :param reference_map:
        Maps references that were already encountered to the references
        replacing them. It is updated in place.
    :param obj_stream:
        Object stream passed to :meth:`add_object` for imported objects
        that are allowed to live in one.
    :return:
        The imported object.
    """
    # TODO check the spec for guidance on fonts. Do font identifiers have
    #  to be globally unique?

    # TODO deal with container_ref

    if isinstance(obj, generic.DecryptedObjectProxy):
        obj = obj.decrypted

    if isinstance(obj, generic.IndirectObject):
        try:
            return reference_map[obj.reference]
        except KeyError:
            target = obj.get_object()

        # Add a placeholder to reserve the reference value.
        # This ensures correct behaviour in recursive calls
        # with self-references.
        placeholder = self.allocate_placeholder()
        reference_map[obj.reference] = placeholder
        copied = self._import_object(target, reference_map, obj_stream)

        # if the imported object is a bare reference and/or a stream
        # object, we can't put it into an object stream.
        destination = (
            None if isinstance(copied, OBJSTREAM_FORBIDDEN) else obj_stream
        )

        # fill in the placeholder
        self.add_object(
            copied, obj_stream=destination, idnum=placeholder.idnum
        )
        return placeholder

    if isinstance(obj, generic.DictionaryObject):
        imported_entries = {
            key: self._import_object(value, reference_map, obj_stream)
            for key, value in obj.items()
        }
        if not isinstance(obj, generic.StreamObject):
            return generic.DictionaryObject(imported_entries)
        # In the vast majority of use cases, I'd expect the content
        # to be available in encoded form by default.
        # By initialising the stream object in this way, we avoid
        # a potentially costly decoding operation.
        return generic.StreamObject(
            imported_entries, encoded_data=obj.encoded_data
        )

    if isinstance(obj, generic.ArrayObject):
        return generic.ArrayObject(
            self._import_object(item, reference_map, obj_stream)
            for item in obj
        )

    return obj
def __init__(self, position_dict):
    """
    Initialise an ``/XRef`` stream dictionary with a fixed ``/W`` array.

    :param position_dict:
        Stored on the instance as ``position_dict``.
    """
    super().__init__()
    self.position_dict = position_dict

    # type indicator is one byte wide
    # we use longs to indicate positions of objects (>Q)
    # two more bytes for the generation number of an uncompressed object
    field_widths = generic.ArrayObject(
        map(generic.NumberObject, (1, 8, 2))
    )
    self.update({
        pdf_name('/W'): field_widths,
        pdf_name('/Type'): pdf_name('/XRef'),
    })
def __init__(self, field_name, include_on_page, *, writer,
             sig_object_ref=None, box=None,
             appearances: Optional[AnnotAppearances] = None):
    """
    Initialise a combined signature field / widget annotation and add it
    to ``writer``.

    :param field_name:
        Value of the field's ``/T`` entry.
    :param include_on_page:
        Stored as the annotation's ``/P`` entry.
    :param writer:
        Writer in which the field is registered; the resulting reference
        is stored in ``self.reference``.
    :param sig_object_ref:
        If not ``None``, stored as the field's ``/V`` entry.
    :param box:
        Coordinates of the widget's ``/Rect``. When omitted, the rectangle
        is all zeroes and the field is not registered as a page
        annotation.
    :param appearances:
        Source of the ``/AP`` entry; only consulted when ``box`` is given.
    """
    visible = box is not None
    if visible:
        rect = [generic.FloatObject(coord) for coord in box]
        ap = None if appearances is None else appearances.as_pdf_object()
    else:
        rect = [generic.FloatObject(0)] * 4
        ap = None

    # this sets the "Print" bit, and activates "Locked" if the
    # signature field is ready to be filled
    flags = 0b10000100 if sig_object_ref is not None else 0b100

    super().__init__({
        # Signature field properties
        pdf_name('/FT'): pdf_name('/Sig'),
        pdf_name('/T'): pdf_string(field_name),
        # Annotation properties: bare minimum
        pdf_name('/Type'): pdf_name('/Annot'),
        pdf_name('/Subtype'): pdf_name('/Widget'),
        pdf_name('/F'): generic.NumberObject(flags),
        pdf_name('/P'): include_on_page,
        pdf_name('/Rect'): generic.ArrayObject(rect)
    })
    if sig_object_ref is not None:
        self[pdf_name('/V')] = sig_object_ref
    if ap is not None:
        self[pdf_name('/AP')] = ap

    # register ourselves
    own_ref = writer.add_object(self)
    self.reference = own_ref

    # if we're building an invisible form field, this is all there is to it
    if not visible:
        return
    writer.register_annotation(include_on_page, own_ref)
def test_sign_with_empty_kids():
    """
    Signing must fail with a :class:`SigningError` if the ``/Kids`` array
    of a field with a separate widget annotation has been emptied.
    """
    w = IncrementalPdfFileWriter(BytesIO(MINIMAL))
    spec = fields.SigFieldSpec(
        sig_field_name='Sig1', combine_annotation=False,
        box=(20, 20, 80, 40)
    )
    fields.append_signature_field(w, spec)

    # drop the reference to the widget annotation
    sig_field = w.root['/AcroForm']['/Fields'][0]
    sig_field['/Kids'] = generic.ArrayObject()

    meta = signers.PdfSignatureMetadata(field_name='Sig1')
    with pytest.raises(SigningError, match="Failed to access.*annot.*"):
        signers.sign_pdf(w, meta, signer=FROM_CA)
def _build_type0_font_from_cidfont(writer, cidfont_obj: 'CIDFont',
                                   widths_by_cid_iter, vertical,
                                   obj_stream=None):
    """
    Embed a CID font, build the Type0 font dictionary that wraps it, and
    set the CID font's ``/W`` entry.

    :param writer:
        Writer in which the CID font is embedded and registered.
    :param cidfont_obj:
        The CID font object; its ``/W`` entry is set by this function.
    :param widths_by_cid_iter:
        Iterable of ``(cid, width)`` pairs.
    :param vertical:
        Selects ``Identity-V`` over ``Identity-H`` as the encoding.
    :param obj_stream:
        Object stream passed on to the embedding and registration calls.
    :return:
        The Type0 font dictionary.
    """
    # take the Identity-* encoding to inherit from the /Encoding
    # entry specified in our CIDSystemInfo dict
    encoding = 'Identity-V' if vertical else 'Identity-H'

    cidfont_obj.embed(writer, obj_stream=obj_stream)
    descendant_ref = writer.add_object(cidfont_obj, obj_stream=obj_stream)
    type0_font = generic.DictionaryObject({
        pdf_name('/Type'): pdf_name('/Font'),
        pdf_name('/Subtype'): pdf_name('/Type0'),
        pdf_name('/DescendantFonts'): generic.ArrayObject([descendant_ref]),
        pdf_name('/Encoding'): pdf_name('/' + encoding),
        pdf_name('/BaseFont'): pdf_name(f'/{cidfont_obj.name}-{encoding}'),
    })

    # compute widths entry
    def _width_runs():
        # yields (first CID, widths array) for each run of consecutive CIDs
        run = []
        last_cid = None
        (run_start, _), pairs = peek(widths_by_cid_iter)
        for cid, width in pairs:
            # a gap in the CID sequence closes the current run
            if run and cid != last_cid + 1:
                yield generic.NumberObject(run_start)
                yield generic.ArrayObject(run)
                run, run_start = [], cid
            run.append(generic.NumberObject(width))
            last_cid = cid
        if run:
            yield generic.NumberObject(run_start)
            yield generic.ArrayObject(run)

    cidfont_obj[pdf_name('/W')] = generic.ArrayObject(list(_width_runs()))
    return type0_font
def test_sv_deserialisation():
    """
    Round-trip a seed value dictionary through
    ``SigSeedValueSpec.from_pdf_object`` and ``as_pdf_object``, and check
    that invalid ``/LockDocument`` values and a mandatory unsupported
    ``/Filter`` are rejected with a :class:`SigningError`.
    """
    sv_input = generic.DictionaryObject({
        pdf_name('/SubFilter'): generic.ArrayObject(
            map(pdf_name, ['/foo', '/adbe.pkcs7.detached', '/bleh'])
        ),
        pdf_name('/LegalAttestation'): generic.ArrayObject(
            ['xyz', 'abc', 'def']
        ),
        pdf_name('/AppearanceFilter'): generic.pdf_string('blah'),
        pdf_name('/LockDocument'): generic.pdf_name('/true')
    })
    sv = fields.SigSeedValueSpec.from_pdf_object(sv_input)
    assert len(sv.subfilters) == 1
    assert len(sv.legal_attestations) == 3
    assert sv.lock_document == fields.SeedLockDocument.LOCK

    sv_output = sv.as_pdf_object()
    assert sv_output['/AppearanceFilter'] == sv_input['/AppearanceFilter']
    assert sv_output['/LockDocument'] == sv_input['/LockDocument']
    assert sv_output['/LegalAttestation'] == sv_input['/LegalAttestation']

    # Each invalid value gets its own pytest.raises context: inside a
    # shared context, nothing after the first raising call would run, so
    # the second value would never actually be checked.
    with pytest.raises(SigningError):
        fields.SigSeedValueSpec.from_pdf_object(
            generic.DictionaryObject(
                {pdf_name('/LockDocument'): generic.pdf_name('/nonsense')}
            )
        )
    with pytest.raises(SigningError):
        fields.SigSeedValueSpec.from_pdf_object(
            generic.DictionaryObject(
                {pdf_name('/LockDocument'): generic.BooleanObject(True)}
            )
        )

    bad_filter = generic.DictionaryObject(
        {pdf_name('/Filter'): pdf_name('/unsupported')}
    )
    # this should run
    fields.SigSeedValueSpec.from_pdf_object(bad_filter)

    # set up outside the raises block, so that only the call under test
    # can satisfy the expectation
    bad_filter[pdf_name('/Ff')] = \
        generic.NumberObject(fields.SigSeedValFlags.FILTER.value)
    with pytest.raises(SigningError):
        fields.SigSeedValueSpec.from_pdf_object(bad_filter)
def as_pdf_object(self):
    """
    Serialise this :class:`.SigSeedValueSpec` object as an ``/SV``
    dictionary.

    Only ``/Type`` and ``/Ff`` are always present; every other entry is
    written only if the corresponding attribute is not ``None``.

    :return:
        A :class:`~.generic.DictionaryObject`.
    """
    seed_values = generic.DictionaryObject({
        pdf_name('/Type'): pdf_name('/SV'),
        pdf_name('/Ff'): generic.NumberObject(self.flags.value),
    })

    if self.subfilters is not None:
        seed_values[pdf_name('/SubFilter')] = generic.ArrayObject(
            subfilter.value for subfilter in self.subfilters
        )

    if self.add_rev_info is not None:
        seed_values[pdf_name('/AddRevInfo')] = \
            generic.BooleanObject(self.add_rev_info)

    if self.digest_methods is not None:
        seed_values[pdf_name('/DigestMethod')] = generic.ArrayObject(
            pdf_string(method) for method in self.digest_methods
        )

    if self.reasons is not None:
        seed_values[pdf_name('/Reasons')] = generic.ArrayObject(
            map(pdf_string, self.reasons)
        )

    if self.timestamp_server_url is not None:
        # the timestamp dictionary's /Ff is 1 if a timestamp is required
        timestamp_flag = 1 if self.timestamp_required else 0
        seed_values[pdf_name('/TimeStamp')] = generic.DictionaryObject({
            pdf_name('/URL'): pdf_string(self.timestamp_server_url),
            pdf_name('/Ff'): generic.NumberObject(timestamp_flag)
        })

    if self.cert is not None:
        seed_values[pdf_name('/Cert')] = self.cert.as_pdf_object()

    return seed_values
def as_pdf_object(self) -> generic.DictionaryObject:
    """
    Serialise this ``/FieldMDP`` policy description as a PDF dictionary.

    The ``/Fields`` entry is omitted when the action is
    ``FieldMDPAction.ALL``.

    :return:
        A :class:`~.generic.DictionaryObject`.
    """
    policy = generic.DictionaryObject({
        pdf_name('/Action'): self.action.value,
    })
    if self.action == FieldMDPAction.ALL:
        return policy

    policy['/Fields'] = generic.ArrayObject(
        pdf_string(field_name) for field_name in self.fields
    )
    return policy
def test_append_sig_field_acro_update():
    """
    Append a signature field to a document whose ``/AcroForm`` already has
    an (empty) ``/Fields`` array, and check that the array then holds one
    entry.
    """
    # test different configurations of the AcroForm
    base = PdfFileWriter()
    empty_fields = generic.ArrayObject()
    base.root['/AcroForm'] = generic.DictionaryObject(
        {pdf_name('/Fields'): empty_fields}
    )
    base.insert_page(simple_page(base, 'Hello world'))

    buf = BytesIO()
    base.write(buf)
    buf.seek(0)

    spec = fields.SigFieldSpec('InvisibleSig')
    w = IncrementalPdfFileWriter(buf)
    fields.append_signature_field(w, spec)

    form_fields = w.root['/AcroForm']['/Fields']
    assert len(form_fields) == 1
def test_xref_stream_parse_entry_types():
    """
    Parse an xref stream with ``/W [1 4 2]`` that mixes free, regular,
    object-stream and undefined entry types.
    """
    hex_entries = (
        "0000000000ffff",  # free
        "01000000110000",  # regular objects
        "01000000840000",
        "01000000bc0005",
        "01000001b40000",
        "01000002990000",
        "02000000030001",  # object in stream
        "03deadbeef1337",  # undefined (should be ignored)
        "02000000030002",  # object in stream
        "ffcafebabe0007",  # another undefined one
    )
    xref_data = b''.join(map(binascii.unhexlify, hex_entries))

    field_widths = generic.ArrayObject(
        [generic.NumberObject(width) for width in (1, 4, 2)]
    )
    stream_obj = generic.StreamObject(
        dict_data={
            generic.pdf_name('/W'): field_widths,
            generic.pdf_name('/Size'): 10
        },
        stream_data=xref_data
    )

    def _standard(location, idnum, **kwargs):
        # shorthand for an entry of type STANDARD
        return XRefEntry(
            xref_type=XRefType.STANDARD, location=location, idnum=idnum,
            **kwargs
        )

    expected_out = [
        XRefEntry(
            xref_type=XRefType.FREE, location=None, idnum=0,
            generation=0xffff
        ),
        _standard(0x11, 1),
        _standard(0x84, 2),
        _standard(0xbc, 3, generation=5),
        _standard(0x1b4, 4),
        _standard(0x299, 5),
        XRefEntry(
            xref_type=XRefType.IN_OBJ_STREAM,
            location=ObjStreamRef(3, 1), idnum=6
        ),
        XRefEntry(
            xref_type=XRefType.IN_OBJ_STREAM,
            location=ObjStreamRef(3, 2),
            idnum=8  # idnum jump because of undefined entry
        ),
    ]

    assert list(parse_xref_stream(stream_obj)) == expected_out
def test_premature_xref_stream_end():
    """
    An xref stream whose ``/Size`` announces more entries than the data
    contains must be rejected with an 'incomplete entry' read error.
    """
    hex_entries = ("000000ffff", "0100110000")
    xref_data = b''.join(map(binascii.unhexlify, hex_entries))

    field_widths = generic.ArrayObject(
        [generic.NumberObject(width) for width in (1, 2, 2)]
    )
    stream_obj = generic.StreamObject(
        dict_data={
            generic.pdf_name('/W'): field_widths,
            generic.pdf_name('/Size'): 3  # one too many
        },
        stream_data=xref_data
    )

    with pytest.raises(misc.PdfReadError, match='incomplete entry'):
        list(parse_xref_stream(stream_obj))