def simple_page(pdf_out, ascii_text, compress=False, extra_stream=False):
    """
    Build a minimal page object that draws ``ascii_text`` with font ``/F1``.

    Based on the minimal PDF file of
    https://brendanzagaeski.appspot.com/0004.html

    :param pdf_out:
        Writer to which the content stream(s) are added.
    :param ascii_text:
        Text to draw; the content stream is encoded as ASCII.
    :param compress:
        If ``True``, every content stream is compressed.
    :param extra_stream:
        If ``True``, a second content stream drawing the same text at
        vertical offset 100 is created, and the page contents become an
        array holding both stream references.
    :return:
        A ``writer.PageObject`` with a ``[0 0 300 144]`` media box.
    """
    from pyhanko.pdf_utils import generic, writer
    from pyhanko.pdf_utils.generic import pdf_name
    from pyhanko.pdf_utils.misc import get_courier

    def _content_stream(y_offset):
        # one text-showing operation, optionally compressed
        ops = f'BT /F1 18 Tf 0 {y_offset} Td ({ascii_text}) Tj ET'
        content = generic.StreamObject(stream_data=ops.encode('ascii'))
        if compress:
            content.compress()
        return content

    font_dict = generic.DictionaryObject({pdf_name('/F1'): get_courier()})
    resources = generic.DictionaryObject({pdf_name('/Font'): font_dict})
    media_box = generic.ArrayObject(
        generic.NumberObject(coord) for coord in (0, 0, 300, 144)
    )

    streams = [_content_stream(0)]
    if extra_stream:
        streams.append(_content_stream(100))

    refs = [pdf_out.add_object(content) for content in streams]
    # a lone stream is referenced directly; two streams go into an array
    contents = generic.ArrayObject(refs) if extra_stream else refs[0]
    return writer.PageObject(
        contents=contents, media_box=media_box, resources=resources
    )
def test_custom_crypt_filter(with_hex_filter, main_unencrypted):
    """
    Round-trip a stream that is encrypted with a custom crypt filter.

    :param with_hex_filter:
        If true, an ``/AHx`` filter is applied on top of the crypt filter,
        so ``/DecodeParms`` is expected to be an array with the crypt
        filter parameters at index 1.
    :param main_unencrypted:
        If true, no default stream/string filter is configured; the second
        stream (which has no explicit crypt filter) is then expected to be
        stored unencrypted. Otherwise the AES filter registered under
        ``STD_CF`` is the default and the raw data must differ.
    """
    # Name the credentials once: the password passed to decrypt() has to be
    # the one the security handler was built with, otherwise authentication
    # fails and nothing below can be read back.
    user_pass = "usersecret"
    owner_pass = "ownersecret"

    w = writer.PdfFileWriter()
    custom = pdf_name('/Custom')
    crypt_filters = {
        custom: StandardRC4CryptFilter(keylen=16),
    }
    if main_unencrypted:
        # streams/strings are unencrypted by default
        cfc = CryptFilterConfiguration(crypt_filters=crypt_filters)
    else:
        crypt_filters[STD_CF] = StandardAESCryptFilter(keylen=16)
        cfc = CryptFilterConfiguration(
            crypt_filters=crypt_filters,
            default_string_filter=STD_CF,
            default_stream_filter=STD_CF
        )
    sh = StandardSecurityHandler.build_from_pw_legacy(
        rev=StandardSecuritySettingsRevision.RC4_OR_AES128,
        id1=w.document_id[0],
        desired_user_pass=user_pass,
        desired_owner_pass=owner_pass,
        keylen_bytes=16,
        crypt_filter_config=cfc
    )
    w._assign_security_handler(sh)

    test_data = b'This is test data!'
    # first stream: explicitly tagged with the custom crypt filter
    dummy_stream = generic.StreamObject(stream_data=test_data)
    dummy_stream.add_crypt_filter(name=custom, handler=sh)
    ref = w.add_object(dummy_stream)
    # second stream: left to the document's default stream filter
    dummy_stream2 = generic.StreamObject(stream_data=test_data)
    ref2 = w.add_object(dummy_stream2)

    if with_hex_filter:
        dummy_stream.apply_filter(pdf_name('/AHx'))

    out = BytesIO()
    w.write(out)
    r = PdfFileReader(out)
    r.decrypt(owner_pass)

    obj: generic.StreamObject = r.get_object(ref.reference)
    assert obj.data == test_data
    if with_hex_filter:
        cf_dict = obj['/DecodeParms'][1]
    else:
        cf_dict = obj['/DecodeParms']
    assert cf_dict['/Name'] == pdf_name('/Custom')

    # inspect the second stream without transparent decryption
    obj2: generic.DecryptedObjectProxy = r.get_object(
        ref2.reference, transparent_decrypt=False
    )
    raw = obj2.raw_object
    assert isinstance(raw, generic.StreamObject)
    if main_unencrypted:
        assert raw.encoded_data == test_data
    else:
        assert raw.encoded_data != test_data
def test_custom_crypt_filter_errors():
    """
    Check the two error paths exercised here: naming a crypt filter that
    is not in the configuration, and writing a stream whose crypt filter
    has no handler attached.
    """
    pdf_writer = writer.PdfFileWriter()
    custom = pdf_name('/Custom')
    filter_config = CryptFilterConfiguration(
        crypt_filters={
            custom: StandardRC4CryptFilter(keylen=16),
            STD_CF: StandardAESCryptFilter(keylen=16),
        },
        default_string_filter=STD_CF,
        default_stream_filter=STD_CF
    )
    handler = StandardSecurityHandler.build_from_pw_legacy(
        rev=StandardSecuritySettingsRevision.RC4_OR_AES128,
        id1=pdf_writer.document_id[0],
        desired_user_pass="******",
        desired_owner_pass="******",
        keylen_bytes=16,
        crypt_filter_config=filter_config
    )
    pdf_writer._assign_security_handler(handler)

    payload = b'This is test data!'
    dummy_stream = generic.StreamObject(stream_data=payload)

    # unknown filter name
    with pytest.raises(misc.PdfStreamError):
        dummy_stream.add_crypt_filter(name='/Idontexist', handler=handler)

    # no handler
    dummy_stream.add_crypt_filter(name=custom)
    dummy_stream._handler = None
    pdf_writer.add_object(dummy_stream)

    sink = BytesIO()
    with pytest.raises(misc.PdfStreamError):
        pdf_writer.write(sink)
def test_identity_crypt_filter(use_alias, with_never_decrypt):
    """
    Streams tagged with the ``/Identity`` crypt filter must be stored as-is.

    :param use_alias:
        If true, the identity filter is additionally registered under
        ``/IdentityAlias``; assigning the security handler is then expected
        to raise, and the test ends there.
    :param with_never_decrypt:
        Forwarded as ``never_decrypt`` when reading the stream back.
    """
    pdf_writer = writer.PdfFileWriter()
    handler = StandardSecurityHandler.build_from_pw("secret")
    pdf_writer.security_handler = handler
    idf: IdentityCryptFilter = IdentityCryptFilter()
    assert handler.crypt_filter_config[pdf_name("/Identity")] is idf

    if use_alias:
        alias = pdf_name("/IdentityAlias")
        handler.crypt_filter_config._crypt_filters[alias] = idf
        assert handler.crypt_filter_config[pdf_name("/IdentityAlias")] is idf
        # identity filter can't be serialised, so this should throw an error
        with pytest.raises(misc.PdfError):
            pdf_writer._assign_security_handler(handler)
        return

    pdf_writer._assign_security_handler(handler)

    test_bytes = b'This is some test data that should remain unencrypted.'
    test_stream = generic.StreamObject(
        stream_data=test_bytes, handler=handler
    )
    crypt_params = {pdf_name("/Name"): pdf_name("/Identity")}
    test_stream.apply_filter("/Crypt", params=crypt_params)
    ref = pdf_writer.add_object(test_stream).reference

    sink = BytesIO()
    pdf_writer.write(sink)

    reader = PdfFileReader(sink)
    reader.decrypt("secret")
    the_stream = reader.get_object(ref, never_decrypt=with_never_decrypt)
    # both the encoded and the decoded view must equal the plaintext
    assert the_stream.encoded_data == test_bytes
    assert the_stream.data == test_bytes
def test_add_stream_to_direct_arr():
    """
    Append a content stream to a page built with two content streams and
    verify that the page ends up with three, with its font resource intact.
    """
    base_writer = writer.PdfFileWriter()
    base_writer.insert_page(
        simple_page(base_writer, 'Test Test', extra_stream=True)
    )
    base_doc = BytesIO()
    base_writer.write(base_doc)
    base_doc.seek(0)

    incr_writer = IncrementalPdfFileWriter(base_doc)
    new_stream = 'BT /F1 18 Tf 0 50 Td (Test2 Test2) Tj ET'.encode('ascii')
    stream_ref = incr_writer.add_object(
        generic.StreamObject(stream_data=new_stream)
    )
    incr_writer.add_stream_to_page(0, stream_ref)

    updated_doc = BytesIO()
    incr_writer.write(updated_doc)
    updated_doc.seek(0)
    reader = PdfFileReader(updated_doc)

    # check if the content stream was added
    page_obj_ref = reader.root['/Pages']['/Kids'].raw_get(0)
    assert isinstance(page_obj_ref, generic.IndirectObject)
    page_obj = page_obj_ref.get_object()
    conts = page_obj['/Contents']
    assert len(conts) == 3
    assert stream_ref.idnum in [c.idnum for c in conts]

    # check if resource dictionary is still OK
    assert '/F1' in page_obj['/Resources']['/Font']
def test_xref_stream_parse_width_value_default_ix2():
    """
    Parse an xref stream whose ``/W`` array is ``[1 4 0]``, i.e. the third
    field of each entry takes up zero bytes.
    """
    # no tail part
    raw_entries = b''.join(
        binascii.unhexlify(hex_entry)
        for hex_entry in ("0000000000", "0100000011")
    )
    widths = generic.ArrayObject(
        [generic.NumberObject(width) for width in (1, 4, 0)]
    )
    stream_obj = generic.StreamObject(
        dict_data={
            generic.pdf_name('/W'): widths,
            generic.pdf_name('/Size'): 2
        },
        stream_data=raw_entries
    )

    free_entry = XRefEntry(
        xref_type=XRefType.FREE, location=None, idnum=0, generation=0
    )
    standard_entry = XRefEntry(
        xref_type=XRefType.STANDARD, location=0x11, idnum=1
    )
    parsed = list(parse_xref_stream(stream_obj))
    assert parsed == [free_entry, standard_entry]
def test_write_embedded_string_objstream():
    """
    Embed a font subset through an object stream, draw a string with it,
    and check that the font can be retrieved from the object stream after
    reading the file back.
    """
    tt_font = ttLib.TTFont(NOTO_SERIF_JP)
    accumulator = GlyphAccumulator(tt_font)
    cid_hx, _ = accumulator.feed_string('テスト')
    assert cid_hx == '0637062a0639'

    w = IncrementalPdfFileWriter(BytesIO(MINIMAL_XREF))
    obj_stream = w.prepare_object_stream()
    font_ref = accumulator.embed_subset(w, obj_stream=obj_stream)

    draw_ops = f'BT /FEmb 18 Tf 0 100 Td <{cid_hx}> Tj ET'.encode('ascii')
    stream_ref = w.add_object(generic.StreamObject(stream_data=draw_ops))
    font_resources = generic.DictionaryObject({
        pdf_name('/Font'): generic.DictionaryObject(
            {pdf_name('/FEmb'): font_ref}
        )
    })
    w.add_stream_to_page(0, stream_ref, resources=font_resources)

    out = BytesIO()
    w.write(out)
    out.seek(0)

    r = PdfFileReader(out)
    page_obj = r.root['/Pages']['/Kids'][0].get_object()
    conts = page_obj['/Contents']
    assert len(conts) == 2
    assert stream_ref.idnum in [c.idnum for c in conts]
    assert font_ref.idnum in r.xrefs.in_obj_stream

    out.seek(0)
    # attempt to grab the font from the object stream
    font_ref.pdf = r
    font = font_ref.get_object()
    assert font['/Type'] == pdf_name('/Font')
def test_write_embedded_string():
    """
    Register glyphs with a ``GlyphAccumulator``, draw hard-coded CIDs with
    the resulting font resource, and check the page gained a content stream.
    """
    w = IncrementalPdfFileWriter(BytesIO(MINIMAL))
    with open(NOTO_SERIF_JP, 'rb') as font_handle:
        accumulator = GlyphAccumulator(w, font_handle, font_size=10)
    # shape the string, just to register the glyphs as used
    accumulator.shape('テスト')
    # ... but we're not going to use the result

    # hardcoded CIDs
    cid_hx = '0637062a0639'
    draw_ops = f'BT /FEmb 18 Tf 0 100 Td <{cid_hx}> Tj ET'.encode('ascii')
    stream_ref = w.add_object(generic.StreamObject(stream_data=draw_ops))

    font_dict = generic.DictionaryObject({
        pdf_name('/FEmb'): accumulator.as_resource()
    })
    page_resources = generic.DictionaryObject({pdf_name('/Font'): font_dict})
    w.add_stream_to_page(0, stream_ref, resources=page_resources)

    out = BytesIO()
    w.write(out)
    out.seek(0)

    r = PdfFileReader(out)
    page_obj = r.root['/Pages']['/Kids'][0].get_object()
    conts = page_obj['/Contents']
    assert len(conts) == 2
    assert stream_ref.idnum in [c.idnum for c in conts]
def _format_tounicode_cmap(self, registry, ordering, supplement):
    """
    Build a ToUnicode CMap stream mapping each recorded CID to its
    character.

    ``self._glyphs`` is iterated as ``character -> (cid, _, _)``; one
    single-entry ``bfchar`` block is emitted per glyph, ordered by CID.
    Both the CID and ``ord(character)`` are written as at least four
    hexadecimal digits.

    :param registry:
        Value written as ``/Registry`` in ``/CIDSystemInfo``.
    :param ordering:
        Value written as ``/Ordering`` in ``/CIDSystemInfo``.
    :param supplement:
        Value written as ``/Supplement``; also formatted with width 3 and
        zero padding as the last component of the CMap name.
    :return:
        An uncompressed :class:`~.generic.StreamObject` holding the CMap,
        encoded as ASCII.
    """
    by_cid = sorted(
        ((cid, ch) for ch, (cid, _, _) in self._glyphs.items()),
        key=lambda pair: pair[0]
    )

    header = (
        '/CIDInit /ProcSet findresource begin\n'
        '12 dict begin\n'
        'begincmap\n'
        '/CIDSystemInfo 3 dict dup begin\n'
        f'/Registry ({registry}) def\n'
        f'/Ordering ({ordering}) def\n'
        # "def" has to end the /Supplement line; previously it was glued
        # onto the next literal, yielding the invalid token "defend".
        f'/Supplement {supplement} def\n'
        'end def\n'
        # the CMap name is a PostScript name literal, hence the leading "/"
        f'/CMapName /{registry}-{ordering}-{supplement:03} def\n'
        '/CMapType 2 def\n'
        '1 begincodespacerange\n'
        '<0000> <FFFF>\n'
        'endcodespacerange\n'
    )
    # TODO make an effort to use ranges when appropriate, and at least
    #  group the glyphs
    body = '\n'.join(
        f'1 beginbfchar\n<{cid:04x}> <{ord(ch):04x}>\nendbfchar\n'
        for cid, ch in by_cid
    )
    footer = (
        'endcmap\n'
        'CMapName currentdict /CMap\n'
        'defineresource pop\n'
        'end\nend'
    )
    cmap_source = header + body + footer
    return generic.StreamObject(stream_data=cmap_source.encode('ascii'))
def apply(self, dest_page: int, x: int, y: int):
    """
    Apply a stamp to a particular page in the PDF writer attached to this
    :class:`.TextStamp` instance.

    The stamp is painted by a small wrapper content stream that translates
    to ``(x, y)`` and invokes the stamp XObject, which is registered in the
    page resources under ``/Stamp`` followed by the hex form of a freshly
    generated UUID4.

    :param dest_page:
        Index of the page to which the stamp is to be applied
        (starting at `0`).
    :param x:
        Horizontal position of the stamp's lower left corner on the page.
    :param y:
        Vertical position of the stamp's lower left corner on the page.
    :return:
        A reference to the affected page object, together with
        a ``(width, height)`` tuple describing the dimensions of the stamp.
    """
    stamp_ref = self.register()
    resource_name = b'/Stamp' + hexlify(uuid.uuid4().bytes)

    paint_ops = b'q 1 0 0 1 %g %g cm %s Do Q' % (
        rd(x), rd(y), resource_name
    )
    wrapper_stream = generic.StreamObject(stream_data=paint_ops)

    xobject_dict = generic.DictionaryObject({
        pdf_name(resource_name.decode('ascii')): stamp_ref
    })
    resources = generic.DictionaryObject({
        pdf_name('/XObject'): xobject_dict
    })

    pdf_writer = self.writer
    wrapper_ref = pdf_writer.add_object(wrapper_stream)
    page_ref = pdf_writer.add_stream_to_page(
        dest_page, wrapper_ref, resources
    )
    return page_ref, (self.box.width, self.box.height)
def init_xobject_dictionary(command_stream: bytes, box_width, box_height,
                            resources: Optional[generic.DictionaryObject]
                            = None) -> generic.StreamObject:
    """
    Helper function to initialise form XObject dictionaries.

    .. note::
        For utilities to handle image XObjects, see :mod:`.images`.

    :param command_stream:
        The XObject's raw appearance stream.
    :param box_width:
        The width of the XObject's bounding box.
    :param box_height:
        The height of the XObject's bounding box.
    :param resources:
        A resource dictionary to include with the form object.
        A falsy value (``None`` or an empty dictionary) is replaced by
        a new empty dictionary.
    :return:
        A :class:`~.generic.StreamObject` representation of the form XObject.
    """
    if not resources:
        resources = generic.DictionaryObject()

    # the box is written as [0 height width 0]
    bbox = generic.ArrayObject(
        [generic.FloatObject(coord)
         for coord in (0.0, box_height, box_width, 0.0)]
    )
    xobject_dict = {
        pdf_name('/BBox'): bbox,
        pdf_name('/Resources'): resources,
        pdf_name('/Type'): pdf_name('/XObject'),
        pdf_name('/Subtype'): pdf_name('/Form')
    }
    return generic.StreamObject(xobject_dict, stream_data=command_stream)
def empty_page(stream_xrefs=False):
    """
    Create a writer holding a single page with an empty content stream
    and a ``[0, 0, 595, 842]`` media box.

    :param stream_xrefs:
        Passed through to ``writer.PdfFileWriter``.
    :return:
        The writer with the page inserted.
    """
    pdf_writer = writer.PdfFileWriter(stream_xrefs=stream_xrefs)
    blank_contents = pdf_writer.add_object(
        generic.StreamObject(stream_data=b'')
    )
    media_box = generic.ArrayObject([0, 0, 595, 842])
    pdf_writer.insert_page(
        writer.PageObject(contents=blank_contents, media_box=media_box)
    )
    return pdf_writer
def as_pdf_object(self) -> generic.StreamObject:
    """
    Render the object stream to a PDF stream object.

    The stream data consists of a header of ``"<idnum> <offset> "`` pairs
    (one per object, offsets relative to the start of the body) followed
    by the serialised objects. ``/N`` records the number of objects and
    ``/First`` the length of the header.

    :return:
        An instance of :class:`~.generic.StreamObject`, compressed
        if ``self.compress`` is set.
    """
    index_buf = BytesIO()
    body_buf = BytesIO()
    for idnum, obj in self._obj_refs.items():
        # record where this object starts before serialising it
        start = body_buf.tell()
        obj.write_to_stream(body_buf, None)
        index_buf.write(b'%d %d ' % (idnum, start))

    index_bytes = index_buf.getvalue()
    first_obj_offset = len(index_bytes)

    objstm_dict = {
        pdf_name('/Type'): pdf_name('/ObjStm'),
        pdf_name('/N'): generic.NumberObject(len(self._obj_refs)),
        pdf_name('/First'): generic.NumberObject(first_obj_offset)
    }
    stream_object = generic.StreamObject(
        objstm_dict, stream_data=index_bytes + body_buf.getvalue()
    )
    if self.compress:
        stream_object.compress()
    return stream_object
def test_add_stream():
    """
    Append a content stream to page 0 in two successive incremental
    updates, checking after each that the page's ``/Contents`` grew by one.
    """

    def stream_data(y):
        return f'BT /F1 18 Tf 0 {y} Td (Test Test) Tj ET'.encode('ascii')

    def _append_and_write(pdf_writer, y):
        # add one stream to the first page and serialise the result
        ref = pdf_writer.add_object(
            generic.StreamObject(stream_data=stream_data(y))
        )
        pdf_writer.add_stream_to_page(0, ref)
        buf = BytesIO()
        pdf_writer.write(buf)
        buf.seek(0)
        return ref, buf

    def _load_first_page(buf):
        reader = PdfFileReader(buf)
        page_obj_ref = reader.root['/Pages']['/Kids'].raw_get(0)
        assert isinstance(page_obj_ref, generic.IndirectObject)
        return page_obj_ref.get_object()

    w = IncrementalPdfFileWriter(BytesIO(MINIMAL))
    stream_ref, out = _append_and_write(w, 50)

    # check if the content stream was added
    page_obj = _load_first_page(out)
    conts = page_obj['/Contents']
    assert len(conts) == 2
    assert stream_ref.idnum in [c.idnum for c in conts]
    # check if resource dictionary is still OK
    assert '/F1' in page_obj['/Resources']['/Font']

    # let's try adding a third
    out.seek(0)
    w = IncrementalPdfFileWriter(out)
    new_stream_ref, out = _append_and_write(w, 100)

    # check if the content stream was added
    page_obj = _load_first_page(out)
    conts = page_obj['/Contents']
    assert len(conts) == 3
    ids = [c.idnum for c in conts]
    assert stream_ref.idnum in ids and new_stream_ref.idnum in ids
def test_no_stms_in_obj_stm():
    """Adding a stream object to an object stream must raise TypeError."""
    pdf_writer = writer.PdfFileWriter(stream_xrefs=True)
    obj_stm = pdf_writer.prepare_object_stream()
    payload = generic.StreamObject(stream_data=b'Hello world!')

    with pytest.raises(TypeError, match='Stream obj.*references'):
        pdf_writer.add_object(payload, obj_stream=obj_stm)
def set_font_file(self, writer: BasePdfFileWriter):
    """
    Serialise ``self.tt_font``, add it to ``writer`` as a compressed
    stream, and register it as ``/FontFile2`` in the font descriptor.

    :param writer:
        The PDF writer to which the font program is added.
    :return:
        The reference to the newly added font stream.
    """
    font_buf = BytesIO()
    self.tt_font.save(font_buf)

    font_stream = generic.StreamObject(stream_data=font_buf.getvalue())
    font_stream.compress()

    font_stream_ref = writer.add_object(font_stream)
    self._font_descriptor[pdf_name('/FontFile2')] = font_stream_ref
    return font_stream_ref
def _import_object(self, obj: generic.PdfObject, reference_map: dict,
                   obj_stream) -> generic.PdfObject:
    """
    Recursively copy ``obj`` into this writer.

    Indirect references are re-allocated in this writer and recorded in
    ``reference_map``, so each source reference is imported only once.
    Dictionaries, streams and arrays are rebuilt with imported members;
    any other object is returned unchanged.

    :param obj:
        The object to import.
    :param reference_map:
        Maps references in the source to the references allocated for
        them in this writer; updated in place.
    :param obj_stream:
        Object stream to put newly added indirect objects into. It is not
        used for objects that are instances of ``OBJSTREAM_FORBIDDEN``.
    :return:
        The imported object.
    """
    # TODO check the spec for guidance on fonts. Do font identifiers have
    #  to be globally unique?

    # TODO deal with container_ref

    if isinstance(obj, generic.DecryptedObjectProxy):
        obj = obj.decrypted

    if isinstance(obj, generic.IndirectObject):
        try:
            return reference_map[obj.reference]
        except KeyError:
            target = obj.get_object()
            # Add a placeholder to reserve the reference value.
            # This ensures correct behaviour in recursive calls
            # with self-references.
            placeholder = self.allocate_placeholder()
            reference_map[obj.reference] = placeholder
            imported = self._import_object(
                target, reference_map, obj_stream
            )

            # if the imported object is a bare reference and/or a stream
            # object, we can't put it into an object stream.
            if isinstance(imported, OBJSTREAM_FORBIDDEN):
                obj_stream = None

            # fill in the placeholder
            self.add_object(
                imported, obj_stream=obj_stream, idnum=placeholder.idnum
            )
            return placeholder

    if isinstance(obj, generic.DictionaryObject):
        imported_entries = {}
        for key, value in obj.items():
            imported_entries[key] = self._import_object(
                value, reference_map, obj_stream
            )
        if not isinstance(obj, generic.StreamObject):
            return generic.DictionaryObject(imported_entries)
        # In the vast majority of use cases, I'd expect the content
        # to be available in encoded form by default.
        # By initialising the stream object in this way, we avoid
        # a potentially costly decoding operation.
        return generic.StreamObject(
            imported_entries, encoded_data=obj.encoded_data
        )

    if isinstance(obj, generic.ArrayObject):
        return generic.ArrayObject(
            [self._import_object(item, reference_map, obj_stream)
             for item in obj]
        )

    return obj
def set_font_file(self, writer: BasePdfFileWriter):
    """
    Serialise ``self.tt_font``, add it to ``writer`` as a compressed
    stream with ``/Subtype /CIDFontType2``, and register it as
    ``/FontFile2`` in the font descriptor.

    :param writer:
        The PDF writer to which the font program is added.
    :return:
        The reference to the newly added font stream.
    """
    font_buf = BytesIO()
    self.tt_font.save(font_buf)

    stream_dict = {
        # this is a Type2 TTF font program
        pdf_name('/Subtype'): pdf_name('/CIDFontType2'),
    }
    font_stream = generic.StreamObject(
        stream_dict, stream_data=font_buf.getvalue()
    )
    font_stream.compress()

    font_stream_ref = writer.add_object(font_stream)
    self._font_descriptor[pdf_name('/FontFile2')] = font_stream_ref
    return font_stream_ref
def test_code128_render():
    """
    Smoke test: place a code128 barcode form XObject on page 0 through
    a wrapper content stream.
    """
    writer = IncrementalPdfFileWriter(BytesIO(MINIMAL))
    barcode_box = barcodes.BarcodeBox("code128", "this is a test")
    xobj_ref = writer.add_object(barcode_box.as_form_xobject())

    wrapper_stream = generic.StreamObject(
        stream_data=b'q 1 0 0 1 50 50 cm /Barcode Do Q'
    )
    xobjects = generic.DictionaryObject({pdf_name('/Barcode'): xobj_ref})
    resources = generic.DictionaryObject({pdf_name('/XObject'): xobjects})

    wrapper_ref = writer.add_object(wrapper_stream)
    writer.add_stream_to_page(0, wrapper_ref, resources)
def _cms_objects_to_streams(self, objs, seen, dest):
    """
    Yield a stream reference for each object in ``objs``.

    Objects are keyed by their ``dump()`` output. If ``seen`` already has
    an entry for those bytes, that reference is yielded. Otherwise a new
    stream is added to ``self.writer``, the instance is marked as
    modified, and the new reference is recorded in ``seen`` and appended
    to ``dest`` before being yielded.

    :param objs:
        Iterable of objects exposing a ``dump()`` method.
    :param seen:
        Mapping from dumped bytes to stream references; updated in place.
    :param dest:
        List collecting references to newly created streams.
    """
    for cms_obj in objs:
        encoded = cms_obj.dump()
        try:
            ref = seen[encoded]
        except KeyError:
            ref = self.writer.add_object(
                generic.StreamObject(stream_data=encoded)
            )
            self._mark_modified()
            seen[encoded] = ref
            dest.append(ref)
        yield ref
def set_font_file(self, writer: BasePdfFileWriter):
    """
    Compile the CFF table, add it to ``writer`` as a compressed stream
    with ``/Subtype /CIDFontType0C``, and register it as ``/FontFile3``
    in the font descriptor.

    :param writer:
        The PDF writer to which the font program is added.
    :return:
        The reference to the newly added font stream.
    """
    cff_buf = BytesIO()
    # write the CFF table to the stream
    self.cff.compile(cff_buf, self.tt_font)

    stream_dict = {
        # this is a Type0 CFF font program (see Table 126 in ISO 32000)
        pdf_name('/Subtype'): pdf_name('/CIDFontType0C'),
    }
    font_stream = generic.StreamObject(
        stream_dict, stream_data=cff_buf.getvalue()
    )
    font_stream.compress()

    font_stream_ref = writer.add_object(font_stream)
    self._font_descriptor[pdf_name('/FontFile3')] = font_stream_ref
    return font_stream_ref
def _embed_cert(self, cert):
    """
    Return a stream reference for ``cert``, embedding it if necessary.

    Certificates are looked up in ``self.certs`` by ``issuer_serial``.
    On a miss, the dumped certificate is added to the writer as a stream,
    the instance is marked as modified and the reference is stored.

    :param cert:
        The certificate to embed.
    :return:
        A reference to the stream holding the certificate.
    :raises TypeError:
        If no writer is attached.
    """
    if self.writer is None:
        raise TypeError('This DSS does not support updates.')

    try:
        return self.certs[cert.issuer_serial]
    except KeyError:
        pass  # not embedded yet; fall through and add it

    cert_stream = generic.StreamObject(stream_data=cert.dump())
    new_ref = self.writer.add_object(cert_stream)
    self._mark_modified()
    self.certs[cert.issuer_serial] = new_ref
    return new_ref
def test_xref_stream_parse_entry_types():
    """
    Parse an xref stream with ``/W [1 4 2]`` containing free, regular,
    in-object-stream and undefined entry types; undefined ones are
    expected to be skipped.
    """
    encoded_entries = (
        "0000000000ffff",  # free
        "01000000110000",  # regular objects
        "01000000840000",
        "01000000bc0005",
        "01000001b40000",
        "01000002990000",
        "02000000030001",  # object in stream
        "03deadbeef1337",  # undefined (should be ignored)
        "02000000030002",  # object in stream
        "ffcafebabe0007",  # another undefined one
    )
    xref_data = b''.join(
        binascii.unhexlify(hex_entry) for hex_entry in encoded_entries
    )
    widths = generic.ArrayObject(
        [generic.NumberObject(width) for width in (1, 4, 2)]
    )
    stream_obj = generic.StreamObject(
        dict_data={
            generic.pdf_name('/W'): widths,
            generic.pdf_name('/Size'): 10
        },
        stream_data=xref_data
    )

    def _standard(location, idnum, **extra):
        return XRefEntry(
            xref_type=XRefType.STANDARD, location=location, idnum=idnum,
            **extra
        )

    expected_out = [
        XRefEntry(
            xref_type=XRefType.FREE, location=None, idnum=0,
            generation=0xffff
        ),
        _standard(0x11, 1),
        _standard(0x84, 2),
        _standard(0xbc, 3, generation=5),
        _standard(0x1b4, 4),
        _standard(0x299, 5),
        XRefEntry(
            xref_type=XRefType.IN_OBJ_STREAM,
            location=ObjStreamRef(3, 1), idnum=6
        ),
        XRefEntry(
            xref_type=XRefType.IN_OBJ_STREAM,
            location=ObjStreamRef(3, 2),
            idnum=8  # idnum jump because of undefined entry
        ),
    ]

    parsed = list(parse_xref_stream(stream_obj))
    assert parsed == expected_out
def test_premature_xref_stream_end():
    """
    An xref stream declaring ``/Size 3`` but carrying data for only two
    entries must fail with an 'incomplete entry' read error.
    """
    xref_data = b''.join(
        binascii.unhexlify(hex_entry)
        for hex_entry in ("000000ffff", "0100110000")
    )
    widths = generic.ArrayObject(
        [generic.NumberObject(width) for width in (1, 2, 2)]
    )
    stream_obj = generic.StreamObject(
        dict_data={
            generic.pdf_name('/W'): widths,
            generic.pdf_name('/Size'): 3  # one too many
        },
        stream_data=xref_data
    )

    with pytest.raises(misc.PdfReadError, match='incomplete entry'):
        list(parse_xref_stream(stream_obj))
def test_bogus_metadata_manipulation():
    """
    Tamper with the ``/Metadata`` entry of a signed document in four
    different ways; each must yield ``ModificationLevel.OTHER`` for the
    first embedded signature.
    """
    # test using a double signature created using Adobe Reader
    # (uses object streams, XMP metadata updates and all the fun stuff)
    infile = BytesIO(
        read_all(PDF_DATA_DIR + '/minimal-two-fields-signed-twice.pdf')
    )
    bogus = b'This is bogus data, yay!'

    def _write_and_check(pdf_writer):
        # serialise the tampered document and validate the first signature
        out = BytesIO()
        pdf_writer.write(out)
        r = PdfFileReader(out)
        print(r.get_object(generic.Reference(2, 0, r), revision=3).data)
        sig = r.embedded_signatures[0]
        status = validate_pdf_signature(sig)
        assert status.modification_level == ModificationLevel.OTHER

    # 1) point /Metadata at a brand new stream
    w = IncrementalPdfFileWriter(infile)
    w.root['/Metadata'] = w.add_object(
        generic.StreamObject(stream_data=bogus)
    )
    w.update_root()
    _write_and_check(w)

    # 2) overwrite the content of the existing metadata stream
    w = IncrementalPdfFileWriter(infile)
    metadata_ref = w.root.raw_get('/Metadata')
    metadata_stream: generic.StreamObject = metadata_ref.get_object()
    metadata_stream.strip_filters()
    metadata_stream._data = bogus
    metadata_stream._encoded_data = None
    w.mark_update(metadata_ref)
    _write_and_check(w)

    # 3) replace /Metadata with a direct null object
    w = IncrementalPdfFileWriter(infile)
    w.root['/Metadata'] = generic.NullObject()
    w.update_root()
    _write_and_check(w)

    # 4) replace /Metadata with a reference to a null object
    w = IncrementalPdfFileWriter(infile)
    w.root['/Metadata'] = w.add_object(generic.NullObject())
    w.update_root()
    _write_and_check(w)
def test_code128_render():
    """
    Place a code128 barcode form XObject on page 0 through a wrapper
    content stream and compare the output against the expected file.
    """
    writer = IncrementalPdfFileWriter(BytesIO(MINIMAL))
    barcode_box = barcodes.BarcodeBox("code128", "this is a test")
    xobj_ref = writer.add_object(barcode_box.as_form_xobject())

    wrapper_stream = generic.StreamObject(
        stream_data=b'q 1 0 0 1 50 50 cm /Barcode Do Q'
    )
    xobjects = generic.DictionaryObject({pdf_name('/Barcode'): xobj_ref})
    resources = generic.DictionaryObject({pdf_name('/XObject'): xobjects})

    wrapper_ref = writer.add_object(wrapper_stream)
    writer.add_stream_to_page(0, wrapper_ref, resources)

    # TODO try to read back the code using some kind of barcode scanning
    #  library, perhaps.

    compare_output(writer, f'{EXPECTED_OUTPUT_DIR}/code128-test.pdf')
def test_pubkey_wrong_cert():
    """
    Encrypt for one recipient certificate, then try to decrypt with
    ``PUBKEY_TEST_DECRYPTER``: authentication must fail and reading the
    object must raise.
    """
    # NOTE: this reader is rebound below before it is ever used
    r = PdfFileReader(BytesIO(VECTOR_IMAGE_PDF))
    pdf_writer = writer.PdfFileWriter()

    cert_path = TESTING_CA_DIR + '/intermediate/newcerts/signer2.cert.pem'
    recpt_cert = load_cert_from_pemder(cert_path)

    payload = b'This is test data!'
    ref = pdf_writer.add_object(generic.StreamObject(stream_data=payload))
    pdf_writer.encrypt_pubkey([recpt_cert])

    sink = BytesIO()
    pdf_writer.write(sink)

    r = PdfFileReader(sink)
    result = r.decrypt_pubkey(PUBKEY_TEST_DECRYPTER)
    assert result.status == AuthStatus.FAILED

    with pytest.raises(misc.PdfError):
        r.get_object(ref.reference)
def test_xref_stream_trailing_data():
    """
    An xref stream with extra bytes after the ``/Size`` declared entries
    must fail with a 'Trailing' read error.
    """
    encoded_entries = (
        "0000000000ffff",  # free
        "01000000110000",
        "deadbeef",
    )
    xref_data = b''.join(
        binascii.unhexlify(hex_entry) for hex_entry in encoded_entries
    )
    widths = generic.ArrayObject(
        [generic.NumberObject(width) for width in (1, 4, 2)]
    )
    stream_obj = generic.StreamObject(
        dict_data={
            generic.pdf_name('/W'): widths,
            generic.pdf_name('/Size'): 2
        },
        stream_data=xref_data
    )

    with pytest.raises(misc.PdfReadError, match='Trailing'):
        list(parse_xref_stream(stream_obj))
def test_write_embedded_string_objstream():
    """
    Register glyphs with a ``GlyphAccumulator`` bound to an object stream,
    draw hard-coded CIDs with the resulting font, and check that the font
    is listed in the last xref section's object-stream entries and can be
    retrieved after reading the file back.
    """
    w = IncrementalPdfFileWriter(BytesIO(MINIMAL_XREF))
    obj_stream = w.prepare_object_stream()
    with open(NOTO_SERIF_JP, 'rb') as font_handle:
        accumulator = GlyphAccumulator(
            w, font_handle, font_size=10, obj_stream=obj_stream
        )
    # shape the string, just to register the glyphs as used
    accumulator.shape('テスト')
    # ... but we're not going to use the result

    # hardcoded CIDs
    cid_hx = '0637062a0639'
    font_ref = accumulator.as_resource()

    draw_ops = f'BT /FEmb 18 Tf 0 100 Td <{cid_hx}> Tj ET'.encode('ascii')
    stream_ref = w.add_object(generic.StreamObject(stream_data=draw_ops))
    font_resources = generic.DictionaryObject({
        pdf_name('/Font'): generic.DictionaryObject(
            {pdf_name('/FEmb'): font_ref}
        )
    })
    w.add_stream_to_page(0, stream_ref, resources=font_resources)

    out = BytesIO()
    w.write(out)
    out.seek(0)

    r = PdfFileReader(out)
    page_obj = r.root['/Pages']['/Kids'][0].get_object()
    conts = page_obj['/Contents']
    assert len(conts) == 2
    assert stream_ref.idnum in [c.idnum for c in conts]

    xref_sections = r.xrefs._xref_sections
    last = xref_sections[len(xref_sections) - 1]
    assert font_ref.idnum in last.xref_data.xrefs_in_objstm

    out.seek(0)
    # attempt to grab the font from the object stream
    font_ref.pdf = r
    font = font_ref.get_object()
    assert font['/Type'] == pdf_name('/Font')
def _format_tounicode_cmap(self):
    """
    Build the compressed ToUnicode CMap stream for this font.

    Every entry of ``self._cid_to_unicode`` is turned into a
    ``(cid, UTF-16BE bytes)`` pair; the pairs are formatted by
    ``_segment_cmap`` and placed between a fixed header and footer.

    :return:
        A compressed :class:`~.generic.StreamObject` holding the CMap,
        encoded as ASCII.
    """
    # The CIDSystemInfo is hard-coded to Adobe / UCS2 / 0 with a
    # fixed-width 2-byte code space (<0000> to <FFFF>).
    header = (
        '/CIDInit /ProcSet findresource begin\n'
        '12 dict begin\n'
        'begincmap\n'
        '/CIDSystemInfo <<\n'
        '/Registry (Adobe)\n'
        '/Ordering (UCS2)\n'
        '/Supplement 0\n'
        '>> def\n'
        '/CMapName /Adobe-Identity-UCS2 def\n'
        '/CMapType 2 def\n'
        '1 begincodespacerange\n'
        '<0000> <FFFF>\n'
        'endcodespacerange\n'
    )
    footer = (
        '\nendcmap\n'
        'CMapName\n'
        'currentdict\n'
        '/CMap defineresource\n'
        'pop\nend\nend'
    )

    def _encoded_pairs():
        for cid, codepoints in self._cid_to_unicode.items():
            yield cid, codepoints.encode('utf-16be')

    body = '\n'.join(_segment_cmap(_encoded_pairs()))

    cmap_source = header + body + footer
    stream = generic.StreamObject(stream_data=cmap_source.encode('ascii'))
    stream.compress()
    return stream