def test_text_filter(resources, outdir):
    """Check that dropping ``Tj`` operators removes extractable text.

    The test runs ``pdftotext`` on a known input file to confirm it contains
    text, rewrites the first page's content stream without any ``Tj``
    instructions, saves the result to ``outdir / 'notext.pdf'`` and confirms
    that ``pdftotext`` now produces no text for it.

    Args:
        resources: Directory-like path holding the test PDF (joined with ``/``).
        outdir: Directory-like path where ``notext.pdf`` is written.
    """

    def pdftotext(pdf_path):
        # Return the stripped text pdftotext writes to stdout for pdf_path.
        # check=True makes a failing pdftotext raise instead of looking like
        # an empty document.
        result = run(
            ['pdftotext', str(pdf_path), '-'],
            check=True,
            stdout=PIPE,
            encoding='utf-8',
        )
        return result.stdout.strip()

    input_pdf = resources / 'veraPDF test suite 6-2-10-t02-pass-a.pdf'
    output_pdf = outdir / 'notext.pdf'

    # Ensure the test PDF has text we can find
    assert pdftotext(input_pdf) != '', "Need input test file that contains text"

    # Use the Pdf as a context manager so the input file is closed even if
    # an assertion or the save fails.
    with Pdf.open(input_pdf) as pdf:
        page = pdf.pages[0]

        keep = []
        for operands, command in parse_content_stream(page):
            if command == Operator('Tj'):
                print("skipping Tj")
                continue
            keep.append((operands, command))

        new_stream = Stream(pdf, keep)
        print(new_stream.read_bytes())  # pylint: disable=no-member
        page['/Contents'] = new_stream
        # NOTE(review): the reason for rotating is not stated here; presumably
        # it makes the modified page easy to tell apart - confirm.
        page['/Rotate'] = 90

        # Name the flag explicitly rather than relying on positional order.
        pdf.save(output_pdf, static_id=True)

    assert pdftotext(output_pdf) == '', "Expected text to be removed"
def test_stream_dict_oneshot():
    """Create streams with dictionary entries supplied in the same call.

    Covers three spellings: keyword arguments to ``Stream``, a dictionary
    passed to ``Stream``, and keyword arguments to ``pdf.make_stream``.
    """
    doc = pikepdf.new()
    entries = {'One': 1, 'Two': 2}

    from_kwargs = Stream(doc, b'12345', **entries)
    from_mapping = Stream(doc, b'67890', {'/Three': 3, '/Four': 4})
    from_factory = doc.make_stream(b'abcdef', **entries)

    # Keyword-built stream exposes both its entry and its payload.
    assert from_kwargs.One == 1
    assert from_kwargs.read_bytes() == b'12345'
    # Dictionary-built and factory-built streams expose their entries too.
    assert from_mapping.Three == 3
    assert from_factory.One == 1
def test_data_decoding_errors(filter_: str, data: bytes, msg: str):
    """Reading a stream with undecodable data raises ``DataDecodingError``.

    Args:
        filter_: Filter name used to build the stream's ``Filter`` entry.
        data: Raw bytes stored as the stream's content.
        msg: Pattern the raised error's message must match.

    NOTE(review): the arguments are presumably supplied by a parametrize
    decorator that is not visible in this section - confirm.
    """
    doc = Pdf.new()
    filter_name = Name(filter_)
    broken_stream = Stream(doc, data, Filter=filter_name)

    # Decoding only happens on read, so that is the call expected to fail.
    with pytest.raises(DataDecodingError, match=msg):
        broken_stream.read_bytes()