def _test_resource(class_, basename, check, **kwargs):
    """Common code for testing the HTML and CSS classes.

    Build ``class_`` from the resource called ``basename`` in every supported
    way (positional argument, ``guess=``, ``filename=``, ``url=``,
    ``file_obj=`` and ``string=``) and hand each resulting object to
    ``check``.

    :param class_: Class under test, called with the various sources.
    :param basename: Resource name, resolved with ``resource_filename``.
    :param check: Callable invoked with every object that gets built.
    :param kwargs: Extra keyword arguments forwarded to ``class_``. A
        ``base_url`` entry is dropped before the ``string=`` checks, which
        pass their own.

    The working directory is switched to this module's directory while the
    relative names are exercised, and restored afterwards so that the change
    does not leak into whatever runs next.
    """
    absolute_filename = resource_filename(basename)
    absolute_path = Path(absolute_filename)
    url = path2url(absolute_filename)

    # Absolute names, as plain strings and as ``Path`` objects.
    check(class_(absolute_filename, **kwargs))
    check(class_(absolute_path, **kwargs))
    check(class_(guess=absolute_filename, **kwargs))
    check(class_(guess=absolute_path, **kwargs))
    check(class_(filename=absolute_filename, **kwargs))
    check(class_(filename=absolute_path, **kwargs))

    # URLs; the explicit ``url=`` form is built from a bytes filename.
    check(class_(url, **kwargs))
    check(class_(guess=url, **kwargs))
    url = path2url(absolute_filename.encode('utf-8'))
    check(class_(url=url, **kwargs))

    # Binary file objects.
    with open(absolute_filename, 'rb') as fd:
        check(class_(fd, **kwargs))
    with open(absolute_filename, 'rb') as fd:
        check(class_(guess=fd, **kwargs))
    with open(absolute_filename, 'rb') as fd:
        check(class_(file_obj=fd, **kwargs))
    with open(absolute_filename, 'rb') as fd:
        content = fd.read()

    # Remember where we were; the current directory may already be gone,
    # in which case there is nothing to restore.
    try:
        previous_cwd = os.getcwd()
    except OSError:
        previous_cwd = None
    py.path.local(os.path.dirname(__file__)).chdir()
    try:
        relative_filename = os.path.join('resources', basename)
        relative_path = Path(relative_filename)
        check(class_(relative_filename, **kwargs))
        check(class_(relative_path, **kwargs))

        # The string checks supply ``base_url`` themselves.
        kwargs.pop('base_url', None)
        check(class_(string=content, base_url=relative_filename, **kwargs))
        encoding = kwargs.get('encoding') or 'utf8'
        check(class_(string=content.decode(encoding),  # unicode
                     base_url=relative_filename, **kwargs))
    finally:
        if previous_cwd is not None:
            os.chdir(previous_cwd)

    # Conflicting sources must be rejected.
    with pytest.raises(TypeError):
        class_(filename='foo', url='bar')
def add_css(self, font_config):
    """Replace the document ``<head>`` with a title and a stylesheet link.

    The head of ``self.html`` is emptied and then receives a ``<title>``
    holding ``self.title`` and a ``<link rel="stylesheet">`` whose ``href``
    is the URL ``urls.path2url`` returns for ``self.design``.

    :param font_config: Not used by this method; kept so the signature
        stays unchanged for callers.
    """
    # Local import: keeps this method self-contained.
    from html import escape

    css_url = urls.path2url(self.design)
    self.html.head.clear()
    # Both values end up inside markup, so escape them: a title containing
    # ``<``, ``&`` or ``"`` must not be able to break the generated tags.
    head_markup = (
        '<title>{}</title>'
        '<link rel="stylesheet" href="{}" type="text/css">'
    ).format(escape(str(self.title)), escape(str(css_url), quote=True))
    css_tag = BeautifulSoup(head_markup, 'html5lib')
    self.html.head.insert(0, css_tag)
def test_embedded_files_attachments(tmpdir):
    """Check that attachments from every source show up in the PDF bytes.

    Attachments come from ``<link rel="attachment">`` elements (a ``data:``
    URL, an absolute ``file://`` URL and a relative file name) and from the
    ``attachments`` argument of ``write_pdf``.
    """
    def md5_hex(data):
        """Return the MD5 hex digest of ``data`` as ASCII bytes."""
        return hashlib.md5(data).hexdigest().encode('ascii')

    # File referenced through an absolute URL.
    absolute_file = tmpdir.join('some_file.txt').strpath
    absolute_data = b'12345678'
    with open(absolute_file, 'wb') as stream:
        stream.write(absolute_data)
    absolute_url = path2url(absolute_file)
    assert absolute_url.startswith('file://')

    # File referenced by its bare name, relative to ``base_url``.
    relative_file = tmpdir.join('äöü.txt').strpath
    relative_data = b'abcdefgh'
    with open(relative_file, 'wb') as stream:
        stream.write(relative_data)

    source = '''
        <title>Test document</title>
        <meta charset="utf-8">
        <link
          rel="attachment"
          title="some file attachment äöü"
          href="data:,hi%20there">
        <link rel="attachment" href="{0}">
        <link rel="attachment" href="{1}">
        <h1>Heading 1</h1>
        <h2>Heading 2</h2>
    '''.format(absolute_url, os.path.basename(relative_file))
    document = FakeHTML(string=source, base_url=tmpdir.strpath)
    pdf = document.write_pdf(
        attachments=[
            Attachment('data:,oob attachment', description='Hello'),
            'data:,raw URL',
            io.BytesIO(b'file like obj'),
        ]
    )

    # ``data:`` URL attachment declared in the document.
    assert b'<' + md5_hex(b'hi there') + b'>' in pdf
    assert b'/F ()' in pdf
    assert b'/UF (attachment.bin)' in pdf
    description = (
        BOM_UTF16_BE + 'some file attachment äöü'.encode('utf-16-be'))
    assert b'/Desc <' + description.hex().encode('ascii') + b'>' in pdf

    # Attachment referenced by absolute URL.
    assert md5_hex(absolute_data) in pdf
    assert os.path.basename(absolute_file).encode('ascii') in pdf

    # Attachment referenced by relative name.
    assert md5_hex(relative_data) in pdf
    description = (
        BOM_UTF16_BE + 'some file attachment äöü'.encode('utf-16-be'))
    assert b'/Desc <' + description.hex().encode('ascii') + b'>' in pdf

    # Attachments passed to ``write_pdf``.
    assert md5_hex(b'oob attachment') in pdf
    assert b'/Desc (Hello)' in pdf
    assert md5_hex(b'raw URL') in pdf
    assert md5_hex(b'file like obj') in pdf

    assert b'/EmbeddedFiles' in pdf
    assert b'/Outlines' in pdf
def on_post_page(self, output_content, page, config, **kwargs):
    """Hand a rendered page to the PDF generator and link the result.

    The page's output directory is created if needed, a base URL is derived
    from that directory and the source file's name without extension, and
    the page is passed to ``self.generator.add_article``. When that call
    returns a truthy PDF URL, the HTML is passed through ``modify_html``.

    :param output_content: Rendered HTML of the page.
    :param page: Page object; either the ``page.file`` attributes or the
        older ``abs_output_path`` / ``input_path`` attributes are read.
    :param config: Not used here.
    :param kwargs: Not used here.
    :returns: The HTML to write, modified or not.
    """
    try:
        destination, source = page.file.abs_dest_path, page.file.src_path
    except AttributeError:
        # Page objects without ``file``: use the older attribute names.
        destination, source = page.abs_output_path, page.input_path

    output_dir = os.path.dirname(destination)
    os.makedirs(output_dir, exist_ok=True)

    stem, _extension = os.path.splitext(os.path.basename(source))
    base_url = urls.path2url(os.path.join(output_dir, stem))

    pdf_url = self.generator.add_article(output_content, page, base_url)
    if not pdf_url:
        return output_content
    return modify_html(output_content, pdf_url)
def add_head(self):
    """Rebuild the ``<head>`` of ``self.html`` from title, metadata and CSS.

    The new head contains:

    * a ``<title>`` with ``self.title``;
    * ``<meta>`` tags for the author (``self.config['author']``, falling
      back to ``self.mkdconfig['site_author']``) and for
      ``self.mkdconfig['site_description']``, each only when truthy;
    * one stylesheet ``<link>`` for each of ``self.design`` and
      ``self.design_extra`` that is truthy.

    All interpolated values are HTML-escaped, so quotes, ``<`` or ``&`` in
    the configuration cannot break the generated markup.
    """
    # Local import: keeps this method self-contained.
    from html import escape

    lines = ['<title>{}</title>'.format(escape(str(self.title)))]

    metadata = (
        ("author", self.config['author'] or self.mkdconfig['site_author']),
        ("description", self.mkdconfig['site_description']),
    )
    for key, val in metadata:
        if val:
            # ``quote=True`` also escapes ``"``, which would otherwise end
            # the ``content`` attribute early.
            lines.append('<meta name="{}" content="{}">'.format(
                key, escape(str(val), quote=True)))

    css_tmpl = '<link rel="stylesheet" href="{}" type="text/css">'
    for css in (self.design, self.design_extra):
        if css:
            css_url = urls.path2url(css)
            lines.append(css_tmpl.format(escape(str(css_url), quote=True)))

    head = BeautifulSoup('\n'.join(lines), 'html5lib')
    self.html.head.clear()
    self.html.head.insert(0, head)
def _soup_from_content(self, content: str, page):
    """Parse ``content`` and prepare it through ``prep_combined``.

    The page's output directory is created if needed; the base URL is
    built from that directory and the source file's name without
    extension.

    :param content: HTML of the page, parsed with ``html.parser``.
    :param page: Page object supplying the destination and source paths.
    :returns: Whatever ``prep_combined`` returns for the parsed document.
    """
    parsed = BeautifulSoup(content, 'html.parser')

    try:
        destination, source = page.file.abs_dest_path, page.file.src_path
    except AttributeError:
        # Page objects without ``file``: use the older attribute names.
        # NOTE(review): ``page.file.url`` is still read below, so this
        # fallback looks unreachable in practice - confirm.
        destination, source = page.abs_output_path, page.input_path

    output_dir = os.path.dirname(destination)
    os.makedirs(output_dir, exist_ok=True)

    stem, _extension = os.path.splitext(os.path.basename(source))
    base_url = urls.path2url(os.path.join(output_dir, stem))
    return prep_combined(parsed, base_url, page.file.url)
def test_image_image():
    """Render an SVG ``<image>`` that points at ``pattern.png`` by URL."""
    expected_pixels = '''
        rBBB
        BBBB
        BBBB
        BBBB
    '''
    html_template = '''
      <style>
        @page { size: 4px 4px }
        svg { display: block }
      </style>
      <svg width="4px" height="4px" xmlns="http://www.w3.org/2000/svg">
        <image xlink:href="%s" />
      </svg>
    '''
    image_url = path2url(resource_filename('pattern.png'))
    assert_pixels(
        'test_image_image', 4, 4, expected_pixels, html_template % image_url)
def on_post_page(self, output_content, page, config):
    """Convert a rendered page to PDF and add a link to it in the HTML.

    Does nothing when ``self.enabled`` is false. Otherwise the page is
    counted in ``self.num_files``, timed into ``self.total_time``, and
    either added to the combined document (``self.combined``) or written as
    its own PDF next to the page output. Any exception raised while
    converting is reported on stderr and counted in ``self.num_errors``;
    the page HTML is still returned.

    :param output_content: Rendered HTML of the page.
    :param page: Page object supplying destination and source paths.
    :param config: Not used here.
    :returns: The HTML to write, with a PDF link when conversion succeeded.
    """
    if not self.enabled:
        return output_content

    started = timer()
    self.num_files += 1

    try:
        destination, source = page.file.abs_dest_path, page.file.src_path
    except AttributeError:
        # Support for mkdocs <1.0
        destination, source = page.abs_output_path, page.input_path

    output_dir = os.path.dirname(destination)
    os.makedirs(output_dir, exist_ok=True)

    stem, _extension = os.path.splitext(os.path.basename(source))

    from weasyprint import urls
    base_url = urls.path2url(os.path.join(output_dir, stem))
    pdf_name = stem + '.pdf'

    try:
        if not self.combined:
            # One PDF per page, written beside the page output.
            self.renderer.write_pdf(
                output_content, base_url, os.path.join(output_dir, pdf_name))
            output_content = self.renderer.add_link(output_content, pdf_name)
        else:
            # Collect the page for the combined document and link to it.
            self.renderer.add_doc(output_content, base_url, page.file.url)
            combined_path = self.get_path_to_pdf_from(page.file.dest_path)
            output_content = self.renderer.add_link(
                output_content, combined_path)
    except Exception as e:
        print('Error converting {} to PDF: {}'.format(source, e),
              file=sys.stderr)
        self.num_errors += 1

    finished = timer()
    self.total_time += (finished - started)

    return output_content
def test_command_line_render(tmpdir):
    """Compare PDFs obtained through ``_run`` with ones rendered directly.

    Reference bytes are produced with ``FakeHTML(...).write_pdf()``; each
    ``_run`` invocation is then expected to yield the same bytes (or
    different ones where the stylesheet is missing).
    """
    css = b'''
        @page { margin: 2px; size: 8px; background: #fff }
        @media screen { img { transform: rotate(-90deg) } }
        body { margin: 0; font-size: 0 }
    '''
    html = b'<body><img src=pattern.png>'
    combined = b'<style>' + css + b'</style>' + html
    linked = b'<link rel=stylesheet href=style.css>' + html

    # Reference output, rendered with the resources directory as cwd.
    py.path.local(resource_filename('')).chdir()
    reference_document = FakeHTML(string=combined, base_url='dummy.html')
    reference_pdf = reference_document.write_pdf()
    rotated_reference_pdf = FakeHTML(
        string=combined, base_url='dummy.html',
        media_type='screen').write_pdf()

    # Everything below runs from the temporary directory.
    tmpdir.chdir()
    with open(resource_filename('pattern.png'), 'rb') as pattern_fd:
        pattern_bytes = pattern_fd.read()
    fixtures = (
        ('pattern.png', pattern_bytes),
        ('no_css.html', html),
        ('combined.html', combined),
        ('combined-UTF-16BE.html',
         combined.decode('ascii').encode('UTF-16BE')),
        ('linked.html', linked),
        ('style.css', css),
    )
    for fixture_name, fixture_bytes in fixtures:
        tmpdir.join(fixture_name).write_binary(fixture_bytes)

    def produced(name):
        """Return the bytes of the output file ``name`` in ``tmpdir``."""
        return tmpdir.join(name).read_binary()

    _run('combined.html out2.pdf')
    assert produced('out2.pdf') == reference_pdf

    _run('combined-UTF-16BE.html out3.pdf --encoding UTF-16BE')
    assert produced('out3.pdf') == reference_pdf

    _run(tmpdir.join('combined.html').strpath + ' out4.pdf')
    assert produced('out4.pdf') == reference_pdf

    _run(path2url(tmpdir.join('combined.html').strpath) + ' out5.pdf')
    assert produced('out5.pdf') == reference_pdf

    _run('linked.html --debug out6.pdf')  # test relative URLs
    assert produced('out6.pdf') == reference_pdf

    _run('combined.html --verbose out7')
    _run('combined.html --quiet out8')
    assert produced('out7') == reference_pdf
    assert produced('out8') == reference_pdf

    _run('no_css.html out9.pdf')
    _run('no_css.html out10.pdf -s style.css')
    assert produced('out9.pdf') != reference_pdf
    assert produced('out10.pdf') == reference_pdf

    # ``-`` stands for standard input / standard output.
    stdout = _run('combined.html -')
    assert stdout == reference_pdf

    _run('- out11.pdf', stdin=combined)
    assert produced('out11.pdf') == reference_pdf

    stdout = _run('- -', stdin=combined)
    assert stdout == reference_pdf

    _run('combined.html out13.pdf --media-type screen')
    _run('combined.html out12.pdf -m screen')
    _run('linked.html out14.pdf -m screen')
    assert produced('out12.pdf') == rotated_reference_pdf
    assert produced('out13.pdf') == rotated_reference_pdf
    assert produced('out14.pdf') == rotated_reference_pdf

    # Attachments added with ``-a``.
    stdout = _run('combined.html -')
    assert stdout.count(b'attachment') == 0
    stdout = _run('combined.html -')
    assert stdout.count(b'attachment') == 0
    stdout = _run('-a pattern.png combined.html -')
    assert stdout.count(b'attachment') == 1
    stdout = _run('-a style.css -a pattern.png combined.html -')
    assert stdout.count(b'attachment') == 2

    # From a subdirectory the relative image can no longer be found.
    os.mkdir('subdirectory')
    py.path.local('subdirectory').chdir()
    with capture_logs() as logs:
        stdout = _run('- -', stdin=combined)
    assert len(logs) == 1
    assert logs[0].startswith('ERROR: Failed to load image')
    assert stdout.startswith(b'%PDF')

    with capture_logs() as logs:
        stdout = _run('--base-url= - -', stdin=combined)
    assert len(logs) == 1
    assert logs[0].startswith(
        'ERROR: Relative URI reference without a base URI')
    assert stdout.startswith(b'%PDF')

    stdout = _run('--base-url .. - -', stdin=combined)
    assert stdout == reference_pdf

    with pytest.raises(SystemExit):
        _run('--info')

    with pytest.raises(SystemExit):
        _run('--version')
(1, 'I', (0, 150), 'open'), ]], False), ('<h1>é', [ [(1, 'é', (0, 0), 'open')] ], False), (''' <h1 style="transform: translateX(50px)">! ''', [ [(1, '!', (50, 0), 'open')] ], False), (''' <style> img { display: block; bookmark-label: attr(alt); bookmark-level: 1 } </style> <img src="%s" alt="Chocolate" /> ''' % path2url(resource_filename('pattern.png')), [[(1, 'Chocolate', (0, 0), 'open')]], False), (''' <h1 style="transform-origin: 0 0; transform: rotate(90deg) translateX(50px)">! ''', [[(1, '!', (0, 50), 'open')]], True), (''' <body style="transform-origin: 0 0; transform: rotate(90deg)"> <h1 style="transform: translateX(50px)">! ''', [[(1, '!', (0, 50), 'open')]], True), )) @assert_no_logs def test_assert_bookmarks(html, expected_by_page, round_): document = FakeHTML(string=html).render() if round_: _round_meta(document.pages)
}.items()) class FakeHTML(HTML): """Like weasyprint.HTML, but with a lighter UA stylesheet.""" def _ua_stylesheets(self): return [TEST_UA_STYLESHEET] def resource_filename(basename): """Return the absolute path of the resource called ``basename``.""" return os.path.join(os.path.dirname(__file__), 'resources', basename) # Dummy filename, but in the right directory. BASE_URL = path2url(resource_filename('<test>')) class CallbackHandler(logging.Handler): """A logging handler that calls a function for every message.""" def __init__(self, callback): logging.Handler.__init__(self) self.emit = callback @contextlib.contextmanager def capture_logs(): """Return a context manager that captures all logged messages.""" logger = LOGGER messages = []
def test_annotate_document():
    """Check computed styles of ``doc1.html`` for a handful of elements.

    The document uses ``mini_ua.css`` as its user-agent stylesheet and
    ``user.css`` as a user stylesheet.
    """
    document = FakeHTML(resource_filename('doc1.html'))

    def _mini_ua_stylesheets():
        return [CSS(resource_filename('mini_ua.css'))]

    document._ua_stylesheets = _mini_ua_stylesheets
    style_for = get_all_computed_styles(
        document, user_stylesheets=[CSS(resource_filename('user.css'))])

    # Element objects behave as lists of their children
    _head, body = document.etree_element
    h1_element, p_element, ul_element, div_element = body
    li_0_element, _li_1 = ul_element
    a_element, = li_0_element
    span1_element, = div_element
    span2_element, = span1_element

    h1 = style_for(h1_element)
    p = style_for(p_element)
    ul = style_for(ul_element)
    li_0 = style_for(li_0_element)
    div = style_for(div_element)
    after = style_for(a_element, 'after')
    a = style_for(a_element)
    span1 = style_for(span1_element)
    span2 = style_for(span2_element)

    logo_url = path2url(resource_filename('logo_small.png'))
    assert h1['background_image'] == (('url', logo_url),)

    assert h1['font_weight'] == 700
    assert h1['font_size'] == 40  # 2em

    # x-large * initial = 3/2 * 16 = 24
    assert p['margin_top'] == (24, 'px')
    assert p['margin_right'] == (0, 'px')
    assert p['margin_bottom'] == (24, 'px')
    assert p['margin_left'] == (0, 'px')
    assert p['background_color'] == 'currentColor'

    # 2em * 1.25ex = 2 * 20 * 1.25 * 0.8 = 40
    # 2.5ex * 1.25ex = 2.5 * 0.8 * 20 * 1.25 * 0.8 = 40
    # TODO: ex unit doesn't work with @font-face fonts, see computed_values.py
    # assert ul['margin_top'] == (40, 'px')
    # assert ul['margin_right'] == (40, 'px')
    # assert ul['margin_bottom'] == (40, 'px')
    # assert ul['margin_left'] == (40, 'px')

    assert ul['font_weight'] == 400
    # thick = 5px, 0.25 inches = 96*.25 = 24px
    assert ul['border_top_width'] == 0
    assert ul['border_right_width'] == 5
    assert ul['border_bottom_width'] == 0
    assert ul['border_left_width'] == 24

    assert li_0['font_weight'] == 700
    assert li_0['font_size'] == 8  # 6pt
    assert li_0['margin_top'] == (16, 'px')  # 2em
    assert li_0['margin_right'] == (0, 'px')
    assert li_0['margin_bottom'] == (16, 'px')
    assert li_0['margin_left'] == (32, 'px')  # 4em

    assert a['text_decoration_line'] == {'underline'}
    assert a['font_weight'] == 900
    assert a['font_size'] == 24  # 300% of 8px
    assert a['padding_top'] == (1, 'px')
    assert a['padding_right'] == (2, 'px')
    assert a['padding_bottom'] == (3, 'px')
    assert a['padding_left'] == (4, 'px')
    assert a['border_top_width'] == 42
    assert a['border_bottom_width'] == 42

    assert a['color'] == (1, 0, 0, 1)
    assert a['border_top_color'] == 'currentColor'

    assert div['font_size'] == 40  # 2 * 20px
    assert span1['width'] == (160, 'px')  # 10 * 16px (root default is 16px)
    assert span1['height'] == (400, 'px')  # 10 * (2 * 20px)
    assert span2['font_size'] == 32

    # The href attr should be as in the source, not made absolute.
    expected_content = (
        ('string', ' ['), ('string', 'home.html'), ('string', ']'))
    assert after['content'] == expected_content
    assert after['background_color'] == (1, 0, 0, 1)
    assert after['border_top_width'] == 42
    assert after['border_bottom_width'] == 3
def test_command_line_render(tmpdir):
    """Compare PDFs obtained through ``_run`` with ones rendered directly.

    Reference bytes are produced with ``FakeHTML(...).write_pdf()``; each
    ``_run`` invocation is then expected to yield the same bytes (or
    different ones where the stylesheet is missing). The ``-O`` options
    are also run, and three of the resulting sizes are compared.
    """
    css = b'''
        @page { margin: 2px; size: 8px; background: #fff }
        @media screen { img { transform: rotate(-90deg) } }
        body { margin: 0; font-size: 0 }
    '''
    html = b'<body><img src=pattern.png>'
    combined = b'<style>' + css + b'</style>' + html
    linked = b'<link rel=stylesheet href=style.css>' + html
    not_optimized = b'<body>a<img src="not-optimized.jpg">'

    # Work from the temporary directory, with the images copied into it.
    tmpdir.chdir()
    for image_name in ('pattern.png', 'not-optimized.jpg'):
        image_bytes = Path(resource_filename(image_name)).read_bytes()
        tmpdir.join(image_name).write_binary(image_bytes)

    # Reference
    reference_document = FakeHTML(string=combined, base_url='dummy.html')
    reference_pdf = reference_document.write_pdf()
    rotated_reference_pdf = FakeHTML(
        string=combined, base_url='dummy.html',
        media_type='screen').write_pdf()

    fixtures = (
        ('no_css.html', html),
        ('combined.html', combined),
        ('combined-UTF-16BE.html',
         combined.decode('ascii').encode('UTF-16BE')),
        ('linked.html', linked),
        ('not_optimized.html', not_optimized),
        ('style.css', css),
    )
    for fixture_name, fixture_bytes in fixtures:
        tmpdir.join(fixture_name).write_binary(fixture_bytes)

    def produced(name):
        """Return the bytes of the output file ``name`` in ``tmpdir``."""
        return tmpdir.join(name).read_binary()

    _run('combined.html out2.pdf')
    assert produced('out2.pdf') == reference_pdf

    _run('combined-UTF-16BE.html out3.pdf --encoding UTF-16BE')
    assert produced('out3.pdf') == reference_pdf

    _run(tmpdir.join('combined.html').strpath + ' out4.pdf')
    assert produced('out4.pdf') == reference_pdf

    _run(path2url(tmpdir.join('combined.html').strpath) + ' out5.pdf')
    assert produced('out5.pdf') == reference_pdf

    _run('linked.html --debug out6.pdf')  # test relative URLs
    assert produced('out6.pdf') == reference_pdf

    _run('combined.html --verbose out7')
    _run('combined.html --quiet out8')
    assert produced('out7') == reference_pdf
    assert produced('out8') == reference_pdf

    _run('no_css.html out9.pdf')
    _run('no_css.html out10.pdf -s style.css')
    assert produced('out9.pdf') != reference_pdf
    assert produced('out10.pdf') == reference_pdf

    # ``-`` stands for standard input / standard output.
    stdout = _run('combined.html -')
    assert stdout == reference_pdf

    _run('- out11.pdf', stdin=combined)
    assert produced('out11.pdf') == reference_pdf

    stdout = _run('- -', stdin=combined)
    assert stdout == reference_pdf

    _run('combined.html out13.pdf --media-type screen')
    _run('combined.html out12.pdf -m screen')
    _run('linked.html out14.pdf -m screen')
    assert produced('out12.pdf') == rotated_reference_pdf
    assert produced('out13.pdf') == rotated_reference_pdf
    assert produced('out14.pdf') == rotated_reference_pdf

    _run('not_optimized.html out15.pdf')
    _run('not_optimized.html out16.pdf -O images')
    _run('not_optimized.html out17.pdf -O fonts')
    _run('not_optimized.html out18.pdf -O fonts -O images')
    _run('not_optimized.html out19.pdf -O all')
    _run('not_optimized.html out20.pdf -O none')
    _run('not_optimized.html out21.pdf -O none -O all')
    _run('not_optimized.html out22.pdf -O all -O none')
    # TODO: test that equivalent CLI options give equivalent PDF sizes,
    # unfortunately font optimization makes PDF generation not reproducible
    assert (
        len(produced('out16.pdf')) <
        len(produced('out15.pdf')) <
        len(produced('out20.pdf')))

    # Attachments added with ``-a``.
    stdout = _run('combined.html -')
    assert stdout.count(b'attachment') == 0
    stdout = _run('combined.html -')
    assert stdout.count(b'attachment') == 0
    stdout = _run('-a pattern.png combined.html -')
    assert stdout.count(b'attachment') == 1
    stdout = _run('-a style.css -a pattern.png combined.html -')
    assert stdout.count(b'attachment') == 2

    # From a subdirectory the relative image can no longer be found.
    os.mkdir('subdirectory')
    py.path.local('subdirectory').chdir()
    with capture_logs() as logs:
        stdout = _run('- -', stdin=combined)
    assert len(logs) == 1
    assert logs[0].startswith('ERROR: Failed to load image')
    assert stdout.startswith(b'%PDF')

    with capture_logs() as logs:
        stdout = _run('--base-url= - -', stdin=combined)
    assert len(logs) == 1
    assert logs[0].startswith(
        'ERROR: Relative URI reference without a base URI')
    assert stdout.startswith(b'%PDF')

    stdout = _run('--base-url .. - -', stdin=combined)
    assert stdout == reference_pdf

    with pytest.raises(SystemExit):
        _run('--info')

    with pytest.raises(SystemExit):
        _run('--version')