def figures_new1():
    """Smoke test: process nested <figure> elements and print the result.

    Builds a soup from a figure containing two sub-figures, runs
    make_figure_from_figureid_attr() on it and prints the resulting HTML.
    Nothing is asserted; the test only checks that no exception is raised.
    """
    # The literal is split over several raw pieces only for readability;
    # the concatenation is the same text as a single literal.
    markup = (
        r""" <figure> <figcaption>Main caption</figcaption> """
        r"""<figure> <figcaption>Hello</figcaption> """
        r"""<img style='width:8em' src="duckietown-logo-transparent.png"/> """
        r"""</figure> """
        r"""<figure> <figcaption>second</figcaption> """
        r"""<img style='width:8em' src="duckietown-logo-transparent.png"/> """
        r"""</figure> </figure> """
    )
    soup = bs(markup)
    result = AugmentedResult()
    where = LocationUnknown()
    make_figure_from_figureid_attr(soup, result, where)
    # nfigs = len(list(soup.select('figure')))
    rendered = to_html_stripping_fragment(soup)
    print(rendered)
def elements_abbrevs_test1():
    """A plain "TODO:" paragraph becomes a p.todo wrapped in div.todo-wrap."""
    source = "<p>TODO: paragraph</p>"
    expected = """<div class="todo-wrap"><p class="todo">paragraph</p></div>"""

    soup = bs(source.strip())
    substitute_special_paragraphs(soup)

    assert_equal(to_html_stripping_fragment(soup), expected)
def elements_abbrevs_test2():
    """A "TODO:" paragraph with inline markup is wrapped in div.todo-wrap.

    The expected output keeps the "TODO: " prefix and the <strong> child.
    """
    source = "<p>TODO: paragraph <strong>Strong</strong></p>"
    expected = (
        """<div class="todo-wrap"><p class="todo">"""
        """TODO: paragraph <strong>Strong</strong></p></div>"""
    )

    soup = bs(source.strip())
    result = AugmentedResult()
    where = LocationUnknown()
    substitute_special_paragraphs(soup, result, where)

    assert_equal(to_html_stripping_fragment(soup), expected)
def make_last_modified(files_contents, nmax=100):
    """Build the "Last modified" section listing recently changed documents.

    files_contents: iterable of argument tuples; each one is expanded into
        DocToJoin(*entry). Entries whose source_info is falsy are skipped.
    nmax: maximum number of documents listed (most recent first).

    Returns an AugmentedResult whose result is the HTML string of an <h1>
    with id "sec:last-modified" followed by a <ul>. Each <li> holds the
    formatted date, then either a link to the document's main header or
    the text "File <docname>", then the author name in parentheses.
    """
    res = AugmentedResult()

    def modification_time(doc):
        return doc.source_info.last_modified

    docs = [DocToJoin(*entry) for entry in files_contents]
    docs = [doc for doc in docs if doc.source_info]
    docs = sorted(docs, key=modification_time, reverse=True)

    heading = Tag(name='h1')
    heading.append('Last modified')
    heading.attrs['id'] = 'sec:last-modified'

    root = Tag(name='fragment')
    for child in ('\n', heading, '\n'):
        root.append(child)

    listing = Tag(name='ul')
    listing.append('\n')
    for doc in docs[:nmax]:
        info = doc.source_info
        # NOTE(review): last_modified is handed straight to time.strftime,
        # so presumably it is a struct_time / time tuple -- confirm.
        stamp = time.strftime("%a, %b %d", info.last_modified)  # %H:%M

        header_id = get_main_header(bs(doc.contents))
        if header_id is None:
            # No main header to link to: fall back to the document name.
            target = "File %s" % doc.docname
        else:
            target = Tag(name='a')
            target.attrs['href'] = '#' + header_id
            target.attrs['class'] = MCDPManualConstants.CLASS_NUMBER_NAME

        item = Tag(name='li')
        for piece in (stamp, ': ', target, ' (', info.author.name, ')'):
            item.append(piece)

        listing.append(item)
        listing.append('\n')

    root.append(listing)
    res.set_result(to_html_stripping_fragment(root))
    return res
def get_svg_for_visualization(e, image_source, library_name, spec, name,
                              thing, refined, make_relative, library):
    """Return the SVG markup for `thing`, cleaned up and with model links.

    The SVG is obtained from spec.get_png_data_syntax(..., data_format='svg').
    Its width/height attributes are turned into max-width/max-height styles,
    doctype and titles are removed, and add_html_links_to_svg() is given a
    callback that maps an identifier to the syntax-view URL of the model it
    refers to (when `refined` is given and knows the identifier).

    e: object providing repo_name and shelf_name for the generated links.
    library_name: library used in a link when the table entry has no libname.
    refined: if not None, passed to identifier2ndp() to build the link table.
    make_relative: callable applied to each absolute href before returning it.

    Raises DPInternalError if the returned data contains no <svg> element.
    """
    raw_svg = spec.get_png_data_syntax(image_source=image_source, name=name,
                                       thing=thing, data_format='svg',
                                       library=library)
    fragment = bs(raw_svg)
    svg = fragment.svg
    if svg is None:
        msg = 'Cannot interpret fragment.'
        msg += '\n' + indent(raw_svg, '> ')
        raise DPInternalError(msg)
    assert svg is not None

    # Fixed sizes become upper bounds expressed as CSS.
    max_sizes = {}
    for dimension in ('width', 'height'):
        if dimension in svg.attrs:
            max_sizes['max-%s' % dimension] = svg.attrs.pop(dimension)
    add_style(svg, **max_sizes)

    remove_doctype_etc(fragment)
    # Looked up again on purpose, after the fragment has been cleaned.
    remove_all_titles(fragment.svg)

    table = identifier2ndp(refined) if refined is not None else {}

    def link_for_dp_name(identifier0):
        identifier = identifier0  # todo translate
        if identifier not in table:
            return None
        entry = table[identifier]
        libname = library_name if entry.libname is None else entry.libname
        href0 = '/repos/%s/shelves/%s/libraries/%s/models/%s/views/syntax/' % (
            e.repo_name, e.shelf_name, libname, entry.name)
        return make_relative(href0)

    add_html_links_to_svg(fragment.svg, link_for_dp_name)

    return to_html_stripping_fragment(fragment)
def prerender_mathjax_(html):
    """
    Runs the prerender.js script to pre-render the MathJax into images.

    The HTML fragment is written to a fresh temporary directory (always
    removed on exit), the node script is run on it, and the produced file
    is parsed. The MathJax style tag, the hidden <svg> with the glyph
    definitions and the hidden marker <div> are then moved to the end of
    the document.

    html: an HTML fragment; it must not contain '<html>'.

    Returns the processed HTML as given by to_html_stripping_fragment().

    Raises PrerenderError if the script exits with a non-zero code.
    Missing expected tags are reported through raise_desc(Exception, ...).
    Any other exception (e.g. CmdException) propagates unchanged, with
    its original traceback.
    """
    assert '<html>' not in html, html

    use = get_nodejs_bin()

    # Display math must not be wrapped in a paragraph.
    html = html.replace('<p>$$', '\n$$')
    html = html.replace('$$</p>', '$$\n')
    script = get_prerender_js()
    mcdp_tmp_dir = get_mcdp_tmp_dir()
    prefix = 'prerender_mathjax_'
    d = mkdtemp(dir=mcdp_tmp_dir, prefix=prefix)

    # Note: there used to be an inner "except CmdException as e: raise e"
    # here. It only re-raised, and on Python 2 "raise e" replaces the
    # original traceback, so the exception is now simply left to propagate.
    try:
        f_html = os.path.join(d, 'file.html')
        with open(f_html, 'w') as f:
            f.write(html)

        f_out = os.path.join(d, 'out.html')
        cmd = [use, script, f_html, f_out]
        pwd = os.getcwd()
        res = system_cmd_result(pwd, cmd,
                                display_stdout=False,
                                display_stderr=False,
                                raise_on_error=False)

        if res.ret:  # pragma: no cover
            if 'Error: Cannot find module' in res.stderr:
                msg = 'You have to install the MathJax and/or jsdom libraries.'
                msg += '\nOn Ubuntu, you can install them using:'
                msg += '\n\n\tsudo apt-get install npm'
                msg += '\n\n\tnpm install MathJax-node jsdom'
                msg += '\n\n' + indent(res.stderr, ' |')
                raise PrerenderError(msg)

            if 'parse error' in res.stderr:
                lines = [_ for _ in res.stderr.split('\n')
                         if 'parse error' in _]
                assert lines
                msg = 'LaTeX conversion errors:\n\n' + '\n'.join(lines)
                raise PrerenderError(msg)

            msg = 'Unknown error (ret = %d).' % res.ret
            msg += '\n\n' + indent(res.stderr, ' |')
            raise PrerenderError(msg)

        with open(f_out) as f:
            data = f.read()

        # Read the data
        soup = bs(data)

        # find this and move it at the end
        # <style id="MathJax_SVG_styles"
        tag_style = soup.find(id='MathJax_SVG_styles')
        if not tag_style:
            msg = 'Expected to find style MathJax_SVG_styles'
            raise_desc(Exception, msg, soup=str(soup))

        # <svg style="display: none;"><defs id="MathJax_SVG_glyphs">
        tag_svg_defs = soup.find('svg', style="display: none;")
        if not tag_svg_defs:
            msg = 'Expected to find tag <svg display=none>'
            raise_desc(Exception, msg, soup=str(soup))

        # <div style="display:none">Because of mathjax bug</div>
        other_tag = soup.find('div', style="display:none")
        if not other_tag:
            msg = 'Expected to find tag <div style="display:none">'
            raise_desc(Exception, msg, soup=str(soup))

        soup.append(other_tag.extract())
        soup.append(tag_svg_defs.extract())
        soup.append(tag_style.extract())

        return to_html_stripping_fragment(soup)
    finally:
        shutil.rmtree(d)
def render_complete(library, s, raise_errors, realpath, generate_pdf=False,
                    check_refs=False, use_mathjax=True, filter_soup=None,
                    symbols=None):
    """
    Transforms markdown into html and then renders the mcdp snippets inside.

    library: default library; the document may select another one
        (see get_library_from_document()).
    s: a markdown string with embedded html snippets. It must be a str
        encoded with utf-8, not unicode.
    raise_errors: passed to display_files(), html_interpret() and, as
        raise_errors, to embed_images_from_library2().
    realpath: passed to check_good_use_of_special_paragraphs(),
        html_interpret() and check_status_codes().
    generate_pdf: passed to html_interpret().
    check_refs: if true, check_if_any_href_is_invalid() is run.
    use_mathjax: if true, the math is pre-rendered with prerender_mathjax().
    filter_soup: optional callable, called as filter_soup(library, soup).
    symbols: passed to prerender_mathjax().

    Returns an HTML string; not a complete document.

    Raises ValueError if an inline math fragment ($...$) contains a blank
    line; a unicode `s` is reported through raise_desc(TypeError, ...).
    """
    s0 = s
    check_good_use_of_special_paragraphs(s0, realpath)
    raise_missing_image_errors = raise_errors

    # Imports here because of circular dependencies
    from .latex.latex_preprocess import extract_maths, extract_tabular
    from .latex.latex_preprocess import latex_preprocessing
    from .latex.latex_preprocess import replace_equations
    from .macro_col2 import col_macros, col_macros_prepare_before_markdown
    from .mark.markd import render_markdown
    from .preliminary_checks import do_preliminary_checks_and_fixes
    from .prerender_math import prerender_mathjax

    if isinstance(s, unicode):
        msg = 'I expect a str encoded with utf-8, not unicode.'
        raise_desc(TypeError, msg, s=s)

    # need to do this before do_preliminary_checks_and_fixes
    # because of & char
    s, tabulars = extract_tabular(s)

    s = do_preliminary_checks_and_fixes(s)
    # put back tabular, because extract_maths needs to grab them
    for k, v in tabulars.items():
        assert k in s
        s = s.replace(k, v)

    # copy all math content,
    #  between $$ and $$
    #  between various limiters etc.
    # returns a dict(string, substitution)
    s, maths = extract_maths(s)
    for k, v in maths.items():
        # Only inline math ($...$) is checked. This used to read
        # "v[1] != '$$'", which compares one character with a
        # two-character string and is therefore always true, so display
        # math ($$...$$) containing a blank line was rejected as well.
        if v.startswith('$') and not v.startswith('$$'):
            if '\n\n' in v:
                msg = 'Suspicious math fragment %r = %r' % (k, v)
                logger.error(maths)
                logger.error(msg)
                raise ValueError(msg)

    s = latex_preprocessing(s)
    s = '<div style="display:none">Because of mathjax bug</div>\n\n\n' + s

    # cannot parse html before markdown, because md will take
    # invalid html, (in particular '$ ciao <ciao>' and make it work)

    # Protect '*}' from markdown; undone after the math is put back.
    s = s.replace('*}', '\\*}')

    s, mcdpenvs = protect_my_envs(s)

    s = col_macros_prepare_before_markdown(s)

    s = render_markdown(s)

    def preprocess_equations(x):
        # this gets mathjax confused
        x = x.replace('>', '\\gt{}')  # need brace; think a<b -> a\lt{}b
        x = x.replace('<', '\\lt{}')
        return x

    # Put the math fragments back in place of their placeholders.
    for k, v in maths.items():
        if k not in s:
            msg = 'Cannot find %r (= %r)' % (k, v)
            raise_desc(DPInternalError, msg, s=s)

        s = s.replace(k, preprocess_equations(v))

    s = replace_equations(s)
    s = s.replace('\\*}', '*}')

    # this parses the XML
    soup = bs(s)

    other_abbrevs(soup)

    # need to process tabular before mathjax
    escape_for_mathjax(soup)

    # mathjax must be after markdown because of code blocks using "$"
    s = to_html_stripping_fragment(soup)

    if use_mathjax:
        s = prerender_mathjax(s, symbols)

    soup = bs(s)
    escape_for_mathjax_back(soup)
    s = to_html_stripping_fragment(soup)

    for k, v in mcdpenvs.items():
        # there is this case:
        # ~~~
        # <pre> </pre>
        # ~~~
        s = s.replace(k, v)

    s = s.replace('<p>DRAFT</p>', '<div class="draft">')
    s = s.replace('<p>/DRAFT</p>', '</div>')

    soup = bs(s)
    mark_console_pres(soup)

    try:
        substitute_github_refs(soup, defaults={})
    except Exception as e:
        # Deliberately best-effort: the failure is logged and ignored.
        msg = 'I got an error while substituting github: references.'
        msg += '\nI will ignore this error because it might not be the fault of the writer.'
        msg += '\n\n' + indent(str(e), '|', ' error: |')
        logger.warning(msg)

    # must be before make_figure_from_figureid_attr()
    display_files(soup, defaults={}, raise_errors=raise_errors)

    make_figure_from_figureid_attr(soup)
    col_macros(soup)
    fix_subfig_references(soup)

    library = get_library_from_document(soup, default_library=library)

    from mcdp_docs.highlight import html_interpret
    html_interpret(library, soup, generate_pdf=generate_pdf,
                   raise_errors=raise_errors, realpath=realpath)
    if filter_soup is not None:
        filter_soup(library=library, soup=soup)

    embed_images_from_library2(soup=soup, library=library,
                               raise_errors=raise_missing_image_errors)
    make_videos(soup=soup)

    if check_refs:
        check_if_any_href_is_invalid(soup)

    if getuser() == 'andrea':
        if MCDPConstants.preprocess_style_using_less:
            run_lessc(soup)
        else:
            logger.warning(
                'preprocess_style_using_less=False might break the manual')

    fix_validation_problems(soup)

    strip_pre(soup)

    if MCDPManualConstants.enable_syntax_higlighting:
        syntax_highlighting(soup)

    if MCDPManualConstants.enforce_status_attribute:
        check_status_codes(soup, realpath)
    if MCDPManualConstants.enforce_lang_attribute:
        check_lang_codes(soup)

    # Fixes the IDs (adding 'sec:'); add IDs to missing ones
    globally_unique_id_part = (
        'autoid-DO-NOT-USE-THIS-VERY-UNSTABLE-LINK-' + get_md5(s0)[:5])
    fix_ids_and_add_missing(soup, globally_unique_id_part)

    check_no_patently_wrong_links(soup)

    s = to_html_stripping_fragment(soup)
    s = replace_macros(s)

    return s
def make_figures(library, soup, res, location, raise_error_dp,
                 raise_error_others, realpath, generate_pdf):
    """ Replaces figure requests in the soup with the rendered SVG.

    Looks for codes like:

    <pre><code class="mcdp_ndp_graph_templatized">mcdp {
        # empty model
    }
    </code></pre>

    For every figure name made available by MakeFiguresNDP,
    MakeFiguresTemplate and MakeFiguresPoset (names and aliases), the
    elements matching render.NAME, pre.NAME and img.NAME are replaced by
    the generated <svg>. If generate_pdf is true the replacement is a
    <div> holding the <svg> and a download link to the cropped PDF.

    res: errors are recorded here with res.note_error().
    location: not used in this function body.
    raise_error_dp: re-raise DPSyntaxError / DPSemanticError instead of
        recording them.
    raise_error_others: same, for any other Exception.

    Remaining <render> elements whose class does not contain 'errored'
    are reported as errors. Returns the HTML of the modified soup.
    """

    def go(s0, func):
        # Replace every element matching one of the comma-separated
        # selectors by func(element); failures are raised or noted.
        for css in s0.split(','):
            for element in soup.select(css):
                try:
                    element.replaceWith(func(element))
                except (DPSyntaxError, DPSemanticError) as e:
                    if raise_error_dp:
                        raise
                    res.note_error(str(e),
                                   HTMLIDLocation.for_element(element))
                except Exception as e:
                    if raise_error_others:
                        raise
                    res.note_error(str(e),
                                   HTMLIDLocation.for_element(element))

    def make_tag(tag0, klass, data, ndp=None, template=None, poset=None):
        # Build the replacement for tag0 out of the rendered data.
        tag_svg = BeautifulSoup(data['svg'], 'lxml',
                                from_encoding='utf-8').svg
        assert tag_svg.name == 'svg'

        if not tag_svg.has_attr('width'):
            print('no width in SVG tag: %s' % tag_svg)
        else:
            ws = tag_svg['width']
            hs = tag_svg['height']
            assert 'pt' in ws
            scale = MCDPConstants.scale_svg
            tag_svg['width'] = float(ws.replace('pt', '')) * scale
            tag_svg['height'] = float(hs.replace('pt', '')) * scale
            tag_svg['rescaled'] = (
                'Rescaled from %s %s, scale = %s' % (ws, hs, scale))

        tag_svg['class'] = klass
        # Carry over style and id from the element being replaced.
        for attribute in ('style', 'id'):
            if tag0.has_attr(attribute):
                tag_svg[attribute] = tag0[attribute]

        if not generate_pdf:
            return tag_svg

        pdf = crop_pdf(data['pdf'], margins=0)
        wrapper = Tag(name='div')

        # Name of the download: the element id, else the load name of
        # the rendered object, else a hash of the element's text.
        att = MCDPConstants.ATTR_LOAD_NAME
        if tag0.has_attr('id'):
            basename = tag0['id']
        else:
            for candidate in (ndp, template, poset):
                if candidate is not None and hasattr(candidate, att):
                    basename = getattr(candidate, att)
                    break
            else:
                digest = hashlib.sha224(tag0.string).hexdigest()
                basename = 'code-%s' % digest[-8:]

        docname = os.path.splitext(os.path.basename(realpath))[0]
        download = docname + "." + basename + "." + klass + '.pdf'
        link = create_a_to_data(download=download, data_format='pdf',
                                data=pdf)
        link['class'] = 'pdf_data'
        link.append(NavigableString(download))
        wrapper.append(tag_svg)
        wrapper.append(link)
        return wrapper

    def wanted_formats():
        if generate_pdf:
            return ['svg', 'pdf']
        return ['svg']

    def selector_for(figure_name):
        return 'render.%s,pre.%s,img.%s' % ((figure_name,) * 3)

    image_source = ImagesFromPaths(library.get_images_paths())

    # --- figures of NDPs
    mf0 = MakeFiguresNDP(None, None, None)
    available_ndp = set(mf0.available()) | set(mf0.aliases)
    for which in available_ndp:
        def render_ndp(tag0):
            assert tag0.parent is not None
            context = Context()

            def load(x):
                return library.load_ndp(x, context=context)

            def parse(x):
                return library.parse_ndp(x, realpath=realpath,
                                         context=context)

            ndp = load_or_parse_from_tag(tag0, load, parse)
            maker = MakeFiguresNDP(ndp=ndp, image_source=image_source,
                                   yourname=None)  # XXX
            data = maker.get_figure(which, wanted_formats())
            return make_tag(tag0, which, data, ndp=ndp, template=None)

        go(selector_for(which), render_ndp)

    # --- figures of templates
    mf0 = MakeFiguresTemplate(None, None, None)
    available_template = set(mf0.available()) | set(mf0.aliases)
    for which in available_template:
        def render_template(tag0):
            context = Context()

            def load(x):
                return library.load_spec(SPEC_TEMPLATES, x, context=context)

            def parse(x):
                return library.parse_template(x, realpath=realpath,
                                              context=context)

            template = load_or_parse_from_tag(tag0, load, parse)
            maker = MakeFiguresTemplate(template=template, library=library,
                                        yourname=None)  # XXX
            data = maker.get_figure(which, wanted_formats())
            return make_tag(tag0, which, data, ndp=None, template=template)

        go(selector_for(which), render_template)

    # --- figures of posets
    mf0 = MakeFiguresPoset(None, None)
    available_poset = set(mf0.available()) | set(mf0.aliases)
    for which in available_poset:
        def render_poset(tag0):
            context = Context()

            def load(x):
                return library.load_poset(x, context=context)

            def parse(x):
                return library.parse_poset(x, realpath=realpath,
                                           context=context)

            poset = load_or_parse_from_tag(tag0, load, parse)
            maker = MakeFiguresPoset(poset=poset, image_source=image_source)
            data = maker.get_figure(which, wanted_formats())
            return make_tag(tag0, which, data, ndp=None, template=None,
                            poset=poset)

        go(selector_for(which), render_poset)

    # Whatever <render> is left (and not marked as errored) used a name
    # that none of the figure makers provides.
    msg = 'Invalid "render" element.'
    msg += '\n\n' + " Available for NDPs: %s." % ", ".join(
        sorted(available_ndp))
    msg += '\n\n' + " Available for templates: %s." % ", ".join(
        sorted(available_template))
    msg += '\n\n' + " Available for posets: %s." % ", ".join(
        sorted(available_poset))
    for leftover in list(soup.select('render')):
        if 'errored' not in leftover.attrs.get('class', ''):
            res.note_error(msg, HTMLIDLocation.for_element(leftover))

    return to_html_stripping_fragment(soup)
def render_complete(library, s, raise_errors, realpath, generate_pdf=False,
                    check_refs=False, do_math=True, filter_soup=None,
                    raise_missing_image_errors=False):
    """
    Transforms markdown into html and then renders the mcdp snippets inside.

    library: default library; the document may select another one
        (see get_library_from_document()).
    s: a markdown string with embedded html snippets. It must be a str
        encoded with utf-8, not unicode.
    raise_errors: passed to html_interpret().
    realpath: passed to html_interpret().
    generate_pdf: passed to html_interpret().
    check_refs: if true, check_if_any_href_is_invalid() is run.
    do_math: if true, the math is pre-rendered with prerender_mathjax().
    filter_soup: optional callable, called as filter_soup(library, soup).
    raise_missing_image_errors: passed as raise_errors to
        embed_images_from_library2().

    Returns an HTML string; not a complete document.

    Raises ValueError if an inline math fragment ($...$) contains a blank
    line; a unicode `s` is reported through raise_desc(TypeError, ...).
    """
    from .latex.latex_preprocess import extract_maths, extract_tabular
    from .latex.latex_preprocess import latex_preprocessing
    from .latex.latex_preprocess import replace_equations
    from .macro_col2 import col_macros, col_macros_prepare_before_markdown
    from .mark.markd import render_markdown
    from .preliminary_checks import do_preliminary_checks_and_fixes
    from .prerender_math import prerender_mathjax

    if isinstance(s, unicode):
        msg = 'I expect a str encoded with utf-8, not unicode.'
        raise_desc(TypeError, msg, s=s)

    # need to do this before do_preliminary_checks_and_fixes
    # because of & char
    s, tabulars = extract_tabular(s)

    s = do_preliminary_checks_and_fixes(s)
    # put back tabular, because extract_maths needs to grab them
    for k, v in tabulars.items():
        assert k in s
        s = s.replace(k, v)

    # copy all math content,
    #  between $$ and $$
    #  between various limiters etc.
    # returns a dict(string, substitution)
    s, maths = extract_maths(s)
    for k, v in maths.items():
        # Only inline math ($...$) is checked. This used to read
        # "v[1] != '$$'", which compares one character with a
        # two-character string and is therefore always true, so display
        # math ($$...$$) containing a blank line was rejected as well.
        if v.startswith('$') and not v.startswith('$$'):
            if '\n\n' in v:
                msg = 'Suspicious math fragment %r = %r' % (k, v)
                logger.error(maths)
                logger.error(msg)
                raise ValueError(msg)

    # fixes for LaTeX
    s = latex_preprocessing(s)

    s = '<div style="display:none">Because of mathjax bug</div>\n\n\n' + s

    # cannot parse html before markdown, because md will take
    # invalid html, (in particular '$ ciao <ciao>' and make it work)

    # Protect '*}' from markdown; undone after the math is put back.
    s = s.replace('*}', '\\*}')

    s, mcdpenvs = protect_my_envs(s)

    s = col_macros_prepare_before_markdown(s)

    s = render_markdown(s)

    def preprocess_equations(x):
        # this gets mathjax confused
        x = x.replace('>', '\\gt{}')  # need brace; think a<b -> a\lt{}b
        x = x.replace('<', '\\lt{}')
        return x

    # Put the math fragments back in place of their placeholders.
    for k, v in maths.items():
        if k not in s:
            msg = 'Cannot find %r (= %r)' % (k, v)
            raise_desc(DPInternalError, msg, s=s)

        s = s.replace(k, preprocess_equations(v))

    s = replace_equations(s)
    s = s.replace('\\*}', '*}')

    # this parses the XML
    soup = bs(s)

    other_abbrevs(soup)

    # need to process tabular before mathjax
    escape_for_mathjax(soup)

    # mathjax must be after markdown because of code blocks using "$"
    s = to_html_stripping_fragment(soup)

    if do_math:
        s = prerender_mathjax(s)

    soup = bs(s)
    escape_for_mathjax_back(soup)
    s = to_html_stripping_fragment(soup)

    for k, v in mcdpenvs.items():
        # there is this case:
        # ~~~
        # <pre> </pre>
        # ~~~
        s = s.replace(k, v)

    s = s.replace('<p>DRAFT</p>', '<div class="draft">')
    s = s.replace('<p>/DRAFT</p>', '</div>')

    soup = bs(s)
    mark_console_pres(soup)
    make_figure_from_figureid_attr(soup)
    col_macros(soup)
    fix_subfig_references(soup)

    library = get_library_from_document(soup, default_library=library)

    from mcdp_docs.highlight import html_interpret
    html_interpret(library, soup, generate_pdf=generate_pdf,
                   raise_errors=raise_errors, realpath=realpath)
    if filter_soup is not None:
        filter_soup(library=library, soup=soup)

    embed_images_from_library2(soup=soup, library=library,
                               raise_errors=raise_missing_image_errors)

    if check_refs:
        check_if_any_href_is_invalid(soup)

    run_lessc(soup)
    fix_validation_problems(soup)

    s = to_html_stripping_fragment(soup)
    s = replace_macros(s)

    return s