def generate_and_add_toc(soup, raise_error=False, res=None):
    """
    Generates the table of contents from the headers in <body> and inserts it
    into the placeholder element selected by
    MCDPManualConstants.TOC_PLACEHOLDER_SELECTOR.

    soup: the BeautifulSoup document to modify in place.
    raise_error: if True, raise NoTocPlaceholder when no placeholder is found;
        otherwise the problem is only logged and recorded on `res`.
    res: optional AugmentedResult collecting errors; a fresh one is created
        if None.
    """
    if res is None:
        # BUG FIX: this used to be `aug = AugmentedResult()`, which left `res`
        # as None, so the later res.note_error(...) calls raised AttributeError.
        res = AugmentedResult()
    logger.info('adding toc')
    body = soup.find('body')
    toc = generate_toc(body, res)
    # logger.info('TOC:\n' + str(toc))
    toc_ul = bs(toc).ul
    if toc_ul is None:
        # empty TOC
        msg = 'Could not find toc.'
        # logger.warning(msg)
        res.note_error(msg)  # XXX
    else:
        toc_ul.extract()
        assert toc_ul.name == 'ul'
        toc_ul['class'] = 'toc'  # XXX: see XXX13
        toc_ul['id'] = MCDPManualConstants.MAIN_TOC_ID
        toc_selector = MCDPManualConstants.TOC_PLACEHOLDER_SELECTOR
        tocs = list(body.select(toc_selector))
        if not tocs:
            msg = 'Cannot find any element of type %r to put TOC inside.' % toc_selector
            if raise_error:
                raise NoTocPlaceholder(msg)
            logger.warning(msg)
            res.note_error(msg)
        else:
            toc_place = tocs[0]
            toc_place.replaceWith(toc_ul)
def generate_and_add_toc(soup, toc_selector='div#toc'):
    """
    Builds a table of contents from the <body> headers and moves it into the
    element matched by `toc_selector` (legacy variant without error results).
    """
    logger.info('adding toc')
    body = soup.find('body')
    toc_html = generate_toc(body)
    # logger.info('TOC:\n' + str(toc_html))
    ul = bs(toc_html).ul
    if ul is None:
        # empty TOC: nothing to insert
        msg = 'Could not find toc'
        logger.warning(msg)  # XXX
        return
    ul.extract()
    assert ul.name == 'ul'
    ul['class'] = 'toc'
    ul['id'] = 'main_toc'
    placeholders = list(body.select(toc_selector))
    if not placeholders:
        msg = 'Cannot find any element of type %r to put TOC inside.' % toc_selector
        logger.warning(msg)
    else:
        placeholders[0].replaceWith(ul)
def figures_new1(): s = r""" <figure> <figcaption>Main caption</figcaption> <figure> <figcaption>Hello</figcaption> <img style='width:8em' src="duckietown-logo-transparent.png"/> </figure> <figure> <figcaption>second</figcaption> <img style='width:8em' src="duckietown-logo-transparent.png"/> </figure> </figure> """ soup = bs(s) res = AugmentedResult() location = LocationUnknown() make_figure_from_figureid_attr(soup, res, location) # nfigs = len(list(soup.select('figure'))) o = to_html_stripping_fragment(soup) print o
def gg_figure(r, name, ggraph, do_png=True, do_pdf=True, do_svg=True,
              do_dot=True):
    """
    Adds a figure to the Report r that displays this graph
    and also its source.

    Renders the graph via graphviz into png/pdf/svg data files and, if
    do_dot, also stores the raw dot source. Returns the figure object.
    """
    f = r.figure(name, cols=1)

    # save file in dot file
    with tmpfile(".dot") as filename_dot:
        with open(filename_dot, 'w') as fo:
            # FIX: keep the dot source in its own variable (`s_dot`) so it is
            # not clobbered by the SVG post-processing below; previously the
            # `do_dot` branch could write SVG html into the .dot data file.
            s_dot = get_dot_string(ggraph)
            fo.write(s_dot)

        # if False:
        #     ff = '%s.dot' % id(r)
        #     print('writing to %r' % ff)
        #     with open(ff, 'w') as f2:
        #         f2.write(s_dot)

        prog = 'dot'
        try:
            if do_png:
                with f.data_file('graph', MIME_PNG) as filename:
                    graphviz_run(filename_dot, filename, prog=prog)

            if do_pdf:
                with f.data_file('graph_pdf', MIME_PDF) as filename:
                    graphviz_run(filename_dot, filename, prog=prog)

            if do_svg:
                with f.data_file('graph_svg', MIME_SVG) as filename:
                    graphviz_run(filename_dot, filename, prog=prog)

                    from mcdp_report.embedded_images import embed_svg_images
                    with open(filename) as fsvg:
                        data = fsvg.read()
                    soup = bs(data)
                    embed_svg_images(soup)
                    # does not keep doctype:
                    # s = to_html_stripping_fragment(soup)
                    # this will keep the doctype
                    s_svg = str(soup)
                    s_svg = s_svg.replace('<fragment>', '')
                    s_svg = s_svg.replace('</fragment>', '')
                    write_bytes_to_file_as_utf8(s_svg, filename)

        except CmdException:
            if MCDPConstants.test_ignore_graphviz_errors:
                mcdp_dev_warning('suppressing errors from graphviz')
                logger.error('Graphivz failed, but I will ignore it '
                             'because of MCDPConstants.test_ignore_graphviz_errors.')
            else:
                raise

        # MIME_GRAPHVIZ
        if do_dot:
            # FIX: do not shadow the figure `f` with the file handle; the old
            # code did `with open(filename, 'w') as f`, which made the final
            # `return f` return a closed file object instead of the figure.
            with f.data_file('dot', MIME_PLAIN) as filename:
                with open(filename, 'w') as fo2:
                    fo2.write(s_dot)

    return f
def test_toc():
    """generate_toc must reject unprefixed ids, then succeed after fixing them."""
    s = """
    <html>
    <head></head>
    <body>
    <h1 id='one'>One</h1>
    <p>a</p>
    <h2 id='two'>Two</h2>
    <p>a</p>
    <h3 id='three'>Three</h3>
    <h2 id='four'>Four</h2>
    <p>a</p>
    </body>
    </html>
    """
    doc = bs(s)
    # print(doc)
    # body = doc.find('body')

    # first time it should fail
    raised = False
    try:
        _toc = generate_toc(doc)
    except InvalidHeaders:
        # > InvalidHeaders: I expected that this header would start with
        # > either part:,app:,sec:.
        # > <h1 id="one">One</h1>
        raised = True
    if not raised:
        raise Exception()

    doc = bs(s)
    fix_ids_and_add_missing(doc, 'prefix-', AugmentedResult(), LocationUnknown())
    generate_toc(doc)

    out = str(doc)
    # print(indent(out, 'transformed > '))
    for expected_id in ['sec:one', 'sub:two']:
        assert expected_id in out
def contains_header(l):
    """
    If the markdown line `l` renders to a header (h1..h6), return a
    HeaderIdent with its id and text; otherwise return None.
    """
    if not l.startswith('#'):
        return None
    rendered = bs(render_markdown(l.encode('utf8')))
    headers = rendered.select('h1,h2,h3,h4,h5,h6')
    if not headers:
        return None
    # only the first rendered header matters
    first = headers[0]
    header_id = first.attrs.get('id', None)
    return HeaderIdent(header_id, gettext(first))
def elements_abbrevs_test1():
    """A 'TODO:' paragraph becomes a div.todo-wrap containing p.todo."""
    source = "<p>TODO: paragraph</p>"
    expected = """<div class="todo-wrap"><p class="todo">paragraph</p></div>"""
    doc = bs(source.strip())
    substitute_special_paragraphs(doc)
    rendered = to_html_stripping_fragment(doc)
    #print rendered
    assert_equal(rendered, expected)
def elements_abbrevs_test2():
    """A 'TODO:' paragraph with inline markup keeps its children intact."""
    source = "<p>TODO: paragraph <strong>Strong</strong></p>"
    expected = """<div class="todo-wrap"><p class="todo">TODO: paragraph <strong>Strong</strong></p></div>"""
    doc = bs(source.strip())
    aug = AugmentedResult()
    loc = LocationUnknown()
    substitute_special_paragraphs(doc, aug, loc)
    rendered = to_html_stripping_fragment(doc)
    #print rendered
    assert_equal(rendered, expected)
def get_sanitized_copy(element):
    """ Strips all IDs

        Returns a parsed copy of `element` in which every `id` attribute is
        removed (root and descendants) and all a[href] lose their href.
    """
    copy = bs(str(element))
    # root element first
    copy.attrs.pop('id', None)
    # then every descendant tag
    for node in copy.descendants:
        if isinstance(node, Tag):
            node.attrs.pop('id', None)
    # finally strip link targets
    for anchor in copy.select('a[href]'):
        del anchor.attrs['href']
    return copy
def prerender_main():
    """
    CLI entry point: reads an HTML file from sys.argv[1], prerenders the
    MathJax in its <body>, and writes the full document to sys.argv[2].
    """
    f0 = sys.argv[1]
    f1 = sys.argv[2]
    # FIX: use a context manager so the input file handle is closed promptly
    # (was `html = open(f0).read()`).
    with open(f0) as fh:
        html = fh.read()
    parsed = bs_entire_document(html)
    body = parsed.html.body
    body_string = str(body)
    res = AugmentedResult()
    body2_string = prerender_mathjax_(body_string, res)
    body2 = bs(body2_string)
    # swap the original body for the prerendered one
    parsed.html.body.replace_with(body2)
    html2 = str(parsed)
    write_data_to_file(html2, f1)
def make_last_modified(files_contents, nmax=100):
    """
    Builds a 'Last modified' HTML fragment listing up to `nmax` documents,
    most recently modified first.

    files_contents: sequence of tuples castable to DocToJoin; entries without
        source_info are skipped.
    Returns an AugmentedResult whose result is the HTML string.
    """
    res = AugmentedResult()
    files_contents = [DocToJoin(*x) for x in files_contents]
    # only documents that carry source metadata can be listed
    files_contents = [_ for _ in files_contents if _.source_info]
    # newest first
    files_contents = list(
        sorted(files_contents, key=lambda x: x.source_info.last_modified,
               reverse=True))
    r = Tag(name='fragment')
    r.append('\n')
    h = Tag(name='h1')
    h.append('Last modified')
    h.attrs['id'] = 'sec:last-modified'
    r.append(h)
    r.append('\n')
    ul = Tag(name='ul')
    ul.append('\n')
    for d in files_contents[:nmax]:
        li = Tag(name='li')
        when = d.source_info.last_modified
        when_s = time.strftime("%a, %b %d", when)  # %H:%M
        li.append(when_s)
        li.append(': ')
        # link to the document's main header, if one can be found
        hid = get_main_header(bs(d.contents))
        if hid is None:
            what = "File %s" % d.docname
        else:
            what = Tag(name='a')
            what.attrs['href'] = '#' + hid
            # NOTE(review): the anchor gets no text content here; presumably a
            # later pass fills it in based on CLASS_NUMBER_NAME — confirm.
            what.attrs['class'] = MCDPManualConstants.CLASS_NUMBER_NAME
        li.append(what)
        li.append(' (')
        name = d.source_info.author.name
        li.append(name)
        li.append(')')
        ul.append(li)
        ul.append('\n')
    r.append(ul)
    s = to_html_stripping_fragment(r)
    # print s
    res.set_result(s)
    return res
def add_likebtn_(soup, likebtn_site_id):
    """
    Inserts a LikeBtn feedback widget after every h1/h2 that has an id.

    soup: document to modify in place.
    likebtn_site_id: value stored in the widget's data-site_id attribute.
    """
    sections = 'h1[id],h2[id]'
    for h in list(soup.select(sections)):
        id_ = h.attrs['id']
        div = Tag(name='div')
        div.attrs['class'] = 'like_buttons'
        div.append('Please provide your feedback: ')
        tag = Tag(name='span')
        tag.attrs['class'] = 'likebtn-wrapper'
        # NOTE(review): this value is overwritten by 'f1-%s' a few lines
        # below — confirm which identifier scheme is actually intended.
        tag.attrs['data-identifier'] = 'btn-%s' % id_
        tag.attrs['data-site_id'] = likebtn_site_id
        t = tag.attrs
        t['data-theme'] = "tick"
        # FIX: removed a duplicated `t['data-white_label'] = "true"` line
        t['data-white_label'] = "true"
        t['data-identifier'] = "f1-%s" % id_
        t['data-show_dislike_label'] = "true"
        t['data-icon_like_show'] = "false"
        t['data-icon_dislike_show'] = "false"
        t['data-counter_type'] = "percent"
        # t['data-popup_disabled'] = "true"
        t['data-popup_dislike'] = "true"
        t['data-popup_position'] = "bottom"
        t['data-popup_html'] = "Thanks for the feedback!"
        t['data-share_enabled'] = "false"
        t['data-share_size'] = "small"
        t['data-item_url'] = "item-url"
        t['data-item_title'] = 'title'
        t['data-item_description'] = "item - description"
        t['data-item_image'] = "item-image"
        t['data-lazy_load'] = "true"
        t['data-event_handler'] = "callback"
        t['data-i18n_like'] = "Great work!"
        t['data-i18n_dislike'] = "This needs more improvement"
        # t['data-i18n_after_like'] = "Glad you liked it!"
        # t['data-i18n_after_dislike'] = "Please help us improve!"
        t['data-i18n_like_tooltip'] = "This is great content"
        t['data-i18n_dislike_tooltip'] = "Something does not feel right"
        # t['data-i18n_unlike_tooltip'] = "dislike - tooltip - after"
        # t['data-i18n_undislike_tooltip'] = "dislike - tooltip - after"
        t['data-i18n_share_text'] = "Share this content"
        script = bs(likebtn_code).script
        div.append(tag)
        div.append(script)
        h.insert_after(div)
def subwith(name_, s):
    """
    Rewrites a highlighted fragment `s`: its <pre> becomes <code>, wrapped in
    a new <pre class="syntax_highlight {name_}">.

    NOTE(review): no value is returned, so the rewritten tree is only
    observable through the replace_with side effect — confirm callers.
    """
    result = bs(s.encode('utf8'))
    result.name = 'div'
    pre = result.find('pre')
    pre.name = 'code'
    Pre = Tag(name='pre')
    add_class(Pre, 'syntax_highlight')
    add_class(Pre, name_)
    Pre.append(pre)
    try:
        # NOTE(review): `code` is not defined in this function; unless it is a
        # module-level global, this line raises NameError. The intended target
        # was presumably the original parent of `pre` — confirm and fix.
        code.parent.replace_with(Pre)
    except:
        logger.debug(str(code.parent))
        raise
def append_disqus(filename, html):
    """
    Appends a Disqus comments widget to the #not-toc element of `html`, and
    prepends a banner stylesheet to the body.

    filename: page filename ('.html' is stripped to form the page identifier).
    html: BeautifulSoup document; must contain an element with id 'not-toc'.
    """
    # append discus section
    PAGE_IDENTIFIER = filename.replace('.html', '')
    PAGE_URL = 'https://duckietown.github.io/duckuments/master/' + filename
    DISQUS_DOMAIN = 'duckuments.disqus.com'
    # substitute the placeholders in the Disqus embed snippet
    s = disqus
    s = s.replace('PAGE_IDENTIFIER', PAGE_IDENTIFIER)
    s = s.replace('PAGE_URL', PAGE_URL)
    s = s.replace('DISQUS_DOMAIN', DISQUS_DOMAIN)
    disqus_section = bs(s)
    disqus_section.name = 'div'
    not_toc = html.find(id='not-toc')
    not_toc.append(disqus_section)

    banner_string = """
    <style type="text/css">
    #banner {
        display: block;
        position: fixed;
        left: 0; top: 0;
        width: 100%;
        padding-top: 0.5em;
        padding-left:2em;
        padding-right: 0.5em;
        font-weight: bold !important;
        font-size: 120%;
        //background-color: yellow;
        color: red;
        font-weight: bold;
        padding-bottom: 0.5em;
    }
    div.super {
        margin-top: 2em;
    }
    </style>
    """
    banner = bs(banner_string)
    banner.name = 'div'
    # banner goes first in the body
    html.body.insert(0, banner)
def make_page(contents, head0, add_toc):
    """ Returns html (Beautiful Soup document)

        Assembles a standalone page: a copy of `head0`, an optional TOC
        sidebar (`add_toc`, a tag placed inside div#tocdiv), and `contents`
        inside div#not-toc. Any #main_toc inside `contents` is removed.
    """
    html = Tag(name='html')
    head = head0.__copy__()
    html.append(head)
    body = Tag(name='body')
    with timeit('make_page() / copy toc'):
        # NOTE(review): nesting below is inferred from a collapsed source;
        # the title update and body.append(tocdiv) appear to apply only when
        # add_toc is given — confirm against the original file.
        if add_toc:
            tocdiv = Tag(name='div')
            tocdiv.attrs['id'] = 'tocdiv'
            # toc = main_toc
            # toc.extract()
            # del toc.attrs['id']
            tocdiv.append(add_toc)
            # use the first header of the contents as the page <title>
            section_name = get_first_header_title(contents)
            if section_name is not None:
                title2 = bs(section_name)
                title2.name = 'title'
                title = head.find('title')
                if title is None:
                    # msg = 'Cannot find the "title" element'
                    # msg += '\n' + indent(str(head)[:500], 'head')
                    # raise Exception(msg)
                    head.append(title2)
                else:
                    title.replace_with(title2)
            body.append(tocdiv)
    not_toc = Tag(name='div')
    not_toc.attrs['id'] = 'not-toc'
    not_toc.append(contents)
    body.append(not_toc)
    html.append(body)
    # delete the original one
    main_toc = contents.find(id='main_toc')
    if main_toc is not None:
        main_toc.extract()
    return html
def test_toc2():
    """fix_ids_and_add_missing numbers headers without ids sequentially."""
    s = """
    <html>
    <head></head>
    <body>
    <h1>One</h1>
    <h1>Two</h1>
    <h1>Three</h1>
    <p></p>
    <h2>A</h2>
    <h2>B</h2>
    <h2>C</h2>
    <h3>a</h3>
    <h3>b</h3>
    <h3>c</h3>
    </body>
    </html>
    """
    doc = bs(s)
    # print(doc)
    # body = doc.find('body')
    fix_ids_and_add_missing(doc, 'prefix', AugmentedResult(), LocationUnknown())
    # the fifth generated id lands on the second h2
    assert doc.find(id='sub:prefix-5') is not None
    # Expected structure:
    # <fragment>
    # <h1 id="sec:prefix--1">One</h1>
    # <h1 id="sec:prefix--2">Two</h1>
    # <h1 id="sec:prefix--3">Three</h1>
    # <p></p>
    # <h2 id="sub:prefix--4">A</h2>
    # <h2 id="sub:prefix--5">B</h2>
    # <h2 id="sub:prefix--6">C</h2>
    # <h3 id="subsub:prefix--7">a</h3>
    # <h3 id="subsub:prefix--8">b</h3>
    # <h3 id="subsub:prefix--9">c</h3>
    # </fragment>
    print(doc)
    _toc = generate_toc(doc)
    s = str(doc)
def get_svg_for_visualization(e, image_source, library_name, spec, name,
                              thing, refined, make_relative, library):
    """
    Renders `thing` as SVG via the spec, post-processes it (size constraints
    moved to CSS max-width/max-height, titles and doctype removed), and adds
    hyperlinks to known DP names. Returns the SVG as an HTML string.

    refined: if not None, used to build the identifier -> ndp table for links.
    make_relative: callable turning an absolute href into a relative one.
    """
    svg_data0 = spec.get_png_data_syntax(image_source=image_source, name=name,
                                         thing=thing, data_format='svg',
                                         library=library)
    fragment = bs(svg_data0)
    if fragment.svg is None:
        msg = 'Cannot interpret fragment.'
        msg += '\n' + indent(svg_data0, '> ')
        raise DPInternalError(msg)
    assert fragment.svg is not None
    # convert fixed width/height attributes into max-width/max-height style
    style = {}
    for a in ['width', 'height']:
        if a in fragment.svg.attrs:
            value = fragment.svg.attrs[a]
            del fragment.svg.attrs[a]
            style['max-%s' % a] = value
    add_style(fragment.svg, **style)
    remove_doctype_etc(fragment)
    remove_all_titles(fragment.svg)
    if refined is not None:
        table = identifier2ndp(refined)
    else:
        table = {}

    def link_for_dp_name(identifier0):
        # Maps a DP identifier to the URL of its syntax view, or None if
        # the identifier is unknown.
        identifier = identifier0
        # todo translate
        if identifier in table:
            a = table[identifier]
            # fall back to the current library when none is recorded
            libname = a.libname if a.libname is not None else library_name
            href0 = '/repos/%s/shelves/%s/libraries/%s/models/%s/views/syntax/' % (
                e.repo_name, e.shelf_name, libname, a.name)
            return make_relative(href0)
        else:
            return None

    add_html_links_to_svg(fragment.svg, link_for_dp_name)
    svg_data = to_html_stripping_fragment(fragment)
    return svg_data
def generate_toc(soup, max_depth=None):
    """
    Builds a table of contents from the indexable headers of `soup`.

    max_depth: if given, headers deeper than this are skipped.
    Returns the TOC rendered as an HTML string (certain levels excluded).
    """
    # sentinel root; real items are nested below it by depth
    stack = [Item(None, 0, 'root', 'root', [])]
    headers_depths = list(get_things_to_index(soup))
    for header, depth, using in headers_depths:
        if max_depth is not None:
            if depth > max_depth:
                continue
        item = Item(header, depth, using, header['id'], [])
        # pop back up to the nearest shallower ancestor
        while (stack[-1].depth >= depth):
            stack.pop()
        stack[-1].items.append(item)
        stack.append(item)
    root = stack[0]
    logger.debug('numbering items')
    number_items2(root)
    logger.debug(toc_summary(root))
    logger.debug('toc iterating')
    # iterate over chapters (below each h1)
    # XXX: this is parts
    if False:
        # dead code: per-chapter mini-TOCs, disabled
        for item in root.items:
            s = item.to_html(root=True, max_levels=100)
            stoc = bs(s)
            if stoc.ul is not None:  # empty document case
                ul = stoc.ul
                ul.extract()
                ul['class'] = 'toc chapter_toc'
                # todo: add specific h1
                item.tag.insert_after(ul)  # XXX: uses <fragment>
    logger.debug('toc done iterating')
    # these level prefixes are not shown in the main TOC
    exclude = [
        'subsub', 'fig', 'code', 'tab', 'par', 'subfig', 'appsubsub',
        'def', 'eq', 'rem', 'lem', 'prob', 'prop', 'exa', 'thm'
    ]
    without_levels = root.copy_excluding_levels(exclude)
    res = without_levels.to_html(root=True, max_levels=13)
    return res
def read_references(dirname, base_url, prefix):
    """
    Scans all *.html files under `dirname` and returns an OrderedDict mapping
    prefix+id -> GenericReference for every element that has an id.

    base_url: prepended (with the file's relative path and fragment) to form
        each reference URL.
    """
    from mcdp_docs.mcdp_render_manual import look_for_files
    filenames = look_for_files([dirname], "*.html")
    res = OrderedDict()
    for f in filenames:
        # FIX: close the file handle (was `contents = open(f).read()`)
        with open(f) as fh:
            contents = fh.read()
        a = bs(contents)
        rel = os.path.relpath(os.path.realpath(f), os.path.realpath(dirname))
        for element in a.select('[id]'):
            id_ = element.attrs['id']
            url = base_url + '/' + rel + '#' + id_
            # use the span.ident inside the element as the title, if present
            ident = element.select('span.ident')
            if ident:
                title = str(ident[0])
            else:
                title = None
            res[prefix + id_] = GenericReference(id_, url, title)
    return res
def make_changes(soup, f, rel, current_slug):
    """
    Injects the book-switcher widget (SPAN_BOOKS) at the top of #tocdiv,
    prefixing its option values and link hrefs with `rel` and marking the
    option matching `current_slug` as selected. Returns `soup`.
    """
    widget = bs(SPAN_BOOKS)
    for opt in widget.select('option[value]'):
        # noinspection PyAugmentAssignment
        opt.attrs['value'] = rel + opt.attrs['value']
        if current_slug in opt.attrs['value']:
            opt.attrs['selected'] = 1
    for link in widget.select('a[href]'):
        # noinspection PyAugmentAssignment
        link.attrs['href'] = rel + link.attrs['href']
    toc_container = soup.select_one('#tocdiv')
    if toc_container is not None:
        toc_container.insert(0, widget)
    return soup
def test_toc_first():
    """
    Joins a document whose body precedes the first h1, splits it into files,
    and checks the leading content ends up in index.html.
    """
    s = """
    <p>Before everything</p>

    <h1 id='booktitle' nonumber="1" notoc="1">Booktitle</h1>

    <p>A figure</p>

    <h1 id='mtoc' nonumber="1" notoc="1">toc</h1>

    <p> This is my toc </p>

    <h1 id='part:part1'>Part1</h1>

    <p>a</p>

    <h1 id='sec:one'>One</h1>

    <p>a</p>
    """
    files_contents = [DocToJoin(docname='a', contents=s, source_info=None)]
    stylesheet = 'v_manual_blurb_ready'
    res = manual_join(template=template, files_contents=files_contents,
                      stylesheet=stylesheet)
    soup = bs(res)
    # print(indent(soup.prettify(), 't > '))
    # body = soup.find('body')
    filename2contents = split_in_files(soup)
    print list(filename2contents.keys())
    index = filename2contents['index.html']
    print indent(index, 'index > ')
    s = str(index)
    # the content before the first header must land on the index page
    assert 'Before everything' in s
def get_bibliography(bibfile):
    """
    Parses an HTML bibliography made of <dt>/<dd> pairs and returns a <div>
    containing one <cite id="bib:NAME"> element per entry.

    bibfile: path to the HTML file (each dt must contain <a name=...>).
    """
    # FIX: close the file handle (was `data = open(bibfile).read()`)
    with open(bibfile) as fh:
        data = fh.read()
    frag = bs(data)
    res = Tag(name='div')
    ids = []
    for dt in frag.select('dt'):
        assert dt.name == 'dt'
        name = dt.a.attrs['name']
        name = 'bib:' + name
        ids.append(name)
        # the matching definition follows the term
        dd = dt.findNext('dd')
        assert dd.name == 'dd'
        entry = dd.__copy__()
        entry.name = 'cite'
        entry.attrs['id'] = name
        # dead branch kept for reference: optional text cleanup of entries
        try_to_replace_stuff = False
        if try_to_replace_stuff:
            for x in entry.descendants:
                #print('child', x)
                if isinstance(x, NavigableString):
                    s = x.string.encode('utf-8')
                    s = s.replace('\n', ' ')
                    s = s.replace('[', '')
                    s = s.replace('|', '')
                    s = s.replace(']', '')
                    y = NavigableString(unicode(s, 'utf-8'))
                    x.replace_with(y)
                    #print('string %r' % x.string)
                if isinstance(x, Tag) and x.name == 'a' and x.string == 'bib':
                    x.extract()
        res.append(NavigableString('\n'))
        res.append(entry)
        res.append(NavigableString('\n'))
    print('Found %d bib entries.' % len(ids))
    return res
def substituting_empty_links(soup, raise_errors=False):
    '''
        default style is [](#sec:systems)  "Chapter 10"

        the name is [](#sec:systems?only_name) "My title"

        the number is [](#sec:systems?only_number) "10"

        and full is [](#sec:systems?toc_link) "Chapter 10 - My title"

        You can also use "class":

            <a href='#sec:name' class='only_number'></a>

            or

            <a href='#sec:name?only_number'></a>
    '''
    CLASS_ONLY_NUMBER = MCDPManualConstants.CLASS_ONLY_NUMBER
    CLASS_NUMBER_NAME = MCDPManualConstants.CLASS_NUMBER_NAME
    CLASS_ONLY_NAME = MCDPManualConstants.CLASS_ONLY_NAME
    logger.debug('substituting_empty_links')
    n = 0
    nerrors = 0
    for le in get_empty_links_to_fragment(soup):
        a = le.linker
        element_id = le.eid
        element = le.linked
        n += 1
        if not element:
            # the link target does not exist in the document
            msg = ('Cannot find %s' % element_id)
            note_error_msg(a, msg)
            nerrors += 1
            if raise_errors:
                raise ValueError(msg)
            continue
        # if there is a query, remove it
        if le.query is not None:
            new_href = '#' + le.eid
            a.attrs['href'] = new_href
            logger.info('setting new href= %s' % (new_href))
        if (not LABEL_WHAT_NUMBER in element.attrs) or \
                (not LABEL_NAME in element.attrs):
            msg = (
                'substituting_empty_links: Could not find attributes %s or %s in %s' %
                (LABEL_NAME, LABEL_WHAT_NUMBER, element))
            # warnings only (the error path is disabled)
            if True:
                logger.warning(msg)
            else:
                note_error_msg(a, msg)
                nerrors += 1
                if raise_errors:
                    raise ValueError(msg)
            continue
        label_what_number = element.attrs[LABEL_WHAT_NUMBER]
        label_number = element.attrs[LABEL_NUMBER]
        label_what = element.attrs[LABEL_WHAT]
        label_name = element.attrs[LABEL_NAME]
        # copy before appending the query class
        classes = list(a.attrs.get('class', []))  # bug: I was modifying
        if le.query is not None:
            classes.append(le.query)
        if 'toc_link' in classes:
            # full style: "<what> <number> - <name>" as separate spans
            s = Tag(name='span')
            s.string = label_what
            add_class(s, 'toc_what')
            a.append(s)
            a.append(' ')
            s = Tag(name='span')
            s.string = label_number
            add_class(s, 'toc_number')
            a.append(s)
            s = Tag(name='span')
            s.string = ' - '
            add_class(s, 'toc_sep')
            a.append(s)
            if label_name is not None and '<' in label_name:
                contents = bs(label_name)
                # sanitize the label name
                for br in contents.findAll('br'):
                    br.replaceWith(NavigableString(' '))
                for _ in contents.findAll('a'):
                    _.extract()
                a.append(contents)
                #logger.debug('From label_name = %r to a = %r' % (label_name, a))
            else:
                s = Tag(name='span')
                if label_name is None:
                    s.string = '(unnamed)'  # XXX
                else:
                    s.string = label_name
                add_class(s, 'toc_name')
                a.append(s)
        else:
            # short styles: pick the label text based on the classes
            if CLASS_ONLY_NUMBER in classes:
                label = label_number
            elif CLASS_NUMBER_NAME in classes:
                if label_name is None:
                    label = label_what_number + \
                        ' - ' + '(unnamed)'  # warning
                else:
                    label = label_what_number + ' - ' + label_name
            elif CLASS_ONLY_NAME in classes:
                if label_name is None:
                    label = '(unnamed)'  # warning
                else:
                    label = label_name
            else:
                label = label_what_number
            span1 = Tag(name='span')
            add_class(span1, 'reflabel')
            span1.string = label
            a.append(span1)
    logger.debug('substituting_empty_links: %d total, %d errors' %
                 (n, nerrors))
def get_minimal_document(body_contents, title=None, add_markdown_css=False,
                         add_manual_css=False, stylesheet=None,
                         extra_css=None):
    """ Creates the minimal html document with MCDPL css.

        add_markdown_css: language + markdown
        add_manual_css: language + markdown + (manual*)

        extra_css = additional CSS contents

        Returns the serialized XHTML document as a string (with doctype).
     """
    check_html_fragment(body_contents)
    soup = bs("")
    assert soup.name == 'fragment'
    if title is None:
        title = ''
    html = Tag(name='html')
    head = Tag(name='head')
    body = Tag(name='body')
    head.append(Tag(name='meta',
                    attrs={'http-equiv': "Content-Type",
                           'content': "application/xhtml+xml; charset=utf-8"}))
    if stylesheet is None:
        stylesheet = 'v_mcdp_render_default'
    if add_markdown_css or add_manual_css:
        link = Tag(name='link')
        link['rel'] = 'stylesheet'
        link['type'] = 'text/css'
        link['href'] = get_css_filename('compiled/%s' % stylesheet)
        head.append(link)
    tag_title = Tag(name='title')
    tag_title.append(NavigableString(title))
    head.append(tag_title)
    # the parsed fragment becomes a plain div inside the body
    parsed = bs(body_contents)
    assert parsed.name == 'fragment'
    parsed.name = 'div'
    body.append(parsed)
    html.append(head)
    html.append(body)
    soup.append(html)
    if extra_css is not None:
        add_extra_css(soup, extra_css)
    s = to_html_stripping_fragment_document(soup)
    assert not 'DOCTYPE' in s
    # s = html.prettify()  # no: it removes empty text nodes
    # ns="""<?xml version="1.0" encoding="utf-8" ?>"""
    ns = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN" "http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd">"""
    res = ns + '\n' + s
    # if add_manual_css and MCDPConstants.manual_link_css_instead_of_including:
    #     assert 'manual.css' in res, res
    # defensive cleanup in case a doctype leaked into the div
    res = res.replace('<div><!DOCTYPE html>', '<div>')
    return res
def render_complete(library, s, raise_errors, realpath, generate_pdf=False,
                    check_refs=False, use_mathjax=True, filter_soup=None,
                    symbols=None):
    """ Transforms markdown into html and then renders the mcdp snippets inside.

        s: a markdown string with embedded html snippets

        Returns an HTML string; not a complete document.

        filter_soup(library, soup)
    """
    s0 = s
    check_good_use_of_special_paragraphs(s0, realpath)
    raise_missing_image_errors = raise_errors

    # Imports here because of circular dependencies
    from .latex.latex_preprocess import extract_maths, extract_tabular
    from .latex.latex_preprocess import latex_preprocessing
    from .latex.latex_preprocess import replace_equations
    from .macro_col2 import col_macros, col_macros_prepare_before_markdown
    from .mark.markd import render_markdown
    from .preliminary_checks import do_preliminary_checks_and_fixes
    from .prerender_math import prerender_mathjax

    if isinstance(s, unicode):
        msg = 'I expect a str encoded with utf-8, not unicode.'
        raise_desc(TypeError, msg, s=s)

    # need to do this before do_preliminary_checks_and_fixes
    # because of & char
    s, tabulars = extract_tabular(s)

    s = do_preliminary_checks_and_fixes(s)

    # put back tabular, because extract_maths needs to grab them
    for k, v in tabulars.items():
        assert k in s
        s = s.replace(k, v)

    # copy all math content,
    #  between $$ and $$
    #  between various limiters etc.
    # returns a dict(string, substitution)
    s, maths = extract_maths(s)
    # print('maths = %s' % maths)
    for k, v in maths.items():
        # sanity check: a single-$ fragment containing a blank line is
        # almost certainly a delimiter mistake
        if v[0] == '$' and v[1] != '$$':
            if '\n\n' in v:
                msg = 'Suspicious math fragment %r = %r' % (k, v)
                logger.error(maths)
                logger.error(msg)
                raise ValueError(msg)

    s = latex_preprocessing(s)
    s = '<div style="display:none">Because of mathjax bug</div>\n\n\n' + s

    # cannot parse html before markdown, because md will take
    # invalid html, (in particular '$ ciao <ciao>' and make it work)
    s = s.replace('*}', '\*}')

    s, mcdpenvs = protect_my_envs(s)
    # print('mcdpenvs = %s' % maths)

    s = col_macros_prepare_before_markdown(s)

    # print(indent(s, 'before markdown | '))
    s = render_markdown(s)
    # print(indent(s, 'after  markdown | '))

    for k, v in maths.items():
        if not k in s:
            msg = 'Cannot find %r (= %r)' % (k, v)
            raise_desc(DPInternalError, msg, s=s)

        def preprocess_equations(x):
            # this gets mathjax confused
            x = x.replace('>', '\\gt{}')  # need brace; think a<b -> a\lt{}b
            x = x.replace('<', '\\lt{}')
            # print('replaced equation %r by %r ' % (x0, x))
            return x

        v = preprocess_equations(v)
        s = s.replace(k, v)

    s = replace_equations(s)
    s = s.replace('\\*}', '*}')

    # this parses the XML
    soup = bs(s)
    other_abbrevs(soup)
    # need to process tabular before mathjax
    escape_for_mathjax(soup)

    # print(indent(s, 'before prerender_mathjax | '))
    # mathjax must be after markdown because of code blocks using "$"
    s = to_html_stripping_fragment(soup)
    if use_mathjax:
        s = prerender_mathjax(s, symbols)
    soup = bs(s)
    escape_for_mathjax_back(soup)
    s = to_html_stripping_fragment(soup)
    # print(indent(s, 'after prerender_mathjax | '))

    for k, v in mcdpenvs.items():
        # there is this case:
        # ~~~
        # <pre> </pre>
        # ~~~
        s = s.replace(k, v)

    s = s.replace('<p>DRAFT</p>', '<div class="draft">')
    s = s.replace('<p>/DRAFT</p>', '</div>')

    soup = bs(s)
    mark_console_pres(soup)
    try:
        substitute_github_refs(soup, defaults={})
    except Exception as e:
        # best-effort: a failure here may not be the writer's fault
        msg = 'I got an error while substituting github: references.'
        msg += '\nI will ignore this error because it might not be the fault of the writer.'
        msg += '\n\n' + indent(str(e), '|', ' error: |')
        logger.warn(msg)

    # must be before make_figure_from_figureid_attr()
    display_files(soup, defaults={}, raise_errors=raise_errors)
    make_figure_from_figureid_attr(soup)
    col_macros(soup)
    fix_subfig_references(soup)

    library = get_library_from_document(soup, default_library=library)
    from mcdp_docs.highlight import html_interpret
    html_interpret(library, soup, generate_pdf=generate_pdf,
                   raise_errors=raise_errors, realpath=realpath)
    if filter_soup is not None:
        filter_soup(library=library, soup=soup)
    embed_images_from_library2(soup=soup, library=library,
                               raise_errors=raise_missing_image_errors)
    make_videos(soup=soup)

    if check_refs:
        check_if_any_href_is_invalid(soup)

    # developer-only: compile less styles locally
    if getuser() == 'andrea':
        if MCDPConstants.preprocess_style_using_less:
            run_lessc(soup)
        else:
            logger.warning(
                'preprocess_style_using_less=False might break the manual')

    fix_validation_problems(soup)
    strip_pre(soup)
    if MCDPManualConstants.enable_syntax_higlighting:
        syntax_highlighting(soup)
    if MCDPManualConstants.enforce_status_attribute:
        check_status_codes(soup, realpath)
    if MCDPManualConstants.enforce_lang_attribute:
        check_lang_codes(soup)

    # Fixes the IDs (adding 'sec:'); add IDs to missing ones
    globally_unique_id_part = 'autoid-DO-NOT-USE-THIS-VERY-UNSTABLE-LINK-' + get_md5(
        s0)[:5]
    fix_ids_and_add_missing(soup, globally_unique_id_part)

    check_no_patently_wrong_links(soup)

    s = to_html_stripping_fragment(soup)
    s = replace_macros(s)
    return s
def manual_join(template, files_contents, stylesheet, remove=None,
                extra_css=None, remove_selectors=None, hook_before_toc=None,
                references=None, resolve_references=True,
                hook_before_final_pass=None, require_toc_placeholder=False,
                permalink_prefix=None, crossrefs_aug=None, aug0=None):
    """
        files_contents: a list of tuples that can be cast to DocToJoin:
        where the string is a unique one to be used for job naming.

        extra_css: if not None, a string of more CSS to be added
        Remove_selectors: list of selectors to remove (e.g. ".draft").

        hook_before_toc if not None is called with
        hook_before_toc(soup=soup) just before generating the toc

        Returns an AugmentedResult whose result is the joined document
        as utf-8 encoded bytes.
    """
    result = AugmentedResult()
    if references is None:
        references = {}
    check_isinstance(files_contents, list)
    if crossrefs_aug is None:
        crossrefs = Tag(name='no-cross-refs')
    else:
        crossrefs = bs(crossrefs_aug.get_result())
        result.merge(crossrefs_aug)
    if aug0 is not None:
        result.merge(aug0)

    # no-op timer kept so the `with timeit(...)` annotations below still parse
    @contextmanager
    def timeit(_):
        yield

    with timeit('manual_join'):
        files_contents = [DocToJoin(*_) for _ in files_contents]

        # cannot use bs because entire document
        with timeit('parsing template'):
            template0 = template
            template = replace_macros(template)
            template_soup = BeautifulSoup(template, 'lxml',
                                          from_encoding='utf-8')
            d = template_soup
            if d.html is None:
                s = "Invalid template"
                raise_desc(ValueError, s, template0=template0)

        with timeit('adding head'):
            assert d.html is not None
            assert '<html' in str(d)
            head = d.find('head')
            if head is None:
                msg = 'Could not find <head> in template:'
                logger.error(msg)
                logger.error(str(d))
                raise Exception(msg)
            assert head is not None
            for x in get_manual_css_frag().contents:
                head.append(x.__copy__())

        with timeit('adding stylesheet'):
            if stylesheet is not None:
                link = Tag(name='link')
                link['rel'] = 'stylesheet'
                link['type'] = 'text/css'
                from mcdp_report.html import get_css_filename
                link['href'] = get_css_filename('compiled/%s' % stylesheet)
                head.append(link)

        with timeit('making basename2soup'):
            # parse each document fragment, keyed by (unique) docname
            basename2soup = OrderedDict()
            for doc_to_join in files_contents:
                if doc_to_join.docname in basename2soup:
                    msg = 'Repeated docname %r' % doc_to_join.docname
                    raise ValueError(msg)
                from .latex.latex_preprocess import assert_not_inside
                if isinstance(doc_to_join.contents, AugmentedResult):
                    result.merge(doc_to_join.contents)
                    contents = doc_to_join.contents.get_result()
                else:
                    contents = doc_to_join.contents
                assert_not_inside(contents, '<fragment')
                assert_not_inside(contents, 'DOCTYPE')
                frag = bs(contents)
                basename2soup[doc_to_join.docname] = frag

        # with timeit('fix_duplicate_ids'):
        # XXX
        # fix_duplicated_ids(basename2soup)

        with timeit('copy contents'):
            # move every fragment's children into the template body
            body = d.find('body')
            add_comments = False
            for docname, content in basename2soup.items():
                if add_comments:
                    body.append(NavigableString('\n\n'))
                    body.append(
                        Comment('Beginning of document dump of %r' % docname))
                    body.append(NavigableString('\n\n'))
                try_faster = True
                if try_faster:
                    for e in list(content.children):
                        body.append(e.extract())
                else:
                    copy_contents_into(content, body)
                if add_comments:
                    body.append(NavigableString('\n\n'))
                    body.append(Comment('End of document dump of %r' % docname))
                    body.append(NavigableString('\n\n'))

        with timeit('extract_bibtex_blocks'):
            extract_bibtex_blocks(d)

        with timeit('ID_PUT_BIB_HERE'):
            ID_PUT_BIB_HERE = MCDPManualConstants.ID_PUT_BIB_HERE
            bibhere = d.find('div', id=ID_PUT_BIB_HERE)
            if bibhere is None:
                msg = ('Could not find #%s in document. '
                       'Adding one at end of document.') % ID_PUT_BIB_HERE
                result.note_warning(msg)
                bibhere = Tag(name='div')
                bibhere.attrs['id'] = ID_PUT_BIB_HERE
                d.find('body').append(bibhere)
            do_bib(d, bibhere)

        with timeit('hook_before_final_pass'):
            if hook_before_final_pass is not None:
                hook_before_final_pass(soup=d)

        with timeit('document_final_pass_before_toc'):
            location = LocationUnknown()
            document_final_pass_before_toc(d, remove, remove_selectors,
                                           result, location)

        with timeit('hook_before_toc'):
            if hook_before_toc is not None:
                hook_before_toc(soup=d)

        with timeit('generate_and_add_toc'):
            try:
                generate_and_add_toc(d, raise_error=True, res=result)
            except NoTocPlaceholder as e:
                if require_toc_placeholder:
                    msg = 'Could not find toc placeholder: %s' % e
                    # logger.error(msg)
                    if aug0 is not None:
                        result.note_error(msg)
                    else:
                        raise Exception(msg)

        with timeit('document_final_pass_after_toc'):
            document_final_pass_after_toc(
                soup=d, crossrefs=crossrefs,
                resolve_references=resolve_references, res=result)

        if extra_css is not None:
            logger.info('adding extra CSS')
            add_extra_css(d, extra_css)

        with timeit('document_only_once'):
            document_only_once(d)

        location = LocationUnknown()
        substitute_github_refs(d, defaults={}, res=result, location=location)

        with timeit('another A pass'):
            # resolve external references: rewrite hrefs and fill empty links
            for a in d.select('a[href]'):
                href = a.attrs['href']
                if href in references:
                    r = references[href]
                    a.attrs['href'] = r.url
                    if not a.children:  # empty
                        a.append(r.title)

        # do not use to_html_stripping_fragment - this is a complete doc
        # mark_in_html(result, soup=d)

        add_github_links_if_edit_url(soup=d, permalink_prefix=permalink_prefix)

        with timeit('converting to string'):
            res = unicode(d)

        with timeit('encoding'):
            res = res.encode('utf8')

        logger.info('done - %.1f MB' % (len(res) / (1024 * 1024.0)))

        result.set_result(res)
        return result
def add_html_links(frag, library_name, get_link, get_link_library):
    """ Adds links to models.

        Parses the HTML fragment `frag`, wraps each recognized entity name
        (model/NDP, template, poset, library) in an
        <a class="link-to-model" target="_blank"> whose href is computed by
        the callbacks, and returns the modified fragment as a string.

        frag: HTML fragment (string) to process.
        library_name: library to use for names without an explicit
            library qualifier.
        get_link(specname, libname, thingname) -> url
        get_link_library(libname) -> url

        Either callback may raise NoSuchLibrary; in that case the span is
        tagged with the 'library-not-found' CSS class instead of linked.
    """
    soup = bs(frag)

    # look for links of the type:
    # <span class="FromLibraryKeyword">new</span>
    # <span class="NDPName"> Actuation_a2_vel</span>
    # </span>

    def get_name_from_tag(tag):
        # break_string() returns (leading-ws, middle, trailing-ws);
        # we only want the name itself, as a UTF-8 byte string (Python 2).
        _, middle, _ = break_string(tag.string)
        return middle.encode('utf-8')

    def add_link_to_ndpname(tag, href):
        # Replace the tag's text with <leading ws><a>name</a><trailing ws>,
        # preserving the original surrounding whitespace.
        initial, middle, final = break_string(tag.string)
        tag.string = ''
        name = middle
        attrs = {'class': 'link-to-model', 'href': href, 'target': '_blank'}
        new_tag = Tag(name="a", attrs=attrs)
        new_tag.string = name
        tag.append(NavigableString(initial))
        tag.append(new_tag)
        tag.append(NavigableString(final))

    def mark_not_found(tag):
        # Visual marker for names whose library could not be resolved.
        add_class(tag, 'library-not-found')

    def sub_ndpname():
        for tag in soup.select('span.NDPName'):
            # NOTE(review): assumes tag.parent always carries a 'class'
            # attribute -- would raise KeyError otherwise; confirm upstream.
            if 'NDPNameWithLibrary' in tag.parent['class']:
                # library-qualified names are handled by
                # sub_ndpname_with_library() instead
                continue
            ndpname = get_name_from_tag(tag)
            try:
                href = get_link(SPEC_MODELS, library_name, ndpname)
                add_link_to_ndpname(tag=tag, href=href)
            except NoSuchLibrary:
                mark_not_found(tag)

    def sub_ndpname_with_library():
        for tag in soup.select('span.NDPNameWithLibrary'):
            # NOTE(review): assumes both child spans exist; [0] would raise
            # IndexError on malformed markup.
            tag_libraryname = list(tag.select('span.LibraryName'))[0]
            tag_ndpname = list(tag.select('span.NDPName'))[0]
            ndpname = get_name_from_tag(tag_ndpname)
            libname = get_name_from_tag(tag_libraryname)
            try:
                href = get_link(SPEC_MODELS, libname, ndpname)
                add_link_to_ndpname(tag=tag_ndpname, href=href)
            except NoSuchLibrary:
                mark_not_found(tag)

        # if False:
        #     # TODO: add this as a feature
        #     img = '/solver/%s/compact_graph' % name
        #     attrs = {'src': img, 'class': 'popup'}
        #     new_tag = soup.new_tag("img", **attrs)
        #     tag.append(new_tag)

    def sub_template_name():
        for tag in soup.select('span.TemplateName'):
            if 'TemplateNameWithLibrary' in tag.parent['class']:
                continue
            templatename = get_name_from_tag(tag)
            try:
                href = get_link(SPEC_TEMPLATES, library_name, templatename)
                add_link_to_ndpname(tag=tag, href=href)
            except NoSuchLibrary:
                mark_not_found(tag)

    def sub_template_name_with_library():
        for tag in soup.select('span.TemplateNameWithLibrary'):
            tag_libraryname = list(tag.select('span.LibraryName'))[0]
            tag_templatename = list(tag.select('span.TemplateName'))[0]
            templatename = get_name_from_tag(tag_templatename)
            libname = get_name_from_tag(tag_libraryname)
            try:
                href = get_link(SPEC_TEMPLATES, libname, templatename)
                add_link_to_ndpname(tag=tag_templatename, href=href)
            except NoSuchLibrary:
                mark_not_found(tag)

    def sub_poset_name():
        for tag in soup.select('span.PosetName'):
            if 'PosetNameWithLibrary' in tag.parent['class']:
                continue
            posetname = get_name_from_tag(tag)
            try:
                href = get_link(SPEC_POSETS, library_name, posetname)
                add_link_to_ndpname(tag=tag, href=href)
            except NoSuchLibrary:
                mark_not_found(tag)

    def sub_poset_name_with_library():
        for tag in soup.select('span.PosetNameWithLibrary'):
            tag_libraryname = list(tag.select('span.LibraryName'))[0]
            tag_posetname = list(tag.select('span.PosetName'))[0]
            posetname = get_name_from_tag(tag_posetname)
            libname = get_name_from_tag(tag_libraryname)
            try:
                href = get_link(SPEC_POSETS, libname, posetname)
                add_link_to_ndpname(tag=tag_posetname, href=href)
            except NoSuchLibrary:
                mark_not_found(tag)

    def sub_libraryname():
        # Need to be last: earlier passes read the text of LibraryName
        # spans; wrapping them in <a> first would break get_name_from_tag.
        for tag in soup.select('span.LibraryName'):
            libname = get_name_from_tag(tag)
            try:
                href = get_link_library(libname)
                add_link_to_ndpname(tag=tag, href=href)
            except NoSuchLibrary:
                mark_not_found(tag)

    try:
        sub_ndpname()
        sub_ndpname_with_library()
        sub_template_name()
        sub_template_name_with_library()
        sub_poset_name()
        sub_poset_name_with_library()
        sub_libraryname()  # keep last
    except:
        # print soup
        raise

    # keep above last!

    # Add documentation links for each span
    # that has a class that finishes in "Keyword"
    if False:
        # Disabled feature: link every *Keyword span to the language notes.
        def select_tags():
            for tag in soup.select('span'):
                if 'class' in tag.attrs:
                    klass = tag.attrs['class'][0]
                    if 'Keyword' in klass:
                        yield tag

        manual = '/docs/language_notes/'
        for tag in select_tags():
            keyword = tag.attrs['class'][0]
            link = manual + '#' + keyword
            text = tag.string
            tag.string = ''
            attrs = {
                'class': 'link-to-keyword',
                'href': link,
                'target': '_blank'
            }
            new_tag = Tag(name="a", attrs=attrs)
            new_tag.string = text
            tag.append(new_tag)

    return to_html_stripping_fragment(soup)
def sub_link(a, element_id, element, raise_errors):
    """ Fills in the text of an empty cross-reference link.

        a: the link with href= #element_id
        element: the link to which we refer

        Builds the link text from the element's label attributes
        (LABEL_WHAT / LABEL_NUMBER / LABEL_NAME / LABEL_WHAT_NUMBER),
        choosing the format according to the <a>'s CSS classes
        (toc_link, only-number, number-name, only-name, or default).

        If `element` is falsy or misses label attributes, the link is
        annotated with a 'Ref. error' note; with raise_errors=True a
        ValueError is raised instead of continuing.
    """
    CLASS_ONLY_NUMBER = MCDPManualConstants.CLASS_ONLY_NUMBER
    CLASS_NUMBER_NAME = MCDPManualConstants.CLASS_NUMBER_NAME
    CLASS_ONLY_NAME = MCDPManualConstants.CLASS_ONLY_NAME

    if not element:
        # Target of the reference does not exist in the document.
        msg = ('Cannot find %s' % element_id)
        note_error2(a, 'Ref. error', 'substituting_empty_links():\n' + msg)
        #nerrors += 1
        if raise_errors:
            raise ValueError(msg)
        return
    # if there is a query, remove it
    # if le.query is not None:
    #     new_href = '#' + le.eid
    #     a.attrs['href'] = new_href
    #     logger.info('setting new href= %s' % (new_href))

    if (not LABEL_WHAT_NUMBER in element.attrs) or \
            (not LABEL_NAME in element.attrs):
        # Element was never labeled (numbering pass did not reach it).
        msg = (
            'substituting_empty_links: Could not find attributes %s or %s in %s'
            % (LABEL_NAME, LABEL_WHAT_NUMBER, element))
        if True:
            # Deliberately downgraded to a warning (dead else branch kept
            # for reference).
            logger.warning(msg)
        else:
            # note_error_msg(a, msg)
            note_error2(a, 'Ref. error', 'substituting_empty_links():\n' + msg)
            # nerrors += 1
            if raise_errors:
                raise ValueError(msg)
        return

    # NOTE(review): only LABEL_WHAT_NUMBER and LABEL_NAME are guarded above;
    # reading LABEL_NUMBER / LABEL_WHAT could still KeyError -- presumably
    # the labeling pass always sets all four together; confirm.
    label_what_number = element.attrs[LABEL_WHAT_NUMBER]
    label_number = element.attrs[LABEL_NUMBER]
    label_what = element.attrs[LABEL_WHAT]
    label_name = element.attrs[LABEL_NAME]

    classes = list(a.attrs.get('class', []))  # bug: I was modifying
    # if le.query is not None:
    #     classes.append(le.query)

    if 'toc_link' in classes:
        # TOC entries get structured spans: what / number / separator / name.
        s = Tag(name='span')
        s.string = label_what
        add_class(s, 'toc_what')
        a.append(s)

        a.append(' ')

        s = Tag(name='span')
        s.string = label_number
        add_class(s, 'toc_number')
        a.append(s)

        s = Tag(name='span')
        s.string = ' - '
        add_class(s, 'toc_sep')
        a.append(s)

        if label_name is not None and '<' in label_name:
            # The name contains markup: parse it and sanitize it for use
            # inside the TOC link.
            contents = bs(label_name)
            # sanitize the label name
            for br in contents.findAll('br'):
                # line breaks become plain spaces in the TOC
                br.replaceWith(NavigableString(' '))
            for _ in contents.findAll('a'):
                # nested links inside a link are invalid HTML; drop them
                _.extract()

            contents.name = 'span'
            add_class(contents, 'toc_name')
            a.append(contents)
            #logger.debug('From label_name = %r to a = %r' % (label_name, a))
        else:
            if label_name is None:
                s = Tag(name='span')
                s.string = '(unnamed)'  # XXX
            else:
                # bs() wraps parses in a <fragment> root; rename it to span.
                s = bs(label_name)
                assert s.name == 'fragment'
                s.name = 'span'
                # add_class(s, 'produced-here') # XXX
            add_class(s, 'toc_name')
            a.append(s)
    else:
        # Regular in-text reference: build a single label string.
        if CLASS_ONLY_NUMBER in classes:
            label = label_number
        elif CLASS_NUMBER_NAME in classes:
            if label_name is None:
                label = label_what_number + \
                    ' - ' + '(unnamed)'  # warning
            else:
                label = label_what_number + ' - ' + label_name
        elif CLASS_ONLY_NAME in classes:
            if label_name is None:
                label = '(unnamed)'  # warning
            else:
                label = label_name
        else:
            # default behavior
            if string_starts_with(['fig:', 'tab:', 'bib:', 'code:'], element_id):
                # figures/tables/bib/code default to "Figure 3"-style labels
                label = label_what_number
            elif label_name is None:
                label = label_what_number
            else:
                label = label_what_number + ' - ' + label_name

        frag = bs(label)
        assert frag.name == 'fragment'
        frag.name = 'span'
        add_class(frag, 'reflabel')
        a.append(frag)
def prerender_mathjax_(html):
    """ Runs the prerender.js script to pre-render the MathJax into images.

        html: an HTML fragment/body string; must NOT contain an '<html>'
            element (asserted below).

        Returns the processed HTML string, with the MathJax glyph <svg>,
        the MathJax_SVG_styles <style>, and the workaround <div> moved to
        the end of the document (required to work around a MathJax bug).

        Raises PrerenderError when node / MathJax-node / jsdom are missing,
        when LaTeX fails to parse, or on any other non-zero exit.
    """
    assert not '<html>' in html, html
    use = get_nodejs_bin()

    # Move display math out of <p> wrappers so the prerenderer sees it
    # as block math.
    html = html.replace('<p>$$', '\n$$')
    html = html.replace('$$</p>', '$$\n')
    script = get_prerender_js()
    mcdp_tmp_dir = get_mcdp_tmp_dir()
    prefix = 'prerender_mathjax_'
    d = mkdtemp(dir=mcdp_tmp_dir, prefix=prefix)
    try:
        f_html = os.path.join(d, 'file.html')
        with open(f_html, 'w') as f:
            f.write(html)

        try:
            f_out = os.path.join(d, 'out.html')
            cmd = [use, script, f_html, f_out]
            pwd = os.getcwd()
            # raise_on_error=False: we inspect res.ret/res.stderr ourselves
            # to produce friendlier error messages.
            res = system_cmd_result(pwd, cmd,
                                    display_stdout=False,
                                    display_stderr=False,
                                    raise_on_error=False)

            if res.ret:  # pragma: no cover
                if 'Error: Cannot find module' in res.stderr:
                    msg = 'You have to install the MathJax and/or jsdom libraries.'
                    msg += '\nOn Ubuntu, you can install them using:'
                    msg += '\n\n\tsudo apt-get install npm'
                    msg += '\n\n\tnpm install MathJax-node jsdom'
                    msg += '\n\n' + indent(res.stderr, ' |')
                    raise PrerenderError(msg)

                if 'parse error' in res.stderr:
                    lines = [
                        _ for _ in res.stderr.split('\n')
                        if 'parse error' in _
                    ]
                    assert lines
                    msg = 'LaTeX conversion errors:\n\n' + '\n'.join(lines)
                    raise PrerenderError(msg)

                msg = 'Unknown error (ret = %d).' % res.ret
                msg += '\n\n' + indent(res.stderr, ' |')
                raise PrerenderError(msg)

            with open(f_out) as f:
                data = f.read()

            # Read the data
            soup = bs(data)
            # find this and move it at the end
            # <style id="MathJax_SVG_styles"
            tag_style = soup.find(id='MathJax_SVG_styles')
            if not tag_style:
                msg = 'Expected to find style MathJax_SVG_styles'
                raise_desc(Exception, msg, soup=str(soup))

            # <svg style="display: none;"><defs id="MathJax_SVG_glyphs">
            tag_svg_defs = soup.find('svg', style="display: none;")
            if not tag_svg_defs:
                msg = 'Expected to find tag <svg display=none>'
                raise_desc(Exception, msg, soup=str(soup))

            other_tag = soup.find('div', style="display:none")
            if not other_tag:
                msg = 'Expected to find tag <div style="display:none">'
                raise_desc(Exception, msg, soup=str(soup))

            #<div style="display:none">Because of mathjax bug</div>
            soup.append(other_tag.extract())
            soup.append(tag_svg_defs.extract())
            soup.append(tag_style.extract())
            data = to_html_stripping_fragment(soup)
            return data
        except CmdException:  # pragma: no cover
            # Fix: bare `raise` re-raises with the ORIGINAL traceback;
            # the previous `raise e` reset it (Python 2), hiding where
            # the command actually failed.
            raise
    finally:
        # Always clean up the temporary working directory.
        shutil.rmtree(d)
def create_reveal(soup, res): assert isinstance(soup, Tag) body = soup.find('body') head = soup.find('head') if head is None: msg = 'Could not find <head>' raise Exception(msg) if body is None: msg = 'Could not find <body>' raise Exception(msg) # base = 'https://cdnjs.cloudflare.com/ajax/libs/reveal.js/3.6.0' # base = "http://cdn.rawgit.com/hakimel/reveal.js/3.5.0" base = 'revealjs' # Remove this: # <script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-MML-AM_CHTML"></script> for script in list(soup.select('script[src]')): if 'MathJax.js' in script.attrs['src']: script.extract() header = """ <link rel="stylesheet" href="BASE/css/reveal.css"> """ header = header.replace('BASE', base) copy_contents_into_beginning(bs(header), head) # language=html footer = """ <script src="BASE/lib/js/head.min.js"></script> <script src="BASE/js/reveal.js"></script> <script> options = { transition: 'none', center: false, dependencies: [ { src: 'BASE/plugin/notes/notes.js', async: true }, // MathJax { src: 'BASE/plugin/math/math.js', async: true } ], // The "normal" size of the presentation, aspect ratio will be preserved // when the presentation is scaled to fit different resolutions. Can be // specified using percentage units. width: 960, height: 700, // Factor of the display size that should remain empty around the content margin: 0.1, slideNumber: true, history: true, // change the url fragment }; Reveal.initialize(options); </script> """ footer = footer.replace('BASE', base) copy_contents_into(bs(footer), body) """ <section class="without-header-inside" level="book"> <section class="without-header-inside" level="part"> """ s1 = body.find('section', attrs=dict(level="book")) if s1: s1.name = 'div' s1.attrs['class'] = 'reveal' s2 = body.find('section', attrs=dict(level="part")) if s2: s2.name = 'div' s2.attrs['class'] = 'slides'