def download_reveal(output_dir):
    res = AugmentedResult()
    url = "https://github.com/hakimel/reveal.js/archive/3.6.0.zip"
    target = os.path.join(output_dir, 'revealjs')
    if os.path.exists(target):
        logger.debug('skipping download because target exists: %s' % target)
    else:
        dest = os.path.join(output_dir, 'reveal-3.6.0.zip')
        if not os.path.exists(dest):
            logger.info('Downloading %s' % url)
            response = requests.get(url, stream=True)
            response.raise_for_status()  # do not write an error page into the zip
            with open(dest, 'wb') as f:
                shutil.copyfileobj(response.raw, f)
            logger.info('downloaded to %s' % dest)

        target_tmp = target + '.tmp'
        import zipfile
        with zipfile.ZipFile(dest, 'r') as zip_ref:
            zip_ref.extractall(target_tmp)
        actual = os.path.join(target_tmp, 'reveal.js-3.6.0')
        os.rename(actual, target)
        logger.debug('extracted to %r' % target)

    check = [
        "plugin/notes/notes.js",
        "plugin/math/math.js",
        "lib/js/head.min.js",
        "js/reveal.js",
    ]
    for c in check:
        fn = os.path.join(target, c)
        if not os.path.exists(fn):
            msg = 'Incomplete reveal download, not found: %s' % fn
            res.note_error(msg)
    return res
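
# A minimal, self-contained sketch of the streaming download-and-extract
# pattern used by download_reveal() above. The helper is hypothetical (not
# part of this module's API); it uses only requests and the standard library.
def _example_fetch_and_unzip(url, dest_zip, target_dir):
    import shutil
    import zipfile
    import requests
    response = requests.get(url, stream=True)
    response.raise_for_status()  # fail early instead of unzipping an error page
    with open(dest_zip, 'wb') as f:
        # stream the body to disk without loading it all in memory
        shutil.copyfileobj(response.raw, f)
    with zipfile.ZipFile(dest_zip, 'r') as z:
        z.extractall(target_dir)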
def move_things_around(soup, raise_if_errors=False, res=None):
    """
        Looks for tags like:

            <move-here src="#line_detector2-line_detector_node2-autogenerated"/>

    """
    if res is None:
        res = AugmentedResult()
    from mcdp_docs.check_missing_links import get_id2element
    with timeit_wall('getting all IDs'):
        id2element, duplicates = get_id2element(soup, 'id')

    for e in soup.find_all('move-here'):
        if 'src' not in e.attrs:
            msg = 'Expected attribute "src" for element %s' % str(e)
            raise ValueError(msg)
        src = e.attrs['src']
        if not src.startswith('#'):
            msg = 'Expected attribute "src" to start with "#" for element %s.' % str(e)
            raise ValueError(msg)
        nid = src[1:]
        # Use the precomputed index; calling soup.find(id=nid) for each
        # element would make this loop O(n^2).
        el = id2element.get(nid, None)
        if el is None:
            msg = 'move-here: Could not find ID %r.' % nid
            e.name = 'span'
            res.note_error(msg, HTMLIDLocation.for_element(e))
            if raise_if_errors:
                raise ValueError(msg)
            else:
                continue
        el.extract()
        e.replace_with(el)
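
# A minimal illustration of the <move-here> mechanics (the markup is
# hypothetical; bs() is the parser wrapper used throughout this module):
def _example_move_here():
    html = ('<div id="payload">content</div>'
            '<section><move-here src="#payload"></move-here></section>')
    soup = bs(html)
    move_things_around(soup)
    # The <div id="payload"> now sits inside <section>, where <move-here> was.
    return soup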
def manual_join(template, files_contents,
                stylesheet, remove=None, extra_css=None,
                remove_selectors=None,
                hook_before_toc=None,
                references=None,
                resolve_references=True,
                hook_before_final_pass=None,
                require_toc_placeholder=False,
                permalink_prefix=None,
                crossrefs_aug=None,
                aug0=None):
    """
        files_contents: a list of tuples that can be cast to DocToJoin;
        the docname string must be unique, as it is used for job naming.

        extra_css: if not None, a string of additional CSS to add.

        remove_selectors: list of selectors to remove (e.g. ".draft").

        hook_before_toc: if not None, called as hook_before_toc(soup=soup)
        just before generating the toc.
    """
    result = AugmentedResult()

    if references is None:
        references = {}
    check_isinstance(files_contents, list)

    if crossrefs_aug is None:
        crossrefs = Tag(name='no-cross-refs')
    else:
        crossrefs = bs(crossrefs_aug.get_result())
        result.merge(crossrefs_aug)

    if aug0 is not None:
        result.merge(aug0)

    @contextmanager
    def timeit(_):
        # no-op stand-in for a timing context manager
        yield

    with timeit('manual_join'):

        files_contents = [DocToJoin(*_) for _ in files_contents]

        with timeit('parsing template'):
            template0 = template
            template = replace_macros(template)
            # cannot use bs(): this is an entire document, not a fragment
            template_soup = BeautifulSoup(template, 'lxml', from_encoding='utf-8')
            d = template_soup
            if d.html is None:
                s = "Invalid template"
                raise_desc(ValueError, s, template0=template0)

        with timeit('adding head'):
            assert d.html is not None
            assert '<html' in str(d)
            head = d.find('head')
            if head is None:
                msg = 'Could not find <head> in template:'
                logger.error(msg)
                logger.error(str(d))
                raise Exception(msg)
            for x in get_manual_css_frag().contents:
                head.append(x.__copy__())

        with timeit('adding stylesheet'):
            if stylesheet is not None:
                link = Tag(name='link')
                link['rel'] = 'stylesheet'
                link['type'] = 'text/css'
                from mcdp_report.html import get_css_filename
                link['href'] = get_css_filename('compiled/%s' % stylesheet)
                head.append(link)

        with timeit('making basename2soup'):
            basename2soup = OrderedDict()
            for doc_to_join in files_contents:
                if doc_to_join.docname in basename2soup:
                    msg = 'Repeated docname %r' % doc_to_join.docname
                    raise ValueError(msg)
                from .latex.latex_preprocess import assert_not_inside
                if isinstance(doc_to_join.contents, AugmentedResult):
                    result.merge(doc_to_join.contents)
                    contents = doc_to_join.contents.get_result()
                else:
                    contents = doc_to_join.contents
                assert_not_inside(contents, '<fragment')
                assert_not_inside(contents, 'DOCTYPE')
                frag = bs(contents)
                basename2soup[doc_to_join.docname] = frag

        with timeit('copy contents'):
            body = d.find('body')
            add_comments = False
            for docname, content in basename2soup.items():
                if add_comments:
                    body.append(NavigableString('\n\n'))
                    body.append(Comment('Beginning of document dump of %r' % docname))
                    body.append(NavigableString('\n\n'))
                # moving the children directly is faster than copying them
                # with copy_contents_into()
                for e in list(content.children):
                    body.append(e.extract())
                if add_comments:
                    body.append(NavigableString('\n\n'))
                    body.append(Comment('End of document dump of %r' % docname))
                    body.append(NavigableString('\n\n'))

        with timeit('extract_bibtex_blocks'):
            extract_bibtex_blocks(d)

        with timeit('ID_PUT_BIB_HERE'):
            ID_PUT_BIB_HERE = MCDPManualConstants.ID_PUT_BIB_HERE
            bibhere = d.find('div', id=ID_PUT_BIB_HERE)
            if bibhere is None:
                msg = ('Could not find #%s in document. '
                       'Adding one at end of document.') % ID_PUT_BIB_HERE
                result.note_warning(msg)
                bibhere = Tag(name='div')
                bibhere.attrs['id'] = ID_PUT_BIB_HERE
                d.find('body').append(bibhere)
            do_bib(d, bibhere)

        with timeit('hook_before_final_pass'):
            if hook_before_final_pass is not None:
                hook_before_final_pass(soup=d)

        with timeit('document_final_pass_before_toc'):
            location = LocationUnknown()
            document_final_pass_before_toc(d, remove, remove_selectors, result, location)

        with timeit('hook_before_toc'):
            if hook_before_toc is not None:
                hook_before_toc(soup=d)

        with timeit('generate_and_add_toc'):
            try:
                generate_and_add_toc(d, raise_error=True, res=result)
            except NoTocPlaceholder as e:
                # without the requirement, a missing placeholder is tolerated
                if require_toc_placeholder:
                    msg = 'Could not find toc placeholder: %s' % e
                    if aug0 is not None:
                        result.note_error(msg)
                    else:
                        raise Exception(msg)

        with timeit('document_final_pass_after_toc'):
            document_final_pass_after_toc(
                soup=d, crossrefs=crossrefs,
                resolve_references=resolve_references, res=result)

        if extra_css is not None:
            logger.info('adding extra CSS')
            add_extra_css(d, extra_css)

        with timeit('document_only_once'):
            document_only_once(d)

        location = LocationUnknown()
        substitute_github_refs(d, defaults={}, res=result, location=location)

        with timeit('another A pass'):
            for a in d.select('a[href]'):
                href = a.attrs['href']
                if href in references:
                    r = references[href]
                    a.attrs['href'] = r.url
                    if not a.contents:  # empty link: fill in the title
                        a.append(r.title)

        # do not use to_html_stripping_fragment: this is a complete doc
        add_github_links_if_edit_url(soup=d, permalink_prefix=permalink_prefix)

        with timeit('converting to string'):
            res = unicode(d)

        with timeit('encoding'):
            res = res.encode('utf8')

        logger.info('done - %.1f MB' % (len(res) / (1024 * 1024.0)))

        result.set_result(res)
        return result
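
# A minimal usage sketch for manual_join(). The template and the contents are
# hypothetical, and the tuple layout is an assumption for illustration: check
# the DocToJoin definition for the actual fields.
def _example_manual_join():
    template = '<html><head></head><body></body></html>'
    files_contents = [
        # ASSUMPTION: tuples castable to DocToJoin, starting with (docname, contents)
        ('chapter1', '<h1 id="sec:one">One</h1>'),
        ('chapter2', '<h1 id="sec:two">Two</h1>'),
    ]
    aug = manual_join(template=template, files_contents=files_contents,
                      stylesheet=None)
    return aug.get_result()  # the joined document, as a UTF-8 string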
def substituting_empty_links(soup, raise_errors=False, res=None, extra_refs=None):
    """
        soup: where to look for references

        The default style is:

            [](#sec:systems)   ->  "Chapter 10"

        You can also use "class":

            <a href='#sec:name' class='only_number'></a>

    """
    if extra_refs is None:
        extra_refs = Tag(name='div')
    if res is None:
        res = AugmentedResult()

    for le in get_empty_links_to_fragment(soup, extra_refs=extra_refs, res=res):
        a = le.linker
        element_id = le.eid
        element = le.linked
        if element is None:
            msg = 'Cannot find %s' % element_id
            res.note_error(msg, HTMLIDLocation.for_element(a))
            if raise_errors:
                raise ValueError(msg)
            continue
        sub_link(a, element_id, element, res)

    for a in get_empty_links(soup):
        href = a.attrs.get('href', '(not present)')
        if not href:
            href = '""'
        if href.startswith('python:'):
            continue
        if href.startswith('http:') or href.startswith('https:'):
            msg = """
This link text is empty:

    ELEMENT

Note that the syntax for links in Markdown is:

    [link text](URL)

For internal links (where the URL starts with "#"), the documentation
system can fill in the title automatically, leading to the format:

    [](#other-section)

However, this does not work for external sites, such as:

    [](MYURL)

So you need to provide some text, such as:

    [this useful website](MYURL)
"""
            msg = msg.replace('ELEMENT', str(a))
            msg = msg.replace('MYURL', href)
            res.note_error(msg, HTMLIDLocation.for_element(a))
        else:
            msg = """
This link is empty:

    ELEMENT

It might be that the writer intended for this link to point
to something, but they got the syntax wrong.

    href = %s

As a reminder, to refer to other parts of the document, use
the syntax "#ID", such as:

    See [](#fig:my-figure).
    See [](#section-name).
""" % href
            msg = msg.replace('ELEMENT', str(a))
            res.note_error(msg, HTMLIDLocation.for_element(a))
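
# A minimal illustration of the empty-link substitution (hypothetical markup):
def _example_substituting_empty_links():
    soup = bs('<h1 id="sec:intro">Introduction</h1>'
              '<p><a href="#sec:intro"></a></p>')
    substituting_empty_links(soup)
    # The empty <a> has now been filled in from the target, so it
    # renders with the section's title text.
    return soup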
def get_cross_refs(src_dirs, permalink_prefix, extra_crossrefs, ignore=None):
    if ignore is None:
        ignore = []
    res = AugmentedResult()
    files = look_for_files(src_dirs, "crossref.html")

    id2file = {}
    soup = Tag(name='div')

    def add_from_soup(s, f, ignore_already_present, ignore_if_conflict):
        for img in list(s.find_all('img')):
            img.extract()

        for e in s.select('[base_url]'):
            e['external_crossref_file'] = f

        # Remove the entries with our own base_url.
        for e in list(s.select('[base_url]')):
            if e.attrs['base_url'] == permalink_prefix:
                e.extract()

        for e in s.select('[id]'):
            id_ = e.attrs['id']
            if id_ == 'container':
                # XXX: the container element is not a cross reference
                continue
            if id_ in id2file:
                if not ignore_already_present:
                    msg = 'Found two elements with same ID "%s":' % id_
                    msg += '\n %s' % id2file[id_]
                    msg += '\n %s' % f
                    res.note_error(msg)
            else:
                id2file[id_] = f
                e2 = e.__copy__()
                if ignore_if_conflict:
                    e2.attrs['ignore_if_conflict'] = '1'
                soup.append(e2)
                soup.append('\n')

    ignore = [os.path.realpath(_) for _ in ignore]

    for _f in files:
        if os.path.realpath(_f) in ignore:
            logger.info('Ignoring file %r' % _f)
            continue
        logger.info('cross ref file %s' % _f)
        with open(_f) as f:
            data = f.read()
        if permalink_prefix in data:
            logger.debug('skipping own file')
            continue
        s = bs(data)
        add_from_soup(s, _f, ignore_already_present=False, ignore_if_conflict=False)

    if extra_crossrefs is not None:
        logger.info('Reading external refs\n%s' % extra_crossrefs)
        try:
            r = requests.get(extra_crossrefs)
        except Exception as ex:
            msg = 'Could not read external cross reference links'
            msg += '\n %s' % extra_crossrefs
            msg += '\n\n' + indent(str(ex), ' > ')
            res.note_error(msg)
        else:
            logger.debug('%s %s' % (r.status_code, extra_crossrefs))
            if r.status_code == 404:
                msg = 'Could not read external cross refs: %s' % r.status_code
                msg += '\n url: ' + extra_crossrefs
                msg += '\n This is normal if you have not pushed this branch yet.'
                res.note_warning(msg)
            s = bs(r.text)
            add_from_soup(s, extra_crossrefs, ignore_already_present=True, ignore_if_conflict=True)

    res.set_result(str(soup))
    return res
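
# A minimal usage sketch for get_cross_refs() (paths and prefix hypothetical):
def _example_get_cross_refs():
    aug = get_cross_refs(src_dirs=['src/docs'],
                         permalink_prefix='https://example.org/book/',
                         extra_crossrefs=None)
    # The result is a <div> of aggregated cross-reference elements, as a string.
    return aug.get_result()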
def render_book(src_dirs, generate_pdf,
                data, realpath,
                use_mathjax,
                raise_errors,
                filter_soup=None,
                symbols=None,
                ignore_ref_errors=False):
    """ Returns an AugmentedResult(str). """
    res = AugmentedResult()
    from mcdp_docs.pipeline import render_complete

    librarian = get_test_librarian()
    # XXX: these might need to be changed
    if not MCDPConstants.softy_mode:
        for src_dir in src_dirs:
            librarian.find_libraries(src_dir)

    load_library_hooks = [librarian.load_library]
    library_ = MCDPLibrary(load_library_hooks=load_library_hooks)

    for src_dir in src_dirs:
        library_.add_search_dir(src_dir)

    d = tempfile.mkdtemp()
    library_.use_cache_dir(d)

    location = LocalFile(realpath)

    def filter_soup0(soup, library):
        if filter_soup is not None:
            filter_soup(soup=soup, library=library)
        add_edit_links2(soup, location)
        add_last_modified_info(soup, location)

    try:
        html_contents = render_complete(library=library_,
                                        s=data,
                                        raise_errors=raise_errors,
                                        realpath=realpath,
                                        use_mathjax=use_mathjax,
                                        symbols=symbols,
                                        generate_pdf=generate_pdf,
                                        filter_soup=filter_soup0,
                                        location=location,
                                        res=res,
                                        ignore_ref_errors=ignore_ref_errors)
    except DPSyntaxError as e:
        msg = 'Could not compile %s' % realpath
        location0 = LocationInString(e.where, location)
        res.note_error(msg, locations=location0)
        fail = "<p>This file could not be compiled</p>"
        res.set_result(fail)
        return res

    res.set_result(html_contents)
    return res
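
# A minimal usage sketch for render_book() (paths hypothetical):
def _example_render_book():
    with open('docs/chapter.md') as f:
        data = f.read()
    aug = render_book(src_dirs=['docs'], generate_pdf=False,
                      data=data, realpath='docs/chapter.md',
                      use_mathjax=True, raise_errors=False)
    return aug.get_result()  # the rendered HTML fragment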
def render_complete(library, s, raise_errors, realpath, generate_pdf=False,
                    check_refs=False, use_mathjax=True, filter_soup=None,
                    symbols=None, res=None, location=None, ignore_ref_errors=False):
    """
        Transforms markdown into html and then renders the mcdp snippets inside.

        s: a markdown string with embedded html snippets

        Returns an HTML string; not a complete document.

        filter_soup(library, soup)
    """
    if res is None:
        res = AugmentedResult()
    if location is None:
        location = LocationUnknown()

    from mcdp_report.gg_utils import resolve_references_to_images
    s0 = s

    unique = get_md5(realpath)[:8]

    check_good_use_of_special_paragraphs(s0, res, location)
    raise_missing_image_errors = raise_errors

    # Imports here because of circular dependencies
    from .latex.latex_preprocess import extract_maths, extract_tabular
    from .latex.latex_preprocess import latex_preprocessing
    from .latex.latex_preprocess import replace_equations
    from .macro_col2 import col_macros, col_macros_prepare_before_markdown
    from .mark.markd import render_markdown
    from .preliminary_checks import do_preliminary_checks_and_fixes
    from .prerender_math import prerender_mathjax

    if isinstance(s, unicode):
        msg = 'I expect a str encoded with utf-8, not unicode.'
        raise_desc(TypeError, msg, s=s)

    # Needs to happen before do_preliminary_checks_and_fixes()
    # because of the & character.
    s, tabulars = extract_tabular(s)

    s = do_preliminary_checks_and_fixes(s, res, location)

    # Put back the tabulars, because extract_maths() needs to grab them.
    for k, v in tabulars.items():
        assert k in s
        s = s.replace(k, v)

    # Extract all math content (between $ and $, between $$ and $$,
    # and between the various other delimiters).
    # Returns a dict of placeholder -> substitution.
    s, maths = extract_maths(s)

    for k, v in list(maths.items()):
        # inline math (single $), not display math ($$)
        if v.startswith('$') and not v.startswith('$$'):
            if '\n\n' in v:
                msg = 'The Markdown pre-processor got confused by this math fragment:'
                msg += '\n\n' + indent(v, '  > ')
                res.note_error(msg, location)
                maths[k] = 'ERROR'

    s = latex_preprocessing(s)
    s = '<div style="display:none">Because of mathjax bug</div>\n\n\n' + s

    # Cannot parse html before markdown, because md will take
    # invalid html (in particular '$ ciao <ciao>') and make it work.
    s = s.replace('*}', '\\*}')

    s, mcdpenvs = protect_my_envs(s)

    s = col_macros_prepare_before_markdown(s)

    s = render_markdown(s)

    def preprocess_equations(x):
        # '<' and '>' get MathJax confused; the braces are needed
        # so that e.g. "a<b" becomes "a\lt{}b", not "a\ltb".
        x = x.replace('>', '\\gt{}')
        x = x.replace('<', '\\lt{}')
        return x

    for k, v in maths.items():
        if k not in s:
            msg = 'Internal error while dealing with LaTeX math.'
            msg += '\nCannot find %r (= %r)' % (k, v)
            res.note_error(msg, location)
            continue
        v = preprocess_equations(v)
        s = s.replace(k, v)

    s = replace_equations(s)
    s = s.replace('\\*}', '*}')

    # This parses the XML.
    soup = bs(s)
    other_abbrevs(soup, res, location)
    substitute_special_paragraphs(soup, res, location)
    create_notes_from_elements(soup, res, location, unique)

    # Need to process tabular before mathjax.
    escape_for_mathjax(soup)

    # MathJax must run after markdown, because of code blocks using "$".
    s = to_html_stripping_fragment(soup)
    if use_mathjax:
        s = prerender_mathjax(s, symbols, res)

    soup = bs(s)
    escape_for_mathjax_back(soup)
    s = to_html_stripping_fragment(soup)

    for k, v in mcdpenvs.items():
        # there is this case:
        #   ~~~
        #   <pre> </pre>
        #   ~~~
        s = s.replace(k, v)

    s = s.replace('<p>DRAFT</p>', '<div class="draft">')
    s = s.replace('<p>/DRAFT</p>', '</div>')

    soup = bs(s)
    mark_console_pres(soup, res, location)

    # Must be before make_figure_from_figureid_attr().
    display_files(soup, defaults={}, res=res, location=location,
                  raise_errors=raise_errors)

    make_figure_from_figureid_attr(soup, res, location)
    col_macros(soup)
    fix_subfig_references(soup)

    library = get_library_from_document(soup, default_library=library)

    from .highlight import html_interpret
    html_interpret(library, soup, generate_pdf=generate_pdf,
                   raise_errors=raise_errors,
                   realpath=realpath, res=res, location=location)

    if filter_soup is not None:
        filter_soup(library=library, soup=soup)

    # (embed_images_from_library2() is the disabled alternative here)
    resolve_references_to_images(soup=soup, library=library,
                                 raise_errors=raise_missing_image_errors,
                                 res=res, location=location)

    make_videos(soup, res, location, raise_on_errors=False)

    if check_refs:
        check_if_any_href_is_invalid(soup, res, location,
                                     ignore_ref_errors=ignore_ref_errors)

    fix_validation_problems(soup)
    strip_pre(soup)

    if MCDPManualConstants.enable_syntax_higlighting:
        syntax_highlighting(soup)

    if MCDPManualConstants.enforce_lang_attribute:
        check_lang_codes(soup, res, location)

    # Fixes the IDs (adding 'sec:'); adds IDs to the elements missing one.
    globally_unique_id_part = ('autoid-DO-NOT-USE-THIS-VERY-UNSTABLE-LINK-'
                               + get_md5(realpath)[:8])
    fix_ids_and_add_missing(soup, globally_unique_id_part, res, location)

    check_no_patently_wrong_links(soup, res, location)

    if MCDPManualConstants.enforce_status_attribute:
        check_status_codes(soup, realpath, res, location)

    s = to_html_stripping_fragment(soup)
    s = replace_macros(s)
    return s
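
# A minimal usage sketch for render_complete() (the library argument comes
# from the caller, as in render_book() above; the markdown is hypothetical):
def _example_render_complete(library):
    s = "# Title\n\nSome text with math: $x^2$.\n"
    # Returns an HTML fragment, not a complete document.
    return render_complete(library=library, s=s, raise_errors=False,
                           realpath='example.md')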