def run_bibtex2html(contents):
    res = AugmentedResult()
    erase = True
    with tmpdir(prefix='bibtex', erase=erase, keep_on_exception=True) as d:
        fn = os.path.join(d, 'input.bib')
        fno = os.path.join(d, 'out')
        fno1 = fno + '.html'
        # fno2 = fno + '_bib.html'
        with open(fn, 'w') as f:
            f.write(contents)

        cmd = ['bibtex2html', '-unicode', '--dl', '-o', fno, fn]
        system_cmd_result('.', cmd,
                          display_stdout=False,
                          display_stderr=False,
                          raise_on_error=True,
                          display_prefix=None,  # leave it there
                          env=None)

        bibtex2html_output = open(fno1).read()
        fixed = bibtex2html_output.replace('<p>\n</dd>', '</dd><!--fix-->')
        with open(os.path.join(d, 'fixed.html'), 'w') as f:
            f.write(fixed)

        out = process_bibtex2html_output(fixed, d)
        write_data_to_file(out, os.path.join(d, 'processed.html'))

    res.set_result(out)
    return res
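
# Usage sketch for run_bibtex2html(): it takes the raw text of a .bib file
# and returns an AugmentedResult whose result is the processed HTML.
# The entry below is made up, and this assumes the `bibtex2html` executable
# is available on the PATH.
def _example_run_bibtex2html():
    contents = """
@article{doe17,
  author = {Jane Doe},
  title  = {An Example Entry},
  year   = {2017},
}
"""
    res = run_bibtex2html(contents)
    print res.get_result()  # HTML fragment with the formatted bibliography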
def prerender(joined_aug, symbols):
    joined = joined_aug.get_result()
    soup = bs_entire_document(joined)
    for details in soup.select('details'):
        details.name = 'div'
        add_class(details, 'transmuted-details')
        # details.attrs['open'] = 1
    joined = to_html_entire_document(soup)

    res = AugmentedResult()
    result = prerender_mathjax(joined, symbols=symbols, res=res)
    res.set_result(result)
    return res
def make_last_modified(files_contents, nmax=100):
    res = AugmentedResult()
    files_contents = [DocToJoin(*x) for x in files_contents]
    files_contents = [_ for _ in files_contents if _.source_info]

    files_contents = list(
        sorted(files_contents, key=lambda x: x.source_info.last_modified,
               reverse=True))

    r = Tag(name='fragment')
    r.append('\n')

    h = Tag(name='h1')
    h.append('Last modified')
    h.attrs['id'] = 'sec:last-modified'
    r.append(h)
    r.append('\n')

    ul = Tag(name='ul')
    ul.append('\n')
    for d in files_contents[:nmax]:
        li = Tag(name='li')
        when = d.source_info.last_modified
        when_s = time.strftime("%a, %b %d", when)  # %H:%M
        li.append(when_s)
        li.append(': ')

        hid = get_main_header(bs(d.contents))
        if hid is None:
            what = "File %s" % d.docname
        else:
            what = Tag(name='a')
            what.attrs['href'] = '#' + hid
            what.attrs['class'] = MCDPManualConstants.CLASS_NUMBER_NAME
        li.append(what)

        li.append(' (')
        name = d.source_info.author.name
        li.append(name)
        li.append(')')

        ul.append(li)
        ul.append('\n')

    r.append(ul)

    s = to_html_stripping_fragment(r)
    res.set_result(s)
    return res
def document_final_pass_after_toc(soup, crossrefs=None, resolve_references=True,
                                  res=None, location=LocationUnknown()):
    """ This is done to a final document. """
    if res is None:
        res = AugmentedResult()

    logger.info('checking errors')
    check_various_errors(soup)

    from .check_missing_links import check_if_any_href_is_invalid
    logger.info('checking hrefs')
    check_if_any_href_is_invalid(soup, res, location, extra_refs=crossrefs)

    # Note that this should be done *after* check_if_any_href_is_invalid(),
    # because that one might fix some references.
    if resolve_references:
        logger.info('substituting empty links')
        substituting_empty_links(soup, raise_errors=False, res=res,
                                 extra_refs=crossrefs)

    for a in soup.select('a[href_external]'):
        a.attrs['href'] = a.attrs['href_external']
        add_class(a, 'interdoc')

    detect_duplicate_IDs(soup, res)
def figures_new1():
    s = r"""
<figure>
    <figcaption>Main caption</figcaption>

    <figure>
        <figcaption>Hello</figcaption>
        <img style='width:8em' src="duckietown-logo-transparent.png"/>
    </figure>

    <figure>
        <figcaption>second</figcaption>
        <img style='width:8em' src="duckietown-logo-transparent.png"/>
    </figure>
</figure>
"""
    soup = bs(s)
    res = AugmentedResult()
    location = LocationUnknown()
    make_figure_from_figureid_attr(soup, res, location)
    # nfigs = len(list(soup.select('figure')))
    o = to_html_stripping_fragment(soup)
    print o
def another2():
    # four spaces in the first line
    s = r"""
    (if it exists) of the set of fixed points of~$f$:

\begin{equation}
    x = y .\label{eq:lfp-one}
\end{equation}

The equality in \eqref{lfp-one} can be relaxed to ``$xxx$''.
The equality in \ref{eq:lfp-one} can be relaxed to ``$xxx$''.

The least fixed point need not exist. Monotonicity of the map~$f$
plus completeness is sufficient to ensure existence.
"""
    res = AugmentedResult()
    location = LocationUnknown()
    s2 = censor_markdown_code_blocks(s, res, location)

    print('original:')
    print indent_plus_invisibles(s)
    print('later:')
    print indent_plus_invisibles(s2)

    assert 'censored-code' not in s2
def generate_and_add_toc(soup, raise_error=False, res=None):
    if res is None:
        res = AugmentedResult()
    logger.info('adding toc')
    body = soup.find('body')
    toc = generate_toc(body, res=res)
    # logger.info('TOC:\n' + str(toc))
    toc_ul = bs(toc).ul
    if toc_ul is None:
        # empty TOC
        msg = 'Could not find toc.'
        # logger.warning(msg)
        res.note_error(msg)  # XXX
    else:
        toc_ul.extract()
        assert toc_ul.name == 'ul'
        toc_ul['class'] = 'toc'  # XXX: see XXX13
        toc_ul['id'] = MCDPManualConstants.MAIN_TOC_ID

        toc_selector = MCDPManualConstants.TOC_PLACEHOLDER_SELECTOR
        tocs = list(body.select(toc_selector))
        if not tocs:
            msg = 'Cannot find any element of type %r to put TOC inside.' % toc_selector
            if raise_error:
                raise NoTocPlaceholder(msg)
            logger.warning(msg)
            res.note_error(msg)
        else:
            toc_place = tocs[0]
            toc_place.replaceWith(toc_ul)
def generate_toc(soup, max_depth=None, max_levels=2, res=None):
    if res is None:
        res = AugmentedResult()
    max_levels += 1  # since we added "book"
    stack = [Item(None, -1, 'root', 'root', [])]

    headers_depths = list(get_things_to_index(soup))

    for header, depth, using in headers_depths:
        if max_depth is not None:
            if depth > max_depth:
                continue

        item = Item(header, depth, using, header['id'], [])

        while stack[-1].depth >= depth:
            stack.pop()
        stack[-1].items.append(item)
        stack.append(item)

    root = stack[0]

    number_items2(root, res)

    without_levels = root.copy_excluding_levels(
        MCDPManualConstants.exclude_from_toc)
    result = without_levels.to_html(root=True, max_levels=max_levels)

    if ZERO in result:
        res.note_error("Some counters had zero values")

    return result
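
# Illustrative sketch of the stack-based nesting in generate_toc(): the stack
# is popped until its top item is shallower than the incoming header, so an
# <h2> ends up as a child of the preceding <h1>. The IDs below follow the
# prefixed convention (sec:, sub:) that the indexing expects; the input is
# made up for illustration.
def _example_generate_toc():
    soup = bs("""
<h1 id="sec:alpha">Alpha</h1>
<h2 id="sub:alpha-one">Alpha One</h2>
<h1 id="sec:beta">Beta</h1>
""")
    print generate_toc(soup)  # a <ul> with "Alpha One" nested under "Alpha"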
def document_final_pass_before_toc(soup, remove, remove_selectors, res=None,
                                   location=None):
    if res is None:
        logger.warning('no res passed')
        res = AugmentedResult()
    if location is None:
        location = LocationUnknown()

    logger.info('reorganizing contents in <sections>')

    with timeit('find body'):
        body = soup.find('body')
        if body is None:
            msg = 'Cannot find <body>:\n%s' % indent(str(soup)[:1000], '|')
            raise ValueError(msg)

    with timeit('reorganize_contents'):
        body2 = reorganize_contents(body)

    process_assignment(body2, res, location)

    body.replace_with(body2)

    # Removing stuff
    with timeit('remove stuff'):
        do_remove_stuff(body2, remove_selectors, remove)

    with timeit('move_things_around'):
        move_things_around(soup=soup, res=res)
def download_reveal(output_dir):
    res = AugmentedResult()
    url = "https://github.com/hakimel/reveal.js/archive/3.6.0.zip"
    target = os.path.join(output_dir, 'revealjs')
    if os.path.exists(target):
        logger.debug('skipping downloading because target exists: %s' % target)
    else:
        dest = os.path.join(output_dir, 'reveal-3.6.0.zip')
        if not os.path.exists(dest):
            logger.info('Downloading %s' % url)
            response = requests.get(url, stream=True)
            with open(dest, 'wb') as f:
                shutil.copyfileobj(response.raw, f)
            logger.info(dest)

        target_tmp = target + '.tmp'
        import zipfile
        zip_ref = zipfile.ZipFile(dest, 'r')
        zip_ref.extractall(target_tmp)
        zip_ref.close()

        actual = os.path.join(target_tmp, 'reveal.js-3.6.0')
        os.rename(actual, target)
        logger.debug('extracted to %r' % target)

    check = [
        "plugin/notes/notes.js",
        "plugin/math/math.js",
        "lib/js/head.min.js",
        "js/reveal.js",
    ]
    for c in check:
        fn = os.path.join(target, c)
        if not os.path.exists(fn):
            msg = 'Incomplete reveal download, not found: %s' % fn
            res.note_error(msg)

    return res
def render(library, docname, data, realpath, out_dir, generate_pdf,
           stylesheet, symbols, raise_errors, use_mathjax, do_slides):
    res = AugmentedResult()
    if MCDPConstants.pdf_to_png_dpi < 300:
        msg = ('Note that pdf_to_png_dpi is set to %d, which is not suitable '
               'for printing' % MCDPConstants.pdf_to_png_dpi)
        mcdp_dev_warning(msg)

    from mcdp_docs.pipeline import render_complete

    out = os.path.join(out_dir, docname + '.html')

    html_contents = render_complete(library=library,
                                    s=data,
                                    raise_errors=raise_errors,
                                    realpath=realpath,
                                    generate_pdf=generate_pdf,
                                    symbols=symbols,
                                    use_mathjax=use_mathjax)

    title = docname
    doc = get_minimal_document(html_contents, title=title,
                               stylesheet=stylesheet,
                               add_markdown_css=True, add_manual_css=True)

    soup = bs_entire_document(doc)
    document_final_pass_before_toc(soup, remove=None, remove_selectors=[], res=res)
    generate_and_add_toc(soup, res=res)
    document_final_pass_after_toc(soup, res=res)

    if use_mathjax and symbols:
        add_mathjax_preamble(soup, symbols)

    if do_slides:
        create_reveal(soup, res)

    doc = to_html_entire_document(soup)

    d = os.path.dirname(out)
    if not os.path.exists(d):
        os.makedirs(d)
    with open(out, 'w') as f:
        f.write(doc)

    logger.info('Written %s' % out)
    return out
def move_things_around(soup, raise_if_errors=False, res=None):
    """ Looks for tags like:

            <move-here src="#line_detector2-line_detector_node2-autogenerated"/>

    """
    if res is None:
        res = AugmentedResult()
    from mcdp_docs.check_missing_links import get_id2element

    with timeit_wall('getting all IDs'):
        id2element, _duplicates = get_id2element(soup, 'id')

    for e in soup.find_all('move-here'):
        if 'src' not in e.attrs:
            msg = 'Expected attribute "src" for element %s' % str(e)
            raise ValueError(msg)

        src = e.attrs['src']
        if not src.startswith('#'):
            msg = 'Expected that attribute "src" started with "#" for element %s.' % str(e)
            raise ValueError(msg)

        nid = src[1:]
        # el = soup.find(id=nid) would be O(n^2); use the precomputed index
        el = id2element.get(nid, None)
        if el is None:
            msg = 'move-here: Could not find ID %r.' % nid
            e.name = 'span'
            res.note_error(msg, HTMLIDLocation.for_element(e))
            if raise_if_errors:
                raise ValueError(msg)
            else:
                continue

        el.extract()
        e.replace_with(el)
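
# Sketch of the <move-here> mechanism described in the docstring above: the
# tag is replaced by the element whose ID its "src" attribute names. The
# markup here is made up for illustration.
def _example_move_things_around():
    soup = bs('<div id="payload">content</div>'
              '<p><move-here src="#payload"></move-here></p>')
    move_things_around(soup)
    # The <move-here> tag is now replaced by <div id="payload">content</div>.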
def elements_abbrevs_test2():
    s = "<p>TODO: paragraph <strong>Strong</strong></p>"
    e = """<div class="todo-wrap"><p class="todo">TODO: paragraph <strong>Strong</strong></p></div>"""
    soup = bs(s.strip())

    res = AugmentedResult()
    location = LocationUnknown()
    substitute_special_paragraphs(soup, res, location)

    o = to_html_stripping_fragment(soup)
    # print o
    assert_equal(o, e)
def add_likebtn(joined_aug, likebtn):
    res = AugmentedResult()
    res.merge(joined_aug)
    soup = bs_entire_document(joined_aug.get_result())
    add_likebtn_(soup, likebtn)
    res.set_result(to_html_entire_document(soup))
    return res
def add_related(joined_aug):
    res = AugmentedResult()
    res.merge(joined_aug)
    soup = bs_entire_document(joined_aug.get_result())
    add_related_(soup, res)
    res.set_result(to_html_entire_document(soup))
    return res
def mark_errors_and_rest(joined_aug):
    soup = bs_entire_document(joined_aug.get_result())
    mark_in_html(joined_aug, soup)
    res = AugmentedResult()
    res.merge(joined_aug)
    res.set_result(to_html_entire_document(soup))
    return res
def prerender_main():
    f0 = sys.argv[1]
    f1 = sys.argv[2]
    html = open(f0).read()
    parsed = bs_entire_document(html)
    body = parsed.html.body
    body_string = str(body)

    res = AugmentedResult()
    body2_string = prerender_mathjax_(body_string, res)
    body2 = bs(body2_string)
    parsed.html.body.replace_with(body2)
    html2 = str(parsed)
    write_data_to_file(html2, f1)
def make_composite(compose_config, joined_aug):
    data = joined_aug.get_result()
    soup = bs_entire_document(data)

    recipe = compose_config.recipe
    remove_status = compose_config.remove_status
    show_removed = compose_config.show_removed
    permalink_prefix = compose_config.purl_prefix

    aug = compose_go2(soup, recipe, permalink_prefix, remove_status, show_removed)
    soup = aug.get_result()
    results = str(soup)

    res = AugmentedResult()
    res.merge(joined_aug)
    res.merge(aug)
    res.set_result(results)
    return res
def create_slides(soup):
    res = AugmentedResult()
    header = soup.find('h1', attrs=dict(type='slides'))
    if header is None:
        # logger.debug('No slides here')
        return

    _id = header.attrs['id'].replace('sec:', '')
    _id_section = _id + ':section'
    section = soup.find(id=_id_section)
    if section is None:
        msg = 'Could not find section by ID %r' % _id_section
        logger.error(msg)
        return

    section.extract()

    body = soup.find('body')
    body.attrs['type'] = 'slides'

    # The reveal container goes inside <div class="super">.
    container = soup.find('div', attrs={'class': 'super'})

    div = Tag(name='div')
    div.attrs['class'] = 'reveal'
    container.append(div)

    div_slides = Tag(name='div')
    div_slides.attrs['class'] = 'slides'
    div.append(div_slides)

    for subsection in section.select('section[level=sub]'):
        if 'without-header-inside' in subsection.attrs['class']:
            continue
        subsection.extract()
        div_slides.append(subsection)

    div_slides.insert(0, section)

    sub_notes(div_slides)
    sub_markers(div_slides)

    stylesheet = "v_manual_reveal"
    add_stylesheet(soup, stylesheet)
    embed_css_files(soup)

    create_reveal(soup, res)
def test_toc():
    s = """
<html>
<head></head>
<body>
<h1 id='one'>One</h1>
<p>a</p>
<h2 id='two'>Two</h2>
<p>a</p>
<h3 id='three'>Three</h3>
<h2 id='four'>Four</h2>
<p>a</p>
</body>
</html>
"""
    soup = bs(s)

    # The first time it should fail, because the IDs lack the required prefixes:
    # > InvalidHeaders: I expected that this header would start with either part:,app:,sec:.
    # > <h1 id="one">One</h1>
    try:
        _toc = generate_toc(soup)
    except InvalidHeaders:
        pass
    else:
        raise Exception('expected InvalidHeaders to be raised')

    soup = bs(s)
    fix_ids_and_add_missing(soup, 'prefix-', AugmentedResult(), LocationUnknown())
    generate_toc(soup)

    s = str(soup)
    expected = ['sec:one', 'sub:two']
    # print(indent(s, 'transformed > '))
    for e in expected:
        assert e in s
def test_toc2():
    s = """
<html>
<head></head>
<body>
<h1>One</h1>
<h1>Two</h1>
<h1>Three</h1>
<p></p>
<h2>A</h2>
<h2>B</h2>
<h2>C</h2>
<h3>a</h3>
<h3>b</h3>
<h3>c</h3>
</body>
</html>
"""
    soup = bs(s)
    fix_ids_and_add_missing(soup, 'prefix', AugmentedResult(), LocationUnknown())
    assert soup.find(id='sub:prefix-5') is not None
    # Expected result:
    # <fragment>
    # <h1 id="sec:prefix--1">One</h1>
    # <h1 id="sec:prefix--2">Two</h1>
    # <h1 id="sec:prefix--3">Three</h1>
    # <p></p>
    # <h2 id="sub:prefix--4">A</h2>
    # <h2 id="sub:prefix--5">B</h2>
    # <h2 id="sub:prefix--6">C</h2>
    # <h3 id="subsub:prefix--7">a</h3>
    # <h3 id="subsub:prefix--8">b</h3>
    # <h3 id="subsub:prefix--9">c</h3>
    # </fragment>
    print(soup)
    _toc = generate_toc(soup)
    s = str(soup)
def sub1():
    defaults = {'org': 'AndreaCensi', 'repo': 'mcdp', 'branch': 'duckuments'}
    s = """
<a href="github:path=context_eval_as_constant.py"></a>
"""
    soup = bs(s)

    location = LocationUnknown()
    res = AugmentedResult()
    n = substitute_github_refs(soup, defaults, res=res, location=location)
    assert n == 1

    s2 = str(soup)
    logger.debug(indent(s2, ' '))

    expect = '<code class="github-resource-link">context_eval_as_constant.py</code>'
    if expect not in s2:
        raise Exception(s2)
def displayfile1():
    defaults = {'org': 'AndreaCensi', 'repo': 'mcdp', 'branch': 'duckuments'}
    s = """
<display-file src="github:path=context_eval_as_constant.py,from_text=get_connections_for,to_text=return"></display-file>
"""
    soup = bs(s)

    res = AugmentedResult()
    location = LocationUnknown()
    n = display_files(soup, defaults, raise_errors=True, res=res, location=location)
    assert n == 1

    s2 = str(soup)
    logger.debug('\n' + indent(s2, ' '))
def sub2():
    defaults = {'org': 'AndreaCensi', 'repo': 'mcdp', 'branch': 'duckuments'}
    s = """
<a href="github:path=context_eval_as_constant.py,from_text=get_connections_for,to_text=return"></a>
"""
    soup = bs(s)

    location = LocationUnknown()
    res = AugmentedResult()
    n = substitute_github_refs(soup, defaults, res=res, location=location)
    assert n == 1

    s2 = str(soup)
    logger.debug('\n' + indent(s2, ' '))

    expect = 'context_eval_as_constant.py#L7-L12'
    if expect not in s2:
        raise Exception('No %s in %s' % (expect, s2))
def add_style(data_aug, stylesheet):
    soup = bs_entire_document(data_aug.get_result())
    head = soup.find('head')
    assert head is not None

    link = Tag(name='link')
    link['rel'] = 'stylesheet'
    link['type'] = 'text/css'
    from mcdp_report.html import get_css_filename
    link['href'] = get_css_filename('compiled/%s' % stylesheet)
    head.append(link)

    html = to_html_entire_document(soup)

    res = AugmentedResult()
    res.merge(data_aug)
    res.set_result(html)
    return res
def manual_join(template, files_contents,
                stylesheet, remove=None, extra_css=None,
                remove_selectors=None,
                hook_before_toc=None,
                references=None,
                resolve_references=True,
                hook_before_final_pass=None,
                require_toc_placeholder=False,
                permalink_prefix=None,
                crossrefs_aug=None,
                aug0=None):
    """
        files_contents: a list of tuples that can be cast to DocToJoin,
        where the string is a unique one to be used for job naming.

        extra_css: if not None, a string of more CSS to be added.

        remove_selectors: list of selectors to remove (e.g. ".draft").

        hook_before_toc, if not None, is called with hook_before_toc(soup=soup)
        just before generating the toc.
    """
    result = AugmentedResult()

    if references is None:
        references = {}
    check_isinstance(files_contents, list)

    if crossrefs_aug is None:
        crossrefs = Tag(name='no-cross-refs')
    else:
        crossrefs = bs(crossrefs_aug.get_result())
        result.merge(crossrefs_aug)
    if aug0 is not None:
        result.merge(aug0)

    @contextmanager
    def timeit(_):
        yield

    with timeit('manual_join'):
        files_contents = [DocToJoin(*_) for _ in files_contents]

        # cannot use bs() because this is an entire document
        with timeit('parsing template'):
            template0 = template
            template = replace_macros(template)
            template_soup = BeautifulSoup(template, 'lxml', from_encoding='utf-8')
            d = template_soup
            if d.html is None:
                s = "Invalid template"
                raise_desc(ValueError, s, template0=template0)

        with timeit('adding head'):
            assert d.html is not None
            assert '<html' in str(d)
            head = d.find('head')
            if head is None:
                msg = 'Could not find <head> in template:'
                logger.error(msg)
                logger.error(str(d))
                raise Exception(msg)
            assert head is not None
            for x in get_manual_css_frag().contents:
                head.append(x.__copy__())

        with timeit('adding stylesheet'):
            if stylesheet is not None:
                link = Tag(name='link')
                link['rel'] = 'stylesheet'
                link['type'] = 'text/css'
                from mcdp_report.html import get_css_filename
                link['href'] = get_css_filename('compiled/%s' % stylesheet)
                head.append(link)

        with timeit('making basename2soup'):
            basename2soup = OrderedDict()
            for doc_to_join in files_contents:
                if doc_to_join.docname in basename2soup:
                    msg = 'Repeated docname %r' % doc_to_join.docname
                    raise ValueError(msg)
                from .latex.latex_preprocess import assert_not_inside
                if isinstance(doc_to_join.contents, AugmentedResult):
                    result.merge(doc_to_join.contents)
                    contents = doc_to_join.contents.get_result()
                else:
                    contents = doc_to_join.contents
                assert_not_inside(contents, '<fragment')
                assert_not_inside(contents, 'DOCTYPE')
                frag = bs(contents)
                basename2soup[doc_to_join.docname] = frag

        # with timeit('fix_duplicate_ids'):  # XXX
        #     fix_duplicated_ids(basename2soup)

        with timeit('copy contents'):
            body = d.find('body')
            add_comments = False
            for docname, content in basename2soup.items():
                if add_comments:
                    body.append(NavigableString('\n\n'))
                    body.append(Comment('Beginning of document dump of %r' % docname))
                    body.append(NavigableString('\n\n'))

                try_faster = True
                if try_faster:
                    for e in list(content.children):
                        body.append(e.extract())
                else:
                    copy_contents_into(content, body)

                if add_comments:
                    body.append(NavigableString('\n\n'))
                    body.append(Comment('End of document dump of %r' % docname))
                    body.append(NavigableString('\n\n'))

        with timeit('extract_bibtex_blocks'):
            extract_bibtex_blocks(d)

        with timeit('ID_PUT_BIB_HERE'):
            ID_PUT_BIB_HERE = MCDPManualConstants.ID_PUT_BIB_HERE
            bibhere = d.find('div', id=ID_PUT_BIB_HERE)
            if bibhere is None:
                msg = ('Could not find #%s in document. '
                       'Adding one at end of document.') % ID_PUT_BIB_HERE
                result.note_warning(msg)
                bibhere = Tag(name='div')
                bibhere.attrs['id'] = ID_PUT_BIB_HERE
                d.find('body').append(bibhere)

            do_bib(d, bibhere)

        with timeit('hook_before_final_pass'):
            if hook_before_final_pass is not None:
                hook_before_final_pass(soup=d)

        with timeit('document_final_pass_before_toc'):
            location = LocationUnknown()
            document_final_pass_before_toc(d, remove, remove_selectors, result, location)

        with timeit('hook_before_toc'):
            if hook_before_toc is not None:
                hook_before_toc(soup=d)

        with timeit('generate_and_add_toc'):
            try:
                generate_and_add_toc(d, raise_error=True, res=result)
            except NoTocPlaceholder as e:
                if require_toc_placeholder:
                    msg = 'Could not find toc placeholder: %s' % e
                    if aug0 is not None:
                        result.note_error(msg)
                    else:
                        raise Exception(msg)

        with timeit('document_final_pass_after_toc'):
            document_final_pass_after_toc(
                soup=d, crossrefs=crossrefs,
                resolve_references=resolve_references, res=result)

        if extra_css is not None:
            logger.info('adding extra CSS')
            add_extra_css(d, extra_css)

        with timeit('document_only_once'):
            document_only_once(d)

        location = LocationUnknown()
        substitute_github_refs(d, defaults={}, res=result, location=location)

        with timeit('another A pass'):
            for a in d.select('a[href]'):
                href = a.attrs['href']
                if href in references:
                    r = references[href]
                    a.attrs['href'] = r.url
                    if not list(a.children):  # empty link: fill in the title
                        a.append(r.title)

        # do not use to_html_stripping_fragment - this is a complete doc
        # mark_in_html(result, soup=d)

        add_github_links_if_edit_url(soup=d, permalink_prefix=permalink_prefix)

        with timeit('converting to string'):
            res = unicode(d)

        with timeit('encoding'):
            res = res.encode('utf8')

        logger.info('done - %.1f MB' % (len(res) / (1024 * 1024.0)))

        result.set_result(res)
        return result
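
# Sketch of a minimal manual_join() call. The template and documents are
# made up, and the tuple layout (docname, contents, source_info) is an
# assumption based on how DocToJoin is used elsewhere in this code.
def _example_manual_join():
    template = '<html><head></head><body><div id="toc"></div></body></html>'
    files_contents = [
        ('chap1', '<h1 id="sec:one">One</h1>', None),  # hypothetical documents
        ('chap2', '<h1 id="sec:two">Two</h1>', None),
    ]
    result = manual_join(template=template, files_contents=files_contents,
                         stylesheet=None)
    print result.get_result()  # the joined HTML document as a UTF-8 string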
def render_book(src_dirs, generate_pdf,
                data, realpath,
                use_mathjax,
                raise_errors,
                filter_soup=None,
                symbols=None,
                ignore_ref_errors=False):
    """ Returns an AugmentedResult(str). """
    res = AugmentedResult()
    from mcdp_docs.pipeline import render_complete

    librarian = get_test_librarian()
    # XXX: these might need to be changed
    if not MCDPConstants.softy_mode:
        for src_dir in src_dirs:
            librarian.find_libraries(src_dir)

    load_library_hooks = [librarian.load_library]
    library_ = MCDPLibrary(load_library_hooks=load_library_hooks)

    for src_dir in src_dirs:
        library_.add_search_dir(src_dir)

    d = tempfile.mkdtemp()
    library_.use_cache_dir(d)

    location = LocalFile(realpath)
    # print('location:\n%s' % location)

    def filter_soup0(soup, library):
        if filter_soup is not None:
            filter_soup(soup=soup, library=library)
        add_edit_links2(soup, location)
        add_last_modified_info(soup, location)

    try:
        html_contents = render_complete(library=library_,
                                        s=data,
                                        raise_errors=raise_errors,
                                        realpath=realpath,
                                        use_mathjax=use_mathjax,
                                        symbols=symbols,
                                        generate_pdf=generate_pdf,
                                        filter_soup=filter_soup0,
                                        location=location,
                                        res=res,
                                        ignore_ref_errors=ignore_ref_errors)
    except DPSyntaxError as e:
        msg = 'Could not compile %s' % realpath
        location0 = LocationInString(e.where, location)
        res.note_error(msg, locations=location0)
        fail = "<p>This file could not be compiled</p>"
        res.set_result(fail)
        return res

    res.set_result(html_contents)
    return res
def get_cross_refs(src_dirs, permalink_prefix, extra_crossrefs, ignore=None):
    if ignore is None:
        ignore = []
    res = AugmentedResult()
    files = look_for_files(src_dirs, "crossref.html")

    id2file = {}
    soup = Tag(name='div')

    def add_from_soup(s, f, ignore_already_present, ignore_if_conflict):
        for img in list(s.find_all('img')):
            img.extract()

        for e in s.select('[base_url]'):
            e['external_crossref_file'] = f

        # Remove the ones with the same base_url
        for e in list(s.select('[base_url]')):
            if e.attrs['base_url'] == permalink_prefix:
                e.extract()

        for e in s.select('[id]'):
            id_ = e.attrs['id']
            if id_ == 'container':
                continue  # XXX
            if id_ in id2file:
                if not ignore_already_present:
                    msg = 'Found two elements with same ID "%s":' % id_
                    msg += '\n %s' % id2file[id_]
                    msg += '\n %s' % f
                    res.note_error(msg)
            else:
                id2file[id_] = f
                e2 = e.__copy__()
                if ignore_if_conflict:
                    e2.attrs['ignore_if_conflict'] = '1'
                soup.append(e2)
                soup.append('\n')

    ignore = [os.path.realpath(_) for _ in ignore]

    for _f in files:
        if os.path.realpath(_f) in ignore:
            msg = 'Ignoring file %r' % _f
            logger.info(msg)
            continue
        logger.info('cross ref file %s' % _f)
        data = open(_f).read()
        if permalink_prefix in data:
            msg = 'skipping own file'
            logger.debug(msg)
            continue
        s = bs(data)
        add_from_soup(s, _f, ignore_already_present=False, ignore_if_conflict=False)

    if extra_crossrefs is not None:
        logger.info('Reading external refs\n%s' % extra_crossrefs)
        try:
            r = requests.get(extra_crossrefs)
        except Exception as ex:
            msg = 'Could not read external cross reference links'
            msg += '\n %s' % extra_crossrefs
            msg += '\n\n' + indent(str(ex), ' > ')
            res.note_error(msg)
        else:
            logger.debug('%s %s' % (r.status_code, extra_crossrefs))
            if r.status_code == 404:
                msg = 'Could not read external cross refs: %s' % r.status_code
                msg += '\n url: ' + extra_crossrefs
                msg += '\n This is normal if you have not pushed this branch yet.'
                res.note_warning(msg)
            s = bs(r.text)
            add_from_soup(s, extra_crossrefs, ignore_already_present=True,
                          ignore_if_conflict=True)

    res.set_result(str(soup))
    return res
def compose_go2(soup, recipe, permalink_prefix, remove_status, show_removed):
    res = AugmentedResult()

    # Create context
    doc = soup.__copy__()

    body = Tag(name='body')
    doc.body.replace_with(body)

    elements = recipe.make(RecipeContext(soup=soup))
    check_isinstance(elements, list)
    append_all(body, elements)

    # Now remove stuff
    for status in remove_status:
        removed = []
        for section in list(body.select('section[status=%s]' % status)):
            level = section.attrs['level']
            if level not in ['sec', 'part']:
                continue

            section_id = section.attrs['id']
            pure_id = section_id.replace(':section', '')
            removed.append(section.attrs['id'])

            if show_removed:
                # remove everything that is not a header
                keep = ['h1', 'h2', 'h3', 'h4', 'h5']
                for e in list(section.children):
                    if e.name not in keep:
                        e.extract()
                    else:
                        e.append(' [%s]' % status)

                p = Tag(name='p')
                p.append("This section has been removed because it is "
                         "in status %r. " % status)
                a = Tag(name='a')
                a.attrs['href'] = 'http://purl.org/dt/master/%s' % pure_id
                a.append("If you are feeling adventurous, you can read it on master.")
                p.append(a)
                section.append(p)

                p = Tag(name='p')
                p.append("To disable this behavior, and completely hide the sections, ")
                p.append("set the parameter show_removed to false in fall2017.version.yaml.")
                section.append(p)
            else:
                section.extract()

        if not removed:
            logger.info('Found no section with status = %r to remove.' % status)
        else:
            logger.info('I removed %d sections with status %r.' % (len(removed), status))
            logger.debug('Removed: %s' % ", ".join(removed))

    add_github_links_if_edit_url(doc, permalink_prefix=permalink_prefix)

    generate_and_add_toc(doc)
    doc = doc.__copy__()

    raise_errors = False
    find_links_from_master(master_soup=soup, version_soup=doc,
                           raise_errors=raise_errors, res=res)

    document_final_pass_after_toc(doc)

    res.set_result(doc)
    return res
def substituting_empty_links(soup, raise_errors=False, res=None, extra_refs=None):
    """
        soup: where to look for references.

        The default style is [](#sec:systems), which is rendered
        as "Chapter 10".

        You can also use "class":

            <a href='#sec:name' class='only_number'></a>

    """
    if extra_refs is None:
        extra_refs = Tag(name='div')
    if res is None:
        res = AugmentedResult()

    for le in get_empty_links_to_fragment(soup, extra_refs=extra_refs, res=res):
        a = le.linker
        element_id = le.eid
        element = le.linked

        if not element:
            msg = 'Cannot find %s' % element_id
            res.note_error(msg, HTMLIDLocation.for_element(a))
            if raise_errors:
                raise ValueError(msg)
            continue

        sub_link(a, element_id, element, res)

    for a in get_empty_links(soup):
        href = a.attrs.get('href', '(not present)')
        if not href:
            href = '""'
        if href.startswith('python:'):
            continue

        if href.startswith('http:') or href.startswith('https:'):
            msg = """
This link text is empty:

    ELEMENT

Note that the syntax for links in Markdown is

    [link text](URL)

For internal links (where the URL starts with "#"), the documentation
system can fill in the title automatically, leading to the format:

    [](#other-section)

However, this does not work for external sites, such as:

    [](MYURL)

So, you need to provide some text, such as:

    [this useful website](MYURL)
"""
            msg = msg.replace('ELEMENT', str(a))
            msg = msg.replace('MYURL', href)
            res.note_error(msg.strip(), HTMLIDLocation.for_element(a))
        else:
            msg = """
This link is empty:

    ELEMENT

It might be that the writer intended for this link to point to
something, but they got the syntax wrong.

    href = %s

As a reminder, to refer to other parts of the document, use the
syntax "#ID", such as:

    See [](#fig:my-figure).
    See [](#section-name).
""" % href
            msg = msg.replace('ELEMENT', str(a))
            res.note_error(msg.strip(), HTMLIDLocation.for_element(a))
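
# Sketch of the substitution described in the docstring above: an empty
# internal link gets its text filled in from the element it points to.
# The IDs and markup are made up for illustration.
def _example_substituting_empty_links():
    soup = bs('<h1 id="sec:intro">Introduction</h1>'
              '<p>See <a href="#sec:intro"></a>.</p>')
    substituting_empty_links(soup)
    # The <a> element now carries the number/title of #sec:intro,
    # as produced by sub_link().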