def note_error(tag0, e):
    """
    Flag ``tag0`` as errored and insert an inset with the traceback after it.

    The traceback text is the one of the exception *currently being
    handled*, so this is meant to be called from inside the ``except``
    block that caught ``e``.

    :param tag0: the element to mark with the class ``errored``.
    :param e: the exception; its class name is added as a CSS class.
    :return: the value returned by ``insert_inset``.
    """
    check_isinstance(e, BaseException)
    add_class(tag0, 'errored')
    short = 'Error'
    # format_exc() takes a frame *limit*, not an exception: passing ``e``
    # only worked by accident on Python 2 and is a TypeError on Python 3.
    long_error = traceback.format_exc()
    return insert_inset(tag0, short, long_error,
                        [ERROR_CLASS, type(e).__name__])
def document_final_pass_after_toc(soup, crossrefs=None, resolve_references=True, res=None, location=LocationUnknown()):
    """
    Run the last checks and substitutions on a final document.

    Steps, in order: check_various_errors, check_if_any_href_is_invalid,
    (optionally) substituting_empty_links, rewriting of ``href_external``
    links, and detect_duplicate_IDs.

    :param soup: the document, modified in place.
    :param crossrefs: passed as ``extra_refs`` to the link checks.
    :param resolve_references: if true, empty links are substituted.
    :param res: result object collecting notes; a new AugmentedResult is
        created when None.
    :param location: passed to check_if_any_href_is_invalid.
    """
    if res is None:
        res = AugmentedResult()

    logger.info('checking errors')
    check_various_errors(soup)

    from .check_missing_links import check_if_any_href_is_invalid
    logger.info('checking hrefs')
    check_if_any_href_is_invalid(soup, res, location, extra_refs=crossrefs)

    # Note that this should be done *after* check_if_any_href_is_invalid()
    # because that one might fix some references
    if resolve_references:
        logger.info('substituting empty links')
        substituting_empty_links(soup, raise_errors=False, res=res,
                                 extra_refs=crossrefs)

    # Links carrying an "href_external" attribute point to that target.
    for a in soup.select('a[href_external]'):
        a.attrs['href'] = a.attrs['href_external']
        add_class(a, 'interdoc')

    detect_duplicate_IDs(soup, res)
def substitute_task_marker_p(p, sub, klass, res, location):
    """
    Add the class ``klass`` to ``p`` for every text node under ``p``
    that contains the substring ``sub``.

    ``res`` and ``location`` are accepted but not used here.
    """
    # Work on a snapshot of the descendants rather than the live iterator.
    for node in list(p.descendants):
        if isinstance(node, NavigableString):
            text = node.string
            if sub in text:
                add_class(p, klass)
def find_links_from_master(master_soup, version_soup, raise_errors, res):
    """
    Redirect fragment links of ``version_soup`` whose target only exists
    in ``master_soup``.

    For each link returned by a_linking_to_fragments(version_soup) whose
    target ID is not among the IDs of ``version_soup``:

    - if the ID exists in ``master_soup``, an empty link is filled in with
      sub_link() (defaulting to the "only name" class when the link has no
      classes), the href is set to a purl.org URL and the class
      ``link-to-master`` is added;
    - otherwise an informational message is logged.

    ``raise_errors`` is not used in this function. The lists ``missing``,
    ``seen`` and ``found`` are filled but never read here.
    """
    logger.info('find_links_from_master')
    from mcdp_docs.tocs import sub_link
    # find all ids
    master_ids = get_ids_from_soup(master_soup)
    version_ids = get_ids_from_soup(version_soup)
    missing = []
    seen = []
    found = []

    for a, eid in a_linking_to_fragments(version_soup):
        seen.append(eid)

        if eid not in version_ids:
            missing.append(eid)
            if eid in master_ids:
                # logger.info('found %s in master' % eid)
                found.append(eid)
                linked_element = master_ids[eid]
                if is_empty_link(a):
                    # logger.debug('is: %s' % a)
                    if not get_classes(a):
                        add_class(a, MCDPManualConstants.CLASS_ONLY_NAME)
                    # logger.debug('is before: %s' % a)
                    sub_link(a, eid, linked_element, res)
                    # logger.debug('is now: %s' % a)

                # the link now points outside of this document
                href = 'http://purl.org/dth/%s' % remove_prefix(eid)
                a.attrs['href'] = href
                add_class(a, 'link-to-master')
            else:
                logger.info('Not found %r in master.' % eid)
def substitute_todo(soup, res, location):
    """
    Wrap every element whose text starts with "TODO" in a
    ``<div class="todo-wrap">`` and give the element the class ``todo``.

    If the remaining text starts with ``for``, the text between ``for``
    and the first ``:`` (stripped) is stored in the ``for`` attribute;
    when no ``:`` follows, an error is noted on ``res``.
    """
    klass = 'todo'
    keyword = 'for'

    for r in get_elements_starting_with_string(soup, prefix="TODO"):
        # Put the wrapper where the element was, then move the element
        # inside the wrapper.
        wrapper = Tag(name='div')
        add_class(wrapper, klass + '-wrap')
        add_class(r.element, klass)
        container = r.element.parent
        position = container.index(r.element)
        r.element.extract()
        wrapper.append(r.element)
        container.insert(position, wrapper)

        if not r.rest.strip().startswith(keyword):
            continue

        after = r.rest[r.rest.index(keyword) + len(keyword):]
        dest, colon, _ = after.partition(':')
        if colon:
            r.element.attrs['for'] = dest.strip()
        else:
            msg = 'Could not find ":" in "%s"' % after
            res.note_error(msg, HTMLIDLocation.for_element(wrapper, location))
def note_error2(element, short, long_error, other_classes=()):
    """
    Flag ``element`` as errored and insert an error inset after it.

    :param element: the element to mark with the class ``errored``.
    :param short: summary passed to ``insert_inset``.
    :param long_error: details passed to ``insert_inset``.
    :param other_classes: extra CSS classes for the inset, in addition to
        ERROR_CLASS. Any iterable of strings; the default is an immutable
        empty tuple instead of a shared mutable list.
    :return: the value returned by ``insert_inset``.
    """
    add_class(element, 'errored')
    classes = [ERROR_CLASS] + list(other_classes)
    return insert_inset(element, short, long_error, classes)
def embed_pdf_image(tag, resolve, density):
    """
    Replace the PDF referenced by an ``<img>`` with an embedded PNG.

    ``resolve(src)`` must return the PDF data, or None if it is missing,
    in which case the tag only gets the class ``missing-image``.
    ``density`` is the rendering density in pixels per inch.

    The displayed size (in inches) is the natural size of the PDF, unless
    the ``latex-options`` attribute gives a ``scale`` or a ``width``.
    """
    assert tag.name == 'img'
    assert tag.has_attr('src')

    pdf_bytes = resolve(tag['src'])
    if pdf_bytes is None:
        add_class(tag, 'missing-image')
        return

    # Render the PDF and recover its natural size in inches from the
    # pixel size of the rendering.
    png_bytes = png_from_pdf(pdf_bytes, density=density)
    width_px, height_px = get_pixel_width_height_of_png(png_bytes)
    natural_w = width_px / float(density)
    natural_h = height_px / float(density)

    latex_options = tag.get('latex-options', '')
    props = parse_includegraphics_option_string(latex_options)

    if 'height' in props:
        logger.warning('Cannot deal with "height" yet: latex_options = %s' % latex_options)

    if 'scale' in props:
        factor = float(props['scale'])
        shown_w = natural_w * factor
        shown_h = natural_h * factor
    elif 'width' in props:
        try:
            shown_w = get_length_in_inches(props['width'])
        except ValueError as e:
            logger.error('Cannot interpret %s: %s' % (latex_options, e))
            shown_w = 5.0
        # keep the aspect ratio
        aspect = natural_h / natural_w
        shown_h = shown_w * aspect
    else:
        shown_w, shown_h = natural_w, natural_h

    # The size goes first in the style so that existing rules override it.
    add_style(tag, after=False, width='%sin' % shown_w, height='%sin' % shown_h)
    tag['size_in_pixels'] = '%s, %s' % (width_px, height_px)
    tag['src'] = data_encoded_for_src(png_bytes, 'png')
def add_prev_next_links(filename2contents, only_for=None):
    """
    Wrap each page in a ``<div class="super">`` and add navigation bars.

    A navigation ``<div>`` with "prev"/"next" links (built from the
    ATTR_PREV / ATTR_NEXT attributes of the page) is inserted both at the
    start and at the end of each page.

    :param filename2contents: mapping filename -> page element.
    :param only_for: if given and non-empty, only these filenames are
        processed; the others are left out of the result.
    :return: OrderedDict filename -> wrapping ``<div>``.
    """

    def make_link(target_id, css_class, text):
        link = Tag(name='a')
        link.attrs['href'] = '#' + str(target_id)
        link.attrs['class'] = css_class
        link.append(text)
        return link

    wrapped = OrderedDict()
    for filename, contents in list(filename2contents.items()):
        if only_for and filename not in only_for:
            continue

        id_prev = contents.attrs[ATTR_PREV]
        link_prev = make_link(id_prev, CLASS_LINK_PREV, 'prev')
        id_next = contents.attrs[ATTR_NEXT]
        link_next = make_link(id_next, CLASS_LINK_NEXT, 'next')

        outer = Tag(name='div')
        outer.attrs['class'] = ['super']

        nav = Tag(name='div')
        add_class(nav, 'navigation')
        for target_id, link in ((id_prev, link_prev), (id_next, link_next)):
            if target_id:
                nav.append(link.__copy__())
        spacer = Tag(name='div')
        spacer.attrs['style'] = 'clear:both'
        nav.append(spacer)

        add_class(contents, 'main-section-for-page')
        outer.append(contents)

        from .source_info_imp import get_main_header
        outer.attrs['id'] = get_main_header(contents)

        # one copy of the navigation bar on top, one at the bottom
        contents.insert(0, nav.__copy__())
        contents.append(nav.__copy__())

        wrapped[filename] = outer
    return wrapped
def prerender(joined_aug, symbols):
    """
    Pre-render the math of a whole document.

    Every ``<details>`` element is first turned into a
    ``<div class="transmuted-details">``; the resulting HTML is then
    passed to prerender_mathjax().

    :param joined_aug: object whose get_result() gives the document HTML.
    :param symbols: passed to prerender_mathjax.
    :return: a new AugmentedResult holding the pre-rendered document.
    """
    document = bs_entire_document(joined_aug.get_result())
    for element in document.select('details'):
        element.name = 'div'
        add_class(element, 'transmuted-details')

    html = to_html_entire_document(document)
    res = AugmentedResult()
    res.set_result(prerender_mathjax(html, symbols=symbols, res=res))
    return res
def warn_for_duplicated_ids(soup):
    """
    Find duplicated IDs, mark the elements and rename the duplicates.

    For every ID used by more than one element (except IDs containing
    one of the ignored substrings, and groups where some element lies
    inside an ``<svg>``):

    - all the elements get the class ``errored``;
    - every element after the first gets ``-duplicate-<k>`` appended to
      its ID, with k = 1, 2, ...

    The duplicated IDs are reported with logger.error().
    """
    from collections import defaultdict

    # IDs containing one of these substrings are not reported.
    ignore_if_contains = ['MathJax', 'edge', 'mjx-eqn']

    elements_by_id = defaultdict(list)
    for e in soup.select('[id]'):
        elements_by_id[e['id']].append(e)

    problematic = []
    for ID, elements in elements_by_id.items():
        if len(elements) == 1:
            continue
        if any(fragment in ID for fragment in ignore_if_contains):
            continue

        inside_svg = any(ancestor.name == 'svg'
                         for e in elements
                         for ancestor in e.parents)
        if inside_svg:
            continue

        problematic.append(ID)
        for e in elements:
            add_class(e, 'errored')
        # the first element keeps the original ID
        for i, e in enumerate(elements[1:]):
            e['id'] = e['id'] + '-duplicate-%d' % (i + 1)

    if problematic:
        logger.error('The following IDs were duplicated: %s' %
                     ", ".join(problematic))
        logger.error(
            'I renamed some of them; references and numbering are screwed up')
def subwith(name_, s):
    """
    Replace the parent of ``code`` with a highlighted ``<pre>``.

    ``s`` is HTML containing a ``<pre>``; that element is renamed to
    ``<code>`` and placed inside a new
    ``<pre class="syntax_highlight NAME">``.

    NOTE(review): ``code`` is not a parameter; presumably this function
    is nested in a scope that defines it.
    """
    parsed = bs(s.encode('utf8'))
    parsed.name = 'div'
    inner = parsed.find('pre')
    inner.name = 'code'

    highlighted = Tag(name='pre')
    for css_class in ('syntax_highlight', name_):
        add_class(highlighted, css_class)
    highlighted.append(inner)

    try:
        code.parent.replace_with(highlighted)
    except:
        # log what we were trying to replace, then propagate unchanged
        logger.debug(str(code.parent))
        raise
def substitute_special_paragraphs(soup):
    """
    Apply substitute_special_paragraph() for each entry of
    ``prefix2class``, then turn the elements with class ``comment``,
    ``question`` or ``doubt`` into ``<details>`` elements whose summary
    is the class name.
    """
    for prefix, klass in prefix2class.items():
        substitute_special_paragraph(soup, prefix, klass)

    for kind in ['comment', 'question', 'doubt']:
        for element in list(soup.select('.%s' % kind)):
            summary = Tag(name='summary')
            summary.append(kind)

            details = Tag(name='details')
            add_class(details, kind)
            details.append(summary)
            # a copy of the element goes inside; the original is replaced
            details.append(element.__copy__())

            element.replace_with(details)
def substitute_task_marker_p(p, sub, klass):
    """
    Add the class ``klass`` to ``p`` once for every text node below
    ``p`` that contains ``sub``. The marker text itself is left in place.
    """
    # list(): iterate over a snapshot of the descendants
    text_nodes = [d for d in list(p.descendants)
                  if isinstance(d, NavigableString)]
    for node in text_nodes:
        if sub in node.string:
            add_class(p, klass)
def link_to_command_explanation(soup, res, location):
    """
    Wrap every ``<span class="program">`` in a link to ``#<program>``.

    The link target is the first child of the span. The link gets the
    class MCDPConstants.CLASS_IGNORE_IF_NOT_EXISTENT. ``res`` and
    ``location`` are not used here.
    """
    for span in soup.select('span'):
        if not has_class(span, 'program'):
            continue

        program_name = list(span.children)[0]

        link = Tag(name='a')
        add_class(link, MCDPConstants.CLASS_IGNORE_IF_NOT_EXISTENT)
        link.attrs['href'] = '#' + program_name
        # the link contains a copy of the span and takes its place
        link.append(span.__copy__())
        span.replace_with(link)
def substitute_special_paragraphs(soup, res, location):
    """
    Run all the "special paragraph" substitutions on ``soup``:
    assignments, TODOs, the prefixes listed in
    MCDPManualConstants.special_paragraphs, and finally the conversion
    of the foldable kinds into ``<details>`` elements whose summary is
    the class name.
    """
    substitute_assignment(soup, res, location)
    substitute_todo(soup, res, location)

    for prefix, klass in MCDPManualConstants.special_paragraphs.items():
        substitute_special_paragraph(soup, prefix, klass, res, location)

    for kind in MCDPManualConstants.special_paragraphs_foldable:
        selector = '.%s' % kind
        for element in list(soup.select(selector)):
            summary = Tag(name='summary')
            summary.append(kind)

            folded = Tag(name='details')
            add_class(folded, kind)
            folded.append(summary)
            # a copy of the element goes inside; the original is replaced
            folded.append(element.__copy__())

            element.replace_with(folded)
def copy_attributes_from_header(section, header):
    """
    Copy ID, classes and a few attributes from a header to its section.

    If the header has ID "sec:Blah", the section gets the ID
    "Blah:section" (prefix removed, ":section" appended), so that it is
    easy to link to it. The attributes ``status``, ``lang`` and ``type``
    are copied when present.

    Raises Exception if the header has no ID.
    """
    assert section.name == 'section'

    header_attrs = header.attrs
    if 'id' not in header_attrs:
        raise Exception('This header has no ID' + '\n' + str(header))

    from mcdp_docs.composing.cli import remove_prefix
    section.attrs['id'] = remove_prefix(header_attrs['id']) + ':section'

    for css_class in header_attrs.get('class', []):
        add_class(section, css_class)

    for attribute in ('status', 'lang', 'type'):
        if attribute in header_attrs:
            section.attrs[attribute] = header_attrs[attribute]
def substitute_github_ref(a, defaults):
    """
    Resolve the GitHub reference in the href of ``a``.

    The href is parsed with parse_github_file_ref(), resolved with
    resolve_reference() and replaced by the resulting URL. If the link
    has no children, a ``<code class="github-resource-link">`` with the
    base name of the path is added as its content.

    :param a: the link element; modified in place.
    :param defaults: passed to resolve_reference().
    :raises DPSyntaxError: if the href cannot be parsed or has no path.
    """
    href = a.attrs['href']
    try:
        ref = parse_github_file_ref(href)
    except InvalidGithubRef as e:
        msg = 'Could not parse a reference in %s.' % str(a)
        raise_wrapped(DPSyntaxError, e, msg, compact=True)

    if ref.path is None:
        msg = 'There is no path specified.'
        # No exception is being handled here: the previous code passed an
        # unbound ``e``, which failed before DPSyntaxError could be raised.
        raise_desc(DPSyntaxError, msg, ref=ref)

    ref = resolve_reference(ref, defaults)
    a.attrs['href'] = ref.url

    if not list(a.children):
        c = Tag(name='code')
        add_class(c, 'github-resource-link')
        c.append(os.path.basename(ref.path))
        a.append(c)
def sub_markers(soup):
    """
    Turn the marker character U+25B6 (black right-pointing triangle)
    into the class ``fragment``.

    For every text node containing the marker (except inside ``code``),
    the marker is removed from the text and the class ``fragment`` is
    added to:

    - the enclosing ``<li>``, if the parent of the text's parent is one;
    - otherwise, if the parent has class ``figure-conv-to-div``, the
      element two levels above it;
    - otherwise the parent of the text.
    """
    # Written as an escape so the literal cannot be damaged by a wrong
    # file encoding (it had decayed into the mojibake of its UTF-8 bytes).
    marker = u'\u25b6'

    for ns in list(soup.descendants):
        if not isinstance(ns, NavigableString):
            continue
        if 'code' in get_parents_names(ns):
            # inside code the character is meant literally
            continue
        if marker not in ns:
            continue

        ns2 = ns.replace(marker, '')
        parent = ns.parent
        if parent.parent and parent.parent.name == 'li':
            parent = parent.parent
        elif 'figure-conv-to-div' in parent.attrs.get('class', ''):
            parent = parent.parent.parent
        add_class(parent, 'fragment')
        ns.replace_with(ns2)
def substitute_special_paragraph(soup, prefix, klass):
    """
    Looks for paragraphs whose first child is a plain string starting
    with the given prefix (case-insensitive).

    From:

        <p>prefix contents</p>

    Creates:

        <div class='klass-wrap'><p class='klass'>contents</p></div>
    """
    for p in list(soup.select('p')):
        children = list(p.contents)
        if not children:
            continue
        first = children[0]
        if not isinstance(first, NavigableString):
            continue
        text = first.string
        if not text.lower().startswith(prefix.lower()):
            continue

        # drop the prefix from the text
        first.replaceWith(NavigableString(text[len(prefix):]))

        wrapper = Tag(name='div')
        add_class(wrapper, klass + '-wrap')
        add_class(p, klass)

        # put the wrapper where the paragraph was, the paragraph inside
        container = p.parent
        position = container.index(p)
        p.extract()
        wrapper.append(p)
        container.insert(position, wrapper)
def update_refs_(filename, contents, id2filename):
    """
    Rewrite the ``#id`` links of one file after the document was split.

    :param filename: name of the file that ``contents`` belongs to.
    :param contents: element whose ``<a href="#...">`` links are updated.
    :param id2filename: mapping element ID -> filename containing it.

    Links to an ID in another file become ``<file>#<id>`` and get the
    class ``link-different-file``. Links to an ID in the same file get
    the class ``link-same-file``; in addition their ``<ul>``/``<li>``
    ancestors get ``contains-link-same-file`` and, for TOC links, the
    parent ``<li>`` and the ``<li>`` below it are marked as well.
    Links to unknown IDs are reported with logger.error().
    """
    test_href = lambda _: _ is not None and _.startswith('#')
    elements = list(contents.find_all('a', attrs={'href': test_href}))
    # logger.debug('updates: %s' % sorted(id2filename))
    for a in elements:
        href = a.attrs['href']
        assert href[0] == '#'
        id_ = href[1:]
        if id_ in id2filename:
            point_to_filename = id2filename[id_]
            if point_to_filename != filename:
                new_href = '%s#%s' % (point_to_filename, id_)
                a.attrs['href'] = new_href
                add_class(a, 'link-different-file')
            else:
                # actually it doesn't change
                new_href = '#%s' % id_
                a.attrs['href'] = new_href
                add_class(a, 'link-same-file')

                if 'toc_link' in a.attrs['class']:
                    p = a.parent
                    assert p.name == 'li'
                    add_class(p, 'link-same-file-direct-parent')

                    # now find all the lis
                    for x in list(p.descendants):
                        if isinstance(x, Tag) and x.name == 'li':
                            add_class(x, 'link-same-file-inside')

                # walk up to the root, marking the lists on the way
                p = a.parent
                while p:
                    if isinstance(p, Tag) and p.name in ['ul', 'li']:
                        add_class(p, 'contains-link-same-file')
                    p = p.parent
        else:
            logger.error('update_ref() for %r: no element with ID "%s".' %
                         (filename, id_))
def substitute_github_ref(a, defaults, res, location):
    """
    Resolve the GitHub reference in the href of ``a``, noting problems
    on ``res`` instead of raising.

    Warnings are noted (and the link is left untouched) when the href
    cannot be parsed, when the checkout of the URL already failed, or
    when no path is given. If resolve_reference() raises
    CouldNotResolveRef, an error is noted and the URL is remembered in
    FailedRepos.failed_repos.

    On success the href becomes the resolved URL and, if the link has no
    children, a ``<code class="github-resource-link">`` with the base
    name of the path is added.
    """

    def here():
        # computed only when a note is actually emitted
        return HTMLIDLocation.for_element(a, location)

    href = a.attrs['href']
    try:
        ref = parse_github_file_ref(href)
    except InvalidGithubRef as e:
        msg = 'Could not parse a reference in %s.' % str(a)
        msg += '\n\n' + indent(e, ' > ')
        res.note_warning(msg, here())
        return

    if ref.url in FailedRepos.failed_repos:
        res.note_warning('Skipped because checkout of %s already failed.' % ref.url, here())
        return

    if ref.path is None:
        res.note_warning('There is no path specified.', here())
        return

    try:
        ref = resolve_reference(ref, defaults)
    except CouldNotResolveRef as e:
        res.note_error(str(e), here())
        FailedRepos.failed_repos[ref.url] = str(e)
        return

    a.attrs['href'] = ref.url

    if list(a.children):
        return
    label = Tag(name='code')
    add_class(label, 'github-resource-link')
    label.append(os.path.basename(ref.path))
    a.append(label)
def mark_console_pres_highlight(soup, res, location):
    """
    Highlight the ``<pre><code>`` blocks that contain console sessions.

    A block is treated as a console session when is_console_line()
    accepts its (unescaped, stripped) text. In that case the ``<pre>``
    gets the class ``console`` (plus ``on-<hostname>`` if a hostname was
    recognized) and the content of ``<code>`` is rebuilt token by token
    (tokens are separated by single spaces), wrapping prompts, hostname,
    programs, program commands and options in ``<span>`` elements.

    ``res`` and ``location`` are not used here.
    """
    for code in soup.select('pre code'):
        pre = code.parent
        if code.string is None:
            continue

        s0 = code.string

        # Python 2 module; used to decode HTML entities
        from HTMLParser import HTMLParser
        h = HTMLParser()
        s = h.unescape(s0)
        if s != s0:
            # print('decoded %r -> %r' % (s0, s))
            pass

        beg = s.strip()

        # is it a console line?
        ct = is_console_line(beg)

        if ct is None:
            continue

        add_class(pre, 'console')

        # add class "on-hostname"
        if ct.hostname is not None:
            cn = 'on-%s' % str(ct.hostname)
            add_class(pre, cn)

        # the content is rebuilt from scratch below
        code.string = ''

        lines = s.split('\n')

        def is_program(x, l):
            # "git" in a line mentioning "apt" is not treated as a program
            if x == 'git' and 'apt' in l:
                return False
            return x in programs

        for j, line in enumerate(lines):
            tokens = line.split(' ')
            for i, token in enumerate(tokens):
                previous_is_sudo_or_dollar = i >= 1 and tokens[i - 1] in [
                    '$', 'sudo'
                ]

                if token in ['$', 'DOLLAR']:
                    # add <span class=console_sign>$</span>
                    e = Tag(name='span')
                    e['class'] = 'console_sign'
                    e.string = '$'
                    code.append(e)
                elif i == 0 and token == ct.hostname:
                    # it's the hostname
                    e = Tag(name='span')
                    e['class'] = 'hostname'
                    e.string = token
                    code.append(e)
                elif is_program(token, line) and previous_is_sudo_or_dollar:
                    e = Tag(name='span')
                    e['class'] = '%s program' % token
                    e.string = token
                    code.append(e)
                elif token in program_commands:
                    e = Tag(name='span')
                    e['class'] = '%s program_command' % token
                    e.string = token
                    code.append(e)
                elif token and token[0] == '-':
                    e = Tag(name='span')
                    e['class'] = 'program_option'
                    e.string = token
                    code.append(e)
                else:
                    code.append(NavigableString(token))

                # re-insert the separator that split(' ') removed
                is_last = i == len(tokens) - 1
                if not is_last:
                    before = '![' in ' '.join(tokens[:i + 1])
                    if not before:
                        # XXX: this is a bug
                        space = Tag(name='span')
                        space.append(' ')
                        space['class'] = 'space'
                        code.append(space)
                    else:
                        code.append(' ')

            is_last_line = j == len(lines) - 1
            if not is_last_line:
                code.append(NavigableString('\n'))
def go(selector, parse_expr, extension, use_pre=True, refine=None):
    """
    Render as highlighted HTML the source code of the tags matching
    ``selector``, replacing each tag with the rendered version.

    NOTE(review): this function reads ``soup``, ``library``,
    ``raise_errors``, ``res``, ``realpath`` and ``generate_pdf``, none of
    which are parameters; presumably it is nested in a scope that
    defines them.

    :param selector: CSS selector of the tags to process.
    :param parse_expr: parsing expression passed to the parser/renderer.
    :param extension: file extension used when the code is loaded from
        the library (tags without text, identified by their ``id``).
    :param use_pre: if true the result is a ``<div class="rendered">``
        containing a ``<pre>``; otherwise the inner ``<code>`` is used.
    :param refine: optional callable ``refine(x, context=...)`` applied
        as post-processing of the parsed expression.
    """
    for tag in soup.select(selector):
        source_code = '<unset>'  # XXX
        try:
            if tag.string is None:  # or not tag.string.strip():
                if not tag.has_attr('id'):
                    msg = "If <pre> is empty then it needs to have an id."
                    raise_desc(ValueError, msg, tag=describe_tag(tag))

                # load it
                tag_id = tag['id'].encode('utf-8')
                # "libname.name" refers to a thing in another library
                if '.' in tag_id:
                    i = tag_id.index('.')
                    libname, name = tag_id[:i], tag_id[i + 1:]
                    use_library = library.load_library(libname)
                else:
                    name = tag_id
                    use_library = library
                basename = '%s.%s' % (name, extension)
                data = use_library._get_file_data(basename)
                source_code = data['data']
            else:
                source_code = get_source_code(tag)

            # prettify.
            # remove spurious indentation
            source_code = source_code.strip()

            do_apply_suggestions = (not tag.has_attr('noprettify')
                                    and not tag.has_attr('np'))
            # then apply suggestions
            try:
                if do_apply_suggestions:
                    x = parse_wrap(parse_expr, source_code)[0]
                    xr = parse_ndp_refine(x, Context())
                    suggestions = get_suggestions(xr)
                    source_code = apply_suggestions(
                        source_code, suggestions)
            except DPSyntaxError as e:
                if raise_errors:
                    raise
                else:
                    res.note_error(str(e), HTMLIDLocation.for_element(tag))
                    continue
            # we don't want the browser to choose different tab size
            # source_code = source_code.replace('\t', ' ' * 4)

            # we are not using it
            _realpath = realpath
            context = Context()

            def postprocess(x):
                if refine is not None:
                    return refine(x, context=context)
                else:
                    return x

            # print('rendering source code %r' % source_code)
            html = ast_to_html(source_code,
                               parse_expr=parse_expr,
                               add_line_gutter=False,
                               postprocess=postprocess)

            # mark in the HTML the warnings collected in the context
            for w in context.warnings:
                if w.where is not None:
                    from mcdp_web.editor_fancy.app_editor_fancy_generic import html_mark
                    html = html_mark(html, w.where, "language_warning")

            frag2 = BeautifulSoup(html, 'lxml', from_encoding='utf-8')

            if use_pre:
                rendered = Tag(name='div', attrs={'class': 'rendered'})
                pre = frag2.pre
                pre.extract()
                rendered.append(pre)
                if not rendered.has_attr('class'):
                    rendered['class'] = ""
                if tag.has_attr('label'):
                    # the label is added both inside and outside the <pre>
                    text = tag['label']
                    tag_label = Tag(name='span')
                    add_class(tag_label, 'label')
                    add_class(tag_label, 'label_inside')
                    tag_label.append(NavigableString(text))

                    pre.insert(0, tag_label)

                    tag_label_outside = Tag(name='span')
                    add_class(tag_label_outside, 'label')
                    add_class(tag_label_outside, 'label_outside')
                    tag_label_outside.append(NavigableString(text))
                    rendered.insert(0, tag_label_outside)

                # NOTE(review): max_len is computed but not read afterwards
                # in this function.
                max_len = max_len_of_pre_html(html)

                if tag.has_attr('label'):
                    add_class(rendered, 'has_label')
                    max_len = max(max_len, len(tag['label']) + 6)

                style = ''
            else:
                # using <code>
                rendered = frag2.pre.code
                rendered.extract()
                if not rendered.has_attr('class'):
                    rendered['class'] = ""

                style = ''

            # carry over style, classes and id of the original tag
            if tag.has_attr('style'):
                style = style + tag['style']

            if style:
                rendered['style'] = style

            if tag.has_attr('class'):
                add_class(rendered, tag['class'])

            if tag.has_attr('id'):
                rendered['id'] = tag['id']

            if use_pre:
                if generate_pdf:
                    pdf = get_ast_as_pdf(source_code, parse_expr)
                    if tag.has_attr('id'):
                        basename = tag['id']
                    else:
                        # no id: name the file after a hash of the code
                        hashcode = hashlib.sha224(
                            source_code).hexdigest()[-8:]
                        basename = 'code-%s' % hashcode

                    docname = os.path.splitext(
                        os.path.basename(realpath))[0]
                    download = docname + '.' + basename + '.source_code.pdf'
                    a = create_a_to_data(download=download,
                                         data_format='pdf',
                                         data=pdf)
                    a['class'] = 'pdf_data'
                    a.append(NavigableString(download))
                    div = Tag(name='div')
                    div.append(rendered)
                    div.append(a)
                    tag.replaceWith(div)
                else:
                    tag.replaceWith(rendered)
            else:
                tag.replaceWith(rendered)

        except DPSyntaxError as e:
            if raise_errors:
                raise
            else:
                res.note_error(str(e), HTMLIDLocation.for_element(tag))
                # note_error(tag, e)
                if tag.string is None:
                    tag.string = "`%s" % tag['id']
                continue

        except DPSemanticError as e:
            if raise_errors:
                raise
            else:
                res.note_error(str(e), HTMLIDLocation.for_element(tag))
                # note_error(tag, e)
                if tag.string is None:
                    tag.string = "`%s" % tag['id']
                continue

        except DPInternalError as ex:
            # internal errors are always propagated, with the source code
            msg = 'Error while interpreting the code:\n\n'
            msg += indent(source_code, ' | ')
            raise_wrapped(DPInternalError, ex, msg, exc=sys.exc_info())
def do_bib(soup, bibhere):
    """
    Find the bibliography entries that are used and move them to
    ``bibhere``.

    - Links whose href starts with ``#bib:`` are the citations.
    - Every ``<cite>`` gets the class ``used`` or ``unused``.
    - For cited IDs without a ``<cite>``, an error ``<cite>`` is created.
    - Cited entries are numbered in order of first citation, get the
      cross-referencing attributes, and are moved into ``bibhere`` in
      that same order.

    :param soup: the document; modified in place.
    :param bibhere: the element that receives the used ``<cite>``s.
    """
    used = []
    unused = set()
    for a in soup.find_all('a'):
        href = a.attrs.get('href', '')
        if href.startswith('#bib:'):
            used.append(href[1:])  # no "#"
    logger.debug('I found %d references, to these: %s' % (len(used), used))

    # Distinct cited IDs, in order of first citation. Iterating over this
    # (rather than over ``used``) moves each entry exactly once, so the
    # bibliography order agrees with the numbering even when an entry is
    # cited more than once.
    used_set = set()
    used_unique = []
    for ID in used:
        if ID not in used_set:
            used_set.add(ID)
            used_unique.append(ID)

    # collect all the <cite>
    id2cite = {}
    for c in soup.find_all('cite'):
        ID = c.attrs.get('id', None)
        id2cite[ID] = c
        if ID in used_set:
            add_class(c, 'used')
        else:
            unused.add(ID)
            add_class(c, 'unused')

    # divide in found and not found
    # (``found`` counts every citation, ``notfound`` every distinct ID)
    found = [ID for ID in used if ID in id2cite]
    notfound = [ID for ID in used_unique if ID not in id2cite]

    # now create additional <cite> for the ones that are not found
    for ID in notfound:
        cite = Tag(name='cite')
        s = 'Reference %s not found.' % ID
        cite.append(NavigableString(s))
        cite.attrs['class'] = ['errored', 'error']  # XXX
        soup.append(cite)
        id2cite[ID] = cite

    for number, ID in enumerate(used_unique, 1):
        cite = id2cite[ID]

        # attributes for cross-referencing
        cite.attrs[LABEL_NAME] = '[%s]' % number
        cite.attrs[LABEL_SELF] = '[%s]' % number
        cite.attrs[LABEL_NUMBER] = number
        cite.attrs[LABEL_WHAT] = 'Reference'
        cite.attrs[LABEL_WHAT_NUMBER_NAME] = '[%s]' % number
        cite.attrs[LABEL_WHAT_NUMBER] = '[%s]' % number

    # now put the cites at the end of the document
    for ID in used_unique:
        c = id2cite[ID]
        # remove it from parent
        c.extract()
        # add to bibliography
        bibhere.append(c)

    s = ("Bib cites: %d\nBib used: %s\nfound: %s\nnot found: %s\nunused: %d"
         % (len(id2cite), len(used), len(found), len(notfound), len(unused)))
    logger.info(s)
def insert_inset(element, short, long_error, klasses=()):
    """
    Insert an errored ``<details>`` after ``element``.

    :param element: the element after which the inset is inserted. If it
        is already followed by ``<details>`` siblings, the new inset goes
        after the last of them.
    :param short: content of the ``<summary>`` (shown in bold).
    :param long_error: body of the inset; a Tag is placed in a ``<div>``,
        anything else in a ``<pre>``.
    :param klasses: CSS classes added to the body, the ``<details>`` and
        the ``<summary>``. Any iterable of strings; the default is an
        immutable empty tuple instead of a shared mutable list.
    :return: the new ``<details>`` element.
    """
    details = Tag(name='details')
    summary = Tag(name='summary')
    s = Tag(name='strong')
    s.append(short)
    summary.append(s)
    details.append(summary)

    if isinstance(long_error, Tag):
        pre = Tag(name='div')
    else:
        pre = Tag(name='pre')

    for c in klasses:
        add_class(pre, c)
        add_class(details, c)
        add_class(summary, c)
    pre.append(long_error)
    details.append(pre)

    # Skip over the notes already attached to the element, so that the
    # new one ends up after them.
    element0 = element
    while element.next_sibling and element.next_sibling.name == 'details':
        element = element.next_sibling
        add_class(element0, 'contains-consecutive-notes')
        add_class(element, 'consecutive-note')
        add_class(details, 'consecutive-note')

    element.insert_after(details)

    parent = element0.parent
    if 'style' not in parent.attrs:
        if parent.name != 'blockquote':
            parent.attrs['style'] = 'display: inline;'

    return details
def sub_link(a, element_id, element, res):
    """
    Fill in the content of the empty link ``a`` from the labels of the
    element it refers to.

    :param a: the link with href= #element_id
    :param element_id: the ID the link points to
    :param element: the element to which we refer (must be a Tag)
    :param res: result object; an error is noted on it (and nothing is
        changed) if a numbered element lacks the label attributes.

    The content depends on the classes of ``a``: TOC links get separate
    spans for what/number/separator/name; other links get one
    ``<span class="reflabel">`` with number and/or name. Elements with
    the ATTR_NONUMBER attribute are always referred to by name only.
    Finally, if the element has a ``base_url`` attribute, it is prepended
    to the href.
    """
    assert isinstance(element, Tag)
    CLASS_ONLY_NUMBER = MCDPManualConstants.CLASS_ONLY_NUMBER
    CLASS_NUMBER_NAME = MCDPManualConstants.CLASS_NUMBER_NAME
    CLASS_ONLY_NAME = MCDPManualConstants.CLASS_ONLY_NAME

    if MCDPManualConstants.ATTR_NONUMBER in element.attrs:
        # unnumbered element: only the name can be shown
        label_what_number = None
        label_number = None
        try:
            label_what = element.attrs[LABEL_WHAT]
            label_name = element.attrs[LABEL_NAME]
        except KeyError as e:
            msg = 'Cannot find %r in %s' % (e, element.attrs)
            raise Exception(msg)  # XXX

        classes = [CLASS_ONLY_NAME]
    else:
        if (not LABEL_WHAT_NUMBER in element.attrs) or \
                (not LABEL_NAME in element.attrs):
            msg = (
                'substituting_empty_links: Could not find attributes %s or %s in %s'
                % (LABEL_NAME, LABEL_WHAT_NUMBER, compact_desc_tag(element)))

            res.note_error(
                msg, {
                    'original': HTMLIDLocation(element_id),
                    'reference': HTMLIDLocation.for_element(a)
                })
            return

        label_what_number = element.attrs[LABEL_WHAT_NUMBER]
        label_number = element.attrs[LABEL_NUMBER]
        label_what = element.attrs[LABEL_WHAT]
        label_name = element.attrs[LABEL_NAME]

        # copy, so that the classes of the link are not modified
        classes = list(a.attrs.get('class', []))  # bug: I was modifying

    if MCDPManualConstants.CLASS_TOC_LINK in classes:

        if not CLASS_ONLY_NAME in classes:
            s = Tag(name='span')
            s.string = label_what
            add_class(s, 'toc_what')
            a.append(s)

            a.append(' ')

            s = Tag(name='span')
            s.string = label_number
            add_class(s, 'toc_number')
            a.append(s)

            s = Tag(name='span')
            s.string = ' - '
            add_class(s, 'toc_sep')
            a.append(s)

        if label_name is not None and '<' in label_name:
            contents = bs(label_name)
            # sanitize the label name
            for br in contents.findAll('br'):
                br.replaceWith(NavigableString(' '))
            for _ in contents.findAll('a'):
                _.extract()

            contents.name = 'span'
            add_class(contents, 'toc_name')
            a.append(contents)
            # logger.debug('From label_name = %r to a = %r' % (label_name, a))
        else:
            if label_name is None:
                s = Tag(name='span')
                s.string = '(unnamed)'  # XXX
            else:
                s = bs(label_name)
                assert s.name == 'fragment'
                s.name = 'span'
                # add_class(s, 'produced-here') # XXX
            add_class(s, 'toc_name')
            a.append(s)

    else:

        if CLASS_ONLY_NUMBER in classes:
            label = label_number
        elif CLASS_NUMBER_NAME in classes:
            if label_name is None:
                label = label_what_number + \
                        ' - ' + '(unnamed)'  # warning
            else:
                label = label_what_number + ' - ' + label_name
        elif CLASS_ONLY_NAME in classes:
            if label_name is None:
                label = '(unnamed)'  # warning
            else:
                label = label_name
        else:
            # default behavior
            if string_starts_with(['fig:', 'tab:', 'bib:', 'code:'], element_id):
                label = label_what_number
            elif label_name is None:
                label = label_what_number
            else:
                label = label_what_number + ' - ' + label_name

        frag = bs(label)
        assert frag.name == 'fragment'
        frag.name = 'span'
        add_class(frag, 'reflabel')
        a.append(frag)

    if 'base_url' in element.attrs:
        a['href'] = element.attrs['base_url'] + a['href']
def sub_link(a, element_id, element, raise_errors):
    """
    Fill in the content of the empty link ``a`` from the labels of the
    element it refers to.

    :param a: the link with href= #element_id
    :param element_id: the ID the link points to
    :param element: the element to which we refer; if false, an error is
        noted on the link with note_error2()
    :param raise_errors: if true, a missing element raises ValueError
        after the error has been noted.

    Links with class ``toc_link`` get separate spans for
    what/number/separator/name; other links get one
    ``<span class="reflabel">`` chosen according to their classes.
    If the element lacks the label attributes, a warning is logged and
    the link is left unchanged.
    """
    CLASS_ONLY_NUMBER = MCDPManualConstants.CLASS_ONLY_NUMBER
    CLASS_NUMBER_NAME = MCDPManualConstants.CLASS_NUMBER_NAME
    CLASS_ONLY_NAME = MCDPManualConstants.CLASS_ONLY_NAME

    if not element:
        msg = ('Cannot find %s' % element_id)
        note_error2(a, 'Ref. error', 'substituting_empty_links():\n' + msg)
        #nerrors += 1
        if raise_errors:
            raise ValueError(msg)
        return
    # if there is a query, remove it
    # if le.query is not None:
    #     new_href = '#' + le.eid
    #     a.attrs['href'] = new_href
    #     logger.info('setting new href= %s' % (new_href))

    if (not LABEL_WHAT_NUMBER in element.attrs) or \
            (not LABEL_NAME in element.attrs):
        msg = (
            'substituting_empty_links: Could not find attributes %s or %s in %s'
            % (LABEL_NAME, LABEL_WHAT_NUMBER, element))
        # only the warning is active; the else branch is disabled
        if True:
            logger.warning(msg)
        else:
            # note_error_msg(a, msg)
            note_error2(a, 'Ref. error', 'substituting_empty_links():\n' + msg)
            # nerrors += 1
            if raise_errors:
                raise ValueError(msg)
        return

    label_what_number = element.attrs[LABEL_WHAT_NUMBER]
    label_number = element.attrs[LABEL_NUMBER]
    label_what = element.attrs[LABEL_WHAT]
    label_name = element.attrs[LABEL_NAME]

    # copy, so that the classes of the link are not modified
    classes = list(a.attrs.get('class', []))  # bug: I was modifying

    # if le.query is not None:
    #     classes.append(le.query)

    if 'toc_link' in classes:
        s = Tag(name='span')
        s.string = label_what
        add_class(s, 'toc_what')
        a.append(s)

        a.append(' ')

        s = Tag(name='span')
        s.string = label_number
        add_class(s, 'toc_number')
        a.append(s)

        s = Tag(name='span')
        s.string = ' - '
        add_class(s, 'toc_sep')
        a.append(s)

        if label_name is not None and '<' in label_name:
            contents = bs(label_name)
            # sanitize the label name
            for br in contents.findAll('br'):
                br.replaceWith(NavigableString(' '))
            for _ in contents.findAll('a'):
                _.extract()

            contents.name = 'span'
            add_class(contents, 'toc_name')
            a.append(contents)
            #logger.debug('From label_name = %r to a = %r' % (label_name, a))
        else:
            if label_name is None:
                s = Tag(name='span')
                s.string = '(unnamed)'  # XXX
            else:
                s = bs(label_name)
                assert s.name == 'fragment'
                s.name = 'span'
                # add_class(s, 'produced-here') # XXX
            add_class(s, 'toc_name')
            a.append(s)

    else:

        if CLASS_ONLY_NUMBER in classes:
            label = label_number
        elif CLASS_NUMBER_NAME in classes:
            if label_name is None:
                label = label_what_number + \
                        ' - ' + '(unnamed)'  # warning
            else:
                label = label_what_number + ' - ' + label_name
        elif CLASS_ONLY_NAME in classes:
            if label_name is None:
                label = '(unnamed)'  # warning
            else:
                label = label_name
        else:
            # default behavior
            if string_starts_with(['fig:', 'tab:', 'bib:', 'code:'], element_id):
                label = label_what_number
            elif label_name is None:
                label = label_what_number
            else:
                label = label_what_number + ' - ' + label_name

        frag = bs(label)
        assert frag.name == 'fragment'
        frag.name = 'span'
        add_class(frag, 'reflabel')
        a.append(frag)
def note_error_msg(tag0, msg):
    """
    Flag ``tag0`` as errored and insert an inset titled "Error" whose
    body is ``msg`` (which must be a ``bytes`` instance).

    Returns the value returned by insert_inset().
    """
    check_isinstance(msg, bytes)
    add_class(tag0, 'errored')
    return insert_inset(tag0, 'Error', msg, [ERROR_CLASS])
def substituting_empty_links(soup, raise_errors=False):
    '''
        Fill in the content of the empty links to fragments.

        default style is [](#sec:systems)  "Chapter 10"

        the name is [](#sec:systems?only_name) "My title"

        the number is [](#sec:systems?only_number) "10"

        and full is [](#sec:systems?toc_link) "Chapter 10 - My title"

        You can also use "class":

            <a href='#sec:name' class='only_number'></a>

        or

            <a href='#sec:name?only_number'></a>

        If the linked element cannot be found, an error is noted on the
        link; with raise_errors=True a ValueError is raised as well.
    '''
    CLASS_ONLY_NUMBER = MCDPManualConstants.CLASS_ONLY_NUMBER
    CLASS_NUMBER_NAME = MCDPManualConstants.CLASS_NUMBER_NAME
    CLASS_ONLY_NAME = MCDPManualConstants.CLASS_ONLY_NAME

    logger.debug('substituting_empty_links')

    n = 0  # links considered
    nerrors = 0  # links whose target was not found
    for le in get_empty_links_to_fragment(soup):

        a = le.linker
        element_id = le.eid
        element = le.linked

        n += 1
        if not element:
            msg = ('Cannot find %s' % element_id)
            note_error_msg(a, msg)
            nerrors += 1
            if raise_errors:
                raise ValueError(msg)
            continue
        # if there is a query, remove it
        if le.query is not None:
            new_href = '#' + le.eid
            a.attrs['href'] = new_href
            logger.info('setting new href= %s' % (new_href))

        if (not LABEL_WHAT_NUMBER in element.attrs) or \
                (not LABEL_NAME in element.attrs):
            msg = (
                'substituting_empty_links: Could not find attributes %s or %s in %s'
                % (LABEL_NAME, LABEL_WHAT_NUMBER, element))
            # only the warning is active; the else branch is disabled
            if True:
                logger.warning(msg)
            else:
                note_error_msg(a, msg)
                nerrors += 1
                if raise_errors:
                    raise ValueError(msg)
            continue

        label_what_number = element.attrs[LABEL_WHAT_NUMBER]
        label_number = element.attrs[LABEL_NUMBER]
        label_what = element.attrs[LABEL_WHAT]
        label_name = element.attrs[LABEL_NAME]

        # copy, so that the classes of the link are not modified
        classes = list(a.attrs.get('class', []))  # bug: I was modifying

        # the query ("?only_name", ...) counts as one more class
        if le.query is not None:
            classes.append(le.query)

        if 'toc_link' in classes:
            s = Tag(name='span')
            s.string = label_what
            add_class(s, 'toc_what')
            a.append(s)

            a.append(' ')

            s = Tag(name='span')
            s.string = label_number
            add_class(s, 'toc_number')
            a.append(s)

            s = Tag(name='span')
            s.string = ' - '
            add_class(s, 'toc_sep')
            a.append(s)

            if label_name is not None and '<' in label_name:
                contents = bs(label_name)
                # sanitize the label name
                for br in contents.findAll('br'):
                    br.replaceWith(NavigableString(' '))
                for _ in contents.findAll('a'):
                    _.extract()

                a.append(contents)
                #logger.debug('From label_name = %r to a = %r' % (label_name, a))
            else:
                s = Tag(name='span')
                if label_name is None:
                    s.string = '(unnamed)'  # XXX
                else:
                    s.string = label_name
                add_class(s, 'toc_name')
                a.append(s)

        else:

            if CLASS_ONLY_NUMBER in classes:
                label = label_number
            elif CLASS_NUMBER_NAME in classes:
                if label_name is None:
                    label = label_what_number + \
                            ' - ' + '(unnamed)'  # warning
                else:
                    label = label_what_number + ' - ' + label_name
            elif CLASS_ONLY_NAME in classes:
                if label_name is None:
                    label = '(unnamed)'  # warning
                else:
                    label = label_name
            else:
                label = label_what_number

            span1 = Tag(name='span')
            add_class(span1, 'reflabel')
            span1.string = label
            a.append(span1)

    logger.debug('substituting_empty_links: %d total, %d errors' %
                 (n, nerrors))
def sub_notes(soup):
    """
    Turn every ``<blockquote>`` into an ``<aside class="notes">``.
    """
    for quote in soup.select('blockquote'):
        quote.name = 'aside'
        add_class(quote, 'notes')