def check_status_codes(soup, realpath, res, location):
    """
    Validate the status attribute of every header found in ``soup``.

    Headers carrying the "no TOC" attribute are skipped. A header with a
    status that is not among the allowed ones produces an error on ``res``.
    A header without a status is marked with the "unknown" status; in
    addition, for top-level headers (``h1`` whose id does not contain
    ``part:``) outside of ``catkin_ws`` paths, an error explaining the
    syntax and the possible choices is recorded.

    :param soup: document to scan.
    :param realpath: path of the source file, used in the error message.
    :param res: result object collecting the errors (``note_error``).
    :param location: parent location used to build the error locations.
    """
    constants = MCDPManualConstants
    allowed_statuses = constants.allowed_statuses

    for header in all_headers(soup):
        attrs = header.attrs
        if constants.ATTR_NOTOC in attrs:
            continue

        if constants.ATTR_STATUS in attrs:
            status = attrs[constants.ATTR_STATUS]
            if status not in allowed_statuses:
                msg = 'Invalid status code %r.\n I expected one of: %s' % (
                    status, ", ".join(allowed_statuses))
                res.note_error(msg, HTMLIDLocation.for_element(header, location))
            continue

        # From here on: the header has no status at all.
        # Only complain for h1 headers that are not parts ...
        is_top_level = header.name == 'h1' and 'part:' not in attrs.get('id', '')
        # ... and let's not worry about the Software repo for now.
        if is_top_level and 'catkin_ws' not in realpath:
            # Show the header without the bulky github attributes.
            shown = header.__copy__()
            shown.attrs.pop('github-blob-url', None)
            shown.attrs.pop(constants.ATTR_GITHUB_EDIT_URL, None)

            pieces = [
                'Status not found for this header:\n\n %s' % str(shown),
                '\n\n in file %s' % realpath,
                '\n\nPlease set the status for all the top-level headers.',
                '\n\nThe syntax is:\n\n # My section {#SECTIONID status=STATUS}',
                '\n\nThese are the possible choices for the status:\n',
            ]
            for code, description in allowed_statuses.items():
                if code == constants.STATUS_UNKNOWN:
                    continue
                first = '%23s ' % ('status=%s' % code)
                pieces.append('\n' + indent(description, '', first))

            res.note_error(''.join(pieces),
                           HTMLIDLocation.for_element(header, location))

        attrs[constants.ATTR_STATUS] = constants.STATUS_UNKNOWN
def detect_duplicate_IDs(soup, res):
    """
    Report elements of ``soup`` that reuse an ID already seen.

    The first element with a given ID is remembered as the original. Any
    later element with the same ID is renamed by appending
    ``-duplicate-<id(element)>`` and an error with both locations is
    recorded on ``res``. Elements accepted by ``can_ignore_duplicated_id``
    and IDs ending with ``-wrap`` are left alone.
    """
    from mcdp_docs.manual_join_imp import can_ignore_duplicated_id

    first_seen = OrderedDict()
    for element in soup.select('[id]'):
        element_id = element.attrs['id']

        if element_id not in first_seen:
            first_seen[element_id] = element
            continue

        if can_ignore_duplicated_id(element):
            continue

        # Ignore this because it will be triggered for the sister element,
        # e.g. fig:howto-mount-motors-video, fig:howto-mount-motors-video-wrap
        if element_id.endswith('-wrap'):
            continue

        msg = 'Repeated use of ID "%s"' % element_id
        element.attrs['id'] = element_id + '-duplicate-%s' % id(element)

        locations = OrderedDict()
        locations['repeated-use'] = HTMLIDLocation.for_element(element)
        locations['original-use'] = HTMLIDLocation.for_element(first_seen[element_id])
        res.note_error(msg, locations)
def embed_img_data(soup, resolve, raise_on_error, res, location, embed=True):
    """
    Embed (or re-link) the local images referenced by ``<img src=...>``.

    Only images whose ``src`` ends with one of the extensions listed in
    ``MCDPManualConstants.embed_img_data_extensions`` are touched.
    ``data:`` URIs are skipped silently; sources starting with ``http``
    are skipped with a warning.

    :param soup: document to modify in place.
    :param resolve: callable ``ref -> dict or None``. The dict is read
        through its keys ``'data'`` (file contents) and ``'realpath'``.
    :param raise_on_error: if true, an unresolved image raises
        ``Exception``; otherwise an error is recorded on ``res``.
    :param res: result object collecting warnings and errors.
    :param location: parent location used to build the note locations.
    :param embed: if true, the ``src`` is replaced by the encoded data;
        if false, it is replaced by a link to the (resized) image file,
        which must exist on disk.
    :raises Exception: for a missing image when ``raise_on_error`` is
        set, or when ``embed`` is false and ``realpath`` does not exist.
    """
    img_extensions = MCDPManualConstants.embed_img_data_extensions

    for tag in soup.select('img[src]'):
        href = tag['src']
        if href.startswith('data:'):
            # already embedded
            continue

        if href.startswith('http'):
            msg = 'I will not embed remote files, such as\n %s' % href
            res.note_warning(msg, HTMLIDLocation.for_element(tag, location))
            continue

        for ext in img_extensions:
            if not href.endswith('.' + ext):
                continue

            data = resolve(href)
            if data is None:
                msg = 'embed_img_data: Could not find file:\n %s' % href
                if raise_on_error:
                    raise Exception(msg)  # XXX
                res.note_error(msg, HTMLIDLocation.for_element(tag, location))
                continue

            # Validate the shape of the answer *before* reading from it
            # (the check used to come after the subscripting, where it
            # could never catch anything).
            check_isinstance(data, dict)
            file_data = data['data']
            realpath = data['realpath']

            if embed:
                tag['src'] = data_encoded_for_src(file_data, ext)
            else:
                if not os.path.exists(realpath):
                    msg = 'Expecting a path from %r, but does not exist %s' % (
                        href, realpath)
                    raise Exception(msg)  # XXX

                max_width = MCDPManualConstants.max_width_for_image
                try:
                    tag['src'] = get_link_to_image_file(realpath, max_width)
                except IOError as e:
                    msg = 'Could not resize %s:' % realpath
                    msg += '\n\n ' + str(e)
                    res.note_error(msg, locations=HTMLIDLocation.for_element(
                        tag, location))

            # the extension matched: no need to try the others
            break
def fix_header_id(header, globally_unique_id_part, res, location):
    """
    Normalize the ``id`` of a header and set its ``id-short`` attribute.

    Only headers whose tag name is a key of
    ``MCDPManualConstants.allowed_prefixes_h`` are processed:

    - no ``id``: one is generated from ``globally_unique_id_part`` and the
      global header counter, using the default (first allowed) prefix, and
      ``id-autogenerated`` is set to ``"true"``;
    - ``id`` without a prefix: the default prefix is added (except for the
      special id ``booktitle``);
    - ``id`` with a prefix that is not allowed: an error is noted on ``res``;
    - ``id`` with an allowed prefix: ``id-short`` becomes the id without
      the leading ``prefix:``.

    :param header: the header element, modified in place.
    :param globally_unique_id_part: string used to build generated ids.
    :param res: result object collecting the errors.
    :param location: parent location used to build the error location.
    """
    ID = header.get('id', None)
    if ID is None or ':' not in ID:
        prefix = None
    else:
        prefix = ID[:ID.index(':')]

    if header.name not in MCDPManualConstants.allowed_prefixes_h:
        return

    allowed_prefixes = MCDPManualConstants.allowed_prefixes_h[header.name]
    default_prefix = allowed_prefixes[0]

    if ID is None:
        main = '%s-%s' % (globally_unique_id_part, GlobalCounter.header_id)
        header.attrs['id'] = '%s:%s' % (default_prefix, main)
        header.attrs['id-short'] = main
        header.attrs['id-autogenerated'] = "true"
        GlobalCounter.header_id += 1
        return

    if prefix is None:
        if ID != 'booktitle':  # XXX
            header.attrs['id'] = default_prefix + ':' + ID
            header.attrs['id-short'] = ID
        return

    if prefix not in allowed_prefixes:
        msg = ('The prefix %r is not allowed for %s (ID=%r)' %
               (prefix, header.name, ID))
        res.note_error(msg, HTMLIDLocation.for_element(header, location))
        return

    # Strip only the *leading* "prefix:". Using str.replace() here would
    # also delete any later occurrence of "prefix:" inside the id.
    header.attrs['id-short'] = ID[len(prefix) + 1:]
def create_notes_from_elements(soup, res, location, unique):
    """
    Create a note on ``res`` for every element carrying a "marker" class.

    The classes (and the tag attached to the resulting note) come from
    ``MCDPManualConstants.classes_that_create_notes``. Each note shows a
    sanitized copy of the marked element. The comma-separated ``for``
    attribute of the element adds one ``for:<assignee>`` tag per entry.
    """
    quote_style = 'margin: 1em; font-size: 90%; background-color: #eee; border-radius: 5px; padding: 0.5em;'

    for klass, note_tag in MCDPManualConstants.classes_that_create_notes.items():
        for marked in list(soup.select('.%s' % klass)):
            from mcdp_docs.manual_join_imp import split_robustly

            headline = Tag(name='p')
            headline.append('The following was marked as "%s".' % klass)

            # The quoted block holds a copy, the document is not modified.
            quoted = Tag(name='div')
            quoted.attrs['style'] = quote_style
            quoted.append(get_sanitized_copy(marked))

            body = Tag(name='div')
            body.append(headline)
            body.append(quoted)

            assignees = split_robustly(marked.attrs.get("for", ""), ',')
            note_tags = [note_tag] + ['for:%s' % who for who in assignees]

            where = HTMLIDLocation.for_element(marked, location, unique=unique)
            res.add_note(Note(body, where, stacklevel=0,
                              tags=tuple(sorted(note_tags))))
def substitute_todo(soup, res, location):
    """
    Wrap the elements whose text starts with "TODO" and extract assignees.

    Each element returned by ``get_elements_starting_with_string`` gets the
    class ``todo`` and is moved inside a new ``div.todo-wrap`` that takes
    its place in the parent. If the text after "TODO" begins with the word
    ``for``, the text between ``for`` and the first ``:`` is stored
    (stripped) in the ``for`` attribute of the element; if there is no
    ``:``, an error is noted on ``res``.

    :param soup: document to modify in place.
    :param res: result object collecting the errors.
    :param location: parent location used to build the error location.
    """
    prefix = "TODO"
    klass = 'todo'
    keyword = 'for'

    for r in get_elements_starting_with_string(soup, prefix=prefix):
        div = Tag(name='div')
        add_class(div, klass + '-wrap')
        add_class(r.element, klass)

        # Put the wrapper exactly where the element was.
        parent = r.element.parent
        position = parent.index(r.element)
        r.element.extract()
        div.append(r.element)
        parent.insert(position, div)

        rest = r.rest.strip()
        # Require "for" to be a whole word: a plain startswith() would also
        # match "format", "forward", ... and produce bogus assignees/errors.
        following = rest[len(keyword):len(keyword) + 1]
        if not rest.startswith(keyword) or following.isalnum():
            continue

        after = rest[len(keyword):]
        if ':' in after:
            dest = after[:after.index(':')]
            r.element.attrs['for'] = dest.strip()
        else:
            msg = 'Could not find ":" in "%s"' % after
            res.note_error(msg, HTMLIDLocation.for_element(div, location))
def check_lang_codes(soup, res, location):
    """
    Note an error on ``res`` for every header of ``soup`` whose language
    attribute is not one of ``MCDPManualConstants.allowed_langs``.
    Headers without the attribute are not checked.
    """
    lang_attr = MCDPManualConstants.LANG_ATTR

    for header in all_headers(soup):
        if lang_attr not in header.attrs:
            continue

        lang = header.attrs[lang_attr]
        if lang in MCDPManualConstants.allowed_langs:
            continue

        msg = ('Invalid lang code %r; expected one of %r' %
               (lang, MCDPManualConstants.allowed_langs))
        msg += '\n' + indent(str(header), ' ')
        res.note_error(msg, HTMLIDLocation.for_element(header, location))
def go(s0, func):
    """
    Replace every tag matching the comma-separated selectors in ``s0``
    with the result of ``func(tag)``.

    DP syntax/semantic errors are re-raised if ``raise_error_dp`` is set,
    any other exception if ``raise_error_others`` is set; otherwise the
    error is noted on ``res`` and the tag is left as it is.

    NOTE(review): ``soup``, ``res``, ``raise_error_dp`` and
    ``raise_error_others`` are not defined here; presumably they come from
    an enclosing scope -- confirm.
    """
    def report(failed_tag, error):
        res.note_error(str(error), HTMLIDLocation.for_element(failed_tag))

    for selector_ in s0.split(','):
        for tag in soup.select(selector_):
            try:
                tag.replaceWith(func(tag))
            except (DPSyntaxError, DPSemanticError) as e:
                if raise_error_dp:
                    raise
                report(tag, e)
            except Exception as e:
                if raise_error_others:
                    raise
                report(tag, e)
def go(selector, plotter, load, parse):
    """
    Replace every tag matching ``selector`` with ``plotter(tag, value)``.

    The value is obtained with ``load_or_parse_from_tag`` using ``load``
    and a parsing function built on ``parse``. A non-empty ``style``
    attribute of the tag is copied to the rendered element. Errors are
    re-raised if ``raise_errors`` is set, otherwise noted on ``res``.

    NOTE(review): ``soup``, ``res``, ``realpath``, ``location`` and
    ``raise_errors`` are not defined here; presumably they come from an
    enclosing scope -- confirm.
    """
    def parsing(source_code):
        # each parse gets a brand new context
        return parse(source_code, realpath=realpath, context=Context())

    def report(failed_tag, error):
        res.note_error(str(error),
                       HTMLIDLocation.for_element(failed_tag, location))

    for tag in soup.select(selector):
        try:
            from mcdp_docs.highlight import load_or_parse_from_tag

            # load value with units in vu
            vu = load_or_parse_from_tag(tag, load, parsing)
            rendered = plotter(tag, vu)

            style = tag['style'] if tag.has_attr('style') else ''
            if style:
                rendered['style'] = style

            tag.replaceWith(rendered)
        except (DPSyntaxError, DPSemanticError) as e:
            if raise_errors:
                raise
            report(tag, e)
        except Exception as e:
            if raise_errors:
                raise
            report(tag, e)
def get_empty_links_to_fragment(element_to_modify, extra_refs, res):
    """
    Find all empty links that have a reference to a fragment.

    First an index ``id -> element`` is built from both
    ``element_to_modify`` and ``extra_refs``; local elements win over the
    extra ones. An ID present in both is reported as an error (on ``res``
    and on the logger) unless the extra element has the attribute
    ``ignore_if_conflict``.

    Then, for each empty link of ``element_to_modify`` whose ``href``
    starts with ``#``, yields a ``LinkElement`` with the link (``linker``),
    the referenced id (``eid``), the referenced element or None
    (``linked``) and ``query=None``.

    :param element_to_modify: the document whose links are examined.
    :param extra_refs: element containing the cross references.
    :param res: result object collecting the errors.
    """
    def cut(x):
        # Keep the messages readable: at most 500 characters per element.
        if len(x) < 500:
            return x
        else:
            return x[:500] + ' ... '

    # first find all elements by id
    id2element_local, _ = get_id2element(element_to_modify, 'id')
    id2element_extra, _ = get_id2element(extra_refs, 'id')

    for k in id2element_extra:
        if k not in id2element_local:
            continue
        if 'ignore_if_conflict' in id2element_extra[k].attrs:
            continue

        msg = 'ID %s in cross references also contained locally.' % k
        # cut() must work on the serialized element: on a Tag, len() counts
        # the children and slicing is not supported.
        msg += '\n\n' + indent(cut(str(id2element_local[k])), '', 'local: ')
        msg += '\n\n' + indent(cut(str(id2element_extra[k])), '', 'crossrefs: ')
        res.note_error(msg, HTMLIDLocation.for_element(id2element_local[k]))
        logger.error(msg)

    # local definitions override the ones from the cross references
    id2element = {}
    id2element.update(id2element_extra)
    id2element.update(id2element_local)

    for element in get_empty_links(element_to_modify):
        if 'href' not in element.attrs:
            continue

        href = element.attrs['href']
        if not href.startswith('#'):
            continue

        eid = href[1:]
        linked = id2element.get(eid, None)
        # noinspection PyArgumentList
        yield LinkElement(linker=element, eid=eid, linked=linked, query=None)
def process_assignment(soup, res, location):
    """
    Move the assignees listed in ``.assignment`` elements to the parent
    section.

    For each such element the comma-separated names in its string are
    appended to the ones already stored in the ``ATTR_ASSIGNMENT``
    attribute of the first parent section. If no parent section is found
    (``ValueError``), an error is noted on ``res``. At the end
    ``fix_notes_assignees`` is called on the whole document.
    """
    separator = ','

    for annotation in soup.select('.assignment'):
        try:
            section = find_first_parent_section(annotation)
        except ValueError:
            msg = 'Could not find parent section for this annotation.'
            res.note_error(msg, HTMLIDLocation.for_element(annotation, location))
            continue

        already_there = split_robustly(
            section.attrs.get(ATTR_ASSIGNMENT, ''), separator)
        added = split_robustly(annotation.string, separator)
        section.attrs[ATTR_ASSIGNMENT] = separator.join(already_there + added)

    fix_notes_assignees(soup, res)
def substitute_github_ref(a, defaults, res, location):
    """
    Resolve the github reference in the ``href`` of the link ``a``.

    On success the ``href`` becomes the resolved URL and, if the link has
    no children, a ``<code class="github-resource-link">`` with the base
    name of the referenced path is appended.

    A warning is noted on ``res`` (and the link is left untouched) if the
    reference cannot be parsed, if its repository already failed before,
    or if it has no path. If the resolution fails, an error is noted and
    the URL is remembered in ``FailedRepos.failed_repos``.
    """
    def where():
        return HTMLIDLocation.for_element(a, location)

    try:
        ref = parse_github_file_ref(a.attrs['href'])
    except InvalidGithubRef as e:
        msg = 'Could not parse a reference in %s.' % str(a)
        msg += '\n\n' + indent(e, ' > ')
        res.note_warning(msg, where())
        return

    if ref.url in FailedRepos.failed_repos:
        # do not try again a checkout that we know is going to fail
        msg = 'Skipped because checkout of %s already failed.' % ref.url
        res.note_warning(msg, where())
        return

    if ref.path is None:
        res.note_warning('There is no path specified.', where())
        return

    try:
        ref = resolve_reference(ref, defaults)
    except CouldNotResolveRef as e:
        res.note_error(str(e), where())
        FailedRepos.failed_repos[ref.url] = str(e)
        return

    a.attrs['href'] = ref.url

    if list(a.children):
        # the link already has its own description
        return

    label = Tag(name='code')
    add_class(label, 'github-resource-link')
    label.append(os.path.basename(ref.path))
    a.append(label)
def check_no_patently_wrong_links(soup, res, location):
    """
    Note an error on ``res`` for every link whose ``href`` starts with
    ``#http:`` or ``#https:``, that is, a regular URL mistakenly written
    with the fragment notation.
    """
    # The pieces below are joined by the parser into one single string.
    template = (
        ' This link is invalid: URL = %s'
        ' I think there is an extra "#" at the beginning.'
        ' Note that the Markdown syntax is: [description](URL)'
        ' where URL can be:'
        " 1) using the fragment notation, such as URL = '#SECTIONID'"
        ' for example: Look at [the section](#section-name)'
        " 2) a regular URL, such as: URL = 'http://google.com'"
        ' that is: Look at [the website](http://google.com)'
        ' You have mixed the two syntaxes.'
        ' You probably meant to write the url %s'
        ' but you added an extra "#" at the beginning'
        ' that should have not been there.'
        ' Please remove the "#". '
    )
    wrong_starts = ('#http:', '#https:')

    for link in soup.select('a[href]'):
        href = link.attrs['href']
        if not href.startswith(wrong_starts):
            continue

        # href[1:] is the URL without the extra "#"
        msg = template % (href, href[1:])
        res.note_error(msg.lstrip(), HTMLIDLocation.for_element(link, location))
def add_person_links(soup, users, res):
    """
    Turn each ``span.person-name`` into a link to the page of the user.

    Nothing is done if ``MCDPManualConstants.add_person_links`` is false.
    The user is looked up by the text of the span with
    ``find_user_by_name``; the link target is the ``user_url`` entry of
    the user. If the lookup raises ``KeyError``, a warning is noted on
    ``res`` and the span is left untouched.

    :param soup: document to modify in place.
    :param users: mapping of the known users, indexed by the key returned
        by ``find_user_by_name``.
    :param res: result object collecting the warnings.
    """
    if not MCDPManualConstants.add_person_links:
        return

    for span in soup.select('span.person-name'):
        name = span.text
        try:
            k = find_user_by_name(users, name)
            # Look up the URL *before* touching the element, so that a
            # failure does not leave behind an <a> without href.
            user_url = users[k]['user_url']
        except KeyError:
            msg = u'Could not find user "%s" in DB.' % name
            res.note_warning(msg.encode('utf8'),
                             HTMLIDLocation.for_element(span))
            continue

        span.name = 'a'
        span.attrs['href'] = user_url
def display_files(soup, defaults, res, location, raise_errors):
    """
    Process all the ``<display-file>`` elements of ``soup``.

    The ``src`` attribute is stripped and written back. Sources starting
    with ``github:`` are handed to ``display_file``; any other source
    raises ``DPSemanticError`` if ``raise_errors`` is set, otherwise an
    error is noted on ``res``.

    Returns the number of elements handed to ``display_file``.
    """
    processed = 0

    for element in soup.find_all('display-file'):
        src = element.attrs.get('src', '').strip()
        element.attrs['src'] = src

        if not src.startswith('github:'):
            msg = 'Unknown schema %r; I only know "github:".' % src
            if raise_errors:
                raise DPSemanticError(msg)
            res.note_error(msg, HTMLIDLocation.for_element(element, location))
            continue

        display_file(element, defaults, res, location, raise_errors)
        processed += 1

    return processed
def display_file(element, defaults, res, location, raise_errors):
    """
    Replace a ``<display-file src="github:...">`` element with a ``div``
    showing the referenced lines of the file.

    The ``div`` carries the attribute ``figure-id`` (``code:<basename>-
    <first>-<last>``) and contains a ``figcaption`` linking to the URL of
    the reference, followed by ``<pre><code>`` with the selected lines.
    Without explicit bounds in the reference the whole file is shown.

    If the reference cannot be resolved, the error is re-raised as
    ``DPSemanticError`` when ``raise_errors`` is set; otherwise it is
    noted on ``res`` and the element is left untouched.
    """
    assert element.name == 'display-file'
    assert 'src' in element.attrs
    src = element.attrs['src']
    assert src.startswith('github:')

    ref = parse_github_file_ref(src)
    try:
        ref = resolve_reference(ref, defaults=defaults)
    except CouldNotResolveRef as e:
        msg = 'Could not resolve reference %r' % src
        if raise_errors:
            raise_wrapped(DPSemanticError, e, msg, compact=True)
        else:
            msg += '\n\n' + indent(str(e), '> ')
            res.note_error(msg, HTMLIDLocation.for_element(element, location))
            return

    # Select the lines [first, last], both included.
    all_lines = ref.contents.split('\n')
    first = 0 if ref.from_line is None else ref.from_line
    last = len(all_lines) - 1 if ref.to_line is None else ref.to_line
    contents = "\n".join(all_lines[first:last + 1])

    base = os.path.basename(ref.path)

    wrapper = Tag(name='div')
    wrapper.attrs['figure-id'] = 'code:%s' % (base + '-%d-%d' % (first, last))

    # caption: the file name, linking to the resolved reference
    file_name = Tag(name='code')
    file_name.append(base)
    link = Tag(name='a')
    link.append(file_name)
    link.attrs['href'] = ref.url
    figcaption = Tag(name='figcaption')
    figcaption.append(link)
    wrapper.append(figcaption)

    # body: the selected portion of the file
    pre = Tag(name='pre')
    code = Tag(name='code')
    pre.append(code)
    code.append(contents)
    wrapper.append(pre)

    element.replace_with(wrapper)
def move_things_around(soup, raise_if_errors=False, res=None):
    """
    Looks for tags like:

        <move-here src="#line_detector2-line_detector_node2-autogenerated"/>

    and replaces each of them with the element that has the given ID,
    which is removed from its original position.

    A missing ``src``, or one not starting with ``#``, raises
    ``ValueError``. If the ID is not found, the tag is renamed to ``span``
    and an error is noted on ``res``; ``ValueError`` is also raised if
    ``raise_if_errors`` is set. A new ``AugmentedResult`` is used when
    ``res`` is None.
    """
    if res is None:
        res = AugmentedResult()

    from mcdp_docs.check_missing_links import get_id2element

    # One index for all the lookups: soup.find(id=...) for each tag
    # would be O(n^2).
    with timeit_wall('getting all IDs'):
        id2element, _ = get_id2element(soup, 'id')

    for placeholder in soup.find_all('move-here'):
        if 'src' not in placeholder.attrs:
            msg = 'Expected attribute "src" for element %s' % str(placeholder)
            raise ValueError(msg)

        src = placeholder.attrs['src']
        if not src.startswith('#'):
            msg = 'Expected that attribute "src" started with "#" for element %s.' % str(placeholder)
            raise ValueError(msg)

        nid = src[1:]
        target = id2element.get(nid, None)
        if target:
            target.extract()
            placeholder.replace_with(target)
            continue

        msg = 'move-here: Could not find ID %r.' % nid
        placeholder.name = 'span'
        res.note_error(msg, HTMLIDLocation.for_element(placeholder))
        if raise_if_errors:
            raise ValueError(msg)
def make_videos_(o, res, location, raise_on_errors):
    """
    Replace the video element ``o`` (``src="vimeo:<id>"``) with a
    ``div.video`` containing three alternative renditions:

    - ``only-web``: an iframe with the vimeo player;
    - ``only-ebook``: the thumbnail, linking to the video;
    - ``only-deadtree``: the thumbnail and a sentence with the URL.

    The ``style`` attribute of ``o``, if any, is copied to the new ``div``.

    A missing or invalid ``src`` is noted as an error on ``res``. A
    ``VimeoInfoException`` is re-raised if ``raise_on_errors`` is set,
    otherwise noted on ``res``. In all these cases ``o`` is not replaced.
    """
    def report(message):
        res.note_error(message, HTMLIDLocation.for_element(o, location))

    if 'src' not in o.attrs:
        report('The video does not have a "src" attribute.')
        return

    src = o.attrs['src']
    prefix = 'vimeo:'
    if not src.startswith(prefix):
        report('Invalid src attribute "%s": it does not start with %r.' % (
            src, prefix))
        return

    vimeo_id = src[len(prefix):]

    try:
        vimeo_info = get_vimeo_info(vimeo_id)
    except VimeoInfoException as e:
        if raise_on_errors:
            raise
        report(str(e))
        return

    def section(css_class):
        box = Tag(name='div')
        box.attrs['class'] = css_class
        return box

    def thumbnail(css_class):
        img = Tag(name='img')
        img.attrs['class'] = css_class
        img.attrs['src'] = vimeo_info.thumbnail_large
        img.attrs['title'] = vimeo_info.title
        return img

    video = Tag(name='div')
    video.attrs['class'] = 'video'

    # 1) online: the player
    video.append(Comment('This is the iframe, for online playing.'))
    web = section('only-web')
    player = Tag(name='iframe')
    player.attrs['class'] = 'video-vimeo-player'
    player.attrs['src'] = 'https://player.vimeo.com/video/' + vimeo_id
    player.attrs['frameborder'] = 0
    player.attrs['webkitallowfullscreen'] = 1
    player.attrs['mozallowfullscreen'] = 1
    player.attrs['allowfullscreen'] = 1
    web.append(player)
    video.append(web)

    # 2) ebook: a clickable thumbnail
    video.append(Comment('This is the thumbnail, for ebook'))
    ebook = section('only-ebook')
    link = Tag(name='a')
    link.attrs['href'] = vimeo_info.url
    link.append(thumbnail('video-vimeo-thumbnail-ebook'))
    ebook.append(link)
    video.append(ebook)

    # 3) print: thumbnail plus the URL spelled out
    video.append(Comment('This is the textual version for printing.'))
    deadtree = section('only-deadtree')
    deadtree.append(thumbnail('video-vimeo-thumbnail-deadtree'))
    sentence = Tag(name='p')
    sentence.append("The video is at %s." % vimeo_info.url)
    deadtree.append(sentence)
    video.append(deadtree)

    if 'style' in o.attrs:
        video.attrs['style'] = o.attrs['style']

    o.replace_with(video)
def make_figures(library, soup, res, location, raise_error_dp, raise_error_others, realpath, generate_pdf):
    """ Looks for codes like:

    <pre><code class="mcdp_ndp_graph_templatized">mcdp {
        # empty model
    }
    </code></pre>

        and creates a link to the image

        For every figure type offered by MakeFiguresNDP, MakeFiguresTemplate
        and MakeFiguresPoset (names and aliases), the elements matching
        "render.<type>", "pre.<type>" and "img.<type>" are replaced by the
        SVG of the figure; if generate_pdf is set, by a div holding the SVG
        and a link to the PDF data.

        Any "render" element left at the end, whose class does not contain
        "errored", is reported as an error on res.

        library: used to load/parse the models and to find the images.
        soup: the document, modified in place.
        res: result object collecting the errors.
        location: not used in this function.
        raise_error_dp: re-raise DPSyntaxError/DPSemanticError instead of
            noting them.
        raise_error_others: same, for any other exception.
        realpath: path of the document; passed to the parsers and used to
            name the PDF downloads.
        generate_pdf: whether to generate also the PDF version.

        Returns to_html_stripping_fragment(soup).
    """

    def go(s0, func):
        # Replaces each tag matching the comma-separated selectors in s0
        # with func(tag), handling the errors according to the raise_* flags.
        selectors = s0.split(',')
        for selector_ in selectors:
            for tag in soup.select(selector_):
                try:
                    r = func(tag)
                    tag.replaceWith(r)
                except (DPSyntaxError, DPSemanticError) as e:
                    if raise_error_dp:
                        raise
                    else:
                        res.note_error(str(e), HTMLIDLocation.for_element(tag))
                        continue
                except Exception as e:
                    if raise_error_others:
                        raise
                    else:
                        res.note_error(str(e), HTMLIDLocation.for_element(tag))
                        continue

    def make_tag(tag0, klass, data, ndp=None, template=None, poset=None):
        # Builds the element that replaces tag0: the SVG in data['svg'],
        # with class klass, plus the PDF link if generate_pdf is set.
        svg = data['svg']
        tag_svg = BeautifulSoup(svg, 'lxml', from_encoding='utf-8').svg
        assert tag_svg.name == 'svg'
        if tag_svg.has_attr('width'):
            # Rescale the size (expressed in "pt") by the configured factor.
            ws = tag_svg['width']
            hs = tag_svg['height']
            assert 'pt' in ws
            w = float(ws.replace('pt', ''))
            h = float(hs.replace('pt', ''))
            scale = MCDPConstants.scale_svg
            w2 = w * scale
            h2 = h * scale
            tag_svg['width'] = w2
            tag_svg['height'] = h2
            tag_svg['rescaled'] = 'Rescaled from %s %s, scale = %s' % (ws, hs, scale)
        else:
            print('no width in SVG tag: %s' % tag_svg)
        tag_svg['class'] = klass
        # The style and the id of the original tag are kept.
        if tag0.has_attr('style'):
            tag_svg['style'] = tag0['style']
        if tag0.has_attr('id'):
            tag_svg['id'] = tag0['id']
        if generate_pdf:
            pdf0 = data['pdf']
            pdf = crop_pdf(pdf0, margins=0)
            div = Tag(name='div')
            att = MCDPConstants.ATTR_LOAD_NAME
            # Name for the download: the id of the tag, else the load name
            # of the object, else a hash of the content of the tag.
            if tag0.has_attr('id'):
                basename = tag0['id']
            elif ndp is not None and hasattr(ndp, att):
                basename = getattr(ndp, att)
            elif template is not None and hasattr(template, att):
                basename = getattr(template, att)
            elif poset is not None and hasattr(poset, att):
                basename = getattr(poset, att)
            else:
                hashcode = hashlib.sha224(tag0.string).hexdigest()[-8:]
                basename = 'code-%s' % hashcode
            docname = os.path.splitext(os.path.basename(realpath))[0]
            download = docname + "." + basename + "." + klass + '.pdf'
            a = create_a_to_data(download=download, data_format='pdf', data=pdf)
            a['class'] = 'pdf_data'
            a.append(NavigableString(download))
            div.append(tag_svg)
            div.append(a)
            return div
        else:
            return tag_svg

    image_source = ImagesFromPaths(library.get_images_paths())

    # --- figures for NDPs
    # (the instance built with None arguments is used only to list the
    # available figure types)
    mf0 = MakeFiguresNDP(None, None, None)
    available_ndp = set(mf0.available()) | set(mf0.aliases)
    for which in available_ndp:
        # The callback reads "which" from the loop; it is used by go()
        # within the same iteration.
        def callback(tag0):
            assert tag0.parent is not None
            context = Context()
            load = lambda x: library.load_ndp(x, context=context)
            parse = lambda x: library.parse_ndp(
                x, realpath=realpath, context=context)
            ndp = load_or_parse_from_tag(tag0, load, parse)
            mf = MakeFiguresNDP(ndp=ndp, image_source=image_source, yourname=None)  # XXX
            formats = ['svg']
            if generate_pdf:
                formats.append('pdf')
            data = mf.get_figure(which, formats)
            tag = make_tag(tag0, which, data, ndp=ndp, template=None)
            return tag
        selector = 'render.%s,pre.%s,img.%s' % (which, which, which)
        go(selector, callback)

    # --- figures for templates
    mf0 = MakeFiguresTemplate(None, None, None)
    available_template = set(mf0.available()) | set(mf0.aliases)
    for which in available_template:
        def callback(tag0):
            context = Context()
            load = lambda x: library.load_spec(
                SPEC_TEMPLATES, x, context=context)
            parse = lambda x: library.parse_template(
                x, realpath=realpath, context=context)
            template = load_or_parse_from_tag(tag0, load, parse)
            mf = MakeFiguresTemplate(template=template, library=library, yourname=None)  # XXX
            formats = ['svg']
            if generate_pdf:
                formats.append('pdf')
            data = mf.get_figure(which, formats)
            tag = make_tag(tag0, which, data, ndp=None, template=template)
            return tag
        selector = 'render.%s,pre.%s,img.%s' % (which, which, which)
        go(selector, callback)

    # --- figures for posets
    mf0 = MakeFiguresPoset(None, None)
    available_poset = set(mf0.available()) | set(mf0.aliases)
    for which in available_poset:
        def callback(tag0):
            context = Context()
            load = lambda x: library.load_poset(x, context=context)
            parse = lambda x: library.parse_poset(
                x, realpath=realpath, context=context)
            poset = load_or_parse_from_tag(tag0, load, parse)
            mf = MakeFiguresPoset(poset=poset, image_source=image_source)
            formats = ['svg']
            if generate_pdf:
                formats.append('pdf')
            data = mf.get_figure(which, formats)
            tag = make_tag(tag0, which, data, ndp=None, template=None, poset=poset)
            return tag
        selector = 'render.%s,pre.%s,img.%s' % (which, which, which)
        go(selector, callback)

    # The "render" elements still present were not matched by any of the
    # selectors above (or their rendering failed and the error was noted).
    unsure = list(soup.select('render'))
    unsure = [_ for _ in unsure if 'errored' not in _.attrs.get('class', '')]
    for _ in unsure:
        msg = 'Invalid "render" element.'
        msg += '\n\n' + " Available for NDPs: %s." % ", ".join(
            sorted(available_ndp))
        msg += '\n\n' + " Available for templates: %s." % ", ".join(
            sorted(available_template))
        msg += '\n\n' + " Available for posets: %s." % ", ".join(
            sorted(available_poset))
        # An error is noted for each element, instead of raising ValueError.
        res.note_error(msg, HTMLIDLocation.for_element(_))
    return to_html_stripping_fragment(soup)
def go(soup, towrap, ID, figure_class, res, location):
    """
    Wrap the element ``towrap`` in a ``<figure>`` with the given ``ID``.

    The result, inserted where ``towrap`` was, has this structure::

        div#<ID>-wrap.generated-figure-wrap
          figure#<ID>.generated-figure
            div.figcontent      (towrap + a div with "clear: both")
            figcaption

    The prefix of ``ID`` decides the class of the figure: ``fig:`` ->
    ``figure``, ``subfig:`` -> ``subfloat``, ``tab:`` -> ``table``,
    ``code:`` -> ``code``. Any other prefix is noted as an error on
    ``res`` and nothing is done.

    The caption is the element with id ``<ID>:caption`` if it exists in
    ``soup``, else the first ``figcaption`` inside ``towrap``, else a new
    ``figcaption`` with the text of the attribute ``figure-caption`` (or
    empty). It goes below the content, except for tables and when
    ``figure_class`` contains ``caption-left``, where it goes first.
    Having both an existing caption element and the attribute
    ``figure-caption`` is noted as an error.

    :param soup: the document, searched for the external caption.
    :param towrap: the element to wrap; a ``figure`` is turned into ``div``.
    :param ID: the id for the new figure.
    :param figure_class: classes added to ``towrap``, to the figure and to
        the outer ``div``.
    :param res: result object collecting the errors.
    :param location: parent location used to build the error locations.
    """
    from mcdp_docs.highlight import add_class

    parent = towrap.parent
    fig = Tag(name='figure')
    fig['id'] = ID

    caption_below = True
    if ID.startswith('fig:'):
        add_class(fig, 'figure')
    elif ID.startswith('subfig:'):
        add_class(fig, 'subfloat')
    elif ID.startswith('tab:'):
        add_class(fig, 'table')
        caption_below = False
    elif ID.startswith('code:'):
        add_class(fig, 'code')
    else:
        msg = 'The ID %r should start with fig: or tab: or code:' % ID
        res.note_error(msg, locations=HTMLIDLocation.for_element(towrap, location))
        return

    if 'caption-left' in figure_class:
        caption_below = False

    # Look for an existing caption: first by id, then inside the element.
    external_caption_id = '%s:caption' % ID
    external_caption = soup.find(id=external_caption_id)
    if external_caption is None:
        external_caption = towrap.find(name='figcaption')

    if external_caption is not None:
        external_caption.extract()
        if external_caption.name != 'figcaption':
            logger.error('Element %s#%r should have name figcaption.' %
                         (external_caption.name, external_caption_id))
            external_caption.name = 'figcaption'
        figcaption = external_caption

        if towrap.has_attr('figure-caption'):
            msg = 'Already using external caption for %s' % ID
            # The keyword is "locations", as in the other call above;
            # "location=" is not the name used by note_error.
            res.note_error(msg, locations=HTMLIDLocation.for_element(
                towrap, location))
            return
    else:
        if towrap.has_attr('figure-caption'):
            caption = towrap['figure-caption']
        else:
            caption = ''
        figcaption = Tag(name='figcaption')
        figcaption.append(NavigableString(caption))

    outside = Tag(name='div')
    outside['id'] = ID + '-wrap'
    if towrap.has_attr('figure-style'):
        outside['style'] = towrap['figure-style']

    for k in figure_class:
        add_class(towrap, k)
        # XXX but not to figure itself?
        add_class(fig, k)
        add_class(outside, k)

    # Remember the position before detaching the element.
    i = parent.index(towrap)
    towrap.extract()

    figcontent = Tag(name='div', attrs={'class': 'figcontent'})
    if towrap.name == 'figure':
        # avoid a figure nested inside the generated figure
        towrap.name = 'div'
        add_class(towrap, 'figure-conv-to-div')
    figcontent.append(towrap)

    # <div style='clear: both;'></div> for floating stuff.
    # Not 100% sure where it should go.
    breaking_div = Tag(name='div')
    breaking_div.attrs['style'] = 'clear: both'
    figcontent.append(breaking_div)

    fig.append(figcontent)
    if caption_below:
        fig.append(figcaption)
    else:
        fig.insert(0, figcaption)

    add_class(outside, 'generated-figure-wrap')
    add_class(fig, 'generated-figure')
    outside.append(fig)
    parent.insert(i, outside)
def embed_pdf_image(tag, resolve, density, raise_on_error, res, location):
    """
    Convert the PDF referenced by the ``<img>`` ``tag`` to PNG and embed it.

    The PDF is obtained with ``resolve(src)`` (a dict read through its
    keys ``'data'`` and ``'realpath'``, or None) and converted at
    ``density`` pixels per inch. The tag receives the size in inches as
    style (``width``/``height``), the attribute ``size_in_pixels`` and the
    encoded PNG as new ``src``.

    The displayed size can be changed with the attribute ``latex-options``:
    ``scale`` multiplies the natural size, ``width`` sets the width keeping
    the aspect ratio (5 inches if it cannot be interpreted). ``height``
    is not supported and produces a warning.

    A PDF that is not found or cannot be converted raises an exception if
    ``raise_on_error`` is set; otherwise an error is noted on ``res`` and
    the tag is left untouched.
    """
    assert tag.name == 'img'
    assert tag.has_attr('src')

    def report(message):
        res.note_error(message, HTMLIDLocation.for_element(tag, location))

    # load pdf data
    src = tag['src']
    if src.startswith('http'):
        logger.warning('I will not embed remote files, such as %s: ' % src)

    found = resolve(src)
    if found is None:
        msg = 'Could not find PDF file %r.' % src
        if raise_on_error:
            raise Exception(msg)  # xxx
        report(msg)
        return

    data_pdf = found['data']
    _realpath = found['realpath']

    # convert PDF to PNG; density = pixels per inch
    try:
        data_png = png_from_pdf(data_pdf, density=density)
    except ConversionError as e:
        msg = 'I was not able to convert the PDF "%s" to PNG.' % tag['src']
        if raise_on_error:
            raise_wrapped(ConversionError, e, msg, compact=True)
        else:
            report(msg)
            return

    # PNG size in pixels -> original size of the PDF in inches
    width_px, height_px = get_pixel_width_height_of_png(data_png)
    dpi = float(density)
    width_in = width_px / dpi
    height_in = height_px / dpi

    latex_options = tag.get('latex-options', '')
    props = parse_includegraphics_option_string(latex_options)

    if 'height' in props:
        msg = ('Cannot deal with "height" yet: latex_options = %s' %
               latex_options)
        res.note_warning(msg, HTMLIDLocation.for_element(tag, location))

    if 'scale' in props:
        factor = float(props['scale'])
        use_width_in = width_in * factor
        use_height_in = height_in * factor
    elif 'width' in props:
        try:
            use_width_in = get_length_in_inches(props['width'])
        except ValueError as e:
            logger.error('Cannot interpret %s: %s' % (latex_options, e))
            use_width_in = 5.0
        # keep the aspect ratio
        use_height_in = use_width_in * (height_in / width_in)
    else:
        use_width_in = width_in
        use_height_in = height_in

    # Add it before so that we can override
    add_style(tag, after=False,
              width='%sin' % use_width_in,
              height='%sin' % use_height_in)
    tag['size_in_pixels'] = '%s, %s' % (width_px, height_px)

    # encode
    tag['src'] = data_encoded_for_src(data_png, 'png')
def sub_link(a, element_id, element, res):
    """
        Fills the empty link "a" with a label describing the element
        it refers to.

        a: the link with href= #element_id
        element_id: the ID of the element referred to
        element: the element to which we refer; the label is built from
            its label attributes (LABEL_WHAT, LABEL_NAME, LABEL_NUMBER,
            LABEL_WHAT_NUMBER)
        res: result object collecting the errors

        The classes of the link select what is shown (only the number,
        number and name, only the name). Links with the "toc link" class
        get separate spans (toc_what, toc_number, toc_sep, toc_name);
        the other links get one span with class "reflabel".

        If the element has the attribute 'base_url', this is prepended
        to the href of the link.

        Raises Exception if the element is marked as not numbered and
        LABEL_WHAT or LABEL_NAME is missing.
    """
    assert isinstance(element, Tag)
    CLASS_ONLY_NUMBER = MCDPManualConstants.CLASS_ONLY_NUMBER
    CLASS_NUMBER_NAME = MCDPManualConstants.CLASS_NUMBER_NAME
    CLASS_ONLY_NAME = MCDPManualConstants.CLASS_ONLY_NAME
    if MCDPManualConstants.ATTR_NONUMBER in element.attrs:
        # Not numbered: only the name can be shown, whatever the classes
        # of the link.
        label_what_number = None
        label_number = None
        try:
            label_what = element.attrs[LABEL_WHAT]
            label_name = element.attrs[LABEL_NAME]
        except KeyError as e:
            msg = 'Cannot find %r in %s' % (e, element.attrs)
            raise Exception(msg)  # XXX
        classes = [CLASS_ONLY_NAME]
    else:
        if (not LABEL_WHAT_NUMBER in element.attrs) or \
                (not LABEL_NAME in element.attrs):
            msg = (
                'substituting_empty_links: Could not find attributes %s or %s in %s' %
                (LABEL_NAME, LABEL_WHAT_NUMBER, compact_desc_tag(element)))
            res.note_error(
                msg, {
                    'original': HTMLIDLocation(element_id),
                    'reference': HTMLIDLocation.for_element(a)
                })
            return
        label_what_number = element.attrs[LABEL_WHAT_NUMBER]
        label_number = element.attrs[LABEL_NUMBER]
        label_what = element.attrs[LABEL_WHAT]
        label_name = element.attrs[LABEL_NAME]
        # A copy of the list of classes (the original is not to be modified).
        classes = list(a.attrs.get('class', []))
    if MCDPManualConstants.CLASS_TOC_LINK in classes:
        # Link in the table of contents: "<what> <number> - <name>",
        # each piece in its own span.
        if not CLASS_ONLY_NAME in classes:
            s = Tag(name='span')
            s.string = label_what
            add_class(s, 'toc_what')
            a.append(s)
            a.append(' ')
            s = Tag(name='span')
            s.string = label_number
            add_class(s, 'toc_number')
            a.append(s)
            s = Tag(name='span')
            s.string = ' - '
            add_class(s, 'toc_sep')
            a.append(s)
        if label_name is not None and '<' in label_name:
            # The name contains markup.
            contents = bs(label_name)
            # sanitize the label name: line breaks become spaces and
            # the links are removed
            for br in contents.findAll('br'):
                br.replaceWith(NavigableString(' '))
            for _ in contents.findAll('a'):
                _.extract()
            contents.name = 'span'
            add_class(contents, 'toc_name')
            a.append(contents)
        else:
            if label_name is None:
                s = Tag(name='span')
                s.string = '(unnamed)'  # XXX
            else:
                s = bs(label_name)
                assert s.name == 'fragment'
                s.name = 'span'
            add_class(s, 'toc_name')
            a.append(s)
    else:
        # Regular reference: one label, chosen according to the classes.
        if CLASS_ONLY_NUMBER in classes:
            label = label_number
        elif CLASS_NUMBER_NAME in classes:
            if label_name is None:
                label = label_what_number + \
                        ' - ' + '(unnamed)'  # warning
            else:
                label = label_what_number + ' - ' + label_name
        elif CLASS_ONLY_NAME in classes:
            if label_name is None:
                label = '(unnamed)'  # warning
            else:
                label = label_name
        else:
            # default behavior: figures, tables, bibliography and code
            # are referred to without the name
            if string_starts_with(['fig:', 'tab:', 'bib:', 'code:'], element_id):
                label = label_what_number
            elif label_name is None:
                label = label_what_number
            else:
                label = label_what_number + ' - ' + label_name
        frag = bs(label)
        assert frag.name == 'fragment'
        frag.name = 'span'
        add_class(frag, 'reflabel')
        a.append(frag)
    if 'base_url' in element.attrs:
        a['href'] = element.attrs['base_url'] + a['href']
def go(selector, parse_expr, extension, use_pre=True, refine=None):
    """
        Replaces each tag matching "selector" with the highlighted
        version of its source code.

        selector: CSS selector for the tags to process
        parse_expr: the parsing expression for the language
        extension: extension of the file to load when the tag is empty
            (the file is "<name>.<extension>", the name being given by
            the id of the tag, possibly as "<library>.<name>")
        use_pre: if true the result is a div.rendered containing a <pre>;
            otherwise it is the <code> element alone
        refine: optional function called as refine(x, context=context),
            used as postprocessing step by ast_to_html

        The attributes "noprettify" or "np" on the tag disable the
        application of the suggestions to the source code.

        NOTE(review): soup, library, res, realpath, raise_errors and
        generate_pdf are not defined here; presumably they come from an
        enclosing scope -- confirm.
    """
    for tag in soup.select(selector):
        source_code = '<unset>'  # XXX
        try:
            if tag.string is None:
                # Empty tag: the source is loaded from the library.
                if not tag.has_attr('id'):
                    msg = "If <pre> is empty then it needs to have an id."
                    raise_desc(ValueError, msg, tag=describe_tag(tag))
                # load it
                tag_id = tag['id'].encode('utf-8')
                if '.' in tag_id:
                    # the part before the first dot is the library name
                    i = tag_id.index('.')
                    libname, name = tag_id[:i], tag_id[i + 1:]
                    use_library = library.load_library(libname)
                else:
                    name = tag_id
                    use_library = library
                basename = '%s.%s' % (name, extension)
                data = use_library._get_file_data(basename)
                source_code = data['data']
            else:
                source_code = get_source_code(tag)
            # prettify.
            # remove spurious indentation
            source_code = source_code.strip()
            do_apply_suggestions = (not tag.has_attr('noprettify') and
                                    not tag.has_attr('np'))
            # then apply suggestions
            try:
                if do_apply_suggestions:
                    x = parse_wrap(parse_expr, source_code)[0]
                    xr = parse_ndp_refine(x, Context())
                    suggestions = get_suggestions(xr)
                    source_code = apply_suggestions(
                        source_code, suggestions)
            except DPSyntaxError as e:
                if raise_errors:
                    raise
                else:
                    res.note_error(str(e), HTMLIDLocation.for_element(tag))
                    continue
            # we are not using it
            _realpath = realpath
            context = Context()

            def postprocess(x):
                if refine is not None:
                    return refine(x, context=context)
                else:
                    return x

            html = ast_to_html(source_code,
                               parse_expr=parse_expr,
                               add_line_gutter=False,
                               postprocess=postprocess)
            # Mark in the HTML the warnings that have a position.
            for w in context.warnings:
                if w.where is not None:
                    from mcdp_web.editor_fancy.app_editor_fancy_generic import html_mark
                    html = html_mark(html, w.where, "language_warning")
            frag2 = BeautifulSoup(html, 'lxml', from_encoding='utf-8')
            if use_pre:
                rendered = Tag(name='div', attrs={'class': 'rendered'})
                pre = frag2.pre
                pre.extract()
                rendered.append(pre)
                if not rendered.has_attr('class'):
                    rendered['class'] = ""
                if tag.has_attr('label'):
                    # The label is added twice, inside and outside the
                    # <pre>, with different classes.
                    text = tag['label']
                    tag_label = Tag(name='span')
                    add_class(tag_label, 'label')
                    add_class(tag_label, 'label_inside')
                    tag_label.append(NavigableString(text))
                    pre.insert(0, tag_label)
                    tag_label_outside = Tag(name='span')
                    add_class(tag_label_outside, 'label')
                    add_class(tag_label_outside, 'label_outside')
                    tag_label_outside.append(NavigableString(text))
                    rendered.insert(0, tag_label_outside)
                # NOTE(review): max_len is computed but not used below.
                max_len = max_len_of_pre_html(html)
                if tag.has_attr('label'):
                    add_class(rendered, 'has_label')
                    max_len = max(max_len, len(tag['label']) + 6)
                style = ''
            else:
                # using <code>
                rendered = frag2.pre.code
                rendered.extract()
                if not rendered.has_attr('class'):
                    rendered['class'] = ""
                style = ''
            # The style, the classes and the id of the original tag are kept.
            if tag.has_attr('style'):
                style = style + tag['style']
            if style:
                rendered['style'] = style
            if tag.has_attr('class'):
                add_class(rendered, tag['class'])
            if tag.has_attr('id'):
                rendered['id'] = tag['id']
            if use_pre:
                if generate_pdf:
                    pdf = get_ast_as_pdf(source_code, parse_expr)
                    # Name for the download: the id of the tag, or a hash
                    # of the source code.
                    if tag.has_attr('id'):
                        basename = tag['id']
                    else:
                        hashcode = hashlib.sha224(
                            source_code).hexdigest()[-8:]
                        basename = 'code-%s' % hashcode
                    docname = os.path.splitext(
                        os.path.basename(realpath))[0]
                    download = docname + '.' + basename + '.source_code.pdf'
                    a = create_a_to_data(download=download,
                                         data_format='pdf',
                                         data=pdf)
                    a['class'] = 'pdf_data'
                    a.append(NavigableString(download))
                    div = Tag(name='div')
                    div.append(rendered)
                    div.append(a)
                    tag.replaceWith(div)
                else:
                    tag.replaceWith(rendered)
            else:
                tag.replaceWith(rendered)
        except DPSyntaxError as e:
            if raise_errors:
                raise
            else:
                res.note_error(str(e), HTMLIDLocation.for_element(tag))
                # An empty tag is left with a placeholder naming its id.
                if tag.string is None:
                    tag.string = "`%s" % tag['id']
                continue
        except DPSemanticError as e:
            if raise_errors:
                raise
            else:
                res.note_error(str(e), HTMLIDLocation.for_element(tag))
                # An empty tag is left with a placeholder naming its id.
                if tag.string is None:
                    tag.string = "`%s" % tag['id']
                continue
        except DPInternalError as ex:
            # Internal errors are always raised, together with the code.
            msg = 'Error while interpreting the code:\n\n'
            msg += indent(source_code, ' | ')
            raise_wrapped(DPInternalError, ex, msg, exc=sys.exc_info())
def check_if_any_href_is_invalid(soup, res, location0, extra_refs=None, ignore_ref_errors=False):
    """
        Checks the fragment links (href="#...") in soup and tries to repair
        the ones whose target ID does not exist.

        A link whose ID is unknown is retried with each prefix listed in
        MCDPManualConstants.all_possible_prefixes_that_can_be_implied
        ("<prefix>:<core>", where core is the part of the href after the
        first ':' or the whole ID if there is no ':'):

        - exactly one candidate exists: the href is rewritten to point to it;
          if the target only exists in extra_refs, 'href_external' is also
          set from the target's 'base_url' attribute;
        - several candidates exist: an error is noted, the link is untouched;
        - no candidate exists: the href is removed if the link has the class
          MCDPConstants.CLASS_IGNORE_IF_NOT_EXISTENT; otherwise a
          <span class="inside-unknown-link"> marker is appended to the link
          and the problem is reported.

        Links with the class "mjx-svg-href" are always reported as invalid
        math references and not processed further.

        :param soup: document whose links are checked and modified in place.
        :param res: result collector; receives note_error / note_warning.
        :param location0: passed to HTMLIDLocation.for_element() for every
            reported location.
        :param extra_refs: optional element whose IDs are also accepted as
            link targets. IDs found in soup take precedence over these.
        :param ignore_ref_errors: if True, unresolved links are reported as
            warnings instead of errors.
    """
    if extra_refs is None:
        extra_refs = Tag(name='div')
    else:
        print('using extra cross refs')

    # let's first find all the IDs
    id2element_current, duplicates = get_id2element(soup, 'id')
    id2element_extra, _ = get_id2element(extra_refs, 'id')
    # IDs of the current document override the ones from extra_refs
    id2element = {}
    id2element.update(id2element_extra)
    id2element.update(id2element_current)

    for a in soup.select('[href^="#"]'):
        href = a['href']
        assert href.startswith('#')
        ID = href[1:]

        if a.has_attr('class') and "mjx-svg-href" in a['class']:
            msg = 'Invalid math reference (sorry, no details): href = %s .' % href
            location = HTMLIDLocation.for_element(a, location0)
            res.note_error(msg, location)
            continue

        if ID not in id2element:
            # try to fix it

            # if there is already a prefix, remove it
            if ':' in href:
                core = href[href.index(':') + 1:]
            else:
                core = ID

            possible = MCDPManualConstants.all_possible_prefixes_that_can_be_implied
            candidates = [prefix + ':' + core for prefix in possible]
            matches = [c for c in candidates if c in id2element]

            if len(matches) > 1:
                msg = '%s not found, and multiple matches for heuristics (%s)' % (href, matches)
                location = HTMLIDLocation.for_element(a, location0)
                res.note_error(msg, location)

            elif len(matches) == 1:
                match = matches[0]
                a.attrs['href'] = '#' + match

                if match not in id2element_current:
                    # the target lives in extra_refs: also record where it
                    # can be found outside of this document
                    element = id2element[match]
                    a.attrs['href_external'] = element.attrs['base_url'] + '#' + match

                if show_debug_message_for_corrected_links:
                    msg = '%s not found, but corrected in %s' % (href, match)
                    location = HTMLIDLocation.for_element(a, location0)
                    res.note_warning(msg, location)

            else:
                if has_class(a, MCDPConstants.CLASS_IGNORE_IF_NOT_EXISTENT):
                    del a.attrs['href']
                else:
                    msg = 'I do not know what is indicated by the link %r.' % href
                    # make the problem visible in the rendered output
                    marker = Tag(name='span')
                    marker.attrs['class'] = 'inside-unknown-link'
                    marker.append(' (unknown ref %s)' % core)
                    a.append(marker)
                    location = HTMLIDLocation.for_element(a, location0)
                    if ignore_ref_errors:
                        msg2 = 'I will ignore this error because this is the first pass:'
                        msg2 += '\n\n' + indent(msg, ' > ')
                        res.note_warning(msg2, location)
                    else:
                        res.note_error(msg, location)

        # reported independently of whether the link itself could be resolved
        if ID in duplicates:
            msg = 'More than one element matching %r.' % href
            location = HTMLIDLocation.for_element(a, location0)
            res.note_error(msg, location)
def number_items2(root, res):
    """
        Walks the items under root and writes the label/numbering attributes
        (LABEL_NAME, LABEL_WHAT, LABEL_SELF, LABEL_WHAT_NUMBER_NAME,
        LABEL_WHAT_NUMBER, LABEL_NUMBER and 'counter-<name>') on their tags.

        The counter of an item is the part of item.id before the first ':'.
        Items whose counter is not in MCDPManualConstants.counters are left
        untouched. Items carrying MCDPManualConstants.ATTR_NONUMBER do not
        advance the counters and do not receive LABEL_WHAT_NUMBER or
        LABEL_NUMBER.

        For <figure> tags the attributes are also copied to the figcaption
        elements that are direct children of the figure.

        :param root: object providing depth_first_descendants(); each yielded
            item must expose .id, .name and .tag (with an .attrs mapping).
        :param res: result collector; receives note_error() when a rendered
            label contains ZERO.
    """
    from collections import defaultdict

    counters = set(MCDPManualConstants.counters)

    # TODO: make configurable
    # style = get_style_book()
    style = get_style_duckietown()
    resets = style.resets
    labels = style.labels

    for c in counters:
        assert c in resets, c
        assert c in labels, c

    # counter -> set of counters that reset it when they advance
    counter_parents = defaultdict(set)
    for parent, children in resets.items():
        for child in children:
            counter_parents[child].add(parent)

    counter_state = dict.fromkeys(counters, 0)

    for item in root.depth_first_descendants():
        counter = item.id.split(":")[0]
        if counter not in counters:
            continue

        attrs = item.tag.attrs
        nonumber = MCDPManualConstants.ATTR_NONUMBER in attrs

        if not nonumber:
            counter_state[counter] += 1
            for counter_to_reset in resets[counter]:
                counter_state[counter_to_reset] = 0

        label_spec = labels[counter]
        what = label_spec.what
        number = render(label_spec.number, counter_state)

        # do not overwrite a name that is already present
        if LABEL_NAME not in attrs:
            attrs[LABEL_NAME] = item.name

        if nonumber:
            attrs[LABEL_WHAT] = what
            attrs[LABEL_SELF] = item.name
            attrs[LABEL_WHAT_NUMBER_NAME] = item.name
            # LABEL_WHAT_NUMBER and LABEL_NUMBER are intentionally not set
        else:
            attrs[LABEL_WHAT] = what
            attrs[LABEL_SELF] = render(label_spec.label_self, counter_state)

            if ZERO in attrs[LABEL_SELF]:
                msg = 'This has zero counter.'
                res.note_error(msg, HTMLIDLocation.for_element(item.tag))

            if item.name is None:
                attrs[LABEL_WHAT_NUMBER_NAME] = what + ' ' + number
            else:
                attrs[LABEL_WHAT_NUMBER_NAME] = what + ' ' + number + ' - ' + item.name
            attrs[LABEL_WHAT_NUMBER] = what + ' ' + number
            attrs[LABEL_NUMBER] = number

        allattrs = [
            LABEL_NAME, LABEL_WHAT, LABEL_WHAT_NUMBER_NAME, LABEL_NUMBER,
            LABEL_SELF
        ]
        for c in counters:
            if c in counter_parents[counter] or c == counter:
                attname = 'counter-%s' % c
                allattrs.append(attname)
                attrs[attname] = counter_state[c]

        if item.tag.name == 'figure':
            # also copy to the caption
            for figcaption in item.tag.findAll(['figcaption']):
                if figcaption.parent != item.tag:
                    continue
                for x in allattrs:
                    # un-numbered figures have no LABEL_NUMBER: copy only
                    # what is actually present instead of raising KeyError
                    if x in attrs:
                        figcaption.attrs[x] = attrs[x]
def substituting_empty_links(soup, raise_errors=False, res=None, extra_refs=None):
    """
        Processes the links in soup that have no text.

        First pass: every entry returned by get_empty_links_to_fragment()
        whose linked element was found is handed to sub_link(); entries
        without a linked element are reported as errors.

        Second pass: every link returned by get_empty_links() is reported as
        an error, except those whose href starts with 'python:'. The message
        differs for http:/https: links and for everything else.

        Usage notes carried over from the original documentation:
        the default style is [](#sec:systems)  "Chapter 10";
        a class can also be given:

            <a href='#sec:name' class='only_number'></a>

        :param soup: where to look for references.
        :param raise_errors: if True, raise ValueError as soon as a linked
            element cannot be found (the error is still noted in res first).
        :param res: result collector; a new AugmentedResult is used if None.
        :param extra_refs: forwarded to get_empty_links_to_fragment(); an
            empty <div> Tag is used if None.
    """
    if res is None:
        res = AugmentedResult()
    if extra_refs is None:
        extra_refs = Tag(name='div')

    for entry in get_empty_links_to_fragment(soup, extra_refs=extra_refs, res=res):
        link = entry.linker
        target_id = entry.eid
        target = entry.linked

        if target:
            sub_link(link, target_id, target, res)
            continue

        problem = ('Cannot find %s' % target_id)
        res.note_error(problem, HTMLIDLocation.for_element(link))
        if raise_errors:
            raise ValueError(problem)

    for link in get_empty_links(soup):
        # an empty href is shown as a pair of quotes in the messages
        href = link.attrs.get('href', '(not present)') or '""'

        if href.startswith('python:'):
            continue

        if href.startswith(('http:', 'https:')):
            text = """ This link text is empty: ELEMENT Note that the syntax for links in Markdown is [link text](URL) For the internal links (where URL starts with "#"), then the documentation system can fill in the title automatically, leading to the format: [](#other-section) However, this does not work for external sites, such as: [](MYURL) So, you need to provide some text, such as: [this useful website](MYURL) """
            text = text.replace('ELEMENT', str(link))
            text = text.replace('MYURL', href)
        else:
            text = """ This link is empty: ELEMENT It might be that the writer intended for this link to point to something, but they got the syntax wrong. href = %s As a reminder, to refer to other parts of the document, use the syntax "#ID", such as: See [](#fig:my-figure). See [](#section-name). """ % href
            text = text.replace('ELEMENT', str(link))

        res.note_error(text, HTMLIDLocation.for_element(link))