Python to_html_stripping_fragmentの例、mcdp_utils_xml.to_html_stripping_fragment Pythonの例

コード例 #1

0

ファイルを表示

ファイル: transformations.py プロジェクト: kannode/mcdp

def figures_new1():
    """Run the figure pass on a nested ``<figure>`` sample and print the result.

    The sample is one outer ``<figure>`` with a caption that contains two
    inner ``<figure>`` elements, each with its own caption and image. The
    parsed soup is passed to ``make_figure_from_figureid_attr`` and the
    resulting HTML is printed; nothing is asserted or returned.
    """
    s = r"""

<figure>  
    <figcaption>Main caption</figcaption>
    <figure>
        <figcaption>Hello</figcaption>
        <img style='width:8em' src="duckietown-logo-transparent.png"/>
    </figure>
    <figure>  
        <figcaption>second</figcaption>
        <img style='width:8em' src="duckietown-logo-transparent.png"/>
    </figure>
</figure>

"""
    soup = bs(s)

    res = AugmentedResult()
    location = LocationUnknown()
    make_figure_from_figureid_attr(soup, res, location)

    # nfigs = len(list(soup.select('figure')))
    o = to_html_stripping_fragment(soup)
    # Parenthesised form: valid on Python 3 and prints the same single value
    # on Python 2 (where the statement form `print o` was used).
    print(o)

コード例 #2

0

ファイルを表示

def elements_abbrevs_test1():
    """A ``TODO:`` paragraph must become a ``todo`` paragraph in a wrapper div.

    The expected output has the ``TODO: `` prefix removed from the text.
    """
    source = "<p>TODO: paragraph</p>"
    expected = """<div class="todo-wrap"><p class="todo">paragraph</p></div>"""

    soup = bs(source.strip())
    substitute_special_paragraphs(soup)

    rendered = to_html_stripping_fragment(soup)
    assert_equal(rendered, expected)

コード例 #3

0

ファイルを表示

ファイル: element_abbrevs_test.py プロジェクト: kannode/mcdp

def elements_abbrevs_test2():
    """A ``TODO:`` paragraph with inline markup must be wrapped as a ``todo``.

    The expected output keeps the paragraph content (including the
    ``<strong>`` child) and wraps it in a ``todo-wrap`` div.
    """
    source = "<p>TODO: paragraph <strong>Strong</strong></p>"
    expected = """<div class="todo-wrap"><p class="todo">TODO: paragraph <strong>Strong</strong></p></div>"""

    soup = bs(source.strip())
    result = AugmentedResult()
    where = LocationUnknown()
    substitute_special_paragraphs(soup, result, where)

    rendered = to_html_stripping_fragment(soup)
    assert_equal(rendered, expected)

コード例 #4

0

ファイルを表示

ファイル: source_info_imp.py プロジェクト: kannode/mcdp

def make_last_modified(files_contents, nmax=100):
    """Build the "Last modified" section as an HTML string.

    Args:
        files_contents: iterable of argument tuples for ``DocToJoin``.
            Entries whose ``source_info`` is falsy are ignored.
        nmax: maximum number of documents to list (most recent first).

    Returns:
        An ``AugmentedResult`` whose result is the HTML of an ``<h1>``
        followed by a ``<ul>`` with one ``<li>`` per document: the
        modification date, a link to the document's main header (or
        ``File <docname>`` when no header id is found) and the author name.
    """
    res = AugmentedResult()

    docs = [DocToJoin(*args) for args in files_contents]
    docs = [doc for doc in docs if doc.source_info]
    # Most recently modified documents come first.
    docs.sort(key=lambda doc: doc.source_info.last_modified, reverse=True)

    root = Tag(name='fragment')
    root.append('\n')

    title = Tag(name='h1')
    title.append('Last modified')
    title.attrs['id'] = 'sec:last-modified'
    root.append(title)
    root.append('\n')

    listing = Tag(name='ul')
    listing.append('\n')
    for doc in docs[:nmax]:
        item = Tag(name='li')
        item.append(time.strftime("%a, %b %d", doc.source_info.last_modified))
        item.append(': ')

        header_id = get_main_header(bs(doc.contents))
        if header_id is not None:
            # Link to the document's main header.
            what = Tag(name='a')
            what.attrs['href'] = '#' + header_id
            what.attrs['class'] = MCDPManualConstants.CLASS_NUMBER_NAME
        else:
            what = "File %s" % doc.docname

        item.append(what)
        item.append(' (')
        item.append(doc.source_info.author.name)
        item.append(')')

        listing.append(item)
        listing.append('\n')

    root.append(listing)

    res.set_result(to_html_stripping_fragment(root))
    return res

コード例 #5

0

ファイルを表示

ファイル: app_visualization.py プロジェクト: rusi/mcdp

def get_svg_for_visualization(e, image_source, library_name, spec, name, thing,
                              refined, make_relative, library):
    """Return the SVG for ``thing`` as an HTML string, ready for embedding.

    The SVG is obtained from ``spec.get_png_data_syntax(..., data_format='svg')``.
    Its ``width``/``height`` attributes are turned into ``max-width`` /
    ``max-height`` styles, the doctype and titles are removed, and links are
    added for the identifiers known to ``identifier2ndp(refined)``.

    Raises:
        DPInternalError: if the generated data contains no ``<svg>`` element.
    """
    svg_data0 = spec.get_png_data_syntax(image_source=image_source,
                                         name=name,
                                         thing=thing,
                                         data_format='svg',
                                         library=library)

    fragment = bs(svg_data0)
    svg = fragment.svg
    if svg is None:
        msg = 'Cannot interpret fragment.'
        msg += '\n' + indent(svg_data0, '> ')
        raise DPInternalError(msg)

    # Move fixed dimensions into max-* styles.
    svg_attrs = svg.attrs
    style = {}
    for dimension in ['width', 'height']:
        if dimension in svg_attrs:
            style['max-%s' % dimension] = svg_attrs.pop(dimension)
    add_style(svg, **style)

    remove_doctype_etc(fragment)
    remove_all_titles(fragment.svg)

    table = identifier2ndp(refined) if refined is not None else {}

    def link_for_dp_name(identifier0):
        identifier = identifier0  # todo translate
        if identifier not in table:
            return None
        entry = table[identifier]
        # Fall back to the current library when the entry names none.
        libname = library_name if entry.libname is None else entry.libname
        href0 = '/repos/%s/shelves/%s/libraries/%s/models/%s/views/syntax/' % (
            e.repo_name, e.shelf_name, libname, entry.name)
        return make_relative(href0)

    add_html_links_to_svg(fragment.svg, link_for_dp_name)
    return to_html_stripping_fragment(fragment)

コード例 #6

0

ファイルを表示

def prerender_mathjax_(html):
    """
        Runs the prerender.js script to pre-render the MathJax into images.

        html: an HTML fragment (it must not contain an '<html>' tag).

        The fragment is written to a temporary directory, the script is run
        on it using the node.js binary, and the output file is read back.
        Three elements of the output are moved to the end of the document:
        the '<div style="display:none">' marker, the hidden '<svg>' holding
        the glyph definitions and the 'MathJax_SVG_styles' style tag.

        Returns the processed HTML as a string.

        Raises PrerenderError if the script exits with a nonzero code.
        An exception is raised through raise_desc if one of the three
        expected elements is missing from the output.

        The temporary directory is always removed, also on error.
    """
    assert '<html>' not in html, html

    use = get_nodejs_bin()

    # Take display math out of the paragraphs wrapping it.
    html = html.replace('<p>$$', '\n$$')
    html = html.replace('$$</p>', '$$\n')
    script = get_prerender_js()
    mcdp_tmp_dir = get_mcdp_tmp_dir()
    prefix = 'prerender_mathjax_'
    d = mkdtemp(dir=mcdp_tmp_dir, prefix=prefix)

    # Errors from the command helper propagate unchanged, with their original
    # traceback; a catch-and-'raise e' would only reset it on Python 2.
    try:
        f_html = os.path.join(d, 'file.html')
        with open(f_html, 'w') as f:
            f.write(html)

        f_out = os.path.join(d, 'out.html')
        cmd = [use, script, f_html, f_out]
        pwd = os.getcwd()
        res = system_cmd_result(pwd,
                                cmd,
                                display_stdout=False,
                                display_stderr=False,
                                raise_on_error=False)

        if res.ret:  # pragma: no cover
            if 'Error: Cannot find module' in res.stderr:
                msg = 'You have to install the MathJax and/or jsdom libraries.'
                msg += '\nOn Ubuntu, you can install them using:'
                msg += '\n\n\tsudo apt-get install npm'
                msg += '\n\n\tnpm install MathJax-node jsdom'
                msg += '\n\n' + indent(res.stderr, '  |')
                raise PrerenderError(msg)

            if 'parse error' in res.stderr:
                lines = [
                    _ for _ in res.stderr.split('\n') if 'parse error' in _
                ]
                assert lines
                msg = 'LaTeX conversion errors:\n\n' + '\n'.join(lines)
                raise PrerenderError(msg)

            msg = 'Unknown error (ret = %d).' % res.ret
            msg += '\n\n' + indent(res.stderr, '  |')
            raise PrerenderError(msg)

        with open(f_out) as f:
            data = f.read()

        # Read the data
        soup = bs(data)
        # find this and move it at the end
        # <style id="MathJax_SVG_styles"
        tag_style = soup.find(id='MathJax_SVG_styles')
        if not tag_style:
            msg = 'Expected to find style MathJax_SVG_styles'
            raise_desc(Exception, msg, soup=str(soup))
        # <svg style="display: none;"><defs id="MathJax_SVG_glyphs">
        tag_svg_defs = soup.find('svg', style="display: none;")
        if not tag_svg_defs:
            msg = 'Expected to find tag <svg display=none>'
            raise_desc(Exception, msg, soup=str(soup))

        other_tag = soup.find('div', style="display:none")
        if not other_tag:
            msg = 'Expected to find tag <div style="display:none">'
            raise_desc(Exception, msg, soup=str(soup))

        # <div style="display:none">Because of mathjax bug</div>
        soup.append(other_tag.extract())
        soup.append(tag_svg_defs.extract())
        soup.append(tag_style.extract())
        data = to_html_stripping_fragment(soup)

        return data
    finally:
        shutil.rmtree(d)

コード例 #7

0

ファイルを表示

ファイル: pipeline.py プロジェクト: afcarl/mcdp

def render_complete(library,
                    s,
                    raise_errors,
                    realpath,
                    generate_pdf=False,
                    check_refs=False,
                    use_mathjax=True,
                    filter_soup=None,
                    symbols=None):
    """
        Transforms markdown into html and then renders the mcdp snippets inside.

        library: default library; replaced by the one returned by
            get_library_from_document() for the parsed document.
        s: a markdown string with embedded html snippets
            (a str encoded with utf-8, not unicode).
        raise_errors: passed to display_files(), html_interpret() and
            embed_images_from_library2().
        realpath: path of the source document; passed to the checks and
            to html_interpret().
        generate_pdf: passed to html_interpret().
        check_refs: if true, runs check_if_any_href_is_invalid().
        use_mathjax: if true, the math is pre-rendered with
            prerender_mathjax(s, symbols).
        filter_soup: optional callable, called as
            filter_soup(library=library, soup=soup).
        symbols: passed to prerender_mathjax().

        Returns an HTML string; not a complete document.

        A TypeError is raised (through raise_desc) if s is unicode.
        Raises ValueError if an inline math fragment ($...$) contains
        a blank line.
        A DPInternalError is raised (through raise_desc) if a math
        placeholder cannot be found after the markdown step.
    """
    s0 = s
    check_good_use_of_special_paragraphs(s0, realpath)
    raise_missing_image_errors = raise_errors

    # Imports here because of circular dependencies
    from .latex.latex_preprocess import extract_maths, extract_tabular
    from .latex.latex_preprocess import latex_preprocessing
    from .latex.latex_preprocess import replace_equations
    from .macro_col2 import col_macros, col_macros_prepare_before_markdown
    from .mark.markd import render_markdown
    from .preliminary_checks import do_preliminary_checks_and_fixes
    from .prerender_math import prerender_mathjax

    if isinstance(s, unicode):
        msg = 'I expect a str encoded with utf-8, not unicode.'
        raise_desc(TypeError, msg, s=s)

    # need to do this before do_preliminary_checks_and_fixes
    # because of & char
    s, tabulars = extract_tabular(s)

    s = do_preliminary_checks_and_fixes(s)
    # put back tabular, because extract_maths needs to grab them
    for k, v in tabulars.items():
        assert k in s
        s = s.replace(k, v)

    # copy all math content,
    #  between $$ and $$
    #  between various limiters etc.
    # returns a dict(string, substitution)
    s, maths = extract_maths(s)
    for k, v in maths.items():
        # Inline math ($...$, as opposed to display math $$...$$) that
        # contains a blank line is suspicious.
        # The previous test compared the single character v[1] with '$$',
        # which is always different, so display math was checked as well.
        if v.startswith('$') and not v.startswith('$$'):
            if '\n\n' in v:
                msg = 'Suspicious math fragment %r = %r' % (k, v)
                logger.error(maths)
                logger.error(msg)
                raise ValueError(msg)

    s = latex_preprocessing(s)
    s = '<div style="display:none">Because of mathjax bug</div>\n\n\n' + s

    # cannot parse html before markdown, because md will take
    # invalid html, (in particular '$   ciao <ciao>' and make it work)

    # Escape '*}' for the markdown step; this is undone after
    # replace_equations() below.
    s = s.replace('*}', '\\*}')

    s, mcdpenvs = protect_my_envs(s)

    s = col_macros_prepare_before_markdown(s)

    s = render_markdown(s)

    def preprocess_equations(x):
        # this gets mathjax confused
        x = x.replace('>', '\\gt{}')  # need brace; think a<b -> a\lt{}b
        x = x.replace('<', '\\lt{}')
        return x

    # Put the math back in place of the placeholders.
    for k, v in maths.items():
        if k not in s:
            msg = 'Cannot find %r (= %r)' % (k, v)
            raise_desc(DPInternalError, msg, s=s)

        s = s.replace(k, preprocess_equations(v))

    s = replace_equations(s)
    s = s.replace('\\*}', '*}')

    # this parses the XML
    soup = bs(s)

    other_abbrevs(soup)

    # need to process tabular before mathjax
    escape_for_mathjax(soup)

    # mathjax must be after markdown because of code blocks using "$"

    s = to_html_stripping_fragment(soup)

    if use_mathjax:
        s = prerender_mathjax(s, symbols)

    soup = bs(s)
    escape_for_mathjax_back(soup)
    s = to_html_stripping_fragment(soup)

    for k, v in mcdpenvs.items():
        # there is this case:
        # ~~~
        # <pre> </pre>
        # ~~~
        s = s.replace(k, v)

    # A DRAFT ... /DRAFT pair of paragraphs becomes a div of class "draft".
    s = s.replace('<p>DRAFT</p>', '<div class="draft">')

    s = s.replace('<p>/DRAFT</p>', '</div>')

    soup = bs(s)
    mark_console_pres(soup)

    try:
        substitute_github_refs(soup, defaults={})
    except Exception as e:
        # Deliberately best-effort: the error is logged and ignored.
        msg = 'I got an error while substituting github: references.'
        msg += '\nI will ignore this error because it might not be the fault of the writer.'
        msg += '\n\n' + indent(str(e), '|', ' error: |')
        logger.warning(msg)

    # must be before make_figure_from_figureid_attr()
    display_files(soup, defaults={}, raise_errors=raise_errors)

    make_figure_from_figureid_attr(soup)
    col_macros(soup)
    fix_subfig_references(soup)

    library = get_library_from_document(soup, default_library=library)
    from mcdp_docs.highlight import html_interpret
    html_interpret(library,
                   soup,
                   generate_pdf=generate_pdf,
                   raise_errors=raise_errors,
                   realpath=realpath)
    if filter_soup is not None:
        filter_soup(library=library, soup=soup)

    embed_images_from_library2(soup=soup,
                               library=library,
                               raise_errors=raise_missing_image_errors)
    make_videos(soup=soup)

    if check_refs:
        check_if_any_href_is_invalid(soup)

    # NOTE(review): this step only runs for one hard-coded user name.
    if getuser() == 'andrea':
        if MCDPConstants.preprocess_style_using_less:
            run_lessc(soup)
        else:
            logger.warning(
                'preprocess_style_using_less=False might break the manual')
    fix_validation_problems(soup)

    strip_pre(soup)

    if MCDPManualConstants.enable_syntax_higlighting:
        syntax_highlighting(soup)

    if MCDPManualConstants.enforce_status_attribute:
        check_status_codes(soup, realpath)
    if MCDPManualConstants.enforce_lang_attribute:
        check_lang_codes(soup)

    # Fixes the IDs (adding 'sec:'); add IDs to missing ones
    globally_unique_id_part = 'autoid-DO-NOT-USE-THIS-VERY-UNSTABLE-LINK-' + get_md5(
        s0)[:5]
    fix_ids_and_add_missing(soup, globally_unique_id_part)

    check_no_patently_wrong_links(soup)

    s = to_html_stripping_fragment(soup)
    s = replace_macros(s)

    return s

コード例 #8

0

ファイルを表示

def make_figures(library, soup, res, location, raise_error_dp,
                 raise_error_others, realpath, generate_pdf):
    """ Looks for codes like:

    <pre><code class="mcdp_ndp_graph_templatized">mcdp {
        # empty model
    }
    </code></pre>

        and creates a link to the image

    Elements matching 'render.<which>', 'pre.<which>' or 'img.<which>' are
    replaced in the soup by the rendered SVG, for every figure type
    available for NDPs, templates and posets.

    library: used to load or parse the models and to get the image paths.
    soup: the document; it is modified in place.
    res: errors are recorded here with res.note_error().
    location: not used in this function.
    raise_error_dp: if true, DPSyntaxError/DPSemanticError are re-raised
        instead of being recorded in res.
    raise_error_others: same, for any other exception.
    realpath: passed to the parse functions; its basename is used in the
        name of the PDF download.
    generate_pdf: if true, the PDF format is generated as well and each
        SVG is wrapped in a <div> together with a link to the PDF data.

    Returns the HTML of the soup, from to_html_stripping_fragment().
    """
    def go(s0, func):
        # s0 is a comma-separated list of selectors; each one is applied
        # separately. Every matching tag is replaced by func(tag).
        selectors = s0.split(',')
        for selector_ in selectors:
            for tag in soup.select(selector_):
                try:
                    r = func(tag)
                    tag.replaceWith(r)
                except (DPSyntaxError, DPSemanticError) as e:
                    if raise_error_dp:
                        raise
                    else:
                        # Record the error; the tag is left in place.
                        res.note_error(str(e), HTMLIDLocation.for_element(tag))
                        continue
                except Exception as e:
                    if raise_error_others:
                        raise
                    else:
                        res.note_error(str(e), HTMLIDLocation.for_element(tag))
                        continue

    def make_tag(tag0, klass, data, ndp=None, template=None, poset=None):
        # Builds the element that replaces tag0, from the rendered data
        # (a mapping with key 'svg' and, if generate_pdf, key 'pdf').
        svg = data['svg']

        tag_svg = BeautifulSoup(svg, 'lxml', from_encoding='utf-8').svg

        assert tag_svg.name == 'svg'
        if tag_svg.has_attr('width'):
            # Dimensions are expected in points ('pt'); they are multiplied
            # by MCDPConstants.scale_svg.
            # NOTE(review): only 'width' is checked; 'height' is read
            # unconditionally.
            ws = tag_svg['width']
            hs = tag_svg['height']
            assert 'pt' in ws
            w = float(ws.replace('pt', ''))
            h = float(hs.replace('pt', ''))
            scale = MCDPConstants.scale_svg

            w2 = w * scale
            h2 = h * scale
            tag_svg['width'] = w2
            tag_svg['height'] = h2
            tag_svg['rescaled'] = 'Rescaled from %s %s, scale = %s' % (ws, hs,
                                                                       scale)
        else:
            print('no width in SVG tag: %s' % tag_svg)

        tag_svg['class'] = klass

        # The style and id of the original element are kept on the SVG.
        if tag0.has_attr('style'):
            tag_svg['style'] = tag0['style']
        if tag0.has_attr('id'):
            tag_svg['id'] = tag0['id']

        if generate_pdf:
            pdf0 = data['pdf']
            pdf = crop_pdf(pdf0, margins=0)

            div = Tag(name='div')

            # Name for the download: the id of the element, else the
            # ATTR_LOAD_NAME attribute of the loaded object, else a hash
            # of the element's text.
            att = MCDPConstants.ATTR_LOAD_NAME
            if tag0.has_attr('id'):
                basename = tag0['id']
            elif ndp is not None and hasattr(ndp, att):
                basename = getattr(ndp, att)
            elif template is not None and hasattr(template, att):
                basename = getattr(template, att)
            elif poset is not None and hasattr(poset, att):
                basename = getattr(poset, att)
            else:
                # NOTE(review): tag0.string may not be a value accepted by
                # hashlib here (e.g. None) -- confirm.
                hashcode = hashlib.sha224(tag0.string).hexdigest()[-8:]
                basename = 'code-%s' % hashcode

            docname = os.path.splitext(os.path.basename(realpath))[0]
            download = docname + "." + basename + "." + klass + '.pdf'
            a = create_a_to_data(download=download,
                                 data_format='pdf',
                                 data=pdf)
            a['class'] = 'pdf_data'
            a.append(NavigableString(download))
            div.append(tag_svg)
            div.append(a)
            return div
        else:
            return tag_svg

    image_source = ImagesFromPaths(library.get_images_paths())

    # Figures for NDPs.
    # An instance built with None arguments is only used to list the
    # available figure names and aliases.
    mf0 = MakeFiguresNDP(None, None, None)
    available_ndp = set(mf0.available()) | set(mf0.aliases)
    for which in available_ndp:

        # The callback reads 'which' from the enclosing loop; it is called
        # by go() within the same iteration.
        def callback(tag0):
            assert tag0.parent is not None
            context = Context()
            load = lambda x: library.load_ndp(x, context=context)
            parse = lambda x: library.parse_ndp(
                x, realpath=realpath, context=context)
            ndp = load_or_parse_from_tag(tag0, load, parse)

            mf = MakeFiguresNDP(ndp=ndp,
                                image_source=image_source,
                                yourname=None)  # XXX
            formats = ['svg']
            if generate_pdf:
                formats.append('pdf')
            data = mf.get_figure(which, formats)
            tag = make_tag(tag0, which, data, ndp=ndp, template=None)
            return tag

        selector = 'render.%s,pre.%s,img.%s' % (which, which, which)
        go(selector, callback)

    # Figures for templates.
    mf0 = MakeFiguresTemplate(None, None, None)
    available_template = set(mf0.available()) | set(mf0.aliases)
    for which in available_template:

        def callback(tag0):
            context = Context()
            load = lambda x: library.load_spec(
                SPEC_TEMPLATES, x, context=context)
            parse = lambda x: library.parse_template(
                x, realpath=realpath, context=context)
            template = load_or_parse_from_tag(tag0, load, parse)

            mf = MakeFiguresTemplate(template=template,
                                     library=library,
                                     yourname=None)  # XXX
            formats = ['svg']
            if generate_pdf:
                formats.append('pdf')
            data = mf.get_figure(which, formats)
            tag = make_tag(tag0, which, data, ndp=None, template=template)
            return tag

        selector = 'render.%s,pre.%s,img.%s' % (which, which, which)
        go(selector, callback)

    # Figures for posets.
    mf0 = MakeFiguresPoset(None, None)
    available_poset = set(mf0.available()) | set(mf0.aliases)
    for which in available_poset:

        def callback(tag0):
            context = Context()
            load = lambda x: library.load_poset(x, context=context)
            parse = lambda x: library.parse_poset(
                x, realpath=realpath, context=context)
            poset = load_or_parse_from_tag(tag0, load, parse)

            mf = MakeFiguresPoset(poset=poset, image_source=image_source)
            formats = ['svg']
            if generate_pdf:
                formats.append('pdf')
            data = mf.get_figure(which, formats)
            tag = make_tag(tag0,
                           which,
                           data,
                           ndp=None,
                           template=None,
                           poset=poset)
            return tag

        selector = 'render.%s,pre.%s,img.%s' % (which, which, which)
        go(selector, callback)

    # Any <render> element still in the document was not replaced above;
    # those without 'errored' in their class get an error listing the
    # available figure names.
    unsure = list(soup.select('render'))
    unsure = [_ for _ in unsure if 'errored' not in _.attrs.get('class', '')]
    for _ in unsure:
        msg = 'Invalid "render" element.'
        # msg += '\n\n' + '\n\n'.join(str(_) for _ in unsure)

        msg += '\n\n' + " Available for NDPs: %s." % ", ".join(
            sorted(available_ndp))
        msg += '\n\n' + " Available for templates: %s." % ", ".join(
            sorted(available_template))
        msg += '\n\n' + " Available for posets: %s." % ", ".join(
            sorted(available_poset))
        # raise ValueError(msg)
        res.note_error(msg, HTMLIDLocation.for_element(_))
    return to_html_stripping_fragment(soup)

コード例 #9

0

ファイルを表示

def render_complete(library, s, raise_errors, realpath, generate_pdf=False,
                    check_refs=False, do_math=True, filter_soup=None,
                    raise_missing_image_errors = False):
    """
        Transforms markdown into html and then renders the mcdp snippets inside.

        library: default library; replaced by the one returned by
            get_library_from_document() for the parsed document.
        s: a markdown string with embedded html snippets
            (a str encoded with utf-8, not unicode).
        raise_errors: passed to html_interpret().
        realpath: passed to html_interpret().
        generate_pdf: passed to html_interpret().
        check_refs: if true, runs check_if_any_href_is_invalid().
        do_math: if true, the math is pre-rendered with prerender_mathjax().
        filter_soup: optional callable, called as
            filter_soup(library=library, soup=soup).
        raise_missing_image_errors: passed to embed_images_from_library2()
            as raise_errors.

        Returns an HTML string; not a complete document.

        A TypeError is raised (through raise_desc) if s is unicode.
        Raises ValueError if an inline math fragment ($...$) contains
        a blank line.
        A DPInternalError is raised (through raise_desc) if a math
        placeholder cannot be found after the markdown step.
    """
    from .latex.latex_preprocess import extract_maths, extract_tabular
    from .latex.latex_preprocess import latex_preprocessing
    from .latex.latex_preprocess import replace_equations
    from .macro_col2 import col_macros, col_macros_prepare_before_markdown
    from .mark.markd import render_markdown
    from .preliminary_checks import do_preliminary_checks_and_fixes
    from .prerender_math import prerender_mathjax

    if isinstance(s, unicode):
        msg = 'I expect a str encoded with utf-8, not unicode.'
        raise_desc(TypeError, msg, s=s)

    # need to do this before do_preliminary_checks_and_fixes
    # because of & char
    s, tabulars = extract_tabular(s)

    s = do_preliminary_checks_and_fixes(s)
    # put back tabular, because extract_maths needs to grab them
    for k, v in tabulars.items():
        assert k in s
        s = s.replace(k, v)

    # copy all math content,
    #  between $$ and $$
    #  between various limiters etc.
    # returns a dict(string, substitution)
    s, maths = extract_maths(s)
    for k, v in maths.items():
        # Inline math ($...$, as opposed to display math $$...$$) that
        # contains a blank line is suspicious.
        # The previous test compared the single character v[1] with '$$',
        # which is always different, so display math was checked as well.
        if v.startswith('$') and not v.startswith('$$'):
            if '\n\n' in v:
                msg = 'Suspicious math fragment %r = %r' % (k, v)
                logger.error(maths)
                logger.error(msg)
                raise ValueError(msg)

    # fixes for LaTeX
    s = latex_preprocessing(s)

    s = '<div style="display:none">Because of mathjax bug</div>\n\n\n' + s

    # cannot parse html before markdown, because md will take
    # invalid html, (in particular '$   ciao <ciao>' and make it work)

    # Escape '*}' for the markdown step; this is undone after
    # replace_equations() below.
    s = s.replace('*}', '\\*}')

    s, mcdpenvs = protect_my_envs(s)

    s = col_macros_prepare_before_markdown(s)

    s = render_markdown(s)

    def preprocess_equations(x):
        # this gets mathjax confused
        x = x.replace('>', '\\gt{}')  # need brace; think a<b -> a\lt{}b
        x = x.replace('<', '\\lt{}')
        return x

    # Put the math back in place of the placeholders.
    for k, v in maths.items():
        if k not in s:
            msg = 'Cannot find %r (= %r)' % (k, v)
            raise_desc(DPInternalError, msg, s=s)

        s = s.replace(k, preprocess_equations(v))

    s = replace_equations(s)
    s = s.replace('\\*}', '*}')

    # this parses the XML
    soup = bs(s)

    other_abbrevs(soup)

    # need to process tabular before mathjax
    escape_for_mathjax(soup)

    # mathjax must be after markdown because of code blocks using "$"

    s = to_html_stripping_fragment(soup)

    if do_math:
        s = prerender_mathjax(s)

    soup = bs(s)
    escape_for_mathjax_back(soup)
    s = to_html_stripping_fragment(soup)

    for k, v in mcdpenvs.items():
        # there is this case:
        # ~~~
        # <pre> </pre>
        # ~~~
        s = s.replace(k, v)

    # A DRAFT ... /DRAFT pair of paragraphs becomes a div of class "draft".
    s = s.replace('<p>DRAFT</p>', '<div class="draft">')

    s = s.replace('<p>/DRAFT</p>', '</div>')

    soup = bs(s)
    mark_console_pres(soup)
    make_figure_from_figureid_attr(soup)
    col_macros(soup)
    fix_subfig_references(soup)

    library = get_library_from_document(soup, default_library=library)
    from mcdp_docs.highlight import html_interpret
    html_interpret(library, soup, generate_pdf=generate_pdf,
                   raise_errors=raise_errors, realpath=realpath)
    if filter_soup is not None:
        filter_soup(library=library, soup=soup)

    embed_images_from_library2(soup=soup, library=library,
                               raise_errors=raise_missing_image_errors)

    if check_refs:
        check_if_any_href_is_invalid(soup)

    run_lessc(soup)
    fix_validation_problems(soup)

    s = to_html_stripping_fragment(soup)
    s = replace_macros(s)
    return s