# --- Example 1 ---
def document_final_pass_after_toc(soup,
                                  crossrefs=None,
                                  resolve_references=True,
                                  res=None,
                                  location=LocationUnknown()):
    """
        Final pass over a completed document (run after the TOC is generated).

        Checks for errors and invalid hrefs, optionally substitutes empty
        links using `crossrefs`, rewrites external hrefs, and detects
        duplicate IDs.

        soup: the document tree (BeautifulSoup).
        crossrefs: extra references used for href checking/substitution.
        resolve_references: if True, fill in empty links.
        res: AugmentedResult collecting notes; created if None.
        location: provenance information for error reporting.
          NOTE(review): the default is a shared instance created at def time;
          assumed stateless — confirm before relying on it.
    """
    if res is None:
        res = AugmentedResult()

    logger.info('checking errors')
    check_various_errors(soup)

    from .check_missing_links import check_if_any_href_is_invalid
    logger.info('checking hrefs')
    check_if_any_href_is_invalid(soup, res, location, extra_refs=crossrefs)

    # Note that this should be done *after* check_if_any_href_is_invalid()
    # because that one might fix some references
    if resolve_references:
        logger.info('substituting empty links')

        substituting_empty_links(soup,
                                 raise_errors=False,
                                 res=res,
                                 extra_refs=crossrefs)

    # Promote href_external to the real href and mark cross-document links.
    for a in soup.select('a[href_external]'):
        a.attrs['href'] = a.attrs['href_external']
        add_class(a, 'interdoc')

    detect_duplicate_IDs(soup, res)
# --- Example 2 ---
def figures_new1():
    """
        Exercises make_figure_from_figureid_attr() on nested <figure>
        elements and prints the resulting HTML.
    """
    s = r"""

<figure>  
    <figcaption>Main caption</figcaption>
    <figure>
        <figcaption>Hello</figcaption>
        <img style='width:8em' src="duckietown-logo-transparent.png"/>
    </figure>
    <figure>  
        <figcaption>second</figcaption>
        <img style='width:8em' src="duckietown-logo-transparent.png"/>
    </figure>
</figure>

"""
    soup = bs(s)

    res = AugmentedResult()
    location = LocationUnknown()
    make_figure_from_figureid_attr(soup, res, location)

    # nfigs = len(list(soup.select('figure')))
    o = to_html_stripping_fragment(soup)
    # Parenthesized form is equivalent in Python 2 and consistent with the
    # print(...) style used elsewhere in this file.
    print(o)
# --- Example 3 ---
def document_final_pass_before_toc(soup,
                                   remove,
                                   remove_selectors,
                                   res=None,
                                   location=None):
    """
        Pass over the document run before the TOC is generated:
        reorganizes the <body> contents into sections, processes
        assignments, removes selected elements, and moves things around.

        soup: full document tree; must contain a <body>.
        remove: spec of elements to remove (passed to do_remove_stuff).
        remove_selectors: list of CSS selectors to remove (e.g. ".draft").
        res: AugmentedResult collecting notes; created (with a warning) if None.
        location: provenance for error reporting; LocationUnknown() if None.

        Raises ValueError if the document has no <body>.
    """
    if res is None:
        # logger.warn is a deprecated alias of logger.warning
        logger.warning('no res passed')
        res = AugmentedResult()
    if location is None:
        location = LocationUnknown()

    logger.info('reorganizing contents in <sections>')

    with timeit('find body'):
        body = soup.find('body')
        if body is None:
            msg = 'Cannot find <body>:\n%s' % indent(str(soup)[:1000], '|')
            raise ValueError(msg)

    with timeit('reorganize_contents'):
        body2 = reorganize_contents(body)

    process_assignment(body2, res, location)

    # Swap the reorganized body into the tree.
    body.replace_with(body2)

    # Removing stuff
    with timeit('remove stuff'):
        do_remove_stuff(body2, remove_selectors, remove)

    with timeit('move_things_around'):
        move_things_around(soup=soup, res=res)
# --- Example 4 ---
def another2():
    """
        Checks that censor_markdown_code_blocks() leaves LaTeX-style
        markdown without code blocks unmarked.
    """
    # four spaces in the first line
    s = r"""

(if it exists) of the set of fixed points of~$f$:
\begin{equation}
x = y .\label{eq:lfp-one}
\end{equation}
The equality in \eqref{lfp-one} can be relaxed to ``$xxx$''.

The equality in \ref{eq:lfp-one} can be relaxed to ``$xxx$''.


The least fixed point need not exist. Monotonicity of the map~$f$
plus completeness is sufficient to ensure existence.
"""
    res = AugmentedResult()
    location = LocationUnknown()
    s2 = censor_markdown_code_blocks(s, res, location)

    print('original:')
    print(indent_plus_invisibles(s))
    print('later:')
    print(indent_plus_invisibles(s2))

    # NOTE(review): this asserts on the *input* s, which trivially never
    # contains the marker; the transformed s2 was probably intended — confirm.
    assert 'censored-code' not in s
# --- Example 5 ---
def elements_abbrevs_test2():
    """
        A "TODO:" paragraph gets the class 'todo' and is wrapped
        in a <div class="todo-wrap"> by substitute_special_paragraphs().
    """
    source = "<p>TODO: paragraph <strong>Strong</strong></p>"
    expected = """<div class="todo-wrap"><p class="todo">TODO: paragraph <strong>Strong</strong></p></div>"""

    soup = bs(source.strip())
    substitute_special_paragraphs(soup, AugmentedResult(), LocationUnknown())

    rendered = to_html_stripping_fragment(soup)
    assert_equal(rendered, expected)
# --- Example 6 ---
def test_toc():
    """
        generate_toc() must reject headers whose ids lack a recognized
        prefix (part:/app:/sec:); after fix_ids_and_add_missing() the
        same document must produce a TOC with prefixed ids.
    """
    s = """
<html>
<head></head>
<body>
<h1 id='one'>One</h1>

<p>a</p>

<h2 id='two'>Two</h2>

<p>a</p>

<h3 id='three'>Three</h3>

<h2 id='four'>Four</h2>

<p>a</p>
</body>
</html>
    """
    soup = bs(s)

    # first time it should fail: id "one" has no part:/app:/sec: prefix
    try:
        generate_toc(soup)
    except InvalidHeaders:
        #         > InvalidHeaders: I expected that this header would start with either part:,app:,sec:.
        #         > <h1 id="one">One</h1>
        pass
    else:
        raise Exception('Expected InvalidHeaders for unprefixed header ids')

    soup = bs(s)
    fix_ids_and_add_missing(soup, 'prefix-', AugmentedResult(),
                            LocationUnknown())
    generate_toc(soup)

    s = str(soup)
    expected = ['sec:one', 'sub:two']
    for e in expected:
        assert e in s
# --- Example 7 ---
def test_toc2():
    """
        fix_ids_and_add_missing() assigns sequential prefixed ids
        (sec:/sub:/subsub: by header level) to headers that have none,
        after which generate_toc() succeeds.
    """
    s = """
<html>
<head></head>
<body>
<h1>One</h1>
<h1>Two</h1>
<h1>Three</h1>
<p></p>

<h2>A</h2>

<h2>B</h2>

<h2>C</h2>

<h3>a</h3>
<h3>b</h3>
<h3>c</h3>

</body>
</html>
    """
    soup = bs(s)

    fix_ids_and_add_missing(soup, 'prefix', AugmentedResult(),
                            LocationUnknown())

    # The fifth generated id lands on <h2>B</h2> as "sub:prefix-5".
    assert soup.find(id='sub:prefix-5') is not None

    print(soup)

    _toc = generate_toc(soup)
    s = str(soup)
# --- Example 8 ---
def sub2():
    """
        A github: href with from_text/to_text markers is resolved by
        substitute_github_refs() into a link with a #Lx-Ly line range.
    """
    defaults = {'org': 'AndreaCensi', 'repo': 'mcdp', 'branch': 'duckuments'}

    s = """
<a href="github:path=context_eval_as_constant.py,from_text=get_connections_for,to_text=return"></a> 
"""
    soup = bs(s)
    location = LocationUnknown()
    res = AugmentedResult()
    n = substitute_github_refs(soup, defaults, res=res, location=location)
    # exactly one reference should have been substituted
    assert n == 1

    s2 = str(soup)
    logger.debug('\n' + indent(s2, '  '))

    expect = 'context_eval_as_constant.py#L7-L12'

    # idiomatic membership test ('x not in y' instead of 'not x in y')
    if expect not in s2:
        raise Exception('No %s in %s' % (expect, s2))
# --- Example 9 ---
def sub1():
    """
        A bare github: href (path only) is resolved by
        substitute_github_refs() into a <code> resource link.
    """
    defaults = {'org': 'AndreaCensi', 'repo': 'mcdp', 'branch': 'duckuments'}

    s = """
<a href="github:path=context_eval_as_constant.py"></a> 
"""
    soup = bs(s)
    location = LocationUnknown()
    res = AugmentedResult()

    n = substitute_github_refs(soup, defaults, res=res, location=location)
    # exactly one reference should have been substituted
    assert n == 1

    s2 = str(soup)
    logger.debug(indent(s2, '  '))

    expect = '<code class="github-resource-link">context_eval_as_constant.py</code>'
    # idiomatic membership test; error message made consistent with sub2()
    if expect not in s2:
        raise Exception('No %s in %s' % (expect, s2))
# --- Example 10 ---
def displayfile1():
    """
        display_files() resolves exactly one <display-file> element whose
        src uses the github: scheme with from_text/to_text markers.
    """
    defaults = {'org': 'AndreaCensi', 'repo': 'mcdp', 'branch': 'duckuments'}

    s = """
<display-file src="github:path=context_eval_as_constant.py,from_text=get_connections_for,to_text=return"></a> 
"""
    soup = bs(s)
    res = AugmentedResult()
    location = LocationUnknown()

    count = display_files(soup,
                          defaults,
                          raise_errors=True,
                          res=res,
                          location=location)
    assert count == 1

    rendered = str(soup)
    logger.debug('\n' + indent(rendered, '  '))
# --- Example 11 ---
def manual_join(template,
                files_contents,
                stylesheet,
                remove=None,
                extra_css=None,
                remove_selectors=None,
                hook_before_toc=None,
                references=None,
                resolve_references=True,
                hook_before_final_pass=None,
                require_toc_placeholder=False,
                permalink_prefix=None,
                crossrefs_aug=None,
                aug0=None):
    """
        Joins several document fragments into one HTML document based
        on `template`, then runs the full finishing pipeline (bibliography,
        TOC, final passes, reference resolution).

        files_contents: a list of tuples that can be cast to DocToJoin:
        where the string is a unique one to be used for job naming.

        extra_css: if not None, a string of more CSS to be added
        Remove_selectors: list of selectors to remove (e.g. ".draft").

        hook_before_toc if not None is called with hook_before_toc(soup=soup)
        just before generating the toc

        Returns an AugmentedResult whose result is the final document
        encoded as utf-8 bytes.
    """
    result = AugmentedResult()

    if references is None:
        references = {}
    check_isinstance(files_contents, list)

    if crossrefs_aug is None:
        crossrefs = Tag(name='no-cross-refs')
    else:
        crossrefs = bs(crossrefs_aug.get_result())
        result.merge(crossrefs_aug)
    if aug0 is not None:
        result.merge(aug0)

    # Local no-op context manager: shadows any module-level timeit so the
    # timing blocks below cost nothing but keep their structure.
    @contextmanager
    def timeit(_):
        yield

    with timeit('manual_join'):

        files_contents = [DocToJoin(*_) for _ in files_contents]

        # cannot use bs because entire document
        with timeit('parsing template'):
            template0 = template
            template = replace_macros(template)
            template_soup = BeautifulSoup(template,
                                          'lxml',
                                          from_encoding='utf-8')
            d = template_soup
            if d.html is None:
                s = "Invalid template"
                raise_desc(ValueError, s, template0=template0)

        with timeit('adding head'):
            assert d.html is not None
            assert '<html' in str(d)
            head = d.find('head')
            if head is None:
                msg = 'Could not find <head> in template:'
                logger.error(msg)
                logger.error(str(d))
                raise Exception(msg)
            assert head is not None
            # copy (not move) the standard CSS fragment into <head>
            for x in get_manual_css_frag().contents:
                head.append(x.__copy__())

        with timeit('adding stylesheet'):
            if stylesheet is not None:
                link = Tag(name='link')
                link['rel'] = 'stylesheet'
                link['type'] = 'text/css'
                from mcdp_report.html import get_css_filename
                link['href'] = get_css_filename('compiled/%s' % stylesheet)
                head.append(link)

        with timeit('making basename2soup'):
            # docname -> parsed fragment, preserving input order
            basename2soup = OrderedDict()
            for doc_to_join in files_contents:
                if doc_to_join.docname in basename2soup:
                    msg = 'Repeated docname %r' % doc_to_join.docname
                    raise ValueError(msg)
                from .latex.latex_preprocess import assert_not_inside
                if isinstance(doc_to_join.contents, AugmentedResult):
                    result.merge(doc_to_join.contents)
                    contents = doc_to_join.contents.get_result()
                else:
                    contents = doc_to_join.contents
                assert_not_inside(contents, '<fragment')
                assert_not_inside(contents, 'DOCTYPE')

                frag = bs(contents)
                basename2soup[doc_to_join.docname] = frag

        # with timeit('fix_duplicate_ids'):
        # XXX
        # fix_duplicated_ids(basename2soup)

        with timeit('copy contents'):
            body = d.find('body')
            add_comments = False

            for docname, content in basename2soup.items():
                if add_comments:
                    body.append(NavigableString('\n\n'))
                    body.append(
                        Comment('Beginning of document dump of %r' % docname))
                    body.append(NavigableString('\n\n'))

                # extract() moves nodes instead of copying them, which is
                # faster than copy_contents_into()
                try_faster = True
                if try_faster:
                    for e in list(content.children):
                        body.append(e.extract())
                else:
                    copy_contents_into(content, body)

                if add_comments:
                    body.append(NavigableString('\n\n'))
                    body.append(Comment('End of document dump of %r' %
                                        docname))
                    body.append(NavigableString('\n\n'))

        with timeit('extract_bibtex_blocks'):
            extract_bibtex_blocks(d)

        with timeit('ID_PUT_BIB_HERE'):

            ID_PUT_BIB_HERE = MCDPManualConstants.ID_PUT_BIB_HERE

            bibhere = d.find('div', id=ID_PUT_BIB_HERE)
            if bibhere is None:
                msg = ('Could not find #%s in document. '
                       'Adding one at end of document.') % ID_PUT_BIB_HERE
                result.note_warning(msg)
                bibhere = Tag(name='div')
                bibhere.attrs['id'] = ID_PUT_BIB_HERE
                d.find('body').append(bibhere)

            do_bib(d, bibhere)

        with timeit('hook_before_final_pass'):
            if hook_before_final_pass is not None:
                hook_before_final_pass(soup=d)

        with timeit('document_final_pass_before_toc'):
            location = LocationUnknown()
            document_final_pass_before_toc(d, remove, remove_selectors, result,
                                           location)

        with timeit('hook_before_toc'):
            if hook_before_toc is not None:
                hook_before_toc(soup=d)

        with timeit('generate_and_add_toc'):
            try:
                generate_and_add_toc(d, raise_error=True, res=result)
            except NoTocPlaceholder as e:
                if require_toc_placeholder:
                    msg = 'Could not find toc placeholder: %s' % e
                    # logger.error(msg)
                    if aug0 is not None:
                        result.note_error(msg)
                    else:
                        raise Exception(msg)

        with timeit('document_final_pass_after_toc'):
            document_final_pass_after_toc(
                soup=d,
                crossrefs=crossrefs,
                resolve_references=resolve_references,
                res=result)

        if extra_css is not None:
            logger.info('adding extra CSS')
            add_extra_css(d, extra_css)

        with timeit('document_only_once'):
            document_only_once(d)

        location = LocationUnknown()
        substitute_github_refs(d, defaults={}, res=result, location=location)

        with timeit('another A pass'):
            for a in d.select('a[href]'):
                href = a.attrs['href']
                if href in references:
                    r = references[href]
                    a.attrs['href'] = r.url
                    # BUG FIX: a.children is an iterator, so `not a.children`
                    # was always False and the title was never filled in;
                    # a.contents is a list and tests emptiness correctly.
                    if not a.contents:  # empty anchor: fill with ref title
                        a.append(r.title)

        # do not use to_html_stripping_fragment - this is a complete doc
        # mark_in_html(result, soup=d)

        add_github_links_if_edit_url(soup=d, permalink_prefix=permalink_prefix)

        with timeit('converting to string'):
            res = unicode(d)

        with timeit('encoding'):
            res = res.encode('utf8')

        logger.info('done - %.1f MB' % (len(res) / (1024 * 1024.0)))

        result.set_result(res)
        return result