Example #1
File: reveal.py  Project: kannode/mcdp
def download_reveal(output_dir):
    res = AugmentedResult()
    url = "https://github.com/hakimel/reveal.js/archive/3.6.0.zip"
    target = os.path.join(output_dir, 'revealjs')

    if os.path.exists(target):
        logger.debug('skipping download because target exists: %s' % target)
    else:
        dest = os.path.join(output_dir, 'reveal-3.6.0.zip')
        if not os.path.exists(dest):
            logger.info('Downloading %s' % url)
            response = requests.get(url, stream=True)
            # stream the zip body straight to disk instead of buffering it
            with open(dest, 'wb') as f:
                shutil.copyfileobj(response.raw, f)
        logger.info(dest)

        target_tmp = target + '.tmp'
        import zipfile
        with zipfile.ZipFile(dest, 'r') as zip_ref:
            zip_ref.extractall(target_tmp)

        actual = os.path.join(target_tmp, 'reveal.js-3.6.0')
        os.rename(actual, target)
        logger.debug('extracted to %r' % target)

    check = [
        "plugin/notes/notes.js",
        "plugin/math/math.js",
        "lib/js/head.min.js",
        "js/reveal.js",
    ]
    for c in check:
        fn = os.path.join(target, c)
        if not os.path.exists(fn):
            msg = 'Incomplete reveal download, not found: %s' % fn
            res.note_error(msg)
    return res
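A minimal usage sketch for this helper (assumes download_reveal and the project's AugmentedResult are importable; the output directory is arbitrary):

# Hypothetical driver: download reveal.js into a scratch directory
# and collect any "incomplete download" notes on the AugmentedResult.
import tempfile

output_dir = tempfile.mkdtemp()
res = download_reveal(output_dir)
# res now carries a note_error entry for every expected file that is missing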
Example #2
def move_things_around(soup, raise_if_errors=False, res=None):
    """
        Looks for tags like:

            <move-here src="#line_detector2-line_detector_node2-autogenerated"/>

    """
    if res is None:
        res = AugmentedResult()
    from mcdp_docs.check_missing_links import get_id2element

    with timeit_wall('getting all IDs'):
        id2element, duplicates = get_id2element(soup, 'id')

    for e in soup.find_all('move-here'):

        if 'src' not in e.attrs:
            msg = 'Expected attribute "src" for element %s' % str(e)
            raise ValueError(msg)

        src = e.attrs['src']

        if not src.startswith('#'):
            msg = 'Expected attribute "src" to start with "#" for element %s.' % str(e)
            raise ValueError(msg)
        nid = src[1:]

        # look up in the precomputed index; soup.find(id=nid) here would be O(n^2) overall
        el = id2element.get(nid, None)
        if not el:
            msg = 'move-here: Could not find ID %r.' % nid
            e.name = 'span'
            res.note_error(msg, HTMLIDLocation.for_element(e))
            if raise_if_errors:
                raise ValueError(msg)
            else:
                continue
        el.extract()
        e.replace_with(el)
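A minimal sketch of the intended behavior (assumes move_things_around and its mcdp_docs dependencies are importable):

from bs4 import BeautifulSoup

html = '''
<div id="payload">Hello</div>
<section><move-here src="#payload"></move-here></section>
'''
soup = BeautifulSoup(html, 'lxml')
move_things_around(soup)
# the <move-here> inside <section> has been replaced by <div id="payload">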
Example #3
def manual_join(template,
                files_contents,
                stylesheet,
                remove=None,
                extra_css=None,
                remove_selectors=None,
                hook_before_toc=None,
                references=None,
                resolve_references=True,
                hook_before_final_pass=None,
                require_toc_placeholder=False,
                permalink_prefix=None,
                crossrefs_aug=None,
                aug0=None):
    """
        files_contents: a list of tuples that can be cast to DocToJoin:
        where the string is a unique one to be used for job naming.

        extra_css: if not None, a string of more CSS to be added
        Remove_selectors: list of selectors to remove (e.g. ".draft").

        hook_before_toc if not None is called with hook_before_toc(soup=soup)
        just before generating the toc
    """
    result = AugmentedResult()

    if references is None:
        references = {}
    check_isinstance(files_contents, list)

    if crossrefs_aug is None:
        crossrefs = Tag(name='no-cross-refs')
    else:
        crossrefs = bs(crossrefs_aug.get_result())
        result.merge(crossrefs_aug)
    if aug0 is not None:
        result.merge(aug0)

    @contextmanager
    def timeit(_):
        # no-op stand-in that keeps the "with timeit(...)" structure
        # below without actually timing anything
        yield

    with timeit('manual_join'):

        files_contents = [DocToJoin(*_) for _ in files_contents]

        # cannot use bs() because this is an entire document, not a fragment
        with timeit('parsing template'):
            template0 = template
            template = replace_macros(template)
            template_soup = BeautifulSoup(template,
                                          'lxml',
                                          from_encoding='utf-8')
            d = template_soup
            if d.html is None:
                s = "Invalid template"
                raise_desc(ValueError, s, template0=template0)

        with timeit('adding head'):
            assert d.html is not None
            assert '<html' in str(d)
            head = d.find('head')
            if head is None:
                msg = 'Could not find <head> in template:'
                logger.error(msg)
                logger.error(str(d))
                raise Exception(msg)
            assert head is not None
            for x in get_manual_css_frag().contents:
                head.append(x.__copy__())

        with timeit('adding stylesheet'):
            if stylesheet is not None:
                link = Tag(name='link')
                link['rel'] = 'stylesheet'
                link['type'] = 'text/css'
                from mcdp_report.html import get_css_filename
                link['href'] = get_css_filename('compiled/%s' % stylesheet)
                head.append(link)

        with timeit('making basename2soup'):
            basename2soup = OrderedDict()
            for doc_to_join in files_contents:
                if doc_to_join.docname in basename2soup:
                    msg = 'Repeated docname %r' % doc_to_join.docname
                    raise ValueError(msg)
                from .latex.latex_preprocess import assert_not_inside
                if isinstance(doc_to_join.contents, AugmentedResult):
                    result.merge(doc_to_join.contents)
                    contents = doc_to_join.contents.get_result()
                else:
                    contents = doc_to_join.contents
                assert_not_inside(contents, '<fragment')
                assert_not_inside(contents, 'DOCTYPE')

                frag = bs(contents)
                basename2soup[doc_to_join.docname] = frag

        # XXX: fix_duplicated_ids(basename2soup) is currently disabled

        with timeit('copy contents'):
            body = d.find('body')
            add_comments = False

            for docname, content in basename2soup.items():
                if add_comments:
                    body.append(NavigableString('\n\n'))
                    body.append(
                        Comment('Beginning of document dump of %r' % docname))
                    body.append(NavigableString('\n\n'))

                # move the children instead of copying (faster than copy_contents_into)
                for e in list(content.children):
                    body.append(e.extract())

                if add_comments:
                    body.append(NavigableString('\n\n'))
                    body.append(Comment('End of document dump of %r' %
                                        docname))
                    body.append(NavigableString('\n\n'))

        with timeit('extract_bibtex_blocks'):
            extract_bibtex_blocks(d)

        with timeit('ID_PUT_BIB_HERE'):

            ID_PUT_BIB_HERE = MCDPManualConstants.ID_PUT_BIB_HERE

            bibhere = d.find('div', id=ID_PUT_BIB_HERE)
            if bibhere is None:
                msg = ('Could not find #%s in document. '
                       'Adding one at end of document.') % ID_PUT_BIB_HERE
                result.note_warning(msg)
                bibhere = Tag(name='div')
                bibhere.attrs['id'] = ID_PUT_BIB_HERE
                d.find('body').append(bibhere)

            do_bib(d, bibhere)

        with timeit('hook_before_final_pass'):
            if hook_before_final_pass is not None:
                hook_before_final_pass(soup=d)

        with timeit('document_final_pass_before_toc'):
            location = LocationUnknown()
            document_final_pass_before_toc(d, remove, remove_selectors, result,
                                           location)

        with timeit('hook_before_toc'):
            if hook_before_toc is not None:
                hook_before_toc(soup=d)

        with timeit('generate_and_add_toc'):
            try:
                generate_and_add_toc(d, raise_error=True, res=result)
            except NoTocPlaceholder as e:
                if require_toc_placeholder:
                    msg = 'Could not find toc placeholder: %s' % e
                    if aug0 is not None:
                        result.note_error(msg)
                    else:
                        raise Exception(msg)

        with timeit('document_final_pass_after_toc'):
            document_final_pass_after_toc(
                soup=d,
                crossrefs=crossrefs,
                resolve_references=resolve_references,
                res=result)

        if extra_css is not None:
            logger.info('adding extra CSS')
            add_extra_css(d, extra_css)

        with timeit('document_only_once'):
            document_only_once(d)

        location = LocationUnknown()
        substitute_github_refs(d, defaults={}, res=result, location=location)

        with timeit('another A pass'):
            for a in d.select('a[href]'):
                href = a.attrs['href']
                if href in references:
                    r = references[href]
                    a.attrs['href'] = r.url
                    if not a.contents:  # empty (.children is an iterator and is always truthy)
                        a.append(r.title)

        # do not use to_html_stripping_fragment - this is a complete doc

        add_github_links_if_edit_url(soup=d, permalink_prefix=permalink_prefix)

        with timeit('converting to string'):
            res = unicode(d)

        with timeit('encoding'):
            res = res.encode('utf8')

        logger.info('done - %.1f MB' % (len(res) / (1024 * 1024.0)))

        result.set_result(res)
        return result
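A sketch of how the joiner might be driven. The template, the fragments, and the
(docname, contents) field order of the DocToJoin tuples are illustrative assumptions:

template = '<html><head></head><body></body></html>'
files_contents = [
    ('chapter1', '<h1 id="sec:intro">Intro</h1>'),   # assumed (docname, contents)
    ('chapter2', '<h1 id="sec:more">More</h1>'),
]
joined = manual_join(template, files_contents, stylesheet=None)
html_bytes = joined.get_result()  # the complete document, utf-8 encoded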
Example #4
def substituting_empty_links(soup,
                             raise_errors=False,
                             res=None,
                             extra_refs=None):
    """
        soup: where to look for references
        elemtn_to_modify: what to modify (if None, it is equal to soup)


        default style is [](#sec:systems)  "Chapter 10"

        You can also use "class":

            <a href='#sec:name' class='only_number'></a>

    """
    if extra_refs is None:
        extra_refs = Tag(name='div')
    if res is None:
        res = AugmentedResult()

    for le in get_empty_links_to_fragment(soup, extra_refs=extra_refs,
                                          res=res):
        a = le.linker
        element_id = le.eid
        element = le.linked

        if not element:
            msg = ('Cannot find %s' % element_id)
            res.note_error(msg, HTMLIDLocation.for_element(a))

            if raise_errors:
                raise ValueError(msg)
            continue

        sub_link(a, element_id, element, res)

    for a in get_empty_links(soup):
        href = a.attrs.get('href', '(not present)')
        if not href:
            href = '""'
        if href.startswith('python:'):
            continue

        if href.startswith('http:') or href.startswith('https:'):
            msg = """
This link text is empty:

    ELEMENT

Note that the syntax for links in Markdown is

    [link text](URL)

For the internal links (where URL starts with "#"), then the documentation
system can fill in the title automatically, leading to the format:

    [](#other-section)

However, this does not work for external sites, such as:

    [](MYURL)

So, you need to provide some text, such as:

    [this useful website](MYURL)

"""
            msg = msg.replace('ELEMENT', str(a))
            msg = msg.replace('MYURL', href)
            res.note_error(msg, HTMLIDLocation.for_element(a))
            continue  # already reported; skip the generic report below

        else:
            msg = """
This link is empty:

    ELEMENT

It might be that the writer intended for this
link to point to something, but they got the syntax wrong.

    href = %s

As a reminder, to refer to other parts of the document, use
the syntax "#ID", such as:

    See [](#fig:my-figure).

    See [](#section-name).

""" % href
        msg = msg.replace('ELEMENT', str(a))
        res.note_error(msg, HTMLIDLocation.for_element(a))
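A minimal sketch (assumes substituting_empty_links and the project's AugmentedResult are importable):

from bs4 import BeautifulSoup

html = '''
<h1 id="sec:systems">Systems</h1>
<p>See <a href="#sec:systems"></a> and <a href="https://example.org"></a>.</p>
'''
soup = BeautifulSoup(html, 'lxml')
res = AugmentedResult()  # assumed importable from the project
substituting_empty_links(soup, res=res)
# the internal link is filled in with the section title;
# the empty external link is reported on res via note_error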
Example #5
def get_cross_refs(src_dirs, permalink_prefix, extra_crossrefs, ignore=None):
    if ignore is None:  # avoid the mutable default argument
        ignore = []
    res = AugmentedResult()
    files = look_for_files(src_dirs, "crossref.html")
    id2file = {}
    soup = Tag(name='div')

    def add_from_soup(s, f, ignore_already_present, ignore_if_conflict):
        for img in list(s.find_all('img')):
            img.extract()

        for e in s.select('[base_url]'):
            e['external_crossref_file'] = f

        # Remove the ones with the same base_url
        for e in list(s.select('[base_url]')):
            if e.attrs['base_url'] == permalink_prefix:
                e.extract()

        for e in s.select('[id]'):
            id_ = e.attrs['id']
            if id_ == 'container': continue  # XXX:

            if id_ in id2file:
                if not ignore_already_present:
                    msg = 'Found two elements with same ID "%s":' % id_
                    msg += '\n %s' % id2file[id_]
                    msg += '\n %s' % f
                    res.note_error(msg)
            else:
                id2file[id_] = f
                e2 = e.__copy__()
                if ignore_if_conflict:
                    e2.attrs['ignore_if_conflict'] = '1'
                soup.append(e2)
                soup.append('\n')

    ignore = [os.path.realpath(_) for _ in ignore]
    for _f in files:
        if os.path.realpath(_f) in ignore:
            msg = 'Ignoring file %r' % _f
            logger.info(msg)
            continue
        logger.info('cross ref file %s' % _f)
        with open(_f) as fh:
            data = fh.read()
        if permalink_prefix in data:
            msg = 'skipping own file'
            logger.debug(msg)
            continue
        s = bs(data)
        add_from_soup(s,
                      _f,
                      ignore_already_present=False,
                      ignore_if_conflict=False)

    if extra_crossrefs is not None:
        logger.info('Reading external refs\n%s' % extra_crossrefs)
        try:
            r = requests.get(extra_crossrefs)
        except Exception as ex:
            msg = 'Could not read external cross reference links'
            msg += '\n  %s' % extra_crossrefs
            msg += '\n\n' + indent(str(ex), ' > ')
            res.note_error(msg)
        else:
            logger.debug('%s %s' % (r.status_code, extra_crossrefs))
            if r.status_code == 404:
                msg = 'Could not read external cross refs: %s' % r.status_code
                msg += '\n url: ' + extra_crossrefs
                msg += '\n This is normal if you have not pushed this branch yet.'
                res.note_warning(msg)
            s = bs(r.text)
            add_from_soup(s,
                          extra_crossrefs,
                          ignore_already_present=True,
                          ignore_if_conflict=True)

    res.set_result(str(soup))
    return res
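A sketch of a typical call (the paths and URL here are illustrative assumptions):

aug = get_cross_refs(
    src_dirs=['docs/src'],                        # hypothetical source tree
    permalink_prefix='https://example.org/book/',
    extra_crossrefs=None,                         # or a URL serving a crossref.html
)
crossrefs_html = aug.get_result()  # the collected elements, as an HTML string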
Example #6
def render_book(
    src_dirs,
    generate_pdf,
    data,
    realpath,
    use_mathjax,
    raise_errors,
    filter_soup=None,
    symbols=None,
    ignore_ref_errors=False,
):
    """ Returns an AugmentedResult(str) """
    res = AugmentedResult()
    from mcdp_docs.pipeline import render_complete

    librarian = get_test_librarian()
    # XXX: these might need to be changed
    if not MCDPConstants.softy_mode:
        for src_dir in src_dirs:
            librarian.find_libraries(src_dir)

    load_library_hooks = [librarian.load_library]
    library_ = MCDPLibrary(load_library_hooks=load_library_hooks)

    for src_dir in src_dirs:
        library_.add_search_dir(src_dir)

    d = tempfile.mkdtemp()
    library_.use_cache_dir(d)

    location = LocalFile(realpath)

    def filter_soup0(soup, library):
        if filter_soup is not None:
            filter_soup(soup=soup, library=library)
        add_edit_links2(soup, location)
        add_last_modified_info(soup, location)

    try:
        html_contents = render_complete(library=library_,
                                        s=data,
                                        raise_errors=raise_errors,
                                        realpath=realpath,
                                        use_mathjax=use_mathjax,
                                        symbols=symbols,
                                        generate_pdf=generate_pdf,
                                        filter_soup=filter_soup0,
                                        location=location,
                                        res=res,
                                        ignore_ref_errors=ignore_ref_errors)
    except DPSyntaxError as e:
        msg = 'Could not compile %s' % realpath
        location0 = LocationInString(e.where, location)
        res.note_error(msg, locations=location0)
        fail = "<p>This file could not be compiled</p>"
        res.set_result(fail)
        return res

    res.set_result(html_contents)
    return res
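A sketch of a direct call (arguments here are illustrative, not project defaults):

src = 'docs/chapter1.md'            # hypothetical source file
with open(src) as fh:
    data = fh.read()
aug = render_book(
    src_dirs=['docs'],
    generate_pdf=False,
    data=data,
    realpath=src,
    use_mathjax=True,
    raise_errors=False,
)
html = aug.get_result()  # rendered HTML, or a failure placeholder on syntax errors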
Example #7
File: pipeline.py  Project: kannode/mcdp
def render_complete(library, s, raise_errors, realpath, generate_pdf=False,
                    check_refs=False, use_mathjax=True, filter_soup=None,
                    symbols=None, res=None, location=None,
                    ignore_ref_errors=False):
    """
        Transforms markdown into html and then renders the mcdp snippets inside.

        s: a markdown string with embedded html snippets

        Returns an HTML string; not a complete document.

        filter_soup(library, soup)
    """
    if res is None:
        res = AugmentedResult()
    if location is None:
        location = LocationUnknown()
    from mcdp_report.gg_utils import resolve_references_to_images
    s0 = s

    unique = get_md5(realpath)[:8]
    check_good_use_of_special_paragraphs(s0, res, location)
    raise_missing_image_errors = raise_errors

    # Imports here because of circular dependencies
    from .latex.latex_preprocess import extract_maths, extract_tabular
    from .latex.latex_preprocess import latex_preprocessing
    from .latex.latex_preprocess import replace_equations
    from .macro_col2 import col_macros, col_macros_prepare_before_markdown
    from .mark.markd import render_markdown
    from .preliminary_checks import do_preliminary_checks_and_fixes
    from .prerender_math import prerender_mathjax

    if isinstance(s, unicode):
        msg = 'I expect a str encoded with utf-8, not unicode.'
        raise_desc(TypeError, msg, s=s)

    # need to do this before do_preliminary_checks_and_fixes
    # because of & char
    s, tabulars = extract_tabular(s)

    s = do_preliminary_checks_and_fixes(s, res, location)
    # put back the tabulars, because extract_maths needs to grab them
    for k, v in tabulars.items():
        assert k in s
        s = s.replace(k, v)

    # pull out all math content:
    #  between $$ and $$,
    #  between various delimiters, etc.
    # returns a dict of (placeholder, original fragment)
    s, maths = extract_maths(s)
    for k, v in list(maths.items()):
        if v.startswith('$') and not v.startswith('$$'):
            if '\n\n' in v:
                msg = 'The Markdown pre-processor got confused by this math fragment:'
                msg += '\n\n' + indent(v, '  > ')
                res.note_error(msg, location)
                maths[k] = 'ERROR'

    s = latex_preprocessing(s)
    s = '<div style="display:none">Because of mathjax bug</div>\n\n\n' + s

    # cannot parse the html before markdown, because md will take
    # invalid html (in particular '$   ciao <ciao>') and make it work

    s = s.replace('*}', '\\*}')

    s, mcdpenvs = protect_my_envs(s)

    s = col_macros_prepare_before_markdown(s)

    s = render_markdown(s)

    for k, v in maths.items():
        if k not in s:
            msg = 'Internal error while dealing with Latex math.'
            msg += '\nCannot find %r (= %r)' % (k, v)
            res.note_error(msg, location)
            continue

        def preprocess_equations(x):
            # raw < and > confuse mathjax; the braces matter: a<b -> a\lt{}b
            x = x.replace('>', '\\gt{}')
            x = x.replace('<', '\\lt{}')
            return x

        v = preprocess_equations(v)
        s = s.replace(k, v)

    s = replace_equations(s)
    s = s.replace('\\*}', '*}')

    # this parses the XML
    soup = bs(s)

    other_abbrevs(soup, res, location)

    substitute_special_paragraphs(soup, res, location)
    create_notes_from_elements(soup, res, location, unique)

    # need to process tabular before mathjax
    escape_for_mathjax(soup)

    # mathjax must be after markdown because of code blocks using "$"

    s = to_html_stripping_fragment(soup)

    if use_mathjax:
        s = prerender_mathjax(s, symbols, res)

    soup = bs(s)
    escape_for_mathjax_back(soup)
    s = to_html_stripping_fragment(soup)

    for k, v in mcdpenvs.items():
        # there is this case:
        # ~~~
        # <pre> </pre>
        # ~~~
        s = s.replace(k, v)

    s = s.replace('<p>DRAFT</p>', '<div class="draft">')

    s = s.replace('<p>/DRAFT</p>', '</div>')

    soup = bs(s)
    mark_console_pres(soup, res, location)

    # must be before make_figure_from_figureid_attr()
    display_files(soup, defaults={}, res=res, location=location, raise_errors=raise_errors)

    make_figure_from_figureid_attr(soup, res, location)
    col_macros(soup)
    fix_subfig_references(soup)

    library = get_library_from_document(soup, default_library=library)

    from .highlight import html_interpret
    html_interpret(library, soup, generate_pdf=generate_pdf,
                   raise_errors=raise_errors, realpath=realpath, res=res, location=location)
    if filter_soup is not None:
        filter_soup(library=library, soup=soup)

    resolve_references_to_images(soup=soup, library=library,
                                 raise_errors=raise_missing_image_errors,
                                 res=res, location=location)

    make_videos(soup, res, location, raise_on_errors=False)

    if check_refs:
        check_if_any_href_is_invalid(soup, res, location, ignore_ref_errors=ignore_ref_errors)

    fix_validation_problems(soup)

    strip_pre(soup)

    if MCDPManualConstants.enable_syntax_higlighting:
        syntax_highlighting(soup)

    if MCDPManualConstants.enforce_lang_attribute:
        check_lang_codes(soup, res, location)

    # Fix the IDs (adding 'sec:'); add IDs to elements that lack them
    globally_unique_id_part = 'autoid-DO-NOT-USE-THIS-VERY-UNSTABLE-LINK-' + get_md5(realpath)[:8]
    fix_ids_and_add_missing(soup, globally_unique_id_part, res, location)

    check_no_patently_wrong_links(soup, res, location)

    if MCDPManualConstants.enforce_status_attribute:
        check_status_codes(soup, realpath, res, location)

    s = to_html_stripping_fragment(soup)
    s = replace_macros(s)

    return s
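A sketch of a minimal invocation (the library object is assumed to be built as in render_book above; the path is illustrative):

source = "# Title\n\nSome *markdown* with inline math $a < b$.\n"
html = render_complete(
    library=library_,          # an MCDPLibrary with its search dirs set
    s=source,
    raise_errors=True,
    realpath='chapter.md',     # hypothetical path; used for IDs and error locations
)
# html is an HTML fragment, not a complete document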