Code Example #1
File: embedded_images.py Project: kannode/mcdp
def extract_assets(html, basedir):
    """
        Extracts all embedded assets in A links
        encoded using data: and save them to file.

        These are all links of the type:

            <a href="data:****"/>

            <a href="data:****" download='filename'/>
    """
    if not os.path.exists(basedir):
        os.makedirs(basedir)
    soup = BeautifulSoup(html, 'lxml', from_encoding='utf-8')
    for tag in soup.select('a[href]'):
        href = tag['href']
        if href.startswith('data:'):
            _mime, data = get_mime_data_from_base64_string(href)
            if tag.has_attr('download'):
                basename = tag['download']
            else:
                logger.warn('cannot find attr "download" in tag')
                # print tag
                continue
            print('extracting asset %s' % basename)
            filename = os.path.join(basedir, basename)
            write_data_to_file(data, filename)
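For reference, a minimal sketch of the helper this example relies on, assuming the usual data:<mime>;base64,<payload> URI layout (the actual mcdp implementation may differ):

import base64

def get_mime_data_from_base64_string(data_url):
    # 'data:image/png;base64,iVBOR...' -> ('image/png', b'...')
    header, _, payload = data_url.partition(',')
    mime = header[len('data:'):].split(';')[0]
    return mime, base64.b64decode(payload)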
Code Example #2
File: reveal.py Project: kannode/mcdp
def write_slides(res, outdir):
    slides = res.get_result()

    for id_slides, html in slides.items():
        filename = os.path.join(outdir, id_slides + '.html')
        data = str(html)
        write_data_to_file(data, filename)
Code Example #3
def go(d, out):
    d0 = os.path.dirname(out)
    artefacts = get_artefacts(d0, d)
    print("\n".join(map(str, artefacts)))
    links = get_links_from_artefacts(artefacts)

    body = Tag(name='body')
    body.append(links)
    html = Tag(name='html')
    head = Tag(name='head')
    meta = Tag(name='meta')
    meta.attrs['content'] = "text/html; charset=utf-8"
    meta.attrs['http-equiv'] = "Content-Type"
    style = Tag(name='style')
    style.append("""
    body {
        column-count: 3;
    }
    """)
    head.append(meta)
    head.append(style)

    html.append(head)
    html.append(body)

    write_data_to_file(str(html), out)
Code Example #4
def render_book(src_dirs,
                generate_pdf,
                data,
                realpath,
                main_file,
                use_mathjax,
                out_part_basename,
                raise_errors,
                filter_soup=None,
                extra_css=None,
                symbols=None):
    from mcdp_docs.pipeline import render_complete

    librarian = get_test_librarian()
    # XXX: these might need to be changed
    if not MCDPConstants.softy_mode:
        librarian.find_libraries('.')

    load_library_hooks = [librarian.load_library]
    library = MCDPLibrary(load_library_hooks=load_library_hooks)

    for src_dir in src_dirs:
        library.add_search_dir(src_dir)

    d = tempfile.mkdtemp()
    library.use_cache_dir(d)

    def filter_soup0(soup, library):
        if filter_soup is not None:
            filter_soup(soup=soup, library=library)
        add_edit_links(soup, realpath)

    try:
        html_contents = render_complete(library=library,
                                        s=data,
                                        raise_errors=raise_errors,
                                        realpath=realpath,
                                        use_mathjax=use_mathjax,
                                        symbols=symbols,
                                        generate_pdf=generate_pdf,
                                        filter_soup=filter_soup0)
    except DPSyntaxError as e:
        msg = 'Could not compile %s' % realpath
        raise_wrapped(DPSyntaxError, e, msg, compact=True)

    doc = get_minimal_document(html_contents,
                               add_markdown_css=True,
                               extra_css=extra_css)
    dirname = main_file + '.parts'
    if dirname and not os.path.exists(dirname):
        try:
            os.makedirs(dirname)
        except OSError:
            pass  # directory may already exist (race with another worker)
    fn = os.path.join(dirname, '%s.html' % out_part_basename)
    write_data_to_file(doc, fn)

    return html_contents
Code Example #5
File: make_index.py Project: AurelNeff/duckuments
def do_it(f, rel, current_slug):
    f2 = f + '.old'
    if not os.path.exists(f2):
        shutil.copy(f, f2)
    orig = open(f2).read()

    soup = bs_entire_document(orig)
    soup2 = make_changes(soup, f, rel, current_slug)
    data = to_html_entire_document(soup2)

    data = data.replace('<body>', '<body>\n<?php header1() ?>\n')
    write_data_to_file(data, f, quiet=True)
Code Example #6
def generate_metadata(src_dir):
    template = MCDPManualConstants.pdf_metadata_template
    if not os.path.exists(template):
        msg = 'Metadata template does not exist: %s' % template
        raise ValueError(msg)

    out = MCDPManualConstants.pdf_metadata
    s = open(template).read()

    from .pipeline import replace_macros

    s = replace_macros(s)
    write_data_to_file(s, out)
Code Example #7
File: prerender_math.py Project: kannode/mcdp
def prerender_main():
    f0 = sys.argv[1]
    f1 = sys.argv[2]
    html = open(f0).read()
    parsed = bs_entire_document(html)
    body = parsed.html.body
    body_string = str(body)
    res = AugmentedResult()
    body2_string = prerender_mathjax_(body_string, res)
    body2 = bs(body2_string)
    parsed.html.body.replace_with(body2)
    html2 = str(parsed)
    write_data_to_file(html2, f1)
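Assuming prerender_main is wired up as the script's entry point, the two positional arguments are the input and output files, e.g.:

    python prerender_math.py book.html book-prerendered.html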
Code Example #8
def compose_go(compose_config):
    input_ = compose_config.input
    output = compose_config.output
    recipe = compose_config.recipe
    remove_status = compose_config.remove_status
    show_removed = compose_config.show_removed
    data = open(input_).read()
    soup = bs_entire_document(data)
    permalink_prefix = compose_config.purl_prefix
    aug = compose_go2(soup, recipe, permalink_prefix, remove_status,
                      show_removed)
    soup = aug.get_result()
    results = str(soup)
    write_data_to_file(results, output)
Code Example #9
def remove_spurious(output_dir, filenames):
    ignore = [
        'link.html', 'toc.html', 'errors.html', 'warnings.html', 'tasks.html',
        'crossref.html'
    ]
    found = os.listdir(output_dir)
    for f in found:
        if not f.endswith('.html'):
            continue
        if f in ignore:
            continue
        if f not in filenames:
            fn = os.path.join(output_dir, f)

            if 'SPURIOUS' in open(fn).read():
                # already marked as spurious
                continue

            msg = 'I found a spurious file from earlier compilations: %s' % fn
            #             msg += '(%s not in %s) ' % (f, filenames)
            logger.warning(msg)

            soup = read_html_doc_from_file(fn)
            e = soup.find('section')
            if e is not None and 'id' in e.attrs:
                if False:
                    id_ = e.attrs['id'].replace(':section', '')
                    if 'autoid' not in id_:
                        id_ = remove_prefix(id_)
                        url = 'http://purl.org/dt/master/' + id_
                        OTHER = ((
                            '<p>Maybe try this link to find the version on master '
                            '(no guarantees): <a href="%s">%s</a></p>') %
                                 (url, url))
                        OTHER += '\n<p>If that does not work, the section was renamed.</p>'
                    else:
                        OTHER = ''
                else:
                    OTHER = ''
            else:
                OTHER = ''

            data = spurious.replace('OTHER', OTHER)
            write_data_to_file(data, fn, quiet=True)
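This function assumes a module-level HTML template named spurious that contains the marker SPURIOUS (checked above) and the literal placeholder OTHER. A hypothetical minimal version, for illustration only:

spurious = """<html><body>
<h1>SPURIOUS</h1>
<p>This file is left over from an earlier compilation and is no longer generated.</p>
OTHER
</body></html>"""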
Code Example #10
def write_crossref_info(data, id2filename, output_crossref, permalink_prefix):
    soup = bs_entire_document(data)

    cross = Tag(name='body')

    container = Tag(name='div')
    container.attrs['id'] = 'container'
    cross.append(container)

    for e in soup.select('[label-name]'):
        # logger.debug('considering %s' % e)
        if not 'id' in e.attrs:
            continue

        id_ = e.attrs['id']
        if id_.startswith('bib:'):
            # logger.warn('Excluding %r from cross refs' % id_)
            continue

        e2 = get_crossref_copy(e)
        e2.attrs[MCDPManualConstants.ATTR_BASE_URL] = permalink_prefix

        if id_ in id2filename:
            basename = id2filename[id_]

            e2.attrs['url'] = '%s/%s#%s' % (permalink_prefix, basename, id_)
            # print e2.attrs['url']
            a = Tag(name='a')

            a.attrs['href'] = e2.attrs['url']
            if not 'autoid' in id_:
                code = Tag(name='code')
                code.append(id_)
                a.append(code)
                a.append(' ')
                a.append(br())
            a.append(e2.attrs['label-name'])
            # e2.insert(0, Tag(name='br'))
            # e2.insert(0, ' ')
            e2.insert(0, a)
        else:
            logger.error('Cannot find url for %s' % id_)

        cross.append('\n\n\n')
        cross.append(e2)

    for img in list(cross.find_all('img')):
        img.extract()

    # print('writing cross ref info')
    html = Tag(name='html')
    html.append(cross)
    head = Tag(name='head')
    style = Tag(name='style')
    style.append(CROSSREF_CSS)
    head.append(style)
    html.append(head)

    script = Tag(name='script')
    script.append(CROSSREF_SCRIPT)
    cross.append(script)
    # XXX: we are doing this multiple times
    write_data_to_file(str(html), output_crossref, quiet=True)
Code Example #11
def go(context,
       worker_i,
       num_workers,
       data,
       mathjax,
       preamble,
       output_dir,
       assets_dir,
       add_toc_if_not_existing,
       extra_panel_content,
       permalink_prefix=None,
       output_crossref=None,
       only_refs=False):
    res = AugmentedResult()
    soup = bs_entire_document(data)

    # extract the main toc if it is there
    with timeit("Extracting main toc"):
        main_toc = soup.find(id=MCDPManualConstants.MAIN_TOC_ID)

        if main_toc is None:

            if add_toc_if_not_existing:
                # logger.info('Generating TOC because it is not there')

                tocg = generate_toc(soup)
                main_toc = bs(tocg).ul
                main_toc.attrs['class'] = 'toc'  # XXX: see XXX13
                assert main_toc is not None
                substituting_empty_links(main_toc,
                                         raise_errors=False,
                                         res=res,
                                         extra_refs=soup)

            else:
                msg = 'Could not find main toc (id #%s)' % MCDPManualConstants.MAIN_TOC_ID
                res.note_error(msg)
                main_toc = Tag(name='div')
                main_toc.append('TOC NOT FOUND')
        else:
            main_toc = main_toc.__copy__()

        if 'id' in main_toc.attrs:
            del main_toc.attrs['id']

    # XXX: this is not the place to do it
    mark_toc_links_as_errored(main_toc, soup)

    body = soup.html.body

    with timeit("split_in_files"):
        filename2contents = split_in_files(body)
        id2filename = get_id2filename(filename2contents)

    res.set_result(id2filename)

    if output_crossref is not None:
        from mcdp_docs.mcdp_render_manual import write_crossref_info
        context.comp(write_crossref_info,
                     data=data,
                     id2filename=id2filename,
                     output_crossref=output_crossref,
                     permalink_prefix=permalink_prefix)

    if only_refs:
        logger.debug('Skipping rest because only_refs')
        return res

    with timeit("add_prev_next_links"):
        filename2contents = add_prev_next_links(filename2contents)

    with timeit("preparing assets dir"):
        if not os.path.exists(output_dir):
            try:
                os.makedirs(output_dir)
            except OSError:
                pass  # directory may already exist (race with another worker)

    with timeit("creating link.html and link.js"):

        linkbase = 'link.html'  # do not change (it's used by http://purl.org/dth)
        linkbasejs = 'link.js'

        lb = create_link_base(id2filename)
        write_data_to_file(str(lb),
                           os.path.join(output_dir, linkbase),
                           quiet=True)

        linkjs = create_link_base_js(id2filename)
        write_data_to_file(str(linkjs),
                           os.path.join(output_dir, linkbasejs),
                           quiet=True)

    if preamble is not None:
        if preamble.endswith('.tex'):  # XXX
            preamble = open(preamble).read()

    ids_to_use = []
    for k in list(id2filename):
        if not 'autoid' in k:
            ids_to_use.append(k)
    ids_to_use = sorted(ids_to_use)

    pointed_to = []
    for k in ids_to_use:
        f = id2filename[k]
        if not f in pointed_to:
            pointed_to.append(f)

    # data = ",".join(pointed_to)
    head0 = soup.html.head

    if True:
        context.comp(remove_spurious, output_dir, list(filename2contents))

    with timeit('main_toc copy'):
        main_toc0 = main_toc.__copy__()

        main_toc0_s = str(main_toc0)
    asset_jobs = []
    for i, (filename, contents) in enumerate(filename2contents.items()):
        if i % num_workers != worker_i:
            continue
        with timeit('main_toc copy hack'):
            main_toc = bs(main_toc0_s).ul
            assert main_toc is not None

        # Trick: we add the main_toc, and then ... (look below)
        with timeit('make_page'):
            add_home_link = 'index.html' not in filename2contents
            html = make_page(contents,
                             head0,
                             main_toc,
                             extra_panel_content,
                             add_home_link=add_home_link)

        with timeit("direct job"):
            result = only_second_part(mathjax, preamble, html, id2filename,
                                      filename)

            # ... we remove it. In this way we don't have to copy it every time...
            main_toc.extract()

            fn = os.path.join(output_dir, filename)

            h = get_md5(result)[:8]
            r = context.comp(extract_assets_from_file,
                             result,
                             fn,
                             assets_dir,
                             job_id='%s-%s-assets' % (filename, h))
            asset_jobs.append(r)

    update_refs_('toc.html', main_toc, id2filename)
    out_toc = os.path.join(output_dir, 'toc.html')
    write_data_to_file(str(main_toc), out_toc, quiet=True)

    return context.comp(wait_assets, res, asset_jobs)
Code Example #12
def go(compose_config):
    input_ = compose_config.input
    output = compose_config.output
    recipe = compose_config.recipe
    permalink_prefix = compose_config.purl_prefix

    # Read input file
    logger.info('Reading %s' % input_)
    data = open(input_).read()
    soup = bs_entire_document(data)
    # Create context
    doc = soup.__copy__()
    body = Tag(name='body')
    doc.body.replace_with(body)
    elements = recipe.make(RecipeContext(soup=soup))
    check_isinstance(elements, list)
    append_all(body, elements)

    # Now remove stuff
    for status in compose_config.remove_status:
        removed = []
        for section in list(body.select('section[status=%s]' % status)):
            level = section.attrs['level']
            if not level in ['sec', 'part']:
                continue

            section_id = section.attrs['id']
            pure_id = section_id.replace(':section', '')
            removed.append(section.attrs['id'])

            if compose_config.show_removed:
                # remove everything that is not a header
                keep = ['h1', 'h2', 'h3', 'h4', 'h5']
                for e in list(section.children):
                    if e.name not in keep:
                        e.extract()
                    else:
                        e.append(' [%s]' % status)

                p = Tag(name='p')
                p.append(
                    "This section has been removed because it is in status %r. "
                    % (status))
                a = Tag(name='a')
                a.attrs['href'] = 'http://purl.org/dt/master/%s' % pure_id
                a.append(
                    "If you are feeling adventurous, you can read it on master."
                )
                p.append(a)

                section.append(p)

                p = Tag(name='p')
                p.append(
                    "To disable this behavior, and completely hide the sections, "
                )
                p.append(
                    "set the parameter show_removed to false in fall2017.version.yaml."
                )
                section.append(p)
            else:
                section.extract()


#             section.replace_with(div)

        if not removed:
            logger.info('Found no section with status = %r to remove.' %
                        status)
        else:
            logger.info('I removed %d sections with status %r.' %
                        (len(removed), status))
            logger.debug('Removed: %s' % ", ".join(removed))

    add_github_links_if_edit_url(doc, permalink_prefix=permalink_prefix)

    generate_and_add_toc(doc)
    doc = doc.__copy__()

    #     generate_and_add_toc(soup)
    #     substituting_empty_links(soup)
    raise_errors = False
    find_links_from_master(master_soup=soup,
                           version_soup=doc,
                           raise_errors=raise_errors)

    document_final_pass_after_toc(doc)
    results = str(doc)
    write_data_to_file(results, output)
Code Example #13
def composing1():

    data1 = """

docs:
    file0.md: |

        <div id='toc'></div>

        # All units {#part:all}

    file1.md: |

        # Audacious {#sa status=ready}

        This is section Audacious.

        Linking to:

        - (number name) <a href="#sa" class="number_name"></a>; (empty) [](#sa)
        - (number name) <a href="#sb" class="number_name"></a>; (empty) [](#sb)
        - (number name) <a href="#sc" class="number_name"></a>; (empty) [](#sc)
        - And linking to [](#elephant).

    file2.md: |

        # Bumblebee {#sb status=ready}

        This is section Bumblebee.

        Linking to:

        - (number name) <a href="#sa" class="number_name"></a>; (empty) [](#sa)
        - (number name) <a href="#sb" class="number_name"></a>; (empty) [](#sb)
        - (number name) <a href="#sc" class="number_name"></a>; (empty) [](#sc)
        - And linking to [](#elephant).

        ## This one will be removed {#to-remove}

        I don't like this section

        # Elephant {#elephant status=draft}

        Section Elephant is not ready.


    file3.md: |

        # The cat section {#sc status=ready}

        This is section Cat.

        Linking to:

        - (number name) <a href="#sa" class="number_name"></a>; (empty) [](#sa)
        - (number name) <a href="#sb" class="number_name"></a>; (empty) [](#sb)
        - (number name) <a href="#sc" class="number_name"></a>; (empty) [](#sc)

    00_main_template.html: |

        <html>
            <head></head>
            <body></body>
        </html>

book.version.yaml: |
    input: dist/master/book.html
    recipe:
        - toc
        - make-part: part1
          title: First part
          contents:
          - add: sb
            except: to-remove
        - make-part: part2
          title: Second part
          contents:
          - add: sa
          - add: elephant
    output: dist/version/book.html
    purl_prefix: http://purl.org/dt/fall2017/
    remove_status: [draft]

.compmake.rc:
    config echo 1

"""
    use = None
    # to use a specific dir for debugging:
    # use = '/tmp/composing1'

    with with_dir_content(data1, use_dir=use):

        repo = Repo.init('.')
        fn = 'readme'
        write_data_to_file('', fn)
        repo.index.add([fn])
        repo.index.commit("initial commit")

        url = '[email protected]:AndreaCensi/example.git'
        repo.create_remote('origin', url)

        res = 'dist/master/book.html'
        run_app(RenderManual, [
            '--src', 'docs', '--stylesheet', 'v_manual_split', '--mathjax',
            '0', '-o', 'out/out1', '--no_resolve_references', '--output_file',
            res
        ])

        assert os.path.exists(res)
        data = bs_entire_document(open(res).read())
        assert data.find(id='sa:section') is not None
        assert data.find(id='sb:section') is not None
        assert data.find(id='to-remove:section') is not None

        run_app(Split, [
            '--filename', 'dist/master/book.html', '--output_dir',
            'dist/master/book'
        ])
        run_app(Compose, ['--config', 'book.version.yaml'])
        version_whole = bs_entire_document(
            open('dist/version/book.html').read())
        assert version_whole.find(id='sa:section') is not None
        assert version_whole.find(id='sb:section') is not None
        assert version_whole.find(id='to-remove:section') is None
        # Now it's preserved
        # assert version_whole.find(id='elephant:section') is None

        run_app(Split, [
            '--filename', 'dist/version/book.html', '--output_dir',
            'dist/version/book'
        ])
Code Example #14
def write_errors_and_warnings_files(aug, d):
    if aug.has_result():
        id2filename = aug.get_result()
    else:
        id2filename = {}
    # print('id2filename: %s' % sorted(id2filename))
    assert isinstance(aug, AugmentedResult)
    aug.update_refs(id2filename)

    header = get_notes_panel(aug)

    manifest = []
    nwarnings = len(aug.get_notes_by_tag(MCDPManualConstants.NOTE_TAG_WARNING))
    fn = os.path.join(d, 'warnings.html')

    html = html_list_of_notes(aug,
                              MCDPManualConstants.NOTE_TAG_WARNING,
                              'warnings',
                              'warning',
                              header=header)
    # update_refs_('warnings', html, id2filename)

    write_data_to_file(str(html), fn, quiet=True)
    if nwarnings:
        manifest.append(
            dict(display='%d warnings' % nwarnings, filename='warnings.html'))
        msg = 'There were %d warnings: %s' % (nwarnings, fn)
        logger.warn(msg)

    ntasks = len(aug.get_notes_by_tag(MCDPManualConstants.NOTE_TAG_TASK))
    fn = os.path.join(d, 'tasks.html')

    html = html_list_of_notes(aug,
                              MCDPManualConstants.NOTE_TAG_TASK,
                              'tasks',
                              'task',
                              header=header)
    # update_refs_('tasks', html, id2filename)
    write_data_to_file(str(html), fn, quiet=True)
    if ntasks:
        manifest.append(
            dict(display='%d tasks' % ntasks, filename='tasks.html'))
        msg = 'There are %d open tasks: %s' % (ntasks, fn)
        logger.info(msg)

    nerrors = len(aug.get_notes_by_tag(MCDPManualConstants.NOTE_TAG_ERROR))
    fn = os.path.join(d, 'errors.html')
    html = html_list_of_notes(aug,
                              MCDPManualConstants.NOTE_TAG_ERROR,
                              'errors',
                              'error',
                              header=header)
    # update_refs_('tasks', html, id2filename)
    write_data_to_file(str(html), fn, quiet=True)
    if nerrors:
        manifest.append(
            dict(display='%d errors' % nerrors, filename='errors.html'))

        msg = 'I am sorry to say that there were %d errors.\n\nPlease see: %s' % (
            nerrors, fn)
        logger.error('\n\n\n' + indent(msg, ' ' * 15) + '\n\n')

    fn = os.path.join(d, 'errors_and_warnings.manifest.yaml')
    write_data_to_file(yaml.dump(manifest), fn, quiet=False)

    fn = os.path.join(d, 'errors_and_warnings.pickle')
    res = AugmentedResult()
    res.merge(aug)
    write_data_to_file(pickle.dumps(res), fn, quiet=False)
Code Example #15
def write(s, out):
    write_data_to_file(s, out)
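Every snippet on this page ultimately calls write_data_to_file. Its actual implementation lives in mcdp_utils_misc; a minimal sketch of the contract these callers assume (missing directories are created, and quiet=True suppresses logging):

import os

def write_data_to_file(data, filename, quiet=False):
    dirname = os.path.dirname(filename)
    if dirname and not os.path.exists(dirname):
        os.makedirs(dirname)
    mode = 'wb' if isinstance(data, bytes) else 'w'
    with open(filename, mode) as f:
        f.write(data)
    if not quiet:
        print('Written %d bytes to %s.' % (len(data), filename))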
Code Example #16
def write_manifest_html(d):
    manifest = [dict(display='html', filename='index.html')]
    fn = os.path.join(d, 'output-html.manifest.yaml')
    write_data_to_file(yaml.dump(manifest), fn, quiet=False)
Code Example #17
def render_book(
    src_dirs,
    generate_pdf,
    data,
    realpath,
    use_mathjax,
    raise_errors,
    filter_soup=None,
    symbols=None,
    ignore_ref_errors=False,
):
    """ Returns an AugmentedResult(str) """
    res = AugmentedResult()
    from mcdp_docs.pipeline import render_complete

    librarian = get_test_librarian()
    # XXX: these might need to be changed
    if not MCDPConstants.softy_mode:
        for src_dir in src_dirs:
            librarian.find_libraries(src_dir)

    load_library_hooks = [librarian.load_library]
    library_ = MCDPLibrary(load_library_hooks=load_library_hooks)

    for src_dir in src_dirs:
        library_.add_search_dir(src_dir)

    d = tempfile.mkdtemp()
    library_.use_cache_dir(d)

    location = LocalFile(realpath)

    # print('location:\n%s' % location)

    def filter_soup0(soup, library):
        if filter_soup is not None:
            filter_soup(soup=soup, library=library)
        add_edit_links2(soup, location)
        add_last_modified_info(soup, location)

    try:
        html_contents = render_complete(library=library_,
                                        s=data,
                                        raise_errors=raise_errors,
                                        realpath=realpath,
                                        use_mathjax=use_mathjax,
                                        symbols=symbols,
                                        generate_pdf=generate_pdf,
                                        filter_soup=filter_soup0,
                                        location=location,
                                        res=res,
                                        ignore_ref_errors=ignore_ref_errors)
    except DPSyntaxError as e:
        msg = 'Could not compile %s' % realpath
        location0 = LocationInString(e.where, location)
        res.note_error(msg, locations=location0)
        fail = "<p>This file could not be compiled</p>"
        res.set_result(fail)
        return res
        # raise_wrapped(DPSyntaxError, e, msg, compact=True)

    if False:  # write minimal doc
        doc = get_minimal_document(html_contents,
                                   add_markdown_css=True,
                                   extra_css=extra_css)
        dirname = main_file + '.parts'
        if dirname and not os.path.exists(dirname):
            try:
                os.makedirs(dirname)
            except OSError:
                pass  # directory may already exist
        fn = os.path.join(dirname, '%s.html' % out_part_basename)
        write_data_to_file(doc, fn)

    res.set_result(html_contents)
    return res
Code Example #18
def write(s_aug, out):
    s = s_aug.get_result()
    write_data_to_file(s, out)
Code Example #19
def write_manifest_pdf(out_pdf):
    d = os.path.dirname(out_pdf)
    basename = os.path.basename(out_pdf)
    manifest = [dict(display='PDF', filename=basename)]
    fn = os.path.join(d, 'output-pdf.manifest.yaml')
    write_data_to_file(yaml.dump(manifest), fn, quiet=False)
Code Example #20
def savefile(filename_hint, data):
    """ must return the url (might be equal to filename) """
    where = os.path.join(assets_dir, filename_hint)
    write_data_to_file(data, where)
    relative = os.path.relpath(where, os.path.dirname(fo))
    return relative
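Here savefile is a nested function: assets_dir and fo come from the enclosing scope. A quick standalone check of the relative-path computation, using hypothetical paths:

import os

where = '/site/assets/logo.png'   # asset written to disk
fo = '/site/book/chapter1.html'   # page that references it
print(os.path.relpath(where, os.path.dirname(fo)))  # -> '../assets/logo.png'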
Code Example #21
File: split.py Project: afcarl/mcdp
def go(context, worker_i, num_workers, ifilename, mathjax, preamble,
       output_dir):
    with timeit("reading %s" % ifilename):
        soup = read_html_doc_from_file(ifilename)

    # extract the main toc if it is there

    with timeit("Extracting main_toc"):
        main_toc = soup.find(id='main_toc')

        if main_toc is None:
            msg = 'Could not find the element #main_toc.'
            raise ValueError(msg)

        main_toc = main_toc.__copy__()
        del main_toc.attrs['id']

    body = soup.html.body

    with timeit("split_in_files"):
        filename2contents = split_in_files(body)

    with timeit("add_prev_next_links"):
        filename2contents = add_prev_next_links(filename2contents)

    with timeit("preparing assets dir"):
        if not os.path.exists(output_dir):
            try:
                os.makedirs(output_dir)
            except OSError:
                pass  # directory may already exist (race with another worker)

        assets_dir = os.path.join(output_dir, 'assets')

    with timeit("creating link.html and link.js"):
        id2filename = get_id2filename(filename2contents)
        linkbase = 'link.html'  # do not change (it's used by http://purl.org/dth)
        linkbasejs = 'link.js'
        lb = create_link_base(id2filename)
        write_data_to_file(str(lb), os.path.join(output_dir, linkbase))

        linkjs = create_link_base_js(id2filename)
        write_data_to_file(str(linkjs), os.path.join(output_dir, linkbasejs))

    if preamble:
        preamble = open(preamble).read()

    ids_to_use = []
    for k in list(id2filename):
        if not 'autoid' in k:
            ids_to_use.append(k)
    ids_to_use = sorted(ids_to_use)

    pointed_to = []
    for k in ids_to_use:
        f = id2filename[k]
        if not f in pointed_to:
            pointed_to.append(f)

    data = ",".join(pointed_to)
    links_hash = get_md5(data)[:8]
    #     if self.options.faster_but_imprecise:
    #         links_hash = "nohash"
    #
    #     logger.debug('hash data: %r' % data)
    logger.debug('hash value: %r' % links_hash)

    head0 = soup.html.head

    if True:
        context.comp(remove_spurious, output_dir, list(filename2contents))

    tmpd = create_tmpdir()

    n = len(filename2contents)
    for i, (filename, contents) in enumerate(filename2contents.items()):
        if (i % num_workers != worker_i):
            continue
        # contents_hash = get_md5(str(contents) + str(preamble))[:8]
        # job_id = '%s-%s-%s' % (filename, links_hash, contents_hash)

        # Trick: we add the main_toc, and then ... (look below)
        with timeit('make_page'):
            html = make_page(contents, head0, main_toc)
        with timeit('main_toc copy'):
            main_toc = main_toc.__copy__()

        logger.debug('%d/%d: %s' % (i, n, filename))
        with timeit("direct job"):
            result = only_second_part(mathjax, preamble, html, id2filename,
                                      filename)

            # ... we remove it. In this way we don't have to copy it
            main_toc.extract()

            fn = os.path.join(output_dir, filename)

            fn0 = os.path.join(tmpd, filename)
            write_data_to_file(result, fn0, quiet=True)

            h = get_md5(result)[:8]
            context.comp(extract_assets_from_file,
                         fn0,
                         fn,
                         assets_dir,
                         job_id='assets-%s' % h)
Code Example #22
File: parsing.py Project: kannode/mcdp
def write_html_doc_to_file(soup, filename, quiet=False):
    from mcdp_utils_misc import write_data_to_file

    html = to_html_entire_document(soup)
    write_data_to_file(html, filename, quiet=quiet)
Code Example #23
File: make_index.py Project: AurelNeff/duckuments
def go():
    groups = OrderedDict(yaml.load(BOOKS))

    import os

    dist = 'duckuments-dist'

    html = Tag(name='html')
    head = Tag(name='head')
    meta = Tag(name='meta')
    meta.attrs['content'] = "text/html; charset=utf-8"
    meta.attrs['http-equiv'] = "Content-Type"

    stylesheet = 'v_manual_split'
    link = Tag(name='link')
    link['rel'] = 'stylesheet'
    link['type'] = 'text/css'
    link['href'] = get_css_filename('compiled/%s' % stylesheet)
    head.append(link)

    body = Tag(name='body')

    style = Tag(name='style')

    style.append(CSS)

    head.append(style)
    head.append(meta)

    html.append(head)
    html.append(body)

    divgroups = Tag(name='div')
    all_crossrefs = Tag(name='div')

    res = AugmentedResult()

    for id_group, group in groups.items():
        divgroup = Tag(name='div')
        divgroup.attrs['class'] = 'group'
        divgroup.attrs['id'] = id_group

        h0 = Tag(name='h1')
        h0.append(group['title'])

        divgroup.append(h0)

        if 'abstract' in group:
            p = Tag(name='p')
            p.append(group['abstract'])
            divgroup.append(p)

        books = group['books']
        # divbook = Tag(name='div')
        books = OrderedDict(books)
        for id_book, book in books.items():
            d = os.path.join(dist, id_book)
            change_frame(d, '../../', current_slug=id_book)

            d0 = dist

            errors_and_warnings = os.path.join(d, 'out', 'errors_and_warnings.pickle')
            if os.path.exists(errors_and_warnings):
                resi = pickle.loads(open(errors_and_warnings).read())
                # print(errors_and_warnings)

                resi.update_file_path(prefix=os.path.join(id_book, 'out'))
                res.merge(resi)
            else:
                msg = 'Path does not exist: %s' % errors_and_warnings
                logger.error(msg)

            artefacts = get_artefacts(d0, d)

            div = Tag(name='div')
            div.attrs['class'] = 'book-div'
            div.attrs['id'] = id_book
            div_inside = Tag(name='div')
            div_inside.attrs['class'] = 'div_inside'
            links = get_links2(artefacts)

            for a in links.select('a'):
                s = gettext(a)
                if 'error' in s or 'warning' in s or 'task' in s:
                    a['class'] = 'EWT'

            if False:
                h = Tag(name='h3')
                h.append(book['title'])

                # div_inside.append(h)
                if 'abstract' in book:
                    p = Tag(name='p')
                    p.append(book['abstract'])
                    div_inside.append(p)

            div_inside.append(links)
            div.append(div_inside)

            toc = os.path.join(d, 'out/toc.html')
            if os.path.exists(toc):
                data = open(toc).read()
                x = bs(data)
                for a in x.select('a[href]'):
                    href = a.attrs['href']
                    a.attrs['href'] = id_book + '/out/' + href
                x.name = 'div'  # not fragment
                div.append(x)
            crossrefs = os.path.join(d, 'crossref.html')
            if os.path.exists(crossrefs):
                x = bs(open(crossrefs).read())
                for e in x.select('[url]'):
                    all_crossrefs.append('\n\n')
                    all_crossrefs.append(e.__copy__())
            else:
                logger.error('File does not exist %s' % crossrefs)

            divgroup.append(div)
        divgroups.append(divgroup)

    out_pickle = sys.argv[3]

    nwarnings = len(res.get_notes_by_tag(MCDPManualConstants.NOTE_TAG_WARNING))
    ntasks = len(res.get_notes_by_tag(MCDPManualConstants.NOTE_TAG_TASK))
    nerrors = len(res.get_notes_by_tag(MCDPManualConstants.NOTE_TAG_ERROR))
    logger.info('%d tasks' % ntasks)
    logger.warning('%d warnings' % nwarnings)
    logger.error('%d errors' % nerrors)

    from mcdp_docs.mcdp_render_manual import write_errors_and_warnings_files
    write_errors_and_warnings_files(res, os.path.dirname(out_pickle))

    out_junit = os.path.join(os.path.dirname(out_pickle), 'junit', 'notes', 'junit.xml')
    s = get_junit_xml(res)
    write_data_to_file(s.encode('utf8'), out_junit)

    # write_data_to_file(pickle.dumps(res), out_pickle, quiet=False)

    extra = get_extra_content(res)

    extra.attrs['id'] = 'extra'
    body.append(extra)
    body.append(divgroups)

    embed_css_files(html)

    for e in body.select('.notes-panel'):
        e.extract()
    out = sys.argv[1]
    data = str(html)
    data = data.replace('<body>', '<body>\n<?php header1() ?>\n')
    write_data_to_file(data, out)

    manifest = [dict(display='index', filename=os.path.basename(out))]
    mf = os.path.join(os.path.dirname(out), 'summary.manifest.yaml')
    write_data_to_file(yaml.dump(manifest), mf)

    out_crossrefs = sys.argv[2]

    html = Tag(name='html')
    head = Tag(name='head')
    body = Tag(name='body')
    style = Tag(name='style')
    style.append(CROSSREF_CSS)
    head.append(style)
    html.append(head)

    script = Tag(name='script')
    script.append(CROSSREF_SCRIPT)

    container = Tag(name='div')
    container.attrs['id'] = 'container'
    body.append(container)

    details = Tag(name='details')
    summary = Tag(name='summary')
    summary.append('See all references')
    details.append(summary)
    details.append(all_crossrefs)
    body.append(details)
    body.append(script)
    html.append(body)

    write_data_to_file(str(html), out_crossrefs)

    if nerrors > 0:
        sys.exit(nerrors)