Code Example #1
File: split.py Project: afcarl/mcdp
    def define_jobs_context(self, context):
        ifilename = self.options.filename
        output_dir = self.options.output_dir
        mathjax = self.options.mathjax
        preamble = self.options.preamble
        #         disqus = self.options.disqus
        logger.setLevel(logging.DEBUG)

        if self.options.workers != 0:
            n = self.options.workers
        else:
            n = max(1, cpu_count() - 2)
        self.debug("Using n = %d workers" % n)

        data = open(ifilename).read()
        h = get_md5(data)[-4:]

        jobs = []
        for i in range(n):
            promise = context.comp_dynamic(go,
                                           i,
                                           n,
                                           ifilename,
                                           mathjax,
                                           preamble,
                                           output_dir,
                                           job_id='worker-%d-of-%d-%s' %
                                           (i, n, h))
            jobs.append(promise.job_id)
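
All of these examples call the project's get_md5 helper and use only slices of its hex digest (e.g. [:8], [-4:]). A minimal sketch of such a helper, assuming it simply wraps hashlib; the actual mcdp implementation may differ:

import hashlib

def get_md5(contents):
    # Accept str or bytes; return the 32-character hex digest.
    if isinstance(contents, str):
        contents = contents.encode('utf-8')
    return hashlib.md5(contents).hexdigest()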
Code Example #2
File: embedded_images.py Project: kannode/mcdp
def get_link_to_image_file(filename, max_width):
    basename, ext = os.path.splitext(os.path.basename(filename).lower())
    if ext in ['.jpg', '.jpeg']:
        with open(filename, 'rb') as f:  # binary mode: PIL reads raw bytes
            im = Image.open(f)
            # print filename, im.size
            if im.size[0] > max_width:
                b = basename + '-' + get_md5(filename)[:4] + '.jpg'
                dest = os.path.join(get_mcdp_tmp_dir(), 'images', b)
                height = int(im.size[1] * max_width / im.size[0])
                new_size = (max_width, height)
                msg = 'Resizing image %s from %s to %s' % (filename, im.size,
                                                           new_size)
                logger.info(msg)
                # print('resizing to %s in %s' % (str(new_size), dest))
                if not os.path.exists(dest):
                    make_sure_dir_exists(dest)
                    resized = im.resize(new_size)

                    resized.save(dest)

                return dest
            # im.save(file + ".thumbnail", "JPEG")

        return filename
    else:
        return filename
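
A hypothetical call (paths are illustrative, not from the project): a JPEG wider than max_width gets a resized copy, named with a 4-character MD5 suffix, saved under the temporary images directory; everything else is returned unchanged.

# Hypothetical usage of the function above; the paths are made up.
link = get_link_to_image_file('figures/photo.jpg', max_width=800)
# wider than 800 px   -> '<mcdp_tmp_dir>/images/photo-ab12.jpg'
# narrower, or a .png -> 'figures/photo.jpg'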
Code Example #3
    def subfloat_replace(args, opts):
        contents = args[0]
        caption = opts[0]
        check_isinstance(contents, str)

        if caption is None:
            label = None
        else:
            caption, label = get_s_without_label(caption, labelprefix="fig:")
            if label is None:
                caption, label = get_s_without_label(caption,
                                                     labelprefix="subfig:")
            if label is not None and not label.startswith('subfig:'):
                msg = 'Subfigure labels should start with "subfig:"; found %r. ' % label
                label = 'sub' + label
                msg += 'I will change to %r.' % label
                logger.debug(msg)

        # we need to make up an ID
        if label is None:
            label = 'subfig:' + get_md5(contents)
        idpart = ' id="%s"' % label

        if caption is None:
            caption = 'no subfloat caption'
        res = '<figure class="subfloat"%s>%s<figcaption>%s</figcaption></figure>' % (
            idpart, contents, caption)
        return res
Code Example #4
File: embedded_images.py Project: rusi/mcdp
def extract_img_to_file_(soup, savefile, tagname, attrname):
    n = 0
    tot = 0
    for tag in soup.select(tagname):
        tot += 1
        src = tag[attrname]

        if not src.startswith('data:'):
            continue

        mime, data = get_mime_data_from_base64_string(src)

        # now we should make up the data
        if tag.has_attr('id'):
            basename = tag['id']
        else:
            md5 = get_md5(data)
            basename = 'data-from-%s-%s' % (tagname, md5)

        # Guess extension
        ext = get_ext_for_mime(mime)
        filename = basename + '.' + ext
        src = "%s" % filename
        # ask what we should be using
        use_src = savefile(filename, data)
        check_isinstance(use_src, str)
        tag[attrname] = use_src
        n += 1
    logger.debug('extract_img_to_file: extracted %d/%d images from %r tags, '
                 'attribute %r.' % (n, tot, tagname, attrname))
Code Example #5
File: latex_preprocess.py Project: kannode/mcdp
    def replace_m(inside, opt):
        #         print('replacing environment %r inside %r opt %r' % (envname, inside, opt))
        thm_label = opt
        contents, label = get_s_without_label(inside, labelprefix=labelprefix)
        if label is not None and isinstance(labelprefix, str):
            assert label.startswith(labelprefix), (s, labelprefix, label)
        if label is not None:
            id_part = "id='%s' " % label
        else:
            if make_label_if_missing:
                if isinstance(labelprefix, tuple):
                    usel = labelprefix[0]
                else:
                    usel = labelprefix

                makeup_id = usel + get_md5(inside)[:5]
                id_part = "id='%s'" % makeup_id
            else:
                id_part = ''

        l = "<span class='%s_label latex_env_label'>%s</span>" % (
            classname, thm_label) if thm_label else ""
        rr = '<div %sclass="%s latex_env" markdown="1">%s%s</div>' % (
            id_part, classname, l, contents)
        return rr
Code Example #6
File: disk_struct.py Project: rusi/mcdp
    def hash_code(self):
        codes = []
        for f in sorted(self._files):
            codes.append([f, self._files[f].hash_code()])
        for d in sorted(self._directories):
            codes.append([d, self._directories[d].hash_code()])
        return get_md5(yaml_dump(codes))
Code Example #7
def data_hash_code(s):
    if s is None:
        return 'None'
    if isinstance(s, str):
        return get_md5(s)
    elif isinstance(s, datetime.datetime):
        return get_md5(yaml_dump(s))
    elif isinstance(s, list):
        return get_md5("-".join(map(data_hash_code, s)))
    elif isinstance(s, dict):
        keys = sorted(s)
        codes = ['%s-%s' % (k, data_hash_code(s[k])) for k in keys]
        return data_hash_code("_".join(codes))
    else:
        msg = 'Invalid type %s' % describe_type(s)
        raise ValueError(msg)
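
Because dict keys are sorted before hashing, the fingerprint is independent of insertion order. A self-contained sketch of the same recursion, using hashlib directly since get_md5 and yaml_dump are project helpers:

import hashlib

def structural_md5(obj):
    # Deterministic fingerprint for nested str/list/dict data,
    # mirroring the recursion in data_hash_code above.
    if obj is None:
        return 'None'
    if isinstance(obj, str):
        return hashlib.md5(obj.encode('utf-8')).hexdigest()
    if isinstance(obj, list):
        return structural_md5("-".join(map(structural_md5, obj)))
    if isinstance(obj, dict):
        codes = ['%s-%s' % (k, structural_md5(obj[k])) for k in sorted(obj)]
        return structural_md5("_".join(codes))
    raise ValueError('Invalid type %s' % type(obj).__name__)

# Key order does not matter:
assert structural_md5({'a': 'x', 'b': 'y'}) == structural_md5({'b': 'y', 'a': 'x'})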
Code Example #8
def job_bib_contents(context, bib_files):
    bib_files = natsorted(bib_files)
    # read all contents
    contents = ""
    for fn in bib_files:
        contents += open(fn).read() + '\n\n'
    h = get_md5(contents)[:8]
    job_id = 'bibliography-' + h
    return context.comp(run_bibtex2html, contents, job_id=job_id)
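
The job id built here illustrates the caching pattern used throughout these examples: a digest of the inputs is embedded in the job name, so when the bibliography content changes, compmake sees a new job id and never reuses a stale result. The naming scheme in isolation, assuming nothing beyond hashlib:

import hashlib

def content_job_id(prefix, contents):
    # The id changes whenever the input text changes.
    digest = hashlib.md5(contents.encode('utf-8')).hexdigest()[:8]
    return '%s-%s' % (prefix, digest)

print(content_job_id('bibliography', '@book{knuth, title={TAOCP}}'))
# e.g. 'bibliography-5f2a9c41' (digest shown is illustrative)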
Code Example #9
def create_split_jobs(context,
                      data_aug,
                      mathjax,
                      preamble,
                      output_dir,
                      nworkers=0,
                      extra_panel_content=None,
                      add_toc_if_not_existing=True,
                      output_crossref=None,
                      permalink_prefix=None,
                      only_refs=False,
                      reveal=True):
    data = data_aug.get_result()
    if nworkers == 0:
        nworkers = max(1, cpu_count() - 2)

    res = AugmentedResult()
    res.merge(data_aug)

    h = get_md5(data)[-4:]
    jobs = []

    assets_dir = os.path.join(output_dir, 'assets')

    with timeit("preprocess"):
        soup = bs_entire_document(data)
        embed_css_files(soup)
        fo = os.path.join(output_dir, 'dummy.html')
        save_css(soup, fo, assets_dir)
        data = to_html_entire_document(soup)

    for i in range(nworkers):
        promise = context.comp_dynamic(
            go,
            i,
            nworkers,
            data,
            mathjax,
            preamble,
            output_dir,
            add_toc_if_not_existing=add_toc_if_not_existing,
            assets_dir=assets_dir,
            extra_panel_content=extra_panel_content,
            output_crossref=output_crossref,
            permalink_prefix=permalink_prefix,
            only_refs=only_refs,
            job_id='worker-%d-of-%d-%s' % (i, nworkers, h))
        jobs.append(promise)

        if only_refs:
            break

    if reveal:
        jobs.append(context.comp(download_reveal, output_dir))

    return context.comp(notification, res, jobs, output_dir)
Code Example #10
File: embedded_images.py Project: rusi/mcdp
def extract_svg_to_file(soup, savefile):
    n = 0
    tot = 0
    for i, svg in enumerate(list(soup.select('svg'))):
        tot += 1
        if not svg.attrs.get('class', ''):
            # only do the ones we rendered #XXX
            continue

        # e.g. <svg focusable="false" height="2.176ex" role="img"
        #      style="vertical-align: -0.505ex;" viewbox="0 ...
        svg['xmlns'] = "http://www.w3.org/2000/svg"
        svg['version'] = "1.1"
        prefix = """<?xml version="1.0"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
  "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n"""
        img = Tag(name='img')
        if 'width' in svg.attrs:
            add_style(img,
                      width=svg['width'] + 'pt',
                      height=svg['height'] + 'pt')
            svg.attrs.pop('width')
            svg.attrs.pop('height')

        data = prefix + str(svg)
        md5 = get_md5(data)

        basename = 'svg-%03d-%s' % (i, md5)
        propose = basename + '.svg'
        url = savefile(propose, data)

        img['class'] = svg.attrs.get('class', '')

        if svg.has_attr('id'):
            img['id'] = svg['id']
        img['src'] = url
        svg.replace_with(img)
        n += 1

    logger.debug('extract_svg_to_file: extracted %d/%d images from SVG tags.' %
                 (n, tot))
Code Example #11
File: embedded_images.py Project: kannode/mcdp
def extract_img_to_file_(soup, savefile, tagname, attrname):
    n = 0
    tot = 0
    for tag in soup.select(tagname):
        tot += 1
        if attrname not in tag.attrs:
            msg = 'No attr %r found for tag %s' % (attrname, tag)
            logger.warning(msg)
            continue
        src = tag.attrs[attrname]

        if not src.startswith('data:'):
            continue

        mime, data = get_mime_data_from_base64_string(src)

        # now we should make up the data
        if tag.has_attr('id'):
            basename = tag['id']
        else:
            md5 = get_md5(data)
            basename = 'data-from-%s-%s' % (tagname, md5)

        # Guess extension
        ext = get_ext_for_mime(mime)
        filename = basename + '.' + ext
        # src = "%s" % filename
        # ask what we should be using
        # print('saving file %s with %d data' % (filename, len(data)))
        use_src = savefile(filename, data)
        check_isinstance(use_src, str)
        tag[attrname] = use_src
        n += 1
    if False:
        logger.debug(
            ('extract_img_to_file: extracted %d/%d images from %r tags, '
             ' attribute %r.') % (n, tot, tagname, attrname))
    return n
Code Example #12
File: todoist_sync.py Project: kannode/mcdp
def todoist_sync(data, user, secret, prefix):
    import todoist
    api = todoist.TodoistAPI()
    api.user.login(user, secret)

    response = api.sync()
    if 'error' in response:
        print(response)
        raise Exception(response['error_extra'])
    projects = api.projects.all()
    # print projects
    use = 'Duckuments Tasks'
    for p in projects:
        if p['name'] == use:
            project_id = p['id']
            break
    else:
        msg = 'Could not find project %r' % use
        msg += '\nAvailable: %s' % ", ".join((_['name'] for _ in projects))
        raise Exception(msg)

    collaborators = response['collaborators']
    for c in collaborators:
        print(c['full_name'], c['email'])

    items = [_ for _ in api.items.all() if _['project_id'] == project_id]
    found = {}
    for i in items:
        if ';' in i['content']:
            tokens = i['content'].split(';')
            found[tokens[1].strip()] = i

    tasks = data.get_notes_by_tag(MCDPManualConstants.NOTE_TAG_TASK)
    n = 0
    for task in tasks:
        responsible_uid = get_task_person_uid(task, collaborators)

        ID = get_md5(str(task.msg))[-8:]

        if ID in found:
            msg = 'Task %s already in DB' % ID
            logger.info(msg)

            todoist_id = found[ID]['id']
            item = found[ID]

            if item['checked']:
                logger.debug('Setting the item to not done.')
                api.items.uncomplete([todoist_id])

            if found[ID]['responsible_uid'] != responsible_uid:
                print('need to update responsible id - to %s' %
                      responsible_uid)

                # print found[ID]
                api.items.update(todoist_id, responsible_uid=responsible_uid)

            continue

        html = task.as_html()
        for img in list(html.select('img')):
            img.extract()
        for a in html.select('a[href]'):
            href = a.attrs['href']
            if not href.startswith('http'):
                href = prefix + href
                a.attrs['href'] = href

        html_str = str(html)
        html_str = html_str.replace('<br/>', ' ')
        res = tomd.Tomd(html_str).markdown

        location = list(task.locations.values())[0]
        stack = location.get_stack()
        for l in stack:
            if isinstance(l, GithubLocation):
                head = l.path
                break
        else:
            head = '???'

        content = head + '; ' + ID
        # content = res[:100]
        item = api.items.add(content,
                             project_id,
                             responsible_uid=responsible_uid)
        api.notes.add(item['id'], res)

        n += 1

        if n > 45:
            break

        time.sleep(0.2)

    api.commit()
    api.sync()
Code Example #13
def makefigure(inside, opt, asterisk):  # @UnusedVariable
    align = opt  # @UnusedVariable

    #     print('makefigure inside = %r' % inside)

    def subfloat_replace(args, opts):
        contents = args[0]
        caption = opts[0]
        check_isinstance(contents, str)

        if caption is None:
            label = None
        else:
            caption, label = get_s_without_label(caption, labelprefix="fig:")
            if label is None:
                caption, label = get_s_without_label(caption,
                                                     labelprefix="subfig:")
            if label is not None and not label.startswith('subfig:'):
                msg = 'Subfigure labels should start with "subfig:"; found %r. ' % label
                label = 'sub' + label
                msg += 'I will change to %r.' % label
                logger.debug(msg)

        # we need to make up an ID
        if label is None:
            label = 'subfig:' + get_md5(contents)
        idpart = ' id="%s"' % label

        if caption is None:
            caption = 'no subfloat caption'
        res = '<figure class="subfloat"%s>%s<figcaption>%s</figcaption></figure>' % (
            idpart, contents, caption)
        return res

    inside = substitute_command_ext(inside,
                                    'subfloat',
                                    subfloat_replace,
                                    nargs=1,
                                    nopt=1)

    class Tmp:
        label = None

    def sub_caption(args, opts):
        assert not opts and len(args) == 1
        x, Tmp.label = get_s_without_label(args[0], labelprefix="fig:")
        res = '<figcaption>' + x + "</figcaption>"
        #         print('caption args: %r, %r' % (args, opts))
        return res

    inside = substitute_command_ext(inside,
                                    'caption',
                                    sub_caption,
                                    nargs=1,
                                    nopt=0)

    #     print('makefigure inside without caption = %r'  % inside)
    assert '\\caption' not in inside

    if Tmp.label is None:
        Tmp.label = 'fig:' + get_md5(inside)

    idpart = ' id="%s"' % Tmp.label

    res = '<figure%s>%s</figure>' % (idpart, inside)
    return res
Code Example #14
File: pipeline.py Project: afcarl/mcdp
def render_complete(library,
                    s,
                    raise_errors,
                    realpath,
                    generate_pdf=False,
                    check_refs=False,
                    use_mathjax=True,
                    filter_soup=None,
                    symbols=None):
    """
        Transforms markdown into html and then renders the mcdp snippets inside.

        s: a markdown string with embedded html snippets

        Returns an HTML string; not a complete document.

        filter_soup(library, soup)
    """
    s0 = s
    check_good_use_of_special_paragraphs(s0, realpath)
    raise_missing_image_errors = raise_errors

    # Imports here because of circular dependencies
    from .latex.latex_preprocess import extract_maths, extract_tabular
    from .latex.latex_preprocess import latex_preprocessing
    from .latex.latex_preprocess import replace_equations
    from .macro_col2 import col_macros, col_macros_prepare_before_markdown
    from .mark.markd import render_markdown
    from .preliminary_checks import do_preliminary_checks_and_fixes
    from .prerender_math import prerender_mathjax

    if isinstance(s, unicode):
        msg = 'I expect a str encoded with utf-8, not unicode.'
        raise_desc(TypeError, msg, s=s)

    # need to do this before do_preliminary_checks_and_fixes
    # because of & char
    s, tabulars = extract_tabular(s)

    s = do_preliminary_checks_and_fixes(s)
    # put back tabular, because extract_maths needs to grab them
    for k, v in tabulars.items():
        assert k in s
        s = s.replace(k, v)

    # copy all math content,
    #  between $$ and $$
    #  between various limiters etc.
    # returns a dict(string, substitution)
    s, maths = extract_maths(s)
    #     print('maths = %s' % maths)
    for k, v in maths.items():
        if v.startswith('$') and not v.startswith('$$'):
            if '\n\n' in v:
                msg = 'Suspicious math fragment %r = %r' % (k, v)
                logger.error(maths)
                logger.error(msg)
                raise ValueError(msg)

    s = latex_preprocessing(s)
    s = '<div style="display:none">Because of mathjax bug</div>\n\n\n' + s

    # cannot parse html before markdown, because md will take
    # invalid html, (in particular '$   ciao <ciao>' and make it work)

    s = s.replace('*}', '\\*}')

    s, mcdpenvs = protect_my_envs(s)
    #     print('mcdpenvs = %s' % maths)

    s = col_macros_prepare_before_markdown(s)

    #     print(indent(s, 'before markdown | '))
    s = render_markdown(s)
    #     print(indent(s, 'after  markdown | '))

    for k, v in maths.items():
        if k not in s:
            msg = 'Cannot find %r (= %r)' % (k, v)
            raise_desc(DPInternalError, msg, s=s)

        def preprocess_equations(x):
            # this gets mathjax confused
            x = x.replace('>', '\\gt{}')  # need brace; think a<b -> a\lt{}b
            x = x.replace('<', '\\lt{}')
            #             print('replaced equation %r by %r ' % (x0, x))
            return x

        v = preprocess_equations(v)
        s = s.replace(k, v)

    s = replace_equations(s)
    s = s.replace('\\*}', '*}')

    # this parses the XML
    soup = bs(s)

    other_abbrevs(soup)

    # need to process tabular before mathjax
    escape_for_mathjax(soup)

    #     print(indent(s, 'before prerender_mathjax | '))
    # mathjax must be after markdown because of code blocks using "$"

    s = to_html_stripping_fragment(soup)

    if use_mathjax:
        s = prerender_mathjax(s, symbols)

    soup = bs(s)
    escape_for_mathjax_back(soup)
    s = to_html_stripping_fragment(soup)

    #     print(indent(s, 'after prerender_mathjax | '))
    for k, v in mcdpenvs.items():
        # there is this case:
        # ~~~
        # <pre> </pre>
        # ~~~
        s = s.replace(k, v)

    s = s.replace('<p>DRAFT</p>', '<div class="draft">')

    s = s.replace('<p>/DRAFT</p>', '</div>')

    soup = bs(s)
    mark_console_pres(soup)

    try:
        substitute_github_refs(soup, defaults={})
    except Exception as e:
        msg = 'I got an error while substituting github: references.'
        msg += '\nI will ignore this error because it might not be the fault of the writer.'
        msg += '\n\n' + indent(str(e), '|', ' error: |')
        logger.warn(msg)

    # must be before make_figure_from_figureid_attr()
    display_files(soup, defaults={}, raise_errors=raise_errors)

    make_figure_from_figureid_attr(soup)
    col_macros(soup)
    fix_subfig_references(soup)

    library = get_library_from_document(soup, default_library=library)
    from mcdp_docs.highlight import html_interpret
    html_interpret(library,
                   soup,
                   generate_pdf=generate_pdf,
                   raise_errors=raise_errors,
                   realpath=realpath)
    if filter_soup is not None:
        filter_soup(library=library, soup=soup)

    embed_images_from_library2(soup=soup,
                               library=library,
                               raise_errors=raise_missing_image_errors)
    make_videos(soup=soup)

    if check_refs:
        check_if_any_href_is_invalid(soup)

    if getuser() == 'andrea':
        if MCDPConstants.preprocess_style_using_less:
            run_lessc(soup)
        else:
            logger.warning(
                'preprocess_style_using_less=False might break the manual')
    fix_validation_problems(soup)

    strip_pre(soup)

    if MCDPManualConstants.enable_syntax_higlighting:
        syntax_highlighting(soup)

    if MCDPManualConstants.enforce_status_attribute:
        check_status_codes(soup, realpath)
    if MCDPManualConstants.enforce_lang_attribute:
        check_lang_codes(soup)

    # Fixes the IDs (adding 'sec:'); add IDs to missing ones
    globally_unique_id_part = 'autoid-DO-NOT-USE-THIS-VERY-UNSTABLE-LINK-' + get_md5(
        s0)[:5]
    fix_ids_and_add_missing(soup, globally_unique_id_part)

    check_no_patently_wrong_links(soup)

    s = to_html_stripping_fragment(soup)
    s = replace_macros(s)

    return s
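
render_complete relies throughout on an extract-and-restore pattern: fragile spans (math, tabulars, custom environments) are pulled out and replaced by opaque placeholder keys, the lossy transformation (markdown) runs on the rest, and then the originals are substituted back. The pattern in miniature, with an illustrative placeholder scheme:

import re

def extract(s, pattern):
    # Replace each match with a unique key; return new text and key -> original map.
    subs = {}
    def repl(m):
        key = 'PLACEHOLDER%04dENDPLACEHOLDER' % len(subs)
        subs[key] = m.group(0)
        return key
    return re.sub(pattern, repl, s), subs

s, maths = extract('Let $x^2$ grow.', r'\$[^$]+\$')
# ... run markdown on s, which no longer contains any '$' ...
for k, v in maths.items():
    s = s.replace(k, v)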
Code Example #15
def go(context,
       worker_i,
       num_workers,
       data,
       mathjax,
       preamble,
       output_dir,
       assets_dir,
       add_toc_if_not_existing,
       extra_panel_content,
       permalink_prefix=None,
       output_crossref=None,
       only_refs=False):
    res = AugmentedResult()
    soup = bs_entire_document(data)

    # extract the main toc if it is there
    with timeit("Extracting main toc"):
        main_toc = soup.find(id=MCDPManualConstants.MAIN_TOC_ID)

        if main_toc is None:

            if add_toc_if_not_existing:
                # logger.info('Generating TOC because it is not there')

                tocg = generate_toc(soup)
                main_toc = bs(tocg).ul
                main_toc.attrs['class'] = 'toc'  # XXX: see XXX13
                assert main_toc is not None
                substituting_empty_links(main_toc,
                                         raise_errors=False,
                                         res=res,
                                         extra_refs=soup)

            else:
                msg = 'Could not find main toc (id #%s)' % MCDPManualConstants.MAIN_TOC_ID
                res.note_error(msg)
                main_toc = Tag(name='div')
                main_toc.append('TOC NOT FOUND')
        else:
            main_toc = main_toc.__copy__()

        if 'id' in main_toc.attrs:
            del main_toc.attrs['id']

    # XXX: this is not the place to do it
    mark_toc_links_as_errored(main_toc, soup)

    body = soup.html.body

    with timeit("split_in_files"):
        filename2contents = split_in_files(body)
        id2filename = get_id2filename(filename2contents)

    res.set_result(id2filename)

    if output_crossref is not None:
        from mcdp_docs.mcdp_render_manual import write_crossref_info
        context.comp(write_crossref_info,
                     data=data,
                     id2filename=id2filename,
                     output_crossref=output_crossref,
                     permalink_prefix=permalink_prefix)

    if only_refs:
        logger.debug('Skipping rest because only_refs')
        return res

    with timeit("add_prev_next_links"):
        filename2contents = add_prev_next_links(filename2contents)

    with timeit("preparing assets dir"):
        if not os.path.exists(output_dir):
            try:
                os.makedirs(output_dir)
            except OSError:
                pass  # directory may already exist (e.g. created by another worker)

    with timeit("creating link.html and link.js"):

        linkbase = 'link.html'  # do not change (it's used by http://purl.org/dth)
        linkbasejs = 'link.js'

        lb = create_link_base(id2filename)
        write_data_to_file(str(lb),
                           os.path.join(output_dir, linkbase),
                           quiet=True)

        linkjs = create_link_base_js(id2filename)
        write_data_to_file(str(linkjs),
                           os.path.join(output_dir, linkbasejs),
                           quiet=True)

    if preamble is not None:
        if preamble.endswith('.tex'):  # XXX
            preamble = open(preamble).read()

    ids_to_use = []
    for k in list(id2filename):
        if 'autoid' not in k:
            ids_to_use.append(k)
    ids_to_use = sorted(ids_to_use)

    pointed_to = []
    for k in ids_to_use:
        f = id2filename[k]
        if f not in pointed_to:
            pointed_to.append(f)

    # data = ",".join(pointed_to)
    head0 = soup.html.head

    if True:
        context.comp(remove_spurious, output_dir, list(filename2contents))

    with timeit('main_toc copy'):
        main_toc0 = main_toc.__copy__()

        main_toc0_s = str(main_toc0)
    asset_jobs = []
    for i, (filename, contents) in enumerate(filename2contents.items()):
        if i % num_workers != worker_i:
            continue
        with timeit('main_toc copy hack'):
            main_toc = bs(main_toc0_s).ul
            assert main_toc is not None

        # Trick: we add the main_toc, and then ... (look below)
        with timeit('make_page'):
            add_home_link = 'index.html' not in filename2contents
            html = make_page(contents,
                             head0,
                             main_toc,
                             extra_panel_content,
                             add_home_link=add_home_link)

        with timeit("direct job"):
            result = only_second_part(mathjax, preamble, html, id2filename,
                                      filename)

            # ... we remove it. In this way we don't have to copy it every time...
            main_toc.extract()

            fn = os.path.join(output_dir, filename)

            h = get_md5(result)[:8]
            r = context.comp(extract_assets_from_file,
                             result,
                             fn,
                             assets_dir,
                             job_id='%s-%s-assets' % (filename, h))
            asset_jobs.append(r)

    update_refs_('toc.html', main_toc, id2filename)
    out_toc = os.path.join(output_dir, 'toc.html')
    write_data_to_file(str(main_toc), out_toc, quiet=True)

    return context.comp(wait_assets, res, asset_jobs)
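
Work is sharded across workers purely by index: worker w handles exactly the files whose enumeration index i satisfies i % num_workers == w, so the workers need no coordination. The split in miniature:

def shard(items, worker_i, num_workers):
    # Round-robin split used by go(): each worker takes every num_workers-th item.
    return [x for i, x in enumerate(items) if i % num_workers == worker_i]

files = ['a.html', 'b.html', 'c.html', 'd.html', 'e.html']
assert shard(files, 0, 2) == ['a.html', 'c.html', 'e.html']
assert shard(files, 1, 2) == ['b.html', 'd.html']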
Code Example #16
def manual_jobs(context,
                src_dirs,
                resources_dirs,
                out_split_dir,
                output_file,
                generate_pdf,
                stylesheet,
                stylesheet_pdf,
                use_mathjax,
                raise_errors,
                resolve_references=True,
                remove=None,
                filter_soup=None,
                symbols=None,
                out_pdf=None,
                only_refs=False,
                permalink_prefix=None,
                compose_config=None,
                output_crossref=None,
                do_last_modified=False,
                wordpress_integration=False,
                ignore_ref_errors=False,
                likebtn=None,
                extra_crossrefs=None):
    """
        src_dirs: list of sources
        symbols: a TeX preamble (or None)
    """
    #
    # if symbols is not None:
    #     symbols = open(symbols).read()
    if stylesheet_pdf is None:
        stylesheet_pdf = stylesheet
    # outdir = os.path.dirname(out_split_dir)  # XXX
    filenames = get_markdown_files(src_dirs)

    if not filenames:
        msg = "Could not find any file for composing the book."
        raise Exception(msg)

    files_contents = []
    for i, filename in enumerate(filenames):
        if is_ignored_by_catkin(filename):
            logger.debug('Ignoring because of CATKIN_IGNORE: %s' % filename)
            continue
        logger.info('adding document %s ' % friendly_path(filename))

        docname, _ = os.path.splitext(os.path.basename(filename))

        contents = open(filename).read()
        contents_hash = get_md5(contents)[:8]
        # because of hash job will be automatically erased if the source changes
        out_part_basename = '%03d-%s-%s' % (i, docname, contents_hash)
        job_id = '%s-%s-%s' % (docname, get_md5(filename)[:8], contents_hash)

        try:
            source_info = get_source_info(filename)
        except NoSourceInfo as e:
            logger.warn('No source info for %s:\n%s' % (filename, e))
            source_info = None

        for d in src_dirs:
            if filename.startswith(d):
                break
        else:
            msg = "Could not find dir for %s in %s" % (filename, src_dirs)
            raise Exception(msg)

        html_contents = context.comp(render_book,
                                     generate_pdf=generate_pdf,
                                     src_dirs=src_dirs + resources_dirs,
                                     data=contents,
                                     realpath=filename,
                                     use_mathjax=use_mathjax,
                                     symbols=symbols,
                                     raise_errors=raise_errors,
                                     filter_soup=filter_soup,
                                     ignore_ref_errors=ignore_ref_errors,
                                     job_id=job_id)

        doc = DocToJoin(docname=out_part_basename,
                        contents=html_contents,
                        source_info=source_info)
        files_contents.append(tuple(doc))  # compmake doesn't do namedtuples

    ignore = []
    if output_crossref:
        ignore.append(output_crossref)

    crossrefs_aug = get_cross_refs(resources_dirs,
                                   permalink_prefix,
                                   extra_crossrefs,
                                   ignore=ignore)

    bib_files = get_bib_files(src_dirs)

    logger.debug('Found bib files:\n%s' % "\n".join(bib_files))
    if bib_files:
        bib_contents_aug = job_bib_contents(context, bib_files)
        entry = DocToJoin(docname='bibtex',
                          contents=bib_contents_aug,
                          source_info=None)
        files_contents.append(tuple(entry))

    if do_last_modified:
        data_aug = context.comp(make_last_modified,
                                files_contents=files_contents)
        entry = DocToJoin(docname='last_modified',
                          contents=data_aug,
                          source_info=None)
        files_contents.append(tuple(entry))

    root_dir = src_dirs[0]

    template = get_main_template(root_dir, resources_dirs)

    references = OrderedDict()
    #     base_url = 'http://book.duckietown.org/master/duckiebook/pdoc'
    #     for extra_dir in extra_dirs:
    #         res = read_references(extra_dir, base_url, prefix='python:')
    #         references.update(res)

    #     extra = look_for_files(extra_dirs, "*.html")
    #
    #     for filename in extra:
    #         contents = open(filename).read()
    #         docname = os.path.basename(filename) + '_' + get_md5(filename)[:5]
    #         c = (('unused', docname), contents)
    #         files_contents.append(c)

    cs = get_md5(crossrefs_aug.get_result())[:8]

    joined_aug = context.comp(manual_join,
                              template=template,
                              files_contents=files_contents,
                              stylesheet=None,
                              remove=remove,
                              references=references,
                              resolve_references=resolve_references,
                              crossrefs_aug=crossrefs_aug,
                              permalink_prefix=permalink_prefix,
                              job_id='join-%s' % cs)

    if compose_config is not None:
        try:
            data = yaml.load(open(compose_config).read())  # XXX
            compose_config_interpreted = ComposeConfig.from_yaml(data)
        except ValueError as e:
            msg = 'Cannot read YAML config file %s' % compose_config
            raise_wrapped(UserError, e, msg, compact=True)
        else:
            joined_aug = context.comp(make_composite,
                                      compose_config_interpreted, joined_aug)

    joined_aug = context.comp(mark_errors_and_rest, joined_aug)

    if likebtn:
        joined_aug = context.comp(add_likebtn, joined_aug, likebtn)

    if wordpress_integration:
        joined_aug = context.comp(add_related, joined_aug)

    if output_file is not None:
        context.comp(write, joined_aug, output_file)

    if out_split_dir is not None:

        joined_aug_with_html_stylesheet = context.comp(add_style, joined_aug,
                                                       stylesheet)

        extra_panel_content = context.comp(get_extra_content,
                                           joined_aug_with_html_stylesheet)
        id2filename_aug = context.comp_dynamic(
            create_split_jobs,
            data_aug=joined_aug_with_html_stylesheet,
            mathjax=True,
            preamble=symbols,
            extra_panel_content=extra_panel_content,
            output_dir=out_split_dir,
            nworkers=0,
            output_crossref=output_crossref,
            permalink_prefix=permalink_prefix,
            only_refs=only_refs)

        if not only_refs:
            context.comp(write_errors_and_warnings_files, id2filename_aug,
                         out_split_dir)
        context.comp(write_manifest_html, out_split_dir)

    if out_pdf is not None:
        joined_aug_with_pdf_stylesheet = context.comp(add_style, joined_aug,
                                                      stylesheet_pdf)
        prerendered = context.comp(prerender,
                                   joined_aug_with_pdf_stylesheet,
                                   symbols=symbols)
        pdf_data = context.comp(render_pdf, prerendered)
        context.comp(write_data_to_file, pdf_data, out_pdf)
        context.comp(write_manifest_pdf, out_pdf)
Code Example #17
File: disk_struct.py Project: rusi/mcdp
    def hash_code(self):
        return get_md5(self.contents)
Code Example #18
def manual_jobs(context,
                src_dirs,
                output_file,
                generate_pdf,
                stylesheet,
                use_mathjax,
                raise_errors,
                resolve_references=True,
                remove=None,
                filter_soup=None,
                extra_css=None,
                symbols=None,
                do_last_modified=False):
    """
        src_dirs: list of sources
        symbols: a TeX preamble (or None)
    """
    filenames = get_markdown_files(src_dirs)
    print('using:')
    print("\n".join(filenames))

    if not filenames:
        msg = 'Could not find any file for composing the book.'
        raise Exception(msg)

    files_contents = []
    for i, filename in enumerate(filenames):
        if is_ignored_by_catkin(filename):
            logger.debug('Ignoring because of CATKIN_IGNORE: %s' % filename)
            continue
        logger.info('adding document %s ' % friendly_path(filename))

        docname, _ = os.path.splitext(os.path.basename(filename))

        contents = open(filename).read()
        contents_hash = get_md5(contents)[:8]
        # because of hash job will be automatically erased if the source changes
        out_part_basename = '%03d-%s-%s' % (i, docname, contents_hash)
        job_id = '%s-%s-%s' % (docname, get_md5(filename)[:8], contents_hash)

        source_info = get_source_info(filename)

        # find the dir
        for d in src_dirs:
            if os.path.realpath(d) in filename:
                break
        else:
            msg = 'Could not find dir for %s in %s' % (filename, src_dirs)
            raise Exception(msg)

        html_contents = context.comp(render_book,
                                     generate_pdf=generate_pdf,
                                     src_dirs=src_dirs,
                                     data=contents,
                                     realpath=filename,
                                     use_mathjax=use_mathjax,
                                     symbols=symbols,
                                     raise_errors=raise_errors,
                                     main_file=output_file,
                                     out_part_basename=out_part_basename,
                                     filter_soup=filter_soup,
                                     extra_css=extra_css,
                                     job_id=job_id)

        doc = DocToJoin(docname=out_part_basename,
                        contents=html_contents,
                        source_info=source_info)
        files_contents.append(tuple(doc))  # compmake doesn't do namedtuples

    bib_files = get_bib_files(src_dirs)

    logger.debug('Found bib files:\n%s' % "\n".join(bib_files))
    if bib_files:
        bib_contents = job_bib_contents(context, bib_files)
        entry = DocToJoin(docname='bibtex',
                          contents=bib_contents,
                          source_info=None)
        files_contents.append(tuple(entry))

    if do_last_modified:
        data = context.comp(make_last_modified, files_contents=files_contents)
        entry = DocToJoin(docname='last_modified',
                          contents=data,
                          source_info=None)
        files_contents.append(tuple(entry))

    root_dir = src_dirs[0]

    template = get_main_template(root_dir)

    references = OrderedDict()
    #     base_url = 'http://book.duckietown.org/master/duckiebook/pdoc'
    #     for extra_dir in extra_dirs:
    #         res = read_references(extra_dir, base_url, prefix='python:')
    #         references.update(res)

    #     extra = look_for_files(extra_dirs, "*.html")
    #
    #     for filename in extra:
    #         contents = open(filename).read()
    #         docname = os.path.basename(filename) + '_' + get_md5(filename)[:5]
    #         c = (('unused', docname), contents)
    #         files_contents.append(c)

    d = context.comp(manual_join,
                     template=template,
                     files_contents=files_contents,
                     stylesheet=stylesheet,
                     remove=remove,
                     references=references,
                     resolve_references=resolve_references)

    context.comp(write, d, output_file)

    if os.path.exists(MCDPManualConstants.pdf_metadata_template):
        context.comp(generate_metadata, root_dir)
Code Example #19
File: split.py Project: afcarl/mcdp
def go(context, worker_i, num_workers, ifilename, mathjax, preamble,
       output_dir):
    with timeit("reading %s" % ifilename):
        soup = read_html_doc_from_file(ifilename)

    # extract the main toc if it is there

    with timeit("Extracting main_toc"):
        main_toc = soup.find(id='main_toc')

        if main_toc is None:
            msg = 'Could not find the element #main_toc.'
            raise ValueError(msg)

        main_toc = main_toc.__copy__()
        del main_toc.attrs['id']

    body = soup.html.body

    with timeit("split_in_files"):
        filename2contents = split_in_files(body)

    with timeit("add_prev_next_links"):
        filename2contents = add_prev_next_links(filename2contents)

    with timeit("preparing assets dir"):
        if not os.path.exists(output_dir):
            try:
                os.makedirs(output_dir)
            except OSError:
                pass  # directory may already exist (e.g. created by another worker)

        assets_dir = os.path.join(output_dir, 'assets')

    with timeit("creating link.html and link.js"):
        id2filename = get_id2filename(filename2contents)
        linkbase = 'link.html'  # do not change (it's used by http://purl.org/dth)
        linkbasejs = 'link.js'
        lb = create_link_base(id2filename)
        write_data_to_file(str(lb), os.path.join(output_dir, linkbase))

        linkjs = create_link_base_js(id2filename)
        write_data_to_file(str(linkjs), os.path.join(output_dir, linkbasejs))

    if preamble:
        preamble = open(preamble).read()

    ids_to_use = []
    for k in list(id2filename):
        if 'autoid' not in k:
            ids_to_use.append(k)
    ids_to_use = sorted(ids_to_use)

    pointed_to = []
    for k in ids_to_use:
        f = id2filename[k]
        if f not in pointed_to:
            pointed_to.append(f)

    data = ",".join(pointed_to)
    links_hash = get_md5(data)[:8]
    #     if self.options.faster_but_imprecise:
    #         links_hash = "nohash"
    #
    #     logger.debug('hash data: %r' % data)
    logger.debug('hash value: %r' % links_hash)

    head0 = soup.html.head

    if True:
        context.comp(remove_spurious, output_dir, list(filename2contents))

    tmpd = create_tmpdir()

    n = len(filename2contents)
    for i, (filename, contents) in enumerate(filename2contents.items()):
        if i % num_workers != worker_i:
            continue
        # contents_hash = get_md5(str(contents) + str(preamble))[:8]
        # job_id = '%s-%s-%s' % (filename, links_hash, contents_hash)

        # Trick: we add the main_toc, and then ... (look below)
        with timeit('make_page'):
            html = make_page(contents, head0, main_toc)
        with timeit('main_toc copy'):
            main_toc = main_toc.__copy__()

        logger.debug('%d/%d: %s' % (i, n, filename))
        with timeit("direct job"):
            result = only_second_part(mathjax, preamble, html, id2filename,
                                      filename)

            # ... we remove it. In this way we don't have to copy it
            main_toc.extract()

            fn = os.path.join(output_dir, filename)

            fn0 = os.path.join(tmpd, filename)
            write_data_to_file(result, fn0, quiet=True)

            h = get_md5(result)[:8]
            context.comp(extract_assets_from_file,
                         fn0,
                         fn,
                         assets_dir,
                         job_id='assets-%s' % h)