예제 #1
0
def generate_and_add_toc(soup, raise_error=False, res=None):
    """
        Generates the table of contents for the <body> of ``soup`` and puts
        it in place of the TOC placeholder.

        soup: parsed document; its <body> is passed to generate_toc() and is
            searched for the placeholder element.
        raise_error: if True, NoTocPlaceholder is raised when no element
            matches MCDPManualConstants.TOC_PLACEHOLDER_SELECTOR; otherwise
            the problem is logged and noted on ``res``.
        res: collector on which problems are recorded with note_error();
            if None, a fresh AugmentedResult is used.
    """
    if res is None:
        # The collector must be bound to `res` because that is the name used
        # below; without it every note_error() call would hit None.
        res = AugmentedResult()
    logger.info('adding toc')
    body = soup.find('body')
    toc = generate_toc(body, res)

    toc_ul = bs(toc).ul
    if toc_ul is None:
        # empty TOC: nothing to insert, just record the problem
        res.note_error('Could not find toc.')
        return

    toc_ul.extract()
    assert toc_ul.name == 'ul'
    toc_ul['class'] = 'toc'  # XXX: see XXX13
    toc_ul['id'] = MCDPManualConstants.MAIN_TOC_ID

    toc_selector = MCDPManualConstants.TOC_PLACEHOLDER_SELECTOR
    tocs = list(body.select(toc_selector))
    if not tocs:
        msg = 'Cannot find any element of type %r to put TOC inside.' % toc_selector
        if raise_error:
            raise NoTocPlaceholder(msg)
        logger.warning(msg)
        res.note_error(msg)
        return

    # Only the first placeholder is replaced.
    tocs[0].replaceWith(toc_ul)
예제 #2
0
def generate_and_add_toc(soup, toc_selector='div#toc'):
    """
        Builds the table of contents from the <body> of ``soup`` and replaces
        the first element matching ``toc_selector`` with it.

        If no <ul> can be extracted from the generated TOC, or if no element
        matches the selector, a warning is logged and the document is left
        untouched.
    """
    logger.info('adding toc')
    body = soup.find('body')
    toc_list = bs(generate_toc(body)).ul

    if toc_list is None:
        # empty TOC
        logger.warning('Could not find toc')
        return

    toc_list.extract()
    assert toc_list.name == 'ul'
    toc_list['class'] = 'toc'
    toc_list['id'] = 'main_toc'

    placeholders = list(body.select(toc_selector))
    if placeholders:
        # Only the first placeholder is used.
        placeholders[0].replaceWith(toc_list)
    else:
        logger.warning(
            'Cannot find any element of type %r to put TOC inside.' % toc_selector)
예제 #3
0
def raise_if_any_error(results):
    """
        Raises an Exception summarizing the failed entries of ``results``.

        results: dict mapping a job id (string) to a pair whose second
            element has the attributes ``error_type`` (None if the job
            succeeded) and ``error_string``.

        Jobs listed in ``expected`` below are known failures: they are
        dropped from the report, and a warning is logged if one of them is
        present in ``results`` but did not fail.

        Raises Exception, with one summary line per unexpected failure
        ("<id> | <first 4 chars of error type> | <first line of error>").
    """
    errors = {}

    for rid, (_, r) in results.items():
        if r.error_type is None:
            continue
        first_line = r.error_string.split('\n')[0]
        # Keep each summary line to roughly 150 characters. Clamp at zero so
        # that a very long id does not turn into a negative slice bound.
        budget = max(0, 150 - len(rid))
        errors[rid] = (rid + ' | ' + r.error_type[:4] + ' | ' +
                       first_line[:budget])

    expected = [
        'local-uav_energetics-pretty-models-batteries',
        'local-uav_energetics-pretty-models-battery_squash',
        'local-unittests-loading_python-models-load1',
        'local-unittests-loading_python-models-load1b',
        'local-unittests-loading_python-posets-load2',
        'local-unittests-loading_python-primitivedps-load_primitivedp',
        'local-unittests-making-models-test1',
        'local-examples_devel-icra17-models-uncertain2',
    ]

    for e in expected:
        if e in results and e not in errors:
            logger.warning('Expected a failure for %r' % e)
        errors.pop(e, None)

    if errors:
        msg = 'Found %s errors.\n\n' % len(errors)
        # Report the formatted summaries, not just the job ids.
        msg += "\n".join(errors[rid] for rid in sorted(errors))
        raise Exception(msg)
예제 #4
0
파일: library_view.py 프로젝트: rusi/mcdp
    def _load_spec_data(self, spec_name, thing_name):
        """
            Looks up ``thing_name`` among the things of kind ``spec_name`` in
            the library identified by self.repo_name / self.shelf_name /
            self.library_name.

            The name is resolved with get_soft_match(). If nothing matches,
            DPSemanticError is raised through raise_desc(), listing what is
            available. If the match differs from the requested name, this is
            either logged as deprecated or rejected with DPSemanticError,
            depending on MCDPConstants.allow_soft_matching.

            Returns a dict with keys 'data' and 'realpath', where 'realpath'
            is a human-readable description of where the data came from.
        """
        repo = self.the_context.db_view.repos[self.repo_name]
        shelf = repo.shelves[self.shelf_name]
        things = shelf.libraries[self.library_name].things.child(spec_name)

        try:
            match = get_soft_match(thing_name, list(things))
        except KeyError:
            available = sorted(things)
            if available:
                suffix = ("\n Available %s: %s." %
                          (spec_name, format_list(available)))
            else:
                suffix = "\n None available."
            msg = 'Soft match failed: Could not find %r in %s.' % (
                thing_name, spec_name) + suffix
            raise_desc(DPSemanticError, msg)
        else:
            soft_matched = match != thing_name
            if soft_matched and not MCDPConstants.allow_soft_matching:
                msg = 'Found case in which the user relies on soft matching (%r to refer to %r).' % (
                    thing_name, match)
                raise DPSemanticError(msg)
            if soft_matched:
                # TODO: add warning
                logger.warning('Soft matching %r to %r (deprecated)' %
                               (match, thing_name))

            data = things[match]
            extension = specs[spec_name].extension
            location = (match + '.' + extension, self.library_name,
                        self.shelf_name, self.repo_name)
            realpath = '%s in library %r in shelf %r in repo %r' % location
            return {'data': data, 'realpath': realpath}
예제 #5
0
def substituting_empty_links(soup, raise_errors=False):
    '''
        Fills in the text of the empty links that point to a fragment,
        using the label attributes of the element they refer to.

        default style is [](#sec:systems)  "Chapter 10"

        the name is [](#sec:systems?only_name) "My title"

        the number is [](#sec:systems?only_number) "10"

        and full is [](#sec:systems?toc_link) "Chapter 10 - My title"

        You can also use "class":

            <a href='#sec:name' class='only_number'></a>

            or

            <a href='#sec:name?only_number'></a>

        If raise_errors is True, a ValueError is raised for a link whose
        target cannot be found; otherwise the error is noted on the link.
    '''
    only_number_class = MCDPManualConstants.CLASS_ONLY_NUMBER
    number_name_class = MCDPManualConstants.CLASS_NUMBER_NAME
    only_name_class = MCDPManualConstants.CLASS_ONLY_NAME

    def make_span(text, css_class):
        # Small factory for the <span class=...>text</span> pieces of a link.
        span = Tag(name='span')
        span.string = text
        add_class(span, css_class)
        return span

    logger.debug('substituting_empty_links')

    total = 0
    nerrors = 0
    for le in get_empty_links_to_fragment(soup):
        a = le.linker
        element_id = le.eid
        element = le.linked
        total += 1

        if not element:
            msg = ('Cannot find %s' % element_id)
            note_error_msg(a, msg)
            nerrors += 1
            if raise_errors:
                raise ValueError(msg)
            continue

        has_query = le.query is not None

        # if there is a query, remove it
        if has_query:
            new_href = '#' + le.eid
            a.attrs['href'] = new_href
            logger.info('setting new href= %s' % (new_href))

        attrs = element.attrs
        if not (LABEL_WHAT_NUMBER in attrs and LABEL_NAME in attrs):
            msg = (
                'substituting_empty_links: Could not find attributes %s or %s in %s'
                % (LABEL_NAME, LABEL_WHAT_NUMBER, element))
            # Only a warning: this is not counted as an error and never
            # raises, whatever the value of raise_errors.
            logger.warning(msg)
            continue

        label_what_number = attrs[LABEL_WHAT_NUMBER]
        label_number = attrs[LABEL_NUMBER]
        label_what = attrs[LABEL_WHAT]
        label_name = attrs[LABEL_NAME]

        # Work on a copy so that the classes of the link are not modified.
        classes = list(a.attrs.get('class', []))
        if has_query:
            classes.append(le.query)

        if 'toc_link' in classes:
            a.append(make_span(label_what, 'toc_what'))
            a.append(' ')
            a.append(make_span(label_number, 'toc_number'))
            a.append(make_span(' - ', 'toc_sep'))

            if label_name is not None and '<' in label_name:
                contents = bs(label_name)
                # sanitize the label name
                for br in contents.findAll('br'):
                    br.replaceWith(NavigableString(' '))
                for nested_link in contents.findAll('a'):
                    nested_link.extract()
                a.append(contents)
            else:
                shown = '(unnamed)' if label_name is None else label_name  # XXX
                a.append(make_span(shown, 'toc_name'))
            continue

        if only_number_class in classes:
            label = label_number
        elif number_name_class in classes:
            shown = '(unnamed)' if label_name is None else label_name
            label = label_what_number + ' - ' + shown
        elif only_name_class in classes:
            label = '(unnamed)' if label_name is None else label_name
        else:
            label = label_what_number

        reflabel = Tag(name='span')
        add_class(reflabel, 'reflabel')
        reflabel.string = label
        a.append(reflabel)

    logger.debug('substituting_empty_links: %d total, %d errors' %
                 (total, nerrors))
예제 #6
0
def _warn_once(msg):
    """
        Logs ``msg`` as a warning the first time it is seen; later calls
        with the same message do nothing.
    """
    # The set of messages already emitted is kept on the function object so
    # that no extra module-level name is needed.
    seen = _warn_once.__dict__.setdefault('_seen', set())
    if msg in seen:
        return
    seen.add(msg)
    logger.warning(msg)
예제 #7
0
def sub_link(a, element_id, element, raise_errors):
    """
        Fills in the text of the empty link ``a`` using the label
        attributes of the element it refers to.

        a: the link with href= #element_id
        element_id: the ID that the link points to
        element: the element to which we refer (falsy if it was not found)
        raise_errors: if True, a ValueError is raised when ``element`` is
            missing; otherwise the error is only noted on the link.
    """
    only_number_class = MCDPManualConstants.CLASS_ONLY_NUMBER
    number_name_class = MCDPManualConstants.CLASS_NUMBER_NAME
    only_name_class = MCDPManualConstants.CLASS_ONLY_NAME

    if not element:
        msg = ('Cannot find %s' % element_id)
        note_error2(a, 'Ref. error', 'substituting_empty_links():\n' + msg)
        if raise_errors:
            raise ValueError(msg)
        return

    attrs = element.attrs
    if not (LABEL_WHAT_NUMBER in attrs and LABEL_NAME in attrs):
        msg = (
            'substituting_empty_links: Could not find attributes %s or %s in %s'
            % (LABEL_NAME, LABEL_WHAT_NUMBER, element))
        # Only a warning: nothing is noted on the link and nothing is raised,
        # whatever the value of raise_errors.
        logger.warning(msg)
        return

    label_what_number = attrs[LABEL_WHAT_NUMBER]
    label_number = attrs[LABEL_NUMBER]
    label_what = attrs[LABEL_WHAT]
    label_name = attrs[LABEL_NAME]

    # Work on a copy so that the classes of the link are not modified.
    classes = list(a.attrs.get('class', []))

    if 'toc_link' in classes:
        # "<what> <number> - <name>", each piece in its own span.
        pieces = (
            (label_what, 'toc_what', ' '),
            (label_number, 'toc_number', None),
            (' - ', 'toc_sep', None),
        )
        for text, css_class, trailer in pieces:
            piece = Tag(name='span')
            piece.string = text
            add_class(piece, css_class)
            a.append(piece)
            if trailer is not None:
                a.append(trailer)

        if label_name is None:
            name_tag = Tag(name='span')
            name_tag.string = '(unnamed)'  # XXX
        elif '<' in label_name:
            name_tag = bs(label_name)
            # sanitize the label name
            for br in name_tag.findAll('br'):
                br.replaceWith(NavigableString(' '))
            for nested_link in name_tag.findAll('a'):
                nested_link.extract()
            name_tag.name = 'span'
        else:
            name_tag = bs(label_name)
            assert name_tag.name == 'fragment'
            name_tag.name = 'span'

        add_class(name_tag, 'toc_name')
        a.append(name_tag)
        return

    if only_number_class in classes:
        label = label_number
    elif number_name_class in classes:
        shown = '(unnamed)' if label_name is None else label_name
        label = label_what_number + ' - ' + shown
    elif only_name_class in classes:
        label = '(unnamed)' if label_name is None else label_name
    else:
        # default behavior: figures, tables, bibliography entries and code
        # listings, as well as unnamed elements, only get "<what> <number>"
        number_only = string_starts_with(['fig:', 'tab:', 'bib:', 'code:'],
                                         element_id) or label_name is None
        if number_only:
            label = label_what_number
        else:
            label = label_what_number + ' - ' + label_name

    reflabel = bs(label)
    assert reflabel.name == 'fragment'
    reflabel.name = 'span'
    add_class(reflabel, 'reflabel')
    a.append(reflabel)
예제 #8
0
def check_if_any_href_is_invalid(soup):
    '''
        Checks if references are invalid and tries to correct them.

        Every element whose href starts with "#" is examined. If the href is
        of the form "#frag?query", the query is ignored when looking up the
        target ID.

        If the ID does not exist, the known prefixes ("sec:", "fig:", ...)
        are tried in front of the ID (after removing any prefix it already
        has). If exactly one candidate exists, the href is rewritten to point
        to it, keeping the "?query" part if there was one. Otherwise an
        error is noted on the link.

        Returns a tuple (errors, math_errors) of lists of messages. Note
        that the "multiple heuristic matches" case is noted on the link but
        is not added to ``errors``.
    '''
    logger.debug('check_if_any_href_is_invalid')

    errors = []
    math_errors = []

    # let's first find all the IDs
    id2element, duplicates = get_id2element(soup, 'id')
    # The result for 'name' is not used below; the call is kept as is.
    _name2element, _duplicates = get_id2element(soup, 'name')

    # Prefixes tried by the correction heuristic ('bib' is deliberately
    # left out).
    possible = (
        'part',
        'sec',
        'sub',
        'subsub',
        'fig',
        'tab',
        'code',
        'app',
        'appsub',
        'appsubsub',
        'def',
        'eq',
        'rem',
        'lem',
        'prob',
        'prop',
        'exa',
        'thm',
    )

    for a in soup.select('[href^="#"]'):
        href = a['href']
        if a.has_attr('class') and "mjx-svg-href" in a['class']:
            msg = 'Invalid math reference (sorry, no details): href = %s .' % href
            logger.warning(msg)
            a.insert_before(Comment('Error: %s' % msg))
            math_errors.append(msg)
            continue
        assert href.startswith('#')
        # Split "#frag?query" into the ID and the (optional) query.
        ID, query_sep, query = href[1:].partition('?')

        if ID not in id2element:
            # try to fix it

            # If there is already a prefix, remove it. This works on the ID,
            # not on the raw href, so that the query does not end up in the
            # candidates.
            if ':' in ID:
                core = ID[ID.index(':') + 1:]
            else:
                core = ID

            candidates = [prefix + ':' + core for prefix in possible]
            matches = [c for c in candidates if c in id2element]

            if len(matches) > 1:
                short = 'Ref. error'
                msg = '%s not found, and multiple matches for heuristics (%s)' % (
                    href, matches)
                note_error2(a, short, msg,
                            ['href-invalid', 'href-invalid-missing'])

            elif len(matches) == 1:
                # Keep the query: it is still needed by whoever fills in the
                # text of the link later.
                a['href'] = '#' + matches[0] + query_sep + query

                if show_debug_message_for_corrected_links:
                    short = 'Ref replaced'
                    msg = '%s not found, but corrected in %s' % (href,
                                                                 matches[0])
                    note_warning2(a, short, msg, ['href-replaced'])

            elif not has_class(a, MCDPConstants.CLASS_IGNORE_IF_NOT_EXISTENT):
                short = 'Ref. error'
                msg = 'I do not know the link that is indicated by the link %r.' % href
                note_error2(a, short, msg,
                            ['href-invalid', 'href-invalid-missing'])
                errors.append(msg)

        if ID in duplicates:
            msg = 'More than one element matching %r.' % href
            short = 'Ref. error'
            note_error2(a, short, msg,
                        ['href-invalid', 'href-invalid-multiple'])
            errors.append(msg)

    return errors, math_errors
예제 #9
0
def manual_join(template,
                files_contents,
                bibfile,
                stylesheet,
                remove=None,
                extra_css=None,
                remove_selectors=None,
                hook_before_toc=None):
    """
        Joins several HTML fragments into one document based on ``template``
        and returns the result as a UTF-8 encoded string.

        template: HTML source of the complete page (must contain <html>,
            <head> and <body>).
        files_contents: iterable of ((libname, docname), data) where data is
            the HTML of one fragment. It must not contain 'DOCTYPE'.
        bibfile: if not None, path of a bibliography file whose entries are
            appended to the body (an error is logged if it does not exist).
        stylesheet: if not None, name of the compiled stylesheet to link.
        remove: if not None and not empty, a selector to remove.
        extra_css: if not None, a string of more CSS to be added
        Remove_selectors: list of selectors to remove (e.g. ".draft").

        hook_before_toc if not None is called with hook_before_toc(soup=soup)
        just before generating the toc

        Side effect: what was removed is dumped to 'all_removed.html' in the
        current directory.
    """
    logger.debug('remove_selectors: %s' % remove_selectors)
    logger.debug('remove: %s' % remove)
    from mcdp_utils_xml import bs
    from mcdp_docs.latex.latex_preprocess import assert_not_inside

    template = replace_macros(template)

    # cannot use bs because entire document
    d = BeautifulSoup(template, 'lxml', from_encoding='utf-8')
    assert d.html is not None
    assert '<html' in str(d)
    head = d.find('head')
    assert head is not None
    for x in get_manual_css_frag().contents:
        head.append(x.__copy__())

    if stylesheet is not None:
        link = Tag(name='link')
        link['rel'] = 'stylesheet'
        link['type'] = 'text/css'
        from mcdp_report.html import get_css_filename
        link['href'] = get_css_filename('compiled/%s' % stylesheet)
        head.append(link)

    basename2soup = OrderedDict()
    for (_libname, docname), data in files_contents:
        # Log and validate each document here, while its own source is at
        # hand, so that every document is checked and not just the last one.
        logger.debug('docname %r -> %s KB' % (docname, len(data) / 1024))
        assert_not_inside(data, 'DOCTYPE')
        basename2soup[docname] = bs(data)

    fix_duplicated_ids(basename2soup)

    body = d.find('body')
    add_comments = False
    for docname, content in basename2soup.items():
        if add_comments:
            body.append(NavigableString('\n\n'))
            body.append(Comment('Beginning of document dump of %r' % docname))
            body.append(NavigableString('\n\n'))
        for x in content:
            body.append(x.__copy__())  # not clone, not extract
        if add_comments:
            body.append(NavigableString('\n\n'))
            body.append(Comment('End of document dump of %r' % docname))
            body.append(NavigableString('\n\n'))

    extract_bibtex_blocks(d)
    logger.info('external bib')
    if bibfile is not None:
        if not os.path.exists(bibfile):
            logger.error('Cannot find bib file %s' % bibfile)
        else:
            bibliography_entries = get_bibliography(bibfile)
            bibliography_entries['id'] = 'bibliography_entries'
            body.append(bibliography_entries)

    bibhere = d.find('div', id='put-bibliography-here')
    if bibhere is None:
        logger.warning('Could not find #put-bibliography-here in document. '
                       'Adding one at end of document')
        bibhere = Tag(name='div')
        bibhere.attrs['id'] = 'put-bibliography-here'
        d.find('body').append(bibhere)

    do_bib(d, bibhere)

    logger.info('reorganizing contents in <sections>')
    body2 = reorganize_contents(d.find('body'))
    body.replace_with(body2)

    # Removing
    all_selectors = []
    if remove is not None and remove != '':
        all_selectors.append(remove)
    if remove_selectors:
        all_selectors.extend(remove_selectors)

    logger.debug('all_selectors: %s' % all_selectors)

    removed_chunks = []
    for selector in all_selectors:
        nremoved = 0
        logger.debug('Removing selector %r' % selector)
        toremove = list(body2.select(selector))
        logger.debug('Removing %d objects' % len(toremove))
        for x in toremove:
            nremoved += 1
            nd = len(list(x.descendants))
            logger.debug('removing %s with %s descendants' % (x.name, nd))
            if nd > 1000:
                s = str(x)[:300]
                logger.debug(' it is %s' % s)
            x.extract()

            removed_chunks.append(
                '\n\n' + '-' * 50 + ' chunk %d removed\n' % nremoved)
            removed_chunks.append(str(x))
            removed_chunks.append('\n\n' + '-' * 100 + '\n\n')

        logger.info('Removed %d elements of selector %r' % (nremoved, selector))

    # Dump of everything that was removed, for inspection.
    with open('all_removed.html', 'w') as f:
        f.write(''.join(removed_chunks))

    if hook_before_toc is not None:
        hook_before_toc(soup=d)
    ###
    logger.info('adding toc')
    toc = generate_toc(body2)

    logger.info('TOC:\n' + str(toc))
    toc_ul = bs(toc).ul
    if toc_ul is None:
        # empty TOC: there is nothing to put in the placeholder
        logger.warning('Could not find toc')
    else:
        toc_ul.extract()
        assert toc_ul.name == 'ul'
        toc_ul['class'] = 'toc'
        toc_ul['id'] = 'main_toc'
        toc_selector = 'div#toc'
        tocs = list(d.select(toc_selector))
        if not tocs:
            msg = 'Cannot find any element of type %r to put TOC inside.' % toc_selector
            logger.warning(msg)
        else:
            tocs[0].replaceWith(toc_ul)

    logger.info('checking errors')
    check_various_errors(d)

    from mcdp_docs.check_missing_links import check_if_any_href_is_invalid
    logger.info('checking hrefs')
    check_if_any_href_is_invalid(d)

    # Note that this should be done *after* check_if_any_href_is_invalid()
    # because that one might fix some references
    logger.info('substituting empty links')
    substituting_empty_links(d)

    warn_for_duplicated_ids(d)

    if extra_css is not None:
        logger.info('adding extra CSS')
        add_extra_css(d, extra_css)

    add_footnote_polyfill(d)

    logger.info('converting to string')
    # do not use to_html_stripping_fragment - this is a complete doc
    res = unicode(d)
    res = res.encode('utf8')
    logger.info('done - %d bytes' % len(res))
    return res
예제 #10
0
def manual_join(template,
                files_contents,
                stylesheet,
                remove=None,
                extra_css=None,
                remove_selectors=None,
                hook_before_toc=None,
                references=None,
                resolve_references=True):
    """
        Joins several HTML fragments into one document based on ``template``
        and returns the result as a UTF-8 encoded string.

        files_contents: a list of tuples that can be cast to DocToJoin:
        where the string is a unique one to be used for job naming.
        The docnames must be unique (ValueError otherwise).

        stylesheet: if not None, name of the compiled stylesheet to link.
        remove, remove_selectors: passed to document_final_pass_before_toc().
        extra_css: if not None, a string of more CSS to be added
        Remove_selectors: list of selectors to remove (e.g. ".draft").

        hook_before_toc if not None is called with hook_before_toc(soup=soup)
        just before generating the toc

        references: optional dict mapping an href to an object with
        attributes ``url`` and ``title``. Links with such an href are
        pointed to ``url``, and ``title`` is used as their text if they are
        empty. Defaults to no references.

        resolve_references: passed to document_final_pass_after_toc().

        Raises ValueError if the template has no <html> element.
    """
    check_isinstance(files_contents, list)
    if references is None:
        references = {}

    files_contents = [DocToJoin(*_) for _ in files_contents]

    template0 = template
    template = replace_macros(template)

    # cannot use bs because entire document
    template_soup = BeautifulSoup(template, 'lxml', from_encoding='utf-8')
    d = template_soup
    if d.html is None:
        s = "Invalid template"
        raise_desc(ValueError, s, template0=template0)

    assert d.html is not None
    assert '<html' in str(d)
    head = d.find('head')
    assert head is not None
    for x in get_manual_css_frag().contents:
        head.append(x.__copy__())

    if stylesheet is not None:
        link = Tag(name='link')
        link['rel'] = 'stylesheet'
        link['type'] = 'text/css'
        from mcdp_report.html import get_css_filename
        link['href'] = get_css_filename('compiled/%s' % stylesheet)
        head.append(link)

    from .latex.latex_preprocess import assert_not_inside

    basename2soup = OrderedDict()
    for doc_to_join in files_contents:
        if doc_to_join.docname in basename2soup:
            msg = 'Repeated docname %r' % doc_to_join.docname
            raise ValueError(msg)
        assert_not_inside(doc_to_join.contents, '<fragment')
        assert_not_inside(doc_to_join.contents, 'DOCTYPE')

        frag = bs(doc_to_join.contents)
        basename2soup[doc_to_join.docname] = frag

    fix_duplicated_ids(basename2soup)

    body = d.find('body')
    add_comments = False
    for docname, content in basename2soup.items():
        if add_comments:
            body.append(NavigableString('\n\n'))
            body.append(Comment('Beginning of document dump of %r' % docname))
            body.append(NavigableString('\n\n'))

        copy_contents_into(content, body)

        # A <fragment> element must never end up in the joined document.
        f = body.find('fragment')
        if f:
            msg = 'I found a <fragment> in the manual after %r' % docname
            msg += '\n\n' + indent(str(content), '> ')
            raise Exception(msg)

        if add_comments:
            body.append(NavigableString('\n\n'))
            body.append(Comment('End of document dump of %r' % docname))
            body.append(NavigableString('\n\n'))

    extract_bibtex_blocks(d)
    logger.info('external bib')

    ID_PUT_BIB_HERE = MCDPManualConstants.ID_PUT_BIB_HERE

    bibhere = d.find('div', id=ID_PUT_BIB_HERE)
    if bibhere is None:
        logger.warning(('Could not find #%s in document. '
                        'Adding one at end of document.') % ID_PUT_BIB_HERE)
        bibhere = Tag(name='div')
        bibhere.attrs['id'] = ID_PUT_BIB_HERE
        d.find('body').append(bibhere)

    do_bib(d, bibhere)

    document_final_pass_before_toc(d, remove, remove_selectors)

    if hook_before_toc is not None:
        hook_before_toc(soup=d)

    generate_and_add_toc(d)

    document_final_pass_after_toc(soup=d,
                                  resolve_references=resolve_references)

    if extra_css is not None:
        logger.info('adding extra CSS')
        add_extra_css(d, extra_css)

    document_only_once(d)

    for a in d.select('[href]'):
        href = a.attrs['href']
        if href in references:
            r = references[href]
            a.attrs['href'] = r.url
            # Use .contents (a list) for the emptiness test: .children is an
            # iterator, which is truthy even when the tag has no children.
            if not a.contents:  # empty
                a.append(r.title)

    logger.info('converting to string')
    # do not use to_html_stripping_fragment - this is a complete doc
    res = unicode(d)
    res = res.encode('utf8')
    logger.info('done - %d bytes' % len(res))
    return res