Beispiel #1
0
    def make(self, context):
        """
            Returns a one-element list holding a copy of the element
            whose ID is "<self.id_>:section", looked up in context.soup
            through soup_find_absolutely().

            From the copy, the elements with ID "<eid>:section", for
            each eid in self.exceptions, are removed.

            If the lookup raises KeyError, the result is instead
            [<div><code>self.id_</code></div>], with note_error2()
            called on the <code> tag.

            Raises Exception if one of the exceptions cannot be found
            inside the copy.
        """
        soup = context.soup
        wanted = '%s:section' % self.id_
        try:
            section = soup_find_absolutely(soup, wanted)
        except KeyError:
            msg = 'Cannot find ID %r in document.' % wanted
            placeholder = Tag(name='code')
            placeholder.append(self.id_)
            wrapper = Tag(name='div')
            wrapper.append(placeholder)
            # the note is attached only after the tag has a parent
            note_error2(placeholder, 'ref error', msg)
            return [wrapper]

        logger.info('Adding section %r' % section.attrs['id'])
        # work on a copy, so that the section in the document is untouched
        result = section.__copy__()

        for excluded in self.exceptions:
            logger.info('Removing sections by id "%s"' % excluded)
            excluded_id = excluded + ':section'
            found = result.find(id=excluded_id)
            if found is None:
                raise Exception(
                    'Could not remove "%s" because could not find element with ID "%s"'
                    % (excluded, excluded_id))
            found.extract()

        return [result]
Beispiel #2
0
def move_things_around(soup, raise_if_errors=False):
    '''
        Processes the placeholder tags of the form:

            <move-here src="#line_detector2-line_detector_node2-autogenerated"/>

        Each placeholder is replaced by the element of `soup` whose ID
        is given (after the "#") by the attribute "src"; that element is
        extracted from its current position.

        If no element has that ID, the placeholder is renamed to "span"
        and note_error2() is called on it; then ValueError is raised
        if `raise_if_errors` is true.

        Raises ValueError also if "src" is missing or it does not
        start with "#".
    '''
    for placeholder in soup.select('move-here'):
        if 'src' not in placeholder.attrs:
            raise ValueError(
                'Expected attribute "src" for element %s' % str(placeholder))

        src = placeholder.attrs['src']
        if not src.startswith('#'):
            raise ValueError(
                'Expected that attribute "src" started with "#" for element %s.'
                % str(placeholder))

        target_id = src[1:]
        target = soup.find(id=target_id)
        if target:
            # detach the target from where it is and put it here
            target.extract()
            placeholder.replace_with(target)
            continue

        msg = 'move-here: Could not find ID %r.' % target_id
        placeholder.name = 'span'
        note_error2(placeholder, "invalid move-here reference", msg)
        if raise_if_errors:
            raise ValueError(msg)
Beispiel #3
0
def check_status_codes(soup, realpath):
    """
        Checks the status attribute (STATUS_ATTR) of the headers
        given by all_headers(soup).

        Headers with the attribute "notoc" are skipped.

        If the status is present but it is not in allowed_statuses,
        note_error2() is called on the header.

        If the status is absent, it is set to STATUS_UNKNOWN. In addition,
        for an h1 whose ID does not contain "part:", and if `realpath`
        does not contain "catkin_ws", note_error2() is called with a
        message that lists the possible statuses.
    """
    for header in all_headers(soup):
        if 'notoc' in header.attrs:
            continue

        if STATUS_ATTR in header.attrs:
            status = header.attrs[STATUS_ATTR]
            if status not in allowed_statuses:
                msg = 'Invalid status code %r; expected one of %r' % (
                    status, allowed_statuses)
                msg += '\n' + indent(str(header), '  ')
                note_error2(header, 'syntax error', msg)
            continue

        # Only warn for h1 that are not part
        # (and let's not worry about the Software repo for now)
        needs_warning = (header.name == 'h1'
                         and 'part:' not in header.attrs.get('id', '')
                         and 'catkin_ws' not in realpath)
        if needs_warning:
            # show the header without the github attributes
            shown = header.__copy__()
            for attribute in ('github-blob-url', 'github-edit-url'):
                shown.attrs.pop(attribute, None)

            pieces = [
                'Status not found for this header:\n\n  %s' % str(shown),
                '\n\n in file %s' % realpath,
                '\n\nPlease set the status for all the top-level headers.',
                '\n\nThe syntax is:\n\n      # My section    {#SECTIONID status=STATUS}',
                '\n\nThese are the possible choices for the status:\n',
            ]
            for code, description in allowed_statuses.items():
                if code == STATUS_UNKNOWN:
                    continue
                first = '%23s   ' % ('status=%s' % code)
                pieces.append('\n' + indent(description, '', first))
            note_error2(header, 'missing status', ''.join(pieces))

        header.attrs[STATUS_ATTR] = STATUS_UNKNOWN
Beispiel #4
0
def check_lang_codes(soup):
    """
        Checks the language attribute (LANG_ATTR) of the headers
        given by all_headers(soup).

        If the attribute is present and its value is not in allowed_langs,
        note_error2() is called on the header.
    """
    for header in all_headers(soup):
        if LANG_ATTR not in header.attrs:
            continue
        lang = header.attrs[LANG_ATTR]
        if lang in allowed_langs:
            continue
        summary = 'Invalid lang code %r; expected one of %r' % (
            lang, allowed_langs)
        msg = summary + '\n' + indent(str(header), '  ')
        note_error2(header, 'syntax error', msg)
Beispiel #5
0
def display_files(soup, defaults, raise_errors):
    """
        Processes all the <display-file> tags in `soup`.

        The attribute "src" is stripped of whitespace (and set to ''
        if it was missing). The tags whose "src" starts with "github:"
        are passed to display_file().

        For the others: if `raise_errors` is true, DPSemanticError
        is raised; otherwise note_error2() is called on the tag.

        Returns the number of tags passed to display_file().
    """
    processed = 0
    for tag in soup.find_all('display-file'):
        src = tag.attrs.get('src', '').strip()
        tag.attrs['src'] = src

        if not src.startswith('github:'):
            msg = 'Unknown schema %r; I only know "github:".' % src
            if raise_errors:
                raise DPSemanticError(msg)
            note_error2(tag, 'syntax error', msg)
            continue

        display_file(tag, defaults, raise_errors)
        processed += 1
    return processed
Beispiel #6
0
def check_no_patently_wrong_links(soup):
    """
        Looks for links (<a href=...>) whose href starts with
        "#http:" or "#https:", that is, a regular URL to which
        an extra "#" was prepended.

        For each of them, note_error2() is called with an explanation
        of the two syntaxes that were mixed.
    """
    wrong_prefixes = ('#http:', '#https:')
    for link in soup.select('a[href]'):
        url = link.attrs['href']
        if url.startswith(wrong_prefixes):
            # second placeholder: the url without the extra "#"
            explanation = """
This link is invalid:

    URL = %s

I think there is an extra "#" at the beginning.

Note that the Markdown syntax is:

    [description](URL)

where URL can be:

    1) using the fragment notation, such as

        URL = '#SECTIONID'

    for example:

        Look at [the section](#section-name)


    2) a regular URL, such as:

        URL = 'http://google.com'

    that is:

        Look at [the website](http://google.com)


You have mixed the two syntaxes.

You probably meant to write the url

    %s

but you added an extra "#" at the beginning that should have not been there.

Please remove the "#".

            """ % (url, url[1:])
            note_error2(link, 'syntax error', explanation.lstrip())
Beispiel #7
0
def display_file(element, defaults, raise_errors):
    """
        Replaces a <display-file src="github:..."> tag with a <div>
        that shows the referenced lines of the file.

        The reference is parsed with parse_github_file_ref() and resolved
        with resolve_reference(). If that raises CouldNotResolveRef:
        raise_wrapped(DPSemanticError, ...) is called if `raise_errors`
        is true; otherwise note_error2() is called on the element
        and the function returns without replacing it.

        The lines shown go from ref.from_line to ref.to_line, both
        included; a bound that is None stands for the first/last line.

        The generated structure is:

            <div figure-id="code:BASENAME-FROM-TO">
                <figcaption><a href="URL"><code>BASENAME</code></a></figcaption>
                <pre><code>CONTENTS</code></pre>
            </div>
    """
    assert element.name == 'display-file'
    assert 'src' in element.attrs
    src = element.attrs['src']
    assert src.startswith('github:')
    ref = parse_github_file_ref(src)

    try:
        ref = resolve_reference(ref, defaults=defaults)
    except CouldNotResolveRef as e:
        msg = 'Could not resolve reference %r' % src
        if not raise_errors:
            msg += '\n\n' + indent(str(e), '> ')
            note_error2(element, 'reference error', msg)
            return
        raise_wrapped(DPSemanticError, e, msg, compact=True)

    # select the lines; both bounds are inclusive
    all_lines = ref.contents.split('\n')
    first = 0 if ref.from_line is None else ref.from_line
    last = len(all_lines) - 1 if ref.to_line is None else ref.to_line
    contents = "\n".join(all_lines[first:last + 1])

    base = os.path.basename(ref.path)

    # caption: the file name, linked to the url
    filename = Tag(name='code')
    filename.append(base)
    link = Tag(name='a')
    link.append(filename)
    link.attrs['href'] = ref.url
    figcaption = Tag(name='figcaption')
    figcaption.append(link)

    # listing
    listing = Tag(name='code')
    pre = Tag(name='pre')
    pre.append(listing)
    listing.append(contents)

    div = Tag(name='div')
    div.attrs['figure-id'] = 'code:%s' % (base + '-%d-%d' % (first, last))
    div.append(figcaption)
    div.append(pre)
    element.replace_with(div)
Beispiel #8
0
def get_id2element(soup, att):
    """
        Builds the map from the values of the attribute `att`
        (e.g. 'id' or 'name') to the elements of `soup` that carry them.

        Ignored are the values found on an <svg>, on a node below
        an <svg>, and those that start with "MathJax".

        When a value is found on more than one element, note_error2()
        is called on both the current and the previously stored element,
        the later element wins in the map, and a summary is logged
        at the end.

        Returns the tuple (id2element, duplicates), where duplicates
        is the set of the values found more than once.
    """
    # ignore the maths
    ignored_selectors = (
        'svg [%s]' % att,  # node with ID below SVG
        'svg[%s]' % att,  # svg with ID
        '[%s^="MathJax"]' % att,  # stuff created by MathJax
    )
    ignore = set()
    for selector in ignored_selectors:
        for node in soup.select(selector):
            ignore.add(node[att])

    id2element = {}
    duplicates = set()
    for node in soup.select('[%s]' % att):
        value = node[att]
        if value in ignore:
            continue
        if value in id2element:
            duplicates.add(value)
            for clashing in (node, id2element[value]):
                note_error2(clashing, 'Naming',
                            'More than one element with id %r.' % value)
        id2element[value] = node

    if duplicates:
        listing = ", ".join(sorted(duplicates))
        logger.error('%d duplicated %s found (not errored): %s' %
                     (len(duplicates), att, listing))
    return id2element, duplicates
Beispiel #9
0
def make_videos_(o, raise_on_errors):
    """
        Replaces the element `o`, which must have an attribute
        src="vimeo:VIMEO_ID", with a <div class="video"> that contains
        three alternative versions:

            <div class="only-web">       an <iframe> with the player
            <div class="only-ebook">     the thumbnail, linked to the video
            <div class="only-deadtree">  the thumbnail and the url as text

        o: the element to replace.
        raise_on_errors: what to do if get_vimeo_info() raises
            VimeoInfoException: if true, the exception is propagated;
            otherwise note_error2() is called on `o`, which is left
            in place.

        If "src" is missing or it does not start with "vimeo:", then
        raise_desc(ValueError, ...) is called (presumably it raises
        ValueError - defined elsewhere).
    """
    if 'src' not in o.attrs:
        msg = 'The video does not have a "src" attribute.'
        raise_desc(ValueError, msg, element=str(o))

    src = o.attrs['src']
    prefix = 'vimeo:'
    if not src.startswith(prefix):
        # Two values need two placeholders: with only one, the formatting
        # itself raised TypeError and this message was never produced.
        msg = 'Invalid attribute "src" %r: it does not start with %r.' % (
            src, prefix)
        raise_desc(ValueError, msg, element=str(o))

    vimeo_id = src[len(prefix):]

    #     <iframe src="https://player.vimeo.com/video/152233002"
    #         class="embed-responsive-item"
    #         frameborder="0" webkitallowfullscreen="" mozallowfullscreen="" allowfullscreen="">

    try:
        vimeo_info = get_vimeo_info(vimeo_id)
    except VimeoInfoException as e:
        if raise_on_errors:
            raise
        note_error2(o, 'Resource error', str(e))
        return

    ONLY_WEB = 'only-web'
    ONLY_EBOOK = 'only-ebook'
    ONLY_DEADTREE = 'only-deadtree'

    d = Tag(name='div')
    d.attrs['class'] = 'video'

    d.append(Comment('This is the iframe, for online playing.'))
    web = Tag(name='div')
    web.attrs['class'] = ONLY_WEB
    player = Tag(name='iframe')
    player.attrs['class'] = 'video-vimeo-player'
    player.attrs['src'] = 'https://player.vimeo.com/video/' + vimeo_id
    player.attrs['frameborder'] = 0
    player.attrs['webkitallowfullscreen'] = 1
    player.attrs['mozallowfullscreen'] = 1
    player.attrs['allowfullscreen'] = 1
    web.append(player)
    d.append(web)

    d.append(Comment('This is the thumbnail, for ebook'))
    ebook = Tag(name='div')
    ebook.attrs['class'] = ONLY_EBOOK
    link = Tag(name='a')
    link.attrs['href'] = vimeo_info.url
    img = Tag(name='img')
    img.attrs['class'] = 'video-vimeo-thumbnail-ebook'
    img.attrs['src'] = vimeo_info.thumbnail_large
    img.attrs['title'] = vimeo_info.title
    link.append(img)
    ebook.append(link)
    d.append(ebook)

    d.append(Comment('This is the textual version for printing.'))
    deadtree = Tag(name='div')
    deadtree.attrs['class'] = ONLY_DEADTREE
    img = Tag(name='img')
    img.attrs['class'] = 'video-vimeo-thumbnail-deadtree'
    img.attrs['src'] = vimeo_info.thumbnail_large
    img.attrs['title'] = vimeo_info.title
    deadtree.append(img)
    p = Tag(name='p')
    p.append("The video is at %s." % vimeo_info.url)
    deadtree.append(p)
    d.append(deadtree)

    o.replace_with(d)
Beispiel #10
0
def sub_link(a, element_id, element, raise_errors):
    """
        Fills in the text of the link `a` using the labels stored
        in the attributes of the element it refers to.

        a: the link with href= #element_id
        element_id: the ID that the link refers to
        element: the element to which we refer (false if not found)
        raise_errors: if true, ValueError is raised when `element`
            is not found

        If `element` does not have the attributes LABEL_WHAT_NUMBER
        and LABEL_NAME, a warning is logged and the link is not touched.

        If the link has class "toc_link", spans with classes toc_what,
        toc_number, toc_sep, toc_name are appended. Otherwise one span
        with class "reflabel" is appended; its text depends on
        the classes CLASS_ONLY_NUMBER, CLASS_NUMBER_NAME, CLASS_ONLY_NAME
        of the link.
    """
    CLASS_ONLY_NUMBER = MCDPManualConstants.CLASS_ONLY_NUMBER
    CLASS_NUMBER_NAME = MCDPManualConstants.CLASS_NUMBER_NAME
    CLASS_ONLY_NAME = MCDPManualConstants.CLASS_ONLY_NAME

    if not element:
        msg = ('Cannot find %s' % element_id)
        note_error2(a, 'Ref. error', 'substituting_empty_links():\n' + msg)
        if raise_errors:
            raise ValueError(msg)
        return

    has_labels = (LABEL_WHAT_NUMBER in element.attrs
                  and LABEL_NAME in element.attrs)
    if not has_labels:
        # this is only a warning, not an error noted on the link
        logger.warning(
            'substituting_empty_links: Could not find attributes %s or %s in %s'
            % (LABEL_NAME, LABEL_WHAT_NUMBER, element))
        return

    label_what_number = element.attrs[LABEL_WHAT_NUMBER]
    label_number = element.attrs[LABEL_NUMBER]
    label_what = element.attrs[LABEL_WHAT]
    label_name = element.attrs[LABEL_NAME]

    # a copy: the list of classes of the link must not be modified
    classes = list(a.attrs.get('class', []))

    def text_span(text, css_class):
        # <span class="css_class">text</span>
        span = Tag(name='span')
        span.string = text
        add_class(span, css_class)
        return span

    if 'toc_link' in classes:
        a.append(text_span(label_what, 'toc_what'))
        a.append(' ')
        a.append(text_span(label_number, 'toc_number'))
        a.append(text_span(' - ', 'toc_sep'))

        if label_name is None:
            name_node = Tag(name='span')
            name_node.string = '(unnamed)'  # XXX
        elif '<' in label_name:
            name_node = bs(label_name)
            # sanitize the label name: no line breaks, no nested links
            for br in name_node.findAll('br'):
                br.replaceWith(NavigableString(' '))
            for nested_link in name_node.findAll('a'):
                nested_link.extract()
            name_node.name = 'span'
        else:
            name_node = bs(label_name)
            assert name_node.name == 'fragment'
            name_node.name = 'span'
        add_class(name_node, 'toc_name')
        a.append(name_node)
        return

    shown_name = '(unnamed)' if label_name is None else label_name
    if CLASS_ONLY_NUMBER in classes:
        label = label_number
    elif CLASS_NUMBER_NAME in classes:
        label = label_what_number + ' - ' + shown_name
    elif CLASS_ONLY_NAME in classes:
        label = shown_name
    else:
        # default behavior: figures, tables, etc. only get the number
        number_only = string_starts_with(['fig:', 'tab:', 'bib:', 'code:'],
                                         element_id) or label_name is None
        if number_only:
            label = label_what_number
        else:
            label = label_what_number + ' - ' + label_name

    frag = bs(label)
    assert frag.name == 'fragment'
    frag.name = 'span'
    add_class(frag, 'reflabel')
    a.append(frag)
Beispiel #11
0
def substituting_empty_links(soup, raise_errors=False):
    '''
        Fills in the text of the empty links that point to a fragment,
        and marks the other empty links as errors.

        default style is [](#sec:systems)  "Chapter 10"

        You can also use "class":

            <a href='#sec:name' class='only_number'></a>

        soup: the document, modified in place.
        raise_errors: passed to sub_link().

        First, sub_link() is called for each entry given by
        get_empty_links_to_fragment(soup). Then, for each link given
        by get_empty_links(soup), note_error2() is called exactly once,
        except for the links whose href starts with "python:",
        which are skipped.
    '''
    for le in get_empty_links_to_fragment(soup):
        a = le.linker
        element_id = le.eid
        element = le.linked
        sub_link(a, element_id, element, raise_errors)

    # Now mark as errors the links that get_empty_links() reports
    # (presumably the ones still empty after the substitution above).
    for a in get_empty_links(soup):
        href = a.attrs.get('href', '(not present)')
        if not href:
            href = '""'
        if href.startswith('python:'):
            continue

        if href.startswith(('http:', 'https:')):
            msg = """
This link text is empty:

    ELEMENT

Note that the syntax for links in Markdown is

    [link text](URL)

For the internal links (where URL starts with "#"), then the documentation
system can fill in the title automatically, leading to the format:

    [](#other-section)

However, this does not work for external sites, such as:

    [](MYURL)

So, you need to provide some text, such as:

    [this useful website](MYURL)

"""
            msg = msg.replace('ELEMENT', str(a))
            msg = msg.replace('MYURL', href)
        else:
            # The %-formatting must come before ELEMENT is substituted,
            # because str(a) may contain "%" characters.
            msg = """
This link is empty:

    ELEMENT

It might be that the writer intended for this
link to point to something, but they got the syntax wrong.

    href = %s

As a reminder, to refer to other parts of the document, use
the syntax "#ID", such as:

    See [](#fig:my-figure).

    See [](#section-name).

""" % href
            msg = msg.replace('ELEMENT', str(a))

        # One note per link. Previously the external links were noted
        # twice: once in their branch and once more after the if/else.
        note_error2(a, 'syntax error', msg.strip())
Beispiel #12
0
def check_if_any_href_is_invalid(soup):
    '''
        Checks if references are invalid and tries to correct them.

        The elements considered are those with an href starting with "#".
        If the href is of the form "#frag?query", the query is ignored
        when looking for the ID.

        If the ID is not found, the ID is tried again with each
        of the known prefixes ("sec:", "fig:", ...), after removing
        the prefix that it might already have:

        - exactly one match: the href is rewritten to point to it;
        - more than one match: note_error2() is called on the link;
        - no match: note_error2() is called and the message is added
          to the errors, unless the link has the class
          MCDPConstants.CLASS_IGNORE_IF_NOT_EXISTENT.

        A link whose ID is carried by more than one element is also
        an error.

        The links with class "mjx-svg-href" are only reported as
        math errors, with a comment inserted before them.

        Returns the tuple (errors, math_errors), two lists of messages.
    '''
    logger.debug('check_if_any_href_is_invalid')

    errors = []
    math_errors = []

    # let's first find all the IDs
    id2element, duplicates = get_id2element(soup, 'id')
    # called as well for the attribute "name"; the results are not used here
    _name2element, _duplicates = get_id2element(soup, 'name')

    # the prefixes tried by the heuristics (the same for every link)
    possible = (
        'part',
        'sec',
        'sub',
        'subsub',
        'fig',
        'tab',
        'code',
        'app',
        'appsub',
        'appsubsub',
        'def',
        'eq',
        'rem',
        'lem',
        'prob',
        'prop',
        'exa',
        'thm',
        #                         'bib'
    )

    for a in soup.select('[href^="#"]'):
        href = a['href']
        if a.has_attr('class') and "mjx-svg-href" in a['class']:
            msg = 'Invalid math reference (sorry, no details): href = %s .' % href
            logger.warning(msg)
            a.insert_before(Comment('Error: %s' % msg))
            math_errors.append(msg)
            continue
        assert href.startswith('#')
        ID = href[1:]
        # remove query if it exists
        if '?' in ID:
            ID = ID[:ID.index('?')]

        if ID not in id2element:
            # try to fix it

            # If there is already a prefix, remove it. This works on the ID
            # and not on the href, which still contains the query: otherwise
            # "#sec:foo?x" would give the core "foo?x", and "#foo?a:b"
            # the core "b".
            if ':' in ID:
                core = ID[ID.index(':') + 1:]
            else:
                core = ID

            candidates = [prefix + ':' + core for prefix in possible]
            matches = [c for c in candidates if c in id2element]

            if len(matches) > 1:
                # NOTE(review): this case is not added to `errors`,
                # unlike the other two errors below - confirm it is intended.
                short = 'Ref. error'
                msg = '%s not found, and multiple matches for heuristics (%s)' % (
                    href, matches)
                note_error2(a, short, msg,
                            ['href-invalid', 'href-invalid-missing'])

            elif len(matches) == 1:
                # the new href does not carry the query
                a['href'] = '#' + matches[0]

                if show_debug_message_for_corrected_links:
                    short = 'Ref replaced'
                    msg = '%s not found, but corrected in %s' % (href,
                                                                 matches[0])
                    note_warning2(a, short, msg, ['href-replaced'])

            elif not has_class(a, MCDPConstants.CLASS_IGNORE_IF_NOT_EXISTENT):
                short = 'Ref. error'
                msg = 'I do not know the link that is indicated by the link %r.' % href
                note_error2(a, short, msg,
                            ['href-invalid', 'href-invalid-missing'])
                errors.append(msg)

        if ID in duplicates:
            msg = 'More than one element matching %r.' % href
            short = 'Ref. error'
            note_error2(a, short, msg,
                        ['href-invalid', 'href-invalid-multiple'])
            errors.append(msg)

    return errors, math_errors