Ejemplo n.º 1
0
def check_intrinsics():
    """Report %...% intrinsic references that are unknown or use an old name.

    Only '#LITERAL' (text) nodes are scanned. Scanning all of spec.text
    would also find occurrences in element IDs, which are lower-cased.
    (As a consequence, occurrences of "%<var>Foo</var>Prototype%" are
    skipped.)
    """
    stderr("checking intrinsics...")
    header("checking intrinsics...")

    not_real_intrinsics = (
        # placeholders
        '%name%',
        '%name.a.b%',
        # metavariable interpolation
        '%_NativeError_%',
        '%_TypedArray_%',
    )
    intrinsic_reo = re.compile(r'%\S+%')

    for literal in spec.doc_node.each_descendant_named('#LITERAL'):
        matches = intrinsic_reo.finditer(
            spec.text, literal.start_posn, literal.end_posn)
        for match in matches:
            name = match.group(0)
            if name in not_real_intrinsics:
                continue
            where = match.start(0)

            # A reference strictly inside one of the recorded table spans
            # is allowed to use an old name.
            inside_table = any(
                span_start < where < span_end
                for (span_start, span_end) in well_known_intrinsics_table_spans
            )

            status = well_known_intrinsics.get(name, "doesn't exist")
            if status == "doesn't exist":
                msg_at_posn(where, f"Intrinsic doesn't exist: {name}")
                continue
            if status.startswith("old name") and not inside_table:
                msg_at_posn(where, f"Using {status}")
Ejemplo n.º 2
0
def check_for_extra_blank_lines():
    """Report runs of blank lines and blank lines before </emu-clause>.

    A "blank line" here may contain spaces but nothing else.
    """
    stderr("checking for extra blank lines...")

    # Two or more consecutive blank lines: report at the newline that
    # terminates the run.
    for run in re.finditer(r'\n( *\n){2,}', spec.text):
        msg_at_posn(run.end() - 1, "2 or more adjacent blank lines")

    # A blank line immediately followed by an (optionally indented)
    # </emu-clause>: report at the start of the blank line.
    for hit in re.finditer(r'\n( *\n *</emu-clause>)', spec.text):
        msg_at_posn(hit.start(1), "blank line before end-clause tag")
Ejemplo n.º 3
0
 def check_tag_indent(line_s, tag_s, element_name):
     """Check that the tag at tag_s is indented by `expected_indent`.

     line_s is the position of the start of the tag's line, and tag_s is
     the position of the tag itself. `expected_indent` comes from the
     enclosing scope. A message is issued if anything other than
     whitespace precedes the tag on its line, or if the amount of leading
     whitespace differs from `expected_indent`.
     """
     leading = spec.text[line_s:tag_s]

     # Something non-blank precedes the tag: indentation isn't meaningful.
     if leading and not leading.isspace():
         msg_at_posn(tag_s, f"{element_name} tag isn't the first non-blank thing on the line")
         return

     actual_indent = len(leading)
     if actual_indent != expected_indent:
         msg_at_posn(tag_s, f"expected indent={expected_indent}, got {actual_indent}")
Ejemplo n.º 4
0
def check_characters():
    """Report every character that is neither a newline nor in ' '..'~'.

    U+211D is tolerated. When `ascii_replacement` has an entry for the
    offending character, the message suggests it.
    """
    stderr("checking characters...")
    header("checking characters...")

    for found in re.finditer(r'[^\n -~]', spec.text):
        where = found.start()
        ch = spec.text[where]

        if ch == '\u211d':
            # PR 1135 introduced tons of these
            continue

        hint = ''
        if ch in ascii_replacement:
            hint = ": maybe change to %s" % ascii_replacement[ch]

        msg_at_posn(where, "non-ASCII character U+%04x%s" % (ord(ch), hint))
Ejemplo n.º 5
0
        def check_lines(lo, hi, emi):
            """Recursively check the indentation of lines lo..hi-1.

            Each entry of `line_` (from the enclosing scope) is an
            (indent, posn) pair. Line `lo` is expected to have indent
            `emi`. The following lines are split into groups headed by
            each line whose indent is <= that of line `lo`, and the lines
            after each group head are checked recursively, one
            INDENT_UNIT deeper.
            """
            if lo == hi:
                return
            assert lo < hi

            (head_indent, head_posn) = line_[lo]
            if head_indent != emi:
                msg_at_posn(head_posn, f"expected indent={emi}, got {head_indent}")

            # Indexes of the lines that start a group: `lo` itself, then
            # every later line that is not indented deeper than `lo`.
            boundaries = [lo]
            for k in range(lo+1, hi):
                (k_indent, k_posn) = line_[k]
                if k_indent < head_indent:
                    # NOTE(review): this fires when the indent is *less*
                    # than head_indent, so the "expected indent<" wording
                    # looks inverted -- confirm the intended message.
                    msg_at_posn(k_posn, f"expected indent<{head_indent}, got {k_indent}")
                if k_indent <= head_indent:
                    # An under-indented line is treated as a group head too.
                    boundaries.append(k)
            boundaries.append(hi)

            for (start, stop) in zip(boundaries, boundaries[1:]):
                check_lines(start+1, stop, head_indent + INDENT_UNIT)
Ejemplo n.º 6
0
def check_characters():
    """Report every character that is neither a newline nor in ' '..'~'.

    Note that this will (among other things) find and complain about TAB
    characters. A few specific non-ASCII characters are tolerated.
    """
    stderr("checking characters...")

    tolerated = (
        '\u211d',  # PR 1135 introduced tons of these
        '\u2124',
        '\U0001d53d',
    )

    for found in re.finditer(r'[^\n -~]', spec.text):
        where = found.start()
        ch = spec.text[where]
        if ch in tolerated:
            continue

        hint = ''
        if ch in ascii_replacement:
            hint = ": maybe change to %s" % ascii_replacement[ch]

        msg_at_posn(where, "non-ASCII character U+%04x%s" % (ord(ch), hint))
Ejemplo n.º 7
0
def _check_section_order(section):
    # In some sections, the subsections should be in "alphabetical order".

    if section.element_name == '#DOC':
        stderr("_check_section_order...")
    else:

        if section.section_kind in [
                'group_of_properties1',
                'group_of_properties2',
                'properties_of_an_intrinsic_object',
                'properties_of_instances',
        ]:
            prev_title = None
            prev_t = None
            for child in section.section_children:
                if child.section_kind not in [
                        'group_of_properties1',
                        'group_of_properties2',
                        'catchall',
                        'anonymous_built_in_function',
                ]:
                    assert re.search(r'_property(_xref)?$',
                                     child.section_kind), child.section_kind
                    t = child.section_title
                    t = t.lower()
                    t = t.replace('int8', 'int08')
                    t = re.sub(r'^get ', '', t)
                    if section.section_title == 'Properties of the RegExp Prototype Object':
                        t = re.sub(r' \[ @@(\w+) \]', r'.\1', t)
                    else:
                        t = re.sub(r' \[ @@(\w+) \]', r'.zz_\1', t)
                    if prev_t is not None and t <= prev_t:
                        msg_at_posn(
                            child.start_posn, '"%s" should be before "%s"' %
                            (child.section_title, prev_title))
                    prev_t = t
                    prev_title = child.section_title

    for child in section.section_children:
        _check_section_order(child)
Ejemplo n.º 8
0
def check_references_to_intrinsics():
    """Report %...% references whose base intrinsic is not well-known.

    Only '#LITERAL' (text) nodes are scanned. Scanning all of spec.text
    would also find occurrences in element IDs, which are lower-cased.
    (As a consequence, occurrences of "%<var>Foo</var>Prototype%" are
    skipped.)
    """
    stderr("check_references_to_intrinsics...")

    not_real_intrinsics = (
        # placeholders
        '%name%',
        '%name.a.b%',
        # metavariable interpolation
        '%_NativeError_%',
        '%_TypedArray_%',
    )
    reference_reo = re.compile(r'%\S+%')

    for literal in spec.doc_node.each_descendant_named('#LITERAL'):
        for match in reference_reo.finditer(spec.text, literal.start_posn, literal.end_posn):
            reference = match.group(0)
            if reference in not_real_intrinsics:
                continue

            # Drop everything from the first '.' up to the closing '%'.
            base = re.sub(r'\.[^%]+', '', reference)
            if base in well_known_intrinsics:
                continue

            msg_at_posn(match.start(0), f"Intrinsic doesn't exist: {base}")
Ejemplo n.º 9
0
    def close_open_child(end_tag_start_posn, end_tag_end_posn, element_name):
        nonlocal current_open_node
        if element_name != current_open_node.element_name:
            msg_at_posn(
                end_tag_start_posn,
                f"ERROR: The currently-open element is a {current_open_node.element_name!r}, but this is an end-tag for {element_name!r}.\nSkipping the end-tag, to see if that helps."

            )
            # This old code might be useful to adapt:
            # if current_open_node.parent is None:
            #     self._report("current_open_node.parent is None")
            # elif element_name == current_open_node.parent.element_name:
            #     self._report("Assuming that </%s> is missing" % current_open_node.element_name)
            #     # Pretend that we got the missing endtag:
            #     self.handle_endtag(current_open_node.element_name)
            #     # That will change current_open_node.
            #     assert element_name == current_open_node.element_name
            #     self.handle_endtag(current_open_node.element_name)
            return

        current_open_node.inner_start_posn = current_open_node.end_posn
        current_open_node.inner_end_posn   = end_tag_start_posn
        current_open_node.end_posn         = end_tag_end_posn
        current_open_node = current_open_node.parent
Ejemplo n.º 10
0
def check_section_title(h1, node):
    """Check a section title for capitalization, symbol and paren style.

    Args:
        h1: the title element; its inner_source_text() is the title and
            its inner_start_posn is used to position messages.
        node: the section node; capitalization is not checked when
            node.parent.section_title is 'Terms and Definitions'.

    Raises:
        AssertionError: if the title contains more than one '(' or
            more than one ')'.
    """
    title = h1.inner_source_text()

    # Check capitalization.
    if node.parent.section_title != 'Terms and Definitions':
        # Look for a lower-case word after a space, unless it starts with
        # one of the listed short words.
        mo = re.search(r' \b(?!(an|and|for|in|of|on|the|to|with))([a-z]\w+)',
                       title)
        if mo:
            # +1 to skip the leading space in the match.
            msg_at_posn(h1.inner_start_posn + mo.start() + 1,
                        "title word '%s' should be capitalized?" % mo.group(2))

    # Check references to well-known symbols.
    # (Raw string: '\[' is not a valid escape in an ordinary string literal.)
    mo1 = re.search(r'\[ *@', title)
    if mo1:
        mo2 = re.search(r'( |^)\[ @@\w+ \]( |$)', title)
        if not mo2:
            msg_at_posn(
                h1.inner_start_posn + mo1.start(),
                "Title's reference to well-known symbol does not conform to expected pattern"
            )

    # Check parentheses and spaces
    assert title.count('(') <= 1
    assert title.count(')') <= 1
    lpp = title.find('(')
    if lpp >= 0:
        if re.search(r' \(( .+)? \)( Concrete Method)?$', title):
            # space before and after '('
            # space before ")"
            # If param list is empty, just one space between parens.
            pass
        elif title == 'RegExp (Regular Expression) Objects':
            # Use of parens that isn't a parameter list.
            pass
        else:
            msg_at_posn(h1.inner_start_posn + lpp,
                        "Something odd here wrt parens + spaces")
Ejemplo n.º 11
0
def check_tables():
    """Check every <emu-table> in the spec, based on its caption.

    For each table this determines the caption, reports a missing 'id'
    attribute, and then applies caption-specific assertions about the
    caption wording and the header row. Tables of well-known intrinsics
    are additionally harvested: their spans are appended to
    `well_known_intrinsics_table_spans` and their names are recorded in
    `well_known_intrinsics`.

    Unexpected table shapes are reported via `assert`, so they abort the
    run rather than producing a message.
    """
    stderr('check_tables...')
    header("checking tables...")
    for et in spec.doc_node.each_descendant_named('emu-table'):
        # The caption may come from a 'caption' attribute or from an
        # <emu-caption> child element (at most one is allowed).
        a_caption = et.attrs.get('caption', None)
        caption_children = [c for c in et.each_child_named('emu-caption')]
        if len(caption_children) == 0:
            e_caption = None
        elif len(caption_children) == 1:
            [emu_caption] = caption_children
            e_caption = emu_caption.inner_source_text().strip()
        else:
            assert 0
        # ----
        # Exactly one of the two caption sources must be present.
        if a_caption and not e_caption:
            caption = a_caption
        elif e_caption and not a_caption:
            caption = e_caption
        else:
            assert 0, (a_caption, e_caption)

        if 'id' not in et.attrs:
            msg_at_posn(et.start_posn,
                        f'no id attribute for table with caption "{caption}"')

        # The first <tr> is taken to be the header row; header_line is
        # its <th> texts joined with '; '.
        header_tr = [tr for tr in et.each_descendant_named('tr')][0]
        header_line = '; '.join(
            th.inner_source_text().strip()
            for th in header_tr.each_descendant_named('th'))
        if 'Field' in caption:
            # print(header_line, ':', caption)
            if re.match(r'^(.+) Fields$', caption):
                pass
            elif re.match(r'^Additional Fields of (.+)$', caption):
                pass
            elif caption == 'Fields of the Private Name':
                # PR 1668
                pass
            else:
                assert 0, caption

        elif 'Slot' in caption:
            if re.match(r'^Internal Slots of (.+)$', caption):
                pass
            else:
                assert 0

        elif 'Method' in caption:
            if 'Internal Methods' in caption:
                assert caption in [
                    'Essential Internal Methods',
                    'Additional Essential Internal Methods of Function Objects'
                ]
                assert header_line == 'Internal Method; Signature; Description'
            elif 'Records' in caption:
                assert re.fullmatch(
                    r'(Additional )?(Abstract )?Methods of .+ Records',
                    caption), caption
                assert header_line == 'Method; Purpose'
            elif caption == 'Proxy Handler Methods':
                assert header_line == 'Internal Method; Handler Method'
            else:
                assert 0

        elif 'Properties' in caption:
            assert re.fullmatch(
                r'<i>\w+</i> Interface( (Required|Optional))? Properties',
                caption)
            assert header_line == 'Property; Value; Requirements'

        elif 'Intrinsic Objects' in caption:
            assert caption in [
                'Well-Known Intrinsic Objects',
                'Additional Well-known Intrinsic Objects',
            ]
            # Remember where this table is.
            well_known_intrinsics_table_spans.append(
                (et.start_posn, et.end_posn))

            # Maps each new-style name found in column 3 to the position
            # of the row that mentioned it.
            new_names = {}
            assert header_line == 'Intrinsic Name; Global Name; ECMAScript Language Association'
            for tr in et.each_descendant_named('tr'):
                if tr == header_tr: continue
                # Every body row must have exactly three <td> cells.
                [oname, global_name, assoc] = [
                    td.inner_source_text().strip()
                    for td in tr.each_descendant_named('td')
                ]

                assert re.fullmatch(r'%\w+%', oname)
                assert oname not in well_known_intrinsics

                # The global name is either empty or a back-quoted dotted name.
                assert re.fullmatch(r"|`\w+(\.\w+)*`", global_name)

                if ';' in assoc or 'i.e.' in assoc:
                    # Column 3 ends with "; i.e., %Foo.bar%", giving a
                    # dotted name for the same intrinsic.
                    mo = re.search(r'; i.e., (%\w+(\.\w+)+%)$', assoc)
                    assert mo
                    new_name = mo.group(1)
                    assert new_name not in well_known_intrinsics
                    assert new_name not in new_names
                    new_names[new_name] = tr.start_posn

                    assert new_name != oname
                    # The status string for an old name embeds a
                    # substitution command from oname to new_name.
                    well_known_intrinsics[
                        oname] = f"old name;  2950,$s/{oname}/{new_name}/gc"
                    well_known_intrinsics[new_name] = "new name"
                else:
                    well_known_intrinsics[oname] = "only name"

            # Have to do this after processing the table,
            # because of possible forward references.
            # (E.g., on the row for %AsyncGenerator%,
            # column 3 mentions %AsyncGeneratorFunction.prototype%,
            # which implies the existence of %AsyncGeneratorFunction%,
            # which is declared in column 1 of the *next* row.)
            for (new_name, tr_posn) in new_names.items():
                # '%Foo.bar.baz%' -> '%Foo%'
                base_of_new_name = re.sub(r'\..*', '%', new_name)
                if base_of_new_name not in well_known_intrinsics:
                    msg_at_posn(
                        tr_posn,
                        f"Implied intrinsic doesn't exist: {base_of_new_name}")

        else:
            # Any other caption: nothing is checked.
            # print('>>>', header_line, '---', caption)
            pass
Ejemplo n.º 12
0
    def check_ref_ids(refnode):
        """Check the emu-xref elements in the subtree rooted at `refnode`.

        Every referenced id is added to `refids`. A message is issued
        when an emu-xref refers to an id that is not declared (apart from
        a few known exceptions), refers to a <dfn> where the xref's text
        is empty or case-insensitively equal to the definition's text,
        or refers to an element of an unexpected kind. An emu-xref with
        no 'href' attribute aborts the run.
        """
        ids_declared_in_imported_files = (
            'table-binary-unicode-properties',
            'table-nonbinary-unicode-properties',
            'table-unicode-general-category-values',
            'table-unicode-script-values',
        )
        # These don't exist in the source file,
        # but are generated during the rendering process?
        ids_generated_during_rendering = (
            'prod-annexB-LegacyOctalEscapeSequence',
            'prod-annexB-LegacyOctalIntegerLiteral',
            'prod-annexB-NonOctalDecimalIntegerLiteral',
        )

        if refnode.element_name == 'emu-xref':
            if 'href' not in refnode.attrs:
                stderr("At",
                       shared.convert_posn_to_linecol(refnode.start_posn))
                stderr("emu-xref element doesn't have an 'href' attribute")
                stderr("aborting")
                sys.exit()

            href = refnode.attrs['href']
            assert href.startswith('#')
            refid = href[1:]
            refids.add(refid)

            if refid not in node_with_id_:
                is_known_exception = (
                    refid in ids_declared_in_imported_files
                    or refid in ids_generated_during_rendering
                )
                if not is_known_exception:
                    msg_at_posn(refnode.start_posn,
                                f"emu-xref refers to nonexistent id: {refid}")

            else:
                target = node_with_id_[refid]
                target_kind = target.element_name

                if target_kind == 'dfn':
                    deftext = target.inner_source_text()
                    reftext = refnode.inner_source_text()
                    assert deftext != ''
                    # If the xref's text differs from the definition's,
                    # auto-linking would fail to make `reftext` into a
                    # link? So an emu-xref is needed, and only the other
                    # cases are reported.
                    if reftext == '' or reftext.lower() == deftext.lower():
                        msg_at_posn(
                            refnode.start_posn,
                            f"emu-xref used when auto-linking would work: '{refid}'"
                        )

                elif target_kind not in ('emu-clause', 'emu-annex', 'emu-table'):
                    msg_at_posn(
                        target.start_posn,
                        f"unexpected defnode element-name <{target.element_name}>"
                    )

        for child in refnode.children:
            check_ref_ids(child)
Ejemplo n.º 13
0
    def gather_def_ids(node):
        """Record and check the ids declared in the subtree rooted at `node`.

        Each 'id' attribute is stored in `node_with_id_` (a duplicate is
        reported, and the later node replaces the earlier one). The id's
        prefix is checked against the element kind, and, when the node
        also has an 'aoid' attribute, the id is checked against the forms
        derived from that aoid. Each entry of an 'oldids' attribute is
        added to `all_oldids`, asserting it has not been seen before.
        """
        prefix_for_element = {
            'emu-intro': 'sec-',
            'emu-clause': 'sec-',
            'emu-annex': 'sec-',
            'emu-eqn': 'eqn-',
            'emu-figure': 'figure-',
            'emu-table': 'table-',
        }

        if 'id' in node.attrs:
            defid = node.attrs['id']

            # No duplicate ids, of course.
            if defid in node_with_id_:
                msg_at_posn(node.start_posn, f"duplicate id: '{defid}'")
            node_with_id_[defid] = node

            # The id should begin with "(sec|eqn|figure|table)-"
            # if and only if the node is of certain kinds.
            expected_prefix = prefix_for_element.get(node.element_name, None)
            if expected_prefix:
                if not defid.startswith(expected_prefix):
                    msg_at_posn(
                        node.start_posn,
                        f'Expected the id to start with "{expected_prefix}"'
                    )
            elif defid.startswith(('sec-', 'eqn-', 'figure-', 'table-')):
                msg_at_posn(node.start_posn,
                            'Did not expect the id to start that way')

            # If an element defines an abstract operation,
            # its id should be one of a few forms derived from the aoid.
            if 'aoid' in node.attrs:
                aoid = node.attrs['aoid']
                assert node.element_name in [
                    'emu-clause', 'emu-annex', 'emu-eqn', 'dfn'
                ]
                if expected_prefix is None:
                    expected_prefix = 'sec-'  # for thisFooValue
                lowered = aoid.lower().replace(' ', '-').replace('::', '-')
                acceptable = [
                    expected_prefix + lowered,
                    expected_prefix + aoid,
                    expected_prefix + kebab(aoid),
                ]
                if defid not in acceptable:
                    msg_at_posn(node.start_posn,
                                f'Expected id="{acceptable[0]}"')

        if 'oldids' in node.attrs:
            for oldid in node.attrs['oldids'].split(','):
                assert oldid not in all_oldids
                all_oldids.add(oldid)

        for child in node.children:
            gather_def_ids(child)
Ejemplo n.º 14
0
def check_ids():
    """Check the ids declared and referenced in the spec.

    Three passes:
      1. gather_def_ids walks the tree, collecting declared ids (and
         'oldids'), and checks their form.
      2. check_ref_ids walks the tree, collecting ids referenced by
         emu-xref elements, and checks each reference.
      3. Declared ids that are never referenced are reported, except
         for kinds of element where that is expected.

    Between passes 1 and 2, a sorted list of all ids (current and old)
    is written to the 'ids' output file.
    """
    header("checking ids...")

    # Maps each declared id to the node that declares it.
    node_with_id_ = OrderedDict()
    # Every id that appears in some node's 'oldids' attribute.
    all_oldids = set()

    def gather_def_ids(node):
        # Record and check the ids declared by `node` and its descendants.
        if 'id' in node.attrs:
            defid = node.attrs['id']

            # ----------
            # no duplicate ids, of course

            if defid in node_with_id_:
                msg_at_posn(node.start_posn, f"duplicate id: '{defid}'")

            # (On a duplicate, the later node replaces the earlier one.)
            node_with_id_[defid] = node

            # ----------
            # id should begin with "(sec|eqn|figure|table)-"
            # if and only if the node is of certain kinds.

            id_prefix_expectation = {
                'emu-intro': 'sec-',
                'emu-clause': 'sec-',
                'emu-annex': 'sec-',
                'emu-eqn': 'eqn-',
                'emu-figure': 'figure-',
                'emu-table': 'table-',
            }.get(node.element_name, None)
            if id_prefix_expectation:
                if not defid.startswith(id_prefix_expectation):
                    msg_at_posn(
                        node.start_posn,
                        f'Expected the id to start with "{id_prefix_expectation}"'
                    )
            else:
                if (False or defid.startswith('sec-')
                        or defid.startswith('eqn-')
                        or defid.startswith('figure-')
                        or defid.startswith('table-')):
                    msg_at_posn(node.start_posn,
                                f'Did not expect the id to start that way')

            # ----------
            # If an element defines an abstract operation,
            # its id should be ...

            if 'aoid' in node.attrs:
                aoid = node.attrs['aoid']
                assert node.element_name in [
                    'emu-clause', 'emu-annex', 'emu-eqn', 'dfn'
                ]
                if id_prefix_expectation is None:
                    id_prefix_expectation = 'sec-'  # for thisFooValue
                # Acceptable ids: the prefix followed by the lower-cased
                # aoid (with ' ' and '::' turned into '-'), the aoid
                # as-is, or kebab(aoid).
                possibles = [
                    id_prefix_expectation +
                    aoid.lower().replace(' ', '-').replace('::', '-'),
                    id_prefix_expectation + aoid,
                    id_prefix_expectation + kebab(aoid)
                ]
                if defid not in possibles:
                    msg_at_posn(node.start_posn,
                                f'Expected id="{possibles[0]}"')

        if 'oldids' in node.attrs:
            # 'oldids' is a comma-separated list; no oldid may be repeated.
            for oldid in node.attrs['oldids'].split(','):
                assert oldid not in all_oldids
                all_oldids.add(oldid)

        for child in node.children:
            gather_def_ids(child)

    gather_def_ids(spec.doc_node)

    # An id can't be both an oldid and a current id.
    assert not all_oldids & set(node_with_id_.keys())

    # Print a sorted list of all ids
    # (so that we notice if any ever go away):
    # NOTE(review): ids_f is not closed if print() raises; whether the
    # object returned by shared.open_for_output supports `with` can't be
    # told from here.
    ids_f = shared.open_for_output('ids')
    for id in sorted(all_oldids | set(node_with_id_.keys())):
        print(id, file=ids_f)
    ids_f.close()

    # -------------------------------------------------------------

    # Find "referenced but not declared" ids.

    # Every id referenced by some emu-xref's href.
    refids = set()

    def check_ref_ids(refnode):
        # Check the emu-xref elements among `refnode` and its descendants.
        if refnode.element_name == 'emu-xref':
            if 'href' not in refnode.attrs:
                stderr("At",
                       shared.convert_posn_to_linecol(refnode.start_posn))
                stderr("emu-xref element doesn't have an 'href' attribute")
                stderr("aborting")
                sys.exit()
            href = refnode.attrs['href']
            assert href.startswith('#')
            refid = href[1:]
            refids.add(refid)

            if refid in node_with_id_:

                defnode = node_with_id_[refid]
                if defnode.element_name in [
                        'emu-clause', 'emu-annex', 'emu-table'
                ]:
                    pass
                elif defnode.element_name == 'dfn':
                    deftext = defnode.inner_source_text()
                    reftext = refnode.inner_source_text()
                    assert deftext != ''
                    if reftext != '' and reftext.lower() != deftext.lower():
                        # Auto-linking would fail to make `reftext` into a link?
                        # So we have to use an emu-xref?
                        pass
                    else:
                        msg_at_posn(
                            refnode.start_posn,
                            f"emu-xref used when auto-linking would work: '{refid}'"
                        )
                else:
                    msg_at_posn(
                        defnode.start_posn,
                        f"unexpected defnode element-name <{defnode.element_name}>"
                    )

            else:
                if refid in [
                        'table-binary-unicode-properties',
                        'table-nonbinary-unicode-properties',
                        'table-unicode-general-category-values',
                        'table-unicode-script-values',
                ]:
                    # Those ids are declared in emu-imported files.
                    pass

                elif refid in [
                        'prod-annexB-LegacyOctalEscapeSequence',
                        'prod-annexB-LegacyOctalIntegerLiteral',
                        'prod-annexB-NonOctalDecimalIntegerLiteral',
                ]:
                    # These don't exist in the source file,
                    # but are generated during the rendering process?
                    pass

                else:
                    msg_at_posn(refnode.start_posn,
                                f"emu-xref refers to nonexistent id: {refid}")

        for child in refnode.children:
            check_ref_ids(child)

    check_ref_ids(spec.doc_node)

    # -------------------------------------------------------------

    # Find "declared but not referenced" ids.

    for (id, defnode) in node_with_id_.items():
        if id in refids: continue

        # `id` was not referenced.

        if id in ['metadata-block', 'ecma-logo']:
            # Actually, it *is* referenced, but from the CSS.
            continue

        if defnode.element_name in ['emu-intro', 'emu-clause', 'emu-annex']:
            # It's okay if the id isn't referenced:
            # it's more there for the ToC and for inbound URLs.
            continue

        if defnode.element_name in ['emu-figure', 'emu-table']:
            # The text might refer to it as "the following figure/table",
            # so don't expect an explicit reference to the id.
            # So you could ask, why bother giving an id then?
            # I suppose for inbound URLs, and consistency?
            continue

        if defnode.element_name in ['dfn', 'emu-eqn']:
            # It's likely that the rendering process will create references
            # to this id.
            continue

        msg_at_posn(defnode.start_posn,
                    f"id declared but not referenced: '{id}'")
Ejemplo n.º 15
0
def _parse():
    """Parse shared.spec_text into a tree of HNode objects and return its root.

    The text is consumed left to right by a small set of regex-driven
    handlers (literal text, start-tag, end-tag, comment, doctype).
    Start-tags open a node that later content is attached to, and
    end-tags close it. A lexing error, or an element still open at the
    end of the text, is fatal (the process exits with status 1).
    """
    stderr("parsing spec...")

    # The root node spans the whole text.
    doc_node = HNode(0, len(shared.spec_text), '#DOC', {})
    doc_node.parent = None
    # The innermost element whose end-tag has not been seen yet.
    current_open_node = doc_node

    def add_child(child):
        # Attach `child` to the currently-open node, and make it the new
        # currently-open node unless it is complete in itself.
        nonlocal current_open_node
        current_open_node.children.append(child)
        child.parent = current_open_node
        if child.element_name.startswith('#') or child.element_name in ['html', 'meta', 'link', 'img', 'br']:
            # This is a complete child
            pass
        else:
            # This is an incomplete ("open") element.
            # (It should be closed eventually by a corresponding end-tag.)
            current_open_node = child

    def close_open_child(end_tag_start_posn, end_tag_end_posn, element_name):
        # Handle an end-tag: close the currently-open node if the names
        # match, otherwise report the mismatch and ignore the end-tag.
        nonlocal current_open_node
        if element_name != current_open_node.element_name:
            msg_at_posn(
                end_tag_start_posn,
                f"ERROR: The currently-open element is a {current_open_node.element_name!r}, but this is an end-tag for {element_name!r}.\nSkipping the end-tag, to see if that helps."

            )
            # This old code might be useful to adapt:
            # if current_open_node.parent is None:
            #     self._report("current_open_node.parent is None")
            # elif element_name == current_open_node.parent.element_name:
            #     self._report("Assuming that </%s> is missing" % current_open_node.element_name)
            #     # Pretend that we got the missing endtag:
            #     self.handle_endtag(current_open_node.element_name)
            #     # That will change current_open_node.
            #     assert element_name == current_open_node.element_name
            #     self.handle_endtag(current_open_node.element_name)
            return

        # Until now, end_posn has been the end of the start-tag,
        # which is where the element's inner content begins.
        current_open_node.inner_start_posn = current_open_node.end_posn
        current_open_node.inner_end_posn   = end_tag_start_posn
        current_open_node.end_posn         = end_tag_end_posn
        current_open_node = current_open_node.parent

    # ---------------------------------------------

    # List of (compiled regex, handler) pairs, in registration order.
    pattern_funcs = []
    def for_pattern(pattern):
        # Decorator factory: registers the decorated function as the
        # handler for `pattern`. The decorator returns None, so the
        # decorated name (always `_` below) ends up bound to None;
        # the function itself lives on only in pattern_funcs.
        reo = re.compile(pattern)
        def wrapper(f):
            pattern_funcs.append( (reo, f) )
            return None
        return wrapper

    # ---------------------------------------------

    # Each handler receives (match start, match end, match groups)
    # and returns the position at which lexing should resume.

    # non-markup text:
    @for_pattern(r'[^<]+')
    def _(start_posn, end_posn, _):
        add_child(HNode(start_posn, end_posn, '#LITERAL', {}))
        return end_posn

    # start-tag:
    @for_pattern(r'<([a-z][-a-z0-9]*)\b')
    def _(tag_start_posn, end_name_posn, groups):
        # The pattern only matches '<' + element name;
        # the attributes and the closing '>' are consumed here.
        [element_name] = groups
        attrs = OrderedDict()
        posn = end_name_posn
        while True:
            if shared.spec_text[posn] == '>':
                tag_end_posn = posn + 1
                break
            # An attribute is a single space, a name, and optionally
            # ="value" in double quotes (attr_value is None if absent).
            mo = re.compile(r' ([a-z][-a-z0-9]*)(?:="([^"]*)")?').match(shared.spec_text, posn)
            if mo:
                (attr_name, attr_value) = mo.groups()
                assert attr_name not in attrs
                attrs[attr_name] = attr_value
                posn = mo.end()
                continue

            # Neither '>' nor an attribute. (fatal_error exits.)
            fatal_error(posn, "lexing error")

        add_child(HNode(tag_start_posn, tag_end_posn, element_name, attrs))
        return tag_end_posn

    # end-tag:
    @for_pattern(r'</([a-z][-a-z0-9]*)>')
    def _(start_posn, end_posn, groups):
        [element_name] = groups
        close_open_child(start_posn, end_posn, element_name)
        return end_posn

    # comment:
    @for_pattern(r'(?s)<!--.*?-->')
    def _(start_posn, end_posn, _):
        add_child(HNode(start_posn, end_posn, '#COMMENT', {}))
        return end_posn

    # doctype-decl:
    @for_pattern(r'<!DOCTYPE html>')
    def _(start_posn, end_posn, _):
        add_child(HNode(start_posn, end_posn, '#DECL', {}))
        return end_posn

    # ---------------------------------------------

    def fatal_error(posn, msg):
        # Print the location, a 30-character excerpt of the text at
        # `posn`, and `msg`, then exit with status 1.
        (line_num, col_num) = shared.convert_posn_to_linecol(posn)
        stderr()
        stderr("********************************************")
        stderr(f"line {line_num}, col {col_num}:")
        stderr(repr(shared.spec_text[posn:posn+30] + '...'))
        stderr(msg)
        stderr("********************************************")
        sys.exit(1)

    # ---------------------------------------------

    # Main lexing loop: at each position, use the first registered
    # pattern that matches there; if none does, give up.
    posn = 0
    while posn < len(shared.spec_text):
        for (reo, func) in pattern_funcs:
            mo = reo.match(shared.spec_text, posn)
            if mo:
                posn = func(mo.start(), mo.end(), mo.groups())
                break
        else:
            fatal_error(posn, "lexing error")

    # Everything should have been closed by now.
    if current_open_node.element_name != '#DOC':
        msg_at_posn(
            current_open_node.start_posn,
            "ERROR: At end of file, this element is still open"
        )
        fatal_error(current_open_node.start_posn, f"At end of file, this element is still open")

    return doc_node
Ejemplo n.º 16
0
def check_trailing_whitespace():
    """
    Report, via msg_at_posn(), each run of spaces/tabs in spec.text
    that sits at the end of a line. The message is attached to the
    position where the run starts.
    """
    announcement = "checking trailing whitespace..."
    stderr(announcement)
    header(announcement)

    # (?m) makes '$' match at the end of every line, not just at end of text.
    trailing_ws_reo = re.compile(r'(?m)[ \t]+$')
    for ws_match in trailing_ws_reo.finditer(spec.text):
        msg_at_posn(ws_match.start(), "trailing whitespace")
Ejemplo n.º 17
0
def _validate(node):
    """
    Recursively check `node` and all of its descendants for markup problems.

    Nothing is returned; every finding is reported through msg_at_posn().
    The checks, in order:
      - For a '#LITERAL' node: runs of 2 or more spaces that are not
        indentation. (No other check is applied to literals.)
      - Element content that starts or ends with a space.
      - Missing required attributes, unexpected attributes, and attribute
        values (driven by required_attrs_, optional_attrs_, attribute_info).
      - Content that mixes block-level and inline-level items (driven by kind_).
      - The sequence of children, matched against content_model_.

    Side effect: sets `block_child_element_names` and
    `inline_child_element_names` on every non-literal node visited.
    """
    if node.element_name == '#DOC':
        stderr("validating markup...")

    def is_loose_about_spaces(x):
        # In   sec-assignment-operators-runtime-semantics-evaluation
        # and  sec-applystringornumericbinaryoperator.
        # we have <emu-alg> elements that contain 'lightweight' tables
        # where we format the source with extra spaces in the <th> and <td> elements
        # to make the source easier to read.
        return (
            x.element_name in ['th', 'td']
            and
            x.parent.parent.attrs.get('class') == 'lightweight-table'
            # x.parent is the <tr>
            # x.parent.parent is the <table>
        )
        # TODO: Base it on the presence of "<!-- emu-format ignore -->",
        # because not all lightweight-tables are loose about spaces.

    if node.element_name == '#LITERAL':
        # Check for runs of multiple space characters.
        spec_text = shared.spec.text
        for mo in re.compile(r' {2,}').finditer(spec_text, node.start_posn, node.end_posn):
            s_posn = mo.start()

            # A run at the start of a line is indentation.
            # The very start of the text is also the start of a line;
            # testing for it explicitly avoids looking at spec_text[-1]
            # (i.e. the *last* character of the text) when s_posn is 0.
            if s_posn == 0 or spec_text[s_posn-1] == '\n':
                continue

            if is_loose_about_spaces(node.parent):
                continue

            n_spaces = mo.end() - s_posn
            msg_at_posn(s_posn, f"{n_spaces} space characters")
        return

    # ------------------------
    # Leading/trailing space in the element's content.

    if hasattr(node, 'inner_start_posn') and not is_loose_about_spaces(node):
        ist = node.inner_source_text()
        if ist.startswith(' '):
            msg_at_posn(node.inner_start_posn, f"<{node.element_name}> content starts with space")

        # Spaces after the final newline are just the indentation
        # of the end-tag, so they don't count as trailing space.
        if ist.endswith(' ') and not re.search('\n +$', ist):
            msg_at_posn(node.inner_end_posn, f"<{node.element_name}> content ends with space")

    # ------------------------
    # Attributes: presence.

    required_attrs = required_attrs_[node.element_name]
    optional_attrs = optional_attrs_[node.element_name]
    allowed_attrs = required_attrs | optional_attrs
    attrs = node.attrs.keys()

    def stringify_set(s):
        return ' '.join(sorted(s))

    if not (attrs >= required_attrs):
        msg_at_posn(node.start_posn, f"required attribute(s) are missing: {stringify_set(required_attrs - attrs)}")
    if not (attrs <= allowed_attrs):
        msg_at_posn(node.start_posn, f"unexpected attribute(s): {stringify_set(attrs - allowed_attrs)}")

    # Attributes: values.
    # attribute_info is consulted first under the element-specific key
    # "<element>.<attr>", then under the bare attribute name.
    # A pattern of None means the attribute must be given without a value.

    for (attr_name, attr_value) in node.attrs.items():
        assert attr_value is None or isinstance(attr_value, str)
        for key in [
            f"{node.element_name}.{attr_name}",
            attr_name
        ]:
            if key in attribute_info:
                value_pattern = attribute_info[key]
                break
        else:
            msg_at_posn(
                node.start_posn,
                f"Unknown attribute {attr_name!r}"
            )
            continue

        if value_pattern is None and attr_value is None:
            pass
        elif value_pattern is None:
            msg_at_posn(
                node.start_posn,
                f"For attribute {attr_name!r}, expected no value, but got {attr_value!r}"
            )
        elif attr_value is None:
            msg_at_posn(
                node.start_posn,
                f"For attribute {attr_name!r}, expected a value matching {value_pattern!r}, but got nothing"
            )
        elif not re.fullmatch(value_pattern, attr_value):
            msg_at_posn(
                node.start_posn,
                f"For attribute {attr_name!r}, expected a value matching {value_pattern!r}, but got {attr_value!r}"
            )

    # ------------------------

    # First do a pass to figure whether the content of this node
    # is block items or inline items or (anomalously) both.
    node.block_child_element_names = set()
    node.inline_child_element_names = set()
    for child in node.children:
        if child.element_name == '#COMMENT':
            continue
        elif child.element_name == '#LITERAL':
            # Whitespace-only text is neutral; any other text counts as inline.
            if not child.is_whitespace():
                node.inline_child_element_names.add(child.element_name)
        elif child.element_name in kind_:
            k = kind_[child.element_name]
            if k == 'B':
                node.block_child_element_names.add(child.element_name)
            elif k == 'I':
                node.inline_child_element_names.add(child.element_name)
        else:
            msg_at_posn(child.start_posn, f"Is <{child.element_name}> block or inline?")

    if node.block_child_element_names and node.inline_child_element_names:
        block_names = ', '.join(sorted(node.block_child_element_names))
        inline_names = ', '.join(sorted(node.inline_child_element_names))
        msg_at_posn(
            node.start_posn,
            f"{node.element_name} content includes both block-level items ({block_names}) and inline-level items ({inline_names})"
        )

    # ------------------------
    # Content model.
    # Summarize the children as a string of ';'-terminated names.
    # A literal child is '#TEXT;' when this node has inline content,
    # and '#WS;' otherwise (in which case it must be whitespace-only,
    # since non-whitespace text would have counted as inline content above).

    child_tokens = []
    for child in node.children:
        if child.element_name == '#LITERAL':
            if node.inline_child_element_names:
                x = '#TEXT;'
            else:
                assert child.is_whitespace()
                x = '#WS;'
        else:
            x = child.element_name + ';'
        child_tokens.append(x)

    children_names = ''.join(child_tokens)

    # Treat a comment with whitespace on both sides as just whitespace.
    # The '+' handles a chain of such comments in one go: re.sub doesn't
    # rescan its own output, so replacing only '#WS;#COMMENT;#WS;' would
    # turn '#WS;#COMMENT;#WS;#COMMENT;#WS;' into '#WS;#COMMENT;#WS;',
    # leaving a stray comment behind.
    children_names = re.sub('#WS;(?:#COMMENT;#WS;)+', '#WS;', children_names)

    if node.element_name not in content_model_:
        msg_at_posn(node.start_posn, f"No content model for <{node.element_name}>")
    else:
        content_model = content_model_[node.element_name]
        if re.match(content_model, children_names) is None:
            msg_at_posn(
                node.start_posn,
                f"{node.element_name} has content {children_names}, expected {content_model}"
            )

    #! if node.children:
    #!     node.inner_start_posn = node.children[0].start_posn
    #!     node.inner_end_posn   = node.children[-1].end_posn

    for child in node.children:
        _validate(child)