Example #1
def establish_dependency_levels():
    stderr("    establish_dependency_levels ...")

    levels = []

    def get_level_of_cluster(cluster):
        if hasattr(cluster, 'level'):
            return cluster.level

        if len(cluster.direct_prereqs) == 0:
            # only depends on predefineds
            level = 0
        else:
            level = 1 + max(
                get_level_of_cluster(prereq)
                for prereq in cluster.direct_prereqs)
        cluster.level = level

        if level < len(levels):
            pass
        elif level == len(levels):
            levels.append([])
        else:
            assert 0
        levels[level].append(cluster)
        return level

    for cluster in cluster_:
        get_level_of_cluster(cluster)

    stderr("    %d levels" % len(levels))

    for (L, clusters_on_level_L) in enumerate(levels):
        clusters_on_level_L.sort(key=lambda cluster: cluster.members[0])

    return levels
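
The key detail in the level computation above is that it memoizes the result on the cluster object (the hasattr check), so each cluster's level is computed once even when it is a prerequisite of many other clusters. Below is a minimal, self-contained sketch of the same idea; the Cluster stand-in is hypothetical, not the real class used by the example.

class Cluster:
    def __init__(self, name, direct_prereqs=()):
        self.name = name
        self.direct_prereqs = list(direct_prereqs)

def level_of(cluster, levels):
    if hasattr(cluster, 'level'):
        return cluster.level  # memoized on the object, as in get_level_of_cluster
    if not cluster.direct_prereqs:
        level = 0
    else:
        level = 1 + max(level_of(p, levels) for p in cluster.direct_prereqs)
    cluster.level = level
    if level == len(levels):
        levels.append([])
    levels[level].append(cluster)
    return level

a = Cluster('a')
b = Cluster('b', [a])
c = Cluster('c', [a, b])
levels = []
for cl in [a, b, c]:
    level_of(cl, levels)
print([[cl.name for cl in lvl] for lvl in levels])  # -> [['a'], ['b'], ['c']]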
Example #2
    def replace_defined_term(mo):
        whole_match = mo.group(0)
        term = mo.group(1)
        # if node.element_name == 'emu-table' and whole_match == 'ToBoolean': pdb.set_trace()
        # if whole_match == 'HourFromTime': pdb.set_trace()
        # if '[' in term: pdb.set_trace()
        if term not in _fragid_for_term:
            stderr(f"no fragid for term {term!r} ")
            return whole_match

        term_fragid = _fragid_for_term[term]
        if node.element_name == 'p' and cc_section and term_fragid == cc_section.section_id:
            # This is a reference to the term from within
            # the 'top' of the section in which it's defined.
            # In this case, we don't make the reference a link,
            # because the definition is presumably nearby.
            # (It's unclear what the actual rule is.)
            return whole_match
        elif 'id' in node.attrs and node.attrs['id'] == term_fragid:
            return whole_match
        elif node.element_name in [
                'dfn',  # Don't linkify the definition itself.
                'h1',  # Don't put links in clause-titles.
                'emu-xref',  # Don't linkify something that's already linked.
        ]:
            return whole_match
        else:
            return _replacement_for_term[term]
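
replace_defined_term takes a match object, so it is presumably installed as the replacement callback of a re.sub call over the node's text. The toy wiring below illustrates that pattern; the term tables and the regex are made up for the illustration, and the surrounding node/section checks are omitted.

import re

_fragid_for_term = {'List': 'sec-list-and-record-specification-type'}
_replacement_for_term = {
    'List': '<emu-xref href="#sec-list-and-record-specification-type">List</emu-xref>',
}

def replace_defined_term(mo):
    whole_match = mo.group(0)
    term = mo.group(1)
    if term not in _fragid_for_term:
        return whole_match  # unknown term: leave the text untouched
    return _replacement_for_term[term]

text = 'Returns a List of values.'
print(re.sub(r'\b(List)\b', replace_defined_term, text))
# -> Returns a <emu-xref href="#sec-list-and-record-specification-type">List</emu-xref> of values.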
Example #3
def check_intrinsics():
    stderr("checking intrinsics...")
    header("checking intrinsics...")
    # We can't just scan through spec.text looking for %...%,
    # because that would find occurrences in element IDs,
    # which are lower-cased.
    # Instead, just look in literal (text) nodes.
    # (Note that this skips occurrences of "%<var>Foo</var>Prototype%".)
    for tnode in spec.doc_node.each_descendant_named('#LITERAL'):
        for mo in re.compile(r'%\S+%').finditer(spec.text, tnode.start_posn,
                                                tnode.end_posn):
            itext = mo.group(0)
            itext_start = mo.start(0)
            if itext in ['%name%', '%name.a.b%']:
                # placeholders
                continue
            if itext in ['%_NativeError_%', '%_TypedArray_%']:
                # metavariable interpolation
                continue

            is_in_table = any(
                table_start < itext_start < table_end
                for (table_start,
                     table_end) in well_known_intrinsics_table_spans)

            status = well_known_intrinsics.get(itext, "doesn't exist")
            if status == "doesn't exist":
                msg_at_posn(itext_start, f"Intrinsic doesn't exist: {itext}")
            elif status.startswith("old name"):
                if not is_in_table:
                    msg_at_posn(itext_start, f"Using {status}")
Example #4
    def report(self):
        report_file_base = self.file_base + '_prod_counts'
        shared.stderr(f"generating new {report_file_base} ...")

        if self.group_errors_by_expectation:
            # This approach is better when I'm developing a grammar,
            # as it tends to group similar cases.

            def err(x):
                print(x, file=self.f_errors)

            err("%d parsing errors:" % self.error_count)
            err('')
            for (expecting, posns) in sorted(self.error_posns.items()):
                # err('')
                err('X' * 80)
                # err('')
                err("Expecting:")
                for e in expecting:
                    err("    %r" % e)
                for posn in posns:
                    err(
                        shared.source_line_with_caret_marking_column(
                            math.ceil(posn)))

        f = shared.open_for_output(report_file_base)
        for prod in self.productions:
            print("%5d %s" % (prod.n_delivered_instances, prod), file=f)
Example #5
def check_for_extra_blank_lines():
    stderr("checking for extra blank lines...")
    for mo in re.finditer(r'\n( *\n){2,}', spec.text):
        posn = mo.end() - 1
        msg_at_posn(posn, "2 or more adjacent blank lines")

    for mo in re.finditer(r'\n( *\n *</emu-clause>)', spec.text):
        posn = mo.start(1)
        msg_at_posn(posn, "blank line before end-clause tag")
Example #6
def main():
    if len(sys.argv) == 1:
        stderr(
            f"usage: {sys.argv[0]} [ --all | --all-dir=<dir> | <file> ... ]")
    elif sys.argv[1] == '--all':
        test_all()
    elif mo := re.fullmatch(r'--all-dir=(\S+)', sys.argv[1]):
        test_dirname = mo.group(1)
        test_all_in_dir(test_dirname)
Example #7
def check_tables():
    stderr('check_tables...')
    for et in spec.doc_node.each_descendant_named('emu-table'):
        analyze_table(et)

        caption = et._caption
        header_line = '; '.join(et._header_row.cell_texts)

        def check_value_descriptions_in_column(col_index):
            for row in et._data_rows:
                col_name = et._header_row.cell_texts[col_index]
                cell_value = row.cell_texts[col_index]
                Pseudocode.parse(row.cell_nodes[col_index], 'field_value_type')

        if 'Field' in caption or ('Method' in caption and 'Record' in caption):
            # See records.process_tables()
            pass

        elif 'Slot' in caption:
            if re.match(r'^Internal Slots of (.+)$', caption):
                if header_line == 'Internal Slot; Type; Description':
                    check_value_descriptions_in_column(1)
                else:
                    assert 0, header_line
            else:
                assert 0

        elif 'Method' in caption:
            if 'Internal Methods' in caption:
                assert caption in ['Essential Internal Methods', 'Additional Essential Internal Methods of Function Objects']
                assert header_line == 'Internal Method; Signature; Description'
            elif caption == 'Proxy Handler Methods':
                assert header_line == 'Internal Method; Handler Method'
            else:
                assert 0

        elif 'Properties' in caption:
            assert re.fullmatch(r'<i>\w+</i> Interface( (Required|Optional))? Properties', caption)
            assert header_line == 'Property; Value; Requirements'
            check_value_descriptions_in_column(1)

        elif caption == 'Attributes of an Object property':
            assert header_line == 'Attribute Name; Types of property for which it is present; Value Domain; Default Value; Description'
            check_value_descriptions_in_column(2)

        elif 'Intrinsic Objects' in caption:
            # see Section.extract_intrinsic_info_from_WKI_section()
            # and intrinsics.each_row_in_wki_table()
            pass

        else:
            # print('>>>', header_line, '---', caption)
            pass
Example #8
def main():
    shared.register_output_dir(sys.argv[1])
    spec.restore()

    prep_xrefs()
    prep_autolinking()
    prep_grammar()

    stderr("render ...")
    global _f
    _f = shared.open_for_output('index.html')
    render_node(spec.doc_node)
    _f.close()
Example #9
def prep_grammar():
    stderr("prep_grammar ...")
    for emu_grammar in spec.doc_node.each_descendant_named('emu-grammar'):
        ns = get_grammar_namespace(emu_grammar)
        trimmed_body = emu_grammars.trim_newlines(
            emu_grammar.inner_source_text())
        for production in re.split(r'\n{2,}', trimmed_body):
            mo = re.match(r'^ *(\w+)', production)
            assert mo
            lhs_nt = mo.group(1)
            _lhs_nts_in_namespace_[ns].add(lhs_nt)
            fragid = fragid_for_nt_def(lhs_nt, ns)
            _default_xref_text_for_fragid_[
                fragid] = '<emu-nt>' + lhs_nt + '</emu-nt>'
Example #10
    def convert_to_header(self):
        self._dedupe()

        poi = AlgHeader()

        def join_field_values(key, joiner = ' & '):
            values = self.fields[key]
            if not values: return None
            return joiner.join(values)

        def at_most_one_value(key):
            values = self.fields[key]
            if not values: return None
            assert len(values) == 1, values
            return values[0]

        vs = join_field_values('kind')
        poi.species = {
            'anonymous built-in function object that is defined once for each realm' : 'bif: intrinsic',
            'anonymous built-in function'               : 'bif: * per realm',
            'accessor property'                         : 'bif: intrinsic: accessor function',
            'constructor'                               : 'bif: intrinsic',
            'function'                                  : 'bif: intrinsic',
            'method'                                    : 'bif: intrinsic',
            None                                        : None,
        }[vs]

        poi.name = at_most_one_value('name')

        pl_values = self.fields['pl']
        if len(pl_values) == 0:
            poi.params = None
        elif len(pl_values) == 1:
            get_info_from_parameter_listing_in_preamble(poi, pl_values[0])
        elif pl_values == [
            'zero or more arguments',
            'zero or more arguments which form the rest parameter ..._args_'
        ]:
            get_info_from_parameter_listing_in_preamble(poi, pl_values[1])
        else:
            stderr(f"{poi.name} has multi-pl: {pl_values}")
            assert 0

        poi.return_nature_normal = join_field_values('retn', ' or ')

        poi.return_nature_abrupt = at_most_one_value('reta')

        poi.description_paras = self.fields['desc']

        return poi
Example #11
def check_dfns():
    stderr('check_dfns...')

    spec.dfn_for_term_ = {}
    for dfn in spec.doc_node.each_descendant_named('dfn'):
        ist = dfn.inner_source_text()
        assert ist not in spec.dfn_for_term_
        spec.dfn_for_term_[ist] = dfn

        variants = dfn.attrs.get('variants')
        if variants:
            if ',' in variants: assert NYI
            assert variants not in spec.dfn_for_term_
            spec.dfn_for_term_[variants] = dfn
Example #12
def main():
    if len(sys.argv) != 3:
        stderr("usage: %s <output-dir> <spec.html>" % sys.argv[0])
        sys.exit(1)

    outdir = sys.argv[1]
    spec_path = sys.argv[2]

    shared.register_output_dir(outdir)

    shared.msg_at_posn_start()

    spec.read_source_file(spec_path)

    spec.doc_node = HTML.parse_and_validate()

    # Now that errors/warnings are interleaved with a copy of the spec text,
    # the order in which we call these functions
    # only matters when two msg_at_posn() calls
    # address the exact same position.

    check_characters()

    check_indentation()
    check_trailing_whitespace()
    check_for_extra_blank_lines()

    check_ids()
    check_dfns()

    Pseudocode.create_all_parsers()

    check_tables()
    records.process_tables()
    Section.make_and_check_sections()
    records.print_schema_hierarchies()
    process_intrinsics_facts()
    check_references_to_intrinsics()
    emu_grammars.do_stuff_with_emu_grammars()
    
    Pseudocode.do_stuff_with_pseudocode()

    check_globals()

    shared.msg_at_posn_finish()

    headers.generate_spec_for_PR_545()

    spec.save()
Example #13
    def __init__(self, lines):
        self.raw_pattern = lines.pop(0)
        self.reo = re.compile(self.raw_pattern)
        self.templates = {}
        for line in lines:
            mo = re.fullmatch(r'([\w ]+)=(.*)', line)
            if mo is None:
                stderr(f"bad line: {line}")
                sys.exit(1)
            (key, template) = mo.groups()
            assert key not in self.templates
            self.templates[key] = template
        if 'v' not in self.templates:
            self.templates['v'] = ''
        self.count = 0
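
The constructor above expects the first line to be a regex and every remaining line to be a key=template pair. The source does not show how the compiled pattern and the templates are consumed; the following is a hedged sketch of the presumed usage, with a made-up input.

import re

lines = [r'(\w+) is (\d+)', r'name=\1', r'v=\2']  # hypothetical input
raw_pattern = lines.pop(0)
reo = re.compile(raw_pattern)
templates = dict(re.fullmatch(r'([\w ]+)=(.*)', line).groups() for line in lines)
templates.setdefault('v', '')  # same default as in __init__

mo = reo.fullmatch('x is 3')
print(mo.expand(templates['name']))  # -> x
print(mo.expand(templates['v']))     # -> 3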
Example #14
    def tokenize(self, s, start_posn, end_posn, generate_dent_tokens,
                 initial_indentation):
        prev_indentation = initial_indentation
        posn = start_posn
        while True:
            mo = self.reo.match(s, posn, end_posn)
            if mo is None:
                shared.stderr(
                    f"\nTokenization error at: {s[posn:min(posn+20,end_posn)]}...\n",
                    shared.source_line_with_caret_marking_column(posn))
                assert 0
            pi = mo.lastgroup
            text = mo.group(pi)
            (tok_s_posn, tok_e_posn) = mo.span(pi)

            # XXX The sub-pattern associated with this group
            # might have a capturing subgroup
            # (whose value might be more useful than the group's),
            # but accessing it would be tricky,
            # because it doesn't have a name,
            # and we don't know its number in the overall pattern.
            # Either would take a bit of work.

            prod = self.prod_for_pi[pi]

            if generate_dent_tokens and prod.lhs_s == '{nlai}':
                this_indentation = len(text) - 1  # subtract 1 for the \n

                change_in_indentation = this_indentation - prev_indentation
                indent_unit = 2
                assert change_in_indentation % indent_unit == 0
                n_dents = change_in_indentation // indent_unit
                if n_dents > 0:
                    dent_prod = indent_prod
                elif n_dents < 0:
                    dent_prod = outdent_prod
                else:
                    dent_prod = None
                for i in range(abs(n_dents)):
                    yield (dent_prod, tok_s_posn, tok_s_posn, '')

                prev_indentation = this_indentation

            yield (prod, tok_s_posn, tok_e_posn, text)

            if prod.lhs_s == '{_eos_}': break

            posn = tok_e_posn
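
The subtle part of tokenize() is how a change in indentation becomes zero or more synthetic indent/outdent tokens. Here is a minimal sketch of just that bookkeeping, divorced from the regex machinery and assuming the 2-space indent unit used above.

def dent_kinds(indentations, indent_unit=2):
    # indentations: the leading-space count of each successive line
    prev = indentations[0]
    for this in indentations[1:]:
        change = this - prev
        assert change % indent_unit == 0
        n_dents = change // indent_unit
        kind = 'INDENT' if n_dents > 0 else 'OUTDENT'
        for _ in range(abs(n_dents)):
            yield kind
        prev = this

print(list(dent_kinds([0, 2, 4, 0])))
# -> ['INDENT', 'INDENT', 'OUTDENT', 'OUTDENT']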
Example #15
def check_characters():
    stderr("checking characters...")
    header("checking characters...")
    for mo in re.finditer(r'[^\n -~]', spec.text):
        posn = mo.start()
        character = spec.text[posn]
        if character == '\u211d':
            # PR 1135 introduced tons of these
            continue

        if character in ascii_replacement:
            suggestion = ": maybe change to %s" % ascii_replacement[character]
        else:
            suggestion = ''
        msg_at_posn(
            posn,
            "non-ASCII character U+%04x%s" % (ord(character), suggestion))
Example #16
def check_characters():
    stderr("checking characters...")
    for mo in re.finditer(r'[^\n -~]', spec.text):
        # Note that this will (among other things) find and complain about TAB characters.
        posn = mo.start()
        character = spec.text[posn]
        if character == '\u211d':
            # PR 1135 introduced tons of these
            continue
        elif character in ['\u2124', '\U0001d53d']:
            continue

        if character in ascii_replacement:
            suggestion = ": maybe change to %s" % ascii_replacement[character]
        else:
            suggestion = ''
        msg_at_posn(posn, "non-ASCII character U+%04x%s" %
            (ord(character), suggestion) )
Example #17
def check_globals():
    stderr('check_globals...')
    global_object_property_names = set()

    sgo = spec.node_with_id_['sec-global-object']
    for section in sgo.each_descendant_that_is_a_section():
        if '_property' in section.section_kind:
            # print('>', section.section_kind, section.section_title)
            mo = re.fullmatch(r'(\w+)( \(.*\))?', section.section_title)
            assert mo
            global_property_name = mo.group(1)
            if section.parent.section_title != 'Value Properties of the Global Object':
                global_object_property_names.add(global_property_name)

    def show_names_set(label, names_set):
        for name in sorted(names_set):
            stderr(f"> {label}: {name}")
        
    show_names_set("In 'The Global Object' but not in WKI", global_object_property_names - intrinsics.global_property_names)
    show_names_set("In WKI but not in 'The Global Object'", intrinsics.global_property_names - global_object_property_names)
Example #18
def _check_section_order(section):
    # In some sections, the subsections should be in "alphabetical order".

    if section.element_name == '#DOC':
        stderr("_check_section_order...")
    else:

        if section.section_kind in [
                'group_of_properties1',
                'group_of_properties2',
                'properties_of_an_intrinsic_object',
                'properties_of_instances',
        ]:
            prev_title = None
            prev_t = None
            for child in section.section_children:
                if child.section_kind not in [
                        'group_of_properties1',
                        'group_of_properties2',
                        'catchall',
                        'anonymous_built_in_function',
                ]:
                    assert re.search(r'_property(_xref)?$',
                                     child.section_kind), child.section_kind
                    t = child.section_title
                    t = t.lower()
                    t = t.replace('int8', 'int08')
                    t = re.sub(r'^get ', '', t)
                    if section.section_title == 'Properties of the RegExp Prototype Object':
                        t = re.sub(r' \[ @@(\w+) \]', r'.\1', t)
                    else:
                        t = re.sub(r' \[ @@(\w+) \]', r'.zz_\1', t)
                    if prev_t is not None and t <= prev_t:
                        msg_at_posn(
                            child.start_posn, '"%s" should be before "%s"' %
                            (child.section_title, prev_title))
                    prev_t = t
                    prev_title = child.section_title

    for child in section.section_children:
        _check_section_order(child)
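
The sort key built from each subsection title deserves a quick illustration: lower-casing plus the int8 -> int08 rewrite makes typed-array names compare in numeric order, stripping a leading "get " groups accessors with their property, and the @@-symbol rewrite controls where symbol-keyed properties sort. The helper name normalize below is mine, not the source's, and the sample titles are just examples.

import re

def normalize(title, in_regexp_proto=False):
    t = title.lower()
    t = t.replace('int8', 'int08')
    t = re.sub(r'^get ', '', t)
    if in_regexp_proto:
        t = re.sub(r' \[ @@(\w+) \]', r'.\1', t)
    else:
        t = re.sub(r' \[ @@(\w+) \]', r'.zz_\1', t)
    return t

titles = ['Int16Array', 'Int8Array', 'get Map.prototype.size']
print(sorted(titles, key=normalize))
# -> ['Int8Array', 'Int16Array', 'get Map.prototype.size']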
Example #19
    def compute_dependency_levels(self):

        self.find_strongly_connected_components()

        stderr('    %d SCCs' % len(cluster_))

        stderr("    sorting...")
        for cluster in cluster_:
            cluster.members.sort()
            # cluster.position = vertex_collater(cluster.members[0])

        stderr("    dependencies between SCCs...")
        for cluster in cluster_:
            cluster.contains_a_cycle = False
            for vertex in cluster.members:
                for p in self.arcs_from_[vertex]:
                    if self.cluster_for_[p] is cluster:
                        # a "sideways" dependency
                        cluster.contains_a_cycle = True
                    else:
                        if self.cluster_for_[p] not in cluster.direct_prereqs:
                            cluster.direct_prereqs.append(self.cluster_for_[p])

            if len(cluster.members) > 1:
                assert cluster.contains_a_cycle
            # If len(cluster.members) == 1, it still might contain a cycle

        levels = establish_dependency_levels()

        return levels
Example #20
def main():
    if len(sys.argv) != 3:
        stderr("usage: %s <output-dir> <spec.html>" % sys.argv[0])
        sys.exit(1)

    outdir = sys.argv[1]
    spec_path = sys.argv[2]

    shared.register_output_dir(outdir)

    shared.msg_at_posn_start()

    spec.read_source_file(spec_path)

    spec.doc_node = HTML.parse_and_validate()

    # It feels like it would make more sense to check characters and indentation
    # before parsing/checking markup, because they're more 'primitive' than markup.
    # But when it comes to fixing errors, you should make sure
    # you've got the markup correct before fiddling with indentation.
    # So to encourage that, have markup errors appear before indentation errors,
    # i.e. run the markup checks before indentation checks.
    # (Not sure about characters.)
    check_indentation()
    check_trailing_whitespace()
    check_characters()

    check_ids()

    check_tables()
    check_intrinsics()
    Section.make_and_check_sections()
    emu_grammars.do_stuff_with_emu_grammars()

    Pseudocode.do_stuff_with_pseudocode()

    shared.msg_at_posn_finish()

    spec.save()
Example #21
def write_header_info():
    stderr("write_header_info ...")

    f = shared.open_for_output('header_info')

    def put(*args): print(*args, file=f)

    for bif_or_op in ['op', 'bif']:
        put('X'*40)
        put(bif_or_op)
        for (alg_name, alg_info) in sorted(spec.alg_info_[bif_or_op].items()):
            n_defns_via_headers = 0
            assert alg_info.name == alg_name
            assert alg_info.bif_or_op == bif_or_op
            put()
            put(f"  {alg_info.name}")
            put(f"    {alg_info.species}")
            put(f"    {len(alg_info.headers)} headers:")
            for alg_header in alg_info.headers:
                assert alg_header.name == alg_name
                assert alg_header.species == alg_info.species
                put(f"      --")
                if alg_header.for_phrase: put(f"        for: {alg_header.for_phrase}")
                # alg_header.params
                # alg_header.also
                # alg_header.return_nature_{normal,abrupt}
                # alg_header.description_paras
                put(f"        {len(alg_header.u_defns)} defns")
                n_defns_via_headers += len(alg_header.u_defns)
                for alg_defn in alg_header.u_defns:
                    assert alg_defn.header is alg_header

            assert n_defns_via_headers == len(alg_info.all_definitions())
            # alg_info.invocations
            # alg_info.callees
            # alg_info.callers
        put()

    f.close()
Example #22
def check_references_to_intrinsics():
    stderr("check_references_to_intrinsics...")

    # We can't just scan through spec.text looking for %...%,
    # because that would find occurrences in element IDs,
    # which are lower-cased.
    # Instead, just look in literal (text) nodes.
    # (Note that this skips occurrences of "%<var>Foo</var>Prototype%".)
    for tnode in spec.doc_node.each_descendant_named('#LITERAL'):
        for mo in re.compile(r'%\S+%').finditer(spec.text, tnode.start_posn, tnode.end_posn):
            itext = mo.group(0)
            itext_start = mo.start(0)
            if itext in ['%name%', '%name.a.b%']:
                # placeholders
                continue
            if itext in ['%_NativeError_%', '%_TypedArray_%']:
                # metavariable interpolation
                continue

            base_intrinsic = re.sub(r'\.[^%]+', '', itext)

            if base_intrinsic not in well_known_intrinsics:
                msg_at_posn(itext_start, f"Intrinsic doesn't exist: {base_intrinsic}")
Example #23
def prep_xrefs():
    stderr("prep_xrefs ...")
    global _default_xref_text_for_fragid_, _title_xref_text_for_fragid_
    _default_xref_text_for_fragid_ = {}
    _title_xref_text_for_fragid_ = {}

    for section in spec.doc_node.each_descendant_named(
            re.compile('emu-clause|emu-annex')):
        assert 'id' in section.attrs
        fragid = section.attrs['id']
        _default_xref_text_for_fragid_[fragid] = section.section_num
        _title_xref_text_for_fragid_[fragid] = section.section_title

    table_i = 0
    for element in spec.doc_node.each_descendant_named(
            re.compile('emu-table|emu-import')):
        table_i += 1
        if element.element_name == 'emu-table':
            if 'id' not in element.attrs:
                # No way to xref the table (but it still gets counted).
                continue
            fragid = element.attrs['id']
        elif element.element_name == 'emu-import':
            # Currently, each emu-import (of file foo.html)
            # defines one emu-table (with id 'foo').
            # XXX Really, we should do something more robust.
            href = element.attrs['href']
            fragid = href.replace('.html', '')
        else:
            assert 0
        _default_xref_text_for_fragid_[fragid] = 'Table %d' % table_i

    for dfn in spec.doc_node.each_descendant_named('dfn'):
        if 'id' in dfn.attrs:
            fragid = dfn.attrs['id']
            term = dfn.inner_source_text()
            _default_xref_text_for_fragid_[fragid] = term
Example #24
def parse_and_validate():
    doc_node = _parse()
    if doc_node.element_name != '#DOC':
        stderr("After _parse(), doc_node.element_name should be #DOC, is", doc_node.element_name)
        stderr("start_posn ~", shared.convert_posn_to_linecol(doc_node.start_posn))
        stderr("aborting due to above error")
        sys.exit()
    _validate(doc_node)
    return doc_node
Example #25
    def check_ref_ids(refnode):
        if refnode.element_name == 'emu-xref':
            if 'href' not in refnode.attrs:
                stderr("At", shared.convert_posn_to_linecol(refnode.start_posn))
                stderr("emu-xref element doesn't have an 'href' attribute")
                stderr("aborting")
                sys.exit()
            href = refnode.attrs['href']
            assert href.startswith('#')
            refid = href[1:]
            refids.add(refid)

            if refid in node_with_id_:

                defnode = node_with_id_[refid]
                if defnode.element_name in ['emu-clause', 'emu-annex', 'emu-table', 'emu-alg', 'emu-note']:
                    pass
                elif defnode.element_name == 'dfn':
                    deftext = defnode.inner_source_text()
                    reftext = refnode.inner_source_text()
                    assert deftext != ''
                    if reftext != '' and reftext.lower() != deftext.lower():
                        # Auto-linking would fail to make `reftext` into a link?
                        # So we have to use an emu-xref?
                        pass
                    else:
                        msg_at_node(refnode, f"emu-xref used when auto-linking would work: '{refid}'")
                else:
                    msg_at_node(defnode, f"unexpected defnode element-name <{defnode.element_name}>")

            else:
                if refid in [
                    'table-binary-unicode-properties',
                    'table-nonbinary-unicode-properties',
                    'table-unicode-general-category-values',
                    'table-unicode-script-values',
                ]:
                    # Those ids are declared in emu-imported files.
                    pass

                else:
                    msg_at_node(refnode, f"emu-xref refers to nonexistent id: {refid}")

        for child in refnode.children:
            check_ref_ids(child)
Example #26
def check_indentation():
    stderr("check_indentation...")
    header("checking indentation...")

    INDENT_UNIT = 2

    def check_indentation_for_node(node, expected_indent):
        if node.element_name == '#DOC':
            assert expected_indent is None
            for child in node.children:
                check_indentation_for_node(child, 0)
            return

        if node.element_name == '#LITERAL':
            # Mostly whitespace, but also:
            #     Editors:
            #     For each pair (_R_, _W_) ...
            #     For each element _eventsRecord_
            # whose indentation we don't care about?
            return

        def get_span_of_line_containing_posn(posn):
            # Excludes any newline at start or end.
            s = spec.text.rfind('\n', 0, posn)
            e = spec.text.find('\n', posn)
            return (0 if s == -1 else s + 1, len(spec.text) if e == -1 else e)

        (start_line_s,
         start_line_e) = get_span_of_line_containing_posn(node.start_posn)
        (end_line_s,
         end_line_e) = get_span_of_line_containing_posn(node.end_posn)

        def check_tag_indent(line_s, tag_s, element_name):
            portion_of_line_before_tag = spec.text[line_s:tag_s]
            if (portion_of_line_before_tag == ''
                    or portion_of_line_before_tag.isspace()):
                actual_indent = len(portion_of_line_before_tag)
                if actual_indent != expected_indent:
                    msg_at_posn(
                        tag_s,
                        f"expected indent={expected_indent}, got {actual_indent}"
                    )
            else:
                msg_at_posn(
                    tag_s,
                    f"{element_name} tag isn't the first non-blank thing on the line"
                )

        # Check indentation of start tag.
        check_tag_indent(start_line_s, node.start_posn, node.element_name)

        start_tag_indent = node.start_posn - start_line_s

        if start_line_s == end_line_s:
            # This node begins and ends on a single line.
            # Therefore, all of its children (if any)
            # also begin and end on the same single line,
            # so no point looking at them.
            # And no point looking at the end tag (if any).
            return

        # This node covers more than one line.

        if node.element_name == '#COMMENT':
            # No children, no end-tag.
            # XXX We could look at the indentation of the text content,
            # but ...
            check_inline_content(node, start_tag_indent + INDENT_UNIT)
            return

        if node.element_name == 'pre' and len(
                node.children
        ) == 1 and node.children[0].element_name == 'code':
            # These cases are always formatted like this:
            #     <pre><code>
            #       foo
            #     </code></pre>
            # which complicates things.
            code = node.children[0]
            assert code.attrs['class'] == 'javascript'
            check_inline_content(code, start_tag_indent + INDENT_UNIT)
            check_tag_indent(end_line_s, code.inner_end_posn,
                             code.element_name)
            return

        if node.element_name in ['emu-grammar', 'emu-alg', 'emu-eqn']:
            # Indentation of content is checked elsewhere, as part of a more detailed check.
            # But check it here anyway.
            check_inline_content(node, start_tag_indent + INDENT_UNIT)

        elif not node.block_child_element_names:
            check_inline_content(node, start_tag_indent + INDENT_UNIT)

        else:
            # So recurse to its children.

            if node.element_name in ['thead', 'tbody']:
                # For obscure reasons, <tr> tags in spec.html
                # generally have the same indentation as
                # the surrounding <thead> and <tbody> tags.
                # If we didn't special-case them here,
                # they would cause a lot of warnings.
                #
                # However, we can't just say:
                #     child_expected_indent = start_tag_indent
                # because there are also a fair number of tables
                # where the <tr> tags *are* indented wrt <thead> and <tbody>.
                # And it would be impolite to complain when they're
                # adhering to the general rule re indenting.
                #
                # So we peek ahead at the indentation of the next line
                next_line_s = start_line_e + 1  # skip the newline character
                if spec.text[next_line_s:next_line_s + start_tag_indent +
                             INDENT_UNIT].isspace():
                    # next line is indented wrt this line
                    child_expected_indent = start_tag_indent + INDENT_UNIT
                else:
                    child_expected_indent = start_tag_indent
            else:
                child_expected_indent = start_tag_indent + INDENT_UNIT

            for child in node.children:
                check_indentation_for_node(child, child_expected_indent)

        # ------------------------------
        # Check indentation of end tag.
        #
        if node.element_name == 'p' and 'br' in node.inline_child_element_names:
            # Normally, a <p> element is all on one line.
            # But if it contains <br> elements,
            # we expect those to be either preceded or followed (or both) by newlines.
            inner_text = node.inner_source_text()
            if inner_text.startswith('\n'):
                # Expect:
                #    <p>
                #      xxx<br>
                #      yyy
                #    </p>
                pass
            else:
                # Expect:
                #    <p>xxx
                #      <br>
                #      yyy</p>
                # In this case, don't check the indentation of the end tag.
                return
        check_tag_indent(end_line_s, node.inner_end_posn, node.element_name)

    def check_inline_content(parent, expected_min_indent):
        if parent.element_name == '#COMMENT':
            isp = parent.start_posn + 4
            iep = parent.end_posn - 3
        else:
            isp = parent.inner_start_posn
            iep = parent.inner_end_posn

        line_ = [
            (mo.end(1) - mo.start(1), mo.end(1))
            for mo in re.compile(r'\n( *)\S').finditer(spec.text, isp, iep)
            # Note that the pattern ignores blank lines.
        ]

        def check_lines(lo, hi, emi):
            if lo == hi: return
            assert lo < hi
            (top_indent, x) = line_[lo]
            if top_indent != emi:
                msg_at_posn(x, f"expected indent={emi}, got {top_indent}")

            siblings = []
            for i in range(lo + 1, hi):
                (indent, x) = line_[i]
                if indent < top_indent:
                    msg_at_posn(x,
                                f"expected indent<{top_indent}, got {indent}")
                    siblings.append(i)  # I guess
                elif indent == top_indent:
                    siblings.append(i)

            for (i, j) in zip([lo] + siblings, siblings + [hi]):
                check_lines(i + 1, j, top_indent + INDENT_UNIT)

        check_lines(0, len(line_), expected_min_indent)

    check_indentation_for_node(spec.doc_node, None)
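
The recursion in check_lines is the least obvious piece: the first line of a span sets the "top" indent, later lines at (or erroneously below) that indent become siblings, and each gap between consecutive siblings is checked as a nested span expected to be one INDENT_UNIT deeper. Below is a simplified, self-contained sketch that operates on a plain list of indents; positions and the separate under-indent warning are omitted.

INDENT_UNIT = 2

def check_lines(indents, lo, hi, expected):
    if lo == hi:
        return []
    problems = []
    top = indents[lo]
    if top != expected:
        problems.append((lo, f"expected indent={expected}, got {top}"))
    siblings = [i for i in range(lo + 1, hi) if indents[i] <= top]
    for (i, j) in zip([lo] + siblings, siblings + [hi]):
        problems += check_lines(indents, i + 1, j, top + INDENT_UNIT)
    return problems

# the line at index 3 is indented 3 spaces where 4 were expected:
print(check_lines([2, 4, 2, 3], 0, 4, 2))
# -> [(3, 'expected indent=4, got 3')]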
Example #27
def check_tables():
    stderr('check_tables...')
    header("checking tables...")
    for et in spec.doc_node.each_descendant_named('emu-table'):
        a_caption = et.attrs.get('caption', None)
        caption_children = [c for c in et.each_child_named('emu-caption')]
        if len(caption_children) == 0:
            e_caption = None
        elif len(caption_children) == 1:
            [emu_caption] = caption_children
            e_caption = emu_caption.inner_source_text().strip()
        else:
            assert 0
        # ----
        if a_caption and not e_caption:
            caption = a_caption
        elif e_caption and not a_caption:
            caption = e_caption
        else:
            assert 0, (a_caption, e_caption)

        if 'id' not in et.attrs:
            msg_at_posn(et.start_posn,
                        f'no id attribute for table with caption "{caption}"')

        header_tr = [tr for tr in et.each_descendant_named('tr')][0]
        header_line = '; '.join(
            th.inner_source_text().strip()
            for th in header_tr.each_descendant_named('th'))
        if 'Field' in caption:
            # print(header_line, ':', caption)
            if re.match(r'^(.+) Fields$', caption):
                pass
            elif re.match(r'^Additional Fields of (.+)$', caption):
                pass
            elif caption == 'Fields of the Private Name':
                # PR 1668
                pass
            else:
                assert 0, caption

        elif 'Slot' in caption:
            if re.match(r'^Internal Slots of (.+)$', caption):
                pass
            else:
                assert 0

        elif 'Method' in caption:
            if 'Internal Methods' in caption:
                assert caption in [
                    'Essential Internal Methods',
                    'Additional Essential Internal Methods of Function Objects'
                ]
                assert header_line == 'Internal Method; Signature; Description'
            elif 'Records' in caption:
                assert re.fullmatch(
                    r'(Additional )?(Abstract )?Methods of .+ Records',
                    caption), caption
                assert header_line == 'Method; Purpose'
            elif caption == 'Proxy Handler Methods':
                assert header_line == 'Internal Method; Handler Method'
            else:
                assert 0

        elif 'Properties' in caption:
            assert re.fullmatch(
                r'<i>\w+</i> Interface( (Required|Optional))? Properties',
                caption)
            assert header_line == 'Property; Value; Requirements'

        elif 'Intrinsic Objects' in caption:
            assert caption in [
                'Well-Known Intrinsic Objects',
                'Additional Well-known Intrinsic Objects',
            ]
            well_known_intrinsics_table_spans.append(
                (et.start_posn, et.end_posn))

            new_names = {}
            assert header_line == 'Intrinsic Name; Global Name; ECMAScript Language Association'
            for tr in et.each_descendant_named('tr'):
                if tr == header_tr: continue
                [oname, global_name, assoc] = [
                    td.inner_source_text().strip()
                    for td in tr.each_descendant_named('td')
                ]

                assert re.fullmatch(r'%\w+%', oname)
                assert oname not in well_known_intrinsics

                assert re.fullmatch(r"|`\w+(\.\w+)*`", global_name)

                if ';' in assoc or 'i.e.' in assoc:
                    mo = re.search(r'; i.e., (%\w+(\.\w+)+%)$', assoc)
                    assert mo
                    new_name = mo.group(1)
                    assert new_name not in well_known_intrinsics
                    assert new_name not in new_names
                    new_names[new_name] = tr.start_posn

                    assert new_name != oname
                    well_known_intrinsics[
                        oname] = f"old name;  2950,$s/{oname}/{new_name}/gc"
                    well_known_intrinsics[new_name] = "new name"
                else:
                    well_known_intrinsics[oname] = "only name"

            # Have to do this after processing the table,
            # because of possible forward references.
            # (E.g., on the row for %AsyncGenerator%,
            # column 3 mentions %AsyncGeneratorFunction.prototype%,
            # which implies the existence of %AsyncGeneratorFunction%,
            # which is declared in column 1 of the *next* row.)
            for (new_name, tr_posn) in new_names.items():
                base_of_new_name = re.sub(r'\..*', '%', new_name)
                if base_of_new_name not in well_known_intrinsics:
                    msg_at_posn(
                        tr_posn,
                        f"Implied intrinsic doesn't exist: {base_of_new_name}")

        else:
            # print('>>>', header_line, '---', caption)
            pass
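
The forward-reference pass at the end depends on recovering a base intrinsic from a dotted name by deleting everything from the first dot onward and restoring the closing percent sign; for example:

import re

print(re.sub(r'\..*', '%', '%AsyncGeneratorFunction.prototype%'))
# -> %AsyncGeneratorFunction%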
Example #28
def check_trailing_whitespace():
    stderr("checking trailing whitespace...")
    header("checking trailing whitespace...")
    for mo in re.finditer(r'(?m)[ \t]+$', spec.text):
        posn = mo.start()
        msg_at_posn(posn, "trailing whitespace")
Example #29
def get_info_from_parameter_listing_in_preamble(oi, parameter_listing):

    assert oi.params is None, oi.name

    # if '_C_' in parameter_listing: stderr('gifpl', parameter_listing)

    if parameter_listing == '':
        assert 0
        return

    if parameter_listing == 'no arguments':
        # 27 cases
        oi.params = []
        return

    if parameter_listing in [
        'zero or more arguments _item1_, _item2_, etc.',
        'zero or more arguments',
        'any number of arguments',
        'one or two arguments',
        'zero or one arguments',
    ]:
        # 24 cases
        # XXX not sure what to do
        return

    if parameter_listing == 'zero or more arguments which form the rest parameter ..._args_':
        oi.params = [ AlgParam('_args_', '...', 'a List of ECMAScript language values') ]
        return

    elif parameter_listing in [
        'some arguments _p1_, _p2_, &hellip; , _pn_, _body_ (where _n_ might be 0, that is, there are no &ldquo; _p_ &rdquo; arguments, and where _body_ might also not be provided)',
        'some arguments _p1_, _p2_, &hellip; , _pn_, _body_ (where _n_ might be 0, that is, there are no &ldquo;_p_&rdquo; arguments, and where _body_ might also not be provided)',
        'some arguments _p1_, _p2_, &hellip; , _pn_, _body_ (where _n_ might be 0, that is, there are no "_p_" arguments, and where _body_ might also not be provided)',
        'some arguments _p1_, _p2_, &hellip; , _pn_, _body_ (where _n_ might be 0, that is, there are no _p_ arguments, and where _body_ might also not be provided)',
    ]:
        # 4 cases
        oi.params = [
            AlgParam('_args_', '...', 'a List of ECMAScript language values'),
            AlgParam('_body_', '[]', 'an ECMAScript language value'),
        ]
        return

    elif parameter_listing  == 'at least one argument _buffer_':
        # 1 case
        # kludgey
        if oi.name == 'DataView':
            oi.params = [
                AlgParam('_buffer_',     '',   'unknown'),
                AlgParam('_byteOffset_', '[]', 'unknown'),
                AlgParam('_byteLength_', '[]', 'unknown'),
            ]
        else:
            assert 0, oi.name
        return

    # --------------------

    # 'Hide' commas within parentheses, so they don't mess up splits:
    def hide_commas(mo):
        return mo.group(0).replace(',', '<COMMA>')
    param_listing = re.sub(r'\(.*?\)', hide_commas, parameter_listing)
    # The commas will be unhidden later.

    # Also here:
    param_listing = re.sub(r'(_argumentsList_), (a List of ECMAScript language values)', r'\1<COMMA> \2', param_listing)

    # ---------------------

    oi.params = []

    # Split the listing into the 'required' and 'optional' parts:
    parts = []
    if 'optional' in param_listing:
        if RE.fullmatch(r'optional (argument.+)', param_listing):
            parts.append(('optional', RE.group(1)))
        elif RE.fullmatch(r'(.+?),? and optional (argument.+)', param_listing):
            parts.append(('required', RE.group(1)))
            parts.append(('optional', RE.group(2)))
        else:
            assert 0, param_listing
    else:
        parts.append(('required', param_listing))

    for (optionality, part) in parts:
        part = sub_many(part, [
            ('^parameters ', ''),
            ('^argument ', ''),
            ('^one argument,? ', ''),
            ('^an argument ', ''),
            ('^arguments ', ''),
            ('^two arguments,? ', ''),
        ])

        pieces = re.split('(, and |, | and )', part)
        assert len(pieces) % 2 == 1
        param_items = pieces[0::2]
        connectors = pieces[1::2]

        if len(connectors) == 0:
            expected_connectors = []
        elif len(connectors) == 1:
            expected_connectors = [' and ']
        else:
            expected_connectors = [', '] * (len(connectors) - 1) + [', and ']

        if connectors != expected_connectors:
            oh_warn()
            oh_warn(f"`{oi.name}` preamble param list:")
            oh_warn(repr(part))
            oh_warn(f"is of the form: X{'X'.join(connectors)}X")
            oh_warn(f"but expected  : X{'X'.join(expected_connectors)}X")

        var_pattern = r'\b_\w+_\b'

        for param_item in param_items:

            # unhide_commas:
            param_item = param_item.replace('<COMMA>', ',')

            parameter_names = re.findall(var_pattern, param_item)
            if len(parameter_names) != 1:
                stderr()
                stderr(f"> {oi.name}: param listing")
                stderr(f"    {parameter_listing!r}")
                stderr(f"  contains item {param_item!r} with {len(parameter_names)} parameter names")
                continue

            [param_name] = parameter_names

            assert param_name not in oi.param_names(), param_name

            if optionality == 'optional':
                punct = '[]'
            elif param_item == 'zero or more _args_':
                punct = '...'
            else:
                punct = ''

            r_param_item = re.sub(var_pattern, 'VAR', param_item)

            for (pat, nat) in [
                (r'VAR, (a List of ECMAScript language values)', r'\1'),
                (r'VAR which is (a possibly empty List of ECMAScript language values)', r'\1'),
                (r'VAR of type BigInt', 'a BigInt'),
                (r'VAR \((.+)\)', r'\1'),
                (r'VAR',          'unknown'),

                (r'zero or more VAR', 'a List of ECMAScript language values'),
                (r'a Boolean flag named VAR', 'a Boolean'),
                (r'(an? .+) VAR', r'\1'),
                (r'(value) VAR',     r'a \1'),
            ]:
                mo = re.fullmatch(pat, r_param_item)
                if mo:
                    nature = mo.expand(nat)
                    break
            else:
                print(f"?   {r_param_item}")
                assert 0

            oi.params.append( AlgParam(param_name, punct, nature) )
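
The comma-hiding step near the top of the function is easy to misread: commas inside parentheses are masked so that the later split on ', ' / ', and ' only sees top-level separators, and they are unhidden per item afterwards. A small demonstration with a made-up listing:

import re

def hide_commas(mo):
    return mo.group(0).replace(',', '<COMMA>')

listing = 'arguments _x_ (either 1, 2, or 3), _y_, and _z_'
masked = re.sub(r'\(.*?\)', hide_commas, listing)
pieces = re.split('(, and |, | and )', masked)
items = [p.replace('<COMMA>', ',') for p in pieces[0::2]]
print(items)
# -> ['arguments _x_ (either 1, 2, or 3)', '_y_', '_z_']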
Example #30
def resolve_oi(hoi, poi):
    # Rather than creating a new AlgHeader,
    # modifies {hoi} if appropriate.

    if poi is None:
        # no preamble, so just use info from heading
        return

    # kind
    assert hoi.species is not None
    if poi.species is None:
        pass
    else:
        if hoi.species == poi.species:
            pass
        else:
            stderr(f"mismatch of 'species' in heading/preamble for {hoi.name}: {hoi.species!r} != {poi.species!r}")
            assert 0

    # name
    assert hoi.name is not None
    if True:
        # We prefer to use the heading-name,
        # ... but we also check that it's consistent with the preamble-name, if any:
        if (
            poi.name is None
            or
            hoi.name == poi.name
            or
            hoi.name.endswith('.' + poi.name)
            or
            hoi.name.endswith(f'.prototype [ {poi.name} ]')
            or
            hoi.name.lower() == poi.name.lower()
            or
            hoi.name.replace(' [ ', '[').replace(' ]', ']') == poi.name
        ):
            pass
        else:
            oh_warn()
            oh_warn(f'resolve_oi: name in heading ({hoi.name}) != name in preamble ({poi.name})')

    # for_phrase
    assert poi.for_phrase is None
    # so just leave hoi.for_phrase as is

    # param_names
    if hoi.params is None:
        # assert poi.params is not None
        hoi.params = poi.params
    elif poi.params is None:
        assert hoi.params is not None
    else:
        # neither is None

        # When the heading contains a signature,
        # it's deemed authoritative.

        if hoi.param_names() != poi.param_names():
            oh_warn()
            oh_warn(hoi.name, 'has param name mismatch:')
            oh_warn(hoi.param_names())
            oh_warn(poi.param_names())
        else:

            for (hoi_param, poi_param) in zip(hoi.params, poi.params):
                assert hoi_param.name == poi_param.name

                if hoi_param.punct != poi_param.punct:
                    oh_warn()
                    oh_warn(f"{hoi.name} parameter {hoi_param.name} has param punct mismatch:")
                    oh_warn('h:', hoi_param.punct)
                    oh_warn('p:', poi_param.punct)

                if hoi_param.nature == 'unknown':
                    hoi_param.nature = poi_param.nature
                else:
                    assert hoi_param.nature == poi_param.nature

    assert hoi.also is None
    assert poi.also is None

    assert hoi.return_nature_node is None
    hoi.return_nature_node = poi.return_nature_node

    assert hoi.description_paras == []
    hoi.description_paras = poi.description_paras