def establish_dependency_levels():
    """Group the clusters in `cluster_` by dependency level.

    A cluster with no direct prerequisites is at level 0; any other cluster
    is one level above its highest-level direct prerequisite.  The computed
    level is recorded on the cluster as `cluster.level`.

    Returns a list whose element L is the list of clusters at level L,
    each such list sorted by the first member of the cluster.
    """
    stderr(" establish_dependency_levels ...")
    levels = []

    def get_level_of_cluster(cluster):
        # Return the cluster's level, computing and filing it on first use.
        if hasattr(cluster, 'level'):
            return cluster.level

        if len(cluster.direct_prereqs) == 0:
            # only depends on predefineds
            level = 0
        else:
            level = 1 + max(
                get_level_of_cluster(prereq)
                for prereq in cluster.direct_prereqs)
        cluster.level = level

        if level < len(levels):
            pass
        elif level == len(levels):
            levels.append([])
        else:
            # A level can only be one past the highest level filed so far,
            # because the prerequisites have been filed before this point.
            assert 0
        levels[level].append(cluster)

        # The recursive call above consumes this value, so it must be
        # returned on the "just computed" path too (not only the cached one).
        return level

    for cluster in cluster_:
        get_level_of_cluster(cluster)

    stderr(" %d levels" % len(levels))

    for clusters_on_level in levels:
        clusters_on_level.sort(key=lambda cluster: cluster.members[0])

    return levels
def replace_defined_term(mo):
    """Return the text that should replace one match of a defined term.

    `mo` is a match object whose group 0 is the whole match and whose
    group 1 is the term.  The match is returned unchanged when the term is
    unknown or when the surrounding `node` is a place where a link is not
    wanted; otherwise the term's entry in `_replacement_for_term` is returned.
    """
    matched_text = mo.group(0)
    term = mo.group(1)

    if term not in _fragid_for_term:
        stderr(f"no fragid for term {term!r} ")
        return matched_text

    term_fragid = _fragid_for_term[term]

    # A reference to the term from within the 'top' of the section
    # in which it's defined isn't made into a link,
    # because the definition is presumably nearby.
    # (It's unclear what the actual rule is.)
    if node.element_name == 'p' and cc_section and term_fragid == cc_section.section_id:
        return matched_text

    # The node itself carries the term's id.
    if 'id' in node.attrs and node.attrs['id'] == term_fragid:
        return matched_text

    unlinkable_elements = (
        'dfn',       # Don't linkify the definition itself.
        'h1',        # Don't put links in clause-titles.
        'emu-xref',  # Don't linkify something that's already linked.
    )
    if node.element_name in unlinkable_elements:
        return matched_text

    return _replacement_for_term[term]
def check_intrinsics():
    """Report %...% references that name a nonexistent or old-named intrinsic.

    Each match is looked up in `well_known_intrinsics`.  A missing entry is
    always reported; an "old name" entry is reported unless the occurrence
    lies strictly inside one of `well_known_intrinsics_table_spans`.
    """
    stderr("checking intrinsics...")
    header("checking intrinsics...")

    intrinsic_reo = re.compile(r'%\S+%')
    ignored_texts = (
        '%name%', '%name.a.b%',               # placeholders
        '%_NativeError_%', '%_TypedArray_%',  # metavariable interpolation
    )

    def is_within_a_table(posn):
        return any(
            table_start < posn < table_end
            for (table_start, table_end) in well_known_intrinsics_table_spans)

    # We can't just scan through spec.text looking for %...%,
    # because that would find occurrences in element IDs,
    # which are lower-cased.
    # Instead, just look in literal (text) nodes.
    # (Note that this skips occurrences of "%<var>Foo</var>Prototype%".)
    for tnode in spec.doc_node.each_descendant_named('#LITERAL'):
        for mo in intrinsic_reo.finditer(spec.text, tnode.start_posn, tnode.end_posn):
            itext = mo.group(0)
            if itext in ignored_texts:
                continue
            itext_start = mo.start(0)

            status = well_known_intrinsics.get(itext, "doesn't exist")
            if status == "doesn't exist":
                msg_at_posn(itext_start, f"Intrinsic doesn't exist: {itext}")
            elif status.startswith("old name") and not is_within_a_table(itext_start):
                msg_at_posn(itext_start, f"Using {status}")
def report(self):
    """Write the per-production delivery counts and, optionally, grouped errors.

    The counts go to the output file named `self.file_base + '_prod_counts'`,
    one line per entry of `self.productions`.

    When `self.group_errors_by_expectation` is true, the recorded parsing
    errors are first written to `self.f_errors`, grouped by what was expected.
    """
    report_file_base = self.file_base + '_prod_counts'
    shared.stderr(f"generating new {report_file_base} ...")

    if self.group_errors_by_expectation:
        # This approach is better when I'm developing a grammar,
        # as it tends to group similar cases.

        def err(x):
            print(x, file=self.f_errors)

        err("%d parsing errors:" % self.error_count)
        err('')

        for (expecting, posns) in sorted(self.error_posns.items()):
            err('X' * 80)
            err("Expecting:")
            for e in expecting:
                err(" %r" % e)
            for posn in posns:
                err(
                    shared.source_line_with_caret_marking_column(
                        math.ceil(posn)))

    f = shared.open_for_output(report_file_base)
    try:
        for prod in self.productions:
            print("%5d %s" % (prod.n_delivered_instances, prod), file=f)
    finally:
        # Previously the file was left open; close it so that the counts
        # are flushed even if formatting a production raises.
        f.close()
def check_for_extra_blank_lines():
    """Report runs of 2+ blank lines, and a blank line before `</emu-clause>`."""
    stderr("checking for extra blank lines...")

    adjacent_blank_lines_reo = re.compile(r'\n( *\n){2,}')
    for mo in adjacent_blank_lines_reo.finditer(spec.text):
        # Point at the last character of the run.
        msg_at_posn(mo.end() - 1, "2 or more adjacent blank lines")

    blank_before_end_clause_reo = re.compile(r'\n( *\n *</emu-clause>)')
    for mo in blank_before_end_clause_reo.finditer(spec.text):
        # Point at the start of the blank line.
        msg_at_posn(mo.start(1), "blank line before end-clause tag")
def main():
    """Dispatch on the first command-line argument.

    With no arguments, print a usage line.  `--all` runs `test_all()`;
    `--all-dir=<dir>` runs `test_all_in_dir(<dir>)`.
    """
    # NOTE(review): the usage line advertises `<file> ...`, but no branch
    # visible here handles plain file arguments; they fall through silently.
    args = sys.argv[1:]
    if not args:
        stderr(
            f"usage: {sys.argv[0]} [ --all | --all-dir=<dir> | <file> ... ]")
        return

    first_arg = args[0]
    if first_arg == '--all':
        test_all()
        return

    mo = re.fullmatch(r'--all-dir=(\S+)', first_arg)
    if mo:
        test_all_in_dir(mo.group(1))
def check_tables():
    """Check the caption and header row of every `emu-table` in the spec.

    Each table is first passed to `analyze_table`; the checks then read the
    `_caption`, `_header_row` and `_data_rows` attributes of the table.
    Tables are classified by words in the caption.  For recognized kinds,
    the header line (cell texts joined by '; ') is asserted to have the
    expected form, and for some kinds one column is parsed as
    'field_value_type' pseudocode.  Violations are raised as assertion failures.
    """
    stderr('check_tables...')
    for et in spec.doc_node.each_descendant_named('emu-table'):
        analyze_table(et)

        caption = et._caption
        header_line = '; '.join(et._header_row.cell_texts)

        def check_value_descriptions_in_column(col_index):
            # Parse the cell in column `col_index` of every data row.
            for row in et._data_rows:
                # NOTE(review): col_name and cell_value are assigned but never used.
                col_name = et._header_row.cell_texts[col_index]
                cell_value = row.cell_texts[col_index]
                Pseudocode.parse(row.cell_nodes[col_index], 'field_value_type')

        if 'Field' in caption or ('Method' in caption and 'Record' in caption):
            # See records.process_tables()
            pass

        elif 'Slot' in caption:
            if re.match(r'^Internal Slots of (.+)$', caption):
                if header_line == 'Internal Slot; Type; Description':
                    check_value_descriptions_in_column(1)
                else:
                    assert 0, header_line
            else:
                assert 0

        elif 'Method' in caption:
            # (Captions containing both 'Method' and 'Record' were handled above.)
            if 'Internal Methods' in caption:
                assert caption in ['Essential Internal Methods', 'Additional Essential Internal Methods of Function Objects']
                assert header_line == 'Internal Method; Signature; Description'
            elif caption == 'Proxy Handler Methods':
                assert header_line == 'Internal Method; Handler Method'
            else:
                assert 0

        elif 'Properties' in caption:
            assert re.fullmatch(r'<i>\w+</i> Interface( (Required|Optional))? Properties', caption)
            assert header_line == 'Property; Value; Requirements'
            check_value_descriptions_in_column(1)

        elif caption == 'Attributes of an Object property':
            assert header_line == 'Attribute Name; Types of property for which it is present; Value Domain; Default Value; Description'
            check_value_descriptions_in_column(2)

        elif 'Intrinsic Objects' in caption:
            # see Section.extract_intrinsic_info_from_WKI_section()
            # and intrinsics.each_row_in_wki_table()
            pass

        else:
            # Any other table is accepted without checks.
            # print('>>>', header_line, '---', caption)
            pass
def main():
    """Render the restored spec to 'index.html' in the given output directory.

    Expects the output directory as the first command-line argument;
    prints a usage line and exits with status 1 when it is missing.
    The output file is held in the module-level `_f` while rendering.
    """
    global _f

    if len(sys.argv) < 2:
        # Previously a missing argument surfaced as a bare IndexError.
        stderr("usage: %s <output-dir>" % sys.argv[0])
        sys.exit(1)

    shared.register_output_dir(sys.argv[1])
    spec.restore()

    prep_xrefs()
    prep_autolinking()
    prep_grammar()

    stderr("render ...")

    _f = shared.open_for_output('index.html')
    try:
        render_node(spec.doc_node)
    finally:
        # Close the file even when rendering fails part-way.
        _f.close()
def prep_grammar():
    """Record the left-hand-side nonterminal of every production in every `emu-grammar`.

    For each production, the nonterminal is added to
    `_lhs_nts_in_namespace_` under the grammar's namespace, and a default
    xref text (the nonterminal wrapped in `<emu-nt>`) is registered for
    the fragment id of its definition.
    """
    stderr("prep_grammar ...")

    lhs_reo = re.compile(r'^ *(\w+)')

    for emu_grammar in spec.doc_node.each_descendant_named('emu-grammar'):
        ns = get_grammar_namespace(emu_grammar)
        source_text = emu_grammar.inner_source_text()
        trimmed_body = emu_grammars.trim_newlines(source_text)

        # Productions are separated by one or more blank lines.
        for production in re.split(r'\n{2,}', trimmed_body):
            mo = lhs_reo.match(production)
            assert mo
            lhs_nt = mo.group(1)

            _lhs_nts_in_namespace_[ns].add(lhs_nt)

            fragid = fragid_for_nt_def(lhs_nt, ns)
            xref_text = '<emu-nt>' + lhs_nt + '</emu-nt>'
            _default_xref_text_for_fragid_[fragid] = xref_text
def convert_to_header(self):
    """Build and return an `AlgHeader` from the values in `self.fields`.

    Calls `self._dedupe()` first.  The 'kind' values select the header's
    species, 'name' and 'reta' may each have at most one value, 'pl' supplies
    the parameter listing, 'retn' values are joined with ' or ', and 'desc'
    becomes the description paragraphs.
    """
    self._dedupe()
    poi = AlgHeader()

    def join_field_values(key, joiner = ' & '):
        # The values for `key` joined by `joiner`, or None if there are none.
        values = self.fields[key]
        return joiner.join(values) if values else None

    def at_most_one_value(key):
        # The sole value for `key`, or None if there are none.
        values = self.fields[key]
        if values:
            assert len(values) == 1, values
            return values[0]
        return None

    species_for_kind = {
        'anonymous built-in function object that is defined once for each realm' : 'bif: intrinsic',
        'anonymous built-in function' : 'bif: * per realm',
        'accessor property' : 'bif: intrinsic: accessor function',
        'constructor' : 'bif: intrinsic',
        'function' : 'bif: intrinsic',
        'method' : 'bif: intrinsic',
        None : None,
    }
    kind = join_field_values('kind')
    poi.species = species_for_kind[kind]

    poi.name = at_most_one_value('name')

    # The one pair of parameter listings that may occur together;
    # in that case the second (more specific) one is used.
    tolerated_pair = [
        'zero or more arguments',
        'zero or more arguments which form the rest parameter ..._args_'
    ]
    pl_values = self.fields['pl']
    n_pl_values = len(pl_values)
    if n_pl_values == 0:
        poi.params = None
    elif n_pl_values == 1:
        get_info_from_parameter_listing_in_preamble(poi, pl_values[0])
    elif pl_values == tolerated_pair:
        get_info_from_parameter_listing_in_preamble(poi, pl_values[1])
    else:
        stderr(f"{poi.name} has multi-pl: {pl_values}")
        assert 0

    poi.return_nature_normal = join_field_values('retn', ' or ')
    poi.return_nature_abrupt = at_most_one_value('reta')
    poi.description_paras = self.fields['desc']

    return poi
def check_dfns():
    """Populate `spec.dfn_for_term_`, mapping each defined term to its `dfn` node.

    The key is the dfn's inner source text; a non-empty 'variants'
    attribute is registered as an additional key.  Duplicate keys are
    assertion failures.
    """
    stderr('check_dfns...')

    dfn_for_term = {}
    spec.dfn_for_term_ = dfn_for_term

    for dfn in spec.doc_node.each_descendant_named('dfn'):
        term = dfn.inner_source_text()
        assert term not in dfn_for_term
        dfn_for_term[term] = dfn

        variants = dfn.attrs.get('variants')
        if not variants:
            continue

        # A comma-separated list of variants isn't handled.
        if ',' in variants:
            assert NYI
        assert variants not in dfn_for_term
        dfn_for_term[variants] = dfn
def main():
    """Read the spec named on the command line and run the sequence of checks.

    Expects exactly two arguments, `<output-dir>` and `<spec.html>`;
    otherwise prints a usage line and exits with status 1.
    """
    if len(sys.argv) != 3:
        stderr("usage: %s <output-dir> <spec.html>" % sys.argv[0])
        sys.exit(1)

    (_, outdir, spec_path) = sys.argv

    shared.register_output_dir(outdir)

    shared.msg_at_posn_start()

    spec.read_source_file(spec_path)

    spec.doc_node = HTML.parse_and_validate()

    # Now that errors/warnings are interleaved with a copy of the spec text,
    # the order in which we call these functions
    # only matters when two msg_at_posn() calls
    # address the exact same position.
    for check in (
        check_characters,
        check_indentation,
        check_trailing_whitespace,
        check_for_extra_blank_lines,
        check_ids,
        check_dfns,
    ):
        check()

    Pseudocode.create_all_parsers()

    check_tables()
    records.process_tables()
    Section.make_and_check_sections()
    records.print_schema_hierarchies()
    process_intrinsics_facts()
    check_references_to_intrinsics()
    emu_grammars.do_stuff_with_emu_grammars()

    Pseudocode.do_stuff_with_pseudocode()

    check_globals()

    shared.msg_at_posn_finish()

    headers.generate_spec_for_PR_545()

    spec.save()
def __init__(self, lines):
    """Initialize from a list of lines: a regex pattern, then `key=template` lines.

    The first line is removed from `lines` (the caller's list is modified)
    and compiled as the pattern.  Each remaining line must have the form
    `key=template`, where the key consists of word characters and spaces;
    a malformed line is reported and the process exits with status 1.
    A 'v' template defaults to the empty string.
    """
    self.raw_pattern = lines.pop(0)
    self.reo = re.compile(self.raw_pattern)

    templates = {}
    self.templates = templates

    line_reo = re.compile(r'([\w ]+)=(.*)')
    for line in lines:
        mo = line_reo.fullmatch(line)
        if mo is None:
            stderr(f"bad line: {line}")
            sys.exit(1)
        key = mo.group(1)
        template = mo.group(2)
        assert key not in templates
        templates[key] = template

    templates.setdefault('v', '')

    self.count = 0
def tokenize(self, s, start_posn, end_posn, generate_dent_tokens, initial_indentation):
    """Yield the tokens found in `s[start_posn:end_posn]`.

    Each token is a tuple `(prod, tok_s_posn, tok_e_posn, text)`, where
    `prod` is the entry of `self.prod_for_pi` for the named group of
    `self.reo` that matched, and the positions are the span of that group.
    Tokenizing stops after yielding a token whose `prod.lhs_s` is '{_eos_}'.

    When `generate_dent_tokens` is true, each '{nlai}' token is preceded by
    zero-width tokens (one `indent_prod` or `outdent_prod` per 2 columns of
    change) reflecting how its indentation differs from the previous one,
    starting from `initial_indentation`.

    If no token matches at the current position, the failure is reported
    and an assertion fails.
    """
    prev_indentation = initial_indentation
    posn = start_posn
    while True:
        mo = self.reo.match(s, posn, end_posn)
        if mo is None:
            # Report the position where matching failed.
            # (This used to refer to tok_s_posn, which is unbound if the
            # very first match fails, and otherwise is the start of the
            # *previous* token.)
            shared.stderr(
                f"\nTokenization error at: {s[posn:min(posn+20,end_posn)]}...\n",
                shared.source_line_with_caret_marking_column(posn))
            assert 0

        pi = mo.lastgroup
        text = mo.group(pi)
        (tok_s_posn, tok_e_posn) = mo.span(pi)
        # XXX The sub-pattern associated with this group
        # might have a capturing subgroup
        # (whose value might be more useful than the group's),
        # but accessing it would be tricky,
        # because it doesn't have a name,
        # and we don't know its number in the overall pattern.
        # Either would take a bit of work.

        prod = self.prod_for_pi[pi]

        if generate_dent_tokens and prod.lhs_s == '{nlai}':
            this_indentation = len(text) - 1  # subtract 1 for the \n
            change_in_indentation = this_indentation - prev_indentation
            indent_unit = 2
            assert change_in_indentation % indent_unit == 0
            n_dents = change_in_indentation // indent_unit
            if n_dents > 0:
                dent_prod = indent_prod
            elif n_dents < 0:
                dent_prod = outdent_prod
            else:
                dent_prod = None
            for _ in range(abs(n_dents)):
                # Zero-width token at the start of the newline token.
                yield (dent_prod, tok_s_posn, tok_s_posn, '')
            prev_indentation = this_indentation

        yield (prod, tok_s_posn, tok_e_posn, text)

        if prod.lhs_s == '{_eos_}':
            break

        posn = tok_e_posn
def check_characters():
    """Report every character of the spec text outside newline and ' '..'~'.

    U+211D is exempt.  When `ascii_replacement` has an entry for the
    character, the message suggests it.
    """
    stderr("checking characters...")
    header("checking characters...")

    unexpected_char_reo = re.compile(r'[^\n -~]')
    for mo in unexpected_char_reo.finditer(spec.text):
        character = mo.group(0)
        if character == '\u211d':
            # PR 1135 introduced tons of these
            continue

        suggestion = (
            ": maybe change to %s" % ascii_replacement[character]
            if character in ascii_replacement
            else ''
        )
        msg_at_posn(
            mo.start(),
            "non-ASCII character U+%04x%s" % (ord(character), suggestion))
def check_characters():
    """Report every character of the spec text outside newline and ' '..'~'.

    U+211D, U+2124 and U+1D53D are exempt.  When `ascii_replacement` has
    an entry for the character, the message suggests it.
    """
    stderr("checking characters...")

    tolerated_characters = (
        '\u211d',  # PR 1135 introduced tons of these
        '\u2124',
        '\U0001d53d',
    )

    # Note that this will (among other things) find and complain about TAB characters.
    unexpected_char_reo = re.compile(r'[^\n -~]')
    for mo in unexpected_char_reo.finditer(spec.text):
        character = mo.group(0)
        if character in tolerated_characters:
            continue

        suggestion = (
            ": maybe change to %s" % ascii_replacement[character]
            if character in ascii_replacement
            else ''
        )
        msg_at_posn(
            mo.start(),
            "non-ASCII character U+%04x%s" % (ord(character), suggestion))
def check_globals():
    """Compare property names under 'sec-global-object' with `intrinsics.global_property_names`.

    Names are taken from the titles of the property sections, except those
    whose parent section is 'Value Properties of the Global Object'.
    Names present on only one side are listed on stderr.
    """
    stderr('check_globals...')

    def show_names_set(label, names_set):
        for name in sorted(names_set):
            stderr(f"> {label}: {name}")

    global_object_property_names = set()
    sgo = spec.node_with_id_['sec-global-object']
    for section in sgo.each_descendant_that_is_a_section():
        if '_property' not in section.section_kind:
            continue

        # The title is a name, optionally followed by a parenthesized part.
        mo = re.fullmatch(r'(\w+)( \(.*\))?', section.section_title)
        assert mo
        global_property_name = mo.group(1)

        if section.parent.section_title == 'Value Properties of the Global Object':
            continue
        global_object_property_names.add(global_property_name)

    only_in_global_object = global_object_property_names - intrinsics.global_property_names
    show_names_set("In 'The Global Object' but not in WKI", only_in_global_object)

    only_in_wki = intrinsics.global_property_names - global_object_property_names
    show_names_set("In WKI but not in 'The Global Object'", only_in_wki)
def _check_section_order(section):
    """Report subsections that are out of "alphabetical order", recursively.

    Only sections of certain kinds have their children checked, and
    children of certain kinds are ignored.  Titles are compared via a
    normalized key.
    """
    kinds_with_ordered_children = (
        'group_of_properties1',
        'group_of_properties2',
        'properties_of_an_intrinsic_object',
        'properties_of_instances',
    )
    child_kinds_to_ignore = (
        'group_of_properties1',
        'group_of_properties2',
        'catchall',
        'anonymous_built_in_function',
    )

    def ordering_key(child):
        # Normalize the child's title into the string used for comparison.
        key = child.section_title.lower()
        key = key.replace('int8', 'int08')
        key = re.sub(r'^get ', '', key)
        if section.section_title == 'Properties of the RegExp Prototype Object':
            symbol_replacement = r'.\1'
        else:
            symbol_replacement = r'.zz_\1'
        return re.sub(r' \[ @@(\w+) \]', symbol_replacement, key)

    if section.element_name == '#DOC':
        stderr("_check_section_order...")

    elif section.section_kind in kinds_with_ordered_children:
        prev_title = None
        prev_key = None
        for child in section.section_children:
            if child.section_kind in child_kinds_to_ignore:
                continue

            assert re.search(r'_property(_xref)?$', child.section_kind), child.section_kind
            key = ordering_key(child)

            if prev_key is not None and key <= prev_key:
                msg_at_posn(
                    child.start_posn,
                    '"%s" should be before "%s"' % (child.section_title, prev_title))

            prev_key = key
            prev_title = child.section_title

    for child in section.section_children:
        _check_section_order(child)
def compute_dependency_levels(self):
    """Find the strongly connected components and return them grouped by level.

    After `self.find_strongly_connected_components()`, each cluster in
    `cluster_` has its members sorted, gets a `contains_a_cycle` flag,
    and has the clusters of its members' arc targets appended to
    `direct_prereqs`.  The result is that of `establish_dependency_levels()`.
    """
    self.find_strongly_connected_components()
    stderr(' %d SCCs' % len(cluster_))

    stderr(" sorting...")
    for cluster in cluster_:
        cluster.members.sort()

    stderr(" dependencies between SCCs...")
    for cluster in cluster_:
        cluster.contains_a_cycle = False

        for vertex in cluster.members:
            for target in self.arcs_from_[vertex]:
                target_cluster = self.cluster_for_[target]
                if target_cluster is cluster:
                    # a "sideways" dependency
                    cluster.contains_a_cycle = True
                elif target_cluster not in cluster.direct_prereqs:
                    cluster.direct_prereqs.append(target_cluster)

        # A multi-member cluster must contain a cycle.
        # (A single-member cluster still might.)
        if len(cluster.members) > 1:
            assert cluster.contains_a_cycle

    return establish_dependency_levels()
def main():
    """Read the spec named on the command line and run the sequence of checks.

    Expects exactly two arguments, `<output-dir>` and `<spec.html>`;
    otherwise prints a usage line and exits with status 1.
    """
    if len(sys.argv) != 3:
        stderr("usage: %s <output-dir> <spec.html>" % sys.argv[0])
        sys.exit(1)

    (_, outdir, spec_path) = sys.argv

    shared.register_output_dir(outdir)

    shared.msg_at_posn_start()

    spec.read_source_file(spec_path)

    spec.doc_node = HTML.parse_and_validate()

    # It feels like it would make more sense to check characters and indentation
    # before parsing/checking markup, because they're more 'primitive' than markup.
    # But when it comes to fixing errors, you should make sure
    # you've got the markup correct before fiddling with indentation.
    # So to encourage that, have markup errors appear before indentation errors,
    # i.e. run the markup checks before indentation checks.
    # (Not sure about characters.)
    for check in (
        check_indentation,
        check_trailing_whitespace,
        check_characters,
        check_ids,
        check_tables,
        check_intrinsics,
    ):
        check()

    Section.make_and_check_sections()
    emu_grammars.do_stuff_with_emu_grammars()

    Pseudocode.do_stuff_with_pseudocode()

    shared.msg_at_posn_finish()

    spec.save()
def write_header_info():
    """Write a summary of `spec.alg_info_` to the 'header_info' output file.

    For the 'op' and then the 'bif' algorithms, in sorted order, the name,
    species and headers of each are listed, with the number of definitions
    under each header.  Consistency between each algorithm, its headers and
    their definitions is asserted along the way.
    """
    stderr("write_header_info ...")

    out = shared.open_for_output('header_info')

    def put(*args):
        print(*args, file=out)

    for bif_or_op in ('op', 'bif'):
        put('X'*40)
        put(bif_or_op)

        alg_infos = spec.alg_info_[bif_or_op]
        for (alg_name, alg_info) in sorted(alg_infos.items()):
            assert alg_info.name == alg_name
            assert alg_info.bif_or_op == bif_or_op

            put()
            put(f" {alg_info.name}")
            put(f" {alg_info.species}")
            put(f" {len(alg_info.headers)} headers:")

            n_defns_via_headers = 0
            for alg_header in alg_info.headers:
                assert alg_header.name == alg_name
                assert alg_header.species == alg_info.species

                put(f" --")
                if alg_header.for_phrase:
                    put(f" for: {alg_header.for_phrase}")
                # Not shown: alg_header.params, .also,
                # .return_nature_{normal,abrupt}, .description_paras

                n_defns = len(alg_header.u_defns)
                put(f" {n_defns} defns")
                n_defns_via_headers += n_defns

                for alg_defn in alg_header.u_defns:
                    assert alg_defn.header is alg_header

            # Every definition should be reachable through some header.
            assert n_defns_via_headers == len(alg_info.all_definitions())
            # Not shown: alg_info.invocations, .callees, .callers

        put()

    out.close()
def check_references_to_intrinsics():
    """Report %...% references whose base intrinsic isn't in `well_known_intrinsics`.

    The base is the reference with everything from the first '.' up to
    (not including) the next '%' removed.
    """
    stderr("check_references_to_intrinsics...")

    intrinsic_reo = re.compile(r'%\S+%')
    ignored_texts = (
        '%name%', '%name.a.b%',               # placeholders
        '%_NativeError_%', '%_TypedArray_%',  # metavariable interpolation
    )

    # We can't just scan through spec.text looking for %...%,
    # because that would find occurrences in element IDs,
    # which are lower-cased.
    # Instead, just look in literal (text) nodes.
    # (Note that this skips occurrences of "%<var>Foo</var>Prototype%".)
    for tnode in spec.doc_node.each_descendant_named('#LITERAL'):
        for mo in intrinsic_reo.finditer(spec.text, tnode.start_posn, tnode.end_posn):
            itext = mo.group(0)
            if itext in ignored_texts:
                continue

            base_intrinsic = re.sub(r'\.[^%]+', '', itext)
            if base_intrinsic in well_known_intrinsics:
                continue
            msg_at_posn(mo.start(0), f"Intrinsic doesn't exist: {base_intrinsic}")
def prep_xrefs():
    """(Re)build the module-level maps from fragment id to xref text.

    Clauses and annexes map to their section number (default) and title;
    tables map to 'Table N', numbered in document order; `dfn` elements
    that have an id map to their inner source text.
    """
    global _default_xref_text_for_fragid_, _title_xref_text_for_fragid_

    stderr("prep_xrefs ...")

    _default_xref_text_for_fragid_ = {}
    _title_xref_text_for_fragid_ = {}

    section_name_reo = re.compile('emu-clause|emu-annex')
    for section in spec.doc_node.each_descendant_named(section_name_reo):
        assert 'id' in section.attrs
        fragid = section.attrs['id']
        _default_xref_text_for_fragid_[fragid] = section.section_num
        _title_xref_text_for_fragid_[fragid] = section.section_title

    table_name_reo = re.compile('emu-table|emu-import')
    tablish_elements = spec.doc_node.each_descendant_named(table_name_reo)
    for (table_i, element) in enumerate(tablish_elements, start=1):
        element_name = element.element_name
        if element_name == 'emu-table':
            if 'id' not in element.attrs:
                # No way to xref the table (but it still gets counted).
                continue
            fragid = element.attrs['id']
        elif element_name == 'emu-import':
            # Currently, each emu-import (of file foo.html)
            # defines one emu-table (with id 'foo').
            # XXX Really, we should do something more robust.
            fragid = element.attrs['href'].replace('.html', '')
        else:
            assert 0
        _default_xref_text_for_fragid_[fragid] = 'Table %d' % table_i

    for dfn in spec.doc_node.each_descendant_named('dfn'):
        if 'id' not in dfn.attrs:
            continue
        fragid = dfn.attrs['id']
        _default_xref_text_for_fragid_[fragid] = dfn.inner_source_text()
def parse_and_validate():
    """Parse the document, validate it, and return its root node.

    If the node returned by `_parse()` is not a '#DOC' node, the problem is
    reported on stderr and the process exits with a non-zero status.
    """
    doc_node = _parse()
    if doc_node.element_name != '#DOC':
        stderr("After _parse(), doc_node.element_name should be #DOC, is", doc_node.element_name)
        stderr("start_posn ~", shared.convert_posn_to_linecol(doc_node.start_posn))
        stderr("aborting due to above error")
        # This is an error exit, so signal failure to the caller of the
        # program (a bare sys.exit() would report success).
        sys.exit(1)
    _validate(doc_node)
    return doc_node
def check_ref_ids(refnode):
    """Check the `emu-xref` elements in the subtree rooted at `refnode`.

    For each `emu-xref`, the id it refers to is added to `refids`, and a
    message is issued when:
      - it refers to a `dfn` and auto-linking would have produced the link;
      - it refers to an element of an unexpected kind;
      - it refers to an id that doesn't exist (other than a few ids
        that are declared in emu-imported files).

    An `emu-xref` without an 'href' attribute is reported on stderr and the
    process exits with a non-zero status.
    """
    if refnode.element_name == 'emu-xref':
        if 'href' not in refnode.attrs:
            stderr("At", shared.convert_posn_to_linecol(refnode.start_posn))
            stderr("emu-xref element doesn't have an 'href' attribute")
            stderr("aborting")
            # This is an error exit, so signal failure to the caller of the
            # program (a bare sys.exit() would report success).
            sys.exit(1)

        href = refnode.attrs['href']
        assert href.startswith('#')
        refid = href[1:]
        refids.add(refid)

        if refid in node_with_id_:
            defnode = node_with_id_[refid]
            if defnode.element_name in ['emu-clause', 'emu-annex', 'emu-table', 'emu-alg', 'emu-note']:
                pass
            elif defnode.element_name == 'dfn':
                deftext = defnode.inner_source_text()
                reftext = refnode.inner_source_text()
                assert deftext != ''
                if reftext != '' and reftext.lower() != deftext.lower():
                    # Auto-linking would fail to make `reftext` into a link?
                    # So we have to use an emu-xref?
                    pass
                else:
                    msg_at_node(refnode, f"emu-xref used when auto-linking would work: '{refid}'")
            else:
                msg_at_node(defnode, f"unexpected defnode element-name <{defnode.element_name}>")

        else:
            if refid in [
                'table-binary-unicode-properties',
                'table-nonbinary-unicode-properties',
                'table-unicode-general-category-values',
                'table-unicode-script-values',
            ]:
                # Those ids are declared in emu-imported files.
                pass
            else:
                msg_at_node(refnode, f"emu-xref refers to nonexistent id: {refid}")

    for child in refnode.children:
        check_ref_ids(child)
def check_indentation():
    """Check the indentation of tags and of inline content throughout the spec.

    Walks the node tree from `spec.doc_node`.  Children of the '#DOC' node
    are expected at indent 0, and children of other elements are generally
    expected one INDENT_UNIT deeper than their parent's start tag.
    Problems are reported via `msg_at_posn`.
    """
    stderr("check_indentation...")
    header("checking indentation...")

    INDENT_UNIT = 2

    def check_indentation_for_node(node, expected_indent):
        # Check the start tag, content and end tag of `node`,
        # whose start tag should be indented by `expected_indent` columns.

        if node.element_name == '#DOC':
            assert expected_indent is None
            for child in node.children:
                check_indentation_for_node(child, 0)
            return

        if node.element_name == '#LITERAL':
            # Mostly whitespace, but also:
            #     Editors:
            #     For each pair (_R_, _W_) ...
            #     For each element _eventsRecord_
            # whose indentation we don't care about?
            return

        def get_span_of_line_containing_posn(posn):
            # Excludes any newline at start or end.
            s = spec.text.rfind('\n', 0, posn)
            e = spec.text.find('\n', posn)
            return (0 if s == -1 else s + 1,
                    len(spec.text) if e == -1 else e)

        (start_line_s, start_line_e) = get_span_of_line_containing_posn(node.start_posn)
        (end_line_s, end_line_e) = get_span_of_line_containing_posn(node.end_posn)

        def check_tag_indent(line_s, tag_s, element_name):
            # The tag at `tag_s` should be preceded on its line (which starts
            # at `line_s`) by exactly `expected_indent` whitespace characters.
            portion_of_line_before_tag = spec.text[line_s:tag_s]
            if (portion_of_line_before_tag == ''
                    or portion_of_line_before_tag.isspace()):
                actual_indent = len(portion_of_line_before_tag)
                if actual_indent != expected_indent:
                    msg_at_posn(
                        tag_s,
                        f"expected indent={expected_indent}, got {actual_indent}"
                    )
            else:
                msg_at_posn(
                    tag_s,
                    f"{element_name} tag isn't the first non-blank thing on the line"
                )

        # Check indentation of start tag.
        check_tag_indent(start_line_s, node.start_posn, node.element_name)

        start_tag_indent = node.start_posn - start_line_s

        if start_line_s == end_line_s:
            # This node begins and ends on a single line.
            # Therefore, all of its children (if any)
            # also begin and end on the same single line,
            # so no point looking at them.
            # And no point looking at the end tag (if any).
            return

        # This node covers more than one line.

        if node.element_name == '#COMMENT':
            # No children, no end-tag.
            # XXX We could look at the indentation of the text content,
            # but ...
            check_inline_content(node, start_tag_indent + INDENT_UNIT)
            return

        if node.element_name == 'pre' and len(
                node.children
        ) == 1 and node.children[0].element_name == 'code':
            # These cases are always formatted like this:
            #     <pre><code>
            #       foo
            #     </code></pre>
            # which complicates things.
            code = node.children[0]
            assert code.attrs['class'] == 'javascript'
            check_inline_content(code, start_tag_indent + INDENT_UNIT)
            check_tag_indent(end_line_s, code.inner_end_posn,
                             code.element_name)
            return

        if node.element_name in ['emu-grammar', 'emu-alg', 'emu-eqn']:
            # Indentation of content is checked elsewhere, as part of a more detailed check.
            # But check it here anyway.
            check_inline_content(node, start_tag_indent + INDENT_UNIT)

        elif not node.block_child_element_names:
            check_inline_content(node, start_tag_indent + INDENT_UNIT)

        else:
            # So recurse to its children.

            if node.element_name in ['thead', 'tbody']:
                # For obscure reasons, <tr> tags in spec.html
                # generally have the same indentation as
                # the surrounding <thead> and <tbody> tags.
                # If we didn't special-case them here,
                # they would cause a lot of warnings.
                #
                # However, we can't just say:
                #     child_expected_indent = start_tag_indent
                # because there are also a fair number of tables
                # where the <tr> tags *are* indented wrt <thead> and <tbody>.
                # And it would be impolite to complain when they're
                # adhering to the general rule re indenting.
                #
                # So we peek ahead at the indentation of the next line
                next_line_s = start_line_e + 1  # skip the newline character
                if spec.text[next_line_s:next_line_s + start_tag_indent +
                             INDENT_UNIT].isspace():
                    # next line is indented wrt this line
                    child_expected_indent = start_tag_indent + INDENT_UNIT
                else:
                    child_expected_indent = start_tag_indent
            else:
                child_expected_indent = start_tag_indent + INDENT_UNIT

            for child in node.children:
                check_indentation_for_node(child, child_expected_indent)

        # ------------------------------
        # Check indentation of end tag.
        #
        if node.element_name == 'p' and 'br' in node.inline_child_element_names:
            # Normally, a <p> element is all on one line.
            # But if it contains <br> elements,
            # we expect those to be either preceded or followed (or both) by newlines.
            inner_text = node.inner_source_text()
            if inner_text.startswith('\n'):
                # Expect:
                #     <p>
                #       xxx<br>
                #       yyy
                #     </p>
                pass
            else:
                # Expect:
                #     <p>xxx
                #       <br>
                #       yyy</p>
                # In this case, don't check the indentation of the end tag.
                return

        check_tag_indent(end_line_s, node.inner_end_posn, node.element_name)

    def check_inline_content(parent, expected_min_indent):
        # Check the indentation of the non-blank lines inside `parent`.
        # The first such line should be indented by `expected_min_indent`;
        # deeper lines are checked as nested groups.
        if parent.element_name == '#COMMENT':
            # Skip the 4-character opener and 3-character closer of the comment.
            isp = parent.start_posn + 4
            iep = parent.end_posn - 3
        else:
            isp = parent.inner_start_posn
            iep = parent.inner_end_posn

        # For each non-blank line: (its indent, the position just after the indent)
        line_ = [
            (mo.end(1) - mo.start(1), mo.end(1))
            for mo in re.compile(r'\n( *)\S').finditer(spec.text, isp, iep)
            # Note that the pattern ignores blank lines.
        ]

        def check_lines(lo, hi, emi):
            # Check lines lo..hi-1 as a group whose first line
            # should have indent `emi`.
            if lo == hi:
                return
            assert lo < hi
            (top_indent, x) = line_[lo]
            if top_indent != emi:
                msg_at_posn(x, f"expected indent={emi}, got {top_indent}")

            siblings = []
            for i in range(lo + 1, hi):
                (indent, x) = line_[i]
                if indent < top_indent:
                    # The line is shallower than the group's first line,
                    # so what was expected is an indent of *at least* top_indent.
                    # (The message used to say "<", the opposite of the complaint.)
                    msg_at_posn(x, f"expected indent>={top_indent}, got {indent}")
                    siblings.append(i)  # I guess
                elif indent == top_indent:
                    siblings.append(i)

            # The lines between consecutive siblings form nested groups.
            for (i, j) in zip([lo] + siblings, siblings + [hi]):
                check_lines(i + 1, j, top_indent + INDENT_UNIT)

        check_lines(0, len(line_), expected_min_indent)

    check_indentation_for_node(spec.doc_node, None)
def check_tables():
    """Check the caption and header row of every `emu-table` in the spec.

    The caption comes from either the table's 'caption' attribute or its
    single `emu-caption` child (exactly one of the two must be present).
    Tables are classified by words in the caption, and for recognized kinds
    the caption and/or header line (the `th` texts joined by '; ') are
    asserted to have the expected form.

    For the 'Intrinsic Objects' tables, this also appends the table's span
    to `well_known_intrinsics_table_spans` and records each row's names in
    `well_known_intrinsics`.
    """
    stderr('check_tables...')
    header("checking tables...")
    for et in spec.doc_node.each_descendant_named('emu-table'):
        # The caption as given by the attribute (a_) and by the child element (e_).
        a_caption = et.attrs.get('caption', None)
        caption_children = [c for c in et.each_child_named('emu-caption')]
        if len(caption_children) == 0:
            e_caption = None
        elif len(caption_children) == 1:
            [emu_caption] = caption_children
            e_caption = emu_caption.inner_source_text().strip()
        else:
            assert 0
        # ----
        if a_caption and not e_caption:
            caption = a_caption
        elif e_caption and not a_caption:
            caption = e_caption
        else:
            # Both or neither.
            assert 0, (a_caption, e_caption)

        if 'id' not in et.attrs:
            msg_at_posn(et.start_posn,
                        f'no id attribute for table with caption "{caption}"')

        # The first `tr` in the table is taken to be the header row.
        header_tr = [tr for tr in et.each_descendant_named('tr')][0]
        header_line = '; '.join(
            th.inner_source_text().strip()
            for th in header_tr.each_descendant_named('th'))
        if 'Field' in caption:
            # print(header_line, ':', caption)
            if re.match(r'^(.+) Fields$', caption):
                pass
            elif re.match(r'^Additional Fields of (.+)$', caption):
                pass
            elif caption == 'Fields of the Private Name':
                # PR 1668
                pass
            else:
                assert 0, caption

        elif 'Slot' in caption:
            if re.match(r'^Internal Slots of (.+)$', caption):
                pass
            else:
                assert 0

        elif 'Method' in caption:
            if 'Internal Methods' in caption:
                assert caption in [
                    'Essential Internal Methods',
                    'Additional Essential Internal Methods of Function Objects'
                ]
                assert header_line == 'Internal Method; Signature; Description'
            elif 'Records' in caption:
                assert re.fullmatch(
                    r'(Additional )?(Abstract )?Methods of .+ Records',
                    caption), caption
                assert header_line == 'Method; Purpose'
            elif caption == 'Proxy Handler Methods':
                assert header_line == 'Internal Method; Handler Method'
            else:
                assert 0

        elif 'Properties' in caption:
            assert re.fullmatch(
                r'<i>\w+</i> Interface( (Required|Optional))? Properties', caption)
            assert header_line == 'Property; Value; Requirements'

        elif 'Intrinsic Objects' in caption:
            assert caption in [
                'Well-Known Intrinsic Objects',
                'Additional Well-known Intrinsic Objects',
            ]
            well_known_intrinsics_table_spans.append(
                (et.start_posn, et.end_posn))
            # Maps each new name found in this table to the position of its row.
            new_names = {}
            assert header_line == 'Intrinsic Name; Global Name; ECMAScript Language Association'
            for tr in et.each_descendant_named('tr'):
                if tr == header_tr:
                    continue
                [oname, global_name, assoc] = [
                    td.inner_source_text().strip()
                    for td in tr.each_descendant_named('td')
                ]
                assert re.fullmatch(r'%\w+%', oname)
                assert oname not in well_known_intrinsics
                # The global name is either empty or a backticked dotted name.
                assert re.fullmatch(r"|`\w+(\.\w+)*`", global_name)
                if ';' in assoc or 'i.e.' in assoc:
                    # The association ends with "; i.e., %New.Name%",
                    # which gives a new name for the intrinsic.
                    mo = re.search(r'; i.e., (%\w+(\.\w+)+%)$', assoc)
                    assert mo
                    new_name = mo.group(1)
                    assert new_name not in well_known_intrinsics
                    assert new_name not in new_names
                    new_names[new_name] = tr.start_posn
                    assert new_name != oname
                    well_known_intrinsics[
                        oname] = f"old name; 2950,$s/{oname}/{new_name}/gc"
                    well_known_intrinsics[new_name] = "new name"
                else:
                    well_known_intrinsics[oname] = "only name"

            # Have to do this after processing the table,
            # because of possible forward references.
            # (E.g., on the row for %AsyncGenerator%,
            # column 3 mentions %AsyncGeneratorFunction.prototype%,
            # which implies the existence of %AsyncGeneratorFunction%,
            # which is declared in column 1 of the *next* row.)
            for (new_name, tr_posn) in new_names.items():
                # E.g. '%Foo.prototype%' has base '%Foo%'.
                base_of_new_name = re.sub(r'\..*', '%', new_name)
                if base_of_new_name not in well_known_intrinsics:
                    msg_at_posn(
                        tr_posn,
                        f"Implied intrinsic doesn't exist: {base_of_new_name}")

        else:
            # Any other table is accepted without checks.
            # print('>>>', header_line, '---', caption)
            pass
def check_trailing_whitespace():
    """Report every run of spaces/tabs at the end of a line of the spec text."""
    stderr("checking trailing whitespace...")
    header("checking trailing whitespace...")

    trailing_whitespace_reo = re.compile(r'(?m)[ \t]+$')
    for mo in trailing_whitespace_reo.finditer(spec.text):
        # Point at the start of the run.
        msg_at_posn(mo.start(), "trailing whitespace")
def get_info_from_parameter_listing_in_preamble(oi, parameter_listing):
    """Set `oi.params` from a prose parameter listing.

    `oi.params` must be None on entry.  A number of fixed listings are
    handled specially (some of them leave `oi.params` as None).  Otherwise,
    the listing is split into a 'required' and an 'optional' part, each part
    is split into items, and each item that contains exactly one `_name_`
    yields an `AlgParam(name, punct, nature)`, where `punct` is '[]' for an
    optional parameter, '...' for 'zero or more _args_', and '' otherwise.
    """

    assert oi.params is None, oi.name

    # if '_C_' in parameter_listing: stderr('gifpl', parameter_listing)

    if parameter_listing == '':
        assert 0
        return

    if parameter_listing == 'no arguments':
        # 27 cases
        oi.params = []
        return

    if parameter_listing in [
        'zero or more arguments _item1_, _item2_, etc.',
        'zero or more arguments',
        'any number of arguments',
        'one or two arguments',
        'zero or one arguments',
    ]:
        # 24 cases
        # XXX not sure what to do
        # (oi.params is left as None)
        return

    if parameter_listing == 'zero or more arguments which form the rest parameter ..._args_':
        oi.params = [ AlgParam('_args_', '...', 'a List of ECMAScript language values') ]
        return

    elif parameter_listing in [
        'some arguments _p1_, _p2_, … , _pn_, _body_ (where _n_ might be 0, that is, there are no “ _p_ ” arguments, and where _body_ might also not be provided)',
        'some arguments _p1_, _p2_, … , _pn_, _body_ (where _n_ might be 0, that is, there are no “_p_” arguments, and where _body_ might also not be provided)',
        'some arguments _p1_, _p2_, … , _pn_, _body_ (where _n_ might be 0, that is, there are no "_p_" arguments, and where _body_ might also not be provided)',
        'some arguments _p1_, _p2_, … , _pn_, _body_ (where _n_ might be 0, that is, there are no _p_ arguments, and where _body_ might also not be provided)',
    ]:
        # 4 cases
        oi.params = [
            AlgParam('_args_', '...', 'a List of ECMAScript language values'),
            AlgParam('_body_', '[]', 'an ECMAScript language value'),
        ]
        return

    elif parameter_listing == 'at least one argument _buffer_':
        # 1 case
        # kludgey
        if oi.name == 'DataView':
            oi.params = [
                AlgParam('_buffer_', '', 'unknown'),
                AlgParam('_byteOffset_', '[]', 'unknown'),
                AlgParam('_byteLength_', '[]', 'unknown'),
            ]
        else:
            assert 0, oi.name
        return

    # --------------------

    # 'Hide' commas within parentheses, so they don't mess up splits:
    def hide_commas(mo):
        return mo.group(0).replace(',', '<COMMA>')
    param_listing = re.sub(r'\(.*?\)', hide_commas, parameter_listing)
    # The commas will be unhidden later.

    # Also here:
    param_listing = re.sub(r'(_argumentsList_), (a List of ECMAScript language values)', r'\1<COMMA> \2', param_listing)

    # ---------------------

    oi.params = []

    # Split the listing into the 'required' and 'optional' parts:
    parts = []
    if 'optional' in param_listing:
        if RE.fullmatch(r'optional (argument.+)', param_listing):
            parts.append(('optional', RE.group(1)))
        elif RE.fullmatch(r'(.+?),? and optional (argument.+)', param_listing):
            parts.append(('required', RE.group(1)))
            parts.append(('optional', RE.group(2)))
        else:
            assert 0, param_listing
    else:
        parts.append(('required', param_listing))

    for (optionality, part) in parts:
        # Drop the lead-in, leaving just the list of items.
        part = sub_many(part, [
            ('^parameters ', ''),
            ('^argument ', ''),
            ('^one argument,? ', ''),
            ('^an argument ', ''),
            ('^arguments ', ''),
            ('^two arguments,? ', ''),
        ])

        # Because the pattern has a capturing group, the result alternates
        # between items (even indexes) and connectors (odd indexes).
        pieces = re.split('(, and |, | and )', part)
        assert len(pieces) % 2 == 1
        param_items = pieces[0::2]
        connectors = pieces[1::2]

        # Expect "X", "X and X", or "X, X, ..., and X".
        if len(connectors) == 0:
            expected_connectors = []
        elif len(connectors) == 1:
            expected_connectors = [' and ']
        else:
            expected_connectors = [', '] * (len(connectors) - 1) + [', and ']

        if connectors != expected_connectors:
            oh_warn()
            oh_warn(f"`{oi.name}` preamble param list:")
            oh_warn(repr(part))
            oh_warn(f"is of the form: X{'X'.join(connectors)}X")
            oh_warn(f"but expected : X{'X'.join(expected_connectors)}X")

        var_pattern = r'\b_\w+_\b'

        for param_item in param_items:

            # unhide_commas:
            param_item = param_item.replace('<COMMA>', ',')

            parameter_names = re.findall(var_pattern, param_item)
            if len(parameter_names) != 1:
                # Report the item and skip it.
                stderr()
                stderr(f"> {oi.name}: param listing")
                stderr(f" {parameter_listing!r}")
                stderr(f" contains item {param_item!r} with {len(parameter_names)} parameter names")
                continue

            [param_name] = parameter_names

            assert param_name not in oi.param_names(), param_name

            if optionality == 'optional':
                punct = '[]'
            elif param_item == 'zero or more _args_':
                punct = '...'
            else:
                punct = ''

            # Replace the parameter name with 'VAR',
            # then derive the parameter's nature from the first pattern
            # that matches the whole item.
            r_param_item = re.sub(var_pattern, 'VAR', param_item)

            for (pat, nat) in [
                (r'VAR, (a List of ECMAScript language values)', r'\1'),
                (r'VAR which is (a possibly empty List of ECMAScript language values)', r'\1'),
                (r'VAR of type BigInt', 'a BigInt'),
                (r'VAR \((.+)\)', r'\1'),
                (r'VAR', 'unknown'),
                (r'zero or more VAR', 'a List of ECMAScript language values'),
                (r'a Boolean flag named VAR', 'a Boolean'),
                (r'(an? .+) VAR', r'\1'),
                (r'(value) VAR', r'a \1'),
            ]:
                mo = re.fullmatch(pat, r_param_item)
                if mo:
                    nature = mo.expand(nat)
                    break
            else:
                # No pattern matched.
                print(f"? {r_param_item}")
                assert 0

            oi.params.append( AlgParam(param_name, punct, nature) )
def resolve_oi(hoi, poi):
    # Reconcile the header info taken from a heading ({hoi})
    # with the header info taken from the corresponding preamble ({poi}).
    # No new AlgHeader is built: {hoi} is updated in place,
    # and nothing is returned.
    #
    # Disagreements about the name, the parameter names,
    # or a parameter's punctuation are only reported (via oh_warn).
    # Disagreements about the species or about a parameter's nature
    # are fatal (assertion failure).

    if poi is None:
        # There is no preamble, so the heading info stands as it is.
        return

    # species:
    # The heading must supply one;
    # the preamble's, when it has one, must be the same.
    assert hoi.species is not None
    if poi.species is not None and hoi.species != poi.species:
        stderr(f"mismatch of 'species' in heading/preamble for {hoi.name}: {hoi.species!r} != {poi.species!r}")
        assert 0

    # name:
    # The heading's name is the one that is kept.
    # The preamble's name (if any) is only checked against it,
    # allowing for a few known spelling differences:
    # a dotted prefix on the heading-name, a '.prototype [ ... ]' wrapper,
    # a difference in case, or spaces inside square brackets.
    assert hoi.name is not None
    names_are_consistent = (
        poi.name is None
        or hoi.name == poi.name
        or hoi.name.endswith('.' + poi.name)
        or hoi.name.endswith(f'.prototype [ {poi.name} ]')
        or hoi.name.lower() == poi.name.lower()
        or hoi.name.replace(' [ ', '[').replace(' ]', ']') == poi.name
    )
    if not names_are_consistent:
        oh_warn()
        oh_warn(f'resolve_oi: name in heading ({hoi.name}) != name in preamble ({poi.name})')

    # for_phrase:
    # The preamble never supplies one, so {hoi.for_phrase} is left alone.
    assert poi.for_phrase is None

    # params:
    if hoi.params is None:
        # The heading has no signature, so take whatever the preamble has
        # (which might also be None).
        hoi.params = poi.params
    elif poi.params is not None:
        # Both sides have parameters.
        # The heading's signature is deemed authoritative,
        # so the preamble is used only to cross-check it
        # and to fill in natures that the heading left as 'unknown'.
        # (When only the heading has parameters, there is nothing to do.)
        if hoi.param_names() == poi.param_names():
            for (hoi_param, poi_param) in zip(hoi.params, poi.params):
                assert hoi_param.name == poi_param.name

                if hoi_param.punct != poi_param.punct:
                    oh_warn()
                    oh_warn(f"{hoi.name} parameter {hoi_param.name} has param punct mismatch:")
                    oh_warn('h:', hoi_param.punct)
                    oh_warn('p:', poi_param.punct)

                if hoi_param.nature != 'unknown':
                    # Both sides state a nature, so they have to agree.
                    assert hoi_param.nature == poi_param.nature
                else:
                    hoi_param.nature = poi_param.nature
        else:
            oh_warn()
            oh_warn(hoi.name, 'has param name mismatch:')
            oh_warn(hoi.param_names())
            oh_warn(poi.param_names())

    # also:
    # Neither side is expected to have set this.
    assert hoi.also is None
    assert poi.also is None

    # return_nature_node and description_paras:
    # These come only from the preamble,
    # so the heading must not have set them already.
    assert hoi.return_nature_node is None
    hoi.return_nature_node = poi.return_nature_node

    assert hoi.description_paras == []
    hoi.description_paras = poi.description_paras