def test_infer_ast_spec(self):
    """Checks the spec that infer_ast_spec derives from hand-built counts.

    The observations cover every field type: a field seen on all instances
    with several children at least once (NONEMPTY_SEQUENCE), a field always
    seen with exactly one child (ONE_CHILD), a field seen with one child on
    only some instances (OPTIONAL_CHILD), a field missing on some instances
    and holding several children on others (SEQUENCE), and a field never
    seen with children (NO_CHILDREN).
    """
    node_obs = ast_spec_inference.NodeObservations
    field_obs = ast_spec_inference.FieldObservations
    field_type = generic_ast_graphs.FieldType

    # "root": 10 instances, all of them in root position.
    root_observations = node_obs(
        count=10,
        count_root=10,
        fields={
            # 2 + 8 == 10, so the field is never empty.
            "nonempty_sequence": field_obs(count_one=2, count_many=8),
            "one_child": field_obs(count_one=10),
        })
    # "foo": 20 instances, none of them in root position.
    foo_observations = node_obs(
        count=20,
        count_root=0,
        fields={
            # Only 15 of 20 instances had a child here.
            "optional_child": field_obs(count_one=15),
            # 4 + 4 < 20, so the field is sometimes empty.
            "sequence": field_obs(count_many=4, count_one=4),
            "no_children": field_obs(),
        })
    observations = ast_spec_inference.ASTObservations(
        example_count=1,
        node_types={"root": root_observations, "foo": foo_observations})

    spec = ast_spec_inference.infer_ast_spec(observations)

    expected_root = generic_ast_graphs.ASTNodeSpec(
        fields={
            "nonempty_sequence": field_type.NONEMPTY_SEQUENCE,
            "one_child": field_type.ONE_CHILD,
        },
        sequence_item_types={"nonempty_sequence": "root_nonempty_sequence"},
        has_parent=False)
    expected_foo = generic_ast_graphs.ASTNodeSpec(
        fields={
            "optional_child": field_type.OPTIONAL_CHILD,
            "sequence": field_type.SEQUENCE,
            "no_children": field_type.NO_CHILDREN,
        },
        sequence_item_types={"sequence": "foo_sequence"},
        has_parent=True)
    expected = {"root": expected_root, "foo": expected_foo}

    self.assertEqual(spec, expected)
def test_compute_nth_child_edges(self):
    """Checks compute_nth_child_edges on a root node with five leaf children.

    With a limit of 10 every one of the five children gets a CHILD_INDEX
    edge; with a limit of 2 only the first two do.
    """
    leaves = [
        generic_ast_graphs.GenericASTNode(f"leaf{position}", "leaf", {})
        for position in range(5)
    ]
    tree = generic_ast_graphs.GenericASTNode(
        "root", "root", {"children": leaves})
    spec = {
        "root": generic_ast_graphs.ASTNodeSpec(
            fields={"children": generic_ast_graphs.FieldType.SEQUENCE},
            sequence_item_types={"children": "child"},
            has_parent=False),
        "leaf": generic_ast_graphs.ASTNodeSpec(),
    }
    graph, _ = generic_ast_graphs.ast_to_graph(tree, spec)

    def expected_edges(how_many):
        # Edges run from the root node to the sequence helper of each child.
        return [("root__root",
                 f"root_children_{i}__child-seq-helper",
                 f"CHILD_INDEX_{i}")
                for i in range(how_many)]

    # (allowed number of children, number of edges expected back)
    for allowed, edge_count in ((10, 5), (2, 2)):
        nth_child_edges = graph_edge_util.compute_nth_child_edges(
            graph, allowed)
        self.assertEqual(nth_child_edges, expected_edges(edge_count))
def infer_ast_spec(observations):
    """Builds an AST spec that is consistent with every collected observation.

    Concrete ASTs alone do not reveal nonterminal information, so each
    sequence-valued field of each node type gets its own sequence helper
    type, named "<node type>_<field>".

    Args:
        observations: Observations collected from examples.

    Returns:
        A dict mapping each observed node type to its inferred ASTNodeSpec.

    Raises:
        ValueError: If the observations conflict so that no spec can be
            generated, i.e. a node type was seen both as a root and as a
            child.
    """
    kinds = generic_ast_graphs.FieldType
    inferred = {}
    for node_type, type_obs in observations.node_types.items():
        never_root = type_obs.count_root == 0
        # A type must be either always a root or never a root.
        if not never_root and type_obs.count_root != type_obs.count:
            raise ValueError(
                f"Node type {node_type} appears both in root and child "
                "position, which is not supported.")

        spec = generic_ast_graphs.ASTNodeSpec()
        spec.has_parent = never_root

        for field, seen in type_obs.fields.items():
            # Instances of this node type on which the field had no children.
            absent = type_obs.count - seen.count_one - seen.count_many
            if seen.count_many:
                spec.sequence_item_types[field] = f"{node_type}_{field}"
                kind = kinds.SEQUENCE if absent else kinds.NONEMPTY_SEQUENCE
            elif seen.count_one:
                kind = kinds.OPTIONAL_CHILD if absent else kinds.ONE_CHILD
            else:
                kind = kinds.NO_CHILDREN
            spec.fields[field] = kind

        inferred[node_type] = spec
    return inferred
have to have optional next/previous edges). """ from typing import Dict, Tuple import gast from gfsa import automaton_builder from gfsa import generic_ast_graphs from gfsa import graph_types PY_AST_SPECS = { # Module is the root node, so it does not have a parent "Module": generic_ast_graphs.ASTNodeSpec( has_parent=False, fields={"body": generic_ast_graphs.FieldType.NONEMPTY_SEQUENCE}, sequence_item_types={"body": "Module_body"}), # Statement nodes "FunctionDef": generic_ast_graphs.ASTNodeSpec( fields={ "args": generic_ast_graphs.FieldType.ONE_CHILD, "returns": generic_ast_graphs.FieldType.OPTIONAL_CHILD, "body": generic_ast_graphs.FieldType.NONEMPTY_SEQUENCE, "decorator_list": generic_ast_graphs.FieldType.NO_CHILDREN }, sequence_item_types={"body": "FunctionDef_body"}), "Return": generic_ast_graphs.ASTNodeSpec( fields={"value": generic_ast_graphs.FieldType.OPTIONAL_CHILD}), "Assign":