def test_infer_ast_spec(self):
    """Checks the spec inferred from a small hand-built set of observations."""
    field_obs = ast_spec_inference.FieldObservations
    node_obs = ast_spec_inference.NodeObservations
    field_type = generic_ast_graphs.FieldType

    # "root" is a root in all 10 of its occurrences, and each of its fields
    # is populated (with one or many children) in all 10 as well.
    root_observations = node_obs(
        count=10,
        count_root=10,
        fields={
            "nonempty_sequence": field_obs(count_one=2, count_many=8),
            "one_child": field_obs(count_one=10),
        },
    )
    # "foo" is never a root, and each of its fields is populated in fewer
    # than its 20 occurrences (or never, for "no_children").
    foo_observations = node_obs(
        count=20,
        count_root=0,
        fields={
            "optional_child": field_obs(count_one=15),
            "sequence": field_obs(count_many=4, count_one=4),
            "no_children": field_obs(),
        },
    )
    observations = ast_spec_inference.ASTObservations(
        example_count=1,
        node_types={"root": root_observations, "foo": foo_observations},
    )

    spec = ast_spec_inference.infer_ast_spec(observations)

    expected_root = generic_ast_graphs.ASTNodeSpec(
        fields={
            "nonempty_sequence": field_type.NONEMPTY_SEQUENCE,
            "one_child": field_type.ONE_CHILD,
        },
        sequence_item_types={"nonempty_sequence": "root_nonempty_sequence"},
        has_parent=False,
    )
    expected_foo = generic_ast_graphs.ASTNodeSpec(
        fields={
            "optional_child": field_type.OPTIONAL_CHILD,
            "sequence": field_type.SEQUENCE,
            "no_children": field_type.NO_CHILDREN,
        },
        sequence_item_types={"sequence": "foo_sequence"},
        has_parent=True,
    )
    expected = {"root": expected_root, "foo": expected_foo}
    self.assertEqual(spec, expected)
    def test_compute_nth_child_edges(self):
        """Checks nth-child edges for a root node with five leaf children."""
        spec = {
            "root": generic_ast_graphs.ASTNodeSpec(
                fields={"children": generic_ast_graphs.FieldType.SEQUENCE},
                sequence_item_types={"children": "child"},
                has_parent=False,
            ),
            "leaf": generic_ast_graphs.ASTNodeSpec(),
        }
        leaves = [
            generic_ast_graphs.GenericASTNode(f"leaf{index}", "leaf", {})
            for index in range(5)
        ]
        tree = generic_ast_graphs.GenericASTNode(
            "root", "root", {"children": leaves})
        graph, _ = generic_ast_graphs.ast_to_graph(tree, spec)

        def expected_edges(edge_count):
            # One edge from the root to the sequence helper at each index.
            return [
                (
                    "root__root",
                    f"root_children_{i}__child-seq-helper",
                    f"CHILD_INDEX_{i}",
                )
                for i in range(edge_count)
            ]

        # Allowing 10 children: all five children get an edge.
        self.assertEqual(
            graph_edge_util.compute_nth_child_edges(graph, 10),
            expected_edges(5))

        # Allowing 2 children: only the first two children get an edge.
        self.assertEqual(
            graph_edge_util.compute_nth_child_edges(graph, 2),
            expected_edges(2))
Beispiel #3
0
def infer_ast_spec(observations):
    """Builds an AST spec that is consistent with the given observations.

    Concrete ASTs carry no nonterminal information, so every sequence-valued
    field of every node type gets its own sequence helper type, named
    ``{node_type}_{field}``.

    Args:
        observations: Observations collected from examples. Only its
            ``node_types`` mapping is read here.

    Returns:
        A dict mapping each observed node type to its inferred node spec.

    Raises:
        ValueError: If a node type was observed both as a root and in a
            non-root position.
    """
    inferred = {}
    for type_name, type_obs in observations.node_types.items():
        spec = generic_ast_graphs.ASTNodeSpec()

        # A node type must be either never a root or always a root.
        never_root = type_obs.count_root == 0
        always_root = type_obs.count_root == type_obs.count
        if not (never_root or always_root):
            raise ValueError(
                f"Node type {type_name} appears both in root and child "
                "position, which is not supported.")
        spec.has_parent = True if never_root else False

        for field_name, obs in type_obs.fields.items():
            # Occurrences of this node type in which the field was neither
            # counted as having one child nor as having many.
            times_empty = type_obs.count - obs.count_one - obs.count_many

            if obs.count_many:
                kind = (generic_ast_graphs.FieldType.SEQUENCE
                        if times_empty else
                        generic_ast_graphs.FieldType.NONEMPTY_SEQUENCE)
                spec.sequence_item_types[field_name] = (
                    f"{type_name}_{field_name}")
            elif obs.count_one:
                kind = (generic_ast_graphs.FieldType.OPTIONAL_CHILD
                        if times_empty else
                        generic_ast_graphs.FieldType.ONE_CHILD)
            else:
                kind = generic_ast_graphs.FieldType.NO_CHILDREN

            spec.fields[field_name] = kind

        inferred[type_name] = spec
    return inferred
Beispiel #4
0
  have to have optional next/previous edges).
"""

from typing import Dict, Tuple

import gast

from gfsa import automaton_builder
from gfsa import generic_ast_graphs
from gfsa import graph_types

PY_AST_SPECS = {
    # Module is the root node, so it does not have a parent
    "Module":
        generic_ast_graphs.ASTNodeSpec(
            has_parent=False,
            fields={"body": generic_ast_graphs.FieldType.NONEMPTY_SEQUENCE},
            sequence_item_types={"body": "Module_body"}),
    # Statement nodes
    "FunctionDef":
        generic_ast_graphs.ASTNodeSpec(
            fields={
                "args": generic_ast_graphs.FieldType.ONE_CHILD,
                "returns": generic_ast_graphs.FieldType.OPTIONAL_CHILD,
                "body": generic_ast_graphs.FieldType.NONEMPTY_SEQUENCE,
                "decorator_list": generic_ast_graphs.FieldType.NO_CHILDREN
            },
            sequence_item_types={"body": "FunctionDef_body"}),
    "Return":
        generic_ast_graphs.ASTNodeSpec(
            fields={"value": generic_ast_graphs.FieldType.OPTIONAL_CHILD}),
    "Assign":