Beispiel #1
0
 def test_summary(self):
     """Check str() of a distribution when empty and after two samples."""
     dist = metrics.Distribution("foo")
     empty_summary = (
         "foo: total=0.0, count=0, min=None, max=None, mean=None, stdev=None")
     self.assertEqual(empty_summary, str(dist))
     # Comparing the textual form of floating point results is fragile, so
     # the samples are picked to keep the arithmetic exact: two samples (a
     # power of two, so the division is exact) with a variance of 4 (a
     # perfect square, so the sqrt() is exact).
     for sample in (1, 5):
         dist.add(sample)
     filled_summary = (
         "foo: total=6.0, count=2, min=1, max=5, mean=3.0, stdev=2.0")
     self.assertEqual(filled_summary, str(dist))
Beispiel #2
0
 def test_accumulation(self):
     """Verify the accumulated statistics before and after adding samples."""
     dist = metrics.Distribution("foo")
     # A fresh distribution has a zero count/total and no min/max/mean/stdev.
     self.assertEqual(0, dist._count)
     self.assertEqual(0, dist._total)
     self.assertIsNone(dist._min)
     self.assertIsNone(dist._max)
     self.assertIsNone(dist._mean())
     self.assertIsNone(dist._stdev())
     # Feed in a few samples.
     for sample in (3, 2, 5):
         dist.add(sample)
     # Inspect the resulting state.
     self.assertEqual(3, dist._count)
     self.assertEqual(10, dist._total)
     self.assertEqual(2, dist._min)
     self.assertEqual(5, dist._max)
     expected_mean = 10.0 / 3
     self.assertAlmostEqual(expected_mean, dist._mean())
     # The squared deviations from the mean sum to 42/9; divided by the three
     # samples that is a variance of 14/9.
     expected_stdev = math.sqrt(14.0 / 9)
     self.assertAlmostEqual(expected_stdev, dist._stdev())
Beispiel #3
0
 def test_merge(self):
     """Exercise Distribution._merge() across empty and non-empty operands.

     Covers, in order: empty into empty, non-empty into empty, a merge that
     lowers the minimum, a merge that raises the maximum, and finally an
     empty distribution merged into a populated one.
     """
     d = metrics.Distribution("foo")
     # Merge two empty metrics together.
     other = metrics.Distribution("d_empty")
     d._merge(other)
     self.assertEqual(0, d._count)
     self.assertEqual(0, d._total)
     self.assertEqual(0, d._squared)
     # Identity checks against None, matching how test_accumulation checks
     # the same fields.
     self.assertIsNone(d._min)
     self.assertIsNone(d._max)
     # Merge into an empty metric (verifies the case where min/max must be
     # copied directly from the merged metric).
     other = metrics.Distribution("d2")
     other.add(10)
     other.add(20)
     d._merge(other)
     self.assertEqual(2, d._count)
     self.assertEqual(30, d._total)
     self.assertEqual(500, d._squared)  # 10**2 + 20**2
     self.assertEqual(10, d._min)
     self.assertEqual(20, d._max)
     # Merge into an existing metric resulting in a new min.
     other = metrics.Distribution("d3")
     other.add(5)
     d._merge(other)
     self.assertEqual(3, d._count)
     self.assertEqual(35, d._total)
     self.assertEqual(525, d._squared)  # 500 + 5**2
     self.assertEqual(5, d._min)
     self.assertEqual(20, d._max)
     # Merge into an existing metric resulting in a new max.
     other = metrics.Distribution("d4")
     other.add(30)
     d._merge(other)
     self.assertEqual(4, d._count)
     self.assertEqual(65, d._total)
     self.assertEqual(1425, d._squared)  # 525 + 30**2
     self.assertEqual(5, d._min)
     self.assertEqual(30, d._max)
     # Merge an empty metric (sloppy min/max code would fail).
     other = metrics.Distribution("d5")
     d._merge(other)
     self.assertEqual(4, d._count)
     self.assertEqual(65, d._total)
     self.assertEqual(1425, d._squared)
     self.assertEqual(5, d._min)
     self.assertEqual(30, d._max)
Beispiel #4
0
 def test_disabled(self):
     """A sample added after _prepare_for_test(enabled=False) isn't counted."""
     metrics._prepare_for_test(enabled=False)
     dist = metrics.Distribution("foo")
     dist.add(123)
     recorded = dist._count
     self.assertEqual(0, recorded)
Beispiel #5
0
class Solver(object):
    """Answers solvability queries for one "problem" (program) instance.

    Results for sub-states are memoized, so a state that shows up again during
    the search is answered from the cache instead of being solved again.
    """

    _cache_metric = metrics.MapCounter("cfg_solver_cache")
    _goals_per_find_metric = metrics.Distribution("cfg_solver_goals_per_find")

    def __init__(self, program):
        """Set up a solver with its own, initially empty, cache.

        Arguments:
          program: The program we're in.
        """
        self._path_finder = _PathFinder()
        self._solved_states = {}
        self.program = program

    def Solve(self, start_attrs, start_node):
        """Try to solve the given problem.

        Attempts to prove one or more bindings, starting at a given node and
        walking backwards towards the program entrypoint.

        Arguments:
          start_attrs: The assignments we're trying to have, at the start node.
          start_node: The CFG node where we want the assignments to be active.

        Returns:
          True if there is a path through the program that would give
          "start_attrs" their bindings at the "start_node" program position.
          For larger programs, this might only look for a partial path (i.e.,
          a path that doesn't go back all the way to the entry point of the
          program).
        """
        return self._RecallOrFindSolution(State(start_node, start_attrs))

    def _RecallOrFindSolution(self, state):
        """Memoized front end for _FindSolution().

        Arguments:
          state: The state to solve; used as the cache key.

        Returns:
          The cached result for `state` if there is one, otherwise the freshly
          computed (and now cached) result of _FindSolution(state).
        """
        cache = self._solved_states
        if state in cache:
            Solver._cache_metric.inc("hit")
            return cache[state]

        # Guard against infinite recursion: mark the state as solvable before
        # it has actually been solved. The reasoning is that if this state can
        # be solved at this level of the tree, it can also be solved in any of
        # the children.
        cache[state] = True

        Solver._cache_metric.inc("miss")
        outcome = self._FindSolution(state)
        cache[state] = outcome
        return outcome

    def _CandidatePositions(self, state, goals):
        """Collect the CFG nodes at which the search for `goals` continues.

        Arguments:
          state: The state currently being solved; only state.pos is read.
          goals: The goals that are still open.

        Returns:
          A set of CFG nodes. For every origin of every goal that the path
          finder can reach backwards from state.pos, this holds the first node
          on the returned path that is not state.pos, or the origin's node if
          the path has no such node.
        """
        # Nodes taken from the variables of the open goals are handed to the
        # path finder as blocked nodes.
        blocked = frozenset(
            node for goal in goals for node in goal.variable.nodes)
        positions = set()
        for goal in goals:
            # "goal" is the assignment we're trying to find.
            for origin in goal.origins:
                found, path = self._path_finder.FindNodeBackwards(
                    state.pos, origin.where, blocked)
                if not found:
                    continue
                # If the path has a node other than our current position, stop
                # there first; otherwise go straight to the origin.
                positions.add(
                    next((node for node in path if node is not state.pos),
                         origin.where))
        return positions

    def _FindSolution(self, state):
        """Search for a sequence of assignments that solves the given state.

        Arguments:
          state: The state to solve. Its goals are extended with the condition
            of state.pos, if that node has one.

        Returns:
          True if one of the alternatives produced by
          state.RemoveFinishedGoals() is conflict free and either leaves no
          goals open or leads to a solvable follow-up state; False otherwise.
        """
        condition = state.pos.condition
        if condition:
            state.goals.add(condition)
        Solver._goals_per_find_metric.add(len(state.goals))
        for finished, remaining in state.RemoveFinishedGoals():
            assert not state.pos.bindings & remaining
            if _GoalsConflict(finished):
                # The goals that were removed in bulk contradict each other.
                continue
            if not remaining:
                return True
            for candidate in self._CandidatePositions(state, remaining):
                if self._RecallOrFindSolution(State(candidate, remaining)):
                    return True
        return False
Beispiel #6
0
"""Points-to / dataflow / cfg graph engine.

It can be used to run reaching-definition queries on a nested CFG graph
and to model path-specific visibility of nested data structures.
"""

import collections
import logging

from pytype import metrics

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Distribution metric registered under the name "variable_size".
# NOTE(review): presumably records the size of each variable -- confirm
# against the code that calls add() on it.
_variable_size_metric = metrics.Distribution("variable_size")

# Cutoff for the size of a variable. Across a sample of 19352 modules, files
# which took more than 25 seconds had, on average, a largest variable of size
# 157; for files below 25 seconds the average was 7. Additionally, for 99% of
# files the largest variable was below 64, so that is used as the cutoff.
MAX_VAR_SIZE = 64


class Program(object):
    """Program instances describe program entities.

  This class ties together the CFG, the data flow graph (variables + bindings)
  as well as methods. We use this for issuing IDs: We need every CFG node to
  have a unique ID, and this class does the corresponding counting.

  Attributes:
    entrypoint: Entrypoint of the program, if it has one. (None otherwise)
Beispiel #7
0
class Solver(object):
    """The solver class is instantiated for a given "problem" instance.

    It maintains a cache of solutions for subproblems to be able to recall them
    if they reoccur in the solving process.
    """

    # Class-level metrics, shared by all solver instances: a counter with
    # "hit"/"miss" keys for the state cache, and a distribution of the number
    # of goals seen per _FindSolution() call.
    _cache_metric = metrics.MapCounter("cfg_solver_cache")
    _goals_per_find_metric = metrics.Distribution("cfg_solver_goals_per_find")

    def __init__(self, program):
        """Initialize a solver instance. Every instance has its own cache.

        Arguments:
          program: The program we're in.
        """
        self.program = program
        # Maps an already visited State to its (possibly provisional) result.
        self._solved_states = {}
        self._path_finder = _PathFinder()

    def Solve(self, start_attrs, start_node):
        """Try to solve the given problem.

        Try to prove one or more bindings starting (and going backwards from)
        a given node, all the way to the program entrypoint.

        Arguments:
          start_attrs: The assignments we're trying to have, at the start node.
          start_node: The CFG node where we want the assignments to be active.

        Returns:
          True if there is a path through the program that would give
          "start_attrs" their bindings at the "start_node" program position.
          For larger programs, this might only look for a partial path (i.e.,
          a path that doesn't go back all the way to the entry point of the
          program).
        """
        state = State(start_node, start_attrs)
        # The initial goals double as the initial set of "seen" goals.
        return self._RecallOrFindSolution(state, frozenset(start_attrs))

    def _RecallOrFindSolution(self, state, seen_goals):
        """Memoized version of _FindSolution().

        Arguments:
          state: The state to solve; used as the cache key.
          seen_goals: The goals encountered so far on the way to this state.
            Passed through to _FindSolution().

        Returns:
          The cached result for "state" if present, otherwise the result of
          _FindSolution(), which is stored in the cache before returning.
        """
        # NOTE(review): the cache is keyed on "state" alone; "seen_goals" is
        # not part of the key, so a cached answer is reused regardless of the
        # seen_goals it was computed with.
        if state in self._solved_states:
            Solver._cache_metric.inc("hit")
            return self._solved_states[state]

        # To prevent infinite loops, we insert this state into the hashmap as a
        # solvable state, even though we have not solved it yet. The reasoning is
        # that if it's possible to solve this state at this level of the tree, it
        # can also be solved in any of the children.
        self._solved_states[state] = True

        Solver._cache_metric.inc("miss")
        result = self._solved_states[state] = self._FindSolution(
            state, seen_goals)
        return result

    def _FindSolution(self, state, seen_goals):
        """Find a sequence of assignments that would solve the given state.

        Arguments:
          state: The state (CFG position plus goals) to solve.
          seen_goals: The goals encountered so far; a condition found on a
            path is only added as a new goal if it is not in this set.

        Returns:
          True if the state is already done, or if some choice of goal, origin
          and source set leads to a follow-up state that is solvable. False if
          the goals conflict or no such choice exists.
        """
        if state.Done():
            return True
        if _GoalsConflict(state.goals):
            return False
        Solver._goals_per_find_metric.add(len(state.goals))
        # Note that this set might contain the current CFG node:
        blocked = frozenset(state.NodesWithAssignments())
        # Find the goal cfg node that was assigned last.  Due to the fact that we
        # treat CFGs as DAGs, there's typically one unique cfg node with this
        # property.
        # NOTE(review): the loops below try every goal/origin pair that the
        # path finder reports as reachable, rather than selecting one node.
        for goal in state.goals:
            # "goal" is the assignment we're trying to find.
            for origin in goal.origins:
                path_exist, path = self._path_finder.FindNodeBackwards(
                    state.pos, origin.where, blocked)
                if path_exist:
                    # This loop over multiple different combinations of origins is why
                    # we need memoization of states.
                    for source_set in origin.source_sets:
                        # Work on a copy so the goals of "state" stay untouched.
                        new_goals = set(state.goals)
                        where = origin.where
                        # If we found conditions on the way, see whether we need to add
                        # any of them to our goals.
                        for node in path:
                            if node.condition not in seen_goals:
                                # It can happen that node == state.pos, typically if the node
                                # we're calling HasCombination on has a condition. If so, we'll
                                # treat it like any other condition and add it to our goals.
                                new_goals.add(node.condition)
                                # Stop at the first unseen condition and continue
                                # the search from that node.
                                where = node
                                break
                        new_state = State(where, new_goals)
                        if origin.where is new_state.pos:
                            # The goal can only be replaced if origin.where was actually
                            # reached.
                            new_state.Replace(goal, source_set)

                        # Also remove all goals that are trivially fulfilled at the
                        # new CFG node.
                        removed = new_state.RemoveFinishedGoals()
                        removed.add(goal)
                        # The conflict check covers both the removed goals and
                        # the goals that are still open in the new state.
                        if _GoalsConflict(removed | new_state.goals):
                            pass  # We bulk-removed goals that are internally conflicting.
                        elif self._RecallOrFindSolution(
                                new_state, seen_goals | new_goals):
                            return True
        return False
Beispiel #8
0
class Solver(object):
    """Answers solvability queries for one "problem" (program) instance.

    Solutions of subproblems are cached so they can be recalled when the same
    state reoccurs during solving.
    """

    _cache_metric = metrics.MapCounter("cfg_solver_cache")
    _goals_per_find_metric = metrics.Distribution("cfg_solver_goals_per_find")

    def __init__(self, program):
        """Set up a solver with its own, initially empty, cache.

        Arguments:
          program: The program we're in.
        """
        self._solved_states = {}
        self.program = program

    def Solve(self, start_attrs, start_node):
        """Try to solve the given problem.

        Attempts to prove one or more bindings, starting at a given node and
        walking backwards towards the program entrypoint.

        Arguments:
          start_attrs: The assignments we're trying to have, at the start node.
          start_node: The CFG node where we want the assignments to be active.

        Returns:
          True if there is a path through the program that would give
          "start_attrs" their bindings at the "start_node" program position.
          For larger programs, this might only look for a partial path (i.e.,
          a path that doesn't go back all the way to the entry point of the
          program).
        """
        return self._RecallOrFindSolution(State(start_node, start_attrs))

    def _RecallOrFindSolution(self, state):
        """Memoized front end for _FindSolution().

        Arguments:
          state: The state to solve; used as the cache key.

        Returns:
          The cached result for `state` if there is one, otherwise the freshly
          computed (and now cached) result of _FindSolution(state).
        """
        cache = self._solved_states
        if state in cache:
            Solver._cache_metric.inc("hit")
            return cache[state]

        # Guard against infinite recursion: mark the state as solvable before
        # it has actually been solved. The reasoning is that if this state can
        # be solved at this level of the tree, it can also be solved in any of
        # the children.
        cache[state] = True

        Solver._cache_metric.inc("miss")
        outcome = self._FindSolution(state)
        cache[state] = outcome
        return outcome

    def _FindSolution(self, state):
        """Search for a sequence of assignments that solves the given state.

        Arguments:
          state: The state (CFG position plus goals) to solve.

        Returns:
          True if the state is already done, or if replacing one of its goals
          by one of that goal's source sets yields a solvable follow-up state.
          False if the goals conflict or no such replacement exists.
        """
        if state.Done():
            return True
        if state.HasConflictingGoals():
            return False
        Solver._goals_per_find_metric.add(len(state.goals))
        assigned_nodes = state.NodesWithAssignments()
        # The current CFG node is never blocked: if an assignment at our
        # current position overwrites one of the goal variables, we assume that
        # assignment can still see the previous bindings.
        assigned_nodes.discard(state.pos)
        blocked = frozenset(assigned_nodes)
        # Find the goal cfg node that was assigned last.  Because CFGs are
        # treated as DAGs, there's typically one unique cfg node with this
        # property.
        for goal in state.goals:
            # "goal" is the assignment we're trying to find.
            for origin in goal.origins:
                if not _FindNodeBackwards(state.pos, origin.where, blocked):
                    continue
                # Trying the different combinations of origins is what makes
                # memoization of states necessary.
                for source_set in origin.source_sets:
                    candidate = State(origin.where, state.goals)
                    candidate.Replace(goal, source_set)
                    # Drop every goal that is trivially fulfilled at the new
                    # CFG node as well.
                    candidate.RemoveFinishedGoals()
                    if self._RecallOrFindSolution(candidate):
                        return True
        return False
Beispiel #9
0
class Solver(object):
    """The solver class is instantiated for a given "problem" instance.

    It maintains a cache of solutions for subproblems to be able to recall them
    if they reoccur in the solving process.
    """

    # Class-level metrics, shared by all solver instances.
    _cache_metric = metrics.MapCounter("cfg_solver_cache")
    _goals_per_find_metric = metrics.Distribution("cfg_solver_goals_per_find")

    def __init__(self, program):
        """Initialize a solver instance. Every instance has its own cache.

        Arguments:
          program: The program we're in.
        """
        self.program = program
        # Maps an already visited State to its (possibly provisional) result.
        self._solved_states = {}
        self._path_finder = _PathFinder()

    def Solve(self, start_attrs, start_node):
        """Try to solve the given problem.

        Try to prove one or more bindings starting (and going backwards from)
        a given node, all the way to the program entrypoint.

        Arguments:
          start_attrs: The assignments we're trying to have, at the start node.
          start_node: The CFG node where we want the assignments to be active.

        Returns:
          True if there is a path through the program that would give
          "start_attrs" their bindings at the "start_node" program position.
          For larger programs, this might only look for a partial path (i.e.,
          a path that doesn't go back all the way to the entry point of the
          program).
        """
        state = State(start_node, start_attrs)
        return self._RecallOrFindSolution(state)

    def _RecallOrFindSolution(self, state):
        """Memoized version of _FindSolution().

        Arguments:
          state: The state to solve; used as the cache key.

        Returns:
          The cached result for "state" if present, otherwise the result of
          _FindSolution(), which is stored in the cache before returning.
        """
        if state in self._solved_states:
            Solver._cache_metric.inc("hit")
            return self._solved_states[state]

        # To prevent infinite loops, we insert this state into the hashmap as a
        # solvable state, even though we have not solved it yet. The reasoning
        # is that if it's possible to solve this state at this level of the
        # tree, it can also be solved in any of the children.
        self._solved_states[state] = True

        Solver._cache_metric.inc("miss")
        result = self._solved_states[state] = self._FindSolution(state)
        return result

    def _IsSolvedBefore(self, where, goal, entrypoint, blocked):
        """Determine if a goal is possibly solved in a subsection of the CFG.

        If a condition introduces a new goal, but we can solve that goal
        *before* the goal we were trying to solve originally, assume that goal
        doesn't have anything to do with us.
        This currently does a quick CFG check as an approximation. An
        alternative implementation would be to call _FindSolution while
        blocking the new entrypoint.

        Args:
          where: Current CFG node. We search backwards from this node.
          goal: The goal to find a solution for.
          entrypoint: The "new" entry point of the graph. This typically
            reduces the CFG to a subgraph.
          blocked: A set (or frozenset) of nodes that must not be traversed.
            It is combined with "entrypoint" using the | operator, so a list
            does not work here.

        Returns:
          True if we think this goal can be solved without traversing beyond
          "entrypoint", False if it can't.
        """
        blocked = frozenset(blocked | {entrypoint})
        for origin in goal.origins:
            # TODO(kramm): We don't cache this. Should we?
            if origin.where not in blocked and self._path_finder.FindPathToNode(
                    where, origin.where, blocked):
                return True
        return False

    def _FindSolution(self, state):
        """Find a sequence of assignments that would solve the given state.

        Arguments:
          state: The state (CFG position plus goals) to solve.

        Returns:
          True if the state is already done, or if some choice of goal, origin
          and source set leads to a conflict-free, solvable follow-up state.
          False if the goals conflict or no such choice exists.
        """
        if state.Done():
            return True
        if _GoalsConflict(state.goals):
            return False
        Solver._goals_per_find_metric.add(len(state.goals))
        blocked = state.NodesWithAssignments()
        # We don't treat our current CFG node as blocked: If one of the goal
        # variables is overwritten by an assignment at our current pos, we
        # assume that assignment can still see the previous bindings.
        blocked.discard(state.pos)
        blocked = frozenset(blocked)
        # Find the goal cfg node that was assigned last.  Due to the fact that
        # we treat CFGs as DAGs, there's typically one unique cfg node with
        # this property.
        for goal in state.goals:
            # "goal" is the assignment we're trying to find.
            for origin in goal.origins:
                path_exist, path = self._path_finder.FindNodeBackwards(
                    state.pos, origin.where, blocked)
                if not path_exist:
                    continue
                # This loop over multiple different combinations of origins is
                # why we need memoization of states.
                for source_set in origin.source_sets:
                    # Work on a copy so the goals of "state" stay untouched.
                    new_goals = set(state.goals)
                    where = origin.where
                    # If we found conditions on the way, see whether we need to
                    # add any of them to our goals.
                    for node in path:
                        if (node.condition not in state.goals and
                                not self._IsSolvedBefore(
                                    node, node.condition, origin.where,
                                    blocked)):
                            # TODO(kramm): what if node == state.pos?
                            new_goals.add(node.condition)
                            where = node
                            break
                    new_state = State(where, new_goals)
                    if origin.where is new_state.pos:
                        # The goal can only be replaced if origin.where was
                        # actually reached.
                        new_state.Replace(goal, source_set)

                    # Also remove all goals that are trivially fulfilled at the
                    # new CFG node.
                    removed = new_state.RemoveFinishedGoals()
                    removed.add(goal)
                    if _GoalsConflict(removed):
                        # Sometimes, we bulk-remove goals that are internally
                        # conflicting. That only rules out this particular
                        # candidate; the remaining source sets, origins and
                        # goals must still be tried, so skip it instead of
                        # giving up on the whole state.
                        continue
                    if self._RecallOrFindSolution(new_state):
                        return True
        return False