예제 #1
0
      def Compare(a, b):
        """Compares the collected samples of changes `a` and `b`.

        Returns None when either change is None or has no sample values,
        compare.PENDING when either change still has a 'pending' status, and
        otherwise the `result` field of the compare.Compare() outcome.
        """
        if a is None or b is None:
          return None

        # Checked lazily, in order, so `b` is only looked up if `a` is not
        # pending.
        if any('pending' in status_by_change[change] for change in (a, b)):
          return compare.PENDING

        samples_a, samples_b = (
            tuple(itertools.chain.from_iterable(results_by_change[change]))
            for change in (a, b))
        if not (samples_a and samples_b):
          return None

        # NOTE: The requested comparison magnitude is scaled by the larger of
        # the two inter-quartile ranges (75th percentile minus 25th percentile,
        # a measure of dispersion). Noisier measurements therefore need a
        # larger difference before they are considered statistically
        # significant. The 0.001 floor avoids dividing by zero.
        noise = max(
            math_utils.Iqr(samples_a), math_utils.Iqr(samples_b), 0.001)
        requested_magnitude = task.payload.get('comparison_magnitude', 1.0)
        scaled_magnitude = requested_magnitude / noise
        sample_count = (len(samples_a) + len(samples_b)) // 2
        return compare.Compare(samples_a, samples_b, sample_count,
                               'performance', scaled_magnitude).result
예제 #2
0
def ClusterAndCompare(sequence, partition_point):
  """Splits `sequence` at `partition_point` and compares the two clusters.

  Returns:
    A (comparison, cluster_a, cluster_b) tuple, where `comparison` is whatever
    pinpoint_compare.Compare() returns for the two clusters in 'performance'
    mode.
  """
  cluster_a, cluster_b = Cluster(sequence, partition_point)
  size_a, size_b = len(cluster_a), len(cluster_b)

  # Only use the mean IQR as the magnitude when both clusters have more than
  # two elements; otherwise fall back to a magnitude of 1.
  magnitude = 1
  if min(size_a, size_b) > 2:
    iqr_sum = math_utils.Iqr(cluster_a) + math_utils.Iqr(cluster_b)
    magnitude = float(iqr_sum) / 2

  comparison = pinpoint_compare.Compare(
      cluster_a, cluster_b, (size_a + size_b) // 2, 'performance', magnitude)
  return (comparison, cluster_a, cluster_b)
예제 #3
0
 def testIqr(self):
     """Checks that Iqr of the descending sequence 8, 7, ..., 1 equals 4."""
     # `range` is available on both Python 2 and Python 3, whereas `xrange`
     # only exists on Python 2 and raises NameError on Python 3.
     self.assertEqual(4, math_utils.Iqr(range(8, 0, -1)))
예제 #4
0
    def _Compare(self, change_a, change_b):
        """Decide whether two Changes in this Job produced different results.

        For every Quest, two comparisons are made between the executions of
        the two Changes: first on whether each execution raised an exception,
        then on all result values pooled per Change. The first DIFFERENT
        verdict ends the comparison immediately; UNKNOWN verdicts are
        remembered and reported only if nothing turned out DIFFERENT.

        Arguments:
          change_a: The first Change whose results to compare.
          change_b: The second Change whose results to compare.

        Returns:
          compare.PENDING: If any Attempt of either Change is not completed.
          compare.DIFFERENT: As soon as any comparison reports DIFFERENT.
          compare.UNKNOWN: If nothing was DIFFERENT but at least one
            comparison was UNKNOWN.
          compare.SAME: Otherwise.
        """
        runs_a = self._attempts[change_a]
        runs_b = self._attempts[change_b]
        if not all(run.completed for run in runs_a + runs_b):
            return compare.PENDING

        attempt_count = (len(runs_a) + len(runs_b)) // 2
        per_quest_a = _ExecutionsPerQuest(runs_a)
        per_quest_b = _ExecutionsPerQuest(runs_b)

        inconclusive = False
        for quest in self._quests:
            execs_a = per_quest_a[quest]
            execs_b = per_quest_b[quest]

            # Functional comparison: one boolean per execution saying whether
            # it recorded an exception.
            failed_a = tuple(bool(e.exception) for e in execs_a)
            failed_b = tuple(bool(e.exception) for e in execs_b)
            if failed_a and failed_b:
                if self._comparison_mode != FUNCTIONAL:
                    magnitude = 1.0
                else:
                    # Use the configured magnitude when set and non-zero,
                    # 0.5 otherwise.
                    magnitude = (
                        getattr(self, '_comparison_magnitude', None) or 0.5)
                verdict, p_value, low, high = compare.Compare(
                    failed_a, failed_b, attempt_count, FUNCTIONAL, magnitude)
                logging.debug('p-value = %.4f (low = %.4f, high = %.4f)',
                              p_value, low, high)
                if verdict == compare.DIFFERENT:
                    return compare.DIFFERENT
                if verdict == compare.UNKNOWN:
                    inconclusive = True

            # Performance comparison: pool every measurement of a Change into
            # a single sample set, skipping executions without result values.
            pooled_a = tuple(
                value for e in execs_a if e.result_values
                for value in e.result_values)
            pooled_b = tuple(
                value for e in execs_b if e.result_values
                for value in e.result_values)
            if not (pooled_a and pooled_b):
                continue

            if getattr(self, '_comparison_magnitude', None):
                # Scale the magnitude by the larger inter-quartile range; the
                # 0.001 floor avoids dividing by zero.
                noise = max(math_utils.Iqr(pooled_a),
                            math_utils.Iqr(pooled_b), 0.001)
                magnitude = abs(self._comparison_magnitude / noise)
            else:
                magnitude = 1.0

            sample_count = (len(pooled_a) + len(pooled_b)) // 2
            verdict, p_value, low, high = compare.Compare(
                pooled_a, pooled_b, sample_count, PERFORMANCE, magnitude)
            logging.debug('p-value = %.4f (low = %.4f, high = %.4f)',
                          p_value, low, high)
            if verdict == compare.DIFFERENT:
                return compare.DIFFERENT
            if verdict == compare.UNKNOWN:
                inconclusive = True

        return compare.UNKNOWN if inconclusive else compare.SAME
예제 #5
0
    def _Compare(self, change_a, change_b):
        """Compare the results of two Changes in this Job.

        For each Quest, the executions of both Changes are compared twice:
        once on whether each execution recorded an exception, and once on the
        per-execution result values reduced with `_Mean`. If any comparison
        is DIFFERENT, return DIFFERENT. Otherwise, if any of them is
        inconclusive, return UNKNOWN. Otherwise, they are the SAME.

        Arguments:
          change_a: The first Change whose results to compare.
          change_b: The second Change whose results to compare.

        Returns:
          PENDING: If either Change has an incomplete Attempt.
          DIFFERENT: If the two Changes (very likely) have different results.
          SAME: If the two Changes (probably) have the same result.
          UNKNOWN: If we'd like more data to make a decision.
        """
        attempts_a = self._attempts[change_a]
        attempts_b = self._attempts[change_b]

        if any(not attempt.completed for attempt in attempts_a + attempts_b):
            return compare.PENDING

        # Floor division keeps the attempt count an integer on Python 3 too
        # (plain `/` would produce a float there); on Python 2 the result is
        # unchanged.
        attempt_count = (len(attempts_a) + len(attempts_b)) // 2

        executions_by_quest_a = _ExecutionsPerQuest(attempts_a)
        executions_by_quest_b = _ExecutionsPerQuest(attempts_b)

        any_unknowns = False
        for quest in self._quests:
            executions_a = executions_by_quest_a[quest]
            executions_b = executions_by_quest_b[quest]

            # Compare exceptions: one boolean per execution.
            values_a = tuple(
                bool(execution.exception) for execution in executions_a)
            values_b = tuple(
                bool(execution.exception) for execution in executions_b)
            if values_a and values_b:
                if self._comparison_mode == FUNCTIONAL:
                    # Use the configured magnitude when it is set and
                    # non-zero; fall back to 0.5 otherwise.
                    if getattr(self, '_comparison_magnitude', None):
                        comparison_magnitude = self._comparison_magnitude
                    else:
                        comparison_magnitude = 0.5
                else:
                    comparison_magnitude = 1.0
                comparison = compare.Compare(values_a, values_b, attempt_count,
                                             FUNCTIONAL, comparison_magnitude)
                if comparison == compare.DIFFERENT:
                    return compare.DIFFERENT
                elif comparison == compare.UNKNOWN:
                    any_unknowns = True

            # Compare result values: one `_Mean` per execution that has any.
            values_a = tuple(
                _Mean(execution.result_values) for execution in executions_a
                if execution.result_values)
            values_b = tuple(
                _Mean(execution.result_values) for execution in executions_b
                if execution.result_values)
            if values_a and values_b:
                if getattr(self, '_comparison_magnitude', None):
                    # Scale the magnitude by the larger inter-quartile range,
                    # so noisier samples need a bigger difference.
                    max_iqr = max(math_utils.Iqr(values_a),
                                  math_utils.Iqr(values_b))
                    if max_iqr:
                        comparison_magnitude = abs(self._comparison_magnitude /
                                                   max_iqr)
                    else:
                        # Zero dispersion: avoid dividing by zero.
                        comparison_magnitude = 1000  # Something very large.
                else:
                    comparison_magnitude = 1.0
                comparison = compare.Compare(values_a, values_b, attempt_count,
                                             PERFORMANCE, comparison_magnitude)
                if comparison == compare.DIFFERENT:
                    return compare.DIFFERENT
                elif comparison == compare.UNKNOWN:
                    any_unknowns = True

        if any_unknowns:
            return compare.UNKNOWN

        return compare.SAME