Example #1
    def __init__(self, testsuite, runs_to_load,
                 aggregation_fn=stats.safe_min, confidence_lv=.05):
        self.testsuite = testsuite
        self.aggregation_fn = aggregation_fn
        self.confidence_lv = confidence_lv

        self.sample_map = util.multidict()
        self.loaded_run_ids = set()

        self._load_samples_for_runs(runs_to_load)
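A minimal usage sketch for this constructor (the later examples show it as lnt.server.reporting.analysis.RunInfo; the run IDs and the `ts` testsuite handle below are hypothetical):

    # Hypothetical usage: collect the samples for two runs with the default
    # stats.safe_min aggregation and 5% confidence level.
    sri = lnt.server.reporting.analysis.RunInfo(ts, [101, 102])
    # _load_samples_for_runs is assumed to record the run IDs it loaded:
    assert 101 in sri.loaded_run_ids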
Example #2
    def __init__(self, testsuite, runs_to_load,
                 aggregation_fn=stats.median, confidence_lv=.05,
                 only_tests=None, cv=None):
        """Get all the samples needed to build a CR.
        runs_to_load are the run IDs of the runs to get the samples from.
        if only_tests is passed, only samples form those test IDs are fetched.
        """
        self.testsuite = testsuite
        self.aggregation_fn = aggregation_fn
        self.confidence_lv = confidence_lv

        self.sample_map = util.multidict()
        self.cv_sample_map = util.multidict()
        self.profile_map = dict()
        self.loaded_run_ids = set()
        self.loaded_cv_run_ids = set()

        self._load_samples_for_runs(runs_to_load, only_tests)
        self._load_cv_samples_for_runs(cv or [], only_tests)
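The constructors above store samples in util.multidict instances. The aggregation loops in the later examples suggest that util.multidict accumulates every value assigned to a key instead of overwriting it. A minimal sketch of that assumed behavior:

    # Assumed util.multidict semantics: repeated assignment to one key
    # collects all values under that key rather than replacing them.
    md = util.multidict()
    md['siege'] = 10
    md['siege'] = 12
    assert list(md['siege']) == [10, 12]   # assumed accumulation order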
Example #3
    def __init__(self,
                 testsuite,
                 runs_to_load,
                 aggregation_fn=stats.median,
                 confidence_lv=.05,
                 only_tests=None,
                 cv=None):
        """Get all the samples needed to build a CR.
        runs_to_load are the run IDs of the runs to get the samples from.
        if only_tests is passed, only samples form those test IDs are fetched.
        """
        self.testsuite = testsuite
        self.aggregation_fn = aggregation_fn
        self.confidence_lv = confidence_lv

        self.sample_map = util.multidict()
        self.cv_sample_map = util.multidict()
        self.profile_map = dict()
        self.loaded_run_ids = set()
        self.loaded_cv_run_ids = set()

        self._load_samples_for_runs(runs_to_load, only_tests)
        self._load_cv_samples_for_runs(cv or [], only_tests)
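The signatures above default cv to None rather than to an empty list: a mutable default such as cv=[] is created once, at definition time, and shared by every call. A standalone illustration of that pitfall:

    # Classic mutable-default pitfall: `bucket` is one list, created when the
    # function is defined and reused by every call that omits the argument.
    def collect(item, bucket=[]):
        bucket.append(item)
        return bucket

    collect(1)                   # returns [1]
    assert collect(2) == [1, 2]  # state leaked from the previous call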
Example #4
    def build(self):
        ts = self.ts

        # Construct datetime instances for the report range.
        day_ordinal = datetime.datetime(self.year, self.month,
                                        self.day).toordinal()

        # Adjust the dates' time component.  As we typically want to do runs
        # overnight, we define "daily" to really mean "daily plus some
        # offset". The offset should generally be whenever the last run
        # finishes on today's date.

        self.next_day = (datetime.datetime.fromordinal(day_ordinal + 1) +
                         self.day_start_offset)
        self.prior_days = [(datetime.datetime.fromordinal(day_ordinal - i) +
                            self.day_start_offset)
                           for i in range(self.num_prior_days_to_include + 1)]

        # Find all the runs that occurred for each day slice.
        prior_runs = [ts.query(ts.Run).
                      filter(ts.Run.start_time > prior_day).
                      filter(ts.Run.start_time <= day).all()
                      for day, prior_day in util.pairs(self.prior_days)]

        if self.filter_machine_re is not None:
            prior_runs = [[run for run in runs
                           if self.filter_machine_re.search(run.machine.name)]
                          for runs in prior_runs]

        # For every machine, we only want to report on the last run order that
        # was reported for that machine for the particular day range.
        #
        # Note that this *does not* mean that we will only report for one
        # particular run order for each day, because different machines may
        # report on different orders.
        #
        # However, we want to limit ourselves to a single run order for each
        # (day,machine) so that we don't obscure any details through our
        # aggregation.
        self.prior_days_machine_order_map = \
            [None] * self.num_prior_days_to_include
        historic_runs = [None] * len(prior_runs)
        for i, runs in enumerate(prior_runs):
            # Aggregate the runs by machine.
            machine_to_all_orders = util.multidict()
            for r in runs:
                machine_to_all_orders[r.machine] = r.order

            # Create a map from machine to max order and some history.
            self.prior_days_machine_order_map[i] = machine_order_map = {
                machine: OrderAndHistory(max(orders), sorted(orders))
                for machine, orders in machine_to_all_orders.items()}

            # Update the run list to only include the runs with that order.
            def is_max_order(r):
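                # Identity (`is`) is safe here: max(orders) returns one of the
                # exact order objects collected above, so only runs carrying
                # that object pass (one object per order is assumed).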
                return r.order is machine_order_map[r.machine].max_order
            prior_runs[i] = [r for r in runs if is_max_order(r)]

            # Also keep some recent runs, so we have some extra samples.
            def is_recent_order(r):
                return r.order in machine_order_map[r.machine].recent_orders
            historic_runs[i] = [r for r in runs if is_recent_order(r)]

        # Form a flat list of all relevant runs; sum() concatenates the
        # per-day lists.
        relevant_runs = sum(prior_runs, [])
        less_relevant_runs = sum(historic_runs, relevant_runs)

        # Find the union of all machines reporting in the relevant runs.
        self.reporting_machines = list(set(r.machine for r in relevant_runs))
        self.reporting_machines.sort(key=lambda m: m.name)

        # We aspire to present a "lossless" report, in that we don't ever hide
        # any possible change due to aggregation. In addition, we want to make
        # it easy to see the relation of results across all the reporting
        # machines. In particular:
        #
        #   (a) When a test starts failing or passing on one machine, it should
        #       be easy to see how that test behaved on other machines. This
        #       makes it easy to identify the scope of the change.
        #
        #   (b) When a performance change occurs, it should be easy to see the
        #       performance of that test on other machines. This makes it easy
        #       to see the scope of the change and to potentially apply human
        #       discretion in determining whether or not a particular result is
        #       worth considering (as opposed to noise).
        #
        # The idea is as follows, for each (machine, test, metric_field),
        # classify the result into one of REGRESSED, IMPROVED, UNCHANGED_FAIL,
        # ADDED, REMOVED, PERFORMANCE_REGRESSED, PERFORMANCE_IMPROVED.
        #
        # For now, we then just aggregate by test and present the results as
        # is. This is lossless, but not nearly as nice to read as the old style
        # per-machine reports. In the future we will want to find a way to
        # combine the per-machine report style of presenting results aggregated
        # by the kind of status change, while still managing to present the
        # overview across machines.

        # Aggregate runs by machine ID and day index.
        self.machine_runs = machine_runs = util.multidict()
        for day_index, day_runs in enumerate(prior_runs):
            for run in day_runs:
                machine_runs[(run.machine_id, day_index)] = run

        # Also aggregate past runs by day.
        self.machine_past_runs = util.multidict()
        for day_index, day_runs in enumerate(historic_runs):
            for run in day_runs:
                self.machine_past_runs[(run.machine_id, day_index)] = run

        relevant_run_ids = [r.id for r in relevant_runs]

        # If there are no relevant runs, just stop processing (the report will
        # generate an error).
        if not relevant_run_ids:
            self.error = "no runs to display in selected date range"
            return

        # Get the set of all tests reported in the recent runs.
        self.reporting_tests = ts.query(ts.Test).filter(
            sqlalchemy.sql.exists('*', sqlalchemy.sql.and_(
                    ts.Sample.run_id.in_(relevant_run_ids),
                    ts.Sample.test_id == ts.Test.id))).all()
        self.reporting_tests.sort(key=lambda t: t.name)

        run_ids_to_load = list(relevant_run_ids) + \
            [r.id for r in less_relevant_runs]

        # Create a run info object.
        sri = lnt.server.reporting.analysis.RunInfo(ts, run_ids_to_load)

        # Build the result table of tests with interesting results.
        def compute_visible_results_priority(visible_results):
            # We just use an ad hoc priority that favors showing tests with
            # failures and large changes. We do this by computing the priority
            # as a tuple of whether or not there are any failures, followed by
            # the sum of the absolute mean percentage changes.
            test, results = visible_results
            had_failures = False
            sum_abs_day0_deltas = 0.
            for machine, day_results in results:
                day0_cr = day_results[0].cr

                test_status = day0_cr.get_test_status()

                if test_status in (REGRESSED, UNCHANGED_FAIL):
                    had_failures = True
                elif day0_cr.pct_delta is not None:
                    sum_abs_day0_deltas += abs(day0_cr.pct_delta)
            return (-int(had_failures), -sum_abs_day0_deltas, test.name)

        self.result_table = []
        for field in self.fields:
            field_results = []
            for test in self.reporting_tests:
                # For each machine, compute if there is anything to display for
                # the most recent day, and if so add it to the view.
                visible_results = []
                for machine in self.reporting_machines:
                    # Get the most recent comparison result.
                    # Record which days have samples, so that we also compare
                    # consecutive runs that are more than a day apart when no
                    # runs happened in between.
                    day_has_samples = []
                    for i in range(0, self.num_prior_days_to_include):
                        runs = self.machine_past_runs.get((machine.id, i), ())
                        samples = sri.get_samples(runs, test.id, field)
                        day_has_samples.append(len(samples) > 0)

                    def find_most_recent_run_with_samples(day_nr):
                        for i in range(day_nr+1,
                                       self.num_prior_days_to_include):
                            if day_has_samples[i]:
                                return i
                        return day_nr+1

                    prev_day_index = find_most_recent_run_with_samples(0)
                    day_runs = machine_runs.get((machine.id, 0), ())
                    prev_runs = self.machine_past_runs.get(
                        (machine.id, prev_day_index), ())
                    cr = sri.get_comparison_result(
                        day_runs, prev_runs, test.id, field)

                    # If the result is not "interesting", ignore this machine.
                    if not cr.is_result_interesting():
                        continue

                    # Otherwise, compute the results for all the days.
                    day_results = DayResults()
                    day_results.append(DayResult(cr))
                    for i in range(1, self.num_prior_days_to_include):
                        day_runs = machine_runs.get((machine.id, i), ())
                        if len(day_runs) == 0:
                            day_results.append(None)
                            continue

                        prev_day_index = find_most_recent_run_with_samples(i)
                        prev_runs = self.machine_past_runs.get(
                                       (machine.id, prev_day_index), ())
                        cr = sri.get_comparison_result(day_runs, prev_runs,
                                                       test.id, field)
                        day_results.append(DayResult(cr))

                    day_results.complete()

                    # Append the result for the machine.
                    visible_results.append((machine, day_results))

                # If there are visible results for this test, append it to the
                # view.
                if visible_results:
                    field_results.append((test, visible_results))

            # Order the field results by "priority".
            field_results.sort(key=compute_visible_results_priority)

            self.result_table.append((field, field_results))
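To see how compute_visible_results_priority orders the table, compare a few made-up key tuples of the form (-int(had_failures), -sum_abs_day0_deltas, test.name):

    # Hypothetical priority keys: failures first, then larger absolute deltas,
    # then test name as the tie-breaker.
    keys = [(-1, -0.0, 'crashing_test'),
            (0, -12.5, 'hot_loop'),
            (0, -0.3, 'stable_test')]
    assert sorted(keys) == keys   # already in display order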
Example #5
    def build(self):
        ts = self.ts

        # Construct datetime instances for the report range.
        day_ordinal = datetime.datetime(self.year, self.month,
                                        self.day).toordinal()

        # Adjust the dates' time component.  As we typically want to do runs
        # overnight, we define "daily" to really mean "daily plus some
        # offset". The offset should generally be whenever the last run
        # finishes on today's date.

        self.next_day = (datetime.datetime.fromordinal(day_ordinal + 1) +
                         self.day_start_offset)
        self.prior_days = [(datetime.datetime.fromordinal(day_ordinal - i) +
                            self.day_start_offset)
                           for i in range(self.num_prior_days_to_include + 1)]

        # Find all the runs that occurred for each day slice.
        prior_runs = [ts.query(ts.Run).
                      filter(ts.Run.start_time > prior_day).
                      filter(ts.Run.start_time <= day).all()
                      for day, prior_day in util.pairs(self.prior_days)]

        if self.filter_machine_re is not None:
            prior_runs = [[run for run in runs
                           if self.filter_machine_re.search(run.machine.name)]
                          for runs in prior_runs]

        # For every machine, we only want to report on the last run order that
        # was reported for that machine for the particular day range.
        #
        # Note that this *does not* mean that we will only report for one
        # particular run order for each day, because different machines may
        # report on different orders.
        #
        # However, we want to limit ourselves to a single run order for each
        # (day,machine) so that we don't obscure any details through our
        # aggregation.
        self.prior_days_machine_order_map = \
            [None] * self.num_prior_days_to_include
        historic_runs = [None] * len(prior_runs)
        for i, runs in enumerate(prior_runs):
            # Aggregate the runs by machine.
            machine_to_all_orders = util.multidict()
            for r in runs:
                machine_to_all_orders[r.machine] = r.order

            # Create a map from machine to max order and some history.
            self.prior_days_machine_order_map[i] = machine_order_map = {
                machine: OrderAndHistory(max(orders), sorted(orders))
                for machine, orders in machine_to_all_orders.items()}

            # Update the run list to only include the runs with that order.
            def is_max_order(r):
                return r.order is machine_order_map[r.machine].max_order
            prior_runs[i] = [r for r in runs if is_max_order(r)]

            # Also keep some recent runs, so we have some extra samples.
            def is_recent_order(r):
                return r.order in machine_order_map[r.machine].recent_orders
            historic_runs[i] = [r for r in runs if is_recent_order(r)]

        # Form a flat list of all relevant runs; sum() concatenates the
        # per-day lists.
        relevant_runs = sum(prior_runs, [])
        less_relevant_runs = sum(historic_runs, relevant_runs)

        # Find the union of all machines reporting in the relevant runs.
        self.reporting_machines = list(set(r.machine for r in relevant_runs))
        self.reporting_machines.sort(key=lambda m: m.name)

        # We aspire to present a "lossless" report, in that we don't ever hide
        # any possible change due to aggregation. In addition, we want to make
        # it easy to see the relation of results across all the reporting
        # machines. In particular:
        #
        #   (a) When a test starts failing or passing on one machine, it should
        #       be easy to see how that test behaved on other machines. This
        #       makes it easy to identify the scope of the change.
        #
        #   (b) When a performance change occurs, it should be easy to see the
        #       performance of that test on other machines. This makes it easy
        #       to see the scope of the change and to potentially apply human
        #       discretion in determining whether or not a particular result is
        #       worth considering (as opposed to noise).
        #
        # The idea is as follows, for each (machine, test, metric_field),
        # classify the result into one of REGRESSED, IMPROVED, UNCHANGED_FAIL,
        # ADDED, REMOVED, PERFORMANCE_REGRESSED, PERFORMANCE_IMPROVED.
        #
        # For now, we then just aggregate by test and present the results as
        # is. This is lossless, but not nearly as nice to read as the old style
        # per-machine reports. In the future we will want to find a way to
        # combine the per-machine report style of presenting results aggregated
        # by the kind of status change, while still managing to present the
        # overview across machines.

        # Aggregate runs by machine ID and day index.
        self.machine_runs = machine_runs = util.multidict()
        for day_index, day_runs in enumerate(prior_runs):
            for run in day_runs:
                machine_runs[(run.machine_id, day_index)] = run

        # Also aggregate past runs by day.
        self.machine_past_runs = util.multidict()
        for day_index, day_runs in enumerate(historic_runs):
            for run in day_runs:
                self.machine_past_runs[(run.machine_id, day_index)] = run

        relevant_run_ids = [r.id for r in relevant_runs]

        # If there are no relevant runs, just stop processing (the report will
        # generate an error).
        if not relevant_run_ids:
            self.error = "no runs to display in selected date range"
            return

        # Get the set of all tests reported in the recent runs.
        self.reporting_tests = ts.query(ts.Test).filter(
            sqlalchemy.sql.exists('*', sqlalchemy.sql.and_(
                    ts.Sample.run_id.in_(relevant_run_ids),
                    ts.Sample.test_id == ts.Test.id))).all()
        self.reporting_tests.sort(key=lambda t: t.name)

        run_ids_to_load = list(relevant_run_ids) + \
            [r.id for r in less_relevant_runs]

        # Create a run info object.
        sri = lnt.server.reporting.analysis.RunInfo(ts, run_ids_to_load)

        # Build the result table of tests with interesting results.
        def compute_visible_results_priority(visible_results):
            # We just use an ad hoc priority that favors showing tests with
            # failures and large changes. We do this by computing the priority
            # as a tuple of whether or not there are any failures, followed by
            # the sum of the absolute mean percentage changes.
            test, results = visible_results
            had_failures = False
            sum_abs_day0_deltas = 0.
            for machine, day_results in results:
                day0_cr = day_results[0].cr

                test_status = day0_cr.get_test_status()

                if test_status in (REGRESSED, UNCHANGED_FAIL):
                    had_failures = True
                elif day0_cr.pct_delta is not None:
                    sum_abs_day0_deltas += abs(day0_cr.pct_delta)
            return (-int(had_failures), -sum_abs_day0_deltas, test.name)

        self.result_table = []
        self.nr_tests_table = []
        for field in self.fields:
            field_results = []
            for test in self.reporting_tests:
                # For each machine, compute if there is anything to display for
                # the most recent day, and if so add it to the view.
                visible_results = []
                for machine in self.reporting_machines:
                    # Get the most recent comparison result.
                    # Record which days have samples, so that we also compare
                    # consecutive runs that are more than a day apart when no
                    # runs happened in between.
                    day_has_samples = []
                    for i in range(0, self.num_prior_days_to_include):
                        runs = self.machine_past_runs.get((machine.id, i), ())
                        samples = sri.get_samples(runs, test.id)
                        day_has_samples.append(len(samples) > 0)

                    def find_most_recent_run_with_samples(day_nr):
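                        # If no later day has samples, fall back to day_nr+1;
                        # the machine_past_runs.get(..., ()) lookup below then
                        # yields no previous runs for the comparison.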
                        for i in range(day_nr+1,
                                       self.num_prior_days_to_include):
                            if day_has_samples[i]:
                                return i
                        return day_nr+1

                    prev_day_index = find_most_recent_run_with_samples(0)
                    day_runs = machine_runs.get((machine.id, 0), ())
                    prev_runs = self.machine_past_runs.get(
                        (machine.id, prev_day_index), ())
                    cr = sri.get_comparison_result(
                        day_runs, prev_runs, test.id, field,
                        self.hash_of_binary_field)

                    # If the result is not "interesting", ignore this machine.
                    if not cr.is_result_interesting():
                        continue

                    # Otherwise, compute the results for all the days.
                    day_results = DayResults()
                    day_results.append(DayResult(cr))
                    for i in range(1, self.num_prior_days_to_include):
                        day_runs = machine_runs.get((machine.id, i), ())
                        if len(day_runs) == 0:
                            day_results.append(None)
                            continue

                        prev_day_index = find_most_recent_run_with_samples(i)
                        prev_runs = self.machine_past_runs.get(
                                       (machine.id, prev_day_index), ())
                        cr = sri.get_comparison_result(
                            day_runs, prev_runs, test.id, field,
                            self.hash_of_binary_field)
                        day_results.append(DayResult(cr))

                    day_results.complete()

                    # Append the result for the machine.
                    visible_results.append((machine, day_results))

                # If there are visible results for this test, append it to the
                # view.
                if visible_results:
                    field_results.append((test, visible_results))

            # Order the field results by "priority".
            field_results.sort(key=compute_visible_results_priority)
            self.result_table.append((field, field_results))

        for machine in self.reporting_machines:
            nr_tests_for_machine = []
            for i in range(0, self.num_prior_days_to_include):
                # Get all runs with the same largest "order" on a given day.
                day_runs = machine_runs.get((machine.id, i), ())
                nr_tests_seen = 0
                for test in self.reporting_tests:
                    samples = sri.get_samples(day_runs, test.id)
                    if len(samples) > 0:
                        nr_tests_seen += 1
                nr_tests_for_machine.append(nr_tests_seen)
            self.nr_tests_table.append((machine, nr_tests_for_machine))
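For reference, a minimal sketch of the day-slice construction at the top of build(), assuming util.pairs yields adjacent (newer, older) elements like zip(days, days[1:]):

    import datetime

    # Hypothetical report date 2024-05-10 with a 2-hour day_start_offset and
    # two prior days to include.
    day_ordinal = datetime.datetime(2024, 5, 10).toordinal()
    offset = datetime.timedelta(hours=2)
    prior_days = [datetime.datetime.fromordinal(day_ordinal - i) + offset
                  for i in range(2 + 1)]
    slices = list(zip(prior_days, prior_days[1:]))
    # slices[0] == (2024-05-10 02:00, 2024-05-09 02:00): the run query keeps
    # runs with 2024-05-09 02:00 < start_time <= 2024-05-10 02:00.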