Example #1
    def testEvaluateSuccess_NeedToRefineAttempts(self):
        self.PopulateSimpleBisectionGraph()
        task_module.Evaluate(
            self.job,
            event_module.Event(type='initiate', target_task=None, payload={}),
            self.CompoundEvaluatorForTesting(
                FakeReadValueMapResult(
                    self.job, {
                        change_module.Change.FromDict({
                            'commits': [{
                                'repository': 'chromium',
                                'git_hash': commit
                            }]
                        }): values
                        for commit, values in (
                            ('commit_0', range(10)),
                            ('commit_1', range(10)),
                            ('commit_2', range(4, 14)),
                            ('commit_3', range(3, 13)),
                            ('commit_4', range(3, 13)),
                            ('commit_5', range(3, 13)),
                        )
                    })))

        # Here we test that we have more than the minimum attempts for the change
        # between commit_1 and commit_2.
        evaluate_result = task_module.Evaluate(
            self.job, SelectEvent(),
            evaluators.Selector(task_type='read_value'))
        attempt_counts = {}
        for payload in evaluate_result.values():
            change = change_module.Change.FromDict(payload.get('change'))
            attempt_counts[change] = attempt_counts.get(change, 0) + 1
        self.assertGreater(
            attempt_counts[change_module.Change.FromDict({
                'commits': [{
                    'repository': 'chromium',
                    'git_hash': 'commit_2',
                }]
            })], 10)
        self.assertLess(
            attempt_counts[change_module.Change.FromDict({
                'commits': [{
                    'repository': 'chromium',
                    'git_hash': 'commit_2',
                }]
            })], 100)

        # We know that we will never get a deterministic answer, so we ensure that
        # we don't inadvertently blame the wrong changes at the end of the
        # refinement.
        evaluate_result = task_module.Evaluate(
            self.job, SelectEvent(),
            evaluators.Selector(task_type='find_culprit'))
        self.assertIn('performance_bisection', evaluate_result)
        logging.info('Results: %s', evaluate_result['performance_bisection'])
        self.assertEqual(evaluate_result['performance_bisection']['culprits'],
                         [])

    def testEvaluateSuccess_NeedToRefineAttempts(self):
        self.PopulateSimpleBisectionGraph(self.job)
        task_module.Evaluate(
            self.job,
            event_module.Event(type='initiate', target_task=None, payload={}),
            self.BisectionEvaluatorForTesting(
                bisection_test_util.FakeReadValueMapResult(
                    self.job, {
                        change_module.Change.FromDict({
                            'commits': [{
                                'repository': 'chromium',
                                'git_hash': commit
                            }]
                        }): values
                        for commit, values in (
                            ('commit_0', range(10)),
                            ('commit_1', range(1, 11)),
                            ('commit_2', range(2, 12)),
                            ('commit_3', range(3, 13)),
                            ('commit_4', range(3, 13)),
                            ('commit_5', range(3, 13)),
                        )
                    })))

        # Here we test that we have more than the minimum attempts for the change
        # between commit_1 and commit_2.
        evaluate_result = task_module.Evaluate(
            self.job, bisection_test_util.SelectEvent(),
            evaluators.Selector(task_type='read_value'))
        attempt_counts = {}
        for payload in evaluate_result.values():
            change = change_module.Change.FromDict(payload.get('change'))
            attempt_counts[change] = attempt_counts.get(change, 0) + 1
        self.assertGreater(
            attempt_counts[change_module.Change.FromDict({
                'commits': [{
                    'repository': 'chromium',
                    'git_hash': 'commit_2',
                }]
            })], 10)
        self.assertLess(
            attempt_counts[change_module.Change.FromDict({
                'commits': [{
                    'repository': 'chromium',
                    'git_hash': 'commit_2',
                }]
            })], 100)

        # We know that we will refine the graph until we see the progression from
        # commit_0 -> commit_1 -> commit_2 -> commit_3 and stabilize.
        evaluate_result = task_module.Evaluate(
            self.job, bisection_test_util.SelectEvent(),
            evaluators.Selector(task_type='find_culprit'))
        self.assertIn('performance_bisection', evaluate_result)
        self.assertEqual(evaluate_result['performance_bisection']['culprits'],
                         [mock.ANY, mock.ANY, mock.ANY])
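
Both variants above tally attempts with a plain dict keyed by Change. As a minimal sketch (a hypothetical helper, not part of the tests, assuming the same change_module used above), the same tally can be written with collections.Counter, since Change objects are used as dict keys in these tests and are therefore hashable:

import collections

def CountAttemptsByChange(evaluate_result):
  # Count how many read_value payloads refer to each Change.
  return collections.Counter(
      change_module.Change.FromDict(payload.get('change'))
      for payload in evaluate_result.values())
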
Example #3
    def testSelector_Combinations(self):
        matching_task_types = (None, 'test')
        matching_event_types = (None, 'select')
        task = task_module.InMemoryTask(id='test_id',
                                        task_type='test',
                                        payload={},
                                        status='pending',
                                        dependencies=[])
        for task_type in matching_task_types:
            for event_type in matching_event_types:
                if not task_type and not event_type:
                    continue
                accumulator = {}
                evaluators.Selector(task_type=task_type,
                                    event_type=event_type)(
                                        task,
                                        event_module.Event(type='select',
                                                           target_task=None,
                                                           payload={}),
                                        accumulator)
                self.assertEqual({'test_id': mock.ANY}, accumulator,
                                 'task_type = %s, event_type = %s' %
                                 (task_type, event_type))

        non_matching_task_types = ('unmatched_task', )
        non_matching_event_types = ('unmatched_event', )

        # Because the Selector's default behaviour is a logical disjunction of
        # matchers, we ensure that we will always find the tasks and handle events
        # if either (or both) match.
        for task_type in [t for t in matching_task_types if t is not None]:
            for event_type in non_matching_event_types:
                accumulator = {}
                evaluators.Selector(task_type=task_type,
                                    event_type=event_type)(
                                        task,
                                        event_module.Event(type='select',
                                                           target_task=None,
                                                           payload={}),
                                        accumulator)
                self.assertEqual({'test_id': mock.ANY}, accumulator,
                                 'task_type = %s, event_type = %s' %
                                 (task_type, event_type))
        for task_type in non_matching_task_types:
            for event_type in [
                    t for t in matching_event_types if t is not None
            ]:
                accumulator = {}
                evaluators.Selector(task_type=task_type,
                                    event_type=event_type)(
                                        task,
                                        event_module.Event(type='select',
                                                           target_task=None,
                                                           payload={}),
                                        accumulator)
                self.assertEqual({'test_id': mock.ANY}, accumulator,
                                 'task_type = %s, event_type = %s' %
                                 (task_type, event_type))
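
As a minimal standalone sketch of the Selector call pattern exercised above (the task id and payload are made up for illustration; only constructors already shown in these tests are used), a matching Selector records the task in the accumulator keyed by its id, while a Selector whose matchers all miss leaves the accumulator untouched:

task = task_module.InMemoryTask(id='example_task',
                                task_type='read_value',
                                payload={'result_values': [1, 2, 3]},
                                status='completed',
                                dependencies=[])
accumulator = {}
evaluators.Selector(task_type='read_value')(
    task,
    event_module.Event(type='select', target_task=None, payload={}),
    accumulator)
# accumulator now has an entry under 'example_task'; with a non-matching
# selector (e.g. Selector(event_type='never_sent')) it would stay {}.
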
Example #4
 def testEvaluateSuccess_HistogramSkipRefTraceUrls(self, isolate_retrieve):
   hist = histogram_module.Histogram('some_benchmark', 'count')
   hist.AddSample(0)
   hist.diagnostics[reserved_infos.TRACE_URLS.name] = (
       generic_set.GenericSet(['trace_url1', 'trace_url2']))
   hist2 = histogram_module.Histogram('hist2', 'count')
   hist2.diagnostics[reserved_infos.TRACE_URLS.name] = (
       generic_set.GenericSet(['trace_url3']))
   hist2.diagnostics[reserved_infos.TRACE_URLS.name].guid = 'foo'
   histograms = histogram_set.HistogramSet([hist, hist2])
   isolate_retrieve.side_effect = itertools.chain(
       *itertools.repeat([('{"files": {"some_benchmark/perf_results.json": '
                           '{"h": "394890891823812873798734a"}}}'),
                          json.dumps(histograms.AsDicts())], 10))
   self.PopulateTaskGraph(benchmark='some_benchmark')
   self.assertNotEqual({},
                       task_module.Evaluate(
                           self.job,
                           event_module.Event(
                               type='initiate', target_task=None, payload={}),
                           self.evaluator))
   self.assertEqual(
       {
           'read_value_chromium@aaaaaaa_%s' % (attempt,): {
               'benchmark':
                   'some_benchmark',
               'change': mock.ANY,
               'mode':
                   'histogram_sets',
               'results_filename':
                   'some_benchmark/perf_results.json',
               'histogram_options': {
                   'grouping_label': None,
                   'story': None,
                   'statistic': None,
               },
               'graph_json_options': {
                   'chart': None,
                   'trace': 'some_trace'
               },
               'result_values': [0],
               'status':
                   'completed',
               'tries':
                   1,
               'trace_urls': [{
                   'key': 'trace',
                   'value': 'trace_url1',
                   'url': 'trace_url1'
               }, {
                   'key': 'trace',
                   'value': 'trace_url2',
                   'url': 'trace_url2',
               }]
           } for attempt in range(10)
       },
       task_module.Evaluate(
           self.job,
           event_module.Event(type='select', target_task=None, payload={}),
           evaluators.Selector(task_type='read_value')))

  def testEvaluateSuccess_MultipleHistograms(self, isolate_retrieve):

    def CreateHistogram(name):
      histogram = histogram_module.Histogram(name, 'count')
      histogram.AddSample(0)
      histogram.AddSample(1)
      histogram.AddSample(2)
      return histogram

    histograms = histogram_set.HistogramSet([
        CreateHistogram(name)
        for name in ('some_chart', 'some_chart', 'some_other_chart')
    ])
    histograms.AddSharedDiagnosticToAllHistograms(
        reserved_infos.STORY_TAGS.name, generic_set.GenericSet(['group:label']))
    histograms.AddSharedDiagnosticToAllHistograms(
        reserved_infos.STORIES.name, generic_set.GenericSet(['story']))
    isolate_retrieve.side_effect = itertools.chain(
        *itertools.repeat([('{"files": {"some_benchmark/perf_results.json": '
                            '{"h": "394890891823812873798734a"}}}'),
                           json.dumps(histograms.AsDicts())], 10))
    self.PopulateTaskGraph(
        benchmark='some_benchmark',
        chart='some_chart',
        grouping_label='label',
        story='story')
    self.assertNotEqual({},
                        task_module.Evaluate(
                            self.job,
                            event_module.Event(
                                type='initiate', target_task=None, payload={}),
                            self.evaluator))
    self.assertEqual(
        {
            'read_value_chromium@aaaaaaa_%s' % (attempt,): {
                'benchmark': 'some_benchmark',
                'change': mock.ANY,
                'mode': 'histogram_sets',
                'results_filename': 'some_benchmark/perf_results.json',
                'results_path': ['some_benchmark', 'perf_results.json'],
                'histogram_options': {
                    'grouping_label': 'label',
                    'story': 'story',
                    'statistic': None,
                    'histogram_name': 'some_chart',
                },
                'graph_json_options': {
                    'chart': 'some_chart',
                    'trace': 'some_trace'
                },
                'result_values': [0, 1, 2, 0, 1, 2],
                'status': 'completed',
                'tries': 1,
                'index': attempt,
            } for attempt in range(10)
        },
        task_module.Evaluate(
            self.job,
            event_module.Event(type='select', target_task=None, payload={}),
            evaluators.Selector(task_type='read_value')))
Example #6
 def testEvaluateFail_GraphJsonMissingTrace(self, isolate_retrieve):
     isolate_retrieve.side_effect = itertools.chain(*itertools.repeat(
         [('{"files": {"some_benchmark/perf_results.json": '
           '{"h": "394890891823812873798734a"}}}'),
          json.dumps(
              {'chart': {
                  'traces': {
                      'trace': ['126444.869721', '0.0']
                  }
              }})], 10))
     self.PopulateTaskGraph(benchmark='some_benchmark',
                            chart='chart',
                            trace='must_not_be_found',
                            mode='graph_json')
     self.assertNotEqual({},
                         task_module.Evaluate(
                             self.job,
                             event_module.Event(type='initiate',
                                                target_task=None,
                                                payload={}),
                             self.evaluator))
     self.assertEqual(
         {
             'read_value_chromium@aaaaaaa_%s' % (attempt, ): {
                 'benchmark':
                 'some_benchmark',
                 'change':
                 mock.ANY,
                 'mode':
                 'graph_json',
                 'results_filename':
                 'some_benchmark/perf_results.json',
                 'histogram_options': {
                     'grouping_label': None,
                     'story': None,
                     'statistic': None,
                     'histogram_name': 'chart',
                 },
                 'graph_json_options': {
                     'chart': 'chart',
                     'trace': 'must_not_be_found',
                 },
                 'errors': [{
                     'reason': 'ReadValueTraceNotFound',
                     'message': mock.ANY,
                 }],
                 'status':
                 'failed',
                 'tries':
                 1,
                 'index':
                 attempt,
             }
             for attempt in range(10)
         },
         task_module.Evaluate(
             self.job,
             event_module.Event(type='select', target_task=None,
                                payload={}),
             evaluators.Selector(task_type='read_value')))
Example #7
 def testEvaluateFailedDependency(self, *_):
     self.PopulateTaskGraph(benchmark='some_benchmark',
                            chart='chart',
                            trace='must_not_be_found',
                            mode='graph_json')
     self.assertNotEqual(
         {},
         task_module.Evaluate(
             self.job,
             event_module.Event(type='initiate',
                                target_task=None,
                                payload={}),
             evaluators.SequenceEvaluator(evaluators=(
                 evaluators.FilteringEvaluator(
                     predicate=evaluators.TaskTypeEq('find_isolate'),
                     delegate=evaluators.SequenceEvaluator(evaluators=(
                         functools.partial(FakeFoundIsolate, self.job),
                         evaluators.TaskPayloadLiftingEvaluator()))),
                 evaluators.FilteringEvaluator(
                     predicate=evaluators.TaskTypeEq('run_test'),
                     delegate=evaluators.SequenceEvaluator(evaluators=(
                         functools.partial(FakeFailedRunTest, self.job),
                         evaluators.TaskPayloadLiftingEvaluator()))),
                 read_value.Evaluator(self.job),
             ))))
     self.assertEqual(
         {
             'read_value_chromium@aaaaaaa_%s' % (attempt, ): {
                 'benchmark':
                 'some_benchmark',
                 'mode':
                 'graph_json',
                 'results_filename':
                 'some_benchmark/perf_results.json',
                 'histogram_options': {
                     'tir_label': None,
                     'story': None,
                     'statistic': None,
                 },
                 'graph_json_options': {
                     'chart': 'chart',
                     'trace': 'must_not_be_found',
                 },
                 'errors': [{
                     'reason': 'DependencyFailed',
                     'message': mock.ANY,
                 }],
                 'status':
                 'failed',
                 'tries':
                 1,
             }
             for attempt in range(10)
         },
         task_module.Evaluate(
             self.job,
             event_module.Event(type='select', target_task=None,
                                payload={}),
             evaluators.Selector(task_type='read_value')))

  def testEvaluateSuccess_WithData(self, isolate_retrieve):
    # Seed the response to the call to the isolate service.
    histogram = histogram_module.Histogram('some_chart', 'count')
    histogram.AddSample(0)
    histogram.AddSample(1)
    histogram.AddSample(2)
    histograms = histogram_set.HistogramSet([histogram])
    histograms.AddSharedDiagnosticToAllHistograms(
        reserved_infos.STORY_TAGS.name, generic_set.GenericSet(['group:label']))
    histograms.AddSharedDiagnosticToAllHistograms(
        reserved_infos.STORIES.name, generic_set.GenericSet(['story']))
    isolate_retrieve.side_effect = itertools.chain(
        *itertools.repeat([('{"files": {"some_benchmark/perf_results.json": '
                            '{"h": "394890891823812873798734a"}}}'),
                           json.dumps(histograms.AsDicts())], 10))

    # Set it up so that we are building a graph that's looking for no statistic.
    self.PopulateTaskGraph(
        benchmark='some_benchmark',
        chart='some_chart',
        grouping_label='label',
        story='story')
    self.assertNotEqual({},
                        task_module.Evaluate(
                            self.job,
                            event_module.Event(
                                type='initiate', target_task=None, payload={}),
                            self.evaluator))

    # Ensure we find a value, and the histogram associated with the data we're
    # looking for.
    self.assertEqual(
        {
            'read_value_chromium@aaaaaaa_%s' % (attempt,): {
                'benchmark': 'some_benchmark',
                'change': mock.ANY,
                'mode': 'histogram_sets',
                'results_filename': 'some_benchmark/perf_results.json',
                'results_path': ['some_benchmark', 'perf_results.json'],
                'histogram_options': {
                    'grouping_label': 'label',
                    'story': 'story',
                    'statistic': None,
                    'histogram_name': 'some_chart',
                },
                'graph_json_options': {
                    'chart': 'some_chart',
                    'trace': 'some_trace',
                },
                'status': 'completed',
                'result_values': [0, 1, 2],
                'tries': 1,
                'index': attempt,
            } for attempt in range(10)
        },
        task_module.Evaluate(
            self.job,
            event_module.Event(type='select', target_task=None, payload={}),
            evaluators.Selector(task_type='read_value')))
Example #9
 def testEvaluateFailure_HistogramNoSamples(self, isolate_retrieve):
     histogram = histogram_module.Histogram('some_benchmark', 'count')
     histograms = histogram_set.HistogramSet([histogram])
     histograms.AddSharedDiagnosticToAllHistograms(
         reserved_infos.STORY_TAGS.name,
         generic_set.GenericSet(['group:tir_label']))
     histograms.AddSharedDiagnosticToAllHistograms(
         reserved_infos.STORIES.name,
         generic_set.GenericSet(['https://story']))
     isolate_retrieve.side_effect = itertools.chain(
         *itertools.repeat([(
             '{"files": {"some_benchmark/perf_results.json": '
             '{"h": "394890891823812873798734a"}}}'),
                            json.dumps(histograms.AsDicts())], 10))
     self.PopulateTaskGraph(benchmark='some_benchmark',
                            chart='some_chart',
                            tir_label='tir_label',
                            story='https://story')
     self.assertNotEqual({},
                         task_module.Evaluate(
                             self.job,
                             event_module.Event(type='initiate',
                                                target_task=None,
                                                payload={}),
                             self.evaluator))
     self.assertEqual(
         {
             'read_value_chromium@aaaaaaa_%s' % (attempt, ): {
                 'benchmark':
                 'some_benchmark',
                 'mode':
                 'histogram_sets',
                 'results_filename':
                 'some_benchmark/perf_results.json',
                 'histogram_options': {
                     'tir_label': 'tir_label',
                     'story': 'https://story',
                     'statistic': None,
                 },
                 'graph_json_options': {
                     'chart': 'some_chart',
                     'trace': 'some_trace'
                 },
                 'status':
                 'failed',
                 'errors': [{
                     'reason': 'ReadValueNoValues',
                     'message': mock.ANY,
                 }],
                 'tries':
                 1,
             }
             for attempt in range(10)
         },
         task_module.Evaluate(
             self.job,
             event_module.Event(type='select', target_task=None,
                                payload={}),
             evaluators.Selector(task_type='read_value')))
Example #10
 def testEvaluateFailure_HistogramNoValues(self, isolate_retrieve):
     isolate_retrieve.side_effect = itertools.chain(*itertools.repeat(
         [('{"files": {"some_benchmark/perf_results.json": '
           '{"h": "394890891823812873798734a"}}}'),
          json.dumps(
              histogram_set.HistogramSet([
                  histogram_module.Histogram('some_benchmark', 'count')
              ]).AsDicts())], 10))
     self.PopulateTaskGraph(benchmark='some_benchmark',
                            chart='some_chart',
                            grouping_label='label',
                            story='https://story')
     self.assertNotEqual({},
                         task_module.Evaluate(
                             self.job,
                             event_module.Event(type='initiate',
                                                target_task=None,
                                                payload={}),
                             self.evaluator))
     self.assertEqual(
         {
             'read_value_chromium@aaaaaaa_%s' % (attempt, ): {
                 'benchmark':
                 'some_benchmark',
                 'change':
                 mock.ANY,
                 'mode':
                 'histogram_sets',
                 'results_filename':
                 'some_benchmark/perf_results.json',
                 'histogram_options': {
                     'grouping_label': 'label',
                     'story': 'https://story',
                     'statistic': None,
                     'histogram_name': 'some_chart',
                 },
                 'graph_json_options': {
                     'chart': 'some_chart',
                     'trace': 'some_trace',
                 },
                 'status':
                 'failed',
                 'errors': [{
                     'reason': 'ReadValueNotFound',
                     'message': mock.ANY,
                 }],
                 'tries':
                 1,
                 'index':
                 attempt,
             }
             for attempt in range(10)
         },
         task_module.Evaluate(
             self.job,
             event_module.Event(type='select', target_task=None,
                                payload={}),
             evaluators.Selector(task_type='read_value')))
Example #11
 def testSelector_EventType(self):
     task = task_module.InMemoryTask(id='test_id',
                                     task_type='test',
                                     payload={},
                                     status='pending',
                                     dependencies=[])
     accumulator = {}
     evaluators.Selector(event_type='select')(task,
                                              event_module.Event(
                                                  type='unmatched',
                                                  target_task=None,
                                                  payload={}), accumulator)
     self.assertEqual({}, accumulator)
     evaluators.Selector(event_type='select')(task,
                                              event_module.Event(
                                                  type='select',
                                                  target_task=None,
                                                  payload={}), accumulator)
     self.assertEqual({'test_id': mock.ANY}, accumulator)
Example #12
 def testEvaluateFailure_DependenciesFailed(self):
   self.PopulateSimpleBisectionGraph()
   task_module.Evaluate(
       self.job,
       event_module.Event(type='initiate', target_task=None, payload={}),
       self.CompoundEvaluatorForTesting(FakeReadValueFails(self.job)))
   evaluate_result = task_module.Evaluate(
       self.job, SelectEvent(), evaluators.Selector(task_type='find_culprit'))
   self.assertIn('performance_bisection', evaluate_result)
   self.assertEqual(evaluate_result['performance_bisection']['status'],
                    'failed')
   self.assertNotEqual([], evaluate_result['performance_bisection']['errors'])
Example #13
 def testEvaluateFailure_GraphJsonMissingFile(self, isolate_retrieve):
     isolate_retrieve.return_value = '{"files": {}}'
     self.PopulateTaskGraph(benchmark='some_benchmark',
                            chart='chart',
                            trace='trace',
                            mode='graph_json')
     self.assertNotEqual({},
                         task_module.Evaluate(
                             self.job,
                             event_module.Event(type='initiate',
                                                target_task=None,
                                                payload={}),
                             self.evaluator))
     self.assertEqual(
         {
             'read_value_chromium@aaaaaaa_%s' % (attempt, ): {
                 'benchmark':
                 'some_benchmark',
                 'change':
                 mock.ANY,
                 'mode':
                 'graph_json',
                 'results_filename':
                 'some_benchmark/perf_results.json',
                 'histogram_options': {
                     'grouping_label': None,
                     'story': None,
                     'statistic': None,
                     'histogram_name': 'chart',
                 },
                 'graph_json_options': {
                     'chart': 'chart',
                     'trace': 'trace',
                 },
                 'errors': [{
                     'reason': 'ReadValueNoFile',
                     'message': mock.ANY,
                 }],
                 'status':
                 'failed',
                 'tries':
                 1,
                 'index':
                 attempt,
             }
             for attempt in range(10)
         },
         task_module.Evaluate(
             self.job,
             event_module.Event(type='select', target_task=None,
                                payload={}),
             evaluators.Selector(task_type='read_value')))
Example #14
 def testSelector_Predicate(self):
     task = task_module.InMemoryTask(id='test_id',
                                     task_type='test',
                                     payload={},
                                     status='pending',
                                     dependencies=[])
     accumulator = {}
     evaluators.Selector(predicate=lambda *_: True)(
         task,
         event_module.Event(type='unimportant',
                            target_task=None,
                            payload={}), accumulator)
     self.assertEqual({'test_id': mock.ANY}, accumulator)

  def testEvaluateSuccess_NoReproduction(self):
   self.PopulateSimpleBisectionGraph(self.job)
   task_module.Evaluate(
       self.job,
       event_module.Event(type='initiate', target_task=None, payload={}),
       self.BisectionEvaluatorForTesting(
           bisection_test_util.FakeReadValueSameResult(self.job, 1.0)))
   evaluate_result = task_module.Evaluate(
       self.job,
       event_module.Event(type='select', target_task=None, payload={}),
       evaluators.Selector(task_type='find_culprit'))
   self.assertIn('performance_bisection', evaluate_result)
   logging.info('Results: %s', evaluate_result['performance_bisection'])
    self.assertEqual(evaluate_result['performance_bisection']['culprits'], [])
Example #16
 def testEvaluateSuccess_GraphJson(self, isolate_retrieve):
     isolate_retrieve.side_effect = itertools.chain(*itertools.repeat(
         [('{"files": {"some_benchmark/perf_results.json": '
           '{"h": "394890891823812873798734a"}}}'),
          json.dumps(
              {'chart': {
                  'traces': {
                      'trace': ['126444.869721', '0.0']
                  }
              }})], 10))
     self.PopulateTaskGraph(benchmark='some_benchmark',
                            chart='chart',
                            trace='trace',
                            mode='graph_json')
     self.assertNotEqual({},
                         task_module.Evaluate(
                             self.job,
                             event_module.Event(type='initiate',
                                                target_task=None,
                                                payload={}),
                             self.evaluator))
     self.assertEqual(
         {
             'read_value_chromium@aaaaaaa_%s' % (attempt, ): {
                 'benchmark': 'some_benchmark',
                 'mode': 'graph_json',
                 'results_filename': 'some_benchmark/perf_results.json',
                 'histogram_options': {
                     'tir_label': None,
                     'story': None,
                     'statistic': None,
                 },
                 'graph_json_options': {
                     'chart': 'chart',
                     'trace': 'trace',
                 },
                 'result_values': [126444.869721],
                 'status': 'completed',
                 'tries': 1,
             }
             for attempt in range(10)
         },
         task_module.Evaluate(
             self.job,
             event_module.Event(type='select', target_task=None,
                                payload={}),
             evaluators.Selector(task_type='read_value')))
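
For reference, the graph_json payload seeded above maps chart -> traces -> trace to a list whose first element is the reported value (126444.869721 in the expectation). A rough sketch of that extraction (a hypothetical helper mirroring the test data, not the production read_value code):

import json

def ExtractGraphJsonValues(results_json, chart, trace):
  # Parse the graph_json results and return the first element of the selected
  # chart/trace pair as a float, matching the seeded data above.
  graph = json.loads(results_json)
  return [float(graph[chart]['traces'][trace][0])]
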
Example #17
    def testEvaluateSuccess_SpeculateBisection(self):
        self.PopulateSimpleBisectionGraph()
        task_module.Evaluate(
            self.job,
            event_module.Event(type='initiate', target_task=None, payload={}),
            self.CompoundEvaluatorForTesting(
                FakeReadValueMapResult(
                    self.job, {
                        change_module.Change.FromDict({
                            'commits': [{
                                'repository': 'chromium',
                                'git_hash': commit
                            }]
                        }): values
                        for commit, values in (
                            ('commit_0', [1.0] * 10),
                            ('commit_1', [1.0] * 10),
                            ('commit_2', [2.0] * 10),
                            ('commit_3', [2.0] * 10),
                            ('commit_4', [2.0] * 10),
                            ('commit_5', [2.0] * 10),
                        )
                    })))
        evaluate_result = task_module.Evaluate(
            self.job, SelectEvent(),
            evaluators.Selector(task_type='find_culprit'))
        self.assertIn('performance_bisection', evaluate_result)
        logging.info('Results: %s', evaluate_result['performance_bisection'])

        # Here we're testing that we can find the change between commit_1 and
        # commit_2 in the values we seed above.
        self.assertEqual(evaluate_result['performance_bisection']['culprits'],
                         [[
                              change_module.Change.FromDict({
                                  'commits': [{
                                      'repository': 'chromium',
                                      'git_hash': 'commit_1'
                                  }]
                              }).AsDict(),
                              change_module.Change.FromDict({
                                  'commits': [{
                                      'repository': 'chromium',
                                      'git_hash': 'commit_2'
                                  }]
                              }).AsDict()
                          ]])
Example #18
    def testEvaluateHandleFailures_Hard(self, swarming_task_stdout,
                                        swarming_task_result,
                                        swarming_tasks_new):
        swarming_tasks_new.return_value = {'task_id': 'task id'}
        evaluator = evaluators.SequenceEvaluator(evaluators=(
            evaluators.FilteringEvaluator(
                predicate=evaluators.TaskTypeEq('find_isolate'),
                delegate=evaluators.SequenceEvaluator(
                    evaluators=(bisection_test_util.FakeFoundIsolate(self.job),
                                evaluators.TaskPayloadLiftingEvaluator()))),
            run_test.Evaluator(self.job),
        ))
        self.assertNotEqual({},
                            task_module.Evaluate(
                                self.job,
                                event_module.Event(type='initiate',
                                                   target_task=None,
                                                   payload={}), evaluator))

        # We set it up so that when we poll the swarming task, we get an error
        # status. We're expecting that hard failures are detected.
        swarming_task_stdout.return_value = {
            'output':
            """Traceback (most recent call last):
  File "../../testing/scripts/run_performance_tests.py", line 282, in <module>
    sys.exit(main())
  File "../../testing/scripts/run_performance_tests.py", line 226, in main
    benchmarks = args.benchmark_names.split(',')
AttributeError: 'Namespace' object has no attribute 'benchmark_names'"""
        }
        swarming_task_result.return_value = {
            'bot_id': 'bot id',
            'exit_code': 1,
            'failure': True,
            'outputs_ref': {
                'isolatedserver': 'output isolate server',
                'isolated': 'output isolate hash',
            },
            'state': 'COMPLETED',
        }
        for attempt in range(10):
            self.assertNotEqual(
                {},
                task_module.Evaluate(
                    self.job,
                    event_module.Event(
                        type='update',
                        target_task='run_test_chromium@aaaaaaa_%s' %
                        (attempt, ),
                        payload={
                            'kind': 'pubsub_message',
                            'action': 'poll'
                        }), evaluator), 'Attempt #%s' % (attempt, ))
        self.assertEqual(
            {
                'run_test_chromium@aaaaaaa_%s' % (attempt, ): {
                    'status': 'failed',
                    'swarming_server': 'some_server',
                    'dimensions': DIMENSIONS,
                    'errors': mock.ANY,
                    'extra_args': [],
                    'swarming_request_body': {
                        'name': mock.ANY,
                        'user': mock.ANY,
                        'priority': mock.ANY,
                        'task_slices': mock.ANY,
                        'tags': mock.ANY,
                        'pubsub_auth_token': mock.ANY,
                        'pubsub_topic': mock.ANY,
                        'pubsub_userdata': mock.ANY,
                        'service_account': mock.ANY,
                    },
                    'swarming_task_result': {
                        'bot_id': mock.ANY,
                        'state': 'COMPLETED',
                        'failure': True,
                    },
                    'isolate_server': 'output isolate server',
                    'isolate_hash': 'output isolate hash',
                    'swarming_task_id': 'task id',
                    'tries': 1,
                    'change': mock.ANY,
                    'index': attempt,
                }
                for attempt in range(10)
            },
            task_module.Evaluate(
                self.job,
                event_module.Event(type='select', target_task=None,
                                   payload={}),
                evaluators.Selector(task_type='run_test')))
Example #19
    def testEvaluateSuccess_HistogramSummary(self, isolate_retrieve):
        samples = []
        hists = []
        for i in range(10):
            hist = histogram_module.Histogram('some_benchmark', 'count')
            hist.AddSample(0)
            hist.AddSample(1)
            hist.AddSample(2)
            hist.diagnostics[reserved_infos.STORIES.name] = (
                generic_set.GenericSet(['story%d' % i]))
            hist.diagnostics[reserved_infos.STORY_TAGS.name] = (
                generic_set.GenericSet(['group:label1']))
            hists.append(hist)
            samples.extend(hist.sample_values)

        for i in range(10):
            hist = histogram_module.Histogram('some_benchmark', 'count')
            hist.AddSample(0)
            hist.AddSample(1)
            hist.AddSample(2)
            hist.diagnostics[reserved_infos.STORIES.name] = (
                generic_set.GenericSet(['another_story%d' % i]))
            hist.diagnostics[reserved_infos.STORY_TAGS.name] = (
                generic_set.GenericSet(['group:label2']))
            hists.append(hist)
            samples.extend(hist.sample_values)

        histograms = histogram_set.HistogramSet(hists)
        histograms.AddSharedDiagnosticToAllHistograms(
            reserved_infos.STORY_TAGS.name,
            generic_set.GenericSet(['group:label']))
        isolate_retrieve.side_effect = itertools.chain(
            *itertools.repeat([(
                '{"files": {"some_benchmark/perf_results.json": '
                '{"h": "394890891823812873798734a"}}}'),
                               json.dumps(histograms.AsDicts())], 10))
        self.PopulateTaskGraph(benchmark='some_benchmark')
        self.assertNotEqual({},
                            task_module.Evaluate(
                                self.job,
                                event_module.Event(type='initiate',
                                                   target_task=None,
                                                   payload={}),
                                self.evaluator))
        self.assertEqual(
            {
                'read_value_chromium@aaaaaaa_%s' % (attempt, ): {
                    'benchmark': 'some_benchmark',
                    'change': mock.ANY,
                    'mode': 'histogram_sets',
                    'results_filename': 'some_benchmark/perf_results.json',
                    'histogram_options': {
                        'grouping_label': None,
                        'story': None,
                        'statistic': None,
                    },
                    'graph_json_options': {
                        'chart': None,
                        'trace': 'some_trace'
                    },
                    'result_values': [sum(samples)],
                    'status': 'completed',
                    'tries': 1,
                    'index': attempt,
                }
                for attempt in range(10)
            },
            task_module.Evaluate(
                self.job,
                event_module.Event(type='select', target_task=None,
                                   payload={}),
                evaluators.Selector(task_type='read_value')))
Example #20
    def _FormatAndPostBugCommentOnComplete(self):
        logging.debug('Processing outputs.')
        if self._IsTryJob():
            # There is no comparison metric.
            title = '<b>%s Job complete. See results below.</b>' % _ROUND_PUSHPIN
            deferred.defer(_PostBugCommentDeferred,
                           self.bug_id,
                           '\n'.join((title, self.url)),
                           labels=['Pinpoint-Tryjob-Completed'],
                           _retry_options=RETRY_OPTIONS)
            return

        # There is a comparison metric.
        differences = []
        result_values = {}
        if not self.use_execution_engine:
            differences = self.state.Differences()
            for change_a, change_b in differences:
                result_values.setdefault(change_a,
                                         self.state.ResultValues(change_a))
                result_values.setdefault(change_b,
                                         self.state.ResultValues(change_b))
        else:
            logging.debug('Execution Engine: Finding culprits.')
            context = task_module.Evaluate(
                self, event_module.SelectEvent(),
                evaluators.Selector(
                    event_type='select',
                    include_keys={'culprits', 'change', 'result_values'}))
            differences = [
                (change_module.ReconstituteChange(change_a),
                 change_module.ReconstituteChange(change_b))
                for change_a, change_b in context.get('performance_bisection',
                                                      {}).get('culprits', [])
            ]
            result_values = {
                change_module.ReconstituteChange(v.get('change')):
                v.get('result_values')
                for v in context.values()
                if 'change' in v and 'result_values' in v
            }

        if not differences:
            title = "<b>%s Couldn't reproduce a difference.</b>" % _ROUND_PUSHPIN
            deferred.defer(_PostBugCommentDeferred,
                           self.bug_id,
                           '\n'.join((title, self.url)),
                           labels=['Pinpoint-No-Repro'],
                           _retry_options=RETRY_OPTIONS)
            return

        # Collect the result values for each of the differences
        difference_details = []
        commit_infos = []
        commits_with_deltas = {}
        for change_a, change_b in differences:
            if change_b.patch:
                commit = change_b.patch
            else:
                commit = change_b.last_commit
            commit_info = commit.AsDict()

            values_a = result_values[change_a]
            values_b = result_values[change_b]
            difference = _FormatDifferenceForBug(commit_info, values_a,
                                                 values_b, self.state.metric)
            difference_details.append(difference)
            commit_infos.append(commit_info)
            if values_a and values_b:
                mean_delta = job_state.Mean(values_b) - job_state.Mean(
                    values_a)
                commits_with_deltas[commit.id_string] = (mean_delta,
                                                         commit_info)

        deferred.defer(_UpdatePostAndMergeDeferred,
                       difference_details,
                       commit_infos,
                       list(commits_with_deltas.values()),
                       self.bug_id,
                       self.tags,
                       self.url,
                       _retry_options=RETRY_OPTIONS)
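
The mean delta stored per commit above is just the difference of sample means (assuming job_state.Mean is the arithmetic mean). A tiny worked example with made-up numbers:

values_a = [1.0, 1.2, 0.8]  # results at change_a
values_b = [2.0, 2.1, 1.9]  # results at change_b
mean_delta = sum(values_b) / len(values_b) - sum(values_a) / len(values_a)
# mean_delta == 1.0; it is stored with commit_info, keyed by commit.id_string.
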
Example #21
    def _FormatAndPostBugCommentOnComplete(self):
        logging.debug('Processing outputs.')
        if self._IsTryJob():
            # There is no comparison metric.
            title = '<b>%s Job complete. See results below.</b>' % _ROUND_PUSHPIN
            deferred.defer(_PostBugCommentDeferred,
                           self.bug_id,
                           '\n'.join((title, self.url)),
                           project=self.project,
                           labels=['Pinpoint-Tryjob-Completed'],
                           _retry_options=RETRY_OPTIONS)
            return

        # There is a comparison metric.
        differences = []
        result_values = {}
        changes_examined = None
        if not self.use_execution_engine:
            differences = self.state.Differences()
            for change_a, change_b in differences:
                result_values.setdefault(change_a,
                                         self.state.ResultValues(change_a))
                result_values.setdefault(change_b,
                                         self.state.ResultValues(change_b))
            changes_examined = self.state.ChangesExamined()
        else:
            logging.debug('Execution Engine: Finding culprits.')
            context = task_module.Evaluate(
                self, event_module.SelectEvent(),
                evaluators.Selector(
                    event_type='select',
                    include_keys={'culprits', 'change', 'result_values'}))
            differences = [
                (change_module.ReconstituteChange(change_a),
                 change_module.ReconstituteChange(change_b))
                for change_a, change_b in context.get('performance_bisection',
                                                      {}).get('culprits', [])
            ]
            result_values = {
                change_module.ReconstituteChange(v.get('change')):
                v.get('result_values')
                for v in context.values()
                if 'change' in v and 'result_values' in v
            }

        if not differences:
            # When we cannot find a difference, we want to not only update the issue
            # with that (minimal) information but also automatically mark the issue
            # WontFix. This is based on data gathered in production showing that
            # most issues where Pinpoint cannot reproduce a difference invariably
            # end up as "Unconfirmed" with very little follow-up.
            title = "<b>%s Couldn't reproduce a difference.</b>" % _ROUND_PUSHPIN
            deferred.defer(_PostBugCommentDeferred,
                           self.bug_id,
                           '\n'.join((title, self.url)),
                           project=self.project,
                           labels=['Pinpoint-No-Repro'],
                           status='WontFix',
                           _retry_options=RETRY_OPTIONS)
            return

        # Collect the result values for each of the differences
        bug_update_builder = job_bug_update.DifferencesFoundBugUpdateBuilder(
            self.state.metric)
        bug_update_builder.SetExaminedCount(changes_examined)
        for change_a, change_b in differences:
            if change_b.patch:
                commit = change_b.patch
            else:
                commit = change_b.last_commit

            values_a = result_values[change_a]
            values_b = result_values[change_b]
            bug_update_builder.AddDifference(commit, values_a, values_b)

        deferred.defer(job_bug_update.UpdatePostAndMergeDeferred,
                       bug_update_builder,
                       self.bug_id,
                       self.tags,
                       self.url,
                       self.project,
                       _retry_options=RETRY_OPTIONS)
Example #22
    def testEvaluateToCompletion(self, swarming_task_result,
                                 swarming_tasks_new):
        swarming_tasks_new.return_value = {'task_id': 'task id'}
        evaluator = evaluators.SequenceEvaluator(evaluators=(
            evaluators.FilteringEvaluator(
                predicate=evaluators.TaskTypeEq('find_isolate'),
                delegate=evaluators.SequenceEvaluator(
                    evaluators=(bisection_test_util.FakeFoundIsolate(self.job),
                                evaluators.TaskPayloadLiftingEvaluator()))),
            run_test.Evaluator(self.job),
        ))
        self.assertNotEqual({},
                            task_module.Evaluate(
                                self.job,
                                event_module.Event(type='initiate',
                                                   target_task=None,
                                                   payload={}), evaluator))

        # Ensure that we've found all the 'run_test' tasks.
        self.assertEqual(
            {
                'run_test_chromium@aaaaaaa_%s' % (attempt, ): {
                    'status': 'ongoing',
                    'swarming_server': 'some_server',
                    'dimensions': DIMENSIONS,
                    'extra_args': [],
                    'swarming_request_body': {
                        'name': mock.ANY,
                        'user': mock.ANY,
                        'priority': mock.ANY,
                        'task_slices': mock.ANY,
                        'tags': mock.ANY,
                        'pubsub_auth_token': mock.ANY,
                        'pubsub_topic': mock.ANY,
                        'pubsub_userdata': mock.ANY,
                        'service_account': mock.ANY,
                    },
                    'swarming_task_id': 'task id',
                    'tries': 1,
                    'change': mock.ANY,
                    'index': attempt,
                }
                for attempt in range(10)
            },
            task_module.Evaluate(
                self.job,
                event_module.Event(type='select', target_task=None,
                                   payload={}),
                evaluators.Selector(task_type='run_test')))

        # Ensure that we've actually made the calls to the Swarming service.
        swarming_tasks_new.assert_called()
        self.assertGreaterEqual(swarming_tasks_new.call_count, 10)

        # Then we propagate an event for each of the run_test tasks in the graph.
        swarming_task_result.return_value = {
            'bot_id': 'bot id',
            'exit_code': 0,
            'failure': False,
            'outputs_ref': {
                'isolatedserver': 'output isolate server',
                'isolated': 'output isolate hash',
            },
            'state': 'COMPLETED',
        }
        for attempt in range(10):
            self.assertNotEqual(
                {},
                task_module.Evaluate(
                    self.job,
                    event_module.Event(
                        type='update',
                        target_task='run_test_chromium@aaaaaaa_%s' %
                        (attempt, ),
                        payload={}), evaluator), 'Attempt #%s' % (attempt, ))

        # Ensure that we've polled the status of each of the tasks, and that we've
        # marked the tasks completed.
        self.assertEqual(
            {
                'run_test_chromium@aaaaaaa_%s' % (attempt, ): {
                    'status': 'completed',
                    'swarming_server': 'some_server',
                    'dimensions': DIMENSIONS,
                    'extra_args': [],
                    'swarming_request_body': {
                        'name': mock.ANY,
                        'user': mock.ANY,
                        'priority': mock.ANY,
                        'task_slices': mock.ANY,
                        'tags': mock.ANY,
                        'pubsub_auth_token': mock.ANY,
                        'pubsub_topic': mock.ANY,
                        'pubsub_userdata': mock.ANY,
                        'service_account': mock.ANY,
                    },
                    'swarming_task_result': {
                        'bot_id': mock.ANY,
                        'state': 'COMPLETED',
                        'failure': False,
                    },
                    'isolate_server': 'output isolate server',
                    'isolate_hash': 'output isolate hash',
                    'swarming_task_id': 'task id',
                    'tries': 1,
                    'change': mock.ANY,
                    'index': attempt,
                }
                for attempt in range(10)
            },
            task_module.Evaluate(
                self.job,
                event_module.Event(type='select', target_task=None,
                                   payload={}),
                evaluators.Selector(task_type='run_test')))

        # Ensure that we've actually made the calls to the Swarming service.
        swarming_task_result.assert_called()
        self.assertGreaterEqual(swarming_task_result.call_count, 10)
Example #23
    def testEvaluateHandleFailures_Expired(self, swarming_task_result,
                                           swarming_tasks_new):
        swarming_tasks_new.return_value = {'task_id': 'task id'}
        evaluator = evaluators.SequenceEvaluator(evaluators=(
            evaluators.FilteringEvaluator(
                predicate=evaluators.TaskTypeEq('find_isolate'),
                delegate=evaluators.SequenceEvaluator(
                    evaluators=(bisection_test_util.FakeFoundIsolate(self.job),
                                evaluators.TaskPayloadLiftingEvaluator()))),
            run_test.Evaluator(self.job),
        ))
        self.assertNotEqual({},
                            task_module.Evaluate(
                                self.job,
                                event_module.Event(type='initiate',
                                                   target_task=None,
                                                   payload={}), evaluator))
        swarming_task_result.return_value = {
            'state': 'EXPIRED',
        }
        for attempt in range(10):
            self.assertNotEqual(
                {},
                task_module.Evaluate(
                    self.job,
                    event_module.Event(
                        type='update',
                        target_task='run_test_chromium@aaaaaaa_%s' %
                        (attempt, ),
                        payload={
                            'kind': 'pubsub_message',
                            'action': 'poll'
                        }), evaluator), 'Attempt #%s' % (attempt, ))

        self.assertEqual(
            {
                'run_test_chromium@aaaaaaa_%s' % (attempt, ): {
                    'status': 'failed',
                    'swarming_server': 'some_server',
                    'dimensions': DIMENSIONS,
                    'errors': [
                        {
                            'reason': 'SwarmingExpired',
                            'message': mock.ANY
                        },
                    ],
                    'extra_args': [],
                    'swarming_request_body': {
                        'name': mock.ANY,
                        'user': mock.ANY,
                        'priority': mock.ANY,
                        'task_slices': mock.ANY,
                        'tags': mock.ANY,
                        'pubsub_auth_token': mock.ANY,
                        'pubsub_topic': mock.ANY,
                        'pubsub_userdata': mock.ANY,
                        'service_account': mock.ANY,
                    },
                    'swarming_task_result': {
                        'state': 'EXPIRED',
                    },
                    'swarming_task_id': 'task id',
                    'tries': 1,
                    'change': mock.ANY,
                    'index': attempt,
                }
                for attempt in range(10)
            },
            task_module.Evaluate(
                self.job,
                event_module.Event(type='select', target_task=None,
                                   payload={}),
                evaluators.Selector(task_type='run_test')))