def testEvaluateSuccess_NeedToRefineAttempts(self):
  self.PopulateSimpleBisectionGraph()
  task_module.Evaluate(
      self.job,
      event_module.Event(type='initiate', target_task=None, payload={}),
      self.CompoundEvaluatorForTesting(
          FakeReadValueMapResult(
              self.job, {
                  change_module.Change.FromDict({
                      'commits': [{
                          'repository': 'chromium',
                          'git_hash': commit
                      }]
                  }): values for commit, values in (
                      ('commit_0', range(10)),
                      ('commit_1', range(10)),
                      ('commit_2', range(4, 14)),
                      ('commit_3', range(3, 13)),
                      ('commit_4', range(3, 13)),
                      ('commit_5', range(3, 13)),
                  )
              })))

  # Here we test that we have more than the minimum attempts for the change
  # between commit_1 and commit_2.
  evaluate_result = task_module.Evaluate(
      self.job, SelectEvent(), evaluators.Selector(task_type='read_value'))
  attempt_counts = {}
  for payload in evaluate_result.values():
    change = change_module.Change.FromDict(payload.get('change'))
    attempt_counts[change] = attempt_counts.get(change, 0) + 1
  self.assertGreater(
      attempt_counts[change_module.Change.FromDict({
          'commits': [{
              'repository': 'chromium',
              'git_hash': 'commit_2',
          }]
      })], 10)
  self.assertLess(
      attempt_counts[change_module.Change.FromDict({
          'commits': [{
              'repository': 'chromium',
              'git_hash': 'commit_2',
          }]
      })], 100)

  # We know that we will never get a deterministic answer, so we ensure that
  # we don't inadvertently blame the wrong changes at the end of the
  # refinement.
  evaluate_result = task_module.Evaluate(
      self.job, SelectEvent(), evaluators.Selector(task_type='find_culprit'))
  self.assertIn('performance_bisection', evaluate_result)
  logging.info('Results: %s', evaluate_result['performance_bisection'])
  self.assertEqual(evaluate_result['performance_bisection']['culprits'], [])

def testEvaluateSuccess_NeedToRefineAttempts(self):
  self.PopulateSimpleBisectionGraph(self.job)
  task_module.Evaluate(
      self.job,
      event_module.Event(type='initiate', target_task=None, payload={}),
      self.BisectionEvaluatorForTesting(
          bisection_test_util.FakeReadValueMapResult(
              self.job, {
                  change_module.Change.FromDict({
                      'commits': [{
                          'repository': 'chromium',
                          'git_hash': commit
                      }]
                  }): values for commit, values in (
                      ('commit_0', range(10)),
                      ('commit_1', range(1, 11)),
                      ('commit_2', range(2, 12)),
                      ('commit_3', range(3, 13)),
                      ('commit_4', range(3, 13)),
                      ('commit_5', range(3, 13)),
                  )
              })))

  # Here we test that we have more than the minimum attempts for the change
  # between commit_1 and commit_2.
  evaluate_result = task_module.Evaluate(
      self.job, bisection_test_util.SelectEvent(),
      evaluators.Selector(task_type='read_value'))
  attempt_counts = {}
  for payload in evaluate_result.values():
    change = change_module.Change.FromDict(payload.get('change'))
    attempt_counts[change] = attempt_counts.get(change, 0) + 1
  self.assertGreater(
      attempt_counts[change_module.Change.FromDict({
          'commits': [{
              'repository': 'chromium',
              'git_hash': 'commit_2',
          }]
      })], 10)
  self.assertLess(
      attempt_counts[change_module.Change.FromDict({
          'commits': [{
              'repository': 'chromium',
              'git_hash': 'commit_2',
          }]
      })], 100)

  # We know that we will refine the graph until we see the progression from
  # commit_0 -> commit_1 -> commit_2 -> commit_3 and stabilize.
  evaluate_result = task_module.Evaluate(
      self.job, bisection_test_util.SelectEvent(),
      evaluators.Selector(task_type='find_culprit'))
  self.assertIn('performance_bisection', evaluate_result)
  self.assertEqual(evaluate_result['performance_bisection']['culprits'],
                   [mock.ANY, mock.ANY, mock.ANY])

def testSelector_Combinations(self):
  matching_task_types = (None, 'test')
  matching_event_types = (None, 'select')
  task = task_module.InMemoryTask(
      id='test_id',
      task_type='test',
      payload={},
      status='pending',
      dependencies=[])
  for task_type in matching_task_types:
    for event_type in matching_event_types:
      if not task_type and not event_type:
        continue
      accumulator = {}
      evaluators.Selector(task_type=task_type, event_type=event_type)(
          task,
          event_module.Event(type='select', target_task=None, payload={}),
          accumulator)
      self.assertEqual({'test_id': mock.ANY}, accumulator,
                       'task_type = %s, event_type = %s' %
                       (task_type, event_type))

  non_matching_task_types = ('unmatched_task',)
  non_matching_event_types = ('unmatched_event',)

  # Because the Selector's default behaviour is a logical disjunction of
  # matchers, we ensure that we will always find the tasks and handle events
  # if either (or both) match.
  for task_type in [t for t in matching_task_types if t is not None]:
    for event_type in non_matching_event_types:
      accumulator = {}
      evaluators.Selector(task_type=task_type, event_type=event_type)(
          task,
          event_module.Event(type='select', target_task=None, payload={}),
          accumulator)
      self.assertEqual({'test_id': mock.ANY}, accumulator,
                       'task_type = %s, event_type = %s' %
                       (task_type, event_type))
  for task_type in non_matching_task_types:
    for event_type in [t for t in matching_event_types if t is not None]:
      accumulator = {}
      evaluators.Selector(task_type=task_type, event_type=event_type)(
          task,
          event_module.Event(type='select', target_task=None, payload={}),
          accumulator)
      self.assertEqual({'test_id': mock.ANY}, accumulator,
                       'task_type = %s, event_type = %s' %
                       (task_type, event_type))

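# A minimal usage sketch of the disjunction behaviour the test above
# verifies: a Selector configured with both a task_type and an event_type
# matches when either criterion matches. This only reuses constructs already
# exercised in this file; the id 'example_id' is a hypothetical name chosen
# for illustration.
#
#   task = task_module.InMemoryTask(
#       id='example_id', task_type='test', payload={}, status='pending',
#       dependencies=[])
#   accumulator = {}
#   evaluators.Selector(task_type='test', event_type='unmatched_event')(
#       task,
#       event_module.Event(type='select', target_task=None, payload={}),
#       accumulator)
#   # The task_type matches even though the event type does not, so the
#   # accumulator is still populated: {'example_id': <payload>}.
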
def testEvaluateSuccess_HistogramSkipRefTraceUrls(self, isolate_retrieve):
  hist = histogram_module.Histogram('some_benchmark', 'count')
  hist.AddSample(0)
  hist.diagnostics[reserved_infos.TRACE_URLS.name] = (
      generic_set.GenericSet(['trace_url1', 'trace_url2']))
  hist2 = histogram_module.Histogram('hist2', 'count')
  hist2.diagnostics[reserved_infos.TRACE_URLS.name] = (
      generic_set.GenericSet(['trace_url3']))
  hist2.diagnostics[reserved_infos.TRACE_URLS.name].guid = 'foo'
  histograms = histogram_set.HistogramSet([hist, hist2])
  isolate_retrieve.side_effect = itertools.chain(
      *itertools.repeat([('{"files": {"some_benchmark/perf_results.json": '
                          '{"h": "394890891823812873798734a"}}}'),
                         json.dumps(histograms.AsDicts())], 10))
  self.PopulateTaskGraph(benchmark='some_benchmark')
  self.assertNotEqual({},
                      task_module.Evaluate(
                          self.job,
                          event_module.Event(
                              type='initiate', target_task=None, payload={}),
                          self.evaluator))
  self.assertEqual(
      {
          'read_value_chromium@aaaaaaa_%s' % (attempt,): {
              'benchmark': 'some_benchmark',
              'change': mock.ANY,
              'mode': 'histogram_sets',
              'results_filename': 'some_benchmark/perf_results.json',
              'histogram_options': {
                  'grouping_label': None,
                  'story': None,
                  'statistic': None,
              },
              'graph_json_options': {
                  'chart': None,
                  'trace': 'some_trace'
              },
              'result_values': [0],
              'status': 'completed',
              'tries': 1,
              'trace_urls': [{
                  'key': 'trace',
                  'value': 'trace_url1',
                  'url': 'trace_url1'
              }, {
                  'key': 'trace',
                  'value': 'trace_url2',
                  'url': 'trace_url2',
              }]
          } for attempt in range(10)
      },
      task_module.Evaluate(
          self.job,
          event_module.Event(type='select', target_task=None, payload={}),
          evaluators.Selector(task_type='read_value')))

def testEvaluateSuccess_MultipleHistograms(self, isolate_retrieve):

  def CreateHistogram(name):
    histogram = histogram_module.Histogram(name, 'count')
    histogram.AddSample(0)
    histogram.AddSample(1)
    histogram.AddSample(2)
    return histogram

  histograms = histogram_set.HistogramSet([
      CreateHistogram(name)
      for name in ('some_chart', 'some_chart', 'some_other_chart')
  ])
  histograms.AddSharedDiagnosticToAllHistograms(
      reserved_infos.STORY_TAGS.name, generic_set.GenericSet(['group:label']))
  histograms.AddSharedDiagnosticToAllHistograms(
      reserved_infos.STORIES.name, generic_set.GenericSet(['story']))
  isolate_retrieve.side_effect = itertools.chain(
      *itertools.repeat([('{"files": {"some_benchmark/perf_results.json": '
                          '{"h": "394890891823812873798734a"}}}'),
                         json.dumps(histograms.AsDicts())], 10))
  self.PopulateTaskGraph(
      benchmark='some_benchmark',
      chart='some_chart',
      grouping_label='label',
      story='story')
  self.assertNotEqual({},
                      task_module.Evaluate(
                          self.job,
                          event_module.Event(
                              type='initiate', target_task=None, payload={}),
                          self.evaluator))
  self.assertEqual(
      {
          'read_value_chromium@aaaaaaa_%s' % (attempt,): {
              'benchmark': 'some_benchmark',
              'change': mock.ANY,
              'mode': 'histogram_sets',
              'results_filename': 'some_benchmark/perf_results.json',
              'results_path': ['some_benchmark', 'perf_results.json'],
              'histogram_options': {
                  'grouping_label': 'label',
                  'story': 'story',
                  'statistic': None,
                  'histogram_name': 'some_chart',
              },
              'graph_json_options': {
                  'chart': 'some_chart',
                  'trace': 'some_trace'
              },
              'result_values': [0, 1, 2, 0, 1, 2],
              'status': 'completed',
              'tries': 1,
              'index': attempt,
          } for attempt in range(10)
      },
      task_module.Evaluate(
          self.job,
          event_module.Event(type='select', target_task=None, payload={}),
          evaluators.Selector(task_type='read_value')))

def testEvaluateFail_GraphJsonMissingTrace(self, isolate_retrieve):
  isolate_retrieve.side_effect = itertools.chain(*itertools.repeat(
      [('{"files": {"some_benchmark/perf_results.json": '
        '{"h": "394890891823812873798734a"}}}'),
       json.dumps({'chart': {
           'traces': {
               'trace': ['126444.869721', '0.0']
           }
       }})], 10))
  self.PopulateTaskGraph(
      benchmark='some_benchmark',
      chart='chart',
      trace='must_not_be_found',
      mode='graph_json')
  self.assertNotEqual({},
                      task_module.Evaluate(
                          self.job,
                          event_module.Event(
                              type='initiate', target_task=None, payload={}),
                          self.evaluator))
  self.assertEqual(
      {
          'read_value_chromium@aaaaaaa_%s' % (attempt,): {
              'benchmark': 'some_benchmark',
              'change': mock.ANY,
              'mode': 'graph_json',
              'results_filename': 'some_benchmark/perf_results.json',
              'histogram_options': {
                  'grouping_label': None,
                  'story': None,
                  'statistic': None,
                  'histogram_name': 'chart',
              },
              'graph_json_options': {
                  'chart': 'chart',
                  'trace': 'must_not_be_found',
              },
              'errors': [{
                  'reason': 'ReadValueTraceNotFound',
                  'message': mock.ANY,
              }],
              'status': 'failed',
              'tries': 1,
              'index': attempt,
          } for attempt in range(10)
      },
      task_module.Evaluate(
          self.job,
          event_module.Event(type='select', target_task=None, payload={}),
          evaluators.Selector(task_type='read_value')))

def testEvaluateFailedDependency(self, *_):
  self.PopulateTaskGraph(
      benchmark='some_benchmark',
      chart='chart',
      trace='must_not_be_found',
      mode='graph_json')
  self.assertNotEqual(
      {},
      task_module.Evaluate(
          self.job,
          event_module.Event(type='initiate', target_task=None, payload={}),
          evaluators.SequenceEvaluator(evaluators=(
              evaluators.FilteringEvaluator(
                  predicate=evaluators.TaskTypeEq('find_isolate'),
                  delegate=evaluators.SequenceEvaluator(evaluators=(
                      functools.partial(FakeFoundIsolate, self.job),
                      evaluators.TaskPayloadLiftingEvaluator()))),
              evaluators.FilteringEvaluator(
                  predicate=evaluators.TaskTypeEq('run_test'),
                  delegate=evaluators.SequenceEvaluator(evaluators=(
                      functools.partial(FakeFailedRunTest, self.job),
                      evaluators.TaskPayloadLiftingEvaluator()))),
              read_value.Evaluator(self.job),
          ))))
  self.assertEqual(
      {
          'read_value_chromium@aaaaaaa_%s' % (attempt,): {
              'benchmark': 'some_benchmark',
              'mode': 'graph_json',
              'results_filename': 'some_benchmark/perf_results.json',
              'histogram_options': {
                  'tir_label': None,
                  'story': None,
                  'statistic': None,
              },
              'graph_json_options': {
                  'chart': 'chart',
                  'trace': 'must_not_be_found',
              },
              'errors': [{
                  'reason': 'DependencyFailed',
                  'message': mock.ANY,
              }],
              'status': 'failed',
              'tries': 1,
          } for attempt in range(10)
      },
      task_module.Evaluate(
          self.job,
          event_module.Event(type='select', target_task=None, payload={}),
          evaluators.Selector(task_type='read_value')))

def testEvaluateSuccess_WithData(self, isolate_retrieve):
  # Seed the response to the call to the isolate service.
  histogram = histogram_module.Histogram('some_chart', 'count')
  histogram.AddSample(0)
  histogram.AddSample(1)
  histogram.AddSample(2)
  histograms = histogram_set.HistogramSet([histogram])
  histograms.AddSharedDiagnosticToAllHistograms(
      reserved_infos.STORY_TAGS.name, generic_set.GenericSet(['group:label']))
  histograms.AddSharedDiagnosticToAllHistograms(
      reserved_infos.STORIES.name, generic_set.GenericSet(['story']))
  isolate_retrieve.side_effect = itertools.chain(
      *itertools.repeat([('{"files": {"some_benchmark/perf_results.json": '
                          '{"h": "394890891823812873798734a"}}}'),
                         json.dumps(histograms.AsDicts())], 10))

  # Set it up so that we are building a graph that's looking for no statistic.
  self.PopulateTaskGraph(
      benchmark='some_benchmark',
      chart='some_chart',
      grouping_label='label',
      story='story')
  self.assertNotEqual({},
                      task_module.Evaluate(
                          self.job,
                          event_module.Event(
                              type='initiate', target_task=None, payload={}),
                          self.evaluator))

  # Ensure that we find a value, and the histogram associated with the data
  # we're looking for.
  self.assertEqual(
      {
          'read_value_chromium@aaaaaaa_%s' % (attempt,): {
              'benchmark': 'some_benchmark',
              'change': mock.ANY,
              'mode': 'histogram_sets',
              'results_filename': 'some_benchmark/perf_results.json',
              'results_path': ['some_benchmark', 'perf_results.json'],
              'histogram_options': {
                  'grouping_label': 'label',
                  'story': 'story',
                  'statistic': None,
                  'histogram_name': 'some_chart',
              },
              'graph_json_options': {
                  'chart': 'some_chart',
                  'trace': 'some_trace',
              },
              'status': 'completed',
              'result_values': [0, 1, 2],
              'tries': 1,
              'index': attempt,
          } for attempt in range(10)
      },
      task_module.Evaluate(
          self.job,
          event_module.Event(type='select', target_task=None, payload={}),
          evaluators.Selector(task_type='read_value')))

def testEvaluateFailure_HistogramNoSamples(self, isolate_retrieve):
  histogram = histogram_module.Histogram('some_benchmark', 'count')
  histograms = histogram_set.HistogramSet([histogram])
  histograms.AddSharedDiagnosticToAllHistograms(
      reserved_infos.STORY_TAGS.name,
      generic_set.GenericSet(['group:tir_label']))
  histograms.AddSharedDiagnosticToAllHistograms(
      reserved_infos.STORIES.name, generic_set.GenericSet(['https://story']))
  isolate_retrieve.side_effect = itertools.chain(
      *itertools.repeat([('{"files": {"some_benchmark/perf_results.json": '
                          '{"h": "394890891823812873798734a"}}}'),
                         json.dumps(histograms.AsDicts())], 10))
  self.PopulateTaskGraph(
      benchmark='some_benchmark',
      chart='some_chart',
      tir_label='tir_label',
      story='https://story')
  self.assertNotEqual({},
                      task_module.Evaluate(
                          self.job,
                          event_module.Event(
                              type='initiate', target_task=None, payload={}),
                          self.evaluator))
  self.assertEqual(
      {
          'read_value_chromium@aaaaaaa_%s' % (attempt,): {
              'benchmark': 'some_benchmark',
              'mode': 'histogram_sets',
              'results_filename': 'some_benchmark/perf_results.json',
              'histogram_options': {
                  'tir_label': 'tir_label',
                  'story': 'https://story',
                  'statistic': None,
              },
              'graph_json_options': {
                  'chart': 'some_chart',
                  'trace': 'some_trace'
              },
              'status': 'failed',
              'errors': [{
                  'reason': 'ReadValueNoValues',
                  'message': mock.ANY,
              }],
              'tries': 1,
          } for attempt in range(10)
      },
      task_module.Evaluate(
          self.job,
          event_module.Event(type='select', target_task=None, payload={}),
          evaluators.Selector(task_type='read_value')))

def testEvaluateFailure_HistogramNoValues(self, isolate_retrieve):
  isolate_retrieve.side_effect = itertools.chain(*itertools.repeat(
      [('{"files": {"some_benchmark/perf_results.json": '
        '{"h": "394890891823812873798734a"}}}'),
       json.dumps(
           histogram_set.HistogramSet(
               [histogram_module.Histogram('some_benchmark',
                                           'count')]).AsDicts())], 10))
  self.PopulateTaskGraph(
      benchmark='some_benchmark',
      chart='some_chart',
      grouping_label='label',
      story='https://story')
  self.assertNotEqual({},
                      task_module.Evaluate(
                          self.job,
                          event_module.Event(
                              type='initiate', target_task=None, payload={}),
                          self.evaluator))
  self.assertEqual(
      {
          'read_value_chromium@aaaaaaa_%s' % (attempt,): {
              'benchmark': 'some_benchmark',
              'change': mock.ANY,
              'mode': 'histogram_sets',
              'results_filename': 'some_benchmark/perf_results.json',
              'histogram_options': {
                  'grouping_label': 'label',
                  'story': 'https://story',
                  'statistic': None,
                  'histogram_name': 'some_chart',
              },
              'graph_json_options': {
                  'chart': 'some_chart',
                  'trace': 'some_trace',
              },
              'status': 'failed',
              'errors': [{
                  'reason': 'ReadValueNotFound',
                  'message': mock.ANY,
              }],
              'tries': 1,
              'index': attempt,
          } for attempt in range(10)
      },
      task_module.Evaluate(
          self.job,
          event_module.Event(type='select', target_task=None, payload={}),
          evaluators.Selector(task_type='read_value')))

def testSelector_EventType(self):
  task = task_module.InMemoryTask(
      id='test_id',
      task_type='test',
      payload={},
      status='pending',
      dependencies=[])
  accumulator = {}
  evaluators.Selector(event_type='select')(
      task,
      event_module.Event(type='unmatched', target_task=None, payload={}),
      accumulator)
  self.assertEqual({}, accumulator)
  evaluators.Selector(event_type='select')(
      task,
      event_module.Event(type='select', target_task=None, payload={}),
      accumulator)
  self.assertEqual({'test_id': mock.ANY}, accumulator)

def testEvaluateFailure_DependenciesFailed(self):
  self.PopulateSimpleBisectionGraph()
  task_module.Evaluate(
      self.job,
      event_module.Event(type='initiate', target_task=None, payload={}),
      self.CompoundEvaluatorForTesting(FakeReadValueFails(self.job)))
  evaluate_result = task_module.Evaluate(
      self.job, SelectEvent(), evaluators.Selector(task_type='find_culprit'))
  self.assertIn('performance_bisection', evaluate_result)
  self.assertEqual(evaluate_result['performance_bisection']['status'],
                   'failed')
  self.assertNotEqual([], evaluate_result['performance_bisection']['errors'])

def testEvaluateFailure_GraphJsonMissingFile(self, isolate_retrieve):
  isolate_retrieve.return_value = '{"files": {}}'
  self.PopulateTaskGraph(
      benchmark='some_benchmark', chart='chart', trace='trace',
      mode='graph_json')
  self.assertNotEqual({},
                      task_module.Evaluate(
                          self.job,
                          event_module.Event(
                              type='initiate', target_task=None, payload={}),
                          self.evaluator))
  self.assertEqual(
      {
          'read_value_chromium@aaaaaaa_%s' % (attempt,): {
              'benchmark': 'some_benchmark',
              'change': mock.ANY,
              'mode': 'graph_json',
              'results_filename': 'some_benchmark/perf_results.json',
              'histogram_options': {
                  'grouping_label': None,
                  'story': None,
                  'statistic': None,
                  'histogram_name': 'chart',
              },
              'graph_json_options': {
                  'chart': 'chart',
                  'trace': 'trace',
              },
              'errors': [{
                  'reason': 'ReadValueNoFile',
                  'message': mock.ANY,
              }],
              'status': 'failed',
              'tries': 1,
              'index': attempt,
          } for attempt in range(10)
      },
      task_module.Evaluate(
          self.job,
          event_module.Event(type='select', target_task=None, payload={}),
          evaluators.Selector(task_type='read_value')))

def testSelector_Predicate(self):
  task = task_module.InMemoryTask(
      id='test_id',
      task_type='test',
      payload={},
      status='pending',
      dependencies=[])
  accumulator = {}
  evaluators.Selector(predicate=lambda *_: True)(
      task,
      event_module.Event(type='unimportant', target_task=None, payload={}),
      accumulator)
  self.assertEqual({'test_id': mock.ANY}, accumulator)

def testEvaluateSuccess_NoReproduction(self):
  self.PopulateSimpleBisectionGraph(self.job)
  task_module.Evaluate(
      self.job,
      event_module.Event(type='initiate', target_task=None, payload={}),
      self.BisectionEvaluatorForTesting(
          bisection_test_util.FakeReadValueSameResult(self.job, 1.0)))
  evaluate_result = task_module.Evaluate(
      self.job,
      event_module.Event(type='select', target_task=None, payload={}),
      evaluators.Selector(task_type='find_culprit'))
  self.assertIn('performance_bisection', evaluate_result)
  logging.info('Results: %s', evaluate_result['performance_bisection'])
  self.assertEqual(evaluate_result['performance_bisection']['culprits'], [])

def testEvaluateSuccess_GraphJson(self, isolate_retrieve):
  isolate_retrieve.side_effect = itertools.chain(*itertools.repeat(
      [('{"files": {"some_benchmark/perf_results.json": '
        '{"h": "394890891823812873798734a"}}}'),
       json.dumps({'chart': {
           'traces': {
               'trace': ['126444.869721', '0.0']
           }
       }})], 10))
  self.PopulateTaskGraph(
      benchmark='some_benchmark', chart='chart', trace='trace',
      mode='graph_json')
  self.assertNotEqual({},
                      task_module.Evaluate(
                          self.job,
                          event_module.Event(
                              type='initiate', target_task=None, payload={}),
                          self.evaluator))
  self.assertEqual(
      {
          'read_value_chromium@aaaaaaa_%s' % (attempt,): {
              'benchmark': 'some_benchmark',
              'mode': 'graph_json',
              'results_filename': 'some_benchmark/perf_results.json',
              'histogram_options': {
                  'tir_label': None,
                  'story': None,
                  'statistic': None,
              },
              'graph_json_options': {
                  'chart': 'chart',
                  'trace': 'trace',
              },
              'result_values': [126444.869721],
              'status': 'completed',
              'tries': 1,
          } for attempt in range(10)
      },
      task_module.Evaluate(
          self.job,
          event_module.Event(type='select', target_task=None, payload={}),
          evaluators.Selector(task_type='read_value')))

def testEvaluateSuccess_SpeculateBisection(self):
  self.PopulateSimpleBisectionGraph()
  task_module.Evaluate(
      self.job,
      event_module.Event(type='initiate', target_task=None, payload={}),
      self.CompoundEvaluatorForTesting(
          FakeReadValueMapResult(
              self.job, {
                  change_module.Change.FromDict({
                      'commits': [{
                          'repository': 'chromium',
                          'git_hash': commit
                      }]
                  }): values for commit, values in (
                      ('commit_0', [1.0] * 10),
                      ('commit_1', [1.0] * 10),
                      ('commit_2', [2.0] * 10),
                      ('commit_3', [2.0] * 10),
                      ('commit_4', [2.0] * 10),
                      ('commit_5', [2.0] * 10),
                  )
              })))
  evaluate_result = task_module.Evaluate(
      self.job, SelectEvent(), evaluators.Selector(task_type='find_culprit'))
  self.assertIn('performance_bisection', evaluate_result)
  logging.info('Results: %s', evaluate_result['performance_bisection'])

  # Here we're testing that we can find the change between commit_1 and
  # commit_2 in the values we seed above.
  self.assertEqual(evaluate_result['performance_bisection']['culprits'],
                   [[
                       change_module.Change.FromDict({
                           'commits': [{
                               'repository': 'chromium',
                               'git_hash': 'commit_1'
                           }]
                       }).AsDict(),
                       change_module.Change.FromDict({
                           'commits': [{
                               'repository': 'chromium',
                               'git_hash': 'commit_2'
                           }]
                       }).AsDict()
                   ]])

def testEvaluateHandleFailures_Hard(self, swarming_task_stdout,
                                    swarming_task_result, swarming_tasks_new):
  swarming_tasks_new.return_value = {'task_id': 'task id'}
  evaluator = evaluators.SequenceEvaluator(evaluators=(
      evaluators.FilteringEvaluator(
          predicate=evaluators.TaskTypeEq('find_isolate'),
          delegate=evaluators.SequenceEvaluator(
              evaluators=(bisection_test_util.FakeFoundIsolate(self.job),
                          evaluators.TaskPayloadLiftingEvaluator()))),
      run_test.Evaluator(self.job),
  ))
  self.assertNotEqual({},
                      task_module.Evaluate(
                          self.job,
                          event_module.Event(
                              type='initiate', target_task=None, payload={}),
                          evaluator))

  # We set it up so that when we poll the swarming task, that we're going to
  # get an error status. We're expecting that hard failures are detected.
  swarming_task_stdout.return_value = {
      'output':
          """Traceback (most recent call last):
  File "../../testing/scripts/run_performance_tests.py", line 282, in <module>
    sys.exit(main())
  File "../../testing/scripts/run_performance_tests.py", line 226, in main
    benchmarks = args.benchmark_names.split(',')
AttributeError: 'Namespace' object has no attribute 'benchmark_names'"""
  }
  swarming_task_result.return_value = {
      'bot_id': 'bot id',
      'exit_code': 1,
      'failure': True,
      'outputs_ref': {
          'isolatedserver': 'output isolate server',
          'isolated': 'output isolate hash',
      },
      'state': 'COMPLETED',
  }
  for attempt in range(10):
    self.assertNotEqual(
        {},
        task_module.Evaluate(
            self.job,
            event_module.Event(
                type='update',
                target_task='run_test_chromium@aaaaaaa_%s' % (attempt,),
                payload={
                    'kind': 'pubsub_message',
                    'action': 'poll'
                }), evaluator), 'Attempt #%s' % (attempt,))
  self.assertEqual(
      {
          'run_test_chromium@aaaaaaa_%s' % (attempt,): {
              'status': 'failed',
              'swarming_server': 'some_server',
              'dimensions': DIMENSIONS,
              'errors': mock.ANY,
              'extra_args': [],
              'swarming_request_body': {
                  'name': mock.ANY,
                  'user': mock.ANY,
                  'priority': mock.ANY,
                  'task_slices': mock.ANY,
                  'tags': mock.ANY,
                  'pubsub_auth_token': mock.ANY,
                  'pubsub_topic': mock.ANY,
                  'pubsub_userdata': mock.ANY,
                  'service_account': mock.ANY,
              },
              'swarming_task_result': {
                  'bot_id': mock.ANY,
                  'state': 'COMPLETED',
                  'failure': True,
              },
              'isolate_server': 'output isolate server',
              'isolate_hash': 'output isolate hash',
              'swarming_task_id': 'task id',
              'tries': 1,
              'change': mock.ANY,
              'index': attempt,
          } for attempt in range(10)
      },
      task_module.Evaluate(
          self.job,
          event_module.Event(type='select', target_task=None, payload={}),
          evaluators.Selector(task_type='run_test')))

def testEvaluateSuccess_HistogramSummary(self, isolate_retrieve):
  samples = []
  hists = []
  for i in range(10):
    hist = histogram_module.Histogram('some_benchmark', 'count')
    hist.AddSample(0)
    hist.AddSample(1)
    hist.AddSample(2)
    hist.diagnostics[reserved_infos.STORIES.name] = (
        generic_set.GenericSet(['story%d' % i]))
    hist.diagnostics[reserved_infos.STORY_TAGS.name] = (
        generic_set.GenericSet(['group:label1']))
    hists.append(hist)
    samples.extend(hist.sample_values)
  for i in range(10):
    hist = histogram_module.Histogram('some_benchmark', 'count')
    hist.AddSample(0)
    hist.AddSample(1)
    hist.AddSample(2)
    hist.diagnostics[reserved_infos.STORIES.name] = (
        generic_set.GenericSet(['another_story%d' % i]))
    hist.diagnostics[reserved_infos.STORY_TAGS.name] = (
        generic_set.GenericSet(['group:label2']))
    hists.append(hist)
    samples.extend(hist.sample_values)
  histograms = histogram_set.HistogramSet(hists)
  histograms.AddSharedDiagnosticToAllHistograms(
      reserved_infos.STORY_TAGS.name, generic_set.GenericSet(['group:label']))
  isolate_retrieve.side_effect = itertools.chain(
      *itertools.repeat([('{"files": {"some_benchmark/perf_results.json": '
                          '{"h": "394890891823812873798734a"}}}'),
                         json.dumps(histograms.AsDicts())], 10))
  self.PopulateTaskGraph(benchmark='some_benchmark')
  self.assertNotEqual({},
                      task_module.Evaluate(
                          self.job,
                          event_module.Event(
                              type='initiate', target_task=None, payload={}),
                          self.evaluator))
  self.assertEqual(
      {
          'read_value_chromium@aaaaaaa_%s' % (attempt,): {
              'benchmark': 'some_benchmark',
              'change': mock.ANY,
              'mode': 'histogram_sets',
              'results_filename': 'some_benchmark/perf_results.json',
              'histogram_options': {
                  'grouping_label': None,
                  'story': None,
                  'statistic': None,
              },
              'graph_json_options': {
                  'chart': None,
                  'trace': 'some_trace'
              },
              'result_values': [sum(samples)],
              'status': 'completed',
              'tries': 1,
              'index': attempt,
          } for attempt in range(10)
      },
      task_module.Evaluate(
          self.job,
          event_module.Event(type='select', target_task=None, payload={}),
          evaluators.Selector(task_type='read_value')))

def _FormatAndPostBugCommentOnComplete(self):
  logging.debug('Processing outputs.')
  if self._IsTryJob():
    # There is no comparison metric.
    title = '<b>%s Job complete. See results below.</b>' % _ROUND_PUSHPIN
    deferred.defer(
        _PostBugCommentDeferred,
        self.bug_id,
        '\n'.join((title, self.url)),
        labels=['Pinpoint-Tryjob-Completed'],
        _retry_options=RETRY_OPTIONS)
    return

  # There is a comparison metric.
  differences = []
  result_values = {}
  if not self.use_execution_engine:
    differences = self.state.Differences()
    for change_a, change_b in differences:
      result_values.setdefault(change_a, self.state.ResultValues(change_a))
      result_values.setdefault(change_b, self.state.ResultValues(change_b))
  else:
    logging.debug('Execution Engine: Finding culprits.')
    context = task_module.Evaluate(
        self, event_module.SelectEvent(),
        evaluators.Selector(
            event_type='select',
            include_keys={'culprits', 'change', 'result_values'}))
    differences = [(change_module.ReconstituteChange(change_a),
                    change_module.ReconstituteChange(change_b))
                   for change_a, change_b in context.get(
                       'performance_bisection', {}).get('culprits', [])]
    result_values = {
        change_module.ReconstituteChange(v.get('change')):
            v.get('result_values')
        for v in context.values()
        if 'change' in v and 'result_values' in v
    }

  if not differences:
    title = "<b>%s Couldn't reproduce a difference.</b>" % _ROUND_PUSHPIN
    deferred.defer(
        _PostBugCommentDeferred,
        self.bug_id,
        '\n'.join((title, self.url)),
        labels=['Pinpoint-No-Repro'],
        _retry_options=RETRY_OPTIONS)
    return

  # Collect the result values for each of the differences.
  difference_details = []
  commit_infos = []
  commits_with_deltas = {}
  for change_a, change_b in differences:
    if change_b.patch:
      commit = change_b.patch
    else:
      commit = change_b.last_commit
    commit_info = commit.AsDict()

    values_a = result_values[change_a]
    values_b = result_values[change_b]
    difference = _FormatDifferenceForBug(commit_info, values_a, values_b,
                                         self.state.metric)
    difference_details.append(difference)
    commit_infos.append(commit_info)
    if values_a and values_b:
      mean_delta = job_state.Mean(values_b) - job_state.Mean(values_a)
      commits_with_deltas[commit.id_string] = (mean_delta, commit_info)

  deferred.defer(
      _UpdatePostAndMergeDeferred,
      difference_details,
      commit_infos,
      list(commits_with_deltas.values()),
      self.bug_id,
      self.tags,
      self.url,
      _retry_options=RETRY_OPTIONS)

def _FormatAndPostBugCommentOnComplete(self):
  logging.debug('Processing outputs.')
  if self._IsTryJob():
    # There is no comparison metric.
    title = '<b>%s Job complete. See results below.</b>' % _ROUND_PUSHPIN
    deferred.defer(
        _PostBugCommentDeferred,
        self.bug_id,
        '\n'.join((title, self.url)),
        project=self.project,
        labels=['Pinpoint-Tryjob-Completed'],
        _retry_options=RETRY_OPTIONS)
    return

  # There is a comparison metric.
  differences = []
  result_values = {}
  changes_examined = None
  if not self.use_execution_engine:
    differences = self.state.Differences()
    for change_a, change_b in differences:
      result_values.setdefault(change_a, self.state.ResultValues(change_a))
      result_values.setdefault(change_b, self.state.ResultValues(change_b))
    changes_examined = self.state.ChangesExamined()
  else:
    logging.debug('Execution Engine: Finding culprits.')
    context = task_module.Evaluate(
        self, event_module.SelectEvent(),
        evaluators.Selector(
            event_type='select',
            include_keys={'culprits', 'change', 'result_values'}))
    differences = [(change_module.ReconstituteChange(change_a),
                    change_module.ReconstituteChange(change_b))
                   for change_a, change_b in context.get(
                       'performance_bisection', {}).get('culprits', [])]
    result_values = {
        change_module.ReconstituteChange(v.get('change')):
            v.get('result_values')
        for v in context.values()
        if 'change' in v and 'result_values' in v
    }

  if not differences:
    # When we cannot find a difference, we want to not only update the issue
    # with that (minimal) information but also automatically mark the issue
    # WontFix. This is based on what we've gathered in production: most issues
    # where Pinpoint cannot reproduce the difference invariably end up
    # "Unconfirmed" with very little follow-up.
    title = "<b>%s Couldn't reproduce a difference.</b>" % _ROUND_PUSHPIN
    deferred.defer(
        _PostBugCommentDeferred,
        self.bug_id,
        '\n'.join((title, self.url)),
        project=self.project,
        labels=['Pinpoint-No-Repro'],
        status='WontFix',
        _retry_options=RETRY_OPTIONS)
    return

  # Collect the result values for each of the differences.
  bug_update_builder = job_bug_update.DifferencesFoundBugUpdateBuilder(
      self.state.metric)
  bug_update_builder.SetExaminedCount(changes_examined)
  for change_a, change_b in differences:
    if change_b.patch:
      commit = change_b.patch
    else:
      commit = change_b.last_commit
    values_a = result_values[change_a]
    values_b = result_values[change_b]
    bug_update_builder.AddDifference(commit, values_a, values_b)

  deferred.defer(
      job_bug_update.UpdatePostAndMergeDeferred,
      bug_update_builder,
      self.bug_id,
      self.tags,
      self.url,
      self.project,
      _retry_options=RETRY_OPTIONS)

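# A sketch (an illustrative assumption, not taken from production data) of the
# shape of `context` that the execution-engine branch above consumes. The
# Selector with include_keys={'culprits', 'change', 'result_values'} appears
# to restrict which payload keys are collected per task:
#
#   context = {
#       'performance_bisection': {
#           # Pairs of serialized changes straddling each detected difference,
#           # matching the structure asserted in the speculate-bisection test.
#           'culprits': [[change_a.AsDict(), change_b.AsDict()], ...],
#       },
#       'read_value_<change>_<attempt>': {
#           'change': {...},            # change_module.Change.AsDict()
#           'result_values': [0, 1, 2],
#       },
#       ...
#   }
#
# ReconstituteChange() then turns each serialized change back into a Change
# before the differences are handed to the DifferencesFoundBugUpdateBuilder.
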
def testEvaluateToCompletion(self, swarming_task_result, swarming_tasks_new):
  swarming_tasks_new.return_value = {'task_id': 'task id'}
  evaluator = evaluators.SequenceEvaluator(evaluators=(
      evaluators.FilteringEvaluator(
          predicate=evaluators.TaskTypeEq('find_isolate'),
          delegate=evaluators.SequenceEvaluator(
              evaluators=(bisection_test_util.FakeFoundIsolate(self.job),
                          evaluators.TaskPayloadLiftingEvaluator()))),
      run_test.Evaluator(self.job),
  ))
  self.assertNotEqual({},
                      task_module.Evaluate(
                          self.job,
                          event_module.Event(
                              type='initiate', target_task=None, payload={}),
                          evaluator))

  # Ensure that we've found all the 'run_test' tasks.
  self.assertEqual(
      {
          'run_test_chromium@aaaaaaa_%s' % (attempt,): {
              'status': 'ongoing',
              'swarming_server': 'some_server',
              'dimensions': DIMENSIONS,
              'extra_args': [],
              'swarming_request_body': {
                  'name': mock.ANY,
                  'user': mock.ANY,
                  'priority': mock.ANY,
                  'task_slices': mock.ANY,
                  'tags': mock.ANY,
                  'pubsub_auth_token': mock.ANY,
                  'pubsub_topic': mock.ANY,
                  'pubsub_userdata': mock.ANY,
                  'service_account': mock.ANY,
              },
              'swarming_task_id': 'task id',
              'tries': 1,
              'change': mock.ANY,
              'index': attempt,
          } for attempt in range(10)
      },
      task_module.Evaluate(
          self.job,
          event_module.Event(type='select', target_task=None, payload={}),
          evaluators.Selector(task_type='run_test')))

  # Ensure that we've actually made the calls to the Swarming service.
  swarming_tasks_new.assert_called()
  self.assertGreaterEqual(swarming_tasks_new.call_count, 10)

  # Then we propagate an event for each of the run_test tasks in the graph.
  swarming_task_result.return_value = {
      'bot_id': 'bot id',
      'exit_code': 0,
      'failure': False,
      'outputs_ref': {
          'isolatedserver': 'output isolate server',
          'isolated': 'output isolate hash',
      },
      'state': 'COMPLETED',
  }
  for attempt in range(10):
    self.assertNotEqual(
        {},
        task_module.Evaluate(
            self.job,
            event_module.Event(
                type='update',
                target_task='run_test_chromium@aaaaaaa_%s' % (attempt,),
                payload={}), evaluator), 'Attempt #%s' % (attempt,))

  # Ensure that we've polled the status of each of the tasks, and that we've
  # marked the tasks completed.
  self.assertEqual(
      {
          'run_test_chromium@aaaaaaa_%s' % (attempt,): {
              'status': 'completed',
              'swarming_server': 'some_server',
              'dimensions': DIMENSIONS,
              'extra_args': [],
              'swarming_request_body': {
                  'name': mock.ANY,
                  'user': mock.ANY,
                  'priority': mock.ANY,
                  'task_slices': mock.ANY,
                  'tags': mock.ANY,
                  'pubsub_auth_token': mock.ANY,
                  'pubsub_topic': mock.ANY,
                  'pubsub_userdata': mock.ANY,
                  'service_account': mock.ANY,
              },
              'swarming_task_result': {
                  'bot_id': mock.ANY,
                  'state': 'COMPLETED',
                  'failure': False,
              },
              'isolate_server': 'output isolate server',
              'isolate_hash': 'output isolate hash',
              'swarming_task_id': 'task id',
              'tries': 1,
              'change': mock.ANY,
              'index': attempt,
          } for attempt in range(10)
      },
      task_module.Evaluate(
          self.job,
          event_module.Event(type='select', target_task=None, payload={}),
          evaluators.Selector(task_type='run_test')))

  # Ensure that we've actually made the calls to the Swarming service.
  swarming_task_result.assert_called()
  self.assertGreaterEqual(swarming_task_result.call_count, 10)

def testEvaluateHandleFailures_Expired(self, swarming_task_result,
                                       swarming_tasks_new):
  swarming_tasks_new.return_value = {'task_id': 'task id'}
  evaluator = evaluators.SequenceEvaluator(evaluators=(
      evaluators.FilteringEvaluator(
          predicate=evaluators.TaskTypeEq('find_isolate'),
          delegate=evaluators.SequenceEvaluator(
              evaluators=(bisection_test_util.FakeFoundIsolate(self.job),
                          evaluators.TaskPayloadLiftingEvaluator()))),
      run_test.Evaluator(self.job),
  ))
  self.assertNotEqual({},
                      task_module.Evaluate(
                          self.job,
                          event_module.Event(
                              type='initiate', target_task=None, payload={}),
                          evaluator))
  swarming_task_result.return_value = {
      'state': 'EXPIRED',
  }
  for attempt in range(10):
    self.assertNotEqual(
        {},
        task_module.Evaluate(
            self.job,
            event_module.Event(
                type='update',
                target_task='run_test_chromium@aaaaaaa_%s' % (attempt,),
                payload={
                    'kind': 'pubsub_message',
                    'action': 'poll'
                }), evaluator), 'Attempt #%s' % (attempt,))
  self.assertEqual(
      {
          'run_test_chromium@aaaaaaa_%s' % (attempt,): {
              'status': 'failed',
              'swarming_server': 'some_server',
              'dimensions': DIMENSIONS,
              'errors': [
                  {
                      'reason': 'SwarmingExpired',
                      'message': mock.ANY
                  },
              ],
              'extra_args': [],
              'swarming_request_body': {
                  'name': mock.ANY,
                  'user': mock.ANY,
                  'priority': mock.ANY,
                  'task_slices': mock.ANY,
                  'tags': mock.ANY,
                  'pubsub_auth_token': mock.ANY,
                  'pubsub_topic': mock.ANY,
                  'pubsub_userdata': mock.ANY,
                  'service_account': mock.ANY,
              },
              'swarming_task_result': {
                  'state': 'EXPIRED',
              },
              'swarming_task_id': 'task id',
              'tries': 1,
              'change': mock.ANY,
              'index': attempt,
          } for attempt in range(10)
      },
      task_module.Evaluate(
          self.job,
          event_module.Event(type='select', target_task=None, payload={}),
          evaluators.Selector(task_type='run_test')))