def test_cq_status_fetch_stats_new_fetch_from_last_build_run(self):
  urlfetch_mock = mock.Mock()
  urlfetch_mock.return_value.content = json.dumps({
      'more': False, 'cursor': None, 'results': []})
  FetchStatus(cursor='xxx', begin='1', end='2', done=True).put()
  BuildRun(time_started=datetime.datetime(2015, 10, 30, 12, 17, 42),
           time_finished=datetime.datetime(2015, 10, 30, 13, 17, 42),
           buildnumber=0, result=0).put()
  BuildRun(time_started=datetime.datetime(2015, 10, 30, 11, 17, 42),
           time_finished=datetime.datetime(2015, 10, 30, 12, 17, 42),
           buildnumber=0, result=0).put()

  with mock.patch('google.appengine.api.urlfetch.fetch', urlfetch_mock):
    cq_status.fetch_cq_status()

  urlfetch_mock.assert_called_once_with(
      'https://chromium-cq-status.appspot.com/query?'
      'tags=action=verifier_jobs_update&begin=1446211062.0&end=1446214662.0&'
      'count=10')
def _create_flake():
  tf = datetime.datetime.utcnow()
  ts = tf - datetime.timedelta(hours=1)
  p = PatchsetBuilderRuns(issue=1, patchset=1, master='tryserver.bar',
                          builder='baz').put()
  br_f0 = BuildRun(parent=p, buildnumber=10, result=2, time_started=ts,
                   time_finished=tf).put()
  br_f1 = BuildRun(parent=p, buildnumber=20, result=2, time_started=ts,
                   time_finished=tf).put()
  br_s0 = BuildRun(parent=p, buildnumber=30, result=0, time_started=ts,
                   time_finished=tf).put()
  occ1 = FlakyRun(
      failure_run=br_f0, success_run=br_s0, failure_run_time_started=ts,
      failure_run_time_finished=tf,
      flakes=[
          FlakeOccurrence(name='step1', failure='testX'),
      ])
  occ2 = FlakyRun(
      failure_run=br_f1, success_run=br_s0, failure_run_time_started=ts,
      failure_run_time_finished=tf,
      flakes=[
          FlakeOccurrence(name='step2', failure='testX'),
          FlakeOccurrence(name='step3', failure='step3'),
      ])
  f = Flake(name='testX', count_day=10, occurrences=[occ1.put(), occ2.put()],
            is_step=True, issue_id=123456)
  return f, [occ1, occ2]
def test_ignores_null_flaky_runs(self):
  last_updated = datetime.datetime.now()
  fake_build_key = BuildRun(
      buildnumber=1, result=1, time_finished=last_updated).put()
  flake_run_key = FlakyRun(
      failure_run=fake_build_key,
      success_run=fake_build_key,
      failure_run_time_finished=last_updated,
      flakes=[
          FlakeOccurrence(name='fake_step', failure='fake_test_name'),
          FlakeOccurrence(name='fake_step2', failure='fake_test_name')
      ]).put()
  null_flake_run_key = ndb.Key('FlakyRun', 'fake-key')
  Flake(issue_id=1,
        is_step=False,
        name='fake_test_name',
        issue_last_updated=last_updated,
        occurrences=[
            flake_run_key,
            null_flake_run_key,
        ]).put()

  self.test_app.get('/migrate')

  flake_types = FlakeType.query().fetch()
  self.assertEqual(len(flake_types), 2)

  flake_type_1 = flake_types[0]
  self.assertEqual(flake_type_1.project, 'chromium')
  self.assertEqual(flake_type_1.step_name, 'fake_step')
  self.assertEqual(flake_type_1.test_name, 'fake_test_name')
  self.assertIsNone(flake_type_1.config)
  self.assertEqual(flake_type_1.last_updated, last_updated)

  flake_type_2 = flake_types[1]
  self.assertEqual(flake_type_2.project, 'chromium')
  self.assertEqual(flake_type_2.step_name, 'fake_step2')
  self.assertEqual(flake_type_2.test_name, 'fake_test_name')
  self.assertIsNone(flake_type_2.config)
  self.assertEqual(flake_type_2.last_updated, last_updated)

  issues = Issue.query().fetch()
  self.assertEqual(len(issues), 1)
  issue = issues[0]
  self.assertEqual(issue.issue_id, 1)
  self.assertEqual(issue.project, 'chromium')
  self.assertEqual(sorted(issue.flake_type_keys),
                   sorted(flake_type.key for flake_type in flake_types))
def _create_flake(self):
  tf = datetime.datetime(2016, 8, 6, 10, 20, 30)
  ts = tf - datetime.timedelta(hours=1)
  tf2 = tf - datetime.timedelta(days=5)
  ts2 = tf2 - datetime.timedelta(hours=1)
  p = PatchsetBuilderRuns(issue=123456, patchset=1, master='tryserver.test',
                          builder='test-builder').put()
  br_f0 = BuildRun(parent=p, buildnumber=0, result=2, time_started=ts2,
                   time_finished=tf2).put()
  br_f1 = BuildRun(parent=p, buildnumber=1, result=2, time_started=ts,
                   time_finished=tf).put()
  br_s1 = BuildRun(parent=p, buildnumber=2, result=0, time_started=ts,
                   time_finished=tf).put()
  br_f2 = BuildRun(parent=p, buildnumber=3, result=4, time_started=ts,
                   time_finished=tf).put()
  br_s2 = BuildRun(parent=p, buildnumber=4, result=0, time_started=ts,
                   time_finished=tf).put()
  occ_key1 = FlakyRun(
      failure_run=br_f0, success_run=br_s2,
      flakes=[
          FlakeOccurrence(name='foo (x)', failure='foo.bar'),
          FlakeOccurrence(name='foo (x)', failure='other')],
      failure_run_time_started=ts2, failure_run_time_finished=tf2).put()
  occ_key2 = FlakyRun(
      failure_run=br_f1, success_run=br_s1,
      flakes=[FlakeOccurrence(name='bar (y)', failure='foo.bar')],
      failure_run_time_started=ts, failure_run_time_finished=tf).put()
  occ_key3 = FlakyRun(
      failure_run=br_f2, success_run=br_s2,
      flakes=[
          FlakeOccurrence(name='foo (x)', failure='foo.bar', issue_id=100),
          FlakeOccurrence(name='bar (y)', failure='foo.bar', issue_id=200)],
      failure_run_time_started=ts, failure_run_time_finished=tf).put()
  return Flake(name='foo.bar', count_day=10, is_step=False,
               occurrences=[occ_key1, occ_key2, occ_key3])
def post(self):
  if (not self.request.get('failure_run_key') or
      not self.request.get('success_run_key')):
    self.response.set_status(400, 'Invalid request parameters')
    return

  failure_run = ndb.Key(urlsafe=self.request.get('failure_run_key')).get()
  success_run = ndb.Key(urlsafe=self.request.get('success_run_key')).get()

  flaky_run = FlakyRun(
      failure_run=failure_run.key,
      failure_run_time_started=failure_run.time_started,
      failure_run_time_finished=failure_run.time_finished,
      success_run=success_run.key)

  failure_time = failure_run.time_finished
  patchset_builder_runs = failure_run.key.parent().get()

  master = BuildRun.removeMasterPrefix(patchset_builder_runs.master)
  url = ('https://chrome-build-extract.appspot.com/p/' + master +
         '/builders/' + patchset_builder_runs.builder + '/builds/' +
         str(failure_run.buildnumber) + '?json=1')
  urlfetch.set_default_fetch_deadline(60)
  logging.info('get_flaky_run_reason ' + url)
  response = urlfetch.fetch(url)
  if response.status_code >= 400 and response.status_code <= 599:
    logging.error('The request to %s has returned %d: %s', url,
                  response.status_code, response.content)
    self.response.set_status(500, 'Failed to fetch build.')
    return
  json_result = json.loads(response.content)
  steps = json_result['steps']

  failed_steps = []
  passed_steps = []
  for step in steps:
    result = step['results'][0]
    if build_result.isResultSuccess(result):
      passed_steps.append(step)
      continue
    if not build_result.isResultFailure(result):
      continue
    step_name = step['name']
    step_text = ' '.join(step['text'])
    if step_name in IGNORED_STEPS:
      continue

    # Custom (non-trivial) rules for ignoring flakes in certain steps:
    # - [swarming] ...: summary step would also be red (do not double count)
    # - Patch failure: ignore non-infra failures as they are typically due to
    #   changes in the code on HEAD
    # - bot_update PATCH FAILED: duplicates failure in the 'Patch failure'
    #   step.
    # - ... (retry summary): this is an artificial step to fail the build due
    #   to another step that has failed earlier (do not double count).
    if (step_name.startswith('[swarming]') or
        (step_name == 'Patch failure' and result != build_result.EXCEPTION) or
        (step_name == 'bot_update' and 'PATCH FAILED' in step_text)):
      continue

    failed_steps.append(step)

  steps_to_ignore = []
  for step in failed_steps:
    step_name = step['name']
    if '(with patch)' in step_name:
      # Ignore any steps from the same test suite, which is determined by the
      # normalized step name. Additionally, if the step fails without patch,
      # ignore the original step as well because the tree is busted.
      normalized_step_name = normalize_test_type(step_name, True)
      for other_step in failed_steps:
        if other_step == step:
          continue
        normalized_other_step_name = normalize_test_type(
            other_step['name'], True)
        if normalized_other_step_name == normalized_step_name:
          steps_to_ignore.append(other_step['name'])
          if '(without patch)' in other_step['name']:
            steps_to_ignore.append(step['name'])

  flakes_to_update = []
  for step in failed_steps:
    step_name = step['name']
    if step_name in steps_to_ignore:
      continue
    flakes, is_step = self.get_flakes(
        master, patchset_builder_runs.builder, failure_run.buildnumber, step)
    for flake in flakes:
      flake_occurrence = FlakeOccurrence(name=step_name, failure=flake)
      flaky_run.flakes.append(flake_occurrence)
      flakes_to_update.append((flake, is_step))

  # Do not create FlakyRuns if all failed steps have been ignored.
  if not flaky_run.flakes:
    return

  flaky_run_key = flaky_run.put()
  for flake, is_step in flakes_to_update:
    self.add_failure_to_flake(flake, flaky_run_key, failure_time, is_step)
  self.flaky_runs.increment_by(1)
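# NOTE (illustrative sketch, not part of the original module): the handler
# above pairs '(with patch)' and '(without patch)' steps via
# normalize_test_type, which is defined elsewhere in this codebase. The
# helper below only sketches the assumed contract -- reduce a step name to
# its bare suite name so runs of the same suite compare equal. Its parameter
# name and the exact suffix handling are assumptions, not the real
# implementation.
import re


def normalize_test_type(step_name, strip_modifiers=False):  # hypothetical
  name = step_name
  if strip_modifiers:
    # Drop trailing annotations such as '(with patch)', '(without patch)'
    # or '(retry summary)'.
    name = re.sub(r'\s*\((?:with|without) patch\)', '', name)
    name = re.sub(r'\s*\(retry summary\)', '', name)
  # Drop swarming shard/platform suffixes such as 'foo_tests on Ubuntu'.
  name = name.split(' on ', 1)[0]
  return name.strip()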
def fetch_cq_status():
  """Fetches data from chromium-cq-status app and saves new data.

  Remembers old cursor and fetches new data.
  """
  fetch_status = FetchStatus.query().get()
  cursor = ''
  begin = ''
  end = ''
  retry_count = 0

  while True:
    if fetch_status:
      if fetch_status.done:
        logging.info('historical fetching done so fetch latest...')
        end = str(time_functions.timestamp.utcnow_ts())

        last_build_run_seen = BuildRun.query().order(
            -BuildRun.time_finished).fetch(1)
        begin = str(time_functions.timestamp.utctimestamp(
            last_build_run_seen[0].time_finished))
        cursor = ''
      else:
        begin = fetch_status.begin
        end = fetch_status.end
        cursor = fetch_status.cursor
    else:
      logging.info(
          'didnt find any historical information. fetching last week')
      begin = str(time_functions.timestamp.utctimestamp(
          datetime.datetime.utcnow() - datetime.timedelta(weeks=1)))
      end = str(time_functions.timestamp.utcnow_ts())

    if begin and end:
      logging.info('fetching from ' +
                   str(datetime.datetime.fromtimestamp(float(begin))) +
                   ' to ' +
                   str(datetime.datetime.fromtimestamp(float(end))) +
                   ' cursor: ' + cursor)
    else:
      logging.info('fetching with no begin/end and cursor: ' + cursor)

    url = 'https://chromium-cq-status.appspot.com/query'
    params = []
    params.append('tags=action=verifier_jobs_update')
    if cursor:
      params.append('cursor=' + cursor)
    if begin:
      params.append('begin=' + begin)
    if end:
      params.append('end=' + end)
    # Tried count of 200 or more but would get OOM or deadline errors. Even 50
    # sometimes gives:
    # "Values may not be more than 1000000 bytes in length; received 2118015
    # bytes"
    params.append('count=10')
    url += '?' + '&'.join(params)
    logging.info('fetching url: ' + url)

    try:
      urlfetch.set_default_fetch_deadline(60)
      result = urlfetch.fetch(url).content

      timestamp_str = '"timestamp":"'
      logging_idx = result.find(timestamp_str)
      if logging_idx != -1:
        logging_idx += len(timestamp_str)
        logging_idx2 = result.find('"', logging_idx)
        logging.info(' current fetch has time of ' +
                     result[logging_idx:logging_idx2])

      try:
        json_result = json.loads(result)

        more = json_result['more']
        cursor = json_result['cursor']

        try:
          logging_output = parse_cq_data(json_result)
          if logging_output:
            logging.info('found flakes: ' + ' '.join(logging_output))
        except DeadlineExceededError:
          logging.info('got DeadlineExceededError during parse_cq_data, '
                       'catching to not show up as error')
          return
      except ValueError:
        requests_metric.increment_by(1, fields={'status': 'parse_error'})
        logging.exception('failed to parse CQ data from %s', url)
        if 'DeadlineExceededError' in result:
          logging.error('got deadline exceeded, trying again after 1s')
          time.sleep(1)
          continue
        elif retry_count < 3:
          retry_count += 1
          logging.error('will retry after sleeping ' + str(retry_count))
          time.sleep(retry_count)
          continue
        else:
          logging.error('giving up and will count current fetch as done')
          # Don't want to continue this as could be a bad cursor.
          more = False
      else:
        requests_metric.increment_by(1, fields={'status': 'success'})

      if not fetch_status:
        fetch_status = FetchStatus()
      fetch_status.done = not more
      if fetch_status.done:
        fetch_status.cursor = ''
        fetch_status.begin = ''
        fetch_status.end = ''
        retry_count = 0
        logging.info('finished fetching for current cursor')
      else:
        fetch_status.begin = begin
        fetch_status.end = end
        fetch_status.cursor = cursor
      fetch_status.put()

      if not more:
        return  # finish the cron job and wait for next iteration
    except urllib2.URLError as e:
      requests_metric.increment_by(1, fields={'status': 'fetch_error'})
      logging.warning('Failed to fetch CQ status: %s', e.reason)
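# NOTE (illustrative sketch, not part of the original module): fetch_cq_status
# persists its paging state in a FetchStatus entity between cron runs. The
# model below is inferred purely from the fields used above (begin/end/cursor
# as strings, done as a boolean); the real model may declare its properties
# differently.
from google.appengine.ext import ndb


class FetchStatus(ndb.Model):  # hypothetical reconstruction
  begin = ndb.StringProperty(default='')
  end = ndb.StringProperty(default='')
  cursor = ndb.StringProperty(default='')
  done = ndb.BooleanProperty(default=False)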
def parse_cq_data(json_data):
  logging_output = []
  for result in json_data.get('results', {}):
    fields = result.get('fields', [])
    if 'action' not in fields:
      continue

    action = fields.get('action')
    if action != 'verifier_jobs_update':
      continue

    if fields.get('verifier') != 'try job':
      continue

    # At the moment, much of the parsing logic assumes this is a Chromium
    # tryjob.
    if fields.get('project') != 'chromium':
      continue

    job_states = fields.get('jobs', [])
    for state in job_states:
      # Just go by |result|.
      #if state not in ['JOB_SUCCEEDED', 'JOB_FAILED', 'JOB_TIMED_OUT']:
      #  continue

      for job in job_states[state]:
        build_properties = job.get('build_properties')
        if not build_properties:
          continue

        try:
          master = job['master']
          builder = job['builder']
          result = job['result']
          timestamp_tz = dateutil.parser.parse(job['timestamp'])
          # We assume timestamps from chromium-cq-status are already in UTC.
          timestamp = timestamp_tz.replace(tzinfo=None)
        except KeyError:
          continue

        try:
          buildnumber = get_int_value(build_properties, 'buildnumber')
          issue = get_int_value(build_properties, 'issue')
          patchset = get_int_value(build_properties, 'patchset')
          attempt_start_ts = get_int_value(build_properties,
                                           'attempt_start_ts')
          time_started = datetime.datetime.utcfromtimestamp(
              attempt_start_ts / 1000000)
        except ValueError:
          continue

        if build_result.isResultPending(result):
          continue

        # At this point, only success or failure.
        success = build_result.isResultSuccess(result)

        patchset_builder_runs = get_patchset_builder_runs(issue=issue,
                                                          patchset=patchset,
                                                          master=master,
                                                          builder=builder)

        build_run = BuildRun(parent=patchset_builder_runs.key,
                             buildnumber=buildnumber,
                             result=result,
                             time_started=time_started,
                             time_finished=timestamp)

        previous_runs = BuildRun.query(
            ancestor=patchset_builder_runs.key).fetch()

        duplicate = False
        for previous_run in previous_runs:
          # We saw this build run already or there are multiple green runs,
          # in which case we ignore subsequent ones to avoid showing failures
          # multiple times.
          if (previous_run.buildnumber == buildnumber) or \
             (build_run.is_success and previous_run.is_success):
            duplicate = True
            break

        if duplicate:
          continue

        build_run.put()

        for previous_run in previous_runs:
          if previous_run.is_success == build_run.is_success:
            continue
          if success:
            # We saw the flake and then the pass.
            failure_run = previous_run
            success_run = build_run
          else:
            # We saw the pass and then the failure. Could happen when fetching
            # historical data, or for the bot_update step (patch can't be
            # applied cleanly anymore).
            failure_run = build_run
            success_run = previous_run

          logging_output.append(failure_run.key.parent().get().builder +
                                str(failure_run.buildnumber))

          # Queue a task to fetch the error of this failure and create
          # FlakyRun.
          flakes_metric.increment_by(1)
          taskqueue.add(
              queue_name='issue-updates',
              url='/issues/create_flaky_run',
              params={'failure_run_key': failure_run.key.urlsafe(),
                      'success_run_key': success_run.key.urlsafe()})

  return logging_output
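# NOTE (illustrative sketch, not part of the original module): parse_cq_data
# depends on a get_int_value helper that is not shown in this section. The
# sketch below assumes the contract implied by the callers: return the build
# property as an int and raise ValueError when it is missing or not numeric,
# so the surrounding 'except ValueError' can skip the job.
def get_int_value(properties, key):  # hypothetical
  if key not in properties:
    raise ValueError('key %s not found in build properties' % key)
  # CQ sometimes reports numeric properties as strings, so coerce explicitly;
  # int() raises ValueError for non-numeric strings.
  return int(properties[key])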
def post(self):
  if (not self.request.get('failure_run_key') or
      not self.request.get('success_run_key')):
    self.response.set_status(400, 'Invalid request parameters')
    return

  failure_run = ndb.Key(urlsafe=self.request.get('failure_run_key')).get()
  success_run = ndb.Key(urlsafe=self.request.get('success_run_key')).get()

  flaky_run = FlakyRun(
      failure_run=failure_run.key,
      failure_run_time_started=failure_run.time_started,
      failure_run_time_finished=failure_run.time_finished,
      success_run=success_run.key)

  failure_time = failure_run.time_finished
  patchset_builder_runs = failure_run.key.parent().get()

  master = BuildRun.removeMasterPrefix(patchset_builder_runs.master)
  url = ('https://luci-milo.appspot.com/'
         'prpc/milo.Buildbot/GetBuildbotBuildJSON')
  request = json.dumps({
      'master': master,
      'builder': patchset_builder_runs.builder,
      'buildNum': failure_run.buildnumber,
  })
  headers = {
      'Content-Type': 'application/json',
      'Accept': 'application/json',
  }
  urlfetch.set_default_fetch_deadline(60)
  logging.info('get_flaky_run_reason: %s, %s', url, request)
  response = urlfetch.fetch(
      url, payload=request, method=urlfetch.POST, headers=headers,
      validate_certificate=True)
  if response.status_code != 200:
    logging.error('The request to %s has returned %d: %s', url,
                  response.status_code, response.content)
    self.response.set_status(500, 'Failed to fetch build.')
    return
  content = response.content
  if content.startswith(_MILO_RESPONSE_PREFIX):
    content = content[len(_MILO_RESPONSE_PREFIX):]
  data = json.loads(content)['data']
  json_result = json.loads(base64.b64decode(data))
  steps = json_result['steps']

  failed_steps = []
  passed_steps = []
  for step in steps:
    result = step['results'][0]
    if build_result.isResultSuccess(result):
      passed_steps.append(step)
      continue
    if not build_result.isResultFailure(result):
      continue
    # For Luci builds, some steps don't have step text anymore. Such steps
    # include 'Failure reason', 'analyze', etc.
    step_text = ' '.join(step['text'] or [])
    step_name = step['name']
    if step_name in IGNORED_STEPS:
      continue

    # Custom (non-trivial) rules for ignoring flakes in certain steps:
    # - [swarming] ...: summary step would also be red (do not double count)
    # - Patch failure: ignore non-infra failures as they are typically due to
    #   changes in the code on HEAD
    # - bot_update PATCH FAILED: duplicates failure in the 'Patch failure'
    #   step.
    # - ... (retry summary): this is an artificial step to fail the build due
    #   to another step that has failed earlier (do not double count).
    if (step_name.startswith('[swarming]') or
        (step_name == 'Patch failure' and result != build_result.EXCEPTION) or
        (step_name == 'bot_update' and 'PATCH FAILED' in step_text)):
      continue

    failed_steps.append(step)

  steps_to_ignore = []
  for step in failed_steps:
    step_name = step['name']
    if '(with patch)' in step_name:
      # Ignore any steps from the same test suite, which is determined by the
      # normalized step name. Additionally, if the step fails without patch,
      # ignore the original step as well because the tree is busted.
      normalized_step_name = normalize_test_type(step_name, True)
      for other_step in failed_steps:
        if other_step == step:
          continue
        normalized_other_step_name = normalize_test_type(
            other_step['name'], True)
        if normalized_other_step_name == normalized_step_name:
          steps_to_ignore.append(other_step['name'])
          if '(without patch)' in other_step['name']:
            steps_to_ignore.append(step['name'])

  flakes_to_update = []
  for step in failed_steps:
    step_name = step['name']
    if step_name in steps_to_ignore:
      continue
    flakes, is_step = self.get_flakes(
        master, patchset_builder_runs.builder, failure_run.buildnumber, step)
    if is_step and not is_infra_step_flake(step_name):
      continue  # Ignore flakes of non-infra steps.
    for flake in flakes:
      flake_occurrence = FlakeOccurrence(name=step_name, failure=flake)
      flaky_run.flakes.append(flake_occurrence)
      flakes_to_update.append((flake, is_step))

  # Do not create FlakyRuns if all failed steps have been ignored.
  if not flaky_run.flakes:
    return

  flaky_run_key = flaky_run.put()
  for flake, is_step in flakes_to_update:
    if self.is_duplicate_occurrence(flake, flaky_run):
      logging.info('Not adding duplicate occurrence for the same CL')
      continue
    self.add_failure_to_flake(flake, flaky_run_key, failure_time, is_step)
  self.flaky_runs.increment_by(1)
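# NOTE (illustrative sketch, not part of the original module): the Milo-based
# handler above additionally filters step-level flakes through
# is_infra_step_flake, defined elsewhere. The membership test and the set of
# step names below are assumptions used only to illustrate the call site.
_ASSUMED_INFRA_STEP_FLAKES = frozenset([
    'bot_update',
    'device_status',
    'provision_devices',
])


def is_infra_step_flake(step_name):  # hypothetical
  return step_name in _ASSUMED_INFRA_STEP_FLAKES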
def fetch_cq_status():
  """Fetches data from chromium-cq-status app and saves new data.

  Remembers old cursor and fetches new data.
  """
  fetch_status = FetchStatus.query().get()
  cursor = ''
  begin = ''
  end = ''
  retry_count = 0

  while True:
    if fetch_status:
      if fetch_status.done:
        logging.info('historical fetching done so fetch latest...')
        end = str(time_functions.timestamp.utcnow_ts())

        last_build_run_seen = BuildRun.query().order(
            -BuildRun.time_finished).fetch(1)
        begin = str(
            time_functions.timestamp.utctimestamp(
                last_build_run_seen[0].time_finished))
        cursor = ''
      else:
        begin = fetch_status.begin
        end = fetch_status.end
        cursor = fetch_status.cursor
    else:
      logging.info(
          'didnt find any historical information. fetching last week')
      begin = str(
          time_functions.timestamp.utctimestamp(
              datetime.datetime.utcnow() - datetime.timedelta(weeks=1)))
      end = str(time_functions.timestamp.utcnow_ts())

    if begin and end:
      logging.info('fetching from %s to %s cursor: %s',
                   str(datetime.datetime.utcfromtimestamp(float(begin))),
                   str(datetime.datetime.utcfromtimestamp(float(end))),
                   cursor)
    else:
      logging.info('fetching with no begin/end and cursor: ' + cursor)

    url = 'https://chromium-cq-status.appspot.com/query'
    params = []
    params.append('tags=action=verifier_jobs_update')
    if cursor:
      params.append('cursor=' + cursor)
    if begin:
      params.append('begin=' + begin)
    if end:
      params.append('end=' + end)
    # Tried count of 200 or more but would get OOM or deadline errors. Even 50
    # sometimes gives:
    # "Values may not be more than 1000000 bytes in length; received 2118015
    # bytes"
    params.append('count=10')
    url += '?' + '&'.join(params)
    logging.info('fetching url: ' + url)

    try:
      urlfetch.set_default_fetch_deadline(60)
      result = urlfetch.fetch(url).content

      timestamp_str = '"timestamp":"'
      logging_idx = result.find(timestamp_str)
      if logging_idx != -1:
        logging_idx += len(timestamp_str)
        logging_idx2 = result.find('"', logging_idx)
        logging.info(' current fetch has time of ' +
                     result[logging_idx:logging_idx2])

      try:
        json_result = json.loads(result)

        more = json_result['more']
        cursor = json_result['cursor']

        try:
          logging_output = parse_cq_data(json_result)
          if logging_output:
            logging.info('found flakes: ' + ' '.join(logging_output))
        except DeadlineExceededError:
          logging.info('got DeadlineExceededError during parse_cq_data, '
                       'catching to not show up as error')
          return
      except ValueError:
        requests_metric.increment_by(1, fields={'status': 'parse_error'})
        logging.exception('failed to parse CQ data from %s', url)
        if 'DeadlineExceededError' in result:
          logging.error('got deadline exceeded, trying again after 1s')
          time.sleep(1)
          continue
        elif retry_count < 3:
          retry_count += 1
          logging.error('will retry after sleeping ' + str(retry_count))
          time.sleep(retry_count)
          continue
        else:
          logging.error('giving up and will count current fetch as done')
          # Don't want to continue this as could be a bad cursor.
          more = False
      else:
        requests_metric.increment_by(1, fields={'status': 'success'})

      if not fetch_status:
        fetch_status = FetchStatus()
      fetch_status.done = not more
      if fetch_status.done:
        fetch_status.cursor = ''
        fetch_status.begin = ''
        fetch_status.end = ''
        retry_count = 0
        logging.info('finished fetching for current cursor')
      else:
        fetch_status.begin = begin
        fetch_status.end = end
        fetch_status.cursor = cursor
      fetch_status.put()

      if not more:
        return  # finish the cron job and wait for next iteration
    except urllib2.URLError as e:
      requests_metric.increment_by(1, fields={'status': 'fetch_error'})
      logging.warning('Failed to fetch CQ status: %s', e.reason)
def parse_cq_data(json_data):
  logging_output = []
  for result in json_data.get('results', {}):
    fields = result.get('fields', [])
    if 'action' not in fields:
      logging.warning('Missing field action in status record')
      parsing_errors.increment_by(1)
      continue

    action = fields.get('action')
    if action != 'verifier_jobs_update':
      continue

    if fields.get('verifier') != 'try job':
      continue

    # At the moment, much of the parsing logic assumes this is a Chromium
    # tryjob.
    project = fields.get('project')
    if project != 'chromium/chromium/src':
      logging.info('project not chromium: %s', project)
      continue

    job_states = fields.get('jobs', {})

    for job in itertools.chain.from_iterable(job_states.values()):
      try:
        builder = job['builder']
        result = job['result']
        timestamp_tz = dateutil.parser.parse(
            job.get('created_ts') or job['timestamp'])
        # We assume timestamps from chromium-cq-status are already in UTC.
        timestamp = timestamp_tz.replace(tzinfo=None)
      except KeyError:
        logging.warning('Failed to parse job details', exc_info=True)
        parsing_errors.increment_by(1)
        continue

      if build_result.isResultPending(result):
        continue

      build_properties = job.get('build_properties')
      if not build_properties:
        logging.warning('Missing field build_properties in job details')
        parsing_errors.increment_by(1)
        continue

      issue = -1
      patchset = -1
      time_started = 0

      try:
        buildnumber = get_int_value(build_properties, 'buildnumber')
        if 'patch_issue' in build_properties:
          issue = get_int_value(build_properties, 'patch_issue')
        else:  # pragma: no cover
          logging.warning('no issue')

        if 'patch_set' in build_properties:
          patchset = get_int_value(build_properties, 'patch_set')
        else:  # pragma: no cover
          logging.warning('no patchset')

        if 'attempt_start_ts' in build_properties:
          attempt_start_ts = get_int_value(build_properties,
                                           'attempt_start_ts')
          time_started = datetime.datetime.utcfromtimestamp(
              attempt_start_ts / 1000000)
        else:  # pragma: no cover
          logging.warning('no attempt_start_ts')
          continue

        # For builds through Buildbucket, job['master'] is actually the bucket
        # name. For buildbot-based builds, it just happens to be the same as
        # the master name. For Luci-based builds, it is different from the
        # master name, and the master name is set as a build property instead.
        # https://chromium.googlesource.com/chromium/src/+/infra/config/cr-buildbucket.cfg#115
        # So in either case, the "real" master name is in the build
        # properties.
        master = build_properties['mastername']
      except (ValueError, KeyError):
        logging.warning('Failed to parse build properties', exc_info=True)
        parsing_errors.increment_by(1)
        continue

      # At this point, only success or failure.
      success = build_result.isResultSuccess(result)

      patchset_builder_runs = get_patchset_builder_runs(
          issue=issue, patchset=patchset, master=master, builder=builder)

      build_run = BuildRun(parent=patchset_builder_runs.key,
                           buildnumber=buildnumber,
                           result=result,
                           time_started=time_started,
                           time_finished=timestamp)

      previous_runs = BuildRun.query(
          ancestor=patchset_builder_runs.key).fetch()

      duplicate = False
      for previous_run in previous_runs:
        # We saw this build run already or there are multiple green runs,
        # in which case we ignore subsequent ones to avoid showing failures
        # multiple times.
        if (previous_run.buildnumber == buildnumber) or \
           (build_run.is_success and previous_run.is_success):
          duplicate = True
          break

      if duplicate:
        continue

      build_run.put()

      for previous_run in previous_runs:
        if previous_run.is_success == build_run.is_success:
          continue
        if success:
          # We saw the flake and then the pass.
          failure_run = previous_run
          success_run = build_run
        else:
          # We saw the pass and then the failure. Could happen when fetching
          # historical data, or for the bot_update step (patch can't be
          # applied cleanly anymore).
          failure_run = build_run
          success_run = previous_run

        logging_output.append(failure_run.key.parent().get().builder +
                              str(failure_run.buildnumber))

        # Queue a task to fetch the error of this failure and create FlakyRun.
        flakes_metric.increment_by(1)
        taskqueue.add(queue_name='issue-updates',
                      url='/issues/create_flaky_run',
                      params={
                          'failure_run_key': failure_run.key.urlsafe(),
                          'success_run_key': success_run.key.urlsafe()
                      })

  return logging_output
def _create_flakes(ts, tf, ts2, tf2):
  p = PatchsetBuilderRuns(issue=123456, patchset=1, master='tryserver.test',
                          builder='test-builder').put()
  br_f0 = BuildRun(parent=p, buildnumber=0, result=2, time_started=ts2,
                   time_finished=tf2).put()
  br_f1 = BuildRun(parent=p, buildnumber=1, result=2, time_started=ts,
                   time_finished=tf).put()
  br_s1 = BuildRun(parent=p, buildnumber=2, result=0, time_started=ts,
                   time_finished=tf).put()
  br_f2 = BuildRun(parent=p, buildnumber=3, result=4, time_started=ts,
                   time_finished=tf2).put()
  br_s2 = BuildRun(parent=p, buildnumber=4, result=0, time_started=ts,
                   time_finished=tf2).put()
  occ_key1 = FlakyRun(failure_run=br_f0, success_run=br_s2,
                      failure_run_time_started=ts2,
                      failure_run_time_finished=tf2).put()
  occ_key2 = FlakyRun(failure_run=br_f1, success_run=br_s1,
                      failure_run_time_started=ts,
                      failure_run_time_finished=tf).put()
  occ_key3 = FlakyRun(failure_run=br_f2, success_run=br_s2,
                      failure_run_time_started=ts,
                      failure_run_time_finished=tf).put()
  Flake(name='foo', last_hour=True, last_day=True, last_week=True,
        last_month=True).put()
  Flake(name='bar', last_hour=True, last_day=True, last_week=True,
        last_month=True, occurrences=[occ_key1, occ_key2]).put()
  Flake(name='baz', last_hour=True, last_day=True, last_week=True,
        last_month=True, occurrences=[occ_key3]).put()
  Flake(name='zee', last_hour=False, last_day=False, last_week=True,
        last_month=False).put()
def parse_cq_data(json_data):
  logging_output = []
  for result in json_data['results']:
    fields = result['fields']
    if 'action' not in fields:
      continue

    action = fields['action']
    if action != 'verifier_jobs_update':
      continue

    if fields['verifier'] != 'try job':
      continue

    job_states = fields['jobs']
    for state in job_states:
      # Just go by |result|.
      #if state not in ['JOB_SUCCEEDED', 'JOB_FAILED', 'JOB_TIMED_OUT']:
      #  continue

      for job in job_states[state]:
        build_properties = job['build_properties']
        if not build_properties:
          continue

        master = job['master']
        builder = job['builder']
        result = job['result']
        timestamp = datetime.datetime.strptime(job['timestamp'],
                                               '%Y-%m-%d %H:%M:%S.%f')

        try:
          buildnumber = get_int_value(build_properties, 'buildnumber')
          issue = get_int_value(build_properties, 'issue')
          patchset = get_int_value(build_properties, 'patchset')
        except ValueError:
          continue

        if build_result.isResultPending(result):
          continue

        # At this point, only success or failure.
        success = build_result.isResultSuccess(result)

        patchset_builder_runs = get_patchset_builder_runs(issue=issue,
                                                          patchset=patchset,
                                                          master=master,
                                                          builder=builder)

        build_run = BuildRun(parent=patchset_builder_runs.key,
                             buildnumber=buildnumber,
                             result=result,
                             time_finished=timestamp)

        previous_runs = BuildRun.query(
            ancestor=patchset_builder_runs.key).fetch()

        duplicate = False
        for previous_run in previous_runs:
          # We saw this build run already or there are multiple green runs,
          # in which case we ignore subsequent ones to avoid showing failures
          # multiple times.
          if (previous_run.buildnumber == buildnumber) or \
             (build_run.is_success and previous_run.is_success):
            duplicate = True
            break

        if duplicate:
          continue

        build_run.put()

        for previous_run in previous_runs:
          if previous_run.is_success == build_run.is_success:
            continue
          if success:
            # We saw the flake and then the pass.
            flaky_run = FlakyRun(
                failure_run=previous_run.key,
                failure_run_time_finished=previous_run.time_finished,
                success_run=build_run.key)
            flaky_run.put()
            logging_output.append(previous_run.key.parent().get().builder +
                                  str(previous_run.buildnumber))
          else:
            # We saw the pass and then the failure. Could happen when fetching
            # historical data.
            flaky_run = FlakyRun(
                failure_run=build_run.key,
                failure_run_time_finished=build_run.time_finished,
                success_run=previous_run.key)
            flaky_run.put()
            logging_output.append(build_run.key.parent().get().builder +
                                  str(build_run.buildnumber))

          # Queue a task to fetch the error of this failure.
          deferred.defer(get_flaky_run_reason, flaky_run.key)

  return logging_output