def __call__(self, task, *_):
    """Attach a canned error entry to the task and emit a fail action."""
    task.payload.update({
        'errors': [{
            'reason': 'SomeReason',
            'message': 'This is a message explaining things.',
        }]
    })

    def mark_failed(_):
        return task_module.UpdateTask(
            self.job, task.id, new_state='failed', payload=task.payload)

    return [mark_failed]
def __call__(self, accumulator):
    """Bump the try counter, request a build, and record the request result."""
    # TODO(dberris): Maybe use a value in the accumulator to check whether we
    # should bail?
    payload = self.task.payload
    payload.update({'tries': payload.get('tries', 0) + 1})
    task_module.UpdateTask(
        self.job, self.task.id, new_state='ongoing', payload=payload)
    build_request_result = find_isolate_quest.RequestBuild(
        payload.get('builder'),
        self.change,
        payload.get('bucket'),
        find_isolate_quest.BuildTagsFromJob(self.job),
        self.task)
    payload.update({'buildbucket_result': build_request_result})
    # TODO(dberris): Poll the ongoing build if the attempt to update fails, if
    # we have the data in payload?
    task_module.UpdateTask(self.job, self.task.id, payload=payload)
def FakeSuccessfulRunTest(job, task, *_):
    """Test double: complete a run_test task with fixed isolate details."""
    if task.status == 'completed':
        return None
    task.payload.update({
        'isolate_server': 'https://isolate.server',
        'isolate_hash': '12334981aad2304ff1243458',
    })

    def complete(_):
        return task_module.UpdateTask(
            job, task.id, new_state='completed', payload=task.payload)

    return [complete]
def FakeFoundIsolate(job, task, *_):
    """Test double: complete a find_isolate task with fixed isolate details."""
    if task.status == 'completed':
        return None
    task.payload.update({
        'isolate_server': 'https://isolate.server',
        'isolate_hash': '12049adfa129339482234098',
    })

    def complete(_):
        return task_module.UpdateTask(
            job, task.id, new_state='completed', payload=task.payload)

    return [complete]
def FakeFailedRunTest(job, task, *_):
    """Test double: fail a run_test task with a canned error entry."""
    if task.status == 'failed':
        return None
    task.payload.update({
        'errors': [{
            'reason': 'SomeReason',
            'message': 'There is some message here.',
        }]
    })

    def fail(_):
        return task_module.UpdateTask(
            job, task.id, new_state='failed', payload=task.payload)

    return [fail]
def FakeFindIsolateFailed(job, task, *_):
    """Test double: fail a find_isolate task with a canned build failure."""
    if task.status == 'failed':
        return None
    task.payload.update({
        'tries': 1,
        'buildbucket_job_status': {
            'status': 'COMPLETED',
            'result': 'FAILURE',
            'result_details_json': '{}',
        }
    })

    def fail(_):
        return task_module.UpdateTask(
            job, task.id, new_state='failed', payload=task.payload)

    return [fail]
def __call__(self, _):
    """Expand the start..end commit range and store it in the task payload.

    Reconstitutes the start/end changes from the payload, asks the Gitiles
    service (via change_module.Commit.CommitRange) for the commits in
    between, and stores the list under 'commits' — the start commit first,
    followed by the range commits in reverse of the service's order — before
    marking the task 'ongoing'. A Gitiles NotFoundError fails the task with
    a 'GitilesFetchError' error entry instead.
    """
    start_change = change_module.ReconstituteChange(
        self.task.payload['start_change'])
    end_change = change_module.ReconstituteChange(
        self.task.payload['end_change'])
    try:
        # We're storing this once, so that we don't need to always get this
        # when working with the individual commits. This reduces our reliance
        # on datastore operations throughout the course of handling the
        # culprit finding process.
        #
        # TODO(dberris): Expand the commits into the full table of
        # dependencies? Because every commit in the chromium repository is
        # likely to be building against different versions of the dependencies
        # (v8, skia, etc.) we'd need to expand the concept of a changelist
        # (CL, or Change in the Pinpoint codebase) so that we know which
        # versions of the dependencies to use in specific CLs. Once we have
        # this, we might be able to operate cleanly on just Change instances
        # instead of just raw commits.
        #
        # TODO(dberris): Model the "merge-commit" like nature of auto-roll CLs
        # by allowing the preparation action to model the non-linearity of the
        # history. This means we'll need a concept of levels, where changes in
        # a single repository history (the main one) operates at a higher
        # level linearly, and if we're descending into rolls that we're
        # exploring a lower level in the linear history. This is similar to
        # the following diagram:
        #
        #   main -> m0 -> m1 -> m2 -> roll0 -> m3 -> ...
        #                               |
        #   dependency .............. +-> d0 -> d1
        #
        # Ideally we'll already have this expanded before we go ahead and
        # perform a bisection, to amortise the cost of making requests to
        # back-end services for this kind of information in tight loops.
        commits = change_module.Commit.CommitRange(
            start_change.base_commit, end_change.base_commit)
        self.task.payload.update({
            'commits': [
                collections.OrderedDict(
                    [('repository', start_change.base_commit.repository),
                     ('git_hash', start_change.base_commit.git_hash)])
            ] + [
                collections.OrderedDict(
                    [('repository', start_change.base_commit.repository),
                     ('git_hash', commit['commit'])])
                for commit in reversed(commits)
            ]
        })
        task_module.UpdateTask(
            self.job, self.task.id, new_state='ongoing',
            payload=self.task.payload)
    except gitiles_service.NotFoundError as e:
        # TODO(dberris): We need to be more resilient to intermittent failures
        # from the Gitiles service here.
        self.task.payload.update({
            'errors': self.task.payload.get('errors', []) + [{
                'reason': 'GitilesFetchError',
                # BUGFIX: BaseException.message does not exist in Python 3;
                # str(e) yields the exception text portably.
                'message': str(e)
            }]
        })
        task_module.UpdateTask(
            self.job, self.task.id, new_state='failed',
            payload=self.task.payload)
def __call__(self, _):
    """Build and submit a Swarming task request, recording the outcome."""
    logging.debug('Scheduling a Swarming task to run a test.')
    payload = self.task.payload
    # Since we're always going to be using the PubSub handling, we add the
    # tags unconditionally.
    tags = [
        '%s:%s' % (k, v)
        for k, v in run_test_quest.SwarmingTagsFromJob(self.job).items()
    ]
    userdata = json.dumps({
        'job_id': self.job.job_id,
        'task': {
            'type': 'run_test',
            'id': self.task.id,
        },
    })
    body = {
        'name': 'Pinpoint job',
        'user': '******',
        # TODO(dberris): Make these constants configurable?
        'priority': '100',
        'task_slices': [{
            'properties': self.properties,
            'expiration_secs': '86400',  # 1 day.
        }],
        'tags': tags,
        # Use an explicit service account.
        'service_account': run_test_quest._TESTER_SERVICE_ACCOUNT,
        # TODO(dberris): Consolidate constants in environment vars?
        'pubsub_topic': 'projects/chromeperf/topics/pinpoint-swarming-updates',
        'pubsub_auth_token': 'UNUSED',
        'pubsub_userdata': userdata,
    }
    payload.update({
        'swarming_request_body': body,
    })
    # Ensure that this thread/process/handler is the first to mark this task
    # 'ongoing'. Only proceed in scheduling a Swarming request if we're the
    # first one to do so.
    task_module.UpdateTask(
        self.job, self.task.id, new_state='ongoing', payload=payload)
    # At this point we know we were successful in transitioning to 'ongoing'.
    try:
        response = swarming.Swarming(
            payload.get('swarming_server')).Tasks().New(body)
        payload.update({
            'swarming_task_id': response.get('task_id'),
            'tries': payload.get('tries', 0) + 1,
        })
    except request.RequestError as e:
        payload.update({
            'errors': payload.get('errors', []) + [{
                'reason': type(e).__name__,
                'message': 'Encountered failure in swarming request: %s' % (e,),
            }]
        })
    # Update the payload with the task id from the Swarming request. Note that
    # this could also fail to commit.
    task_module.UpdateTask(self.job, self.task.id, payload=payload)
def __call__(self, accumulator):
    """Handle a build status update, preferring pub/sub data over polling.

    Reads the buildbucket result stored in the task payload, uses the
    pub/sub event payload when it already carries a build, and only polls
    the Buildbucket service as a last resort. On a COMPLETED build it
    extracts the isolate details from 'result_details_json' and completes
    the task; missing or malformed data fails the task with a descriptive
    error entry appended to the payload.
    """
    # The task contains the buildbucket_result which we need to update by
    # polling the status of the id.
    build_details = self.task.payload.get('buildbucket_result')
    if not build_details:
        logging.error(
            'No build details in attempt to update build status; task = %s',
            self.task)
        task_module.UpdateTask(self.job, self.task.id, new_state='failed')
        return None
    # Attempt to use the payload in a buildbucket pub/sub update to handle the
    # update without polling. Only poll as a last resort.
    build = self.event.payload
    if build is None or 'id' not in build:
        try:
            build_id = build_details.get('build', {}).get('id')
            if build_id is None:
                logging.error(
                    'No build details stored in task payload; task = %s',
                    self.task)
                self.task.payload.update({
                    'errors': self.task.payload.get('errors', []) + [{
                        'reason': 'MissingBuildDetails',
                        'message': 'Cannot find build details in task.',
                    }]
                })
                task_module.UpdateTask(
                    self.job, self.task.id, new_state='failed',
                    payload=self.task.payload)
                return None
            build = buildbucket_service.GetJobStatus(build_id).get(
                'build', {})
        except request.RequestError as e:
            logging.error('Failed getting Buildbucket Job status: %s', e)
            self.task.payload.update({
                'errors': self.task.payload.get('errors', []) + [{
                    'reason': type(e).__name__,
                    'message': 'Service request error response: %s' % (e,),
                }]
            })
            task_module.UpdateTask(
                self.job, self.task.id, new_state='failed',
                payload=self.task.payload)
            return None
    logging.debug('buildbucket response: %s', build)
    # Update the buildbucket result.
    self.task.payload.update({
        'buildbucket_job_status': build,
    })
    # Decide whether the build was successful or not.
    if build.get('status') != 'COMPLETED':
        # Skip this update.
        return None
    result = build.get('result')
    if not result:
        logging.debug('Missing result field in response, bailing.')
        self.task.payload.update({
            'errors': self.task.payload.get('errors', []) + [{
                'reason': 'InvalidResponse',
                'message': 'Response is missing the "result" field.'
            }]
        })
        task_module.UpdateTask(
            self.job, self.task.id, new_state='failed',
            payload=self.task.payload)
        return None
    self.task.payload.update({'build_url': build.get('url')})
    if result in FAILURE_MAPPING:
        self.task.payload.update({
            'errors': self.task.payload.get('errors', []) + [{
                'reason': 'BuildFailed',
                'message': 'Swarming task %s failed with status "%s"' %
                           (build.get('id'), result)
            }]
        })
        task_module.UpdateTask(
            self.job, self.task.id, new_state=FAILURE_MAPPING[result],
            payload=self.task.payload)
        return None
    # Parse the result and mark this task completed.
    if 'result_details_json' not in build:
        self.task.payload.update({
            'errors': [{
                'reason': 'BuildIsolateNotFound',
                'message': 'Could not find isolate for build at %s' %
                           (self.change,)
            }]
        })
        task_module.UpdateTask(
            self.job, self.task.id, new_state='failed',
            payload=self.task.payload)
        return None
    try:
        result_details = json.loads(build['result_details_json'])
    except ValueError as e:
        self.task.payload.update({
            'errors': [{
                'reason': 'BuildIsolateNotFound',
                'message': 'Invalid JSON response: %s' % (e,)
            }]
        })
        task_module.UpdateTask(
            self.job, self.task.id, new_state='failed',
            payload=self.task.payload)
        return None
    if 'properties' not in result_details:
        self.task.payload.update({
            'errors': [{
                'reason': 'BuildIsolateNotFound',
                'message': 'Could not find result details for build at %s' %
                           (self.change,)
            }]
        })
        task_module.UpdateTask(
            self.job, self.task.id, new_state='failed',
            payload=self.task.payload)
        return None
    properties = result_details['properties']
    # Validate whether the properties in the result include required data.
    required_keys = set(['isolate_server', 'got_revision_cp'])
    missing_keys = required_keys - set(properties)
    if missing_keys:
        self.task.payload.update({
            'errors': [{
                'reason': 'BuildIsolateNotFound',
                'message': 'Properties in result missing required data: %s' %
                           (missing_keys,)
            }]
        })
        task_module.UpdateTask(
            self.job, self.task.id, new_state='failed',
            payload=self.task.payload)
        return None
    # Derive the swarm_hashes property key from the commit position and
    # whether a patch was applied; presumably '@' is replaced to keep the
    # key well-formed — TODO(review): confirm the key format upstream.
    commit_position = properties['got_revision_cp'].replace('@', '(at)')
    suffix = ('without_patch'
              if 'patch_storage' not in properties else 'with_patch')
    key = '_'.join(('swarm_hashes', commit_position, suffix))
    if self.task.payload.get('target') not in properties.get(key, {}):
        # TODO(dberris): Update the job state with an exception, or set of
        # failures.
        self.task.payload.update({
            'errors': [{
                'reason': 'BuildIsolateNotFound',
                'message': 'Could not find isolate for build at %s' %
                           (self.change,)
            }]
        })
        task_module.UpdateTask(
            self.job, self.task.id, new_state='failed',
            payload=self.task.payload)
        return None
    self.task.payload.update({
        'isolate_server': properties['isolate_server'],
        'isolate_hash': properties[key][self.task.payload.get('target')]
    })
    task_module.UpdateTask(
        self.job, self.task.id, new_state='completed',
        payload=self.task.payload)
def CompleteWithCachedIsolate(_):
    """Finish the task, reusing the isolate details already in its payload."""
    task_module.UpdateTask(
        self.job, task.id, payload=task.payload, new_state='completed')
def UpdateTask(job, task_id, new_state, _):
    """Log and apply a bare state transition (payload left untouched)."""
    logging.debug('Updating task "%s" to "%s"', task_id, new_state)
    task_module.UpdateTask(job, task_id, new_state=new_state)
def __call__(self, _):
    """Poll the Swarming task's result and update the task state to match.

    PENDING/RUNNING just commit the payload; EXPIRED and any non-COMPLETED
    state fail the task; COMPLETED records the isolate outputs and, when the
    Swarming run itself reported failure, appends a RunTestFailed error
    parsed from the task's stdout before committing the final state.
    """
    swarming_server = self.task.payload.get('swarming_server')
    task_id = self.task.payload.get('swarming_task_id')
    swarming_task = swarming.Swarming(swarming_server).Task(task_id)
    result = swarming_task.Result()
    # Keep only a small, stable subset of the Swarming result in the payload.
    self.task.payload.update({
        'swarming_task_result': {
            k: v
            for k, v in result.items()
            if k in {'bot_id', 'state', 'failure'}
        }
    })
    task_state = result.get('state')
    if task_state in {'PENDING', 'RUNNING'}:
        # Commit the task payload still.
        task_module.UpdateTask(
            self.job, self.task.id, payload=self.task.payload)
        return
    if task_state == 'EXPIRED':
        # TODO(dberris): Do a retry, reset the payload and run an "initiate"?
        self.task.payload.update({
            'errors': [{
                'reason': 'SwarmingExpired',
                'message': 'Request to the Swarming service expired.',
            }]
        })
        task_module.UpdateTask(
            self.job, self.task.id, new_state='failed',
            payload=self.task.payload)
        return
    if task_state != 'COMPLETED':
        task_module.UpdateTask(
            self.job, self.task.id, new_state='failed',
            payload=self.task.payload)
        return
    self.task.payload.update({
        'isolate_server': result.get('outputs_ref', {}).get('isolatedserver'),
        'isolate_hash': result.get('outputs_ref', {}).get('isolated'),
    })
    new_state = 'completed'
    if result.get('failure', False):
        new_state = 'failed'
        exception_string = run_test_quest.ParseException(
            swarming_task.Stdout()['output'])
        if not exception_string:
            exception_string = 'No exception found in Swarming task output.'
        self.task.payload.update({
            'errors': self.task.payload.get('errors', []) + [{
                'reason': 'RunTestFailed',
                'message': 'Running the test failed: %s' % (exception_string,)
            }]
        })
    task_module.UpdateTask(
        self.job, self.task.id, new_state=new_state,
        payload=self.task.payload)
def __call__(self, task, *_):
    """Record the configured result values and emit a complete-task action."""
    task.payload.update({'result_values': [self.result]})

    def complete(_):
        return task_module.UpdateTask(
            self.job, task.id, new_state='completed', payload=task.payload)

    return [complete]
def __call__(self, task, event, accumulator):
    """Evaluate a run_test task against its build dependency's state.

    Collects isolate details for each of the task's dependencies out of the
    accumulator. If the dependency failed, returns an action failing this
    task; if it completed, returns a ScheduleTestAction carrying the
    Swarming request properties. Returns None when there is nothing to do
    (no dependencies, or the dependency is still in progress).
    """
    # Outline:
    #  - Check dependencies to see if they're 'completed', looking for:
    #    - Isolate server
    #    - Isolate hash
    dep_map = {
        dep: {
            'isolate_server': accumulator.get(dep, {}).get('isolate_server'),
            'isolate_hash': accumulator.get(dep, {}).get('isolate_hash'),
            'status': accumulator.get(dep, {}).get('status'),
        } for dep in task.dependencies
    }
    if not dep_map:
        logging.error(
            'No dependencies for "run_test" task, unlikely to proceed; '
            'task = %s', task)
        return None
    dep_value = {}
    if len(dep_map) > 1:
        # TODO(dberris): Figure out whether it's a valid use-case to have
        # multiple isolate inputs to Swarming.
        logging.error(('Found multiple dependencies for run_test; '
                       'picking a random input; task = %s'), task)
    # BUGFIX: dict views are not subscriptable in Python 3 (the original
    # `dep_map.values()[0]` raises TypeError), and the single-dependency
    # common case must populate dep_value too — previously it was only
    # filled when there was more than one dependency.
    dep_value.update(next(iter(dep_map.values())))
    if dep_value.get('status') == 'failed':
        task.payload.update({
            'errors': [{
                'reason': 'BuildIsolateNotFound',
                'message': ('The build task this depends on failed, '
                            'so we cannot proceed to running the tests.')
            }]
        })
        return [
            lambda _: task_module.UpdateTask(
                self.job, task.id, new_state='failed', payload=task.payload)
        ]
    if dep_value.get('status') == 'completed':
        properties = {
            'input_ref': {
                'isolatedserver': dep_value.get('isolate_server'),
                'isolated': dep_value.get('isolate_hash'),
            },
            'extra_args': task.payload.get('extra_args'),
            'dimensions': task.payload.get('dimensions'),
            # TODO(dberris): Make these hard-coded-values configurable?
            'execution_timeout_secs': '21600',  # 6 hours, for rendering.mobile.
            'io_timeout_secs': '14400',  # 4 hours, to match the perf bots.
        }
        return [
            ScheduleTestAction(job=self.job, task=task, properties=properties)
        ]
    # Dependency still pending/ongoing; nothing to schedule this round.
    return None
def __call__(self, _):
    """Commit the pre-configured state transition for the bound task."""
    job, task_id = self.job, self.task.id
    return task_module.UpdateTask(
        job, task_id, new_state=self.new_state, payload=self.payload)
def __call__(self, _):
    """Persist the task's current payload without changing its state."""
    task = self.task
    task_module.UpdateTask(self.job, task.id, payload=task.payload)
def __call__(self, accumulator):
    """Force the bound task into the configured state."""
    # TODO(dberris): Maybe consider cancelling outstanding actions? Here we'll
    # need a way of synthesising actions if we want to force the continuation
    # of a task graph's evaluation.
    task_module.UpdateTask(
        self.job, self.task.id, payload=self.task.payload,
        new_state=self.state)
def __call__(self, _):
    """Poll the Swarming task's result, handling isolate or CAS outputs.

    PENDING/RUNNING just commit the payload; EXPIRED and any non-COMPLETED
    state fail the task; COMPLETED records either 'outputs_ref' (isolate)
    or 'cas_output_root' (RBE-CAS) details and, when the run itself reported
    failure, appends a RunTestFailed error pointing at the isolate output
    before committing the final state.
    """
    swarming_server = self.task.payload.get('swarming_server')
    task_id = self.task.payload.get('swarming_task_id')
    swarming_task = swarming.Swarming(swarming_server).Task(task_id)
    result = swarming_task.Result()
    # Keep only a small, stable subset of the Swarming result in the payload.
    self.task.payload.update({
        'swarming_task_result': {
            k: v
            for k, v in result.items()
            if k in {'bot_id', 'state', 'failure'}
        }
    })
    task_state = result.get('state')
    if task_state in {'PENDING', 'RUNNING'}:
        # Commit the task payload still.
        task_module.UpdateTask(
            self.job, self.task.id, payload=self.task.payload)
        return
    if task_state == 'EXPIRED':
        # TODO(dberris): Do a retry, reset the payload and run an "initiate"?
        self.task.payload.update({
            'errors': [{
                'reason': 'SwarmingExpired',
                'message': 'Request to the Swarming service expired.',
            }]
        })
        task_module.UpdateTask(
            self.job, self.task.id, new_state='failed',
            payload=self.task.payload)
        return
    if task_state != 'COMPLETED':
        task_module.UpdateTask(
            self.job, self.task.id, new_state='failed',
            payload=self.task.payload)
        return
    # Record whichever output reference the Swarming result carries.
    if 'outputs_ref' in result:
        self.task.payload.update({
            'isolate_server': result.get('outputs_ref').get('isolatedserver'),
            'isolate_hash': result.get('outputs_ref').get('isolated'),
        })
    elif 'cas_output_root' in result:
        self.task.payload.update({
            'cas_root_ref': result.get('cas_output_root'),
        })
    new_state = 'completed'
    if result.get('failure', False):
        new_state = 'failed'
        self.task.payload.update({
            'errors': self.task.payload.get('errors', []) + [{
                'reason': 'RunTestFailed',
                'message': ('Running the test failed, see isolate output: '
                            'https://%s/browse?digest=%s' % (
                                self.task.payload.get('isolate_server'),
                                self.task.payload.get('isolate_hash'),
                            ))
            }]
        })
    task_module.UpdateTask(
        self.job, self.task.id, new_state=new_state,
        payload=self.task.payload)
def __call__(self, accumulator):
    """Poll Buildbucket for the build's status and finalize the task.

    Polls using the stored buildbucket_result, fails the task on any
    non-COMPLETED status or mapped failure result, and on success extracts
    the isolate details from 'result_details_json' into the payload before
    marking the task completed.
    """
    # The task contains the buildbucket_result which we need to update by
    # polling the status of the id.
    build_details = self.task.payload.get('buildbucket_result')
    if not build_details:
        logging.error(
            'No build details in attempt to update build status; task = %s',
            self.task)
        task_module.UpdateTask(self.job, self.task.id, new_state='failed')
        return None
    # Use the build ID and poll.
    # TODO(dberris): Handle errors when getting job status?
    build = buildbucket_service.GetJobStatus(build_details).get('build', {})
    logging.debug('buildbucket response: %s', build)
    # Update the buildbucket result.
    self.task.payload.update({
        'buildbucket_job_status': build,
    })
    # Decide whether the build was successful or not.
    if build.get('status') != 'COMPLETED':
        logging.error('Unexpected status: %s', build.get('status'))
        task_module.UpdateTask(
            self.job, self.task.id, new_state='failed',
            payload=self.task.payload)
        return None
    result = build.get('result')
    if not result:
        logging.debug('Missing result field in response, bailing.')
        task_module.UpdateTask(
            self.job, self.task.id, new_state='failed',
            payload=self.task.payload)
        return None
    if result in FAILURE_MAPPING:
        task_module.UpdateTask(
            self.job, self.task.id, new_state=FAILURE_MAPPING[result],
            payload=self.task.payload)
        return None
    # Parse the result and mark this task completed.
    if 'result_details_json' not in build:
        self.task.payload.update({
            'errors': [{
                'reason': 'BuildIsolateNotFound',
                'message': 'Could not find isolate for build at %s' %
                           (self.change,)
            }]
        })
        task_module.UpdateTask(
            self.job, self.task.id, new_state='failed',
            payload=self.task.payload)
        return None
    try:
        result_details = json.loads(build['result_details_json'])
    except ValueError as e:
        self.task.payload.update({
            'errors': [{
                'reason': 'BuildIsolateNotFound',
                'message': 'Invalid JSON response: %s' % (e,)
            }]
        })
        task_module.UpdateTask(
            self.job, self.task.id, new_state='failed',
            payload=self.task.payload)
        return None
    if 'properties' not in result_details:
        self.task.payload.update({
            'errors': [{
                'reason': 'BuildIsolateNotFound',
                'message': 'Could not find result details for build at %s' %
                           (self.change,)
            }]
        })
        task_module.UpdateTask(
            self.job, self.task.id, new_state='failed',
            payload=self.task.payload)
        return None
    properties = result_details['properties']
    # Validate whether the properties in the result include required data.
    required_keys = set(['isolate_server', 'got_revision_cp'])
    missing_keys = required_keys - set(properties)
    if missing_keys:
        self.task.payload.update({
            'errors': [{
                'reason': 'BuildIsolateNotFound',
                'message': 'Properties in result missing required data: %s' %
                           (missing_keys,)
            }]
        })
        task_module.UpdateTask(
            self.job, self.task.id, new_state='failed',
            payload=self.task.payload)
        return None
    # Derive the swarm_hashes property key from the commit position and
    # whether a patch was applied; presumably '@' is replaced to keep the
    # key well-formed — TODO(review): confirm the key format upstream.
    commit_position = properties['got_revision_cp'].replace('@', '(at)')
    suffix = ('without_patch'
              if 'patch_storage' not in properties else 'with_patch')
    key = '_'.join(('swarm_hashes', commit_position, suffix))
    if self.task.payload.get('target') not in properties.get(key, {}):
        # TODO(dberris): Update the job state with an exception, or set of
        # failures.
        self.task.payload.update({
            'errors': [{
                'reason': 'BuildIsolateNotFound',
                'message': 'Could not find isolate for build at %s' %
                           (self.change,)
            }]
        })
        task_module.UpdateTask(
            self.job, self.task.id, new_state='failed',
            payload=self.task.payload)
        return None
    self.task.payload.update({
        'isolate_server': properties['isolate_server'],
        'isolate_hash': properties[key][self.task.payload.get('target')]
    })
    task_module.UpdateTask(
        self.job, self.task.id, new_state='completed',
        payload=self.task.payload)
def __call__(self, _):
    """Mark the bound task as failed, persisting its current payload."""
    task_module.UpdateTask(
        self.job, self.task.id, payload=self.task.payload, new_state='failed')