def post(self, build_id):
    """
    Cancel a build by cancelling each of its unfinished jobs.

    Responds with 404 when the build does not exist and with 204 when the
    build is already finished or when no job could be cancelled. Otherwise
    the build is marked finished/aborted, added to the session and
    returned through ``self.respond``.
    """
    build = Build.query.options(
        joinedload('project', innerjoin=True),
        joinedload('author'),
        joinedload('source').joinedload('revision'),
    ).get(build_id)
    if build is None:
        return '', 404

    if build.status == Status.finished:
        return '', 204

    # find any active/pending jobs
    active_jobs = [job for job in build.jobs if job.status != Status.finished]

    cancelled = []
    for job in active_jobs:
        # TODO(dcramer): we make an assumption that there is a single step
        # Only the build step is needed here; the job plan is discarded.
        _, implementation = JobPlan.get_build_step_for_job(job_id=job.id)
        if not implementation:
            # nothing that could cancel this job
            continue

        implementation.cancel(job=job)
        cancelled.append(job)

    if not cancelled:
        return '', 204

    build.status = Status.finished
    build.result = Result.aborted
    db.session.add(build)

    return self.respond(build)
def sync_artifact(artifact_id=None, **kwargs):
    """
    Downloads an artifact from jenkins.

    Does nothing when the artifact does not exist or when its step was
    aborted. An artifact that already has a file is handed to the build
    step's artifact manager for processing; otherwise the build step is
    asked to fetch it. An UnrecoverableException raised by either path is
    logged and swallowed.
    """
    artifact = Artifact.query.get(artifact_id)
    if artifact is None:
        return

    step = artifact.step
    if step.result == Result.aborted:
        return

    _, implementation = JobPlan.get_build_step_for_job(job_id=step.job_id)

    # TODO(dcramer): we eventually want to abstract the entirety of Jenkins
    # artifact syncing so that we pull files and then process them
    has_file = bool(artifact.file)
    if has_file:
        failure_message = 'Unrecoverable exception processing artifact %s: %s'
    else:
        failure_message = 'Unrecoverable exception fetching artifact %s: %s'

    try:
        if has_file:
            implementation.get_artifact_manager(step).process(artifact)
        else:
            implementation.fetch_artifact(artifact=artifact)
    except UnrecoverableException:
        current_app.logger.exception(
            failure_message, artifact.step_id, artifact)
def sync_artifact(artifact_id=None, **kwargs):
    """
    Downloads an artifact from jenkins.

    Does nothing when the artifact does not exist or when its step was
    aborted. An artifact that already has a file is processed by
    ``manager``; any exception from that is logged and swallowed.
    Otherwise the job's build step fetches the artifact (extra keyword
    arguments are forwarded), and only UnrecoverableException is logged and
    swallowed.
    """
    artifact = Artifact.query.get(artifact_id)
    if artifact is None:
        return

    step = artifact.step
    if step.result == Result.aborted:
        return

    # TODO(dcramer): we eventually want to abstract the entirety of Jenkins
    # artifact syncing so that we pull files and then process them
    if artifact.file:
        # the file is already present: only processing is left to do
        try:
            manager.process(artifact)
        except Exception:
            current_app.logger.exception(
                'Unrecoverable exception processing artifact %s: %s',
                artifact.step_id, artifact)
        return

    # no file yet: the build step is only looked up on this path
    _, implementation = JobPlan.get_build_step_for_job(job_id=step.job_id)
    try:
        implementation.fetch_artifact(artifact=artifact, **kwargs)
    except UnrecoverableException:
        current_app.logger.exception(
            'Unrecoverable exception fetching artifact %s: %s',
            artifact.step_id, artifact)
def _sync_artifacts_for_jobstep(step):
    """
    Enqueue a sync_artifact task for each artifact of ``step`` that should
    be synced.

    Returns early when a 'sync_artifact' task with this step as parent
    already exists. The build step verifies the selected artifacts before
    any task is enqueued.
    """
    # only generate the sync_artifact tasks for this step once
    existing_task = Task.query.filter(
        Task.parent_id == step.id,
        Task.task_name == 'sync_artifact',
    ).first()
    if existing_task:
        return

    step_artifacts = Artifact.query.filter(
        Artifact.step_id == step.id).all()

    _, buildstep = JobPlan.get_build_step_for_job(job_id=step.job_id)
    use_artifactstore = buildstep.prefer_artifactstore()
    manager = buildstep.get_artifact_manager(step)
    to_sync = _get_artifacts_to_sync(
        step_artifacts, manager, use_artifactstore)

    # buildstep may want to check for e.g. required artifacts
    buildstep.verify_final_artifacts(step, to_sync)

    parent_hex = step.id.hex
    for artifact in to_sync:
        artifact_hex = artifact.id.hex
        sync_artifact.delay_if_needed(
            artifact_id=artifact_hex,
            task_id=artifact_hex,
            parent_task_id=parent_hex,
        )
def post(self, build_id):
    """
    Cancel the unfinished jobs of a build and mark the build as aborted.

    Responds with 404 for an unknown build, and with 204 when the build is
    already finished or when nothing was cancelled.
    """
    query = Build.query.options(
        joinedload('project', innerjoin=True),
        joinedload('author'),
        joinedload('source').joinedload('revision'),
    )
    build = query.get(build_id)
    if build is None:
        return '', 404
    if build.status == Status.finished:
        return '', 204

    # find any active/pending jobs
    pending = [j for j in build.jobs if j.status != Status.finished]

    cancelled = []
    for job in pending:
        # TODO(dcramer): we make an assumption that there is a single step
        _, implementation = JobPlan.get_build_step_for_job(job_id=job.id)
        if implementation:
            implementation.cancel(job=job)
            cancelled.append(job)

    if cancelled:
        build.status = Status.finished
        build.result = Result.aborted
        db.session.add(build)
        return self.respond(build)

    return '', 204
def create_job(job_id):
    """
    Start execution of a job through the build step of its job plan.

    Returns without doing anything when the job does not exist or is
    already finished. When no build step is available the job is marked
    finished/failed; when execution raises UnrecoverableException it is
    marked finished/aborted. On success a sync_job task is enqueued.
    """
    job = Job.query.get(job_id)
    if not job:
        return

    # we might already be marked as finished for various reasons
    # (such as aborting the task)
    if job.status == Status.finished:
        return

    # only the build step is used; the job plan itself is not needed
    _, implementation = JobPlan.get_build_step_for_job(job_id=job.id)
    if implementation is None:
        # TODO(dcramer): record a FailureReason?
        job.status = Status.finished
        job.result = Result.failed
        # No exception is being handled here, so use error(): exception()
        # would attach an empty "NoneType: None" traceback to the record.
        current_app.logger.error('No build plan set %s', job_id)
        return

    try:
        implementation.execute(job=job)
    except UnrecoverableException:
        job.status = Status.finished
        job.result = Result.aborted
        current_app.logger.exception(
            'Unrecoverable exception creating %s', job_id)
        return

    sync_job.delay(
        job_id=job.id.hex,
        task_id=job.id.hex,
        parent_task_id=job.build_id.hex,
    )
def get(self, step_id):
    """
    Return the serialized jobstep together with its commands, snapshot
    information, project, job and optional build step configuration
    (resource limits, LXC config, debug config).

    Responds with 404 when the jobstep does not exist.
    """
    jobstep = JobStep.query.options(
        joinedload('project', innerjoin=True),
    ).get(step_id)
    if jobstep is None:
        return '', 404

    jobplan = JobPlan.query.filter(
        JobPlan.job_id == jobstep.job_id,
    ).first()

    # determine if there's an expected snapshot outcome
    expected_image = SnapshotImage.query.filter(
        SnapshotImage.job_id == jobstep.job_id,
    ).first()

    current_image = None
    # we only send a current snapshot if we're not expecting to build
    # a new image
    if not expected_image:
        if jobplan:
            current_image = jobplan.snapshot_image
        if current_image is None and current_app.config['DEFAULT_SNAPSHOT']:
            current_image = {
                'id': current_app.config['DEFAULT_SNAPSHOT'],
            }

    context = self.serialize(jobstep)
    context['commands'] = self.serialize(list(jobstep.commands))
    context['snapshot'] = self.serialize(current_image)
    context['expectedSnapshot'] = self.serialize(expected_image)
    context['project'] = self.serialize(jobstep.project)
    context['job'] = self.serialize(jobstep.job)

    _, buildstep = JobPlan.get_build_step_for_job(jobstep.job_id)

    resource_limits = buildstep.get_resource_limits() if buildstep else {}
    if resource_limits:
        context['resourceLimits'] = resource_limits

    lxc_config = buildstep.get_lxc_config(jobstep) if buildstep else None
    if lxc_config:
        context["adapter"] = "lxc"
        context['lxcConfig'] = {
            'preLaunch': lxc_config.prelaunch,
            'postLaunch': lxc_config.postlaunch,
            's3Bucket': lxc_config.s3_bucket,
            'compression': lxc_config.compression,
            'release': lxc_config.release,
        }

    debug_config = buildstep.debug_config if buildstep else {}
    if 'debugForceInfraFailure' in jobstep.data:
        # Work on a copy so the per-jobstep override is not written into
        # the dict owned by the build step.
        debug_config = dict(debug_config)
        debug_config['forceInfraFailure'] = jobstep.data['debugForceInfraFailure']
    if debug_config:
        context['debugConfig'] = self.serialize(debug_config)

    return self.respond(context, serialize=False)
def sync_job_phases(job, phases=None, implementation=None):
    """
    Run sync_phase on each phase of ``job``.

    phases: phases to sync; when None, the job's phases are queried.
    implementation: build step handed to sync_phase; when None, it is
        looked up from the job's plan.
    """
    phases_to_sync = phases
    if phases_to_sync is None:
        phases_to_sync = JobPhase.query.filter(JobPhase.job_id == job.id)

    buildstep = implementation
    if buildstep is None:
        _, buildstep = JobPlan.get_build_step_for_job(job_id=job.id)

    for current_phase in phases_to_sync:
        sync_phase(current_phase, buildstep)
def create_job_plan(self, job, plan):
    """
    Create a JobPlan linking ``job`` to ``plan``, commit it and return it.
    """
    attributes = {
        'project_id': job.project_id,
        'build_id': job.build_id,
        'plan_id': plan.id,
        'job_id': job.id,
    }
    created = JobPlan(**attributes)

    db.session.add(created)
    db.session.commit()

    return created
def execute_build(build, snapshot_id, no_snapshot):
    """
    Create one job (with its job plan) per build plan of the build's
    project, then enqueue a create_job task per job and a sync_build task.

    snapshot_id: snapshot handed to every job plan. When it is None and
        ``no_snapshot`` is falsy, the project's current snapshot (if any)
        is used instead.
    no_snapshot: when truthy no snapshot is looked up, and ``snapshot_id``
        must be None.

    Returns the build.
    """
    wants_snapshot = not no_snapshot
    if not wants_snapshot:
        assert snapshot_id is None, 'Cannot specify snapshot with no_snapshot option'

    # TODO(dcramer): most of this should be abstracted into sync_build as if it
    # were a "im on step 0, create step 1"
    project = build.project

    # We choose a snapshot before creating jobplans. This is so that different
    # jobplans won't end up using different snapshots in a build.
    if wants_snapshot and snapshot_id is None:
        current_snapshot = Snapshot.get_current(project.id)
        if current_snapshot:
            snapshot_id = current_snapshot.id

    created_jobs = []
    for plan in get_build_plans(project):
        new_job = Job(
            build=build,
            build_id=build.id,
            project=project,
            project_id=project.id,
            source=build.source,
            source_id=build.source_id,
            status=build.status,
            label=plan.label,
        )
        db.session.add(new_job)

        db.session.add(
            JobPlan.build_jobplan(plan, new_job, snapshot_id=snapshot_id))

        created_jobs.append(new_job)

    db.session.commit()

    for new_job in created_jobs:
        create_job.delay(
            job_id=new_job.id.hex,
            task_id=new_job.id.hex,
            parent_task_id=new_job.build_id.hex,
        )

    db.session.commit()

    sync_build.delay(
        build_id=build.id.hex,
        task_id=build.id.hex,
    )

    return build
def execute_build(build):
    """
    Create one job and job plan per plan of the build's project, publish
    the build and job updates, and enqueue the create_job and sync_build
    tasks.

    Returns the build.
    """
    # TODO(dcramer): most of this should be abstracted into sync_build as if it
    # were a "im on step 0, create step 1"
    project = build.project

    jobs = []
    for plan in project.plans:
        job = Job(
            build=build,
            build_id=build.id,
            project=project,
            project_id=project.id,
            source=build.source,
            source_id=build.source_id,
            status=build.status,
            label=plan.label,
        )
        db.session.add(job)

        jobplan = JobPlan(
            project=project,
            job=job,
            build=build,
            plan=plan,
        )
        db.session.add(jobplan)

        jobs.append(job)

    db.session.commit()

    publish_build_update(build)

    for job in jobs:
        publish_job_update(job)
        create_job.delay(
            job_id=job.id.hex,
            task_id=job.id.hex,
            parent_task_id=job.build_id.hex,
        )

    db.session.commit()

    # Use the build's own id rather than the loop variable ``job``: every
    # job was created with build_id=build.id, and ``job`` is undefined when
    # the project has no plans.
    sync_build.delay(
        build_id=build.id.hex,
        task_id=build.id.hex,
    )

    return build
def process(self, fp):
    """
    Parse a JSON phase configuration from ``fp`` and let the job's build
    step expand jobs from it.

    A JSON parse error or any exception raised by expand_jobs is logged as
    a warning and recorded through ``_add_failure_reason``.
    """
    try:
        phase_config = json.load(fp)
    except ValueError:
        build_link = build_uri(
            '/find_build/{0}/'.format(self.step.job.build_id.hex))
        self.logger.warning(
            'Failed to parse json; (step=%s, build=%s)',
            self.step.id.hex, build_link, exc_info=True)
        self._add_failure_reason()
        return

    # Only reached when the JSON was parsed successfully.
    _, implementation = JobPlan.get_build_step_for_job(
        job_id=self.step.job_id)
    try:
        implementation.expand_jobs(self.step, phase_config)
    except Exception:
        build_link = build_uri(
            '/find_build/{0}/'.format(self.step.job.build_id.hex))
        self.logger.warning(
            'expand_jobs failed (step=%s, build=%s)',
            self.step.id.hex, build_link, exc_info=True)
        self._add_failure_reason()
def create_job(job_id):
    """
    Kicks off a newly created job within a build;
    enqueued for each job within a new build.

    The job is finished as aborted when its project is inactive or when no
    build step is available, and as infra_failed when execution raises
    UnrecoverableException. Nothing happens for a missing or already
    finished job. On success a sync_job task is enqueued.
    """
    job = Job.query.get(job_id)
    if not job:
        return

    if job.project.status == ProjectStatus.inactive:
        current_app.logger.warning('Project is not active: %s', job.project.slug)
        job.status = Status.finished
        job.result = Result.aborted
        db.session.add(job)
        db.session.flush()
        return

    # we might already be marked as finished for various reasons
    # (such as aborting the task)
    if job.status == Status.finished:
        return

    # only the build step is used; the job plan itself is not needed
    _, implementation = JobPlan.get_build_step_for_job(job_id=job.id)
    if implementation is None:
        # TODO(dcramer): record a FailureReason?
        job.status = Status.finished
        job.result = Result.aborted
        db.session.add(job)
        db.session.flush()
        # No exception is being handled here, so use error(): exception()
        # would attach an empty "NoneType: None" traceback to the record.
        current_app.logger.error('No build plan set %s', job_id)
        return

    try:
        implementation.execute(job=job)
    except UnrecoverableException:
        job.status = Status.finished
        job.result = Result.infra_failed
        db.session.add(job)
        db.session.flush()
        current_app.logger.exception(
            'Unrecoverable exception creating %s', job_id)
        return

    sync_job.delay(
        job_id=job.id.hex,
        task_id=job.id.hex,
        parent_task_id=job.build_id.hex,
    )
def process(self, fp):
    """
    Parse a JSON phase configuration from ``fp`` and let the job's build
    step expand jobs from it.

    Any exception raised while parsing, looking up the build step or
    expanding is logged as a warning and recorded as a
    'malformed_artifact' FailureReason, which is then committed.
    """
    try:
        config = json.load(fp)
        _, buildstep = JobPlan.get_build_step_for_job(
            job_id=self.step.job_id)
        buildstep.expand_jobs(self.step, config)
    except Exception:
        step = self.step
        build_id = step.job.build_id

        build_link = build_uri('/find_build/{0}/'.format(build_id.hex))
        self.logger.warning(
            'Failed to parse json; (step=%s, build=%s)',
            step.id.hex, build_link, exc_info=True)

        failure = {
            'step_id': step.id,
            'job_id': step.job_id,
            'build_id': build_id,
            'project_id': step.project_id,
            'reason': 'malformed_artifact'
        }
        try_create(FailureReason, failure)
        db.session.commit()
def create_job(job_id):
    """
    Kicks off a newly created job within a build;
    enqueued for each job within a new build.

    The job is finished as aborted when its project is inactive or when no
    build step is available, and as infra_failed when execution raises
    UnrecoverableException. Nothing happens for a missing or already
    finished job. On success a sync_job task is enqueued.
    """
    job = Job.query.get(job_id)
    if not job:
        return

    if job.project.status == ProjectStatus.inactive:
        current_app.logger.warning('Project is not active: %s', job.project.slug)
        job.status = Status.finished
        job.result = Result.aborted
        db.session.add(job)
        db.session.flush()
        return

    # we might already be marked as finished for various reasons
    # (such as aborting the task)
    if job.status == Status.finished:
        return

    _, implementation = JobPlan.get_build_step_for_job(job_id=job.id)
    if implementation is None:
        # TODO(dcramer): record a FailureReason?
        job.status = Status.finished
        job.result = Result.aborted
        db.session.add(job)
        db.session.flush()
        # error() instead of exception(): there is no active exception on
        # this path, so exception() would log a meaningless
        # "NoneType: None" traceback.
        current_app.logger.error('No build plan set %s', job_id)
        return

    try:
        implementation.execute(job=job)
    except UnrecoverableException:
        job.status = Status.finished
        job.result = Result.infra_failed
        db.session.add(job)
        db.session.flush()
        current_app.logger.exception(
            'Unrecoverable exception creating %s', job_id)
        return

    sync_job.delay(
        job_id=job.id.hex,
        task_id=job.id.hex,
        parent_task_id=job.build_id.hex,
    )
def get(self, step_id):
    """
    Return the serialized jobstep together with its commands, snapshot
    information, project, job and optional debug configuration.

    Responds with 404 when the jobstep does not exist.
    """
    jobstep = JobStep.query.options(
        joinedload('project', innerjoin=True),
    ).get(step_id)
    if jobstep is None:
        return '', 404

    jobplan = JobPlan.query.filter(
        JobPlan.job_id == jobstep.job_id,
    ).first()

    # determine if there's an expected snapshot outcome
    expected_image = SnapshotImage.query.filter(
        SnapshotImage.job_id == jobstep.job_id,
    ).first()

    current_image = None
    # we only send a current snapshot if we're not expecting to build
    # a new image
    if not expected_image:
        if jobplan:
            current_image = jobplan.snapshot_image
        if current_image is None and current_app.config['DEFAULT_SNAPSHOT']:
            current_image = {
                'id': current_app.config['DEFAULT_SNAPSHOT'],
            }

    context = self.serialize(jobstep)
    context['commands'] = self.serialize(list(jobstep.commands))
    context['snapshot'] = self.serialize(current_image)
    context['expectedSnapshot'] = self.serialize(expected_image)
    context['project'] = self.serialize(jobstep.project)
    context['job'] = self.serialize(jobstep.job)

    _, buildstep = JobPlan.get_build_step_for_job(jobstep.job_id)

    debug_config = buildstep.debug_config if buildstep else {}
    if 'debugForceInfraFailure' in jobstep.data:
        # Copy before applying the per-jobstep override so the dict owned
        # by the build step is left untouched.
        debug_config = dict(debug_config)
        debug_config['forceInfraFailure'] = jobstep.data['debugForceInfraFailure']
    if debug_config:
        context['debugConfig'] = self.serialize(debug_config)

    return self.respond(context, serialize=False)
def expand_command(self, command, expander, data):
    """
    Create a new queued JobPhase for the command's job and one jobstep per
    item produced by ``expander``.

    The phase label is ``data['phase']`` when present; otherwise it is
    numbered from the count of the job's existing phases. A phase that
    receives no jobsteps is immediately marked finished/passed. A
    sync_job_step task is requested for each created jobstep.

    Returns the list of created jobsteps.
    """
    jobstep = command.jobstep

    label = data.get('phase')
    if not label:
        existing_phases = db.session.query(
            func.count(),
        ).filter(
            JobPhase.job_id == jobstep.job_id,
        ).scalar()
        label = 'Phase #{}'.format(existing_phases)

    new_jobphase = JobPhase(
        job_id=jobstep.job_id,
        project_id=jobstep.project_id,
        label=label,
        status=Status.queued,
    )
    db.session.add(new_jobphase)

    _, buildstep = JobPlan.get_build_step_for_job(jobstep.job_id)

    results = [
        buildstep.create_expanded_jobstep(jobstep, new_jobphase, future)
        for future in expander.expand(
            max_executors=jobstep.data['max_executors'],
            test_stats_from=buildstep.get_test_stats_from())
    ]

    # If there are no tests to run, the phase is done.
    if not results:
        new_jobphase.status = Status.finished
        new_jobphase.result = Result.passed
        db.session.add(new_jobphase)

    db.session.flush()

    for created in results:
        sync_job_step.delay_if_needed(
            step_id=created.id.hex,
            task_id=created.id.hex,
            parent_task_id=new_jobphase.job.id.hex,
        )

    return results
def execute_build(build):
    """
    Create one job (with its job plan) per build plan of the build's
    project, then enqueue a create_job task per job and a sync_build task.

    Returns the build.
    """
    # TODO(dcramer): most of this should be abstracted into sync_build as if it
    # were a "im on step 0, create step 1"
    project = build.project

    created_jobs = []
    for plan in get_build_plans(project):
        new_job = Job(
            build=build,
            build_id=build.id,
            project=project,
            project_id=project.id,
            source=build.source,
            source_id=build.source_id,
            status=build.status,
            label=plan.label,
        )
        db.session.add(new_job)
        db.session.add(JobPlan.build_jobplan(plan, new_job))
        created_jobs.append(new_job)

    db.session.commit()

    for new_job in created_jobs:
        create_job.delay(
            job_id=new_job.id.hex,
            task_id=new_job.id.hex,
            parent_task_id=new_job.build_id.hex,
        )

    db.session.commit()

    sync_build.delay(
        build_id=build.id.hex,
        task_id=build.id.hex,
    )

    return build
def expand_command(self, command, expander, data):
    """
    Expand ``command``'s jobstep into a new phase of jobsteps.

    A queued JobPhase is added for the job, labelled with ``data['phase']``
    or, when that is missing, with a number taken from the count of the
    job's phases. The build step creates one jobstep per item yielded by
    ``expander.expand``; with none, the phase is marked finished/passed.
    Each created jobstep gets a sync_job_step task.

    Returns the created jobsteps as a list.
    """
    parent_step = command.jobstep

    phase_name = data.get('phase')
    if not phase_name:
        count_query = db.session.query(func.count(), ).filter(
            JobPhase.job_id == parent_step.job_id,
        )
        phase_name = 'Phase #{}'.format(count_query.scalar())

    phase = JobPhase(
        job_id=parent_step.job_id,
        project_id=parent_step.project_id,
        label=phase_name,
        status=Status.queued,
    )
    db.session.add(phase)

    _, buildstep = JobPlan.get_build_step_for_job(parent_step.job_id)

    expansion = expander.expand(
        max_executors=parent_step.data['max_executors'],
        test_stats_from=buildstep.get_test_stats_from())

    results = []
    for future_jobstep in expansion:
        results.append(
            buildstep.create_expanded_jobstep(
                parent_step, phase, future_jobstep))

    # If there are no tests to run, the phase is done.
    if len(results) == 0:
        phase.status = Status.finished
        phase.result = Result.passed
        db.session.add(phase)

    db.session.flush()

    for expanded_step in results:
        sync_job_step.delay_if_needed(
            step_id=expanded_step.id.hex,
            task_id=expanded_step.id.hex,
            parent_task_id=phase.job.id.hex,
        )

    return results
def post(self):
    """
    Allocate the next jobstep and respond with its allocation context.

    The jobstep is selected and marked as allocated while holding the
    'jobstep:allocate' redis lock. Responds with an empty list when there
    is nothing to allocate, and with a 503 error when the lock cannot be
    acquired. If building the context fails for any reason, the jobstep is
    finished as aborted and a 503 error is returned.
    """
    try:
        with redis.lock('jobstep:allocate', nowait=True):
            jobstep = self.find_next_jobstep()

            # Should 204, but flask/werkzeug throws StopIteration (bug!) for tests
            if jobstep is None:
                return self.respond([])

            jobstep.status = Status.allocated
            db.session.add(jobstep)
            db.session.flush()
    except redis.UnableToGetLock:
        return error('Another allocation is in progress', http_code=503)

    try:
        jobplan, buildstep = JobPlan.get_build_step_for_job(jobstep.job_id)

        # a failed assertion is handled by the except clause below
        assert jobplan and buildstep

        context = self.serialize(jobstep)
        context['project'] = self.serialize(jobstep.project)
        step_data = jobstep.data
        context['resources'] = {
            'cpus': step_data.get('cpus', 4),
            'mem': step_data.get('mem', 8 * 1024),
        }
        context['cmd'] = buildstep.get_allocation_command(jobstep)

        return self.respond([context])
    except Exception:
        jobstep.status = Status.finished
        jobstep.result = Result.aborted
        db.session.add(jobstep)
        db.session.flush()

        logging.exception(
            'Exception occurred while allocating job step for project %s',
            jobstep.project.slug)

        return error('Internal error while attempting allocation',
                     http_code=503)
def process(self, fp):
    """
    Parse a JSON phase configuration from ``fp`` and let the job's build
    step expand jobs from it.

    A JSON parse error or an ArtifactParseError from expand_jobs is logged
    and recorded through ``_add_failure_reason``. Any other exception from
    expand_jobs is logged, and the step result is set to infra_failed and
    committed.
    """
    try:
        phase_config = json.load(fp)
    except ValueError:
        build_link = build_uri(
            '/find_build/{0}/'.format(self.step.job.build_id.hex))
        self.logger.warning(
            'Failed to parse json; (step=%s, build=%s)',
            self.step.id.hex, build_link, exc_info=True)
        self._add_failure_reason()
        return

    # Only reached when the JSON was parsed successfully.
    _, implementation = JobPlan.get_build_step_for_job(
        job_id=self.step.job_id)
    try:
        implementation.expand_jobs(self.step, phase_config)
    except ArtifactParseError:
        build_link = build_uri(
            '/find_build/{0}/'.format(self.step.job.build_id.hex))
        self.logger.warning(
            'malformed %s artifact (step=%s, build=%s)',
            self.FILENAMES[0], self.step.id.hex, build_link, exc_info=True)
        self._add_failure_reason()
    except Exception:
        build_link = build_uri(
            '/find_build/{0}/'.format(self.step.job.build_id.hex))
        self.logger.warning(
            'expand_jobs failed (step=%s, build=%s)',
            self.step.id.hex, build_link, exc_info=True)
        self.step.result = Result.infra_failed
        db.session.add(self.step)
        db.session.commit()
def expand_command(self, command, expander, data):
    """
    Create a new queued JobPhase for the command's job and one jobstep per
    item produced by ``expander``.

    The originating jobstep is flagged with ``data['expanded'] = True``.
    The phase label is ``data['phase']`` when present; otherwise it is
    numbered from the count of the job's existing phases. A sync_job_step
    task is requested for each created jobstep.

    Returns the list of created jobsteps.
    """
    jobstep = command.jobstep

    label = data.get('phase')
    if not label:
        existing_phases = db.session.query(
            func.count(),
        ).filter(
            JobPhase.job_id == jobstep.job_id,
        ).scalar()
        label = 'Phase #{}'.format(existing_phases)

    jobstep.data['expanded'] = True
    db.session.add(jobstep)

    new_jobphase = JobPhase(
        job_id=jobstep.job_id,
        project_id=jobstep.project_id,
        label=label,
        status=Status.queued,
    )
    db.session.add(new_jobphase)

    _, buildstep = JobPlan.get_build_step_for_job(jobstep.job_id)

    results = [
        buildstep.expand_jobstep(jobstep, new_jobphase, future)
        for future in expander.expand(
            max_executors=jobstep.data['max_executors'])
    ]

    db.session.flush()

    for created in results:
        sync_job_step.delay_if_needed(
            step_id=created.id.hex,
            task_id=created.id.hex,
            parent_task_id=new_jobphase.job.id.hex,
        )

    return results
def expand_command(self, command, expander, data):
    """
    Expand ``command``'s jobstep into a new phase of jobsteps.

    The originating jobstep is flagged as expanded in its data. A queued
    JobPhase is added for the job, labelled with ``data['phase']`` or,
    when that is missing, with a number taken from the count of the job's
    phases. The build step creates one jobstep per item yielded by
    ``expander.expand`` and each of them gets a sync_job_step task.

    Returns the created jobsteps as a list.
    """
    parent_step = command.jobstep

    phase_name = data.get('phase')
    if not phase_name:
        count_query = db.session.query(func.count(), ).filter(
            JobPhase.job_id == parent_step.job_id,
        )
        phase_name = 'Phase #{}'.format(count_query.scalar())

    parent_step.data['expanded'] = True
    db.session.add(parent_step)

    phase = JobPhase(
        job_id=parent_step.job_id,
        project_id=parent_step.project_id,
        label=phase_name,
        status=Status.queued,
    )
    db.session.add(phase)

    _, buildstep = JobPlan.get_build_step_for_job(parent_step.job_id)

    expansion = expander.expand(
        max_executors=parent_step.data['max_executors'])

    results = []
    for future_jobstep in expansion:
        results.append(
            buildstep.expand_jobstep(parent_step, phase, future_jobstep))

    db.session.flush()

    for expanded_step in results:
        sync_job_step.delay_if_needed(
            step_id=expanded_step.id.hex,
            task_id=expanded_step.id.hex,
            parent_task_id=phase.job.id.hex,
        )

    return results
def _sync_artifacts_for_jobstep(step):
    """
    Enqueue a sync_artifact task for each artifact of ``step`` that should
    be synced.

    Returns early when a 'sync_artifact' task with this step as parent
    already exists. The build step verifies the selected artifacts before
    any task is enqueued.
    """
    # only generate the sync_artifact tasks for this step once
    existing_task = Task.query.filter(
        Task.parent_id == step.id,
        Task.task_name == 'sync_artifact',
    ).first()
    if existing_task:
        return

    _, buildstep = JobPlan.get_build_step_for_job(job_id=step.job_id)
    use_artifactstore = buildstep.prefer_artifactstore()

    step_artifacts = Artifact.query.filter(
        Artifact.step_id == step.id).all()
    to_sync = _get_artifacts_to_sync(step_artifacts, use_artifactstore)

    # buildstep may want to check for e.g. required artifacts
    buildstep.verify_final_artifacts(step, to_sync)

    parent_hex = step.id.hex
    for artifact in to_sync:
        artifact_hex = artifact.id.hex
        sync_artifact.delay_if_needed(
            artifact_id=artifact_hex,
            task_id=artifact_hex,
            parent_task_id=parent_hex,
        )
def process(self, fp):
    """
    Load a JSON phase configuration from ``fp`` and pass it to the
    expand_jobs method of the job's build step.

    Both a JSON parse error and any exception from expand_jobs are logged
    as warnings (with traceback) and recorded via ``_add_failure_reason``.
    """
    def _record_failure(message):
        # Must be called from inside an except block so that exc_info=True
        # picks up the exception currently being handled.
        uri = build_uri('/find_build/{0}/'.format(
            self.step.job.build_id.hex))
        self.logger.warning(message, self.step.id.hex, uri, exc_info=True)
        self._add_failure_reason()

    try:
        phase_config = json.load(fp)
    except ValueError:
        _record_failure('Failed to parse json; (step=%s, build=%s)')
    else:
        _, implementation = JobPlan.get_build_step_for_job(
            job_id=self.step.job_id)
        try:
            implementation.expand_jobs(self.step, phase_config)
        except Exception:
            _record_failure('expand_jobs failed (step=%s, build=%s)')
def process(self, fp):
    """
    Load a JSON phase configuration from ``fp`` and pass it to the
    expand_jobs method of the job's build step.

    A JSON parse error or an ArtifactParseError is logged and recorded via
    ``_add_failure_reason``; any other exception from expand_jobs is
    logged and the step is committed with result infra_failed.
    """
    def _build_link():
        # link to the build, used in every warning below
        return build_uri('/find_build/{0}/'.format(
            self.step.job.build_id.hex))

    try:
        phase_config = json.load(fp)
    except ValueError:
        uri = _build_link()
        self.logger.warning('Failed to parse json; (step=%s, build=%s)',
                            self.step.id.hex, uri, exc_info=True)
        self._add_failure_reason()
    else:
        _, implementation = JobPlan.get_build_step_for_job(
            job_id=self.step.job_id)
        try:
            implementation.expand_jobs(self.step, phase_config)
        except ArtifactParseError:
            uri = _build_link()
            self.logger.warning(
                'malformed %s artifact (step=%s, build=%s)',
                self.FILENAMES[0], self.step.id.hex, uri, exc_info=True)
            self._add_failure_reason()
        except Exception:
            uri = _build_link()
            self.logger.warning('expand_jobs failed (step=%s, build=%s)',
                                self.step.id.hex, uri, exc_info=True)
            self.step.result = Result.infra_failed
            db.session.add(self.step)
            db.session.commit()
def sync_job_step(step_id):
    """
    Polls a jenkins build for updates. May have sync_artifact children.

    Flow, as implemented below:

    1. Let the build step update the jobstep unless it is already finished,
       then sync from the artifact store and, once finished, schedule the
       artifact sync tasks.
    2. While the step or any child task is unfinished, check for a timeout
       (cancelling and failing the step if it timed out) and raise
       ``sync_job_step.NotFinished``.
    3. Once finished, fold FailureReason results, coverage stats, missing
       tests and test failures into the step result and report it.

    Returns None; raises ``sync_job_step.NotFinished`` when the step cannot
    be finalized yet (presumably causing the task to be retried; confirm
    against the task framework).
    """
    step = JobStep.query.get(step_id)
    if not step:
        return

    jobplan, implementation = JobPlan.get_build_step_for_job(
        job_id=step.job_id)

    # only synchronize if upstream hasn't suggested we're finished
    if step.status != Status.finished:
        implementation.update_step(step=step)

    db.session.flush()

    _sync_from_artifact_store(step)

    if step.status == Status.finished:
        _sync_artifacts_for_jobstep(step)

    is_finished = (
        step.status == Status.finished and
        # make sure all child tasks (like sync_artifact) have also finished
        sync_job_step.verify_all_children() == Status.finished)

    if not is_finished:
        default_timeout = current_app.config['DEFAULT_JOB_TIMEOUT_MIN']
        if has_timed_out(step, jobplan, default_timeout=default_timeout):
            # remember the pre-timeout status; it decides the result below
            old_status = step.status
            step.data['timed_out'] = True
            implementation.cancel_step(step=step)

            # Not all implementations can actually cancel, but it's dead to us as of now
            # so we mark it as finished.
            step.status = Status.finished
            step.date_finished = datetime.utcnow()

            # Implementations default to marking canceled steps as aborted,
            # but we're not canceling on good terms (it should be done by now)
            # so we consider it a failure here.
            #
            # We check whether the step was marked as in_progress to make a best
            # guess as to whether this is an infrastructure failure, or the
            # repository under test is just taking too long. This won't be 100%
            # reliable, but is probably good enough.
            if old_status == Status.in_progress:
                step.result = Result.failed
            else:
                step.result = Result.infra_failed
            db.session.add(step)

            job = step.job
            try_create(
                FailureReason, {
                    'step_id': step.id,
                    'job_id': job.id,
                    'build_id': job.build_id,
                    'project_id': job.project_id,
                    'reason': 'timeout'
                })

            db.session.flush()
            statsreporter.stats().incr('job_step_timed_out')
            # If we timeout something that isn't in progress, that's our fault, and we should know.
            if old_status != Status.in_progress:
                current_app.logger.warning(
                    "Timed out jobstep that wasn't in progress: %s (was %s)",
                    step.id, old_status)

        # Raised even after a timeout was handled: the step is finalized on
        # a later run, once it reads as finished.
        raise sync_job_step.NotFinished

    # Ignore any 'failures' if the build did not finish properly.
    # NOTE(josiah): we might want to include "unknown" and "skipped" here as
    # well, or have some named condition like "not meaningful_result(step.result)".
    if step.result in (Result.aborted, Result.infra_failed):
        _report_jobstep_result(step)
        return

    # Check for FailureReason objects generated by child jobs
    failure_result = _result_from_failure_reasons(step)
    if failure_result and failure_result != step.result:
        step.result = failure_result
        db.session.add(step)
        db.session.commit()
        if failure_result == Result.infra_failed:
            _report_jobstep_result(step)
            return

    # coverage stats are best-effort: a failure is logged, not propagated
    try:
        record_coverage_stats(step)
    except Exception:
        current_app.logger.exception(
            'Failing recording coverage stats for step %s', step.id)

    # We need the start time of this step's phase to determine if we're part of
    # the last phase. So, if date_started is empty, wait for sync_phase to catch
    # up and try again.
    if _expects_tests(jobplan) and not step.phase.date_started:
        current_app.logger.warning(
            "Phase[%s].date_started is missing. Retrying Step", step.phase.id)
        # Reset result to unknown to reduce window where test might be incorrectly green.
        # Set status to in_progress so that the next sync_job_step will fetch status from Jenkins again.
        step.result = Result.unknown
        step.status = Status.in_progress
        raise sync_job_step.NotFinished

    missing_tests = is_missing_tests(step, jobplan)

    # record whether tests were missing as a 0/1 stat for this step
    try_create(ItemStat, where={
        'item_id': step.id,
        'name': 'tests_missing',
        'value': int(missing_tests),
    })

    if missing_tests:
        if step.result != Result.failed:
            step.result = Result.failed
            db.session.add(step)

        try_create(
            FailureReason, {
                'step_id': step.id,
                'job_id': step.job_id,
                'build_id': step.job.build_id,
                'project_id': step.project_id,
                'reason': 'missing_tests'
            })
        db.session.commit()

    db.session.flush()

    if has_test_failures(step):
        if step.result != Result.failed:
            step.result = Result.failed
            db.session.add(step)

        try_create(
            FailureReason, {
                'step_id': step.id,
                'job_id': step.job_id,
                'build_id': step.job.build_id,
                'project_id': step.project_id,
                'reason': 'test_failures'
            })
        db.session.commit()
    _report_jobstep_result(step)
def post(self, project_id):
    """Initiates a new snapshot for this project.

    Responds with 404 for an unknown project and with 400 when the
    revision cannot be identified or the project has no snapshottable
    plans. Otherwise a 'Create Snapshot' build, a pending Snapshot and one
    job, job plan and SnapshotImage per plan are created, the create_job
    and sync_build tasks are enqueued, and the snapshot is returned.
    """
    project = Project.get(project_id)
    if not project:
        return '', 404

    args = self.post_parser.parse_args()

    repository = project.repository

    try:
        revision = identify_revision(repository, args.sha)
    except MissingRevision:
        # if the default fails, we absolutely can't continue and the
        # client should send a valid revision
        return '{"error": "Unable to find a matching revision."}', 400

    # prefer the identified revision's sha over the one that was sent
    sha = revision.sha if revision else args.sha

    plan_list = get_snapshottable_plans(project)
    if not plan_list:
        return '{"error": "No snapshottable plans associated with project."}', 400

    source, _ = get_or_create(Source, where={
        'repository': repository,
        'revision_sha': sha,
    })

    build = Build(
        source_id=source.id,
        source=source,
        project_id=project.id,
        project=project,
        label='Create Snapshot',
        status=Status.queued,
        cause=Cause.snapshot,
        target=sha[:12],
    )
    db.session.add(build)

    # TODO(dcramer): this needs to update with the build result
    snapshot = Snapshot(
        project_id=project.id,
        source_id=source.id,
        build_id=build.id,
        status=SnapshotStatus.pending,
    )
    db.session.add(snapshot)

    created_jobs = []
    for plan in plan_list:
        new_job = Job(
            build=build,
            build_id=build.id,
            project=project,
            project_id=project.id,
            source=build.source,
            source_id=build.source_id,
            status=build.status,
            label='Create Snapshot: %s' % (plan.label,),
        )
        db.session.add(new_job)

        db.session.add(JobPlan.build_jobplan(plan, new_job))

        db.session.add(SnapshotImage(
            job=new_job,
            snapshot=snapshot,
            plan=plan,
        ))

        created_jobs.append(new_job)

    db.session.commit()

    for new_job in created_jobs:
        create_job.delay(
            job_id=new_job.id.hex,
            task_id=new_job.id.hex,
            parent_task_id=new_job.build_id.hex,
        )

    db.session.commit()

    sync_build.delay(
        build_id=build.id.hex,
        task_id=build.id.hex,
    )

    return self.respond(snapshot)
def create_job_plan(self, job, plan, snapshot_id=None):
    """
    Build a job plan for ``job`` from ``plan`` (optionally tied to
    ``snapshot_id``), commit it and return it.
    """
    created = JobPlan.build_jobplan(plan, job, snapshot_id=snapshot_id)

    session = db.session
    session.add(created)
    session.commit()

    return created
def sync_job(job_id):
    """
    Updates jobphase and job statuses based on the status of the constituent jobsteps.

    Returns early for a missing or already finished job. Otherwise the
    build step updates the job, its phases are synced, and the job's
    dates, duration, result and status are derived from the phases and
    child tasks. Raises ``sync_job.NotFinished`` while the job is not
    finished; once finished, aggregates job stats, fires the
    'job.finished' signal and, when a job plan exists, queues
    'update_project_plan_stats'.
    """
    job = Job.query.get(job_id)
    if not job:
        return

    if job.status == Status.finished:
        return

    jobplan, implementation = JobPlan.get_build_step_for_job(job_id=job.id)

    try:
        implementation.update(job=job)
    except UnrecoverableException:
        # processing continues below with the job marked as infra_failed
        job.status = Status.finished
        job.result = Result.infra_failed
        current_app.logger.exception('Unrecoverable exception syncing %s', job.id)

    all_phases = list(job.phases)

    # propagate changes to any phases as they live outside of the
    # normalize synchronization routines
    sync_job_phases(job, all_phases, implementation)

    # finished only if all child tasks AND all phases are finished
    is_finished = sync_job.verify_all_children() == Status.finished
    if any(p.status != Status.finished for p in all_phases):
        is_finished = False

    job.date_started = safe_agg(
        min, (j.date_started for j in all_phases if j.date_started))

    if is_finished:
        job.date_finished = safe_agg(
            max, (j.date_finished for j in all_phases if j.date_finished))
    else:
        job.date_finished = None

    if job.date_started and job.date_finished:
        # duration is stored in milliseconds
        job.duration = int((job.date_finished - job.date_started).total_seconds() * 1000)
    else:
        job.duration = None

    # if any phases are marked as failing, fail the build
    if any(j.result is Result.failed for j in all_phases):
        job.result = Result.failed
    # If any test cases were marked as failing, fail the build.
    # The exception is if the only failing test case occurred in a JobStep that
    # had an infra failure. In this case we can't trust the test case result as
    # being meaningful and so we ignore these.
    elif TestCase.query.join(JobStep, JobStep.id == TestCase.step_id).filter(
            TestCase.result == Result.failed,
            TestCase.job_id == job.id,
            JobStep.result != Result.infra_failed
    ).first():
        job.result = Result.failed
    # if we've finished all phases, use the best result available
    elif is_finished:
        # Sets the final job result.
        implementation.validate(job=job)
    else:
        job.result = Result.unknown

    if is_finished:
        job.status = Status.finished
    else:
        # ensure we dont set the status to finished unless it actually is
        new_status = aggregate_status((j.status for j in all_phases))
        if new_status != Status.finished:
            job.status = new_status
        elif job.status == Status.finished:
            job.status = Status.in_progress
            # NOTE(review): logger.exception() is called outside an except
            # block here, so no real traceback is attached; logger.error()
            # may be what was intended.
            current_app.logger.exception('Job incorrectly marked as finished: %s', job.id)

    if db.session.is_modified(job):
        job.date_modified = datetime.utcnow()

        db.session.add(job)
        db.session.commit()

    if not is_finished:
        raise sync_job.NotFinished

    # stat aggregation is best-effort: a failure is logged, not propagated
    try:
        aggregate_job_stat(job, 'test_count')
        aggregate_job_stat(job, 'test_duration')
        aggregate_job_stat(job, 'test_failures')
        aggregate_job_stat(job, 'test_rerun_count')
        aggregate_job_stat(job, 'tests_missing')
        aggregate_job_stat(job, 'lines_covered')
        aggregate_job_stat(job, 'lines_uncovered')
        aggregate_job_stat(job, 'diff_lines_covered')
        aggregate_job_stat(job, 'diff_lines_uncovered')
    except Exception:
        current_app.logger.exception('Failing recording aggregate stats for job %s', job.id)

    fire_signal.delay(
        signal='job.finished',
        kwargs={'job_id': job.id.hex},
    )

    if jobplan:
        queue.delay('update_project_plan_stats', kwargs={
            'project_id': job.project_id.hex,
            'plan_id': jobplan.plan_id.hex,
        }, countdown=1)
def job(build, change=None, **kwargs):
    """
    Creates a Job for ``build`` together with a node, a jobplan, three phases
    (Setup, Compile, Test) and four jobsteps, adding all of them to the
    database session.

    Any Job field not supplied through ``kwargs`` is filled with a generated
    default; the Test phase and its steps take their status/result from the
    ``status``/``result`` values.

    :param build: the build the job belongs to.
    :param change: optional change to associate with the job.
    :param kwargs: extra Job constructor arguments.
    :return: the newly created Job.
    """
    generated_defaults = (
        ('project', build.project),
        ('label', get_sentences(1)[0][:128]),
        ('status', Status.finished),
        ('result', Result.passed),
        ('duration', random.randint(10000, 100000)),
    )
    for field, fallback in generated_defaults:
        kwargs.setdefault(field, fallback)

    if 'source' not in kwargs:
        kwargs['source'] = source(build.repository)

    kwargs.update(
        source_id=kwargs['source'].id,
        project_id=kwargs['project'].id,
        build_id=build.id,
    )
    if change:
        kwargs['change_id'] = change.id

    new_job = Job(build=build, change=change, **kwargs)
    db.session.add(new_job)

    node, _ = get_or_create(Node, where={
        'label': get_sentences(1)[0][:32],
    })

    db.session.add(JobPlan(
        plan=plan(),
        build=build,
        project=new_job.project,
        job=new_job,
    ))

    # Setup and Compile always pass; Test mirrors the requested outcome.
    created_phases = []
    for phase_label, phase_status, phase_result in (
        ('Setup', Status.finished, Result.passed),
        ('Compile', Status.finished, Result.passed),
        ('Test', kwargs['status'], kwargs['result']),
    ):
        phase = JobPhase(
            project=new_job.project,
            job=new_job,
            status=phase_status,
            result=phase_result,
            label=phase_label,
        )
        db.session.add(phase)
        created_phases.append(phase)
    setup_phase, compile_phase, test_phase = created_phases

    # One step each for Setup and Compile, two for Test; every step copies
    # the status/result of the phase it belongs to.
    for owning_phase, make_label in (
        (setup_phase, lambda: 'Setup'),
        (compile_phase, lambda: 'Compile'),
        (test_phase, TEST_STEP_LABELS.next),
        (test_phase, TEST_STEP_LABELS.next),
    ):
        db.session.add(JobStep(
            project=new_job.project,
            job=new_job,
            phase=owning_phase,
            status=owning_phase.status,
            result=owning_phase.result,
            label=make_label(),
            node=node,
        ))

    return new_job
def job(build, change=None, **kwargs):
    """
    Creates a Job for ``build`` along with its supporting records: a node
    (plus a cluster link when the node is new), a jobplan, three phases
    (Setup, Compile, Test), four jobsteps with one command each and, when the
    Test phase failed, a ``test_failures`` FailureReason. Everything is added
    to the database session.

    The job always uses ``build.source``; other Job fields not supplied in
    ``kwargs`` get generated defaults.

    :param build: the build the job belongs to.
    :param change: optional change to associate with the job.
    :param kwargs: extra Job constructor arguments.
    :return: the newly created Job.
    """
    generated_defaults = (
        ('project', build.project),
        ('label', get_sentences(1)[0][:128]),
        ('status', Status.finished),
        ('result', Result.passed),
        ('duration', random.randint(10000, 100000)),
    )
    for field, fallback in generated_defaults:
        kwargs.setdefault(field, fallback)

    kwargs['source'] = build.source
    kwargs.update(
        source_id=kwargs['source'].id,
        project_id=kwargs['project'].id,
        build_id=build.id,
    )
    if change:
        kwargs['change_id'] = change.id

    new_job = Job(build=build, change=change, **kwargs)
    db.session.add(new_job)

    node, node_is_new = get_or_create(Node, where={
        'label': get_sentences(1)[0][:32],
    })
    if node_is_new:
        # a freshly created node is attached to a cluster
        cluster, _ = get_or_create(Cluster, where={
            'label': get_sentences(1)[0][:32],
        })
        db.session.add(ClusterNode(cluster=cluster, node=node))

    db.session.add(JobPlan.build_jobplan(plan(build.project), new_job))

    # Setup and Compile always pass; Test mirrors the requested outcome.
    created_phases = []
    for phase_label, phase_status, phase_result in (
        ('Setup', Status.finished, Result.passed),
        ('Compile', Status.finished, Result.passed),
        ('Test', kwargs['status'], kwargs['result']),
    ):
        phase = JobPhase(
            project=new_job.project,
            job=new_job,
            date_started=new_job.date_started,
            date_finished=new_job.date_finished,
            status=phase_status,
            result=phase_result,
            label=phase_label,
        )
        db.session.add(phase)
        created_phases.append(phase)
    setup_phase, compile_phase, test_phase = created_phases

    # Each step copies its phase's status/result and gets a single command.
    last_step = None
    for owning_phase, make_label, script in (
        (setup_phase, lambda: 'Setup', "echo 1"),
        (compile_phase, lambda: 'Compile', "echo 2"),
        (test_phase, TEST_STEP_LABELS.next, "echo 3"),
        (test_phase, TEST_STEP_LABELS.next, "echo 4"),
    ):
        last_step = JobStep(
            project=new_job.project,
            job=new_job,
            phase=owning_phase,
            status=owning_phase.status,
            result=owning_phase.result,
            label=make_label(),
            node=node,
        )
        db.session.add(last_step)
        db.session.add(Command(
            jobstep=last_step,
            script=script,
            label=script,
        ))

    if test_phase.result == Result.failed:
        # the failure is attributed to the last Test step created above
        db.session.add(FailureReason(
            reason='test_failures',
            build_id=build.id,
            job_id=new_job.id,
            step_id=last_step.id,
            project_id=new_job.project_id
        ))

    return new_job
def sync_job(job_id):
    """
    Synchronizes a job with its build step implementation and its phases.

    Recomputes the job's start/finish dates, duration, result and status from
    its phases. Once finished, aggregate stats are recorded, the
    ``job.finished`` signal is fired and, when a jobplan exists, an
    ``update_project_plan_stats`` task is queued.

    :param job_id: primary key used to look up the Job.
    :raises sync_job.NotFinished: when ``sync_job.verify_all_children()``
        does not report ``Status.finished``.
    """
    job = Job.query.get(job_id)
    if not job:
        return

    # already finalized jobs are left untouched
    if job.status == Status.finished:
        return

    # TODO(dcramer): we make an assumption that there is a single step
    jobplan, implementation = JobPlan.get_build_step_for_job(job_id=job.id)

    try:
        implementation.update(job=job)

    except UnrecoverableException:
        job.status = Status.finished
        job.result = Result.aborted
        current_app.logger.exception('Unrecoverable exception syncing %s', job.id)

    # here "finished" is decided solely by the child tasks
    is_finished = sync_job.verify_all_children() == Status.finished
    if is_finished:
        job.status = Status.finished

    db.session.flush()

    all_phases = list(job.phases)

    # propagate changes to any phases as they live outside of the
    # normalize synchronization routines
    sync_job_phases(job, all_phases)

    job.date_started = safe_agg(min, (j.date_started for j in all_phases if j.date_started))

    if is_finished:
        job.date_finished = safe_agg(max, (j.date_finished for j in all_phases if j.date_finished))
    else:
        job.date_finished = None

    if job.date_started and job.date_finished:
        # duration is stored in milliseconds
        job.duration = int(
            (job.date_finished - job.date_started).total_seconds() * 1000)
    else:
        job.duration = None

    # if any phases are marked as failing, fail the build
    if any(j.result is Result.failed for j in all_phases):
        job.result = Result.failed
    # if any test cases were marked as failing, fail the build
    elif TestCase.query.filter(TestCase.result == Result.failed, TestCase.job_id == job.id).first():
        job.result = Result.failed
    # if we've finished all phases, use the best result available
    elif is_finished:
        job.result = safe_agg(max, (j.result for j in all_phases))
    else:
        job.result = Result.unknown

    # status: finished > in_progress (any phase left the queue) > queued
    if is_finished:
        job.status = Status.finished
    elif any(j.status is not Status.queued for j in all_phases):
        job.status = Status.in_progress
    else:
        job.status = Status.queued

    if db.session.is_modified(job):
        job.date_modified = datetime.utcnow()

        db.session.add(job)
        db.session.commit()

    if not is_finished:
        raise sync_job.NotFinished

    # stats are best-effort: a failure is logged and the remaining stats are
    # skipped, but the finished signal below is still fired
    try:
        aggregate_job_stat(job, 'test_count')
        aggregate_job_stat(job, 'test_duration')
        aggregate_job_stat(job, 'test_failures')
        aggregate_job_stat(job, 'test_rerun_count')
        aggregate_job_stat(job, 'tests_missing')
        aggregate_job_stat(job, 'lines_covered')
        aggregate_job_stat(job, 'lines_uncovered')
        aggregate_job_stat(job, 'diff_lines_covered')
        aggregate_job_stat(job, 'diff_lines_uncovered')
    except Exception:
        current_app.logger.exception(
            'Failing recording aggregate stats for job %s', job.id)

    fire_signal.delay(
        signal='job.finished',
        kwargs={'job_id': job.id.hex},
    )

    if jobplan:
        queue.delay('update_project_plan_stats', kwargs={
            'project_id': job.project_id.hex,
            'plan_id': jobplan.plan_id.hex,
        }, countdown=1)
def sync_job_step(step_id):
    """
    Synchronizes a single jobstep with its build step implementation.

    While the step (or its child tasks) is unfinished, a timed-out step is
    cancelled and marked failed with a ``timeout`` FailureReason, and
    ``NotFinished`` is raised. Once finished (and not aborted), coverage
    stats are recorded and the step is failed with a ``missing_tests`` and/or
    ``test_failures`` FailureReason where applicable.

    :param step_id: primary key used to look up the JobStep.
    :raises sync_job_step.NotFinished: when the step has not finished; its
        ``retry_after`` is ``QUEUED_RETRY_DELAY`` unless the step is in
        progress, in which case it is ``None``.
    """
    step = JobStep.query.get(step_id)
    if not step:
        return

    jobplan, implementation = JobPlan.get_build_step_for_job(
        job_id=step.job_id)

    def fail_step(reason):
        # Marks the step failed (unless it already is), records the reason
        # and commits.
        if step.result != Result.failed:
            step.result = Result.failed
            db.session.add(step)

        try_create(
            FailureReason, {
                'step_id': step.id,
                'job_id': step.job_id,
                'build_id': step.job.build_id,
                'project_id': step.project_id,
                'reason': reason
            })
        db.session.commit()

    # only synchronize if upstream hasn't suggested we're finished
    if step.status != Status.finished:
        implementation.update_step(step=step)

    db.session.flush()

    # child tasks are only consulted once the step itself is finished
    is_finished = (
        step.status == Status.finished and
        sync_job_step.verify_all_children() == Status.finished)

    if not is_finished:
        if has_timed_out(step, jobplan):
            implementation.cancel_step(step=step)

            step.result = Result.failed
            db.session.add(step)

            owning_job = step.job
            try_create(
                FailureReason, {
                    'step_id': step.id,
                    'job_id': owning_job.id,
                    'build_id': owning_job.build_id,
                    'project_id': owning_job.project_id,
                    'reason': 'timeout'
                })

            db.session.flush()

        not_running = step.status != Status.in_progress
        raise sync_job_step.NotFinished(
            retry_after=QUEUED_RETRY_DELAY if not_running else None)

    # ignore any 'failures' if its aborted
    if step.result == Result.aborted:
        return

    try:
        record_coverage_stats(step)
    except Exception:
        current_app.logger.exception(
            'Failing recording coverage stats for step %s', step.id)

    missing_tests = is_missing_tests(step, jobplan)

    try_create(
        ItemStat,
        where={
            'item_id': step.id,
            'name': 'tests_missing',
        },
        defaults={'value': int(missing_tests)})

    if step.result == Result.passed and missing_tests:
        step.result = Result.failed
        db.session.add(step)

    if missing_tests:
        fail_step('missing_tests')

    db.session.flush()

    if has_test_failures(step):
        fail_step('test_failures')
def post(self):
    """
    Allocates pending jobsteps against the resources offered in the request.

    The JSON request body must contain a ``resources`` object; its ``cpus``
    and ``mem`` (MB) entries describe the offered capacity. Up to 10
    candidate jobsteps are considered while holding the ``jobstep:allocate``
    redis lock, and each one that fits into the remaining capacity is marked
    ``Status.allocated``.

    :return: the response for the list of allocated jobsteps (each with
        ``project``, ``resources`` and ``cmd`` added), or an error response
        when ``resources`` is missing or the lock is already held.
    """
    args = json.loads(request.data)

    try:
        resources = args['resources']
    except KeyError:
        return error('Missing resources attribute')

    # A missing ``cpus``/``mem`` entry counts as nothing offered (0) instead
    # of letting a KeyError escape as an unhandled server error.
    total_cpus = int(resources.get('cpus', 0))
    total_mem = int(resources.get('mem', 0))  # MB

    with statsreporter.stats().timer('jobstep_allocate'):
        try:
            # nowait: fail immediately if another allocation is running
            with redis.lock('jobstep:allocate', nowait=True):
                available_allocations = self.find_next_jobsteps(limit=10)
                to_allocate = []
                for jobstep in available_allocations:
                    req_cpus = jobstep.data.get('cpus', 4)
                    req_mem = jobstep.data.get('mem', 8 * 1024)

                    if total_cpus >= req_cpus and total_mem >= req_mem:
                        # reserve the capacity so later candidates only see
                        # what is left of the offer
                        total_cpus -= req_cpus
                        total_mem -= req_mem

                        jobstep.status = Status.allocated
                        db.session.add(jobstep)

                        to_allocate.append(jobstep)
                    else:
                        logging.info('Not allocating %s due to lack of offered resources', jobstep.id.hex)

                if not to_allocate:
                    # Should 204, but flask/werkzeug throws StopIteration (bug!) for tests
                    return self.respond([])

                db.session.flush()
        except UnableToGetLock:
            return error('Another allocation is in progress', http_code=503)

        context = []

        for jobstep, jobstep_data in zip(to_allocate, self.serialize(to_allocate)):
            try:
                jobplan, buildstep = JobPlan.get_build_step_for_job(jobstep.job_id)
                assert jobplan and buildstep
                jobstep_data['project'] = self.serialize(jobstep.project)
                jobstep_data['resources'] = {
                    'cpus': jobstep.data.get('cpus', 4),
                    'mem': jobstep.data.get('mem', 8 * 1024),
                }
                jobstep_data['cmd'] = buildstep.get_allocation_command(jobstep)
            except Exception:
                # a jobstep that cannot be prepared is finished as an infra
                # failure and left out of the response
                jobstep.status = Status.finished
                jobstep.result = Result.infra_failed
                db.session.add(jobstep)
                db.session.flush()

                logging.exception(
                    'Exception occurred while allocating job step %s for project %s',
                    jobstep.id.hex, jobstep.project.slug)
            else:
                context.append(jobstep_data)

        return self.respond(context)
def post(self, project_id):
    """
    Initiates a new snapshot for this project.

    Creates a queued snapshot build for the requested revision, a pending
    Snapshot, and one Job/JobPlan/SnapshotImage per snapshottable plan, then
    dispatches ``create_job`` for each job and ``sync_build`` for the build.

    :param project_id: identifier passed to ``Project.get``.
    :return: the serialized snapshot; ``('', 404)`` when the project does
        not exist; a JSON error body with status 400 when the revision
        cannot be found or the project has no snapshottable plans.
    """
    project = Project.get(project_id)
    if not project:
        return '', 404

    args = self.post_parser.parse_args()
    repository = project.repository

    try:
        revision = identify_revision(repository, args.sha)
    except MissingRevision:
        # if the default fails, we absolutely can't continue and the
        # client should send a valid revision
        return '{"error": "Unable to find a matching revision."}', 400

    # prefer the sha of the identified revision over the raw argument
    sha = revision.sha if revision else args.sha

    plan_list = get_snapshottable_plans(project)
    if not plan_list:
        return '{"error": "No snapshottable plans associated with project."}', 400

    source, _ = get_or_create(Source, where={
        'repository': repository,
        'revision_sha': sha,
        'patch_id': None,
    })

    build = Build(
        source_id=source.id,
        source=source,
        project_id=project.id,
        project=project,
        label='Create Snapshot',
        status=Status.queued,
        cause=Cause.snapshot,
        target=sha[:12],
    )
    db.session.add(build)

    # TODO(dcramer): this needs to update with the build result
    snapshot = Snapshot(
        project_id=project.id,
        source_id=source.id,
        build_id=build.id,
        status=SnapshotStatus.pending,
    )
    db.session.add(snapshot)

    snapshot_jobs = []
    for snapshot_plan in plan_list:
        snapshot_job = Job(
            build=build,
            build_id=build.id,
            project=project,
            project_id=project.id,
            source=build.source,
            source_id=build.source_id,
            status=build.status,
            label='Create Snapshot: %s' % (snapshot_plan.label, ),
        )
        db.session.add(snapshot_job)

        db.session.add(JobPlan.build_jobplan(snapshot_plan, snapshot_job))

        db.session.add(SnapshotImage(
            job=snapshot_job,
            snapshot=snapshot,
            plan=snapshot_plan,
        ))

        snapshot_jobs.append(snapshot_job)

    # persist everything before any task is dispatched
    db.session.commit()

    for queued_job in snapshot_jobs:
        job_hex = queued_job.id.hex
        create_job.delay(
            job_id=job_hex,
            task_id=job_hex,
            parent_task_id=queued_job.build_id.hex,
        )

    db.session.commit()

    build_hex = build.id.hex
    sync_build.delay(
        build_id=build_hex,
        task_id=build_hex,
    )

    return self.respond(snapshot)
def sync_job(job_id):
    """
    Synchronizes a job with its build step implementation and its phases.

    The whole synchronization runs inside ``RCount('sync_job')``. The job's
    dates, duration, result and status are recomputed from its phases; once
    finished, aggregate stats are recorded, the ``job.finished`` signal is
    fired and, when a jobplan exists, an ``update_project_plan_stats`` task
    is queued.

    :param job_id: primary key used to look up the Job.
    :raises sync_job.NotFinished: when the child tasks or any phase have not
        reached ``Status.finished`` yet.
    """
    with RCount('sync_job'):
        job = Job.query.get(job_id)
        if not job:
            return

        # nothing to do for jobs that were already finalized
        if job.status == Status.finished:
            return

        # TODO(dcramer): we make an assumption that there is a single step
        jobplan, implementation = JobPlan.get_build_step_for_job(job_id=job.id)

        try:
            implementation.update(job=job)
        except UnrecoverableException:
            job.status = Status.finished
            job.result = Result.aborted
            current_app.logger.exception('Unrecoverable exception syncing %s', job.id)

        all_phases = list(job.phases)

        # propagate changes to any phases as they live outside of the
        # normalize synchronization routines
        sync_job_phases(job, all_phases)

        # finished means: child tasks are finished AND every phase is finished
        is_finished = sync_job.verify_all_children() == Status.finished
        if any(p.status != Status.finished for p in all_phases):
            is_finished = False

        job.date_started = safe_agg(
            min, (j.date_started for j in all_phases if j.date_started))

        if is_finished:
            job.date_finished = safe_agg(
                max, (j.date_finished for j in all_phases if j.date_finished))
        else:
            job.date_finished = None

        if job.date_started and job.date_finished:
            # duration is stored in milliseconds
            job.duration = int((job.date_finished - job.date_started).total_seconds() * 1000)
        else:
            job.duration = None

        # if any phases are marked as failing, fail the build
        if any(j.result is Result.failed for j in all_phases):
            job.result = Result.failed
        # if any test cases were marked as failing, fail the build
        elif TestCase.query.filter(TestCase.result == Result.failed,
                                   TestCase.job_id == job.id).first():
            job.result = Result.failed
        # if we've finished all phases, use the best result available
        elif is_finished:
            job.result = aggregate_result((j.result for j in all_phases))
        else:
            job.result = Result.unknown

        if is_finished:
            job.status = Status.finished
        else:
            # ensure we dont set the status to finished unless it actually is
            new_status = aggregate_status((j.status for j in all_phases))
            if new_status != Status.finished:
                job.status = new_status
            elif job.status == Status.finished:
                job.status = Status.in_progress
                # No exception is being handled here, so log a plain error
                # instead of logger.exception (which would attach an empty
                # traceback).
                current_app.logger.error('Job incorrectly marked as finished: %s', job.id)

        if db.session.is_modified(job):
            job.date_modified = datetime.utcnow()

            db.session.add(job)
            db.session.commit()

        if not is_finished:
            raise sync_job.NotFinished

        # Stats are best-effort: the first failure is logged and the
        # remaining stats are skipped, but the job is still reported as
        # finished.
        try:
            for stat_name in (
                'test_count',
                'test_duration',
                'test_failures',
                'test_rerun_count',
                'tests_missing',
                'lines_covered',
                'lines_uncovered',
                'diff_lines_covered',
                'diff_lines_uncovered',
            ):
                aggregate_job_stat(job, stat_name)
        except Exception:
            current_app.logger.exception('Failing recording aggregate stats for job %s', job.id)

        fire_signal.delay(
            signal='job.finished',
            kwargs={'job_id': job.id.hex},
        )

        if jobplan:
            queue.delay('update_project_plan_stats', kwargs={
                'project_id': job.project_id.hex,
                'plan_id': jobplan.plan_id.hex,
            }, countdown=1)
def sync_job_step(step_id):
    """
    Synchronizes a single jobstep with its build step implementation.

    While the step (or its child tasks) is unfinished, a timed-out step is
    cancelled and marked failed with a ``timeout`` FailureReason, and
    ``NotFinished`` is raised. Once finished (and not aborted), coverage
    stats are recorded and the step is failed with a ``missing_tests`` and/or
    ``test_failures`` FailureReason where applicable.

    :param step_id: primary key used to look up the JobStep.
    :raises sync_job_step.NotFinished: when the step has not finished.
    """
    step = JobStep.query.get(step_id)
    if not step:
        return

    jobplan, implementation = JobPlan.get_build_step_for_job(job_id=step.job_id)

    def fail_step(reason):
        # Marks the step failed (unless it already is), records the reason
        # and commits.
        if step.result != Result.failed:
            step.result = Result.failed
            db.session.add(step)

        try_create(FailureReason, {
            'step_id': step.id,
            'job_id': step.job_id,
            'build_id': step.job.build_id,
            'project_id': step.project_id,
            'reason': reason
        })
        db.session.commit()

    # only synchronize if upstream hasn't suggested we're finished
    if step.status != Status.finished:
        implementation.update_step(step=step)

    db.session.flush()

    # child tasks are only consulted once the step itself is finished
    is_finished = (
        step.status == Status.finished and
        sync_job_step.verify_all_children() == Status.finished)

    if not is_finished:
        if has_timed_out(step, jobplan):
            implementation.cancel_step(step=step)

            step.result = Result.failed
            db.session.add(step)

            owning_job = step.job
            try_create(FailureReason, {
                'step_id': step.id,
                'job_id': owning_job.id,
                'build_id': owning_job.build_id,
                'project_id': owning_job.project_id,
                'reason': 'timeout'
            })

            db.session.flush()

        raise sync_job_step.NotFinished

    # ignore any 'failures' if its aborted
    if step.result == Result.aborted:
        return

    try:
        record_coverage_stats(step)
    except Exception:
        current_app.logger.exception('Failing recording coverage stats for step %s', step.id)

    missing_tests = is_missing_tests(step, jobplan)

    try_create(
        ItemStat,
        where={
            'item_id': step.id,
            'name': 'tests_missing',
        },
        defaults={
            'value': int(missing_tests)
        })

    if step.result == Result.passed and missing_tests:
        step.result = Result.failed
        db.session.add(step)

    if missing_tests:
        fail_step('missing_tests')

    db.session.flush()

    if has_test_failures(step):
        fail_step('test_failures')
def sync_job(job_id):
    """
    Updates jobphase and job statuses based on the status of the constituent jobsteps.

    The job's build step implementation is asked to refresh the job, the
    job's phases are synchronized, and the job's start/finish dates,
    duration, result and status are then recomputed from those phases. Once
    everything has finished, aggregate stats are recorded, the
    ``job.finished`` signal is fired and (when a jobplan exists) a
    ``update_project_plan_stats`` task is queued.

    :param job_id: primary key used to look up the Job.
    :raises sync_job.NotFinished: when the child tasks or any phase have not
        reached ``Status.finished`` yet.
    """
    job = Job.query.get(job_id)
    if not job:
        return

    # nothing to do for jobs that were already finalized
    if job.status == Status.finished:
        return

    # TODO(dcramer): we make an assumption that there is a single step
    jobplan, implementation = JobPlan.get_build_step_for_job(job_id=job.id)

    try:
        implementation.update(job=job)
    except UnrecoverableException:
        job.status = Status.finished
        job.result = Result.infra_failed
        current_app.logger.exception('Unrecoverable exception syncing %s', job.id)

    all_phases = list(job.phases)

    # propagate changes to any phases as they live outside of the
    # normalize synchronization routines
    sync_job_phases(job, all_phases, implementation)

    # finished means: child tasks are finished AND every phase is finished
    is_finished = sync_job.verify_all_children() == Status.finished
    if any(p.status != Status.finished for p in all_phases):
        is_finished = False

    job.date_started = safe_agg(min, (j.date_started for j in all_phases if j.date_started))

    if is_finished:
        job.date_finished = safe_agg(max, (j.date_finished for j in all_phases if j.date_finished))
    else:
        job.date_finished = None

    if job.date_started and job.date_finished:
        # duration is stored in milliseconds
        job.duration = int(
            (job.date_finished - job.date_started).total_seconds() * 1000)
    else:
        job.duration = None

    # if any phases are marked as failing, fail the build
    if any(j.result is Result.failed for j in all_phases):
        job.result = Result.failed
    # If any test cases were marked as failing, fail the build.
    # The exception is if the only failing test case occurred in a JobStep that
    # had an infra failure. In this case we can't trust the test case result as
    # being meaningful and so we ignore these.
    elif TestCase.query.join(JobStep, JobStep.id == TestCase.step_id).filter(
            TestCase.result == Result.failed,
            TestCase.job_id == job.id,
            JobStep.result != Result.infra_failed).first():
        job.result = Result.failed
    # if we've finished all phases, use the best result available
    elif is_finished:
        # Sets the final job result.
        implementation.validate(job=job)
    else:
        job.result = Result.unknown

    if is_finished:
        job.status = Status.finished
    else:
        # ensure we dont set the status to finished unless it actually is
        new_status = aggregate_status((j.status for j in all_phases))
        if new_status != Status.finished:
            job.status = new_status
        elif job.status == Status.finished:
            job.status = Status.in_progress
            # No exception is being handled here, so log a plain error
            # instead of logger.exception (which would attach an empty
            # traceback).
            current_app.logger.error(
                'Job incorrectly marked as finished: %s', job.id)

    if db.session.is_modified(job):
        job.date_modified = datetime.utcnow()

        db.session.add(job)
        db.session.commit()

    if not is_finished:
        raise sync_job.NotFinished

    # Stats are best-effort: the first failure is logged and the remaining
    # stats are skipped, but the job is still reported as finished.
    try:
        for stat_name in (
            'test_count',
            'test_duration',
            'test_failures',
            'test_rerun_count',
            'tests_missing',
            'lines_covered',
            'lines_uncovered',
            'diff_lines_covered',
            'diff_lines_uncovered',
        ):
            aggregate_job_stat(job, stat_name)
    except Exception:
        current_app.logger.exception(
            'Failing recording aggregate stats for job %s', job.id)

    fire_signal.delay(
        signal='job.finished',
        kwargs={'job_id': job.id.hex},
    )

    if jobplan:
        queue.delay('update_project_plan_stats', kwargs={
            'project_id': job.project_id.hex,
            'plan_id': jobplan.plan_id.hex,
        }, countdown=1)
def create_job_plan(self, job, plan):
    """
    Builds a JobPlan linking ``plan`` to ``job``, commits it and returns it.

    :param job: the job the plan is built for.
    :param plan: the plan to build the jobplan from.
    :return: the committed JobPlan.
    """
    session = db.session
    created_jobplan = JobPlan.build_jobplan(plan, job)
    session.add(created_jobplan)
    session.commit()
    return created_jobplan
def post(self):
    """
    Allocates pending jobsteps against the resources offered in the request.

    The JSON request body must contain a ``resources`` object; its ``cpus``
    and ``mem`` (MB) entries describe the offered capacity. Up to 10
    candidate jobsteps are considered while holding the ``jobstep:allocate``
    redis lock, and each one that fits into the remaining capacity is marked
    ``Status.allocated``.

    :return: the response for the list of allocated jobsteps (each with
        ``project``, ``resources`` and ``cmd`` added), or an error response
        when ``resources`` is missing or the lock is already held.
    """
    args = json.loads(request.data)

    try:
        resources = args['resources']
    except KeyError:
        return error('Missing resources attribute')

    # A missing ``cpus``/``mem`` entry counts as nothing offered (0) instead
    # of letting a KeyError escape as an unhandled server error.
    total_cpus = int(resources.get('cpus', 0))
    total_mem = int(resources.get('mem', 0))  # MB

    try:
        # nowait: fail immediately if another allocation is running
        with redis.lock('jobstep:allocate', nowait=True):
            available_allocations = self.find_next_jobsteps(limit=10)
            to_allocate = []
            for jobstep in available_allocations:
                req_cpus = jobstep.data.get('cpus', 4)
                req_mem = jobstep.data.get('mem', 8 * 1024)

                if total_cpus >= req_cpus and total_mem >= req_mem:
                    # reserve the capacity so later candidates only see what
                    # is left of the offer
                    total_cpus -= req_cpus
                    total_mem -= req_mem

                    jobstep.status = Status.allocated
                    db.session.add(jobstep)

                    to_allocate.append(jobstep)
                else:
                    logging.info(
                        'Not allocating %s due to lack of offered resources',
                        jobstep.id.hex)

            if not to_allocate:
                # Should 204, but flask/werkzeug throws StopIteration (bug!) for tests
                return self.respond([])

            db.session.flush()
    except redis.UnableToGetLock:
        return error('Another allocation is in progress', http_code=503)

    context = []

    for jobstep, jobstep_data in zip(to_allocate, self.serialize(to_allocate)):
        try:
            jobplan, buildstep = JobPlan.get_build_step_for_job(
                jobstep.job_id)
            assert jobplan and buildstep
            jobstep_data['project'] = self.serialize(jobstep.project)
            jobstep_data['resources'] = {
                'cpus': jobstep.data.get('cpus', 4),
                'mem': jobstep.data.get('mem', 8 * 1024),
            }
            jobstep_data['cmd'] = buildstep.get_allocation_command(jobstep)
        except Exception:
            # a jobstep that cannot be prepared is finished as aborted and
            # left out of the response
            jobstep.status = Status.finished
            jobstep.result = Result.aborted
            db.session.add(jobstep)
            db.session.flush()

            logging.exception(
                'Exception occurred while allocating job step %s for project %s',
                jobstep.id.hex, jobstep.project.slug)
        else:
            context.append(jobstep_data)

    return self.respond(context)
def post(self):
    """
    Allocates pending jobsteps against the resources offered in the request.

    The JSON request body must contain a ``resources`` object whose optional
    ``cpus`` and ``mem`` (MB) entries describe the offered capacity. Up to 10
    candidate jobsteps are considered while holding the ``jobstep:allocate``
    redis lock; each one that fits is marked ``Status.allocated`` and the
    time since its creation is reported as ``duration_pending_allocation``.

    :return: the response for the list of allocated jobsteps (each with
        ``project``, ``resources`` and ``cmd`` added), or an error response
        when ``resources`` is missing or the lock is already held.
    """
    payload = json.loads(request.data)

    try:
        offered = payload['resources']
    except KeyError:
        return error('Missing resources attribute')

    # cpu and mem as 0 are treated by changes-client
    # as having no enforced limit
    cpus_left = int(offered.get('cpus', 0))
    mem_left = int(offered.get('mem', 0))  # MB

    with statsreporter.stats().timer('jobstep_allocate'):
        try:
            with redis.lock('jobstep:allocate', nowait=True):
                candidates = self.find_next_jobsteps(limit=10)
                to_allocate = []
                for jobstep in candidates:
                    needed_cpus = jobstep.data.get('cpus', 4)
                    needed_mem = jobstep.data.get('mem', 8 * 1024)

                    fits = cpus_left >= needed_cpus and mem_left >= needed_mem
                    if not fits:
                        logging.info(
                            'Not allocating %s due to lack of offered resources',
                            jobstep.id.hex)
                        continue

                    cpus_left -= needed_cpus
                    mem_left -= needed_mem

                    jobstep.status = Status.allocated
                    db.session.add(jobstep)
                    to_allocate.append(jobstep)

                    # The JobSteps returned are pending_allocation, and the initial state for a Mesos JobStep is
                    # pending_allocation, so we can determine how long it was pending by how long ago it was
                    # created.
                    waited = datetime.utcnow() - jobstep.date_created
                    pending_seconds = waited.total_seconds()
                    statsreporter.stats().log_timing(
                        'duration_pending_allocation', pending_seconds * 1000)

                if not to_allocate:
                    # Should 204, but flask/werkzeug throws StopIteration (bug!) for tests
                    return self.respond([])

                db.session.flush()
        except UnableToGetLock:
            return error('Another allocation is in progress', http_code=503)

        context = []
        serialized = self.serialize(to_allocate)

        for jobstep, jobstep_data in zip(to_allocate, serialized):
            try:
                jobplan, buildstep = JobPlan.get_build_step_for_job(
                    jobstep.job_id)
                assert jobplan and buildstep
                jobstep_data['project'] = self.serialize(jobstep.project)
                jobstep_data['resources'] = {
                    'cpus': jobstep.data.get('cpus', 4),
                    'mem': jobstep.data.get('mem', 8 * 1024),
                }
                jobstep_data['cmd'] = buildstep.get_allocation_command(
                    jobstep)
            except Exception:
                # could not prepare this jobstep: finish it as an infra
                # failure and leave it out of the response
                jobstep.status = Status.finished
                jobstep.result = Result.infra_failed
                db.session.add(jobstep)
                db.session.flush()

                logging.exception(
                    'Exception occurred while allocating job step %s for project %s',
                    jobstep.id.hex, jobstep.project.slug)
            else:
                context.append(jobstep_data)

        return self.respond(context)
def post(self):
    """
    Hands out pending jobsteps that fit into the resources offered by the
    request.

    Expects a JSON body with a ``resources`` object (optional ``cpus`` and
    ``mem`` in MB). While holding the ``jobstep:allocate`` redis lock, up to
    10 candidate jobsteps are checked in order; those that fit are marked
    ``Status.allocated`` and their pending time is reported as
    ``duration_pending_allocation``.

    :return: the response for the list of allocated jobsteps (each with
        ``project``, ``resources`` and ``cmd`` added), or an error response
        when ``resources`` is missing or the lock is already held.
    """
    body = json.loads(request.data)

    try:
        resource_offer = body['resources']
    except KeyError:
        return error('Missing resources attribute')

    # cpu and mem as 0 are treated by changes-client
    # as having no enforced limit
    remaining_cpus = int(resource_offer.get('cpus', 0))
    remaining_mem = int(resource_offer.get('mem', 0))  # MB

    with statsreporter.stats().timer('jobstep_allocate'):
        try:
            with redis.lock('jobstep:allocate', nowait=True):
                pending_steps = self.find_next_jobsteps(limit=10)
                to_allocate = []
                for jobstep in pending_steps:
                    want_cpus = jobstep.data.get('cpus', 4)
                    want_mem = jobstep.data.get('mem', 8 * 1024)

                    if not (remaining_cpus >= want_cpus and remaining_mem >= want_mem):
                        logging.info('Not allocating %s due to lack of offered resources', jobstep.id.hex)
                        continue

                    remaining_cpus -= want_cpus
                    remaining_mem -= want_mem

                    jobstep.status = Status.allocated
                    db.session.add(jobstep)
                    to_allocate.append(jobstep)

                    # The JobSteps returned are pending_allocation, and the initial state for a Mesos JobStep is
                    # pending_allocation, so we can determine how long it was pending by how long ago it was
                    # created.
                    pending_seconds = (datetime.utcnow() - jobstep.date_created).total_seconds()
                    pending_ms = pending_seconds * 1000
                    statsreporter.stats().log_timing('duration_pending_allocation', pending_ms)

                if not to_allocate:
                    # Should 204, but flask/werkzeug throws StopIteration (bug!) for tests
                    return self.respond([])

                db.session.flush()
        except UnableToGetLock:
            return error('Another allocation is in progress', http_code=503)

        context = []
        allocated_pairs = zip(to_allocate, self.serialize(to_allocate))

        for jobstep, jobstep_data in allocated_pairs:
            try:
                jobplan, buildstep = JobPlan.get_build_step_for_job(jobstep.job_id)
                assert jobplan and buildstep
                jobstep_data['project'] = self.serialize(jobstep.project)
                jobstep_data['resources'] = {
                    'cpus': jobstep.data.get('cpus', 4),
                    'mem': jobstep.data.get('mem', 8 * 1024),
                }
                jobstep_data['cmd'] = buildstep.get_allocation_command(jobstep)
            except Exception:
                # could not prepare this jobstep: finish it as an infra
                # failure and leave it out of the response
                jobstep.status = Status.finished
                jobstep.result = Result.infra_failed
                db.session.add(jobstep)
                db.session.flush()

                logging.exception(
                    'Exception occurred while allocating job step %s for project %s',
                    jobstep.id.hex, jobstep.project.slug)
            else:
                context.append(jobstep_data)

        return self.respond(context)
def sync_job_step(step_id):
    """
    Polls a jenkins build for updates. May have sync_artifact children.

    Refreshes the step through its build step implementation, syncs its
    artifacts and, once the step and its child tasks are finished, derives
    the final result from failure reasons, missing tests and test failures
    before reporting it via ``_report_jobstep_result``.

    :param step_id: primary key used to look up the JobStep.
    :raises sync_job_step.NotFinished: when the step or its child tasks have
        not finished, or when tests are expected but the step's phase has no
        ``date_started`` yet.
    """
    step = JobStep.query.get(step_id)
    if not step:
        return

    jobplan, implementation = JobPlan.get_build_step_for_job(job_id=step.job_id)

    # only synchronize if upstream hasn't suggested we're finished
    if step.status != Status.finished:
        implementation.update_step(step=step)

    db.session.flush()

    _sync_from_artifact_store(step)

    if step.status == Status.finished:
        _sync_artifacts_for_jobstep(step)

    is_finished = (step.status == Status.finished and
                   # make sure all child tasks (like sync_artifact) have also finished
                   sync_job_step.verify_all_children() == Status.finished)

    if not is_finished:
        default_timeout = current_app.config['DEFAULT_JOB_TIMEOUT_MIN']
        if has_timed_out(step, jobplan, default_timeout=default_timeout):
            # remember the pre-timeout status; it decides the result below
            old_status = step.status
            step.data['timed_out'] = True
            implementation.cancel_step(step=step)

            # Not all implementations can actually cancel, but it's dead to us as of now
            # so we mark it as finished.
            step.status = Status.finished
            step.date_finished = datetime.utcnow()

            # Implementations default to marking canceled steps as aborted,
            # but we're not canceling on good terms (it should be done by now)
            # so we consider it a failure here.
            #
            # We check whether the step was marked as in_progress to make a best
            # guess as to whether this is an infrastructure failure, or the
            # repository under test is just taking too long. This won't be 100%
            # reliable, but is probably good enough.
            if old_status == Status.in_progress:
                step.result = Result.failed
            else:
                step.result = Result.infra_failed
            db.session.add(step)

            job = step.job
            try_create(FailureReason, {
                'step_id': step.id,
                'job_id': job.id,
                'build_id': job.build_id,
                'project_id': job.project_id,
                'reason': 'timeout'
            })

            db.session.flush()
            statsreporter.stats().incr('job_step_timed_out')
            # If we timeout something that isn't in progress, that's our fault, and we should know.
            if old_status != Status.in_progress:
                current_app.logger.warning(
                    "Timed out jobstep that wasn't in progress: %s (was %s)", step.id, old_status)

        # raised whether or not the step timed out on this pass
        raise sync_job_step.NotFinished

    # Ignore any 'failures' if the build did not finish properly.
    # NOTE(josiah): we might want to include "unknown" and "skipped" here as
    # well, or have some named condition like "not meaningful_result(step.result)".
    if step.result in (Result.aborted, Result.infra_failed):
        _report_jobstep_result(step)
        return

    # Check for FailureReason objects generated by child jobs
    failure_result = _result_from_failure_reasons(step)
    if failure_result and failure_result != step.result:
        step.result = failure_result
        db.session.add(step)
        db.session.commit()
        # an infra failure ends processing here, like the check above
        if failure_result == Result.infra_failed:
            _report_jobstep_result(step)
            return

    # coverage stats are best-effort; a failure is only logged
    try:
        record_coverage_stats(step)
    except Exception:
        current_app.logger.exception('Failing recording coverage stats for step %s', step.id)

    # We need the start time of this step's phase to determine if we're part of
    # the last phase. So, if date_started is empty, wait for sync_phase to catch
    # up and try again.
    if _expects_tests(jobplan) and not step.phase.date_started:
        current_app.logger.warning(
            "Phase[%s].date_started is missing. Retrying Step", step.phase.id)

        # Reset result to unknown to reduce window where test might be incorrectly green.
        # Set status to in_progress so that the next sync_job_step will fetch status from Jenkins again.
        step.result = Result.unknown
        step.status = Status.in_progress
        raise sync_job_step.NotFinished

    missing_tests = is_missing_tests(step, jobplan)

    try_create(ItemStat, where={
        'item_id': step.id,
        'name': 'tests_missing',
        'value': int(missing_tests),
    })

    if missing_tests:
        if step.result != Result.failed:
            step.result = Result.failed
            db.session.add(step)

        try_create(FailureReason, {
            'step_id': step.id,
            'job_id': step.job_id,
            'build_id': step.job.build_id,
            'project_id': step.project_id,
            'reason': 'missing_tests'
        })
        db.session.commit()

    db.session.flush()

    if has_test_failures(step):
        if step.result != Result.failed:
            step.result = Result.failed
            db.session.add(step)

        try_create(FailureReason, {
            'step_id': step.id,
            'job_id': step.job_id,
            'build_id': step.job.build_id,
            'project_id': step.project_id,
            'reason': 'test_failures'
        })
        db.session.commit()
    _report_jobstep_result(step)