def verify_all_children(self):
    """
    Check on every unfinished child task of this task.

    Children for which ``needs_expired`` is true are marked finished with
    an aborted result. Of the remaining children, those for which
    ``needs_requeued`` is true and whose data holds a ``kwargs`` entry are
    enqueued again, and a per-task-name counter is reported for them.

    Returns ``Status.finished`` when no unfinished, non-expired child is
    left, otherwise ``Status.in_progress``.
    """
    children = list(Task.query.filter(
        Task.parent_id == self.task_id,
        Task.status != Status.finished,
    ))
    if not children:
        return Status.finished

    now = datetime.utcnow()
    expired = set()
    revive = set()
    any_pending = False

    for child in children:
        if self.needs_expired(child):
            expired.add(child)
        else:
            any_pending = True
            if self.needs_requeued(child) and 'kwargs' in child.data:
                revive.add(child)

    if expired:
        expired_ids = [t.id for t in expired]
        Task.query.filter(
            Task.id.in_(expired_ids),
        ).update({
            Task.date_modified: now,
            Task.date_finished: now,
            Task.status: Status.finished,
            Task.result: Result.aborted,
        }, synchronize_session=False)
        db.session.commit()

    if revive:
        for child in revive:
            # Rebuild the original call, re-attaching the tracking ids.
            payload = child.data['kwargs'].copy()
            payload['parent_task_id'] = child.parent_id.hex
            payload['task_id'] = child.task_id.hex
            queue.delay(child.task_name, kwargs=payload)

        revived_ids = [t.id for t in revive]
        Task.query.filter(
            Task.id.in_(revived_ids),
        ).update({
            Task.date_modified: now,
        }, synchronize_session=False)
        db.session.commit()

        revived_by_name = Counter(t.task_name for t in revive)
        for name, count in revived_by_name.iteritems():
            statsreporter.stats().incr('task_revived_by_parent_' + name, count)

    return Status.in_progress if any_pending else Status.finished
def verify_all_children(self):
    """
    Inspect the unfinished children of this task.

    Expired children (per ``needs_expired``) are closed out as
    finished/aborted; other children that ``needs_requeued`` flags and that
    carry a ``kwargs`` entry in their data are put back on the queue.

    Returns ``Status.in_progress`` while at least one non-expired child is
    unfinished, else ``Status.finished``.
    """
    unfinished = list(Task.query.filter(
        Task.parent_id == self.task_id,
        Task.status != Status.finished,
    ))
    if not unfinished:
        return Status.finished

    timestamp = datetime.utcnow()
    to_abort = set()
    to_requeue = set()
    still_pending = False

    for child in unfinished:
        if self.needs_expired(child):
            to_abort.add(child)
        else:
            still_pending = True
            if self.needs_requeued(child) and 'kwargs' in child.data:
                to_requeue.add(child)

    if to_abort:
        abort_values = {
            Task.date_modified: timestamp,
            Task.date_finished: timestamp,
            Task.status: Status.finished,
            Task.result: Result.aborted,
        }
        Task.query.filter(
            Task.id.in_([t.id for t in to_abort]),
        ).update(abort_values, synchronize_session=False)
        db.session.commit()

    if to_requeue:
        for child in to_requeue:
            call_kwargs = child.data['kwargs'].copy()
            call_kwargs['parent_task_id'] = child.parent_id.hex
            call_kwargs['task_id'] = child.task_id.hex
            queue.delay(child.task_name, kwargs=call_kwargs)

        Task.query.filter(
            Task.id.in_([t.id for t in to_requeue]),
        ).update({Task.date_modified: timestamp}, synchronize_session=False)
        db.session.commit()

    if still_pending:
        return Status.in_progress
    return Status.finished
def delay_if_needed(self, **kwargs):
    """
    Enqueue this task only when its row was just created or when
    ``needs_requeued`` reports that it should be queued again.

    A ``task_id`` is generated when the caller does not pass one.

    >>> task.delay_if_needed(
    >>>     task_id='33846695b2774b29a71795a009e8168a',
    >>>     parent_task_id='659974858dcf4aa08e73a940e1066328',
    >>> )
    """
    kwargs.setdefault("task_id", uuid4().hex)

    # Everything except the tracking ids is stored as the function's kwargs.
    tracking_keys = ("task_id", "parent_task_id")
    fn_kwargs = dict((k, v) for k, v in kwargs.iteritems() if k not in tracking_keys)

    lookup = {"task_name": self.task_name, "task_id": kwargs["task_id"]}
    initial = {
        "parent_id": kwargs.get("parent_task_id"),
        "data": {"kwargs": fn_kwargs},
        "status": Status.queued,
    }
    task, created = get_or_create(Task, where=lookup, defaults=initial)

    if not (created or self.needs_requeued(task)):
        return

    if not created:
        task.date_modified = datetime.utcnow()
        db.session.add(task)
    db.session.commit()

    queue.delay(self.task_name, kwargs=kwargs, countdown=_DEFAULT_COUNTDOWN)

    if created:
        self._report_created()
def delay(self, **kwargs):
    """
    Enqueue this task unconditionally.

    A ``task_id`` is generated when the caller does not pass one. When a
    matching row already exists, its ``date_modified`` is refreshed before
    the task is queued.

    >>> task.delay(
    >>>     task_id='33846695b2774b29a71795a009e8168a',
    >>>     parent_task_id='659974858dcf4aa08e73a940e1066328',
    >>> )
    """
    kwargs.setdefault("task_id", uuid4().hex)

    # Keep only the arguments meant for the task function itself.
    fn_kwargs = {}
    for key, value in kwargs.iteritems():
        if key not in ("task_id", "parent_task_id"):
            fn_kwargs[key] = value

    lookup = {"task_name": self.task_name, "task_id": kwargs["task_id"]}
    initial = {
        "parent_id": kwargs.get("parent_task_id"),
        "data": {"kwargs": fn_kwargs},
        "status": Status.queued,
    }
    task, created = get_or_create(Task, where=lookup, defaults=initial)

    if not created:
        task.date_modified = datetime.utcnow()
        db.session.add(task)
    db.session.commit()

    queue.delay(self.task_name, kwargs=kwargs, countdown=CONTINUE_COUNTDOWN)
def delay(self, **kwargs):
    """
    Enqueue this task.

    The caller must pass a truthy ``task_id``. A ``Task`` row in the queued
    state is requested through ``try_create`` before the task is put on the
    queue.

    >>> task.delay(
    >>>     task_id='33846695b2774b29a71795a009e8168a',
    >>>     parent_task_id='659974858dcf4aa08e73a940e1066328',
    >>> )
    """
    assert kwargs.get('task_id')

    # The tracking ids are stored on the row itself, not in its kwargs.
    tracking_keys = ('task_id', 'parent_task_id')
    fn_kwargs = dict(
        (key, value)
        for key, value in kwargs.iteritems()
        if key not in tracking_keys
    )

    row_values = {
        'task_name': self.task_name,
        'parent_id': kwargs.get('parent_task_id'),
        'task_id': kwargs['task_id'],
        'status': Status.queued,
        'data': {'kwargs': fn_kwargs},
    }
    try_create(Task, where=row_values)
    db.session.commit()

    queue.delay(self.task_name, kwargs=kwargs, countdown=CONTINUE_COUNTDOWN)
def delay(self, **kwargs):
    """
    Put this task on the queue.

    Requires a truthy ``task_id`` keyword. The remaining keywords, minus
    ``task_id`` and ``parent_task_id``, are recorded as the task's
    ``kwargs`` on the row handed to ``try_create``.

    >>> task.delay(
    >>>     task_id='33846695b2774b29a71795a009e8168a',
    >>>     parent_task_id='659974858dcf4aa08e73a940e1066328',
    >>> )
    """
    assert kwargs.get('task_id')

    fn_kwargs = {}
    for name, value in kwargs.iteritems():
        if name in ('task_id', 'parent_task_id'):
            continue
        fn_kwargs[name] = value

    try_create(Task, where={
        'task_name': self.task_name,
        'parent_id': kwargs.get('parent_task_id'),
        'task_id': kwargs['task_id'],
        'status': Status.queued,
        'data': {'kwargs': fn_kwargs},
    })
    db.session.commit()

    queue.delay(self.task_name, kwargs=kwargs, countdown=CONTINUE_COUNTDOWN)
def _continue(self, kwargs):
    """
    Mark this task as in progress and enqueue it again.

    ``kwargs`` are the keyword arguments for the next run. The task's own
    ``task_id`` and ``parent_id`` are attached before queueing.
    """
    # Work on a copy so the caller's dict is not modified as a side effect,
    # the same way ``_retry`` copies ``self.kwargs`` before adding the ids.
    kwargs = dict(kwargs)
    kwargs["task_id"] = self.task_id
    kwargs["parent_task_id"] = self.parent_id

    self._update({Task.date_modified: datetime.utcnow(), Task.status: Status.in_progress})
    db.session.commit()

    queue.delay(self.task_name, kwargs=kwargs, countdown=CONTINUE_COUNTDOWN)
def verify_all_children(self):
    """
    Review all unfinished children of this task.

    Children flagged by ``needs_expired`` are updated to finished/aborted.
    Any other child flagged by ``needs_requeued`` whose data includes
    ``kwargs`` is queued again and has its ``date_modified`` refreshed.

    Returns ``Status.finished`` when nothing is left pending, otherwise
    ``Status.in_progress``.
    """
    pending_children = list(
        Task.query.filter(Task.parent_id == self.task_id, Task.status != Status.finished)
    )
    if not pending_children:
        return Status.finished

    now = datetime.utcnow()
    stale = set()
    requeue = set()
    has_live_child = False

    for child in pending_children:
        if self.needs_expired(child):
            stale.add(child)
        else:
            has_live_child = True
            if self.needs_requeued(child) and "kwargs" in child.data:
                requeue.add(child)

    if stale:
        stale_ids = [t.id for t in stale]
        Task.query.filter(Task.id.in_(stale_ids)).update(
            {
                Task.date_modified: now,
                Task.date_finished: now,
                Task.status: Status.finished,
                Task.result: Result.aborted,
            },
            synchronize_session=False,
        )
        db.session.commit()

    if requeue:
        for child in requeue:
            resend_kwargs = child.data["kwargs"].copy()
            resend_kwargs["parent_task_id"] = child.parent_id.hex
            resend_kwargs["task_id"] = child.task_id.hex
            queue.delay(child.task_name, kwargs=resend_kwargs)

        requeue_ids = [t.id for t in requeue]
        Task.query.filter(Task.id.in_(requeue_ids)).update(
            {Task.date_modified: now}, synchronize_session=False
        )
        db.session.commit()

    return Status.in_progress if has_live_child else Status.finished
def sync_repo(repo_id, continuous=True):
    """
    Bring a repository's VCS checkout up to date and save new commits.

    The repository's ``last_update_attempt`` is stamped before touching the
    VCS and ``last_update`` once the log walk is done. Each newly saved
    revision triggers a ``notify_revision_created`` job.

    When ``continuous`` is true, ``sync_repo.NotFinished`` is raised at the
    end.
    """
    repo = Repository.query.get(repo_id)
    if not repo:
        print('Repository not found')
        return

    vcs = repo.get_vcs()
    if vcs is None:
        print('No VCS backend available')
        return

    Repository.query.filter(
        Repository.id == repo.id,
    ).update({
        'last_update_attempt': datetime.utcnow(),
    }, synchronize_session=False)
    db.session.commit()

    if not vcs.exists():
        vcs.clone()
    else:
        vcs.update()

    # TODO(dcramer): this doesnt scrape everything, and really we wouldn't
    # want to do this all in a single job so we should split this into a
    # backfill task
    # TODO(dcramer): this doesn't collect commits in non-default branches
    cursor = None
    keep_going = True
    while keep_going:
        # Another pass only happens if this one saved at least one new commit.
        keep_going = False
        for commit in vcs.log(parent=cursor):
            revision, created = commit.save(repo)
            db.session.commit()
            if not created:
                break

            keep_going = True
            cursor = commit.id

            notify_kwargs = {
                'repository_id': repo.id.hex,
                'revision_sha': revision.sha,
            }
            queue.delay('notify_revision_created', kwargs=notify_kwargs)

    Repository.query.filter(
        Repository.id == repo.id,
    ).update({
        'last_update': datetime.utcnow(),
    }, synchronize_session=False)
    db.session.commit()

    if continuous:
        raise sync_repo.NotFinished
def sync_repo(repo_id, continuous=True):
    """
    Update or clone the repository's VCS checkout and record new commits.

    ``last_update_attempt`` is written before the VCS work and
    ``last_update`` after it. A ``notify_revision_created`` job is queued
    for each commit that ``commit.save`` reports as newly created.

    Raises ``sync_repo.NotFinished`` at the end when ``continuous`` is true.
    """
    repo = Repository.query.get(repo_id)
    if not repo:
        print('Repository not found')
        return

    vcs = repo.get_vcs()
    if vcs is None:
        print('No VCS backend available')
        return

    def _stamp(field_name):
        # Write the current UTC time into one timestamp column of this repo.
        Repository.query.filter(
            Repository.id == repo.id,
        ).update({
            field_name: datetime.utcnow(),
        }, synchronize_session=False)
        db.session.commit()

    _stamp('last_update_attempt')

    if vcs.exists():
        vcs.update()
    else:
        vcs.clone()

    # TODO(dcramer): this doesnt scrape everything, and really we wouldn't
    # want to do this all in a single job so we should split this into a
    # backfill task
    # TODO(dcramer): this doesn't collect commits in non-default branches
    last_seen = None
    found_new = True
    while found_new:
        found_new = False
        for commit in vcs.log(parent=last_seen):
            revision, created = commit.save(repo)
            db.session.commit()
            if not created:
                # Reached a commit that was already stored; stop this pass.
                break

            found_new = True
            last_seen = commit.id

            queue.delay('notify_revision_created', kwargs={
                'repository_id': repo.id.hex,
                'revision_sha': revision.sha,
            })

    _stamp('last_update')

    if continuous:
        raise sync_repo.NotFinished
def _retry(self):
    """
    Retry this task and update its state.

    When ``max_retries`` is set and the stored retry count already exceeds
    it, the task is marked finished/failed and ``TooManyRetries`` is raised.
    Otherwise the retry counter is incremented and the task is queued again
    with a countdown of ``BASE_RETRY_COUNTDOWN`` plus the square of the
    retry number, capped at 300.

    >>> task.retry()
    """
    # TODO(dcramer): this needs to handle too-many-retries itself
    assert self.task_id

    task = Task.query.filter(
        Task.task_name == self.task_name,
        Task.task_id == self.task_id,
        Task.parent_id == self.parent_id,
    ).first()

    out_of_retries = task and self.max_retries and task.num_retries > self.max_retries
    if out_of_retries:
        now = datetime.utcnow()
        self._update({
            Task.date_finished: now,
            Task.date_modified: now,
            Task.status: Status.finished,
            Task.result: Result.failed,
        })
        db.session.commit()
        message = '%s failed after %d retries' % (self.task_name, task.num_retries)
        raise TooManyRetries(message)

    self._update({
        Task.date_modified: datetime.utcnow(),
        Task.status: Status.in_progress,
        Task.num_retries: Task.num_retries + 1,
    })
    db.session.commit()

    # Read back the counter that was just incremented in the database.
    retry_number = db.session.query(Task.num_retries).filter(
        Task.task_name == self.task_name,
        Task.task_id == self.task_id,
        Task.parent_id == self.parent_id,
    ).scalar() or 0
    retry_countdown = min(BASE_RETRY_COUNTDOWN + (retry_number ** 2), 300)

    kwargs = self.kwargs.copy()
    kwargs['task_id'] = self.task_id
    kwargs['parent_task_id'] = self.parent_id

    queue.delay(self.task_name, kwargs=kwargs, countdown=retry_countdown)
def _retry(self):
    """
    Retry this task and update its state.

    Raises ``TooManyRetries`` (after marking the task finished/failed) when
    ``max_retries`` is set and the stored ``num_retries`` is greater than
    it. Otherwise bumps ``num_retries`` and re-enqueues the task with a
    countdown that grows with the square of the retry number, capped at 300.

    >>> task.retry()
    """
    # TODO(dcramer): this needs to handle too-many-retries itself
    assert self.task_id

    # Criteria identifying this task's row; used for both queries below.
    this_task = (
        Task.task_name == self.task_name,
        Task.task_id == self.task_id,
        Task.parent_id == self.parent_id,
    )

    task = Task.query.filter(*this_task).first()
    if task and self.max_retries and task.num_retries > self.max_retries:
        finished_at = datetime.utcnow()
        failed_values = {
            Task.date_finished: finished_at,
            Task.date_modified: finished_at,
            Task.status: Status.finished,
            Task.result: Result.failed,
        }
        self._update(failed_values)
        db.session.commit()
        raise TooManyRetries('%s failed after %d retries' % (self.task_name, task.num_retries))

    retry_values = {
        Task.date_modified: datetime.utcnow(),
        Task.status: Status.in_progress,
        Task.num_retries: Task.num_retries + 1,
    }
    self._update(retry_values)
    db.session.commit()

    kwargs = self.kwargs.copy()
    kwargs['task_id'] = self.task_id
    kwargs['parent_task_id'] = self.parent_id

    retry_number = db.session.query(Task.num_retries).filter(*this_task).scalar() or 0
    retry_countdown = min(BASE_RETRY_COUNTDOWN + (retry_number ** 2), 300)

    queue.delay(self.task_name, kwargs=kwargs, countdown=retry_countdown)
def _continue(self, kwargs, retry_after=CONTINUE_COUNTDOWN):
    """
    Mark this task as in progress and enqueue it again.

    ``kwargs`` are the keyword arguments for the next run; the task's own
    ``task_id`` and ``parent_id`` are attached before queueing.
    ``retry_after`` is passed to the queue as the countdown.
    """
    # Work on a copy so the caller's dict is not modified as a side effect,
    # the same way ``_retry`` copies ``self.kwargs`` before adding the ids.
    kwargs = dict(kwargs)
    kwargs['task_id'] = self.task_id
    kwargs['parent_task_id'] = self.parent_id

    self._update({
        Task.date_modified: datetime.utcnow(),
        Task.status: Status.in_progress,
    })
    db.session.commit()

    queue.delay(
        self.task_name,
        kwargs=kwargs,
        countdown=retry_after,
    )
def delay_if_needed(self, **kwargs):
    """
    Enqueue this task if its row was just created, or if ``needs_requeued``
    says an existing one should be queued again.

    A ``task_id`` is generated when none is passed. ``decr`` is called for
    ``'untracked_task'`` whenever a new row is created.

    >>> task.delay_if_needed(
    >>>     task_id='33846695b2774b29a71795a009e8168a',
    >>>     parent_task_id='659974858dcf4aa08e73a940e1066328',
    >>> )
    """
    kwargs.setdefault('task_id', uuid4().hex)

    tracking_keys = ('task_id', 'parent_task_id')
    fn_kwargs = dict((k, v) for k, v in kwargs.iteritems() if k not in tracking_keys)

    lookup = {
        'task_name': self.task_name,
        'task_id': kwargs['task_id'],
    }
    initial = {
        'parent_id': kwargs.get('parent_task_id'),
        'data': {'kwargs': fn_kwargs},
        'status': Status.queued,
    }
    task, created = get_or_create(Task, where=lookup, defaults=initial)

    if created:
        decr('untracked_task')
    elif not self.needs_requeued(task):
        # Existing task that does not need requeueing: nothing to do.
        return
    else:
        task.date_modified = datetime.utcnow()
        db.session.add(task)

    db.session.commit()

    queue.delay(self.task_name, kwargs=kwargs, countdown=CONTINUE_COUNTDOWN)
def delay_if_needed(self, **kwargs):
    """
    Queue this task when it is new, or when ``needs_requeued`` reports that
    an existing task should be queued again.

    A ``task_id`` is generated when none is passed. ``_report_created`` is
    called after queueing whenever a new row was created.

    >>> task.delay_if_needed(
    >>>     task_id='33846695b2774b29a71795a009e8168a',
    >>>     parent_task_id='659974858dcf4aa08e73a940e1066328',
    >>> )
    """
    kwargs.setdefault('task_id', uuid4().hex)

    # Separate the function's own arguments from the tracking ids.
    fn_kwargs = {}
    for key, value in kwargs.iteritems():
        if key not in ('task_id', 'parent_task_id'):
            fn_kwargs[key] = value

    task, created = get_or_create(
        Task,
        where={
            'task_name': self.task_name,
            'task_id': kwargs['task_id'],
        },
        defaults={
            'parent_id': kwargs.get('parent_task_id'),
            'data': {'kwargs': fn_kwargs},
            'status': Status.queued,
        },
    )

    if not (created or self.needs_requeued(task)):
        return

    if not created:
        task.date_modified = datetime.utcnow()
        db.session.add(task)
    db.session.commit()

    queue.delay(self.task_name, kwargs=kwargs, countdown=CONTINUE_COUNTDOWN)

    if created:
        self._report_created()
def _retry(self):
    """
    Retry this task and update its state.

    When the stored ``num_retries`` is greater than ``MAX_RETRIES`` the task
    is marked finished/failed and ``TooManyRetries`` is raised. Otherwise
    the retry counter is incremented and the task is queued again with
    ``RETRY_COUNTDOWN``.

    >>> task.retry()
    """
    # TODO(dcramer): this needs to handle too-many-retries itself
    assert self.task_id

    task = Task.query.filter(
        Task.task_name == self.task_name,
        Task.task_id == self.task_id,
    ).first()

    retries_exhausted = task and task.num_retries > MAX_RETRIES
    if retries_exhausted:
        finished_at = datetime.utcnow()
        self._update({
            Task.date_finished: finished_at,
            Task.date_modified: finished_at,
            Task.status: Status.finished,
            Task.result: Result.failed,
        })
        db.session.commit()
        raise TooManyRetries

    self._update({
        Task.date_modified: datetime.utcnow(),
        Task.status: Status.in_progress,
        Task.num_retries: Task.num_retries + 1,
    })
    db.session.commit()

    # Re-send the original arguments together with the tracking ids.
    kwargs = self.kwargs.copy()
    kwargs['task_id'] = self.task_id
    kwargs['parent_task_id'] = self.parent_id

    queue.delay(self.task_name, kwargs=kwargs, countdown=RETRY_COUNTDOWN)
def delay(self, **kwargs):
    """
    Enqueue this task unconditionally.

    A ``task_id`` is generated when none is passed. An existing row gets
    its ``date_modified`` refreshed; a newly created one is reported via
    ``_report_created`` before the task is queued.

    >>> task.delay(
    >>>     task_id='33846695b2774b29a71795a009e8168a',
    >>>     parent_task_id='659974858dcf4aa08e73a940e1066328',
    >>> )
    """
    kwargs.setdefault('task_id', uuid4().hex)

    tracking_keys = ('task_id', 'parent_task_id')
    fn_kwargs = dict((k, v) for k, v in kwargs.iteritems() if k not in tracking_keys)

    lookup = {
        'task_name': self.task_name,
        'task_id': kwargs['task_id'],
    }
    initial = {
        'parent_id': kwargs.get('parent_task_id'),
        'data': {'kwargs': fn_kwargs},
        'status': Status.queued,
    }
    task, created = get_or_create(Task, where=lookup, defaults=initial)

    if not created:
        task.date_modified = datetime.utcnow()
        db.session.add(task)
    db.session.commit()

    if created:
        self._report_created()

    queue.delay(self.task_name, kwargs=kwargs, countdown=_DEFAULT_COUNTDOWN)
def delay(self, **kwargs):
    """
    Put this task on the queue, with no countdown.

    A ``task_id`` is generated when none is passed. If the row already
    existed its ``date_modified`` is refreshed; if it was just created,
    ``_report_created`` is called before queueing.

    >>> task.delay(
    >>>     task_id='33846695b2774b29a71795a009e8168a',
    >>>     parent_task_id='659974858dcf4aa08e73a940e1066328',
    >>> )
    """
    kwargs.setdefault('task_id', uuid4().hex)

    # Only the function's own arguments are stored in the row's data.
    fn_kwargs = {}
    for key, value in kwargs.iteritems():
        if key in ('task_id', 'parent_task_id'):
            continue
        fn_kwargs[key] = value

    task, created = get_or_create(
        Task,
        where={
            'task_name': self.task_name,
            'task_id': kwargs['task_id'],
        },
        defaults={
            'parent_id': kwargs.get('parent_task_id'),
            'data': {'kwargs': fn_kwargs},
            'status': Status.queued,
        },
    )

    if not created:
        task.date_modified = datetime.utcnow()
        db.session.add(task)
    db.session.commit()

    if created:
        self._report_created()

    queue.delay(self.task_name, kwargs=kwargs)
def sync_build(build_id):
    """
    Synchronizing the build happens continuously until all jobs have reported
    in as finished or have failed/aborted.

    This task is responsible for:
    - Checking in with jobs
    - Aborting/retrying them if they're beyond limits
    - Aggregating the results from jobs into the build itself

    Returns early (doing nothing) when the build does not exist or is
    already finished. Raises ``sync_build.NotFinished`` when the build is
    not finished after this pass.
    """
    build = Build.query.get(build_id)
    if not build:
        return

    if build.status == Status.finished:
        return

    all_jobs = list(Job.query.filter(
        Job.build_id == build_id,
    ))

    # The build is finished only if the child tasks report finished AND
    # every job row itself has a finished status.
    is_finished = sync_build.verify_all_children() == Status.finished
    if any(p.status != Status.finished for p in all_jobs):
        is_finished = False

    prev_started = build.date_started
    build.date_started = safe_agg(
        min, (j.date_started for j in all_jobs if j.date_started))

    # We want to report how long we waited for the build to start once and only once,
    # so we do it at the transition from not started to started.
    if not prev_started and build.date_started:
        queued_time = build.date_started - build.date_created
        statsreporter.stats().log_timing('build_start_latency', _timedelta_to_millis(queued_time))

    if is_finished:
        # If there are no jobs (or no jobs with a finished date) fall back to
        # finishing now, since at this point, the build is done executing.
        build.date_finished = safe_agg(
            max, (j.date_finished for j in all_jobs if j.date_finished), datetime.utcnow())
    else:
        build.date_finished = None

    if build.date_started and build.date_finished:
        build.duration = _timedelta_to_millis(build.date_finished - build.date_started)
    else:
        build.duration = None

    # A single failed job fails the build immediately, even while other
    # jobs are still running.
    if any(j.result is Result.failed for j in all_jobs):
        build.result = Result.failed
    elif is_finished:
        build.result = aggregate_result((j.result for j in all_jobs))
    else:
        build.result = Result.unknown

    if is_finished:
        build.status = Status.finished
    else:
        # ensure we don't set the status to finished unless it actually is
        new_status = aggregate_status((j.status for j in all_jobs))
        if new_status != Status.finished:
            build.status = new_status

    if is_finished:
        build.date_decided = datetime.utcnow()
        decided_latency = build.date_decided - build.date_finished
        statsreporter.stats().log_timing('build_decided_latency', _timedelta_to_millis(decided_latency))
    else:
        build.date_decided = None

    # Only write to the database when one of the assignments above changed
    # something.
    if db.session.is_modified(build):
        build.date_modified = datetime.utcnow()
        db.session.add(build)
        db.session.commit()

    if not is_finished:
        # presumably handled by the task wrapper to run this again - confirm
        raise sync_build.NotFinished

    with statsreporter.stats().timer('build_stat_aggregation'):
        # Stat aggregation is best-effort: failures are logged, not raised.
        try:
            aggregate_build_stat(build, 'test_count')
            aggregate_build_stat(build, 'test_duration')
            aggregate_build_stat(build, 'test_failures')
            aggregate_build_stat(build, 'test_rerun_count')
            aggregate_build_stat(build, 'tests_missing')
            aggregate_build_stat(build, 'lines_covered')
            aggregate_build_stat(build, 'lines_uncovered')
            aggregate_build_stat(build, 'diff_lines_covered')
            aggregate_build_stat(build, 'diff_lines_uncovered')
        except Exception:
            current_app.logger.exception('Failing recording aggregate stats for build %s', build.id)

    fire_signal.delay(
        signal='build.finished',
        kwargs={'build_id': build.id.hex},
    )

    queue.delay('update_project_stats', kwargs={
        'project_id': build.project_id.hex,
    }, countdown=1)
def sync_job(job_id):
    """
    Update a job from its plan's first step and aggregate its phases.

    Returns early when the job does not exist or is already finished.
    Raises ``sync_job.NotFinished`` when the job is not finished after this
    pass. Once finished, queues ``notify_job_finished`` and, when a job
    plan exists, ``update_project_plan_stats``.
    """
    job = Job.query.get(job_id)
    if not job:
        return

    if job.status == Status.finished:
        return

    # TODO(dcramer): we make an assumption that there is a single step
    job_plan = JobPlan.query.options(subqueryload_all('plan.steps')).filter(
        JobPlan.job_id == job.id,
    ).join(Plan).first()
    try:
        if not job_plan:
            raise UnrecoverableException(
                'Got sync_job task without job plan: %s' % (job.id, ))

        try:
            step = job_plan.plan.steps[0]
        except IndexError:
            raise UnrecoverableException('Missing steps for plan')

        implementation = step.get_implementation()
        implementation.update(job=job)
    except UnrecoverableException:
        # Give up on the job: mark it finished/aborted and log the error.
        job.status = Status.finished
        job.result = Result.aborted
        current_app.logger.exception('Unrecoverable exception syncing %s', job.id)

    is_finished = sync_job.verify_all_children() == Status.finished
    if is_finished:
        job.status = Status.finished

    all_phases = list(job.phases)

    job.date_started = safe_agg(min, (j.date_started for j in all_phases if j.date_started))

    if is_finished:
        job.date_finished = safe_agg(max, (j.date_finished for j in all_phases if j.date_finished))
    else:
        job.date_finished = None

    if job.date_started and job.date_finished:
        # Duration is stored in milliseconds.
        job.duration = int(
            (job.date_finished - job.date_started).total_seconds() * 1000)
    else:
        job.duration = None

    # if any phases are marked as failing, fail the build
    if any(j.result is Result.failed for j in all_phases):
        job.result = Result.failed
    # if any test cases were marked as failing, fail the build
    elif TestCase.query.filter(TestCase.result == Result.failed, TestCase.job_id == job.id).first():
        job.result = Result.failed
    # if we've finished all phases, use the best result available
    elif is_finished:
        job.result = safe_agg(max, (j.result for j in all_phases), Result.unknown)
    else:
        job.result = Result.unknown

    # NOTE(review): the status/result set in the UnrecoverableException
    # handler above are overwritten by these assignments - confirm intended.
    if is_finished:
        job.status = Status.finished
    elif any(j.status is Status.in_progress for j in all_phases):
        job.status = Status.in_progress
    else:
        job.status = Status.queued

    if db.session.is_modified(job):
        job.date_modified = datetime.utcnow()
        db.session.add(job)
        db.session.commit()
        publish_job_update(job)

    if not is_finished:
        # presumably handled by the task wrapper to run this again - confirm
        raise sync_job.NotFinished

    _record_tests_missing(job)

    queue.delay('notify_job_finished', kwargs={
        'job_id': job.id.hex,
    })

    if job_plan:
        queue.delay('update_project_plan_stats', kwargs={
            'project_id': job.project_id.hex,
            'plan_id': job_plan.plan_id.hex,
        }, countdown=1)
def sync_job(job_id):
    """
    Update a job via its build step implementation and aggregate its phases.

    Runs inside an ``RCount('sync_job')`` context. Returns early when the
    job does not exist or is already finished. Raises
    ``sync_job.NotFinished`` when the job is not finished after this pass.
    Once finished, aggregates job stats (best-effort), fires the
    ``job.finished`` signal and, when a job plan exists, queues
    ``update_project_plan_stats``.
    """
    with RCount('sync_job'):
        job = Job.query.get(job_id)
        if not job:
            return

        if job.status == Status.finished:
            return

        # TODO(dcramer): we make an assumption that there is a single step
        jobplan, implementation = JobPlan.get_build_step_for_job(job_id=job.id)

        try:
            implementation.update(job=job)
        except UnrecoverableException:
            # Give up on the job: mark it finished/aborted and log the error.
            job.status = Status.finished
            job.result = Result.aborted
            current_app.logger.exception('Unrecoverable exception syncing %s', job.id)

        all_phases = list(job.phases)

        # propagate changes to any phases as they live outside of the
        # normalize synchronization routines
        sync_job_phases(job, all_phases)

        # The job is finished only if the child tasks report finished AND
        # every phase itself has a finished status.
        is_finished = sync_job.verify_all_children() == Status.finished
        if any(p.status != Status.finished for p in all_phases):
            is_finished = False

        job.date_started = safe_agg(
            min, (j.date_started for j in all_phases if j.date_started))

        if is_finished:
            job.date_finished = safe_agg(
                max, (j.date_finished for j in all_phases if j.date_finished))
        else:
            job.date_finished = None

        if job.date_started and job.date_finished:
            # Duration is stored in milliseconds.
            job.duration = int((job.date_finished - job.date_started).total_seconds() * 1000)
        else:
            job.duration = None

        # if any phases are marked as failing, fail the build
        if any(j.result is Result.failed for j in all_phases):
            job.result = Result.failed
        # if any test cases were marked as failing, fail the build
        elif TestCase.query.filter(TestCase.result == Result.failed, TestCase.job_id == job.id).first():
            job.result = Result.failed
        # if we've finished all phases, use the best result available
        elif is_finished:
            job.result = aggregate_result((j.result for j in all_phases))
        else:
            job.result = Result.unknown

        if is_finished:
            job.status = Status.finished
        else:
            # ensure we don't set the status to finished unless it actually is
            new_status = aggregate_status((j.status for j in all_phases))
            if new_status != Status.finished:
                job.status = new_status
            elif job.status == Status.finished:
                job.status = Status.in_progress
                # NOTE(review): logger.exception is called here outside an
                # ``except`` block; logger.error may be what is intended.
                current_app.logger.exception('Job incorrectly marked as finished: %s', job.id)

        # Only write to the database when something actually changed.
        if db.session.is_modified(job):
            job.date_modified = datetime.utcnow()
            db.session.add(job)
            db.session.commit()

        if not is_finished:
            # presumably handled by the task wrapper to run this again - confirm
            raise sync_job.NotFinished

        # Stat aggregation is best-effort: failures are logged, not raised.
        try:
            aggregate_job_stat(job, 'test_count')
            aggregate_job_stat(job, 'test_duration')
            aggregate_job_stat(job, 'test_failures')
            aggregate_job_stat(job, 'test_rerun_count')
            aggregate_job_stat(job, 'tests_missing')
            aggregate_job_stat(job, 'lines_covered')
            aggregate_job_stat(job, 'lines_uncovered')
            aggregate_job_stat(job, 'diff_lines_covered')
            aggregate_job_stat(job, 'diff_lines_uncovered')
        except Exception:
            current_app.logger.exception('Failing recording aggregate stats for job %s', job.id)

        fire_signal.delay(
            signal='job.finished',
            kwargs={'job_id': job.id.hex},
        )

        if jobplan:
            queue.delay('update_project_plan_stats', kwargs={
                'project_id': job.project_id.hex,
                'plan_id': jobplan.plan_id.hex,
            }, countdown=1)
def sync_build(build_id):
    """
    Synchronizing the build happens continuously until all jobs have reported
    in as finished or have failed/aborted.

    This task is responsible for:
    - Checking in with jobs
    - Aborting/retrying them if they're beyond limits
    - Aggregating the results from jobs into the build itself

    Returns early (doing nothing) when the build does not exist or is
    already finished. Raises ``sync_build.NotFinished`` when the build is
    not finished after this pass.
    """
    build = Build.query.get(build_id)
    if not build:
        return

    if build.status == Status.finished:
        return

    all_jobs = list(Job.query.filter(Job.build_id == build_id, ))

    is_finished = sync_build.verify_all_children() == Status.finished

    build.date_started = safe_agg(min, (j.date_started for j in all_jobs if j.date_started))

    if is_finished:
        build.date_finished = safe_agg(max, (j.date_finished for j in all_jobs if j.date_finished))
    else:
        build.date_finished = None

    if build.date_started and build.date_finished:
        # Duration is stored in milliseconds.
        build.duration = int(
            (build.date_finished - build.date_started).total_seconds() * 1000)
    else:
        build.duration = None

    # A single failed job fails the build immediately, even while other
    # jobs are still running.
    if any(j.result is Result.failed for j in all_jobs):
        build.result = Result.failed
    elif is_finished:
        build.result = safe_agg(max, (j.result for j in all_jobs))
    else:
        build.result = Result.unknown

    # Unfinished builds are in progress as soon as any job is no longer
    # queued.
    if is_finished:
        build.status = Status.finished
    elif any(j.status is not Status.queued for j in all_jobs):
        build.status = Status.in_progress
    else:
        build.status = Status.queued

    # Only write to the database when something actually changed.
    if db.session.is_modified(build):
        build.date_modified = datetime.utcnow()
        db.session.add(build)
        db.session.commit()

    if not is_finished:
        # presumably handled by the task wrapper to run this again - confirm
        raise sync_build.NotFinished

    # Stat aggregation is best-effort: failures are logged, not raised.
    try:
        aggregate_build_stat(build, 'test_count')
        aggregate_build_stat(build, 'test_duration')
        aggregate_build_stat(build, 'test_failures')
        aggregate_build_stat(build, 'test_rerun_count')
        aggregate_build_stat(build, 'tests_missing')
        aggregate_build_stat(build, 'lines_covered')
        aggregate_build_stat(build, 'lines_uncovered')
        aggregate_build_stat(build, 'diff_lines_covered')
        aggregate_build_stat(build, 'diff_lines_uncovered')
    except Exception:
        current_app.logger.exception(
            'Failing recording aggregate stats for build %s', build.id)

    fire_signal.delay(
        signal='build.finished',
        kwargs={'build_id': build.id.hex},
    )

    queue.delay('update_project_stats', kwargs={
        'project_id': build.project_id.hex,
    }, countdown=1)
def sync_build(build_id):
    """
    Synchronizing the build happens continuously until all jobs have reported
    in as finished or have failed/aborted.

    This task is responsible for:
    - Checking in with jobs
    - Aborting/retrying them if they're beyond limits
    - Aggregating the results from jobs into the build itself

    Returns early (doing nothing) when the build does not exist or is
    already finished. Raises ``sync_build.NotFinished`` when the build is
    not finished after this pass.
    """
    build = Build.query.get(build_id)
    if not build:
        return

    if build.status == Status.finished:
        return

    all_jobs = list(Job.query.filter(
        Job.build_id == build_id,
    ))

    # The build is finished only if the child tasks report finished AND
    # every job row itself has a finished status.
    is_finished = sync_build.verify_all_children() == Status.finished
    if any(p.status != Status.finished for p in all_jobs):
        is_finished = False

    build.date_started = safe_agg(
        min, (j.date_started for j in all_jobs if j.date_started))

    if is_finished:
        build.date_finished = safe_agg(
            max, (j.date_finished for j in all_jobs if j.date_finished))
    else:
        build.date_finished = None

    if build.date_started and build.date_finished:
        # Duration is stored in milliseconds.
        build.duration = int((build.date_finished - build.date_started).total_seconds() * 1000)
    else:
        build.duration = None

    # A single failed job fails the build immediately, even while other
    # jobs are still running.
    if any(j.result is Result.failed for j in all_jobs):
        build.result = Result.failed
    elif is_finished:
        build.result = aggregate_result((j.result for j in all_jobs))
    else:
        build.result = Result.unknown

    if is_finished:
        build.status = Status.finished
    else:
        # ensure we don't set the status to finished unless it actually is
        new_status = aggregate_status((j.status for j in all_jobs))
        if new_status != Status.finished:
            build.status = new_status

    # Only write to the database when something actually changed.
    if db.session.is_modified(build):
        build.date_modified = datetime.utcnow()
        db.session.add(build)
        db.session.commit()

    if not is_finished:
        # presumably handled by the task wrapper to run this again - confirm
        raise sync_build.NotFinished

    # Stat aggregation is best-effort: failures are logged, not raised.
    try:
        aggregate_build_stat(build, 'test_count')
        aggregate_build_stat(build, 'test_duration')
        aggregate_build_stat(build, 'test_failures')
        aggregate_build_stat(build, 'test_rerun_count')
        aggregate_build_stat(build, 'tests_missing')
        aggregate_build_stat(build, 'lines_covered')
        aggregate_build_stat(build, 'lines_uncovered')
        aggregate_build_stat(build, 'diff_lines_covered')
        aggregate_build_stat(build, 'diff_lines_uncovered')
    except Exception:
        current_app.logger.exception('Failing recording aggregate stats for build %s', build.id)

    fire_signal.delay(
        signal='build.finished',
        kwargs={'build_id': build.id.hex},
    )

    queue.delay('update_project_stats', kwargs={
        'project_id': build.project_id.hex,
    }, countdown=1)
def sync_job(job_id):
    """
    Update a job via its build step implementation and aggregate its phases.

    Returns early when the job does not exist or is already finished.
    Raises ``sync_job.NotFinished`` when the job is not finished after this
    pass. Once finished, aggregates job stats (best-effort), fires the
    ``job.finished`` signal and, when a job plan exists, queues
    ``update_project_plan_stats``.
    """
    job = Job.query.get(job_id)
    if not job:
        return

    if job.status == Status.finished:
        return

    # TODO(dcramer): we make an assumption that there is a single step
    jobplan, implementation = JobPlan.get_build_step_for_job(job_id=job.id)

    try:
        implementation.update(job=job)
    except UnrecoverableException:
        # Give up on the job: mark it finished/aborted and log the error.
        job.status = Status.finished
        job.result = Result.aborted
        current_app.logger.exception('Unrecoverable exception syncing %s', job.id)

    is_finished = sync_job.verify_all_children() == Status.finished
    if is_finished:
        job.status = Status.finished

    # Push pending changes to the database before the phases are read.
    db.session.flush()

    all_phases = list(job.phases)

    # propagate changes to any phases as they live outside of the
    # normalize synchronization routines
    sync_job_phases(job, all_phases)

    job.date_started = safe_agg(min, (j.date_started for j in all_phases if j.date_started))

    if is_finished:
        job.date_finished = safe_agg(max, (j.date_finished for j in all_phases if j.date_finished))
    else:
        job.date_finished = None

    if job.date_started and job.date_finished:
        # Duration is stored in milliseconds.
        job.duration = int(
            (job.date_finished - job.date_started).total_seconds() * 1000)
    else:
        job.duration = None

    # if any phases are marked as failing, fail the build
    if any(j.result is Result.failed for j in all_phases):
        job.result = Result.failed
    # if any test cases were marked as failing, fail the build
    elif TestCase.query.filter(TestCase.result == Result.failed, TestCase.job_id == job.id).first():
        job.result = Result.failed
    # if we've finished all phases, use the best result available
    elif is_finished:
        job.result = safe_agg(max, (j.result for j in all_phases))
    else:
        job.result = Result.unknown

    # Unfinished jobs are in progress as soon as any phase is no longer
    # queued.
    if is_finished:
        job.status = Status.finished
    elif any(j.status is not Status.queued for j in all_phases):
        job.status = Status.in_progress
    else:
        job.status = Status.queued

    # Only write to the database when something actually changed.
    if db.session.is_modified(job):
        job.date_modified = datetime.utcnow()
        db.session.add(job)
        db.session.commit()

    if not is_finished:
        # presumably handled by the task wrapper to run this again - confirm
        raise sync_job.NotFinished

    # Stat aggregation is best-effort: failures are logged, not raised.
    try:
        aggregate_job_stat(job, 'test_count')
        aggregate_job_stat(job, 'test_duration')
        aggregate_job_stat(job, 'test_failures')
        aggregate_job_stat(job, 'test_rerun_count')
        aggregate_job_stat(job, 'tests_missing')
        aggregate_job_stat(job, 'lines_covered')
        aggregate_job_stat(job, 'lines_uncovered')
        aggregate_job_stat(job, 'diff_lines_covered')
        aggregate_job_stat(job, 'diff_lines_uncovered')
    except Exception:
        current_app.logger.exception(
            'Failing recording aggregate stats for job %s', job.id)

    fire_signal.delay(
        signal='job.finished',
        kwargs={'job_id': job.id.hex},
    )

    if jobplan:
        queue.delay('update_project_plan_stats', kwargs={
            'project_id': job.project_id.hex,
            'plan_id': jobplan.plan_id.hex,
        }, countdown=1)
def sync_build(build_id):
    """
    Synchronizing the build happens continuously until all jobs have reported
    in as finished or have failed/aborted.

    This task is responsible for:
    - Checking in with jobs
    - Aborting/retrying them if they're beyond limits
    - Aggregating the results from jobs into the build itself

    Returns early (doing nothing) when the build does not exist or is
    already finished. Raises ``sync_build.NotFinished`` when the build is
    not finished after this pass. Once finished, queues
    ``notify_build_finished`` and ``update_project_stats``.
    """
    build = Build.query.get(build_id)
    if not build:
        return

    if build.status == Status.finished:
        return

    all_jobs = list(Job.query.filter(
        Job.build_id == build_id,
    ))

    is_finished = sync_build.verify_all_children() == Status.finished

    build.date_started = safe_agg(
        min, (j.date_started for j in all_jobs if j.date_started))

    if is_finished:
        build.date_finished = safe_agg(
            max, (j.date_finished for j in all_jobs if j.date_finished))
    else:
        build.date_finished = None

    if build.date_started and build.date_finished:
        # Duration is stored in milliseconds.
        build.duration = int((build.date_finished - build.date_started).total_seconds() * 1000)
    else:
        build.duration = None

    # A single failed job fails the build immediately, even while other
    # jobs are still running.
    if any(j.result is Result.failed for j in all_jobs):
        build.result = Result.failed
    elif is_finished:
        build.result = safe_agg(
            max, (j.result for j in all_jobs), Result.unknown)
    else:
        build.result = Result.unknown

    if is_finished:
        build.status = Status.finished
    elif any(j.status is Status.in_progress for j in all_jobs):
        build.status = Status.in_progress
    else:
        build.status = Status.queued

    # Only write (and publish) when something actually changed.
    if db.session.is_modified(build):
        build.date_modified = datetime.utcnow()
        db.session.add(build)
        db.session.commit()
        publish_build_update(build)

    if not is_finished:
        # presumably handled by the task wrapper to run this again - confirm
        raise sync_build.NotFinished

    _record_tests_missing(build)

    queue.delay('notify_build_finished', kwargs={
        'build_id': build.id.hex,
    })

    queue.delay('update_project_stats', kwargs={
        'project_id': build.project_id.hex,
    }, countdown=1)
def sync_job(job_id):
    """
    Update a job and its phases from the state of its constituent jobsteps.

    Does nothing when the job cannot be found or is already finished.
    Otherwise the build step implementation is asked to update the job,
    the phases are synchronized, and the job's dates, duration
    (milliseconds), result and status are recomputed from its phases.

    Raises ``sync_job.NotFinished`` until both
    ``sync_job.verify_all_children()`` and every phase report finished.
    After that, aggregate stats are recorded (best effort), the
    ``job.finished`` signal is fired and, when a jobplan exists,
    ``update_project_plan_stats`` is queued.

    NOTE(review): ``verify_all_children`` and ``NotFinished`` are read off
    ``sync_job`` itself; presumably a task decorator that is not visible
    here attaches them -- confirm.
    """
    job = Job.query.get(job_id)
    # Unknown jobs and jobs that already finished need no syncing.
    if not job or job.status == Status.finished:
        return

    # TODO(dcramer): we make an assumption that there is a single step
    jobplan, implementation = JobPlan.get_build_step_for_job(job_id=job.id)

    try:
        implementation.update(job=job)
    except UnrecoverableException:
        job.status = Status.finished
        job.result = Result.infra_failed
        current_app.logger.exception('Unrecoverable exception syncing %s', job.id)

    phases = list(job.phases)

    # Phases live outside of the normal synchronization routines, so any
    # changes have to be pushed down to them explicitly.
    sync_job_phases(job, phases, implementation)

    children_done = sync_job.verify_all_children() == Status.finished
    phases_pending = any(phase.status != Status.finished for phase in phases)
    # The job is only finished when the child tasks AND all phases are.
    is_finished = children_done and not phases_pending

    job.date_started = safe_agg(
        min, (phase.date_started for phase in phases if phase.date_started))
    job.date_finished = (
        safe_agg(max, (phase.date_finished for phase in phases if phase.date_finished))
        if is_finished else None
    )

    started_at, finished_at = job.date_started, job.date_finished
    if started_at and finished_at:
        # total_seconds() * 1000 -> duration in milliseconds
        job.duration = int((finished_at - started_at).total_seconds() * 1000)
    else:
        job.duration = None

    if any(phase.result is Result.failed for phase in phases):
        # Any failing phase fails the job.
        job.result = Result.failed
    else:
        # A failing test case also fails the job, except when it belongs
        # to a JobStep that had an infra failure: such test results are
        # not trustworthy, so they are filtered out of the query.
        failing_test = TestCase.query.join(
            JobStep, JobStep.id == TestCase.step_id,
        ).filter(
            TestCase.result == Result.failed,
            TestCase.job_id == job.id,
            JobStep.result != Result.infra_failed,
        ).first()
        if failing_test:
            job.result = Result.failed
        elif is_finished:
            # The implementation sets the final job result.
            implementation.validate(job=job)
        else:
            job.result = Result.unknown

    if is_finished:
        job.status = Status.finished
    else:
        # Never report the job as finished unless it actually is.
        phase_status = aggregate_status((phase.status for phase in phases))
        if phase_status != Status.finished:
            job.status = phase_status
        elif job.status == Status.finished:
            job.status = Status.in_progress
            current_app.logger.exception(
                'Job incorrectly marked as finished: %s', job.id)

    session = db.session
    if session.is_modified(job):
        job.date_modified = datetime.utcnow()
        session.add(job)
        session.commit()

    if not is_finished:
        raise sync_job.NotFinished

    # Stats are best effort: the first failure is logged and the
    # remaining stats are skipped.
    stat_names = (
        'test_count',
        'test_duration',
        'test_failures',
        'test_rerun_count',
        'tests_missing',
        'lines_covered',
        'lines_uncovered',
        'diff_lines_covered',
        'diff_lines_uncovered',
    )
    try:
        for stat_name in stat_names:
            aggregate_job_stat(job, stat_name)
    except Exception:
        current_app.logger.exception(
            'Failing recording aggregate stats for job %s', job.id)

    fire_signal.delay(
        signal='job.finished',
        kwargs={'job_id': job.id.hex},
    )

    if jobplan:
        plan_stats_kwargs = {
            'project_id': job.project_id.hex,
            'plan_id': jobplan.plan_id.hex,
        }
        queue.delay('update_project_plan_stats', kwargs=plan_stats_kwargs, countdown=1)
def sync_job(job_id):
    """
    Synchronize a job with its build step implementation and its phases.

    Does nothing when the job cannot be found or is already finished.
    Otherwise the first step of the job's plan provides the
    implementation, which is either cancelled (when ``has_timed_out``
    says the job timed out, failing every unfinished jobstep with a
    ``timeout`` FailureReason) or asked to update the job. A missing job
    plan or a plan without steps is treated as unrecoverable and aborts
    the job.

    Afterwards the job's dates, duration (milliseconds), result and
    status are recomputed from its phases and committed if modified.

    Raises ``sync_job.NotFinished`` until ``sync_job.verify_all_children()``
    reports finished. After that, aggregate stats are recorded (best
    effort), the ``job.finished`` signal is fired and, when a job plan
    exists, ``update_project_plan_stats`` is queued.

    NOTE(review): ``verify_all_children`` and ``NotFinished`` are read off
    ``sync_job`` itself; presumably a task decorator that is not visible
    here attaches them -- confirm.
    """
    job = Job.query.get(job_id)
    # Unknown jobs and jobs that already finished need no syncing.
    if not job or job.status == Status.finished:
        return

    # TODO(dcramer): we make an assumption that there is a single step
    plan_query = JobPlan.query.options(subqueryload_all('plan.steps'))
    plan_query = plan_query.filter(JobPlan.job_id == job.id).join(Plan)
    job_plan = plan_query.first()

    try:
        if not job_plan:
            raise UnrecoverableException(
                'Got sync_job task without job plan: %s' % (job.id,))

        try:
            first_step = job_plan.plan.steps[0]
        except IndexError:
            raise UnrecoverableException('Missing steps for plan')

        implementation = first_step.get_implementation()

        if not has_timed_out(job, job_plan):
            implementation.update(job=job)
        else:
            # Collect the unfinished steps before cancelling the job.
            unfinished_steps = list(JobStep.query.filter(
                JobStep.status != Status.finished,
                JobStep.job_id == job.id,
            ))

            implementation.cancel(job=job)

            for jobstep in unfinished_steps:
                jobstep.result = Result.failed
                jobstep.status = Status.finished
                db.session.add(jobstep)

                failure_values = {
                    'step_id': jobstep.id,
                    'job_id': job.id,
                    'build_id': job.build_id,
                    'project_id': job.project_id,
                    'reason': 'timeout'
                }
                try_create(FailureReason, failure_values)

            # Make sure the job itself reflects the failure as well.
            job.result = Result.failed
            job.status = Status.finished
            db.session.add(job)

    except UnrecoverableException:
        job.status = Status.finished
        job.result = Result.aborted
        current_app.logger.exception('Unrecoverable exception syncing %s', job.id)

    is_finished = sync_job.verify_all_children() == Status.finished
    if is_finished:
        job.status = Status.finished

    db.session.flush()

    phases = list(job.phases)

    # Phases live outside of the normal synchronization routines, so any
    # changes have to be pushed down to them explicitly.
    sync_job_phases(job, phases)

    job.date_started = safe_agg(
        min, (phase.date_started for phase in phases if phase.date_started))
    job.date_finished = (
        safe_agg(max, (phase.date_finished for phase in phases if phase.date_finished))
        if is_finished else None
    )

    started_at, finished_at = job.date_started, job.date_finished
    if started_at and finished_at:
        # total_seconds() * 1000 -> duration in milliseconds
        job.duration = int((finished_at - started_at).total_seconds() * 1000)
    else:
        job.duration = None

    if any(phase.result is Result.failed for phase in phases):
        # Any failing phase fails the job.
        job.result = Result.failed
    else:
        # A failing test case fails the job as well.
        failing_test = TestCase.query.filter(
            TestCase.result == Result.failed,
            TestCase.job_id == job.id,
        ).first()
        if failing_test:
            job.result = Result.failed
        elif is_finished:
            # All phases are done: take the aggregate of their results.
            job.result = safe_agg(max, (phase.result for phase in phases))
        else:
            job.result = Result.unknown

    if is_finished:
        new_status = Status.finished
    elif any(phase.status is not Status.queued for phase in phases):
        new_status = Status.in_progress
    else:
        new_status = Status.queued
    job.status = new_status

    if db.session.is_modified(job):
        job.date_modified = datetime.utcnow()
        db.session.add(job)
        db.session.commit()

    if not is_finished:
        raise sync_job.NotFinished

    # Stats are best effort: the first failure is logged and the
    # remaining stats are skipped.
    stat_names = (
        'test_count',
        'test_duration',
        'test_failures',
        'test_rerun_count',
        'tests_missing',
        'lines_covered',
        'lines_uncovered',
        'diff_lines_covered',
        'diff_lines_uncovered',
    )
    try:
        for stat_name in stat_names:
            aggregate_job_stat(job, stat_name)
    except Exception:
        current_app.logger.exception('Failing recording aggregate stats for job %s', job.id)

    fire_signal.delay(
        signal='job.finished',
        kwargs={'job_id': job.id.hex},
    )

    if job_plan:
        plan_stats_kwargs = {
            'project_id': job.project_id.hex,
            'plan_id': job_plan.plan_id.hex,
        }
        queue.delay('update_project_plan_stats', kwargs=plan_stats_kwargs, countdown=1)
def sync_job(job_id):
    """
    Refresh jobphase and job state from the constituent jobsteps.

    Returns immediately when the job does not exist or is already
    finished. Otherwise the build step implementation updates the job,
    phases are synchronized, and the job's start/finish dates, duration
    (milliseconds), result and status are derived from its phases and
    committed when the session reports the job as modified.

    Raises ``sync_job.NotFinished`` unless ``sync_job.verify_all_children()``
    reports finished and every phase is finished. On completion it
    records aggregate stats (best effort), fires the ``job.finished``
    signal and, when a jobplan exists, queues ``update_project_plan_stats``.

    NOTE(review): ``verify_all_children`` and ``NotFinished`` are read off
    ``sync_job`` itself; presumably a task decorator that is not visible
    here attaches them -- confirm.
    """
    job = Job.query.get(job_id)
    if not job:
        # No such job: nothing to synchronize.
        return
    if job.status == Status.finished:
        # Already finished: nothing left to synchronize.
        return

    jobplan, implementation = JobPlan.get_build_step_for_job(job_id=job.id)

    try:
        implementation.update(job=job)
    except UnrecoverableException:
        job.status = Status.finished
        job.result = Result.infra_failed
        current_app.logger.exception('Unrecoverable exception syncing %s', job.id)

    job_phases = list(job.phases)

    # Phases are outside of the normal synchronization routines, so
    # changes are propagated to them here.
    sync_job_phases(job, job_phases, implementation)

    finished = sync_job.verify_all_children() == Status.finished
    if any(phase.status != Status.finished for phase in job_phases):
        # An unfinished phase keeps the job open regardless of children.
        finished = False

    first_start = safe_agg(
        min, (phase.date_started for phase in job_phases if phase.date_started))
    job.date_started = first_start

    if finished:
        last_finish = safe_agg(
            max, (phase.date_finished for phase in job_phases if phase.date_finished))
    else:
        last_finish = None
    job.date_finished = last_finish

    if job.date_started and job.date_finished:
        elapsed = job.date_finished - job.date_started
        # total_seconds() * 1000 -> duration in milliseconds
        job.duration = int(elapsed.total_seconds() * 1000)
    else:
        job.duration = None

    # A failing phase fails the job outright.
    has_failure = any(phase.result is Result.failed for phase in job_phases)
    if not has_failure:
        # So does a failing test case, unless it belongs to a JobStep
        # that had an infra failure: those test results cannot be trusted
        # and are excluded by the query.
        has_failure = bool(
            TestCase.query.join(
                JobStep, JobStep.id == TestCase.step_id,
            ).filter(
                TestCase.result == Result.failed,
                TestCase.job_id == job.id,
                JobStep.result != Result.infra_failed,
            ).first()
        )

    if has_failure:
        job.result = Result.failed
    elif finished:
        # Everything is done: the implementation sets the final result.
        implementation.validate(job=job)
    else:
        job.result = Result.unknown

    if finished:
        job.status = Status.finished
    else:
        # Make sure the status is not set to finished prematurely.
        aggregated = aggregate_status((phase.status for phase in job_phases))
        if aggregated != Status.finished:
            job.status = aggregated
        elif job.status == Status.finished:
            job.status = Status.in_progress
            current_app.logger.exception('Job incorrectly marked as finished: %s', job.id)

    if db.session.is_modified(job):
        job.date_modified = datetime.utcnow()
        db.session.add(job)
        db.session.commit()

    if not finished:
        raise sync_job.NotFinished

    # Best effort: the first failing stat is logged and the rest skipped.
    try:
        for stat in ('test_count', 'test_duration', 'test_failures',
                     'test_rerun_count', 'tests_missing', 'lines_covered',
                     'lines_uncovered', 'diff_lines_covered',
                     'diff_lines_uncovered'):
            aggregate_job_stat(job, stat)
    except Exception:
        current_app.logger.exception('Failing recording aggregate stats for job %s', job.id)

    fire_signal.delay(
        signal='job.finished',
        kwargs={'job_id': job.id.hex},
    )

    if jobplan:
        queue.delay(
            'update_project_plan_stats',
            kwargs={
                'project_id': job.project_id.hex,
                'plan_id': jobplan.plan_id.hex,
            },
            countdown=1,
        )
def sync_job(job_id):
    """
    Synchronize a job with its build step implementation and its phases.

    Returns immediately when the job does not exist or is already
    finished. Otherwise the first step of the job's plan provides the
    implementation that updates the job; a missing job plan or a plan
    without steps is treated as unrecoverable and aborts the job.

    The job's dates, duration (milliseconds), result and status are then
    recomputed from its phases, committed if modified, and the job update
    is published.

    Raises ``sync_job.NotFinished`` until ``sync_job.verify_all_children()``
    reports finished. After that, missing tests are recorded,
    ``notify_job_finished`` is queued and, when a job plan exists,
    ``update_project_plan_stats`` is queued.

    NOTE(review): ``verify_all_children`` and ``NotFinished`` are read off
    ``sync_job`` itself; presumably a task decorator that is not visible
    here attaches them -- confirm.
    """
    job = Job.query.get(job_id)
    # Unknown jobs and jobs that already finished need no syncing.
    if not job or job.status == Status.finished:
        return

    # TODO(dcramer): we make an assumption that there is a single step
    plan_query = JobPlan.query.options(subqueryload_all('plan.steps'))
    plan_query = plan_query.filter(JobPlan.job_id == job.id).join(Plan)
    job_plan = plan_query.first()

    try:
        if not job_plan:
            raise UnrecoverableException(
                'Got sync_job task without job plan: %s' % (job.id,))

        try:
            first_step = job_plan.plan.steps[0]
        except IndexError:
            raise UnrecoverableException('Missing steps for plan')

        first_step.get_implementation().update(job=job)

    except UnrecoverableException:
        job.status = Status.finished
        job.result = Result.aborted
        current_app.logger.exception('Unrecoverable exception syncing %s', job.id)

    is_finished = sync_job.verify_all_children() == Status.finished
    if is_finished:
        job.status = Status.finished

    phases = list(job.phases)

    job.date_started = safe_agg(
        min, (phase.date_started for phase in phases if phase.date_started))
    job.date_finished = (
        safe_agg(max, (phase.date_finished for phase in phases if phase.date_finished))
        if is_finished else None
    )

    started_at, finished_at = job.date_started, job.date_finished
    if started_at and finished_at:
        # total_seconds() * 1000 -> duration in milliseconds
        job.duration = int((finished_at - started_at).total_seconds() * 1000)
    else:
        job.duration = None

    if any(phase.result is Result.failed for phase in phases):
        # Any failing phase fails the job.
        job.result = Result.failed
    else:
        # A failing test case fails the job as well.
        failing_test = TestCase.query.filter(
            TestCase.result == Result.failed,
            TestCase.job_id == job.id,
        ).first()
        if failing_test:
            job.result = Result.failed
        elif is_finished:
            # All phases are done: take the aggregate of their results.
            job.result = safe_agg(
                max, (phase.result for phase in phases), Result.unknown)
        else:
            job.result = Result.unknown

    if is_finished:
        new_status = Status.finished
    elif any(phase.status is Status.in_progress for phase in phases):
        new_status = Status.in_progress
    else:
        new_status = Status.queued
    job.status = new_status

    session = db.session
    if session.is_modified(job):
        job.date_modified = datetime.utcnow()
        session.add(job)
        session.commit()

    publish_job_update(job)

    if not is_finished:
        raise sync_job.NotFinished

    _record_tests_missing(job)

    queue.delay(
        'notify_job_finished',
        kwargs={'job_id': job.id.hex},
    )

    if job_plan:
        plan_stats_kwargs = {
            'project_id': job.project_id.hex,
            'plan_id': job_plan.plan_id.hex,
        }
        queue.delay('update_project_plan_stats', kwargs=plan_stats_kwargs, countdown=1)
def sync_build(build_id):
    """
    Aggregate the state of a build's jobs into the build itself.

    Returns immediately when the build does not exist or is already
    finished. Otherwise ``sync_build.verify_all_children()`` is consulted
    to learn whether all child tasks are finished, and the build's
    start/finish dates, duration (milliseconds), result and status are
    derived from its jobs. The build is committed when the session
    reports it as modified, and the build update is published.

    Raises ``sync_build.NotFinished`` while child tasks are outstanding.
    When everything is finished, missing tests are recorded and the
    ``notify_build_finished`` and ``update_project_stats`` tasks are
    queued.

    NOTE(review): ``verify_all_children`` and ``NotFinished`` are read off
    ``sync_build`` itself; presumably a task decorator that is not visible
    here attaches them -- confirm.
    """
    build = Build.query.get(build_id)
    if not build:
        # No such build: nothing to synchronize.
        return
    if build.status == Status.finished:
        # Already finished: nothing left to synchronize.
        return

    job_rows = list(Job.query.filter(Job.build_id == build_id))

    done = sync_build.verify_all_children() == Status.finished

    first_start = safe_agg(
        min, (row.date_started for row in job_rows if row.date_started))
    build.date_started = first_start

    # Only a finished build gets a finish date.
    if done:
        last_finish = safe_agg(
            max, (row.date_finished for row in job_rows if row.date_finished))
    else:
        last_finish = None
    build.date_finished = last_finish

    if build.date_started and build.date_finished:
        elapsed = build.date_finished - build.date_started
        # total_seconds() * 1000 -> duration in milliseconds
        build.duration = int(elapsed.total_seconds() * 1000)
    else:
        build.duration = None

    # One failed job is enough to fail the build, finished or not.
    if any(row.result is Result.failed for row in job_rows):
        build.result = Result.failed
    elif not done:
        build.result = Result.unknown
    else:
        build.result = safe_agg(
            max, (row.result for row in job_rows), Result.unknown)

    if done:
        build.status = Status.finished
    else:
        any_running = any(row.status is Status.in_progress for row in job_rows)
        build.status = Status.in_progress if any_running else Status.queued

    if db.session.is_modified(build):
        build.date_modified = datetime.utcnow()
        db.session.add(build)
        db.session.commit()

    publish_build_update(build)

    if not done:
        raise sync_build.NotFinished

    _record_tests_missing(build)

    notify_kwargs = {'build_id': build.id.hex}
    queue.delay('notify_build_finished', kwargs=notify_kwargs)

    stats_kwargs = {'project_id': build.project_id.hex}
    queue.delay('update_project_stats', kwargs=stats_kwargs, countdown=1)