def update_cl_metrics(handler):
    """Write the queue-to-completion time metric for a CL.

    Reads the object stored at the DB path given by the 'src' request
    parameter and emits 'ci_cl_completion_time': the interval between its
    'time_queued' and 'time_ended' timestamps, truncated to whole seconds.
    """
    src_path = handler.request.get('src')
    cl = req('GET', '%s/%s.json' % (DB, src_path))

    queued_at = parse_iso_time(cl['time_queued'])
    ended_at = parse_iso_time(cl['time_ended'])
    elapsed = ended_at - queued_at

    completion_metric = {'l': {}, 'v': int(elapsed.total_seconds())}
    write_metrics({'ci_cl_completion_time': completion_metric})
def check_pending_cl(handler):
    """Finalize a pending CL once none of its jobs is queued or running.

    The CL is identified by the 'cl_and_ps' request parameter.

    - If at least one job is still 'QUEUED' or 'STARTED', the CL is left in
      the pending queue. If it has additionally been pending for more than
      CL_TIMEOUT_SEC, a 'cancel_job' task is deferred for every pending job.
    - Otherwise the CL is removed from 'cls_pending', its 'time_ended' is set
      (an already stored value is kept), metrics tasks are deferred for the
      CL and for each of its jobs and, if the CL has 'wants_vote' set, a
      'comment_and_vote_cl' task is deferred.
    """
    # This function can be called twice on the same CL, e.g., in the case when
    # the Presubmit-Ready label is applied after we have finished running all
    # the jobs (we run presubmit regardless, only the voting is conditioned by
    # PR).
    cl_and_ps = handler.request.get('cl_and_ps')
    cl_obj = req('GET', '%s/cls/%s.json' % (DB, cl_and_ps))
    all_jobs = list(cl_obj.get('jobs', {}))
    pending_jobs = [
        job_id for job_id in all_jobs
        if req('GET', '%s/jobs/%s/status.json' % (DB, job_id))
        in ('QUEUED', 'STARTED')
    ]

    if pending_jobs:
        # If the CL has been pending for too long cancel all its jobs. Upon
        # the next scan it will be deleted and optionally voted on.
        t_queued = parse_iso_time(cl_obj['time_queued'])
        age_sec = (datetime.utcnow() - t_queued).total_seconds()
        if age_sec > CL_TIMEOUT_SEC:
            logging.warning(
                'Canceling %s, it has been pending for too long (%s sec)',
                cl_and_ps, int(age_sec))
            # Explicit loop rather than map(): map() is lazy on Python 3, so
            # using it purely for side effects would never enqueue the tasks.
            for job_id in pending_jobs:
                defer('cancel_job', job_id=job_id)
        # Jobs are still outstanding: nothing to finalize yet.
        return

    logging.info('All jobs completed for CL %s', cl_and_ps)

    # Remove the CL from the pending queue and update end time.
    patch_obj = {
        'cls_pending/%s' % cl_and_ps: {},  # = DELETE
        'cls/%s/time_ended' % cl_and_ps:
            cl_obj.get('time_ended', utc_now_iso()),
    }
    req('PATCH', '%s.json' % DB, body=patch_obj)
    defer('update_cl_metrics', src='cls/' + cl_and_ps)
    for job_id in all_jobs:
        defer('update_job_metrics', job_id=job_id)
    if cl_obj.get('wants_vote'):
        defer('comment_and_vote_cl', cl_and_ps=cl_and_ps)
def update_job_metrics(handler):
    """Write queue-time and run-time metrics for one job.

    The job is looked up by the 'job_id' request parameter. A metric is only
    produced when both timestamps it needs are present on the job:

    - 'ci_job_queue_time': 'time_queued'  -> 'time_started'
    - 'ci_job_run_time':   'time_started' -> 'time_ended'

    Each value is truncated to whole seconds and labelled with the job's
    'type'. Nothing is written if neither metric can be computed.
    """
    job_id = handler.request.get('job_id')
    job = req('GET', '%s/jobs/%s.json' % (DB, job_id))

    # (metric name, key of interval start, key of interval end)
    intervals = (
        ('ci_job_queue_time', 'time_queued', 'time_started'),
        ('ci_job_run_time', 'time_started', 'time_ended'),
    )

    metrics = {}
    for metric_name, start_key, end_key in intervals:
        if start_key not in job or end_key not in job:
            continue
        begin = parse_iso_time(job[start_key])
        finish = parse_iso_time(job[end_key])
        metrics[metric_name] = {
            'l': {'job_type': job['type']},
            'v': int((finish - begin).total_seconds()),
        }

    if not metrics:
        return
    write_metrics(metrics)
def delete_stale_jobs(handler):
    """Cancel jobs that are left in the running queue for too long.

    This is usually due to a crash in the VM that handles them.

    Every job listed under 'jobs_running' whose 'time_started' is more than
    twice JOB_TIMEOUT_SEC in the past gets a deferred 'cancel_job' task. A job
    without 'time_started' is treated as having started just now, so it is
    never considered stale.
    """
    running_jobs = req('GET', '%s/jobs_running.json?shallow=true' % (DB)) or {}
    for job_id in running_jobs:
        job = req('GET', '%s/jobs/%s.json' % (DB, job_id))
        time_started = parse_iso_time(job.get('time_started', utc_now_iso()))
        # Compare against UTC "now", as check_pending_cl does: the fallback
        # timestamp comes from utc_now_iso(), so a local-time now() would skew
        # the age by the host's UTC offset.
        age = (datetime.utcnow() - time_started).total_seconds()
        if age > JOB_TIMEOUT_SEC * 2:
            defer('cancel_job', job_id=job_id)