def get_extensions(job):
    """Extracts ExtendedFields from a job, if present.

    When the job's envs contain a '_SCRIPT' entry, its value is reported as
    the source file and is left out of the envs that are returned. The
    caller's job dict is not modified.

    Args:
        job: A dict with dsub job metadata

    Returns:
        ExtendedFields: Populated extensions on the job
    """
    envs = job['envs']
    script = None
    if envs and '_SCRIPT' in envs:
        # Work on a shallow copy: deleting the key from the caller's dict
        # would make a second call on the same job lose the script.
        envs = dict(envs)
        script = envs.pop('_SCRIPT')

    events = [
        EventDetail(e['start-time'], e['name'])
        for e in job.get('events', [])
    ]

    return ExtendedFields(user_id=job['user-id'],
                          status_detail=job.get('status-detail'),
                          logs=logs.dsub_to_api(job),
                          last_update=job.get('last-update'),
                          envs=envs,
                          source_file=script,
                          events=events)
def get_job(id, **kwargs):
    """
    Fetch workflow and task-level metadata for one job

    :param id: Job ID
    :type id: str

    :rtype: JobMetadataResponse
    """
    base_url = _get_base_url()
    include_query = 'includeKey=' + '&includeKey='.join(job_include_keys)
    url = '{cromwell_url}/{id}/metadata?{query}'.format(
        cromwell_url=base_url, id=id, query=include_query)
    response = requests.get(url,
                            auth=kwargs.get('auth'),
                            headers=kwargs.get('auth_headers'))
    if response.status_code != 200:
        handle_error(response)
    job = response.json()
    calls = job.get('calls', {})

    # One failure entry per call attempt that carries a 'failures' field.
    failures = []
    for call_name, attempts in calls.items():
        for attempt in attempts:
            if attempt.get('failures') is not None:
                failures.append(format_task_failure(call_name, attempt))

    # No task/subworkflow failures were collected, but the workflow itself
    # reports errors: use those instead.
    if not failures and job.get('failures'):
        failures = [format_workflow_failure(f) for f in job.get('failures')]

    tasks = [format_task(call_name, attempts)
             for call_name, attempts in calls.items()]
    tasks.sort(key=lambda task: task.start)

    start = _parse_datetime(job.get('start'))
    submission = _parse_datetime(job.get('submission'))
    if submission is None:
        # Submission is required by the common jobs API. Submission may be
        # missing for subworkflows in which case we fallback to the workflow
        # start time or, if not started, the current time. This fallback
        # logic may be removed if/when Cromwell changes behavior per
        # https://github.com/broadinstitute/cromwell/issues/2968.
        submission = start or datetime.utcnow()

    status = job_statuses.cromwell_workflow_status_to_api(job.get('status'))
    end = _parse_datetime(job.get('end'))
    inputs = update_key_names(job.get('inputs', {}))
    outputs = update_key_names(job.get('outputs', {}))
    extensions = ExtendedFields(tasks=tasks,
                                parent_job_id=job.get('parentWorkflowId'))

    return JobMetadataResponse(id=id,
                               name=job.get('workflowName'),
                               status=status,
                               submission=submission,
                               start=start,
                               end=end,
                               inputs=inputs,
                               outputs=outputs,
                               labels=job.get('labels'),
                               failures=failures,
                               extensions=extensions)
def test_format_job_with_no_start_date(self):
    # The job dict has no 'start' entry; the expected result carries the
    # supplied timestamp as its submission time and no start.
    now = datetime.strptime('2017-10-27T18:04:47Z', '%Y-%m-%dT%H:%M:%SZ')
    job = {'id': '12345', 'name': 'TestJob', 'status': 'Failed'}

    expected = QueryJobsResult(id=job['id'],
                               name=job['name'],
                               status=job['status'],
                               submission=now,
                               extensions=ExtendedFields())
    actual = jobs_controller.format_job(job, now)

    self.assertEqual(actual, expected)
def test_format_job_with_no_start_date(self):
    # The job dict has no 'start' entry; the expected result carries the
    # supplied UTC timestamp as both its start and its submission time.
    now = dateutil.parser.parse('2017-10-27T18:04:47Z').astimezone(tzutc())
    job = {'id': '12345', 'name': 'TestJob', 'status': 'Failed'}

    expected = QueryJobsResult(id=job['id'],
                               name=job['name'],
                               status=job['status'],
                               start=now,
                               submission=now,
                               extensions=ExtendedFields())
    actual = jobs_controller.format_job(job, now)

    self.assertEqual(actual, expected)
def get_job(id, **kwargs):
    """
    Query for job and task-level metadata for a specified job

    :param id: Job ID
    :type id: str
    :param kwargs: 'auth' and 'auth_headers', when given, are forwarded to
        the HTTP request
    :raises BadRequest: the response status equals BadRequest.code
    :raises NotFound: the response status equals NotFound.code
    :raises InternalServerError: the response status equals
        InternalServerError.code

    :rtype: JobMetadataResponse
    """
    url = '{cromwell_url}/{id}/metadata'.format(cromwell_url=_get_base_url(),
                                                id=id)
    response = requests.get(url,
                            auth=kwargs.get('auth'),
                            headers=kwargs.get('auth_headers'))

    # Inspect the status before trusting the body: an error response is not
    # guaranteed to be a JSON object, and failing to decode it must not mask
    # the HTTP error itself.
    for error_cls in (BadRequest, NotFound, InternalServerError):
        if response.status_code == error_cls.code:
            try:
                message = response.json().get('message')
            except (ValueError, AttributeError):
                # Body is not JSON, or is JSON but not an object.
                message = response.text
            raise error_cls(message)

    job = response.json()

    failures = None
    if job.get('failures'):
        failures = [
            FailureMessage(failure=f['message']) for f in job['failures']
        ]

    # Get the most recent run of each task in task_metadata
    tasks = [
        format_task(task_name, task_metadata[-1])
        for task_name, task_metadata in job.get('calls', {}).items()
    ]
    sorted_tasks = sorted(tasks, key=lambda t: t.start)

    start = _parse_datetime(job.get('start'))
    submission = _parse_datetime(job.get('submission'))
    if submission is None:
        # Submission is required by the common jobs API. Submission may be
        # missing for subworkflows in which case we fallback to the workflow
        # start time or, if not started, the current time. This fallback
        # logic may be removed if/when Cromwell changes behavior per
        # https://github.com/broadinstitute/cromwell/issues/2968.
        submission = start or datetime.utcnow()

    return JobMetadataResponse(id=id,
                               name=job.get('workflowName'),
                               status=job.get('status'),
                               submission=submission,
                               start=start,
                               end=_parse_datetime(job.get('end')),
                               inputs=update_key_names(job.get('inputs', {})),
                               outputs=update_key_names(job.get('outputs', {})),
                               labels=job.get('labels'),
                               failures=failures,
                               extensions=ExtendedFields(tasks=sorted_tasks))
def format_job(job, now):
    """Build a QueryJobsResult from a job dict.

    :param job: dict of job fields; read with .get(), so keys may be absent
    :param now: value used as the start time when the job has none
    :rtype: QueryJobsResult
    """
    parsed_start = _parse_datetime(job.get('start'))
    start = parsed_start if parsed_start else now

    parsed_submission = _parse_datetime(job.get('submission'))
    # Submission is required by the common jobs API. Submission may be
    # missing for subworkflows in which case we fallback to the workflow
    # start time or, if not started, the current time. This fallback logic
    # may be removed if/when Cromwell changes behavior per
    # https://github.com/broadinstitute/cromwell/issues/2968.
    submission = start if parsed_submission is None else parsed_submission

    end = _parse_datetime(job.get('end'))
    status = job_statuses.cromwell_workflow_status_to_api(job.get('status'))
    extensions = ExtendedFields(parent_job_id=job.get('parentWorkflowId'))

    return QueryJobsResult(id=job.get('id'),
                           name=job.get('name'),
                           status=status,
                           submission=submission,
                           start=start,
                           end=end,
                           labels=job.get('labels'),
                           extensions=extensions)
def format_job(job, now):
    """Build a QueryJobsResult from a job dict.

    :param job: dict of job fields; read with .get(), so keys may be absent
    :param now: value used as the submission time when the job has no start
    :rtype: QueryJobsResult
    """
    start = _parse_datetime(job.get('start'))
    end = _parse_datetime(job.get('end'))

    # Submission is required by the common jobs API. Submission is not
    # currently returned via Cromwell QueryJobs, so start is used as a
    # stand-in value. If the job hasn't actually started yet, fake the
    # submission time as 'now' rather than returning null. Switch to true
    # submission time if/when supported by Cromwell:
    # https://github.com/broadinstitute/cromwell/issues/3167.
    submission = now if start is None else start

    extensions = ExtendedFields(parent_job_id=job.get('parentWorkflowId'))
    return QueryJobsResult(id=job.get('id'),
                           name=job.get('name'),
                           status=job.get('status'),
                           submission=submission,
                           start=start,
                           end=end,
                           labels=job.get('labels'),
                           extensions=extensions)