Example #1
0
def get_extensions(job):
    """Extracts ExtendedFields from a job, if present.

    The '_SCRIPT' entry of the job's envs, when present, is reported as the
    job's source file and is left out of the envs placed on the result. The
    ``job`` dict passed in is not modified.

        Args:
            job: A dict with dsub job metadata. The 'envs' and 'user-id'
                keys are required; 'events', 'status-detail' and
                'last-update' are read if present.

        Returns:
            ExtendedFields: Populated extensions on the job
    """
    envs = job['envs']
    script = None
    if envs and '_SCRIPT' in envs:
        # Work on a shallow copy instead of deleting from the caller's dict:
        # mutating the input made a second call on the same job lose the
        # script (source_file would come back as None).
        envs = dict(envs)
        script = envs.pop('_SCRIPT')

    events = [
        EventDetail(e['start-time'], e['name']) for e in job.get('events', [])
    ]

    return ExtendedFields(user_id=job['user-id'],
                          status_detail=job.get('status-detail'),
                          logs=logs.dsub_to_api(job),
                          last_update=job.get('last-update'),
                          envs=envs,
                          source_file=script,
                          events=events)
Example #2
0
def get_job(id, **kwargs):
    """
    Fetch workflow-level and task-level metadata for a single job

    The metadata request is limited to the keys listed in
    ``job_include_keys``. Optional ``auth`` and ``auth_headers`` keyword
    arguments are forwarded to the HTTP request.

    :param id: Job ID
    :type id: str

    :rtype: JobMetadataResponse
    """

    base_url = _get_base_url()
    include_query = 'includeKey=' + '&includeKey='.join(job_include_keys)
    url = '{cromwell_url}/{id}/metadata?{query}'.format(
        cromwell_url=base_url, id=id, query=include_query)
    response = requests.get(url,
                            auth=kwargs.get('auth'),
                            headers=kwargs.get('auth_headers'))

    if response.status_code != 200:
        handle_error(response)

    job = response.json()
    calls = job.get('calls', {})

    # Collect one failure entry per call attempt that reports failures.
    failures = []
    for call_name, attempts in calls.items():
        for attempt in attempts:
            if attempt.get('failures') is not None:
                failures.append(format_task_failure(call_name, attempt))

    # No task-level failures were found: fall back to workflow-level ones.
    if not failures and job.get('failures'):
        failures = [format_workflow_failure(f) for f in job.get('failures')]

    tasks = []
    for task_name, task_metadata in calls.items():
        tasks.append(format_task(task_name, task_metadata))
    tasks.sort(key=lambda t: t.start)

    start = _parse_datetime(job.get('start'))
    submission = _parse_datetime(job.get('submission'))
    if submission is None:
        # Submission is required by the common jobs API. Submission may be missing
        # for subworkflows in which case we fallback to the workflow start time
        # or, if not started, the current time. This fallback logic may be
        # removed if/when Cromwell changes behavior per https://github.com/broadinstitute/cromwell/issues/2968.
        submission = start or datetime.utcnow()

    api_status = job_statuses.cromwell_workflow_status_to_api(
        job.get('status'))
    end = _parse_datetime(job.get('end'))
    inputs = update_key_names(job.get('inputs', {}))
    outputs = update_key_names(job.get('outputs', {}))
    extensions = ExtendedFields(tasks=tasks,
                                parent_job_id=job.get('parentWorkflowId'))
    return JobMetadataResponse(id=id,
                               name=job.get('workflowName'),
                               status=api_status,
                               submission=submission,
                               start=start,
                               end=end,
                               inputs=inputs,
                               outputs=outputs,
                               labels=job.get('labels'),
                               failures=failures,
                               extensions=extensions)
 def test_format_job_with_no_start_date(self):
     # The job dict has no 'start' entry; the expected result carries the
     # supplied timestamp as its submission time and sets no start.
     timestamp = datetime.strptime('2017-10-27T18:04:47Z',
                                   '%Y-%m-%dT%H:%M:%SZ')
     job = {'id': '12345', 'name': 'TestJob', 'status': 'Failed'}
     expected = QueryJobsResult(id=job.get('id'),
                                name=job.get('name'),
                                status=job.get('status'),
                                submission=timestamp,
                                extensions=ExtendedFields())
     actual = jobs_controller.format_job(job, timestamp)
     self.assertEqual(actual, expected)
 def test_format_job_with_no_start_date(self):
     # The job dict has no 'start' entry; the expected result carries the
     # supplied UTC timestamp as both its start and its submission time.
     timestamp = dateutil.parser.parse('2017-10-27T18:04:47Z').astimezone(
         tzutc())
     job = {'id': '12345', 'name': 'TestJob', 'status': 'Failed'}
     expected = QueryJobsResult(id=job.get('id'),
                                name=job.get('name'),
                                status=job.get('status'),
                                start=timestamp,
                                submission=timestamp,
                                extensions=ExtendedFields())
     actual = jobs_controller.format_job(job, timestamp)
     self.assertEqual(actual, expected)
Example #5
0
def get_job(id, **kwargs):
    """
    Fetch workflow-level and task-level metadata for a single job

    Optional ``auth`` and ``auth_headers`` keyword arguments are forwarded
    to the HTTP request. A response whose status code matches BadRequest,
    NotFound or InternalServerError raises that exception with the
    'message' value from the response body.

    :param id: Job ID
    :type id: str

    :rtype: JobMetadataResponse
    """
    url = '{cromwell_url}/{id}/metadata'.format(cromwell_url=_get_base_url(),
                                                id=id)
    response = requests.get(url,
                            auth=kwargs.get('auth'),
                            headers=kwargs.get('auth_headers'))
    job = response.json()
    # Checked in this order; the first matching status code wins.
    for error_cls in (BadRequest, NotFound, InternalServerError):
        if response.status_code == error_cls.code:
            raise error_cls(job.get('message'))

    # Stays None (not an empty list) when the workflow reports no failures.
    failures = None
    if job.get('failures'):
        failures = []
        for failure in job['failures']:
            failures.append(FailureMessage(failure=failure['message']))

    # Only the last entry of each call's metadata list (its most recent run)
    # is turned into a task.
    tasks = []
    for task_name, task_metadata in job.get('calls', {}).items():
        tasks.append(format_task(task_name, task_metadata[-1]))
    tasks.sort(key=lambda t: t.start)

    start = _parse_datetime(job.get('start'))
    submission = _parse_datetime(job.get('submission'))
    if submission is None:
        # Submission is required by the common jobs API. Submission may be missing
        # for subworkflows in which case we fallback to the workflow start time
        # or, if not started, the current time. This fallback logic may be
        # removed if/when Cromwell changes behavior per https://github.com/broadinstitute/cromwell/issues/2968.
        submission = start or datetime.utcnow()

    end = _parse_datetime(job.get('end'))
    inputs = update_key_names(job.get('inputs', {}))
    outputs = update_key_names(job.get('outputs', {}))
    return JobMetadataResponse(id=id,
                               name=job.get('workflowName'),
                               status=job.get('status'),
                               submission=submission,
                               start=start,
                               end=end,
                               inputs=inputs,
                               outputs=outputs,
                               labels=job.get('labels'),
                               failures=failures,
                               extensions=ExtendedFields(tasks=tasks))
Example #6
0
def format_job(job, now):
    """Build a QueryJobsResult from one job dict.

    Args:
        job: A dict of job fields ('id', 'name', 'status', 'start',
            'submission', 'end', 'labels', 'parentWorkflowId'), all read
            with ``get`` so any of them may be absent.
        now: Value used as the start time when no start time is parsed
            from the job.

    Returns:
        QueryJobsResult: The formatted job
    """
    start = _parse_datetime(job.get('start')) or now
    parsed_submission = _parse_datetime(job.get('submission'))
    # Submission is required by the common jobs API. Submission may be missing
    # for subworkflows in which case we fallback to the workflow start time
    # or, if not started, the current time. This fallback logic may be
    # removed if/when Cromwell changes behavior per https://github.com/broadinstitute/cromwell/issues/2968.
    submission = start if parsed_submission is None else parsed_submission
    end = _parse_datetime(job.get('end'))
    api_status = job_statuses.cromwell_workflow_status_to_api(
        job.get('status'))
    extensions = ExtendedFields(parent_job_id=job.get('parentWorkflowId'))
    return QueryJobsResult(id=job.get('id'),
                           name=job.get('name'),
                           status=api_status,
                           submission=submission,
                           start=start,
                           end=end,
                           labels=job.get('labels'),
                           extensions=extensions)
Example #7
0
def format_job(job, now):
    """Build a QueryJobsResult from one job dict.

    Args:
        job: A dict of job fields ('id', 'name', 'status', 'start', 'end',
            'labels', 'parentWorkflowId'), all read with ``get`` so any of
            them may be absent.
        now: Value used as the submission time when the parsed start time
            is None.

    Returns:
        QueryJobsResult: The formatted job
    """
    start = _parse_datetime(job.get('start'))
    # Submission is required by the common jobs API. Submission is not
    # currently returned via Cromwell QueryJobs, so start is used as a
    # stand-in value. If the job hasn't actually started yet, fake the
    # submission time as 'now' rather than returning null. Switch to true
    # submission time if/when supported by Cromwell: https://github.com/broadinstitute/cromwell/issues/3167.
    submission = now if start is None else start
    end = _parse_datetime(job.get('end'))
    extensions = ExtendedFields(parent_job_id=job.get('parentWorkflowId'))
    return QueryJobsResult(id=job.get('id'),
                           name=job.get('name'),
                           status=job.get('status'),
                           submission=submission,
                           start=start,
                           end=end,
                           labels=job.get('labels'),
                           extensions=extensions)