Example #1
def get_job(id):
    """Get a job's metadata by API Job ID.
    Args:
        id (str): Job ID to get
    Returns:
        JobMetadataResponse: Response containing relevant metadata
    """
    proj_id, job_id, task_id, attempt = job_ids.api_to_dsub(
        id, _provider_type())
    provider = providers.get_provider(_provider_type(), proj_id, _auth_token())

    jobs = []
    try:
        jobs = execute_redirect_stdout(lambda: dstat.dstat_job_producer(
            provider=provider,
            statuses={'*'},
            job_ids={job_id},
            task_ids={task_id} if task_id else None,
            task_attempts={attempt} if attempt else None,
            full_output=True).next())
    except apiclient.errors.HttpError as error:
        _handle_http_error(error, proj_id)

    # A job_id and task_id define a unique job (should only be one)
    if len(jobs) > 1:
        raise BadRequest('Found more than one job with ID {}'.format(id))
    elif len(jobs) == 0:
        raise NotFound('Could not find any jobs with ID {}'.format(id))
    return _metadata_response(id, jobs[0])
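The helper _handle_http_error is referenced here (and again in Examples #4 and #5) but not shown. A minimal sketch of what such a helper might look like, assuming it maps the status code carried by a googleapiclient HttpError onto the werkzeug HTTP exceptions raised throughout these examples; the mapping below is an illustration, not the project's actual implementation:

# Hypothetical sketch only; the real _handle_http_error may differ.
from werkzeug.exceptions import BadRequest, Forbidden, InternalServerError, NotFound

def _handle_http_error(error, proj_id):
    # googleapiclient's HttpError exposes the HTTP status via error.resp.status
    status = int(error.resp.status)
    if status == 400:
        raise BadRequest(str(error))
    if status in (401, 403):
        raise Forbidden('Permission denied for project {}'.format(proj_id))
    if status == 404:
        raise NotFound('Project {} not found'.format(proj_id))
    raise InternalServerError(str(error))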
Example #2
def abort_job(id):
    """Abort a job by API Job ID.

    Args:
        id (str): Job ID to be aborted

    Returns: None
    """
    # Attempt is unused in aborting because only one attempt can be running at
    # a time.
    proj_id, job_id, task_id, _ = job_ids.api_to_dsub(id, _provider_type())
    provider = providers.get_provider(_provider_type(), proj_id, _auth_token())

    # TODO(bryancrampton): Add flag to ddel to support deleting only
    # 'singleton' tasks.
    status = get_job(id).status

    # TODO(https://github.com/googlegenomics/dsub/issues/81): Remove this
    # provider-specific logic
    if isinstance(provider, stub.StubJobProvider):
        status = status[0]

    if status != job_statuses.ApiStatus.RUNNING:
        raise PreconditionFailed(
            'Job already in terminal status `{}`'.format(status))

    # TODO(https://github.com/googlegenomics/dsub/issues/92): Remove this
    # hacky re-routing of stdout once dsub removes it from the python API
    deleted = execute_redirect_stdout(
        lambda: ddel.ddel_tasks(provider=provider,
                                job_ids={job_id},
                                task_ids={task_id} if task_id else None))
    if len(deleted) != 1:
        raise InternalServerError('Failed to abort dsub job')
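As the TODO above notes, execute_redirect_stdout exists only to capture the output that dsub's Python API still prints to stdout. A minimal sketch of such a wrapper, assuming all it needs to do is silence stdout around the callable and return its result (an illustration, not the actual helper used in these examples):

import contextlib
import io

def execute_redirect_stdout(func):
    # Swallow anything the wrapped call prints to stdout, then return the
    # call's result. (The Python 2 code above would use StringIO.StringIO.)
    with contextlib.redirect_stdout(io.StringIO()):
        return func()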
Example #3
def get_job_aggregations(timeFrame, projectId=None):
    """Query for aggregated jobs in the given time frame.

    Args:
        timeFrame (str): Time frame to aggregate over
        projectId (str): The ID of the project to query

    Returns:
        AggregationResponse: Response containing aggregation of jobs
    """
    window_min = time_frame.time_frame_to_start_time(timeFrame)
    provider = providers.get_provider(_provider_type(), projectId,
                                      _auth_token())

    jobs = jobs_generator.generate_jobs_by_window(provider, projectId,
                                                  window_min)

    total_summary = {}
    user_summary = {}
    job_name_summary = {}
    label_summaries = {}

    for job in jobs:
        _count_total_summary(job, total_summary)
        _count_for_key(job, user_summary, job.extensions.user_id)
        _count_for_key(job, job_name_summary, job.name)
        _count_top_labels(job, label_summaries)

    aggregations = [
        _to_aggregation('User Id', 'userId', user_summary),
        _to_aggregation('Job Name', 'name', job_name_summary)
    ] + _to_top_labels_aggregations(label_summaries)

    return AggregationResponse(summary=_to_summary_counts(total_summary),
                               aggregations=aggregations)
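The counting helpers used above (_count_total_summary, _count_for_key, _count_top_labels) are not part of this example. A plausible minimal sketch of the first two, assuming each summary dict maps a grouping key to per-status counts; the exact structure is an assumption:

def _count_for_key(job, summary, key):
    # Hypothetical: tally the job's status under the given grouping key,
    # e.g. user_summary['alice']['Succeeded'] += 1
    counts = summary.setdefault(key, {})
    counts[job.status] = counts.get(job.status, 0) + 1

def _count_total_summary(job, total_summary):
    # Hypothetical: overall counts keyed by status alone
    total_summary[job.status] = total_summary.get(job.status, 0) + 1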
Example #4
def get_job_aggregations(timeFrame, projectId=None):
    """Query for aggregated jobs in the given time frame.

    Args:
        timeFrame (str): Time frame to aggregate over
        projectId (str): The ID of the project to query

    Returns:
        AggregationResponse: Response containing aggregation of jobs
    """
    window_min = time_frame.time_frame_to_start_time(timeFrame)
    provider = providers.get_provider(_provider_type(), projectId,
                                      _auth_token())

    jobs = jobs_generator.generate_jobs_by_window(provider, projectId,
                                                  window_min)

    total_summary = {}
    user_summary = {}
    job_name_summary = {}
    label_summaries = {}

    # AGGREGATION_JOB_LABEL_FILTER is a global config value used to distinguish testing jobs from batch to batch by timestamp
    has_aggregation_filter = 'AGGREGATION_JOB_LABEL_FILTER' in current_app.config
    # aggregation_filter is in string format 'key=value'
    if has_aggregation_filter:
        aggregation_filter = current_app.config[
            'AGGREGATION_JOB_LABEL_FILTER'].split('=')
        filter_key = aggregation_filter[0]
        filter_value = aggregation_filter[1]

    try:
        for job in jobs:
            if has_aggregation_filter and job.labels[
                    filter_key] != filter_value:
                continue

            _count_total_summary(job, total_summary)
            _count_for_key(job, user_summary, job.extensions.user_id)
            _count_for_key(job, job_name_summary, job.name)
            _count_top_labels(job, label_summaries)
    except apiclient.errors.HttpError as error:
        _handle_http_error(error, projectId)

    aggregations = [
        _to_aggregation('User Id', 'userId', user_summary),
        _to_aggregation('Job Name', 'name', job_name_summary)
    ] + _to_top_labels_aggregations(label_summaries)

    return AggregationResponse(summary=_to_summary_counts(total_summary),
                               aggregations=aggregations)
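The comments above describe AGGREGATION_JOB_LABEL_FILTER as a 'key=value' string read from the Flask application config; jobs whose label for that key does not match the value are skipped. For illustration, it might be set like this (the label name and value are made up):

from flask import Flask

app = Flask(__name__)
# Hypothetical: only aggregate jobs carrying the label test-batch=2020-01-01
app.config['AGGREGATION_JOB_LABEL_FILTER'] = 'test-batch=2020-01-01'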
Example #5
def query_jobs(body):
    """
    Query jobs by various filter criteria.

    Args:
        body (dict): The JSON request body.

    Returns:
        QueryJobsResponse: Response containing results from query
    """
    query = QueryJobsRequest.from_dict(body)
    proj_id = query.extensions.project_id if query.extensions else None
    provider = providers.get_provider(_provider_type(), proj_id, _auth_token())
    create_time_max, offset_id = page_tokens.decode_create_time_max(
        query.page_token) or (None, None)
    query.page_size = min(query.page_size or _DEFAULT_PAGE_SIZE,
                          _MAX_PAGE_SIZE)

    query.start = query.start.replace(tzinfo=tzlocal()).replace(
        microsecond=0) if query.start else None
    query.end = query.end.replace(tzinfo=tzlocal()).replace(
        microsecond=0) if query.end else None
    if query.submission:
        query.submission = query.submission.replace(tzinfo=tzlocal()).replace(
            microsecond=0)

    if query.page_size < 0:
        raise BadRequest("The pageSize query parameter must be non-negative.")
    if query.start and query.end and query.start >= query.end:
        raise BadRequest("Invalid query: start date must precede end date.")
    if query.start and create_time_max and query.start > create_time_max:
        raise BadRequest(
            "Invalid query: start date is invalid with pagination token.")
    if query.submission:
        if query.start and query.submission > query.start:
            raise BadRequest(
                "Invalid query: submission date must be <= start date.")
        if query.end and query.submission >= query.end:
            raise BadRequest(
                "Invalid query: submission date must precede end date.")

    generator = jobs_generator.generate_jobs(provider, query, create_time_max,
                                             offset_id)
    jobs = []
    try:
        for job in generator:
            jobs.append(job)
            if len(jobs) == query.page_size:
                break
    except apiclient.errors.HttpError as error:
        _handle_http_error(error, proj_id)

    try:
        next_job = generator.next()
        next_ct = next_job.submission
        last_ct = jobs[-1].submission
        offset_id = next_job.id if next_ct == last_ct else None
        token = page_tokens.encode_create_time_max(next_ct, offset_id)
        return QueryJobsResponse(results=jobs, next_page_token=token)
    except StopIteration:
        return QueryJobsResponse(results=jobs)
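query_jobs paginates by encoding the submission time of the next job (plus an offset id when consecutive jobs share that timestamp) into an opaque page token via page_tokens. A minimal sketch of how such a token could be encoded and decoded, assuming a base64-wrapped JSON payload; this is an illustration, not the actual page_tokens module:

import base64
import json
from datetime import datetime, timezone

def encode_create_time_max(create_time, offset_id=None):
    # Hypothetical: pack the timestamp (and optional offset id) into an
    # opaque, URL-safe token.
    payload = {'creationTime': create_time.timestamp(), 'offsetId': offset_id}
    return base64.urlsafe_b64encode(json.dumps(payload).encode()).decode()

def decode_create_time_max(token):
    # Hypothetical inverse: return (create_time_max, offset_id), or None when
    # no token was supplied.
    if not token:
        return None
    payload = json.loads(base64.urlsafe_b64decode(token))
    create_time = datetime.fromtimestamp(payload['creationTime'], tz=timezone.utc)
    return create_time, payload['offsetId']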