Beispiel #1
0
def query_cluster(cluster, uuids, pred, timeout, interval, make_request_fn,
                  entity_type):
    """
    Fetch the entities identified by uuids from cluster, optionally
    waiting until pred is satisfied.

    The entities are obtained by passing a thunk around
    make_request_fn(cluster, uuids) to http.make_data_request. When pred is
    falsy, or nothing came back, the fetched entities are returned as-is.
    Otherwise a progress line is registered, and if pred(entities) is not
    already truthy the request is re-issued through
    wait_until(satisfy_pred, timeout, interval).

    Returns the initially fetched entities, or whatever wait_until returned
    when waiting was necessary (presumably pred's truthy result -- confirm
    against wait_until).

    Raises Exception for an unrecognised entity_type and TimeoutError when
    wait_until comes back with a falsy value.
    """
    def fetch():
        return http.make_data_request(lambda: make_request_fn(cluster, uuids))

    def satisfy_pred():
        return pred(fetch())

    entities = fetch()
    if not pred or len(entities) == 0:
        # Nothing to wait for: no predicate given, or no entities returned.
        return entities

    # Pairs are compared with == (in this order) to mirror an if/elif chain.
    wait_prefixes = (
        ('job', 'Waiting for the following jobs'),
        ('instance', 'Waiting for instances of the following jobs'),
        ('group', 'Waiting for the following job groups'),
    )
    prefix = next(
        (text for kind, text in wait_prefixes if entity_type == kind), None)
    if prefix is None:
        raise Exception('Invalid entity type %s.' % entity_type)

    uuid_text = ', '.join(entity['uuid'] for entity in entities)
    cluster_label = colors.bold(cluster['name'])
    index = progress.add('%s on %s: %s' % (prefix, cluster_label, uuid_text))

    if not pred(entities):
        # Not satisfied yet: keep re-querying until it is, or we time out.
        entities = wait_until(satisfy_pred, timeout, interval)
        if not entities:
            raise TimeoutError('Timeout waiting for response.')

    progress.update(index, colors.bold('Done'))
    return entities
Beispiel #2
0
def __query_cluster(cluster, uuids, pred, timeout, interval, make_request_fn, entity_type):
    """
    Fetch the entities identified by uuids from cluster, optionally
    waiting until pred is satisfied.

    The entities are obtained by passing the cluster and a thunk around
    make_request_fn(cluster, uuids) to http.make_data_request. When pred is
    falsy, or nothing came back, the fetched entities are returned as-is.
    Otherwise a progress line showing the entity count is registered, and if
    pred(entities) is not already truthy the request is re-issued through
    wait_until(satisfy_pred, timeout, interval).

    Returns the initially fetched entities, or whatever wait_until returned
    when waiting was necessary (presumably pred's truthy result -- confirm
    against wait_until).

    Raises Exception for an unrecognised entity_type and TimeoutError when
    wait_until comes back with a falsy value.
    """

    def fetch():
        return http.make_data_request(cluster, lambda: make_request_fn(cluster, uuids))

    def satisfy_pred():
        return pred(fetch())

    entities = fetch()
    num_entities = len(entities)
    if not pred or num_entities <= 0:
        # Nothing to wait for: no predicate given, or no entities returned.
        return entities

    s = '' if num_entities <= 1 else 's'
    num_string = colors.bold(str(num_entities))

    # Pairs are compared with == (in this order) to mirror an if/elif chain.
    wait_texts = (
        (Types.JOB, f'Waiting for {num_string} job{s}'),
        (Types.INSTANCE, f'Waiting for instances of {num_string} job{s}'),
        (Types.GROUP, f'Waiting for {num_string} job group{s}'),
    )
    prefix = next((text for kind, text in wait_texts if entity_type == kind), None)
    if prefix is None:
        raise Exception(f'Invalid entity type {entity_type}.')

    cluster_label = colors.bold(cluster['name'])
    index = progress.add(f'{prefix} on {cluster_label}')

    if not pred(entities):
        # Not satisfied yet: keep re-querying until it is, or we time out.
        entities = wait_until(satisfy_pred, timeout, interval)
        if not entities:
            raise TimeoutError('Timeout waiting for response.')

    progress.update(index, colors.bold('Done'))
    return entities
Beispiel #3
0
def print_formatted_cluster_or_pool_usage(cluster_or_pool,
                                          cluster_or_pool_usage):
    """
    Print the usage query result for one cluster or pool as nested bullets.

    cluster_or_pool is the heading that gets printed in bold.
    cluster_or_pool_usage is a mapping read via its 'usage', 'share' and
    'applications' keys; each application entry is read via 'usage' and
    'groups', and each group entry via 'usage' and 'jobs'. Everything is
    emitted through print_info; nothing is returned.
    """
    overall_usage = cluster_or_pool_usage['usage']
    overall_share = cluster_or_pool_usage['share']

    # Heading, followed by the share and usage summaries.
    print_info(colors.bold(cluster_or_pool))
    print_info(format_share(overall_share))
    print_info(format_usage(overall_usage))

    applications = cluster_or_pool_usage['applications']
    print_info('Applications:' if applications
               else colors.waiting('Nothing Running'))

    for app_name, app_usage in applications.items():
        app_totals = app_usage['usage']
        # Falsy application names get a placeholder label.
        app_label = colors.running(app_name or '[no application defined]')
        print_info(f'- {app_label}')
        print_info(f'  {format_usage(app_totals)}')
        print_info('  Job Groups:')

        for group_name, group_entry in app_usage['groups'].items():
            group_totals = group_entry['usage']
            group_jobs = group_entry['jobs']
            # Falsy group names get a placeholder label.
            group_label = colors.bold(group_name or '[ungrouped]')
            print_info(f'\t- {group_label}')
            print_info(f'\t  {format_usage(group_totals)}')
            print_info(f'\t  Jobs: {len(group_jobs)}')
            print_info('')

    print_info('')
Beispiel #4
0
def submit_federated(clusters, jobs, group, pool):
    """
    Try to submit jobs to each cluster in turn until one answers with
    HTTP status 201 ("created").

    The request body always carries 'jobs'; 'groups' (a one-element list)
    and 'pool' are added only when group / pool are truthy.

    Returns 0 as soon as a cluster responds with 201, and 1 if a read
    timeout occurs (the submission may have gone through, so no further
    cluster is tried). Any other IOError is reported and the next cluster
    is attempted. Raises Exception when no cluster returned 201.
    """
    def build_body():
        # A fresh dict is built for every attempt.
        body = {'jobs': jobs}
        if group:
            body['groups'] = [group]
        if pool:
            body['pool'] = pool
        return body

    for cluster in clusters:
        name = cluster['name']
        url = cluster['url']
        try:
            bold_name = colors.bold(name)
            print_info(f'Attempting to submit on {bold_name} cluster...')

            response = http.post(cluster, 'jobs', build_body())
            print_submit_result(cluster, response)
            if response.status_code != 201:
                # Not created here; move on to the next cluster.
                continue
            metrics.inc('command.submit.jobs', len(jobs))
            return 0
        # requests' ReadTimeout derives from IOError, so it must be
        # handled before the generic IOError clause below.
        except requests.exceptions.ReadTimeout as rt:
            logging.exception(rt)
            timeout_text = colors.failed(
                f'Encountered read timeout with {name} ({url}). Your submission may have completed.'
            )
            print_info(timeout_text)
            return 1
        except IOError as ioe:
            logging.exception(ioe)
            failure = submit_failed_message(
                name, f'Cannot connect to {name} ({url})')
            print_info(f'{failure}\n')

    all_failed = colors.failed(
        'Job submission failed on all of your configured clusters.')
    raise Exception(all_failed)
Beispiel #5
0
def submit_federated(clusters, jobs):
    """
    Try to submit jobs to each cluster in turn until one answers with
    HTTP status 201 ("created").

    Each attempt posts {'jobs': jobs} to the cluster's 'rawscheduler'
    endpoint. Returns 0 as soon as a cluster responds with 201. An IOError
    during an attempt is logged and reported, after which the next cluster
    is tried. Raises Exception when no cluster returned 201.
    """
    for cluster in clusters:
        name = cluster['name']
        try:
            banner = 'Attempting to submit on %s cluster...' % colors.bold(name)
            print_info(banner)
            response = http.post(cluster, 'rawscheduler', {'jobs': jobs})
            print_submit_result(cluster, response)
            if response.status_code != 201:
                # Not created here; move on to the next cluster.
                continue
            metrics.inc('command.submit.jobs', len(jobs))
            return 0
        except IOError as ioe:
            logging.info(ioe)
            # The URL is only looked up when a failure has to be reported.
            reason = 'Cannot connect to %s (%s)' % (name, cluster['url'])
            failure = submit_failed_message(name, reason)
            print_info('%s\n' % failure)

    all_failed = colors.failed(
        'Job submission failed on all of your configured clusters.')
    raise Exception(all_failed)