Example #1
0
def query_instances_on_cluster(cluster, status, start_ms, end_ms):
    """Queries cluster for instance stats with the given status / time.

    :param cluster: cluster to query; handed to the http helpers unchanged
    :param status: sent as the 'status' query parameter
    :param start_ms: sent as the 'start' query parameter
    :param end_ms: sent as the 'end' query parameter
    :return: dict of the form {'count': n}, where n is the 'count' entry of the
        response's 'overall' section, or 0 when that entry is absent or the
        request produced no data
    """
    params = {'status': status, 'start': start_ms, 'end': end_ms}
    stats = http.make_data_request(cluster, lambda: http.get(cluster, 'stats/instances', params=params))
    # make_data_request can hand back an empty payload (get_usage_on_cluster
    # guards against that as well); report zero instead of failing on the
    # 'overall' lookup below.
    if not stats:
        return {'count': 0}
    return {'count': stats['overall'].get('count', 0)}
Example #2
0
def make_instance_request(cluster, uuids):
    """Issues a GET to the 'rawscheduler' endpoint of cluster for the given instance uuids.

    The uuids are sent as the 'instance' query parameter, together with
    'partial' set to 'true'. Whatever http.get returns is passed back as is.
    """
    query_params = {'instance': uuids, 'partial': 'true'}
    return http.get(cluster, 'rawscheduler', params=query_params)
Example #3
0
def get_compute_cluster_config(cluster, compute_cluster_name):
    """
    Looks up the config of a compute cluster by name.

    Looks at both /settings (for static clusters) and /compute-clusters
    (for dynamic clusters). Both endpoints are fetched up front; the
    /settings entries are searched first and the in-memory configs from
    /compute-clusters are only consulted when that search finds nothing.

    :param cluster: cook scheduler cluster
    :param compute_cluster_name: compute cluster
    :return: config of the compute cluster
    :raises StopIteration: if neither endpoint lists a matching compute cluster
    """
    settings = http.get(cluster, 'settings', params={}).json()
    dynamic_clusters = http.get(cluster, 'compute-clusters', params={}).json()

    config = None
    for entry in settings['compute-clusters']:
        candidate = entry['config']
        if candidate['compute-cluster-name'] == compute_cluster_name:
            config = candidate
            break
    if config:
        return config

    # No default is given to next(), so a miss here surfaces as StopIteration.
    dynamic_matches = (item['cluster-definition']['config']
                       for item in dynamic_clusters['in-mem-configs']
                       if item['name'] == compute_cluster_name)
    return next(dynamic_matches)
Example #4
0
def make_group_request(cluster, uuids):
    """Issues a GET to the 'group' endpoint of cluster for the given group uuids.

    The uuids are sent as the 'uuid' query parameter, with both 'partial'
    and 'detailed' set to 'true'. Whatever http.get returns is passed back as is.
    """
    query_params = {'uuid': uuids, 'partial': 'true', 'detailed': 'true'}
    return http.get(cluster, 'group', params=query_params)
Example #5
0
def get_compute_cluster_config(cluster, compute_cluster_name):
    """
    Looks up the config of a compute cluster by name in the /settings response.

    :param cluster: cook scheduler cluster
    :param compute_cluster_name: compute cluster
    :return: config of the compute cluster
    :raises StopIteration: if no entry under 'compute-clusters' has that name
    """
    settings = http.get(cluster, 'settings', params={}).json()
    configs = (entry['config'] for entry in settings['compute-clusters'])
    matching = (config for config in configs
                if config['compute-cluster-name'] == compute_cluster_name)
    # No default is given to next(), so a miss surfaces as StopIteration.
    return next(matching)
Example #6
0
def list_jobs_on_cluster(cluster, state, user, start_ms, end_ms, name, limit,
                         include_custom_executor):
    """Queries cluster for jobs with the given state / user / time / name.

    When include_custom_executor is truthy the 'jobs' endpoint is used, with
    the states passed as a list and the time range as 'start' / 'end'.
    Otherwise the 'list' endpoint is used, with the states joined by '+' and
    the time range as 'start-ms' / 'end-ms'.

    :return: dict with the response under 'jobs' and its length under 'count'
    """
    # 'all' expands to every individual state.
    states = ['waiting', 'running', 'completed'] if 'all' in state else state
    params = {'user': user, 'name': name, 'limit': limit}
    if include_custom_executor:
        endpoint = 'jobs'
        params.update({'state': states, 'start': start_ms, 'end': end_ms})
    else:
        endpoint = 'list'
        params.update({'state': '+'.join(states), 'start-ms': start_ms, 'end-ms': end_ms})
    jobs = http.make_data_request(
        cluster, lambda: http.get(cluster, endpoint, params=params))
    return {'jobs': jobs, 'count': len(jobs)}
Example #7
0
def list_jobs_on_cluster(cluster, state, user, start_ms, end_ms, name, limit):
    """Queries cluster for jobs with the given state / user / time / name.

    The states are sent to the 'list' endpoint as a single '+'-joined string;
    'all' stands for waiting, running and completed.

    :return: dict with the response under 'jobs' and its length under 'count'
    """
    state_string = 'waiting+running+completed' if 'all' in state else '+'.join(state)
    params = {'state': state_string, 'user': user}
    params['start-ms'] = start_ms
    params['end-ms'] = end_ms
    params['name'] = name
    params['limit'] = limit

    def fetch():
        return http.get(cluster, 'list', params=params)

    jobs = http.make_data_request(cluster, fetch)
    return {'jobs': jobs, 'count': len(jobs)}
Example #8
0
def list_jobs_on_cluster(cluster, state, user, lookback_hours, name, limit):
    """Queries cluster for jobs with the given state / user / time / name.

    The start of the time window is the current time minus lookback_hours,
    expressed in milliseconds. The states are sent to the 'list' endpoint as
    a single '+'-joined string; 'all' stands for waiting, running and completed.

    :return: dict with the response under 'jobs' and its length under 'count'
    """
    current_ms = int(round(time.time() * 1000))
    window_ms = int(lookback_hours * MILLIS_PER_HOUR)
    state_string = 'waiting+running+completed' if 'all' in state else '+'.join(state)
    params = {'state': state_string, 'user': user}
    params['start-ms'] = current_ms - window_ms
    params['name'] = name
    params['limit'] = limit

    def fetch():
        return http.get(cluster, 'list', params=params)

    # NOTE(review): make_data_request is called with the request function only;
    # confirm that matches the helper's signature in this version of the http module.
    jobs = http.make_data_request(fetch)
    return {'jobs': jobs, 'count': len(jobs)}
Example #9
0
def copy_limits(args, config_path):
    """Copies limits (share and quota) for a particular user from one cluster to another cluster.

    Every pool of the from-cluster that also exists on the to-cluster, and is
    not 'inactive' there, is handled in turn: the current limits of both
    clusters are printed, the user is asked for confirmation, and on a
    positive answer the share and then the quota of the from-cluster are
    POSTed to the to-cluster. A failed POST is reported and does not stop
    the remaining copies.

    :param args: mapping of parsed arguments; reads 'user', 'from', 'from_url', 'to' and 'to_url'
    :param config_path: passed to configuration.load_config_with_defaults
    :raises Exception: if neither a name nor a URL is given for the from-cluster or the to-cluster
    :raises AssertionError: if the from / to selection does not resolve to exactly one cluster each
    """
    user = args.get('user')

    from_cluster = args.get('from')
    from_url = args.get('from_url')
    if not from_cluster and not from_url:
        copy_limits_parser.print_help()
        print()
        raise Exception('You must provide either a from-cluster name (--from) or URL (--from-url).')

    to_cluster = args.get('to')
    to_url = args.get('to_url')
    if not to_cluster and not to_url:
        copy_limits_parser.print_help()
        print()
        raise Exception('You must provide either a to-cluster name (--to) or URL (--to-url).')

    _, config_map = configuration.load_config_with_defaults(config_path)
    from_clusters = load_target_clusters(config_map, from_url, from_cluster)
    to_clusters = load_target_clusters(config_map, to_url, to_cluster)
    assert len(from_clusters) == 1, 'Only a single from-cluster is supported.'
    assert len(to_clusters) == 1, 'Only a single to-cluster is supported.'
    # From here on the names refer to the resolved cluster entries, not the raw arguments.
    from_cluster = from_clusters[0]
    to_cluster = to_clusters[0]
    from_cluster_name = from_cluster['name']
    to_cluster_name = to_cluster['name']
    print(f'Copying limits for {terminal.bold(user)} user '
          f'from {terminal.bold(from_cluster_name)} '
          f'to {terminal.bold(to_cluster_name)}:')
    from_pools = http.make_data_request(from_cluster, lambda: http.get(from_cluster, 'pools', params={}))
    to_pools = http.make_data_request(to_cluster, lambda: http.get(to_cluster, 'pools', params={}))
    from_pools_dict = {pool['name']: pool for pool in from_pools}
    to_pools_dict = {pool['name']: pool for pool in to_pools}
    for pool_name in from_pools_dict:
        # Only pools that exist on the target and are not inactive there are candidates.
        if pool_name not in to_pools_dict or to_pools_dict[pool_name]['state'] == 'inactive':
            continue

        print(f'\n=== Pool: {pool_name} ===')
        # Queried per pool, so the limits shown are read right before each prompt.
        query_result = query([from_cluster, to_cluster], user)
        query_result = filter_query_result_by_pools(query_result, [pool_name])
        print_formatted(query_result)
        answer = input(f'Copy limits for {terminal.bold(pool_name)} pool '
                       f'from {terminal.bold(from_cluster_name)} '
                       f'to {terminal.bold(to_cluster_name)}? ')
        if not str2bool(answer):
            continue

        from_dict = query_result['clusters'][from_cluster_name]['pools'][pool_name]
        reason = f'Copying limits for {user} user from {from_cluster_name} to {to_cluster_name}'

        # Share and quota are copied the same way: the limit type is the endpoint
        # name as well as the key that carries the value in the request body.
        for limit_type in ('share', 'quota'):
            resp = http.post(to_cluster,
                             limit_type,
                             {'pool': pool_name,
                              'user': user,
                              'reason': reason,
                              limit_type: from_dict[limit_type]})
            if resp.status_code != 201:
                print_error(f'Setting {limit_type} for {pool_name} on {to_cluster_name} '
                            f'failed with status code {resp.status_code}: {resp.text}')
            else:
                print(terminal.success(f'Copied {limit_type} for {pool_name} pool '
                                       f'from {from_cluster_name} '
                                       f'to {to_cluster_name}.'))
Example #10
0
def get_usage_on_cluster(cluster, user):
    """Queries cluster for usage information for the given user.

    Usage and share information are fetched and cross-checked: either both
    are broken down per pool (with the same pool names) or neither is. For
    per-pool data the pool list is fetched as well, so that each pool's
    state can be attached to its result.

    :return: {'count': 0} if a request yields no data or the responses are
        inconsistent (an error is printed in that case). Otherwise a result
        dict with 'using_pools', 'usage', 'share' and the entries supplied by
        get_job_data; for per-pool data these results (plus 'state') are
        nested by pool name under 'pools'.
    """
    usage_params = {'user': user, 'group_breakdown': 'true'}
    usage_map = http.make_data_request(
        cluster, lambda: http.get(cluster, 'usage', params=usage_params))
    if not usage_map:
        print_error(
            f'Unable to retrieve usage information on {cluster["name"]} ({cluster["url"]}).'
        )
        return {'count': 0}

    using_pools = 'pools' in usage_map
    if using_pools:
        pool_names = usage_map['pools'].keys()
    else:
        pool_names = []

    share_map = http.make_data_request(
        cluster, lambda: http.get(cluster, 'share', params={'user': user}))
    if not share_map:
        print_error(
            f'Unable to retrieve share information on {cluster["name"]} ({cluster["url"]}).'
        )
        return {'count': 0}

    # Usage and share must agree on whether they are reported per pool ...
    share_has_pools = 'pools' in share_map
    if using_pools != share_has_pools:
        print_error(
            f'Share information on {cluster["name"]} ({cluster["url"]}) is invalid. '
            f'Usage information is{"" if using_pools else " not"} per pool, but share '
            f'is{"" if not using_pools else " not"}')
        return {'count': 0}

    # ... and, if they are, on the set of pools.
    share_pool_names = share_map['pools'].keys() if using_pools else []
    if pool_names != share_pool_names:
        print_error(
            f'Share information on {cluster["name"]} ({cluster["url"]}) is invalid. '
            f'Usage information has pools: {pool_names}, but share '
            f'has pools: {share_map["pools"].keys()}')
        return {'count': 0}

    def build_result(usage, share, extra=None):
        # Assembles one result entry: usage totals, share, job data and optional extras.
        result = {
            'using_pools': using_pools,
            'usage': usage['total_usage'],
            'share': share
        }
        result.update(get_job_data(cluster, usage))
        if extra:
            result.update(extra)
        return result

    if not using_pools:
        return build_result(usage_map, share_map)

    pools = http.make_data_request(
        cluster, lambda: http.get(cluster, 'pools', params={}))
    pools_dict = {pool['name']: pool for pool in pools}

    # Every pool with usage must have a known state before any result is built.
    for pool_name in pool_names:
        state_known = pool_name in pools_dict and 'state' in pools_dict[pool_name]
        if not state_known:
            print_error(
                f'Pool information on {cluster["name"]} ({cluster["url"]}) is invalid. '
                f'Can\'t determine the state of pool {pool_name}')
            return {'count': 0}

    per_pool = {}
    for pool_name in pool_names:
        per_pool[pool_name] = build_result(
            usage_map['pools'][pool_name],
            share_map['pools'][pool_name],
            {'state': pools_dict[pool_name]['state']})
    return {'using_pools': using_pools, 'pools': per_pool}