def query_instances_on_cluster(cluster, status, start_ms, end_ms):
    """Queries cluster for instance stats with the given status / time window.

    :param cluster: cook scheduler cluster to query
    :param status: instance status to filter on
    :param start_ms: window start, in milliseconds since the epoch
    :param end_ms: window end, in milliseconds since the epoch
    :return: dict with a single 'count' key (0 when the stats payload
             carries no 'count' entry)
    """
    params = {'status': status, 'start': start_ms, 'end': end_ms}
    stats = http.make_data_request(
        cluster,
        lambda: http.get(cluster, 'stats/instances', params=params))
    overall_stats = stats['overall']
    # dict.get replaces the "'count' in ... else 0" conditional:
    # one lookup instead of two, identical result.
    return {'count': overall_stats.get('count', 0)}
def make_instance_request(cluster, uuids):
    """Attempts to query instances corresponding to the given uuids from cluster."""
    # 'partial': 'true' asks the scheduler to return whatever subset of the
    # requested uuids it knows about instead of failing the whole request.
    query_params = {'instance': uuids, 'partial': 'true'}
    return http.get(cluster, 'rawscheduler', params=query_params)
def get_compute_cluster_config(cluster, compute_cluster_name):
    """
    :param cluster: cook scheduler cluster
    :param compute_cluster_name: compute cluster
    :return: config of the compute cluster

    Looks at both /settings (for static clusters) and /compute-clusters
    (for dynamic clusters)
    """
    # Fetch both sources up front, matching the original request order.
    cook_cluster_settings = http.get(cluster, 'settings', params={}).json()
    cook_compute_clusters = http.get(cluster, 'compute-clusters', params={}).json()
    # First look among the statically configured clusters in /settings.
    matched = None
    for entry in cook_cluster_settings['compute-clusters']:
        config = entry['config']
        if config['compute-cluster-name'] == compute_cluster_name:
            matched = config
            break
    if matched:
        return matched
    # Fall back to the dynamic, in-memory cluster definitions.
    for in_mem in cook_compute_clusters['in-mem-configs']:
        if in_mem['name'] == compute_cluster_name:
            return in_mem['cluster-definition']['config']
    # No match anywhere: mirror next()-without-default behavior.
    raise StopIteration
def make_group_request(cluster, uuids):
    """Attempts to query groups corresponding to the given uuids from cluster."""
    # 'partial': 'true' tolerates unknown uuids; 'detailed': 'true' asks the
    # endpoint for the expanded group representation.
    query_params = {'uuid': uuids, 'partial': 'true', 'detailed': 'true'}
    return http.get(cluster, 'group', params=query_params)
def get_compute_cluster_config(cluster, compute_cluster_name):
    """
    :param cluster: cook scheduler cluster
    :param compute_cluster_name: compute cluster
    :return: config of the compute cluster
    """
    cook_cluster_settings = http.get(cluster, 'settings', params={}).json()
    # Scan the configured compute clusters for a name match.
    for entry in cook_cluster_settings['compute-clusters']:
        config = entry['config']
        if config['compute-cluster-name'] == compute_cluster_name:
            return config
    # No match: mirror next()-without-default behavior.
    raise StopIteration
def list_jobs_on_cluster(cluster, state, user, start_ms, end_ms, name, limit,
                         include_custom_executor):
    """Queries cluster for jobs with the given state / user / time / name."""
    if 'all' in state:
        state = ['waiting', 'running', 'completed']
    params = {'user': user, 'name': name, 'limit': limit}
    # The two endpoints take the same filters but spell them differently:
    # /jobs takes a state list and start/end, /list takes a '+'-joined
    # state string and start-ms/end-ms.
    if include_custom_executor:
        endpoint = 'jobs'
        params.update({'state': state, 'start': start_ms, 'end': end_ms})
    else:
        endpoint = 'list'
        params.update({'state': '+'.join(state),
                       'start-ms': start_ms,
                       'end-ms': end_ms})
    jobs = http.make_data_request(
        cluster, lambda: http.get(cluster, endpoint, params=params))
    return {'jobs': jobs, 'count': len(jobs)}
def list_jobs_on_cluster(cluster, state, user, start_ms, end_ms, name, limit):
    """Queries cluster for jobs with the given state / user / time / name."""
    # 'all' expands to every job state; otherwise join the requested states
    # with '+' as the /list endpoint expects.
    states = ['waiting', 'running', 'completed'] if 'all' in state else state
    params = {
        'state': '+'.join(states),
        'user': user,
        'start-ms': start_ms,
        'end-ms': end_ms,
        'name': name,
        'limit': limit
    }
    jobs = http.make_data_request(
        cluster, lambda: http.get(cluster, 'list', params=params))
    return {'jobs': jobs, 'count': len(jobs)}
def list_jobs_on_cluster(cluster, state, user, lookback_hours, name, limit):
    """Queries cluster for jobs with the given state / user / time / name.

    :param cluster: cook scheduler cluster to query
    :param state: list of job states, or containing 'all' for every state
    :param user: user whose jobs to list
    :param lookback_hours: how far back (in hours) from now to search
    :param name: job name filter
    :param limit: maximum number of jobs to return
    :return: dict with 'jobs' (the job list) and 'count' (its length)
    """
    now_ms = int(round(time.time() * 1000))
    lookback_ms = int(lookback_hours * MILLIS_PER_HOUR)
    start_ms = now_ms - lookback_ms
    if 'all' in state:
        state_string = 'waiting+running+completed'
    else:
        state_string = '+'.join(state)
    params = {
        'state': state_string,
        'user': user,
        'start-ms': start_ms,
        'name': name,
        'limit': limit
    }
    # Fix: pass cluster as the first argument to make_data_request — every
    # other call site in this file uses make_data_request(cluster, fn).
    jobs = http.make_data_request(
        cluster, lambda: http.get(cluster, 'list', params=params))
    entities = {'jobs': jobs, 'count': len(jobs)}
    return entities
def copy_limits(args, config_path):
    """Copies limits (share and quota) for a particular user from one cluster to another cluster.

    Interactive: prints the current limits per pool and asks for
    confirmation (via stdin) before copying each pool's share and quota.

    :param args: parsed CLI args dict; reads 'user', 'from', 'from_url',
                 'to', and 'to_url'
    :param config_path: path to the configuration file to load
    :raises Exception: when neither a from-cluster name/URL nor a
                       to-cluster name/URL is provided
    """
    user = args.get('user')
    # The source cluster may be given by name (--from) or by URL (--from-url).
    from_cluster = args.get('from')
    from_url = args.get('from_url')
    if not from_cluster and not from_url:
        copy_limits_parser.print_help()
        print()
        raise Exception(f'You must provide either a from-cluster name (--from) or URL (--from-url).')
    # Likewise the destination: --to or --to-url.
    to_cluster = args.get('to')
    to_url = args.get('to_url')
    if not to_cluster and not to_url:
        copy_limits_parser.print_help()
        print()
        raise Exception(f'You must provide either a to-cluster name (--to) or URL (--to-url).')
    _, config_map = configuration.load_config_with_defaults(config_path)
    from_clusters = load_target_clusters(config_map, from_url, from_cluster)
    to_clusters = load_target_clusters(config_map, to_url, to_cluster)
    # Copying is strictly one source to one destination.
    assert len(from_clusters) == 1, 'Only a single from-cluster is supported.'
    assert len(to_clusters) == 1, 'Only a single to-cluster is supported.'
    from_cluster = from_clusters[0]
    to_cluster = to_clusters[0]
    from_cluster_name = from_cluster['name']
    to_cluster_name = to_cluster['name']
    print(f'Copying limits for {terminal.bold(user)} user '
          f'from {terminal.bold(from_cluster_name)} '
          f'to {terminal.bold(to_cluster_name)}:')
    from_pools = http.make_data_request(from_cluster, lambda: http.get(from_cluster, 'pools', params={}))
    to_pools = http.make_data_request(to_cluster, lambda: http.get(to_cluster, 'pools', params={}))
    # Index pools by name for quick membership / state checks.
    from_pools_dict = {pool['name']: pool for pool in from_pools}
    to_pools_dict = {pool['name']: pool for pool in to_pools}
    for pool_name, from_pool in from_pools_dict.items():
        # Only copy pools that exist on the destination and are not inactive.
        if pool_name in to_pools_dict and to_pools_dict[pool_name]['state'] != 'inactive':
            print(f'\n=== Pool: {pool_name} ===')
            # Show the user both clusters' current limits for this pool
            # before asking whether to copy.
            query_result = query([from_cluster, to_cluster], user)
            query_result = filter_query_result_by_pools(query_result, [pool_name])
            print_formatted(query_result)
            answer = input(f'Copy limits for {terminal.bold(pool_name)} pool '
                           f'from {terminal.bold(from_cluster_name)} '
                           f'to {terminal.bold(to_cluster_name)}? ')
            should_copy = str2bool(answer)
            if should_copy:
                from_dict = query_result['clusters'][from_cluster_name]['pools'][pool_name]
                reason = f'Copying limits for {user} user from {from_cluster_name} to {to_cluster_name}'
                # Copy the share first; a 201 from the endpoint means success.
                from_share = from_dict['share']
                resp = http.post(to_cluster, 'share',
                                 {'pool': pool_name, 'user': user, 'reason': reason, 'share': from_share})
                if resp.status_code != 201:
                    print_error(f'Setting share for {pool_name} on {to_cluster_name} '
                                f'failed with status code {resp.status_code}: {resp.text}')
                else:
                    print(terminal.success(f'Copied share for {pool_name} pool '
                                           f'from {from_cluster_name} '
                                           f'to {to_cluster_name}.'))
                # Then copy the quota; a failed share copy does not stop this.
                from_quota = from_dict['quota']
                resp = http.post(to_cluster, 'quota',
                                 {'pool': pool_name, 'user': user, 'reason': reason, 'quota': from_quota})
                if resp.status_code != 201:
                    print_error(f'Setting quota for {pool_name} on {to_cluster_name} '
                                f'failed with status code {resp.status_code}: {resp.text}')
                else:
                    print(terminal.success(f'Copied quota for {pool_name} pool '
                                           f'from {from_cluster_name} '
                                           f'to {to_cluster_name}.'))
def get_usage_on_cluster(cluster, user):
    """Queries cluster for usage information for the given user.

    Fetches the user's usage and share maps (and, when the cluster is
    pooled, per-pool state) and cross-validates them. Any retrieval or
    consistency failure is reported via print_error and collapses the
    result to {'count': 0}.

    :param cluster: cook scheduler cluster dict; reads 'name' and 'url'
    :param user: user whose usage to query
    :return: on success, a dict with 'using_pools' plus either per-pool
             results under 'pools' or flat usage/share data; on any
             failure, {'count': 0}
    """
    params = {'user': user, 'group_breakdown': 'true'}
    usage_map = http.make_data_request(
        cluster, lambda: http.get(cluster, 'usage', params=params))
    if not usage_map:
        print_error(
            f'Unable to retrieve usage information on {cluster["name"]} ({cluster["url"]}).'
        )
        return {'count': 0}
    # Pooled clusters nest their data under a 'pools' key.
    using_pools = 'pools' in usage_map
    pool_names = usage_map['pools'].keys() if using_pools else []
    share_map = http.make_data_request(
        cluster, lambda: http.get(cluster, 'share', params={'user': user}))
    if not share_map:
        print_error(
            f'Unable to retrieve share information on {cluster["name"]} ({cluster["url"]}).'
        )
        return {'count': 0}
    # Usage and share must agree on whether the cluster is pooled...
    if using_pools != ('pools' in share_map):
        print_error(
            f'Share information on {cluster["name"]} ({cluster["url"]}) is invalid. '
            f'Usage information is{"" if using_pools else " not"} per pool, but share '
            f'is{"" if not using_pools else " not"}')
        return {'count': 0}
    # ...and on the exact set of pool names.
    if pool_names != (share_map['pools'].keys() if using_pools else []):
        print_error(
            f'Share information on {cluster["name"]} ({cluster["url"]}) is invalid. '
            f'Usage information has pools: {pool_names}, but share '
            f'has pools: {share_map["pools"].keys()}')
        return {'count': 0}

    def make_query_result(using_pools, usage_map, share_map, pool_data=None):
        # Assemble one result entry from a usage map and its matching share,
        # merging in the cluster's job data and any extra pool metadata.
        query_result = {
            'using_pools': using_pools,
            'usage': usage_map['total_usage'],
            'share': share_map
        }
        query_result.update(get_job_data(cluster, usage_map))
        if pool_data:
            query_result.update(pool_data)
        return query_result

    if using_pools:
        pools = http.make_data_request(
            cluster, lambda: http.get(cluster, 'pools', params={}))
        pools_dict = {pool['name']: pool for pool in pools}
        # Every pool seen in the usage map must exist and report a state.
        for pool_name in pool_names:
            if pool_name not in pools_dict or 'state' not in pools_dict[
                    pool_name]:
                print_error(
                    f'Pool information on {cluster["name"]} ({cluster["url"]}) is invalid. '
                    f'Can\'t determine the state of pool {pool_name}')
                return {'count': 0}
        # One result entry per pool, tagged with that pool's state.
        query_result = {
            'using_pools': using_pools,
            'pools': {
                pool_name: make_query_result(
                    using_pools, usage_map['pools'][pool_name],
                    share_map['pools'][pool_name],
                    {'state': pools_dict[pool_name]['state']})
                for pool_name in pool_names
            }
        }
        return query_result
    else:
        return make_query_result(using_pools, usage_map, share_map)