def query_cluster(cluster, uuids, pred, timeout, interval, make_request_fn, entity_type):
    """
    Queries the given cluster for the given uuids with
    an optional predicate, pred, that must be satisfied
    """

    def satisfy_pred():
        return pred(http.make_data_request(lambda: make_request_fn(cluster, uuids)))

    entities = http.make_data_request(lambda: make_request_fn(cluster, uuids))
    if pred and len(entities) > 0:
        if entity_type == 'job':
            wait_text = 'Waiting for the following jobs'
        elif entity_type == 'instance':
            wait_text = 'Waiting for instances of the following jobs'
        elif entity_type == 'group':
            wait_text = 'Waiting for the following job groups'
        else:
            raise Exception('Invalid entity type %s.' % entity_type)

        uuid_text = ', '.join([e['uuid'] for e in entities])
        wait_text = '%s on %s: %s' % (wait_text, colors.bold(cluster['name']), uuid_text)
        index = progress.add(wait_text)
        if pred(entities):
            progress.update(index, colors.bold('Done'))
        else:
            entities = wait_until(satisfy_pred, timeout, interval)
            if entities:
                progress.update(index, colors.bold('Done'))
            else:
                raise TimeoutError('Timeout waiting for response.')

    return entities
def __query_cluster(cluster, uuids, pred, timeout, interval, make_request_fn, entity_type):
    """
    Queries the given cluster for the given uuids with
    an optional predicate, pred, that must be satisfied
    """

    def satisfy_pred():
        return pred(http.make_data_request(cluster, lambda: make_request_fn(cluster, uuids)))

    # Initial fetch of the entities for the given uuids
    entities = http.make_data_request(cluster, lambda: make_request_fn(cluster, uuids))
    num_entities = len(entities)
    if pred and num_entities > 0:
        s = 's' if num_entities > 1 else ''
        num_string = colors.bold(str(num_entities))
        if entity_type == Types.JOB:
            wait_text = f'Waiting for {num_string} job{s}'
        elif entity_type == Types.INSTANCE:
            wait_text = f'Waiting for instances of {num_string} job{s}'
        elif entity_type == Types.GROUP:
            wait_text = f'Waiting for {num_string} job group{s}'
        else:
            raise Exception(f'Invalid entity type {entity_type}.')

        wait_text = f'{wait_text} on {colors.bold(cluster["name"])}'
        index = progress.add(wait_text)
        if pred(entities):
            progress.update(index, colors.bold('Done'))
        else:
            # Predicate not yet satisfied; poll until it is, or until the timeout elapses
            entities = wait_until(satisfy_pred, timeout, interval)
            if entities:
                progress.update(index, colors.bold('Done'))
            else:
                raise TimeoutError('Timeout waiting for response.')

    return entities
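# A minimal usage sketch for the query helper above. The cluster dict, uuid, and the
# job status value are illustrative assumptions, not taken from any real configuration;
# make_request_fn is whatever callable performs the actual HTTP query for the entity type.

def all_jobs_completed(jobs):
    """Example predicate: True once every returned job has finished."""
    return all(job.get('status') == 'completed' for job in jobs)

# example_cluster = {'name': 'dev', 'url': 'http://localhost:12321'}
# example_uuids = ['123e4567-e89b-12d3-a456-426614174000']
# jobs = __query_cluster(example_cluster, example_uuids, all_jobs_completed,
#                        timeout=30, interval=5,
#                        make_request_fn=make_job_request,  # hypothetical helper
#                        entity_type=Types.JOB)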
def print_formatted_cluster_or_pool_usage(cluster_or_pool, cluster_or_pool_usage):
    """Prints the query result for a cluster or pool in a cluster as a hierarchical set of bullets"""
    usage_map = cluster_or_pool_usage['usage']
    share_map = cluster_or_pool_usage['share']
    print_info(colors.bold(cluster_or_pool))
    print_info(format_share(share_map))
    print_info(format_usage(usage_map))
    applications = cluster_or_pool_usage['applications']
    if applications:
        print_info('Applications:')
    else:
        print_info(colors.waiting('Nothing Running'))
    for application, application_usage in applications.items():
        usage_map = application_usage['usage']
        print_info(f'- {colors.running(application if application else "[no application defined]")}')
        print_info(f'  {format_usage(usage_map)}')
        print_info('  Job Groups:')
        for group, group_usage in application_usage['groups'].items():
            usage_map = group_usage['usage']
            jobs = group_usage['jobs']
            print_info(f'\t- {colors.bold(group if group else "[ungrouped]")}')
            print_info(f'\t  {format_usage(usage_map)}')
            print_info(f'\t  Jobs: {len(jobs)}')
            print_info('')
    print_info('')
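# Illustrative shape of the usage data this printer expects, inferred from the keys it
# reads above ('usage', 'share', 'applications', 'groups', 'jobs'). The resource names
# and numbers are made-up placeholder values.
example_usage = {
    'usage': {'cpus': 4.0, 'mem': 8192.0, 'gpus': 0.0},
    'share': {'cpus': 10.0, 'mem': 32768.0, 'gpus': 1.0},
    'applications': {
        'my-app': {
            'usage': {'cpus': 4.0, 'mem': 8192.0, 'gpus': 0.0},
            'groups': {
                'a1b2c3d4': {
                    'usage': {'cpus': 2.0, 'mem': 4096.0, 'gpus': 0.0},
                    'jobs': ['job-uuid-1', 'job-uuid-2']
                }
            }
        }
    }
}

# print_formatted_cluster_or_pool_usage('my-pool', example_usage)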
def submit_federated(clusters, jobs, group, pool):
    """
    Attempts to submit the provided jobs to each cluster in clusters, until a cluster
    returns a "created" status code. If no cluster returns "created" status, throws.
    """
    for cluster in clusters:
        cluster_name = cluster['name']
        cluster_url = cluster['url']
        try:
            print_info('Attempting to submit on %s cluster...' % colors.bold(cluster_name))

            # Only include the group and pool keys when they were provided
            json_body = {'jobs': jobs}
            if group:
                json_body['groups'] = [group]
            if pool:
                json_body['pool'] = pool

            resp = http.post(cluster, 'jobs', json_body)
            print_submit_result(cluster, resp)
            if resp.status_code == 201:
                metrics.inc('command.submit.jobs', len(jobs))
                return 0
        except requests.exceptions.ReadTimeout as rt:
            logging.exception(rt)
            print_info(colors.failed(
                f'Encountered read timeout with {cluster_name} ({cluster_url}). Your submission may have completed.'))
            return 1
        except IOError as ioe:
            logging.exception(ioe)
            reason = f'Cannot connect to {cluster_name} ({cluster_url})'
            message = submit_failed_message(cluster_name, reason)
            print_info(f'{message}\n')

    # Every configured cluster was tried without a "created" response
    raise Exception(colors.failed('Job submission failed on all of your configured clusters.'))
def submit_federated(clusters, jobs):
    """
    Attempts to submit the provided jobs to each cluster in clusters, until a cluster
    returns a "created" status code. If no cluster returns "created" status, throws.
    """
    for cluster in clusters:
        cluster_name = cluster['name']
        try:
            print_info('Attempting to submit on %s cluster...' % colors.bold(cluster_name))
            resp = http.post(cluster, 'rawscheduler', {'jobs': jobs})
            print_submit_result(cluster, resp)
            if resp.status_code == 201:
                metrics.inc('command.submit.jobs', len(jobs))
                return 0
        except IOError as ioe:
            logging.info(ioe)
            reason = 'Cannot connect to %s (%s)' % (cluster_name, cluster['url'])
            print_info('%s\n' % submit_failed_message(cluster_name, reason))

    raise Exception(colors.failed('Job submission failed on all of your configured clusters.'))
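# Hypothetical invocation sketch for submit_federated. The cluster URL and job fields
# below are illustrative placeholders rather than a real configuration; the
# four-argument variant defined earlier additionally accepts a job group and a pool name.
import uuid

example_clusters = [{'name': 'dev', 'url': 'http://localhost:12321'}]
example_jobs = [{'uuid': str(uuid.uuid4()),
                 'command': 'echo hello',
                 'cpus': 1.0,
                 'mem': 128}]

# exit_code = submit_federated(example_clusters, example_jobs)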