def submit_federated(clusters, jobs, group, pool):
    """
    Submits the given jobs to the clusters one at a time, in order, stopping at
    the first cluster that answers with a "created" (201) status code.

    Each attempt POSTs the jobs to the cluster's 'jobs' endpoint; the group and
    the pool are added to the request body only when they are truthy.

    Returns 0 when a cluster returns 201. Returns 1 when a read timeout occurs,
    without trying the remaining clusters, because the submission may already
    have completed. If no cluster returns 201, prints the failure messages
    collected from connection errors and throws.
    """
    failure_messages = []
    for cluster in clusters:
        cluster_name = cluster['name']
        cluster_url = cluster['url']
        try:
            print_info('Attempting to submit on %s cluster...' % terminal.bold(cluster_name))

            payload = {'jobs': jobs}
            if group:
                payload['groups'] = [group]
            if pool:
                payload['pool'] = pool

            resp = http.post(cluster, 'jobs', payload)
            print_submit_result(cluster, resp)
            if resp.status_code != 201:
                # Not created on this cluster; move on to the next one.
                continue

            metrics.inc('command.submit.jobs', len(jobs))
            return 0
        except requests.exceptions.ReadTimeout as timeout_error:
            # Must be handled before IOError: the request may have reached the
            # cluster, so retrying elsewhere could submit the jobs twice.
            logging.exception(timeout_error)
            print_info(terminal.failed(
                f'Encountered read timeout with {cluster_name} ({cluster_url}). Your submission may have completed.'))
            return 1
        except IOError as io_error:
            logging.exception(io_error)
            reason = f'Cannot connect to {cluster_name} ({cluster_url})'
            failure_messages.append(submit_failed_message(cluster_name, reason))

    print_error(''.join(failure_messages))
    raise Exception(terminal.failed('Job submission failed on all of your configured clusters.'))
def submit_federated(clusters, jobs):
    """
    Submits the given jobs to the clusters one at a time, in order, stopping at
    the first cluster that answers with a "created" (201) status code.

    Each attempt POSTs the jobs to the cluster's 'rawscheduler' endpoint. A
    connection failure (IOError) is logged and reported, and the next cluster is
    tried.

    Returns 0 when a cluster returns 201. If no cluster returns 201, throws.
    """
    for cluster in clusters:
        cluster_name = cluster['name']
        try:
            print_info('Attempting to submit on %s cluster...' % colors.bold(cluster_name))
            resp = http.post(cluster, 'rawscheduler', {'jobs': jobs})
            print_submit_result(cluster, resp)
            if resp.status_code != 201:
                # Not created on this cluster; move on to the next one.
                continue

            metrics.inc('command.submit.jobs', len(jobs))
            return 0
        except IOError as io_error:
            logging.info(io_error)
            reason = 'Cannot connect to %s (%s)' % (cluster_name, cluster['url'])
            failure_text = submit_failed_message(cluster_name, reason)
            print_info('%s\n' % failure_text)

    raise Exception(
        colors.failed(
            'Job submission failed on all of your configured clusters.'))
def _copy_pool_limit(to_cluster, limit_type, pool_name, user, reason, value,
                     from_cluster_name, to_cluster_name):
    """POSTs one limit ('share' or 'quota') to the target cluster and reports the outcome."""
    resp = http.post(to_cluster, limit_type, {'pool': pool_name,
                                              'user': user,
                                              'reason': reason,
                                              limit_type: value})
    if resp.status_code != 201:
        print_error(f'Setting {limit_type} for {pool_name} on {to_cluster_name} '
                    f'failed with status code {resp.status_code}: {resp.text}')
    else:
        print(terminal.success(f'Copied {limit_type} for {pool_name} pool '
                               f'from {from_cluster_name} '
                               f'to {to_cluster_name}.'))


def copy_limits(args, config_path):
    """
    Copies limits (share and quota) for a particular user from one cluster to another cluster.

    Reads 'user', 'from', 'from_url', 'to' and 'to_url' from args. For every pool
    of the from-cluster that also exists on the to-cluster and is not 'inactive'
    there, the current limits are printed and the operator is asked whether to
    copy them. On confirmation, the share and then the quota are POSTed to the
    to-cluster, and the outcome of each request is printed.

    Raises Exception when neither a name nor a URL is given for the from-cluster
    or for the to-cluster, and AssertionError when either side does not resolve
    to exactly one cluster.
    """
    user = args.get('user')

    from_cluster = args.get('from')
    from_url = args.get('from_url')
    if not from_cluster and not from_url:
        copy_limits_parser.print_help()
        print()
        raise Exception('You must provide either a from-cluster name (--from) or URL (--from-url).')

    to_cluster = args.get('to')
    to_url = args.get('to_url')
    if not to_cluster and not to_url:
        copy_limits_parser.print_help()
        print()
        raise Exception('You must provide either a to-cluster name (--to) or URL (--to-url).')

    _, config_map = configuration.load_config_with_defaults(config_path)
    from_clusters = load_target_clusters(config_map, from_url, from_cluster)
    to_clusters = load_target_clusters(config_map, to_url, to_cluster)

    # Raised explicitly instead of via `assert` so the checks still run under
    # `python -O`; the exception type is kept as AssertionError for callers.
    if len(from_clusters) != 1:
        raise AssertionError('Only a single from-cluster is supported.')
    if len(to_clusters) != 1:
        raise AssertionError('Only a single to-cluster is supported.')

    from_cluster = from_clusters[0]
    to_cluster = to_clusters[0]
    from_cluster_name = from_cluster['name']
    to_cluster_name = to_cluster['name']
    print(f'Copying limits for {terminal.bold(user)} user '
          f'from {terminal.bold(from_cluster_name)} '
          f'to {terminal.bold(to_cluster_name)}:')

    from_pools = http.make_data_request(from_cluster, lambda: http.get(from_cluster, 'pools', params={}))
    to_pools = http.make_data_request(to_cluster, lambda: http.get(to_cluster, 'pools', params={}))
    from_pools_dict = {pool['name']: pool for pool in from_pools}
    to_pools_dict = {pool['name']: pool for pool in to_pools}

    for pool_name in from_pools_dict:
        # Only pools that exist on the target and are not inactive there are candidates.
        if pool_name not in to_pools_dict or to_pools_dict[pool_name]['state'] == 'inactive':
            continue

        print(f'\n=== Pool: {pool_name} ===')
        # Queried per pool, right before prompting, so the operator sees the
        # limits as they currently are on both clusters.
        query_result = query([from_cluster, to_cluster], user)
        query_result = filter_query_result_by_pools(query_result, [pool_name])
        print_formatted(query_result)

        answer = input(f'Copy limits for {terminal.bold(pool_name)} pool '
                       f'from {terminal.bold(from_cluster_name)} '
                       f'to {terminal.bold(to_cluster_name)}? ')
        if not str2bool(answer):
            continue

        from_dict = query_result['clusters'][from_cluster_name]['pools'][pool_name]
        reason = f'Copying limits for {user} user from {from_cluster_name} to {to_cluster_name}'

        # Share first, then quota; a failure on one does not prevent the other.
        _copy_pool_limit(to_cluster, 'share', pool_name, user, reason, from_dict['share'],
                         from_cluster_name, to_cluster_name)
        _copy_pool_limit(to_cluster, 'quota', pool_name, user, reason, from_dict['quota'],
                         from_cluster_name, to_cluster_name)