def submit_federated(clusters, jobs, group, pool):
    """Attempt to submit the provided jobs to each cluster in turn.

    Tries each cluster in clusters until one returns a "created" (201)
    status code.

    Parameters:
        clusters: iterable of cluster dicts, each with 'name' and 'url' keys.
        jobs: list of job dicts to submit.
        group: optional group dict; sent as a one-element 'groups' list.
        pool: optional pool name; sent as 'pool' in the request body.

    Returns 0 on a successful (201) submission, 1 on a read timeout (the
    submission may still have completed server-side). Raises Exception if
    every configured cluster refuses the connection.
    """
    failure_messages = []
    for cluster in clusters:
        cluster_name = cluster['name']
        cluster_url = cluster['url']
        try:
            print_info('Attempting to submit on %s cluster...' % terminal.bold(cluster_name))
            json_body = {'jobs': jobs}
            if group:
                json_body['groups'] = [group]
            if pool:
                json_body['pool'] = pool
            resp = http.post(cluster, 'jobs', json_body)
            print_submit_result(cluster, resp)
            if resp.status_code == 201:
                metrics.inc('command.submit.jobs', len(jobs))
                return 0
        except requests.exceptions.ReadTimeout as rt:
            # The server may have accepted the submission even though the
            # read timed out, so stop here rather than re-submitting to
            # another cluster (which could duplicate the jobs).
            logging.exception(rt)
            print_info(terminal.failed(
                f'Encountered read timeout with {cluster_name} ({cluster_url}). Your submission may have completed.'))
            return 1
        except IOError as ioe:
            logging.exception(ioe)
            reason = f'Cannot connect to {cluster_name} ({cluster_url})'
            failure_messages.append(submit_failed_message(cluster_name, reason))
    # Join with newlines so per-cluster failure messages don't run together
    # (the previous string concatenation produced one unreadable blob).
    print_error('\n'.join(failure_messages))
    raise Exception(terminal.failed('Job submission failed on all of your configured clusters.'))
def run(args):
    """Main entrypoint to the cook scheduler CLI.

    Loads configuration files, processes global command line arguments,
    and calls other command line sub-commands (actions) if necessary.

    Returns the action's result, 0 when only printing the version, or
    None when no action was given (help is printed instead).
    """
    args = vars(parser.parse_args(args))

    if args.pop('version'):
        print(f'cs version {version.VERSION}')
        return 0

    util.silent = args.pop('silent')
    verbose = args.pop('verbose') and not util.silent

    log_format = '%(asctime)s [%(levelname)s] [%(name)s] %(message)s'
    if verbose:
        # Reset any handlers installed earlier so basicConfig takes effect.
        logging.getLogger('').handlers = []
        logging.basicConfig(format=log_format, level=logging.DEBUG)
    else:
        logging.disable(logging.FATAL)

    # Lazy %-style args: the message is only formatted if DEBUG is enabled.
    logging.debug('args: %s', args)

    action = args.pop('action')
    config_path = args.pop('config')
    cluster = args.pop('cluster')
    url = args.pop('url')

    if action is None:
        parser.print_help()
    else:
        config_map = configuration.load_config_with_defaults(config_path)
        try:
            metrics.initialize(config_map)
            metrics.inc('command.%s.runs' % action)
            clusters = load_target_clusters(config_map, url, cluster)
            http.configure(config_map)
            # Drop unset CLI options so config defaults aren't clobbered.
            args = {k: v for k, v in args.items() if v is not None}
            defaults = config_map.get('defaults')
            action_defaults = (defaults.get(action) if defaults else None) or {}
            result = actions[action](clusters, deep_merge(action_defaults, args), config_path)
            logging.debug('result: %s', result)
            return result
        finally:
            metrics.close()

    return None
def submit_federated(clusters, jobs):
    """Attempt to submit the provided jobs to each cluster in turn.

    Tries each cluster in clusters until one returns a "created" (201)
    status code.

    Parameters:
        clusters: iterable of cluster dicts, each with 'name' and 'url' keys.
        jobs: list of job dicts to submit.

    Returns 0 on a successful (201) submission. Raises Exception if every
    configured cluster refuses the connection.
    """
    for cluster in clusters:
        cluster_name = cluster['name']
        try:
            print_info('Attempting to submit on %s cluster...' % colors.bold(cluster_name))
            resp = http.post(cluster, 'rawscheduler', {'jobs': jobs})
            print_submit_result(cluster, resp)
            if resp.status_code == 201:
                metrics.inc('command.submit.jobs', len(jobs))
                return 0
        except IOError as ioe:
            # logging.exception records the traceback; logging.info(ioe)
            # only logged the message and lost the stack.
            logging.exception(ioe)
            reason = 'Cannot connect to %s (%s)' % (cluster_name, cluster['url'])
            print_info('%s\n' % submit_failed_message(cluster_name, reason))
    raise Exception(
        colors.failed(
            'Job submission failed on all of your configured clusters.'))
def run(args, plugins):
    """Main entrypoint to the cook scheduler CLI.

    Loads configuration files, processes global command line arguments,
    and calls other command line sub-commands (actions) if necessary.

    plugins is a map from plugin-name -> function or Class.SubCommandPlugin

    Returns the action's result, or None when no action was given (help
    is printed instead).
    """
    # This has to happen before we parse the args, otherwise we might
    # get subcommand not found.
    for name, instance in plugins.items():
        if isinstance(instance, SubCommandPlugin):
            logging.debug('Adding SubCommandPlugin %s', name)
            try:
                instance.register(subparsers.add_parser, configuration.add_defaults)
                logging.debug('Done adding SubCommandPlugin %s', name)
                name = instance.name()
                if name in actions:
                    raise Exception(
                        'SubCommandPlugin %s clashes with an existing subcommand.' % name)
                actions[name] = instance.run
            except Exception as e:
                # Best-effort plugin loading: a broken plugin must not
                # prevent the CLI itself from starting.
                print('Failed to load SubCommandPlugin %s: %s' % (name, e), file=sys.stderr)

    args = vars(parser.parse_args(args))

    util.silent = args.pop('silent')
    verbose = args.pop('verbose') and not util.silent

    log_format = '%(asctime)s [%(levelname)s] [%(name)s] %(message)s'
    if verbose:
        # Reset any handlers installed earlier so basicConfig takes effect.
        logging.getLogger('').handlers = []
        logging.basicConfig(format=log_format, level=logging.DEBUG)
    else:
        logging.disable(logging.FATAL)

    logging.debug('args: %s', args)

    action = args.pop('action')
    config_path = args.pop('config')
    cluster = args.pop('cluster')
    url = args.pop('url')

    if action is None:
        parser.print_help()
    else:
        _, config_map = configuration.load_config_with_defaults(config_path)
        try:
            metrics.initialize(config_map)
            metrics.inc('command.%s.runs' % action)
            clusters = load_target_clusters(config_map, url, cluster)
            http.configure(config_map, plugins)
            cook.plugins.configure(plugins)
            # Drop unset CLI options so config defaults aren't clobbered.
            args = {k: v for k, v in args.items() if v is not None}
            defaults = config_map.get('defaults')
            action_defaults = (defaults.get(action) if defaults else None) or {}
            # Was: 'going to execute % action' % action — '% a' is an
            # unsupported printf format character and raised ValueError
            # at runtime whenever this line executed.
            logging.debug('going to execute %s action', action)
            result = actions[action](clusters, deep_merge(action_defaults, args), config_path)
            logging.debug('result: %s', result)
            return result
        finally:
            metrics.close()

    return None