Example #1
def submit_federated(clusters, jobs, group, pool):
    """
    Attempts to submit the provided jobs to each cluster in clusters, until a cluster
    returns a "created" status code. If no cluster returns "created" status, throws.
    """
    messages = ""
    for cluster in clusters:
        cluster_name = cluster['name']
        cluster_url = cluster['url']
        try:
            print_info('Attempting to submit on %s cluster...' % terminal.bold(cluster_name))

            json_body = {'jobs': jobs}
            if group:
                json_body['groups'] = [group]
            if pool:
                json_body['pool'] = pool

            resp = http.post(cluster, 'jobs', json_body)
            print_submit_result(cluster, resp)
            if resp.status_code == 201:
                metrics.inc('command.submit.jobs', len(jobs))
                return 0
        except requests.exceptions.ReadTimeout as rt:
            logging.exception(rt)
            print_info(terminal.failed(
                f'Encountered read timeout with {cluster_name} ({cluster_url}). Your submission may have completed.'))
            return 1
        except IOError as ioe:
            logging.exception(ioe)
            reason = f'Cannot connect to {cluster_name} ({cluster_url})'
            message = submit_failed_message(cluster_name, reason)
            messages += message
    print_error(messages)
    raise Exception(terminal.failed('Job submission failed on all of your configured clusters.'))
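
For context, here is a minimal usage sketch. The cluster dicts carry the 'name' and 'url' keys the function reads, and the job payload is a placeholder; the host names, UUID, and job fields are illustrative, not a real Cook deployment or its exact job schema.

# Hypothetical invocation of submit_federated; cluster URLs and the job
# payload below are placeholders, not a real deployment or job schema.
clusters = [
    {'name': 'dev', 'url': 'http://cook-dev.example.com:12321'},
    {'name': 'prod', 'url': 'http://cook-prod.example.com:12321'},
]
jobs = [{'uuid': 'c7f1d9e0-0000-0000-0000-000000000000',
         'command': 'echo hello',
         'cpus': 1, 'mem': 128, 'max-retries': 1}]

# Tries 'dev' first and only falls through to 'prod' on a connection error;
# returns 0 on the first 201, returns 1 on a read timeout, and raises if
# every configured cluster fails.
exit_code = submit_federated(clusters, jobs, group=None, pool=None)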
Example #2
File: cli.py Project: dPeS/Cook
def run(args):
    """
    Main entrypoint to the cook scheduler CLI. Loads configuration files, 
    processes global command line arguments, and calls other command line 
    sub-commands (actions) if necessary.
    """
    args = vars(parser.parse_args(args))

    print_version = args.pop('version')
    if print_version:
        print(f'cs version {version.VERSION}')
        return 0

    util.silent = args.pop('silent')
    verbose = args.pop('verbose') and not util.silent

    log_format = '%(asctime)s [%(levelname)s] [%(name)s] %(message)s'
    if verbose:
        logging.getLogger('').handlers = []
        logging.basicConfig(format=log_format, level=logging.DEBUG)
    else:
        logging.disable(logging.FATAL)

    logging.debug('args: %s' % args)

    action = args.pop('action')
    config_path = args.pop('config')
    cluster = args.pop('cluster')
    url = args.pop('url')

    if action is None:
        parser.print_help()
    else:
        config_map = configuration.load_config_with_defaults(config_path)
        try:
            metrics.initialize(config_map)
            metrics.inc('command.%s.runs' % action)
            clusters = load_target_clusters(config_map, url, cluster)
            http.configure(config_map)
            args = {k: v for k, v in args.items() if v is not None}
            defaults = config_map.get('defaults')
            action_defaults = (defaults.get(action)
                               if defaults else None) or {}
            result = actions[action](clusters,
                                     deep_merge(action_defaults,
                                                args), config_path)
            logging.debug('result: %s' % result)
            return result
        finally:
            metrics.close()

    return None
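
A plausible console-script shim around this entrypoint, as a sketch: it assumes run() is importable from this module and relies only on the behavior visible above (an action's result is returned, or None when only the help text is printed).

# Hypothetical wrapper; not part of the project code shown above.
import sys

def main():
    # Hand everything after the program name to the CLI entrypoint.
    result = run(sys.argv[1:])
    # Actions such as submit_federated return integer exit codes; when run()
    # only prints help it returns None, which we map to success here.
    sys.exit(result if isinstance(result, int) else 0)

if __name__ == '__main__':
    main()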
Example #3
File: submit.py Project: m4ce/Cook
def submit_federated(clusters, jobs):
    """
    Attempts to submit the provided jobs to each cluster in clusters, until a cluster
    returns a "created" status code. If no cluster returns "created" status, throws.
    """
    for cluster in clusters:
        cluster_name = cluster['name']
        try:
            print_info('Attempting to submit on %s cluster...' %
                       colors.bold(cluster_name))
            resp = http.post(cluster, 'rawscheduler', {'jobs': jobs})
            print_submit_result(cluster, resp)
            if resp.status_code == 201:
                metrics.inc('command.submit.jobs', len(jobs))
                return 0
        except IOError as ioe:
            logging.info(ioe)
            reason = 'Cannot connect to %s (%s)' % (cluster_name,
                                                    cluster['url'])
            print_info('%s\n' % submit_failed_message(cluster_name, reason))
    raise Exception(
        colors.failed(
            'Job submission failed on all of your configured clusters.'))
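
This older variant takes only clusters and jobs, posts to the 'rawscheduler' endpoint instead of 'jobs', and signals total failure solely by raising. A caller that wants a shell-style status code would wrap it along these lines (the wrapper is an assumption for illustration, not project code):

# Hypothetical caller mapping the raise-on-total-failure contract to an
# exit status.
def submit_or_exit(clusters, jobs):
    try:
        # Returns 0 as soon as any cluster answers with HTTP 201.
        return submit_federated(clusters, jobs)
    except Exception as e:
        # Raised only after every configured cluster has been tried.
        logging.error('submission failed: %s', e)
        return 1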
Example #4
def run(args, plugins):
    """
    Main entrypoint to the cook scheduler CLI. Loads configuration files, 
    processes global command line arguments, and calls other command line 
    sub-commands (actions) if necessary.

    plugins is a map from plugin-name -> function or Class.SubCommandPlugin
    """

    # This has to happen before we parse the args, otherwise we might
    # get subcommand not found.
    for name, instance in plugins.items():
        if isinstance(instance, SubCommandPlugin):
            logging.debug('Adding SubCommandPlugin %s' % name)
            try:
                instance.register(subparsers.add_parser,
                                  configuration.add_defaults)
                logging.debug('Done adding SubCommandPlugin %s' % name)
                name = instance.name()
                if name in actions:
                    raise Exception(
                        'SubCommandPlugin %s clashes with an existing subcommand.'
                        % name)
                actions[name] = instance.run
            except Exception as e:
                print('Failed to load SubCommandPlugin %s: %s' % (name, e),
                      file=sys.stderr)

    args = vars(parser.parse_args(args))

    util.silent = args.pop('silent')
    verbose = args.pop('verbose') and not util.silent

    log_format = '%(asctime)s [%(levelname)s] [%(name)s] %(message)s'
    if verbose:
        logging.getLogger('').handlers = []
        logging.basicConfig(format=log_format, level=logging.DEBUG)
    else:
        logging.disable(logging.FATAL)

    logging.debug('args: %s', args)

    action = args.pop('action')
    config_path = args.pop('config')
    cluster = args.pop('cluster')
    url = args.pop('url')

    if action is None:
        parser.print_help()
    else:
        _, config_map = configuration.load_config_with_defaults(config_path)
        try:
            metrics.initialize(config_map)
            metrics.inc('command.%s.runs' % action)
            clusters = load_target_clusters(config_map, url, cluster)
            http.configure(config_map, plugins)
            cook.plugins.configure(plugins)
            args = {k: v for k, v in args.items() if v is not None}
            defaults = config_map.get('defaults')
            action_defaults = (defaults.get(action)
                               if defaults else None) or {}
            logging.debug('going to execute %s action', action)
            result = actions[action](clusters,
                                     deep_merge(action_defaults,
                                                args), config_path)
            logging.debug('result: %s' % result)
            return result
        finally:
            metrics.close()

    return None
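
The plugins argument maps a plugin name to either a plain function or a SubCommandPlugin instance. Judging only from the call sites above (instance.register(subparsers.add_parser, configuration.add_defaults), instance.name(), and actions[name] = instance.run), a plugin could look roughly like the sketch below; the class body, the 'greet' subcommand, and its arguments are assumptions, not the project's documented interface.

# Hypothetical SubCommandPlugin, inferred from how run() uses it above.
class GreetPlugin(SubCommandPlugin):
    def register(self, add_parser, add_defaults):
        # add_parser is subparsers.add_parser; add_defaults
        # (configuration.add_defaults) is also handed in for registering
        # per-subcommand defaults, but its signature is not shown above.
        parser = add_parser('greet', help='print a greeting')
        parser.add_argument('--who', default='world')

    def name(self):
        # Must not clash with an existing subcommand; run() stores the
        # plugin under this key in the actions dict.
        return 'greet'

    def run(self, clusters, args, config_path):
        # Invoked as actions['greet'](clusters, merged_args, config_path).
        print('hello, %s' % args['who'])
        return 0

# Passed to the entrypoint as a name -> plugin map, e.g.:
# run(sys.argv[1:], {'greet': GreetPlugin()})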