Exemple #1
0
def _create_scheduler():
    """Build a ``CentralPlannerScheduler`` from the loaded configuration.

    Every setting is read from the ``scheduler`` section (with the fallback
    values shown below), except the resource limits, which come from the
    ``resources`` section.  The values are handed to the scheduler
    positionally.
    """
    cfg = configuration.get_config()

    # Timing settings.
    retry_delay = cfg.getfloat('scheduler', 'retry-delay', 900.0)
    remove_delay = cfg.getfloat('scheduler', 'remove-delay', 600.0)
    worker_disconnect_delay = cfg.getfloat(
        'scheduler', 'worker-disconnect-delay', 60.0)

    state_path = cfg.get(
        'scheduler', 'state-path', '/var/lib/luigi-server/state.pickle')

    # A job gets disabled once more than disable_failures failures are seen
    # within disable_window seconds; the disable lasts disable_persist seconds.
    disable_window = cfg.getint('scheduler', 'disable-window-seconds', 3600)
    disable_failures = cfg.getint('scheduler', 'disable-num-failures', None)
    disable_persist = cfg.getint(
        'scheduler', 'disable-persist-seconds', 86400)
    max_shown_tasks = cfg.getint('scheduler', 'max-shown-tasks', 100000)

    resources = cfg.getintdict('resources')

    if not cfg.getboolean('scheduler', 'record_task_history', False):
        history = task_history.NopHistory()
    else:
        import db_task_history  # Needs sqlalchemy, thus imported here
        history = db_task_history.DbTaskHistory()

    # The constructor is called positionally, so this order is significant.
    scheduler_args = (
        retry_delay,
        remove_delay,
        worker_disconnect_delay,
        state_path,
        history,
        resources,
        disable_persist,
        disable_window,
        disable_failures,
        max_shown_tasks,
    )
    return scheduler.CentralPlannerScheduler(*scheduler_args)
Exemple #2
0
def _create_scheduler():
    """Build a ``CentralPlannerScheduler`` from three configured delays.

    The retry, remove and worker-disconnect delays are read, in that order,
    as floats from the ``scheduler`` section of ``interface.get_config()``
    and passed positionally to the scheduler.
    """
    cfg = interface.get_config()
    # (option name, fallback) pairs, listed in constructor-argument order.
    delay_options = (
        ('retry-delay', 900.0),
        ('remove-delay', 600.0),
        ('worker-disconnect-delay', 60.0),
    )
    delays = [
        cfg.getfloat('scheduler', option, fallback)
        for option, fallback in delay_options
    ]
    return scheduler.CentralPlannerScheduler(*delays)
Exemple #3
0
def _create_scheduler():
    """Build a ``CentralPlannerScheduler`` with delays and a task-history backend.

    The three delays are read as floats from the ``scheduler`` config
    section.  The history backend is ``DbTaskHistory`` when the
    ``record_task_history`` option is true and ``NopHistory`` otherwise.
    """
    cfg = configuration.get_config()

    retry_delay = cfg.getfloat('scheduler', 'retry-delay', 900.0)
    remove_delay = cfg.getfloat('scheduler', 'remove-delay', 600.0)
    worker_disconnect_delay = cfg.getfloat(
        'scheduler', 'worker-disconnect-delay', 60.0)

    if not cfg.getboolean('scheduler', 'record_task_history', False):
        history = task_history.NopHistory()
    else:
        import db_task_history  # Needs sqlalchemy, thus imported here
        history = db_task_history.DbTaskHistory()

    return scheduler.CentralPlannerScheduler(
        retry_delay,
        remove_delay,
        worker_disconnect_delay,
        history,
    )
Exemple #4
0
class RPCHandler(tornado.web.RequestHandler):
    """ Handle remote scheduling calls using rpc.RemoteSchedulerResponder"""
    # One scheduler and one responder are created at class-definition time
    # and are therefore shared by every request handled by this class.
    scheduler = scheduler.CentralPlannerScheduler()
    api = RemoteSchedulerResponder(scheduler)

    def get(self, method):
        """Dispatch a GET request to the responder method called ``method``.

        The keyword arguments for the call are taken from the ``data`` query
        argument, which must hold a JSON object (defaults to ``{}``).  The
        result is written back wrapped as ``{"response": result}``.

        A 400 error is sent, and nothing is called, when:

        * ``method`` starts with an underscore (private/dunder attributes
          must not be reachable from the network),
        * the responder has no callable attribute of that name,
        * ``data`` is not valid JSON or does not decode to a JSON object.
        """
        # ``method`` comes straight from the request, so never let it reach
        # private or special attributes of the responder.
        if method.startswith('_'):
            self.send_error(400)
            return

        handler = getattr(self.api, method, None)
        if not callable(handler):
            self.send_error(400)
            return

        payload = self.get_argument('data', default="{}")
        try:
            arguments = json.loads(payload)
        except ValueError:
            # Malformed JSON is a client error, not a server failure.
            self.send_error(400)
            return
        if not isinstance(arguments, dict):
            # Only a JSON object can be expanded into keyword arguments.
            self.send_error(400)
            return

        result = handler(**arguments)
        self.write({"response":
                    result})  # wrap all json response in a dictionary
Exemple #5
0
    def run(tasks, override_defaults=None):
        """Schedule ``tasks`` on a worker and run them.

        :param tasks: iterable of task objects; each is added to the worker.
        :param override_defaults: mapping passed to ``register.env_params``;
            ``None`` (the default) is treated as an empty dict.

        A fresh dict is created per call rather than using a mutable ``{}``
        default, so state can never leak between calls through the shared
        default object.
        """
        if override_defaults is None:
            override_defaults = {}
        env_params = register.env_params(override_defaults)

        if env_params.lock:
            lock.run_once(env_params.lock_pid_dir)

        # Use an in-process scheduler when requested, otherwise a remote one.
        if env_params.local_scheduler:
            sch = scheduler.CentralPlannerScheduler()
        else:
            sch = rpc.RemoteScheduler(host=env_params.scheduler_host)

        w = worker.Worker(scheduler=sch, worker_processes=env_params.workers)

        for task in tasks:
            w.add(task)
        w.run()
Exemple #6
0
    def run(tasks, override_defaults=None):
        """Schedule ``tasks`` on a worker, log that scheduling is done, and run.

        :param tasks: iterable of task objects; each is added to the worker.
        :param override_defaults: mapping passed to
            ``EnvironmentParamsContainer.env_params``; ``None`` (the default)
            is treated as an empty dict.

        A fresh dict is created per call rather than using a mutable ``{}``
        default, so state can never leak between calls through the shared
        default object.
        """
        if override_defaults is None:
            override_defaults = {}
        env_params = EnvironmentParamsContainer.env_params(override_defaults)

        if env_params.lock:
            lock.run_once(env_params.lock_pid_dir)

        # Use an in-process scheduler when requested, otherwise a remote one.
        if env_params.local_scheduler:
            sch = scheduler.CentralPlannerScheduler()
        else:
            sch = rpc.RemoteScheduler(host=env_params.scheduler_host,
                                      port=env_params.scheduler_port)

        w = worker.Worker(scheduler=sch, worker_processes=env_params.workers)
        for task in tasks:
            w.add(task)
        logger = logging.getLogger('luigi-interface')
        logger.info('Done scheduling tasks')
        w.run()
Exemple #7
0
def _create_scheduler():
    """Build a ``CentralPlannerScheduler`` from the loaded configuration.

    Delays and the state path are read from the ``scheduler`` section and
    resource limits from the ``resources`` section.  The history backend is
    ``DbTaskHistory`` when ``record_task_history`` is true, otherwise
    ``NopHistory``.
    """
    cfg = configuration.get_config()

    retry_delay = cfg.getfloat('scheduler', 'retry-delay', 900.0)
    remove_delay = cfg.getfloat('scheduler', 'remove-delay', 600.0)
    worker_disconnect_delay = cfg.getfloat(
        'scheduler', 'worker-disconnect-delay', 60.0)
    state_path = cfg.get(
        'scheduler', 'state-path', '/var/lib/luigi-server/state.pickle')
    resources = cfg.getintdict('resources')

    if not cfg.getboolean('scheduler', 'record_task_history', False):
        history = task_history.NopHistory()
    else:
        import db_task_history  # Needs sqlalchemy, thus imported here
        history = db_task_history.DbTaskHistory()

    # The constructor is called positionally, so this order is significant.
    return scheduler.CentralPlannerScheduler(
        retry_delay,
        remove_delay,
        worker_disconnect_delay,
        state_path,
        history,
        resources,
    )
Exemple #8
0
 def create_local_scheduler(self):
     """Return a new ``CentralPlannerScheduler`` built with default arguments."""
     local_scheduler = scheduler.CentralPlannerScheduler()
     return local_scheduler
Exemple #9
0
    def run(self, cmdline_args=None, config=None):
        """Parse the command line with argparse, then schedule and run one task.

        When ``register.get_main()`` returns a task class, that task's
        parameters are added directly to the top-level parser.  Otherwise one
        sub-command is created per entry of ``register.get_reg()`` and the
        chosen sub-command selects the task class.

        :param cmdline_args: argument list handed to ``parse_args``.
        :param config: optional config object; when truthy, its
            ``('luigi', 'erroremail')`` value is forwarded to the worker.
        """
        import argparse

        parser = argparse.ArgumentParser()
        # INTERNAL: While changing configuration here, please update documentation in spluigi
        parser.add_argument('--local-scheduler', action='store_true',
                            help='Use local scheduling')
        parser.add_argument(
            '--scheduler-host', default='localhost',
            help='Hostname of machine running remote scheduler [default: %(default)s]')
        parser.add_argument('--lock', action='store_true',
                            help='Do not run if the task is already running')
        parser.add_argument(
            '--lock-pid-dir', default='/var/tmp/luigi',
            help='Directory to store the pid file [default: %(default)s]')
        parser.add_argument(
            '--workers', type=int, default=1,
            help='Maximum number of parallel tasks to run [default: %(default)s]')

        def _add_parameter(target, param_name, param, prefix=''):
            # Register one task parameter as a ``--dashed-name`` option.
            default_note = (
                "[default: %s]" % (param.default, ) if param.has_default else "")

            # List parameters accumulate values, booleans are flags,
            # everything else stores a single value.
            action = ("append" if param.is_list
                      else "store_true" if param.is_boolean
                      else "store")

            flag = '--' + param_name.replace('_', '-')
            help_text = '%s%s%s' % (prefix, param_name, default_note)
            target.add_argument(flag, help=help_text, default=None,
                                action=action)

        def _add_task_parameters(target, cls):
            # Task-specific parameters are prefixed with "<task_family>."
            # in their help text.
            for param_name, param in cls.get_nonglobal_params():
                _add_parameter(target, param_name, param,
                               cls.task_family + '.')

        def _add_global_parameters(target):
            for param_name, param in register.get_global_params():
                _add_parameter(target, param_name, param)

        if not register.get_main():
            # No main task: expose every registered task as a sub-command.
            subparsers = parser.add_subparsers(dest='command')
            for name, cls in register.get_reg().iteritems():
                subparser = subparsers.add_parser(name)
                _add_task_parameters(subparser, cls)
                _add_global_parameters(subparser)
        else:
            _add_task_parameters(parser, register.get_main())
            _add_global_parameters(parser)

        args = parser.parse_args(args=cmdline_args)
        if args.lock:
            lock.run_once(args.lock_pid_dir)
        params = vars(args)  # convert to a str -> str hash

        if not register.get_main():
            task_cls = register.get_reg()[args.command]
        else:
            task_cls = register.get_main()

        task = task_cls.from_input(params, register.get_global_params())

        if not args.local_scheduler:
            sch = rpc.RemoteScheduler(host=args.scheduler_host)
        else:
            sch = scheduler.CentralPlannerScheduler()

        if config:
            erroremail = config.get('luigi', 'erroremail')
        else:
            erroremail = None

        w = worker.Worker(scheduler=sch,
                          erroremail=erroremail,
                          worker_processes=args.workers)
        w.add(task)
        w.run()
Exemple #10
0
    def run(self, cmdline_args=None, config=None):
        """Parse the command line with optparse, then schedule and run one task.

        Parsing happens in two passes.  The first pass uses a
        ``PassThroughOptionParser`` that only knows ``--task`` and is used to
        find out which task class was requested.  The second pass uses the
        full parser (an existing one supplied on the instance, or a new
        ``optparse.OptionParser``) with the scheduler/lock/worker options and
        one option per global and task-specific parameter.

        :param cmdline_args: argument list handed to ``parse_args``.
        :param config: optional config object; when truthy it supplies the
            default scheduler host (``('luigi', 'scheduler-host')``) and the
            worker's error e-mail (``('luigi', 'erroremail')``).
        :raises Exception: if the requested task name is not registered.
        """
        parser = PassThroughOptionParser()
        tasks_str = '/'.join(sorted(register.get_reg()))

        def add_task_option(p):
            # With a main task, --task defaults to that task's family;
            # otherwise it has no default.
            if register.get_main():
                # INTERNAL: While changing configuration here, please update documentation in spluigi
                p.add_option('--task',
                             help='Task to run (' + tasks_str +
                             ') [default: %default]',
                             default=register.get_main().task_family)
            else:
                p.add_option('--task', help='Task to run (%s)' % tasks_str)

        # First pass: only --task is registered, to discover the task name.
        add_task_option(parser)
        options, args = parser.parse_args(args=cmdline_args)

        task_cls_name = options.task
        if self.__existing_optparse:
            parser = self.__existing_optparse
        else:
            parser = optparse.OptionParser()
        add_task_option(parser)
        if config:
            default_scheduler = config.get('luigi', 'scheduler-host')
        else:
            default_scheduler = 'localhost'
        # INTERNAL: While changing configuration here, please update documentation in spluigi
        parser.add_option('--local-scheduler',
                          help='Use local scheduling',
                          action='store_true')
        parser.add_option(
            '--scheduler-host',
            help=
            'Hostname of machine running remote scheduler [default: %default]',
            default=default_scheduler)
        parser.add_option('--lock',
                          help='Do not run if the task is already running',
                          action='store_true')
        parser.add_option(
            '--lock-pid-dir',
            help='Directory to store the pid file [default: %default]',
            default='/var/tmp/luigi')
        parser.add_option(
            '--workers',
            help='Maximum number of parallel tasks to run [default: %default]',
            default=1,
            type=int)

        if task_cls_name not in register.get_reg():
            raise Exception('Error: %s is not a valid tasks (must be %s)' %
                            (task_cls_name, tasks_str))

        # Register all parameters as a big mess
        parameter_defaults = {}
        task_cls = register.get_reg()[task_cls_name]
        # Both parameter collections are iterated twice below, so they are
        # materialized up front in case the getters return one-shot iterators.
        params = list(task_cls.get_nonglobal_params())
        global_params = list(register.get_global_params())

        for param_name, param in global_params:
            parameter_defaults[param_name] = param.default

        for param_name, param in params:
            if param.has_default:
                parameter_defaults[
                    param_name] = param.default  # Will override with whatever: TODO: do more sensibly!

        def _add_parameter(parser, param_name, param, parameter_defaults):
            # Register one parameter as a ``--dashed-name`` option.
            if param.has_default:
                # Show this parameter's own default, not the whole mapping.
                # The key always exists here: global defaults are recorded
                # unconditionally and task defaults whenever has_default.
                help_text = '%s [default: %s]' % (
                    param_name, parameter_defaults[param_name])
            else:
                help_text = param_name
            if param.is_list:
                action = "append"
            elif param.is_boolean:
                action = "store_true"
            else:
                action = "store"
            parser.add_option('--' + param_name.replace('_', '-'),
                              help=help_text,
                              default=None,
                              action=action)

        for param_name, param in global_params:
            _add_parameter(parser, param_name, param, parameter_defaults)

        for param_name, param in params:
            _add_parameter(parser, param_name, param, parameter_defaults)

        # Parse and run
        options, args = parser.parse_args(args=cmdline_args)
        if options.lock:
            lock.run_once(options.lock_pid_dir)
        # Everything except the task selector and the local-scheduler flag
        # is handed to the task as input.
        params = {}
        for k, v in vars(options).iteritems():
            if k not in ['task', 'local_scheduler']:
                params[k] = v
        task = task_cls.from_input(params, global_params)

        if options.local_scheduler:
            sch = scheduler.CentralPlannerScheduler()
        else:
            sch = rpc.RemoteScheduler(host=options.scheduler_host)

        erroremail = config.get('luigi', 'erroremail') if config else None

        # Run
        w = worker.Worker(scheduler=sch,
                          erroremail=erroremail,
                          worker_processes=options.workers)

        w.add(task)
        w.run()