def _create_scheduler():
    """Build the CentralPlannerScheduler from the [scheduler] config section.

    Reads timing, task-disable, display, and resource settings, picks a task
    history backend, and constructs the scheduler with them.
    """
    cfg = configuration.get_config()
    section = 'scheduler'

    # Timing knobs (seconds) and on-disk state location.
    retry = cfg.getfloat(section, 'retry-delay', 900.0)
    remove = cfg.getfloat(section, 'remove-delay', 600.0)
    disconnect = cfg.getfloat(section, 'worker-disconnect-delay', 60.0)
    state = cfg.get(section, 'state-path', '/var/lib/luigi-server/state.pickle')

    # Jobs are disabled if we see more than disable_failures failures in
    # disable_window seconds. These disables last for disable_persist seconds.
    window = cfg.getint(section, 'disable-window-seconds', 3600)
    failures = cfg.getint(section, 'disable-num-failures', None)
    persist = cfg.getint(section, 'disable-persist-seconds', 86400)
    shown = cfg.getint(section, 'max-shown-tasks', 100000)

    # Per-resource capacity limits.
    resources = cfg.getintdict('resources')

    if cfg.getboolean(section, 'record_task_history', False):
        import db_task_history  # Needs sqlalchemy, thus imported here
        history = db_task_history.DbTaskHistory()
    else:
        history = task_history.NopHistory()

    return scheduler.CentralPlannerScheduler(
        retry, remove, disconnect, state, history,
        resources, persist, window, failures, shown,
    )
def _create_scheduler():
    """Create a CentralPlannerScheduler with the three delay settings taken
    from the [scheduler] config section (falling back to built-in defaults)."""
    cfg = interface.get_config()
    # Read each delay (seconds) in the positional order the scheduler expects.
    delays = [
        cfg.getfloat('scheduler', key, fallback)
        for key, fallback in (
            ('retry-delay', 900.0),
            ('remove-delay', 600.0),
            ('worker-disconnect-delay', 60.0),
        )
    ]
    return scheduler.CentralPlannerScheduler(*delays)
def _create_scheduler():
    """Create a CentralPlannerScheduler with delays and a task history
    implementation chosen from configuration."""
    cfg = configuration.get_config()
    retry = cfg.getfloat('scheduler', 'retry-delay', 900.0)
    remove = cfg.getfloat('scheduler', 'remove-delay', 600.0)
    disconnect = cfg.getfloat('scheduler', 'worker-disconnect-delay', 60.0)

    # Pick the history backend: a DB-backed one when recording is enabled,
    # otherwise a no-op implementation.
    if cfg.getboolean('scheduler', 'record_task_history', False):
        import db_task_history  # Needs sqlalchemy, thus imported here
        history = db_task_history.DbTaskHistory()
    else:
        history = task_history.NopHistory()

    return scheduler.CentralPlannerScheduler(retry, remove, disconnect, history)
class RPCHandler(tornado.web.RequestHandler):
    """ Handle remote scheduling calls using rpc.RemoteSchedulerResponder"""

    # Shared across all requests: one scheduler instance and its RPC facade,
    # created at class-definition time.
    scheduler = scheduler.CentralPlannerScheduler()
    api = RemoteSchedulerResponder(scheduler)

    def get(self, method):
        """Dispatch GET /<method>?data=<json> to the matching responder method."""
        # Arguments arrive JSON-encoded in the 'data' query parameter.
        arguments = json.loads(self.get_argument('data', default="{}"))
        if not hasattr(self.api, method):
            # Unknown RPC method name -> client error.
            self.send_error(400)
            return
        result = getattr(self.api, method)(**arguments)
        # wrap all json response in a dictionary
        self.write({"response": result})
def run(tasks, override_defaults=None):
    """Schedule and run the given tasks on a worker.

    :param tasks: iterable of task instances to add to the worker.
    :param override_defaults: optional dict of parameter overrides handed to
        register.env_params; treated as empty when omitted.
    """
    # Bug fix: the original used a mutable default argument ({}), which is a
    # single shared dict across all calls and can be mutated downstream.
    if override_defaults is None:
        override_defaults = {}
    env_params = register.env_params(override_defaults)
    if env_params.lock:
        # Ensure only one instance of this invocation runs at a time.
        lock.run_once(env_params.lock_pid_dir)
    if env_params.local_scheduler:
        sch = scheduler.CentralPlannerScheduler()
    else:
        sch = rpc.RemoteScheduler(host=env_params.scheduler_host)
    w = worker.Worker(scheduler=sch, worker_processes=env_params.workers)
    for task in tasks:
        w.add(task)
    w.run()
def run(tasks, override_defaults=None):
    """Schedule the given tasks and run them on a worker.

    :param tasks: iterable of task instances to add to the worker.
    :param override_defaults: optional dict of parameter overrides handed to
        EnvironmentParamsContainer.env_params; treated as empty when omitted.
    """
    # Bug fix: the original used a mutable default argument ({}), which is a
    # single shared dict across all calls and can be mutated downstream.
    if override_defaults is None:
        override_defaults = {}
    env_params = EnvironmentParamsContainer.env_params(override_defaults)
    if env_params.lock:
        # Ensure only one instance of this invocation runs at a time.
        lock.run_once(env_params.lock_pid_dir)
    if env_params.local_scheduler:
        sch = scheduler.CentralPlannerScheduler()
    else:
        sch = rpc.RemoteScheduler(host=env_params.scheduler_host,
                                  port=env_params.scheduler_port)
    w = worker.Worker(scheduler=sch, worker_processes=env_params.workers)
    for task in tasks:
        w.add(task)
    logger = logging.getLogger('luigi-interface')
    logger.info('Done scheduling tasks')
    w.run()
def _create_scheduler():
    """Instantiate the central planner scheduler from [scheduler] configuration,
    including state path, resource limits, and the task history backend."""
    cfg = configuration.get_config()
    section = 'scheduler'

    retry = cfg.getfloat(section, 'retry-delay', 900.0)
    remove = cfg.getfloat(section, 'remove-delay', 600.0)
    disconnect = cfg.getfloat(section, 'worker-disconnect-delay', 60.0)
    state = cfg.get(section, 'state-path', '/var/lib/luigi-server/state.pickle')

    # Per-resource capacity limits.
    resources = cfg.getintdict('resources')

    if cfg.getboolean(section, 'record_task_history', False):
        import db_task_history  # Needs sqlalchemy, thus imported here
        history = db_task_history.DbTaskHistory()
    else:
        history = task_history.NopHistory()

    return scheduler.CentralPlannerScheduler(
        retry, remove, disconnect, state, history, resources)
def create_local_scheduler(self):
    """Return a new in-process CentralPlannerScheduler (no remote server)."""
    return scheduler.CentralPlannerScheduler()
def run(self, cmdline_args=None, config=None):
    """Parse command-line arguments with argparse, build the selected task,
    and run it on a worker.

    :param cmdline_args: optional list of argument strings; None means sys.argv.
    :param config: optional config object queried for 'luigi'/'erroremail'.
    """
    import argparse
    parser = argparse.ArgumentParser()
    # INTERNAL: While changing configuration here, please update documentation in spluigi
    parser.add_argument('--local-scheduler', help='Use local scheduling', action='store_true')
    parser.add_argument('--scheduler-host',
                        help='Hostname of machine running remote scheduler [default: %(default)s]',
                        default='localhost')
    parser.add_argument('--lock', help='Do not run if the task is already running', action='store_true')
    parser.add_argument('--lock-pid-dir',
                        help='Directory to store the pid file [default: %(default)s]',
                        default='/var/tmp/luigi')
    parser.add_argument('--workers',
                        help='Maximum number of parallel tasks to run [default: %(default)s]',
                        default=1, type=int)

    def _add_parameter(parser, param_name, param, prefix=''):
        # Translate one task Parameter into an argparse option
        # (lists append, booleans are flags, everything else stores a value).
        if param.has_default:
            defaulthelp = "[default: %s]" % (param.default, )
        else:
            defaulthelp = ""
        if param.is_list:
            action = "append"
        elif param.is_boolean:
            action = "store_true"
        else:
            action = "store"
        parser.add_argument('--' + param_name.replace('_', '-'),
                            help='%s%s%s' % (prefix, param_name, defaulthelp),
                            default=None, action=action)

    def _add_task_parameters(parser, cls):
        # Task-specific parameters, prefixed with the task family in help text.
        for param_name, param in cls.get_nonglobal_params():
            _add_parameter(parser, param_name, param, cls.task_family + '.')

    def _add_global_parameters(parser):
        for param_name, param in register.get_global_params():
            _add_parameter(parser, param_name, param)

    if register.get_main():
        # A single "main" task is registered: its parameters go on the top-level parser.
        _add_task_parameters(parser, register.get_main())
        _add_global_parameters(parser)
    else:
        # Otherwise every registered task becomes a subcommand.
        # NOTE(review): .iteritems() is Python 2 only.
        subparsers = parser.add_subparsers(dest='command')
        for name, cls in register.get_reg().iteritems():
            subparser = subparsers.add_parser(name)
            _add_task_parameters(subparser, cls)
            _add_global_parameters(subparser)

    args = parser.parse_args(args=cmdline_args)
    if args.lock:
        lock.run_once(args.lock_pid_dir)
    params = vars(args)  # convert to a str -> str hash
    if register.get_main():
        task_cls = register.get_main()
    else:
        task_cls = register.get_reg()[args.command]
    task = task_cls.from_input(params, register.get_global_params())
    if args.local_scheduler:
        sch = scheduler.CentralPlannerScheduler()
    else:
        sch = rpc.RemoteScheduler(host=args.scheduler_host)
    erroremail = config.get('luigi', 'erroremail') if config else None
    w = worker.Worker(scheduler=sch, erroremail=erroremail, worker_processes=args.workers)
    w.add(task)
    w.run()
def run(self, cmdline_args=None, config=None):
    """Parse command-line arguments with optparse (two-phase: first extract
    --task, then build the full option set for that task), construct the task,
    and run it on a worker.

    :param cmdline_args: optional list of argument strings; None means sys.argv.
    :param config: optional config object queried for scheduler host and erroremail.
    """
    parser = PassThroughOptionParser()
    tasks_str = '/'.join(sorted([name for name in register.get_reg()]))

    def add_task_option(p):
        if register.get_main():
            # INTERNAL: While changing configuration here, please update documentation in spluigi
            p.add_option('--task',
                         help='Task to run (' + tasks_str + ') [default: %default]',
                         default=register.get_main().task_family)
        else:
            p.add_option('--task', help='Task to run (%s)' % tasks_str)

    # Phase 1: parse only --task (pass-through parser ignores unknown options).
    add_task_option(parser)
    options, args = parser.parse_args(args=cmdline_args)

    task_cls_name = options.task
    # NOTE(review): name-mangled attribute — presumably set in __init__ to an
    # optparse parser supplied by the caller; verify against the full class.
    if self.__existing_optparse:
        parser = self.__existing_optparse
    else:
        parser = optparse.OptionParser()
    add_task_option(parser)

    if config:
        default_scheduler = config.get('luigi', 'scheduler-host')
    else:
        default_scheduler = 'localhost'

    # INTERNAL: While changing configuration here, please update documentation in spluigi
    parser.add_option('--local-scheduler', help='Use local scheduling', action='store_true')
    parser.add_option('--scheduler-host',
                      help='Hostname of machine running remote scheduler [default: %default]',
                      default=default_scheduler)
    parser.add_option('--lock', help='Do not run if the task is already running', action='store_true')
    parser.add_option('--lock-pid-dir',
                      help='Directory to store the pid file [default: %default]',
                      default='/var/tmp/luigi')
    parser.add_option('--workers',
                      help='Maximum number of parallel tasks to run [default: %default]',
                      default=1, type=int)

    if task_cls_name not in register.get_reg():
        raise Exception('Error: %s is not a valid tasks (must be %s)' % (task_cls_name, tasks_str))

    # Register all parameters as a big mess
    parameter_defaults = {}
    task_cls = register.get_reg()[task_cls_name]
    params = task_cls.get_nonglobal_params()
    global_params = list(register.get_global_params())

    for param_name, param in global_params:
        parameter_defaults[param_name] = param.default

    for param_name, param in params:
        if param.has_default:
            parameter_defaults[param_name] = param.default  # Will override with whatever: TODO: do more sensibly!

    def _add_parameter(parser, param_name, param, parameter_defaults):
        # Translate one task Parameter into an optparse option
        # (lists append, booleans are flags, everything else stores a value).
        if param.has_default:
            help_text = '%s [default: %s]' % (param_name, parameter_defaults)
        else:
            help_text = param_name
        if param.is_list:
            action = "append"
        elif param.is_boolean:
            action = "store_true"
        else:
            action = "store"
        parser.add_option('--' + param_name.replace('_', '-'),
                          help=help_text, default=None, action=action)

    for param_name, param in global_params:
        _add_parameter(parser, param_name, param, parameter_defaults)
    for param_name, param in params:
        _add_parameter(parser, param_name, param, parameter_defaults)

    # Phase 2: parse the full option set and run.
    options, args = parser.parse_args(args=cmdline_args)
    if options.lock:
        lock.run_once(options.lock_pid_dir)

    # Strip the non-parameter options before handing values to the task.
    # NOTE(review): .iteritems() is Python 2 only.
    params = {}
    for k, v in vars(options).iteritems():
        if k not in ['task', 'local_scheduler']:
            params[k] = v

    task = task_cls.from_input(params, global_params)

    if options.local_scheduler:
        sch = scheduler.CentralPlannerScheduler()
    else:
        sch = rpc.RemoteScheduler(host=options.scheduler_host)

    erroremail = config.get('luigi', 'erroremail') if config else None

    # Run
    w = worker.Worker(scheduler=sch, erroremail=erroremail, worker_processes=options.workers)
    w.add(task)
    w.run()