Ejemplo n.º 1
0
    def run(tasks, worker_scheduler_factory=None, override_defaults={}):
        """Add ``tasks`` to a worker, run it, and always stop it afterwards.

        :param tasks: iterable of tasks; each one is handed to the worker's
            ``add`` method.
        :param worker_scheduler_factory: object providing
            ``create_local_scheduler``, ``create_remote_scheduler`` and
            ``create_worker``; a ``WorkerSchedulerFactory`` is built when
            this is ``None``.
        :param override_defaults: forwarded unchanged to
            ``EnvironmentParamsContainer.env_params``.
        """
        if worker_scheduler_factory is None:
            worker_scheduler_factory = WorkerSchedulerFactory()

        env_params = EnvironmentParamsContainer.env_params(override_defaults)

        if env_params.lock:
            lock.run_once(env_params.lock_pid_dir)

        if env_params.local_scheduler:
            sch = worker_scheduler_factory.create_local_scheduler()
        else:
            sch = worker_scheduler_factory.create_remote_scheduler(
                host=env_params.scheduler_host, port=env_params.scheduler_port)

        w = worker_scheduler_factory.create_worker(
            scheduler=sch, worker_processes=env_params.workers)

        # stop() must run even when add() or run() raises; previously an
        # exception skipped it and the worker was never stopped.
        try:
            for task in tasks:
                w.add(task)
            logger = logging.getLogger('luigi-interface')
            logger.info('Done scheduling tasks')
            w.run()
        finally:
            w.stop()
Ejemplo n.º 2
0
    def run(tasks, override_defaults={}):
        """Create a scheduler and a worker from the environment parameters and run ``tasks``.

        :param tasks: iterable of tasks; each one is passed to ``Worker.add``.
        :param override_defaults: forwarded unchanged to
            ``EnvironmentParamsContainer.env_params``.
        """
        settings = EnvironmentParamsContainer.env_params(override_defaults)

        # Optional single-run guard, keyed on the configured pid directory.
        if settings.lock:
            lock.run_once(settings.lock_pid_dir)

        # Pick the remote scheduler unless local scheduling was requested.
        if not settings.local_scheduler:
            backend = rpc.RemoteScheduler(host=settings.scheduler_host)
        else:
            backend = scheduler.CentralPlannerScheduler()

        runner = worker.Worker(scheduler=backend,
                               worker_processes=settings.workers)
        for item in tasks:
            runner.add(item)
        runner.run()
Ejemplo n.º 3
0
    def run(tasks, override_defaults={}):
        """Run ``tasks`` on a worker configured from ``register.env_params``.

        :param tasks: iterable of tasks; each one is passed to ``Worker.add``.
        :param override_defaults: forwarded unchanged to
            ``register.env_params``.
        """
        opts = register.env_params(override_defaults)

        if opts.lock:
            lock.run_once(opts.lock_pid_dir)

        # Local planner when requested, otherwise an RPC scheduler on the
        # configured host.
        sched = (scheduler.CentralPlannerScheduler()
                 if opts.local_scheduler
                 else rpc.RemoteScheduler(host=opts.scheduler_host))

        task_worker = worker.Worker(scheduler=sched,
                                    worker_processes=opts.workers)

        for job in tasks:
            task_worker.add(job)
        task_worker.run()
Ejemplo n.º 4
0
    def run(tasks, override_defaults={}):
        """Schedule ``tasks`` on a worker, log that scheduling finished, then run the worker.

        :param tasks: iterable of tasks; each one is passed to ``Worker.add``.
        :param override_defaults: forwarded unchanged to
            ``EnvironmentParamsContainer.env_params``.
        """
        cfg = EnvironmentParamsContainer.env_params(override_defaults)

        if cfg.lock:
            lock.run_once(cfg.lock_pid_dir)

        if cfg.local_scheduler:
            planner = scheduler.CentralPlannerScheduler()
        else:
            planner = rpc.RemoteScheduler(
                host=cfg.scheduler_host,
                port=cfg.scheduler_port)

        runner = worker.Worker(scheduler=planner, worker_processes=cfg.workers)
        for entry in tasks:
            runner.add(entry)

        # Emitted after every task was added and before the worker starts.
        logging.getLogger('luigi-interface').info('Done scheduling tasks')
        runner.run()
Ejemplo n.º 5
0
    def run(tasks, worker_scheduler_factory=None, override_defaults={}):
        """Build scheduler and worker through a factory, add ``tasks`` and run the worker.

        :param tasks: iterable of tasks; each one is passed to the worker's
            ``add`` method.
        :param worker_scheduler_factory: object providing
            ``create_local_scheduler``, ``create_remote_scheduler`` and
            ``create_worker``; a ``WorkerSchedulerFactory`` is built when
            this is ``None``.
        :param override_defaults: forwarded unchanged to
            ``EnvironmentParamsContainer.env_params``.
        """
        factory = (WorkerSchedulerFactory()
                   if worker_scheduler_factory is None
                   else worker_scheduler_factory)

        settings = EnvironmentParamsContainer.env_params(override_defaults)

        if settings.lock:
            lock.run_once(settings.lock_pid_dir)

        if not settings.local_scheduler:
            backend = factory.create_remote_scheduler(
                host=settings.scheduler_host,
                port=settings.scheduler_port)
        else:
            backend = factory.create_local_scheduler()

        runner = factory.create_worker(
            scheduler=backend,
            worker_processes=settings.workers)

        for item in tasks:
            runner.add(item)

        # Emitted after every task was added and before the worker starts.
        logging.getLogger('luigi-interface').info('Done scheduling tasks')
        runner.run()
Ejemplo n.º 6
0
    def run(self, cmdline_args=None, config=None):
        """Parse the command line with argparse, build the selected task and run it.

        When ``register.get_main()`` is truthy its parameters are added to
        the top-level parser; otherwise every entry of ``register.get_reg()``
        becomes a sub-command carrying its own parameters.

        :param cmdline_args: argument list handed to ``parse_args``; with
            ``None`` argparse reads ``sys.argv``.
        :param config: optional object with a ``get(section, option)``
            method; when truthy, its ``('luigi', 'erroremail')`` value is
            passed to the worker.
        """
        import argparse
        parser = argparse.ArgumentParser()
        # INTERNAL: While changing configuration here, please update documentation in spluigi
        parser.add_argument('--local-scheduler', help='Use local scheduling', action='store_true')
        parser.add_argument('--scheduler-host', help='Hostname of machine running remote scheduler [default: %(default)s]', default='localhost')
        parser.add_argument('--lock', help='Do not run if the task is already running', action='store_true')
        parser.add_argument('--lock-pid-dir', help='Directory to store the pid file [default: %(default)s]', default='/var/tmp/luigi')
        parser.add_argument('--workers', help='Maximum number of parallel tasks to run [default: %(default)s]', default=1, type=int)

        def _add_parameter(parser, param_name, param, prefix=''):
            """Expose one parameter as a ``--param-name`` option on ``parser``."""
            if param.has_default:
                # Leading space keeps the default from running straight into
                # the parameter name in the --help output.
                defaulthelp = " [default: %s]" % (param.default,)
            else:
                defaulthelp = ""

            if param.is_list:
                action = "append"
            elif param.is_boolean:
                action = "store_true"
            else:
                action = "store"

            help_text = '%s%s%s' % (prefix, param_name, defaulthelp)
            # argparse applies %-formatting to help strings, so a literal '%'
            # (e.g. a default such as '%Y-%m-%d') has to be escaped or
            # printing the help fails.
            help_text = help_text.replace('%', '%%')
            parser.add_argument('--' + param_name.replace('_', '-'), help=help_text, default=None, action=action)

        def _add_task_parameters(parser, cls):
            """Add the non-global parameters of ``cls``, prefixed with its task family."""
            for param_name, param in cls.get_nonglobal_params():
                _add_parameter(parser, param_name, param, cls.task_family + '.')

        def _add_global_parameters(parser):
            """Add every registered global parameter to ``parser``."""
            for param_name, param in register.get_global_params():
                _add_parameter(parser, param_name, param)

        if register.get_main():
            _add_task_parameters(parser, register.get_main())
            _add_global_parameters(parser)

        else:
            subparsers = parser.add_subparsers(dest='command')

            for name, cls in register.get_reg().iteritems():
                subparser = subparsers.add_parser(name)
                _add_task_parameters(subparser, cls)
                _add_global_parameters(subparser)

        args = parser.parse_args(args=cmdline_args)
        if args.lock:
            lock.run_once(args.lock_pid_dir)
        params = vars(args)  # convert to a str -> str hash

        if register.get_main():
            task_cls = register.get_main()
        else:
            task_cls = register.get_reg()[args.command]

        task = task_cls.from_input(params, register.get_global_params())

        if args.local_scheduler:
            sch = scheduler.CentralPlannerScheduler()
        else:
            sch = rpc.RemoteScheduler(host=args.scheduler_host)

        erroremail = config.get('luigi', 'erroremail') if config else None

        w = worker.Worker(scheduler=sch, erroremail=erroremail, worker_processes=args.workers)

        w.add(task)
        w.run()
Ejemplo n.º 7
0
    def run(self, cmdline_args=None, config=None):
        """Parse the command line with optparse, build the selected task and run it.

        Parsing happens in two passes: a pass-through parser first extracts
        ``--task`` so the task class is known, then a full parser carrying
        that task's parameters parses the same arguments again.

        :param cmdline_args: argument list handed to ``parse_args``; with
            ``None`` optparse reads ``sys.argv``.
        :param config: optional object with a ``get(section, option)``
            method; when truthy it supplies the default scheduler host and
            the ``erroremail`` value passed to the worker.
        :raises Exception: if the requested task name is not in
            ``register.get_reg()``.
        """
        parser = PassThroughOptionParser()
        tasks_str = '/'.join(sorted([name for name in register.get_reg()]))

        def add_task_option(p):
            """Add the ``--task`` option, defaulting to the main task when one is set."""
            if register.get_main():
                # INTERNAL: While changing configuration here, please update documentation in spluigi
                p.add_option('--task', help='Task to run (' + tasks_str + ') [default: %default]', default=register.get_main().task_family)
            else:
                p.add_option('--task', help='Task to run (%s)' % tasks_str)
        add_task_option(parser)
        options, args = parser.parse_args(args=cmdline_args)

        task_cls_name = options.task
        if self.__existing_optparse:
            parser = self.__existing_optparse
        else:
            parser = optparse.OptionParser()
        add_task_option(parser)
        if config:
            default_scheduler = config.get('luigi', 'scheduler-host')
        else:
            default_scheduler = 'localhost'
        # INTERNAL: While changing configuration here, please update documentation in spluigi
        parser.add_option('--local-scheduler', help='Use local scheduling', action='store_true')
        parser.add_option('--scheduler-host', help='Hostname of machine running remote scheduler [default: %default]', default=default_scheduler)
        parser.add_option('--lock', help='Do not run if the task is already running', action='store_true')
        parser.add_option('--lock-pid-dir', help='Directory to store the pid file [default: %default]', default='/var/tmp/luigi')
        parser.add_option('--workers', help='Maximum number of parallel tasks to run [default: %default]', default=1, type=int)

        if task_cls_name not in register.get_reg():
            raise Exception('Error: %s is not a valid tasks (must be %s)' % (task_cls_name, tasks_str))

        # Collect the defaults of all parameters; task parameters override
        # global ones that share a name.
        parameter_defaults = {}
        task_cls = register.get_reg()[task_cls_name]
        # Materialised because it is iterated twice below (same treatment as
        # global_params); a one-shot iterable would be empty the second time.
        params = list(task_cls.get_nonglobal_params())
        global_params = list(register.get_global_params())

        for param_name, param in global_params:
            parameter_defaults[param_name] = param.default

        for param_name, param in params:
            if param.has_default:
                parameter_defaults[param_name] = param.default  # Will override with whatever: TODO: do more sensibly!

        def _add_parameter(parser, param_name, param, parameter_defaults):
            """Expose one parameter as a ``--param-name`` option on ``parser``."""
            if param.has_default:
                # Show this parameter's own default; the whole defaults dict
                # used to be interpolated here.
                help_text = '%s [default: %s]' % (param_name, parameter_defaults[param_name])
            else:
                help_text = param_name
            if param.is_list:
                action = "append"
            elif param.is_boolean:
                action = "store_true"
            else:
                action = "store"
            parser.add_option('--' + param_name.replace('_', '-'),
                              help=help_text,
                              default=None,
                              action=action)

        for param_name, param in global_params:
            _add_parameter(parser, param_name, param, parameter_defaults)

        for param_name, param in params:
            _add_parameter(parser, param_name, param, parameter_defaults)

        # Parse and run
        options, args = parser.parse_args(args=cmdline_args)
        if options.lock:
            lock.run_once(options.lock_pid_dir)
        # Everything except the task selector and the scheduler switch is
        # forwarded to the task constructor.
        params = {}
        for k, v in vars(options).items():
            if k not in ['task', 'local_scheduler']:
                params[k] = v
        task = task_cls.from_input(params, global_params)

        if options.local_scheduler:
            sch = scheduler.CentralPlannerScheduler()
        else:
            sch = rpc.RemoteScheduler(host=options.scheduler_host)

        erroremail = config.get('luigi', 'erroremail') if config else None

        # Run
        w = worker.Worker(scheduler=sch, erroremail=erroremail, worker_processes=options.workers)

        w.add(task)
        w.run()
Ejemplo n.º 8
0
    def run(self, cmdline_args=None, config=None):
        """Build an argparse command line for the registered tasks, then run the chosen task.

        With a main task (``register.get_main()`` truthy) its parameters go
        on the top-level parser; otherwise each entry of
        ``register.get_reg()`` becomes a sub-command with its own parameters.

        :param cmdline_args: argument list handed to ``parse_args``; with
            ``None`` argparse reads ``sys.argv``.
        :param config: optional object with a ``get(section, option)``
            method; when truthy, its ``('luigi', 'erroremail')`` value is
            passed to the worker.
        """
        import argparse
        parser = argparse.ArgumentParser()
        # INTERNAL: While changing configuration here, please update documentation in spluigi
        parser.add_argument('--local-scheduler',
                            help='Use local scheduling',
                            action='store_true')
        parser.add_argument(
            '--scheduler-host',
            help=
            'Hostname of machine running remote scheduler [default: %(default)s]',
            default='localhost')
        parser.add_argument('--lock',
                            help='Do not run if the task is already running',
                            action='store_true')
        parser.add_argument(
            '--lock-pid-dir',
            help='Directory to store the pid file [default: %(default)s]',
            default='/var/tmp/luigi')
        parser.add_argument(
            '--workers',
            help=
            'Maximum number of parallel tasks to run [default: %(default)s]',
            default=1,
            type=int)

        def _add_parameter(parser, param_name, param, prefix=''):
            """Register ``param`` on ``parser`` as the option ``--param-name``."""
            # The leading space separates the default from the parameter
            # name; without it the help read "name[default: x]".
            defaulthelp = ""
            if param.has_default:
                defaulthelp = " [default: %s]" % (param.default, )

            if param.is_list:
                action = "append"
            elif param.is_boolean:
                action = "store_true"
            else:
                action = "store"

            # Help strings are %-formatted by argparse, so literal '%'
            # characters coming from names or defaults must be doubled.
            help_text = ('%s%s%s' %
                         (prefix, param_name, defaulthelp)).replace('%', '%%')
            parser.add_argument('--' + param_name.replace('_', '-'),
                                help=help_text,
                                default=None,
                                action=action)

        def _add_task_parameters(parser, cls):
            """Register the non-global parameters of ``cls`` under its task-family prefix."""
            for param_name, param in cls.get_nonglobal_params():
                _add_parameter(parser, param_name, param,
                               cls.task_family + '.')

        def _add_global_parameters(parser):
            """Register all global parameters on ``parser``."""
            for param_name, param in register.get_global_params():
                _add_parameter(parser, param_name, param)

        if register.get_main():
            _add_task_parameters(parser, register.get_main())
            _add_global_parameters(parser)

        else:
            subparsers = parser.add_subparsers(dest='command')

            for name, cls in register.get_reg().iteritems():
                subparser = subparsers.add_parser(name)
                _add_task_parameters(subparser, cls)
                _add_global_parameters(subparser)

        args = parser.parse_args(args=cmdline_args)
        if args.lock:
            lock.run_once(args.lock_pid_dir)
        params = vars(args)  # convert to a str -> str hash

        if register.get_main():
            task_cls = register.get_main()
        else:
            task_cls = register.get_reg()[args.command]

        task = task_cls.from_input(params, register.get_global_params())

        if args.local_scheduler:
            sch = scheduler.CentralPlannerScheduler()
        else:
            sch = rpc.RemoteScheduler(host=args.scheduler_host)

        erroremail = config.get('luigi', 'erroremail') if config else None

        w = worker.Worker(scheduler=sch,
                          erroremail=erroremail,
                          worker_processes=args.workers)

        w.add(task)
        w.run()
Ejemplo n.º 9
0
    def run(self, cmdline_args=None, config=None):
        """Build an optparse command line for the selected task, then run that task.

        The arguments are parsed twice: once with a pass-through parser that
        only knows ``--task``, and again with a full parser that also carries
        the chosen task's parameters.

        :param cmdline_args: argument list handed to ``parse_args``; with
            ``None`` optparse reads ``sys.argv``.
        :param config: optional object with a ``get(section, option)``
            method; when truthy it supplies the default scheduler host and
            the ``erroremail`` value passed to the worker.
        :raises Exception: if the requested task name is not in
            ``register.get_reg()``.
        """
        parser = PassThroughOptionParser()
        tasks_str = '/'.join(sorted([name for name in register.get_reg()]))

        def add_task_option(p):
            """Add ``--task`` to ``p``; the main task, if any, is the default."""
            if register.get_main():
                # INTERNAL: While changing configuration here, please update documentation in spluigi
                p.add_option('--task',
                             help='Task to run (' + tasks_str +
                             ') [default: %default]',
                             default=register.get_main().task_family)
            else:
                p.add_option('--task', help='Task to run (%s)' % tasks_str)

        add_task_option(parser)
        options, args = parser.parse_args(args=cmdline_args)

        task_cls_name = options.task
        if self.__existing_optparse:
            parser = self.__existing_optparse
        else:
            parser = optparse.OptionParser()
        add_task_option(parser)
        if config:
            default_scheduler = config.get('luigi', 'scheduler-host')
        else:
            default_scheduler = 'localhost'
        # INTERNAL: While changing configuration here, please update documentation in spluigi
        parser.add_option('--local-scheduler',
                          help='Use local scheduling',
                          action='store_true')
        parser.add_option(
            '--scheduler-host',
            help=
            'Hostname of machine running remote scheduler [default: %default]',
            default=default_scheduler)
        parser.add_option('--lock',
                          help='Do not run if the task is already running',
                          action='store_true')
        parser.add_option(
            '--lock-pid-dir',
            help='Directory to store the pid file [default: %default]',
            default='/var/tmp/luigi')
        parser.add_option(
            '--workers',
            help='Maximum number of parallel tasks to run [default: %default]',
            default=1,
            type=int)

        if task_cls_name not in register.get_reg():
            raise Exception('Error: %s is not a valid tasks (must be %s)' %
                            (task_cls_name, tasks_str))

        # Gather parameter defaults; a task parameter overrides a global one
        # with the same name.
        parameter_defaults = {}
        task_cls = register.get_reg()[task_cls_name]
        # Turned into a list because it is looped over twice below, exactly
        # like global_params; a one-shot iterable would be exhausted by the
        # first loop.
        params = list(task_cls.get_nonglobal_params())
        global_params = list(register.get_global_params())

        for param_name, param in global_params:
            parameter_defaults[param_name] = param.default

        for param_name, param in params:
            if param.has_default:
                parameter_defaults[
                    param_name] = param.default  # Will override with whatever: TODO: do more sensibly!

        def _add_parameter(parser, param_name, param, parameter_defaults):
            """Register ``param`` on ``parser`` as the option ``--param-name``."""
            if param.has_default:
                # Only this parameter's default belongs in its help text;
                # previously the entire defaults dict was printed.
                help_text = '%s [default: %s]' % (
                    param_name, parameter_defaults[param_name])
            else:
                help_text = param_name
            if param.is_list:
                action = "append"
            elif param.is_boolean:
                action = "store_true"
            else:
                action = "store"
            parser.add_option('--' + param_name.replace('_', '-'),
                              help=help_text,
                              default=None,
                              action=action)

        for param_name, param in global_params:
            _add_parameter(parser, param_name, param, parameter_defaults)

        for param_name, param in params:
            _add_parameter(parser, param_name, param, parameter_defaults)

        # Parse and run
        options, args = parser.parse_args(args=cmdline_args)
        if options.lock:
            lock.run_once(options.lock_pid_dir)
        # Forward every parsed option except the task selector and the
        # scheduler switch to the task constructor.
        params = {}
        for k, v in vars(options).items():
            if k not in ['task', 'local_scheduler']:
                params[k] = v
        task = task_cls.from_input(params, global_params)

        if options.local_scheduler:
            sch = scheduler.CentralPlannerScheduler()
        else:
            sch = rpc.RemoteScheduler(host=options.scheduler_host)

        erroremail = config.get('luigi', 'erroremail') if config else None

        # Run
        w = worker.Worker(scheduler=sch,
                          erroremail=erroremail,
                          worker_processes=options.workers)

        w.add(task)
        w.run()