def __init__(self, *args, **kwargs) -> None:
    """
    Initialise the map and index the registered ``PandasTypeConfig`` subclasses.

    All positional and keyword arguments are forwarded unchanged to the
    parent initialiser.  Afterwards ``self._map`` holds one entry per
    registered task class that is a strict subclass of ``PandasTypeConfig``,
    keyed by that class's ``task_namespace``.
    """
    super(PandasTypeConfigMap, self).__init__(*args, **kwargs)

    registered_names = Register.task_names()
    candidates = [Register.get_task_cls(name) for name in registered_names]

    namespace_to_cls = {}
    for candidate in candidates:
        # The base class itself is not a usable config, only its subclasses are.
        if issubclass(candidate, PandasTypeConfig) and candidate != PandasTypeConfig:
            namespace_to_cls[candidate.task_namespace] = candidate
    self._map = namespace_to_cls
def add_task_option(p):
    """
    Register the ``--task`` option on the option parser ``p``.

    When ``main_task_cls`` (taken from the enclosing scope) is set, its
    ``task_family`` becomes the option's default and the help text shows it.
    Otherwise the option is added without a default.
    """
    if not main_task_cls:
        p.add_option('--task', help='Task to run (one of %s)' % Register.tasks_str())
        return

    help_text = 'Task to run (one of ' + Register.tasks_str() + ') [default: %default]'
    p.add_option('--task', help=help_text, default=main_task_cls.task_family)
def run_with_retcodes(argv):
    """
    Run luigi with command line parsing and report the outcome as an exit code.

    A pid-lock clash or any uncaught exception while running ends the process
    through ``sys.exit`` with the matching configured code.  In every other
    case the code is *returned* (not raised) after the task instance cache
    has been cleared.

    Note: Usually you use the luigi binary directly and don't call this function yourself.

    :param argv: Should (conceptually) be ``sys.argv[1:]``
    :return: the largest configured code among the non-empty summary
             categories; if that is 0 and the root task is neither in
             ``completed`` nor in ``already_done``, the ``not_run`` code.
    """
    logger = logging.getLogger('luigi-interface')

    with luigi.cmdline_parser.CmdlineParser.global_instance(argv):
        retcodes = retcode()

    worker = None
    try:
        worker = luigi.interface._run(argv)['worker']
    except luigi.interface.PidLockAlreadyTakenExit:
        sys.exit(retcodes.already_running)
    except Exception:
        # Failures this early can happen before logging is configured,
        # so configure it before reporting.
        luigi.interface.setup_interface_logging()
        logger.exception("Uncaught exception in luigi")
        sys.exit(retcodes.unhandled_exception)

    task_sets = luigi.execution_summary._summary_dict(worker)
    root_task = luigi.execution_summary._root_task(worker)
    populated_statuses = {status for status, tasks in task_sets.items() if tasks}

    def has(status):
        assert status in luigi.execution_summary._ORDERED_STATUSES
        return status in populated_statuses

    code_for_status = [
        (retcodes.missing_data, has('still_pending_ext')),
        (retcodes.task_failed, has('failed')),
        (retcodes.already_running, has('run_by_other_worker')),
        (retcodes.scheduling_error, has('scheduling_error')),
        (retcodes.not_run, has('not_run')),
    ]
    worst_code = max(code if applies else 0 for code, applies in code_for_status)

    ret_code = worst_code
    # A clean summary is not enough: the root task must actually be done.
    if worst_code == 0 and not (
            root_task in task_sets["completed"] or root_task in task_sets["already_done"]):
        ret_code = retcodes.not_run

    Register.clear_instance_cache()
    return ret_code
def parse_task(self, cmdline_args=None, main_task_cls=None):
    """
    Turn command line arguments into a single instantiated task.

    :param cmdline_args: argument list; ``sys.argv[1:]`` is used when ``None``
    :param main_task_cls: task class to run; when falsy the task name is
                          taken from the first argument the global parser
                          does not recognise
    :return: a one-element list holding the instantiated task
    :raises SystemExit: when no task name can be found on the command line
    """
    argv = sys.argv[1:] if cmdline_args is None else cmdline_args

    parser = argparse.ArgumentParser()
    add_global_parameters(parser)

    if not main_task_cls:
        known_names = Register.task_names()

        # First pass: global arguments only, with --help held back so that
        # argparse does not exit before the task name has been located.
        without_help = [arg for arg in argv if arg != '--help']
        args, leftovers = parser.parse_known_args(args=without_help)
        if not leftovers:
            # Give a possible --help the chance to print and exit.
            parser.parse_known_args(args=argv)
            raise SystemExit('No task specified')

        task_name = leftovers[0]
        if task_name not in known_names:
            error_task_names(task_name, known_names)
        task_cls = Register.get_task_cls(task_name)

        # The task gets its own subparser carrying both global and task
        # parameters, so either of these orders works:
        #   test.py --global-param xyz Test --n 42
        #   test.py Test --n 42 --global-param xyz
        task_parser = parser.add_subparsers(dest='command').add_parser(task_name)
        add_global_parameters(task_parser)
        add_task_parameters(task_parser, task_cls)

        # Workaround for bug in argparse for Python 2.7.9
        # See https://mail.python.org/pipermail/python-dev/2015-January/137699.html
        second_pass = parser.parse_args(args=argv)
        for key, value in vars(second_pass).items():
            if value:  # True for boolean flags, non-None for everything else
                setattr(args, key, value)
    else:
        add_task_parameters(parser, main_task_cls)
        args = parser.parse_args(args=argv)
        task_cls = main_task_cls

    # Not side effect free: this may set global params.
    set_global_parameters(args)
    return [task_cls(**get_task_parameters(task_cls, args))]
def parse_task(self, cmdline_args=None, main_task_cls=None):
    """
    Parse the command line and build the task it describes.

    With ``main_task_cls`` given, its parameters are registered directly on
    the top-level parser.  Without it, the first argument the global parser
    cannot place is treated as the task name and parsed through a subparser.

    :param cmdline_args: argument list, defaults to ``sys.argv[1:]``
    :param main_task_cls: optional task class that fixes which task runs
    :return: list containing exactly one task instance
    :raises SystemExit: if no task name is present
    """
    if cmdline_args is None:
        cmdline_args = sys.argv[1:]

    top_parser = argparse.ArgumentParser()
    add_global_parameters(top_parser)

    if main_task_cls:
        add_task_parameters(top_parser, main_task_cls)
        args = top_parser.parse_args(args=cmdline_args)
        task_cls = main_task_cls
    else:
        registered = Register.task_names()

        # Globals first; --help is filtered out so the task name can still
        # be discovered before argparse gets to print usage and exit.
        args, positional = top_parser.parse_known_args(
            args=list(filter(lambda arg: arg != '--help', cmdline_args)))
        if len(positional) < 1:
            # Re-run with the untouched arguments in case --help was given.
            top_parser.parse_known_args(args=cmdline_args)
            raise SystemExit('No task specified')

        task_name = positional[0]
        if task_name not in registered:
            error_task_names(task_name, registered)
        task_cls = Register.get_task_cls(task_name)

        # Task-specific subparser; global params are registered on it as
        # well so they may appear before or after the task name.
        subparsers = top_parser.add_subparsers(dest='command')
        task_subparser = subparsers.add_parser(task_name)
        add_global_parameters(task_subparser)
        add_task_parameters(task_subparser, task_cls)

        # Workaround for bug in argparse for Python 2.7.9
        # See https://mail.python.org/pipermail/python-dev/2015-January/137699.html
        reparsed = vars(top_parser.parse_args(args=cmdline_args))
        overrides = {key: value for key, value in reparsed.items() if value}
        for key, value in overrides.items():
            setattr(args, key, value)

    # Setting global parameters is a side effect of parsing.
    set_global_parameters(args)
    task_params = get_task_parameters(task_cls, args)
    instance = task_cls(**task_params)
    return [instance]
def _build_parser(root_task=None, help_all=False):
    """
    Build the argparse parser covering every registered parameter.

    Each parameter gets a "global" flag (prefixed with the task name unless
    it is registered without a section).  Parameters of ``root_task``
    additionally get a short "local" flag.

    :param root_task: name of the task whose parameters also get local flags
    :param help_all: show help for every parameter, not only the root task's
    :return: the configured ``argparse.ArgumentParser``
    """
    parser = argparse.ArgumentParser(add_help=False)

    # The positional has to be optional for argparse, otherwise things like
    # `--module` could not be parsed out before `--help` is handled.
    parser.add_argument(
        'root_task',
        nargs='?',
        help='Task family to run. Is not optional.',
        metavar='Required root task',
    )

    for task_name, is_without_section, param_name, param_obj in Register.get_all_params():
        is_the_root_task = task_name == root_task
        show_help = any((is_the_root_task, help_all, param_obj.always_in_help))
        help_text = param_obj.description if show_help else argparse.SUPPRESS

        if is_without_section:
            flag_name_underscores = param_name
        else:
            flag_name_underscores = task_name + '_' + param_name
        global_flag_name = '--' + flag_name_underscores.replace('_', '-')
        global_kwargs = param_obj._parser_kwargs(param_name, task_name)
        parser.add_argument(global_flag_name, help=help_text, **global_kwargs)

        if not is_the_root_task:
            continue
        local_flag_name = '--' + param_name.replace('_', '-')
        local_kwargs = param_obj._parser_kwargs(param_name)
        parser.add_argument(local_flag_name, help=help_text, **local_kwargs)

    return parser
def complete_config():
    """
    Create a temporary Luigi Config that has defaults for all variables

    This is a generator that yields exactly once.  Before the yield, every
    registered parameter without a default gets a placeholder value written
    into the global luigi config.  After the yield those entries are removed
    again.  The removal runs in a ``finally`` clause, so the config is also
    restored when the consumer raises or closes the generator early.

    NOTE(review): presumably wrapped with ``contextlib.contextmanager`` at the
    definition site; the decorator is not visible here -- confirm.
    """
    def get_default(param_obj):
        # Subclasses are checked before the generic Parameter base.
        # Returns None for anything that is not a luigi.Parameter.
        if isinstance(param_obj, luigi.IntParameter):
            return '1'
        elif isinstance(param_obj, luigi.DateParameter):
            return datetime.date.today().isoformat()
        elif isinstance(param_obj, luigi.Parameter):
            return ''

    config = luigi.configuration.get_config()

    # Make sure every parameter has a default value
    sections_to_remove = []
    options_to_remove = []
    for task_name, is_without_section, param_name, param_obj in Register.get_all_params():
        if param_obj._default == _no_value:
            if is_without_section:
                sections_to_remove.append(task_name)
            options_to_remove.append((task_name, param_name))
            config.set(task_name, param_name, get_default(param_obj))

    try:
        # Yield the completed config
        yield
    finally:
        # Remove any config we set up, even if the caller's block failed
        for section, option in options_to_remove:
            config.remove_option(section, option)
        for section in sections_to_remove:
            config.remove_section(section)
def parse(self, cmdline_args=None, main_task_cls=None):
    """
    Parse ``cmdline_args`` with optparse and instantiate the selected task.

    Parsing happens twice.  A pass-through parser first extracts only
    ``--task``.  Then a full parser (the pre-existing one if this object
    has one, otherwise a fresh ``optparse.OptionParser``) is loaded with
    the global and task parameters and parses the same arguments again.

    :param cmdline_args: argument list handed to ``parse_args``
    :param main_task_cls: optional task class supplying the ``--task`` default
    :return: a one-element list with the instantiated task
    """
    def add_task_option(p):
        if not main_task_cls:
            p.add_option('--task', help='Task to run (one of %s)' % Register.tasks_str())
            return
        p.add_option('--task',
                     help='Task to run (one of ' + Register.tasks_str() + ') [default: %default]',
                     default=main_task_cls.task_family)

    # First pass: only interested in which task was asked for.
    sniffing_parser = PassThroughOptionParser()
    add_global_parameters(sniffing_parser, optparse=True)
    add_task_option(sniffing_parser)
    sniffed_options, _ = sniffing_parser.parse_args(args=cmdline_args)
    task_cls_name = sniffed_options.task

    # Second pass: the real parser, knowing every parameter.
    full_parser = self.__existing_optparse if self.__existing_optparse else optparse.OptionParser()
    add_task_option(full_parser)

    task_cls = Register.get_task_cls(task_cls_name)

    add_global_parameters(full_parser, optparse=True)
    add_task_parameters(full_parser, task_cls, optparse=True)

    options, _ = full_parser.parse_args(args=cmdline_args)

    set_global_parameters(options)
    return [task_cls(**get_task_parameters(task_cls, options))]
def _build_parser(root_task=None, help_all=False):
    """
    Assemble an ``argparse.ArgumentParser`` from all registered parameters.

    :param root_task: task family whose parameters are also exposed under
                      their short, unprefixed flag names
    :param help_all: when true, no parameter's help text is suppressed
    :return: parser with an optional ``root_task`` positional and one or two
             flags per registered parameter
    """
    parser = argparse.ArgumentParser(add_help=False)

    # argparse must see the positional as optional so that options such as
    # `--module` can be extracted before `--help` is acted upon.
    positional_options = dict(
        nargs='?',
        help='Task family to run. Is not optional.',
        metavar='Required root task',
    )
    parser.add_argument('root_task', **positional_options)

    def as_flag(underscored):
        return '--' + underscored.replace('_', '-')

    for task_name, is_without_section, param_name, param_obj in Register.get_all_params():
        is_the_root_task = task_name == root_task

        if any((is_the_root_task, help_all, param_obj.always_in_help)):
            description = param_obj.description
        else:
            description = argparse.SUPPRESS

        qualified_name = param_name if is_without_section else task_name + '_' + param_name
        parser.add_argument(as_flag(qualified_name),
                            help=description,
                            **param_obj._parser_kwargs(param_name, task_name))

        if is_the_root_task:
            parser.add_argument(as_flag(param_name),
                                help=description,
                                **param_obj._parser_kwargs(param_name))

    return parser
def get_global_parameters():
    """
    Yield every registered parameter once.

    Walks ``Register.get_all_params()`` and skips any entry whose parameter
    object has already been yielded, so only the first occurrence of each
    parameter object is produced.

    :return: generator of ``(task_name, is_without_section, param_name, param)``
    """
    already_yielded = set()
    for task_name, is_without_section, param_name, param in Register.get_all_params():
        if param not in already_yielded:
            already_yielded.add(param)
            yield (task_name, is_without_section, param_name, param)
def _add_task_batchers(self):
    """
    Register a task batcher with the scheduler for each batchable task.

    Entries from ``Register._batchable_tasks()`` whose list of batch
    parameter names is empty are skipped.
    """
    for family, task_class, batch_param_names in Register._batchable_tasks():
        if not batch_param_names:
            continue
        self._scheduler.add_task_batcher(
            worker=self._id,
            task_family=family,
            batched_args=batch_param_names,
        )
def test_externalize_taskclass(self):
    """
    Check how ``Register.get_task_cls`` resolves the family "scooby.Doo"
    as zero, one, two and then three classes claim that family name.

    The statement order matters: each lookup is asserted against the
    classes that have been defined up to that point.
    """
    # No class claims the family yet, so the lookup must fail.
    with self.assertRaises(TaskClassNotFoundException):
        Register.get_task_cls('scooby.Doo')

    class Task1(luigi.Task):

        @classmethod
        def get_task_family(cls):
            return "scooby.Doo"

    # With exactly one claimant the lookup returns that class.
    self.assertEqual(Task1, Register.get_task_cls('scooby.Doo'))

    class Task2(luigi.Task):

        @classmethod
        def get_task_family(cls):
            return "scooby.Doo"

    # Two claimants for the same family: the lookup is ambiguous.
    with self.assertRaises(TaskClassAmbigiousException):
        Register.get_task_cls('scooby.Doo')

    class Task3(luigi.Task):

        @classmethod
        def get_task_family(cls):
            return "scooby.Doo"

    # There previously was a rare bug where the third installed class could
    # "undo" class ambiguity.
    with self.assertRaises(TaskClassAmbigiousException):
        Register.get_task_cls('scooby.Doo')
def _build_parser(active_tasks=frozenset()):
    """
    Build an argparse parser from every registered parameter.

    Each parameter adds itself through its ``_add_to_cmdline_parser`` hook.

    :param active_tasks: collection of task names whose parameters are added
                         with ``as_active=True``; only membership is tested.
                         The default is an immutable empty set so no mutable
                         object is shared between calls.
    :return: the populated ``argparse.ArgumentParser`` (built with
             ``add_help=False``)
    """
    parser = argparse.ArgumentParser(add_help=False)

    for task_name, is_without_section, param_name, param_obj in Register.get_all_params():
        as_active = task_name in active_tasks
        param_obj._add_to_cmdline_parser(parser, param_name, task_name,
                                         is_without_section=is_without_section,
                                         as_active=as_active)

    return parser
def _build_parser(active_tasks=frozenset(), help_all=False):
    """
    Build an argparse parser from every registered parameter.

    Each parameter adds itself through its ``_add_to_cmdline_parser`` hook.

    :param active_tasks: collection of task names whose parameters are added
                         with ``as_active=True``; only membership is tested.
                         The default is an immutable empty set so no mutable
                         object is shared between calls.
    :param help_all: forwarded unchanged to ``_add_to_cmdline_parser``
    :return: the populated ``argparse.ArgumentParser`` (built with
             ``add_help=False``)
    """
    parser = argparse.ArgumentParser(add_help=False)

    for task_name, is_without_section, param_name, param_obj in Register.get_all_params():
        as_active = task_name in active_tasks
        param_obj._add_to_cmdline_parser(parser, param_name, task_name,
                                         is_without_section=is_without_section,
                                         as_active=as_active,
                                         help_all=help_all)

    return parser
def __init__(self, cmdline_args):
    """
    Parse ``cmdline_args`` and store the result on ``self.known_args``.

    :param cmdline_args: list of command line arguments
    :raises SystemExit: with 'No task specified' when no root task is given
    """
    # Preliminary parse, only to find out which module (if any) to load.
    preliminary = self._build_parser().parse_known_args(args=cmdline_args)[0]
    self._attempt_load_module(preliminary)

    # Parse again: after loading, the positionally first unrecognized
    # argument (the task) could be a different one.
    reparsed = self._build_parser().parse_known_args(args=cmdline_args)[0]
    root_task = reparsed.root_task

    parser = self._build_parser(root_task=root_task,
                                help_all=reparsed.core_help_all)
    self._possibly_exit_with_help(parser, reparsed)

    if not root_task:
        raise SystemExit('No task specified')
    # Looking the name up verifies that the task is spelled correctly.
    Register.get_task_cls(root_task)

    # Publicly exposed so that others can read the parsed arguments.
    self.known_args = parser.parse_args(args=cmdline_args)
def __init__(self, cmdline_args):
    """
    Initialise the parser state from the given command line arguments.

    The arguments are parsed loosely twice (before and after attempting to
    load a module) and then strictly with a parser built for the detected
    root task.  The strict result is kept as ``self.known_args``.

    :param cmdline_args: list of command line arguments
    :raises SystemExit: when the command line names no root task
    """
    first_pass, _unused = self._build_parser().parse_known_args(args=cmdline_args)
    self._attempt_load_module(first_pass)

    # The module load may have changed which positional argument is the
    # first unrecognized one (the task), hence the second loose parse.
    second_pass, _unused = self._build_parser().parse_known_args(args=cmdline_args)
    root_task = second_pass.root_task
    help_all = second_pass.core_help_all

    strict_parser = self._build_parser(root_task=root_task, help_all=help_all)
    self._possibly_exit_with_help(strict_parser, second_pass)

    if root_task:
        # Check that what we believe to be the task is correctly spelled
        Register.get_task_cls(root_task)
    else:
        raise SystemExit('No task specified')

    parsed = strict_parser.parse_args(args=cmdline_args)
    self.known_args = parsed  # Also publicly expose parsed arguments
def _build_parser(active_tasks=frozenset(), help_all=False):
    """
    Build an argparse parser with a ``task`` positional and every
    registered parameter.

    Each parameter adds itself through its ``_add_to_cmdline_parser`` hook.

    :param active_tasks: collection of task names whose parameters are added
                         with ``as_active=True``; only membership is tested.
                         The default is an immutable empty set so no mutable
                         object is shared between calls.
    :param help_all: forwarded unchanged to ``_add_to_cmdline_parser``
    :return: the populated ``argparse.ArgumentParser`` (built with
             ``add_help=False``)
    """
    parser = argparse.ArgumentParser(add_help=False)

    # Unfortunately, we have to set it as optional to argparse, so we can
    # parse out stuff like `--module` before we call for `--help`.
    parser.add_argument('task', nargs='?', help='Task family to run. Is not optional.')

    for task_name, is_without_section, param_name, param_obj in Register.get_all_params():
        as_active = task_name in active_tasks
        param_obj._add_to_cmdline_parser(parser, param_name, task_name,
                                         is_without_section=is_without_section,
                                         as_active=as_active,
                                         help_all=help_all)

    return parser
def _build_parser(active_tasks=frozenset()):
    """
    Build an argparse parser from every registered parameter.

    Every parameter is added once with ``glob=True``.  Parameters belonging
    to a task in ``active_tasks`` are added a second time with ``glob=False``.

    :param active_tasks: collection of task names; only membership is tested.
                         The default is an immutable empty set so no mutable
                         object is shared between calls.
    :return: the populated ``argparse.ArgumentParser`` (built with
             ``add_help=False``)
    """
    parser = argparse.ArgumentParser(add_help=False)

    for task_name, is_without_section, param_name, param_obj in Register.get_all_params():
        # Bind the arguments shared by both registrations once.
        add = functools.partial(param_obj._add_to_cmdline_parser, parser,
                                param_name, task_name,
                                is_without_section=is_without_section)
        add(glob=True)
        if task_name in active_tasks:
            add(glob=False)

    return parser
def get_task_cls(mod_cls):
    """
    Resolve string to task class object

    Reuse Luigi's service that registers task types.  Like Luigi, we assume
    that ``mod_cls`` is 'module.class' and that the user has put their pipe
    location on PYTHONPATH.

    A name without any dot is looked up directly in the register, with no
    import attempted.  Previously this case tried to import the empty
    module name and failed with ``ValueError``.

    :param mod_cls: '<module>.<class>', or a bare '<class>' that is already
                    registered
    :return: Task class object
    """
    from luigi.task_register import Register

    # Everything before the last dot is the module path, the rest the class.
    mod, _, cls = mod_cls.rpartition('.')

    if mod:
        # Importing the module registers its task classes.
        __import__(mod)

    return Register.get_task_cls(cls)
def _get_task_cls(self):
    """
    Look up the class registered for the root task named in the parsed
    command line arguments.
    """
    root_task_name = self.known_args.root_task
    return Register.get_task_cls(root_task_name)
def get_task_cls(self):
    """
    Look up the class registered under this object's task name.
    """
    task_name = self._task_name
    return Register.get_task_cls(task_name)
def gen_sphinx_tasks(entry_point, labels, *_args, **kwargs):
    """
    Writes a file per label, suitable for use by sphinx.ext.autodoc, using the
    classes found from entry_point.

    Also generates ``toctree.rst`` in ``SPHINX_DIR``, which can be included
    from the index page to provide links to each generated file.

    :param entry_point: name handed to ``stevedore.ExtensionManager``; also
                        used (with a trailing dot) as the module-name prefix
                        that selects which registered tasks are documented
    :param labels: one heading label per output file
    :param kwargs: ``categories`` (optional list) pairs by position with
                   ``labels``; a label without a category uses ``''`` and is
                   written to ``all.rst``
    :raises SystemExit: when an output file cannot be opened or written

    NOTE(review): the opened files are only closed at the very end, so any
    exception in between leaves them open -- consider ``with`` /
    ``contextlib.ExitStack``.
    """
    # Declare file header strings
    # NOTE(review): these templates look like they were meant to span
    # several lines -- confirm their whitespace against the rendered reST.
    warning = '''.. WARNING: DO NOT EDIT THIS FILE DIRECTLY Generated by sphinx_source/gen_tasks.py on {now} '''.format(now=time.strftime('%c'))
    toctree_header = '''{warning} .. toctree:: :maxdepth: 1 '''
    incfile_header = '''{warning} .. _{category_slug}: Back to :doc:`index` {label_heading} '''

    # Load modules into memory
    stevedore.ExtensionManager(entry_point)

    # Used to filter the classes under entry_point
    entry_point_dot = '{entry_point}.'.format(entry_point=entry_point)

    # Generate a list of output file arguments from the given labels and categories
    output = []
    categories = kwargs.get('categories', [])
    for idx, label in enumerate(labels):
        try:
            category = ''
            if idx < len(categories):
                category = categories[idx]

            # Create a category slug for sphinx, and name the file with it
            category_slug = category.replace(' ', '_') or 'all'
            file_name = '{slug}.rst'.format(slug=category_slug)
            file_path = os.path.join(SPHINX_DIR, file_name)
            file_pointer = open(file_path, "w")
            # The dict keys double as format fields for incfile_header below.
            output.append({
                'fp': file_pointer,
                'file_name': file_name,
                'category': category,
                'category_slug': category_slug,
                'label': label,
                'label_heading': "{label}\n{_}".format(label=label, _='=' * len(label)),
                'modules': {},
            })
        except IOError:
            sys.exit(
                'Unable to write to {file_path}'.format(file_path=file_path))

    # Write the header to the table of contents file
    tocfile_name = os.path.join(SPHINX_DIR, 'toctree.rst')
    try:
        tocfile = open(tocfile_name, "w")
        tocfile.write(toctree_header.format(warning=warning))
    except IOError:
        sys.exit(
            'Unable to write to {file_name}'.format(file_name=tocfile_name))

    # For each Task, sorted by class name
    tasks = Register._get_reg()
    for name in sorted(tasks):
        cls = tasks[name]
        module = cls.__module__

        # Show only tasks under entry_point
        if module.startswith(entry_point_dot):
            for out in output:
                # Show only tasks in the output category
                if getattr(cls, 'task_category', '') == out['category']:
                    if module not in out['modules']:
                        out['modules'][module] = {}
                    out['modules'][module][name] = cls

    # Emit one file per label; labels without any matching module get no
    # toctree entry and no content, but their file is still closed.
    for out in output:
        modules = sorted(out['modules'].keys())
        if modules:
            tocfile.write("\n {incfile}".format(incfile=out['file_name']))
            out['fp'].write(incfile_header.format(warning=warning, **out))
            for module in modules:
                # Strip off entry_point to avoid redundancy in documentation
                module_heading = '{module}'.format(
                    module=module.replace(entry_point_dot, ''))
                out['fp'].write("\n\n{module_heading}\n{_}".format(
                    module_heading=module_heading, _='-' * len(module_heading)))
                out['fp'].write(
                    "\n\n.. automodule:: {module}".format(module=module))
                names = out['modules'][module]
                for name in sorted(names):
                    out['fp'].write("\n\n.. autoclass:: {name}".format(name=name))
        out['fp'].close()

    tocfile.close()