def __init__(self, *args, **kwargs) -> None:
     """Initialise the parent and index registered config classes by namespace.

     All positional and keyword arguments are forwarded unchanged to the
     parent initialiser. Afterwards ``self._map`` maps ``task_namespace`` to
     the class, for every registered task class that is a subclass of
     ``PandasTypeConfig`` other than ``PandasTypeConfig`` itself.
     """
     super(PandasTypeConfigMap, self).__init__(*args, **kwargs)
     registered_names = Register.task_names()
     registered_classes = [Register.get_task_cls(name) for name in registered_names]
     namespace_to_class = {}
     for candidate in registered_classes:
         # The base class itself is deliberately excluded from the map.
         if issubclass(candidate, PandasTypeConfig) and candidate != PandasTypeConfig:
             namespace_to_class[candidate.task_namespace] = candidate
     self._map = namespace_to_class
Exemple #2
0
 def add_task_option(p):
     """Register the ``--task`` option on the option parser ``p``.

     When the enclosing scope's ``main_task_cls`` is truthy, its
     ``task_family`` becomes the option default and the help text advertises
     that default; otherwise the option is added without a default.
     """
     if not main_task_cls:
         p.add_option('--task',
                      help='Task to run (one of %s)' % Register.tasks_str())
         return
     task_help = 'Task to run (one of ' + Register.tasks_str() + ') [default: %default]'
     p.add_option('--task', help=task_help, default=main_task_cls.task_family)
Exemple #3
0
def run_with_retcodes(argv):
    """
    Run luigi with command line parsing and work out the configured return code.

    If the run itself fails (PID lock already taken, or any other exception),
    the process exits through ``sys.exit`` with the matching configured code.
    Otherwise the code is derived from the execution summary and returned.

    Note: Usually you use the luigi binary directly and don't call this function yourself.

    :param argv: Should (conceptually) be ``sys.argv[1:]``
    :return: the return code selected from the ``retcode`` configuration
    """
    logger = logging.getLogger('luigi-interface')
    with luigi.cmdline_parser.CmdlineParser.global_instance(argv):
        retcodes = retcode()

    worker = None
    try:
        worker = luigi.interface._run(argv)['worker']
    except luigi.interface.PidLockAlreadyTakenExit:
        sys.exit(retcodes.already_running)
    except Exception:
        # Logging may not be configured yet when an early error occurs,
        # so configure it before reporting the exception.
        luigi.interface.setup_interface_logging()
        logger.exception("Uncaught exception in luigi")
        sys.exit(retcodes.unhandled_exception)

    task_sets = luigi.execution_summary._summary_dict(worker)
    root_task = luigi.execution_summary._root_task(worker)
    populated_statuses = {status for status, tasks in task_sets.items() if tasks}

    def has(status):
        assert status in luigi.execution_summary._ORDERED_STATUSES
        return status in populated_statuses

    codes_and_conds = (
        (retcodes.missing_data, has('still_pending_ext')),
        (retcodes.task_failed, has('failed')),
        (retcodes.already_running, has('run_by_other_worker')),
        (retcodes.scheduling_error, has('scheduling_error')),
        (retcodes.not_run, has('not_run')),
    )
    # Conditions that did not trigger contribute 0; the largest code wins.
    weighted_codes = [code * int(cond) for code, cond in codes_and_conds]
    expected_ret_code = max(weighted_codes)

    # A zero code is only kept when the root task is in the "completed" or
    # "already_done" set; otherwise the run is reported as not_run.
    if expected_ret_code == 0 and not (
            root_task in task_sets["completed"] or
            root_task in task_sets["already_done"]):
        ret_code = retcodes.not_run
    else:
        ret_code = expected_ret_code
    Register.clear_instance_cache()
    return ret_code
Exemple #4
0
    def parse_task(self, cmdline_args=None, main_task_cls=None):
        """Parse command line arguments into a one-element list holding the task.

        :param cmdline_args: argument list; ``sys.argv[1:]`` is used when ``None``
        :param main_task_cls: when truthy, the task class to instantiate; the
            task name is then not read from the command line
        :return: ``[task_cls(**task_params)]``
        :raises SystemExit: when no task class is given and no task name is
            left over after parsing the global arguments
        """
        if cmdline_args is None:
            cmdline_args = sys.argv[1:]

        parser = argparse.ArgumentParser()
        add_global_parameters(parser)

        if main_task_cls:
            task_cls = main_task_cls
            add_task_parameters(parser, task_cls)
            args = parser.parse_args(args=cmdline_args)
        else:
            known_task_names = Register.task_names()

            # First pass: parse only the global arguments (with --help
            # filtered out) so the task name can be picked from the leftovers.
            # Subparsers+command were used for this before, but argparse
            # differences across Python versions (2.7.9) made that hard.
            without_help = [arg for arg in cmdline_args if arg != '--help']
            args, leftovers = parser.parse_known_args(args=without_help)
            if not leftovers:
                # Parse the unfiltered arguments in case --help was given
                parser.parse_known_args(args=cmdline_args)
                raise SystemExit('No task specified')

            task_name = leftovers[0]
            if task_name not in known_task_names:
                error_task_names(task_name, known_task_names)

            task_cls = Register.get_task_cls(task_name)

            # Task-specific arguments are parsed by a subparser named after the task
            task_parser = parser.add_subparsers(dest='command').add_parser(task_name)

            # Both global and task params go on the subparser so that both work:
            # test.py --global-param xyz Test --n 42
            # test.py Test --n 42 --global-param xyz
            add_global_parameters(task_parser)
            add_task_parameters(task_parser, task_cls)

            # Workaround for bug in argparse for Python 2.7.9
            # See https://mail.python.org/pipermail/python-dev/2015-January/137699.html
            reparsed = vars(parser.parse_args(args=cmdline_args))
            for key in reparsed:
                value = reparsed[key]
                # Only truthy values (True flags, non-empty values) override the first pass
                if value:
                    setattr(args, key, value)

        # Not side effect free: this may set global params
        set_global_parameters(args)
        task_params = get_task_parameters(task_cls, args)
        return [task_cls(**task_params)]
Exemple #5
0
    def parse_task(self, cmdline_args=None, main_task_cls=None):
        """Parse command line arguments into a one-element list holding the task.

        :param cmdline_args: argument list; ``sys.argv[1:]`` is used when ``None``
        :param main_task_cls: when truthy, the task class to instantiate; the
            task name is then not read from the command line
        :return: ``[task_cls(**task_params)]``
        :raises SystemExit: when no task class is given and no task name is
            left over after parsing the global arguments
        """
        if cmdline_args is None:
            cmdline_args = sys.argv[1:]

        parser = argparse.ArgumentParser()
        add_global_parameters(parser)

        if main_task_cls:
            task_cls = main_task_cls
            add_task_parameters(parser, task_cls)
            args = parser.parse_args(args=cmdline_args)
        else:
            known_task_names = Register.task_names()

            # First pass: parse only the global arguments (with --help
            # filtered out) so the task name can be picked from the leftovers.
            # Subparsers+command were used for this before, but argparse
            # differences across Python versions (2.7.9) made that hard.
            without_help = [arg for arg in cmdline_args if arg != '--help']
            args, leftovers = parser.parse_known_args(args=without_help)
            if not leftovers:
                # Parse the unfiltered arguments in case --help was given
                parser.parse_known_args(args=cmdline_args)
                raise SystemExit('No task specified')

            task_name = leftovers[0]
            if task_name not in known_task_names:
                error_task_names(task_name, known_task_names)

            task_cls = Register.get_task_cls(task_name)

            # Task-specific arguments are parsed by a subparser named after the task
            task_parser = parser.add_subparsers(dest='command').add_parser(task_name)

            # Both global and task params go on the subparser so that both work:
            # test.py --global-param xyz Test --n 42
            # test.py Test --n 42 --global-param xyz
            add_global_parameters(task_parser)
            add_task_parameters(task_parser, task_cls)

            # Workaround for bug in argparse for Python 2.7.9
            # See https://mail.python.org/pipermail/python-dev/2015-January/137699.html
            reparsed = vars(parser.parse_args(args=cmdline_args))
            for key in reparsed:
                value = reparsed[key]
                # Only truthy values (True flags, non-empty values) override the first pass
                if value:
                    setattr(args, key, value)

        # Not side effect free: this may set global params
        set_global_parameters(args)
        task_params = get_task_parameters(task_cls, args)
        return [task_cls(**task_params)]
Exemple #6
0
    def _build_parser(root_task=None, help_all=False):
        """Build an argparse parser with one flag per registered parameter.

        Every parameter reported by ``Register.get_all_params()`` gets a
        global flag; parameters of ``root_task`` additionally get a short
        local flag named after the parameter alone.

        :param root_task: task name whose parameters also get local flags
        :param help_all: when true, no parameter's help text is suppressed
        :return: the configured ``argparse.ArgumentParser`` (built with ``add_help=False``)
        """
        parser = argparse.ArgumentParser(add_help=False)

        # The positional has to be optional for argparse, so that options
        # such as `--module` can be parsed out before `--help` is handled.
        parser.add_argument('root_task',
                            nargs='?',
                            help='Task family to run. Is not optional.',
                            metavar='Required root task',
                            )

        for task_name, is_without_section, param_name, param_obj in Register.get_all_params():
            is_the_root_task = task_name == root_task
            show_in_help = any((is_the_root_task, help_all, param_obj.always_in_help))
            help_text = param_obj.description if show_in_help else argparse.SUPPRESS

            if is_without_section:
                flag_name_underscores = param_name
            else:
                flag_name_underscores = task_name + '_' + param_name
            global_flag_name = '--' + flag_name_underscores.replace('_', '-')
            global_kwargs = param_obj._parser_kwargs(param_name, task_name)
            parser.add_argument(global_flag_name, help=help_text, **global_kwargs)

            if not is_the_root_task:
                continue
            local_flag_name = '--' + param_name.replace('_', '-')
            local_kwargs = param_obj._parser_kwargs(param_name)
            parser.add_argument(local_flag_name, help=help_text, **local_kwargs)

        return parser
Exemple #7
0
def complete_config():
    """
    Create a temporary Luigi Config that has defaults for all variables.

    This is a generator that yields exactly once. Before yielding it sets a
    placeholder config value for every registered parameter whose
    ``_default`` equals ``_no_value``; after the yield it removes what it set.
    The removal runs in a ``finally`` clause, so the temporary entries are
    also cleaned up when the consumer raises while the generator is suspended.

    NOTE(review): presumably wrapped with ``contextlib.contextmanager`` where
    it is defined; the decorator is not visible here, confirm.
    """
    def get_default(param_obj):
        # Placeholder value by parameter type. Objects that are not a
        # luigi.Parameter fall through and produce None.
        if isinstance(param_obj, luigi.IntParameter):
            return '1'
        elif isinstance(param_obj, luigi.DateParameter):
            return datetime.date.today().isoformat()
        elif isinstance(param_obj, luigi.Parameter):
            return ''

    config = luigi.configuration.get_config()

    # Make sure every parameter has a default value
    sections_to_remove = []
    options_to_remove = []
    for task_name, is_without_section, param_name, param_obj in Register.get_all_params():
        if param_obj._default == _no_value:
            if is_without_section:
                sections_to_remove.append(task_name)
            options_to_remove.append((task_name, param_name))
            config.set(task_name, param_name, get_default(param_obj))

    try:
        # Yield the completed config
        yield
    finally:
        # Remove any config we set up, even if the consumer raised
        for section, option in options_to_remove:
            config.remove_option(section, option)
        for section in sections_to_remove:
            config.remove_section(section)
Exemple #8
0
    def parse(self, cmdline_args=None, main_task_cls=None):
        """Parse optparse-style arguments into a one-element list holding the task.

        The arguments are parsed twice: a first pass with a
        ``PassThroughOptionParser`` reads the ``--task`` option, then a second
        parser (the pre-existing one if set, otherwise a new
        ``optparse.OptionParser``) is given the global and task-specific
        options and parses the same arguments again.

        :param cmdline_args: argument list handed to ``parse_args``
        :param main_task_cls: when truthy, its ``task_family`` is the default for ``--task``
        :return: ``[task_cls(**task_params)]``
        """
        def add_task_option(p):
            if main_task_cls:
                option_kwargs = {
                    'help': 'Task to run (one of ' + Register.tasks_str() + ') [default: %default]',
                    'default': main_task_cls.task_family,
                }
            else:
                option_kwargs = {'help': 'Task to run (one of %s)' % Register.tasks_str()}
            p.add_option('--task', **option_kwargs)

        # First pass: only used to find out which task was requested
        first_pass = PassThroughOptionParser()
        add_global_parameters(first_pass, optparse=True)
        add_task_option(first_pass)
        options, _ = first_pass.parse_args(args=cmdline_args)
        task_cls_name = options.task

        # Second pass: reuse the existing parser when one was supplied
        parser = self.__existing_optparse if self.__existing_optparse else optparse.OptionParser()
        add_task_option(parser)

        task_cls = Register.get_task_cls(task_cls_name)

        # Register all parameters, global and task-specific, on one parser
        add_global_parameters(parser, optparse=True)
        add_task_parameters(parser, task_cls, optparse=True)

        # Parse and run
        options, _ = parser.parse_args(args=cmdline_args)

        set_global_parameters(options)
        return [task_cls(**get_task_parameters(task_cls, options))]
Exemple #9
0
    def _build_parser(root_task=None, help_all=False):
        """Build an argparse parser with one flag per registered parameter.

        Every parameter reported by ``Register.get_all_params()`` gets a
        global flag; parameters of ``root_task`` additionally get a short
        local flag named after the parameter alone.

        :param root_task: task name whose parameters also get local flags
        :param help_all: when true, no parameter's help text is suppressed
        :return: the configured ``argparse.ArgumentParser`` (built with ``add_help=False``)
        """
        parser = argparse.ArgumentParser(add_help=False)

        # The positional has to be optional for argparse, so that options
        # such as `--module` can be parsed out before `--help` is handled.
        parser.add_argument(
            'root_task',
            nargs='?',
            help='Task family to run. Is not optional.',
            metavar='Required root task',
        )

        for task_name, is_without_section, param_name, param_obj in Register.get_all_params():
            is_the_root_task = task_name == root_task
            show_in_help = any((is_the_root_task, help_all, param_obj.always_in_help))
            help_text = param_obj.description if show_in_help else argparse.SUPPRESS

            if is_without_section:
                flag_name_underscores = param_name
            else:
                flag_name_underscores = task_name + '_' + param_name
            global_flag_name = '--' + flag_name_underscores.replace('_', '-')
            global_kwargs = param_obj._parser_kwargs(param_name, task_name)
            parser.add_argument(global_flag_name, help=help_text, **global_kwargs)

            if not is_the_root_task:
                continue
            local_flag_name = '--' + param_name.replace('_', '-')
            local_kwargs = param_obj._parser_kwargs(param_name)
            parser.add_argument(local_flag_name, help=help_text, **local_kwargs)

        return parser
Exemple #10
0
def get_global_parameters():
    """Yield each registered parameter once, skipping repeated parameter objects.

    Iterates ``Register.get_all_params()`` and yields
    ``(task_name, is_without_section, param_name, param)`` for the first
    occurrence of every distinct ``param``.
    """
    already_yielded = set()
    for task_name, is_without_section, param_name, param in Register.get_all_params():
        if param not in already_yielded:
            already_yielded.add(param)
            yield (task_name, is_without_section, param_name, param)
Exemple #11
0
def get_global_parameters():
    """Yield each registered parameter once, skipping repeated parameter objects.

    Iterates ``Register.get_all_params()`` and yields
    ``(task_name, is_without_section, param_name, param)`` for the first
    occurrence of every distinct ``param``.
    """
    already_yielded = set()
    for task_name, is_without_section, param_name, param in Register.get_all_params():
        if param not in already_yielded:
            already_yielded.add(param)
            yield (task_name, is_without_section, param_name, param)
Exemple #12
0
 def _add_task_batchers(self):
     """Register a task batcher with the scheduler for each batchable task.

     Entries from ``Register._batchable_tasks()`` whose batch parameter
     names are empty are skipped.
     """
     for family, task_class, batch_param_names in Register._batchable_tasks():
         if not batch_param_names:
             continue
         batcher_kwargs = {
             'worker': self._id,
             'task_family': family,
             'batched_args': batch_param_names,
         }
         self._scheduler.add_task_batcher(**batcher_kwargs)
Exemple #13
0
    def test_externalize_taskclass(self):
        """Check lookups for a task family as one, two and three classes claim it.

        The classes are defined one after another on purpose: each assertion
        checks the lookup result right after the preceding definition.
        """
        # No class reports the family yet, so the lookup must fail.
        with self.assertRaises(TaskClassNotFoundException):
            Register.get_task_cls('scooby.Doo')

        class Task1(luigi.Task):
            @classmethod
            def get_task_family(cls):
                return "scooby.Doo"

        # Exactly one class reports the family: the lookup returns it.
        self.assertEqual(Task1, Register.get_task_cls('scooby.Doo'))

        class Task2(luigi.Task):
            @classmethod
            def get_task_family(cls):
                return "scooby.Doo"

        # Two classes report the same family: the lookup is ambiguous.
        with self.assertRaises(TaskClassAmbigiousException):
            Register.get_task_cls('scooby.Doo')

        class Task3(luigi.Task):
            @classmethod
            def get_task_family(cls):
                return "scooby.Doo"

        # There previously was a rare bug where the third installed class could
        # "undo" class ambiguity.
        with self.assertRaises(TaskClassAmbigiousException):
            Register.get_task_cls('scooby.Doo')
    def test_externalize_taskclass(self):
        """Check lookups for a task family as one, two and three classes claim it.

        The classes are defined one after another on purpose: each assertion
        checks the lookup result right after the preceding definition.
        """
        # No class reports the family yet, so the lookup must fail.
        with self.assertRaises(TaskClassNotFoundException):
            Register.get_task_cls('scooby.Doo')

        class Task1(luigi.Task):
            @classmethod
            def get_task_family(cls):
                return "scooby.Doo"

        # Exactly one class reports the family: the lookup returns it.
        self.assertEqual(Task1, Register.get_task_cls('scooby.Doo'))

        class Task2(luigi.Task):
            @classmethod
            def get_task_family(cls):
                return "scooby.Doo"

        # Two classes report the same family: the lookup is ambiguous.
        with self.assertRaises(TaskClassAmbigiousException):
            Register.get_task_cls('scooby.Doo')

        class Task3(luigi.Task):
            @classmethod
            def get_task_family(cls):
                return "scooby.Doo"

        # There previously was a rare bug where the third installed class could
        # "undo" class ambiguity.
        with self.assertRaises(TaskClassAmbigiousException):
            Register.get_task_cls('scooby.Doo')
Exemple #15
0
    def _build_parser(active_tasks=set()):
        """Build an argparse parser covering every registered parameter.

        Each parameter from ``Register.get_all_params()`` adds itself to the
        parser; ``as_active`` is true for parameters whose task name is in
        ``active_tasks``.

        :param active_tasks: container of task names treated as active
            (only membership is tested; the default set is never modified here)
        :return: the configured ``argparse.ArgumentParser`` (built with ``add_help=False``)
        """
        parser = argparse.ArgumentParser(add_help=False)

        for entry in Register.get_all_params():
            task_name, is_without_section, param_name, param_obj = entry
            param_obj._add_to_cmdline_parser(
                parser,
                param_name,
                task_name,
                is_without_section=is_without_section,
                as_active=(task_name in active_tasks),
            )

        return parser
Exemple #16
0
    def _build_parser(active_tasks=set(), help_all=False):
        """Build an argparse parser covering every registered parameter.

        Each parameter from ``Register.get_all_params()`` adds itself to the
        parser; ``as_active`` is true for parameters whose task name is in
        ``active_tasks``.

        :param active_tasks: container of task names treated as active
            (only membership is tested; the default set is never modified here)
        :param help_all: forwarded unchanged to each parameter
        :return: the configured ``argparse.ArgumentParser`` (built with ``add_help=False``)
        """
        parser = argparse.ArgumentParser(add_help=False)

        for entry in Register.get_all_params():
            task_name, is_without_section, param_name, param_obj = entry
            param_obj._add_to_cmdline_parser(
                parser,
                param_name,
                task_name,
                is_without_section=is_without_section,
                as_active=(task_name in active_tasks),
                help_all=help_all,
            )

        return parser
Exemple #17
0
 def __init__(self, cmdline_args):
     """
     Parse ``cmdline_args`` and store the result on ``self.known_args``.

     Raises ``SystemExit`` when no root task was given on the command line.
     """
     first_pass, _ = self._build_parser().parse_known_args(args=cmdline_args)
     self._attempt_load_module(first_pass)
     # Parse a second time after the module load attempt: the positionally
     # first unrecognized argument (the task) could be different now.
     second_pass, _ = self._build_parser().parse_known_args(args=cmdline_args)
     root_task = second_pass.root_task
     parser = self._build_parser(root_task=root_task,
                                 help_all=second_pass.core_help_all)
     self._possibly_exit_with_help(parser, second_pass)
     if not root_task:
         raise SystemExit('No task specified')
     # Check that what we believe to be the task is correctly spelled
     Register.get_task_cls(root_task)
     # Also publicly expose the fully parsed arguments
     self.known_args = parser.parse_args(args=cmdline_args)
Exemple #18
0
 def __init__(self, cmdline_args):
     """
     Parse ``cmdline_args`` and store the result on ``self.known_args``.

     Raises ``SystemExit`` when no root task was given on the command line.
     """
     first_pass, _ = self._build_parser().parse_known_args(args=cmdline_args)
     self._attempt_load_module(first_pass)
     # Parse a second time after the module load attempt: the positionally
     # first unrecognized argument (the task) could be different now.
     second_pass, _ = self._build_parser().parse_known_args(args=cmdline_args)
     root_task = second_pass.root_task
     parser = self._build_parser(root_task=root_task,
                                 help_all=second_pass.core_help_all)
     self._possibly_exit_with_help(parser, second_pass)
     if not root_task:
         raise SystemExit('No task specified')
     # Check that what we believe to be the task is correctly spelled
     Register.get_task_cls(root_task)
     # Also publicly expose the fully parsed arguments
     self.known_args = parser.parse_args(args=cmdline_args)
    def _build_parser(active_tasks=set(), help_all=False):
        """Build an argparse parser with a ``task`` positional and all parameters.

        Each parameter from ``Register.get_all_params()`` adds itself to the
        parser; ``as_active`` is true for parameters whose task name is in
        ``active_tasks``.

        :param active_tasks: container of task names treated as active
            (only membership is tested; the default set is never modified here)
        :param help_all: forwarded unchanged to each parameter
        :return: the configured ``argparse.ArgumentParser`` (built with ``add_help=False``)
        """
        parser = argparse.ArgumentParser(add_help=False)

        # The positional has to be optional for argparse, so that options
        # such as `--module` can be parsed out before `--help` is handled.
        parser.add_argument('task', nargs='?', help='Task family to run. Is not optional.')

        for entry in Register.get_all_params():
            task_name, is_without_section, param_name, param_obj = entry
            param_obj._add_to_cmdline_parser(
                parser,
                param_name,
                task_name,
                is_without_section=is_without_section,
                as_active=(task_name in active_tasks),
                help_all=help_all,
            )

        return parser
    def _build_parser(active_tasks=set()):
        """Build an argparse parser covering every registered parameter.

        Each parameter from ``Register.get_all_params()`` is added once with
        ``glob=True``; parameters whose task name is in ``active_tasks`` are
        added a second time with ``glob=False``.

        :param active_tasks: container of task names treated as active
            (only membership is tested; the default set is never modified here)
        :return: the configured ``argparse.ArgumentParser`` (built with ``add_help=False``)
        """
        parser = argparse.ArgumentParser(add_help=False)

        for task_name, is_without_section, param_name, param_obj in Register.get_all_params():
            register = param_obj._add_to_cmdline_parser
            register(parser, param_name, task_name,
                     is_without_section=is_without_section,
                     glob=True)
            if task_name not in active_tasks:
                continue
            register(parser, param_name, task_name,
                     is_without_section=is_without_section,
                     glob=False)

        return parser
Exemple #21
0
    def parse(self, cmdline_args=None, main_task_cls=None):
        """Parse optparse-style arguments into a one-element list holding the task.

        The arguments are parsed twice: a first pass with a
        ``PassThroughOptionParser`` reads the ``--task`` option, then a second
        parser (the pre-existing one if set, otherwise a new
        ``optparse.OptionParser``) is given the global and task-specific
        options and parses the same arguments again.

        :param cmdline_args: argument list handed to ``parse_args``
        :param main_task_cls: when truthy, its ``task_family`` is the default for ``--task``
        :return: ``[task_cls(**task_params)]``
        """
        def add_task_option(p):
            if not main_task_cls:
                p.add_option('--task',
                             help='Task to run (one of %s)' % Register.tasks_str())
                return
            task_help = 'Task to run (one of ' + Register.tasks_str() + ') [default: %default]'
            p.add_option('--task', help=task_help, default=main_task_cls.task_family)

        # First pass: only used to find out which task was requested
        first_pass = PassThroughOptionParser()
        add_global_parameters(first_pass, optparse=True)
        add_task_option(first_pass)
        options, _ = first_pass.parse_args(args=cmdline_args)
        task_cls_name = options.task

        # Second pass: reuse the existing parser when one was supplied
        parser = self.__existing_optparse if self.__existing_optparse else optparse.OptionParser()
        add_task_option(parser)

        task_cls = Register.get_task_cls(task_cls_name)

        # Register all parameters, global and task-specific, on one parser
        add_global_parameters(parser, optparse=True)
        add_task_parameters(parser, task_cls, optparse=True)

        # Parse and run
        options, _ = parser.parse_args(args=cmdline_args)

        set_global_parameters(options)
        return [task_cls(**get_task_parameters(task_cls, options))]
Exemple #22
0
    def get_task_cls(mod_cls):
        """ Resolve string to task class object

        Reuse Luigi's service that registers task types

        Like Luigi, we assume that the mod_cls is 'module.class' and we assume
        that the user has put their pipe location on PYTHONPATH.

        A name without any dot is treated as a bare class name: nothing is
        imported and the class is looked up directly in the register.

        :param mod_cls: '<module>.<class>'
        :return:        Task class object
        """

        from luigi.task_register import Register

        # Split on the last dot: everything before it is the module path.
        mod, _, cls = mod_cls.rpartition('.')

        # The module part is '' (never None) when mod_cls has no dot;
        # importing an empty module name would raise ValueError.
        if mod:
            __import__(mod)

        return Register.get_task_cls(cls)
Exemple #23
0
 def _get_task_cls(self):
     """
     Return the task class registered under the parsed root task name.
     """
     root_task_name = self.known_args.root_task
     return Register.get_task_cls(root_task_name)
Exemple #24
0
 def add_task_option(p):
     """Register the ``--task`` option on the option parser ``p``.

     When the enclosing scope's ``main_task_cls`` is truthy, its
     ``task_family`` becomes the option default and the help text advertises
     that default; otherwise the option is added without a default.
     """
     if main_task_cls:
         option_kwargs = {
             'help': 'Task to run (one of ' + Register.tasks_str() + ') [default: %default]',
             'default': main_task_cls.task_family,
         }
     else:
         option_kwargs = {'help': 'Task to run (one of %s)' % Register.tasks_str()}
     p.add_option('--task', **option_kwargs)
Exemple #25
0
 def get_task_cls(self):
     """
     Return the task class registered under this object's task name.
     """
     task_name = self._task_name
     return Register.get_task_cls(task_name)
def gen_sphinx_tasks(entry_point, labels, *_args, **kwargs):
    """
    Writes a file per label, suitable for use by sphinx.ext.autodoc,
    using the classes found from entry_point.

    Also generates toctree.rst, which can be included from the index
    page to provide links to each generated file.

    Every file opened here is closed before the function returns or exits,
    including when an error interrupts the generation.

    :param entry_point: name passed to ``stevedore.ExtensionManager``; only
        registered tasks whose module starts with ``<entry_point>.`` are listed
    :param labels: one heading per output file
    :param _args: extra positional arguments, ignored
    :param kwargs: may hold ``categories``, a list matched to ``labels`` by
        position; a label without a category uses ``''`` and the file ``all.rst``
    :raises SystemExit: when an output file cannot be opened or written (IOError)
    """
    # Declare file header strings
    warning = '''..  WARNING: DO NOT EDIT THIS FILE DIRECTLY
    Generated by sphinx_source/gen_tasks.py on {now}

    '''.format(now=time.strftime('%c'))

    toctree_header = '''{warning}
.. toctree::
   :maxdepth: 1
'''
    incfile_header = '''{warning}
..  _{category_slug}:

Back to :doc:`index`

{label_heading}
'''

    # Load modules into memory
    stevedore.ExtensionManager(entry_point)

    # Used to filter the classes under entry_point
    entry_point_dot = '{entry_point}.'.format(entry_point=entry_point)

    output = []
    tocfile = None
    try:
        # Generate a list of output file arguments from the given labels and categories
        categories = kwargs.get('categories', [])
        for idx, label in enumerate(labels):
            try:
                category = ''
                if idx < len(categories):
                    category = categories[idx]

                # Create a category slug for sphinx, and name the file with it
                category_slug = category.replace(' ', '_') or 'all'
                file_name = '{slug}.rst'.format(slug=category_slug)
                file_path = os.path.join(SPHINX_DIR, file_name)
                file_pointer = open(file_path, "w")
                output.append({
                    'fp':
                    file_pointer,
                    'file_name':
                    file_name,
                    'category':
                    category,
                    'category_slug':
                    category_slug,
                    'label':
                    label,
                    'label_heading':
                    "{label}\n{_}".format(label=label, _='=' * len(label)),
                    'modules': {},
                })
            except IOError:
                sys.exit(
                    'Unable to write to {file_path}'.format(file_path=file_path))

        # Write the header to the table of contents file
        tocfile_name = os.path.join(SPHINX_DIR, 'toctree.rst')
        try:
            tocfile = open(tocfile_name, "w")
            tocfile.write(toctree_header.format(warning=warning))
        except IOError:
            sys.exit(
                'Unable to write to {file_name}'.format(file_name=tocfile_name))

        # For each Task, sorted by class name
        tasks = Register._get_reg()
        for name in sorted(tasks):
            cls = tasks[name]
            module = cls.__module__
            # Show only tasks under entry_point
            if module.startswith(entry_point_dot):
                for out in output:
                    # Show only tasks in the output category
                    if getattr(cls, 'task_category', '') == out['category']:
                        if module not in out['modules']:
                            out['modules'][module] = {}
                        out['modules'][module][name] = cls

        for out in output:
            modules = sorted(out['modules'].keys())
            if modules:
                # Only files that document at least one module get a header
                # and an entry in the table of contents.
                tocfile.write("\n   {incfile}".format(incfile=out['file_name']))
                out['fp'].write(incfile_header.format(warning=warning, **out))

            for module in modules:
                # Strip off entry_point to avoid redundancy in documentation
                module_heading = '{module}'.format(
                    module=module.replace(entry_point_dot, ''))
                out['fp'].write("\n\n{module_heading}\n{_}".format(
                    module_heading=module_heading, _='-' * len(module_heading)))
                out['fp'].write(
                    "\n\n.. automodule:: {module}".format(module=module))

                names = out['modules'][module]
                for name in sorted(names):
                    out['fp'].write("\n\n.. autoclass:: {name}".format(name=name))

            # Close each file as soon as it is complete, before the next
            # file is written.
            out['fp'].close()

        tocfile.close()
    finally:
        # Safety net for error paths; closing an already closed file is a no-op.
        for out in output:
            out['fp'].close()
        if tocfile is not None:
            tocfile.close()
Exemple #27
0
 def _get_task_cls(self):
     """
     Return the task class registered under the parsed root task name.
     """
     root_task_name = self.known_args.root_task
     return Register.get_task_cls(root_task_name)
 def get_task_cls(self):
     """
     Return the task class registered under this object's task name.
     """
     task_name = self._task_name
     return Register.get_task_cls(task_name)