def find_deps_cli():
    """Find all tasks on all paths from the task named on the command line.

    Parses ``sys.argv[1:]`` with ``CmdlineParser``, instantiates the requested
    task, and hands it to ``find_deps`` together with ``upstream().family``.

    Returns:
        The result of ``find_deps`` for that task and upstream family.
    """
    argv = sys.argv[1:]
    with CmdlineParser.global_instance(argv) as parser:
        root_task = parser.get_task_obj()
        upstream_family = upstream().family
        return find_deps(root_task, upstream_family)
def _value_iterator(self, task_name, param_name):
    """
    Generate candidate ``(value, warning)`` pairs for a parameter.

    The second tuple item is an optional deprecation warning (``None`` when
    there is nothing to warn about). The parameter value will be whatever
    non-_no_value that is yielded first.

    Candidates are produced in this order: command line (only when a
    ``CmdlineParser`` instance is active), config ``[task_name] param_name``,
    the same config key spelled with dashes, the legacy config location held
    in ``self.__config`` (only when set), and finally ``self._default``.
    """
    parser = CmdlineParser.get_instance()
    if parser:
        arg_dest = self._parser_global_dest(param_name, task_name)
        raw_value = getattr(parser.known_args, arg_dest, None)
        yield (self._parse_or_no_value(raw_value), None)

    yield (self._get_value_from_config(task_name, param_name), None)

    dashed_name = param_name.replace("_", "-")
    yield (
        self._get_value_from_config(task_name, dashed_name),
        "Configuration [{}] {} (with dashes) should be avoided. Please use underscores.".format(
            task_name, param_name
        ),
    )

    if self.__config:
        legacy_section = self.__config["section"]
        legacy_name = self.__config["name"]
        yield (
            self._get_value_from_config(legacy_section, legacy_name),
            "The use of the configuration [{}] {} is deprecated. Please use [{}] {}".format(
                legacy_section, legacy_name, task_name, param_name
            ),
        )

    yield (self._default, None)
def _value_iterator(self, task_name, param_name):
    """
    Generate candidate ``(value, warning)`` pairs for a parameter.

    The second tuple item is an optional deprecation warning (``None`` when
    there is nothing to warn about). The parameter value will be whatever
    non-_no_value that is yielded first.

    With an active ``CmdlineParser``, the global command-line destination is
    tried first and, when the task is the local task, the non-global one
    after it. Config lookups, the legacy config location and the default
    follow.
    """
    parser = CmdlineParser.get_instance()
    if parser:
        task_cls = task_register.Register.get_task_cls(task_name)
        no_section = not task_cls.use_cmdline_section
        # Always look at the global destination; add the local one for the local task.
        glob_flags = [True]
        if parser.is_local_task(task_name):
            glob_flags.append(False)
        for use_glob in glob_flags:
            arg_dest = self._parser_dest(param_name, task_name, glob=use_glob,
                                         is_without_section=no_section)
            if not arg_dest:
                continue
            raw_value = getattr(parser.known_args, arg_dest, None)
            yield (self._parse_or_no_value(raw_value), None)

    yield (self._get_value_from_config(task_name, param_name), None)

    dashed_name = param_name.replace('_', '-')
    yield (
        self._get_value_from_config(task_name, dashed_name),
        'Configuration [{}] {} (with dashes) should be avoided. Please use underscores.'.format(
            task_name, param_name),
    )

    if self.__config:
        legacy_section = self.__config['section']
        legacy_name = self.__config['name']
        yield (
            self._get_value_from_config(legacy_section, legacy_name),
            'The use of the configuration [{}] {} is deprecated. Please use [{}] {}'.format(
                legacy_section, legacy_name, task_name, param_name),
        )

    yield (self.__default, None)
def _run(cmdline_args=None, main_task_cls=None,
         worker_scheduler_factory=None, use_dynamic_argparse=None, local_scheduler=False):
    """
    Please dont use. Instead use `luigi` binary.

    Run from cmdline using argparse.

    The list passed as ``cmdline_args`` is copied before the task family or
    ``--local-scheduler`` is added, so the caller's list is left untouched.

    :param cmdline_args: arguments to parse; ``sys.argv[1:]`` is used when ``None``.
    :param main_task_cls: optional task class; its ``task_family`` is inserted
        as the first argument.
    :param worker_scheduler_factory: forwarded to ``_schedule_and_run``.
    :param use_dynamic_argparse: Deprecated and ignored
    :param local_scheduler: when true, ``--local-scheduler`` is appended to the arguments.
    :return: the result of ``_schedule_and_run`` for the parsed task.
    """
    if use_dynamic_argparse is not None:
        warnings.warn("use_dynamic_argparse is deprecated, don't set it.",
                      DeprecationWarning, stacklevel=2)

    # Work on a private copy: the insert/append below must not leak into the caller's list.
    cmdline_args = sys.argv[1:] if cmdline_args is None else list(cmdline_args)

    if main_task_cls:
        cmdline_args.insert(0, main_task_cls.task_family)
    if local_scheduler:
        cmdline_args.append('--local-scheduler')

    with CmdlineParser.global_instance(cmdline_args) as cp:
        return _schedule_and_run([cp.get_task_obj()], worker_scheduler_factory)
def _run(cmdline_args=None, main_task_cls=None,
         worker_scheduler_factory=None, use_dynamic_argparse=None, local_scheduler=False):
    """
    Please dont use. Instead use `luigi` binary.

    Run from cmdline using argparse.

    The list passed as ``cmdline_args`` is copied before the task family or
    ``--local-scheduler`` is added, so the caller's list is left untouched.

    :param cmdline_args: arguments to parse; ``sys.argv[1:]`` is used when ``None``.
    :param main_task_cls: optional task class; its ``task_family`` is inserted
        as the first argument.
    :param worker_scheduler_factory: forwarded to ``_schedule_and_run``.
    :param use_dynamic_argparse: Deprecated and ignored
    :param local_scheduler: when true, ``--local-scheduler`` is inserted at the
        front of the arguments (ahead of the task family, if one was added).
    :return: the result of ``_schedule_and_run`` for the parsed task.
    """
    if use_dynamic_argparse is not None:
        warnings.warn("use_dynamic_argparse is deprecated, don't set it.",
                      DeprecationWarning, stacklevel=2)

    # Work on a private copy: the inserts below must not leak into the caller's list.
    cmdline_args = sys.argv[1:] if cmdline_args is None else list(cmdline_args)

    if main_task_cls:
        cmdline_args.insert(0, main_task_cls.task_family)
    if local_scheduler:
        cmdline_args.insert(0, '--local-scheduler')

    with CmdlineParser.global_instance(cmdline_args) as cp:
        return _schedule_and_run([cp.get_task_obj()], worker_scheduler_factory)
def _test_run_with_empty_data_frame(cmdline_args: List[str], test_run_params: test_run):
    """Run the workflow once normally, then re-run every task against empty data frames.

    First ``gokart.run`` is executed with ``cmdline_args``; a ``SystemExit``
    with a non-zero code fails the assertion. Then all tasks reachable from
    the command-line task are collected (optionally filtered by
    ``test_run_params.namespace``) and each is run with
    ``gokart.TaskOnKart.load_data_frame`` patched to return an empty
    ``DataFrame`` carrying only ``required_columns`` and
    ``gokart.TaskOnKart.dump`` patched to do nothing.

    The results are logged, and the process exits with status 1 if any task
    reports failure.
    """
    # Local imports: ``sys.exit`` replaces the ``site``-provided ``exit`` helper,
    # which is meant for interactive sessions only.
    import sys
    from unittest.mock import patch

    try:
        gokart.run(cmdline_args=cmdline_args)
    except SystemExit as e:
        assert e.code == 0, f'original workflow does not run properly. It exited with error code {e}.'

    # Only the task discovery needs the global parser instance.
    with CmdlineParser.global_instance(cmdline_args) as cp:
        all_tasks = _get_all_tasks(cp.get_task_obj())

    if test_run_params.namespace is not None:
        all_tasks = [t for t in all_tasks if t.task_namespace == test_run_params.namespace]

    with patch('gokart.TaskOnKart.load_data_frame',
               new=lambda *args, required_columns=None, **kwargs: pd.DataFrame(columns=required_columns)):
        with patch('gokart.TaskOnKart.dump', new=lambda *args, **kwargs: None):
            test_status_list = [_run_with_test_status(t) for t in all_tasks]

    test_logger.info('gokart test results:\n' + '\n'.join(s.format() for s in test_status_list))
    if any(s.fail() for s in test_status_list):
        sys.exit(1)
def _try_to_delete_unnecessary_output_file(cmdline_args: List[str]):
    """Delete unnecessary local outputs when the command-line task requests it.

    The task is built from ``cmdline_args``. If its
    ``delete_unnecessary_output_files`` flag is set, local outputs are removed
    via ``gokart.delete_local_unnecessary_outputs`` (object-storage workspaces
    are only logged as unsupported) and the process then exits. When the flag
    is not set the function returns normally.
    """
    # ``sys.exit`` instead of the ``site``-provided ``exit`` helper, which is
    # intended for interactive use only.
    import sys

    with CmdlineParser.global_instance(cmdline_args) as cp:
        task = cp.get_task_obj()  # type: gokart.TaskOnKart
        if task.delete_unnecessary_output_files:
            if ObjectStorage.if_object_storage_path(task.workspace_directory):
                logger.info('delete-unnecessary-output-files is not support s3/gcs.')
            else:
                gokart.delete_local_unnecessary_outputs(task)
            sys.exit()
def invalidate(end_task, module=None, tasks_to_invalidate=None, yes=False):
    """Invalidate specified task.

    Builds the task object for ``end_task`` (adding ``--module <module>`` to
    the parser arguments when ``module`` is given), echoes the invalidation
    statistics, and invalidates downstream once confirmed. Confirmation is
    skipped when ``yes`` is true; otherwise ``click.confirm`` is asked with
    ``abort=True``.
    """
    if module is not None:
        parser_args = ['--module', module, end_task]
    else:
        parser_args = [end_task]

    task_obj = CmdlineParser(parser_args).get_task_obj()

    click.echo(invalidate_stats(task_obj, tasks_to_invalidate))

    confirmed = yes or click.confirm('Do you want to continue?', abort=True)
    if confirmed:
        invalidate_downstream(task_obj, tasks_to_invalidate)
        click.echo('Done.')
def find_deps_cli():
    """Find all tasks on all paths from the task class named on the command line.

    Parses ``sys.argv[1:]``, instantiates the resolved task class with no
    arguments, and passes the instance to ``find_deps`` together with
    ``upstream().family``.

    Returns:
        The result of ``find_deps``.
    """
    argv = sys.argv[1:]
    with CmdlineParser.global_instance(argv) as parser:
        root_task = parser.get_task_cls()()
        family = upstream().family
        return find_deps(root_task, family)
def _try_get_slack_api(cmdline_args: List[str]) -> Optional[gokart.slack.SlackAPI]:
    """Build a ``SlackAPI`` client when Slack notification is configured.

    The token is read from the environment variable named by
    ``SlackConfig.token_name``. A client is returned only when both the token
    and the configured channel are non-empty; otherwise ``None`` is returned.
    Either outcome is logged.
    """
    with CmdlineParser.global_instance(cmdline_args):
        slack_config = gokart.slack.SlackConfig()
        api_token = os.getenv(slack_config.token_name, '')
        target_channel = slack_config.channel
        target_user = slack_config.to_user
        if api_token and target_channel:
            logger.info('Slack notification is activated.')
            return gokart.slack.SlackAPI(token=api_token, channel=target_channel, to_user=target_user)

    logger.info('Slack notification is not activated.')
    return None
def try_to_run_test_for_empty_data_frame(cmdline_args: List[str]):
    """Run the empty-data-frame test mode when ``test_run().pandas`` is enabled.

    The ``test_run`` parameters are resolved with ``cmdline_args`` active.
    When the ``pandas`` flag is set, every argument starting with
    ``--test-run-`` is stripped, the empty-data-frame test run is executed
    with the remaining arguments, and the process exits with status 0.
    Otherwise the function returns without doing anything.
    """
    # ``sys.exit`` instead of the ``site``-provided ``exit`` helper, which is
    # intended for interactive use only.
    import sys

    with CmdlineParser.global_instance(cmdline_args):
        test_run_params = test_run()

    # The test run opens its own global parser instance, so it has to happen
    # after the ``with`` block above has been left.
    if test_run_params.pandas:
        cmdline_args = [a for a in cmdline_args if not a.startswith('--test-run-')]
        _test_run_with_empty_data_frame(cmdline_args=cmdline_args, test_run_params=test_run_params)
        sys.exit(0)
def run(self, argv):
    """Parse ``argv``, bootstrap the lor globals, and explain the requested task.

    Arguments recognised by the property-override option are consumed here;
    everything else is handed to luigi's ``CmdlineParser`` to build the task
    that is passed to ``explain``.
    """
    arg_parser = argparse.ArgumentParser(description=self.description())
    cli.add_properties_override_arg(arg_parser)
    lor_args, luigi_args = arg_parser.parse_known_args(argv)

    overrides = cli.extract_property_overrides(lor_args)
    lor._internal.bootstrap_globals(overrides)

    with CmdlineParser.global_instance(luigi_args) as luigi_parser:
        explain(luigi_parser.get_task_obj())
def run(*args, **kwargs):
    """
    Wrapper around toad.build to allow execution from command line by calling
    a python script with this in it's main execution block.

    The root task is built from ``sys.argv[1:]``. Each option below is taken
    from ``kwargs`` (and removed from it), then overridden by the value of
    the same name on the parsed command line when one is present. Positional
    arguments are accepted but not used by this function.

    Args:
        forced (list): list of forced tasks
        forced_all (bool): force all tasks
        forced_all_upstream (bool): force all tasks including upstream
        confirm (list): confirm invalidating tasks
        workers (int): number of workers
        abort (bool): on errors raise exception
        kwargs: remaining keywords are forwarded to ``build``

    Returns:
        The result of ``build`` for the root task.
    """
    # Pop (not get) the known options: they are passed to ``build`` explicitly
    # below, and leaving them in ``kwargs`` would pass each one twice and raise
    # ``TypeError: got multiple values for keyword argument``.
    forced = kwargs.pop('forced', None)
    forced_all = kwargs.pop('forced_all', False)
    forced_all_upstream = kwargs.pop('forced_all_upstream', False)
    confirm = kwargs.pop('confirm', True)
    workers = kwargs.pop('workers', 1)
    abort = kwargs.pop('abort', True)

    cmdline_args = sys.argv[1:]
    with CmdlineParser.global_instance(cmdline_args) as cp:
        if toad.config.show_params_on_run:
            print("\n\nCommand line args:\n{}".format(cp.known_args))

        root_task = cp.get_task_obj()
        if toad.config.isdebug:
            print("\nRoot task:\n{}".format(root_task))

        # Values present on the parsed command line take precedence over kwargs.
        if "forced" in cp.known_args:
            forced = cp.known_args.forced
        if "forced_all" in cp.known_args:
            if toad.config.isdebug:
                print("\n\ncp.known_args.forced_all: {}".format(
                    cp.known_args.forced_all))
            forced_all = cp.known_args.forced_all
        if "forced_all_upstream" in cp.known_args:
            forced_all_upstream = cp.known_args.forced_all_upstream
        if "confirm" in cp.known_args:
            confirm = cp.known_args.confirm
        if "workers" in cp.known_args:
            workers = cp.known_args.workers
        if "abort" in cp.known_args:
            abort = cp.known_args.abort

        return build(root_task,
                     forced=forced,
                     forced_all=forced_all,
                     forced_all_upstream=forced_all_upstream,
                     confirm=confirm,
                     workers=workers,
                     abort=abort,
                     **kwargs)
def run(set_retcode=True):
    """Entry point: configure retcodes, then either build tree info or run luigi.

    When ``set_retcode`` is true, the luigi retcode settings are assigned
    fixed values (already_running=10, missing_data=20, not_run=30,
    task_failed=40, scheduling_error=50). After reading the environment and
    checking the config, ``sys.argv[1:]`` is inspected:

    * a leading ``--tree-info`` returns ``gokart.make_tree_info`` without details;
    * a leading ``--tree-info-all`` returns it with details;
    * anything else (including an empty argument list) is passed to
      ``luigi.cmdline.luigi_run``.
    """
    if set_retcode:
        luigi.retcodes.retcode.already_running = 10
        luigi.retcodes.retcode.missing_data = 20
        luigi.retcodes.retcode.not_run = 30
        luigi.retcodes.retcode.task_failed = 40
        luigi.retcodes.retcode.scheduling_error = 50

    _read_environ()
    _check_config()

    cmdline_args = sys.argv[1:]
    # Guard against an empty argument list, where ``cmdline_args[0]`` would
    # raise IndexError before luigi gets a chance to report the problem.
    first_arg = cmdline_args[0] if cmdline_args else None

    if first_arg == '--tree-info':
        with CmdlineParser.global_instance(cmdline_args[1:]) as cp:
            return gokart.make_tree_info(cp.get_task_obj(), details=False)

    if first_arg == '--tree-info-all':
        with CmdlineParser.global_instance(cmdline_args[1:]) as cp:
            return gokart.make_tree_info(cp.get_task_obj(), details=True)

    luigi.cmdline.luigi_run(cmdline_args)
def get_parameters(args):
    """Build a template command line listing every parameter of the task in ``args``.

    The result starts with a copy of ``args``. For each parameter of the
    resolved task class, ``--<name-with-dashes>`` is appended, followed by
    ``'1'`` for parameters whose exact type is ``luigi.IntParameter`` or by
    the placeholder ``&<name>&`` for every other type except exactly
    ``luigi.BoolParameter`` (which gets no value).
    """
    with CmdlineParser.global_instance(args) as parser:
        task_cls = parser._get_task_cls()
        declared_params = task_cls.get_params()

        script = list(args)
        for entry in declared_params:
            name = entry[0]
            kind = type(entry[1])
            script.append('--' + name.replace('_', '-'))
            if kind == luigi.IntParameter:
                script.append('1')
            elif kind != luigi.BoolParameter:
                script.append('&' + name + '&')
        return script
def run(self, argv):
    """Parse ``argv``, bootstrap the lor globals, and print the task graph as dot.

    Arguments recognised by the property-override option are consumed here;
    everything else is handed to luigi's ``CmdlineParser`` to build the task
    that is passed to ``print_as_dot``.
    """
    arg_parser = argparse.ArgumentParser(description=self.description())

    # TODO: Replace the workspace CLI bootstrapping a func
    cli.add_properties_override_arg(arg_parser)
    lor_args, luigi_args = arg_parser.parse_known_args(argv)
    overrides = cli.extract_property_overrides(lor_args)
    lor._internal.bootstrap_globals(overrides)

    with CmdlineParser.global_instance(luigi_args) as luigi_parser:
        print_as_dot(luigi_parser.get_task_obj())
def _run(cmdline_args=None, main_task_cls=None,
         worker_scheduler_factory=None, use_dynamic_argparse=None, local_scheduler=False, detailed_summary=False):
    """
    Parse the command line and schedule and run the requested task.

    The list passed as ``cmdline_args`` is copied before the task family or
    ``--local-scheduler`` is added, so the caller's list is left untouched.

    :param cmdline_args: arguments to parse; ``sys.argv[1:]`` is used when ``None``.
    :param main_task_cls: optional task class; its ``task_family`` is inserted
        as the first argument.
    :param worker_scheduler_factory: forwarded to ``_schedule_and_run``.
    :param use_dynamic_argparse: deprecated and ignored; setting it emits a
        ``DeprecationWarning``.
    :param local_scheduler: when true, ``--local-scheduler`` is appended to the arguments.
    :param detailed_summary: accepted but not referenced in this function body.
    :return: the result of ``_schedule_and_run`` for the parsed task.
    """
    if use_dynamic_argparse is not None:
        warnings.warn("use_dynamic_argparse is deprecated, don't set it.",
                      DeprecationWarning, stacklevel=2)

    # Work on a private copy: the insert/append below must not leak into the caller's list.
    cmdline_args = sys.argv[1:] if cmdline_args is None else list(cmdline_args)

    if main_task_cls:
        cmdline_args.insert(0, main_task_cls.task_family)
    if local_scheduler:
        cmdline_args.append('--local-scheduler')

    with CmdlineParser.global_instance(cmdline_args) as cp:
        return _schedule_and_run([cp.get_task_obj()], worker_scheduler_factory)
def main():
    """Render the pipeline graph of the command-line task to ``pipelines.png``.

    If ``--tab`` is absent from the arguments, a default pointing at
    ``testset/seq_data/data_input.tsv`` (relative to the parent of this
    file's directory) is appended. The dot description of the task graph is
    written to ``/tmp/tmp.dot`` and converted with the ``dot`` command into
    ``<task.odir>/pipelines.png``.
    """
    argv = sys.argv[1:]
    if "--tab" not in argv:
        default_tab = join(dirname(dirname(__file__)), "testset", "seq_data", "data_input.tsv")
        argv.extend(["--tab", default_tab])

    with CmdlineParser.global_instance(argv) as parser:
        task = parser.get_task_obj()
        dot_text = construct_dot_output(get_graph(task))

        with open('/tmp/tmp.dot', 'w') as dot_file:
            dot_file.write(dot_text)

        dot_path = '/tmp/tmp.dot'
        png_path = join(task.odir, "pipelines.png")
        run_cmd(f"dot -Tpng < {dot_path} > {png_path}", )
def _value_iterator(self, task_name, param_name):
    """
    Generate candidate ``(value, warning)`` pairs for a parameter.

    The second tuple item is an optional deprecation warning (``None`` when
    there is nothing to warn about). The parameter value will be whatever
    non-_no_value that is yielded first.

    Candidates are produced in this order: command line (only when a
    ``CmdlineParser`` instance is active), config ``[task_name] param_name``,
    the legacy config location in ``self._config_path`` (only when set), and
    finally ``self._default``.
    """
    parser = CmdlineParser.get_instance()
    if parser:
        arg_dest = self._parser_global_dest(param_name, task_name)
        raw_value = getattr(parser.known_args, arg_dest, None)
        yield (self._parse_or_no_value(raw_value), None)

    yield (self._get_value_from_config(task_name, param_name), None)

    if self._config_path:
        legacy_section = self._config_path['section']
        legacy_name = self._config_path['name']
        yield (
            self._get_value_from_config(legacy_section, legacy_name),
            'The use of the configuration [{}] {} is deprecated. Please use [{}] {}'.format(
                legacy_section, legacy_name, task_name, param_name),
        )

    yield (self._default, None)
def _try_tree_info(cmdline_args):
    """Write tree info and exit when a tree-info mode is requested.

    ``gokart.tree_info().mode`` is resolved with ``cmdline_args`` active:

    * ``''`` — nothing is done and the function returns;
    * ``'simple'`` — tree info without details is written;
    * ``'all'`` — tree info with details is written;
    * anything else raises ``ValueError``.

    After writing, the output path is logged and the process exits.
    """
    # ``sys.exit`` instead of the ``site``-provided ``exit`` helper, which is
    # intended for interactive use only.
    import sys

    with CmdlineParser.global_instance(cmdline_args):
        mode = gokart.tree_info().mode
        output_path = gokart.tree_info().output().path()

    # do nothing if `mode` is empty.
    if mode == '':
        return

    # output tree info and exit.
    if mode == 'simple':
        _run_tree_info(cmdline_args, details=False)
    elif mode == 'all':
        _run_tree_info(cmdline_args, details=True)
    else:
        raise ValueError(f'--tree-info-mode must be "simple" or "all", but "{mode}" is passed.')

    logger.info(f'output tree info: {output_path}')
    sys.exit()
def _try_to_send_event_summary_to_slack(slack_api: Optional[gokart.slack.SlackAPI],
                                        event_aggregator: gokart.slack.EventAggregator,
                                        cmdline_args: List[str]):
    """Send the event summary and detailed tree info of the run to Slack.

    Does nothing when ``slack_api`` is ``None``. Otherwise the command-line
    task is rebuilt to obtain its class name and detailed tree info, and a
    snippet titled ``event.txt`` is sent containing the event list followed
    by the tree info.
    """
    if slack_api is None:
        # do nothing
        return

    with CmdlineParser.global_instance(cmdline_args) as parser:
        root_task = parser.get_task_obj()
        tree_text = gokart.make_tree_info(root_task, details=True)
        root_name = type(root_task).__name__

    header = f'Report of {root_name}'
    comment = header + os.linesep + event_aggregator.get_summary()

    sections = [
        '===== Event List ====',
        event_aggregator.get_event_list(),
        os.linesep,
        '==== Tree Info ====',
        tree_text,
    ]
    content = os.linesep.join(sections)

    slack_api.send_snippet(comment=comment, title='event.txt', content=content)
def main(luigi_module_file, last_task, first_task, dry_run, luigi_args):
    """
    Delete the output files generated by the tasks on a "path" of a luigi task graph.

    This is useful to force-run one or several tasks: if their output targets
    already exist, luigi will not execute them. For example, with 4 tasks
    A -> B -> C -> D ('->' meaning 'requires') where the code of task C
    changed, one might want to rerun tasks C, B and A, so the output files
    generated by these three tasks have to be deleted first.

    Only paths that currently exist on disk are considered.

    :param luigi_module_file: python module name containing luigi tasks (needs to be on python path)
    :param last_task: last task in topological ordering ('A' in example)
    :param first_task: first task in topological ordering ('C' in example)
    :param dry_run: don't delete files, just output paths
    :param luigi_args: arguments passed to luigi
    """
    parser_args = ['--module', luigi_module_file, last_task]
    if luigi_args:
        parser_args.extend(luigi_args)

    with CmdlineParser.global_instance(parser_args) as parser:
        root_task = parser.get_task_obj()

        print("Determining output files on path {} to {}".format(first_task, last_task))
        candidate_paths = walk_tree(root_task, first_task)
        files_to_delete = {path for path in candidate_paths if os.path.exists(path)}

        if not files_to_delete:
            print("Nothing to delete.")

        for path in files_to_delete:
            if dry_run:
                print("Would remove {}".format(path))
            else:
                print("Removing {}".format(path))
                os.unlink(path)
def _value_iterator(self, task_name, param_name):
    """
    Generate candidate ``(value, warning)`` pairs for a parameter.

    The second tuple item is an optional deprecation warning (``None`` when
    there is nothing to warn about). The parameter value will be whatever
    non-_no_value that is yielded first.

    Candidates are produced in this order: command line (only when a
    ``CmdlineParser`` instance is active), config ``[task_name] param_name``,
    the same config key spelled with dashes, the legacy config location held
    in ``self.__config`` (only when set), and finally ``self.__default``.
    """
    parser = CmdlineParser.get_instance()
    if parser:
        arg_dest = self._parser_dest(param_name, task_name)
        raw_value = getattr(parser.known_args, arg_dest, None)
        yield (self._parse_or_no_value(raw_value), None)

    yield (self._get_value_from_config(task_name, param_name), None)

    dashed_name = param_name.replace('_', '-')
    yield (
        self._get_value_from_config(task_name, dashed_name),
        'Configuration [{}] {} (with dashes) should be avoided. Please use underscores.'.format(
            task_name, param_name),
    )

    if self.__config:
        legacy_section = self.__config['section']
        legacy_name = self.__config['name']
        yield (
            self._get_value_from_config(legacy_section, legacy_name),
            'The use of the configuration [{}] {} is deprecated. Please use [{}] {}'.format(
                legacy_section, legacy_name, task_name, param_name),
        )

    yield (self.__default, None)
def _value_iterator(self, task_name, param_name):
    """
    Generate candidate ``(value, warning)`` pairs for a parameter.

    The second tuple item is an optional deprecation warning (``None`` when
    there is nothing to warn about). The parameter value will be whatever
    non-_no_value that is yielded first.

    With an active ``CmdlineParser``, the global command-line destination is
    tried first and, when the task is the local task, the non-global one
    after it. Config lookups, the legacy config location and the default
    follow.
    """
    parser = CmdlineParser.get_instance()
    if parser:
        task_cls = task_register.Register.get_task_cls(task_name)
        no_section = not task_cls.use_cmdline_section
        # Always look at the global destination; add the local one for the local task.
        glob_flags = [True]
        if parser.is_local_task(task_name):
            glob_flags.append(False)
        for use_glob in glob_flags:
            arg_dest = self._parser_dest(param_name, task_name, glob=use_glob,
                                         is_without_section=no_section)
            if not arg_dest:
                continue
            raw_value = getattr(parser.known_args, arg_dest, None)
            yield (self._parse_or_no_value(raw_value), None)

    yield (self._get_value_from_config(task_name, param_name), None)

    dashed_name = param_name.replace('_', '-')
    yield (
        self._get_value_from_config(task_name, dashed_name),
        'Configuration [{}] {} (with dashes) should be avoided. Please use underscores.'.format(
            task_name, param_name),
    )

    if self.__config:
        legacy_section = self.__config['section']
        legacy_name = self.__config['name']
        yield (
            self._get_value_from_config(legacy_section, legacy_name),
            'The use of the configuration [{}] {} is deprecated. Please use [{}] {}'.format(
                legacy_section, legacy_name, task_name, param_name),
        )

    yield (self.__default, None)
def in_parse(cmds, deferred_computation):
    """Call ``deferred_computation`` with the task parsed from ``cmds``.

    The callback runs while the ``CmdlineParser`` global instance for
    ``cmds`` is active and receives the task object as its only argument.
    """
    with CmdlineParser.global_instance(cmds) as parser:
        task_obj = parser.get_task_obj()
        deferred_computation(task_obj)
def in_parse(cmds, deferred_computation):
    """Call ``deferred_computation`` with the task parsed from ``cmds``.

    The callback runs while the ``CmdlineParser`` global instance for
    ``cmds`` is active and receives the task object as its only argument.

    function copied from luigi:
    https://github.com/spotify/luigi/blob/e2228418eec60b68ca09a30c878ab26413846847/test/helpers.py
    """
    with CmdlineParser.global_instance(cmds) as parser:
        task_obj = parser.get_task_obj()
        deferred_computation(task_obj)
def _run_tree_info(cmdline_args, details):
    """Dump the tree info of the command-line task to the ``tree_info`` output.

    ``details`` is forwarded to ``gokart.make_tree_info``.
    """
    with CmdlineParser.global_instance(cmdline_args) as parser:
        # Resolve the output target first, matching the original evaluation order.
        target = gokart.tree_info().output()
        tree_text = gokart.make_tree_info(parser.get_task_obj(), details=details)
        target.dump(tree_text)
def main():
    """Print the dependency tree of the task named on the command line."""
    argv = sys.argv[1:]
    with CmdlineParser.global_instance(argv) as parser:
        root_task = parser.get_task_obj()
        rendered = print_tree(root_task)
        print(rendered)
def in_parse(cmds, deferred_computation):
    """Call ``deferred_computation()`` while ``cmds`` is the active command line.

    The callback takes no arguments; it runs inside the ``CmdlineParser``
    global-instance context created from ``cmds``.
    """
    parser_context = CmdlineParser.global_instance(cmds)
    with parser_context:
        deferred_computation()
def wrapper(*args, **kwargs):
    """Call ``fun`` with ``self.cmds`` installed as the global command line.

    ``allow_override=True`` is passed to ``CmdlineParser.global_instance``.
    All arguments are forwarded to ``fun`` and its result is returned.
    """
    parser_context = CmdlineParser.global_instance(self.cmds, allow_override=True)
    with parser_context:
        result = fun(*args, **kwargs)
        return result
def get_graph(args):
    """Print the task built from ``args``, then its inputs/outputs and its dependencies."""
    with CmdlineParser.global_instance(args) as parser:
        root_task = parser.get_task_obj()
        description = root_task.__repr__()
        print(description)
        print_input_output(root_task)
        print_deps(root_task)
def get_task(cmdline_args):
    """Return the task object described by ``cmdline_args``."""
    with CmdlineParser.global_instance(cmdline_args) as parser:
        task_obj = parser.get_task_obj()
        return task_obj