def remove_task_output(task, max_depth=0, mode=None, include_external=False):
    """
    Walks through the dependency tree of *task* and removes the outputs of the visited tasks.

    The tree is traversed with ``task.walk_deps`` in pre-order down to *max_depth*, which is cast
    to an integer. *mode* selects how outputs are treated: ``"i"`` (interactive, one prompt per
    task and one per output), ``"a"`` (all, remove without prompting) or ``"d"`` (dry, only print
    the outputs). It can be *None*, in which case the mode is queried from the user, an integer
    index into ``["i", "a", "d"]``, or a string whose lower-cased first character names the mode.
    Instances of ``ExternalTask`` are skipped unless *include_external*, after being passed
    through ``check_bool_flag``, evaluates to true.

    An exception is raised when *mode* cannot be resolved to one of the three known modes.
    """
    max_depth = int(max_depth)
    print("remove task output with max_depth {}".format(max_depth))

    include_external = check_bool_flag(include_external)
    if include_external:
        print("include external tasks")

    # determine the mode, i.e., all, dry, interactive
    modes = ["i", "a", "d"]
    mode_names = ["interactive", "all", "dry"]
    if mode is None:
        mode = query_choice("removal mode?", modes, default="i", descriptions=mode_names)
    elif isinstance(mode, int):
        # only translate valid indices, anything else is reported as an unknown mode below
        if -len(modes) <= mode < len(modes):
            mode = modes[mode]
    else:
        # an empty value has no first character and is reported as an unknown mode below
        mode = mode[0].lower() if mode else mode
    if mode not in modes:
        raise Exception("unknown removal mode '{}'".format(mode))
    mode_name = mode_names[modes.index(mode)]
    print("selected " + colored(mode_name + " mode", "blue", style="bright"))

    done = []
    ind = "| "
    for dep, _, depth in task.walk_deps(max_depth=max_depth, order="pre"):
        offset = depth * ind
        print(offset)
        print("{}> remove output of {}".format(offset, dep.colored_repr()))
        offset += ind

        if not include_external and isinstance(dep, ExternalTask):
            print(offset + "- " + colored("task is external, skip", "yellow"))
            continue

        # check for already handled tasks before prompting, so that the user is not asked about
        # a task whose outputs would be skipped anyway
        if dep in done:
            print(offset + "- " + colored("outputs already removed", "yellow"))
            continue

        if mode == "i":
            task_mode = query_choice(offset + " walk through outputs?", ("y", "n"), default="y")
            if task_mode == "n":
                continue

        done.append(dep)

        for outp in luigi.task.flatten(dep.output()):
            print("{}- remove {}".format(offset, outp.colored_repr()))

            # dry mode only lists the outputs
            if mode == "d":
                continue
            elif mode == "i":
                if query_choice(offset + " remove?", ("y", "n"), default="n") == "n":
                    print(offset + colored(" skipped", "yellow"))
                    continue

            outp.remove()
            print(offset + " " + colored("removed", "red", style="bright"))
def _purge_output(self, max_depth=0):
    """
    Removes the outputs of this task and of its dependencies after querying the user.

    The user is first asked how to continue: ``"n"`` returns immediately, ``"d"`` selects a dry
    run that only prints the outputs, ``"i"`` selects interactive mode with one prompt per task
    and one per output, and any other answer (``"y"``) removes outputs without further prompts.
    Dependencies are visited with ``self.walk_deps`` in pre-order down to *max_depth*. In
    interactive mode, answering ``"d"`` to the per-task prompt removes all outputs of that task
    without prompting for each of them.
    """
    print("purge output with max_depth %s\n" % max_depth)

    mode = query_choice("continue?", ("y", "n", "d", "i"), default="i")
    if mode == "n":
        return
    elif mode == "d":
        print("selected " + colored("dry mode", "blue", style="bright") + "\n")
    elif mode == "i":
        print("selected " + colored("interactive mode", "blue", style="bright") + "\n")
    else:
        print("")

    done = []

    for task, _, depth in self.walk_deps(max_depth=max_depth, order="pre"):
        offset = depth * "| "
        inner = (depth + 1) * "| "
        print("%s> remove output of %s" % (offset, task.colored_repr()))

        # check for already handled tasks before prompting, so that the user is not asked about
        # a task whose outputs would be skipped anyway
        if task in done:
            print(inner + "- " + colored("outputs already removed", "yellow"))
            continue

        task_mode = None
        if mode == "i":
            task_mode = query_choice(offset + " walk through outputs?", ("y", "n", "d"),
                default="y")
            if task_mode == "n":
                continue

        done.append(task)

        for outp in luigi.task.flatten(task.output()):
            print("%s- remove %s" % (inner, outp.colored_repr()))

            # dry mode only lists the outputs
            if mode == "d":
                continue

            # prompt per output unless the per-task answer was "d"
            if mode == "i" and task_mode != "d":
                if query_choice(inner + " remove?", ("y", "n"), default="n") == "n":
                    print(inner + " skipped")
                    continue

            outp.remove()
            # use the same colored helper as the rest of this function
            print(inner + " " + colored("removed", "red", style="bright"))

    # NOTE(review): emitted once after the traversal; the original nesting level of this
    # statement could not be determined unambiguously - confirm
    print("")
def fetch_task_output(task, max_depth=0, mode=None, target_dir=".", include_external=False):
    """
    Walks through the dependency tree of *task* and copies outputs of the visited tasks into
    *target_dir*.

    The tree is traversed with ``task.walk_deps`` in pre-order down to *max_depth*, which is cast
    to an integer. *target_dir* is made absolute, normalized and created when it does not exist.
    *mode* selects how outputs are treated: ``"i"`` (interactive), ``"a"`` (all, fetch without
    prompting) or ``"d"`` (dry, only print the outputs). It can be *None*, in which case the mode
    is queried from the user, an integer index into ``["i", "a", "d"]``, or a string whose
    lower-cased first character names the mode. Instances of ``ExternalTask`` are skipped unless
    *include_external*, after being passed through ``flag_to_bool``, evaluates to true.

    Outputs that have no ``stat`` or no callable ``copy_to_local`` attribute are skipped, except
    for ``TargetCollection`` instances whose contained targets are fetched one by one. Fetched
    files are named ``<dep.live_task_id>__<target basename>``.

    An exception is raised when *mode* cannot be resolved to one of the three known modes.
    """
    from law.task.base import ExternalTask
    from law.workflow.base import BaseWorkflow

    max_depth = int(max_depth)
    print("fetch task output with max_depth {}".format(max_depth))

    target_dir = os.path.normpath(os.path.abspath(target_dir))
    print("target directory is {}".format(target_dir))
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    include_external = flag_to_bool(include_external)
    if include_external:
        print("include external tasks")

    # determine the mode, i.e., all, dry, interactive
    modes = ["i", "a", "d"]
    mode_names = ["interactive", "all", "dry"]
    if mode is None:
        mode = query_choice("fetch mode?", modes, default="i", descriptions=mode_names)
    elif isinstance(mode, int):
        # only translate valid indices, anything else is reported as an unknown mode below
        if -len(modes) <= mode < len(modes):
            mode = modes[mode]
    else:
        # an empty value has no first character and is reported as an unknown mode below
        mode = mode[0].lower() if mode else mode
    if mode not in modes:
        raise Exception("unknown fetch mode '{}'".format(mode))
    mode_name = mode_names[modes.index(mode)]
    print("selected " + colored(mode_name + " mode", "blue", style="bright"))

    done = []
    for dep, _, depth in task.walk_deps(max_depth=max_depth, order="pre"):
        offset = depth * ("|" + ind)
        print(offset)

        # when the dep is a workflow, preload its branch map which updates branch parameters
        if isinstance(dep, BaseWorkflow):
            dep.get_branch_map()

        print("{}> fetch output of {}".format(offset, dep.repr(color=True)))
        offset += "|" + ind

        if not include_external and isinstance(dep, ExternalTask):
            print(offset + colored(" task is external", "yellow"))
            continue

        if dep in done:
            print(offset + colored(" outputs already fetched", "yellow"))
            continue

        # only read further below when the mode is interactive
        task_mode = None
        if mode == "i":
            task_mode = query_choice(offset + " fetch outputs?", ("y", "n", "a"), default="y",
                descriptions=["yes", "no", "all"])
            if task_mode == "n":
                print(offset + colored(" skipped", "yellow"))
                continue

        done.append(dep)

        # start the traversing through output structure with a lookup pattern
        for output, odepth, oprefix, ooffset, lookup in _iter_output(dep.output(), offset):
            is_collection = isinstance(output, TargetCollection)

            # a failing stat lookup is treated as "not existing", but interrupts such as
            # KeyboardInterrupt must still propagate
            try:
                stat = output.stat
            except Exception:
                stat = None

            target_line = "{} {}{}".format(ooffset, oprefix, output.repr(color=True))
            if stat:
                target_line += " ({:.2f} {})".format(*human_bytes(stat.st_size))
            print(target_line)

            if not is_collection and stat is None:
                print(ooffset + ind + colored(" not existing, skip", "yellow"))
                continue

            is_copyable = callable(getattr(output, "copy_to_local", None))
            if not is_collection and not is_copyable:
                print(ooffset + ind + colored(" not a file target, skip", "yellow"))
                continue

            # stop here when in dry mode
            if mode == "d":
                print(ooffset + ind + colored(" dry fetched", "yellow"))
                continue

            # query per output when the mode is interactive and the task decision is not "all"
            if mode == "i" and task_mode != "a":
                if is_collection:
                    coll_choice = query_choice(ooffset + ind + "fetch?", ("y", "n", "i"),
                        default="y", descriptions=["yes", "no", "interactive"])
                    if coll_choice == "i":
                        # push the contained targets to the front of the lookup so that they
                        # are visited, and queried, individually
                        lookup[:0] = _flatten_output(output.targets, odepth + 1)
                        continue
                    target_choice = coll_choice
                else:
                    target_choice = query_choice(ooffset + ind + "fetch?", ("y", "n"),
                        default="y", descriptions=["yes", "no"])
                if target_choice == "n":
                    print(ooffset + ind + colored(" skipped", "yellow"))
                    continue

            # collections are always resolved to their contained targets, also when no
            # per-output prompt was shown, as they would otherwise be dropped by the
            # copy_to_local check below and nothing would be fetched
            if is_collection:
                to_fetch = list(output._flat_target_list)
            else:
                to_fetch = [output]

            for outp in to_fetch:
                if not callable(getattr(outp, "copy_to_local", None)):
                    continue

                basename = "{}__{}".format(dep.live_task_id, outp.basename)
                outp.copy_to_local(os.path.join(target_dir, basename))

                print("{}{} {} ({})".format(
                    ooffset, ind, colored("fetched", "green", style="bright"), basename))
def remove_task_output(task, max_depth=0, mode=None, include_external=False):
    """
    Walks through the dependency tree of *task* and removes the outputs of the visited tasks.

    The tree is traversed with ``task.walk_deps`` in pre-order down to *max_depth*, which is cast
    to an integer. *mode* must be one of ``"i"`` (interactive), ``"d"`` (dry, only print the
    outputs) or ``"a"`` (all, remove without prompting). When it is empty, the mode is queried
    from the user, and any other value raises an exception. Instances of ``ExternalTask`` are
    skipped unless *include_external*, after being passed through ``flag_to_bool``, evaluates to
    true.
    """
    from law.task.base import ExternalTask
    from law.workflow.base import BaseWorkflow

    max_depth = int(max_depth)
    print("remove task output with max_depth {}".format(max_depth))

    include_external = flag_to_bool(include_external)
    if include_external:
        print("include external tasks")

    # determine the mode, i.e., interactive, dry, all
    modes = ["i", "d", "a"]
    mode_names = ["interactive", "dry", "all"]
    if not mode:
        mode = query_choice("removal mode?", modes, default="i", descriptions=mode_names)
    elif mode not in modes:
        raise Exception("unknown removal mode '{}'".format(mode))
    mode_name = mode_names[modes.index(mode)]
    print("selected " + colored(mode_name + " mode", "blue", style="bright"))

    handled = []
    for dep, _, depth in task.walk_deps(max_depth=max_depth, order="pre"):
        step = "|" + ind
        header_offset = depth * step
        print(header_offset)

        # when the dep is a workflow, preload its branch map which updates branch parameters
        if isinstance(dep, BaseWorkflow):
            dep.get_branch_map()

        print("{}> remove output of {}".format(header_offset, dep.repr(color=True)))
        offset = header_offset + step

        # external tasks are only considered on request
        if isinstance(dep, ExternalTask) and not include_external:
            print(offset + colored(" task is external", "yellow"))
            continue

        # each task is processed only once
        if dep in handled:
            print(offset + colored(" already removed", "yellow"))
            continue

        # in interactive mode, ask for a decision per task
        task_mode = None
        if mode == "i":
            task_mode = query_choice(offset + " remove outputs?", ["y", "n", "a"], default="y",
                descriptions=["yes", "no", "all"])
            if task_mode == "n":
                continue

        handled.append(dep)

        # start the traversing through output structure
        for output, odepth, oprefix, ooffset, lookup in _iter_output(dep.output(), offset):
            print("{} {}{}".format(ooffset, oprefix, output.repr(color=True)))

            # nothing is removed in dry mode
            if mode == "d":
                print(ooffset + ind + colored(" dry removed", "yellow"))
                continue

            # a per-output prompt is shown in interactive mode unless "all" was chosen for the task
            if mode == "i" and task_mode != "a":
                is_collection = isinstance(output, TargetCollection)
                if is_collection:
                    choices = ("y", "n", "i")
                    descriptions = ["yes", "no", "interactive"]
                else:
                    choices = ("y", "n")
                    descriptions = ["yes", "no"]

                answer = query_choice(ooffset + ind + " remove?", choices, default="n",
                    descriptions=descriptions)

                if is_collection and answer == "i":
                    # push the contained targets to the front of the lookup so that they are
                    # visited, and queried, individually
                    lookup[:0] = _flatten_output(output.targets, odepth + 1)
                    continue

                if answer == "n":
                    print(ooffset + ind + colored(" skipped", "yellow"))
                    continue

            output.remove()
            print(ooffset + ind + colored(" removed", "red", style="bright"))
def remove_task_output(task, max_depth=0, mode=None, run_task=False):
    """
    Walks through the dependency tree of *task*, removes the outputs of the visited tasks and
    returns the *run_task* flag.

    The tree is traversed with ``task.walk_deps`` in pre-order down to *max_depth*, which is cast
    to an integer. *mode* must be one of ``"i"`` (interactive), ``"d"`` (dry, only print the
    outputs) or ``"a"`` (all, remove without prompting). When it is empty, the mode is queried
    from the user, and any other value raises an exception.

    Instances of ``ExternalTask`` and outputs with a true ``external`` attribute are always
    skipped. In ``"a"`` mode, tasks with a true ``skip_output_removal`` attribute are skipped as
    well. The return value is *run_task* after being passed through ``flag_to_bool``.
    """
    from law.task.base import ExternalTask
    from law.workflow.base import BaseWorkflow

    max_depth = int(max_depth)
    print("remove task output with max_depth {}".format(max_depth))

    run_task = flag_to_bool(run_task)
    if run_task:
        print("task will run after output removal")

    # determine the mode, i.e., interactive, dry, all
    modes = ["i", "d", "a"]
    mode_names = ["interactive", "dry", "all"]
    if mode and mode not in modes:
        raise Exception("unknown removal mode '{}'".format(mode))
    if not mode:
        mode = query_choice("removal mode?", modes, default="i", descriptions=mode_names)
    mode_name = mode_names[modes.index(mode)]
    # the colored part already ends with "mode", so the template must not repeat it
    print("selected {}".format(colored(mode_name + " mode", "blue", style="bright")))

    done = []
    for dep, _, depth in task.walk_deps(max_depth=max_depth, order="pre"):
        offset = depth * ("|" + ind)
        print(offset)

        # when the dep is a workflow, independent of its create_branch_map_before_repr setting,
        # preload its branch map which updates branch parameters
        if isinstance(dep, BaseWorkflow):
            dep.get_branch_map()

        print("{}> {}".format(offset, dep.repr(color=True)))
        offset += "|" + ind

        # always skip external tasks
        if isinstance(dep, ExternalTask):
            print(offset + colored(" task is external", "yellow"))
            continue

        # skip when this task was already handled
        if dep in done:
            print(offset + colored(" already handled", "yellow"))
            continue
        done.append(dep)

        # skip when mode is "all" and task is configured to skip
        if mode == "a" and getattr(dep, "skip_output_removal", False):
            print(offset + colored(" configured to skip", "yellow"))
            continue

        # query for a decision per task when mode is "interactive"
        task_mode = None
        if mode == "i":
            task_mode = query_choice(offset + " remove outputs?", ["y", "n", "a"], default="y",
                descriptions=["yes", "no", "all"])
            if task_mode == "n":
                continue

        # start the traversing through output structure
        for output, odepth, oprefix, ooffset, lookup in _iter_output(dep.output(), offset):
            print("{} {}{}".format(ooffset, oprefix, output.repr(color=True)))

            # skip external targets
            if getattr(output, "external", False):
                print(ooffset + ind + colored(" external output", "yellow"))
                continue

            # stop here when in dry mode
            if mode == "d":
                print(ooffset + ind + colored(" dry removed", "yellow"))
                continue

            # when the mode is "interactive" and the task decision is not "all", query per output
            if mode == "i" and task_mode != "a":
                if isinstance(output, TargetCollection):
                    coll_choice = query_choice(ooffset + ind + " remove?", ("y", "n", "i"),
                        default="n", descriptions=["yes", "no", "interactive"])
                    if coll_choice == "i":
                        # push the contained targets to the front of the lookup so that they
                        # are visited, and queried, individually
                        lookup[:0] = _flatten_output(output.targets, odepth + 1)
                        continue
                    else:
                        target_choice = coll_choice
                else:
                    target_choice = query_choice(ooffset + ind + " remove?", ("y", "n"),
                        default="n", descriptions=["yes", "no"])
                if target_choice == "n":
                    print(ooffset + ind + colored(" skipped", "yellow"))
                    continue

            # finally remove
            output.remove()
            print(ooffset + ind + colored(" removed", "red", style="bright"))

    return run_task
def remove_task_output(task, max_depth=0, mode=None, include_external=False):
    """
    Walks through the dependency tree of *task* and removes the flattened outputs of the visited
    tasks.

    The tree is traversed with ``task.walk_deps`` in pre-order down to *max_depth*, which is cast
    to an integer. *mode* must be one of ``"i"`` (interactive), ``"d"`` (dry, only print the
    outputs) or ``"a"`` (all, remove without prompting). When it is empty, the mode is queried
    from the user, and any other value raises an exception. Instances of ``ExternalTask`` are
    skipped unless *include_external*, after being passed through ``check_bool_flag``, evaluates
    to true.
    """
    from law.task.base import ExternalTask
    from law.workflow.base import BaseWorkflow

    max_depth = int(max_depth)
    print("remove task output with max_depth {}".format(max_depth))

    include_external = check_bool_flag(include_external)
    if include_external:
        print("include external tasks")

    # determine the mode, i.e., interactive, dry, all
    modes = ["i", "d", "a"]
    mode_names = ["interactive", "dry", "all"]
    if not mode:
        mode = query_choice("removal mode?", modes, default="i", descriptions=mode_names)
    elif mode not in modes:
        raise Exception("unknown removal mode '{}'".format(mode))
    mode_name = mode_names[modes.index(mode)]
    print("selected " + colored(mode_name + " mode", "blue", style="bright"))

    handled = []
    ind = "| "
    for dep, _, depth in task.walk_deps(max_depth=max_depth, order="pre"):
        header_offset = depth * ind
        print(header_offset)

        # when the dep is a workflow, preload its branch map which updates branch parameters
        if isinstance(dep, BaseWorkflow):
            dep.get_branch_map()

        print("{}> remove output of {}".format(header_offset, dep.repr(color=True)))
        offset = header_offset + ind

        # external tasks are only considered on request
        if isinstance(dep, ExternalTask) and not include_external:
            print(offset + "- " + colored("task is external, skip", "yellow"))
            continue

        # each task is processed only once
        if dep in handled:
            print(offset + "- " + colored("outputs already removed", "yellow"))
            continue

        # in interactive mode, ask for a decision per task
        task_mode = None
        if mode == "i":
            task_mode = query_choice(offset + " remove outputs?", ["y", "n", "a"], default="y",
                descriptions=["yes", "no", "all"])
            if task_mode == "n":
                continue

        handled.append(dep)

        for outp in flatten(dep.output()):
            print("{}- {}".format(offset, outp.repr(color=True)))

            # nothing is removed in dry mode
            if mode == "d":
                print(offset + " " + colored("dry removed", "yellow"))
                continue

            # a per-output prompt is shown in interactive mode unless "all" was chosen for the task
            if mode == "i" and task_mode != "a":
                answer = query_choice(offset + " remove?", ("y", "n"), default="n")
                if answer == "n":
                    print(offset + " " + colored("skipped", "yellow"))
                    continue

            outp.remove()
            print(offset + " " + colored("removed", "red", style="bright"))
def fetch_task_output(task, max_depth=0, mode=None, target_dir=".", include_external=False):
    """
    Walks through the dependency tree of *task* and copies outputs of the visited tasks into
    *target_dir*.

    The tree is traversed with ``task.walk_deps`` in pre-order down to *max_depth*, which is cast
    to an integer. *target_dir* is made absolute, normalized and created when it does not exist.
    *mode* selects how outputs are treated: ``"i"`` (interactive, one prompt per task and one per
    output), ``"a"`` (all, fetch without prompting) or ``"d"`` (dry, only print the outputs). It
    can be *None*, in which case the mode is queried from the user, an integer index into
    ``["i", "a", "d"]``, or a string whose lower-cased first character names the mode. Instances
    of ``ExternalTask`` are skipped unless *include_external*, after being passed through
    ``check_bool_flag``, evaluates to true.

    Outputs are flattened, with ``TargetCollection`` instances being replaced by their contained
    targets. Outputs that have no ``stat`` or no callable ``copy_to_local`` attribute are
    skipped. Fetched files are named ``<dep.live_task_id>__<target basename>``.

    An exception is raised when *mode* cannot be resolved to one of the three known modes.
    """
    from law.task.base import ExternalTask
    from law.workflow.base import BaseWorkflow

    def print_skip(offset, reason):
        # helper to report a skipped output, defined once instead of per output
        text = reason + ", skip"
        print(offset + " " + colored(text, color="yellow", style="bright"))

    max_depth = int(max_depth)
    print("fetch task output with max_depth {}".format(max_depth))

    target_dir = os.path.normpath(os.path.abspath(target_dir))
    print("target directory is {}".format(target_dir))
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    include_external = check_bool_flag(include_external)
    if include_external:
        print("include external tasks")

    # determine the mode, i.e., all, dry, interactive
    modes = ["i", "a", "d"]
    mode_names = ["interactive", "all", "dry"]
    if mode is None:
        mode = query_choice("fetch mode?", modes, default="i", descriptions=mode_names)
    elif isinstance(mode, int):
        # only translate valid indices, anything else is reported as an unknown mode below
        if -len(modes) <= mode < len(modes):
            mode = modes[mode]
    else:
        # an empty value has no first character and is reported as an unknown mode below
        mode = mode[0].lower() if mode else mode
    if mode not in modes:
        raise Exception("unknown fetch mode '{}'".format(mode))
    mode_name = mode_names[modes.index(mode)]
    print("selected " + colored(mode_name + " mode", "blue", style="bright"))

    done = []
    ind = "| "
    for dep, _, depth in task.walk_deps(max_depth=max_depth, order="pre"):
        offset = depth * ind
        print(offset)

        # when the dep is a workflow, preload its branch map which updates branch parameters
        if isinstance(dep, BaseWorkflow):
            dep.get_branch_map()

        print("{}> fetch output of {}".format(offset, dep.repr(color=True)))
        offset += ind

        if not include_external and isinstance(dep, ExternalTask):
            print(offset + "- " + colored("task is external, skip", "yellow"))
            continue

        if dep in done:
            print(offset + "- " + colored("outputs already fetched", "yellow"))
            continue

        if mode == "i":
            task_mode = query_choice(offset + " walk through outputs?", ("y", "n"), default="y")
            if task_mode == "n":
                continue

        done.append(dep)

        # flatten the output structure and replace collections by their contained targets
        outputs = flatten(
            (outp._flat_target_list if isinstance(outp, TargetCollection) else outp)
            for outp in flatten(dep.output())
        )
        for outp in outputs:
            # a failing stat lookup is treated as "not existing", but interrupts such as
            # KeyboardInterrupt must still propagate
            try:
                stat = outp.stat
            except Exception:
                stat = None

            target_line = "{}- {}".format(offset, outp.repr(color=True))
            if stat:
                target_line += " ({:.2f} {})".format(*human_bytes(stat.st_size))
            print(target_line)

            if stat is None:
                print_skip(offset, "not existing")
                continue

            if not callable(getattr(outp, "copy_to_local", None)):
                print_skip(offset, "not a file target")
                continue

            # stop here when in dry mode, query per output when in interactive mode
            if mode == "d":
                print("{} {}".format(offset, colored("dry fetched", "yellow")))
                continue
            elif mode == "i":
                q = offset + " fetch?"
                if query_choice(q, ("y", "n"), default="y") == "n":
                    print(offset + " " + colored("skipped", "yellow"))
                    continue

            basename = "{}__{}".format(dep.live_task_id, outp.basename)
            outp.copy_to_local(os.path.join(target_dir, basename))

            print("{} {} ({})".format(offset, colored("fetched", "green", style="bright"),
                basename))