def compute_task_graph(task: luigi.Task) -> None:
    """Record the dependency edges reachable from ``task`` in ``graph``.

    ``graph`` is a mapping that lives outside this function (it is not
    defined in this block). Each key is a task name and each value is the
    list of names of that task's direct dependencies; a name is the part of
    ``str(task)`` that precedes the first ``'('``.

    The walk is a plain depth-first recursion without a visited set, so a
    dependency that is reachable through several paths is visited (and its
    edges appended) once per path.
    """
    parent_name = str(task).partition('(')[0]
    for child in task.deps():
        child_name = str(child).partition('(')[0]
        # setdefault creates the edge list the first time a parent is seen.
        graph.setdefault(parent_name, []).append(child_name)
        compute_task_graph(child)
def run_task(task: luigi.Task):
    """
    Run the task and all its dependencies synchronously.

    This is probably some kind of reinvention of the wheel, but I don't know
    how to do this better. Note that there is no guarantee that this works
    exactly the same as the luigi scheduler.

    The work list is a FIFO queue. A task whose static dependency
    (``task.deps()``) or dynamic dependency (a task yielded from a
    generator-style ``run()``) is still incomplete is put back on the queue
    *behind* that dependency and retried later; its ``run()`` is then
    started again from the beginning.

    Caveats:
        * Each dynamic dependency yielded by ``run()`` must be a single task
          object (``complete()`` and ``output()`` are called on it directly).
        * A dependency that still reports ``complete() == False`` after its
          own ``run()`` finished is queued again every time its parent is
          retried, so the loop will not terminate in that situation.

    Exceptions raised by ``run()`` -- including ``TypeError`` raised inside a
    generator-style ``run()`` -- propagate to the caller.
    """
    # Function-scope import so the block does not rely on a module-level one.
    from types import GeneratorType

    tasks = Queue()
    tasks.put(task)
    while tasks.qsize():
        task = tasks.get()
        if task.complete():
            continue

        # Static dependencies: handle the first incomplete one before us.
        blocker = next(
            (dep for dep in task.deps() if not dep.complete()), None)
        if blocker is not None:
            tasks.put(blocker)
            tasks.put(task)
            continue

        # Run task ...
        result = task.run()
        if not isinstance(result, GeneratorType):
            # Plain run(): no dynamic dependencies, the work is already done.
            # An explicit type check is used instead of catching TypeError
            # from next(), which would also hide TypeErrors raised by the
            # task's own code.
            continue

        # ... while checking optional dynamic dependencies.
        try:
            dep = next(result)
            while dep.complete():
                # Hand the finished dependency's output back to run().
                dep = result.send(dep.output())
        except StopIteration:
            # run() finished without hitting an incomplete dependency.
            continue

        # `dep` is an incomplete dynamic dependency: handle it first, then
        # retry this task.
        tasks.put(dep)
        tasks.put(task)
def logging(task: luigi.Task, log_dependencies: bool = False) -> lg.Logger:
    """Create a per-task file logger and write a header describing ``task``.

    The logger gets a random 10-character name (uppercase letters and
    digits), is set to ``INFO`` level and writes to
    ``task.output().path + ".log"``; the file is opened in ``'w'`` mode, so
    existing content is overwritten.

    Note that this function is itself named ``logging``; the standard
    library module is referred to as ``lg`` here.

    Args:
        task: Task to describe. ``task.output()`` must expose a ``path``.
        log_dependencies: When true, the dependency graph reachable from
            ``task`` is logged first as a Graphviz ``digraph`` with one
            ``"dependency" -> "dependent"`` edge per line. Nodes are named
            by the part of ``str(task)`` before the first ``'('``.

    Returns:
        The configured logger. Its file handler is left open; the caller is
        responsible for the logger from here on.
    """
    logger_name = ''.join(
        random.choices(string.ascii_uppercase + string.digits, k=10))
    log_file = task.output().path + ".log"
    logger = lg.getLogger(logger_name)

    file_handler = lg.FileHandler(log_file, mode='w')
    file_handler.setFormatter(
        lg.Formatter('%(asctime)s %(levelname)s: %(message)s'))
    logger.setLevel(lg.INFO)
    logger.addHandler(file_handler)

    if log_dependencies:
        logger.info("digraph G {")
        graph: Dict[str, List[str]] = {}
        visited = set()

        def node_name(node: luigi.Task) -> str:
            return str(node).split('(')[0]

        def compute_task_graph(node: luigi.Task) -> None:
            # Visit every task once: without this, a dependency shared by
            # several parents is re-walked once per path, and a cycle would
            # recurse forever.
            if node in visited:
                return
            visited.add(node)
            name = node_name(node)
            for dep in node.deps():
                graph.setdefault(name, []).append(node_name(dep))
                compute_task_graph(dep)

        compute_task_graph(task)
        for dependent, dependencies in graph.items():
            # dict.fromkeys de-duplicates while keeping first-seen order, so
            # the emitted edge order is the same on every run.
            for dependency in dict.fromkeys(dependencies):
                logger.info(f'"{dependency}" -> "{dependent}";')
        logger.info("}")
        logger.info("")

    for param in task.get_param_names():
        logger.info(f"{param} = {getattr(task, param)}")
    logger.info("===")
    return logger
def build_sequentially(task: Task) -> None:
    """Build ``task`` and everything it depends on, one task at a time.

    The candidate set is everything produced by ``yield_dependencies(task)``.
    On every round the tasks that already report ``complete()`` are dropped,
    then one remaining task whose dependencies are all complete is run.
    ``WrapperTask`` instances are never run directly; they only leave the
    set once ``complete()`` reports true for them.

    Returns as soon as no incomplete task is left.

    Raises:
        RuntimeError: If incomplete tasks remain but none of them can be
            run, or if a task still reports incomplete right after its
            ``run()`` returned.
    """

    def can_run(candidate: Task) -> bool:
        return not isinstance(candidate, WrapperTask) and all(
            dep.complete()
            for dep in yield_dependencies(candidate)
            if dep is not candidate)

    tasks = set(yield_dependencies(task))
    while True:
        tasks = {pending for pending in tasks if not pending.complete()}
        if not tasks:
            # Emptiness must be checked *after* filtering; otherwise the
            # round following the last successful run finds nothing to run
            # and is misreported as a failure.
            return

        to_run = next(
            (pending for pending in tasks if can_run(pending)), None)
        if to_run is None:
            raise RuntimeError(
                f"Unable to build {task} sequentially; {tasks=} remain")

        to_run.run()
        if not to_run.complete():
            # Name the task that was actually run, not the root task.
            raise RuntimeError(f"Ran {to_run}; but remains incomplete")
def task_action(task: luigi.Task, method: str) -> Iterator[el.Action]:
    """Open an ``el`` task action around one method of ``task``.

    The action type is ``"<task_family>.<method>"``; the action also carries
    the task's ``task_id`` and its significant, public parameters in string
    form as extra fields.

    This is a generator that yields the action exactly once, from inside
    the ``with`` block, so the action stays open while the consumer is
    suspended on that value. NOTE(review): presumably wrapped by
    ``contextlib.contextmanager`` -- the decorator is not visible here.
    """
    action_type = f'{task.task_family}.{method}'
    task_id = task.task_id
    param_fields = task.to_str_params(only_significant=True, only_public=True)
    with el.start_task(
            action_type=action_type, task_id=task_id, **param_fields) as action:
        yield action
def yield_dependencies(task: Task) -> Iterable[Task]:
    """Yield ``task`` itself followed by its direct and transitive dependencies.

    The candidates are chained in this order: the task, its direct
    dependencies (``task.deps()``), then the recursive result for each
    direct dependency. The chain is passed through ``_unique_everseen``
    before being yielded (presumably dropping repeats while keeping
    first-seen order -- that helper is defined elsewhere).
    """
    direct = task.deps()
    # Generators are created eagerly but only run when the chain reaches them.
    nested = [yield_dependencies(dep) for dep in direct]
    candidates = chain([task], direct, *nested)
    yield from _unique_everseen(candidates)
from luigi import Task
from luigi.event import Event
from telegram_notify import send_notification

# Import-time side effect: register `send_notification` as a handler for
# Event.FAILURE on luigi's base `Task` class.
# NOTE(review): registering on the base class presumably makes the handler
# fire for every Task subclass; the handler's behaviour lives in
# `telegram_notify` and is not visible here.
Task.event_handler(Event.FAILURE)(send_notification)
def set_handlers(self):
    """Register this object's success and failure callbacks on ``Task``.

    ``self.on_success`` is attached to ``Event.SUCCESS`` and
    ``self.on_failure`` to ``Event.FAILURE``, in that order, after a debug
    message has been logged.
    """
    logger.debug('Setting up handlers')

    register_success = Task.event_handler(Event.SUCCESS)
    register_success(self.on_success)

    register_failure = Task.event_handler(Event.FAILURE)
    register_failure(self.on_failure)
def _canonical_params(t: luigi.Task) -> FrozenOrderedDict:
    """Return the significant parameters of ``t`` in sorted order.

    The parameters are taken in string form from
    ``t.to_str_params(only_significant=True)``, their ``(name, value)``
    pairs are sorted, and the sorted pairs are wrapped in a
    ``FrozenOrderedDict``.
    """
    significant = t.to_str_params(only_significant=True)
    ordered_pairs = sorted(significant.items())
    return FrozenOrderedDict(ordered_pairs)
def _deep_requires(t: luigi.Task) -> Iterable[luigi.Task]:
    """Yield ``t`` and then, depth-first, everything it requires.

    ``t.requires()`` is flattened with ``luigi.task.flatten`` and each
    resulting requirement is expanded recursively. Nothing is
    de-duplicated: a task reachable through several paths is yielded once
    per path.
    """
    yield t
    requirements = luigi.task.flatten(t.requires())
    for requirement in requirements:
        # cast() is for the type checker only; it does nothing at runtime.
        yield from _deep_requires(cast(luigi.Task, requirement))