def _terminate_all_running_pods(self):
    """
    Clean up all submitted pods on terminate.

    Deletes every pod found in ``self.submitted_pods``, sleeps 10 seconds and
    then reconciles the state of the task run attached to each pod:

    * ``is_task_instance_finished`` is true for the airflow state and the
      task run is not in a final state: the task run gets the state mapped
      from the airflow state by ``AIRFLOW_TO_DBND_STATE_MAP``
      (``TaskRunState.CANCELLED`` when the airflow state has no mapping).
    * ``is_task_instance_finished`` is true and the task run is already in
      a final state: nothing is changed, the situation is only logged.
    * otherwise: the task run is set to ``TaskRunState.CANCELLED``.

    Nothing is raised from here: every failure is logged, and a failure on
    one pod does not prevent the remaining pods from being handled.
    """
    # now we need to clean after the run
    pods_to_delete = sorted(self.submitted_pods.values())
    if not pods_to_delete:
        return

    self.log.info(
        "Terminating run, deleting all %d submitted pods that are still running/not finalized",
        len(pods_to_delete),
    )
    for submitted_pod in pods_to_delete:
        try:
            self.delete_pod(submitted_pod.pod_name)
        except Exception:
            self.log.exception("Failed to terminate pod %s", submitted_pod.pod_name)

    # Wait for pods to be deleted and execute their own state management
    self.log.info(
        "Setting all running/not finalized pods to cancelled in 10 seconds..."
    )
    time.sleep(10)

    for submitted_pod in pods_to_delete:
        # Each pod is guarded on its own (same as the deletion loop above), so
        # one broken task run cannot leave all the following ones unreconciled.
        try:
            task_run = submitted_pod.task_run
            ti_state = get_airflow_task_instance_state(task_run)
            if is_task_instance_finished(ti_state):
                if task_run.task_run_state not in TaskRunState.final_states():
                    self.log.info(
                        "%s with pod %s is not finished: airflow state - %s and databand state - %s. "
                        "Setting the task_run state to match airflow state",
                        task_run,
                        submitted_pod.pod_name,
                        ti_state,
                        task_run.task_run_state,
                    )
                    # Fall back to CANCELLED for airflow states without a mapping.
                    new_state = AIRFLOW_TO_DBND_STATE_MAP.get(
                        ti_state, TaskRunState.CANCELLED
                    )
                    task_run.set_task_run_state(new_state)
                else:
                    self.log.info(
                        "%s with pod %s is finished: airflow state - %s and databand state - %s.Skipping",
                        task_run,
                        submitted_pod.pod_name,
                        ti_state,
                        task_run.task_run_state,
                    )
                # Airflow already finished this task: never force CANCELLED on it.
                continue

            task_run.set_task_run_state(TaskRunState.CANCELLED)
        except Exception:
            self.log.exception(
                "Could not set pod %s to cancelled!", submitted_pod.pod_name
            )
def cleanup_after_task_run(self, task):
    # type: (Task) -> None
    """
    Clear cached targets of `task` that no unfinished task run still reads.

    Candidates are all input and output targets of `task`. Any target that
    appears among the inputs of a task run in ``self.task_runs`` whose state
    is not one of ``TaskRunState.final_states()`` is kept; the rest is passed
    to ``TARGET_CACHE.clear_for_targets``.
    """
    relations = task.ctrl.relations

    # everything the current task touched is a candidate for removal
    candidates = set(flatten([relations.task_inputs, relations.task_outputs]))

    # inputs of task runs that are not in a final state must stay cached
    still_needed = set()
    for task_run in self.task_runs:
        if task_run.task_run_state not in TaskRunState.final_states():
            still_needed.update(flatten(task_run.task.ctrl.relations.task_inputs))

    TARGET_CACHE.clear_for_targets(candidates - still_needed)
def _terminate_all_running_pods(self):
    """
    Clean up all submitted pods on terminate.

    Deletes every pod found in ``self.submitted_pods``, sleeps 10 seconds and
    then sets the task run of each pod to ``TaskRunState.CANCELLED`` unless
    that task run is already in one of ``TaskRunState.final_states()``.

    Nothing is raised from here: every failure is logged, and a failure on
    one pod does not prevent the remaining pods from being handled.
    """
    # now we need to clean after the run
    pods_to_delete = sorted(self.submitted_pods.values())
    if not pods_to_delete:
        return

    self.log.info(
        "Terminating run, deleting all %d submitted pods that are still running/not finalized",
        len(pods_to_delete),
    )
    for submitted_pod in pods_to_delete:
        try:
            self.delete_pod(submitted_pod.pod_name)
        except Exception:
            self.log.exception("Failed to terminate pod %s", submitted_pod.pod_name)

    # Wait for pods to be deleted and execute their own state management
    self.log.info(
        "Setting all running/not finalized pods to cancelled in 10 seconds..."
    )
    time.sleep(10)

    for submitted_pod in pods_to_delete:
        # Each pod is guarded on its own (same as the deletion loop above), so
        # one broken task run cannot leave all the following ones uncancelled.
        try:
            task_run = submitted_pod.task_run
            if task_run.task_run_state in TaskRunState.final_states():
                self.log.info(
                    "%s with pod %s was %s, skipping",
                    task_run,
                    submitted_pod.pod_name,
                    task_run.task_run_state,
                )
                continue
            task_run.set_task_run_state(TaskRunState.CANCELLED)
        except Exception:
            self.log.exception(
                "Could not set pod %s to cancelled!", submitted_pod.pod_name
            )