Example #1
0
    def _terminate_all_running_pods(self):
        """
        Delete every submitted pod and bring its task run to a final state.

        Every pod in ``self.submitted_pods`` is deleted; a failed deletion is
        logged and does not stop the remaining deletions. After a fixed
        10 second wait the task run of each pod is reconciled:

        * airflow task instance not finished (per ``is_task_instance_finished``):
          the task run is set to ``TaskRunState.CANCELLED``;
        * airflow task instance finished and the task run already in one of
          ``TaskRunState.final_states()``: nothing is changed;
        * airflow task instance finished but the task run not final: the task
          run gets the state mapped from the airflow state through
          ``AIRFLOW_TO_DBND_STATE_MAP`` (``CANCELLED`` when there is no mapping).

        A failure while reconciling one task run is logged and the remaining
        task runs are still processed.
        """
        pods_to_delete = sorted(self.submitted_pods.values())
        if not pods_to_delete:
            return

        self.log.info(
            "Terminating run, deleting all %d submitted pods that are still running/not finalized",
            len(pods_to_delete),
        )
        for submitted_pod in pods_to_delete:
            try:
                self.delete_pod(submitted_pod.pod_name)
            except Exception:
                self.log.exception("Failed to terminate pod %s",
                                   submitted_pod.pod_name)

        # Wait for pods to be deleted and execute their own state management
        self.log.info(
            "Setting all running/not finalized pods to cancelled in 10 seconds..."
        )
        time.sleep(10)

        for submitted_pod in pods_to_delete:
            # Errors are isolated per pod (same as the deletion loop above), so
            # one broken task run cannot leave all the following ones unfinished.
            try:
                task_run = submitted_pod.task_run
                ti_state = get_airflow_task_instance_state(task_run)

                if not is_task_instance_finished(ti_state):
                    task_run.set_task_run_state(TaskRunState.CANCELLED)
                    continue

                if task_run.task_run_state in TaskRunState.final_states():
                    self.log.info(
                        "%s with pod %s is finished: airflow state - %s and databand state - %s.Skipping",
                        task_run,
                        submitted_pod.pod_name,
                        ti_state,
                        task_run.task_run_state,
                    )
                    continue

                # airflow considers the task instance finished, but the task
                # run state was never finalized: align it with airflow
                self.log.info(
                    "%s with pod %s is not finished: airflow state - %s and databand state - %s. "
                    "Setting the task_run state to match airflow state",
                    task_run,
                    submitted_pod.pod_name,
                    ti_state,
                    task_run.task_run_state,
                )
                new_state = AIRFLOW_TO_DBND_STATE_MAP.get(
                    ti_state, TaskRunState.CANCELLED)
                task_run.set_task_run_state(new_state)
            except Exception:
                self.log.exception("Could not set pod %s to cancelled!",
                                   submitted_pod.pod_name)
Example #2
0
    def cleanup_after_task_run(self, task):
        # type: (Task) -> None
        """
        Clear cached targets of ``task`` that no unfinished task run reads.

        The candidates are all input and output targets of ``task``. Targets
        that appear among the inputs of any task run in ``self.task_runs``
        whose state is not in ``TaskRunState.final_states()`` are excluded.
        The rest is handed to ``TARGET_CACHE.clear_for_targets``.
        """
        relations = task.ctrl.relations
        # every input/output target of the given task is a removal candidate
        candidates = set(flatten([relations.task_inputs, relations.task_outputs]))

        # inputs of task runs that have not reached a final state must be kept
        still_needed = {
            target
            for task_run in self.task_runs
            if task_run.task_run_state not in TaskRunState.final_states()
            for target in flatten(task_run.task.ctrl.relations.task_inputs)
        }

        TARGET_CACHE.clear_for_targets(candidates.difference(still_needed))
Example #3
0
    def _terminate_all_running_pods(self):
        """
        Delete every submitted pod and cancel task runs that are not final.

        Every pod in ``self.submitted_pods`` is deleted; a failed deletion is
        logged and does not stop the remaining deletions. After a fixed
        10 second wait, each task run whose state is not in
        ``TaskRunState.final_states()`` is set to ``TaskRunState.CANCELLED``;
        task runs already in a final state are only logged.

        A failure while updating one task run is logged and the remaining
        task runs are still processed.
        """
        pods_to_delete = sorted(self.submitted_pods.values())
        if not pods_to_delete:
            return

        self.log.info(
            "Terminating run, deleting all %d submitted pods that are still running/not finalized",
            len(pods_to_delete),
        )
        for submitted_pod in pods_to_delete:
            try:
                self.delete_pod(submitted_pod.pod_name)
            except Exception:
                self.log.exception("Failed to terminate pod %s", submitted_pod.pod_name)

        # Wait for pods to be deleted and execute their own state management
        self.log.info(
            "Setting all running/not finalized pods to cancelled in 10 seconds..."
        )
        time.sleep(10)

        for submitted_pod in pods_to_delete:
            # Errors are isolated per pod (same as the deletion loop above), so
            # one broken task run cannot leave all the following ones uncancelled.
            try:
                task_run = submitted_pod.task_run
                if task_run.task_run_state in TaskRunState.final_states():
                    self.log.info(
                        "%s with pod %s was %s, skipping",
                        task_run,
                        submitted_pod.pod_name,
                        task_run.task_run_state,
                    )
                    continue
                task_run.set_task_run_state(TaskRunState.CANCELLED)
            except Exception:
                self.log.exception(
                    "Could not set pod %s to cancelled!", submitted_pod.pod_name
                )