def generate_pod_yaml(args):
    """Generate a YAML file for each task in the DAG.

    Used for testing the output of the KubernetesExecutor: for every task a
    pod spec is constructed exactly as the executor would build it, run
    through the cluster's pod mutation hook, then serialized to
    ``<output_path>/airflow_yaml_output/<dag_id>_<task_id>_<date>.yml``.

    :param args: CLI namespace providing ``execution_date``, ``subdir``,
        ``dag_id`` and ``output_path``.
    """
    execution_date = args.execution_date
    dag = get_dag(subdir=args.subdir, dag_id=args.dag_id)
    yaml_output_path = args.output_path
    kube_config = KubeConfig()
    # Hoist loop-invariant work out of the per-task loop: the serializer,
    # the label-safe date string, and the output directory are identical
    # for every task, so compute/create them once instead of per iteration.
    api_client = ApiClient()
    date_string = pod_generator.datetime_to_label_safe_datestring(execution_date)
    output_dir = os.path.join(yaml_output_path, "airflow_yaml_output")
    os.makedirs(output_dir, exist_ok=True)
    for task in dag.tasks:
        ti = TaskInstance(task, execution_date)
        pod = PodGenerator.construct_pod(
            dag_id=args.dag_id,
            task_id=ti.task_id,
            pod_id=create_pod_id(args.dag_id, ti.task_id),
            try_number=ti.try_number,
            kube_image=kube_config.kube_image,
            date=ti.execution_date,
            args=ti.command_as_list(),
            pod_override_object=PodGenerator.from_obj(ti.executor_config),
            scheduler_job_id="worker-config",
            namespace=kube_config.executor_namespace,
            base_worker_pod=PodGenerator.deserialize_model_file(kube_config.pod_template_file),
        )
        # Apply any cluster-configured pod mutation hook, as the executor would.
        pod_mutation_hook(pod)
        yaml_file_name = f"{args.dag_id}_{ti.task_id}_{date_string}.yml"
        with open(os.path.join(output_dir, yaml_file_name), "w") as output:
            sanitized_pod = api_client.sanitize_for_serialization(pod)
            output.write(yaml.dump(sanitized_pod))
    print(f"YAML output can be found at {yaml_output_path}/airflow_yaml_output/")
def test_try_adopt_task_instances(self, mock_adopt_completed_pods, mock_adopt_launched_task):
    """Adoption must list pods by the previous worker id and hand matching pods over."""
    executor = self.kubernetes_executor
    executor.scheduler_job_id = "10"
    ti = mock.MagicMock(queued_by_job_id="1", external_executor_id="1", dag_id="dag", task_id="task")
    orphaned_pod = k8s.V1Pod(
        metadata=k8s.V1ObjectMeta(name="foo", labels={"dag_id": "dag", "task_id": "task"})
    )
    expected_pod_id = create_pod_id(dag_id="dag", task_id="task")
    kube_client = mock.MagicMock()
    kube_client.list_namespaced_pod.return_value.items = [orphaned_pod]
    executor.kube_client = kube_client

    # First adoption: pods are looked up under the ti's old worker id ("1").
    executor.try_adopt_task_instances([ti])
    kube_client.list_namespaced_pod.assert_called_once_with(
        namespace="default", label_selector="airflow-worker=1"
    )
    mock_adopt_launched_task.assert_called_once_with(kube_client, orphaned_pod, {expected_pod_id: ti})
    mock_adopt_completed_pods.assert_called_once()
    # We aren't checking the return value of `try_adopt_task_instances` because it relies on
    # `adopt_launched_task` mutating its arg. This should be refactored, but not right now.

    # Second adoption: queued_by_job_id and external_executor_id no longer match.
    for m in (kube_client, mock_adopt_launched_task, mock_adopt_completed_pods):
        m.reset_mock()
    ti.queued_by_job_id = "10"  # scheduler_job would have updated this after the first adoption
    executor.scheduler_job_id = "20"
    executor.try_adopt_task_instances([ti])
    kube_client.list_namespaced_pod.assert_called_once_with(
        namespace="default", label_selector="airflow-worker=10"
    )
    mock_adopt_launched_task.assert_called_once_with(kube_client, orphaned_pod, {expected_pod_id: ti})
    mock_adopt_completed_pods.assert_called_once()
def test_create_pod_id(self):
    """Every (dag_id, task_id) case must yield a uniquified pod name that is valid."""
    for dag_id, task_id in self._cases():
        candidate = create_pod_id(dag_id, task_id)
        unique_name = PodGenerator.make_unique_pod_id(candidate)
        assert self._is_valid_pod_id(unique_name)