Ejemplo n.º 1
0
def generate_pod_yaml(args):
    """Generates yaml files for each task in the DAG. Used for testing output of KubernetesExecutor

    One ``<dag_id>_<task_id>_<date>.yml`` file per task is written into
    ``<args.output_path>/airflow_yaml_output/``; the directory is created if
    it does not exist yet.

    :param args: parsed CLI arguments. This function reads ``execution_date``,
        ``subdir``, ``dag_id`` and ``output_path`` from it.
    """
    execution_date = args.execution_date
    dag = get_dag(subdir=args.subdir, dag_id=args.dag_id)
    yaml_output_path = args.output_path
    kube_config = KubeConfig()

    # None of the following depends on the task being rendered, so it is done
    # once up front instead of once per task. Creating the directory here also
    # guarantees that the location announced at the end exists, even when the
    # DAG has no tasks.
    output_dir = yaml_output_path + "/airflow_yaml_output/"
    os.makedirs(os.path.dirname(output_dir), exist_ok=True)
    date_string = pod_generator.datetime_to_label_safe_datestring(execution_date)
    api_client = ApiClient()

    for task in dag.tasks:
        ti = TaskInstance(task, execution_date)
        pod = PodGenerator.construct_pod(
            dag_id=args.dag_id,
            task_id=ti.task_id,
            pod_id=create_pod_id(args.dag_id, ti.task_id),
            try_number=ti.try_number,
            kube_image=kube_config.kube_image,
            date=ti.execution_date,
            args=ti.command_as_list(),
            pod_override_object=PodGenerator.from_obj(ti.executor_config),
            scheduler_job_id="worker-config",
            namespace=kube_config.executor_namespace,
            # Deliberately re-read for every task: it is not visible from here
            # whether construct_pod mutates the object it is handed.
            base_worker_pod=PodGenerator.deserialize_model_file(kube_config.pod_template_file),
        )
        pod_mutation_hook(pod)
        yaml_file_name = f"{args.dag_id}_{ti.task_id}_{date_string}.yml"
        with open(output_dir + yaml_file_name, "w") as output:
            # Convert the client model object into plain data before dumping it as YAML.
            sanitized_pod = api_client.sanitize_for_serialization(pod)
            output.write(yaml.dump(sanitized_pod))
    print(f"YAML output can be found at {yaml_output_path}/airflow_yaml_output/")
Ejemplo n.º 2
0
    def test_try_adopt_task_instances(self, mock_adopt_completed_pods,
                                      mock_adopt_launched_task):
        """Adopt the same task instance twice and verify the calls made on each round."""
        k8s_executor = self.kubernetes_executor
        k8s_executor.scheduler_job_id = "10"
        task_instance = mock.MagicMock(
            queued_by_job_id="1",
            external_executor_id="1",
            dag_id="dag",
            task_id="task",
        )
        pod_labels = {"dag_id": "dag", "task_id": "task"}
        worker_pod = k8s.V1Pod(metadata=k8s.V1ObjectMeta(name="foo", labels=pod_labels))
        expected_pod_id = create_pod_id(dag_id="dag", task_id="task")
        kube_client = mock.MagicMock()
        kube_client.list_namespaced_pod.return_value.items = [worker_pod]
        k8s_executor.kube_client = kube_client

        def assert_adoption(label_selector):
            # Checks shared by both rounds: one pod listing with the given
            # selector, one launched-task adoption, one completed-pods adoption.
            kube_client.list_namespaced_pod.assert_called_once_with(
                namespace='default', label_selector=label_selector)
            mock_adopt_launched_task.assert_called_once_with(
                kube_client, worker_pod, {expected_pod_id: task_instance})
            mock_adopt_completed_pods.assert_called_once()

        # First adoption
        k8s_executor.try_adopt_task_instances([task_instance])
        assert_adoption('airflow-worker=1')
        # The return value of `try_adopt_task_instances` is not checked because it relies on
        # `adopt_launched_task` mutating its arg. This should be refactored, but not right now.

        # Second adoption (queued_by_job_id and external_executor_id no longer match)
        for used_mock in (kube_client, mock_adopt_launched_task, mock_adopt_completed_pods):
            used_mock.reset_mock()

        # scheduler_job would have updated this after the first adoption
        task_instance.queued_by_job_id = "10"
        k8s_executor.scheduler_job_id = "20"

        k8s_executor.try_adopt_task_instances([task_instance])
        assert_adoption('airflow-worker=10')
Ejemplo n.º 3
0
 def test_create_pod_id(self):
     """Check that each (dag_id, task_id) case gives a valid pod id after it is made unique."""
     for dag_id, task_id in self._cases():
         base_pod_id = create_pod_id(dag_id, task_id)
         unique_pod_id = PodGenerator.make_unique_pod_id(base_pod_id)
         assert self._is_valid_pod_id(unique_pod_id)