Ejemplo n.º 1
0
def get_base_pod_from_template(pod_template_file: Optional[str], kube_config: Any) -> k8s.V1Pod:
    """
    Reads either the pod_template_file set in the executor_config or the base pod_template_file
    set in the airflow.cfg to craft a "base pod" that will be used by the KubernetesExecutor

    :param pod_template_file: absolute path to a pod_template_file.yaml or None
    :param kube_config: The KubeConfig class generated by airflow that contains all kube metadata
    :return: a V1Pod that can be used as the base pod for k8s tasks
    """
    if pod_template_file:
        return PodGenerator.deserialize_model_file(pod_template_file)
    else:
        return PodGenerator.deserialize_model_file(kube_config.pod_template_file)
    def test_construct_pod_empty_executor_config(self, mock_uuid):
        path = sys.path[0] + '/tests/kubernetes/pod_generator_base_with_secrets.yaml'
        worker_config = PodGenerator.deserialize_model_file(path)
        mock_uuid.return_value = self.static_uuid
        executor_config = None

        result = PodGenerator.construct_pod(
            dag_id='dag_id',
            task_id='task_id',
            pod_id='pod_id',
            kube_image='test-image',
            try_number=3,
            date=self.execution_date,
            args=['command'],
            pod_override_object=executor_config,
            base_worker_pod=worker_config,
            namespace='namespace',
            scheduler_job_id='uuid',
        )
        sanitized_result = self.k8s_client.sanitize_for_serialization(result)
        worker_config.spec.containers[0].image = "test-image"
        worker_config.spec.containers[0].args = ["command"]
        worker_config.metadata.annotations = self.annotations
        worker_config.metadata.labels = self.labels
        worker_config.metadata.labels['app'] = 'myapp'
        worker_config.metadata.name = 'pod_id-' + self.static_uuid.hex
        worker_config.metadata.namespace = 'namespace'
        worker_config.spec.containers[0].env.append(
            k8s.V1EnvVar(name="AIRFLOW_IS_K8S_EXECUTOR_POD", value='True')
        )
        worker_config_result = self.k8s_client.sanitize_for_serialization(worker_config)
        assert worker_config_result == sanitized_result
Ejemplo n.º 3
0
    def test_ensure_max_label_length(self, mock_uuid):
        mock_uuid.return_value = self.static_uuid
        path = os.path.join(os.path.dirname(__file__),
                            'pod_generator_base_with_secrets.yaml')
        worker_config = PodGenerator.deserialize_model_file(path)

        result = PodGenerator.construct_pod(
            dag_id='a' * 512,
            task_id='a' * 512,
            pod_id='a' * 512,
            kube_image='a' * 512,
            try_number=3,
            date=self.execution_date,
            args=['command'],
            namespace='namespace',
            scheduler_job_id='a' * 512,
            pod_override_object=None,
            base_worker_pod=worker_config,
        )

        assert result.metadata.name == 'a' * 63 + '.' + self.static_uuid.hex
        for _, v in result.metadata.labels.items():
            assert len(v) <= 63

        assert 'a' * 512 == result.metadata.annotations['dag_id']
        assert 'a' * 512 == result.metadata.annotations['task_id']
Ejemplo n.º 4
0
def generate_pod_yaml(args):
    """Generates yaml files for each task in the DAG. Used for testing output of KubernetesExecutor"""
    execution_date = args.execution_date
    dag = get_dag(subdir=args.subdir, dag_id=args.dag_id)
    yaml_output_path = args.output_path
    kube_config = KubeConfig()
    for task in dag.tasks:
        ti = TaskInstance(task, execution_date)
        pod = PodGenerator.construct_pod(
            dag_id=args.dag_id,
            task_id=ti.task_id,
            pod_id=create_pod_id(args.dag_id, ti.task_id),
            try_number=ti.try_number,
            kube_image=kube_config.kube_image,
            date=ti.execution_date,
            args=ti.command_as_list(),
            pod_override_object=PodGenerator.from_obj(ti.executor_config),
            scheduler_job_id="worker-config",
            namespace=kube_config.executor_namespace,
            base_worker_pod=PodGenerator.deserialize_model_file(kube_config.pod_template_file),
        )
        pod_mutation_hook(pod)
        api_client = ApiClient()
        date_string = pod_generator.datetime_to_label_safe_datestring(execution_date)
        yaml_file_name = f"{args.dag_id}_{ti.task_id}_{date_string}.yml"
        os.makedirs(os.path.dirname(yaml_output_path + "/airflow_yaml_output/"), exist_ok=True)
        with open(yaml_output_path + "/airflow_yaml_output/" + yaml_file_name, "w") as output:
            sanitized_pod = api_client.sanitize_for_serialization(pod)
            output.write(yaml.dump(sanitized_pod))
    print(f"YAML output can be found at {yaml_output_path}/airflow_yaml_output/")
Ejemplo n.º 5
0
 def test_pod_template_file(self):
     fixture = sys.path[0] + '/tests/kubernetes/pod.yaml'
     self.kube_config.pod_template_file = fixture
     worker_config = WorkerConfiguration(self.kube_config)
     result = worker_config.as_pod()
     expected = PodGenerator.deserialize_model_file(fixture)
     expected.metadata.name = ANY
     self.assertEqual(expected, result)
Ejemplo n.º 6
0
    def test_construct_pod(self, mock_uuid):
        path = sys.path[
            0] + '/tests/kubernetes/pod_generator_base_with_secrets.yaml'
        worker_config = PodGenerator.deserialize_model_file(path)
        mock_uuid.return_value = self.static_uuid
        executor_config = k8s.V1Pod(spec=k8s.V1PodSpec(containers=[
            k8s.V1Container(name='',
                            resources=k8s.V1ResourceRequirements(limits={
                                'cpu': '1m',
                                'memory': '1G'
                            }))
        ]))

        result = PodGenerator.construct_pod(
            dag_id=self.dag_id,
            task_id=self.task_id,
            pod_id='pod_id',
            kube_image='airflow_image',
            try_number=self.try_number,
            date=self.execution_date,
            args=['command'],
            pod_override_object=executor_config,
            base_worker_pod=worker_config,
            namespace='test_namespace',
            scheduler_job_id='uuid',
        )
        expected = self.expected
        expected.metadata.labels = self.labels
        expected.metadata.labels['app'] = 'myapp'
        expected.metadata.annotations = self.annotations
        expected.metadata.name = 'pod_id.' + self.static_uuid.hex
        expected.metadata.namespace = 'test_namespace'
        expected.spec.containers[0].args = ['command']
        expected.spec.containers[0].image = 'airflow_image'
        expected.spec.containers[0].resources = {
            'limits': {
                'cpu': '1m',
                'memory': '1G'
            }
        }
        expected.spec.containers[0].env.append(
            k8s.V1EnvVar(
                name="AIRFLOW_IS_K8S_EXECUTOR_POD",
                value='True',
            ))
        result_dict = self.k8s_client.sanitize_for_serialization(result)
        expected_dict = self.k8s_client.sanitize_for_serialization(
            self.expected)

        assert expected_dict == result_dict
Ejemplo n.º 7
0
    def run_next(self, next_job: KubernetesJobType) -> None:
        """
        The run_next command will check the task_queue for any un-run jobs.
        It will then create a unique job-id, launch that job in the cluster,
        and store relevant info in the current_jobs map so we can track the job's
        status
        """
        self.log.info('Kubernetes job is %s', str(next_job))
        key, command, kube_executor_config = next_job
        dag_id, task_id, execution_date, try_number = key

        if command[0:3] != ["airflow", "tasks", "run"]:
            raise ValueError(
                'The command must start with ["airflow", "tasks", "run"].')

        base_worker_pod = PodGenerator.deserialize_model_file(
            self.kube_config.pod_template_file)
        if not base_worker_pod:
            raise AirflowException(
                f"could not find a valid worker template yaml at {self.kube_config.pod_template_file}"
            )

        pod = PodGenerator.construct_pod(
            namespace=self.namespace,
            scheduler_job_id=self.scheduler_job_id,
            pod_id=create_pod_id(dag_id, task_id),
            dag_id=dag_id,
            task_id=task_id,
            kube_image=self.kube_config.kube_image,
            try_number=try_number,
            date=execution_date,
            command=command,
            pod_override_object=kube_executor_config,
            base_worker_pod=base_worker_pod,
        )
        # Reconcile the pod generated by the Operator and the Pod
        # generated by the .cfg file
        self.log.debug("Kubernetes running for command %s", command)
        self.log.debug("Kubernetes launching image %s",
                       pod.spec.containers[0].image)

        # the watcher will monitor pods, so we do not block.
        self.launcher.run_pod_async(
            pod, **self.kube_config.kube_client_request_args)
        self.log.debug("Kubernetes Job created!")
    def test_deserialize_model_string(self):
        fixture = """
apiVersion: v1
kind: Pod
metadata:
  name: memory-demo
  namespace: mem-example
spec:
  containers:
    - name: memory-demo-ctr
      image: apache/airflow:stress-2020.07.10-1.0.4
      resources:
        limits:
          memory: "200Mi"
        requests:
          memory: "100Mi"
      command: ["stress"]
      args: ["--vm", "1", "--vm-bytes", "150M", "--vm-hang", "1"]
        """
        result = PodGenerator.deserialize_model_file(fixture)
        sanitized_res = self.k8s_client.sanitize_for_serialization(result)
        assert sanitized_res == self.deserialize_result
 def test_deserialize_model_file(self):
     path = sys.path[0] + '/tests/kubernetes/pod.yaml'
     result = PodGenerator.deserialize_model_file(path)
     sanitized_res = self.k8s_client.sanitize_for_serialization(result)
     assert sanitized_res == self.deserialize_result
 def test_deserialize_model_file(self):
     fixture = 'tests/kubernetes/pod.yaml'
     result = PodGenerator.deserialize_model_file(fixture)
     sanitized_res = self.k8s_client.sanitize_for_serialization(result)
     self.assertEqual(sanitized_res, self.deserialize_result)