def run_next(self, next_job: KubernetesJobType) -> None:
    """Launch one queued job as a worker pod in the cluster.

    Unpacks *next_job*, builds the pod via ``PodGenerator.construct_pod``,
    and submits it asynchronously; the pod watcher (not this method) tracks
    the job's subsequent status.
    """
    self.log.info('Kubernetes job is %s', str(next_job))
    job_key, job_command, executor_config = next_job
    dag_id, task_id, execution_date, try_number = job_key

    # Normalise a bare string command into the list form the pod expects.
    if isinstance(job_command, str):
        job_command = [job_command]

    worker_pod = PodGenerator.construct_pod(
        namespace=self.namespace,
        worker_uuid=self.worker_uuid,
        pod_id=self._create_pod_id(dag_id, task_id),
        dag_id=pod_generator.make_safe_label_value(dag_id),
        task_id=pod_generator.make_safe_label_value(task_id),
        try_number=try_number,
        date=self._datetime_to_label_safe_datestring(execution_date),
        command=job_command,
        kube_executor_config=executor_config,
        worker_config=self.worker_configuration_pod,
    )
    # Reconcile the pod generated by the Operator and the pod generated
    # by the .cfg file.
    self.log.debug("Kubernetes running for command %s", job_command)
    self.log.debug("Kubernetes launching image %s", worker_pod.spec.containers[0].image)

    # The watcher will monitor pods, so we do not block here.
    self.launcher.run_pod_async(worker_pod, **self.kube_config.kube_client_request_args)
    self.log.debug("Kubernetes Job created!")
def test_construct_pod_empty_executor_config(self, mock_uuid):
    """With no executor_config, construct_pod should mirror the base worker pod."""
    mock_uuid.return_value = self.static_uuid
    template_path = sys.path[0] + '/tests/kubernetes/pod_generator_base_with_secrets.yaml'
    worker_config = PodGenerator.deserialize_model_file(template_path)

    result = PodGenerator.construct_pod(
        dag_id='dag_id',
        task_id='task_id',
        pod_id='pod_id',
        kube_image='test-image',
        try_number=3,
        date=self.execution_date,
        args=['command'],
        pod_override_object=None,
        base_worker_pod=worker_config,
        namespace='namespace',
        scheduler_job_id='uuid',
    )
    sanitized_result = self.k8s_client.sanitize_for_serialization(result)

    # Mutate the base template in place into the pod we expect construct_pod
    # to have produced.
    worker_config.metadata.name = 'pod_id-' + self.static_uuid.hex
    worker_config.metadata.namespace = 'namespace'
    worker_config.metadata.annotations = self.annotations
    worker_config.metadata.labels = self.labels
    worker_config.metadata.labels['app'] = 'myapp'
    base_container = worker_config.spec.containers[0]
    base_container.image = "test-image"
    base_container.args = ["command"]
    base_container.env.append(k8s.V1EnvVar(name="AIRFLOW_IS_K8S_EXECUTOR_POD", value='True'))

    assert self.k8s_client.sanitize_for_serialization(worker_config) == sanitized_result
def test_construct_pod_empty_execuctor_config(self, mock_uuid):
    """A ``None`` executor config should fall through to the worker config alone.

    NOTE(review): "execuctor" is a typo in this test's name; kept as-is so
    that test selection by name keeps working.
    """
    mock_uuid.return_value = self.static_uuid
    base_resources = k8s.V1ResourceRequirements(limits={'cpu': '1m', 'memory': '1G'})
    worker_config = k8s.V1Pod(
        spec=k8s.V1PodSpec(
            containers=[k8s.V1Container(name='', resources=base_resources)]
        )
    )

    result = PodGenerator.construct_pod(
        'dag_id',
        'task_id',
        'pod_id',
        3,
        'date',
        ['command'],
        None,  # executor_config
        worker_config,
        'namespace',
        'uuid',
    )
    sanitized_result = self.k8s_client.sanitize_for_serialization(result)

    expected = {
        'apiVersion': 'v1',
        'kind': 'Pod',
        'metadata': self.metadata,
        'spec': {
            'containers': [{
                'args': [],
                'command': ['command'],
                'env': [],
                'envFrom': [],
                'imagePullPolicy': 'IfNotPresent',
                'name': 'base',
                'ports': [],
                'resources': {
                    'limits': {
                        'cpu': '1m',
                        'memory': '1G'
                    }
                },
                'volumeMounts': []
            }],
            'hostNetwork': False,
            'imagePullSecrets': [],
            'restartPolicy': 'Never',
            'volumes': []
        }
    }
    self.assertEqual(expected, sanitized_result)
def test_ensure_max_label_length(self, mock_uuid):
    """Labels are truncated to the 63-char k8s limit; annotations keep full values."""
    mock_uuid.return_value = self.static_uuid
    template_path = os.path.join(os.path.dirname(__file__), 'pod_generator_base_with_secrets.yaml')
    base_pod = PodGenerator.deserialize_model_file(template_path)

    long_value = 'a' * 512
    result = PodGenerator.construct_pod(
        dag_id=long_value,
        task_id=long_value,
        pod_id=long_value,
        kube_image=long_value,
        try_number=3,
        date=self.execution_date,
        args=['command'],
        namespace='namespace',
        scheduler_job_id=long_value,
        pod_override_object=None,
        base_worker_pod=base_pod,
    )

    # Pod name is truncated and suffixed with the (mocked) uuid.
    assert result.metadata.name == 'a' * 63 + '.' + self.static_uuid.hex
    # Every label value respects the Kubernetes length limit.
    for value in result.metadata.labels.values():
        assert len(value) <= 63
    # Annotations are not length-limited, so the full values survive.
    assert result.metadata.annotations['dag_id'] == long_value
    assert result.metadata.annotations['task_id'] == long_value
def test_make_pod_assert_labels(self):
    """The created pod must carry every expected identifying label."""
    self.kube_config.dags_folder = 'dags'
    worker_config = WorkerConfiguration(self.kube_config)
    execution_date = parser.parse('2019-11-21 11:08:22.920875')

    pod = PodGenerator.construct_pod(
        "test_dag_id",
        "test_task_id",
        "test_pod_id",
        1,
        execution_date,
        ["bash -c 'ls /'"],
        None,
        worker_config.as_pod(),
        "default",
        "sample-uuid",
    )

    expected_labels = {
        'airflow-worker': 'sample-uuid',
        'airflow_version': airflow_version.replace('+', '-'),
        'dag_id': 'test_dag_id',
        'execution_date': datetime_to_label_safe_datestring(execution_date),
        'kubernetes_executor': 'True',
        'my_label': 'label_id',
        'task_id': 'test_task_id',
        'try_number': '1',
    }
    self.assertEqual(pod.metadata.labels, expected_labels)
def generate_pod_yaml(args):
    """Generate one YAML file per task in the DAG.

    Used for inspecting the pod spec the KubernetesExecutor would create for
    each task. Files are written to ``<output_path>/airflow_yaml_output/`` as
    ``<dag_id>_<task_id>_<date>.yml``.

    :param args: CLI namespace providing ``execution_date``, ``subdir``,
        ``dag_id`` and ``output_path``.
    """
    execution_date = args.execution_date
    dag = get_dag(subdir=args.subdir, dag_id=args.dag_id)
    yaml_output_path = args.output_path
    kube_config = KubeConfig()

    # Hoisted out of the loop: these are invariant per invocation (the
    # original rebuilt the ApiClient, date string and output directory
    # for every task).
    api_client = ApiClient()
    date_string = pod_generator.datetime_to_label_safe_datestring(execution_date)
    output_dir = os.path.join(yaml_output_path, "airflow_yaml_output")
    os.makedirs(output_dir, exist_ok=True)

    for task in dag.tasks:
        ti = TaskInstance(task, execution_date)
        pod = PodGenerator.construct_pod(
            dag_id=args.dag_id,
            task_id=ti.task_id,
            pod_id=create_pod_id(args.dag_id, ti.task_id),
            try_number=ti.try_number,
            kube_image=kube_config.kube_image,
            date=ti.execution_date,
            args=ti.command_as_list(),
            pod_override_object=PodGenerator.from_obj(ti.executor_config),
            scheduler_job_id="worker-config",
            namespace=kube_config.executor_namespace,
            base_worker_pod=PodGenerator.deserialize_model_file(kube_config.pod_template_file),
        )
        # Apply the user-defined mutation hook before serialising.
        pod_mutation_hook(pod)
        yaml_file_name = f"{args.dag_id}_{ti.task_id}_{date_string}.yml"
        with open(os.path.join(output_dir, yaml_file_name), "w") as output:
            sanitized_pod = api_client.sanitize_for_serialization(pod)
            output.write(yaml.dump(sanitized_pod))
    print(f"YAML output can be found at {yaml_output_path}/airflow_yaml_output/")
def run_next(self, next_job: KubernetesJobType) -> None:
    """Launch one queued job as a worker pod in the cluster.

    Validates the task command, builds the pod, and submits it
    asynchronously; the pod watcher (not this method) tracks status.

    :raises ValueError: if the command is not an ``airflow tasks run`` command.
    """
    self.log.info('Kubernetes job is %s', str(next_job))
    job_key, job_command, executor_config = next_job
    dag_id, task_id, execution_date, try_number = job_key

    # Only genuine task-run commands may be launched as worker pods.
    if job_command[0:3] != ["airflow", "tasks", "run"]:
        raise ValueError('The command must start with ["airflow", "tasks", "run"].')

    worker_pod = PodGenerator.construct_pod(
        namespace=self.namespace,
        worker_uuid=self.worker_uuid,
        pod_id=self._create_pod_id(dag_id, task_id),
        dag_id=dag_id,
        task_id=task_id,
        try_number=try_number,
        date=execution_date,
        command=job_command,
        kube_executor_config=executor_config,
        worker_config=self.worker_configuration_pod,
    )
    # Reconcile the pod generated by the Operator and the pod generated
    # by the .cfg file.
    self.log.debug("Kubernetes running for command %s", job_command)
    self.log.debug("Kubernetes launching image %s", worker_pod.spec.containers[0].image)

    # The watcher will monitor pods, so we do not block here.
    self.launcher.run_pod_async(worker_pod, **self.kube_config.kube_client_request_args)
    self.log.debug("Kubernetes Job created!")
def test_construct_pod(self, mock_uuid):
    """A pod-override executor config is merged onto the base worker pod."""
    template_path = sys.path[0] + '/tests/kubernetes/pod_generator_base_with_secrets.yaml'
    base_worker_pod = PodGenerator.deserialize_model_file(template_path)
    mock_uuid.return_value = self.static_uuid

    override_resources = k8s.V1ResourceRequirements(limits={'cpu': '1m', 'memory': '1G'})
    executor_config = k8s.V1Pod(
        spec=k8s.V1PodSpec(
            containers=[k8s.V1Container(name='', resources=override_resources)]
        )
    )

    result = PodGenerator.construct_pod(
        dag_id=self.dag_id,
        task_id=self.task_id,
        pod_id='pod_id',
        kube_image='airflow_image',
        try_number=self.try_number,
        date=self.execution_date,
        args=['command'],
        pod_override_object=executor_config,
        base_worker_pod=base_worker_pod,
        namespace='test_namespace',
        scheduler_job_id='uuid',
    )

    # Mutate self.expected in place into the pod we expect the merge produced.
    expected = self.expected
    expected.metadata.labels = self.labels
    expected.metadata.labels['app'] = 'myapp'
    expected.metadata.annotations = self.annotations
    expected.metadata.name = 'pod_id.' + self.static_uuid.hex
    expected.metadata.namespace = 'test_namespace'
    expected_container = expected.spec.containers[0]
    expected_container.args = ['command']
    expected_container.image = 'airflow_image'
    expected_container.resources = {
        'limits': {
            'cpu': '1m',
            'memory': '1G'
        }
    }
    expected_container.env.append(
        k8s.V1EnvVar(name="AIRFLOW_IS_K8S_EXECUTOR_POD", value='True')
    )

    result_dict = self.k8s_client.sanitize_for_serialization(result)
    expected_dict = self.k8s_client.sanitize_for_serialization(self.expected)
    assert expected_dict == result_dict
def run_next(self, next_job: KubernetesJobType) -> None:
    """Launch one queued job as a worker pod in the cluster.

    Validates the command, loads the worker pod template, constructs the
    pod, and submits it asynchronously; the pod watcher tracks its status.

    :raises ValueError: if the command is not an ``airflow tasks run`` command.
    :raises AirflowException: if no valid worker template yaml can be loaded.
    """
    self.log.info('Kubernetes job is %s', str(next_job).replace("\n", " "))
    job_key, job_args, executor_config, pod_template_file = next_job
    dag_id, task_id, run_id, try_number, map_index = job_key

    # Only genuine task-run commands may be launched as worker pods.
    if job_args[0:3] != ["airflow", "tasks", "run"]:
        raise ValueError('The command must start with ["airflow", "tasks", "run"].')

    base_worker_pod = get_base_pod_from_template(pod_template_file, self.kube_config)
    if not base_worker_pod:
        raise AirflowException(
            f"could not find a valid worker template yaml at {self.kube_config.pod_template_file}"
        )

    pod = PodGenerator.construct_pod(
        namespace=self.namespace,
        scheduler_job_id=self.scheduler_job_id,
        pod_id=create_pod_id(dag_id, task_id),
        dag_id=dag_id,
        task_id=task_id,
        kube_image=self.kube_config.kube_image,
        try_number=try_number,
        map_index=map_index,
        date=None,
        run_id=run_id,
        args=job_args,
        pod_override_object=executor_config,
        base_worker_pod=base_worker_pod,
    )
    # Reconcile the pod generated by the Operator and the pod generated
    # by the .cfg file.
    self.log.debug("Kubernetes running for command %s", job_args)
    self.log.debug("Kubernetes launching image %s", pod.spec.containers[0].image)

    # The watcher will monitor pods, so we do not block here.
    self.run_pod_async(pod, **self.kube_config.kube_client_request_args)
    self.log.debug("Kubernetes Job created!")
def generate_pod_yaml(args):
    """Generate one YAML file per task in the DAG.

    Used for testing the pod output of the KubernetesExecutor; files are
    written to ``<output_path>/airflow_yaml_output/``.
    """
    from kubernetes.client.api_client import ApiClient

    from airflow.executors.kubernetes_executor import AirflowKubernetesScheduler, KubeConfig
    from airflow.kubernetes import pod_generator
    from airflow.kubernetes.pod_generator import PodGenerator
    from airflow.kubernetes.worker_configuration import WorkerConfiguration
    from airflow.settings import pod_mutation_hook

    execution_date = args.execution_date
    dag = get_dag(subdir=args.subdir, dag_id=args.dag_id)
    yaml_output_path = args.output_path
    kube_config = KubeConfig()

    for task in dag.tasks:
        ti = TaskInstance(task, execution_date)
        pod = PodGenerator.construct_pod(
            dag_id=args.dag_id,
            task_id=ti.task_id,
            pod_id=AirflowKubernetesScheduler._create_pod_id(  # pylint: disable=W0212
                args.dag_id, ti.task_id),
            try_number=ti.try_number,
            kube_image=kube_config.kube_image,
            date=ti.execution_date,
            command=ti.command_as_list(),
            pod_override_object=PodGenerator.from_obj(ti.executor_config),
            worker_uuid="worker-config",
            namespace=kube_config.executor_namespace,
            base_worker_pod=WorkerConfiguration(kube_config=kube_config).as_pod(),
        )
        # Apply the user-defined mutation hook before serialising.
        pod_mutation_hook(pod)
        api_client = ApiClient()
        date_string = pod_generator.datetime_to_label_safe_datestring(execution_date)
        yaml_file_name = f"{args.dag_id}_{ti.task_id}_{date_string}.yml"
        os.makedirs(os.path.dirname(yaml_output_path + "/airflow_yaml_output/"), exist_ok=True)
        with open(yaml_output_path + "/airflow_yaml_output/" + yaml_file_name, "w") as output:
            sanitized_pod = api_client.sanitize_for_serialization(pod)
            output.write(yaml.dump(sanitized_pod))
    print(f"YAML output can be found at {yaml_output_path}/airflow_yaml_output/")
def test_add_custom_label(self):
    """Labels and annotations from the worker config survive pod construction."""
    from kubernetes.client import models as k8s

    base_pod = k8s.V1Pod(
        metadata=k8s.V1ObjectMeta(
            labels={"airflow-test": "airflow-task-pod"},
            annotations={"my.annotation": "foo"},
        )
    )
    pod = PodGenerator.construct_pod(
        namespace="test",
        worker_uuid="test",
        pod_id="test",
        dag_id="test",
        task_id="test",
        try_number=1,
        date="23-07-2020",
        command="test",
        kube_executor_config=None,
        worker_config=base_pod,
    )

    self.assertIn("airflow-test", pod.metadata.labels)
    self.assertIn("my.annotation", pod.metadata.annotations)
def run_next(self, next_job):
    """Launch one queued job as a worker pod in the cluster.

    Validates the task command, builds the pod, logs the sanitized pod
    creation request, and submits the pod asynchronously; the pod watcher
    (not this method) tracks the job's subsequent status.

    :raises ValueError: if the command is not an ``airflow run`` command.
    """
    self.log.info('Kubernetes job is %s', str(next_job))
    job_key, job_command, executor_config = next_job
    dag_id, task_id, execution_date, try_number = job_key

    # Only genuine task-run commands may be launched as worker pods.
    if job_command[0:2] != ["airflow", "run"]:
        raise ValueError('The command must start with ["airflow", "run"].')

    worker_pod = PodGenerator.construct_pod(
        namespace=self.namespace,
        worker_uuid=self.worker_uuid,
        pod_id=self._create_pod_id(dag_id, task_id),
        dag_id=pod_generator.make_safe_label_value(dag_id),
        task_id=pod_generator.make_safe_label_value(task_id),
        try_number=try_number,
        kube_image=self.kube_config.kube_image,
        date=execution_date,
        command=job_command,
        pod_override_object=executor_config,
        base_worker_pod=self.worker_configuration_pod,
    )

    # Log the request body as it looks before the mutation hook runs.
    sanitized_pod = self.launcher._client.api_client.sanitize_for_serialization(worker_pod)
    self.log.debug('Pod Creation Request before mutation: \n%s', json.dumps(sanitized_pod, indent=2))

    # Reconcile the pod generated by the Operator and the pod generated
    # by the .cfg file.
    self.log.debug("Kubernetes running for command %s", job_command)
    self.log.debug("Kubernetes launching image %s", worker_pod.spec.containers[0].image)

    # The watcher will monitor pods, so we do not block here.
    self.launcher.run_pod_async(worker_pod, **self.kube_config.kube_client_request_args)
    self.log.debug("Kubernetes Job created!")
def test_construct_pod_with_mutation(self, mock_uuid):
    """Executor-config resources override the worker config; annotations and
    security context from the worker config survive the merge."""
    mock_uuid.return_value = self.static_uuid

    base_resources = k8s.V1ResourceRequirements(limits={'cpu': '1m', 'memory': '1G'})
    worker_config = k8s.V1Pod(
        metadata=k8s.V1ObjectMeta(
            name='gets-overridden-by-dynamic-args',
            annotations={'should': 'stay'},
        ),
        spec=k8s.V1PodSpec(
            containers=[
                k8s.V1Container(
                    name='doesnt-override',
                    resources=base_resources,
                    security_context=k8s.V1SecurityContext(run_as_user=1),
                )
            ]
        ),
    )
    override_resources = k8s.V1ResourceRequirements(limits={'cpu': '2m', 'memory': '2G'})
    executor_config = k8s.V1Pod(
        spec=k8s.V1PodSpec(
            containers=[
                k8s.V1Container(name='doesnt-override-either', resources=override_resources)
            ]
        )
    )

    result = PodGenerator.construct_pod(
        'dag_id',
        'task_id',
        'pod_id',
        3,
        'date',
        ['command'],
        executor_config,
        worker_config,
        'namespace',
        'uuid',
    )
    sanitized_result = self.k8s_client.sanitize_for_serialization(result)

    # The worker config's annotations are expected to survive the merge.
    self.metadata.update({'annotations': {'should': 'stay'}})
    expected = {
        'apiVersion': 'v1',
        'kind': 'Pod',
        'metadata': self.metadata,
        'spec': {
            'containers': [{
                'args': [],
                'command': ['command'],
                'env': [],
                'envFrom': [],
                'name': 'base',
                'ports': [],
                'resources': {
                    'limits': {
                        'cpu': '2m',
                        'memory': '2G'
                    }
                },
                'volumeMounts': [],
                'securityContext': {
                    'runAsUser': 1
                }
            }],
            'hostNetwork': False,
            'imagePullSecrets': [],
            'volumes': []
        }
    }
    self.assertEqual(expected, sanitized_result)