def _get_volume_mounts(self):
    """Build the V1VolumeMount list for the worker pod.

    Starts from the DAGs and logs mounts, applies optional sub-paths from
    kube_config, drops the DAGs mount when DAGs are baked into the image,
    and adds airflow.cfg / airflow_local_settings.py mounts when the
    corresponding configmaps are configured.

    :return: list of ``k8s.V1VolumeMount`` objects
    """
    # Keyed by volume name so later config tweaks can address entries directly.
    volume_mounts = {
        self.dags_volume_name: k8s.V1VolumeMount(
            name=self.dags_volume_name,
            mount_path=self.generate_dag_volume_mount_path(),
            read_only=True,
        ),
        self.logs_volume_name: k8s.V1VolumeMount(
            name=self.logs_volume_name,
            mount_path=self.worker_airflow_logs,
        )
    }
    if self.kube_config.dags_volume_subpath:
        volume_mounts[self.dags_volume_name].sub_path = self.kube_config.dags_volume_subpath
    if self.kube_config.logs_volume_subpath:
        volume_mounts[self.logs_volume_name].sub_path = self.kube_config.logs_volume_subpath
    # DAGs shipped inside the image need no separate mount.
    if self.kube_config.dags_in_image:
        del volume_mounts[self.dags_volume_name]
    # Mount the airflow.cfg file via a configmap the user has specified
    if self.kube_config.airflow_configmap:
        config_volume_name = 'airflow-config'
        config_path = '{}/airflow.cfg'.format(self.worker_airflow_home)
        volume_mounts[config_volume_name] = k8s.V1VolumeMount(
            name=config_volume_name,
            mount_path=config_path,
            sub_path='airflow.cfg',
            read_only=True)
    if self.kube_config.airflow_local_settings_configmap:
        config_path = '{}/config/airflow_local_settings.py'.format(
            self.worker_airflow_home)
        if self.kube_config.airflow_local_settings_configmap != self.kube_config.airflow_configmap:
            # Local settings live in their own configmap -> dedicated volume.
            config_volume_name = 'airflow-local-settings'
            volume_mounts[config_volume_name] = k8s.V1VolumeMount(
                name=config_volume_name,
                mount_path=config_path,
                sub_path='airflow_local_settings.py',
                read_only=True)
        else:
            # Same configmap as airflow.cfg: reuse the 'airflow-config' volume
            # but register under a distinct dict key to keep both mounts.
            volume_mounts['airflow-local-settings'] = k8s.V1VolumeMount(
                name='airflow-config',
                mount_path=config_path,
                sub_path='airflow_local_settings.py',
                read_only=True)
    return list(volume_mounts.values())
def test_reconcile_pods(self, mock_uuid):
    """reconcile_pods overlays a mutator pod on a template-file base pod.

    The mutator's metadata (name, labels), container command/volume mounts
    and volumes must all appear in the merged result.
    """
    mock_uuid.return_value = self.static_uuid
    path = sys.path[0] + '/tests/kubernetes/pod_generator_base_with_secrets.yaml'
    base_pod = PodGenerator(pod_template_file=path, extract_xcom=False).gen_pod()
    # Pod that overrides name/labels and replaces container command + mounts.
    mutator_pod = k8s.V1Pod(
        metadata=k8s.V1ObjectMeta(
            name="name2",
            labels={"bar": "baz"},
        ),
        spec=k8s.V1PodSpec(
            containers=[
                k8s.V1Container(
                    image='',
                    name='name',
                    command=['/bin/command2.sh', 'arg2'],
                    volume_mounts=[
                        k8s.V1VolumeMount(
                            mount_path="/foo/",
                            name="example-kubernetes-test-volume2")
                    ],
                )
            ],
            volumes=[
                k8s.V1Volume(
                    host_path=k8s.V1HostPathVolumeSource(path="/tmp/"),
                    name="example-kubernetes-test-volume2",
                )
            ],
        ),
    )
    result = PodGenerator.reconcile_pods(base_pod, mutator_pod)
    # Mutate the shared expected fixture to mirror what the mutator changed.
    expected: k8s.V1Pod = self.expected
    expected.metadata.name = "name2"
    expected.metadata.labels['bar'] = 'baz'
    expected.spec.volumes = expected.spec.volumes or []
    expected.spec.volumes.append(
        k8s.V1Volume(host_path=k8s.V1HostPathVolumeSource(path="/tmp/"),
                     name="example-kubernetes-test-volume2"))
    base_container: k8s.V1Container = expected.spec.containers[0]
    base_container.command = ['/bin/command2.sh', 'arg2']
    base_container.volume_mounts = [
        k8s.V1VolumeMount(mount_path="/foo/", name="example-kubernetes-test-volume2")
    ]
    base_container.name = "name"
    expected.spec.containers[0] = base_container
    # Compare serialized dicts so k8s model __eq__ quirks don't matter.
    result_dict = self.k8s_client.sanitize_for_serialization(result)
    expected_dict = self.k8s_client.sanitize_for_serialization(expected)
    assert result_dict == expected_dict
def test_to_volume_secret(self, mock_uuid):
    """A 'volume'-type Secret yields a (V1Volume, V1VolumeMount) pair."""
    mock_uuid.return_value = '0'
    secret = Secret('volume', '/etc/foo', 'secret_b')
    expected_volume = k8s.V1Volume(
        name='secretvol0',
        secret=k8s.V1SecretVolumeSource(secret_name='secret_b'),
    )
    expected_mount = k8s.V1VolumeMount(
        mount_path='/etc/foo', name='secretvol0', read_only=True
    )
    assert secret.to_volume_secret() == (expected_volume, expected_mount)
class PodDefaults:
    """
    Static defaults for Pods
    """

    # Path where the xcom sidecar and the main container share an emptyDir.
    XCOM_MOUNT_PATH = '/airflow/xcom'
    SIDECAR_CONTAINER_NAME = 'airflow-xcom-sidecar'
    # Sleep-forever loop; traps SIGINT so pod teardown exits cleanly.
    XCOM_CMD = 'trap "exit 0" INT; while true; do sleep 30; done;'
    VOLUME_MOUNT = k8s.V1VolumeMount(
        name='xcom',
        mount_path=XCOM_MOUNT_PATH
    )
    VOLUME = k8s.V1Volume(
        name='xcom',
        empty_dir=k8s.V1EmptyDirVolumeSource()
    )
    # Minimal sidecar that keeps the shared xcom volume alive until read.
    SIDECAR_CONTAINER = k8s.V1Container(
        name=SIDECAR_CONTAINER_NAME,
        command=['sh', '-c', XCOM_CMD],
        image='alpine',
        volume_mounts=[VOLUME_MOUNT],
        resources=k8s.V1ResourceRequirements(
            requests={
                "cpu": "1m",
            }
        ),
    )
def test_gen_pod_extract_xcom(self, mock_uuid):
    """gen_pod with extract_xcom=True adds the xcom sidecar, its mount, and
    the shared emptyDir volume to the generated pod.

    Fix: the original computed ``result_dict`` immediately after ``gen_pod``
    and then recomputed it later, discarding the first value — the dead
    first assignment is removed.
    """
    mock_uuid.return_value = self.static_uuid
    path = sys.path[0] + '/tests/kubernetes/pod_generator_base_with_secrets.yaml'
    pod_generator = PodGenerator(pod_template_file=path, extract_xcom=True)
    result = pod_generator.gen_pod()
    # Expected sidecar container in serialized (camelCase) form.
    container_two = {
        'name': 'airflow-xcom-sidecar',
        'image': "alpine",
        'command': ['sh', '-c', PodDefaults.XCOM_CMD],
        'volumeMounts': [{'name': 'xcom', 'mountPath': '/airflow/xcom'}],
        'resources': {'requests': {'cpu': '1m'}},
    }
    self.expected.spec.containers.append(container_two)
    # The base container gains the shared xcom mount.
    base_container: k8s.V1Container = self.expected.spec.containers[0]
    base_container.volume_mounts = base_container.volume_mounts or []
    base_container.volume_mounts.append(
        k8s.V1VolumeMount(name="xcom", mount_path="/airflow/xcom"))
    self.expected.spec.containers[0] = base_container
    # The pod gains the backing emptyDir volume.
    self.expected.spec.volumes = self.expected.spec.volumes or []
    self.expected.spec.volumes.append(
        k8s.V1Volume(
            name='xcom',
            empty_dir={},
        )
    )
    result_dict = self.k8s_client.sanitize_for_serialization(result)
    expected_dict = self.k8s_client.sanitize_for_serialization(self.expected)
    assert result_dict == expected_dict
def to_volume_secret(self) -> Tuple[k8s.V1Volume, k8s.V1VolumeMount]:
    """Converts to volume secret"""
    name = f'secretvol{uuid.uuid4()}'
    # Build the secret source first so optional key projections can be set
    # before the volume is assembled.
    source = k8s.V1SecretVolumeSource(secret_name=self.secret)
    if self.items:
        source.items = self.items
    volume = k8s.V1Volume(name=name, secret=source)
    mount = k8s.V1VolumeMount(mount_path=self.deploy_target, name=name, read_only=True)
    return (volume, mount)
def to_volume_secret(self) -> Tuple[k8s.V1Volume, k8s.V1VolumeMount]:
    """Expose this secret as a read-only file volume plus its mount."""
    name = 'secretvol{}'.format(uuid.uuid4())
    volume = k8s.V1Volume(
        name=name,
        secret=k8s.V1SecretVolumeSource(secret_name=self.secret),
    )
    mount = k8s.V1VolumeMount(mount_path=self.deploy_target, name=name, read_only=True)
    return (volume, mount)
def to_k8s_client_obj(self) -> k8s.V1VolumeMount:
    """
    Converts to k8s object.

    :return: Volume Mount k8s object
    """
    mount_kwargs = {
        'name': self.name,
        'mount_path': self.mount_path,
        'sub_path': self.sub_path,
        'read_only': self.read_only,
    }
    return k8s.V1VolumeMount(**mount_kwargs)
def test_only_mount_sub_secret(self, mock_uuid):
    """Key projections passed via ``items`` land in the V1SecretVolumeSource."""
    mock_uuid.return_value = '0'
    items = [k8s.V1KeyToPath(key="my-username", path="/extra/path")]
    secret = Secret('volume', '/etc/foo', 'secret_b', items=items)
    expected_volume = k8s.V1Volume(
        name='secretvol0',
        secret=k8s.V1SecretVolumeSource(secret_name='secret_b', items=items),
    )
    expected_mount = k8s.V1VolumeMount(
        mount_path='/etc/foo', name='secretvol0', read_only=True
    )
    assert secret.to_volume_secret() == (expected_volume, expected_mount)
def test_init_container(self):
    """KubernetesPodOperator forwards init containers (with env + mounts)
    and their PVC-backed volume into the launched pod spec."""
    # GIVEN
    volume_mounts = [
        k8s.V1VolumeMount(mount_path='/etc/foo', name='test-volume', sub_path=None, read_only=True)
    ]
    init_environments = [
        k8s.V1EnvVar(name='key1', value='value1'),
        k8s.V1EnvVar(name='key2', value='value2'),
    ]
    init_container = k8s.V1Container(
        name="init-container",
        image="ubuntu:16.04",
        env=init_environments,
        volume_mounts=volume_mounts,
        command=["bash", "-cx"],
        args=["echo 10"],
    )
    volume = k8s.V1Volume(
        name='test-volume',
        persistent_volume_claim=k8s.V1PersistentVolumeClaimVolumeSource(claim_name='test-volume'),
    )
    # Serialized (camelCase) form we expect back from the API client.
    expected_init_container = {
        'name': 'init-container',
        'image': 'ubuntu:16.04',
        'command': ['bash', '-cx'],
        'args': ['echo 10'],
        'env': [{'name': 'key1', 'value': 'value1'}, {'name': 'key2', 'value': 'value2'}],
        'volumeMounts': [{'mountPath': '/etc/foo', 'name': 'test-volume', 'readOnly': True}],
    }
    k = KubernetesPodOperator(
        namespace='default',
        image="ubuntu:16.04",
        cmds=["bash", "-cx"],
        arguments=["echo 10"],
        labels={"foo": "bar"},
        # Randomized pod name avoids collisions across test runs.
        name="test-" + str(random.randint(0, 1000000)),
        task_id="task" + self.get_current_task_name(),
        volumes=[volume],
        init_containers=[init_container],
        in_cluster=False,
        do_xcom_push=False,
    )
    context = create_context(k)
    k.execute(context)
    actual_pod = self.api_client.sanitize_for_serialization(k.pod)
    self.expected_pod['spec']['initContainers'] = [expected_init_container]
    self.expected_pod['spec']['volumes'] = [
        {'name': 'test-volume', 'persistentVolumeClaim': {'claimName': 'test-volume'}}
    ]
    assert self.expected_pod == actual_pod
def to_k8s_client_obj(self):
    """
    Converts to k8s object.

    :return Volume Mount k8s object
    """
    # Imported lazily so the kubernetes client is only required when used.
    import kubernetes.client.models as k8s

    mount_kwargs = {
        'name': self.name,
        'mount_path': self.mount_path,
        'sub_path': self.sub_path,
        'read_only': self.read_only,
    }
    return k8s.V1VolumeMount(**mount_kwargs)
def test_to_volume_secret(self, mock_uuid):
    """With uuid patched to a fixed value, the generated volume/mount names
    are deterministic and match the Secret's configuration."""
    static_uuid = uuid.UUID('cf4a56d2-8101-4217-b027-2af6216feb48')
    mock_uuid.return_value = static_uuid
    secret = Secret('volume', '/etc/foo', 'secret_b')
    expected_volume = k8s.V1Volume(
        name='secretvol' + str(static_uuid),
        secret=k8s.V1SecretVolumeSource(secret_name='secret_b'),
    )
    expected_mount = k8s.V1VolumeMount(
        mount_path='/etc/foo',
        name='secretvol' + str(static_uuid),
        read_only=True,
    )
    self.assertEqual(secret.to_volume_secret(), (expected_volume, expected_mount))
def to_volume_secret(secret: "Secret") -> typing.Tuple[k8s.V1Volume, k8s.V1VolumeMount]:
    """Converts to volume secret.

    :param secret: Secret describing the k8s secret name and deploy target.
    :return: ``(V1Volume, V1VolumeMount)`` pair exposing the secret read-only.
    """
    vol_id = f"secretvol{uuid.uuid4()}"
    volume = k8s.V1Volume(
        name=vol_id, secret=k8s.V1SecretVolumeSource(secret_name=secret.secret)
    )
    # Fix: the original left key-projection support commented out, and the
    # comment referenced ``self`` although this is a free function. Use
    # getattr so Secret objects without an ``items`` attribute keep working.
    items = getattr(secret, "items", None)
    if items:
        volume.secret.items = items
    return (
        volume,
        k8s.V1VolumeMount(mount_path=secret.deploy_target, name=vol_id, read_only=True),
    )
def to_volume_secret(self):
    """Build the (volume, mount) pair for this secret.

    The mount is ``None`` when no deploy target was configured.
    """
    # Imported lazily so the kubernetes client is only required when used.
    import kubernetes.client.models as k8s

    vol_id = 'secretvol{}'.format(uuid.uuid4())
    mount = None
    if self.deploy_target:
        mount = k8s.V1VolumeMount(mount_path=self.deploy_target,
                                  name=vol_id,
                                  read_only=True)
    volume = k8s.V1Volume(
        name=vol_id,
        secret=k8s.V1SecretVolumeSource(secret_name=self.secret),
    )
    return (volume, mount)
def test_volume_mount(self):
    """End-to-end: a PVC volume mounted read-write is usable from the pod —
    a file written to the mount is read back and logged."""
    with mock.patch.object(PodLauncher, 'log') as mock_logger:
        volume_mount = k8s.V1VolumeMount(name='test-volume',
                                         mount_path='/tmp/test_volume',
                                         sub_path=None,
                                         read_only=False)
        volume = k8s.V1Volume(
            name='test-volume',
            persistent_volume_claim=k8s.V1PersistentVolumeClaimVolumeSource(claim_name='test-volume'),
        )
        # Write then read a file on the mounted volume inside the container.
        args = [
            "echo \"retrieved from mount\" > /tmp/test_volume/test.txt "
            "&& cat /tmp/test_volume/test.txt"
        ]
        k = KubernetesPodOperator(
            namespace='default',
            image="ubuntu:16.04",
            cmds=["bash", "-cx"],
            arguments=args,
            labels={"foo": "bar"},
            volume_mounts=[volume_mount],
            volumes=[volume],
            name="test-" + str(random.randint(0, 1000000)),
            task_id="task" + self.get_current_task_name(),
            in_cluster=False,
            do_xcom_push=False,
        )
        context = create_context(k)
        k.execute(context=context)
        # The container's stdout is forwarded through the launcher's logger.
        mock_logger.info.assert_any_call('retrieved from mount')
        actual_pod = self.api_client.sanitize_for_serialization(k.pod)
        self.expected_pod['spec']['containers'][0]['args'] = args
        self.expected_pod['spec']['containers'][0]['volumeMounts'] = [{
            'name': 'test-volume',
            'mountPath': '/tmp/test_volume',
            'readOnly': False
        }]
        self.expected_pod['spec']['volumes'] = [{
            'name': 'test-volume',
            'persistentVolumeClaim': {
                'claimName': 'test-volume'
            }
        }]
        self.assertEqual(self.expected_pod, actual_pod)
def attach_volume_mount(pod: k8s.V1Pod, volume_mount_spec: typing.Dict[str, typing.Any]):
    """Return a copy of *pod* whose first container has an extra volume mount.

    :param pod: pod to copy; the input is NOT modified.
    :param volume_mount_spec: dict with camelCase keys ``name``, ``mountPath``,
        ``subPath``, ``readOnly`` (missing keys become ``None``).
    :return: the modified deep copy of *pod*.
    """
    cp_pod = copy.deepcopy(pod)
    volume_mount = k8s.V1VolumeMount(
        name=volume_mount_spec.get("name"),
        mount_path=volume_mount_spec.get("mountPath"),
        sub_path=volume_mount_spec.get("subPath"),
        read_only=volume_mount_spec.get("readOnly"),
    )
    # BUG FIX: the original seeded the copy's mount list from ``pod`` (the
    # caller's object) instead of ``cp_pod``, so the append below mutated the
    # input pod's container. Operate strictly on the deep copy.
    container = cp_pod.spec.containers[0]
    container.volume_mounts = container.volume_mounts or []
    container.volume_mounts.append(volume_mount)
    return cp_pod
class PodDefaults:
    """
    Static defaults for the PodGenerator
    """

    # Path where the xcom sidecar and the main container share an emptyDir.
    XCOM_MOUNT_PATH = '/airflow/xcom'
    SIDECAR_CONTAINER_NAME = 'airflow-xcom-sidecar'
    # Python sleep-forever loop; KeyboardInterrupt (SIGINT) exits cleanly.
    XCOM_CMD = """import time
while True:
    try:
        time.sleep(3600)
    except KeyboardInterrupt:
        exit(0)
"""
    VOLUME_MOUNT = k8s.V1VolumeMount(name='xcom', mount_path=XCOM_MOUNT_PATH)
    VOLUME = k8s.V1Volume(name='xcom', empty_dir=k8s.V1EmptyDirVolumeSource())
    # Sidecar that keeps the shared xcom volume alive until the result is read.
    SIDECAR_CONTAINER = k8s.V1Container(name=SIDECAR_CONTAINER_NAME,
                                        command=['python', '-c', XCOM_CMD],
                                        image='python:3.5-alpine',
                                        volume_mounts=[VOLUME_MOUNT])
def get_kubeconfig_volume_mount():
    """Read-only mount exposing the kubeconfig directory to the pod."""
    mount = k8s.V1VolumeMount(
        name="kubeconfig",
        mount_path="/home/airflow/.kube",
        read_only=True,
    )
    return mount
def pipeline_definition(
        hydrosphere_name="local",
        hydrosphere_address="http://hydro-serving-sidecar-serving.kubeflow.svc.cluster.local:8080",
        data_directory='/data/mnist',
        models_directory="/models/mnist",
        learning_rate="0.01",
        learning_steps="5000",
        batch_size="256",
        warmpup_count="100",
        model_name="mnist",
        application_name="mnist-app",
        signature_name="predict",
        acceptable_accuracy="0.90",
):
    """Kubeflow pipeline: download MNIST, train, upload, deploy, test, clean.

    Stage parameters are passed to containers via workflow-parameter
    templated env vars; data and model artifacts move through two PVCs.
    """
    # Shared PVC-backed volumes for datasets and trained models.
    data_pvc = k8s.V1PersistentVolumeClaimVolumeSource(claim_name="data")
    models_pvc = k8s.V1PersistentVolumeClaimVolumeSource(claim_name="models")
    data_volume = k8s.V1Volume(name="data", persistent_volume_claim=data_pvc)
    models_volume = k8s.V1Volume(name="models", persistent_volume_claim=models_pvc)
    data_volume_mount = k8s.V1VolumeMount(
        mount_path="{{workflow.parameters.data-directory}}", name="data")
    models_volume_mount = k8s.V1VolumeMount(
        mount_path="{{workflow.parameters.models-directory}}", name="models")
    # Env vars resolved at run time from the workflow's parameters.
    hydrosphere_address_env = k8s.V1EnvVar(
        name="CLUSTER_ADDRESS", value="{{workflow.parameters.hydrosphere-address}}")
    hydrosphere_name_env = k8s.V1EnvVar(
        name="CLUSTER_NAME", value="{{workflow.parameters.hydrosphere-name}}")
    data_directory_env = k8s.V1EnvVar(
        name="MNIST_DATA_DIR", value="{{workflow.parameters.data-directory}}")
    models_directory_env = k8s.V1EnvVar(
        name="MNIST_MODELS_DIR", value="{{workflow.parameters.models-directory}}")
    model_name_env = k8s.V1EnvVar(name="MODEL_NAME", value="{{workflow.parameters.model-name}}")
    application_name_env = k8s.V1EnvVar(
        name="APPLICATION_NAME", value="{{workflow.parameters.application-name}}")
    signature_name_env = k8s.V1EnvVar(
        name="SIGNATURE_NAME", value="{{workflow.parameters.signature-name}}")
    acceptable_accuracy_env = k8s.V1EnvVar(
        name="ACCEPTABLE_ACCURACY", value="{{workflow.parameters.acceptable-accuracy}}")
    learning_rate_env = k8s.V1EnvVar(
        name="LEARNING_RATE", value="{{workflow.parameters.learning-rate}}")
    learning_steps_env = k8s.V1EnvVar(
        name="LEARNING_STEPS", value="{{workflow.parameters.learning-steps}}")
    batch_size_env = k8s.V1EnvVar(name="BATCH_SIZE", value="{{workflow.parameters.batch-size}}")
    warmup_count_env = k8s.V1EnvVar(
        name="WARMUP_IMAGES_AMOUNT", value="{{workflow.parameters.warmpup-count}}")

    # 1. Download MNIST data
    download = dsl.ContainerOp(
        name="download", image="tidylobster/mnist-pipeline-download:latest")
    download.add_volume(data_volume)
    download.add_volume_mount(data_volume_mount)
    download.add_env_variable(data_directory_env)

    # 2. Train and save a MNIST classifier using Tensorflow
    train = dsl.ContainerOp(name="train", image="tidylobster/mnist-pipeline-train:latest")
    train.after(download)
    train.set_memory_request('2G')
    train.set_cpu_request('1')
    train.add_volume(data_volume)
    train.add_volume(models_volume)
    train.add_volume_mount(data_volume_mount)
    train.add_volume_mount(models_volume_mount)
    train.add_env_variable(data_directory_env)
    train.add_env_variable(models_directory_env)
    train.add_env_variable(learning_rate_env)
    train.add_env_variable(learning_steps_env)
    train.add_env_variable(batch_size_env)

    # 3. Upload trained model to the cluster
    upload = dsl.ContainerOp(
        name="upload",
        image="tidylobster/mnist-pipeline-upload:latest",
        # The produced model version feeds the deploy stage below.
        file_outputs={"model_version": "/model_version.txt"})
    upload.after(train)
    upload.add_volume(models_volume)
    upload.add_volume_mount(models_volume_mount)
    upload.add_env_variable(models_directory_env)
    upload.add_env_variable(model_name_env)
    upload.add_env_variable(hydrosphere_name_env)
    upload.add_env_variable(hydrosphere_address_env)

    # 4. Deploy application
    deploy = dsl.ContainerOp(name="deploy",
                             image="tidylobster/mnist-pipeline-deploy:latest",
                             arguments=[upload.outputs["model_version"]])
    deploy.after(upload)
    deploy.add_env_variable(hydrosphere_name_env)
    deploy.add_env_variable(hydrosphere_address_env)
    deploy.add_env_variable(application_name_env)
    deploy.add_env_variable(model_name_env)

    # 5. Test the model
    test = dsl.ContainerOp(name="test", image="tidylobster/mnist-pipeline-test:latest")
    test.after(deploy)
    test.add_volume(data_volume)
    test.add_volume_mount(data_volume_mount)
    test.add_env_variable(data_directory_env)
    test.add_env_variable(hydrosphere_address_env)
    test.add_env_variable(application_name_env)
    test.add_env_variable(signature_name_env)
    test.add_env_variable(warmup_count_env)
    test.add_env_variable(acceptable_accuracy_env)

    # 6. Clean environment
    clean = dsl.ContainerOp(name="clean", image="tidylobster/mnist-pipeline-clean:latest")
    clean.after(test)
    clean.add_volume(data_volume)
    clean.add_volume_mount(data_volume_mount)
    clean.add_env_variable(data_directory_env)
    clean.add_volume(models_volume)
    clean.add_volume_mount(models_volume_mount)
    clean.add_env_variable(models_directory_env)
)  # NOTE(review): closes a definition that starts above this chunk.

# Define Kubernetes namespace to execute DAG in
namespace = 'airflow'

## Define volume details (change values as necessary to match your environment)

# Dataset volume: pre-existing PVC mounted read-write at /mnt/dataset.
dataset_volume_pvc_existing = 'dataset-vol'
dataset_volume = k8s.V1Volume(
    name=dataset_volume_pvc_existing,
    persistent_volume_claim=k8s.V1PersistentVolumeClaimVolumeSource(claim_name=dataset_volume_pvc_existing),
)
dataset_volume_mount = k8s.V1VolumeMount(
    name=dataset_volume_pvc_existing, mount_path='/mnt/dataset', sub_path=None, read_only=False
)

# Model volume: pre-existing PVC mounted read-write at /mnt/model.
model_volume_pvc_existing = 'airflow-model-vol'
model_volume = k8s.V1Volume(
    name=model_volume_pvc_existing,
    persistent_volume_claim=k8s.V1PersistentVolumeClaimVolumeSource(claim_name=model_volume_pvc_existing),
)
model_volume_mount = k8s.V1VolumeMount(
    name=model_volume_pvc_existing, mount_path='/mnt/model', sub_path=None, read_only=False
)
def __init__(  # pylint: disable=too-many-arguments
    self,
    node_name: str,
    namespace: str,
    image: str,
    image_pull_policy: str,
    env: str,
    task_id: str,
    pipeline: str = "__default__",
    pvc_name: Optional[str] = None,
    startup_timeout: int = 600,
    volume_disabled: bool = False,
    volume_owner: int = 0,
    mlflow_enabled: bool = True,
    requests_cpu: Optional[str] = None,
    requests_memory: Optional[str] = None,
    limits_cpu: Optional[str] = None,
    limits_memory: Optional[str] = None,
    node_selector_labels: Optional[Dict[str, str]] = None,
    labels: Optional[Dict[str, str]] = None,
    tolerations: Optional[List[Dict[str, str]]] = None,
    annotations: Optional[Dict[str, str]] = None,
    secrets: Optional[List[Secret]] = None,
    source: str = "/home/kedro/data",
    parameters: Optional[str] = "",
):
    """
    :param node_name: name from the kedro pipeline
    :param namespace: k8s namespace the pod will execute in
    :param pvc_name: name of the shared storage attached to this pod
    :param image: image to be mounted
    :param image_pull_policy: k8s image pull policy
    :param env: kedro pipeline configuration name, provided with '-e' option
    :param pipeline: kedro pipeline name, provided with '--pipeline' option
    :param task_id: Airflow id to override
    :param startup_timeout: after the amount provided in seconds the pod
        start is timed out
    :param volume_disabled: if set to true, shared volume is not attached
    :param volume_owner: if volume is not disabled, fs group associated with
        this pod
    :param mlflow_enabled: if mlflow_run_id value is passed from xcom
    :param requests_cpu: k8s requests cpu value
    :param requests_memory: k8s requests memory value
    :param limits_cpu: k8s limits cpu value
    :param limits_memory: k8s limits memory value
    :param node_selector_labels: dictionary of node selector labels to be
        put into pod node selector
    :param labels: dictionary of labels to apply on pod
    :param tolerations: dictionary tolerations for nodes
    :param annotations: dictionary of annotations to apply on pod
    :param source: mount point of shared storage
    :param parameters: additional kedro run parameters
    """
    # Kept on the instance for use by hooks/xcom handling elsewhere.
    self._task_id = task_id
    self._volume_disabled = volume_disabled
    self._pvc_name = pvc_name
    self._mlflow_enabled = mlflow_enabled
    super().__init__(
        task_id=task_id,
        security_context=self.create_security_context(volume_disabled, volume_owner),
        namespace=namespace,
        image=image,
        image_pull_policy=image_pull_policy,
        # Run exactly one kedro node per pod.
        arguments=[
            "kedro",
            "run",
            "-e",
            env,
            "--pipeline",
            pipeline,
            "--node",
            node_name,
            "--params",
            parameters,
        ],
        # Shared storage mount is skipped entirely when the volume is disabled.
        volume_mounts=[
            k8s.V1VolumeMount(mount_path=source, name="storage")
        ] if not volume_disabled else [],
        resources=self.create_resources(requests_cpu, requests_memory,
                                        limits_cpu, limits_memory),
        startup_timeout_seconds=startup_timeout,
        is_delete_operator_pod=True,
        pod_template_file=self.minimal_pod_template,
        node_selectors=node_selector_labels,
        labels=labels,
        tolerations=self.create_tolerations(tolerations),
        annotations=annotations,
        secrets=secrets,
    )
from airflow.hooks.base import BaseHook from airflow.kubernetes.pod_generator import PodGenerator from airflow.models import DAG, Connection, DagBag, TaskInstance from airflow.models.baseoperator import BaseOperator, BaseOperatorLink from airflow.operators.bash import BashOperator from airflow.security import permissions from airflow.serialization.json_schema import load_dag_schema_dict from airflow.serialization.serialized_objects import SerializedBaseOperator, SerializedDAG from tests.test_utils.mock_operators import CustomOperator, CustomOpLink, GoogleLink executor_config_pod = k8s.V1Pod( metadata=k8s.V1ObjectMeta(name="my-name"), spec=k8s.V1PodSpec( containers=[ k8s.V1Container(name="base", volume_mounts=[k8s.V1VolumeMount(name="my-vol", mount_path="/vol/")]) ] ), ) serialized_simple_dag_ground_truth = { "__version": 1, "dag": { "default_args": { "__type": "dict", "__var": { "depends_on_past": False, "retries": 1, "retry_delay": {"__type": "timedelta", "__var": 300.0}, "sla": {"__type": "timedelta", "__var": 100.0}, },
# MOUNT OWS_CFG via init_container # for main container mount ows_cfg_mount = VolumeMount("ows-config-volume", mount_path=OWS_CFG_MOUNT_PATH, sub_path=None, read_only=False) ows_cfg_volume_config = {} ows_cfg_volume = Volume(name="ows-config-volume", configs=ows_cfg_volume_config) # for init container mount cfg_image_mount = k8s.V1VolumeMount( mount_path=OWS_CFG_MOUNT_PATH, name="ows-config-volume", sub_path=None, read_only=False, ) config_container = k8s.V1Container( image=OWS_CONFIG_IMAGE, command=["cp"], args=["-r", OWS_CFG_IMAGEPATH, OWS_CFG_FOLDER_PATH], volume_mounts=[cfg_image_mount], name="mount-ows-config", working_dir="/opt", ) def ows_update_extent_subdag(parent_dag_name: str, child_dag_name: str,
from airflow.hooks.base_hook import BaseHook from airflow.kubernetes.pod_generator import PodGenerator from airflow.models import DAG, Connection, DagBag, TaskInstance from airflow.models.baseoperator import BaseOperator from airflow.operators.bash import BashOperator from airflow.serialization.json_schema import load_dag_schema_dict from airflow.serialization.serialized_objects import SerializedBaseOperator, SerializedDAG from tests.test_utils.mock_operators import CustomOperator, CustomOpLink, GoogleLink executor_config_pod = k8s.V1Pod( metadata=k8s.V1ObjectMeta(name="my-name"), spec=k8s.V1PodSpec(containers=[ k8s.V1Container(name="base", volume_mounts=[ k8s.V1VolumeMount(name="my-vol", mount_path="/vol/") ]) ])) serialized_simple_dag_ground_truth = { "__version": 1, "dag": { "default_args": { "__type": "dict", "__var": { "depends_on_past": False, "retries": 1, "retry_delay": { "__type": "timedelta", "__var": 300.0 }
def build_k8s_object(aggregate_job, k8s_api_version=None, defaults=None):
    """Build a V1Job or (version-appropriate) CronJob from an aggregate spec.

    :param aggregate_job: dict describing the job (name, schedule, env, ...);
        a ``schedule`` of ``'once'`` yields a plain Job instead of a CronJob.
    :param k8s_api_version: cluster version string like ``'1.8'``; defaults
        to ``_K8S_API_VERSION``.
    :param defaults: optional fallback values; deep-copied before mutation.
    :raises ValueError: for unsupported kubernetes api versions (< 1.5).
    """
    k8s_api_version = k8s_api_version or _K8S_API_VERSION
    version_parts = str(k8s_api_version).split('.')
    k8s_major, k8s_minor = int(version_parts[0]), int(version_parts[1])
    if k8s_major != 1 or k8s_minor < 5:
        raise ValueError('Unsupported kubernetes api version')
    # CronJob moved API groups over k8s releases; pick the matching models.
    if k8s_minor >= 8:
        cronjob_api_version = 'batch/v1beta1'
        CronJob = k8s_models.V1beta1CronJob
        CronJobSpec = k8s_models.V1beta1CronJobSpec
        JobTemplateSpec = k8s_models.V1beta1JobTemplateSpec
    else:
        cronjob_api_version = 'batch/v2alpha1'
        CronJob = k8s_models.V2alpha1CronJob
        CronJobSpec = k8s_models.V2alpha1CronJobSpec
        JobTemplateSpec = k8s_models.V2alpha1JobTemplateSpec
    # Deep-copy so filling in fallback keys never mutates the caller's dict.
    defaults = copy.deepcopy(defaults) if defaults is not None else {}
    if 'containerName' not in defaults:
        defaults['containerName'] = '{}-job'.format(aggregate_job['name'])
    if 'labels' not in defaults:
        defaults['labels'] = {}
    if 'labelKey' not in defaults:
        defaults['labelKey'] = 'kronjob/job'

    def _get_arg(key):
        # Job value first, then the defaults fallback.
        return aggregate_job.get(key, defaults.get(key))

    def _get_args(*keys):
        '''Fetch several args keyed by snake_case names.

        Roughly speaking, turns camel case into snake case,
        e.g. 'DeviceType' -> 'device_type'
        but not always, e.g. 'IOError' -> 'IoError'
        '''
        return {inflection.underscore(key): _get_arg(key) for key in keys}

    labels = _get_arg('labels')
    labels[_get_arg('labelKey')] = _get_arg('name')
    metadata = k8s_models.V1ObjectMeta(labels=labels,
                                       **_get_args('name', 'namespace'))
    env = _deserialize_k8s(aggregate_job.get('env'), 'list[V1EnvVar]')
    volume_mounts = _get_arg('volumeMounts') or []
    # Convert camelCase mount specs into V1VolumeMount keyword args.
    volume_mounts = [
        k8s_models.V1VolumeMount(
            **({inflection.underscore(k): v for k, v in volume_mount.items()}))
        for volume_mount in volume_mounts
    ]
    job_spec = k8s_models.V1JobSpec(template=k8s_models.V1PodTemplateSpec(
        metadata=k8s_models.V1ObjectMeta(labels=labels,
                                         **_get_args('annotations')),
        spec=k8s_models.V1PodSpec(containers=[
            k8s_models.V1Container(
                env=env,
                name=_get_arg('containerName'),
                resources=k8s_models.V1ResourceRequirements(
                    # Drop unset limits/requests; `or None` omits empty dicts.
                    limits={
                        k: v
                        for k, v in (('cpu', _get_arg('cpuLimit')),
                                     ('memory', _get_arg('memoryLimit')))
                        if v is not None
                    } or None,
                    requests={
                        k: v
                        for k, v in (('cpu', _get_arg('cpuRequest')),
                                     ('memory', _get_arg('memoryRequest')))
                        if v is not None
                    } or None,
                ),
                volume_mounts=volume_mounts,
                **_get_args('args', 'command', 'image', 'imagePullPolicy'))
        ], **_get_args('nodeSelector', 'restartPolicy', 'volumes'))),
        backoff_limit=_get_arg('backoffLimit'))
    if aggregate_job['schedule'] == 'once':
        # https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/V1Job.md
        k8s_object = k8s_models.V1Job(api_version='batch/v1',
                                      kind='Job',
                                      metadata=metadata,
                                      spec=job_spec)
    else:
        # Note that this can be one of two versions here:
        # https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/V1beta1CronJob.md
        # or
        # https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/V2alpha1CronJob.md
        k8s_object = CronJob(
            api_version=cronjob_api_version,
            kind='CronJob',
            metadata=metadata,
            spec=CronJobSpec(job_template=JobTemplateSpec(
                metadata=k8s_models.V1ObjectMeta(labels=labels),
                spec=job_spec),
                **_get_args('concurrencyPolicy', 'failedJobsHistoryLimit',
                            'schedule', 'successfulJobsHistoryLimit',
                            'suspend', 'startingDeadlineSeconds')))
    return k8s_object
def get_empty_dir_volume_mount():
    """Mount for the 'tmpdir' scratch volume at /tmp."""
    mount = k8s.V1VolumeMount(
        name="tmpdir",
        mount_path="/tmp",
    )
    return mount
@task(executor_config=start_task_executor_config)
def start_task():
    print_stuff()


start_task = start_task()

# [START task_with_volume]
# Executor override: run the task's worker pod with a hostPath volume
# mounted at /foo/ in the base container.
executor_config_volume_mount = {
    "pod_override": k8s.V1Pod(spec=k8s.V1PodSpec(
        containers=[
            k8s.V1Container(
                name="base",
                volume_mounts=[
                    k8s.V1VolumeMount(
                        mount_path="/foo/",
                        name="example-kubernetes-test-volume")
                ],
            )
        ],
        volumes=[
            k8s.V1Volume(
                name="example-kubernetes-test-volume",
                host_path=k8s.V1HostPathVolumeSource(path="/tmp/"),
            )
        ],
    )),
}


# NOTE(review): body continues past this chunk.
@task(executor_config=executor_config_volume_mount)
def test_volume_mount():
def test_to_v1_pod(self, mock_uuid):
    """Deprecated Pod with legacy Volume/VolumeMount/Secret/Resources
    converts to the expected serialized V1 pod dict."""
    from airflow.contrib.kubernetes.pod import Pod as DeprecatedPod
    from airflow.kubernetes.volume import Volume
    from airflow.kubernetes.volume_mount import VolumeMount
    from airflow.kubernetes.secret import Secret
    from airflow.kubernetes.pod import Resources
    import uuid
    # Fixed uuid makes the generated secret volume names deterministic.
    static_uuid = uuid.UUID('cf4a56d2-8101-4217-b027-2af6216feb48')
    mock_uuid.return_value = static_uuid
    pod = DeprecatedPod(
        image="foo",
        name="bar",
        namespace="baz",
        image_pull_policy="Never",
        envs={"test_key": "test_value"},
        cmds=["airflow"],
        resources=Resources(request_memory="1G", request_cpu="100Mi", limit_gpu="100G"),
        init_containers=k8s.V1Container(
            name="test-container",
            volume_mounts=k8s.V1VolumeMount(mount_path="/foo/bar",
                                            name="init-volume-secret")),
        # Mix of legacy Volume objects and raw dict specs.
        volumes=[
            Volume(name="foo", configs={}),
            {
                "name": "bar",
                'secret': {
                    'secretName': 'volume-secret'
                }
            }
        ],
        secrets=[
            Secret("volume", None, "init-volume-secret"),
            Secret('env', "AIRFLOW_SECRET", 'secret_name', "airflow_config"),
            Secret("volume", "/opt/airflow", "volume-secret", "secret-key")
        ],
        volume_mounts=[
            VolumeMount(name="foo", mount_path="/mnt", sub_path="/", read_only=True)
        ])
    k8s_client = ApiClient()
    result = pod.to_v1_kubernetes_pod()
    result = k8s_client.sanitize_for_serialization(result)
    expected = \
        {'metadata': {'labels': {}, 'name': 'bar', 'namespace': 'baz'},
         'spec': {'affinity': {},
                  'containers': [{'args': [],
                                  'command': ['airflow'],
                                  'env': [{'name': 'test_key', 'value': 'test_value'},
                                          {'name': 'AIRFLOW_SECRET',
                                           'valueFrom': {'secretKeyRef': {'key': 'airflow_config',
                                                                          'name': 'secret_name'}}}],
                                  'image': 'foo',
                                  'imagePullPolicy': 'Never',
                                  'name': 'base',
                                  'resources': {'limits': {'nvidia.com/gpu': '100G'},
                                                'requests': {'cpu': '100Mi',
                                                             'memory': '1G'}},
                                  'volumeMounts': [{'mountPath': '/mnt',
                                                    'name': 'foo',
                                                    'readOnly': True,
                                                    'subPath': '/'},
                                                   {'mountPath': '/opt/airflow',
                                                    'name': 'secretvol' + str(static_uuid),
                                                    'readOnly': True}]}],
                  'hostNetwork': False,
                  'initContainers': {'name': 'test-container',
                                     'volumeMounts': {'mountPath': '/foo/bar',
                                                      'name': 'init-volume-secret'}},
                  'securityContext': {},
                  'tolerations': [],
                  'volumes': [{'name': 'foo'},
                              {'name': 'bar',
                               'secret': {'secretName': 'volume-secret'}},
                              {'name': 'secretvolcf4a56d2-8101-4217-b027-2af6216feb48',
                               'secret': {'secretName': 'init-volume-secret'}},
                              {'name': 'secretvol' + str(static_uuid),
                               'secret': {'secretName': 'volume-secret'}}
                              ]}}
    # Show the full dict on mismatch.
    self.maxDiff = None
    self.assertEqual(expected, result)
def test_from_obj(self):
    """PodGenerator.from_obj handles both the new ``pod_override`` form and
    the legacy ``KubernetesExecutor`` dict form, producing equivalent pods."""
    result = PodGenerator.from_obj(
        {
            "pod_override": k8s.V1Pod(
                api_version="v1",
                kind="Pod",
                metadata=k8s.V1ObjectMeta(name="foo", annotations={"test": "annotation"}),
                spec=k8s.V1PodSpec(
                    containers=[
                        k8s.V1Container(
                            name="base",
                            volume_mounts=[
                                k8s.V1VolumeMount(
                                    mount_path="/foo/", name="example-kubernetes-test-volume"
                                )
                            ],
                        )
                    ],
                    volumes=[
                        k8s.V1Volume(
                            name="example-kubernetes-test-volume",
                            host_path=k8s.V1HostPathVolumeSource(path="/tmp/"),
                        )
                    ],
                ),
            )
        }
    )
    result = self.k8s_client.sanitize_for_serialization(result)
    assert {
        'apiVersion': 'v1',
        'kind': 'Pod',
        'metadata': {
            'name': 'foo',
            'annotations': {'test': 'annotation'},
        },
        'spec': {
            'containers': [
                {
                    'name': 'base',
                    'volumeMounts': [{'mountPath': '/foo/', 'name': 'example-kubernetes-test-volume'}],
                }
            ],
            'volumes': [{'hostPath': {'path': '/tmp/'}, 'name': 'example-kubernetes-test-volume'}],
        },
    } == result
    # Legacy executor-config dict form of the same customization.
    result = PodGenerator.from_obj(
        {
            "KubernetesExecutor": {
                "annotations": {"test": "annotation"},
                "volumes": [
                    {
                        "name": "example-kubernetes-test-volume",
                        "hostPath": {"path": "/tmp/"},
                    },
                ],
                "volume_mounts": [
                    {
                        "mountPath": "/foo/",
                        "name": "example-kubernetes-test-volume",
                    },
                ],
            }
        }
    )
    result_from_pod = PodGenerator.from_obj(
        {
            "pod_override": k8s.V1Pod(
                metadata=k8s.V1ObjectMeta(annotations={"test": "annotation"}),
                spec=k8s.V1PodSpec(
                    containers=[
                        k8s.V1Container(
                            name="base",
                            volume_mounts=[
                                k8s.V1VolumeMount(
                                    name="example-kubernetes-test-volume", mount_path="/foo/"
                                )
                            ],
                        )
                    ],
                    volumes=[k8s.V1Volume(name="example-kubernetes-test-volume", host_path="/tmp/")],
                ),
            )
        }
    )
    result = self.k8s_client.sanitize_for_serialization(result)
    result_from_pod = self.k8s_client.sanitize_for_serialization(result_from_pod)
    expected_from_pod = {
        'metadata': {'annotations': {'test': 'annotation'}},
        'spec': {
            'containers': [
                {
                    'name': 'base',
                    'volumeMounts': [{'mountPath': '/foo/', 'name': 'example-kubernetes-test-volume'}],
                }
            ],
            'volumes': [{'hostPath': '/tmp/', 'name': 'example-kubernetes-test-volume'}],
        },
    }
    # NOTE(review): 'discrepency' below is a typo ('discrepancy') in the
    # runtime assertion message; left unchanged here.
    assert (
        result_from_pod == expected_from_pod
    ), "There was a discrepency between KubernetesExecutor and pod_override"
    assert {
        'apiVersion': 'v1',
        'kind': 'Pod',
        'metadata': {
            'annotations': {'test': 'annotation'},
        },
        'spec': {
            'containers': [
                {
                    'args': [],
                    'command': [],
                    'env': [],
                    'envFrom': [],
                    'name': 'base',
                    'ports': [],
                    'volumeMounts': [{'mountPath': '/foo/', 'name': 'example-kubernetes-test-volume'}],
                }
            ],
            'hostNetwork': False,
            'imagePullSecrets': [],
            'volumes': [{'hostPath': {'path': '/tmp/'}, 'name': 'example-kubernetes-test-volume'}],
        },
    } == result
def _get_init_containers(self) -> List[k8s.V1Container]:
    """When using git to retrieve the DAGs, use the GitSync Init Container"""
    # If we're using volume claims to mount the dags, no init container is needed
    if self.kube_config.dags_volume_claim or \
            self.kube_config.dags_volume_host or self.kube_config.dags_in_image:
        return []

    # Otherwise, define a git-sync init container
    init_environment = [
        k8s.V1EnvVar(name='GIT_SYNC_REPO', value=self.kube_config.git_repo),
        k8s.V1EnvVar(name='GIT_SYNC_BRANCH', value=self.kube_config.git_branch),
        k8s.V1EnvVar(name='GIT_SYNC_ROOT', value=self.kube_config.git_sync_root),
        k8s.V1EnvVar(name='GIT_SYNC_DEST', value=self.kube_config.git_sync_dest),
        k8s.V1EnvVar(name='GIT_SYNC_REV', value=self.kube_config.git_sync_rev),
        # Shallow, one-shot sync: the init container exits after one clone.
        k8s.V1EnvVar(name='GIT_SYNC_DEPTH', value='1'),
        k8s.V1EnvVar(name='GIT_SYNC_ONE_TIME', value='true')
    ]
    if self.kube_config.git_user:
        init_environment.append(
            k8s.V1EnvVar(name='GIT_SYNC_USERNAME', value=self.kube_config.git_user))
    if self.kube_config.git_password:
        init_environment.append(
            k8s.V1EnvVar(name='GIT_SYNC_PASSWORD', value=self.kube_config.git_password))
    # git-sync writes the checkout into the shared DAGs volume.
    volume_mounts = [
        k8s.V1VolumeMount(mount_path=self.kube_config.git_sync_root,
                          name=self.dags_volume_name,
                          read_only=False)
    ]
    if self.kube_config.git_sync_credentials_secret:
        # NOTE(review): if git_user/git_password are also configured, this
        # appends a second GIT_SYNC_USERNAME/PASSWORD pair to the env list —
        # confirm which value git-sync actually honors.
        init_environment.extend([
            k8s.V1EnvVar(
                name='GIT_SYNC_USERNAME',
                value_from=k8s.V1EnvVarSource(
                    secret_key_ref=k8s.V1SecretKeySelector(
                        name=self.kube_config.git_sync_credentials_secret,
                        key='GIT_SYNC_USERNAME'))),
            k8s.V1EnvVar(
                name='GIT_SYNC_PASSWORD',
                value_from=k8s.V1EnvVarSource(
                    secret_key_ref=k8s.V1SecretKeySelector(
                        name=self.kube_config.git_sync_credentials_secret,
                        key='GIT_SYNC_PASSWORD')))
        ])
    if self.kube_config.git_ssh_key_secret_name:
        # SSH key is delivered via a secret volume mounted at a fixed path.
        volume_mounts.append(
            k8s.V1VolumeMount(name=self.git_sync_ssh_secret_volume_name,
                              mount_path='/etc/git-secret/ssh',
                              sub_path='ssh'))
        init_environment.extend([
            k8s.V1EnvVar(name='GIT_SSH_KEY_FILE', value='/etc/git-secret/ssh'),
            k8s.V1EnvVar(name='GIT_SYNC_SSH', value='true')
        ])
    if self.kube_config.git_ssh_known_hosts_configmap_name:
        volume_mounts.append(
            k8s.V1VolumeMount(
                name=self.git_sync_ssh_known_hosts_volume_name,
                mount_path='/etc/git-secret/known_hosts',
                sub_path='known_hosts'))
        init_environment.extend([
            k8s.V1EnvVar(name='GIT_KNOWN_HOSTS', value='true'),
            k8s.V1EnvVar(name='GIT_SSH_KNOWN_HOSTS_FILE',
                         value='/etc/git-secret/known_hosts')
        ])
    else:
        # Without a known_hosts configmap, host key checking is disabled.
        init_environment.append(
            k8s.V1EnvVar(name='GIT_KNOWN_HOSTS', value='false'))

    init_containers = k8s.V1Container(
        name=self.kube_config.git_sync_init_container_name,
        image=self.kube_config.git_sync_container,
        env=init_environment,
        volume_mounts=volume_mounts)

    if self.kube_config.git_sync_run_as_user != "":
        init_containers.security_context = k8s.V1SecurityContext(
            run_as_user=self.kube_config.git_sync_run_as_user
        )  # git-sync user

    return [init_containers]