def test_volume_mount(self):
    with patch.object(PodLauncher, 'log') as mock_logger:
        volume_mount = VolumeMount('test-volume',
                                   mount_path='/tmp/test_volume',
                                   sub_path=None,
                                   read_only=False)

        volume_config = {
            'persistentVolumeClaim': {
                'claimName': 'test-volume'
            }
        }
        volume = Volume(name='test-volume', configs=volume_config)
        args = [
            "echo \"retrieved from mount\" > /tmp/test_volume/test.txt "
            "&& cat /tmp/test_volume/test.txt"
        ]
        k = KubernetesPodOperator(
            namespace='default',
            image="ubuntu:16.04",
            cmds=["bash", "-cx"],
            arguments=args,
            labels={"foo": "bar"},
            volume_mounts=[volume_mount],
            volumes=[volume],
            is_delete_operator_pod=False,
            name="test",
            task_id="task",
            in_cluster=False,
            do_xcom_push=False,
        )
        context = create_context(k)
        k.execute(context=context)
        mock_logger.info.assert_any_call('retrieved from mount')

        actual_pod = self.api_client.sanitize_for_serialization(k.pod)
        self.expected_pod['spec']['containers'][0]['args'] = args
        self.expected_pod['spec']['containers'][0]['volumeMounts'] = [{
            'name': 'test-volume',
            'mountPath': '/tmp/test_volume',
            'readOnly': False
        }]
        self.expected_pod['spec']['volumes'] = [{
            'name': 'test-volume',
            'persistentVolumeClaim': {
                'claimName': 'test-volume'
            }
        }]
        self.assertEqual(self.expected_pod, actual_pod)
def __kubernetes_kwargs(self, task: ContainerTask):
    config = copy.deepcopy(self.executor_config)
    kubernetes_kwargs = {
        'task_id': task.task_id,
        'image': task.image,
        'arguments': task.arguments,
        'namespace': os.environ.get('AIRFLOW__KUBERNETES__NAMESPACE', 'default'),
        'name': task.task_id.replace('_', '-'),
        'in_cluster': os.environ.get('AIRFLOW__KUBERNETES__IN_CLUSTER', False),
        'image_pull_policy': get_variable('image_pull_policy', default_val='IfNotPresent'),
        'get_logs': config.pop('get_logs', True),
        'is_delete_operator_pod': config.pop('is_delete_operator_pod', True),
        'startup_timeout_seconds': config.pop('startup_timeout_seconds', 1200),
        'env_vars': task.env_vars,
        'do_xcom_push': task.task_config.get('do_xcom_push', False),
        'image_pull_secrets': config.pop('image_pull_secrets', 'regcred'),
        'volumes': self.volumes,
        'config_file': os.environ.get('AIRFLOW__KUBERNETES__CONFIG_FILE'),
        'cluster_context': os.environ.get('AIRFLOW__KUBERNETES__CLUSTER_CONTEXT', None),
        'cmds': task.cmds,
        'volume_mounts': [
            VolumeMount(mount['volume'], mount['path'], mount.get('sub_path'),
                        mount.get('read_only', False))
            for mount in task.mounts
        ]
    }
    config.pop('in_cluster', None)
    config.pop('volumes', None)
    config.pop('volume_mounts', None)
    config.pop('executor', None)
    config.pop('type', None)
    kubernetes_kwargs.update(config)

    if env_util.is_running_on_jenkins():
        kubernetes_kwargs['affinity'] = self.__jenkins_kubernetes_affinity()
        kubernetes_kwargs['namespace'] = 'jenkins'

    if not task.dag:
        kubernetes_kwargs.update({
            'start_date': datetime.datetime(1970, 1, 1),
        })
    return kubernetes_kwargs
def build_kubernetes_pod_exporter(dag, command, etl_cmd_string, output_file):
    '''
    Creates the export task using a KubernetesPodOperator.

    Parameters:
        dag - the parent dag
        command - stellar-etl command type (ex. export_ledgers, export_accounts)
        etl_cmd_string - a string of the fully formed command that includes all flags and arguments to be sent to the etl
        output_file - filename for the output file or folder

    Returns:
        the KubernetesPodOperator for the export task
    '''
    from airflow.kubernetes.volume import Volume
    from airflow.kubernetes.volume_mount import VolumeMount

    data_mount = VolumeMount(Variable.get('volume_name'),
                             Variable.get("image_output_path"), '', False)
    volume_config = {
        'persistentVolumeClaim': {
            'claimName': Variable.get('volume_claim_name')
        }
    }
    data_volume = Volume(Variable.get('volume_name'), volume_config)

    cmd = ['bash']
    args = ['-c', f'{etl_cmd_string} && mkdir -p /airflow/xcom/ && echo \'{{"output_file":"{output_file}"}}\' >> /airflow/xcom/return.json']

    config_file_location = Variable.get('kube_config_location')
    in_cluster = False if config_file_location else True

    return KubernetesPodOperator(
        task_id=command + '_task',
        name=command + '_task',
        namespace=Variable.get('namespace'),
        image=Variable.get('image_name'),
        cmds=cmd,
        arguments=args,
        dag=dag,
        do_xcom_push=True,
        is_delete_operator_pod=True,
        in_cluster=in_cluster,
        config_file=config_file_location,
        volume_mounts=[data_mount],
        volumes=[data_volume],
        affinity=Variable.get('affinity', deserialize_json=True)
    )
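# Hedged usage sketch (not part of the original module): shows how the builder
# above might be called inside a DAG definition. The `dag` object, the etl
# command string, and the output filename are illustrative assumptions only.
ledgers_export_task = build_kubernetes_pod_exporter(
    dag,
    'export_ledgers',
    'stellar-etl export_ledgers -s 100 -e 200 -o /etl/exported_ledgers.txt',
    'exported_ledgers.txt',
)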
def test_volume_mount(self):
    with mock.patch.object(PodLauncher, 'log') as mock_logger:
        volume_mount = VolumeMount('test-volume',
                                   mount_path='/root/mount_file',
                                   sub_path=None,
                                   read_only=True)

        volume_config = {
            'persistentVolumeClaim': {
                'claimName': 'test-volume'
            }
        }
        volume = Volume(name='test-volume', configs=volume_config)
        args = ["cat /root/mount_file/test.txt"]
        k = KubernetesPodOperator(namespace='default',
                                  image="ubuntu:16.04",
                                  cmds=["bash", "-cx"],
                                  arguments=args,
                                  labels={"foo": "bar"},
                                  volume_mounts=[volume_mount],
                                  volumes=[volume],
                                  name="test",
                                  task_id="task")
        k.execute(None)
        mock_logger.info.assert_any_call(b"retrieved from mount\n")

        actual_pod = self.api_client.sanitize_for_serialization(k.pod)
        self.expected_pod['spec']['containers'][0]['args'] = args
        self.expected_pod['spec']['containers'][0]['volumeMounts'] = [{
            'name': 'test-volume',
            'mountPath': '/root/mount_file',
            'readOnly': True
        }]
        self.expected_pod['spec']['volumes'] = [{
            'name': 'test-volume',
            'persistentVolumeClaim': {
                'claimName': 'test-volume'
            }
        }]
        self.assertEqual(self.expected_pod, actual_pod)
def __kubernetes_kwargs(self):
    kubernetes_kwargs = {
        'namespace': get_variable('kubernetes_namespace', default_val='default'),
        'name': self.task_name.replace('_', '-'),
        'in_cluster': get_variable('in_kubernetes_cluster', default_val=False),
        'image_pull_policy': get_variable('image_pull_policy', default_val='IfNotPresent'),
        'get_logs': True,
        'is_delete_operator_pod': True,
        'startup_timeout_seconds': 300,
        'image_pull_secrets': 'regcred',
        'resources': self.resources,
        'dag': self.dag,
        'volumes': self.volumes,
        'volume_mounts': [
            VolumeMount(mount['volume'], mount['path'], mount.get('sub_path'),
                        mount.get('read_only', False))
            for mount in self.mounts
        ]
    }
    return kubernetes_kwargs
def make_task(operator: str, task_params: Dict[str, Any]) -> BaseOperator:
    """
    Takes an operator and params and creates an instance of that operator.

    :returns: instance of operator object
    """
    try:
        # class is a Callable https://stackoverflow.com/a/34578836/3679900
        operator_obj: Callable[..., BaseOperator] = import_string(operator)
    except Exception as err:
        raise Exception(f"Failed to import operator: {operator}") from err
    try:
        if operator_obj in [PythonOperator, BranchPythonOperator]:
            if not task_params.get("python_callable_name") and not task_params.get("python_callable_file"):
                raise Exception(
                    "Failed to create task. PythonOperator and BranchPythonOperator require "
                    "`python_callable_name` and `python_callable_file` parameters."
                )
            task_params["python_callable"]: Callable = utils.get_python_callable(
                task_params["python_callable_name"],
                task_params["python_callable_file"],
            )
            # remove dag-factory specific parameters
            # Airflow 2.0 doesn't allow these to be passed to operator
            del task_params["python_callable_name"]
            del task_params["python_callable_file"]

        # Check for the custom success and failure callables in SqlSensor. These are considered
        # optional, so no failures in case they aren't found. Note: there's no reason to
        # declare both a callable file and a lambda function for success/failure parameter.
        # If both are found the object will not throw an error; instead the callable file will
        # take precedence over the lambda function.
        if operator_obj in [SqlSensor]:
            # Success checks
            if task_params.get("success_check_file") and task_params.get("success_check_name"):
                task_params["success"]: Callable = utils.get_python_callable(
                    task_params["success_check_name"],
                    task_params["success_check_file"],
                )
                del task_params["success_check_name"]
                del task_params["success_check_file"]
            elif task_params.get("success_check_lambda"):
                task_params["success"]: Callable = utils.get_python_callable_lambda(
                    task_params["success_check_lambda"])
                del task_params["success_check_lambda"]

            # Failure checks
            if task_params.get("failure_check_file") and task_params.get("failure_check_name"):
                task_params["failure"]: Callable = utils.get_python_callable(
                    task_params["failure_check_name"],
                    task_params["failure_check_file"],
                )
                del task_params["failure_check_name"]
                del task_params["failure_check_file"]
            elif task_params.get("failure_check_lambda"):
                task_params["failure"]: Callable = utils.get_python_callable_lambda(
                    task_params["failure_check_lambda"])
                del task_params["failure_check_lambda"]

        if operator_obj in [HttpSensor]:
            if not (task_params.get("response_check_name") and task_params.get("response_check_file")) \
                    and not task_params.get("response_check_lambda"):
                raise Exception(
                    "Failed to create task. HttpSensor requires "
                    "`response_check_name` and `response_check_file` parameters "
                    "or `response_check_lambda` parameter."
                )
            if task_params.get("response_check_file"):
                task_params["response_check"]: Callable = utils.get_python_callable(
                    task_params["response_check_name"],
                    task_params["response_check_file"],
                )
                # remove dag-factory specific parameters
                # Airflow 2.0 doesn't allow these to be passed to operator
                del task_params["response_check_name"]
                del task_params["response_check_file"]
            else:
                task_params["response_check"]: Callable = utils.get_python_callable_lambda(
                    task_params["response_check_lambda"])
                # remove dag-factory specific parameters
                # Airflow 2.0 doesn't allow these to be passed to operator
                del task_params["response_check_lambda"]

        # KubernetesPodOperator
        if operator_obj == KubernetesPodOperator:
            task_params["secrets"] = (
                [Secret(**v) for v in task_params.get("secrets")]
                if task_params.get("secrets") is not None else None)
            task_params["ports"] = (
                [Port(**v) for v in task_params.get("ports")]
                if task_params.get("ports") is not None else None)
            task_params["volume_mounts"] = (
                [VolumeMount(**v) for v in task_params.get("volume_mounts")]
                if task_params.get("volume_mounts") is not None else None)
            task_params["volumes"] = (
                [Volume(**v) for v in task_params.get("volumes")]
                if task_params.get("volumes") is not None else None)
            task_params["pod_runtime_info_envs"] = (
                [PodRuntimeInfoEnv(**v) for v in task_params.get("pod_runtime_info_envs")]
                if task_params.get("pod_runtime_info_envs") is not None else None)
            task_params["full_pod_spec"] = (
                V1Pod(**task_params.get("full_pod_spec"))
                if task_params.get("full_pod_spec") is not None else None)
            task_params["init_containers"] = (
                [V1Container(**v) for v in task_params.get("init_containers")]
                if task_params.get("init_containers") is not None else None)

        if utils.check_dict_key(task_params, "execution_timeout_secs"):
            task_params["execution_timeout"]: timedelta = timedelta(
                seconds=task_params["execution_timeout_secs"])
            del task_params["execution_timeout_secs"]

        if utils.check_dict_key(task_params, "sla_secs"):
            task_params["sla"]: timedelta = timedelta(
                seconds=task_params["sla_secs"])
            del task_params["sla_secs"]

        if utils.check_dict_key(task_params, "execution_delta_secs"):
            task_params["execution_delta"]: timedelta = timedelta(
                seconds=task_params["execution_delta_secs"])
            del task_params["execution_delta_secs"]

        if utils.check_dict_key(task_params, "execution_date_fn_name") and \
                utils.check_dict_key(task_params, "execution_date_fn_file"):
            task_params["execution_date_fn"]: Callable = utils.get_python_callable(
                task_params["execution_date_fn_name"],
                task_params["execution_date_fn_file"],
            )
            del task_params["execution_date_fn_name"]
            del task_params["execution_date_fn_file"]

        # on_execute_callback is an Airflow 2.0 feature
        if utils.check_dict_key(task_params, "on_execute_callback") and \
                version.parse(AIRFLOW_VERSION) >= version.parse("2.0.0"):
            task_params["on_execute_callback"]: Callable = import_string(
                task_params["on_execute_callback"])

        if utils.check_dict_key(task_params, "on_failure_callback"):
            task_params["on_failure_callback"]: Callable = import_string(
                task_params["on_failure_callback"])

        if utils.check_dict_key(task_params, "on_success_callback"):
            task_params["on_success_callback"]: Callable = import_string(
                task_params["on_success_callback"])

        if utils.check_dict_key(task_params, "on_retry_callback"):
            task_params["on_retry_callback"]: Callable = import_string(
                task_params["on_retry_callback"])

        # use variables as arguments on operator
        if utils.check_dict_key(task_params, "variables_as_arguments"):
            variables: List[Dict[str, str]] = task_params.get("variables_as_arguments")
            for variable in variables:
                if Variable.get(variable["variable"], default_var=None) is not None:
                    task_params[variable["attribute"]] = Variable.get(
                        variable["variable"], default_var=None)
            del task_params["variables_as_arguments"]

        task: BaseOperator = operator_obj(**task_params)
    except Exception as err:
        raise Exception(f"Failed to create {operator_obj} task") from err
    return task
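# Hedged usage sketch (not part of the original factory code): the YAML-driven
# caller is not shown here, so the operator path and params below are purely
# illustrative of how make_task resolves an operator class and instantiates it.
hello_task = make_task(
    operator="airflow.operators.bash.BashOperator",
    task_params={"task_id": "hello", "bash_command": "echo hello"},
)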
    OWS_PYTHON_PATH,
    OWS_CFG_FOLDER_PATH,
)
from infra.podconfig import ONDEMAND_NODE_AFFINITY
from webapp_update.update_list import UPDATE_EXTENT_PRODUCTS
from infra.variables import SECRET_OWS_WRITER_NAME

OWS_SECRETS = [
    Secret("env", "DB_USERNAME", SECRET_OWS_WRITER_NAME, "postgres-username"),
    Secret("env", "DB_PASSWORD", SECRET_OWS_WRITER_NAME, "postgres-password"),
]

# MOUNT OWS_CFG via init_container
# for main container mount
ows_cfg_mount = VolumeMount("ows-config-volume",
                            mount_path=OWS_CFG_MOUNT_PATH,
                            sub_path=None,
                            read_only=False)

ows_cfg_volume_config = {}

ows_cfg_volume = Volume(name="ows-config-volume", configs=ows_cfg_volume_config)

# for init container mount
cfg_image_mount = k8s.V1VolumeMount(
    mount_path=OWS_CFG_MOUNT_PATH,
    name="ows-config-volume",
    sub_path=None,
    read_only=False,
)
def test_to_v1_pod(self, mock_uuid):
    from airflow.contrib.kubernetes.pod import Pod as DeprecatedPod
    from airflow.kubernetes.volume import Volume
    from airflow.kubernetes.volume_mount import VolumeMount
    from airflow.kubernetes.secret import Secret
    from airflow.kubernetes.pod import Resources
    import uuid

    static_uuid = uuid.UUID('cf4a56d2-8101-4217-b027-2af6216feb48')
    mock_uuid.return_value = static_uuid
    pod = DeprecatedPod(
        image="foo",
        name="bar",
        namespace="baz",
        image_pull_policy="Never",
        envs={"test_key": "test_value"},
        cmds=["airflow"],
        resources=Resources(request_memory="1G", request_cpu="100Mi", limit_gpu="100G"),
        init_containers=k8s.V1Container(
            name="test-container",
            volume_mounts=k8s.V1VolumeMount(mount_path="/foo/bar", name="init-volume-secret")),
        volumes=[
            Volume(name="foo", configs={}),
            {"name": "bar", 'secret': {'secretName': 'volume-secret'}}
        ],
        secrets=[
            Secret("volume", None, "init-volume-secret"),
            Secret('env', "AIRFLOW_SECRET", 'secret_name', "airflow_config"),
            Secret("volume", "/opt/airflow", "volume-secret", "secret-key")
        ],
        volume_mounts=[VolumeMount(name="foo", mount_path="/mnt", sub_path="/", read_only=True)])

    k8s_client = ApiClient()
    result = pod.to_v1_kubernetes_pod()
    result = k8s_client.sanitize_for_serialization(result)

    expected = {
        'metadata': {'labels': {}, 'name': 'bar', 'namespace': 'baz'},
        'spec': {
            'affinity': {},
            'containers': [{
                'args': [],
                'command': ['airflow'],
                'env': [
                    {'name': 'test_key', 'value': 'test_value'},
                    {'name': 'AIRFLOW_SECRET',
                     'valueFrom': {'secretKeyRef': {'key': 'airflow_config', 'name': 'secret_name'}}}
                ],
                'image': 'foo',
                'imagePullPolicy': 'Never',
                'name': 'base',
                'resources': {'limits': {'nvidia.com/gpu': '100G'},
                              'requests': {'cpu': '100Mi', 'memory': '1G'}},
                'volumeMounts': [
                    {'mountPath': '/mnt', 'name': 'foo', 'readOnly': True, 'subPath': '/'},
                    {'mountPath': '/opt/airflow', 'name': 'secretvol' + str(static_uuid), 'readOnly': True}
                ]
            }],
            'hostNetwork': False,
            'initContainers': {'name': 'test-container',
                               'volumeMounts': {'mountPath': '/foo/bar', 'name': 'init-volume-secret'}},
            'securityContext': {},
            'tolerations': [],
            'volumes': [
                {'name': 'foo'},
                {'name': 'bar', 'secret': {'secretName': 'volume-secret'}},
                {'name': 'secretvolcf4a56d2-8101-4217-b027-2af6216feb48',
                 'secret': {'secretName': 'init-volume-secret'}},
                {'name': 'secretvol' + str(static_uuid), 'secret': {'secretName': 'volume-secret'}}
            ]
        }
    }
    self.maxDiff = None
    self.assertEqual(expected, result)
from airflow.kubernetes.volume import Volume
from airflow.kubernetes.volume_mount import VolumeMount

default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime.utcnow(),
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 0,
    'retry_delay': timedelta(minutes=1)
}

volume_mount_nb = VolumeMount(name='nb-data',
                              mount_path='/opt/airflow/nb',
                              sub_path=None,
                              read_only=False)

volume_config = {
    'persistentVolumeClaim': {
        'claimName': 'airflow-nfs-pvc-nb-01'
    }
}
volume_nb = Volume(name='nb-data', configs=volume_config)

dag = DAG(
    'kubernetes_pod_operator_03_with_sleep',
    default_args=default_args,
    schedule_interval=timedelta(minutes=10))

start = DummyOperator(task_id='start', dag=dag)
def make_task(operator: str, task_params: Dict[str, Any],
              af_vars: Dict[str, Any]) -> BaseOperator:
    """
    Takes an operator and params and creates an instance of that operator.

    :returns: instance of operator object
    """
    try:
        # class is a Callable https://stackoverflow.com/a/34578836/3679900
        operator_obj: Callable[..., BaseOperator] = import_string(operator)
    except Exception as err:
        raise Exception(f"Failed to import operator: {operator}") from err
    try:
        if operator_obj in [PythonOperator, BranchPythonOperator, PythonSensor]:
            if (not task_params.get("python_callable")
                    and not task_params.get("python_callable_name")
                    and not task_params.get("python_callable_file")):
                # pylint: disable=line-too-long
                raise Exception(
                    "Failed to create task. PythonOperator, BranchPythonOperator and "
                    "PythonSensor require `python_callable_name` and `python_callable_file` "
                    "parameters.\nOptionally you can load python_callable "
                    "from a file with the special pyyaml notation:\n"
                    "  python_callable_file: !!python/name:my_module.my_func"
                )
            if not task_params.get("python_callable"):
                task_params["python_callable"]: Callable = utils.get_python_callable(
                    task_params["python_callable_name"],
                    task_params["python_callable_file"],
                )
                # remove dag-factory specific parameters
                # Airflow 2.0 doesn't allow these to be passed to operator
                del task_params["python_callable_name"]
                del task_params["python_callable_file"]

        # Check for the custom success and failure callables in SqlSensor. These are considered
        # optional, so no failures in case they aren't found. Note: there's no reason to
        # declare both a callable file and a lambda function for success/failure parameter.
        # If both are found the object will not throw an error; instead the callable file will
        # take precedence over the lambda function.
        if operator_obj in [SqlSensor]:
            # Success checks
            if task_params.get("success_check_file") and task_params.get("success_check_name"):
                task_params["success"]: Callable = utils.get_python_callable(
                    task_params["success_check_name"],
                    task_params["success_check_file"],
                )
                del task_params["success_check_name"]
                del task_params["success_check_file"]
            elif task_params.get("success_check_lambda"):
                task_params["success"]: Callable = utils.get_python_callable_lambda(
                    task_params["success_check_lambda"])
                del task_params["success_check_lambda"]

            # Failure checks
            if task_params.get("failure_check_file") and task_params.get("failure_check_name"):
                task_params["failure"]: Callable = utils.get_python_callable(
                    task_params["failure_check_name"],
                    task_params["failure_check_file"],
                )
                del task_params["failure_check_name"]
                del task_params["failure_check_file"]
            elif task_params.get("failure_check_lambda"):
                task_params["failure"]: Callable = utils.get_python_callable_lambda(
                    task_params["failure_check_lambda"])
                del task_params["failure_check_lambda"]

        if operator_obj in [HttpSensor]:
            if not (task_params.get("response_check_name") and task_params.get("response_check_file")) \
                    and not task_params.get("response_check_lambda"):
                raise Exception(
                    "Failed to create task. HttpSensor requires "
                    "`response_check_name` and `response_check_file` parameters "
                    "or `response_check_lambda` parameter."
                )
            if task_params.get("response_check_file"):
                task_params["response_check"]: Callable = utils.get_python_callable(
                    task_params["response_check_name"],
                    task_params["response_check_file"],
                )
                # remove dag-factory specific parameters
                # Airflow 2.0 doesn't allow these to be passed to operator
                del task_params["response_check_name"]
                del task_params["response_check_file"]
            else:
                task_params["response_check"]: Callable = utils.get_python_callable_lambda(
                    task_params["response_check_lambda"])
                # remove dag-factory specific parameters
                # Airflow 2.0 doesn't allow these to be passed to operator
                del task_params["response_check_lambda"]

        # KubernetesPodOperator
        if operator_obj == KubernetesPodOperator:
            task_params["secrets"] = (
                [Secret(**v) for v in task_params.get("secrets")]
                if task_params.get("secrets") is not None else None)
            task_params["ports"] = (
                [Port(**v) for v in task_params.get("ports")]
                if task_params.get("ports") is not None else None)
            task_params["volume_mounts"] = (
                [VolumeMount(**v) for v in task_params.get("volume_mounts")]
                if task_params.get("volume_mounts") is not None else None)
            task_params["volumes"] = (
                [Volume(**v) for v in task_params.get("volumes")]
                if task_params.get("volumes") is not None else None)
            task_params["pod_runtime_info_envs"] = (
                [PodRuntimeInfoEnv(**v) for v in task_params.get("pod_runtime_info_envs")]
                if task_params.get("pod_runtime_info_envs") is not None else None)
            task_params["full_pod_spec"] = (
                V1Pod(**task_params.get("full_pod_spec"))
                if task_params.get("full_pod_spec") is not None else None)
            task_params["init_containers"] = (
                [V1Container(**v) for v in task_params.get("init_containers")]
                if task_params.get("init_containers") is not None else None)

        if operator_obj == DockerOperator:
            if task_params.get("environment") is not None:
                task_params["environment"] = {
                    k: os.environ.get(v, v)
                    for k, v in task_params["environment"].items()
                }

        if operator_obj == EcsOperator:
            for c in task_params["overrides"]["containerOverrides"]:
                if c.get('environment') is not None:
                    for env in c['environment']:
                        env['value'] = os.environ.get(env['value'], env['value'])

            if 'ECS_SECURITY_GROUPS' in af_vars and 'network_configuration' in task_params:
                task_params["network_configuration"]["awsvpcConfiguration"]['securityGroups'] = \
                    af_vars['ECS_SECURITY_GROUPS']

            if 'ECS_SUBNETS' in af_vars and 'network_configuration' in task_params:
                task_params['network_configuration']["awsvpcConfiguration"]["subnets"] = \
                    af_vars["ECS_SUBNETS"]

            if af_vars.get('ECS_CLUSTER'):
                task_params['cluster'] = af_vars["ECS_CLUSTER"]
                task_params['task_definition'] = (
                    af_vars.get('ECS_CLUSTER') + '_' + task_params['task_definition']).lower()
                task_params['awslogs_group'] = \
                    task_params['awslogs_group'] + '/' + af_vars.get('ECS_CLUSTER').lower()

        if utils.check_dict_key(task_params, "execution_timeout_secs"):
            task_params["execution_timeout"]: timedelta = timedelta(
                seconds=task_params["execution_timeout_secs"])
            del task_params["execution_timeout_secs"]

        if utils.check_dict_key(task_params, "sla_secs"):
            task_params["sla"]: timedelta = timedelta(
                seconds=task_params["sla_secs"])
            del task_params["sla_secs"]

        if utils.check_dict_key(task_params, "execution_delta_secs"):
            task_params["execution_delta"]: timedelta = timedelta(
                seconds=task_params["execution_delta_secs"])
            del task_params["execution_delta_secs"]

        if utils.check_dict_key(task_params, "execution_date_fn_name") and \
                utils.check_dict_key(task_params, "execution_date_fn_file"):
            task_params["execution_date_fn"]: Callable = utils.get_python_callable(
                task_params["execution_date_fn_name"],
                task_params["execution_date_fn_file"],
            )
            del task_params["execution_date_fn_name"]
            del task_params["execution_date_fn_file"]

        # on_execute_callback is an Airflow 2.0 feature
        if utils.check_dict_key(task_params, "on_execute_callback") and \
                version.parse(AIRFLOW_VERSION) >= version.parse("2.0.0"):
            task_params["on_execute_callback"]: Callable = import_string(
                task_params["on_execute_callback"])

        if utils.check_dict_key(task_params, "on_failure_callback"):
            task_params["on_failure_callback"]: Callable = import_string(
                task_params["on_failure_callback"])

        if utils.check_dict_key(task_params, "on_success_callback"):
            task_params["on_success_callback"]: Callable = import_string(
                task_params["on_success_callback"])

        if utils.check_dict_key(task_params, "on_retry_callback"):
            task_params["on_retry_callback"]: Callable = import_string(
                task_params["on_retry_callback"])

        # use Airflow Variables as arguments on operator
        if utils.check_dict_key(task_params, "variables_as_arguments"):
            variables: List[Dict[str, str]] = task_params.get("variables_as_arguments")
            for variable in variables:
                if Variable.get(variable["variable"], default_var=None) is not None:
                    task_params[variable["attribute"]] = Variable.get(
                        variable["variable"], default_var=None)
            del task_params["variables_as_arguments"]

        # use af_vars as arguments on operator
        if utils.check_dict_key(task_params, "af_vars_as_arguments"):
            variables: List[Dict[str, str]] = task_params.get("af_vars_as_arguments")
            for variable in variables:
                if af_vars.get(variable["variable"], None) is not None:
                    task_params[variable["attribute"]] = af_vars.get(variable["variable"], None)
            del task_params["af_vars_as_arguments"]

        task: BaseOperator = operator_obj(**task_params)
    except Exception as err:
        raise Exception(f"Failed to create {operator_obj} task") from err
    return task
def _extract_volume_mounts(volume_mounts):
    result = []
    volume_mounts = volume_mounts or []  # type: List[Union[k8s.V1VolumeMount, dict]]
    for volume_mount in volume_mounts:
        if isinstance(volume_mount, k8s.V1VolumeMount):
            volume_mount = api_client.sanitize_for_serialization(volume_mount)
            volume_mount = VolumeMount(
                name=volume_mount.get("name"),
                mount_path=volume_mount.get("mountPath"),
                sub_path=volume_mount.get("subPath"),
                read_only=volume_mount.get("readOnly")
            )
        elif not isinstance(volume_mount, VolumeMount):
            volume_mount = VolumeMount(
                name=volume_mount.get("name"),
                mount_path=volume_mount.get("mountPath"),
                sub_path=volume_mount.get("subPath"),
                read_only=volume_mount.get("readOnly")
            )
        result.append(volume_mount)

    return result
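# Hedged usage sketch (not part of the original helper): assumes `api_client`,
# `k8s`, and the airflow.kubernetes VolumeMount import used above are in scope.
# It shows the helper normalising a mix of k8s models and camelCase dicts into
# backwards-compatible VolumeMount objects.
mixed_mounts = [
    k8s.V1VolumeMount(name="data", mount_path="/data", read_only=True),
    {"name": "scratch", "mountPath": "/scratch", "subPath": None, "readOnly": False},
]
normalised_mounts = _extract_volume_mounts(mixed_mounts)
# Both entries come back as airflow.kubernetes.volume_mount.VolumeMount instances.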
            }]
        }]
    }
}
}

tolerations = [{
    "key": "dedicated",
    "operator": "Equal",
    "value": "wagl",
    "effect": "NoSchedule"
}]

ancillary_volume_mount = VolumeMount(
    name="wagl-nrt-ancillary-volume",
    mount_path="/ancillary",
    sub_path=None,
    read_only=False,
)

ancillary_volume = Volume(
    name="wagl-nrt-ancillary-volume",
    configs={
        "persistentVolumeClaim": {
            "claimName": "wagl-nrt-ancillary-volume"
        }
    },
)


def setup_logging():
    """ """
from airflow.operators.dummy_operator import DummyOperator
from airflow.kubernetes.volume import Volume
from airflow.kubernetes.volume_mount import VolumeMount
from airflow.utils.dates import days_ago
from airflow import DAG
# from kubernetes.client import models as k8s

default_args = {
    'owner': 'Airflow',
    'start_date': datetime.datetime.now() - datetime.timedelta(days=1)  # yesterday
}

volume_mount = VolumeMount(
    'persist-airflow-logs',
    mount_path='/opt/airflow/logs',
    sub_path=None,
    read_only=False
)

volume_config = {
    'persistentVolumeClaim': {
        'claimName': 'persist-airflow-logs'
    }
}

volume = Volume(
    name='persist-airflow-logs',
    configs=volume_config
)

default_args = {
    'owner': 'airflow',
}
def test_convert_to_airflow_pod(self):
    input_pod = k8s.V1Pod(
        metadata=k8s.V1ObjectMeta(name="foo", namespace="bar"),
        spec=k8s.V1PodSpec(
            init_containers=[
                k8s.V1Container(
                    name="init-container",
                    volume_mounts=[k8s.V1VolumeMount(mount_path="/tmp", name="init-secret")])
            ],
            containers=[
                k8s.V1Container(
                    name="base",
                    command=["foo"],
                    image="myimage",
                    env=[
                        k8s.V1EnvVar(
                            name="AIRFLOW_SECRET",
                            value_from=k8s.V1EnvVarSource(
                                secret_key_ref=k8s.V1SecretKeySelector(name="ai", key="secret_key")))
                    ],
                    ports=[
                        k8s.V1ContainerPort(
                            name="myport",
                            container_port=8080,
                        )
                    ],
                    volume_mounts=[
                        k8s.V1VolumeMount(name="myvolume", mount_path="/tmp/mount", read_only="True"),
                        k8s.V1VolumeMount(name='airflow-config', mount_path='/config',
                                          sub_path='airflow.cfg', read_only=True),
                        k8s.V1VolumeMount(name="airflow-secret", mount_path="/opt/mount", read_only=True)
                    ])
            ],
            security_context=k8s.V1PodSecurityContext(
                run_as_user=0,
                fs_group=0,
            ),
            volumes=[
                k8s.V1Volume(name="myvolume"),
                k8s.V1Volume(name="airflow-config", config_map=k8s.V1ConfigMap(data="airflow-data")),
                k8s.V1Volume(name="airflow-secret",
                             secret=k8s.V1SecretVolumeSource(secret_name="secret-name")),
                k8s.V1Volume(name="init-secret",
                             secret=k8s.V1SecretVolumeSource(secret_name="init-secret"))
            ]))
    result_pod = _convert_to_airflow_pod(input_pod)

    expected = Pod(
        name="foo",
        namespace="bar",
        envs={},
        init_containers=[{
            'name': 'init-container',
            'volumeMounts': [{'mountPath': '/tmp', 'name': 'init-secret'}]
        }],
        cmds=["foo"],
        image="myimage",
        ports=[Port(name="myport", container_port=8080)],
        volume_mounts=[
            VolumeMount(name="myvolume", mount_path="/tmp/mount", sub_path=None, read_only="True"),
            VolumeMount(name="airflow-config", read_only=True, mount_path="/config",
                        sub_path="airflow.cfg"),
            VolumeMount(name="airflow-secret", mount_path="/opt/mount", sub_path=None, read_only=True)
        ],
        secrets=[Secret("env", "AIRFLOW_SECRET", "ai", "secret_key")],
        security_context={'fsGroup': 0, 'runAsUser': 0},
        volumes=[
            Volume(name="myvolume", configs={'name': 'myvolume'}),
            Volume(name="airflow-config",
                   configs={'configMap': {'data': 'airflow-data'}, 'name': 'airflow-config'}),
            Volume(name='airflow-secret',
                   configs={'name': 'airflow-secret', 'secret': {'secretName': 'secret-name'}}),
            Volume(name='init-secret',
                   configs={'name': 'init-secret', 'secret': {'secretName': 'init-secret'}})
        ],
    )
    expected_dict = expected.as_dict()
    result_dict = result_pod.as_dict()
    print(result_pod.volume_mounts)
    parsed_configs = self.pull_out_volumes(result_dict)
    result_dict['volumes'] = parsed_configs
    self.assertEqual(result_dict['secrets'], expected_dict['secrets'])
    self.assertDictEqual(expected_dict, result_dict)
dag = DAG(
    "k8s_nci_db_incremental_sync",
    doc_md=__doc__,
    default_args=DEFAULT_ARGS,
    catchup=False,
    concurrency=1,
    max_active_runs=1,
    tags=["k8s", "nci-explorer"],
    schedule_interval="45 0 * * *",  # every day 0:45AM
    dagrun_timeout=timedelta(minutes=60 * 3),
)

affinity = ONDEMAND_NODE_AFFINITY

s3_backup_volume_mount = VolumeMount(name="s3-backup-volume",
                                     mount_path=BACKUP_PATH,
                                     sub_path=None,
                                     read_only=False)

s3_backup_volume_config = {
    "persistentVolumeClaim": {
        "claimName": "s3-backup-volume"
    }
}

s3_backup_volume = Volume(name="s3-backup-volume", configs=s3_backup_volume_config)

with dag:
    START = DummyOperator(task_id="start")

    # Wait for S3 Key
from airflow import DAG
from airflow.kubernetes.pod import Port
from airflow.kubernetes.secret import Secret
from airflow.kubernetes.volume import Volume
from airflow.kubernetes.volume_mount import VolumeMount
from airflow.operators.bash import BashOperator
from airflow.providers.cncf.kubernetes.operators.kubernetes_pod import KubernetesPodOperator
from airflow.utils.dates import days_ago

# [START howto_operator_k8s_cluster_resources]
secret_file = Secret('volume', '/etc/sql_conn', 'airflow-secrets', 'sql_alchemy_conn')
secret_env = Secret('env', 'SQL_CONN', 'airflow-secrets', 'sql_alchemy_conn')
secret_all_keys = Secret('env', None, 'airflow-secrets-2')
volume_mount = VolumeMount('test-volume',
                           mount_path='/root/mount_file',
                           sub_path=None,
                           read_only=True)
configmaps = ['test-configmap-1', 'test-configmap-2']
volume_config = {'persistentVolumeClaim': {'claimName': 'test-volume'}}
volume = Volume(name='test-volume', configs=volume_config)
# [END howto_operator_k8s_cluster_resources]

port = Port('http', 80)
init_container_volume_mounts = [
    k8s.V1VolumeMount(mount_path='/etc/foo', name='test-volume', sub_path=None, read_only=True)
]
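# Hedged sketch (not part of the original howto excerpt): one way the objects
# defined above are typically wired into a KubernetesPodOperator task. The pod
# name, task_id, image, and command are assumptions for illustration only.
example_pod_task = KubernetesPodOperator(
    namespace='default',
    image='ubuntu:16.04',
    cmds=["bash", "-cx"],
    arguments=["echo 10"],
    secrets=[secret_file, secret_env, secret_all_keys],
    ports=[port],
    volumes=[volume],
    volume_mounts=[volume_mount],
    name="airflow-test-pod",
    task_id="example_pod_task",
)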
def make_task(operator: str, task_params: Dict[str, Any]) -> BaseOperator:
    """
    Takes an operator and params and creates an instance of that operator.

    :returns: instance of operator object
    """
    try:
        # class is a Callable https://stackoverflow.com/a/34578836/3679900
        operator_obj: Callable[..., BaseOperator] = import_string(operator)
    except Exception as err:
        # raising a bare f-string is invalid Python; wrap the message in an Exception
        raise Exception(f"Failed to import operator: {operator}") from err
    try:
        if operator_obj == PythonOperator:
            if not task_params.get("python_callable_name") and not task_params.get("python_callable_file"):
                raise Exception(
                    "Failed to create task. PythonOperator requires "
                    "`python_callable_name` and `python_callable_file` parameters.")
            task_params["python_callable"]: Callable = utils.get_python_callable(
                task_params["python_callable_name"],
                task_params["python_callable_file"],
            )

        # KubernetesPodOperator
        if operator_obj == KubernetesPodOperator:
            task_params["secrets"] = (
                [Secret(**v) for v in task_params.get("secrets")]
                if task_params.get("secrets") is not None else None)
            task_params["ports"] = (
                [Port(**v) for v in task_params.get("ports")]
                if task_params.get("ports") is not None else None)
            task_params["volume_mounts"] = (
                [VolumeMount(**v) for v in task_params.get("volume_mounts")]
                if task_params.get("volume_mounts") is not None else None)
            task_params["volumes"] = (
                [Volume(**v) for v in task_params.get("volumes")]
                if task_params.get("volumes") is not None else None)
            task_params["pod_runtime_info_envs"] = (
                [PodRuntimeInfoEnv(**v) for v in task_params.get("pod_runtime_info_envs")]
                if task_params.get("pod_runtime_info_envs") is not None else None)
            task_params["full_pod_spec"] = (
                V1Pod(**task_params.get("full_pod_spec"))
                if task_params.get("full_pod_spec") is not None else None)
            task_params["init_containers"] = (
                [V1Container(**v) for v in task_params.get("init_containers")]
                if task_params.get("init_containers") is not None else None)

        if utils.check_dict_key(task_params, "execution_timeout_secs"):
            task_params["execution_timeout"]: timedelta = timedelta(
                seconds=task_params["execution_timeout_secs"])
            del task_params["execution_timeout_secs"]

        # use variables as arguments on operator
        if utils.check_dict_key(task_params, "variables_as_arguments"):
            variables: List[Dict[str, str]] = task_params.get("variables_as_arguments")
            for variable in variables:
                if Variable.get(variable["variable"], default_var=None) is not None:
                    task_params[variable["attribute"]] = Variable.get(
                        variable["variable"], default_var=None)
            del task_params["variables_as_arguments"]

        task: BaseOperator = operator_obj(**task_params)
    except Exception as err:
        # raising a bare f-string is invalid Python; wrap the message in an Exception
        raise Exception(f"Failed to create {operator_obj} task") from err
    return task
http_conn_id="apar_graphql", endpoint="graphql/", method="POST", headers={"Content-Type": "application/json"}, data_fn=partial(get_job_status_update_callable, "FAILED"), task_id="failed_callback", dag=dag, trigger_rule="all_failed", ) volume_config = {"persistentVolumeClaim": {"claimName": "pvc-data-name"}} test_volume = Volume(name="pv-data-name", configs=volume_config) test_volume_mount = VolumeMount("pv-data-name", mount_path="/data", sub_path=None, read_only=False) fastq_dump = KubernetesPodOperator( namespace="airflow", image="quay.io/biocontainers/sra-tools:2.10.0--pl526he1b5a44_0", cmds=[ "fastq-dump", "--outdir", "data/fastq", "--gzip", "--skip-technical", "--readids", "--read-filter", "pass", "--dumpbase", "--split-3", "--clip", "SRR6982497" ], name="fastq_dump", task_id="fastq_dump", get_logs=True, dag=dag, volumes=[test_volume],