def test_to_k8s_object(self):
    """Volume.to_k8s_client_obj() must yield the equivalent k8s.V1Volume."""
    pvc_configs = {'persistentVolumeClaim': {'claimName': 'test-volume'}}
    airflow_volume = Volume(name='test-volume', configs=pvc_configs)

    converted = airflow_volume.to_k8s_client_obj()

    self.assertEqual(
        converted,
        k8s.V1Volume(
            name="test-volume",
            persistent_volume_claim={"claimName": "test-volume"},
        ),
    )
def test_volume_mount():
    """Run a pod that reads a file from a PVC-backed mount and check it is logged."""
    with mock.patch.object(PodLauncher, 'log') as log_mock:
        mount = VolumeMount(
            'test-volume',
            mount_path='/root/mount_file',
            sub_path=None,
            read_only=True,
        )
        pvc_volume = Volume(
            name='test-volume',
            configs={'persistentVolumeClaim': {'claimName': 'test-volume'}},
        )
        operator = KubernetesPodOperator(
            namespace='default',
            image="ubuntu:16.04",
            cmds=["bash", "-cx"],
            arguments=["cat /root/mount_file/test.txt"],
            labels={"foo": "bar"},
            volume_mounts=[mount],
            volumes=[pvc_volume],
            name="test",
            task_id="task",
        )
        operator.execute(None)
        # The file's content must have been streamed into the pod log.
        log_mock.info.assert_any_call(b"retrieved from mount\n")
def test_init_container(self):
    """Verify a user-supplied init container and its PVC volume appear in the rendered pod spec."""
    # GIVEN
    volume_mounts = [
        k8s.V1VolumeMount(mount_path='/etc/foo',
                          name='test-volume',
                          sub_path=None,
                          read_only=True)
    ]
    init_environments = [
        k8s.V1EnvVar(name='key1', value='value1'),
        k8s.V1EnvVar(name='key2', value='value2'),
    ]
    init_container = k8s.V1Container(
        name="init-container",
        image="ubuntu:16.04",
        env=init_environments,
        volume_mounts=volume_mounts,
        command=["bash", "-cx"],
        args=["echo 10"],
    )
    volume_config = {'persistentVolumeClaim': {'claimName': 'test-volume'}}
    volume = Volume(name='test-volume', configs=volume_config)
    # Serialized (camelCase) form the init container is expected to take in the pod spec.
    expected_init_container = {
        'name': 'init-container',
        'image': 'ubuntu:16.04',
        'command': ['bash', '-cx'],
        'args': ['echo 10'],
        'env': [{'name': 'key1', 'value': 'value1'},
                {'name': 'key2', 'value': 'value2'}],
        'volumeMounts': [{'mountPath': '/etc/foo',
                          'name': 'test-volume',
                          'readOnly': True}],
    }
    k = KubernetesPodOperator(
        namespace='default',
        image="ubuntu:16.04",
        cmds=["bash", "-cx"],
        arguments=["echo 10"],
        labels={"foo": "bar"},
        name="test",
        task_id="task",
        volumes=[volume],
        init_containers=[init_container],
        in_cluster=False,
        do_xcom_push=False,
    )
    context = create_context(k)
    k.execute(context)
    # Compare the serialized pod against the shared expected_pod fixture,
    # patched with the init container and volume added above.
    actual_pod = self.api_client.sanitize_for_serialization(k.pod)
    self.expected_pod['spec']['initContainers'] = [expected_init_container]
    self.expected_pod['spec']['volumes'] = [
        {'name': 'test-volume', 'persistentVolumeClaim': {'claimName': 'test-volume'}}
    ]
    assert self.expected_pod == actual_pod
def test_volume_mount(self):
    """Write to and read back from a PVC-backed mount inside the pod, then check the rendered spec."""
    with patch.object(PodManager, 'log') as mock_logger:
        volume_mount = VolumeMount('test-volume',
                                   mount_path='/tmp/test_volume',
                                   sub_path=None,
                                   read_only=False)
        volume_config = {
            'persistentVolumeClaim': {
                'claimName': 'test-volume'
            }
        }
        volume = Volume(name='test-volume', configs=volume_config)
        # Write a sentinel into the mounted volume, then immediately read it back.
        args = [
            "echo \"retrieved from mount\" > /tmp/test_volume/test.txt "
            "&& cat /tmp/test_volume/test.txt"
        ]
        k = KubernetesPodOperator(
            namespace='default',
            image="ubuntu:16.04",
            cmds=["bash", "-cx"],
            arguments=args,
            labels={"foo": "bar"},
            volume_mounts=[volume_mount],
            volumes=[volume],
            is_delete_operator_pod=False,
            name="test",
            task_id="task",
            in_cluster=False,
            do_xcom_push=False,
        )
        context = create_context(k)
        k.execute(context=context)
        # The sentinel must surface in the pod's streamed log output.
        mock_logger.info.assert_any_call('retrieved from mount')
        actual_pod = self.api_client.sanitize_for_serialization(k.pod)
        expected_pod = copy(self.expected_pod)
        expected_pod['spec']['containers'][0]['args'] = args
        expected_pod['spec']['containers'][0]['volumeMounts'] = [{
            'name': 'test-volume',
            'mountPath': '/tmp/test_volume',
            'readOnly': False
        }]
        expected_pod['spec']['volumes'] = [{
            'name': 'test-volume',
            'persistentVolumeClaim': {
                'claimName': 'test-volume'
            }
        }]
        # With is_delete_operator_pod=False the pod is kept, and the test expects
        # it to carry the 'already_checked' marker label after execution.
        expected_pod['metadata']['labels']['already_checked'] = 'True'
        assert expected_pod == actual_pod
def _volumes(self):
    """Build one Airflow Volume per entry of the liminal config's 'volumes' list."""
    def _to_volume(entry):
        # Each entry names a volume; the PVC claim follows the '<name>-pvc' convention.
        vol_name = entry['volume']
        return Volume(
            name=vol_name,
            configs={'persistentVolumeClaim': {'claimName': f"{vol_name}-pvc"}},
        )

    return [_to_volume(entry) for entry in self.liminal_config.get('volumes', [])]
def _volumes(self):
    """Build Airflow Volumes from the liminal config, resolving each PVC claim name."""
    result = []
    for entry in self.liminal_config.get('volumes', []):
        vol_name = entry['volume']
        claim = entry.get('claim_name')
        # Local volumes without an explicit claim default to the '<name>-pvc' convention.
        if not claim and 'local' in entry:
            claim = f'{vol_name}-pvc'
        # NOTE(review): claim can still be None when neither 'claim_name' nor
        # 'local' is present — presumably validated upstream; confirm.
        result.append(
            Volume(name=vol_name,
                   configs={'persistentVolumeClaim': {'claimName': claim}}))
    return result
def test_volume_mount(self):
    """Read a file from a PVC-backed mount inside the pod and verify the rendered spec."""
    with mock.patch.object(PodLauncher, 'log') as mock_logger:
        volume_mount = VolumeMount('test-volume',
                                   mount_path='/root/mount_file',
                                   sub_path=None,
                                   read_only=True)
        volume_config = {
            'persistentVolumeClaim': {
                'claimName': 'test-volume'
            }
        }
        volume = Volume(name='test-volume', configs=volume_config)
        args = ["cat /root/mount_file/test.txt"]
        k = KubernetesPodOperator(
            namespace='default',
            image="ubuntu:16.04",
            cmds=["bash", "-cx"],
            arguments=args,
            labels={"foo": "bar"},
            volume_mounts=[volume_mount],
            volumes=[volume],
            name="test",
            task_id="task",
            in_cluster=False,
            do_xcom_push=False,
        )
        context = self.create_context(k)
        k.execute(context=context)
        # NOTE(review): assumes the PVC already contains test.txt with this
        # content — presumably seeded by the test environment; confirm.
        mock_logger.info.assert_any_call(b"retrieved from mount\n")
        actual_pod = self.api_client.sanitize_for_serialization(k.pod)
        self.expected_pod['spec']['containers'][0]['args'] = args
        self.expected_pod['spec']['containers'][0]['volumeMounts'] = [{
            'name': 'test-volume',
            'mountPath': '/root/mount_file',
            'readOnly': True
        }]
        self.expected_pod['spec']['volumes'] = [{
            'name': 'test-volume',
            'persistentVolumeClaim': {
                'claimName': 'test-volume'
            }
        }]
        self.assertEqual(self.expected_pod, actual_pod)
def _extract_volumes(volumes):
    """Normalize a heterogeneous volume list into airflow.kubernetes Volume objects.

    Accepts None, k8s.V1Volume instances, plain dicts (serialized volume specs),
    or already-constructed Volume objects, and returns a list of Volume objects.

    Fix: the original wrapped V1Volume items into Volume inside the first branch
    AND had a second `isinstance` wrap below it — duplicated logic where the
    second check was dead code for the V1Volume path. Now the first branch only
    converts V1Volume -> dict, and a single shared wrap handles all non-Volume
    items. Behavior is unchanged.
    """
    result = []
    volumes = volumes or []  # type: List[Union[k8s.V1Volume, dict]]
    for volume in volumes:
        # k8s client objects are first serialized into their dict form.
        if isinstance(volume, k8s.V1Volume):
            volume = api_client.sanitize_for_serialization(volume)
        # Anything that is not already a Volume (i.e. a dict) gets wrapped once.
        if not isinstance(volume, Volume):
            volume = Volume(name=volume.get("name"), configs=volume)
        result.append(volume)
    return result
def build_kubernetes_pod_exporter(dag, command, etl_cmd_string, output_file):
    '''
    Creates the export task using a KubernetesPodOperator.

    Parameters:
        dag - the parent dag
        command - stellar-etl command type (ex. export_ledgers, export_accounts)
        etl_cmd_string - a string of the fully formed command that includes all flags and arguments to be sent to the etl
        output_file - filename for the output file or folder

    Returns:
        the KubernetesPodOperator for the export task
    '''
    # NOTE(review): imported inside the function — presumably to defer loading
    # these deprecated airflow.kubernetes helpers; confirm.
    from airflow.kubernetes.volume import Volume
    from airflow.kubernetes.volume_mount import VolumeMount

    # Shared PVC-backed data volume where the etl writes its output.
    data_mount = VolumeMount(Variable.get('volume_name'), Variable.get("image_output_path"), '', False)
    volume_config = {
        'persistentVolumeClaim': {
            'claimName': Variable.get('volume_claim_name')
        }
    }
    data_volume = Volume(Variable.get('volume_name'), volume_config)

    cmd = ['bash']
    # Run the etl command, then append the output filename to the xcom return
    # file so do_xcom_push=True can surface it to downstream tasks.
    args = ['-c', f'{etl_cmd_string} && mkdir -p /airflow/xcom/ && echo \'{{"output_file":"{output_file}"}}\' >> /airflow/xcom/return.json']
    config_file_location = Variable.get('kube_config_location')
    # No explicit kube config file means we are running inside the cluster.
    in_cluster = False if config_file_location else True

    return KubernetesPodOperator(
        task_id=command + '_task',
        name=command + '_task',
        namespace=Variable.get('namespace'),
        image=Variable.get('image_name'),
        cmds=cmd,
        arguments=args,
        dag=dag,
        do_xcom_push=True,
        is_delete_operator_pod=True,
        in_cluster=in_cluster,
        config_file=config_file_location,
        volume_mounts=[data_mount],
        volumes=[data_volume],
        affinity=Variable.get('affinity', deserialize_json=True)
    )
) affinity = ONDEMAND_NODE_AFFINITY s3_backup_volume_mount = VolumeMount(name="s3-backup-volume", mount_path=BACKUP_PATH, sub_path=None, read_only=False) s3_backup_volume_config = { "persistentVolumeClaim": { "claimName": "s3-backup-volume" } } s3_backup_volume = Volume(name="s3-backup-volume", configs=s3_backup_volume_config) with dag: START = DummyOperator(task_id="start") # Wait for S3 Key S3_BACKUP_SENSE = S3KeySensor( task_id="s3-backup-sense", poke_interval=60 * 30, bucket_key=S3_KEY, aws_conn_id="aws_nci_db_backup", ) # Download NCI db incremental backup from S3 and restore to RDS Aurora RESTORE_NCI_INCREMENTAL_SYNC = KubernetesPodOperator( namespace="processing",
def make_task(operator: str, task_params: Dict[str, Any]) -> BaseOperator:
    """
    Takes an operator and params and creates an instance of that operator.

    :param operator: dotted import path of the operator class to instantiate
    :param task_params: raw task parameters from the dag-factory config;
        mutated in place (dag-factory-specific keys are popped and converted)
    :returns: instance of operator object
    :raises Exception: if the operator cannot be imported or instantiated
    """
    try:
        # class is a Callable https://stackoverflow.com/a/34578836/3679900
        operator_obj: Callable[..., BaseOperator] = import_string(operator)
    except Exception as err:
        raise Exception(f"Failed to import operator: {operator}") from err
    try:
        if operator_obj in [PythonOperator, BranchPythonOperator]:
            if not task_params.get(
                    "python_callable_name") and not task_params.get(
                        "python_callable_file"):
                raise Exception(
                    "Failed to create task. PythonOperator and BranchPythonOperator requires \
`python_callable_name` and `python_callable_file` parameters."
                )
            # Resolve the named callable from the given file into a function object.
            task_params[
                "python_callable"]: Callable = utils.get_python_callable(
                    task_params["python_callable_name"],
                    task_params["python_callable_file"],
                )
            # remove dag-factory specific parameters
            # Airflow 2.0 doesn't allow these to be passed to operator
            del task_params["python_callable_name"]
            del task_params["python_callable_file"]

        # Check for the custom success and failure callables in SqlSensor. These are considered
        # optional, so no failures in case they aren't found. Note: there's no reason to
        # declare both a callable file and a lambda function for success/failure parameter.
        # If both are found the object will not throw and error, instead callable file will
        # take precedence over the lambda function
        if operator_obj in [SqlSensor]:
            # Success checks
            if task_params.get("success_check_file") and task_params.get(
                    "success_check_name"):
                task_params[
                    "success"]: Callable = utils.get_python_callable(
                        task_params["success_check_name"],
                        task_params["success_check_file"],
                    )
                del task_params["success_check_name"]
                del task_params["success_check_file"]
            elif task_params.get("success_check_lambda"):
                task_params[
                    "success"]: Callable = utils.get_python_callable_lambda(
                        task_params["success_check_lambda"])
                del task_params["success_check_lambda"]
            # Failure checks
            if task_params.get("failure_check_file") and task_params.get(
                    "failure_check_name"):
                task_params[
                    "failure"]: Callable = utils.get_python_callable(
                        task_params["failure_check_name"],
                        task_params["failure_check_file"],
                    )
                del task_params["failure_check_name"]
                del task_params["failure_check_file"]
            elif task_params.get("failure_check_lambda"):
                task_params[
                    "failure"]: Callable = utils.get_python_callable_lambda(
                        task_params["failure_check_lambda"])
                del task_params["failure_check_lambda"]

        if operator_obj in [HttpSensor]:
            # response_check may come from a named callable in a file OR a lambda string.
            if not (task_params.get("response_check_name")
                    and task_params.get("response_check_file")
                    ) and not task_params.get("response_check_lambda"):
                raise Exception(
                    "Failed to create task. HttpSensor requires \
`response_check_name` and `response_check_file` parameters \
or `response_check_lambda` parameter.")
            if task_params.get("response_check_file"):
                task_params[
                    "response_check"]: Callable = utils.get_python_callable(
                        task_params["response_check_name"],
                        task_params["response_check_file"],
                    )
                # remove dag-factory specific parameters
                # Airflow 2.0 doesn't allow these to be passed to operator
                del task_params["response_check_name"]
                del task_params["response_check_file"]
            else:
                task_params[
                    "response_check"]: Callable = utils.get_python_callable_lambda(
                        task_params["response_check_lambda"])
                # remove dag-factory specific parameters
                # Airflow 2.0 doesn't allow these to be passed to operator
                del task_params["response_check_lambda"]

        # KubernetesPodOperator
        # Rehydrate dict-based params into their kubernetes client objects.
        if operator_obj == KubernetesPodOperator:
            task_params["secrets"] = ([
                Secret(**v) for v in task_params.get("secrets")
            ] if task_params.get("secrets") is not None else None)
            task_params["ports"] = ([
                Port(**v) for v in task_params.get("ports")
            ] if task_params.get("ports") is not None else None)
            task_params["volume_mounts"] = ([
                VolumeMount(**v) for v in task_params.get("volume_mounts")
            ] if task_params.get("volume_mounts") is not None else None)
            task_params["volumes"] = ([
                Volume(**v) for v in task_params.get("volumes")
            ] if task_params.get("volumes") is not None else None)
            task_params["pod_runtime_info_envs"] = ([
                PodRuntimeInfoEnv(**v)
                for v in task_params.get("pod_runtime_info_envs")
            ] if task_params.get("pod_runtime_info_envs") is not None else None)
            task_params["full_pod_spec"] = (
                V1Pod(**task_params.get("full_pod_spec"))
                if task_params.get("full_pod_spec") is not None else None)
            task_params["init_containers"] = ([
                V1Container(**v) for v in task_params.get("init_containers")
            ] if task_params.get("init_containers") is not None else None)

        # Convert *_secs convenience params into timedelta objects.
        if utils.check_dict_key(task_params, "execution_timeout_secs"):
            task_params["execution_timeout"]: timedelta = timedelta(
                seconds=task_params["execution_timeout_secs"])
            del task_params["execution_timeout_secs"]
        if utils.check_dict_key(task_params, "sla_secs"):
            task_params["sla"]: timedelta = timedelta(
                seconds=task_params["sla_secs"])
            del task_params["sla_secs"]
        if utils.check_dict_key(task_params, "execution_delta_secs"):
            task_params["execution_delta"]: timedelta = timedelta(
                seconds=task_params["execution_delta_secs"])
            del task_params["execution_delta_secs"]
        if utils.check_dict_key(
                task_params, "execution_date_fn_name") and utils.check_dict_key(
                    task_params, "execution_date_fn_file"):
            task_params[
                "execution_date_fn"]: Callable = utils.get_python_callable(
                    task_params["execution_date_fn_name"],
                    task_params["execution_date_fn_file"],
                )
            del task_params["execution_date_fn_name"]
            del task_params["execution_date_fn_file"]

        # on_execute_callback is an Airflow 2.0 feature
        if utils.check_dict_key(
                task_params, "on_execute_callback"
        ) and version.parse(AIRFLOW_VERSION) >= version.parse("2.0.0"):
            task_params["on_execute_callback"]: Callable = import_string(
                task_params["on_execute_callback"])
        if utils.check_dict_key(task_params, "on_failure_callback"):
            task_params["on_failure_callback"]: Callable = import_string(
                task_params["on_failure_callback"])
        if utils.check_dict_key(task_params, "on_success_callback"):
            task_params["on_success_callback"]: Callable = import_string(
                task_params["on_success_callback"])
        if utils.check_dict_key(task_params, "on_retry_callback"):
            task_params["on_retry_callback"]: Callable = import_string(
                task_params["on_retry_callback"])

        # use variables as arguments on operator
        if utils.check_dict_key(task_params, "variables_as_arguments"):
            variables: List[Dict[str, str]] = task_params.get(
                "variables_as_arguments")
            for variable in variables:
                if Variable.get(variable["variable"],
                                default_var=None) is not None:
                    task_params[variable["attribute"]] = Variable.get(
                        variable["variable"], default_var=None)
            del task_params["variables_as_arguments"]

        task: BaseOperator = operator_obj(**task_params)
    except Exception as err:
        raise Exception(f"Failed to create {operator_obj} task") from err
    return task
OWS_SECRETS = [ Secret("env", "DB_USERNAME", SECRET_OWS_WRITER_NAME, "postgres-username"), Secret("env", "DB_PASSWORD", SECRET_OWS_WRITER_NAME, "postgres-password"), ] # MOUNT OWS_CFG via init_container # for main container mount ows_cfg_mount = VolumeMount("ows-config-volume", mount_path=OWS_CFG_MOUNT_PATH, sub_path=None, read_only=False) ows_cfg_volume_config = {} ows_cfg_volume = Volume(name="ows-config-volume", configs=ows_cfg_volume_config) # for init container mount cfg_image_mount = k8s.V1VolumeMount( mount_path=OWS_CFG_MOUNT_PATH, name="ows-config-volume", sub_path=None, read_only=False, ) config_container = k8s.V1Container( image=OWS_CONFIG_IMAGE, command=["cp"], args=["-r", OWS_CFG_IMAGEPATH, OWS_CFG_FOLDER_PATH], volume_mounts=[cfg_image_mount], name="mount-ows-config",
def test_to_v1_pod(self, mock_uuid):
    """Deprecated Pod must convert to the expected serialized V1Pod structure."""
    from airflow.contrib.kubernetes.pod import Pod as DeprecatedPod
    from airflow.kubernetes.volume import Volume
    from airflow.kubernetes.volume_mount import VolumeMount
    from airflow.kubernetes.secret import Secret
    from airflow.kubernetes.pod import Resources
    import uuid
    # Pin uuid4 so generated 'secretvol<uuid>' volume names are deterministic.
    static_uuid = uuid.UUID('cf4a56d2-8101-4217-b027-2af6216feb48')
    mock_uuid.return_value = static_uuid
    pod = DeprecatedPod(
        image="foo",
        name="bar",
        namespace="baz",
        image_pull_policy="Never",
        envs={"test_key": "test_value"},
        cmds=["airflow"],
        resources=Resources(request_memory="1G",
                            request_cpu="100Mi",
                            limit_gpu="100G"),
        init_containers=k8s.V1Container(name="test-container",
                                        volume_mounts=k8s.V1VolumeMount(
                                            mount_path="/foo/bar",
                                            name="init-volume-secret")),
        # Volumes may be given as Volume objects or raw dicts; both forms are exercised.
        volumes=[
            Volume(name="foo", configs={}),
            {
                "name": "bar",
                'secret': {
                    'secretName': 'volume-secret'
                }
            }
        ],
        secrets=[
            Secret("volume", None, "init-volume-secret"),
            Secret('env', "AIRFLOW_SECRET", 'secret_name', "airflow_config"),
            Secret("volume", "/opt/airflow", "volume-secret", "secret-key")
        ],
        volume_mounts=[
            VolumeMount(name="foo",
                        mount_path="/mnt",
                        sub_path="/",
                        read_only=True)
        ])
    k8s_client = ApiClient()
    result = pod.to_v1_kubernetes_pod()
    result = k8s_client.sanitize_for_serialization(result)
    # Expected serialized pod; volume secrets get auto-generated
    # 'secretvol' + uuid names derived from the pinned uuid above.
    expected = \
        {'metadata': {'labels': {}, 'name': 'bar', 'namespace': 'baz'},
         'spec': {'affinity': {},
                  'containers': [{'args': [],
                                  'command': ['airflow'],
                                  'env': [{'name': 'test_key', 'value': 'test_value'},
                                          {'name': 'AIRFLOW_SECRET',
                                           'valueFrom': {'secretKeyRef': {'key': 'airflow_config',
                                                                          'name': 'secret_name'}}}],
                                  'image': 'foo',
                                  'imagePullPolicy': 'Never',
                                  'name': 'base',
                                  'resources': {'limits': {'nvidia.com/gpu': '100G'},
                                                'requests': {'cpu': '100Mi', 'memory': '1G'}},
                                  'volumeMounts': [{'mountPath': '/mnt',
                                                    'name': 'foo',
                                                    'readOnly': True,
                                                    'subPath': '/'},
                                                   {'mountPath': '/opt/airflow',
                                                    'name': 'secretvol' + str(static_uuid),
                                                    'readOnly': True}]}],
                  'hostNetwork': False,
                  'initContainers': {'name': 'test-container',
                                     'volumeMounts': {'mountPath': '/foo/bar',
                                                      'name': 'init-volume-secret'}},
                  'securityContext': {},
                  'tolerations': [],
                  'volumes': [{'name': 'foo'},
                              {'name': 'bar', 'secret': {'secretName': 'volume-secret'}},
                              {'name': 'secretvolcf4a56d2-8101-4217-b027-2af6216feb48',
                               'secret': {'secretName': 'init-volume-secret'}},
                              {'name': 'secretvol' + str(static_uuid),
                               'secret': {'secretName': 'volume-secret'}}
                              ]}}
    self.maxDiff = None
    self.assertEqual(expected, result)
'retries': 0, 'retry_delay': timedelta(minutes=1) } volume_mount_nb = VolumeMount(name='nb-data', mount_path='/opt/airflow/nb', sub_path=None, read_only=False) volume_config = { 'persistentVolumeClaim': { 'claimName': 'airflow-nfs-pvc-nb-01' } } volume_nb = Volume(name='nb-data', configs=volume_config) dag = DAG( 'kubernetes_pod_operator_03_with_sleep', default_args=default_args, schedule_interval=timedelta(minutes=10)) start = DummyOperator(task_id='start', dag=dag) passing = KubernetesPodOperator(namespace='default', image="python:3.6", cmds=["python"], arguments=["/opt/airflow/nb/sleep.py"], labels={"foo": "bar"}, name="passing-test", task_id="passing-task", get_logs=True, image_pull_policy="IfNotPresent",
def test_convert_to_airflow_pod(self):
    """_convert_to_airflow_pod should map a k8s.V1Pod back onto the legacy Pod model."""
    input_pod = k8s.V1Pod(
        metadata=k8s.V1ObjectMeta(name="foo", namespace="bar"),
        spec=k8s.V1PodSpec(
            init_containers=[
                k8s.V1Container(name="init-container",
                                volume_mounts=[
                                    k8s.V1VolumeMount(mount_path="/tmp",
                                                      name="init-secret")
                                ])
            ],
            containers=[
                k8s.V1Container(
                    name="base",
                    command=["foo"],
                    image="myimage",
                    env=[
                        k8s.V1EnvVar(
                            name="AIRFLOW_SECRET",
                            value_from=k8s.V1EnvVarSource(
                                secret_key_ref=k8s.V1SecretKeySelector(
                                    name="ai", key="secret_key")))
                    ],
                    ports=[
                        k8s.V1ContainerPort(
                            name="myport",
                            container_port=8080,
                        )
                    ],
                    volume_mounts=[
                        k8s.V1VolumeMount(name="myvolume",
                                          mount_path="/tmp/mount",
                                          read_only="True"),
                        k8s.V1VolumeMount(name='airflow-config',
                                          mount_path='/config',
                                          sub_path='airflow.cfg',
                                          read_only=True),
                        k8s.V1VolumeMount(name="airflow-secret",
                                          mount_path="/opt/mount",
                                          read_only=True)
                    ])
            ],
            security_context=k8s.V1PodSecurityContext(
                run_as_user=0,
                fs_group=0,
            ),
            volumes=[
                k8s.V1Volume(name="myvolume"),
                k8s.V1Volume(
                    name="airflow-config",
                    config_map=k8s.V1ConfigMap(data="airflow-data")),
                k8s.V1Volume(name="airflow-secret",
                             secret=k8s.V1SecretVolumeSource(
                                 secret_name="secret-name", )),
                k8s.V1Volume(name="init-secret",
                             secret=k8s.V1SecretVolumeSource(
                                 secret_name="init-secret", ))
            ]))
    result_pod = _convert_to_airflow_pod(input_pod)
    # Expected legacy Pod: the AIRFLOW_SECRET env var becomes a Secret,
    # and each V1Volume becomes a Volume whose configs carry the raw spec.
    expected = Pod(
        name="foo",
        namespace="bar",
        envs={},
        init_containers=[{
            'name': 'init-container',
            'volumeMounts': [{
                'mountPath': '/tmp',
                'name': 'init-secret'
            }]
        }],
        cmds=["foo"],
        image="myimage",
        ports=[Port(name="myport", container_port=8080)],
        volume_mounts=[
            VolumeMount(name="myvolume",
                        mount_path="/tmp/mount",
                        sub_path=None,
                        read_only="True"),
            VolumeMount(name="airflow-config",
                        read_only=True,
                        mount_path="/config",
                        sub_path="airflow.cfg"),
            VolumeMount(name="airflow-secret",
                        mount_path="/opt/mount",
                        sub_path=None,
                        read_only=True)
        ],
        secrets=[Secret("env", "AIRFLOW_SECRET", "ai", "secret_key")],
        security_context={
            'fsGroup': 0,
            'runAsUser': 0
        },
        volumes=[
            Volume(name="myvolume", configs={'name': 'myvolume'}),
            Volume(name="airflow-config",
                   configs={
                       'configMap': {
                           'data': 'airflow-data'
                       },
                       'name': 'airflow-config'
                   }),
            Volume(name='airflow-secret',
                   configs={
                       'name': 'airflow-secret',
                       'secret': {
                           'secretName': 'secret-name'
                       }
                   }),
            Volume(name='init-secret',
                   configs={
                       'name': 'init-secret',
                       'secret': {
                           'secretName': 'init-secret'
                       }
                   })
        ],
    )
    expected_dict = expected.as_dict()
    result_dict = result_pod.as_dict()
    print(result_pod.volume_mounts)
    # Normalize the result's volume configs before comparing dicts.
    parsed_configs = self.pull_out_volumes(result_dict)
    result_dict['volumes'] = parsed_configs
    self.assertEqual(result_dict['secrets'], expected_dict['secrets'])
    self.assertDictEqual(expected_dict, result_dict)
} volume_mount = VolumeMount( 'persist-airflow-logs' , mount_path='/opt/airflow/logs' , sub_path=None , read_only=False ) volume_config= { 'persistentVolumeClaim': { 'claimName': 'persist-airflow-logs' } } volume = Volume( name='persist-airflow-logs' , configs=volume_config ) default_args = { 'owner': 'airflow', } #get current proxies in db batch_size=15 batch_g_size=5 #used to remove that pod timeout error CP_count=db_import.getChildPagesCount(ps_user="******", ps_pass="******", ps_host="172.22.114.65", ps_port="5432", ps_db="scrape_db") #iterate to run l_proxy_mods=["scrape_child"] with DAG(
def make_task(operator: str, task_params: Dict[str, Any],
              af_vars: Dict[str, Any]) -> BaseOperator:
    """
    Takes an operator and params and creates an instance of that operator.

    :param operator: dotted import path of the operator class to instantiate
    :param task_params: raw task parameters from the dag-factory config;
        mutated in place (dag-factory-specific keys are popped and converted)
    :param af_vars: deployment-level values (e.g. ECS_* settings) applied to
        matching operator parameters
    :returns: instance of operator object
    :raises Exception: if the operator cannot be imported or instantiated
    """
    try:
        # class is a Callable https://stackoverflow.com/a/34578836/3679900
        operator_obj: Callable[..., BaseOperator] = import_string(operator)
    except Exception as err:
        raise Exception(f"Failed to import operator: {operator}") from err
    try:
        if operator_obj in [
                PythonOperator, BranchPythonOperator, PythonSensor
        ]:
            # python_callable may be supplied directly (pyyaml notation) or via
            # a (name, file) pair that is resolved here.
            if (not task_params.get("python_callable")
                    and not task_params.get("python_callable_name")
                    and not task_params.get("python_callable_file")):
                # pylint: disable=line-too-long
                raise Exception(
                    "Failed to create task. PythonOperator, BranchPythonOperator and PythonSensor requires \
`python_callable_name` and `python_callable_file` "
                    "parameters.\nOptionally you can load python_callable "
                    "from a file. with the special pyyaml notation:\n"
                    "  python_callable_file: !!python/name:my_module.my_func")
            if not task_params.get("python_callable"):
                task_params[
                    "python_callable"]: Callable = utils.get_python_callable(
                        task_params["python_callable_name"],
                        task_params["python_callable_file"],
                    )
                # remove dag-factory specific parameters
                # Airflow 2.0 doesn't allow these to be passed to operator
                del task_params["python_callable_name"]
                del task_params["python_callable_file"]

        # Check for the custom success and failure callables in SqlSensor. These are considered
        # optional, so no failures in case they aren't found. Note: there's no reason to
        # declare both a callable file and a lambda function for success/failure parameter.
        # If both are found the object will not throw and error, instead callable file will
        # take precedence over the lambda function
        if operator_obj in [SqlSensor]:
            # Success checks
            if task_params.get("success_check_file") and task_params.get(
                    "success_check_name"):
                task_params[
                    "success"]: Callable = utils.get_python_callable(
                        task_params["success_check_name"],
                        task_params["success_check_file"],
                    )
                del task_params["success_check_name"]
                del task_params["success_check_file"]
            elif task_params.get("success_check_lambda"):
                task_params[
                    "success"]: Callable = utils.get_python_callable_lambda(
                        task_params["success_check_lambda"])
                del task_params["success_check_lambda"]
            # Failure checks
            if task_params.get("failure_check_file") and task_params.get(
                    "failure_check_name"):
                task_params[
                    "failure"]: Callable = utils.get_python_callable(
                        task_params["failure_check_name"],
                        task_params["failure_check_file"],
                    )
                del task_params["failure_check_name"]
                del task_params["failure_check_file"]
            elif task_params.get("failure_check_lambda"):
                task_params[
                    "failure"]: Callable = utils.get_python_callable_lambda(
                        task_params["failure_check_lambda"])
                del task_params["failure_check_lambda"]

        if operator_obj in [HttpSensor]:
            # response_check may come from a named callable in a file OR a lambda string.
            if not (task_params.get("response_check_name")
                    and task_params.get("response_check_file")
                    ) and not task_params.get("response_check_lambda"):
                raise Exception(
                    "Failed to create task. HttpSensor requires \
`response_check_name` and `response_check_file` parameters \
or `response_check_lambda` parameter.")
            if task_params.get("response_check_file"):
                task_params[
                    "response_check"]: Callable = utils.get_python_callable(
                        task_params["response_check_name"],
                        task_params["response_check_file"],
                    )
                # remove dag-factory specific parameters
                # Airflow 2.0 doesn't allow these to be passed to operator
                del task_params["response_check_name"]
                del task_params["response_check_file"]
            else:
                task_params[
                    "response_check"]: Callable = utils.get_python_callable_lambda(
                        task_params["response_check_lambda"])
                # remove dag-factory specific parameters
                # Airflow 2.0 doesn't allow these to be passed to operator
                del task_params["response_check_lambda"]

        # KubernetesPodOperator
        # Rehydrate dict-based params into their kubernetes client objects.
        if operator_obj == KubernetesPodOperator:
            task_params["secrets"] = ([
                Secret(**v) for v in task_params.get("secrets")
            ] if task_params.get("secrets") is not None else None)
            task_params["ports"] = ([
                Port(**v) for v in task_params.get("ports")
            ] if task_params.get("ports") is not None else None)
            task_params["volume_mounts"] = ([
                VolumeMount(**v) for v in task_params.get("volume_mounts")
            ] if task_params.get("volume_mounts") is not None else None)
            task_params["volumes"] = ([
                Volume(**v) for v in task_params.get("volumes")
            ] if task_params.get("volumes") is not None else None)
            task_params["pod_runtime_info_envs"] = ([
                PodRuntimeInfoEnv(**v)
                for v in task_params.get("pod_runtime_info_envs")
            ] if task_params.get("pod_runtime_info_envs") is not None else None)
            task_params["full_pod_spec"] = (
                V1Pod(**task_params.get("full_pod_spec"))
                if task_params.get("full_pod_spec") is not None else None)
            task_params["init_containers"] = ([
                V1Container(**v) for v in task_params.get("init_containers")
            ] if task_params.get("init_containers") is not None else None)

        if operator_obj == DockerOperator:
            # Resolve environment values through os.environ, falling back to the
            # literal value when no such environment variable exists.
            if task_params.get("environment") is not None:
                task_params["environment"] = {
                    k: os.environ.get(v, v)
                    for k, v in task_params["environment"].items()
                }

        if operator_obj == EcsOperator:
            # Same env-var resolution for each container override's environment.
            for c in task_params["overrides"]["containerOverrides"]:
                if c.get('environment') is not None:
                    for env in c['environment']:
                        env['value'] = os.environ.get(env['value'],
                                                      env['value'])
            # Deployment-level networking and cluster settings come from af_vars.
            if 'ECS_SECURITY_GROUPS' in af_vars and 'network_configuration' in task_params:
                task_params["network_configuration"]["awsvpcConfiguration"]['securityGroups'] \
                    = af_vars['ECS_SECURITY_GROUPS']
            if 'ECS_SUBNETS' in af_vars and 'network_configuration' in task_params:
                task_params['network_configuration'][
                    "awsvpcConfiguration"]["subnets"] = af_vars["ECS_SUBNETS"]
            if af_vars.get('ECS_CLUSTER'):
                # Cluster name is prefixed onto the task definition and appended
                # to the log group, both lower-cased.
                task_params['cluster'] = af_vars["ECS_CLUSTER"]
                task_params['task_definition'] = (
                    af_vars.get('ECS_CLUSTER') + '_' +
                    task_params['task_definition']).lower()
                task_params['awslogs_group'] = \
                    task_params['awslogs_group'] + '/' + af_vars.get('ECS_CLUSTER').lower()

        # Convert *_secs convenience params into timedelta objects.
        if utils.check_dict_key(task_params, "execution_timeout_secs"):
            task_params["execution_timeout"]: timedelta = timedelta(
                seconds=task_params["execution_timeout_secs"])
            del task_params["execution_timeout_secs"]
        if utils.check_dict_key(task_params, "sla_secs"):
            task_params["sla"]: timedelta = timedelta(
                seconds=task_params["sla_secs"])
            del task_params["sla_secs"]
        if utils.check_dict_key(task_params, "execution_delta_secs"):
            task_params["execution_delta"]: timedelta = timedelta(
                seconds=task_params["execution_delta_secs"])
            del task_params["execution_delta_secs"]
        if utils.check_dict_key(
                task_params, "execution_date_fn_name") and utils.check_dict_key(
                    task_params, "execution_date_fn_file"):
            task_params[
                "execution_date_fn"]: Callable = utils.get_python_callable(
                    task_params["execution_date_fn_name"],
                    task_params["execution_date_fn_file"],
                )
            del task_params["execution_date_fn_name"]
            del task_params["execution_date_fn_file"]

        # on_execute_callback is an Airflow 2.0 feature
        if utils.check_dict_key(
                task_params, "on_execute_callback"
        ) and version.parse(AIRFLOW_VERSION) >= version.parse("2.0.0"):
            task_params["on_execute_callback"]: Callable = import_string(
                task_params["on_execute_callback"])
        if utils.check_dict_key(task_params, "on_failure_callback"):
            task_params["on_failure_callback"]: Callable = import_string(
                task_params["on_failure_callback"])
        if utils.check_dict_key(task_params, "on_success_callback"):
            task_params["on_success_callback"]: Callable = import_string(
                task_params["on_success_callback"])
        if utils.check_dict_key(task_params, "on_retry_callback"):
            task_params["on_retry_callback"]: Callable = import_string(
                task_params["on_retry_callback"])

        # use variables as arguments on operator
        if utils.check_dict_key(task_params, "variables_as_arguments"):
            variables: List[Dict[str, str]] = task_params.get(
                "variables_as_arguments")
            for variable in variables:
                if Variable.get(variable["variable"],
                                default_var=None) is not None:
                    task_params[variable["attribute"]] = Variable.get(
                        variable["variable"], default_var=None)
            del task_params["variables_as_arguments"]

        # use variables as arguments on operator
        if utils.check_dict_key(task_params, "af_vars_as_arguments"):
            variables: List[Dict[str, str]] = task_params.get(
                "af_vars_as_arguments")
            for variable in variables:
                if af_vars.get(variable["variable"], None) is not None:
                    task_params[variable["attribute"]] = af_vars.get(
                        variable["variable"], None)
            del task_params["af_vars_as_arguments"]

        task: BaseOperator = operator_obj(**task_params)
    except Exception as err:
        raise Exception(f"Failed to create {operator_obj} task") from err
    return task
def make_task(operator: str, task_params: Dict[str, Any]) -> BaseOperator:
    """
    Takes an operator and params and creates an instance of that operator.

    :param operator: dotted import path of the operator class to instantiate
    :param task_params: raw task parameters from the config; mutated in place
        (dag-factory-specific keys are converted to operator arguments)
    :returns: instance of operator object
    :raises Exception: if the operator cannot be imported or instantiated
    """
    try:
        # class is a Callable https://stackoverflow.com/a/34578836/3679900
        operator_obj: Callable[..., BaseOperator] = import_string(operator)
    except Exception as err:
        # BUG FIX: the original did `raise f"..." from err`, which raises a str —
        # a TypeError at runtime, since exceptions must derive from BaseException.
        # Wrapped in Exception(...), matching the other make_task variants.
        raise Exception(f"Failed to import operator: {operator}") from err
    try:
        if operator_obj == PythonOperator:
            if not task_params.get(
                    "python_callable_name") and not task_params.get(
                        "python_callable_file"):
                raise Exception(
                    "Failed to create task. PythonOperator requires `python_callable_name` \
and `python_callable_file` parameters.")
            # Resolve the named callable from the given file into a function object.
            task_params[
                "python_callable"]: Callable = utils.get_python_callable(
                    task_params["python_callable_name"],
                    task_params["python_callable_file"],
                )
            # NOTE(review): unlike later versions, python_callable_name/file are
            # not deleted here before operator instantiation — confirm intended.

        # KubernetesPodOperator
        # Rehydrate dict-based params into their kubernetes client objects.
        if operator_obj == KubernetesPodOperator:
            task_params["secrets"] = ([
                Secret(**v) for v in task_params.get("secrets")
            ] if task_params.get("secrets") is not None else None)
            task_params["ports"] = ([
                Port(**v) for v in task_params.get("ports")
            ] if task_params.get("ports") is not None else None)
            task_params["volume_mounts"] = ([
                VolumeMount(**v) for v in task_params.get("volume_mounts")
            ] if task_params.get("volume_mounts") is not None else None)
            task_params["volumes"] = ([
                Volume(**v) for v in task_params.get("volumes")
            ] if task_params.get("volumes") is not None else None)
            task_params["pod_runtime_info_envs"] = ([
                PodRuntimeInfoEnv(**v)
                for v in task_params.get("pod_runtime_info_envs")
            ] if task_params.get("pod_runtime_info_envs") is not None else None)
            task_params["full_pod_spec"] = (
                V1Pod(**task_params.get("full_pod_spec"))
                if task_params.get("full_pod_spec") is not None else None)
            task_params["init_containers"] = ([
                V1Container(**v) for v in task_params.get("init_containers")
            ] if task_params.get("init_containers") is not None else None)

        # Convert the execution_timeout_secs convenience param into a timedelta.
        if utils.check_dict_key(task_params, "execution_timeout_secs"):
            task_params["execution_timeout"]: timedelta = timedelta(
                seconds=task_params["execution_timeout_secs"])
            del task_params["execution_timeout_secs"]

        # use variables as arguments on operator
        if utils.check_dict_key(task_params, "variables_as_arguments"):
            variables: List[Dict[str, str]] = task_params.get(
                "variables_as_arguments")
            for variable in variables:
                if Variable.get(variable["variable"],
                                default_var=None) is not None:
                    task_params[variable["attribute"]] = Variable.get(
                        variable["variable"], default_var=None)
            del task_params["variables_as_arguments"]

        task: BaseOperator = operator_obj(**task_params)
    except Exception as err:
        # BUG FIX: same str-raise defect as above — wrapped in Exception(...).
        raise Exception(f"Failed to create {operator_obj} task") from err
    return task
"operator": "Equal", "value": "wagl", "effect": "NoSchedule" }] ancillary_volume_mount = VolumeMount( name="wagl-nrt-ancillary-volume", mount_path="/ancillary", sub_path=None, read_only=False, ) ancillary_volume = Volume( name="wagl-nrt-ancillary-volume", configs={ "persistentVolumeClaim": { "claimName": "wagl-nrt-ancillary-volume" } }, ) def setup_logging(): """ """ _LOG.setLevel(logging.INFO) handler = logging.StreamHandler() handler.setLevel(logging.INFO) formatter = logging.Formatter( "%(asctime)s - %(name)s - %(levelname)s - %(message)s") handler.setFormatter(formatter)
from airflow.operators.bash import BashOperator
from airflow.providers.cncf.kubernetes.operators.kubernetes_pod import KubernetesPodOperator
from airflow.utils.dates import days_ago

# [START howto_operator_k8s_cluster_resources]
# Expose one secret three ways: as a mounted file, as a single env var,
# and as env vars for every key in the secret.
secret_file = Secret('volume', '/etc/sql_conn', 'airflow-secrets', 'sql_alchemy_conn')
secret_env = Secret('env', 'SQL_CONN', 'airflow-secrets', 'sql_alchemy_conn')
secret_all_keys = Secret('env', None, 'airflow-secrets-2')
# Read-only mount backed by the 'test-volume' PVC defined below.
volume_mount = VolumeMount('test-volume',
                           mount_path='/root/mount_file',
                           sub_path=None,
                           read_only=True)
configmaps = ['test-configmap-1', 'test-configmap-2']
volume_config = {'persistentVolumeClaim': {'claimName': 'test-volume'}}
volume = Volume(name='test-volume', configs=volume_config)
# [END howto_operator_k8s_cluster_resources]

port = Port('http', 80)
# Resources for an init container example: shared volume plus two env vars.
init_container_volume_mounts = [
    k8s.V1VolumeMount(mount_path='/etc/foo',
                      name='test-volume',
                      sub_path=None,
                      read_only=True)
]
init_environments = [
    k8s.V1EnvVar(name='key1', value='value1'),
    k8s.V1EnvVar(name='key2', value='value2')
]
dag=dag) failed_callback = ExtendedHttpOperator( http_conn_id="apar_graphql", endpoint="graphql/", method="POST", headers={"Content-Type": "application/json"}, data_fn=partial(get_job_status_update_callable, "FAILED"), task_id="failed_callback", dag=dag, trigger_rule="all_failed", ) volume_config = {"persistentVolumeClaim": {"claimName": "pvc-data-name"}} test_volume = Volume(name="pv-data-name", configs=volume_config) test_volume_mount = VolumeMount("pv-data-name", mount_path="/data", sub_path=None, read_only=False) fastq_dump = KubernetesPodOperator( namespace="airflow", image="quay.io/biocontainers/sra-tools:2.10.0--pl526he1b5a44_0", cmds=[ "fastq-dump", "--outdir", "data/fastq", "--gzip", "--skip-technical", "--readids", "--read-filter", "pass", "--dumpbase", "--split-3", "--clip", "SRR6982497" ], name="fastq_dump",