Example #1
    def test_volume_mount(self):
        with patch.object(PodLauncher, 'log') as mock_logger:
            volume_mount = VolumeMount('test-volume',
                                       mount_path='/tmp/test_volume',
                                       sub_path=None,
                                       read_only=False)

            volume_config = {
                'persistentVolumeClaim': {
                    'claimName': 'test-volume'
                }
            }
            volume = Volume(name='test-volume', configs=volume_config)
            args = [
                "echo \"retrieved from mount\" > /tmp/test_volume/test.txt "
                "&& cat /tmp/test_volume/test.txt"
            ]
            k = KubernetesPodOperator(
                namespace='default',
                image="ubuntu:16.04",
                cmds=["bash", "-cx"],
                arguments=args,
                labels={"foo": "bar"},
                volume_mounts=[volume_mount],
                volumes=[volume],
                is_delete_operator_pod=False,
                name="test",
                task_id="task",
                in_cluster=False,
                do_xcom_push=False,
            )
            context = create_context(k)
            k.execute(context=context)
            mock_logger.info.assert_any_call('retrieved from mount')
            actual_pod = self.api_client.sanitize_for_serialization(k.pod)
            self.expected_pod['spec']['containers'][0]['args'] = args
            self.expected_pod['spec']['containers'][0]['volumeMounts'] = [{
                'name': 'test-volume',
                'mountPath': '/tmp/test_volume',
                'readOnly': False
            }]
            self.expected_pod['spec']['volumes'] = [{
                'name': 'test-volume',
                'persistentVolumeClaim': {
                    'claimName': 'test-volume'
                }
            }]
            self.assertEqual(self.expected_pod, actual_pod)
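Note: in newer Airflow releases the airflow.kubernetes.volume / volume_mount helpers used in this test are deprecated in favour of the Kubernetes client models. A rough equivalent of the mount and volume above, assuming a provider version that accepts the client models directly, would be:

from kubernetes.client import models as k8s

volume_mount = k8s.V1VolumeMount(name='test-volume',
                                 mount_path='/tmp/test_volume',
                                 sub_path=None,
                                 read_only=False)
volume = k8s.V1Volume(
    name='test-volume',
    persistent_volume_claim=k8s.V1PersistentVolumeClaimVolumeSource(
        claim_name='test-volume'))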
Example #2
    def __kubernetes_kwargs(self, task: ContainerTask):
        config = copy.deepcopy(self.executor_config)
        kubernetes_kwargs = {
            'task_id': task.task_id,
            'image': task.image,
            'arguments': task.arguments,
            'namespace': os.environ.get('AIRFLOW__KUBERNETES__NAMESPACE', 'default'),
            'name': task.task_id.replace('_', '-'),
            # Environment variables are strings, so parse the boolean explicitly.
            'in_cluster': os.environ.get('AIRFLOW__KUBERNETES__IN_CLUSTER', 'False').lower() == 'true',
            'image_pull_policy': get_variable('image_pull_policy', default_val='IfNotPresent'),
            'get_logs': config.pop('get_logs', True),
            'is_delete_operator_pod': config.pop('is_delete_operator_pod', True),
            'startup_timeout_seconds': config.pop('startup_timeout_seconds', 1200),
            'env_vars': task.env_vars,
            'do_xcom_push': task.task_config.get('do_xcom_push', False),
            'image_pull_secrets': config.pop('image_pull_secrets', 'regcred'),
            'volumes': self.volumes,
            'config_file': os.environ.get('AIRFLOW__KUBERNETES__CONFIG_FILE'),
            'cluster_context': os.environ.get('AIRFLOW__KUBERNETES__CLUSTER_CONTEXT', None),
            'cmds': task.cmds,
            'volume_mounts': [
                VolumeMount(mount['volume'],
                            mount['path'],
                            mount.get('sub_path'),
                            mount.get('read_only', False))
                for mount
                in task.mounts
            ]
        }

        config.pop('in_cluster', None)
        config.pop('volumes', None)
        config.pop('volume_mounts', None)
        config.pop('executor', None)
        config.pop('type', None)

        kubernetes_kwargs.update(config)

        if env_util.is_running_on_jenkins():
            kubernetes_kwargs['affinity'] = self.__jenkins_kubernetes_affinity()
            kubernetes_kwargs['namespace'] = 'jenkins'

        if not task.dag:
            kubernetes_kwargs.update({
                'start_date': datetime.datetime(1970, 1, 1),
            })

        return kubernetes_kwargs
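The dictionary built above mirrors KubernetesPodOperator's constructor arguments, so presumably it is expanded into the operator elsewhere in the class; a minimal sketch of such a caller (the method name is illustrative, not part of the original):

    def __build_operator(self, task: ContainerTask) -> KubernetesPodOperator:
        # Expand the kwargs assembled by __kubernetes_kwargs into the operator.
        return KubernetesPodOperator(dag=task.dag, **self.__kubernetes_kwargs(task))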
Example #3
def build_kubernetes_pod_exporter(dag, command, etl_cmd_string, output_file):
    '''
    Creates the export task using a KubernetesPodOperator.
    Parameters:
        dag - the parent dag
        command - stellar-etl command type (ex. export_ledgers, export_accounts)
        etl_cmd_string - a string of the fully formed command that includes all flags and arguments to be sent to the etl
        output_file - filename for the output file or folder
    Returns:
        the KubernetesPodOperator for the export task
    '''
    from airflow.kubernetes.volume import Volume
    from airflow.kubernetes.volume_mount import VolumeMount

    data_mount = VolumeMount(Variable.get('volume_name'), Variable.get("image_output_path"), '', False)
    volume_config = {
        'persistentVolumeClaim': {
            'claimName': Variable.get('volume_claim_name')
        }
    }
    data_volume = Volume(Variable.get('volume_name'), volume_config)

    cmd = ['bash']
    args = ['-c', f'{etl_cmd_string} && mkdir -p /airflow/xcom/ && echo \'{{"output_file":"{output_file}"}}\' >> /airflow/xcom/return.json']
    
    config_file_location = Variable.get('kube_config_location')
    in_cluster = not config_file_location
    
    return KubernetesPodOperator(
        task_id=command + '_task',
        name=command + '_task',
        namespace=Variable.get('namespace'),
        image=Variable.get('image_name'),
        cmds=cmd,
        arguments=args,
        dag=dag,
        do_xcom_push=True,
        is_delete_operator_pod=True,
        in_cluster=in_cluster,
        config_file=config_file_location,
        volume_mounts=[data_mount],
        volumes=[data_volume],
        affinity=Variable.get('affinity', deserialize_json=True)
    )
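A hypothetical call, following the docstring above ('export_ledgers' is one of the documented command types; the command string and output filename are placeholders):

etl_cmd_string = 'stellar-etl export_ledgers ...'  # fully formed command, built elsewhere
export_ledgers_task = build_kubernetes_pod_exporter(dag, 'export_ledgers', etl_cmd_string, 'exported_ledgers.txt')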
Example #4
    def test_volume_mount(self):
        with mock.patch.object(PodLauncher, 'log') as mock_logger:
            volume_mount = VolumeMount('test-volume',
                                       mount_path='/root/mount_file',
                                       sub_path=None,
                                       read_only=True)

            volume_config = {
                'persistentVolumeClaim': {
                    'claimName': 'test-volume'
                }
            }
            volume = Volume(name='test-volume', configs=volume_config)
            args = ["cat /root/mount_file/test.txt"]
            k = KubernetesPodOperator(namespace='default',
                                      image="ubuntu:16.04",
                                      cmds=["bash", "-cx"],
                                      arguments=args,
                                      labels={"foo": "bar"},
                                      volume_mounts=[volume_mount],
                                      volumes=[volume],
                                      name="test",
                                      task_id="task")
            k.execute(None)
            mock_logger.info.assert_any_call(b"retrieved from mount\n")
            actual_pod = self.api_client.sanitize_for_serialization(k.pod)
            self.expected_pod['spec']['containers'][0]['args'] = args
            self.expected_pod['spec']['containers'][0]['volumeMounts'] = [{
                'name': 'test-volume',
                'mountPath': '/root/mount_file',
                'readOnly': True
            }]
            self.expected_pod['spec']['volumes'] = [{
                'name': 'test-volume',
                'persistentVolumeClaim': {
                    'claimName': 'test-volume'
                }
            }]
            self.assertEqual(self.expected_pod, actual_pod)
Example #5
 def __kubernetes_kwargs(self):
     kubernetes_kwargs = {
         'namespace': get_variable('kubernetes_namespace', default_val='default'),
         'name': self.task_name.replace('_', '-'),
         'in_cluster': get_variable('in_kubernetes_cluster', default_val=False),
         'image_pull_policy': get_variable('image_pull_policy', default_val='IfNotPresent'),
         'get_logs': True,
         'is_delete_operator_pod': True,
         'startup_timeout_seconds': 300,
         'image_pull_secrets': 'regcred',
         'resources': self.resources,
         'dag': self.dag,
         'volumes': self.volumes,
         'volume_mounts': [
             VolumeMount(mount['volume'],
                         mount['path'],
                         mount.get('sub_path'),
                         mount.get('read_only', False))
             for mount
             in self.mounts
         ]
     }
     return kubernetes_kwargs
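The comprehension above expects self.mounts to be a list of plain dicts; an illustrative shape (keys taken from the lookups in the code, values are placeholders):

mounts = [
    {'volume': 'data-volume', 'path': '/data'},
    {'volume': 'config-volume', 'path': '/etc/app', 'sub_path': 'app.cfg', 'read_only': True},
]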
Example #6
    def make_task(operator: str, task_params: Dict[str, Any]) -> BaseOperator:
        """
        Takes an operator and params and creates an instance of that operator.

        :returns: instance of operator object
        """
        try:
            # class is a Callable https://stackoverflow.com/a/34578836/3679900
            operator_obj: Callable[..., BaseOperator] = import_string(operator)
        except Exception as err:
            raise Exception(f"Failed to import operator: {operator}") from err
        try:
            if operator_obj in [PythonOperator, BranchPythonOperator]:
                if not task_params.get(
                        "python_callable_name") and not task_params.get(
                            "python_callable_file"):
                    raise Exception(
                        "Failed to create task. PythonOperator and BranchPythonOperator require "
                        "`python_callable_name` and `python_callable_file` parameters."
                    )
                task_params[
                    "python_callable"]: Callable = utils.get_python_callable(
                        task_params["python_callable_name"],
                        task_params["python_callable_file"],
                    )
                # remove dag-factory specific parameters
                # Airflow 2.0 doesn't allow these to be passed to operator
                del task_params["python_callable_name"]
                del task_params["python_callable_file"]

            # Check for the custom success and failure callables in SqlSensor. These are considered
            # optional, so there is no failure if they aren't found. Note: there's no reason to
            # declare both a callable file and a lambda function for the success/failure parameter.
            # If both are found, the object will not throw an error; instead, the callable file
            # takes precedence over the lambda function.
            if operator_obj in [SqlSensor]:
                # Success checks
                if task_params.get("success_check_file") and task_params.get(
                        "success_check_name"):
                    task_params[
                        "success"]: Callable = utils.get_python_callable(
                            task_params["success_check_name"],
                            task_params["success_check_file"],
                        )
                    del task_params["success_check_name"]
                    del task_params["success_check_file"]
                elif task_params.get("success_check_lambda"):
                    task_params[
                        "success"]: Callable = utils.get_python_callable_lambda(
                            task_params["success_check_lambda"])
                    del task_params["success_check_lambda"]
                # Failure checks
                if task_params.get("failure_check_file") and task_params.get(
                        "failure_check_name"):
                    task_params[
                        "failure"]: Callable = utils.get_python_callable(
                            task_params["failure_check_name"],
                            task_params["failure_check_file"],
                        )
                    del task_params["failure_check_name"]
                    del task_params["failure_check_file"]
                elif task_params.get("failure_check_lambda"):
                    task_params[
                        "failure"]: Callable = utils.get_python_callable_lambda(
                            task_params["failure_check_lambda"])
                    del task_params["failure_check_lambda"]

            if operator_obj in [HttpSensor]:
                if not (task_params.get("response_check_name")
                        and task_params.get("response_check_file")
                        ) and not task_params.get("response_check_lambda"):
                    raise Exception(
                        "Failed to create task. HttpSensor requires "
                        "`response_check_name` and `response_check_file` parameters "
                        "or `response_check_lambda` parameter.")
                if task_params.get("response_check_file"):
                    task_params[
                        "response_check"]: Callable = utils.get_python_callable(
                            task_params["response_check_name"],
                            task_params["response_check_file"],
                        )
                    # remove dag-factory specific parameters
                    # Airflow 2.0 doesn't allow these to be passed to operator
                    del task_params["response_check_name"]
                    del task_params["response_check_file"]
                else:
                    task_params[
                        "response_check"]: Callable = utils.get_python_callable_lambda(
                            task_params["response_check_lambda"])
                    # remove dag-factory specific parameters
                    # Airflow 2.0 doesn't allow these to be passed to operator
                    del task_params["response_check_lambda"]

            # KubernetesPodOperator
            if operator_obj == KubernetesPodOperator:
                task_params["secrets"] = ([
                    Secret(**v) for v in task_params.get("secrets")
                ] if task_params.get("secrets") is not None else None)

                task_params["ports"] = ([
                    Port(**v) for v in task_params.get("ports")
                ] if task_params.get("ports") is not None else None)
                task_params["volume_mounts"] = ([
                    VolumeMount(**v) for v in task_params.get("volume_mounts")
                ] if task_params.get("volume_mounts") is not None else None)
                task_params["volumes"] = ([
                    Volume(**v) for v in task_params.get("volumes")
                ] if task_params.get("volumes") is not None else None)
                task_params["pod_runtime_info_envs"] = ([
                    PodRuntimeInfoEnv(**v)
                    for v in task_params.get("pod_runtime_info_envs")
                ] if task_params.get("pod_runtime_info_envs") is not None else
                                                        None)
                task_params["full_pod_spec"] = (
                    V1Pod(**task_params.get("full_pod_spec"))
                    if task_params.get("full_pod_spec") is not None else None)
                task_params["init_containers"] = ([
                    V1Container(**v)
                    for v in task_params.get("init_containers")
                ] if task_params.get("init_containers") is not None else None)

            if utils.check_dict_key(task_params, "execution_timeout_secs"):
                task_params["execution_timeout"]: timedelta = timedelta(
                    seconds=task_params["execution_timeout_secs"])
                del task_params["execution_timeout_secs"]

            if utils.check_dict_key(task_params, "sla_secs"):
                task_params["sla"]: timedelta = timedelta(
                    seconds=task_params["sla_secs"])
                del task_params["sla_secs"]

            if utils.check_dict_key(task_params, "execution_delta_secs"):
                task_params["execution_delta"]: timedelta = timedelta(
                    seconds=task_params["execution_delta_secs"])
                del task_params["execution_delta_secs"]

            if utils.check_dict_key(
                    task_params,
                    "execution_date_fn_name") and utils.check_dict_key(
                        task_params, "execution_date_fn_file"):
                task_params[
                    "execution_date_fn"]: Callable = utils.get_python_callable(
                        task_params["execution_date_fn_name"],
                        task_params["execution_date_fn_file"],
                    )
                del task_params["execution_date_fn_name"]
                del task_params["execution_date_fn_file"]

            # on_execute_callback is an Airflow 2.0 feature
            if utils.check_dict_key(
                    task_params, "on_execute_callback"
            ) and version.parse(AIRFLOW_VERSION) >= version.parse("2.0.0"):
                task_params["on_execute_callback"]: Callable = import_string(
                    task_params["on_execute_callback"])

            if utils.check_dict_key(task_params, "on_failure_callback"):
                task_params["on_failure_callback"]: Callable = import_string(
                    task_params["on_failure_callback"])

            if utils.check_dict_key(task_params, "on_success_callback"):
                task_params["on_success_callback"]: Callable = import_string(
                    task_params["on_success_callback"])

            if utils.check_dict_key(task_params, "on_retry_callback"):
                task_params["on_retry_callback"]: Callable = import_string(
                    task_params["on_retry_callback"])

            # use variables as arguments on operator
            if utils.check_dict_key(task_params, "variables_as_arguments"):
                variables: List[Dict[str, str]] = task_params.get(
                    "variables_as_arguments")
                for variable in variables:
                    if Variable.get(variable["variable"],
                                    default_var=None) is not None:
                        task_params[variable["attribute"]] = Variable.get(
                            variable["variable"], default_var=None)
                del task_params["variables_as_arguments"]

            task: BaseOperator = operator_obj(**task_params)
        except Exception as err:
            raise Exception(f"Failed to create {operator_obj} task") from err
        return task
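For the KubernetesPodOperator branch above, task_params carries plain dicts that make_task converts into the Airflow objects; an illustrative (hypothetical) shape consistent with those conversions:

task_params = {
    "task_id": "example_pod_task",
    "namespace": "default",
    "image": "ubuntu:16.04",
    "secrets": [{"deploy_type": "env", "deploy_target": "SQL_CONN",
                 "secret": "airflow-secrets", "key": "sql_alchemy_conn"}],
    "ports": [{"name": "http", "container_port": 80}],
    "volume_mounts": [{"name": "test-volume", "mount_path": "/root/mount_file",
                       "sub_path": None, "read_only": True}],
    "volumes": [{"name": "test-volume",
                 "configs": {"persistentVolumeClaim": {"claimName": "test-volume"}}}],
}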
Example #7
    OWS_PYTHON_PATH,
    OWS_CFG_FOLDER_PATH,
)
from infra.podconfig import ONDEMAND_NODE_AFFINITY
from webapp_update.update_list import UPDATE_EXTENT_PRODUCTS
from infra.variables import SECRET_OWS_WRITER_NAME

OWS_SECRETS = [
    Secret("env", "DB_USERNAME", SECRET_OWS_WRITER_NAME, "postgres-username"),
    Secret("env", "DB_PASSWORD", SECRET_OWS_WRITER_NAME, "postgres-password"),
]

# MOUNT OWS_CFG via init_container
# for main container mount
ows_cfg_mount = VolumeMount("ows-config-volume",
                            mount_path=OWS_CFG_MOUNT_PATH,
                            sub_path=None,
                            read_only=False)

ows_cfg_volume_config = {}

ows_cfg_volume = Volume(name="ows-config-volume",
                        configs=ows_cfg_volume_config)

# for init container mount
cfg_image_mount = k8s.V1VolumeMount(
    mount_path=OWS_CFG_MOUNT_PATH,
    name="ows-config-volume",
    sub_path=None,
    read_only=False,
)
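The init container itself is not shown in this snippet; a hypothetical container that would populate the shared config volume through cfg_image_mount (the image and command are placeholders) might look like:

ows_cfg_init_container = k8s.V1Container(
    name="mount-ows-config",
    image="ows-config-image:latest",  # placeholder image carrying the OWS config
    command=["cp", "-R", OWS_CFG_FOLDER_PATH, OWS_CFG_MOUNT_PATH],
    volume_mounts=[cfg_image_mount],
)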
Example #8
    def test_to_v1_pod(self, mock_uuid):
        from airflow.contrib.kubernetes.pod import Pod as DeprecatedPod
        from airflow.kubernetes.volume import Volume
        from airflow.kubernetes.volume_mount import VolumeMount
        from airflow.kubernetes.secret import Secret
        from airflow.kubernetes.pod import Resources
        import uuid
        static_uuid = uuid.UUID('cf4a56d2-8101-4217-b027-2af6216feb48')
        mock_uuid.return_value = static_uuid

        pod = DeprecatedPod(
            image="foo",
            name="bar",
            namespace="baz",
            image_pull_policy="Never",
            envs={"test_key": "test_value"},
            cmds=["airflow"],
            resources=Resources(request_memory="1G",
                                request_cpu="100Mi",
                                limit_gpu="100G"),
            init_containers=k8s.V1Container(name="test-container",
                                            volume_mounts=k8s.V1VolumeMount(
                                                mount_path="/foo/bar",
                                                name="init-volume-secret")),
            volumes=[
                Volume(name="foo", configs={}), {
                    "name": "bar",
                    'secret': {
                        'secretName': 'volume-secret'
                    }
                }
            ],
            secrets=[
                Secret("volume", None, "init-volume-secret"),
                Secret('env', "AIRFLOW_SECRET", 'secret_name',
                       "airflow_config"),
                Secret("volume", "/opt/airflow", "volume-secret", "secret-key")
            ],
            volume_mounts=[
                VolumeMount(name="foo",
                            mount_path="/mnt",
                            sub_path="/",
                            read_only=True)
            ])

        k8s_client = ApiClient()

        result = pod.to_v1_kubernetes_pod()
        result = k8s_client.sanitize_for_serialization(result)

        expected = \
            {'metadata': {'labels': {}, 'name': 'bar', 'namespace': 'baz'},
             'spec': {'affinity': {},
                      'containers': [{'args': [],
                                      'command': ['airflow'],
                                      'env': [{'name': 'test_key', 'value': 'test_value'},
                                              {'name': 'AIRFLOW_SECRET',
                                               'valueFrom': {'secretKeyRef': {'key': 'airflow_config',
                                                                              'name': 'secret_name'}}}],
                                      'image': 'foo',
                                      'imagePullPolicy': 'Never',
                                      'name': 'base',
                                      'resources': {'limits': {'nvidia.com/gpu': '100G'},
                                                    'requests': {'cpu': '100Mi',
                                                                 'memory': '1G'}},
                                      'volumeMounts': [{'mountPath': '/mnt',
                                                        'name': 'foo',
                                                        'readOnly': True,
                                                        'subPath': '/'},
                                                       {'mountPath': '/opt/airflow',
                                                       'name': 'secretvol' + str(static_uuid),
                                                        'readOnly': True}]}],
                      'hostNetwork': False,
                      'initContainers': {'name': 'test-container',
                                         'volumeMounts': {'mountPath': '/foo/bar',
                                                          'name': 'init-volume-secret'}},
                      'securityContext': {},
                      'tolerations': [],
                      'volumes': [{'name': 'foo'},
                                  {'name': 'bar',
                                   'secret': {'secretName': 'volume-secret'}},
                                  {'name': 'secretvolcf4a56d2-8101-4217-b027-2af6216feb48',
                                   'secret': {'secretName': 'init-volume-secret'}},
                                  {'name': 'secretvol' + str(static_uuid),
                                   'secret': {'secretName': 'volume-secret'}}
                                  ]}}
        self.maxDiff = None
        self.assertEqual(expected, result)
Example #9
from airflow.kubernetes.volume import Volume
from airflow.kubernetes.volume_mount import VolumeMount

default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime.utcnow(),
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 0,
    'retry_delay': timedelta(minutes=1)
}

volume_mount_nb = VolumeMount(name='nb-data',
                              mount_path='/opt/airflow/nb',
                              sub_path=None,
                              read_only=False)

volume_config = {
    'persistentVolumeClaim':
        {
            'claimName': 'airflow-nfs-pvc-nb-01'
        }
}
volume_nb = Volume(name='nb-data', configs=volume_config)

dag = DAG(
    'kubernetes_pod_operator_03_with_sleep', default_args=default_args, schedule_interval=timedelta(minutes=10))

start = DummyOperator(task_id='start', dag=dag)
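The original DAG is truncated here; a plausible continuation (not from the source) would attach a pod task that uses the volume defined above and then sleeps, matching the DAG id:

run_nb = KubernetesPodOperator(
    namespace='default',            # placeholder namespace
    image='python:3.8-slim',        # placeholder image
    cmds=['bash', '-cx'],
    arguments=['ls /opt/airflow/nb && sleep 600'],
    volumes=[volume_nb],
    volume_mounts=[volume_mount_nb],
    name='run-nb-with-sleep',
    task_id='run_nb_with_sleep',
    get_logs=True,
    dag=dag,
)

start >> run_nb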
Example #10
    def make_task(operator: str, task_params: Dict[str, Any],
                  af_vars: Dict[str, Any]) -> BaseOperator:
        """
        Takes an operator and params and creates an instance of that operator.

        :returns: instance of operator object
        """
        try:
            # class is a Callable https://stackoverflow.com/a/34578836/3679900
            operator_obj: Callable[..., BaseOperator] = import_string(operator)
        except Exception as err:
            raise Exception(f"Failed to import operator: {operator}") from err
        try:
            if operator_obj in [
                    PythonOperator, BranchPythonOperator, PythonSensor
            ]:
                if (not task_params.get("python_callable")
                        and not task_params.get("python_callable_name")
                        and not task_params.get("python_callable_file")):
                    # pylint: disable=line-too-long
                    raise Exception(
                        "Failed to create task. PythonOperator, BranchPythonOperator and PythonSensor "
                        "require `python_callable_name` and `python_callable_file` parameters.\n"
                        "Optionally you can load the python_callable from a file "
                        "with the special pyyaml notation:\n"
                        "  python_callable_file: !!python/name:my_module.my_func"
                    )
                if not task_params.get("python_callable"):
                    task_params[
                        "python_callable"]: Callable = utils.get_python_callable(
                            task_params["python_callable_name"],
                            task_params["python_callable_file"],
                        )
                    # remove dag-factory specific parameters
                    # Airflow 2.0 doesn't allow these to be passed to operator
                    del task_params["python_callable_name"]
                    del task_params["python_callable_file"]

            # Check for the custom success and failure callables in SqlSensor. These are considered
            # optional, so there is no failure if they aren't found. Note: there's no reason to
            # declare both a callable file and a lambda function for the success/failure parameter.
            # If both are found, the object will not throw an error; instead, the callable file
            # takes precedence over the lambda function.
            if operator_obj in [SqlSensor]:
                # Success checks
                if task_params.get("success_check_file") and task_params.get(
                        "success_check_name"):
                    task_params[
                        "success"]: Callable = utils.get_python_callable(
                            task_params["success_check_name"],
                            task_params["success_check_file"],
                        )
                    del task_params["success_check_name"]
                    del task_params["success_check_file"]
                elif task_params.get("success_check_lambda"):
                    task_params[
                        "success"]: Callable = utils.get_python_callable_lambda(
                            task_params["success_check_lambda"])
                    del task_params["success_check_lambda"]
                # Failure checks
                if task_params.get("failure_check_file") and task_params.get(
                        "failure_check_name"):
                    task_params[
                        "failure"]: Callable = utils.get_python_callable(
                            task_params["failure_check_name"],
                            task_params["failure_check_file"],
                        )
                    del task_params["failure_check_name"]
                    del task_params["failure_check_file"]
                elif task_params.get("failure_check_lambda"):
                    task_params[
                        "failure"]: Callable = utils.get_python_callable_lambda(
                            task_params["failure_check_lambda"])
                    del task_params["failure_check_lambda"]

            if operator_obj in [HttpSensor]:
                if not (task_params.get("response_check_name")
                        and task_params.get("response_check_file")
                        ) and not task_params.get("response_check_lambda"):
                    raise Exception(
                        "Failed to create task. HttpSensor requires "
                        "`response_check_name` and `response_check_file` parameters "
                        "or `response_check_lambda` parameter.")
                if task_params.get("response_check_file"):
                    task_params[
                        "response_check"]: Callable = utils.get_python_callable(
                            task_params["response_check_name"],
                            task_params["response_check_file"],
                        )
                    # remove dag-factory specific parameters
                    # Airflow 2.0 doesn't allow these to be passed to operator
                    del task_params["response_check_name"]
                    del task_params["response_check_file"]
                else:
                    task_params[
                        "response_check"]: Callable = utils.get_python_callable_lambda(
                            task_params["response_check_lambda"])
                    # remove dag-factory specific parameters
                    # Airflow 2.0 doesn't allow these to be passed to operator
                    del task_params["response_check_lambda"]

            # KubernetesPodOperator
            if operator_obj == KubernetesPodOperator:
                task_params["secrets"] = ([
                    Secret(**v) for v in task_params.get("secrets")
                ] if task_params.get("secrets") is not None else None)

                task_params["ports"] = ([
                    Port(**v) for v in task_params.get("ports")
                ] if task_params.get("ports") is not None else None)
                task_params["volume_mounts"] = ([
                    VolumeMount(**v) for v in task_params.get("volume_mounts")
                ] if task_params.get("volume_mounts") is not None else None)
                task_params["volumes"] = ([
                    Volume(**v) for v in task_params.get("volumes")
                ] if task_params.get("volumes") is not None else None)
                task_params["pod_runtime_info_envs"] = ([
                    PodRuntimeInfoEnv(**v)
                    for v in task_params.get("pod_runtime_info_envs")
                ] if task_params.get("pod_runtime_info_envs") is not None else
                                                        None)
                task_params["full_pod_spec"] = (
                    V1Pod(**task_params.get("full_pod_spec"))
                    if task_params.get("full_pod_spec") is not None else None)
                task_params["init_containers"] = ([
                    V1Container(**v)
                    for v in task_params.get("init_containers")
                ] if task_params.get("init_containers") is not None else None)
            if operator_obj == DockerOperator:
                if task_params.get("environment") is not None:
                    task_params["environment"] = {
                        k: os.environ.get(v, v)
                        for k, v in task_params["environment"].items()
                    }

            if operator_obj == EcsOperator:
                for c in task_params["overrides"]["containerOverrides"]:
                    if c.get('environment') is not None:
                        for env in c['environment']:
                            env['value'] = os.environ.get(
                                env['value'], env['value'])

                if 'ECS_SECURITY_GROUPS' in af_vars and 'network_configuration' in task_params:
                    task_params["network_configuration"]["awsvpcConfiguration"]['securityGroups'] \
                        = af_vars['ECS_SECURITY_GROUPS']

                if 'ECS_SUBNETS' in af_vars and 'network_configuration' in task_params:
                    task_params['network_configuration'][
                        "awsvpcConfiguration"]["subnets"] = af_vars[
                            "ECS_SUBNETS"]

                if af_vars.get('ECS_CLUSTER'):
                    task_params['cluster'] = af_vars["ECS_CLUSTER"]
                    task_params['task_definition'] = (
                        af_vars.get('ECS_CLUSTER') + '_' +
                        task_params['task_definition']).lower()

                    task_params['awslogs_group'] = \
                        task_params['awslogs_group'] + '/' + af_vars.get('ECS_CLUSTER').lower()

            if utils.check_dict_key(task_params, "execution_timeout_secs"):
                task_params["execution_timeout"]: timedelta = timedelta(
                    seconds=task_params["execution_timeout_secs"])
                del task_params["execution_timeout_secs"]

            if utils.check_dict_key(task_params, "sla_secs"):
                task_params["sla"]: timedelta = timedelta(
                    seconds=task_params["sla_secs"])
                del task_params["sla_secs"]

            if utils.check_dict_key(task_params, "execution_delta_secs"):
                task_params["execution_delta"]: timedelta = timedelta(
                    seconds=task_params["execution_delta_secs"])
                del task_params["execution_delta_secs"]

            if utils.check_dict_key(
                    task_params,
                    "execution_date_fn_name") and utils.check_dict_key(
                        task_params, "execution_date_fn_file"):
                task_params[
                    "execution_date_fn"]: Callable = utils.get_python_callable(
                        task_params["execution_date_fn_name"],
                        task_params["execution_date_fn_file"],
                    )
                del task_params["execution_date_fn_name"]
                del task_params["execution_date_fn_file"]

            # on_execute_callback is an Airflow 2.0 feature
            if utils.check_dict_key(
                    task_params, "on_execute_callback"
            ) and version.parse(AIRFLOW_VERSION) >= version.parse("2.0.0"):
                task_params["on_execute_callback"]: Callable = import_string(
                    task_params["on_execute_callback"])

            if utils.check_dict_key(task_params, "on_failure_callback"):
                task_params["on_failure_callback"]: Callable = import_string(
                    task_params["on_failure_callback"])

            if utils.check_dict_key(task_params, "on_success_callback"):
                task_params["on_success_callback"]: Callable = import_string(
                    task_params["on_success_callback"])

            if utils.check_dict_key(task_params, "on_retry_callback"):
                task_params["on_retry_callback"]: Callable = import_string(
                    task_params["on_retry_callback"])

            # use variables as arguments on operator
            if utils.check_dict_key(task_params, "variables_as_arguments"):
                variables: List[Dict[str, str]] = task_params.get(
                    "variables_as_arguments")
                for variable in variables:
                    if Variable.get(variable["variable"],
                                    default_var=None) is not None:
                        task_params[variable["attribute"]] = Variable.get(
                            variable["variable"], default_var=None)
                del task_params["variables_as_arguments"]

            # use variables as arguments on operator
            if utils.check_dict_key(task_params, "af_vars_as_arguments"):
                variables: List[Dict[str, str]] = task_params.get(
                    "af_vars_as_arguments")
                for variable in variables:
                    if af_vars.get(variable["variable"], None) is not None:
                        task_params[variable["attribute"]] = af_vars.get(
                            variable["variable"], None)
                del task_params["af_vars_as_arguments"]

            task: BaseOperator = operator_obj(**task_params)
        except Exception as err:
            raise Exception(f"Failed to create {operator_obj} task") from err
        return task
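For the EcsOperator branch above, the overrides structure is a plain dict; an illustrative (hypothetical) shape, where any environment value naming a real environment variable is substituted and everything else passes through unchanged:

task_params["overrides"] = {
    "containerOverrides": [{
        "name": "app",
        "environment": [{"name": "STAGE", "value": "DEPLOY_ENV"}],
    }]
}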
Example #11
def _extract_volume_mounts(volume_mounts):
    result = []
    volume_mounts = volume_mounts or []  # type: List[Union[k8s.V1VolumeMount, dict]]
    for volume_mount in volume_mounts:
        if isinstance(volume_mount, k8s.V1VolumeMount):
            volume_mount = api_client.sanitize_for_serialization(volume_mount)
            volume_mount = VolumeMount(
                name=volume_mount.get("name"),
                mount_path=volume_mount.get("mountPath"),
                sub_path=volume_mount.get("subPath"),
                read_only=volume_mount.get("readOnly")
            )
        elif not isinstance(volume_mount, VolumeMount):
            volume_mount = VolumeMount(
                name=volume_mount.get("name"),
                mount_path=volume_mount.get("mountPath"),
                sub_path=volume_mount.get("subPath"),
                read_only=volume_mount.get("readOnly")
            )
        result.append(volume_mount)
    return result
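_extract_volume_mounts normalizes mixed input, so a call like the following sketch (names and paths are placeholders) returns legacy VolumeMount objects for both kinds of item:

from kubernetes.client import models as k8s

mixed = [
    k8s.V1VolumeMount(name="data", mount_path="/data", read_only=True),
    {"name": "logs", "mountPath": "/var/log/app", "subPath": None, "readOnly": False},
]
legacy_mounts = _extract_volume_mounts(mixed)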
Example #12
                }]
            }]
        }
    }
}

tolerations = [{
    "key": "dedicated",
    "operator": "Equal",
    "value": "wagl",
    "effect": "NoSchedule"
}]

ancillary_volume_mount = VolumeMount(
    name="wagl-nrt-ancillary-volume",
    mount_path="/ancillary",
    sub_path=None,
    read_only=False,
)

ancillary_volume = Volume(
    name="wagl-nrt-ancillary-volume",
    configs={
        "persistentVolumeClaim": {
            "claimName": "wagl-nrt-ancillary-volume"
        }
    },
)


def setup_logging():
    """ """
Example #13
from airflow.operators.dummy_operator import DummyOperator
from airflow.kubernetes.volume import Volume
from airflow.kubernetes.volume_mount import VolumeMount
from airflow.utils.dates import days_ago
from airflow import DAG
# from kubernetes.client import models as k8s

default_args = {
    'owner': 'Airflow',
    'start_date': datetime.datetime.now() - datetime.timedelta(days=1),  # yesterday
}


volume_mount = VolumeMount(
    'persist-airflow-logs'
    , mount_path='/opt/airflow/logs'
    , sub_path=None
    , read_only=False
)

volume_config= {
    'persistentVolumeClaim': { 'claimName': 'persist-airflow-logs' }
    }

volume = Volume(
    name='persist-airflow-logs'
    , configs=volume_config
)

default_args = {
    'owner': 'airflow',
}
Example #14
    def test_convert_to_airflow_pod(self):
        input_pod = k8s.V1Pod(
            metadata=k8s.V1ObjectMeta(name="foo", namespace="bar"),
            spec=k8s.V1PodSpec(
                init_containers=[
                    k8s.V1Container(name="init-container",
                                    volume_mounts=[
                                        k8s.V1VolumeMount(mount_path="/tmp",
                                                          name="init-secret")
                                    ])
                ],
                containers=[
                    k8s.V1Container(
                        name="base",
                        command=["foo"],
                        image="myimage",
                        env=[
                            k8s.V1EnvVar(
                                name="AIRFLOW_SECRET",
                                value_from=k8s.V1EnvVarSource(
                                    secret_key_ref=k8s.V1SecretKeySelector(
                                        name="ai", key="secret_key")))
                        ],
                        ports=[
                            k8s.V1ContainerPort(
                                name="myport",
                                container_port=8080,
                            )
                        ],
                        volume_mounts=[
                            k8s.V1VolumeMount(name="myvolume",
                                              mount_path="/tmp/mount",
                                              read_only="True"),
                            k8s.V1VolumeMount(name='airflow-config',
                                              mount_path='/config',
                                              sub_path='airflow.cfg',
                                              read_only=True),
                            k8s.V1VolumeMount(name="airflow-secret",
                                              mount_path="/opt/mount",
                                              read_only=True)
                        ])
                ],
                security_context=k8s.V1PodSecurityContext(
                    run_as_user=0,
                    fs_group=0,
                ),
                volumes=[
                    k8s.V1Volume(name="myvolume"),
                    k8s.V1Volume(
                        name="airflow-config",
                        config_map=k8s.V1ConfigMap(data="airflow-data")),
                    k8s.V1Volume(name="airflow-secret",
                                 secret=k8s.V1SecretVolumeSource(
                                     secret_name="secret-name", )),
                    k8s.V1Volume(name="init-secret",
                                 secret=k8s.V1SecretVolumeSource(
                                     secret_name="init-secret", ))
                ]))
        result_pod = _convert_to_airflow_pod(input_pod)

        expected = Pod(
            name="foo",
            namespace="bar",
            envs={},
            init_containers=[{
                'name': 'init-container',
                'volumeMounts': [{
                    'mountPath': '/tmp',
                    'name': 'init-secret'
                }]
            }],
            cmds=["foo"],
            image="myimage",
            ports=[Port(name="myport", container_port=8080)],
            volume_mounts=[
                VolumeMount(name="myvolume",
                            mount_path="/tmp/mount",
                            sub_path=None,
                            read_only="True"),
                VolumeMount(name="airflow-config",
                            read_only=True,
                            mount_path="/config",
                            sub_path="airflow.cfg"),
                VolumeMount(name="airflow-secret",
                            mount_path="/opt/mount",
                            sub_path=None,
                            read_only=True)
            ],
            secrets=[Secret("env", "AIRFLOW_SECRET", "ai", "secret_key")],
            security_context={
                'fsGroup': 0,
                'runAsUser': 0
            },
            volumes=[
                Volume(name="myvolume", configs={'name': 'myvolume'}),
                Volume(name="airflow-config",
                       configs={
                           'configMap': {
                               'data': 'airflow-data'
                           },
                           'name': 'airflow-config'
                       }),
                Volume(name='airflow-secret',
                       configs={
                           'name': 'airflow-secret',
                           'secret': {
                               'secretName': 'secret-name'
                           }
                       }),
                Volume(name='init-secret',
                       configs={
                           'name': 'init-secret',
                           'secret': {
                               'secretName': 'init-secret'
                           }
                       })
            ],
        )
        expected_dict = expected.as_dict()
        result_dict = result_pod.as_dict()
        print(result_pod.volume_mounts)
        parsed_configs = self.pull_out_volumes(result_dict)
        result_dict['volumes'] = parsed_configs
        self.assertEqual(result_dict['secrets'], expected_dict['secrets'])
        self.assertDictEqual(expected_dict, result_dict)
Example #15
dag = DAG(
    "k8s_nci_db_incremental_sync",
    doc_md=__doc__,
    default_args=DEFAULT_ARGS,
    catchup=False,
    concurrency=1,
    max_active_runs=1,
    tags=["k8s", "nci-explorer"],
    schedule_interval="45 0 * * *",  # every day 0:45AM
    dagrun_timeout=timedelta(minutes=60 * 3),
)

affinity = ONDEMAND_NODE_AFFINITY

s3_backup_volume_mount = VolumeMount(name="s3-backup-volume",
                                     mount_path=BACKUP_PATH,
                                     sub_path=None,
                                     read_only=False)

s3_backup_volume_config = {
    "persistentVolumeClaim": {
        "claimName": "s3-backup-volume"
    }
}

s3_backup_volume = Volume(name="s3-backup-volume",
                          configs=s3_backup_volume_config)

with dag:
    START = DummyOperator(task_id="start")

    # Wait for S3 Key
Example #16
from airflow import DAG
from airflow.kubernetes.pod import Port
from airflow.kubernetes.secret import Secret
from airflow.kubernetes.volume import Volume
from airflow.kubernetes.volume_mount import VolumeMount
from airflow.operators.bash import BashOperator
from airflow.providers.cncf.kubernetes.operators.kubernetes_pod import KubernetesPodOperator
from airflow.utils.dates import days_ago

# [START howto_operator_k8s_cluster_resources]
secret_file = Secret('volume', '/etc/sql_conn', 'airflow-secrets',
                     'sql_alchemy_conn')
secret_env = Secret('env', 'SQL_CONN', 'airflow-secrets', 'sql_alchemy_conn')
secret_all_keys = Secret('env', None, 'airflow-secrets-2')
volume_mount = VolumeMount('test-volume',
                           mount_path='/root/mount_file',
                           sub_path=None,
                           read_only=True)
configmaps = ['test-configmap-1', 'test-configmap-2']
volume_config = {'persistentVolumeClaim': {'claimName': 'test-volume'}}
volume = Volume(name='test-volume', configs=volume_config)
# [END howto_operator_k8s_cluster_resources]

port = Port('http', 80)

init_container_volume_mounts = [
    k8s.V1VolumeMount(mount_path='/etc/foo',
                      name='test-volume',
                      sub_path=None,
                      read_only=True)
]
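These definitions follow the provider's cluster-resources how-to; a minimal sketch (not the full upstream DAG) of passing them to a KubernetesPodOperator:

k = KubernetesPodOperator(
    namespace='default',
    image='ubuntu:16.04',
    cmds=['bash', '-cx'],
    arguments=['echo hello'],
    secrets=[secret_file, secret_env, secret_all_keys],
    ports=[port],
    volumes=[volume],
    volume_mounts=[volume_mount],
    name='airflow-test-pod',
    task_id='pod_example',
    get_logs=True,
)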
Example #17
    def make_task(operator: str, task_params: Dict[str, Any]) -> BaseOperator:
        """
        Takes an operator and params and creates an instance of that operator.

        :returns: instance of operator object
        """
        try:
            # class is a Callable https://stackoverflow.com/a/34578836/3679900
            operator_obj: Callable[..., BaseOperator] = import_string(operator)
        except Exception as err:
            raise f"Failed to import operator: {operator}" from err
        try:
            if operator_obj == PythonOperator:
                if not task_params.get(
                        "python_callable_name") and not task_params.get(
                            "python_callable_file"):
                    raise Exception(
                        "Failed to create task. PythonOperator requires `python_callable_name` "
                        "and `python_callable_file` parameters.")
                task_params[
                    "python_callable"]: Callable = utils.get_python_callable(
                        task_params["python_callable_name"],
                        task_params["python_callable_file"],
                    )

            # KubernetesPodOperator
            if operator_obj == KubernetesPodOperator:
                task_params["secrets"] = ([
                    Secret(**v) for v in task_params.get("secrets")
                ] if task_params.get("secrets") is not None else None)

                task_params["ports"] = ([
                    Port(**v) for v in task_params.get("ports")
                ] if task_params.get("ports") is not None else None)
                task_params["volume_mounts"] = ([
                    VolumeMount(**v) for v in task_params.get("volume_mounts")
                ] if task_params.get("volume_mounts") is not None else None)
                task_params["volumes"] = ([
                    Volume(**v) for v in task_params.get("volumes")
                ] if task_params.get("volumes") is not None else None)
                task_params["pod_runtime_info_envs"] = ([
                    PodRuntimeInfoEnv(**v)
                    for v in task_params.get("pod_runtime_info_envs")
                ] if task_params.get("pod_runtime_info_envs") is not None else
                                                        None)
                task_params["full_pod_spec"] = (
                    V1Pod(**task_params.get("full_pod_spec"))
                    if task_params.get("full_pod_spec") is not None else None)
                task_params["init_containers"] = ([
                    V1Container(**v)
                    for v in task_params.get("init_containers")
                ] if task_params.get("init_containers") is not None else None)

            if utils.check_dict_key(task_params, "execution_timeout_secs"):
                task_params["execution_timeout"]: timedelta = timedelta(
                    seconds=task_params["execution_timeout_secs"])
                del task_params["execution_timeout_secs"]

            # use variables as arguments on operator
            if utils.check_dict_key(task_params, "variables_as_arguments"):
                variables: List[Dict[str, str]] = task_params.get(
                    "variables_as_arguments")
                for variable in variables:
                    if Variable.get(variable["variable"],
                                    default_var=None) is not None:
                        task_params[variable["attribute"]] = Variable.get(
                            variable["variable"], default_var=None)
                del task_params["variables_as_arguments"]

            task: BaseOperator = operator_obj(**task_params)
        except Exception as err:
            raise f"Failed to create {operator_obj} task" from err
        return task
Example #18
    http_conn_id="apar_graphql",
    endpoint="graphql/",
    method="POST",
    headers={"Content-Type": "application/json"},
    data_fn=partial(get_job_status_update_callable, "FAILED"),
    task_id="failed_callback",
    dag=dag,
    trigger_rule="all_failed",
)

volume_config = {"persistentVolumeClaim": {"claimName": "pvc-data-name"}}

test_volume = Volume(name="pv-data-name", configs=volume_config)

test_volume_mount = VolumeMount("pv-data-name",
                                mount_path="/data",
                                sub_path=None,
                                read_only=False)

fastq_dump = KubernetesPodOperator(
    namespace="airflow",
    image="quay.io/biocontainers/sra-tools:2.10.0--pl526he1b5a44_0",
    cmds=[
        "fastq-dump", "--outdir", "data/fastq", "--gzip", "--skip-technical",
        "--readids", "--read-filter", "pass", "--dumpbase", "--split-3",
        "--clip", "SRR6982497"
    ],
    name="fastq_dump",
    task_id="fastq_dump",
    get_logs=True,
    dag=dag,
    volumes=[test_volume],