def test_get_secrets(self):
    # Test when secretRef is None and kube_secrets is not empty
    self.kube_config.kube_secrets = {
        'AWS_SECRET_KEY': 'airflow-secret=aws_secret_key',
        'POSTGRES_PASSWORD': 'airflow-secret=postgres_credentials'
    }
    self.kube_config.env_from_secret_ref = None
    worker_config = WorkerConfiguration(self.kube_config)
    secrets = worker_config._get_secrets()
    secrets.sort(key=lambda secret: secret.deploy_target)
    expected = [
        Secret('env', 'AWS_SECRET_KEY', 'airflow-secret', 'aws_secret_key'),
        Secret('env', 'POSTGRES_PASSWORD', 'airflow-secret', 'postgres_credentials')
    ]
    self.assertListEqual(expected, secrets)

    # Test when secretRef is not empty and kube_secrets is an empty dict
    self.kube_config.kube_secrets = {}
    self.kube_config.env_from_secret_ref = 'secret_a,secret_b'
    worker_config = WorkerConfiguration(self.kube_config)
    secrets = worker_config._get_secrets()
    expected = [
        Secret('env', None, 'secret_a'),
        Secret('env', None, 'secret_b')
    ]
    self.assertListEqual(expected, secrets)
def test_extract_volume_secrets(self):
    # Test when secrets is not empty
    secrets = [
        Secret('volume', 'KEY1', 's1', 'key-1'),
        Secret('env', 'KEY2', 's2'),
        Secret('volume', 'KEY3', 's3', 'key-2')
    ]
    pod = Pod('v3.14', {}, [], secrets=secrets)
    self.expected['spec']['containers'][0]['volumeMounts'] = [{
        'mountPath': 'KEY1',
        'name': 'secretvol0',
        'readOnly': True
    }, {
        'mountPath': 'KEY3',
        'name': 'secretvol1',
        'readOnly': True
    }]
    self.expected['spec']['volumes'] = [{
        'name': 'secretvol0',
        'secret': {
            'secretName': 's1'
        }
    }, {
        'name': 'secretvol1',
        'secret': {
            'secretName': 's3'
        }
    }]
    KubernetesRequestFactory.extract_volume_secrets(pod, self.input_req)
    self.assertEqual(self.input_req, self.expected)
def test_extract_env_and_secrets(self):
    # Test when secrets and envs are not empty
    secrets = [
        Secret('env', None, 's1'),
        Secret('volume', 'KEY2', 's2', 'key-2'),
        Secret('env', None, 's3')
    ]
    envs = {
        'ENV1': 'val1',
        'ENV2': 'val2'
    }
    configmaps = ['configmap_a', 'configmap_b']
    pod_runtime_envs = [PodRuntimeInfoEnv("ENV3", "status.podIP")]
    pod = Pod(
        image='v3.14',
        envs=envs,
        cmds=[],
        secrets=secrets,
        configmaps=configmaps,
        pod_runtime_info_envs=pod_runtime_envs)
    self.expected['spec']['containers'][0]['env'] = [
        {'name': 'ENV1', 'value': 'val1'},
        {'name': 'ENV2', 'value': 'val2'},
        {
            'name': 'ENV3',
            'valueFrom': {
                'fieldRef': {
                    'fieldPath': 'status.podIP'
                }
            }
        }
    ]
    self.expected['spec']['containers'][0]['envFrom'] = [{
        'secretRef': {
            'name': 's1'
        }
    }, {
        'secretRef': {
            'name': 's3'
        }
    }, {
        'configMapRef': {
            'name': 'configmap_a'
        }
    }, {
        'configMapRef': {
            'name': 'configmap_b'
        }
    }]

    KubernetesRequestFactory.extract_env_and_secrets(pod, self.input_req)
    self.input_req['spec']['containers'][0]['env'].sort(key=lambda x: x['name'])
    self.assertEqual(self.input_req, self.expected)
def _get_secrets(self):
    """Defines any necessary secrets for the pod executor"""
    worker_secrets = []

    for env_var_name, obj_key_pair in six.iteritems(self.kube_config.kube_secrets):
        k8s_secret_obj, k8s_secret_key = obj_key_pair.split('=')
        worker_secrets.append(
            Secret('env', env_var_name, k8s_secret_obj, k8s_secret_key))

    if self.kube_config.env_from_secret_ref:
        for secret_ref in self.kube_config.env_from_secret_ref.split(','):
            worker_secrets.append(Secret('env', None, secret_ref))

    return worker_secrets
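# A sketch (not from the source) of the two config inputs _get_secrets() consumes,
# with invented example values. kube_secrets maps an env var name to
# "<kubernetes secret name>=<key within that secret>"; env_from_secret_ref is a
# comma-separated list of secret names exposed whole via envFrom.
example_kube_secrets = {
    # -> Secret('env', 'AWS_SECRET_KEY', 'airflow-secret', 'aws_secret_key')
    'AWS_SECRET_KEY': 'airflow-secret=aws_secret_key',
}
# -> Secret('env', None, 'secret_a') and Secret('env', None, 'secret_b')
example_env_from_secret_ref = 'secret_a,secret_b'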
def build_secret_volume(secret, key="service-account.json"):
    return Secret(
        deploy_type="volume",
        deploy_target="/var/secrets/google",
        secret=secret,
        key=key,
    )
def test_extract_env_and_secrets(self):
    # Test when secrets and envs are not empty
    secrets = [
        Secret('env', None, 's1'),
        Secret('volume', 'KEY2', 's2', 'key-2'),
        Secret('env', None, 's3')
    ]
    envs = {'ENV1': 'val1', 'ENV2': 'val2'}
    configmaps = ['configmap_a', 'configmap_b']
    pod = Pod('v3.14', envs, [], secrets=secrets, configmaps=configmaps)
    self.expected['spec']['containers'][0]['env'] = [
        {
            'name': 'ENV1',
            'value': 'val1'
        },
        {
            'name': 'ENV2',
            'value': 'val2'
        },
    ]
    self.expected['spec']['containers'][0]['envFrom'] = [{
        'secretRef': {
            'name': 's1'
        }
    }, {
        'secretRef': {
            'name': 's3'
        }
    }, {
        'configMapRef': {
            'name': 'configmap_a'
        }
    }, {
        'configMapRef': {
            'name': 'configmap_b'
        }
    }]

    KubernetesRequestFactory.extract_env_and_secrets(pod, self.input_req)
    self.input_req['spec']['containers'][0]['env'].sort(
        key=lambda x: x['name'])
    self.assertEqual(self.input_req, self.expected)
def test_add_secret_to_env(self):
    secret = Secret('env', 'target', 'my-secret', 'KEY')
    secret_list = []
    self.expected = [{
        'name': 'TARGET',
        'valueFrom': {
            'secretKeyRef': {
                'name': 'my-secret',
                'key': 'KEY'
            }
        }
    }]
    KubernetesRequestFactory.add_secret_to_env(secret_list, secret)
    self.assertListEqual(secret_list, self.expected)
def get_secrets(self):
    """Defines any necessary secrets for the pod executor"""
    from airflow.contrib.kubernetes.secret import Secret

    result = []
    secrets = self.system_secrets + self.secrets
    for secret_data in secrets:
        result.append(
            Secret(
                deploy_type=secret_data.get("type"),
                deploy_target=secret_data.get("target"),
                secret=secret_data.get("secret"),
                key=secret_data.get("key"),
            ))
    return result
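# A hypothetical sketch of the dict shape get_secrets() expects. The attribute
# names self.system_secrets / self.secrets come from the snippet above; the
# concrete values here are invented for illustration only.
example_secrets = [
    {"type": "env", "target": "DB_PASSWORD", "secret": "db-creds", "key": "password"},
    {"type": "volume", "target": "/etc/sa", "secret": "sa-json", "key": "sa.json"},
]
# Each dict becomes one airflow.contrib.kubernetes.secret.Secret instance.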
def test_envs_from_secrets(self, client_mock, launcher_mock):
    # GIVEN
    from airflow.utils.state import State

    secrets = [Secret('env', None, "secret_name")]

    # WHEN
    k = KubernetesPodOperator(
        namespace='default',
        image="ubuntu:16.04",
        cmds=["bash", "-cx"],
        arguments=["echo 10"],
        secrets=secrets,
        labels={"foo": "bar"},
        name="test",
        task_id="task",
    )

    # THEN
    launcher_mock.return_value = (State.SUCCESS, None)
    k.execute(None)
    self.assertEqual(launcher_mock.call_args[0][0].secrets, secrets)
def handle_container_environment_variables(
    env_vars: List[V1EnvVar],
) -> Tuple[Dict[str, str], List[Secret], List[str], List[PodRuntimeInfoEnv]]:
    secrets = []
    plain_env_vars = {}
    config_maps = []
    runtime_env_vars = []
    for env_var in env_vars or []:
        value_from: V1EnvVarSource = env_var.value_from
        if value_from:
            if value_from.resource_field_ref:
                # not handled for now
                continue
            if value_from.field_ref:
                field_ref: V1ObjectFieldSelector = value_from.field_ref
                runtime_env_vars.append(
                    PodRuntimeInfoEnv(
                        field_path=field_ref.field_path, name=env_var.name
                    )
                )
                continue
            if value_from.config_map_key_ref:
                key_ref: V1ConfigMapKeySelector = value_from.config_map_key_ref
                config_maps.append(key_ref.name)
                continue
            if value_from.secret_key_ref:
                key_ref: V1SecretKeySelector = value_from.secret_key_ref
                secrets.append(
                    Secret(
                        deploy_type="env",
                        deploy_target=env_var.name,
                        secret=key_ref.name,
                        key=key_ref.key,
                    )
                )
                continue
        plain_env_vars[env_var.name] = env_var.value

    return plain_env_vars, secrets, config_maps, runtime_env_vars
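# A minimal usage sketch (assumed, not from the source) of the function above,
# showing how a plain env var and a secretKeyRef-backed env var are split into
# the four return values. Secret names and keys are placeholders.
from kubernetes.client import V1EnvVar, V1EnvVarSource, V1SecretKeySelector

example_env_vars = [
    V1EnvVar(name="LOG_LEVEL", value="info"),
    V1EnvVar(
        name="DB_PASSWORD",
        value_from=V1EnvVarSource(
            secret_key_ref=V1SecretKeySelector(name="db-creds", key="password")
        ),
    ),
]
plain, secrets, config_maps, runtime = handle_container_environment_variables(example_env_vars)
# plain == {"LOG_LEVEL": "info"}; secrets holds one env-type Secret for DB_PASSWORD.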
def test_envs_from_secrets(self, mock_client, mock_launcher):
    # GIVEN
    from airflow.utils.state import State

    secret_ref = 'secret_name'
    secrets = [Secret('env', None, secret_ref)]

    # WHEN
    k = KubernetesPodOperator(
        namespace='default',
        image="ubuntu:16.04",
        cmds=["bash", "-cx"],
        arguments=["echo 10"],
        secrets=secrets,
        labels={"foo": "bar"},
        name="test",
        task_id="task",
        in_cluster=False,
        do_xcom_push=False,
    )

    # THEN
    mock_launcher.return_value = (State.SUCCESS, None)
    k.execute(None)
    self.assertEqual(mock_launcher.call_args[0][0].secrets, secrets)
from airflow import DAG
from airflow.utils.dates import days_ago
from airflow.contrib.operators.kubernetes_pod_operator import KubernetesPodOperator
from airflow.contrib.kubernetes.secret import Secret

args = {'owner': 'Airflow', 'start_date': days_ago(2)}

dag = DAG(
    'citi-bike-pipeline',
    default_args=args,
    description='Citi Bike Pipeline',
    schedule_interval=None,
)

aws_access_key_id = Secret('env', 'AWS_ACCESS_KEY_ID', 'citi-bike-secrets', 'aws_access_key_id')
aws_secret_access_key = Secret('env', 'AWS_SECRET_ACCESS_KEY', 'citi-bike-secrets', 'aws_secret_access_key')

# TODO Change this to your ecr dag image
ecr_image = "<dag_ecr_image_url>"

create_cluster_task = KubernetesPodOperator(
    namespace='citi-bike',
    task_id="create_cluster",
    name="create_cluster_task",
    image=ecr_image,
    image_pull_policy='Always',
    arguments=["create_cluster"],
    do_xcom_push=True,
    secrets=[aws_access_key_id, aws_secret_access_key],
from airflow.models.dag import DAG
from datetime import datetime, timedelta
from airflow.contrib.operators.kubernetes_pod_operator import KubernetesPodOperator
from airflow.contrib.kubernetes.secret import Secret

aws_access_key_id = Secret('env', 'AWS_ACCESS_KEY_ID', 'airflow-aws', 'AWS_ACCESS_KEY_ID')
aws_secret_access_key = Secret('env', 'AWS_SECRET_ACCESS_KEY', 'airflow-aws', 'AWS_SECRET_ACCESS_KEY')
aws_account = Secret('env', 'AWS_ACCOUNT', 'airflow-aws', 'AWS_ACCOUNT')

spark_image = 'gcr.io/engineering-sandbox-228018/dev-pyspark:2.4.4'

default_args = {
    'owner': 'airflow',
    'namespace': 'ns-airflow',
    'depends_on_past': False,
    'get_logs': True,
    'start_date': datetime(2020, 1, 1),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
#!/usr/bin/env python
from os import environ
import json

from airflow import DAG
from airflow.contrib.operators.kubernetes_pod_operator import KubernetesPodOperator
from airflow.contrib.kubernetes.secret import Secret
from airflow.utils.dates import days_ago
from airflow.utils.helpers import chain

from pipeline.testnatgateway.default_values import DEFAULT_VALUES

SECRET_ENV = Secret(deploy_type="env", deploy_target=None, secret="airflow-secret")
PIPILE_NAME = "testnatgateway"
WORKLOAD = int(environ["DAG_WORKLOAD"])

DRONE_LOG_DAG = DAG(
    PIPILE_NAME,
    default_args=DEFAULT_VALUES,
    schedule_interval=None,  # '@once',
    description="Insert UUID row into db",
    start_date=days_ago(1),
)

INDEX_FILE = "index.txt"
INDEX_PREFIX = f"airflow/{PIPILE_NAME}/indexes"
INDEX2 = KubernetesPodOperator(
    dag=DRONE_LOG_DAG,
    image=f"{environ['DOCKER_REGISTRY']}/pipeline/{PIPILE_NAME}:index",
    namespace="airflow",
    image_pull_policy="Always",
    name="index",
    arguments=[INDEX_PREFIX, INDEX_FILE],
"retries": 1, "retry_delay": timedelta(minutes=5), # 'queue': 'bash_queue', # 'pool': 'backfill', # 'priority_weight': 10, # 'end_date': datetime(2016, 1, 1), "env_vars": { "AWS_DEFAULT_REGION": "ap-southeast-2", "DB_HOSTNAME": DB_HOSTNAME, # The run day's DB "DB_DATABASE": DB_DATABASE, }, # Use K8S secrets to send DB Creds # Lift secrets into environment variables for datacube "secrets": [ Secret("env", "DB_USERNAME", "replicator-db", "postgres-username"), # For Datacube to use Secret("env", "DB_PASSWORD", "replicator-db", "postgres-password"), # For psql to use Secret("env", "PGPASSWORD", "replicator-db", "postgres-password"), ], } # Point to Geoscience Australia / OpenDataCube Dockerhub S3_TO_RDS_IMAGE = "geoscienceaustralia/s3-to-rds:latest" EXPLORER_IMAGE = "opendatacube/explorer:2.1.9" dag = DAG( "k8s_db_sync", doc_md=__doc__, default_args=DEFAULT_ARGS,
    'email': '*****@*****.**',
    'email_on_failure': True,
    'email_on_retry': False,
    'retries': 0,
    'retry_delay': timedelta(seconds=10)
}

MAIN_DAG_ID = "ba_dag"

main_dag = DAG(MAIN_DAG_ID,
               default_args=default_args,
               schedule_interval='@weekly',
               catchup=True,
               max_active_runs=1)

service_account_secret_file = Secret('volume', '/etc/ga_service_account',
                                     'ga-service-account-json', 'ga-service-account.json')
client_secret_secret_file = Secret('volume', '/etc/ga_client_secret',
                                   'ga-client-secret-json', 'ga-client-secret.json')

script_root_path = '/tmp/scripts'
get_user_ids_r_script_path = "Services/ELT/DA/getUserIDs.R"
get_user_ids_r_script_whole_path = script_root_path + \
    "/" + get_user_ids_r_script_path
retrieve_user_session_activity_r_script_path = "Services/ELT/DA/getUserIDs.R"
retrieve_user_session_activity_r_script_whole_path = script_root_path + \
    "/" + retrieve_user_session_activity_r_script_path

volume_mount = VolumeMount('git-root-path',
                           mount_path=script_root_path,
"email_on_retry": False, "retries": 1, "retry_delay": timedelta(minutes=5), "env_vars": { "AWS_DEFAULT_REGION": "ap-southeast-2", # TODO: Pass these via templated params in DAG Run "DB_HOSTNAME": "database-write.local", "DB_DATABASE": "ows-index", }, # Use K8S secrets to send DB Creds # Lift secrets into environment variables for datacube "secrets": [ Secret("env", "DB_USERNAME", "ows-db", "postgres-username"), Secret("env", "DB_PASSWORD", "ows-db", "postgres-password"), ], } INDEXER_IMAGE = "opendatacube/datacube-index:0.0.5" OWS_IMAGE = "opendatacube/ows:1.8.0" EXPLORER_IMAGE = "opendatacube/explorer:2.1.9" dag = DAG("k8s_s3_orchestrate", doc_md=__doc__, default_args=DEFAULT_ARGS, schedule_interval=None, catchup=False, tags=["k8s"])
import datetime

import pandas as pd

# pd.datetime is an alias for datetime.datetime
today = pd.datetime.today()

from airflow import models
from airflow.contrib.kubernetes.secret import Secret
from airflow.contrib.operators.kubernetes_pod_operator import KubernetesPodOperator

default_dag_args = {
    # Setting start date as yesterday starts the DAG immediately when it is
    # detected in the Cloud Storage bucket.
    'start_date': datetime.datetime(2019, 5, 31),
}

DOCKER_IMAGE = 'gcr.io/dta-ga-bigquery/galileo'
DATA_DIR = '/home/airflow/gcs/data/'
GCS_BUCKET = 'us-east1-dta-airflow-b3415db4-bucket'

cf_username = Secret('env', 'CF_USERNAME', 'airflow-secrets', 'CF_USERNAME')
cf_password = Secret('env', 'CF_PASSWORD', 'airflow-secrets', 'CF_PASSWORD')
htpasswd = models.Variable.get('HTPASSWD', '')

with models.DAG('observatory',
                schedule_interval=datetime.timedelta(days=7),
                default_args=default_dag_args) as dag:
    deploy_shiny = KubernetesPodOperator(
        task_id='deploy-shiny',
        name='deploy-shiny',
        namespace='default',
        secrets=[cf_username, cf_password],
        image_pull_policy="Always",
        image=DOCKER_IMAGE,
        cmds=['bash', '-c'],
        arguments=[
    # Continue to run this DAG once per day
    'schedule_interval': datetime.timedelta(days=1),
    # Setting start date as yesterday starts the DAG immediately after discovery
    'start_date': datetime.datetime.now() - datetime.timedelta(days=1)
}

# A Secret is an object that contains a small amount of sensitive data
# such as a password, a token, or a key. Such information might otherwise be
# put in a Pod specification or in an image; putting it in a Secret object
# allows for more control over how it is used, and reduces the risk of
# accidental exposure.
secret_file = Secret(
    # Mounts the secret as a file in RAM-backed tmpfs
    deploy_type='volume',
    # File path of where to deploy the target, since deploy_type is 'volume'
    deploy_target='/etc/sql_conn',
    # Name of secret in Kubernetes
    secret='airflow-secrets',
    # Key of the secret within Kubernetes
    key='sql_alchemy_conn')

secret_env = Secret(
    # Exposes secret as environment variable
    deploy_type='env',
    # The environment variable the secret is specified under
    deploy_target='SQL_CONN',
    # Name of secret in Kubernetes
    secret='airflow-secrets',
    # Key of the secret within Kubernetes
    key='sql_alchemy_conn')
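# A minimal sketch (assumed, not part of the excerpt above) of wiring both Secret
# objects into a task: secret_env lands in the container's environment as SQL_CONN,
# while secret_file is mounted under /etc/sql_conn. Image, ids, and namespace are
# placeholders.
from airflow.contrib.operators.kubernetes_pod_operator import KubernetesPodOperator

kubernetes_secret_vars_ex = KubernetesPodOperator(
    task_id='ex-kube-secrets',
    name='ex-kube-secrets',
    namespace='default',
    image='ubuntu:16.04',
    cmds=['bash', '-c', 'ls /etc/sql_conn && echo "$SQL_CONN"'],
    secrets=[secret_file, secret_env],
)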
from airflow.contrib.kubernetes.secret import Secret

# GCP service account for dbt operations with BigQuery
# TODO: make this a volume deploy type?
DBT_SERVICE_ACCOUNT = Secret(
    # Expose the secret as an environment variable.
    deploy_type="env",
    # The name of the environment variable, since deploy_type is `env` rather
    # than `volume`.
    deploy_target="SERVICE_ACCOUNT",
    # Name of the Kubernetes Secret
    secret="dbt-secret",
    # Key of a secret stored in this Secret object
    key="account.json",
)

GIT_SECRET_ID_RSA_PRIVATE = Secret(
    deploy_type="volume",
    deploy_target="/dbt/.ssh/",
    secret="ssh-key-secret",
    key="id_rsa",
)
def setUp(self):
    self.simple_pod_request_factory = SimplePodRequestFactory()
    self.xcom_pod_request_factory = ExtractXcomPodRequestFactory()
    self.pod = Pod(
        image='busybox',
        envs={
            'ENVIRONMENT': 'prod',
            'LOG_LEVEL': 'warning'
        },
        name='myapp-pod',
        cmds=['sh', '-c', 'echo Hello Kubernetes!'],
        labels={'app': 'myapp'},
        image_pull_secrets='pull_secret_a,pull_secret_b',
        configmaps=['configmap_a', 'configmap_b'],
        secrets=[
            # This should be a secretRef
            Secret('env', None, 'secret_a'),
            # This should be a single secret mounted in volumeMounts
            Secret('volume', '/etc/foo', 'secret_b'),
            # This should produce a single secret mounted in env
            Secret('env', 'TARGET', 'secret_b', 'source_b'),
        ],
        security_context={
            'runAsUser': 1000,
            'fsGroup': 2000,
        })
    self.maxDiff = None
    self.expected = {
        'apiVersion': 'v1',
        'kind': 'Pod',
        'metadata': {
            'name': 'myapp-pod',
            'labels': {
                'app': 'myapp'
            },
            'annotations': {}
        },
        'spec': {
            'containers': [{
                'name': 'base',
                'image': 'busybox',
                'command': ['sh', '-c', 'echo Hello Kubernetes!'],
                'imagePullPolicy': 'IfNotPresent',
                'args': [],
                'env': [{
                    'name': 'ENVIRONMENT',
                    'value': 'prod'
                }, {
                    'name': 'LOG_LEVEL',
                    'value': 'warning'
                }, {
                    'name': 'TARGET',
                    'valueFrom': {
                        'secretKeyRef': {
                            'name': 'secret_b',
                            'key': 'source_b'
                        }
                    }
                }],
                'envFrom': [{
                    'secretRef': {
                        'name': 'secret_a'
                    }
                }, {
                    'configMapRef': {
                        'name': 'configmap_a'
                    }
                }, {
                    'configMapRef': {
                        'name': 'configmap_b'
                    }
                }],
                'volumeMounts': [{
                    'mountPath': '/etc/foo',
                    'name': 'secretvol0',
                    'readOnly': True
                }]
            }],
            'restartPolicy': 'Never',
            'nodeSelector': {},
            'volumes': [{
                'name': 'secretvol0',
                'secret': {
                    'secretName': 'secret_b'
                }
            }],
            'imagePullSecrets': [{
                'name': 'pull_secret_a'
            }, {
                'name': 'pull_secret_b'
            }],
            'affinity': {},
            'securityContext': {
                'runAsUser': 1000,
                'fsGroup': 2000,
            },
        }
    }
def test_secret_throws(self):
    with self.assertRaises(AirflowConfigException):
        Secret('volume', None, 'secret_a', 'key')
##
# Secret Configuration
##
"""
Secrets pull secret variables and their contents from Kubernetes. You do this
to protect things like database credentials. You can do this with files,
tokens, or variables.

Arguments:
    deploy_type (string): How you want to deploy this secret inside the container
    deploy_target (string): The name of the environment variable, in this case
    secret (string): The name of the secret stored in Kubernetes
    key (string): The key of the secret stored in the object
"""
secret_env = Secret(
    deploy_type='env',
    deploy_target='SQL_CONN',
    secret='airflow-secrets',
    key='sql_alchemy_conn')

##
# Example DAG
##
with DAG(
    dag_id='kubernetes_pod_operator_blobfuse',
    default_args=args,
    schedule_interval=None,
    tags=['example'],
) as dag:
    """
from airflow.contrib.kubernetes.secret import Secret

# AWS
AWS_ACCOUNT_ID = Secret("env", "AWS_ACCOUNT_ID", "airflow", "AWS_ACCOUNT_ID")

# Snowflake Load
SNOWFLAKE_LOAD_DATABASE = Secret("env", "SNOWFLAKE_LOAD_DATABASE", "airflow", "SNOWFLAKE_LOAD_DATABASE")
SNOWFLAKE_LOAD_ROLE = Secret("env", "SNOWFLAKE_LOAD_ROLE", "airflow", "SNOWFLAKE_LOAD_ROLE")
SNOWFLAKE_LOAD_PASSWORD = Secret("env", "SNOWFLAKE_LOAD_PASSWORD", "airflow", "SNOWFLAKE_LOAD_PASSWORD")
SNOWFLAKE_LOAD_USER = Secret("env", "SNOWFLAKE_LOAD_USER", "airflow", "SNOWFLAKE_LOAD_USER")
SNOWFLAKE_LOAD_WAREHOUSE = Secret("env", "SNOWFLAKE_LOAD_WAREHOUSE", "airflow", "SNOWFLAKE_LOAD_WAREHOUSE")

# Snowflake Transform
SNOWFLAKE_TRANSFORM_ROLE = Secret("env", "SNOWFLAKE_TRANSFORM_ROLE", "airflow", "SNOWFLAKE_TRANSFORM_ROLE")
SNOWFLAKE_TRANSFORM_SCHEMA = Secret("env", "SNOWFLAKE_TRANSFORM_SCHEMA", "airflow", "SNOWFLAKE_TRANSFORM_SCHEMA")
SNOWFLAKE_TRANSFORM_USER = Secret("env", "SNOWFLAKE_TRANSFORM_USER", "airflow", "SNOWFLAKE_TRANSFORM_USER")
SNOWFLAKE_TRANSFORM_WAREHOUSE = Secret("env", "SNOWFLAKE_TRANSFORM_WAREHOUSE", "airflow", "SNOWFLAKE_TRANSFORM_WAREHOUSE")
SNOWFLAKE_USER = Secret("env", "SNOWFLAKE_USER", "airflow", "SNOWFLAKE_USER")
SNOWFLAKE_ACCOUNT = Secret("env", "SNOWFLAKE_ACCOUNT", "airflow", "SNOWFLAKE_ACCOUNT")
SNOWFLAKE_PASSWORD = Secret("env", "SNOWFLAKE_PASSWORD", "airflow",
from airflow.contrib.kubernetes.secret import Secret

# AWS
AWS_ACCOUNT_ID = Secret("env", "AWS_ACCOUNT_ID", "airflow", "AWS_ACCOUNT_ID")
AWS_ACCESS_KEY_ID = Secret("env", "AWS_ACCESS_KEY_ID", "airflow", "AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = Secret("env", "AWS_SECRET_ACCESS_KEY", "airflow", "AWS_SECRET_ACCESS_KEY")

# blapi
BLAPI_DATABASE_URL = Secret("env", "BLAPI_DATABASE_URL", "airflow", "BLAPI_DATABASE_URL")
BLAPI_TOKEN = Secret("env", "BLAPI_TOKEN", "airflow", "BLAPI_TOKEN")
BLAPI_URL = Secret("env", "BLAPI_URL", "airflow", "BLAPI_URL")
BLAPI_TEST_URL = Secret("env", "BLAPI_TEST_URL", "airflow", "BLAPI_TEST_URL")
BLAPI_TEST_TOKEN = Secret("env", "BLAPI_TEST_TOKEN", "airflow", "BLAPI_TEST_TOKEN")
BLAPI_TEST_DATABASE_URL = Secret("env", "BLAPI_TEST_DATABASE_URL", "airflow", "BLAPI_TEST_DATABASE_URL")

# dbt Cloud
DBT_CLOUD_API_KEY = Secret("env", "DBT_CLOUD_API_KEY", "airflow", "DBT_CLOUD_API_KEY")
DBT_CLOUD_API_ACCOUNT_ID = Secret("env", "DBT_CLOUD_API_ACCOUNT_ID", "airflow", "DBT_CLOUD_API_ACCOUNT_ID")

# Snowflake Load
SNOWFLAKE_LOAD_DATABASE = Secret("env", "SNOWFLAKE_LOAD_DATABASE", "airflow", "SNOWFLAKE_LOAD_DATABASE")
SNOWFLAKE_LOAD_ROLE = Secret("env", "SNOWFLAKE_LOAD_ROLE", "airflow",
    default_args=default_args,
    schedule_interval=dag_interval,
    concurrency=1)

# env variables for inside the k8s pod
k8s_run_env = {
    'SECRETS_PATH': '/secrets/wcgh-secrets.json',
    'COVID_19_DEPLOY_FILE': 'covid-19-widgets.zip',
    'COVID_19_DEPLOY_URL': 'https://ds2.capetown.gov.za/covid-19-widgets-deploy',
    'COVID_19_WIDGETS_DIR': '/covid-19-widgets',
    'DB_UTILS_LOCATION': 'https://ds2.capetown.gov.za/db-utils',
    'DB_UTILS_PKG': 'db_utils-0.3.7-py2.py3-none-any.whl'
}

# airflow-workers' secrets
secret_file = Secret('volume', '/secrets', 'wcgh-secret')

# arguments for the k8s operator
k8s_run_args = {
    "image": "cityofcapetown/datascience:python@sha256:c5a8ec97e35e603aca281343111193a26a929d821b84c6678fb381f9e7bd08d7",
    "namespace": 'airflow-workers',
    "is_delete_operator_pod": True,
    "get_logs": True,
    "in_cluster": True,
    "secrets": [secret_file],
    "env_vars": k8s_run_env,
    "image_pull_policy": "IfNotPresent",
    "startup_timeout_seconds": 60 * 30,
}
    schedule_interval=dag_interval,
    concurrency=5)

# env variables for inside the k8s pod
k8s_run_env = {
    'SECRETS_PATH': '/secrets/secrets.json',
    'COVID_19_DEPLOY_FILE': 'covid-19-widgets.zip',
    'COVID_19_DEPLOY_URL': 'https://ds2.capetown.gov.za/covid-19-widgets-deploy',
    'COVID_19_WIDGETS_DIR': '/covid-19-widgets',
    'DB_UTILS_LOCATION': 'https://ds2.capetown.gov.za/db-utils',
    'DB_UTILS_PKG': 'db_utils-0.4.0-py2.py3-none-any.whl'
}

# airflow-workers' secrets
secret_file = Secret('volume', '/secrets', 'airflow-workers-secret')

# arguments for the k8s operator
k8s_run_args = {
    "image": "cityofcapetown/datascience:python@sha256:53a435a5550f057b89b5d750d19efd1c6ac740a0db340698d928929d4335a64f",
    "namespace": 'airflow-workers',
    "is_delete_operator_pod": True,
    "get_logs": True,
    "in_cluster": True,
    "secrets": [secret_file],
    "env_vars": k8s_run_env,
    "image_pull_policy": "IfNotPresent",
    "startup_timeout_seconds": 60 * 30,
}
from airflow.contrib.kubernetes.secret import Secret

# AWS
AWS_ACCOUNT_ID = Secret("env", "AWS_ACCOUNT_ID", "airflow", "AWS_ACCOUNT_ID")
AWS_ACCESS_KEY_ID = Secret("env", "AWS_ACCESS_KEY_ID", "airflow", "AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = Secret("env", "AWS_SECRET_ACCESS_KEY", "airflow", "AWS_SECRET_ACCESS_KEY")

# Snowflake Load
SNOWFLAKE_LOAD_DATABASE = Secret("env", "SNOWFLAKE_LOAD_DATABASE", "airflow", "SNOWFLAKE_LOAD_DATABASE")
SNOWFLAKE_LOAD_ROLE = Secret("env", "SNOWFLAKE_LOAD_ROLE", "airflow", "SNOWFLAKE_LOAD_ROLE")
SNOWFLAKE_LOAD_PASSWORD = Secret("env", "SNOWFLAKE_LOAD_PASSWORD", "airflow", "SNOWFLAKE_LOAD_PASSWORD")
SNOWFLAKE_LOAD_USER = Secret("env", "SNOWFLAKE_LOAD_USER", "airflow", "SNOWFLAKE_LOAD_USER")
SNOWFLAKE_LOAD_WAREHOUSE = Secret("env", "SNOWFLAKE_LOAD_WAREHOUSE", "airflow", "SNOWFLAKE_LOAD_WAREHOUSE")

# Snowflake Transform
SNOWFLAKE_TRANSFORM_ROLE = Secret("env", "SNOWFLAKE_TRANSFORM_ROLE", "airflow", "SNOWFLAKE_TRANSFORM_ROLE")
SNOWFLAKE_TRANSFORM_SCHEMA = Secret("env", "SNOWFLAKE_TRANSFORM_SCHEMA", "airflow", "SNOWFLAKE_TRANSFORM_SCHEMA")
SNOWFLAKE_TRANSFORM_USER = Secret("env", "SNOWFLAKE_TRANSFORM_USER", "airflow", "SNOWFLAKE_TRANSFORM_USER")
SNOWFLAKE_TRANSFORM_WAREHOUSE = Secret("env", "SNOWFLAKE_TRANSFORM_WAREHOUSE", "airflow", "SNOWFLAKE_TRANSFORM_WAREHOUSE")
from airflow.contrib.operators import KubernetesOperator
from airflow.contrib.operators.kubernetes_pod_operator import KubernetesPodOperator
from airflow.contrib.kubernetes.secret import Secret
from airflow.contrib.kubernetes.volume import Volume
from airflow.contrib.kubernetes.volume_mount import VolumeMount

secret_file = Secret('volume', '/etc/sql_conn', 'airflow-secrets', 'sql_alchemy_conn')
secret_env = Secret('env', 'SQL_CONN', 'airflow-secrets', 'sql_alchemy_conn')

volume_mount = VolumeMount('test-volume',
                           mount_path='/root/mount_file',
                           sub_path=None,
                           read_only=True)

volume_config = {
    'persistentVolumeClaim': {
        'claimName': 'test-volume'
    }
}
volume = Volume(name='test-volume', configs=volume_config)

affinity = {
    'nodeAffinity': {
        'preferredDuringSchedulingIgnoredDuringExecution': [
            {
                "weight": 1,
                "preference": {
                    "matchExpressions": {
                        "key": "disktype",
                        "operator": "In",
                        "values": ["ssd"]
                    }
                }
from datetime import datetime, timedelta

from airflow import DAG
from airflow.contrib.operators.kubernetes_pod_operator import KubernetesPodOperator
from airflow.contrib.kubernetes.secret import Secret

secrets = [
    Secret(
        deploy_type="volume",
        deploy_target="/etc/secrets",
        secret="common-svc-acc",
        key="sa-key.json")
]

default_args = {
    "owner": "eduard.chai",
    "start_date": datetime(2020, 3, 1),
    "email_on_failure": False,
    "email_on_retry": False,
    "retries": 1,
    "retry_delay": timedelta(minutes=3)
}

dag = DAG(
    "gcs_loader",
    default_args=default_args,
    schedule_interval="0 1 * * *",
    catchup=False,
    max_active_runs=1
)