def test_pod_affinity():
    affinity = {
        'nodeAffinity': {
            'requiredDuringSchedulingIgnoredDuringExecution': {
                'nodeSelectorTerms': [
                    {
                        'matchExpressions': [
                            {
                                'key': 'beta.kubernetes.io/os',
                                'operator': 'In',
                                'values': ['linux']
                            }
                        ]
                    }
                ]
            }
        }
    }
    k = KubernetesPodOperator(
        namespace='default',
        image="ubuntu:16.04",
        cmds=["bash", "-cx"],
        arguments=["echo", "10"],
        labels={"foo": "bar"},
        name="test",
        task_id="task",
        affinity=affinity,
        executor_config={'KubernetesExecutor': {'affinity': affinity}}
    )
    k.execute(None)
def test_volume_mount():
    with mock.patch.object(PodLauncher, 'log') as mock_logger:
        volume_mount = VolumeMount('test-volume',
                                   mount_path='/root/mount_file',
                                   sub_path=None,
                                   read_only=True)
        volume_config = {
            'persistentVolumeClaim': {
                'claimName': 'test-volume'
            }
        }
        volume = Volume(name='test-volume', configs=volume_config)
        k = KubernetesPodOperator(
            namespace='default',
            image="ubuntu:16.04",
            cmds=["bash", "-cx"],
            arguments=["cat /root/mount_file/test.txt"],
            labels={"foo": "bar"},
            volume_mounts=[volume_mount],
            volumes=[volume],
            name="test",
            task_id="task"
        )
        k.execute(None)
        mock_logger.info.assert_any_call(b"retrieved from mount\n")
def test_working_pod():
    k = KubernetesPodOperator(
        namespace='default',
        image="ubuntu:16.04",
        cmds=["bash", "-cx"],
        arguments=["echo 10"],
        labels={"foo": "bar"},
        name="test",
        task_id="task"
    )
    k.execute(None)
def test_xcom_push(self):
    return_value = '{"foo": "bar"\n, "buzz": 2}'
    k = KubernetesPodOperator(
        namespace='default',
        image="ubuntu:16.04",
        cmds=["bash", "-cx"],
        arguments=['echo \'{}\' > /airflow/xcom/return.json'.format(return_value)],
        labels={"foo": "bar"},
        name="test",
        task_id="task",
        do_xcom_push=True
    )
    self.assertEqual(k.execute(None), json.loads(return_value))
def test_logging(self):
    with mock.patch.object(PodLauncher, 'log') as mock_logger:
        k = KubernetesPodOperator(
            namespace='default',
            image="ubuntu:16.04",
            cmds=["bash", "-cx"],
            arguments=["echo", "10"],
            labels={"foo": "bar"},
            name="test",
            task_id="task",
            get_logs=True
        )
        k.execute(None)
        mock_logger.info.assert_any_call("+ echo\n")
def test_faulty_image(self):
    bad_image_name = "foobar"
    k = KubernetesPodOperator(
        namespace='default',
        image=bad_image_name,
        cmds=["bash", "-cx"],
        arguments=["echo 10"],
        labels={"foo": "bar"},
        name="test",
        task_id="task",
        startup_timeout_seconds=5
    )
    with self.assertRaises(AirflowException):
        k.execute(None)
def test_faulty_service_account(self):
    bad_service_account_name = "foobar"
    k = KubernetesPodOperator(
        namespace='default',
        image="ubuntu:16.04",
        cmds=["bash", "-cx"],
        arguments=["echo 10"],
        labels={"foo": "bar"},
        name="test",
        task_id="task",
        startup_timeout_seconds=5,
        service_account_name=bad_service_account_name
    )
    with self.assertRaises(ApiException):
        k.execute(None)
def test_pod_node_selectors():
    node_selectors = {
        'beta.kubernetes.io/os': 'linux'
    }
    k = KubernetesPodOperator(
        namespace='default',
        image="ubuntu:16.04",
        cmds=["bash", "-cx"],
        arguments=["echo", "10"],
        labels={"foo": "bar"},
        name="test",
        task_id="task",
        node_selectors=node_selectors,
        executor_config={'KubernetesExecutor': {'node_selectors': node_selectors}}
    )
    k.execute(None)
def test_pod_failure(self):
    """
    Tests that the task fails when a pod reports a failure
    """
    bad_internal_command = "foobar"
    k = KubernetesPodOperator(
        namespace='default',
        image="ubuntu:16.04",
        cmds=["bash", "-cx"],
        arguments=[bad_internal_command + " 10"],
        labels={"foo": "bar"},
        name="test",
        task_id="task"
    )
    with self.assertRaises(AirflowException):
        k.execute(None)
def test_config_path_move():
    new_config_path = '/tmp/kube_config'
    old_config_path = os.path.expanduser('~/.kube/config')
    shutil.copy(old_config_path, new_config_path)
    k = KubernetesPodOperator(
        namespace='default',
        image="ubuntu:16.04",
        cmds=["bash", "-cx"],
        arguments=["echo 10"],
        labels={"foo": "bar"},
        name="test",
        task_id="task",
        config_file=new_config_path
    )
    k.execute(None)
def test_pod_delete_even_on_launcher_error(self, client_mock, delete_pod_mock, run_pod_mock):
    # client_mock, delete_pod_mock and run_pod_mock are injected by @mock.patch
    # decorators that are not shown in this excerpt.
    k = KubernetesPodOperator(
        namespace='default',
        image="ubuntu:16.04",
        cmds=["bash", "-cx"],
        arguments=["echo 10"],
        labels={"foo": "bar"},
        name="test",
        task_id="task",
        in_cluster=False,
        cluster_context='default',
        is_delete_operator_pod=True
    )
    run_pod_mock.side_effect = AirflowException('fake failure')
    with self.assertRaises(AirflowException):
        k.execute(None)
    assert delete_pod_mock.called
def test_fs_group():
    security_context = {
        'securityContext': {
            'fsGroup': 1000,
        }
    }
    k = KubernetesPodOperator(
        namespace='default',
        image="ubuntu:16.04",
        cmds=["bash", "-cx"],
        arguments=["echo", "10"],
        labels={"foo": "bar"},
        name="test",
        task_id="task",
        security_context=security_context,
        executor_config={'KubernetesExecutor': {'securityContext': security_context}}
    )
    k.execute(None)
def test_envs_from_secrets(self, client_mock, launcher_mock):
    # GIVEN
    from airflow.utils.state import State
    secrets = [Secret('env', None, "secret_name")]
    # WHEN
    k = KubernetesPodOperator(
        namespace='default',
        image="ubuntu:16.04",
        cmds=["bash", "-cx"],
        arguments=["echo 10"],
        secrets=secrets,
        labels={"foo": "bar"},
        name="test",
        task_id="task",
    )
    # THEN
    launcher_mock.return_value = (State.SUCCESS, None)
    k.execute(None)
    self.assertEqual(launcher_mock.call_args[0][0].secrets, secrets)
def test_image_pull_secrets_correctly_set(self, client_mock, launcher_mock):
    from airflow.utils.state import State
    fake_pull_secrets = "fakeSecret"
    k = KubernetesPodOperator(
        namespace='default',
        image="ubuntu:16.04",
        cmds=["bash", "-cx"],
        arguments=["echo 10"],
        labels={"foo": "bar"},
        name="test",
        task_id="task",
        image_pull_secrets=fake_pull_secrets,
        in_cluster=False,
        cluster_context='default'
    )
    launcher_mock.return_value = (State.SUCCESS, None)
    k.execute(None)
    self.assertEqual(launcher_mock.call_args[0][0].image_pull_secrets, fake_pull_secrets)
def test_config_path(self, client_mock, launcher_mock):
    from airflow.utils.state import State
    file_path = "/tmp/fake_file"
    k = KubernetesPodOperator(
        namespace='default',
        image="ubuntu:16.04",
        cmds=["bash", "-cx"],
        arguments=["echo 10"],
        labels={"foo": "bar"},
        name="test",
        task_id="task",
        config_file=file_path,
        in_cluster=False,
        cluster_context='default'
    )
    launcher_mock.return_value = (State.SUCCESS, None)
    k.execute(None)
    client_mock.assert_called_with(in_cluster=False,
                                   cluster_context='default',
                                   config_file=file_path)
INDEXING_BASH_COMMAND = [
    "bash",
    "-c",
    f'sqs-to-dc {SQS_QUEUE_NAME} "{index_product_string}" {record_path_string} --skip-lineage --allow-unsafe',
]

# THE DAG
dag = DAG(
    "sentinel_2_nrt_streamline_indexing",
    doc_md=__doc__,
    default_args=DEFAULT_ARGS,
    schedule_interval="0 */1 * * *",  # hourly
    catchup=False,
    tags=["k8s", "sentinel-2", "streamline-indexing"],
)

with dag:
    INDEXING = KubernetesPodOperator(
        namespace="processing",
        image=INDEXER_IMAGE,
        image_pull_policy="IfNotPresent",
        annotations={"iam.amazonaws.com/role": INDEXING_ROLE},
        arguments=INDEXING_BASH_COMMAND,
        labels={"step": "sqs-to-rds"},
        name="datacube-index",
        task_id="indexing-task",
        get_logs=True,
        affinity=NODE_AFFINITY,
        is_delete_operator_pod=True,
    )
}

INDEXER_IMAGE = "opendatacube/datacube-index:0.0.15"

dag = DAG(
    "k8s_index_wo_fc_c3",
    doc_md=__doc__,
    default_args=DEFAULT_ARGS,
    schedule_interval="0,30 * * * * *",
    catchup=False,
    tags=["k8s", "landsat_c3"],
)

with dag:
    for product in ["wo", "fc"]:
        INDEXING = KubernetesPodOperator(
            namespace="processing",
            image=INDEXER_IMAGE,
            image_pull_policy="IfNotPresent",
            arguments=[
                "bash",
                "-c",
                f"sqs-to-dc --stac ${product.upper()}_SQS_INDEXING_QUEUE ga_ls_{product}_3",
            ],
            labels={"step": "sqs-dc-indexing"},
            name=f"datacube-index-{product}",
            task_id=f"indexing-task-{product}",
            get_logs=True,
            is_delete_operator_pod=True,
        )
S3_BACKUP_SENSE = S3KeySensor(
    task_id="s3-backup-sense",
    poke_interval=60 * 30,
    bucket_key=S3_KEY,
    aws_conn_id="aws_nci_db_backup",
)

# Download NCI db incremental backup from S3 and restore to RDS Aurora
RESTORE_NCI_INCREMENTAL_SYNC = KubernetesPodOperator(
    namespace="processing",
    image=S3_TO_RDS_IMAGE,
    annotations={"iam.amazonaws.com/role": "svc-dea-sandbox-eks-processing-dbsync"},
    cmds=["./import_from_s3.sh"],
    image_pull_policy="Always",
    labels={"step": "s3-to-rds"},
    name="s3-to-rds",
    task_id="s3-to-rds",
    get_logs=True,
    is_delete_operator_pod=True,
    affinity=affinity,
    volumes=[s3_backup_volume],
    volume_mounts=[s3_backup_volume_mount],
)

# Task complete
COMPLETE = DummyOperator(task_id="done")

START >> S3_BACKUP_SENSE
S3_BACKUP_SENSE >> RESTORE_NCI_INCREMENTAL_SYNC
RESTORE_NCI_INCREMENTAL_SYNC >> COMPLETE
dag = DAG('k8s_auto_scale_out_test_3',
          default_args=default_args,
          schedule_interval=timedelta(minutes=10),
          catchup=False)

start = DummyOperator(task_id='run_this_first', dag=dag)

# task1
task1 = KubernetesPodOperator(
    namespace='default',
    image="731553518823.dkr.ecr.us-west-1.amazonaws.com/demandanalytics/airflow-k8s-etl:latest",
    cmds=["python", "-c"],
    arguments=["print('hello world')"],
    labels={"foo": "bar"},
    name="task1-test",
    task_id="task1-task",
    get_logs=True,
    startup_timeout_seconds=300,
    dag=dag,
    is_delete_operator_pod=True,
    resources={
        'request_cpu': '175m',
        'request_memory': '384Mi',
        'limit_cpu': '175m',
        'limit_memory': '384Mi'
    }
)

# task2
task2 = KubernetesPodOperator(
    namespace='default',
    image="731553518823.dkr.ecr.us-west-1.amazonaws.com/demandanalytics/airflow-k8s-etl:latest",
    cmds=["python", "-c"],
    # 'pool': 'backfill',
    # 'priority_weight': 10,
    # 'end_date': datetime(2016, 1, 1),
}

dag = DAG('Hello_Kubernetes_Operator_DAG',
          default_args=default_args,
          schedule_interval=timedelta(days=1))

namespace = configuration.conf.get("kubernetes", "namespace")

t1 = KubernetesPodOperator(namespace=namespace,
                           image="ubuntu:16.04",
                           cmds=["bash", "-cx"],
                           arguments=["echo", "10"],
                           name="echo",
                           in_cluster=True,
                           task_id="echo",
                           is_delete_operator_pod=True,
                           dag=dag)

t2 = KubernetesPodOperator(
    namespace=namespace,
    image="registry.access.redhat.com/rhscl/python-36-rhel7",
    cmds=["python", "--version"],
    arguments=["echo", "10"],
    name="python-version",
    in_cluster=True,
    task_id="pythonversion",
    is_delete_operator_pod=True,
    dag=dag)
          default_args=default_args,
          schedule_interval="5-59/30 * * * *")

user_agent_cmd = f"""
    {clone_and_setup_extraction_cmd} &&
    python utils/user_agent_parser.py
"""

user_agent = KubernetesPodOperator(
    **pod_defaults,
    image=DATA_IMAGE,
    task_id="user-agent",
    name="user-agent",
    secrets=[
        SNOWFLAKE_USER,
        SNOWFLAKE_PASSWORD,
        SNOWFLAKE_ACCOUNT,
        SNOWFLAKE_TRANSFORM_WAREHOUSE,
    ],
    env_vars=env_vars,
    arguments=[user_agent_cmd],
    dag=dag,
)

# dbt-run
dbt_run_cmd = f"""
    {dbt_install_deps_cmd} &&
    dbt run --profiles-dir profile --exclude tag:nightly
"""

dbt_run = KubernetesPodOperator(
    tolerations = [
        {
            'key': "key",
            'operator': 'Equal',
            'value': 'value'
        }
    ]

    k = KubernetesPodOperator(
        namespace='default',
        image="ubuntu:16.04",
        cmds=["bash", "-cx"],
        arguments=["echo", "10"],
        labels={"foo": "bar"},
        name="airflow-test-pod",
        in_cluster=False,
        task_id="task",
        get_logs=True,
        dag=dag,
        is_delete_operator_pod=False,
        volumes=None,
        tolerations=tolerations
    )

except ImportError as e:
    log.warn("Could not import KubernetesPodOperator: " + str(e))
    log.warn("Install kubernetes dependencies with: "
             " pip install 'apache-airflow[kubernetes]'")
"owner": "s-block", "email": ["*****@*****.**"], } dag = DAG( AIRFLOW_JOB_NAME, default_args=task_args, description="Copy viper extract to external bucket", start_date=datetime(2019, 3, 18), schedule_interval="0 */2 * * *", ) tasks = {} task_id = "viper-external" tasks[task_id] = KubernetesPodOperator( dag=dag, namespace="airflow", image=IMAGE, env_vars={ "INTERNAL_BUCKET": INTERNAL_BUCKET, "EXTERNAL_BUCKET": EXTERNAL_BUCKET, }, labels={"app": dag.dag_id}, name=task_id, in_cluster=True, task_id=task_id, get_logs=True, annotations={"iam.amazonaws.com/role": ROLE}, )
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import airflow
import logging

from airflow.models import DAG
from airflow.contrib.operators.kubernetes_pod_operator import KubernetesPodOperator

args = {'owner': 'airflow', 'start_date': airflow.utils.dates.days_ago(2)}

dag = DAG(dag_id='example_kubernetes_sleep_operator',
          default_args=args,
          schedule_interval='*/2 * * * *')

k = KubernetesPodOperator(namespace='analytics-eng',
                          image="debian",
                          arguments=["sleep", "60"],
                          labels={"foo": "bar"},
                          name="airflow-test-sleep-pod",
                          in_cluster=False,
                          task_id="task",
                          get_logs=True,
                          dag=dag)
from airflow import DAG
from airflow.contrib.operators.kubernetes_pod_operator import KubernetesPodOperator
from airflow.operators.dummy_operator import DummyOperator
from airflow.utils.dates import days_ago
from datetime import datetime, timedelta

dag = DAG(
    'kubernetes_sample',
    start_date=datetime(2020, 1, 1),
    schedule_interval=None)

start = DummyOperator(task_id='run_this_first', dag=dag)

passing = KubernetesPodOperator(namespace='default',
                                image="python:3.6",
                                cmds=["python", "-c"],
                                arguments=["print('hello world')"],
                                labels={"test-airflow": "firstversion"},
                                name="passing-test",
                                task_id="passing-task",
                                get_logs=True,
                                dag=dag
                                )

failing = KubernetesPodOperator(namespace='default',
                                image="ubuntu:1604",
                                cmds=["python", "-c"],
                                arguments=["print('hello world')"],
                                labels={"test-airflow": "firstversion"},
                                name="fail",
                                task_id="failing-task",
                                get_logs=True,
                                dag=dag
                                )
"email_on_failure": False, "email_on_retry": False, "retries": 1, "retry_delay": timedelta(minutes=5), } dag = DAG('kube-operator', default_args=default_args, schedule_interval=timedelta(days=1)) t1 = KubernetesPodOperator( namespace='airflow', image="ubuntu:16.04", cmds=["bash", "-cx"], arguments=["echo", "hello world"], labels={'runner': 'airflow'}, name="pod1", task_id='pod1', is_delete_operator_pod=False, hostnetwork=False, dag=dag, ) t2 = KubernetesPodOperator( namespace='airflow', image="ubuntu:16.04", cmds=["bash", "-cx"], arguments=["echo", "hello world"], labels={'runner': 'airflow'}, name="pod2", task_id='pod2', is_delete_operator_pod=False,
"max_retry_delay": timedelta(minutes=15), "execution_timeout": timedelta(minutes=10), "owner": "davidread", "email": ["*****@*****.**"], } # Catch-up on dates before today-REUPDATE_LAST_N_DAYS days dag = DAG( "prison_population_hub", default_args=task_args, description= "From NOMIS it calculates prison population stats and uploads to The Hub", start_date=days_ago(1), schedule_interval=timedelta(days=1), ) task_dag = KubernetesPodOperator( dag=dag, namespace="airflow", image=IMAGE, env_vars={}, cmds=["bash", "-c"], arguments=["python prison_pop_loop.py --download-data"], labels={"app": dag.dag_id}, name="prison-pop", in_cluster=True, task_id="prison-pop", get_logs=True, annotations={"iam.amazonaws.com/role": IAM_ROLE}, )
dedent(""" for uri in %s; do s3-to-dc $uri "%s" --skip-lineage; done """) % (uri_string, " ".join(INDEXING_PRODUCTS)), ] # THE DAG dag = DAG( "sentinel_2_nrt_batch_indexing", doc_md=__doc__, default_args=DEFAULT_ARGS, schedule_interval="0 6 * * *", catchup=False, tags=["k8s", "sentinel-2", "batch-indexing"], ) with dag: INDEXING = KubernetesPodOperator( namespace="processing", image=INDEXER_IMAGE, image_pull_policy="IfNotPresent", arguments=INDEXING_BASH_COMMAND, labels={"step": "s3-to-rds"}, name="datacube-index", task_id="batch-indexing-task", get_logs=True, affinity=NODE_AFFINITY, is_delete_operator_pod=True, )
"retry_delay": timedelta(minutes=50), "email": ["*****@*****.**"], "pool": "occupeye_pool"} dag = DAG( dag_id="occupeye_scraper_daily_v2", default_args=args, schedule_interval='0 3 * * *', ) surveys_to_s3 = KubernetesPodOperator( namespace="airflow", image="robinlinacre/airflow-occupeye-scraper:v11", cmds=["bash", "-c"], arguments=["python main.py --scrape_type=daily --scrape_datetime='{{ts}}' --next_execution_date='{{next_execution_date}}'"], labels={"foo": "bar"}, name="airflow-test-pod", in_cluster=True, task_id="scrape_all", get_logs=True, dag=dag, annotations={"iam.amazonaws.com/role": "dev_ravi_test_airflow_assume_role"}, image_pull_policy='Always' ) except ImportError as e: log.warn("Could not import KubernetesPodOperator: " + str(e))
volume_config = {
    'persistentVolumeClaim': {
        'claimName': 'airflow-dags'  # uses the persistentVolumeClaim given in the Kube yaml
    }
}
in_volume = Volume(name='input-dataset', configs=volume_config)
out_volume = Volume(name='output-dataset', configs=volume_config)

step1 = KubernetesPodOperator(namespace='airflow',
                              image="cestum/airflow-demo:prep-input",
                              cmds=[],
                              arguments=["K8S-Airflow"],
                              labels={"foo": "bar"},
                              name="prep-input",
                              volumes=[in_volume],
                              volume_mounts=[input_volume_mount],
                              task_id="prep-input",
                              get_logs=True,
                              dag=dag,
                              in_cluster=True)

step2 = KubernetesPodOperator(namespace='airflow',
                              image="cestum/airflow-demo:model-run",
                              cmds=[],
                              arguments=["3"],
                              labels={"foo": "bar"},
                              name="model-run",
                              volumes=[out_volume],
                              volume_mounts=[output_volume_mount],
                              task_id="model-run",
default_args = {
    'owner': 'airflow',
    'depends_on_past': True,
    'start_date': datetime.utcnow()
}

dag = DAG('connector-lorem-ipsum',
          schedule_interval='*/10 * * * *',
          default_args=default_args)

KubernetesPodOperator(namespace='airflow',
                      image="103050589342.dkr.ecr.eu-central-1.amazonaws.com/connector-lorem-ipsum:1.9",
                      env_vars={'CELERY_BROKER_URL': 'redis://:airflow@airflow-redis-master:6379/0'},
                      arguments=["action=create-job", f'no_of_pages=100', 'job_id=book_1'],
                      name="create-job",
                      task_id="create-job",
                      is_delete_operator_pod=True,
                      hostnetwork=False,
                      in_cluster=True,
                      dag=dag
                      )

KubernetesPodOperator(namespace='airflow',
                      image="103050589342.dkr.ecr.eu-central-1.amazonaws.com/connector-lorem-ipsum:1.15",
                      env_vars={'CELERY_BROKER_URL': 'redis://:airflow@airflow-redis-master:6379/0',
                                'REDIS_HOST': 'airflow-redis-master',
                                'REDIS_PASSWORD': '******'},
                      arguments=["action=download-book", 'job_id=book_1'],
                      name="download-book",
                      task_id="download-book",
                      is_delete_operator_pod=True,
                      hostnetwork=False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5)
}

dag = DAG('kubernetes_sample',
          default_args=default_args,
          schedule_interval=timedelta(minutes=10))

start = DummyOperator(task_id='run_this_first', dag=dag)

passing = KubernetesPodOperator(namespace='airflow',
                                image="python:3.6",
                                cmds=["python", "-c"],
                                arguments=["print('hello world')"],
                                labels={"foo": "bar"},
                                name="passing-test",
                                task_id="passing-task",
                                get_logs=True,
                                dag=dag,
                                in_cluster=True)

failing = KubernetesPodOperator(namespace='airflow',
                                image="ubuntu:16.04",
                                cmds=["python", "-c"],
                                arguments=["print('hello world')"],
                                labels={"foo": "bar"},
                                name="fail",
                                task_id="failing-task",
                                get_logs=True,
                                dag=dag,
                                in_cluster=True)
        'email_on_retry': False,
        'retries': 1,
        'retry_delay': timedelta(minutes=5)
    }

    dag = DAG(
        'jordi_test_get_secert2',
        default_args=default_args,
        schedule_interval=timedelta(minutes=10))

    start = DummyOperator(task_id='run_this_first', dag=dag)

    quay_k8s = KubernetesPodOperator(
        namespace='default',
        name="passing-test7",
        image='docker.io/test-pai-1',
        image_pull_secrets=env_var_secret,
        task_id="passing-task6",
        get_logs=True,
        dag=dag
    )

    start >> quay_k8s

except Exception as e:
    error_message = {
        "message": "An internal error occurred",
        "error": str(e),
        "error information": str(sys.exc_info()),
        "traceback": str(traceback.format_exc())