def test_template_task_dag(): dag = DAG( dag_id="dag", default_args=default_args, schedule_interval=None, ) t1 = BashOperator( task_id="print_hello", bash_command="echo hello dagsir", dag=dag, ) t2 = BashOperator( task_id="sleep", bash_command="sleep 2", dag=dag, ) templated_command = """ {% for i in range(5) %} echo '{{ ds }}' echo '{{ macros.ds_add(ds, 7)}}' echo '{{ params.my_param }}' {% endfor %} """ t3 = BashOperator( task_id="templated", depends_on_past=False, bash_command=templated_command, params={"my_param": "Parameter I passed in"}, dag=dag, ) # pylint: disable=pointless-statement t1 >> [t2, t3] instance = DagsterInstance.local_temp() manager = instance.compute_log_manager execution_date = get_current_datetime_in_utc() execution_date_add_one_week = execution_date + datetime.timedelta(days=7) execution_date_iso = execution_date.strftime("%Y-%m-%d") execution_date_add_one_week_iso = execution_date_add_one_week.strftime( "%Y-%m-%d") result = execute_pipeline( make_dagster_pipeline_from_airflow_dag( dag=dag, tags={AIRFLOW_EXECUTION_DATE_STR: execution_date_iso}), instance=instance, ) compute_steps = [ event.step_key for event in result.step_event_list if event.event_type == DagsterEventType.STEP_START ] assert compute_steps == [ "airflow_print_hello.compute", "airflow_sleep.compute", "airflow_templated.compute", ] for step_key in compute_steps: compute_io_path = manager.get_local_path(result.run_id, step_key, ComputeIOType.STDOUT) assert os.path.exists(compute_io_path) stdout_file = open(compute_io_path, "r") file_contents = normalize_file_content(stdout_file.read()) stdout_file.close() if step_key == "airflow_print_hello.compute": assert file_contents.count( "INFO - Running command: echo hello dagsir\n") == 1 assert file_contents.count( "INFO - Command exited with return code 0") == 1 elif step_key == "airflow_sleep.compute": assert file_contents.count( "INFO - Running command: sleep 2\n") == 1 assert file_contents.count("INFO - Output:\n") == 1 assert file_contents.count( "INFO - Command exited with return code 0") == 1 elif step_key == "airflow_templated.compute": assert (file_contents.count( "INFO - Running command: \n \n " "echo '{execution_date_iso}'\n " "echo '{execution_date_add_one_week_iso}'\n " "echo 'Parameter I passed in'\n \n " "echo '{execution_date_iso}'\n " "echo '{execution_date_add_one_week_iso}'\n " "echo 'Parameter I passed in'\n \n " "echo '{execution_date_iso}'\n " "echo '{execution_date_add_one_week_iso}'\n " "echo 'Parameter I passed in'\n \n " "echo '{execution_date_iso}'\n " "echo '{execution_date_add_one_week_iso}'\n " "echo 'Parameter I passed in'\n \n " "echo '{execution_date_iso}'\n " "echo '{execution_date_add_one_week_iso}'\n " "echo 'Parameter I passed in'\n \n \n".format( execution_date_iso=execution_date_iso, execution_date_add_one_week_iso= execution_date_add_one_week_iso, )) == 1) assert (file_contents.count("INFO - {execution_date_iso}\n".format( execution_date_iso=execution_date_iso)) == 5) assert (file_contents.count( "INFO - {execution_date_add_one_week_iso}\n".format( execution_date_add_one_week_iso= execution_date_add_one_week_iso)) == 5) assert file_contents.count("INFO - Parameter I passed in\n") == 5 assert file_contents.count( "INFO - Command exited with return code 0") == 1
def test_failure_callback_only_called_once(self, mock_return_code, _check_call): """ Test that ensures that when a task exits with failure by itself, failure callback is only called once """ # use shared memory value so we can properly track value change even if # it's been updated across processes. failure_callback_called = Value('i', 0) callback_count_lock = Lock() def failure_callback(context): with callback_count_lock: failure_callback_called.value += 1 assert context['dag_run'].dag_id == 'test_failure_callback_race' assert isinstance(context['exception'], AirflowFailException) def task_function(ti): raise AirflowFailException() dag = DAG(dag_id='test_failure_callback_race', start_date=DEFAULT_DATE) task = PythonOperator( task_id='test_exit_on_failure', python_callable=task_function, on_failure_callback=failure_callback, dag=dag, ) dag.clear() with create_session() as session: dag.create_dagrun( run_id="test", state=State.RUNNING, execution_date=DEFAULT_DATE, start_date=DEFAULT_DATE, session=session, ) ti = TaskInstance(task=task, execution_date=DEFAULT_DATE) ti.refresh_from_db() job1 = LocalTaskJob(task_instance=ti, ignore_ti_state=True, executor=SequentialExecutor()) # Simulate race condition where job1 heartbeat ran right after task # state got set to failed by ti.handle_failure but before task process # fully exits. See _execute loop in airflow/jobs/local_task_job.py. # In this case, we have: # * task_runner.return_code() is None # * ti.state == State.Failed # # We also need to set return_code to a valid int after job1.terminating # is set to True so _execute loop won't loop forever. def dummy_return_code(*args, **kwargs): return None if not job1.terminating else -9 mock_return_code.side_effect = dummy_return_code with timeout(10): # This should be _much_ shorter to run. # If you change this limit, make the timeout in the callable above bigger job1.run() ti.refresh_from_db() assert ti.state == State.FAILED # task exits with failure state assert failure_callback_called.value == 1
""" This dag only runs some simple tasks to test Airflow's task execution. """ from datetime import datetime, timedelta from airflow.models.dag import DAG from airflow.operators.dummy_operator import DummyOperator from airflow.utils.dates import days_ago now = datetime.now() now_to_the_hour = (now - timedelta(0, 0, 0, 0, 0, 3)).replace(minute=0, second=0, microsecond=0) START_DATE = now_to_the_hour DAG_NAME = 'test_dag_v1' default_args = { 'owner': 'airflow', 'depends_on_past': True, 'start_date': days_ago(2) } dag = DAG(DAG_NAME, schedule_interval='*/10 * * * *', default_args=default_args) run_this_1 = DummyOperator(task_id='run_this_1', dag=dag) run_this_2 = DummyOperator(task_id='run_this_2', dag=dag) run_this_2.set_upstream(run_this_1) run_this_3 = DummyOperator(task_id='run_this_3', dag=dag) run_this_3.set_upstream(run_this_2)
def test_external_task_sensor_fn_multiple_execution_dates(self):
    bash_command_code = """
{% set s=execution_date.time().second %}
echo "second is {{ s }}"
if [[ $(( {{ s }} % 60 )) == 1 ]]
    then
        exit 1
fi
exit 0
"""
    dag_external_id = TEST_DAG_ID + '_external'
    dag_external = DAG(dag_external_id, default_args=self.args, schedule_interval=timedelta(seconds=1))
    task_external_with_failure = BashOperator(
        task_id="task_external_with_failure", bash_command=bash_command_code, retries=0, dag=dag_external
    )
    task_external_without_failure = DummyOperator(
        task_id="task_external_without_failure", retries=0, dag=dag_external
    )

    task_external_without_failure.run(
        start_date=DEFAULT_DATE, end_date=DEFAULT_DATE + timedelta(seconds=1), ignore_ti_state=True
    )

    session = settings.Session()
    TI = TaskInstance
    try:
        task_external_with_failure.run(
            start_date=DEFAULT_DATE, end_date=DEFAULT_DATE + timedelta(seconds=1), ignore_ti_state=True
        )
        # The task_external_with_failure task is expected to fail
        # once per minute (the run on the first second of each minute).
    except Exception as e:  # pylint: disable=broad-except
        failed_tis = session.query(TI).filter(
            TI.dag_id == dag_external_id,
            TI.state == State.FAILED,
            TI.execution_date == DEFAULT_DATE + timedelta(seconds=1),
        ).all()
        if len(failed_tis) == 1 and failed_tis[0].task_id == 'task_external_with_failure':
            pass
        else:
            raise e

    dag_id = TEST_DAG_ID
    dag = DAG(dag_id, default_args=self.args, schedule_interval=timedelta(minutes=1))
    task_without_failure = ExternalTaskSensor(
        task_id='task_without_failure',
        external_dag_id=dag_external_id,
        external_task_id='task_external_without_failure',
        execution_date_fn=lambda dt: [dt + timedelta(seconds=i) for i in range(2)],
        allowed_states=['success'],
        retries=0,
        timeout=1,
        poke_interval=1,
        dag=dag,
    )
    task_with_failure = ExternalTaskSensor(
        task_id='task_with_failure',
        external_dag_id=dag_external_id,
        external_task_id='task_external_with_failure',
        execution_date_fn=lambda dt: [dt + timedelta(seconds=i) for i in range(2)],
        allowed_states=['success'],
        retries=0,
        timeout=1,
        poke_interval=1,
        dag=dag,
    )

    task_without_failure.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)

    with self.assertRaises(AirflowSensorTimeout):
        task_with_failure.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)
def setUp(self):
    self.dagbag = DagBag(dag_folder=DEV_NULL, include_examples=True)
    self.args = {'owner': 'airflow', 'start_date': DEFAULT_DATE}
    self.dag = DAG(TEST_DAG_ID, default_args=self.args)
from parameterized import parameterized

from airflow import settings
from airflow.models import Variable
from airflow.models.dag import DAG
from airflow.models.renderedtifields import RenderedTaskInstanceFields as RTIF
from airflow.models.taskinstance import TaskInstance as TI
from airflow.operators.bash import BashOperator
from airflow.utils.session import create_session
from airflow.utils.timezone import datetime
from airflow.version import version
from tests.test_utils.asserts import assert_queries_count
from tests.test_utils.db import clear_rendered_ti_fields

TEST_DAG = DAG("example_rendered_ti_field", schedule_interval=None)
START_DATE = datetime(2018, 1, 1)
EXECUTION_DATE = datetime(2019, 1, 1)


class ClassWithCustomAttributes:
    """Test helper: creates objects with arbitrary custom attributes in a single statement."""

    def __init__(self, **kwargs):
        for key, value in kwargs.items():
            setattr(self, key, value)

    def __str__(self):
        return "{}({})".format(ClassWithCustomAttributes.__name__, str(self.__dict__))

    def __repr__(self):
def setUp(self): args = {'owner': 'airflow', 'start_date': DEFAULT_DATE} dag = DAG(TEST_DAG_ID, default_args=args) self.dag = dag rows = [ (1880, "John", 0.081541, "boy"), (1880, "William", 0.080511, "boy"), (1880, "James", 0.050057, "boy"), (1880, "Charles", 0.045167, "boy"), (1880, "George", 0.043292, "boy"), (1880, "Frank", 0.02738, "boy"), (1880, "Joseph", 0.022229, "boy"), (1880, "Thomas", 0.021401, "boy"), (1880, "Henry", 0.020641, "boy"), (1880, "Robert", 0.020404, "boy"), (1880, "Edward", 0.019965, "boy"), (1880, "Harry", 0.018175, "boy"), (1880, "Walter", 0.014822, "boy"), (1880, "Arthur", 0.013504, "boy"), (1880, "Fred", 0.013251, "boy"), (1880, "Albert", 0.012609, "boy"), (1880, "Samuel", 0.008648, "boy"), (1880, "David", 0.007339, "boy"), (1880, "Louis", 0.006993, "boy"), (1880, "Joe", 0.006174, "boy"), (1880, "Charlie", 0.006165, "boy"), (1880, "Clarence", 0.006165, "boy"), (1880, "Richard", 0.006148, "boy"), (1880, "Andrew", 0.005439, "boy"), (1880, "Daniel", 0.00543, "boy"), (1880, "Ernest", 0.005194, "boy"), (1880, "Will", 0.004966, "boy"), (1880, "Jesse", 0.004805, "boy"), (1880, "Oscar", 0.004594, "boy"), (1880, "Lewis", 0.004366, "boy"), (1880, "Peter", 0.004189, "boy"), (1880, "Benjamin", 0.004138, "boy"), (1880, "Frederick", 0.004079, "boy"), (1880, "Willie", 0.00402, "boy"), (1880, "Alfred", 0.003961, "boy"), (1880, "Sam", 0.00386, "boy"), (1880, "Roy", 0.003716, "boy"), (1880, "Herbert", 0.003581, "boy"), (1880, "Jacob", 0.003412, "boy"), (1880, "Tom", 0.00337, "boy"), (1880, "Elmer", 0.00315, "boy"), (1880, "Carl", 0.003142, "boy"), (1880, "Lee", 0.003049, "boy"), (1880, "Howard", 0.003015, "boy"), (1880, "Martin", 0.003015, "boy"), (1880, "Michael", 0.00299, "boy"), (1880, "Bert", 0.002939, "boy"), (1880, "Herman", 0.002931, "boy"), (1880, "Jim", 0.002914, "boy"), (1880, "Francis", 0.002905, "boy"), (1880, "Harvey", 0.002905, "boy"), (1880, "Earl", 0.002829, "boy"), (1880, "Eugene", 0.00277, "boy"), ] self.env_vars = { 'AIRFLOW_CTX_DAG_ID': 'test_dag_id', 'AIRFLOW_CTX_TASK_ID': 'test_task_id', 'AIRFLOW_CTX_EXECUTION_DATE': '2015-01-01T00:00:00+00:00', 'AIRFLOW_CTX_DAG_RUN_ID': '55', 'AIRFLOW_CTX_DAG_OWNER': 'airflow', 'AIRFLOW_CTX_DAG_EMAIL': '*****@*****.**', } with MySqlHook().get_conn() as cur: cur.execute(''' CREATE TABLE IF NOT EXISTS baby_names ( org_year integer(4), baby_name VARCHAR(25), rate FLOAT(7,6), sex VARCHAR(4) ) ''') for row in rows: cur.execute("INSERT INTO baby_names VALUES(%s, %s, %s, %s);", row)
def test_retry_on_error_sending_task(self):
    """Test that Airflow retries publishing tasks to the Celery broker at least 3 times"""

    def fake_execute_command(command):
        print(command)

    with _prepare_app(execute=fake_execute_command), self.assertLogs(
        celery_executor.log
    ) as cm, mock.patch.object(celery_executor, "OPERATION_TIMEOUT", 0.001):
        # fake_execute_command takes no arguments while execute_command takes 1,
        # which will cause TypeError when calling task.apply_async()
        executor = celery_executor.CeleryExecutor()
        self.assertEqual(executor.task_publish_retries, {})
        self.assertEqual(executor.task_publish_max_retries, 3, msg="Assert Default Max Retries is 3")

        task = BashOperator(
            task_id="test", bash_command="true", dag=DAG(dag_id='id'), start_date=datetime.now()
        )
        when = datetime.now()
        value_tuple = (
            'command',
            1,
            None,
            SimpleTaskInstance(ti=TaskInstance(task=task, execution_date=datetime.now())),
        )
        key = ('fail', 'fake_simple_ti', when, 0)
        executor.queued_tasks[key] = value_tuple

        # Test that when heartbeat is called again, task is published again to Celery Queue
        executor.heartbeat()
        self.assertEqual(dict(executor.task_publish_retries), {key: 2})
        self.assertEqual(1, len(executor.queued_tasks), "Task should remain in queue")
        self.assertEqual(executor.event_buffer, {})
        self.assertIn(
            "INFO:airflow.executors.celery_executor.CeleryExecutor:"
            f"[Try 1 of 3] Task Timeout Error for Task: ({key}).",
            cm.output,
        )

        executor.heartbeat()
        self.assertEqual(dict(executor.task_publish_retries), {key: 3})
        self.assertEqual(1, len(executor.queued_tasks), "Task should remain in queue")
        self.assertEqual(executor.event_buffer, {})
        self.assertIn(
            "INFO:airflow.executors.celery_executor.CeleryExecutor:"
            f"[Try 2 of 3] Task Timeout Error for Task: ({key}).",
            cm.output,
        )

        executor.heartbeat()
        self.assertEqual(dict(executor.task_publish_retries), {key: 4})
        self.assertEqual(1, len(executor.queued_tasks), "Task should remain in queue")
        self.assertEqual(executor.event_buffer, {})
        self.assertIn(
            "INFO:airflow.executors.celery_executor.CeleryExecutor:"
            f"[Try 3 of 3] Task Timeout Error for Task: ({key}).",
            cm.output,
        )

        executor.heartbeat()
        self.assertEqual(dict(executor.task_publish_retries), {})
        self.assertEqual(0, len(executor.queued_tasks), "Task should no longer be in queue")
        self.assertEqual(executor.event_buffer[('fail', 'fake_simple_ti', when, 0)][0], State.FAILED)
"resources_vpc_config": { "subnetIds": ["subnet-12345ab", "subnet-67890cd"], "endpointPublicAccess": true, "endpointPrivateAccess": false }, "nodegroup_name": "templated-nodegroup", "nodegroup_subnets": "['subnet-12345ab', 'subnet-67890cd']", "nodegroup_role_arn": "arn:aws:iam::123456789012:role/role_name" } """ with DAG( dag_id='example_eks_templated', schedule_interval=None, start_date=datetime(2021, 1, 1), tags=['example', 'templated'], catchup=False, # render_template_as_native_obj=True is what converts the Jinja to Python objects, instead of a string. render_template_as_native_obj=True, ) as dag: CLUSTER_NAME = "{{ dag_run.conf['cluster_name'] }}" NODEGROUP_NAME = "{{ dag_run.conf['nodegroup_name'] }}" # Create an Amazon EKS Cluster control plane without attaching a compute service. create_cluster = EksCreateClusterOperator( task_id='create_eks_cluster', cluster_name=CLUSTER_NAME, compute=None, cluster_role_arn="{{ dag_run.conf['cluster_role_arn'] }}", resources_vpc_config="{{ dag_run.conf['resources_vpc_config'] }}",
"nodegroup_subnets": ["subnet-12345ab", "subnet-67890cd"], "resources_vpc_config": { "subnetIds": ["subnet-12345ab", "subnet-67890cd"], "endpointPublicAccess": true, "endpointPrivateAccess": false }, "nodegroup_name": "templated-nodegroup", "nodegroup_role_arn": "arn:aws:iam::123456789012:role/role_name" } """ with DAG( dag_id='to-publish-manuals-templated', schedule_interval=None, start_date=days_ago(2), max_active_runs=1, tags=['example', 'templated'], # render_template_as_native_obj=True is what converts the Jinja to Python objects, instead of a string. render_template_as_native_obj=True, ) as dag: # Create an Amazon EKS Cluster control plane without attaching a compute service. create_cluster = EKSCreateClusterOperator( task_id='create_eks_cluster', compute=None, cluster_name="{{ dag_run.conf['cluster_name'] }}", cluster_role_arn="{{ dag_run.conf['cluster_role_arn'] }}", resources_vpc_config="{{ dag_run.conf['resources_vpc_config'] }}", ) await_create_cluster = EKSClusterStateSensor(
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from airflow.models.dag import DAG
from airflow.providers.alibaba.cloud.operators.oss import (
    OSSDeleteBatchObjectOperator,
    OSSDeleteObjectOperator,
    OSSDownloadObjectOperator,
    OSSUploadObjectOperator,
)
from airflow.utils.dates import days_ago

with DAG(
    dag_id='oss_object_dag',
    start_date=days_ago(2),
    max_active_runs=1,
    tags=['example'],
) as dag:
    create_object = OSSUploadObjectOperator(
        file='your local file',
        key='your oss key',
        oss_conn_id='oss_default',
        region='your region',
        task_id='task1',
        bucket_name='your bucket',
    )

    download_object = OSSDownloadObjectOperator(
        file='your local file',
        key='your oss key',
def test_complex_dag(snapshot): dag = DAG(dag_id="complex_dag", default_args=default_args, schedule_interval=None) # Create create_entry_group = DummyOperator( task_id="create_entry_group", dag=dag, ) create_entry_group_result = DummyOperator( task_id="create_entry_group_result", dag=dag, ) create_entry_group_result2 = DummyOperator( task_id="create_entry_group_result2", dag=dag, ) create_entry_gcs = DummyOperator( task_id="create_entry_gcs", dag=dag, ) create_entry_gcs_result = DummyOperator( task_id="create_entry_gcs_result", dag=dag, ) create_entry_gcs_result2 = DummyOperator( task_id="create_entry_gcs_result2", dag=dag, ) create_tag = DummyOperator( task_id="create_tag", dag=dag, ) create_tag_result = DummyOperator( task_id="create_tag_result", dag=dag, ) create_tag_result2 = DummyOperator( task_id="create_tag_result2", dag=dag, ) create_tag_template = DummyOperator( task_id="create_tag_template", dag=dag, ) create_tag_template_result = DummyOperator( task_id="create_tag_template_result", dag=dag, ) create_tag_template_result2 = DummyOperator( task_id="create_tag_template_result2", dag=dag, ) create_tag_template_field = DummyOperator( task_id="create_tag_template_field", dag=dag, ) create_tag_template_field_result = DummyOperator( task_id="create_tag_template_field_result", dag=dag, ) create_tag_template_field_result2 = DummyOperator( task_id="create_tag_template_field_result", dag=dag, ) # Delete delete_entry = DummyOperator( task_id="delete_entry", dag=dag, ) create_entry_gcs >> delete_entry delete_entry_group = DummyOperator( task_id="delete_entry_group", dag=dag, ) create_entry_group >> delete_entry_group delete_tag = DummyOperator( task_id="delete_tag", dag=dag, ) create_tag >> delete_tag delete_tag_template_field = DummyOperator( task_id="delete_tag_template_field", dag=dag, ) delete_tag_template = DummyOperator( task_id="delete_tag_template", dag=dag, ) # Get get_entry_group = DummyOperator( task_id="get_entry_group", dag=dag, ) get_entry_group_result = DummyOperator( task_id="get_entry_group_result", dag=dag, ) get_entry = DummyOperator( task_id="get_entry", dag=dag, ) get_entry_result = DummyOperator( task_id="get_entry_result", dag=dag, ) get_tag_template = DummyOperator( task_id="get_tag_template", dag=dag, ) get_tag_template_result = DummyOperator( task_id="get_tag_template_result", dag=dag, ) # List list_tags = DummyOperator( task_id="list_tags", dag=dag, ) list_tags_result = DummyOperator( task_id="list_tags_result", dag=dag, ) # Lookup lookup_entry = DummyOperator( task_id="lookup_entry", dag=dag, ) lookup_entry_result = DummyOperator( task_id="lookup_entry_result", dag=dag, ) # Rename rename_tag_template_field = DummyOperator( task_id="rename_tag_template_field", dag=dag, ) # Search search_catalog = DummyOperator( task_id="search_catalog", dag=dag, ) search_catalog_result = DummyOperator( task_id="search_catalog_result", dag=dag, ) # Update update_entry = DummyOperator( task_id="update_entry", dag=dag, ) update_tag = DummyOperator( task_id="update_tag", dag=dag, ) update_tag_template = DummyOperator( task_id="update_tag_template", dag=dag, ) update_tag_template_field = DummyOperator( task_id="update_tag_template_field", dag=dag, ) # Create create_tasks = [ create_entry_group, create_entry_gcs, create_tag_template, create_tag_template_field, create_tag, ] chain(*create_tasks) create_entry_group >> delete_entry_group create_entry_group >> create_entry_group_result create_entry_group >> create_entry_group_result2 create_entry_gcs >> delete_entry create_entry_gcs >> 
create_entry_gcs_result create_entry_gcs >> create_entry_gcs_result2 create_tag_template >> delete_tag_template_field create_tag_template >> create_tag_template_result create_tag_template >> create_tag_template_result2 create_tag_template_field >> delete_tag_template_field create_tag_template_field >> create_tag_template_field_result create_tag_template_field >> create_tag_template_field_result2 create_tag >> delete_tag create_tag >> create_tag_result create_tag >> create_tag_result2 # Delete delete_tasks = [ delete_tag, delete_tag_template_field, delete_tag_template, delete_entry_group, delete_entry, ] chain(*delete_tasks) # Get create_tag_template >> get_tag_template >> delete_tag_template get_tag_template >> get_tag_template_result create_entry_gcs >> get_entry >> delete_entry get_entry >> get_entry_result create_entry_group >> get_entry_group >> delete_entry_group get_entry_group >> get_entry_group_result # List create_tag >> list_tags >> delete_tag list_tags >> list_tags_result # Lookup create_entry_gcs >> lookup_entry >> delete_entry lookup_entry >> lookup_entry_result # Rename create_tag_template_field >> rename_tag_template_field >> delete_tag_template_field # Search chain(create_tasks, search_catalog, delete_tasks) search_catalog >> search_catalog_result # Update create_entry_gcs >> update_entry >> delete_entry create_tag >> update_tag >> delete_tag create_tag_template >> update_tag_template >> delete_tag_template create_tag_template_field >> update_tag_template_field >> rename_tag_template_field snapshot.assert_match( serialize_pp( PipelineSnapshot.from_pipeline_def( make_dagster_pipeline_from_airflow_dag( dag=dag)).dep_structure_snapshot))
NODEGROUP_SUFFIX = '-nodegroup'
NODEGROUP_NAME = CLUSTER_NAME + NODEGROUP_SUFFIX
ROLE_ARN = environ.get('EKS_DEMO_ROLE_ARN', 'arn:aws:iam::123456789012:role/role_name')
SUBNETS = environ.get('EKS_DEMO_SUBNETS', 'subnet-12345ab subnet-67890cd').split(' ')
VPC_CONFIG = {
    'subnetIds': SUBNETS,
    'endpointPublicAccess': True,
    'endpointPrivateAccess': False,
}

with DAG(
    dag_id='example_eks_with_nodegroups_dag',
    schedule_interval=None,
    start_date=days_ago(2),
    max_active_runs=1,
    tags=['example'],
) as dag:

    # [START howto_operator_eks_create_cluster]
    # Create an Amazon EKS Cluster control plane without attaching a compute service.
    create_cluster = EKSCreateClusterOperator(
        task_id='create_eks_cluster',
        cluster_name=CLUSTER_NAME,
        cluster_role_arn=ROLE_ARN,
        resources_vpc_config=VPC_CONFIG,
        compute=None,
    )
    # [END howto_operator_eks_create_cluster]
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. from datetime import datetime from airflow.models.dag import DAG from airflow.providers.slack.operators.slack import SlackAPIFileOperator with DAG( dag_id='slack_example_dag', schedule_interval=None, start_date=datetime(2021, 1, 1), default_args={ 'slack_conn_id': 'slack', 'channel': '#general', 'initial_comment': 'Hello World!' }, max_active_runs=1, tags=['example'], ) as dag: # [START slack_operator_howto_guide_send_file] # Send file with filename and filetype slack_operator_file = SlackAPIFileOperator( task_id="slack_file_upload_1", filename="/files/dags/test.txt", filetype="txt", ) # [END slack_operator_howto_guide_send_file] # [START slack_operator_howto_guide_send_file_content]
def setUp(self):
    self.dag = DAG(TEST_DAG_ID, default_args=self.TRAINING_DEFAULT_ARGS)
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from airflow.models.dag import DAG
from airflow.utils import timezone
from airflow.contrib.jobs.event_handlers import AIFlowHandler
from airflow.operators.bash import BashOperator

dag = DAG(dag_id='workflow_1', start_date=timezone.utcnow(), schedule_interval='@once')
op_0 = BashOperator(task_id='0_job', dag=dag, bash_command='echo "0 hello word!"')
op_1 = BashOperator(task_id='1_job', dag=dag, bash_command='echo "1 hello word!"')
op_2 = BashOperator(task_id='2_job', dag=dag, bash_command='echo "2 hello word!"')
op_1.subscribe_event('key_1', 'UNDEFINED', 'default')
configs_op_1 = '[{"__af_object_type__": "jsonable", "__class__": "MetConfig", "__module__": "ai_flow.graph.edge", "action": "START", "condition": "NECESSARY", "event_key": "key_1", "event_type": "UNDEFINED", "event_value": "value_1", "life": "ONCE", "namespace": "default", "value_condition": "EQUAL"}]'
op_1.set_events_handler(AIFlowHandler(configs_op_1))
op_2.subscribe_event('key_2', 'UNDEFINED', 'default')
def setUp(self):
    args = {'owner': 'airflow', 'start_date': DEFAULT_DATE}
    self.dag = DAG(TEST_DAG_ID, default_args=args)
NODEGROUP_SUFFIX = '-nodegroup'
NODEGROUP_NAME = CLUSTER_NAME + NODEGROUP_SUFFIX
ROLE_ARN = environ.get('EKS_DEMO_ROLE_ARN', 'arn:aws:iam::123456789012:role/role_name')
SUBNETS = environ.get('EKS_DEMO_SUBNETS', 'subnet-12345ab subnet-67890cd').split(' ')
VPC_CONFIG = {
    'subnetIds': SUBNETS,
    'endpointPublicAccess': True,
    'endpointPrivateAccess': False,
}

with DAG(
    dag_id='example_eks_using_defaults_dag',
    schedule_interval=None,
    start_date=days_ago(2),
    max_active_runs=1,
    tags=['example'],
) as dag:

    # [START howto_operator_eks_create_cluster_with_nodegroup]
    # Create an Amazon EKS cluster control plane and an EKS nodegroup compute platform in one step.
    create_cluster_and_nodegroup = EKSCreateClusterOperator(
        task_id='create_eks_cluster_and_nodegroup',
        cluster_name=CLUSTER_NAME,
        nodegroup_name=NODEGROUP_NAME,
        cluster_role_arn=ROLE_ARN,
        nodegroup_role_arn=ROLE_ARN,
        # Opting to use the same ARN for the cluster and the nodegroup here,
        # but a different ARN could be configured and passed if desired.
        resources_vpc_config=VPC_CONFIG,
def test_get_k8s_pod_yaml(self, mock_pod_mutation_hook): """ Test that k8s_pod_yaml is rendered correctly, stored in the Database, and are correctly fetched using RTIF.get_k8s_pod_yaml """ dag = DAG("test_get_k8s_pod_yaml", start_date=START_DATE) with dag: task = BashOperator(task_id="test", bash_command="echo hi") ti = TI(task=task, execution_date=EXECUTION_DATE) rtif = RTIF(ti=ti) # Test that pod_mutation_hook is called mock_pod_mutation_hook.assert_called_once_with(mock.ANY) self.assertEqual(ti.dag_id, rtif.dag_id) self.assertEqual(ti.task_id, rtif.task_id) self.assertEqual(ti.execution_date, rtif.execution_date) expected_pod_yaml = { 'metadata': { 'annotations': { 'dag_id': 'test_get_k8s_pod_yaml', 'execution_date': '2019-01-01T00:00:00+00:00', 'task_id': 'test', 'try_number': '1', }, 'labels': { 'airflow-worker': 'worker-config', 'airflow_version': version, 'dag_id': 'test_get_k8s_pod_yaml', 'execution_date': '2019-01-01T00_00_00_plus_00_00', 'kubernetes_executor': 'True', 'task_id': 'test', 'try_number': '1', }, 'name': mock.ANY, 'namespace': 'default', }, 'spec': { 'containers': [ { 'command': [ 'airflow', 'tasks', 'run', 'test_get_k8s_pod_yaml', 'test', '2019-01-01T00:00:00+00:00', ], 'image': ':', 'name': 'base', 'env': [{'name': 'AIRFLOW_IS_K8S_EXECUTOR_POD', 'value': 'True'}], } ] }, } self.assertEqual(expected_pod_yaml, rtif.k8s_pod_yaml) with create_session() as session: session.add(rtif) self.assertEqual(expected_pod_yaml, RTIF.get_k8s_pod_yaml(ti=ti)) # Test the else part of get_k8s_pod_yaml # i.e. for the TIs that are not stored in RTIF table # Fetching them will return None with dag: task_2 = BashOperator(task_id="test2", bash_command="echo hello") ti2 = TI(task_2, EXECUTION_DATE) self.assertIsNone(RTIF.get_k8s_pod_yaml(ti=ti2))
from datetime import datetime

from airflow.models.dag import DAG
from airflow.providers.alibaba.cloud.operators.oss import (
    OSSDeleteBatchObjectOperator,
    OSSDeleteObjectOperator,
    OSSDownloadObjectOperator,
    OSSUploadObjectOperator,
)

with DAG(
    dag_id='oss_object_dag',
    start_date=datetime(2021, 1, 1),
    default_args={'bucket_name': 'your bucket', 'region': 'your region'},
    max_active_runs=1,
    tags=['example'],
    catchup=False,
) as dag:
    create_object = OSSUploadObjectOperator(
        file='your local file',
        key='your oss key',
        task_id='task1',
    )

    download_object = OSSDownloadObjectOperator(
        file='your local file',
        key='your oss key',
def setUp(self):
    args = {'owner': 'airflow', 'start_date': datetime.datetime(2017, 1, 1)}
    self.dag = DAG('test_dag_id', default_args=args)
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from datetime import datetime

from airflow.models.dag import DAG
from airflow.providers.arangodb.operators.arangodb import AQLOperator
from airflow.providers.arangodb.sensors.arangodb import AQLSensor

dag = DAG(
    'example_arangodb_operator',
    start_date=datetime(2021, 1, 1),
    tags=['example'],
    catchup=False,
)

# [START howto_aql_sensor_arangodb]
sensor = AQLSensor(
    task_id="aql_sensor",
    query="FOR doc IN students FILTER doc.name == 'judy' RETURN doc",
    timeout=60,
    poke_interval=10,
    dag=dag,
)
# [END howto_aql_sensor_arangodb]
def dag_bag_ext(): """ Create a DagBag with DAGs looking like this. The dotted lines represent external dependencies set up using ExternalTaskMarker and ExternalTaskSensor. dag_0: task_a_0 >> task_b_0 | | dag_1: ---> task_a_1 >> task_b_1 | | dag_2: ---> task_a_2 >> task_b_2 | | dag_3: ---> task_a_3 >> task_b_3 """ dag_bag = DagBag(dag_folder=DEV_NULL, include_examples=False) dag_0 = DAG("dag_0", start_date=DEFAULT_DATE, schedule_interval=None) task_a_0 = DummyOperator(task_id="task_a_0", dag=dag_0) task_b_0 = ExternalTaskMarker(task_id="task_b_0", external_dag_id="dag_1", external_task_id="task_a_1", recursion_depth=3, dag=dag_0) task_a_0 >> task_b_0 dag_1 = DAG("dag_1", start_date=DEFAULT_DATE, schedule_interval=None) task_a_1 = ExternalTaskSensor(task_id="task_a_1", external_dag_id=dag_0.dag_id, external_task_id=task_b_0.task_id, dag=dag_1) task_b_1 = ExternalTaskMarker(task_id="task_b_1", external_dag_id="dag_2", external_task_id="task_a_2", recursion_depth=2, dag=dag_1) task_a_1 >> task_b_1 dag_2 = DAG("dag_2", start_date=DEFAULT_DATE, schedule_interval=None) task_a_2 = ExternalTaskSensor(task_id="task_a_2", external_dag_id=dag_1.dag_id, external_task_id=task_b_1.task_id, dag=dag_2) task_b_2 = ExternalTaskMarker(task_id="task_b_2", external_dag_id="dag_3", external_task_id="task_a_3", recursion_depth=1, dag=dag_2) task_a_2 >> task_b_2 dag_3 = DAG("dag_3", start_date=DEFAULT_DATE, schedule_interval=None) task_a_3 = ExternalTaskSensor(task_id="task_a_3", external_dag_id=dag_2.dag_id, external_task_id=task_b_2.task_id, dag=dag_3) task_b_3 = DummyOperator(task_id="task_b_3", dag=dag_3) task_a_3 >> task_b_3 for dag in [dag_0, dag_1, dag_2, dag_3]: dag_bag.bag_dag(dag=dag, root_dag=dag) return dag_bag
def setUp(self):
    args = {'owner': 'airflow', 'start_date': DEFAULT_DATE}
    self.dag = DAG('test_dag_id', default_args=args)
import datetime

from dateutil.tz import *

from airflow.models.dag import DAG
from airflow.operators.bash_operator import BashOperator

valid_dag = DAG(
    dag_id="ValidDag",
    description="This is a valid test dag",
    start_date=datetime.datetime(2020, 5, 20, 0, 0),
)

task1 = BashOperator(bash_command="echo 1", task_id="Task1", dag=valid_dag)
task2 = BashOperator(bash_command='echo "2"', task_id="Task2", dag=valid_dag)

task1 >> task2
# under the License.
"""
Example Airflow DAG that shows the complex DAG structure.
"""
from airflow.models.dag import DAG

# from airflow.models.baseoperator import chain
from airflow.operators.bash_operator import BashOperator
from airflow.operators.python_operator import PythonOperator
from airflow.utils.dates import days_ago

default_args = {"start_date": days_ago(1)}

with DAG(
    dag_id="example_complex",
    default_args=default_args,
    schedule_interval=None,
    tags=['example'],
) as dag:
    # Create
    create_entry_group = BashOperator(task_id="create_entry_group", bash_command="echo create_entry_group")

    create_entry_group_result = BashOperator(
        task_id="create_entry_group_result", bash_command="echo create_entry_group_result"
    )

    create_entry_group_result2 = BashOperator(
        task_id="create_entry_group_result2", bash_command="echo create_entry_group_result2"
    )
def test_mark_success_on_success_callback(self): """ Test that ensures that where a task is marked success in the UI on_success_callback gets executed """ # use shared memory value so we can properly track value change even if # it's been updated across processes. success_callback_called = Value('i', 0) task_terminated_externally = Value('i', 1) shared_mem_lock = Lock() def success_callback(context): with shared_mem_lock: success_callback_called.value += 1 assert context['dag_run'].dag_id == 'test_mark_success' dag = DAG(dag_id='test_mark_success', start_date=DEFAULT_DATE, default_args={'owner': 'owner1'}) def task_function(ti): # pylint: disable=unused-argument time.sleep(60) # This should not happen -- the state change should be noticed and the task should get killed with shared_mem_lock: task_terminated_externally.value = 0 task = PythonOperator( task_id='test_state_succeeded1', python_callable=task_function, on_success_callback=success_callback, dag=dag, ) session = settings.Session() dag.clear() dag.create_dagrun( run_id="test", state=State.RUNNING, execution_date=DEFAULT_DATE, start_date=DEFAULT_DATE, session=session, ) ti = TaskInstance(task=task, execution_date=DEFAULT_DATE) ti.refresh_from_db() job1 = LocalTaskJob(task_instance=ti, ignore_ti_state=True, executor=SequentialExecutor()) job1.task_runner = StandardTaskRunner(job1) settings.engine.dispose() process = multiprocessing.Process(target=job1.run) process.start() for _ in range(0, 25): ti.refresh_from_db() if ti.state == State.RUNNING: break time.sleep(0.2) assert ti.state == State.RUNNING ti.state = State.SUCCESS session.merge(ti) session.commit() process.join(timeout=10) assert success_callback_called.value == 1 assert task_terminated_externally.value == 1 assert not process.is_alive()
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. # [START dag] """This dag only runs some simple tasks to test Airflow's task execution.""" from datetime import datetime, timedelta from airflow.models.dag import DAG from airflow.operators.dummy import DummyOperator now = datetime.now() now_to_the_hour = (now - timedelta(0, 0, 0, 0, 0, 3)).replace(minute=0, second=0, microsecond=0) START_DATE = now_to_the_hour DAG_NAME = 'test_dag_v1' dag = DAG( DAG_NAME, schedule_interval='*/10 * * * *', default_args={'depends_on_past': True}, start_date=datetime(2021, 1, 1), catchup=False, ) run_this_1 = DummyOperator(task_id='run_this_1', dag=dag) run_this_2 = DummyOperator(task_id='run_this_2', dag=dag) run_this_2.set_upstream(run_this_1) run_this_3 = DummyOperator(task_id='run_this_3', dag=dag) run_this_3.set_upstream(run_this_2) # [END dag]