def test_dagrun_success_when_all_skipped(self):
    """
    Verify that a DAG run is reported as successful when every task
    after the short-circuit task has been skipped.
    """
    dag = DAG(
        dag_id='test_dagrun_success_when_all_skipped',
        start_date=datetime.datetime(2017, 1, 1),
    )

    # Linear chain: short circuit -> skipped1 -> skipped2.
    short_circuit = ShortCircuitOperator(
        task_id='test_short_circuit_false',
        dag=dag,
        python_callable=lambda: False,
    )
    first_skipped = DummyOperator(task_id='test_state_skipped1', dag=dag)
    second_skipped = DummyOperator(task_id='test_state_skipped2', dag=dag)
    short_circuit.set_downstream(first_skipped)
    first_skipped.set_downstream(second_skipped)

    # Task states the run is seeded with before it is re-evaluated.
    seeded_states = {
        'test_short_circuit_false': State.SUCCESS,
        'test_state_skipped1': State.SKIPPED,
        'test_state_skipped2': State.SKIPPED,
    }
    run = self.create_dag_run(dag=dag,
                              state=State.RUNNING,
                              task_states=seeded_states)

    self.assertEqual(State.SUCCESS, run.update_state())
def test_dagrun_success_when_all_skipped(self):
    """
    Check that updating a running DAG run yields SUCCESS when the
    short-circuit task succeeded and both downstream tasks are skipped.
    """
    dag = DAG(
        dag_id='test_dagrun_success_when_all_skipped',
        start_date=timezone.datetime(2017, 1, 1),
    )

    gate = ShortCircuitOperator(
        task_id='test_short_circuit_false',
        dag=dag,
        python_callable=lambda: False,
    )
    skipped_a = DummyOperator(task_id='test_state_skipped1', dag=dag)
    skipped_b = DummyOperator(task_id='test_state_skipped2', dag=dag)

    # gate -> skipped_a -> skipped_b
    gate.set_downstream(skipped_a)
    skipped_a.set_downstream(skipped_b)

    states_by_task = {
        'test_short_circuit_false': State.SUCCESS,
        'test_state_skipped1': State.SKIPPED,
        'test_state_skipped2': State.SKIPPED,
    }
    dag_run = self.create_dag_run(
        dag=dag,
        state=State.RUNNING,
        task_states=states_by_task,
    )

    resulting_state = dag_run.update_state()
    self.assertEqual(State.SUCCESS, resulting_state)
def test_with_dag_run(self):
    """
    Run the short-circuit operator inside a DAG run and check task states.

    With the callable returning False, both downstream tasks must end up
    SKIPPED; after clearing and re-running with the callable returning
    True, they must be left untouched (State.NONE).
    """
    value = False
    dag = DAG('shortcircuit_operator_test_with_dag_run',
              default_args={
                  'owner': 'airflow',
                  'start_date': DEFAULT_DATE
              },
              schedule_interval=INTERVAL)
    # The lambda closes over `value`, so rebinding it below changes the
    # operator's decision on the second run.
    short_op = ShortCircuitOperator(task_id='make_choice',
                                    dag=dag,
                                    python_callable=lambda: value)
    branch_1 = DummyOperator(task_id='branch_1', dag=dag)
    branch_1.set_upstream(short_op)
    branch_2 = DummyOperator(task_id='branch_2', dag=dag)
    branch_2.set_upstream(branch_1)
    upstream = DummyOperator(task_id='upstream', dag=dag)
    upstream.set_downstream(short_op)
    dag.clear()

    def assert_states(task_instances, expected_states):
        """Assert there are four task instances, each in its expected state."""
        self.assertEqual(len(task_instances), 4)
        for ti in task_instances:
            if ti.task_id not in expected_states:
                # A bare `raise` here would only produce an opaque
                # "No active exception to reraise" RuntimeError.
                raise ValueError(
                    'Invalid task id {} found!'.format(ti.task_id))
            self.assertEqual(ti.state, expected_states[ti.task_id])

    logging.error("Tasks {}".format(dag.tasks))
    dr = dag.create_dagrun(
        run_id="manual__",
        start_date=datetime.datetime.now(),
        execution_date=DEFAULT_DATE,
        state=State.RUNNING
    )

    upstream.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
    short_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

    assert_states(dr.get_task_instances(), {
        'make_choice': State.SUCCESS,
        'upstream': State.SUCCESS,
        'branch_1': State.SKIPPED,
        'branch_2': State.SKIPPED,
    })

    value = True
    dag.clear()
    dr.verify_integrity()
    upstream.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
    short_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

    assert_states(dr.get_task_instances(), {
        'make_choice': State.SUCCESS,
        'upstream': State.SUCCESS,
        'branch_1': State.NONE,
        'branch_2': State.NONE,
    })
def test_clear_skipped_downstream_task(self):
    """
    After a downstream task is skipped by ShortCircuitOperator, clearing the
    skipped task should not cause it to be executed.
    """
    dag = DAG('shortcircuit_clear_skipped_downstream_task',
              default_args={
                  'owner': 'airflow',
                  'start_date': DEFAULT_DATE
              },
              schedule_interval=INTERVAL)
    short_op = ShortCircuitOperator(task_id='make_choice',
                                    dag=dag,
                                    python_callable=lambda: False)
    downstream = DummyOperator(task_id='downstream', dag=dag)

    short_op >> downstream

    dag.clear()

    dr = dag.create_dagrun(run_id="manual__",
                           start_date=timezone.utcnow(),
                           execution_date=DEFAULT_DATE,
                           state=State.RUNNING)

    # The same states are expected before and after clearing `downstream`.
    expected_states = {
        'make_choice': State.SUCCESS,
        'downstream': State.SKIPPED,
    }

    def assert_expected_states(task_instances):
        """Assert every task instance is known and in its expected state."""
        for ti in task_instances:
            if ti.task_id not in expected_states:
                # Explicit error instead of a bare `raise`, which has no
                # active exception to re-raise at this point.
                raise ValueError(
                    'Invalid task id {} found!'.format(ti.task_id))
            self.assertEqual(ti.state, expected_states[ti.task_id])

    short_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
    downstream.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

    tis = dr.get_task_instances()
    assert_expected_states(tis)

    # Clear downstream
    with create_session() as session:
        clear_task_instances([t for t in tis if t.task_id == "downstream"],
                             session=session,
                             dag=dag)

    # Run downstream again
    downstream.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

    # Check if the states are correct.
    assert_expected_states(dr.get_task_instances())
def test_get_task_instance_on_empty_dagrun(self):
    """
    Make sure that a proper value is returned when a dagrun has no task instances
    """
    dag = DAG(dag_id='test_get_task_instance_on_empty_dagrun',
              start_date=timezone.datetime(2017, 1, 1))
    ShortCircuitOperator(task_id='test_short_circuit_false',
                         dag=dag,
                         python_callable=lambda: False)

    session = settings.Session()
    try:
        now = timezone.utcnow()

        # Don't use create_dagrun since it will create the task instances too
        # which we don't want
        dag_run = models.DagRun(
            dag_id=dag.dag_id,
            run_id='manual__' + now.isoformat(),
            execution_date=now,
            start_date=now,
            state=State.RUNNING,
            external_trigger=False,
        )
        session.add(dag_run)
        session.commit()

        # Looked up while the session is still open, since `dag_run` is
        # attached to it.
        ti = dag_run.get_task_instance('test_short_circuit_false')
        self.assertIsNone(ti)
    finally:
        # Release the session even when the assertion above fails.
        session.close()
def createShortCircuitOprTask(shrtCirtaskId, dagObj, queryTaskId):
    """
    Build a ShortCircuitOperator that runs ``checkAvailability``.

    :param shrtCirtaskId: task_id given to the new operator
    :param dagObj: DAG the operator is attached to
    :param queryTaskId: value passed to the callable under the
        ``quertTaskId`` keyword
    :return: the configured ShortCircuitOperator
    """
    return ShortCircuitOperator(
        task_id=shrtCirtaskId,
        python_callable=checkAvailability,
        provide_context=True,
        depends_on_past=True,
        dag=dagObj,
        # op_kwargs must be a mapping of keyword arguments; the previous
        # list of (key, value) tuples is not a valid value for it.
        # NOTE(review): the key spelling "quertTaskId" is kept unchanged
        # because it has to match what checkAvailability reads -- confirm
        # whether it is a typo for "queryTaskId" on both sides.
        op_kwargs={"quertTaskId": queryTaskId},
    )
def create_gcs_short_circuit_operator(task_id: str,
                                      gcs_download_task_id: str,
                                      dag: DAG,
                                      provide_context: bool = True):
    """
    Create a ShortCircuitOperator whose callable is ``did_gcs_file_download``.

    :param task_id: task_id of the operator being created
    :param gcs_download_task_id: forwarded to the callable as the
        ``gcs_download_task_id`` keyword argument
    :param dag: DAG the operator belongs to
    :param provide_context: forwarded unchanged to the operator
    :return: the new ShortCircuitOperator
    """
    callable_kwargs = {'gcs_download_task_id': gcs_download_task_id}
    operator = ShortCircuitOperator(
        task_id=task_id,
        dag=dag,
        python_callable=did_gcs_file_download,
        op_kwargs=callable_kwargs,
        provide_context=provide_context,
    )
    return operator
def get_file_staging_operator(dag,
                              output_dir,
                              minimum_file_age_minutes,
                              identifier=TIMESTAMP_TEMPLATE):
    """
    Return the 'stage_oldest_tsv_file' ShortCircuitOperator for ``dag``.

    The operator calls ``paths.stage_oldest_tsv_file`` with the positional
    arguments ``output_dir``, ``identifier`` and
    ``minimum_file_age_minutes``, in that order.
    """
    staging_args = [output_dir, identifier, minimum_file_age_minutes]
    staging_operator = ShortCircuitOperator(
        dag=dag,
        task_id='stage_oldest_tsv_file',
        python_callable=paths.stage_oldest_tsv_file,
        op_args=staging_args,
    )
    return staging_operator
def make_control_flow(is_dummy_operator_short_circuit, dag):
    """
    Build the "dummy-control-flow" ShortCircuitOperator for ``dag``.

    The operator's callable is ``eval_control_flow`` with
    ``is_dummy_operator_short_circuit`` pre-bound as its first argument.
    """
    decide = partial(eval_control_flow, is_dummy_operator_short_circuit)
    return ShortCircuitOperator(
        task_id="dummy-control-flow",
        python_callable=decide,
        provide_context=True,
        dag=dag,
    )
def setUp(self):
    """
    Create the fixture DAG ``upstream -> make_choice -> branch_1``, clear
    it, and default the short-circuit decision to True.
    """
    dag_defaults = {'owner': 'airflow', 'start_date': DEFAULT_DATE}
    self.dag = DAG('shortcircuit_operator_test',
                   default_args=dag_defaults,
                   schedule_interval=INTERVAL)

    # The callable reads self.value at run time, so tests can flip it.
    self.short_op = ShortCircuitOperator(
        task_id='make_choice',
        python_callable=lambda: self.value,
        dag=self.dag,
    )

    self.branch_1 = DummyOperator(task_id='branch_1', dag=self.dag)
    self.short_op.set_downstream(self.branch_1)

    self.upstream = DummyOperator(task_id='upstream', dag=self.dag)
    self.short_op.set_upstream(self.upstream)

    self.dag.clear()
    self.value = True
def _create_infinite_retry_short_circuit_operator(self, task_id, dag,
                                                  python_callable):
    """
    Create a ShortCircuitOperator with 99999 retries.

    Retries start 600 seconds apart, use exponential backoff and are
    capped at 3600 seconds between attempts. The execution context is
    provided to ``python_callable``.
    """
    retry_settings = {
        'retries': 99999,
        'retry_delay': timedelta(seconds=600),
        'max_retry_delay': timedelta(seconds=3600),
        'retry_exponential_backoff': True,
    }
    return ShortCircuitOperator(
        task_id=task_id,
        dag=dag,
        python_callable=python_callable,
        provide_context=True,
        **retry_settings
    )
def test_dag_after_add_sequential_sensor_and_then_short_circuit(
        test_task_sensor_service, test_dummy_task):
    """
    Add a short-circuit operator and then a sequential sensor to the dummy
    task, and validate the resulting DAG with the shared assertion helper.
    """
    operator_id = 'test_short_circuit'
    operator = ShortCircuitOperator(
        task_id=operator_id,
        dag=test_dummy_task.dag,
        provide_context=True,
        python_callable=lambda x: x,
    )

    test_task_sensor_service.add_task_short_circuit(test_dummy_task, operator)
    test_task_sensor_service.add_task_sequential_sensor(test_dummy_task)

    assert_dag_after_add_short_circuit_and_sequential_sensor(
        test_dummy_task, operator_id)
def test_with_dag_run(self):
    """
    Exercise the short-circuit operator within a DAG run, first with the
    callable returning False (downstream SKIPPED) and then, after a clear,
    returning True (downstream left in State.NONE).
    """
    value = False
    dag_defaults = {'owner': 'airflow', 'start_date': DEFAULT_DATE}
    dag = DAG('shortcircuit_operator_test_with_dag_run',
              default_args=dag_defaults,
              schedule_interval=INTERVAL)
    # `value` is read lazily by the lambda, so the rebinding further down
    # changes the second run's outcome.
    short_op = ShortCircuitOperator(task_id='make_choice',
                                    python_callable=lambda: value,
                                    dag=dag)
    branch_1 = DummyOperator(task_id='branch_1', dag=dag)
    branch_1.set_upstream(short_op)
    branch_2 = DummyOperator(task_id='branch_2', dag=dag)
    branch_2.set_upstream(branch_1)
    upstream = DummyOperator(task_id='upstream', dag=dag)
    upstream.set_downstream(short_op)
    dag.clear()

    logging.error("Tasks {}".format(dag.tasks))
    dr = dag.create_dagrun(
        run_id="manual__",
        start_date=timezone.utcnow(),
        execution_date=DEFAULT_DATE,
        state=State.RUNNING,
    )

    # First pass: the callable returns False.
    upstream.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
    short_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

    expected_when_false = {
        'make_choice': State.SUCCESS,
        'upstream': State.SUCCESS,
        'branch_1': State.SKIPPED,
        'branch_2': State.SKIPPED,
    }
    task_instances = dr.get_task_instances()
    self.assertEqual(len(task_instances), 4)
    for instance in task_instances:
        if instance.task_id not in expected_when_false:
            raise Exception
        self.assertEqual(instance.state,
                         expected_when_false[instance.task_id])

    # Second pass: the callable returns True.
    value = True
    dag.clear()
    dr.verify_integrity()
    upstream.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
    short_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

    expected_when_true = {
        'make_choice': State.SUCCESS,
        'upstream': State.SUCCESS,
        'branch_1': State.NONE,
        'branch_2': State.NONE,
    }
    task_instances = dr.get_task_instances()
    self.assertEqual(len(task_instances), 4)
    for instance in task_instances:
        if instance.task_id not in expected_when_true:
            raise Exception
        self.assertEqual(instance.state,
                         expected_when_true[instance.task_id])
def test_without_dag_run(self):
    """This checks the defensive against non existent tasks in a dag run"""
    value = False
    dag = DAG('shortcircuit_operator_test_without_dag_run',
              default_args={
                  'owner': 'airflow',
                  'start_date': DEFAULT_DATE
              },
              schedule_interval=INTERVAL)
    # The lambda closes over `value`; rebinding it below flips the decision.
    short_op = ShortCircuitOperator(task_id='make_choice',
                                    dag=dag,
                                    python_callable=lambda: value)
    branch_1 = DummyOperator(task_id='branch_1', dag=dag)
    branch_1.set_upstream(short_op)
    branch_2 = DummyOperator(task_id='branch_2', dag=dag)
    branch_2.set_upstream(branch_1)
    upstream = DummyOperator(task_id='upstream', dag=dag)
    upstream.set_downstream(short_op)
    dag.clear()

    def assert_states(task_instances, expected_states):
        """Fail on unexpected task ids; otherwise compare each state."""
        for ti in task_instances:
            if ti.task_id not in expected_states:
                # 'upstream' is never run here, so it should not exist;
                # any other id is equally unexpected. Raise explicitly
                # rather than with a bare `raise` (no active exception).
                raise ValueError(
                    'Invalid task id {} found!'.format(ti.task_id))
            self.assertEqual(ti.state, expected_states[ti.task_id])

    short_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

    session = Session()
    try:
        # Query object: iterating it again below re-runs the query.
        tis = session.query(TI).filter(
            TI.dag_id == dag.dag_id,
            TI.execution_date == DEFAULT_DATE
        )

        assert_states(tis, {
            'make_choice': State.SUCCESS,
            'branch_1': State.SKIPPED,
            'branch_2': State.SKIPPED,
        })

        value = True
        dag.clear()

        short_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
        assert_states(tis, {
            'make_choice': State.SUCCESS,
            'branch_1': State.NONE,
            'branch_2': State.NONE,
        })
    finally:
        # Close the session even when an assertion fails.
        session.close()
def setUp(self):
    """
    Prepare a cleared three-task DAG (upstream, make_choice, branch_1)
    whose short-circuit callable returns ``self.value`` (initially True).
    """
    self.dag = DAG(
        'shortcircuit_operator_test',
        default_args={'owner': 'airflow', 'start_date': DEFAULT_DATE},
        schedule_interval=INTERVAL,
    )

    # self.value is looked up when the operator runs, not here.
    self.short_op = ShortCircuitOperator(
        dag=self.dag,
        task_id='make_choice',
        python_callable=lambda: self.value,
    )

    # make_choice -> branch_1
    self.branch_1 = DummyOperator(dag=self.dag, task_id='branch_1')
    self.branch_1.set_upstream(self.short_op)

    # upstream -> make_choice
    self.upstream = DummyOperator(dag=self.dag, task_id='upstream')
    self.upstream.set_downstream(self.short_op)

    self.dag.clear()

    self.value = True
def test_dag_after_add_task_short_circuit(test_task_sensor_service,
                                          test_dummy_task):
    """
    After adding a short-circuit operator to the dummy task, the operator
    must be the dummy task's only upstream relative, have exactly one
    downstream relative and no upstream relatives of its own.
    """
    operator_id = 'test_short_circuit'
    new_operator = ShortCircuitOperator(
        task_id=operator_id,
        dag=test_dummy_task.dag,
        provide_context=True,
        python_callable=lambda x: x,
    )
    test_task_sensor_service.add_task_short_circuit(test_dummy_task,
                                                    new_operator)

    # Relatives of the dummy task.
    dummy_downstream = test_dummy_task.get_direct_relatives(upstream=False)
    assert len(dummy_downstream) == 0
    dummy_upstream = test_dummy_task.get_direct_relatives(upstream=True)
    assert len(dummy_upstream) == 1

    operator = dummy_upstream[0]
    assert isinstance(operator, ShortCircuitOperator)
    assert operator.task_id == operator_id

    # Relatives of the short-circuit operator itself.
    operator_downstream = operator.get_direct_relatives(upstream=False)
    assert len(operator_downstream) == 1
    operator_upstream = operator.get_direct_relatives(upstream=True)
    assert len(operator_upstream) == 0
def test_dag_after_add_sequential_sensor_and_two_short_circuit_and_gapped_sensor(
        default_args, test_task_sensor_service, test_dummy_task):
    """
    Add a sequential sensor, a short-circuit operator and a 60-second gap
    sensor (against a task in a second DAG) to the dummy task, then
    validate the result with the shared assertion helper.
    """
    test_task_sensor_service.add_task_sequential_sensor(test_dummy_task)

    operator_id = 'test_short_circuit'
    operator = ShortCircuitOperator(
        task_id=operator_id,
        dag=test_dummy_task.dag,
        provide_context=True,
        python_callable=lambda x: x,
    )
    test_task_sensor_service.add_task_short_circuit(test_dummy_task, operator)

    # Task in a separate DAG that the gap sensor refers to.
    gapped_dag = DAG("test_gapped_dag", default_args=default_args)
    gapped_task = DummyOperator(task_id="gapped_task", dag=gapped_dag)
    gap = timedelta(seconds=60)
    test_task_sensor_service.add_task_gap_sensor(test_dummy_task,
                                                 gapped_task, gap)

    assert_dag_after_add_sequential_sensor_and_two_short_circuit_and_gapped_sensor(
        test_dummy_task, operator_id, gapped_dag, gapped_task, gap)
on_failure_callback=notify_email) as dag: email_to_csv = PythonOperator(task_id='email_to_csv', on_failure_callback=notify_email, python_callable=gmail_to_csv, op_kwargs={ 'username': var_config['gmail_username'], 'password': var_config['gmail_password'], 'imap_server': var_config['imap_server'], 'inbox_label': var_config['inbox_label'], 'csv_file_path': var_config['csv_file_path'] }) checkforfile = ShortCircuitOperator(task_id='checkforfile', provide_context=False, python_callable=checkforfile) csv_to_psql = PythonOperator(task_id='csv_to_psql', on_failure_callback=notify_email, python_callable=pg_load_table, op_kwargs={ 'file_path': var_config['file_path'], 'table_name': var_config['table_name'], 'dbname': var_config['pg_dbname'], 'host': var_config['pg_host'], 'port': var_config['pg_port'], 'user': var_config['pg_user'], 'pwd': var_config['pg_password'] })
logging.info("There were processes to kill") if ENABLE_KILL: logging.info("enable_kill is set to true") logging.info("Opting to send an email to alert the users that processes were killed") return True # True = don't short circuit the dag and execute downstream tasks else: logging.info("enable_kill is set to False") else: logging.info("Processes to kill list was either None or Empty") logging.info("Opting to skip sending an email since no processes were killed") return False # False = short circuit the dag and don't execute downstream tasks email_or_not_branch = ShortCircuitOperator( task_id="email_or_not_branch", python_callable=branch_function, provide_context=True, dag=dag) send_processes_killed_email = EmailOperator( task_id="send_processes_killed_email", to=PROCESS_KILLED_EMAIL_ADDRESSES, subject=PROCESS_KILLED_EMAIL_SUBJECT, html_content=""" <html> <body> <h6>This is not a failure alert!</h6>
# -*- coding: utf-8 -*-
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from datetime import datetime

from airflow.models import DAG
from airflow.operators.python_operator import ShortCircuitOperator
from airflow.operators.dummy_operator import DummyOperator

# Test DAG: a short-circuit task whose callable always returns False,
# followed by a chain of two dummy tasks.
dag = DAG(
    dag_id='test_dagrun_short_circuit_false',
    start_date=datetime(2017, 1, 1),
)

dag_task1 = ShortCircuitOperator(
    task_id='test_short_circuit_false',
    dag=dag,
    python_callable=lambda: False,
)
dag_task2 = DummyOperator(task_id='test_state_skipped1', dag=dag)
dag_task3 = DummyOperator(task_id='test_state_skipped2', dag=dag)

# dag_task1 -> dag_task2 -> dag_task3
dag_task2.set_upstream(dag_task1)
dag_task3.set_upstream(dag_task2)
class ShortCircuitOperatorTest(unittest.TestCase):
    """Tests for ShortCircuitOperator on the DAG
    ``upstream -> make_choice -> branch_1 -> branch_2``."""

    def setUp(self):
        """Build and clear the fixture DAG; the callable returns self.value."""
        self.dag = DAG('shortcircuit_operator_test',
                       default_args={
                           'owner': 'airflow',
                           'start_date': DEFAULT_DATE},
                       schedule_interval=INTERVAL)
        self.short_op = ShortCircuitOperator(task_id='make_choice',
                                             dag=self.dag,
                                             python_callable=lambda: self.value)

        self.branch_1 = DummyOperator(task_id='branch_1', dag=self.dag)
        self.branch_1.set_upstream(self.short_op)
        self.branch_2 = DummyOperator(task_id='branch_2', dag=self.dag)
        self.branch_2.set_upstream(self.branch_1)
        self.upstream = DummyOperator(task_id='upstream', dag=self.dag)
        self.upstream.set_downstream(self.short_op)
        self.dag.clear()

        self.value = True

    def _assert_task_states(self, task_instances, expected_states):
        """Assert each task instance is expected and in its expected state.

        :param task_instances: iterable of task instances to check
        :param expected_states: dict mapping task_id to expected state
        :raises ValueError: for a task_id missing from ``expected_states``
        """
        for ti in task_instances:
            if ti.task_id not in expected_states:
                # Explicit error: a bare `raise` outside an `except` block
                # only produces "No active exception to reraise".
                raise ValueError(
                    'Invalid task id {} found!'.format(ti.task_id))
            self.assertEqual(ti.state, expected_states[ti.task_id])

    def test_without_dag_run(self):
        """This checks the defensive against non existent tasks in a dag run"""
        self.value = False
        self.short_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        session = Session()
        try:
            # Query object: iterating it again re-runs the query.
            tis = session.query(TI).filter(
                TI.dag_id == self.dag.dag_id,
                TI.execution_date == DEFAULT_DATE
            )

            # 'upstream' is not run in this test, so it should not exist
            # and is deliberately absent from the expected states.
            self._assert_task_states(tis, {
                'make_choice': State.SUCCESS,
                'branch_1': State.SKIPPED,
                'branch_2': State.SKIPPED,
            })

            self.value = True
            self.dag.clear()

            self.short_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
            self._assert_task_states(tis, {
                'make_choice': State.SUCCESS,
                'branch_1': State.NONE,
                'branch_2': State.NONE,
            })
        finally:
            # Close the session even when an assertion fails.
            session.close()

    def test_with_dag_run(self):
        """Check task states when the operator runs inside a DAG run."""
        self.value = False
        logging.error("Tasks {}".format(self.dag.tasks))
        dr = self.dag.create_dagrun(
            run_id="manual__",
            start_date=datetime.datetime.now(),
            execution_date=DEFAULT_DATE,
            state=State.RUNNING
        )

        self.upstream.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
        self.short_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 4)
        self._assert_task_states(tis, {
            'make_choice': State.SUCCESS,
            'upstream': State.SUCCESS,
            'branch_1': State.SKIPPED,
            'branch_2': State.SKIPPED,
        })

        self.value = True
        self.dag.clear()
        dr.verify_integrity()
        self.upstream.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
        self.short_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 4)
        self._assert_task_states(tis, {
            'make_choice': State.SUCCESS,
            'upstream': State.SUCCESS,
            'branch_1': State.NONE,
            'branch_2': State.NONE,
        })
def judge_if_1st_day_of_month(**kwargs):
    """
    Decide whether the run's ``ds`` value ends with '01'.

    :param kwargs: keyword arguments; ``ds`` is read from them
    :return: True when ``ds`` ends with '01', otherwise False
    """
    ds = kwargs.get('ds')
    print(ds)
    if ds.endswith('01'):
        logging.warning('judge_if_1st_day_of_month: 01' + ds)
        return True
    # This message previously carried the "judge_if_1st_day_of_week" label,
    # which pointed log readers at the wrong function.
    logging.warning('judge_if_1st_day_of_month: Not 01' + ds)
    return False


if_1st_day_of_week = BranchPythonOperator(
    task_id='if_1st_day_of_week',
    python_callable=judge_if_1st_day_of_week,
    provide_context=True,
    trigger_rule="all_done",
    dag=dag)

if_1st_day_of_month = ShortCircuitOperator(
    task_id='if_1st_day_of_month',
    python_callable=judge_if_1st_day_of_month,
    provide_context=True,
    trigger_rule="all_done",
    dag=dag)

# The monthly gate sits downstream of both the weekly check and the
# weekly task.
daily >> if_1st_day_of_week
if_1st_day_of_week >> if_1st_day_of_month
if_1st_day_of_week >> weekly
weekly >> if_1st_day_of_month
if_1st_day_of_month >> monthly

if __name__ == "__main__":
    dag.cli()
) a.doc_md = task_sample.__doc__ b = BranchPythonOperator( task_id="b", params={}, python_callable=task_branch, ) c = DummyOperator(task_id="c") d = DummyOperator(task_id="d") e = ShortCircuitOperator( task_id="e", params={}, trigger_rule="none_failed", python_callable=task_stop, ) f = DummyOperator(task_id="f") g = PythonOperator( task_id="g", params={}, python_callable=task_fail, ) start >> a >> b >> [c, d] >> e >> f start >> g
dag = DAG(
    dag_id="real_estate",
    default_args=args,
    description="all learned during training",
    schedule_interval="@daily"
)


def check_date(execution_date, **context):
    """
    Return True while ``execution_date`` is on or before 2019-11-28.

    The cutoff is built with the tzinfo of ``execution_date`` so the
    comparison works whether that value is naive or timezone-aware;
    comparing an aware datetime with a naive one raises TypeError.
    NOTE(review): this reuses execution_date's own offset for the cutoff --
    confirm that is the intended timezone for the boundary.
    """
    cutoff = datetime.datetime(2019, 11, 28, tzinfo=execution_date.tzinfo)
    return execution_date <= cutoff


# NOTE(review): this rebinds the module-level name `check_date` from the
# function above to the operator; the callable is captured before the
# rebinding, so the operator still calls the function.
check_date = ShortCircuitOperator(
    task_id="check_if_before_end_of_last_year",
    python_callable=check_date,
    provide_context=True,
    dag=dag
)

# The bare string below is a no-op note (in Dutch): inside an f-string,
# write {{{{ to end up with a literal {{ for the template.
''' use of f voor format dan {{{{ gebruiken om {{ 2 over te houden '''
get_from_api_to_gcs = HttpToGcsOperator(
    task_id="get_from_api_to_gcs",
    endpoint=f"/history?start_at={{{{ ds }}}}&end_at={{{{ tomorrow_ds }}}}&base=GBP&symbols={currency}",
    http_conn_id="currency-http",
    gcs_conn_id="google_cloud_storage_default",
    gcs_path=f"usecase/currency/{{{{ ds }}}}-{currency}.json",
    gcs_bucket=f"{bucket_name}",
    dag=dag
)
# pull xcom from a subdag to see if data was written def iswritten(value, **context): #value = context['task_instance'].xcom_pull(dag_id=f"{DAG_NAME}.align", task_ids="write_align") #value = context['task_instance'].xcom_pull(task_ids=align_end_t.task_id, key="bbox") #logging.info(align_end_t.task_id) if value is not None: return value return False # conditional for successful alignment isaligned_t = ShortCircuitOperator( task_id='iswritten', python_callable=iswritten, trigger_rule=TriggerRule.ALL_DONE, op_kwargs={ "value": f"{{{{ task_instance.xcom_pull(task_ids='{align_end_t.task_id}') }}}}" }, provide_context=True, dag=dag) # delete source_{ds_nodash}/(*.png) (run if align_t succeeds and ngingest finishes) -- let it survive for 1 day in case there are re-runs and the same policy is still in effect lifecycle_config = { "lifecycle": { "rule": [{ "action": { "type": "Delete" }, "condition": { "age": 5 }
if _entity == entity and task.get("task") == action: yield task http_kernel_check = HttpSensor( task_id="http_kernel_check", http_conn_id="kernel_conn", endpoint="/changes", request_params={}, poke_interval=5, dag=dag, ) read_changes_task = ShortCircuitOperator( task_id="read_changes_task", provide_context=True, python_callable=read_changes, dag=dag, ) def JournalFactory(data): """Produz instância de `models.Journal` a partir dos dados retornados do endpoint `/journals/:journal_id` do Kernel. """ metadata = data["metadata"] journal = models.Journal() journal._id = journal.jid = data.get("id") journal.title = metadata.get("title", "") journal.title_iso = metadata.get("title_iso", "") journal.short_title = metadata.get("short_title", "")
if now_epoch - expected_run_epoch > schedule_interval + 7: return False else: return True # Skip unnecessary executions doc = """ Skip the subsequent tasks if a) the execution_date is in past b) there multiple dag runs are currently active """ start_task = ShortCircuitOperator( task_id='skip_check', #python_callable=is_latest_active_dagrun, python_callable=to_run_next, provide_context=True, depends_on_past=True, dag=dag) start_task.doc = doc t11 = BashOperator(task_id='catchup_control', bash_command="echo AAAAAAAAAAAAA BBBBBBBBBB CCCCCCCCCC " + str(datetime.now()), dag=dag) start_task >> t11 # Extract doc = """Extract from source database""" extract_task = PythonOperator(task_id='extract_from_db',
class ShortCircuitOperatorTest(unittest.TestCase):
    """Tests for ShortCircuitOperator on the DAG
    ``upstream -> make_choice -> branch_1``."""

    def setUp(self):
        """Build and clear the fixture DAG; the callable returns self.value."""
        self.dag = DAG('shortcircuit_operator_test',
                       default_args={
                           'owner': 'airflow',
                           'start_date': DEFAULT_DATE
                       },
                       schedule_interval=INTERVAL)
        self.short_op = ShortCircuitOperator(
            task_id='make_choice',
            dag=self.dag,
            python_callable=lambda: self.value)

        self.branch_1 = DummyOperator(task_id='branch_1', dag=self.dag)
        self.branch_1.set_upstream(self.short_op)
        self.upstream = DummyOperator(task_id='upstream', dag=self.dag)
        self.upstream.set_downstream(self.short_op)
        self.dag.clear()

        self.value = True

    def _assert_task_states(self, task_instances, expected_states):
        """Assert each task instance is expected and in its expected state.

        :param task_instances: iterable of task instances to check
        :param expected_states: dict mapping task_id to expected state
        :raises ValueError: for a task_id missing from ``expected_states``
        """
        for ti in task_instances:
            if ti.task_id not in expected_states:
                # Explicit error: a bare `raise` outside an `except` block
                # only produces "No active exception to reraise".
                raise ValueError(
                    'Invalid task id {} found!'.format(ti.task_id))
            self.assertEqual(ti.state, expected_states[ti.task_id])

    def test_without_dag_run(self):
        """This checks the defensive against non existent tasks in a dag run"""
        self.value = False
        self.short_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        session = Session()
        try:
            # Query object: iterating it again re-runs the query.
            tis = session.query(TI).filter(TI.dag_id == self.dag.dag_id,
                                           TI.execution_date == DEFAULT_DATE)

            # 'upstream' is not run in this test, so it should not exist
            # and is deliberately absent from the expected states.
            self._assert_task_states(tis, {
                'make_choice': State.SUCCESS,
                'branch_1': State.SKIPPED,
            })

            self.value = True
            self.dag.clear()

            self.short_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
            self._assert_task_states(tis, {
                'make_choice': State.SUCCESS,
                'branch_1': State.NONE,
            })
        finally:
            # Close the session even when an assertion fails.
            session.close()

    def test_with_dag_run(self):
        """Check task states when the operator runs inside a DAG run."""
        self.value = False
        dr = self.dag.create_dagrun(run_id="manual__",
                                    start_date=datetime.datetime.now(),
                                    execution_date=DEFAULT_DATE,
                                    state=State.RUNNING)

        self.upstream.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
        self.short_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        self._assert_task_states(dr.get_task_instances(), {
            'make_choice': State.SUCCESS,
            'upstream': State.SUCCESS,
            'branch_1': State.SKIPPED,
        })

        self.value = True
        self.dag.clear()
        self.upstream.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
        self.short_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        self._assert_task_states(dr.get_task_instances(), {
            'make_choice': State.SUCCESS,
            'upstream': State.SUCCESS,
            'branch_1': State.NONE,
        })
error_message = kwargs['ti'].xcom_pull(key='error_message') publish_tg_message(TG_TOKEN, TG_CHAT_ID, error_message) with DAG( 'DAG_ORDERS_DATASET_WITH_BELLS_N_WHISTLES_HW4', default_args=default_args, description='Collects and dumps orders data from different sources.'\ ' Sends msg to telegram if something goes wrong.', schedule_interval=datetime.timedelta(hours=4), ) as dag: check_db = lambda: is_engine_available( read_pg_engine) and is_engine_available(write_pg_engine) check_db_step = ShortCircuitOperator(task_id='check_db_step', python_callable=check_db, dag=dag) order_step = PythonOperator(task_id='order_step', python_callable=read_orders, dag=dag) trans_step = PythonOperator(task_id='trans_step', python_callable=read_transactions, dag=dag) customers_n_goods_step = PythonOperator( task_id='customers_n_goods_step', python_callable=lambda: read_customers_n_goods(engine=read_pg_engine), dag=dag)
max_active_runs=1, schedule_interval=None) as dag: op1 = SodaToS3Operator(task_id='get_evictions_data', http_conn_id='API_Evictions', headers=soda_headers, days_ago=31, s3_conn_id='S3_Evictions', s3_bucket='sf-evictionmeter', s3_directory='soda_jsons', size_check=True, max_bytes=500000000, dag=dag) op2 = ShortCircuitOperator(task_id='check_get_results', python_callable=get_size, provide_context=True, dag=dag) op3 = PostgresOperator(task_id='truncate_target_tables', postgres_conn_id='RDS_Evictions', sql='sql/trunc_target_tables.sql', dag=dag) op4 = S3ToPostgresOperator(task_id='load_evictions_data', s3_conn_id='S3_Evictions', s3_bucket='sf-evictionmeter', s3_prefix='soda_jsons/soda_evictions_import', source_data_type='json', postgres_conn_id='RDS_Evictions', schema='raw', table='soda_evictions',
bash_command=ss + " " + p1pkg + " " + py_file_loc + "part1.py I", dag=dag) def new_rows(): s3 = boto3.resource('s3') bucket = s3.Bucket('rcs-training-12-18') for o in bucket.objects.all(): if o.key == 'config_files/skip': return False return True no_new = ShortCircuitOperator(task_id="new_rows", python_callable=new_rows, trigger_rule=TriggerRule.ONE_SUCCESS, dag=dag) p2 = BashOperator(task_id='data_curation', bash_command=ss + " " + p2pkg + " " + py_file_loc + "part2.py", trigger_rule=TriggerRule.ONE_SUCCESS, dag=dag) p3 = BashOperator(task_id='aggregation_and_move_to_staging', bash_command=ss + " " + py_file_loc + "part3.py", dag=dag) p4 = BashOperator(task_id='move_data_from_s3_to_snowflake', bash_command="python " + py_file_loc + "part4.py", dag=dag)
default_args = dict(
    owner=dag_owner,
    start_date=datetime(2020, 10, 14),
)

dag = DAG(dag_name,
          default_args=default_args,
          catchup=False,
          schedule_interval="@once")

dummy = DummyOperator(task_id="dummy", dag=dag)

# Gate: downstream tasks only run when the callable returns a truthy value.
task1a = ShortCircuitOperator(
    dag=dag,
    task_id='look_for_new_feeds',
    python_callable=look_for_new_feeds,
    provide_context=True,
)

task1b = PythonOperator(
    dag=dag,
    task_id="download_new_feed",
    python_callable=download_new_feed,
    provide_context=True,
    #op_kwargs = {"file_name" : file}
)

# The chain used to end with `>> dummy`, making `dummy` both the first and
# the last task and so creating a cycle, which a DAG cannot contain.
# NOTE(review): if a closing marker task was intended, add a second
# DummyOperator with its own task_id rather than reusing `dummy`.
dummy >> task1a >> task1b

files_xlsx = os.listdir(input_files_path)
print(files_xlsx)
# Imported explicitly because `airflow.utils.dates.days_ago` is used below;
# importing only `airflow.utils.helpers` leaves that lookup dependent on
# some other module having imported the `dates` submodule first.
import airflow.utils.dates
import airflow.utils.helpers
from airflow.models import DAG
from airflow.operators.dummy_operator import DummyOperator
from airflow.operators.python_operator import ShortCircuitOperator

args = {
    'owner': 'airflow',
    'start_date': airflow.utils.dates.days_ago(2),
}

dag = DAG(dag_id='example_short_circuit_operator', default_args=args)

# Callable returns True.
cond_true = ShortCircuitOperator(
    task_id='condition_is_True',
    python_callable=lambda: True,
    dag=dag,
)

# Callable returns False.
cond_false = ShortCircuitOperator(
    task_id='condition_is_False',
    python_callable=lambda: False,
    dag=dag,
)

ds_true = [DummyOperator(task_id='true_' + str(i), dag=dag) for i in [1, 2]]
ds_false = [DummyOperator(task_id='false_' + str(i), dag=dag) for i in [1, 2]]

# Each condition is chained in front of its pair of dummy tasks.
airflow.utils.helpers.chain(cond_true, *ds_true)
airflow.utils.helpers.chain(cond_false, *ds_false)
def docker_move_subdag(host_top_dir, input_path, output_path):
    """
    Build the "docker_backup_db" DAG: find a file, move it, print it.

    Tasks, in order: ``view_file`` (bash ``find``, result pushed to XCom),
    ``check_if_data_available`` (short-circuits when nothing was found),
    ``move_data`` (moves the file inside a container) and ``print``
    (cats the moved file inside a container).

    :param host_top_dir: host directory that contains the data directories
    :param input_path: source directory, relative to ``host_top_dir`` on
        the host and used as the mount point in the container
    :param output_path: target directory used inside the containers
    :return: the constructed DAG (previously nothing was returned, so the
        DAG was unreachable for callers)
    """
    host_path = f"{host_top_dir}/{input_path}"

    with DAG("docker_backup_db",
             default_args=default_args,
             schedule_interval=timedelta(minutes=10)) as dag:

        locate_file_cmd = """
            sleep 10
            find {{params.source_location}} -type f -printf "%f\n" | head -1
        """

        t_view = BashOperator(task_id="view_file",
                              bash_command=locate_file_cmd,
                              xcom_push=True,
                              params={"source_location": host_path})

        def is_data_available(*args, **kwargs):
            """Return True when the view_file task pushed a value to XCom."""
            ti = kwargs["ti"]
            data = ti.xcom_pull(key=None, task_ids="view_file")
            return data is not None

        # provide_context=True is required for the callable to receive
        # "ti" in its kwargs; without it kwargs["ti"] raises KeyError.
        t_is_data_available = ShortCircuitOperator(
            task_id="check_if_data_available",
            python_callable=is_data_available,
            provide_context=True)

        t_move = DockerOperator(
            api_version="auto",
            docker_url="tcp://socat:2375",  # replace it with swarm/docker endpoint
            image="centos:latest",
            network_mode="bridge",
            # NOTE(review): both mounts expose the same host directory
            # (host_top_dir/input_path); the second may have been meant to
            # be host_top_dir/output_path, as in t_print below -- confirm.
            volumes=[
                f"{host_path}:{input_path}",
                f"{host_top_dir}/{input_path}:{output_path}",
            ],
            command=[
                "/bin/bash",
                "-c",
                "/bin/sleep 30; "
                "/bin/mv {{params.source_location}}/{{ ti.xcom_pull('view_file') }} {{params.target_location}};"
                "/bin/echo '{{params.target_location}}/{{ ti.xcom_pull('view_file') }}';",
            ],
            task_id="move_data",
            xcom_push=True,
            params={
                "source_location": f"{input_path}",
                "target_location": f"{output_path}"
            },
        )

        print_templated_cmd = """
            cat {{ ti.xcom_pull('move_data') }}
        """

        t_print = DockerOperator(
            api_version="auto",
            docker_url="tcp://socat:2375",
            image="centos:latest",
            volumes=[f"{host_top_dir}/{output_path}:{output_path}"],
            command=print_templated_cmd,
            task_id="print",
        )

        t_view.set_downstream(t_is_data_available)
        t_is_data_available.set_downstream(t_move)
        t_move.set_downstream(t_print)

    return dag