def test_trigger_dagrun_twice(self):
    """Test TriggerDagRunOperator when a DagRun for the same execution_date already exists (reset_dag_run=True)."""
    utc_now = timezone.utcnow()
    task = TriggerDagRunOperator(
        task_id="test_trigger_dagrun_with_execution_date",
        trigger_dag_id=TRIGGERED_DAG_ID,
        execution_date=utc_now,
        dag=self.dag,
        poke_interval=1,
        reset_dag_run=True,
        wait_for_completion=True,
    )
    run_id = f"manual__{utc_now.isoformat()}"
    with create_session() as session:
        dag_run = DagRun(
            dag_id=TRIGGERED_DAG_ID,
            execution_date=utc_now,
            state=State.SUCCESS,
            run_type="manual",
            run_id=run_id,
        )
        session.add(dag_run)
        session.commit()
        task.execute(None)

        dagruns = session.query(DagRun).filter(DagRun.dag_id == TRIGGERED_DAG_ID).all()
        self.assertEqual(len(dagruns), 1)
        self.assertTrue(dagruns[0].external_trigger)
        self.assertEqual(dagruns[0].execution_date, utc_now)
def test_trigger_dagrun(self):
    """Test TriggerDagRunOperator."""
    task = TriggerDagRunOperator(task_id="test_task", trigger_dag_id=TRIGGERED_DAG_ID, dag=self.dag)
    task.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)

    with create_session() as session:
        dagruns = session.query(DagRun).filter(DagRun.dag_id == TRIGGERED_DAG_ID).all()
        self.assertEqual(len(dagruns), 1)
        self.assertTrue(dagruns[0].external_trigger)
def test_trigger_dagrun_operator_templated_conf(self):
    """Test passing a templated conf to the triggered DagRun."""
    task = TriggerDagRunOperator(
        task_id="test_trigger_dagrun_with_str_execution_date",
        trigger_dag_id=TRIGGERED_DAG_ID,
        conf={"foo": "{{ dag.dag_id }}"},
        dag=self.dag,
    )
    task.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)

    with create_session() as session:
        dagruns = session.query(DagRun).filter(DagRun.dag_id == TRIGGERED_DAG_ID).all()
        self.assertEqual(len(dagruns), 1)
        # assertTrue would treat the expected dict as the failure message and never
        # compare it, so assert the rendered conf value explicitly.
        self.assertEqual(dagruns[0].conf, {"foo": TEST_DAG_ID})
def test_trigger_dagrun_with_wait_for_completion_true_fail(self):
    """Test TriggerDagRunOperator with wait_for_completion but triggered dag fails."""
    execution_date = DEFAULT_DATE
    task = TriggerDagRunOperator(
        task_id="test_task",
        trigger_dag_id=TRIGGERED_DAG_ID,
        execution_date=execution_date,
        wait_for_completion=True,
        poke_interval=10,
        failed_states=[State.RUNNING],
        dag=self.dag,
    )
    with self.assertRaises(AirflowException):
        task.run(start_date=execution_date, end_date=execution_date)
def test_trigger_dagrun_with_templated_execution_date(self):
    """Test TriggerDagRunOperator with templated execution_date."""
    task = TriggerDagRunOperator(
        task_id="test_trigger_dagrun_with_str_execution_date",
        trigger_dag_id=TRIGGERED_DAG_ID,
        execution_date="{{ execution_date }}",
        dag=self.dag,
    )
    task.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)

    with create_session() as session:
        dagruns = session.query(DagRun).filter(DagRun.dag_id == TRIGGERED_DAG_ID).all()
        self.assertEqual(len(dagruns), 1)
        self.assertTrue(dagruns[0].external_trigger)
        self.assertEqual(dagruns[0].execution_date, DEFAULT_DATE)
def test_trigger_dagrun_with_execution_date(self):
    """Test TriggerDagRunOperator with custom execution_date."""
    utc_now = timezone.utcnow()
    task = TriggerDagRunOperator(
        task_id="test_trigger_dagrun_with_execution_date",
        trigger_dag_id=TRIGGERED_DAG_ID,
        execution_date=utc_now,
        dag=self.dag,
    )
    task.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)

    with create_session() as session:
        dagruns = session.query(DagRun).filter(DagRun.dag_id == TRIGGERED_DAG_ID).all()
        self.assertEqual(len(dagruns), 1)
        self.assertTrue(dagruns[0].external_trigger)
        self.assertEqual(dagruns[0].execution_date, utc_now)
def _get_test_dag(self):
    with DAG(dag_id='test_dag', default_args=DEFAULT_DAG_ARGS) as dag:
        op1 = SparkSubmitOperator(task_id='op1')
        op2 = EmrAddStepsOperator(task_id='op2', job_flow_id='foo')
        op3 = S3ListOperator(task_id='op3', bucket='foo')
        op4 = EmrCreateJobFlowOperator(task_id='op4')
        op5 = TriggerDagRunOperator(task_id='op5', trigger_dag_id='foo')
        op6 = FileToWasbOperator(task_id='op6', container_name='foo', blob_name='foo', file_path='foo')
        op7 = EmailOperator(task_id='op7', subject='foo', to='foo', html_content='foo')
        op8 = S3CopyObjectOperator(task_id='op8', dest_bucket_key='foo', source_bucket_key='foo')
        op9 = BranchPythonOperator(task_id='op9', python_callable=print)
        op10 = PythonOperator(task_id='op10', python_callable=range)

        op1 >> [op2, op3, op4]
        op2 >> [op5, op6]
        op6 >> [op7, op8, op9]
        op3 >> [op7, op8]
        op8 >> [op9, op10]

    return dag
def test_trigger_dagrun_with_wait_for_completion_true(self):
    """Test TriggerDagRunOperator with wait_for_completion."""
    execution_date = DEFAULT_DATE
    task = TriggerDagRunOperator(
        task_id="test_task",
        trigger_dag_id=TRIGGERED_DAG_ID,
        execution_date=execution_date,
        wait_for_completion=True,
        poke_interval=10,
        allowed_states=[State.RUNNING],
        dag=self.dag,
    )
    task.run(start_date=execution_date, end_date=execution_date)

    with create_session() as session:
        dagruns = session.query(DagRun).filter(DagRun.dag_id == TRIGGERED_DAG_ID).all()
        self.assertEqual(len(dagruns), 1)
def create_trigger_subdag_task(trigger_dag_id, dag):
    def _always_trigger(context, dag_run_obj):
        return dag_run_obj

    return TriggerDagRunOperator(
        task_id='trigger_{trigger}_from_{dag}'.format(trigger=trigger_dag_id, dag=dag.dag_id),
        dag=dag,
        trigger_dag_id=trigger_dag_id,
        python_callable=_always_trigger,
    )
def recreate_main_dag(dag, env):
    print("Main dag is set to rerun for the next rabbitmq watch", dag.dag_id)
    retrigger_dag = TriggerDagRunOperator(
        task_id="next_rerun",
        trigger_dag_id=WORKFLOW_DAG_ID + "_" + env,
        python_callable=auto_confirm_run_dag,
        email_on_failure=True,
        email=NOTIFY_EMAIL,
        dag=dag,
    )
    return retrigger_dag
def move_blobs_to_processing(**context):
    results = blob_service.connection.list_blobs(input_container, processing_file_prefix)
    blobs_moved = 0
    blob_urls = []
    for blob in results:
        print("\t Blob name: " + blob.name)

        # Generate a SAS token for blob access
        blob_input_url = blob_service.connection.make_blob_url(
            input_container,
            blob.name,
            sas_token=blob_service.connection.generate_blob_shared_access_signature(
                input_container,
                blob.name,
                permission=BlobPermissions(read=True),
                expiry=datetime.utcnow() + timedelta(days=5)))
        print("\t SAS URL:{}".format(blob_input_url))

        # Copy blob to processing bucket
        blob_service.connection.copy_blob(
            output_container, blob.name, blob_input_url, requires_sync=True)

        # Generate a SAS token for the now-moved blob for downstream dags
        blob_output_url = blob_service.connection.make_blob_url(
            output_container,
            blob.name,
            sas_token=blob_service.connection.generate_blob_shared_access_signature(
                output_container,
                blob.name,
                permission=BlobPermissions(read=True),
                expiry=datetime.utcnow() + timedelta(hours=1)))

        blobs_moved += 1
        blob_urls.append(blob_output_url)

        def trigger_processing_dag(context, dag_run_obj):
            dag_run_obj.payload = {
                "image_url": blob_output_url,
            }
            return dag_run_obj

        TriggerDagRunOperator(
            task_id="trigger_processing",
            trigger_dag_id="image_processing",
            python_callable=trigger_processing_dag,
            dag=dag
        ).execute(context)

        # Remove existing blob
        blob_service.connection.delete_blob(input_container, blob.name)

    return blob_urls
def trigger_matching(**context):
    schema_name, _ = _get_schema_and_table(context)
    target_schema_name, target_table_name = _parse_schema(schema_name)
    target_tag = f'DSSGenericMatchingPipeline-{target_schema_name}-{target_table_name}'

    target_dag = None
    for dag in DagBag().dags.values():
        if dag.tags and target_tag in dag.tags:
            target_dag = dag
            break

    if target_dag:
        TriggerDagRunOperator(
            trigger_dag_id=target_dag.dag_id,
            task_id='trigger-match'
        ).execute(context)
def start_image_processing(**context):
    print("Start load gen")
    for x in range(0, 100):
        def trigger_processing_dag(context, dag_run_obj):
            dag_run_obj.payload = {
                "image_url": image_url,
            }
            return dag_run_obj

        TriggerDagRunOperator(
            task_id="trigger_processing",
            trigger_dag_id="image_processing",
            python_callable=trigger_processing_dag,
            dag=dag
        ).execute(context)
    print("Finish load gen")
def test_trigger_dagrun_with_reset_dag_run_false(self):
    """Test TriggerDagRunOperator with reset_dag_run."""
    execution_date = DEFAULT_DATE
    task = TriggerDagRunOperator(
        task_id="test_task",
        trigger_dag_id=TRIGGERED_DAG_ID,
        execution_date=execution_date,
        reset_dag_run=False,
        dag=self.dag,
    )
    task.run(start_date=execution_date, end_date=execution_date, ignore_ti_state=True)

    with self.assertRaises(DagRunAlreadyExists):
        task.run(start_date=execution_date, end_date=execution_date, ignore_ti_state=True)
def removeFAKE(count):
    return PostgresOperator(
        task_id="removeFAKE_" + str(count),
        postgres_conn_id='facdb',
        sql="/facdb_4_deduping/duplicates_removeFAKE.sql",
        dag=facdb_4_deduping)


def yes_trigger(_, dag):
    return dag


trigger_facdb_5_export = TriggerDagRunOperator(
    task_id='trigger_facdb_5_export',
    trigger_dag_id='facdb_5_export',
    python_callable=yes_trigger,
    dag=facdb_4_deduping)

## DEDUPING
(facdb_4_deduping  # The DAG kicks it off

    # Merge Child Care and Pre-K Duplicate records
    >> duplicates_ccprek_acs_hhs
    >> removeFAKE(0)
    >> duplicates_ccprek_doe_acs
    >> removeFAKE(1)
    >> duplicates_ccprek_doe_dohmh
    >> removeFAKE(2)
    >> duplicates_ccprek_acs_dohmh
    >> removeFAKE(3)
    >> duplicates_ccprek_dohmh
    >> removeFAKE(4)
    >> copy_backup4

    # Merging and dropping remaining duplicates, pre-COLP
    >> duplicates_remaining
    >> removeFAKE(6)
import airflow
from airflow.models import DAG
from airflow.operators.dagrun_operator import TriggerDagRunOperator

dag = DAG(
    dag_id='trigger_dag_source',
    default_args={'start_date': airflow.utils.dates.days_ago(2), 'owner': 'zkan'},
    schedule_interval='@once',
)


def trigger(context, dag_run_obj):
    print(context)
    dag_run_obj.payload = {'message': context['params']['message']}
    return dag_run_obj


trigger = TriggerDagRunOperator(
    dag=dag,
    task_id='test_trigger_dagrun',
    trigger_dag_id="trigger_dag_target",
    python_callable=trigger,
    params={'message': 'Hello World'},
)
            journal.save()
        except AttributeError:
            logging.info("No issues are registered to models.Journal: %s " % journal)


register_last_issues_task = PythonOperator(
    task_id="register_last_issues",
    provide_context=True,
    python_callable=register_last_issues,
    dag=dag,
)

trigger_check_website_dag_task = TriggerDagRunOperator(
    task_id="trigger_check_website_dag_task",
    trigger_dag_id="check_website",
    dag=dag,
)

http_kernel_check >> read_changes_task
register_journals_task << read_changes_task
register_issues_task << register_journals_task
register_documents_task << register_issues_task
register_documents_renditions_task << register_documents_task
delete_journals_task << register_documents_renditions_task
def _build(dag_id, default_args):
    """Builds a new DAG defining the Algo Readiness workflow.

    Args:
      dag_id: The DAG ID.
      default_args: The default arguments for the DAG.

    Returns:
      The DAG object.
    """
    config_dag = DAG(dag_id=dag_id, default_args=default_args)

    # Define SDF record advertiser tasks
    start_workflow = DummyOperator(task_id='start_workflow', dag=config_dag)

    create_report = GoogleDisplayVideo360CreateReportOperator(
        task_id="create_report",
        gcp_conn_id=CONN_ID,
        report=resource_loader.get_report_path("dv360_adv_report.json"),
        params={"partners": partner_id_list},
        dag=config_dag)

    query_id = "{{ task_instance.xcom_pull('create_report', key='query_id') }}"

    run_report = GoogleDisplayVideo360RunReportOperator(
        task_id="run_report",
        gcp_conn_id=CONN_ID,
        query_id=query_id,
        dag=config_dag)

    wait_for_report = GoogleDisplayVideo360ReportSensor(
        task_id="wait_for_report",
        gcp_conn_id=CONN_ID,
        query_id=query_id,
        dag=config_dag)

    report_url = "{{ task_instance.xcom_pull('wait_for_report', key='report_url') }}"

    record_advertisers = GoogleDisplayVideo360RecordSDFAdvertiserOperator(
        task_id='record_advertisers',
        conn_id=CONN_ID,
        report_url=report_url,
        variable_name='dv360_sdf_advertisers',
        dag=config_dag)

    delete_report = GoogleDisplayVideo360DeleteReportOperator(
        task_id="delete_report",
        gcp_conn_id=CONN_ID,
        query_id=query_id,
        dag=config_dag)

    # Set dependencies for recording advertisers
    start_workflow >> create_report >> run_report >> wait_for_report >> record_advertisers >> delete_report

    # Trigger one reporting DAG for each partner
    for partner_id in partner_id_list:
        trigger_dag_id = algo_readiness_factory_dag.build_dag_id(partner_id)
        trigger_dag_task = TriggerDagRunOperator(
            task_id='trigger_%s' % trigger_dag_id,
            trigger_dag_id=trigger_dag_id,
            python_callable=_get_dag_run_obj,
            dag=config_dag)
        delete_report.set_downstream(trigger_dag_task)

    return config_dag
branch = BranchPythonOperator(
    task_id='branch',
    python_callable=decide_which_path,
    trigger_rule="all_done",
    dag=dag)

cdm_branch = BranchPythonOperator(
    task_id='check_time',
    python_callable=decide_to_run_cdm_summary,
    dag=dag)

rerun_trigger = TriggerDagRunOperator(
    task_id='rerun_trigger',
    trigger_dag_id=DAG_ID,
    dag=dag
)

sleep_trigger = TriggerDagRunOperator(
    task_id='sleep_trigger',
    trigger_dag_id=SLEEP_DAG_ID,
    dag=dag
)

update_flat_obs >> wait_for_base_tables
update_flat_orders >> wait_for_base_tables
update_flat_lab_obs >> wait_for_base_tables
wait_for_base_tables >> update_hiv_summary
def repeat_dag(context, dag_run_obj):
    rq = context['params']['rq']
    if rq.QueueSize() > 0:
        return dag_run_obj


# @TODO find a way to make these separate tasks. Difficult because they
# can't be pickled, therefore they can't be returned via a task.
session, _ = db_connect('pdsdi_dev')
rq = RedisQueue('DI_ReadyQueue')

process_operator = SubDagOperator(
    subdag=process_subdag('di_process', 'di_checksum',
                          session=session,
                          archiveID=archiveID,
                          n_procs=5,
                          rq=rq),
    task_id='di_checksum',
    dag=dag)

loop_operator = TriggerDagRunOperator(
    task_id='loop',
    provide_context=True,
    params={'rq': rq},
    trigger_dag_id='di_process',
    python_callable=repeat_dag,
    dag=dag)

process_operator >> loop_operator
)

dag_params = {
    'dag_id': 'ego_dp_substation_hvmv_voronoi',
    'start_date': datetime(2020, 7, 7),
    'schedule_interval': None
}

with DAG(**dag_params) as dag:
    set_id_as_subst_id = PythonOperator(
        task_id='set_id_as_subst_id',
        python_callable=set_id_as_subst_id)

    gemeindeschluessel = PythonOperator(
        task_id='gemeindeschluessel',
        python_callable=gemeindeschluessel)

    create_dummy_points_for_voronoi_calculation = PythonOperator(
        task_id='create_dummy_points_for_voronoi_calculation',
        python_callable=create_dummy_points_for_voronoi_calculation)

    voronoi_polygons_with_eucldean_distance = PythonOperator(
        task_id='voronoi_polygons_with_eucldean_distance',
        python_callable=voronoi_polygons_with_eucldean_distance)

    trigger_next_dag = TriggerDagRunOperator(
        trigger_dag_id='ego_dp_substation_ehv_voronoi',
        task_id='add_dummy_points')

    set_id_as_subst_id >> gemeindeschluessel >> create_dummy_points_for_voronoi_calculation \
        >> voronoi_polygons_with_eucldean_distance >> trigger_next_dag
"""This function decides whether or not to Trigger the remote DAG""" c_p = context['params']['condition_param'] print("Controller DAG : conditionally_trigger = {}".format(c_p)) if context['params']['condition_param']: dag_run_obj.payload = {'message': context['params']['message']} pp.pprint(dag_run_obj.payload) return dag_run_obj # Define the DAG dag = DAG( dag_id='example_trigger_controller_dag', default_args={ "owner": "airflow", "start_date": datetime.utcnow(), }, schedule_interval='@once', ) # Define the single task in this controller example DAG trigger = TriggerDagRunOperator( task_id='test_trigger_dagrun', trigger_dag_id="example_trigger_target_dag", python_callable=conditionally_trigger, params={ 'condition_param': True, 'message': 'Hello World' }, dag=dag, )
        if flags['kerberose']:
            cmd = 'echo $USER && kinit $USER -k -t $KEYTABS_DIR/$USER.keytab && ' + cmd
        operators[operator['id']] = BashOperator(
            task_id=operator['id'],
            bash_command=os.path.expandvars(cmd),
            dag=globals()[dag_id],
            email_on_failure=flags['faliure_email'],
            email=support_emails,
            queue=queue)
    elif operator['config']['type'] == 'trigger':
        operators[operator['id']] = TriggerDagRunOperator(
            task_id=operator['id'],
            trigger_dag_id=operator['config']['target_dag_id'],
            python_callable=conditionally_trigger,
            params={},
            dag=globals()[dag_id],
            queue=queue)

# Link operator edges
if 'edges' in config['DAGS'][dag_id]:
    for edge in config['DAGS'][dag_id]['edges']:
        parent = list(edge)[-1]
        child = edge[parent]
        operators[parent].set_downstream(operators[child])

# Adding send notification
if 'end_notification' in config['DAGS'][dag_id] and config['DAGS'][dag_id]['end_notification'] == 'False':
    pass
)

OPERSVOD_STG_LOAD_CALENDAR = PythonOperator(
    task_id='OPERSVOD_STG_LOAD_CALENDAR',
    provide_context=False,
    python_callable=load_excel_calendar,
    dag=dag,
)

OPERSVOD_STG_LOAD_REFERENCE = PythonOperator(
    task_id='OPERSVOD_STG_LOAD_REFERENCE',
    provide_context=False,
    python_callable=load_excel_reference,
    dag=dag,
)

trigger_dag_OPERSVODKA_DWH = TriggerDagRunOperator(
    task_id='trigger_dag_OPERSVODKA_DWH',
    trigger_dag_id="OPERSVODKA_DWH",
    dag=dag,
)

start >> OPERSVOD_STG_TRUNCATE_OPERSVOD_STG_STG_DUMMY_CALENDAR >> truncate_done
start >> OPERSVOD_STG_TRUNCATE_OPERSVOD_STG_STG_REFERENCE >> truncate_done
truncate_done >> OPERSVOD_STG_LOAD_CALENDAR >> load_done
truncate_done >> OPERSVOD_STG_LOAD_REFERENCE >> load_done
load_done >> trigger_dag_OPERSVODKA_DWH
trigger_dag_OPERSVODKA_DWH >> finish
    oauth_response=lambda response: response.json()['access_token'],
    response_check=lambda response: len(response.json()) > 0,
    dag=dag,
)

load_strava_activity = OAuthHttpToGcsOperator(
    task_id='load_strava_activity',
    http_conn_id='',
    method='GET',
    endpoint='https://www.strava.com/api/v3/athlete/activities',
    oauth_endpoint='https://www.strava.com/oauth/token',
    oauth_body={
        'client_id': STRAVA_CLIENT_ID,
        'client_secret': STRAVA_CLIENT_SECRET,
        'grant_type': 'refresh_token',
        'refresh_token': STRAVA_REFRESH_TOKEN
    },
    oauth_response=lambda response: response.json()['access_token'],
    bucket=GOOGLE_STORAGE_BUCKET,
    filename=OUTPUT_FILENAME,
    dag=dag,
)

trigger_activity_compiler = TriggerDagRunOperator(
    task_id='trigger_activity_compiler',
    trigger_dag_id='public_activity_compiler',
    dag=dag,
)

check_strava_activity >> load_strava_activity >> trigger_activity_compiler
    dag_id='initdb_controller_dag',
    default_args={
        "owner": "airflow",
        # TODO use current date without harming dag functionality
        "start_date": datetime(2019, 6, 15),
        "depends_on_past": False
    },
    schedule_interval='@once',
    catchup=False)

# Define the single task in this controller example DAG
trigger_useragent = TriggerDagRunOperator(
    task_id='trigger_useragentscraper',
    trigger_dag_id='useragentscraper_dag',
    python_callable=conditionally_trigger,
    params={
        'condition_param': True,
        'message': 'triggering ua scraper'
    },
    trigger_rule=TriggerRule.ALL_SUCCESS,
    dag=dag)

trigger_proxy = TriggerDagRunOperator(
    task_id='trigger_proxyscraper',
    trigger_dag_id='proxyscraper_dag',
    python_callable=conditionally_trigger,
    params={
        'condition_param': True,
        'message': 'triggering proxy scraper'
    },
    trigger_rule=TriggerRule.ALL_SUCCESS,
    dag=dag)
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. """ Example usage of the TriggerDagRunOperator. This example holds 2 DAGs: 1. 1st DAG (example_trigger_controller_dag) holds a TriggerDagRunOperator, which will trigger the 2nd DAG 2. 2nd DAG (example_trigger_target_dag) which will be triggered by the TriggerDagRunOperator in the 1st DAG """ import airflow.utils.dates from airflow import DAG from airflow.operators.dagrun_operator import TriggerDagRunOperator dag = DAG( dag_id="example_trigger_controller_dag", default_args={ "owner": "airflow", "start_date": airflow.utils.dates.days_ago(2) }, schedule_interval="@once", ) trigger = TriggerDagRunOperator( task_id="test_trigger_dagrun", trigger_dag_id= "example_trigger_target_dag", # Ensure this equals the dag_id of the DAG to trigger conf={"message": "Hello World"}, dag=dag, )
    c_p = context['params']['condition_param']
    print("Controller DAG : conditionally_trigger = {}".format(c_p))
    if context['params']['condition_param']:
        dag_run_obj.payload = {'message': context['params']['message']}
        pp.pprint(dag_run_obj.payload)
        return dag_run_obj
'''

# Define the DAG
dag = DAG(
    dag_id="example_trigger_controller_dag",
    default_args={
        "owner": "airflow",
        "start_date": days_ago(2)
    },
    schedule_interval=None,
    tags=['ksiskot'])

# Define the single task in this controller example DAG
trigger = TriggerDagRunOperator(
    task_id='test_trigger_dagrun',
    trigger_dag_id="FIRST_DAG",
    # python_callable=conditionally_trigger,
    params={
        'condition_param': True,
        'message': 'Hello World'
    },
    dag=dag,
)

trigger
}

dag = DAG(
    dag_id=dag_name,
    default_args=args,
    schedule_interval='0 2 * * *')


def trigger_dag_run_pass_params(context, dag_run_obj):
    dag_run_obj.payload = context['params']
    return dag_run_obj


CleanUpSKOPages = TriggerDagRunOperator(
    task_id='CleanUpSKOPages',
    trigger_dag_id="01_CleanupDag",
    python_callable=trigger_dag_run_pass_params,
    params={},
    dag=dag)

ImportComtelData = TriggerDagRunOperator(
    task_id='ImportComtelData',
    trigger_dag_id="02_ImportDataDag",
    python_callable=trigger_dag_run_pass_params,
    params={},
    dag=dag)

WaitForImportComtelData = ExternalTaskSensor(
    task_id='WaitForImportComtelData',
    external_dag_id='02_ImportDataDag',
    external_task_id='dag_complete',
    execution_delta=None,  # Same day as today
    dag=dag)
from airflow.sensors.external_task_sensor import ExternalTaskSensor
from airflow.utils.state import State

dag11 = DAG(
    dag_id="chapter6_figure617_dag11",
    start_date=airflow.utils.dates.days_ago(3),
    schedule_interval="0 0 * * *",
)
dag12 = DAG(
    dag_id="chapter6_figure617_dag12",
    start_date=airflow.utils.dates.days_ago(3),
    schedule_interval=None,
)

DummyOperator(task_id="etl", dag=dag11) >> TriggerDagRunOperator(
    task_id="trigger_dag2", trigger_dag_id="chapter6_figure617_dag12", dag=dag11
)
PythonOperator(task_id="report", dag=dag12, python_callable=lambda: print("hello"))

# ============================================================================================================

dag21 = DAG(
    dag_id="chapter6_figure617_dag21",
    start_date=airflow.utils.dates.days_ago(3),
    schedule_interval="0 0 * * *",
)
dag22 = DAG(
    dag_id="chapter6_figure617_dag22",
    start_date=airflow.utils.dates.days_ago(3),
    schedule_interval=None,
)
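# The ExternalTaskSensor and State imports above suggest this figure also contrasts
# explicit triggering with cross-DAG sensing; the original continuation for dag21/dag22
# is not shown here. The following is an illustrative sketch only (task names are
# hypothetical), reusing the DummyOperator/PythonOperator imports from the snippet above.
DummyOperator(task_id="etl", dag=dag21)

wait_for_etl = ExternalTaskSensor(
    task_id="wait_for_etl",
    external_dag_id="chapter6_figure617_dag21",
    external_task_id="etl",
    allowed_states=[State.SUCCESS],
    dag=dag22,
)
wait_for_etl >> PythonOperator(
    task_id="report", dag=dag22, python_callable=lambda: print("hello")
)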