t4 = PythonOperator(
    task_id='python_{}_3'.format(pub_id),
    python_callable=my_display_function,
    op_kwargs={'phase': 'EXTRACT_DATA_START'},
    dag=dag)

t5 = BashOperator(
    task_id='extractdata_{}'.format(pub_id),
    pool='simba_extract_data',
    bash_command='sh /x/home/dm_hdp_batch/test/projects/steam_donkey/scripts/export_processing.sh ',
    dag=dag)

t6 = PythonOperator(
    task_id='python_{}_4'.format(pub_id),
    python_callable=my_display_function,
    op_kwargs={'phase': 'EXTRACT_DATA_END'},
    dag=dag)

t7 = TriggerDagRunOperator(
    task_id='trigger_{}_1'.format(pub_id),
    trigger_dag_id="SUB_{}_{}".format(sub_id, sub_id_ver),
    python_callable=conditionally_trigger,
    params={'condition_param': True, 'message': 'Hello World'},
    dag=dag)

# Chain the tasks linearly: t0 -> t1 -> ... -> t7
t1.set_upstream(t0)
t2.set_upstream(t1)
t3.set_upstream(t2)
t4.set_upstream(t3)
t5.set_upstream(t4)
t6.set_upstream(t5)
t7.set_upstream(t6)
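This snippet leaves `conditionally_trigger` (and `my_display_function`) undefined. Under the Airflow 1.x `TriggerDagRunOperator` contract, the callable receives the template context and a `DagRunOrder` object, and the target DAG is triggered only if the callable returns that object. A plausible sketch, modeled on Airflow's stock example_trigger_controller_dag (whose `condition_param`/`message` params match those above); treat it as an assumption, not the original author's code:

def conditionally_trigger(context, dag_run_obj):
    # Trigger the target DAG only when condition_param is truthy; the
    # payload is handed to the triggered DAG run.
    if context['params']['condition_param']:
        dag_run_obj.payload = {'message': context['params']['message']}
        return dag_run_obj
    # Falling through returns None, which skips the trigger.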
cd {}/ && git checkout test && git pull && git merge origin/development && git push """.format(DAG_LOCATION),
        dag=dag)

    trigger_next_environment_deploy = TriggerDagRunOperator(
        task_id='trigger_next_environment_deploy',
        python_callable=lambda context, dag_run: dag_run,
        trigger_dag_id="awesome_dag_tst",
        dag=dag)

    test_filter_countries >> promote_branch_to_test >> trigger_next_environment_deploy
elif ENVIRONMENT == 'tst':
    trigger_next_environment_deploy = TriggerDagRunOperator(
        task_id='trigger_next_environment_deploy',
        python_callable=lambda context, dag_run: dag_run,
        trigger_dag_id="awesome_dag_acc",
        dag=dag)

    trigger_next_environment_deploy.set_upstream(test_filter_countries)

# Set order of tasks
# union_transactions.set_downstream(test_union_transactions)
# test_union_transactions.set_downstream(enrich_transactions)
# enrich_transactions.set_downstream(test_enrich_transactions)
# test_enrich_transactions.set_downstream(filter_countries)
# filter_countries.set_downstream(test_filter_countries)
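The excerpt never shows where `ENVIRONMENT` comes from. One plausible source (an assumption, not in the original) is an Airflow Variable with a development default, which keeps the dev -> tst -> acc promotion chain configurable per deployment:

from airflow.models import Variable

# Hypothetical: select which environment's trigger branch runs, falling
# back to 'dev' when the Variable is not set.
ENVIRONMENT = Variable.get("environment", default_var="dev")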
for check_name, query in checks().items():
    t = CheckOperator(
        task_id="check_consistency_" + check_name,
        sql=query,
        conn_id="postgresql_local",
        dag=dag,
    )
    t.set_upstream(start_checks)
    t.set_downstream(end_checks)

# Remove temporary CSV files
for table in ["operations_stats_extras", "operations_valides"]:
    t = BashOperator(
        task_id="delete_output_csv_" + table,
        bash_command="rm " + out_path(table),
        dag=dag,
    )
    t.set_upstream(end_checks)

# Trigger DAG to generate final open data files
# Trigger DAG to replace SECMAR database in remote database
for dag_name in ["opendata_secmar", "replace_secmar_database"]:
    trigger_dag = TriggerDagRunOperator(
        task_id="trigger_" + dag_name + "_dag",
        trigger_dag_id=dag_name,
        python_callable=lambda context, dag_run: dag_run,
        dag=dag,
    )
    trigger_dag.set_upstream(end_checks)
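`checks()` is not shown. `CheckOperator` executes its `sql` and fails the task when any value in the first returned row is falsy, so each query should yield a truthy row on success. A hypothetical sketch of the helper; the check names, table, and column are illustrative only:

def checks():
    # Map a check name to a SQL expression that must evaluate truthy
    # for the consistency check to pass.
    return {
        "operations_not_empty":
            "SELECT COUNT(1) > 0 FROM operations_valides",
        "no_null_operation_ids":
            "SELECT COUNT(1) = 0 FROM operations_valides WHERE operation_id IS NULL",
    }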
    task_id='copy-file',
    bash_command='cp {path}/out_tr.txt {path}/out_tr_copy.txt'.format(path=path))

# delete the files that were created
delete_files = BashOperator(
    task_id='delete-files',
    bash_command='rm -f {path}/out_tr.txt && rm -f {path}/out_tr_copy.txt'.format(path=path))

# Create Triggers
trigger_layer_2 = TriggerDagRunOperator(task_id='trigger-layer2',
                                        trigger_dag_id='hw_bash_layer_2')
trigger_layer_3 = TriggerDagRunOperator(task_id='trigger-layer-3',
                                        trigger_dag_id='hw_bash_layer_3')

# Assign the operators to a DAG
create_file.dag = dag_layer_1
trigger_layer_2.dag = dag_layer_1
print_file.dag = dag_layer_2
copy_file.dag = dag_layer_2
trigger_layer_3.dag = dag_layer_2
delete_files.dag = dag_layer_3

# Set any upstream requirements - e.g. especially for the triggers
trigger_layer_2.set_upstream(task_or_task_list=[create_file])
trigger_layer_3.set_upstream(task_or_task_list=[print_file, copy_file])
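The three `dag_layer_*` objects are created elsewhere; assigning `task.dag = ...` after construction attaches a task to a DAG just as passing `dag=` at creation time would. A minimal sketch of those DAGs, assuming the IDs used by the triggers above plus an arbitrary schedule and defaults; only the first layer is scheduled, since the other two are started exclusively by their triggers:

from datetime import datetime

from airflow import DAG

default_args = {'owner': 'airflow', 'start_date': datetime(2019, 1, 1)}

# Layers 2 and 3 are only ever started by a TriggerDagRunOperator, so
# they get schedule_interval=None; layer 1's dag_id is assumed.
dag_layer_1 = DAG('hw_bash_layer_1', default_args=default_args,
                  schedule_interval='@daily')
dag_layer_2 = DAG('hw_bash_layer_2', default_args=default_args,
                  schedule_interval=None)
dag_layer_3 = DAG('hw_bash_layer_3', default_args=default_args,
                  schedule_interval=None)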
"EMBULK_FILEPATH": in_path(table), "EMBULK_QUERY": helpers.read_sql_query(table), }, task_id="export_" + table, ) start = DummyOperator(task_id="start", dag=dag) end = DummyOperator(task_id="end", dag=dag) for table in SECMAR_TABLES + ["operations_valides"]: export = embulk_export(dag, table) export.set_upstream(start) command = "awk 'NR==1{{$0=tolower($0)}} 1' {filepath} > {tmp} && mv {tmp} {filepath}".format( tmp="/tmp/lower_" + table, table=table, filepath=in_path(table)) lowercase_header = BashOperator(task_id="lowercase_header_csv_" + table, bash_command=command, dag=dag) lowercase_header.set_upstream(export) lowercase_header.set_downstream(end) dag_name = "extract_secmar" trigger_dag = TriggerDagRunOperator( task_id="trigger_" + dag_name + "_dag", trigger_dag_id=dag_name, python_callable=lambda context, dag_run: dag_run, dag=dag, ) trigger_dag.set_upstream(end)
    'python /home/airflow/gcs/data/GCPDWH/ivans/load_ie_segments_to_bq_dataflow.py --config config.properties --productconfig ivans.properties --env prod --separator "|" --stripheader 0 --stripdelim 0 --addaudit 1 --writeDeposition WRITE_APPEND --system IE --input ' + filename_IE)

'''
t3 = BashOperator(
    task_id='T3_GCP_MOVE',
    bash_command='gsutil mv gs://dw-dev-insurance/ivans/current/IE_NCNU* gs://dw-dev-insurance/ivans/archive/')
'''

t4 = BashOperator(
    task_id='T4_GCP_AS400_SEGMENTS_LOAD',
    bash_command='python /home/airflow/gcs/data/GCPDWH/ivans/load_as400_segments_to_bq_dataflow.py --config config.properties --productconfig ivans.properties --env prod --separator "|" --stripheader 0 --stripdelim 0 --addaudit 1 --writeDeposition WRITE_APPEND --system AS400 --input ' + filename_AS400)

'''
t5 = BashOperator(
    task_id='T5_GCP_MOVE',
    bash_command='gsutil mv gs://dw-dev-insurance/ivans/current/AS400_NCNU* gs://dw-dev-insurance/ivans/archive/')
'''

t6 = TriggerDagRunOperator(task_id="TRIGGER_INSURANCE_MART",
                           trigger_dag_id="DAG_GCP_INSURANCE_MART_LOAD",
                           python_callable=trigger_insurance_mart,
                           dag=dag)

# t2 and t4 load in parallel after t1; t6 waits for both before
# triggering the mart DAG. The t3/t5 archive moves are disabled.
t2.set_upstream(t1)
# t3.set_upstream(t2)
t4.set_upstream(t1)
# t5.set_upstream(t4)
t6.set_upstream(t2)
t6.set_upstream(t4)
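`trigger_insurance_mart` is not defined in the excerpt. Because the Airflow 1.x `TriggerDagRunOperator` only fires when its callable returns the `DagRunOrder` object, a minimal sketch, assuming an unconditional trigger, would be:

def trigger_insurance_mart(context, dag_run_obj):
    # Returning the object triggers DAG_GCP_INSURANCE_MART_LOAD;
    # returning None would skip the run.
    return dag_run_obj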