def tracking_dependency_operator(self, task_id, dep):
    return SqlSensor(
        task_id=task_id,
        conn_id=self.conn_id,
        sql="SELECT created_date FROM %s.%s "
            "WHERE CREATED_DATE > '{{ ds }}' LIMIT 1" % (dep.schema, dep.table),
        **self.task_attributes
    )
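# Hedged usage sketch for the factory method above. The original shows only the
# method, so the host object (`factory`), its conn_id, and the Dep record below
# are hypothetical stand-ins for whatever the source file actually defines.
from collections import namedtuple

Dep = namedtuple('Dep', ['schema', 'table'])

# factory.task_attributes would carry shared kwargs such as dag= and poke_interval=
# wait_orders = factory.tracking_dependency_operator('wait_orders', Dep('dw', 'orders'))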
from airflow import DAG
from airflow.operators.dummy_operator import DummyOperator
from airflow.operators.sensors import SqlSensor
from airflow.operators.hive_operator import HiveOperator
from airflow.contrib.operators.spark_submit_operator import SparkSubmitOperator
from common.utils import generate_spark_args
from settings import default_args

dag = DAG('bi_sales_tmk_d', default_args=default_args, schedule_interval='0 4 * * *')

# `{{ macros.ds(ti) }}` in the original does not render (there is no macros.ds);
# the execution date is available directly as `{{ ds }}`.
wait_dw_commerce_contract = SqlSensor(
    task_id='wait_dw_commerce_contract',
    conn_id='etl_db',
    sql="SELECT * FROM etl.signal WHERE `name`='dw_commerce_contract_d' AND `value`='{{ ds }}';",
    dag=dag)

wait_dw_student_basic = SqlSensor(
    task_id='wait_dw_student_basic',
    conn_id='etl_db',
    sql="SELECT * FROM etl.signal WHERE `name`='dw_student_basic_d' AND `value`='{{ ds }}';",
    dag=dag)

wait_dw_teaching_lesson = SqlSensor(
    task_id='wait_dw_teaching_lesson',
    conn_id='etl_db',
    sql="SELECT * FROM etl.signal WHERE `name`='dw_teaching_lesson_d' AND `value`='{{ ds }}';",
    dag=dag)
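# The three sensors above poll a signal table that upstream DAGs are expected to
# write into when they finish. A hedged sketch of that producer side, using
# MySqlOperator (imported elsewhere in this repo); the exact column set of
# etl.signal is an assumption inferred from the queries above.
from airflow.operators.mysql_operator import MySqlOperator

publish_signal = MySqlOperator(
    task_id='publish_signal',
    mysql_conn_id='etl_db',
    sql="INSERT INTO etl.signal (`name`, `value`) "
        "VALUES ('dw_commerce_contract_d', '{{ ds }}');",
    dag=upstream_dag)  # i.e. the final task of the upstream dw_commerce_contract_d DAG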
from airflow import DAG
from airflow.operators.bash_operator import BashOperator
from airflow.operators.sensors import SqlSensor
from airflow.operators.hive_operator import HiveOperator
from airflow.operators.mysql_operator import MySqlOperator
from settings import default_args

# - staff information
# - admin, departments
dag = DAG('dw_staff_basic_d', default_args=default_args, schedule_interval='10 1 * * *')

start = SqlSensor(
    task_id='start',
    conn_id='src_main_db',
    sql="SELECT * FROM restore_tracking.restore_log WHERE date(restored_time) = current_date;",
    dag=dag)

# admin
del_partition_stg_admin = HiveOperator(
    task_id='del_partition_stg_admin',
    hive_cli_conn_id='spark_thrift',
    hql="alter table stg.newuuabc_admin drop if exists PARTITION (etl_date='{{ ds }}');",
    dag=dag)

src_stg_admin = BashOperator(
    task_id='src_stg_admin',
    bash_command='dataship extract uuold_newuuabc.admin {{ ds }} {{ tomorrow_ds }}',
    dag=dag)
# (fragment: tail of a trigger callback that logs and returns the DagRun payload)
    print(dag_run_obj.payload)
    return dag_run_obj

pub_id = 3
dag_ver = 'v4'
dag_id = 'PUB_{0}_{1}'.format(pub_id, dag_ver)
sub_id = 3
sub_id_ver = 'v1'
pubid_sql = 72

dag = DAG(dag_id, schedule_interval=None, default_args=default_args)

t0 = SqlSensor(
    task_id='sql_sensor_{}'.format(pub_id),
    conn_id='sd_batch1',
    sql='select b.publishid from syncsystemobjectupdate a, syncpublish b '
        'where lower(a.objectdesc) = lower(b.objectdesc) and b.publishid = {}'.format(pubid_sql),
    dag=dag)

t1 = PythonOperator(task_id='python_{}_1'.format(pub_id),
                    python_callable=my_display_function,
                    op_kwargs={'phase': 'BUILD_DELTA_START'},
                    dag=dag)

t2 = BashOperator(
    task_id='builddelta_{}'.format(pub_id),
    pool='simba_build_delta',
    bash_command='sh /x/home/dm_hdp_batch/test/projects/steam_donkey/scripts/delta_processing.sh ',
    dag=dag)

t3 = PythonOperator(task_id='python_{}_2'.format(pub_id),
                    python_callable=my_display_function,
                    dag=dag)
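# `my_display_function` is referenced above but not shown. A minimal hedged
# sketch, assuming it just logs the phase passed via op_kwargs; the real
# implementation in the source repo may do more.
def my_display_function(phase=None, **kwargs):
    # PythonOperator passes op_kwargs as keyword arguments
    print('phase: {}'.format(phase))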
# (fragment: `d`, `user`, and the opening of default_args come from earlier in the file)
default_args = {
    'depends_on_past': False,
    'start_date': datetime(d.year, d.month, d.day) - timedelta(days=7),
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}

dag = DAG('sqlite_example_dag_' + user.replace('.', ''),
          default_args=default_args,
          schedule_interval=timedelta(days=1))

# t1, t2 and t3 are examples of tasks created by instantiating operators.
# The original t0 passed only `delta` (a TimeDeltaSensor argument); SqlSensor
# requires conn_id and sql, so hypothetical values are supplied here.
t0 = SqlSensor(task_id='check_babynames_tables',
               conn_id='sqlite_default',               # assumed connection
               sql='SELECT 1 FROM babynames LIMIT 1;',  # assumed existence check
               dag=dag)

t1 = BashOperator(task_id='print_date', bash_command='date', dag=dag)

def my_cool_function(ds=None, **kwargs):
    print("{}".format(ds))

t2 = PythonOperator(task_id='show_ds',
                    python_callable=my_cool_function,
                    retries=3,
                    provide_context=True,
                    dag=dag)
defaultArgs = {
    "owner": "Nicholas",
    "retries": 1,
    "retry_delay": timedelta(minutes=5),
    "start_date": datetime(2019, 10, 15),
    "depends_on_past": False,
    "email": ["*****@*****.**"],
    "email_on_failure": True
}

dag_id = "monthly_processing"
dag = DAG(dag_id=dag_id, default_args=defaultArgs, schedule_interval=timedelta(hours=7))

sql_sensor = SqlSensor(
    task_id="check_for_requests",
    conn_id="insight",
    sql="SELECT * FROM requests;",
    poke_interval=30,
    dag=dag)

spark_submit_task = PythonOperator(
    task_id="spark_job",
    python_callable=schedule,
    dag=dag)

sql_sensor >> spark_submit_task
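# `schedule` is referenced above but not shown. A hedged sketch, assuming it
# shells out to spark-submit; the job path below is a placeholder, not the
# repo's actual artifact.
import subprocess

def schedule(**kwargs):
    # hypothetical job location; replace with the real Spark application
    subprocess.check_call(["spark-submit", "/opt/jobs/monthly_processing.py"])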
"email_list":EMAIL_LIST, "task_success_msg":"Checking signal for running MigrateDatabase passed successfully.", "task_failure_msg":"Signal not received to run MigrateDatabase." }, trigger_rule = 'all_success' )''' check_signal = SqlSensor( task_id='check_signal', conn_id='mysql_server', sql='SELECT flag FROM ' + master_db_name + '.md_check_signal where module="Decision Analytics" and task="Migrate Database"', dag=dag, timeout=300, poke_interval=10, soft_fail=True, on_failure_callback=notify_email_failure, params={ "email_list": EMAIL_LIST, "task_success_msg": "Checking signal for running MigrateDatabase passed successfully.", "task_failure_msg": "Signal not received to run MigrateDatabase." }, trigger_rule='all_success') run_sp_fc_processing_to_history = BashOperator( task_id='run_sp_fc_processing_to_history', bash_command=PYTHON_LOCATION + ' ' + os.path.join(MIGRATE_DB_SCRIPT_LOCATION, 'main.py run_sp_fc_processing_to_history '), dag=dag,
import os

from airflow import DAG
from airflow.operators.sensors import SqlSensor
from airflow.operators.python_operator import PythonOperator
from airflow.operators.http_operator import SimpleHttpOperator
from settings import default_args
from common.utils import on_prd, read_script
from common.mail import mail_files

current_dir = os.path.dirname(os.path.abspath(__file__))

dag = DAG('bi_tchops_pref_d', default_args=default_args, schedule_interval=on_prd('5 8 * * *'))

start = SqlSensor(
    task_id='start',
    conn_id='src_main_db',
    sql="SELECT * FROM restore_tracking.restore_log WHERE date(restored_time) >= '{{ ds }}';",
    dag=dag)

# gen_excel = SimpleHttpOperator(
#     task_id='gen_excel',
#     http_conn_id='taskhub_host',
#     endpoint='excel',
#     data=json.dumps([
#         {
#             "sql": read_script(current_dir, "scripts/rpt.export_student_to_onesmart.sql"),
#             "filename": "testx01"
#         },
#         {
#             "header": "a,b",
#             "sql": "SELECT id, flag from ods.newuuabc_student_user limit 11",
"id": "{}".format(account_id), "token": Variable.get("APPLICATION_ACCESS_TOKEN") }, # http params response_check= response_check, # will retry based on default_args if it fails dag=export_account_dag) print("Created account processing DAG {}".format( export_account_dag.dag_id)) # register the dynamically created DAG in the global namespace? globals()[export_account_task_name] = export_account_dag return account_ids sensor = SqlSensor( task_id='account_creation_check', conn_id='account-database', poke_interval=600, #do the select every 600 seconds, 5 minutes sql="SELECT id from accounts where created_at > '{{ds}}' LIMIT 1", dag=dag) process_new_accounts_task = PythonOperator( task_id='process_new_accounts', provide_context=True, python_callable=process_new_accounts, dag=dag) sensor >> process_new_accounts_task
from airflow import DAG
from airflow.operators.bash_operator import BashOperator
from airflow.operators.dummy_operator import DummyOperator
from airflow.operators.sensors import SqlSensor
from airflow.operators.hive_operator import HiveOperator
from airflow.contrib.operators.spark_submit_operator import SparkSubmitOperator
from utils import generate_spark_args
from settings import default_args

dag = DAG('bi_monitor_schema_columns_d', default_args=default_args, schedule_interval='54 0 * * *')

wait_dw_schema_monitor = SqlSensor(
    task_id='wait_dw_schema_monitor',
    conn_id='etl_db',
    sql="SELECT * FROM etl.signal WHERE `name`='dw_monitor_schema_d' AND `value`='{{ ds }}';",
    dag=dag)

# Read the columns whose comments changed from Spark; if there are any, write
# the ALTER statements to a fixed directory. If anything other than a comment
# changed, raise it by email.
dw_dm_columns_changed = HiveOperator(
    task_id='dw_dm_columns_changed',
    hive_cli_conn_id='spark_thrift',
    hql='scripts/dm.monitor_schema_col_changed.sql',
    hiveconf_jinja_translate=True,
    dag=dag)

end = DummyOperator(task_id='end', dag=dag)

wait_dw_schema_monitor >> dw_dm_columns_changed >> end