from airflow.operators.hive_to_mysql import HiveToMySqlTransfer
from airflow.sensors import OssSensor

args = {
    'owner': 'zhenqian.zhang',
    'start_date': datetime(2019, 8, 8),
    'depends_on_past': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
}

dag = airflow.DAG('okash_hourly',
                  schedule_interval="10 * * * *",
                  default_args=args)

check_client_file = OssSensor(
    task_id='check_client_file',
    bucket_key='okash/okash/{table}/dt={{{{ ds }}}}/hour={{{{ execution_date.strftime("%H") }}}}/'.format(table='client'),
    bucket_name='okash',
    timeout=3600,
    dag=dag)

add_client_partitions = HiveOperator(
    task_id='add_client_partitions',
    hql="""
        ALTER TABLE ods_log_client_hi ADD IF NOT EXISTS PARTITION (dt = '{{ ds }}', hour = '{{ execution_date.strftime("%H") }}');
    """,
# Convert the date into YYYY-MM-DD format
# print(d.strftime('%Y-%m-%d'))
# We need both yesterday's and today's dates; the prefix and suffix are the same in this example.
file_prefix = "myPrefiex/"
file_suffix = "_file.csv"

file_date = today.strftime('%Y-%m-%d')
full_path_today = file_prefix + file_date + file_suffix

file_date_yesterday = yesterday.strftime('%Y-%m-%d')
full_path_yesterday = file_prefix + file_date_yesterday + file_suffix

with airflow.DAG("file_sensor_example",
                 default_args=default_args,
                 schedule_interval="@once") as dag:

    start_task = DummyOperator(task_id="start")
    stop_task = DummyOperator(task_id="stop")

    sensor_task = FileSensor(task_id="file_sensor_task",
                             poke_interval=30,
                             filepath="/tmp/")

    # We expect yesterday's file to exist.
    gcs_file_sensor_yesterday = GoogleCloudStorageObjectSensor(
        task_id='gcs_file_sensor_yesterday_task',
        bucket='myBucketName',
        object=full_path_yesterday)

    # For this example we expect today's file not to exist, so the sensor keeps poking until the
    # 120 timeout; check out the docs for more options such as mode and soft_fail.
    gcs_file_sensor_today = GoogleCloudStorageObjectSensor(
        task_id='gcs_file_sensor_today_task',
# Metrics reported in the central bank monthly report
#
args = {
    'owner': 'liushuzhen',
    'start_date': datetime(2020, 3, 24),
    'depends_on_past': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=2),
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
}

dag = airflow.DAG(
    'app_opay_user_agent_trans_sum_d',
    schedule_interval="30 02 * * *",
    default_args=args,
)

##----------------------------------------- dependencies ---------------------------------------##
dwd_opay_user_transaction_record_df_prev_day_task = OssSensor(
    task_id='dwd_opay_user_transaction_record_df_prev_day_task',
    bucket_key='{hdfs_path_str}/dt={pt}/_SUCCESS'.format(
        hdfs_path_str="opay/opay_dw/dwd_opay_user_transaction_record_df/country_code=NG",
        pt='{{ds}}'),
    bucket_name='opay-datalake',
    poke_interval=60,  # when the dependency is not met, check its status once a minute
    dag=dag)
import os
from airflow.sensors import OssSensor

args = {
    'owner': 'yangmingze',
    'start_date': datetime(2019, 12, 6),
    'depends_on_past': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=2),
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
}

dag = airflow.DAG(
    'dwd_oride_promoter_users_device_df',
    schedule_interval="00 02 * * *",
    default_args=args,
)

##----------------------------------------- variables ---------------------------------------##
db_name = "oride_dw"
table_name = "dwd_oride_promoter_users_device_df"

##----------------------------------------- dependencies ---------------------------------------##
# fetch the variable
code_map = eval(Variable.get("sys_flag"))

# check for ufile (CDH environment)
if code_map["id"].lower() == "ufile":
    ods_sqoop_promoter_promoter_users_device_df_tesk = UFileSensor(
import airflow
import os
from airflow.contrib.operators.ssh_operator import SSHOperator
from datetime import datetime

default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2020, 1, 1, 0, 0),
}

dag = airflow.DAG(
    'comet_docker_import',
    default_args=default_args,
    schedule_interval=None
)

HOST_IP = os.getenv('HOST_IP')
HOST_DIR = os.getenv('HOST_DIR')

cmd_ssh = f'cd {HOST_DIR} && ./1.import.sh '

task_1 = SSHOperator(
    ssh_conn_id='comet_host',
    task_id='import',
    command=cmd_ssh,
    do_xcom_push=True,
    dag=dag
)
import requests
import os

args = {
    'owner': 'xiedong',
    'start_date': datetime(2020, 3, 29),
    'depends_on_past': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=2),
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
}

dag = airflow.DAG('ods_sqoop_base_receive_money_request_record_di',
                  schedule_interval="30 00 * * *",
                  default_args=args,
                  )

##----------------------------------------- dependencies ---------------------------------------##
ods_binlog_base_receive_money_request_record_hi_task = OssSensor(
    task_id='ods_binlog_base_receive_money_request_record_hi_task',
    bucket_key='{hdfs_path_str}/dt={pt}/hour=22/_SUCCESS'.format(
        hdfs_path_str="opay_binlog/opay_transaction_db.opay_transaction.receive_money_request_record",
        pt='{{ds}}'
    ),
    bucket_name='opay-datalake',
    poke_interval=60,  # when the dependency is not met, check its status once a minute
    dag=dag
)
import requests
import os

args = {
    'owner': 'lili.chen',
    'start_date': datetime(2019, 11, 15),
    'depends_on_past': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=2),
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
}

dag = airflow.DAG('dm_oride_driver_order_base_cube',
                  schedule_interval="50 00 * * *",
                  default_args=args)

##----------------------------------------- variables ---------------------------------------##
db_name = "oride_dw"
table_name = "dm_oride_driver_order_base_cube"

##----------------------------------------- dependencies ---------------------------------------##
# fetch the variable
code_map = eval(Variable.get("sys_flag"))

# check for ufile (CDH environment)
if code_map["id"].lower() == "ufile":
    # depends on the previous day's partition
    dependence_dwm_oride_driver_order_base_di_prev_day_task = UFileSensor(
        task_id='dwm_oride_driver_order_base_di_prev_day_task',
import os

args = {
    'owner': 'liushuzhen',
    'start_date': datetime(2019, 12, 20),
    'depends_on_past': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=2),
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
}

dag = airflow.DAG(
    'dim_opay_user_base_di',
    schedule_interval="00 01 * * *",
    default_args=args,
)

##----------------------------------------- dependencies ---------------------------------------##
ods_sqoop_base_user_di_prev_day_task = OssSensor(
    task_id='ods_sqoop_base_user_di_prev_day_task',
    bucket_key='{hdfs_path_str}/dt={pt}/_SUCCESS'.format(
        hdfs_path_str="opay_dw_sqoop_di/opay_user/user",
        pt='{{ds}}'),
    bucket_name='opay-datalake',
    poke_interval=60,  # when the dependency is not met, check its status once a minute
    dag=dag)

##----------------------------------------- task timeout monitoring ---------------------------------------##
import os

args = {
    'owner': 'yuanfeng',
    'start_date': datetime(2019, 11, 25),
    'depends_on_past': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=2),
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
}

dag = airflow.DAG(
    'dwd_pre_opos_payment_order_di',
    schedule_interval="30 01 * * *",
    default_args=args,
)

##----------------------------------------- dependencies ---------------------------------------##
dim_opos_bd_relation_df_task = OssSensor(
    task_id='dim_opos_bd_relation_df_task',
    bucket_key='{hdfs_path_str}/country_code=nal/dt={pt}/_SUCCESS'.format(
        hdfs_path_str="opos/opos_dw/dim_opos_bd_relation_df",
        pt='{{ds}}'),
    bucket_name='opay-datalake',
    poke_interval=60,  # when the dependency is not met, check its status once a minute
    dag=dag)

ods_sqoop_base_pre_opos_payment_order_di_task = OssSensor(
    task_id='ods_sqoop_base_pre_opos_payment_order_di_task',
from airflow.sensors import OssSensor

args = {
    'owner': 'lijialong',
    'start_date': datetime(2019, 11, 4),
    'depends_on_past': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=5),
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
}

dag = airflow.DAG(
    'app_oride_order_global_operate_overview_d',
    schedule_interval="30 2 * * *",
    default_args=args,
)

##----------------------------------------- variables ---------------------------------------##
db_name = "oride_dw"
table_name = "app_oride_order_global_operate_overview_d"

##----------------------------------------- dependencies ---------------------------------------##
# fetch the variable
code_map = eval(Variable.get("sys_flag"))

# check for ufile (CDH environment)
if code_map["id"].lower() == "ufile":
    dependence_dwm_oride_order_base_di_task = UFileSensor(
        task_id='dwm_oride_order_base_di_task',
import os

args = {
    'owner': 'xiedong',
    'start_date': datetime(2020, 3, 31),
    'depends_on_past': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=2),
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
}

dag = airflow.DAG(
    'ods_sqoop_base_transfer_not_register_record_di',
    schedule_interval="30 00 * * *",
    default_args=args,
)

##----------------------------------------- dependencies ---------------------------------------##
ods_binlog_base_transfer_not_register_record_hi_task = OssSensor(
    task_id='ods_binlog_base_transfer_not_register_record_hi_task',
    bucket_key='{hdfs_path_str}/dt={pt}/hour=22/_SUCCESS'.format(
        hdfs_path_str="opay_binlog/opay_transaction_db.opay_transaction.transfer_not_register_record",
        pt='{{ds}}'),
    bucket_name='opay-datalake',
    poke_interval=60,  # when the dependency is not met, check its status once a minute
    dag=dag)
from datetime import datetime, timedelta

import airflow  # needed for airflow.DAG and airflow.utils.dates below
from airflow import models
from airflow.hooks.S3_hook import S3Hook
from airflow.operators.python_operator import PythonOperator
import logging

args = {
    'owner': 'airflow',
    'start_date': airflow.utils.dates.days_ago(7),
    'provide_context': True
}


def initialize_etl_process():
    logging.info('Creating connections, pool and sql path')
    source_s3 = S3Hook(aws_conn_id='aws_default')
    source_s3.list_keys('deutsche-boerse-eurex-pds',
                        prefix='2018-11-18',
                        delimiter='/')


dag = airflow.DAG('deutsche_boerse',
                  schedule_interval="@once",
                  default_args=args,
                  max_active_runs=1)

t1 = PythonOperator(task_id='analyze_trades',
                    python_callable=initialize_etl_process,
                    provide_context=False,
                    dag=dag)
from utils.get_local_time import GetLocalTime

args = {
    'owner': 'lili.chen',
    'start_date': datetime(2020, 4, 1),
    'depends_on_past': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=2),
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
}

dag = airflow.DAG(
    'dwd_ocredit_phones_repayment_detail_di',
    schedule_interval="30 00 * * *",
    default_args=args,
)

##----------------------------------------- variables ---------------------------------------##
db_name = "ocredit_phones_dw"
table_name = "dwd_ocredit_phones_repayment_detail_di"
hdfs_path = "oss://opay-datalake/opay/ocredit_phones_dw/" + table_name
config = eval(Variable.get("ocredit_time_zone_config"))
time_zone = config['NG']['time_zone']

##----------------------------------------- dependencies ---------------------------------------##
### check the partition dependency for the current hour
ods_binlog_base_t_repayment_detail_all_hi_check_task = OssSensor(
    task_id='ods_binlog_base_t_repayment_detail_all_hi_check_task',
    bucket_key='{hdfs_path_str}/dt={pt}/hour=23/_SUCCESS'.format(
import requests
import os

args = {
    'owner': 'xiedong',
    'start_date': datetime(2019, 9, 21),
    'depends_on_past': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=2),
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
}

dag = airflow.DAG('dim_opay_region_state_mapping_df',
                  schedule_interval="00 00 * * *",
                  default_args=args)

##----------------------------------------- task timeout monitoring ---------------------------------------##
def fun_task_timeout_monitor(ds, dag, **op_kwargs):
    dag_ids = dag.dag_id

    msg = [{
        "dag": dag,
        "db": "opay_dw",
        "table": "{dag_name}".format(dag_name=dag_ids),
        "partition": "country_code=nal/dt={pt}".format(pt=ds),
        "timeout": "3000"
    }]
from utils.get_local_time import GetLocalTime

args = {
    'owner': 'yuanfeng',
    'start_date': datetime(2020, 3, 30),
    'depends_on_past': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=2),
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
}

dag = airflow.DAG(
    'dwd_otrade_b2c_mall_merchant_hf',
    schedule_interval="25 * * * *",
    default_args=args,
)

##----------------------------------------- variables ---------------------------------------##
db_name = "otrade_dw"
table_name = "dwd_otrade_b2c_mall_merchant_hf"
hdfs_path = "oss://opay-datalake/otrade/otrade_dw/" + table_name
config = eval(Variable.get("otrade_time_zone_config"))
time_zone = config['NG']['time_zone']

##----------------------------------------- dependencies ---------------------------------------##
### check the dependency on the latest merchant table
dwd_otrade_b2c_mall_merchant_hf_check_pre_locale_task = OssSensor(
    task_id='dwd_otrade_b2c_mall_merchant_hf_check_pre_locale_task',
    bucket_key='{hdfs_path_str}/country_code=NG/dt={pt}/hour={hour}/_SUCCESS'.
        self.data = dict(query=rendered_query)
        return super().execute(context)


def save_file(task_instance, params, **kwargs):
    previous_task_id = 'raw_{}'.format(params['id'])
    data = task_instance.xcom_pull(task_ids=previous_task_id)
    filename = '{}.json'.format(params['id'])
    with open(filename, 'w') as file:
        file.write(data)
    return file.name


with airflow.DAG(dag_id='airbods',
                 start_date=datetime.datetime(2021, 5, 17, tzinfo=datetime.timezone.utc)) as dag:
    # Datacake HTTP
    hook = HttpHook(http_conn_id='datacake_airbods')
    # List devices
    response = hook.run(endpoint=None, json=dict(query=textwrap.dedent("""
        query {
          allDevices(inWorkspace:"0bdfb2eb-6531-4afb-a842-ce6b51d3c980") {
            id
            serialNumber
            verboseName
          }
        }
        """)))
import os

args = {
    'owner': 'yangmingze',
    'start_date': datetime(2020, 1, 13),
    'depends_on_past': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=2),
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
}

dag = airflow.DAG(
    'dwd_oride_driver_pay_records_df',
    schedule_interval="30 02 * * *",
    default_args=args,
)

##----------------------------------------- dependencies ---------------------------------------##
ods_sqoop_base_data_driver_pay_records_df_tesk = OssSensor(
    task_id='ods_sqoop_base_data_driver_pay_records_df_tesk',
    bucket_key="{hdfs_path_str}/dt={pt}/_SUCCESS".format(
        hdfs_path_str="oride_dw_sqoop/oride_data/data_driver_pay_records",
        pt="{{ds}}"),
    bucket_name='opay-datalake',
    poke_interval=60,  # when the dependency is not met, check its status once a minute
    dag=dag)

##----------------------------------------- variables ---------------------------------------##
from plugins.TaskTouchzSuccess import TaskTouchzSuccess
from airflow.sensors import OssSensor

args = {
    'owner': 'linan',
    'start_date': datetime(2019, 5, 20),
    'depends_on_past': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=2),
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
}

dag = airflow.DAG('dwd_oride_driver_accept_order_click_detail_di',
                  schedule_interval="10 00 * * *",
                  default_args=args,
                  )

##----------------------------------------- variables ---------------------------------------##
db_name = "oride_dw"
table_name = "dwd_oride_driver_accept_order_click_detail_di"

##----------------------------------------- dependencies ---------------------------------------##
# fetch the variable
code_map = eval(Variable.get("sys_flag"))

# check for ufile (CDH environment)
if code_map["id"].lower() == "ufile":
import requests
import os

args = {
    'owner': 'xiedong',
    'start_date': datetime(2019, 9, 22),
    'depends_on_past': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=2),
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
}

dag = airflow.DAG('app_opay_transaction_consume_scenario_sum_d',
                  schedule_interval="00 03 * * *",
                  default_args=args)

##----------------------------------------- dependencies ---------------------------------------##
dwd_opay_transaction_record_di_prev_day_task = OssSensor(
    task_id='dwd_opay_transaction_record_di_prev_day_task',
    bucket_key='{hdfs_path_str}/dt={pt}/_SUCCESS'.format(
        hdfs_path_str="opay/opay_dw/dwd_opay_transaction_record_di/country_code=NG",
        pt='{{ds}}'),
    bucket_name='opay-datalake',
    poke_interval=60,  # when the dependency is not met, check its status once a minute
    dag=dag)
# Metrics reported in the central bank monthly report
#
args = {
    'owner': 'lishuai',
    'start_date': datetime(2019, 12, 1),
    'depends_on_past': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=2),
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
}

dag = airflow.DAG(
    'dwd_opay_client_event_base_di',
    schedule_interval="00 02 * * *",
    default_args=args,
)

##----------------------------------------- dependencies ---------------------------------------##
opay_ep_logv0_prev_hour_task = HivePartitionSensor(
    task_id="opay_ep_logv0_prev_hour_task",
    table="opay_ep_logv0",
    partition="dt='{{ ds }}' and hour='22'",
    schema="oride_source",
    poke_interval=60,  # when the dependency is not met, check its status once a minute
    dag=dag)

opay_ep_logv1_prev_hour_task = HivePartitionSensor(
    task_id="opay_ep_logv1_prev_hour_task",
from datetime import datetime, timedelta

from acme.operators.dwh_operators import PostgresToPostgresOperator
from acme.operators.dwh_operators import AuditOperator
from airflow.models import Variable

args = {
    'owner': 'airflow',
    'start_date': datetime(2019, 5, 8, 17, 52),
    'provide_context': True
}

tmpl_search_path = Variable.get("sql_path")

dag = airflow.DAG('customer_staging',
                  schedule_interval="*/2 * * * *",
                  dagrun_timeout=timedelta(minutes=60),
                  template_searchpath=tmpl_search_path,
                  default_args=args,
                  max_active_runs=1)

get_auditid = AuditOperator(task_id='get_audit_id',
                            postgres_conn_id='postgres_dwh',
                            audit_key="customer",
                            cycle_dtm="{{ ts }}",
                            dag=dag,
                            pool='postgres_dwh')

extract_customer = PostgresToPostgresOperator(
    sql='select_customer.sql',
    pg_table='staging.customer',
    src_postgres_conn_id='postgres_oltp',
    dest_postgress_conn_id='postgres_dwh',
# airflow DAG arguments
args = {
    "owner": "hunterowens",
    "start_date": airflow.utils.dates.days_ago(7),
    "provide_context": True,
    "email": ["*****@*****.**"],
    "email_on_failure": False,
    "retries": 1,
    "retry_delay": timedelta(minutes=5),
}

# initiating the DAG
dag = airflow.DAG(
    "merge_homeless_datasets_into_one",
    schedule_interval="@monthly",
    default_args=args,
    max_active_runs=1,
)

# When a future annual dataset becomes available, just add a new url and append it to the
# urls list. The path & filename after '-O' are the path and name the downloaded file is
# placed at and renamed to.
url2018 = ("wget -O /tmp/homeless2018.xlsx "
           "https://github.com/CityOfLosAngeles/aqueduct"
           "/raw/master/dags/homelessness/static_datasets/homeless2018.xlsx")
url2017 = ("wget -O /tmp/homeless2017.xlsx "
           "https://github.com/CityOfLosAngeles/aqueduct/"
           "raw/master/dags/homelessness/static_datasets/homeless2017.xlsx")
url2016 = ("wget -O /tmp/homeless2016.xlsx "
           "https://github.com/CityOfLosAngeles/aqueduct/"
           "raw/master/dags/homelessness/static_datasets/homeless2016.xlsx")
from airflow.sensors import OssSensor

args = {
    'owner': 'yangmingze',
    'start_date': datetime(2019, 11, 28),
    'depends_on_past': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=2),
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
}

dag = airflow.DAG(
    'dwd_oride_abnormal_order_df',
    schedule_interval="20 00 * * *",
    default_args=args,
)

##----------------------------------------- variables ---------------------------------------##
db_name = "oride_dw"
table_name = "dwd_oride_abnormal_order_df"

##----------------------------------------- dependencies ---------------------------------------##
# fetch the variable
code_map = eval(Variable.get("sys_flag"))

# check for ufile (CDH environment)
if code_map["id"].lower() == "ufile":
# Expected runtime in seconds; setting it to 0 means the default of 7200 seconds is used
expected_runtime = 0

# The group that owns this DAG
owner = "Analytic Services"

default_args = {
    'owner': owner,
    'depends_on_past': False,
    'start_date': datetime(2017, 12, 22),
    'schedule_interval': '@daily'
}

dag = airflow.DAG(dag_id='s2_collectible_completion_item_level',
                  default_args=default_args
                  )

# Start running at this time
start_time_task = TimeSensor(target_time=time(7, 00),
                             task_id='start_time_task',
                             dag=dag
                             )

##current_date = (datetime.now()).date()
##stats_date = current_date - timedelta(days=1)


def qubole_operator(task_id, sql, retries, retry_delay, dag):
    return PythonOperator(
        task_id=task_id,
        python_callable=qubole_wrapper,
6: "Alice", # Sunday } def branch_fnc(**kwargs): return "task_for_" + weekday_person_to_email[pendulum.today().weekday()] def pweedd(): print(pendulum.today().weekday()) with airflow.DAG( dag_id="test_dag_4", start_date=datetime(2019, 11, 17), schedule_interval=None, catchup=False, default_args=default_args, ) as dag: final_task = DummyOperator(task_id="final_task", dag=dag) for day in range(0, 6): printd = PythonOperator( dag=dag, task_id="print_week_day", python_callable=pweedd ) bop = BranchPythonOperator( task_id="branching", python_callable=branch_fnc, provide_context=True,
dingding_alert = DingdingAlert(dingding_address)

args = {
    'owner': 'linan',
    'start_date': datetime(2020, 3, 19),
    'depends_on_past': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
    # 'email': ['*****@*****.**'],
    # 'email_on_failure': True,
    # 'email_on_retry': False,
}

dag = airflow.DAG('bussiness_monitor',
                  schedule_interval="*/5 * * * *",
                  default_args=args)

mysql_hook = MySqlHook("bussiness_mysql")
mysql_conn = mysql_hook.get_conn()
mysql_cursor = mysql_conn.cursor()

exec_command = """
    influx -database 'serverDB' -execute '{sql}' -format='csv' > {metrics_name}.txt && echo 1 || echo 0
"""

ssh = paramiko.SSHClient()
key = paramiko.AutoAddPolicy()
ssh.set_missing_host_key_policy(key)
ssh.connect('10.52.5.233', 22, 'airflow', '', timeout=5)
"use_beeline": "true"})}) create_new_conn(session, {"conn_id": "hiveserver2-dvstar", "conn_type": "hiveserver2", "host": "hive", "schema": 'dv_star', "login": "******", "port": 10000, "extra": json.dumps({"authMechanism": "NOSASL"})}) session.close() dag = airflow.DAG( 'init_datavault3_example', schedule_interval="@once", default_args=args, template_searchpath='/usr/local/airflow/sql', max_active_runs=1) t1 = PythonOperator(task_id='init_datavault3_example', python_callable=init_datavault3_example, provide_context=False, dag=dag) t2 = HiveOperator(task_id='create_stg_database', hive_cli_conn_id='hive_default', schema='default', hql='CREATE DATABASE IF NOT EXISTS {0}'.format(ADVWORKS_STAGING), dag=dag) t3 = HiveOperator(task_id='create_dv_database',
from datetime import date, timedelta
import airflow
from google.cloud import storage
import os
import json
from airflow.contrib.operators.bigquery_operator import BigQueryOperator

default_args = {
    "depends_on_past": False,
    "start_date": airflow.utils.dates.days_ago(1),
    "retries": 1,
    "retry_delay": timedelta(minutes=2),  # timedelta is imported above; the datetime module itself is not
}

# Run SQL command example DAG
with airflow.DAG("Copy_Table_BigQuery",
                 default_args=default_args,
                 template_searchpath=['/home/airflow/gcs/data/pipeline4/'],
                 schedule_interval="@once") as dag:

    start_task = DummyOperator(task_id="start")
    stop_task = DummyOperator(task_id="stop")

    copy_records = BigQueryOperator(
        task_id='copy_records',
        sql="myquery.sql",
        use_legacy_sql=False,
        create_disposition='CREATE_IF_NEEDED',
        write_disposition='WRITE_TRUNCATE',
        params=dict(
            destination_project=os.environ['AIRFLOW_VAR_ENTERPRISE_PROJECT'],
            source_project=os.environ['AIRFLOW_VAR_ENTERPRISE_PROJECT']),
        bigquery_conn_id='etl_sa')

    start_task >> copy_records >> stop_task
import datetime

import airflow
from airflow.operators import bash_operator

YESTERDAY = datetime.datetime.now() - datetime.timedelta(days=1)

default_args = {
    'owner': 'Composer Example',
    'depends_on_past': False,
    'email': [''],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': datetime.timedelta(minutes=5),
    'start_date': YESTERDAY,
}

with airflow.DAG(
        'composer_sample_dag',
        catchup=False,
        default_args=default_args,
        schedule_interval=datetime.timedelta(days=1)) as dag:

    # Print the dag_run id from the Airflow logs
    print_dag_run_conf = bash_operator.BashOperator(
        task_id='print_dag_run_conf',
        bash_command='echo {{ dag_run.id }}')
import requests
import os

args = {
    'owner': 'xiedong',
    'start_date': datetime(2020, 1, 13),
    'depends_on_past': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=2),
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
}

dag = airflow.DAG('dim_opay_bd_relation_df',
                  schedule_interval="10 01 * * *",
                  default_args=args)

##----------------------------------------- dependencies ---------------------------------------##
dim_opay_bd_admin_user_df_prev_day_task = OssSensor(
    task_id='dim_opay_bd_admin_user_df_prev_day_task',
    bucket_key='{hdfs_path_str}/dt={pt}/_SUCCESS'.format(
        hdfs_path_str="opay/opay_dw/dim_opay_bd_admin_user_df/country_code=NG",
        pt='{{ds}}'),
    bucket_name='opay-datalake',
    poke_interval=60,  # when the dependency is not met, check its status once a minute
    dag=dag)

##----------------------------------------- task timeout monitoring ---------------------------------------##