def test_mysql_operator_test_multi(self): sql = [ "TRUNCATE TABLE test_airflow", "INSERT INTO test_airflow VALUES ('X')", ] from airflow.operators.mysql_operator import MySqlOperator t = MySqlOperator( task_id='mysql_operator_test_multi', mysql_conn_id='airflow_db', sql=sql, dag=self.dag) t.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)
def test_mysql_operator_test(self):
    sql = """
    CREATE TABLE IF NOT EXISTS test_airflow (
        dummy VARCHAR(50)
    );
    """
    from airflow.operators.mysql_operator import MySqlOperator
    t = MySqlOperator(
        task_id='basic_mysql',
        sql=sql,
        dag=self.dag)
    t.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
          ignore_ti_state=True)
def test_mysql_operator_test_multi(self): sql = [ "CREATE TABLE IF NOT EXISTS test_airflow (dummy VARCHAR(50))", "TRUNCATE TABLE test_airflow", "INSERT INTO test_airflow VALUES ('X')", ] from airflow.operators.mysql_operator import MySqlOperator t = MySqlOperator( task_id='mysql_operator_test_multi', sql=sql, dag=self.dag, ) t.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)
def test_overwrite_schema(self): """ Verifies option to overwrite connection schema """ from airflow.operators.mysql_operator import MySqlOperator sql = "SELECT 1;" t = MySqlOperator( task_id='test_mysql_operator_test_schema_overwrite', sql=sql, dag=self.dag, database="foobar", ) from _mysql_exceptions import OperationalError try: t.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True) except OperationalError as e: assert "Unknown database 'foobar'" in str(e)
def test_overwrite_schema(self): """ Verifies option to overwrite connection schema """ from airflow.operators.mysql_operator import MySqlOperator sql = "SELECT 1;" op = MySqlOperator( task_id='test_mysql_operator_test_schema_overwrite', sql=sql, dag=self.dag, database="foobar", ) from _mysql_exceptions import OperationalError try: op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True) except OperationalError as e: assert "Unknown database 'foobar'" in str(e)
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
    # 'queue': 'bash_queue',
    # 'pool': 'backfill',
    # 'priority_weight': 10,
    # 'end_date': datetime(2016, 1, 1),
}

dag = DAG('mysqltutorial', default_args=default_args,
          schedule_interval=timedelta(days=1))

# t1, t2 are examples of tasks created by instantiating operators
t1 = MySqlOperator(
    task_id='insert_PD',
    sql='call storedP1("2019-06-01","2019-06-30")',
    mysql_conn_id='xyz_id',
    database='dbname',
    dag=dag)

t2 = MySqlOperator(
    task_id='update_PD',
    sql='call storedP2("2019-06-01","2019-06-30")',
    mysql_conn_id='xyz_id',
    database='dbname',
    dag=dag)

t2.set_upstream(t1)
)

create_spotify_daily_top_50_global = MySqlOperator(
    task_id='create_spotify_daily_top_50_global',
    dag=dag,
    mysql_conn_id="airflow_mysql_db",
    sql="""
        CREATE TABLE IF NOT EXISTS spotify_daily_top_50_global (
            artist_name VARCHAR(100),
            track_id VARCHAR(100),
            track_name VARCHAR(100),
            duration_ms INT,
            explicit BOOL,
            popularity INT,
            daily_rank INT,
            album_id VARCHAR(100),
            album_name VARCHAR(100),
            album_type VARCHAR(100),
            release_date VARCHAR(100),
            dt VARCHAR(100)
        );
    """
)

populate_spotify_daily_top_50_global = BashOperator(
    task_id='populate_spotify_daily_top_50_global',
    dag=dag,
    connection.commit()
    curr.close()

dag = DAG('run_zomato_api',
          start_date=datetime.datetime.now()
          # max_active_runs = 1
          )

get_data_zomato_api = PythonOperator(task_id='get_data_zomato_api',
                                     python_callable=get_data_zomato_api,
                                     dag=dag)

create_tables_mysql = MySqlOperator(
    task_id='create_tables_mysql',
    mysql_conn_id='mysql_zomato',
    sql=sql_statements_zomato.create_all_tables,
    dag=dag)

# prep_json = PythonOperator(
#     task_id='prep_json',
#     python_callable=prep_json,
#     provide_context=True,
#     op_kwargs={"result": "{{ ti.xcom_pull(task_ids= 'get_data_zomato_api') }}"},
#     dag=dag)

create_insert_url_details = PythonOperator(
    task_id='create_insert_url_details',
    python_callable=url_details_create_insert,
    dag=dag)
         default_args=default_args,
         schedule_interval='@daily',
         template_searchpath=['/usr/local/airflow/sql_files'],
         catchup=True) as dag:

    t1 = BashOperator(
        task_id='check_file_exists',
        bash_command='shasum ~/store_files_airflow/raw_store_transactions_2020-06-30.csv',
        retries=2,
        retry_delay=timedelta(seconds=15))

    t2 = PythonOperator(task_id='clean_raw_csv', python_callable=data_cleaner)

    t3 = MySqlOperator(task_id='create_mysql_table',
                       mysql_conn_id="mysql_conn",
                       sql="create_table.sql")

    t4 = MySqlOperator(task_id='insert_into_table',
                       mysql_conn_id="mysql_conn",
                       sql="insert_into_table.sql")

    t5 = MySqlOperator(task_id='select_from_table',
                       mysql_conn_id="mysql_conn",
                       sql="select_from_table.sql")

    t6 = BashOperator(
        task_id='move_file1',
        bash_command='cat ~/store_files_airflow/location_wise_profit.csv && mv ~/store_files_airflow/location_wise_profit.csv ~/store_files_airflow/location_wise_profit_%s.csv' % yesterday_date)
)

class CustomMySqlOperator(MySqlOperator):
    def execute(self, context):
        self.log.info('Executing: %s', self.sql)
        hook = MySqlHook(mysql_conn_id=self.mysql_conn_id,
                         schema=self.database)
        return hook.get_records(self.sql, parameters=self.parameters)

resume_replication = MySqlOperator(
    task_id='resume_replication',
    sql='START SLAVE;',
    mysql_conn_id=MYSQL_CONN_ID,
    database='etl',
    dag=dag,
    trigger_rule='all_done'
)

update_flat_obs = MySqlOperator(
    task_id='update_flat_obs',
    sql='flat_obs_v1.3.sql',
    mysql_conn_id=MYSQL_CONN_ID,
    database='etl',
    dag=dag
)
#
success_finish_operator = \
    BioWardrobeTriggerBasicAnalysisOperator(task_id='success_finish',
                                            trigger_rule=TriggerRule.ONE_SUCCESS,
                                            dag=dag)
success_finish_operator.set_upstream([cli_download_sra, url_download,
                                      download_local_operator, copy_from_biowardrobe])

#
# A STEP
#
error_finish_operator = MySqlOperator(
    task_id="finish_with_error",
    mysql_conn_id=biowardrobe_connection_id,
    sql="""update labdata set libstatus=2000,
           libstatustxt="{{ ti.xcom_pull(task_ids=['download_sra','download_local', 'download_aria2'], key=None) }}"
           where uid='{{ ti.xcom_pull(task_ids='branch_download', key='uid') }}'""",
    trigger_rule=TriggerRule.ONE_FAILED,
    autocommit=True,
    dag=dag)
error_finish_operator.set_upstream([cli_download_sra, url_download,
                                    download_local_operator, copy_from_biowardrobe])

#
# BIOWARDROBE DOWNLOAD TRIGGER
#
dag_t = DAG(
    dag_id='biowardrobe_download_trigger',
    default_args={
         default_args=default_args,
         schedule_interval='@daily') as dag:

    check_file = BashOperator(task_id="check_file",
                              bash_command="shasum ~/ip_files/or.csv",
                              retries=2,
                              retry_delay=timedelta(seconds=15))

    pre_process = PythonOperator(task_id="pre_process", python_callable=pre_process)

    agg = PythonOperator(task_id="agg", python_callable=process_data)

    create_table = MySqlOperator(
        task_id="create_table",
        mysql_conn_id="mysql_db1",
        sql="CREATE table IF NOT EXISTS aggre_res (stock_code varchar(100) NULL,descb varchar(100) NULL,country varchar(100) NULL,total_price varchar(100) NULL)"
    )

    insert = MySqlOperator(
        task_id='insert_db',
        mysql_conn_id="mysql_db1",
        sql="LOAD DATA INFILE '/var/lib/mysql-files/fin.csv' INTO TABLE aggre_res FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' IGNORE 1 ROWS;"
    )

    email = EmailOperator(
        task_id='send_email',
        to='*****@*****.**',
        subject='Daily report generated',
        html_content="""<h1>Congratulations! Your store reports are ready.</h1>""",
"""xcomの値を削除するDAG""" """delete_xcom_const.pyのSQLを用途に合わせて修正すること""" from datetime import datetime from airflow import DAG from airflow.operators.mysql_operator import MySqlOperator import pendulum from function import const_delete_xcom as consts DAG_NAME = 'dag_delete_xcom' default_args = { 'owner': 'terunrun', 'depends_on_past': False, 'start_date': datetime(2021, 2, 22, 0, 0, tzinfo=pendulum.timezone('Asia/Tokyo')), 'email_on_failure': False, 'email_on_retry': False, 'retries': 0, } dag = DAG(DAG_NAME, schedule_interval=None, catchup=False, default_args=default_args) # TASK設定 delete_xcom = MySqlOperator( task_id='delete_xcom', mysql_conn_id='airflow_db', sql=[consts.QUERY], dag=dag, )
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
    # 'queue': 'bash_queue',
    # 'pool': 'backfill',
    # 'priority_weight': 10,
    # 'end_date': datetime(2016, 1, 1),
    # 'wait_for_downstream': False,
    # 'dag': dag,
    # 'sla': timedelta(hours=2),
    # 'execution_timeout': timedelta(seconds=300),
    # 'on_failure_callback': some_function,
    # 'on_success_callback': some_other_function,
    # 'on_retry_callback': another_function,
    # 'sla_miss_callback': yet_another_function,
    # 'trigger_rule': 'all_success'
}

dag = DAG('mysql_ingest', default_args=default_args, schedule_interval=None)

mysql_task = MySqlOperator(task_id='mysql_ingest',
                           mysql_conn_id='mysql_test',
                           sql='select * from authors',
                           database='mysql',
                           dag=dag)

hello_task = HelloDBOperator(task_id='sample-task',
                             name='foo_bar',
                             mysql_conn_id='mysql_test',
                             database='mysql',
                             dag=dag)
from airflow import DAG
from airflow.operators.hive_to_mysql import HiveToMySqlTransfer
from airflow.operators.mysql_operator import MySqlOperator
from airflow.utils.dates import days_ago
from datetime import datetime, timedelta

default_args = {
    'owner': 'airflow',
    'start_date': days_ago(2),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=1)
}

dag = DAG(dag_id='hive_to_mysql', schedule_interval='@once', default_args=default_args)

t1 = MySqlOperator(task_id='drop_table',
                   sql='drop table if exists temp_drivers',
                   dag=dag)

t2 = HiveToMySqlTransfer(
    task_id='hive_to_mysql',
    mysql_table='temp_drivers',
    mysql_preoperator='create table temp_drivers(col_value varchar(256))',
    sql='select * from temp_drivers',
    dag=dag)

t1 >> t2
mysql_hook = MySqlHook(mysql_conn_id="mysql", schema="airflow_mdb") connection = mysql_hook.get_conn() cursor = connection.cursor() cursor.execute(request) tweets = cursor.fetchall() for tweet in tweets: print(tweet) #return tweets with DAG(dag_id="twitter_dag", schedule_interval="*/10 * * * *", default_args=default_args) as dag: waiting_for_tweets = FileSensor(task_id="waiting_for_tweets", fs_conn_id="fs_tweet", filepath="data.csv", poke_interval=5) fetching_tweets = PythonOperator(task_id="fetching_tweets", python_callable=fetching_tweets) cleaning_tweets = PythonOperator(task_id="cleaning_tweets", python_callable=cleaning_tweets) storing_tweets = MySqlOperator(task_id="storing_tweets", sql=storing_tweets(), mysql_conn_id='mysql', autocommit=True, database='airflow_mdb') loading_tweets = PythonOperator(task_id="loading_tweets", python_callable=loading_tweets) waiting_for_tweets >> fetching_tweets >> cleaning_tweets >> storing_tweets >> loading_tweets
                                          hive_cli_conn_id='beeline',
                                          dag=dag)

insertCategory_hiphop = HiveOperator(task_id='insertCategory_hiphop',
                                     hql=hiveSQL_insertCategory_hiphop,
                                     hive_cli_conn_id='beeline',
                                     dag=dag)

insertoverwrite_final = HiveOperator(task_id='insertoverwrite_final',
                                     hql=hiveSQL_insertoverwrite_final,
                                     hive_cli_conn_id='beeline',
                                     dag=dag)

createTableToMysql = MySqlOperator(mysql_conn_id='mysql',
                                   task_id='createTableToMysql',
                                   sql=hiveSQL_create_mysql,
                                   database='spotify_data',
                                   dag=dag)

exportToMysql = HiveToMySqlTransfer(
    task_id='exportToMysql',
    sql="SELECT * FROM final",
    mysql_preoperator='DELETE FROM spotify_data.titledata',
    mysql_table='spotify_data.titledata',
    mysql_conn_id='mysql',
    hiveserver2_conn_id='beeline',
    dag=dag)

########################################################### C O P Y ################################################################

i = 0
if os.path.exists('/home/airflow/requestData/playlist'):
    FROM oride_dw_ods.ods_sqoop_base_data_driver_df dd
    left join online_time ot on ot.driver_id=dd.id
    left join driver_comment dc on dc.driver_id=dd.id
    left join order_data od on od.driver_id=dd.id
    left join oride_dw_ods.ods_sqoop_base_data_driver_group_df ddg on ddg.id=dd.group_id AND ddg.dt=dd.dt
    left join driver_app_version dav on dav.driver_id=dd.id
    WHERE dd.dt='{{ ds }}'
    """,
    schema='dashboard',
    dag=dag)

clear_driver_daily_summary = MySqlOperator(
    task_id='clear_driver_daily_summary',
    sql="""
        DELETE FROM data_driver_report WHERE dt='{{ ds }}'
    """,
    dag=dag)

def dirver_daily_summary_insert(ds, **kwargs):
    sql = """
        SELECT
            null as id,
            dt,
            driver_id,
            real_name,
            phone_number,
            group_id,
            nvl(group_name, ''),
            nvl(group_leader, ''),
for db_object in DATABASE_OBJECTS:
    cleanup = PythonOperator(
        task_id='cleanup_' + str(db_object["airflow_db_model"].__name__),
        python_callable=cleanup_function,
        params=db_object,
        provide_context=True,
        dag=dag
    )
    print_configuration.set_downstream(cleanup)

# SQL query builder
def custom_sql_query_builder(db_object):
    return "DELETE FROM %s WHERE DATE(%s) <= DATE(CURRENT_DATE() - INTERVAL %d DAY);" % (
        db_object['airflow_tablename'], db_object['age_check_column'], max_retention_days)

## Task declaration for custom mysql tables
for db_object in CUSTOM_DATABASE_OBJECTS:
    celery_cleanup = MySqlOperator(
        task_id='cleanup_' + str(db_object["airflow_tablename"]),
        sql=custom_sql_query_builder(db_object),
        mysql_conn_id='mysql_default',
        autocommit=True,
        database='airflow',
    )
    print_configuration.set_downstream(celery_cleanup)
}

with DAG('store_dag',
         default_args=default_args,
         schedule_interval='@daily',
         template_searchpath=['/usr/local/airflow/sql_files'],
         catchup=False) as dag:

    t1 = BashOperator(
        task_id='check_file_exists',
        bash_command='shasum /usr/local/airflow/store_files_airflow/raw_store_transactions.csv',
        retries=2,
        retry_delay=timedelta(seconds=15))

    t2 = PythonOperator(task_id='clean_raw_csv', python_callable=data_cleaner)

    t3 = MySqlOperator(task_id='create_mysql_table',
                       sql='create_table.sql',
                       mysql_conn_id='mysql_conn')

    t4 = MySqlOperator(task_id='insert_into_table',
                       sql='insert_into_table.sql',
                       mysql_conn_id='mysql_conn')

    t5 = MySqlOperator(task_id='select_from_table',
                       sql='select_from_table.sql',
                       mysql_conn_id='mysql_conn')

    t6 = BashOperator(
        task_id='move_location_profits',
        bash_command='mv ~/store_files_airflow/location_wise_profit.csv ~/store_files_airflow/location_wise_profit_%s.csv' % yesterdays_date)

    t7 = BashOperator(
        task_id='move_store_profits',
        bash_command=
default_args = {
    'owner': 'liyi',
    'start_date': airflow.utils.dates.days_ago(0),
    'retries': 1,
    'retry_delay': timedelta(minutes=5)
}

dag1 = DAG('mysql_create_from',
           default_args=default_args,
           description='A simple create_mysql_data DAG',
           schedule_interval=timedelta(minutes=10))
# print(dag1)
# print(type(dag1))

sql1 = ["INSERT INTO airflow_test.test1_from (date1) values (now())"]
t1 = MySqlOperator(
    mysql_conn_id='liyi',
    sql=sql1,
    task_id='mysql_create_from',
    dag=dag1
)

sql2 = ["""insert into airflow_test.test1_in(date1)
           select * from airflow_test.test1_from ORDER BY date1 DESC LIMIT 1;"""]
t2 = MySqlOperator(
    mysql_conn_id='liyi',
    sql=sql2,
    task_id='mysql_insert_into',
    dag=dag1
)

t2.set_upstream(t1)
)

sleep6 = BashOperator(
    task_id='sleep6',
    bash_command="sleep 6m",
    dag=dag,
)

sleep7 = BashOperator(
    task_id='sleep7',
    bash_command="sleep 8m",
    dag=dag,
)

build_hiv_summary_1 = MySqlOperator(
    task_id='build_hiv_summary_1',
    sql='call generate_flat_hiv_summary_vitals("build",11,500,1);',
    mysql_conn_id=MYSQL_CONN_ID,
    database='etl',
    dag=dag)

build_hiv_summary_2 = MySqlOperator(
    task_id='build_hiv_summary_2',
    sql='call generate_flat_hiv_summary_vitals("build",12,500,1);',
    mysql_conn_id=MYSQL_CONN_ID,
    database='etl',
    dag=dag)

build_hiv_summary_3 = MySqlOperator(
    task_id='build_hiv_summary_3',
    sql='call generate_flat_hiv_summary_vitals("build",13,500,1);',
    mysql_conn_id=MYSQL_CONN_ID,
    database='etl',
from airflow import DAG
from airflow.operators.bash_operator import BashOperator
from airflow.operators.mysql_operator import MySqlOperator

default_args = {
    'owner': 'airflow',
    'start_date': dt.datetime(2020, 1, 1),
    'retries': 0
}

with DAG('dynamic_dag',
         default_args=default_args,
         schedule_interval='@daily',
         catchup=False) as dag:

    opr_end = BashOperator(task_id='opr_end', bash_command='echo "Done"')

    # Dynamic definition of your DAG!!
    for counter in range(1, 4):
        task_id = 'opr_insert_' + str(counter)
        task_date = dt.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        opr_insert = MySqlOperator(task_id=task_id,
                                   sql="INSERT INTO tasks (id, timestamp) VALUES ('" + task_id + "_" + task_date + "', '" + task_date + "');",
                                   mysql_conn_id='mysql',
                                   autocommit=True,
                                   database='airflow_mdb')
        opr_insert >> opr_end
# task 3 - python function
def WriteToFile():
    dir_path = '/home/karan/Attempt_ApacheAirflow/'
    file_name = 't3.log'
    full_path = dir_path + file_name
    f = open(full_path, "a+")
    f.write("T3 Time start: {}\n".format(str(datetime.now())))
    f.close()

t3 = PythonOperator(task_id='python_write_file',
                    depends_on_past=False,
                    python_callable=WriteToFile,
                    email=['*****@*****.**'],
                    email_on_failure=True,
                    dag=dag)

t3_complete = datetime.now()

t3_status_sql = """
    insert into stage_status values
    (3, '{}', 'python_create_file', 'success'),
    (2, '{}', 'create_file', 'success');
""".format(str(t3_complete), str(t2_complete))

t4 = MySqlOperator(task_id='Update_status_table',
                   sql=t3_status_sql,
                   mysql_conn_id='local_mysql',
                   owner='airflow',
                   dag=dag)

# Dependency
t1 >> [t2, t3] >> t4
    task_id='dwd_oride_driver_cheating_detection_hi_prev_hour_task',
    bucket_key='{hdfs_path_str}/dt={pt}/hour={hour}/_SUCCESS'.format(
        hdfs_path_str="oride/oride_dw/dwd_oride_driver_cheating_detection_hi/country_code=nal",
        pt='{{ds}}',
        hour='{{ execution_date.strftime("%H") }}'
    ),
    bucket_name='opay-datalake',
    poke_interval=60,  # while the dependency is unmet, check its status once a minute
    dag=dag
)

clear_promoter_mysql_data = MySqlOperator(
    task_id='clear_promoter_mysql_data',
    sql="""
        DELETE FROM promoter_users_device WHERE dt='{{ ds_nodash }}' AND hour='{{ execution_date.hour }}';
        DELETE FROM promoter_data_hour WHERE day='{{ ds_nodash }}' and hour='{{ execution_date.hour}}';
    """,
    mysql_conn_id='opay_spread_mysql',
    dag=dag)

promoter_detail_to_msyql = HiveToMySqlTransfer(
    task_id='promoter_detail_to_msyql',
    sql="""
        SELECT
            null as id,
            t.code as code,
            t.bind_number as phone,
            t.bind_device as device_id,
            0 as register_time,
            t.bind_time as bind_time,
            {{ ds_nodash }},
          start_date=datetime(2018, 8, 12, 19, 35),
          # end_date=datetime(2018, 12, 31),
          default_args=default_args,
          # concurrency=1,
          default_view='tree',
          orientation='TB')

# Task to print date
t1 = BashOperator(task_id='start_print_date',
                  bash_command='date',
                  dag=dag)

# Task to interact with MySQL
#
# MySQL Connection Info:
#   Conn ID: mysql_cloudsql
#   Conn Type: MySQL
#   Host: <ip_address>
#   Login: root
#   Password: <password>
#
t2 = MySqlOperator(task_id='interact_with_mysql',
                   sql='create table ztable100 (cust_id int)',
                   mysql_conn_id='mysql_cloudsql',
                   autocommit=False,
                   database="zdatabase",
                   dag=dag)

# Create DAG by specifying upstream tasks
t2.set_upstream(t1)
# ZEND
                                           hive_cli_conn_id='spark_thrift',
                                           hql="alter table stg.teacher_contract_carport_time add PARTITION (etl_date='{{ macros.ds(ti) }}');\n ",
                                           dag=dag)

stg_ods_carport_time = HiveOperator(
    task_id='stg_ods_carport_time',
    hive_cli_conn_id='spark_thrift',
    hql='scripts/stg2ods.teacher_contract_carport_time_insert.sql',
    dag=dag
)

end = MySqlOperator(
    task_id='end',
    mysql_conn_id='etl_db',
    sql="INSERT INTO `signal` VALUES('{1}', '{0}') ON DUPLICATE KEY UPDATE `value`='{0}'; ".format("{{ macros.ds(ti) }}", "{{ dag.dag_id }}"),
    database='etl',
    dag=dag
)

start >> delpart_stg_bk_class_times >> stg_bk_class_times >> addpart_stg_bk_class_times >> stg_ods_bk_class_times >> wait
start >> delpart_stg_bk_class >> stg_bk_class >> addpart_stg_bk_class >> stg_ods_bk_class >> wait
start >> delpart_stg_bk_sign_in >> stg_bk_sign_in >> addpart_stg_bk_sign_in >> stg_ods_bk_sign_in >> wait
start >> delpart_stg_bk_subject >> stg_bk_subject >> addpart_stg_bk_subject >> stg_ods_bk_subject >> wait
start >> delpart_stg_live_course_details >> stg_live_course_details >> addpart_stg_live_course_details >> stg_ods_live_course_details >> wait
start >> delpart_stg_bk_subjectdet >> stg_bk_subjectdet >> addpart_stg_bk_subjectdet >> stg_ods_bk_subjectdet >> wait
from pyspark.sql.types import FloatType
from bs4 import BeautifulSoup
from airflow import DAG
from airflow.operators.python_operator import PythonOperator
from airflow.operators.bash_operator import BashOperator
from airflow.operators.mysql_operator import MySqlOperator
from airflow.utils.dates import days_ago

args = {'owner': 'Tom'}

# instantiating DAG
dag = DAG('DAG1', schedule_interval="@daily", start_date=days_ago(1), default_args=args)

create_database = MySqlOperator(task_id='create_database',
                                mysql_conn_id="mysql_default",
                                sql="CREATE DATABASE IF NOT EXISTS dag_data;",
                                dag=dag)

# grabbing SQL password from file
with open('/home/tom/sql_password_json.json', 'r') as f:
    password = json.load(f)
SQL_PASSWORD = password['password']

def get_data():
    # scrapes and cleans data from website
    URL = "http://books.toscrape.com/"
    request = requests.get(URL)
    content = request.content  # HTML content of website
    soup = BeautifulSoup(content, 'lxml')
    title_data = soup.find_all('article')
    # 'sla_miss_callback': yet_another_function,
    # 'trigger_rule': 'all_success'
}

dag = DAG(
    'basic_mysql',
    default_args=default_args,
    description='A simple mysql operation',
    schedule_interval=timedelta(days=1),
)

# t1
t1 = BashOperator(
    task_id='print_date',
    bash_command='date',
    dag=dag,
)

# sql for t2
sql = """
CREATE TABLE IF NOT EXISTS test_airflow (
    dummy VARCHAR(50)
);
"""

t2 = MySqlOperator(task_id='insert_mysql',
                   sql=sql,
                   mysql_conn_id='local_instance',
                   dag=dag)

t1 >> t2
default_args = {
    'owner': 'Airflow',
    'start_date': datetime(2019, 11, 30),
    'retries': 1,
    'retry_delay': timedelta(seconds=5)
}

with DAG('spotify_dag',
         default_args=default_args,
         schedule_interval='@daily',
         template_searchpath=['/usr/local/airflow/sql_files'],
         catchup=False) as dag:

    t1 = PythonOperator(
        task_id='get_data_from_spotify',
        python_callable=getDataFromSpotify,
    )

    t2 = MySqlOperator(
        task_id='create_mysql_table',
        mysql_conn_id="mysql_conn",
        sql="create_table.sql",
    )

    t3 = MySqlOperator(task_id='insert_mysql_table',
                       mysql_conn_id="mysql_conn",
                       sql="insert_into_table.sql")

    t1 >> t2 >> t3
"email_list":EMAIL_LIST, "success_message":"Task completed successfully", "failure_message":"Task Failed" }, trigger_rule='all_done' ) t3 = MySqlOperator( task_id='MySqlOperatorExample', mysql_conn_id="server_55", sql="create_table.sql", on_success_callback=notify_email_success, on_failure_callback=notify_email_failure, params={ "master":master, #"success_message":"Mysql Operator passed successfully", #"failure_message":"Mysql Operator did not pass successfully", #"success_file":['/root/airflow/dags/Airflow_Tutorial/sql_files/create_table.sql'], #"failure_file":['/root/airflow/dags/Airflow_Tutorial/sql_files/create_table.py'], "email_list":EMAIL_LIST }, dag=dag ) t4 = EmailOperator( task_id='EmailOperatorExample', subject='Test: Email Operator', cc='*****@*****.**', bcc='*****@*****.**', to='*****@*****.**',
params={ "email_list": EMAIL_LIST, "task_success_msg": "Flag set to 0.", "task_failure_msg": "Failed to set flag back to 0" }, dag=dag) get_cp_past_5_runs = MySqlOperator( task_id='get_cp_past_5_runs', mysql_conn_id="mysql_server", on_failure_callback=notify_email_failure, params={ "master_db_name": master_db_name, "app_db_name": app_db_name, "email_list": EMAIL_LIST, "task_success_msg": "SQL Script to generate score of past five runs successfully execcuted.", "task failure_msg": "Failed to execute SQL Script for generating score of past runs." }, sql="cp_past_5_runs.sql", dag=dag) email_status = EmailOperator( task_id='email_status', to=EMAIL_LIST, trigger_rule='all_done', subject='Execution Completed', html_content="The Decision Analytics has completed it's execution",
    'retry_delay': timedelta(minutes=30),
    'max_active_runs': 1
}

dag = DAG(dag_id=DAG_ID,
          default_args=default_args,
          schedule_interval='0 2 * * *',
          dagrun_timeout=timedelta(minutes=60),
          catchup=False,
          template_searchpath=[
              '/usr/local/airflow/etl-scripts/flat_tables',
              '/usr/local/airflow/etl-scripts/calculated_tables',
              '/usr/local/airflow/etl-scripts/database_updates'
          ])

# update_surge_weekly
update_surge_weekly = MySqlOperator(
    task_id='update_surge_weekly',
    sql='generate_surge_weekly_report_dataset_v2("sync",1,15000,20,true);',
    mysql_conn_id=MYSQL_CONN_ID,
    database='etl',
    dag=dag)

finish = DummyOperator(task_id='finish', dag=dag)

update_surge_weekly >> finish
# update_vitals >> finish

if __name__ == "__main__":
    dag.cli()
create_oride_orders_status = MySqlOperator(
    task_id='create_oride_orders_status',
    sql="""
        CREATE TABLE IF NOT EXISTS oride_orders_status_10min (
            `city_id` int(10) unsigned NOT NULL DEFAULT '0' COMMENT '城市ID',
            `city_name` varchar(64) NOT NULL DEFAULT '' COMMENT '城市名称',
            `serv_type` int(11) NOT NULL DEFAULT '-1' COMMENT '服务类型1专车2快车',
            `order_time` timestamp not null default '1970-01-02 00:00:00' comment 'time 10min',
            `daily` timestamp not null default '1970-01-02 00:00:00' comment 'time day',
            `orders` int unsigned not null default 0 comment '下单数',
            `orders_user` int unsigned not null default 0 comment '下单用户数',
            `orders_pick` int unsigned not null default 0 comment '订单业务接单数',
            `orders_pick_dserv` int unsigned not null default 0 comment '按司机业务接单数',
            `drivers_serv` int unsigned not null default 0 comment '在线司机数',
            `drivers_orderable` int(10) unsigned NOT NULL DEFAULT '0' COMMENT '可接单司机数',
            `orders_finish` int unsigned not null default 0 comment '订单业务完单数',
            `orders_finish_dserv` int unsigned not null default 0 comment '司机业务完单数',
            `orders_finish_create` int unsigned not null default 0 comment '订单业务基于创建时间的完单数',
            `avg_pick` int unsigned not null default 0 comment '订单业务平均应答时长(秒)take_time-create_time',
            `avg_pick_dserv` int unsigned not null default 0 comment '司机业务平均应答时长(秒)take_time-create_time',
            `avg_take` decimal(10,1) unsigned not null default '0.0' comment '订单业务平均接驾时长(分)pick_time-take_time',
            `avg_take_dserv` decimal(10,1) unsigned not null default '0.0' comment '司机业务平均接驾时长(分)pick_time-take_time',
            `not_sys_cancel_orders` int unsigned not null default 0 comment '订单业务应答后取消status = 6 and driver_id > 0 and cancel_role <> 3 and cancel_role <> 4',
            `not_sys_cancel_orders_dserv` int unsigned not null default 0 comment '司机业务应答后取消status = 6 and driver_id > 0 and cancel_role <> 3 and cancel_role <> 4',
            `picked_orders` int unsigned not null default 0 comment '订单业务成功接驾',
            `picked_orders_dserv` int unsigned not null default 0 comment '司机业务成接驾',
            `orders_accept` int unsigned not null default 0 comment '订单业务接单数',
            `orders_accept_dserv` int unsigned not null default 0 comment '司机业务接单数',
            `agg_orders_finish` int unsigned not null default 0 comment '订单业务累计完单数',
            `agg_orders_finish_dserv` int unsigned not null default 0 comment '司机业务累计完单数',
            primary key (`city_id`,`serv_type`,`order_time`)
        ) engine=innodb DEFAULT CHARSET=utf8;
    """,
    database='bi',
    mysql_conn_id='mysql_bi',
    dag=dag
)