from datetime import datetime

from airflow import DAG
from airflow.operators.email_operator import EmailOperator
from airflow.utils.trigger_rule import TriggerRule
# BMSOperator is a project-specific operator that polls BookMyShow for
# ticket availability; its import path is local to this project.

default_args = {
    'start_date': datetime.strptime('2018-03-07', '%Y-%m-%d'),
}

infinity_war_ticket_check_dag = DAG(
    'infinity_war_ticket_check_dag',
    default_args=default_args,
    catchup=False,
    schedule_interval=None,
    max_active_runs=1)

t_0 = BMSOperator(
    dag=infinity_war_ticket_check_dag,
    task_id="check_tickets_for_inox_mantri_imax",
    site_url="https://in.bookmyshow.com/buytickets/avengers-infinity-war-3d-bengaluru/movie-bang-ET00074502-MT/",
    show_date="20180427",
    venue="INMB")

t_1 = EmailOperator(
    dag=infinity_war_ticket_check_dag,
    task_id='email_for_inox_mantri_imax',
    trigger_rule=TriggerRule.ALL_SUCCESS,
    to='*****@*****.**',
    subject='Tickets available at INOX Mantri Mall for Avengers Infinity War',
    html_content='Tickets available at INOX Mantri Mall for Avengers Infinity War')

t_1.set_upstream(t_0)
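# A minimal sketch of how a custom operator like BMSOperator might be
# implemented by subclassing BaseOperator; the class body and the
# availability check are hypothetical, shown only to illustrate the pattern.
from airflow.models import BaseOperator


class BMSOperator(BaseOperator):
    def __init__(self, site_url, show_date, venue, *args, **kwargs):
        super(BMSOperator, self).__init__(*args, **kwargs)
        self.site_url = site_url
        self.show_date = show_date
        self.venue = venue

    def execute(self, context):
        # Failing the task keeps the downstream EmailOperator
        # (trigger_rule=ALL_SUCCESS) from sending a false alert.
        if not self._tickets_available():
            raise ValueError('Tickets not yet available at %s' % self.venue)

    def _tickets_available(self):
        # hypothetical: scrape self.site_url for self.show_date / self.venue
        return False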
["/Users/ravimuthyala/AirflowSparkTestCode/receipts.csv"], 'driver_memory': '1g', 'executor_cores': 1, 'num_executors': 1, 'executor_memory': '1g' } spark_submit_operator = SparkSubmitOperator(task_id='Spark_Scala_Submit_Job', dag=dag, **spark_config) emailNotify = EmailOperator(task_id='email_notification', to='*****@*****.**', subject='Spark Submit Job Alert', html_content='Airflow Spark Submit Job Done', dag=dag) t1Failed = EmailOperator(dag=dag, trigger_rule=TriggerRule.ONE_FAILED, task_id="SparkJobFailed", to=["*****@*****.**"], subject="Spark job Failed", html_content='<h3>Spark job has failed</h3>') python_operator.set_downstream(spark_submit_operator) spark_submit_operator.set_downstream(emailNotify) t1Failed.set_upstream([spark_submit_operator]) if __name__ == '__main__': dag.cli()
)  # closes the definition of the upstream simple_search operator

move_tweets_to_sql = PythonOperator(
    task_id="csv_to_sql",
    # extra DAG context
    provide_context=True,
    # call the function
    python_callable=csv_to_sql,
    dag=dag,
)
id_popular = PythonOperator(
    task_id="identify_popular_links",
    provide_context=True,
    python_callable=identify_popular_links,
    dag=dag,
)
email_links = EmailOperator(
    task_id="email_best_links",
    to="*****@*****.**",
    subject="Latest popular links",
    html_content="Check out the latest!!",
    files=["{}/latest_links.txt".format(RAW_TWEET_DIR)],
    dag=dag,
)

simple_search.set_downstream(move_tweets_to_sql)
id_popular.set_upstream(move_tweets_to_sql)
email_links.set_upstream(id_popular)
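# With provide_context=True (Airflow 1.x), the callable receives the task
# context as keyword arguments. A hypothetical sketch of the csv_to_sql
# signature this implies; the body is illustrative only.
def csv_to_sql(**context):
    ds = context['ds']  # execution date, e.g. '2018-03-07'
    print('loading the tweets CSV for', ds, 'into SQL')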
get_airflow_ts_nodash_id_task = PythonOperator(
    task_id='get_airflow_ts_nodash_id_task',
    python_callable=get_airflow_ts_nodash_id,
    dag=dag)

# showing how to pull an XCom value pushed by the task above
ts_no_dash_id = '{{ ti.xcom_pull(task_ids="get_airflow_ts_nodash_id_task", dag_id="main_dag") }}'

bash_job_task = BashOperator(
    task_id="bash_job_task",
    bash_command="echo one",
    dag=dag)

email_task = EmailOperator(
    task_id='send_email',
    to=email_to,
    subject='Airflow Alert - ' + ts_no_dash_id,
    html_content="""
    <h3>Email Test</h3>
    """,
    dag=dag)

spark_job_task = spark_submit(
    python_file=base_path + '/spark_job.py',
    job_name="spark",
    dag=dag,
    conn_id=None,
    env_var_size='SMALL')

first_dummy_task.set_downstream(get_airflow_ts_nodash_id_task)
get_airflow_ts_nodash_id_task.set_downstream([spark_job_task, bash_job_task])
email_task.set_upstream([spark_job_task, bash_job_task])
second_dummy_task.set_upstream(email_task)
hello_task.set_upstream(email_task)
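# A hypothetical sketch of the callable behind the XCom pull above: a plain
# return value is pushed to XCom under the key 'return_value', which is what
# ti.xcom_pull(task_ids=...) fetches by default.
def get_airflow_ts_nodash_id(**context):
    return context['ts_nodash']  # e.g. '20180307T000000'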
    schema='my_hive_db',
    provide_context=True,
    dag=dag
)
clean_data.set_upstream(create_source_id)
count_data_rows.set_downstream([stop_flow, clean_data])

move_data_mysql = PythonOperator(
    task_id='move_data_mysql',
    python_callable=tasks.move_data_mssql,
    templates_dict={'schema': 'my_hive_db'},
    provide_context=True,
    dag=dag
)
move_data_mysql.set_upstream(clean_data)

send_email = EmailOperator(
    task_id='send_email',
    to='*****@*****.**',
    subject='ingestion complete',
    html_content="Date: {{ ds }}",
    dag=dag)
send_email.set_upstream(move_data_mysql)
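# templates_dict values are Jinja-rendered before the callable runs and are
# passed in through the context. A hypothetical sketch of the receiving end
# of tasks.move_data_mssql (body illustrative only):
def move_data_mssql(templates_dict=None, **context):
    schema = templates_dict['schema']  # 'my_hive_db' after rendering
    print('moving rows from Hive schema', schema, 'to MSSQL')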
    'retry_delay': timedelta(minutes=5),
}

dag = DAG(
    'start_test_baise',
    default_args=default_args,
    schedule_interval=timedelta(hours=4))


def start_task():
    # run baise.py from its own directory, then restore the working directory
    or_path = os.getcwd()
    os.chdir('/home/zluser/Desktop/airflow')
    os.system('python3 baise.py')
    os.chdir(or_path)


t1 = PythonOperator(
    task_id="test_baise",
    python_callable=start_task,
    dag=dag,
)

t2 = EmailOperator(
    task_id="send_email",
    dag=dag,
    trigger_rule=TriggerRule.ONE_SUCCESS,
    to='*****@*****.**',
    subject='Baise task is ok',
    html_content='<h3>Hi, your task completed successfully!</h3>\n%s'
                 % time.asctime(time.localtime(time.time())))

t2.set_upstream(t1)
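# os.chdir in start_task changes the working directory of the whole worker
# process, which can leak into other tasks. A sketch of the same call using
# subprocess with an explicit cwd instead (paths taken from the snippet):
import subprocess


def start_task():
    subprocess.run(
        ['python3', 'baise.py'],
        cwd='/home/zluser/Desktop/airflow',
        check=True)  # non-zero exit raises, so Airflow marks the task failed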
    ),
    two_day_rolling_avg as (
        SELECT AVG(a.state, b.state) as two_day_avg
        FROM yesterday_covid_data a
        JOIN yesterday_covid_data b
        ON a.state = b.state
    )
    SELECT a.state, b.state, c.two_day_avg
    FROM yesterday_covid_data a
    JOIN today_covid_data b ON a.state = b.state
    JOIN two_day_rolling_avg c ON a.state = c.two_day_avg;''',
    params={
        'today': today,
        'yesterday': yesterday
    })

# Define task to send email
send_email = EmailOperator(
    task_id='send_email',
    to=email_to,
    subject='Covid Queries DAG',
    html_content='<p>The Covid queries DAG completed successfully.</p>')

# Define task dependencies using multiple methods
t0 >> [query_1, query_2, query_3]
query_1.set_downstream(send_email)
query_2.set_downstream(send_email)
send_email.set_upstream(query_3)
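# The params dict above surfaces inside the templated sql string as Jinja
# variables ({{ params.today }}, {{ params.yesterday }}). A hypothetical
# sketch of how the truncated query head likely uses them; the operator
# class and table name are assumptions:
from airflow.operators.postgres_operator import PostgresOperator

query_example = PostgresOperator(
    task_id='query_example',
    sql="SELECT * FROM covid_data WHERE date = '{{ params.today }}'",
    params={'today': today},
    dag=dag)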
import nbformat
from nbconvert.preprocessors import CellExecutionError, ExecutePreprocessor

# body of the nb_task callable; its signature and the code that loads `nb`
# are truncated in the source
ep = ExecutePreprocessor(timeout=21600)
try:
    out = ep.preprocess(nb, {'metadata': {'path': notebook_dir}})
except CellExecutionError:
    msg = 'Error executing the notebook "%s".\n\n' % notebook_path
    msg += 'See notebook "%s" for the traceback.' % notebook_path
    print(msg)
    raise
finally:
    # persist the executed notebook (including any error traceback)
    with open(notebook_path, mode='wt') as f:
        nbformat.write(nb, f)

python_operator = PythonOperator(
    task_id='notebook_task',
    provide_context=True,
    python_callable=nb_task,
    dag=dag,
    op_kwargs={'notebook_path': notebook_path},
    run_as_user=username)

if email_on_success:
    email_operator = EmailOperator(
        task_id='email_task',
        to=emails_success.split(","),
        subject='{} completed successfully'.format(dag_id),
        dag=dag,
        html_content="<p>This job executed successfully; to customize the email content, please edit dag_template.py</p>")
    email_operator.set_upstream(python_operator)
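# The preprocessor above assumes `nb` and `notebook_dir` already exist; a
# minimal sketch of those missing steps:
import os

import nbformat

with open(notebook_path) as f:
    nb = nbformat.read(f, as_version=4)
notebook_dir = os.path.dirname(notebook_path)  # cells run with this cwd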
    bash_command='echo "hello world!!"',
    dag=dag)

task3 = MySqlOperator(
    mysql_conn_id='airflow_db',
    task_id='basic_mysql',
    sql="SELECT * FROM `dag`",
    dag=dag)

EMAIL_CONTENT = """
<ul>
  <li>Instance key: %s</li>
  <li>Owner: %s</li>
  <li>Host: %s</li>
</ul>
""" % (
    "{{ task_instance_key_str }}",
    "{{ task.owner }}",
    "{{ ti.hostname }}",
)

send_mail = EmailOperator(
    dag=dag,
    task_id="send_mail",
    to=["*****@*****.**"],
    subject="Batch succeeded: run date {{ ds }}",
    html_content=EMAIL_CONTENT)

task1.set_downstream(task2)
task2.set_downstream(task3)
send_mail.set_upstream(task2)
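# The macros above render because EmailOperator lists subject and
# html_content among its templated fields; a quick way to confirm this on
# the installed Airflow version:
from airflow.operators.email_operator import EmailOperator

print(EmailOperator.template_fields)  # includes 'subject' and 'html_content'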