def build_email(**context):
    # Open in binary mode ('rb'): .xlsx is a binary format; the handle is only
    # used to confirm the attachment exists and to pass its path to the operator.
    with open('/tmp/pokupki.xlsx', mode='rb') as file:
        email_op = EmailOperator(
            task_id='send_email',
            to=['*****@*****.**', '*****@*****.**', '*****@*****.**',
                '*****@*****.**', '*****@*****.**', '*****@*****.**',
                '*****@*****.**', '*****@*****.**', '*****@*****.**',
                '*****@*****.**', '*****@*****.**', '*****@*****.**',
                '*****@*****.**', '*****@*****.**', '*****@*****.**',
                '*****@*****.**', '*****@*****.**', '*****@*****.**'],
            subject="Fortemarket покупки",
            html_content='Hello, <br/>',
            files=[file.name],
        )
        email_op.execute(context)
def build_email(**context):
    with open('/tmp/BPM_report_new.xlsx', mode='rb') as file:
        email_op = EmailOperator(
            task_id='send_email',
            to=[
                '*****@*****.**', '*****@*****.**',
                '*****@*****.**', '*****@*****.**'
            ],
            subject="Daily BPM report",
            html_content='Hello, <br/>',
            files=[file.name],
        )
        email_op.execute(context)
def build_email(**context):
    with open('/tmp/fm_unrecognized.xlsx', mode='rb') as file:
        email_op = EmailOperator(
            task_id='send_email',
            to=[
                '*****@*****.**', '*****@*****.**', '*****@*****.**',
                '*****@*****.**', '*****@*****.**', '*****@*****.**'
            ],
            subject="fm_unrecognized",
            html_content='Hello, <br/>',
            files=[file.name],
        )
        email_op.execute(context)
def build_email(**context):
    with open('/tmp/vipiski.xlsx', mode='rb') as file:
        email_op = EmailOperator(
            task_id='send_email',
            to=[
                '*****@*****.**', '*****@*****.**', '*****@*****.**',
                '*****@*****.**', '*****@*****.**', '*****@*****.**',
                '*****@*****.**'
            ],
            subject="Fortemarket выписка",
            html_content='Hello, <br/>',
            files=[file.name],
        )
        email_op.execute(context)
default_args=default_args, schedule_interval="0 12 * * *") run_this_first = DummyOperator(task_id='run_this_first', dag=dag) branching = BranchPythonOperator(task_id='branching', python_callable=lambda: 'source_count' if datetime.now().day <= 7 and datetime.today( ).weekday() == 6 else 'ignore_not_sunday', dag=dag) branching.set_upstream(run_this_first) esucc = EmailOperator(task_id='email_success_' + dag.dag_id, to=email_addr, subject=dag.dag_id + ' [success] on ' + datetime.now().strftime('%Y-%m-%d'), html_content='Congratulation!', trigger_rule='all_success', dag=dag) source_count = BashOperator( task_id='source_count', bash_command='/disk1/source_data_count; ./daily_table_count.sh > out.log ', dag=dag) source_count.set_upstream(branching) esucc.set_upstream(source_count) ignore_not_sunday = DummyOperator(task_id='ignore_not_sunday', dag=dag) ignore_not_sunday.set_upstream(branching)
    'email_on_failure': True,
    'email_on_retry': False,
    'start_date': airflow.utils.dates.days_ago(1),
    'retries': 1,
    'retry_delay': timedelta(minutes=1),
    'concurrency': 1,
    'max_active_runs': 4
}

dag = DAG('import_applog_mongodb',
          default_args=default_args,
          schedule_interval="10 5 * * *")

esucc = EmailOperator(task_id='email_success_' + dag.dag_id,
                      to=email_addr,
                      subject=dag.dag_id + ' [success] on {{ ds }} ',
                      html_content='Congratulation!',
                      trigger_rule='all_success',
                      dag=dag)

# add table here:
tables = [
    'browser_history', 'account_list', 'event_ios', 'event_app',
    'ios_deviceinfo', 'frequentlocation', 'coordinates', 'hardware',
    'location', 'network', 'telephone', 'hardwareios'
]

# copy table to bi
#bitables = ['hardware', 'hardwareios']
bitables = []

for table in tables:
    imp = BashOperator(
    wrtr.writerow(['url', 'count'])
    wrtr.writerows(cntr.most_common(5))


simple_search = PythonOperator(task_id='search_twitter',
                               provide_context=True,
                               python_callable=search_twitter,
                               dag=dag,
                               params={'query': '#python'})

move_tweets_to_sqlite = PythonOperator(task_id='csv_to_sqlite',
                                       provide_context=True,
                                       python_callable=csv_to_sqlite,
                                       dag=dag)

id_popular = PythonOperator(task_id='identify_popular_links',
                            provide_context=True,
                            python_callable=identify_popular_links,
                            dag=dag)

email_links = EmailOperator(task_id='email_best_links',
                            to='*****@*****.**',
                            subject='Latest popular links',
                            html_content='Check out the latest!!',
                            files=['{}/latest_links.txt'.format(RAW_TWEET_DIR)],
                            dag=dag)

simple_search.set_downstream(move_tweets_to_sqlite)
id_popular.set_upstream(move_tweets_to_sqlite)
email_links.set_upstream(id_popular)
    params={'rasterdir': RASTER_DIR},
    on_success_callback=lambda c: set_perms(c['params']['rasterdir'],
                                            default_args['owner']),
    dag=dag)

make_raster_task = BashOperator(task_id='make_rasters',
                                bash_command=make_raster_cmd,
                                env={'PATH': ANACONDA_PATH},
                                params={
                                    'postphydir': POSTPHY_DIR,
                                    'ecanalysispath': ECANALYSIS_PATH,
                                    'rasterdir': RASTER_DIR
                                },
                                dag=dag)

############ Report Completion
email_me = EmailOperator(task_id='email_me',
                         to=default_args['email'],
                         subject='%s is merged' % dag_id,
                         html_content='You may commence analysis.',
                         dag=dag)

rsync_task.set_upstream(make_postphy_dir_task)
merge_events_task.set_upstream(rsync_task)
kwik2pandas_task.set_upstream(merge_events_task)
email_me.set_upstream(kwik2pandas_task)
make_raster_dir_task.set_upstream(kwik2pandas_task)
make_raster_task.set_upstream(make_raster_dir_task)

globals()[dag_id] = dag
mv_kwik_bak_task = BashOperator(task_id='move_kwik_bak',
                                bash_command=mv_kwik_bak_cmd,
                                params={
                                    'block': BLOCK,
                                    'birdid': BIRD
                                },
                                dag=dag)

rsync_task = BashOperator(task_id='rsync',
                          bash_command=rsync_command,
                          params={'block': BLOCK},
                          dag=dag)

email_me = EmailOperator(
    task_id='email_me',
    to=default_args['email'],
    subject='%s is complete' % dag_id,
    html_content='you can now manually sort on niao',
    dag=dag)

slack_it = SlackAPIPostOperator(task_id='slack_it',
                                token=SLACK_TOKEN,
                                text='%s is complete' % dag_id,
                                channel='#ephys',
                                dag=dag)

make_kwd_task.set_upstream(make_klusta_dir_task)
phy_task.set_upstream(make_kwd_task)
#merge_events_task.set_upstream(phy_task)
clear_phy_task.set_upstream(phy_task)
make_kwik_bak_dir_task.set_upstream(phy_task)
mv_kwik_bak_task.set_upstream(make_kwik_bak_dir_task)
send_processes_killed_email = EmailOperator(
    task_id="send_processes_killed_email",
    to=PROCESS_KILLED_EMAIL_ADDRESSES,
    subject=PROCESS_KILLED_EMAIL_SUBJECT,
    html_content="""
    <html>
    <body>
        <h2>Dag Run Information</h2>
        <table>
            <tr><td><b> ID: </b></td><td>{{ dag_run.id }}</td></tr>
            <tr><td><b> DAG ID: </b></td><td>{{ dag_run.dag_id }}</td></tr>
            <tr><td><b> Execution Date: </b></td><td>{{ dag_run.execution_date }}</td></tr>
            <tr><td><b> Start Date: </b></td><td>{{ dag_run.start_date }}</td></tr>
            <tr><td><b> End Date: </b></td><td>{{ dag_run.end_date }}</td></tr>
            <tr><td><b> Run ID: </b></td><td>{{ dag_run.run_id }}</td></tr>
            <tr><td><b> External Trigger: </b></td><td>{{ dag_run.external_trigger }}</td></tr>
        </table>
        <h2>Task Instance Information</h2>
        <table>
            <tr><td><b> Task ID: </b></td><td>{{ task_instance.task_id }}</td></tr>
            <tr><td><b> Execution Date: </b></td><td>{{ task_instance.execution_date }}</td></tr>
            <tr><td><b> Start Date: </b></td><td>{{ task_instance.start_date }}</td></tr>
            <tr><td><b> End Date: </b></td><td>{{ task_instance.end_date }}</td></tr>
            <tr><td><b> Host Name: </b></td><td>{{ task_instance.hostname }}</td></tr>
            <tr><td><b> Unix Name: </b></td><td>{{ task_instance.unixname }}</td></tr>
            <tr><td><b> Job ID: </b></td><td>{{ task_instance.job_id }}</td></tr>
            <tr><td><b> Queued Date Time: </b></td><td>{{ task_instance.queued_dttm }}</td></tr>
            <tr><td><b> Log URL: </b></td><td><a href="{{ task_instance.log_url }}">{{ task_instance.log_url }}</a></td></tr>
        </table>
        <h2>Processes Killed</h2>
        <ul>
        {% for process_killed in task_instance.xcom_pull(task_ids='kill_halted_tasks', key='kill_halted_tasks.processes_to_kill') %}
            <li>Process {{ loop.index }}</li>
            <ul>
            {# .items() works on both Python 2 and 3; the original .iteritems() is Python 2 only #}
            {% for key, value in process_killed.items() %}
                <li>{{ key }}: {{ value }}</li>
            {% endfor %}
            </ul>
        {% endfor %}
        </ul>
    </body>
    </html>
    """,
    dag=dag)
dag = DAG(dag_id="connect_to_monary_and_email_operator", default_args=default_args, params=params) def connect_to_monary_and_email_operator(ds, **kwargs): m = Monary() pipeline = [{"$group": {"_id": "$state", "totPop": {"$sum": "$pop"}}}] states, population = m.aggregate("zips", "data", pipeline, ["_id", "totPop"], ["string:2", "int64"]) strs = list(map(lambda x: x.decode("utf-8"), states)) result = list("%s: %d" % (state, pop) for (state, pop) in zip(strs, population)) print(result) run_this = PythonOperator( task_id="connect_to_monary_and_email_operator", provide_context=True, python_callable=connect_to_monary_and_email_operator, dag=dag, ) send_email_notification_flow_successful = EmailOperator( task_id="send_email_notification_flow_successful", to="*****@*****.**", subject="custom email from airflow", html_content="{{ params['foo'](execution_date) }}", params=params, dag=dag, ) send_email_notification_flow_successful.set_upstream(run_this)
dag = DAG('edmjnl_crawler11',
          default_args=default_args,
          schedule_interval='@hourly',
          catchup=True)

t1 = BashOperator(
    task_id='schedule_edmjnl_crawler',
    bash_command="cd ~/airflow/edmjnl2 && scrapy runspider edmjnl.py -o file.csv -t csv",
    dag=dag)
# For ^ I prefer: file_'{{ execution_date }}'.csv -t csv"

t2 = BashOperator(
    task_id='schedule_edmjnl_crawler2',
    bash_command="cd ~/airflow/edmjnl2 && scrapy runspider post.py -o file2.csv -t csv",
    dag=dag)

t3 = EmailOperator(task_id='schedule_edmjnl_email',
                   to='*****@*****.**',
                   subject='Edmjnl Email',
                   html_content="HTML content",
                   # files expects a list; a bare string would be iterated character by character
                   files=['file.csv'],
                   dag=dag)
# For ^ I need something like ["home/antony/airflow/edmjnl/file_'{{ execution_date }}'.csv"]

t1 >> t2
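The two comments above ask for run-specific filenames. Below is a minimal, hedged sketch of that idea using Jinja templating; it assumes an Airflow version in which bash_command and the EmailOperator files argument are templated fields, and the task ids and absolute path are illustrative rather than taken from the original DAG.

# Hypothetical sketch: template the crawler output and the attachment path with
# {{ ts_nodash }}, a compact per-run timestamp, so each hourly run writes and
# mails its own file instead of overwriting file.csv.
crawl_templated = BashOperator(
    task_id='schedule_edmjnl_crawler_templated',
    bash_command=("cd ~/airflow/edmjnl2 && "
                  "scrapy runspider edmjnl.py -o file_{{ ts_nodash }}.csv -t csv"),
    dag=dag)

email_templated = EmailOperator(
    task_id='schedule_edmjnl_email_templated',
    to='*****@*****.**',
    subject='Edmjnl Email',
    html_content="HTML content",
    # absolute, run-specific path; the directory shown here is illustrative
    files=['/home/antony/airflow/edmjnl2/file_{{ ts_nodash }}.csv'],
    dag=dag)

crawl_templated >> email_templated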
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': True,
    'retries': 0,
}

with DAG(
        'PriceAlerter',
        default_args=default_args,
        schedule_interval='*/5 9-17 * * *',
        catchup=False,
) as dag:
    price_listener = PythonOperator(
        task_id='listener',
        python_callable=listener.listen,
    )
    email_trigger = ShortCircuitOperator(
        task_id='email_trigger',
        python_callable=lambda: True if listener.trigger else False,
        trigger_rule=TriggerRule.NONE_FAILED,
    )
    email = EmailOperator(
        task_id='email',
        to=email_service.email_list,
        subject=email_service.get_email_subject(listener.summary),
        html_content=email_service.get_html_content(listener.summary),
    )
    price_listener.set_downstream(email_trigger)
    email_trigger.set_downstream(email)
fill_search_terms = PythonOperator(task_id='fill_terms',
                                   provide_context=True,
                                   python_callable=fill_terms,
                                   dag=dag)

gen_search_terms = BranchPythonOperator(task_id='generate_search_terms',
                                        provide_context=True,
                                        python_callable=generate_search_terms,
                                        dag=dag)

email_links = EmailOperator(task_id='email_best_links',
                            to='*****@*****.**',
                            subject='Latest popular links',
                            html_content='Check out the latest!!',
                            files=['{}/latest_links.txt'.format(RAW_TWEET_DIR)],
                            dag=dag)

sub = SubDagOperator(subdag=subdag,
                     task_id='insert_and_id_pop',
                     trigger_rule='one_success',
                     dag=dag)

clear_latest = BashOperator(
    bash_command='rm -rf {}/latest_links.txt'.format(RAW_TWEET_DIR),
    task_id='clear_latest',
    dag=dag)

gen_search_terms.set_upstream(fill_search_terms)
from datetime import datetime, timedelta

default_args = {
    'owner': 'airflow',
    'start_date': datetime.now() - timedelta(seconds=10),
    'retries': 0
}

dag = DAG('Sales_Nov',
          default_args=default_args,
          start_date=datetime.now() - timedelta(seconds=10))

op1 = DummyOperator(task_id='File1_landing', dag=dag)
t1 = EmailOperator(task_id='Processing_File_1',
                   to='*****@*****.**',
                   subject="Airflow_report",
                   html_content="File 1 started",
                   dag=dag)
op2 = DummyOperator(task_id='File2_landing', dag=dag)
t2 = EmailOperator(task_id='Processing_File_2',
                   to='*****@*****.**',
                   subject="Airflow_report",
                   html_content="File 2 started",
                   dag=dag)
op3 = DummyOperator(task_id='Aggregating', dag=dag)
op4 = DummyOperator(task_id='Final_Table_Push', dag=dag)

t1.set_upstream(op1)
t2.set_upstream(op2)
op3.set_upstream(t1)
    task_id='move_kwik_bak',
    bash_command=mv_kwik_bak_cmd,
    params={'klustadir': KLUSTA_DIR,
            'kwikbakdir': KWIKBAK_DIR},
    dag=dag)

rsync_task = BashOperator(
    task_id='rsync',
    bash_command=as_user(rsync_command, USER),
    params={'klustadir': KLUSTA_DIR,
            'mansortdir': MANSORT_DIR},
    dag=dag)

email_me = EmailOperator(
    task_id='email_me',
    to=default_args['email'],
    subject='%s is complete' % dag_id,
    html_content='You may now manually sort on NIAO',
    dag=dag)

slack_it = SlackAPIPostOperator(
    task_id='slack_it',
    token=SLACK_TOKEN,
    text='%s is complete' % dag_id,
    channel='#ephys',
    dag=dag)

make_kwd_task.set_upstream(make_klusta_dir_task)
phy_task.set_upstream(make_kwd_task)
#merge_events_task.set_upstream(phy_task)
clear_phy_task.set_upstream(phy_task)
make_kwik_bak_dir_task.set_upstream(phy_task)
# The DAG object; we'll need this to instantiate a DAG
from airflow import DAG
from airflow.operators import EmailOperator
from datetime import datetime, timedelta

default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2017, 3, 29),
    'email_on_failure': True,
    'email_on_retry': False,
    'retries': 2,
    'retry_delay': timedelta(minutes=5),
    # 'queue': 'bash_queue',
    # 'pool': 'backfill',
    # 'priority_weight': 10,
    'end_date': datetime(2017, 3, 31)  # must fall after start_date
}

dag = DAG(
    'once_hour',
    default_args=default_args,
    schedule_interval="0 * * * *"
)  # the schedule interval for this DAG is once an hour

t1 = EmailOperator(
    task_id='send_email',          # task_id, html_content and dag were missing;
    to='*****@*****.**',           # EmailOperator requires them, so placeholder
    subject='Generic Subject',     # values are supplied here
    html_content='Generic body',
    dag=dag)
    'start_date': one_min_ago,
    'retries': 500
}

dag = DAG('vs', default_args=default_args, schedule_interval='@once')

chem1_pdb_prot1_pdb = BashOperator(
    task_id='chem1_pdb_prot1_pdb',
    bash_command="(cd /working-directory; virtualScreening.py -l chem1.pdb -o result -p prot1.pdb) ",
    dag=dag)

chem1_pdb_prot1_pdb_success_mail = EmailOperator(
    task_id="chem1_pdb_prot1_pdb_success_mail",
    to=[u'*****@*****.**'],
    subject="chem1_pdb_prot1_pdb success",
    html_content="chem1_pdb_prot1_pdb success",
    dag=dag)

chem1_pdb_prot1_pdb_success_mail.set_upstream(chem1_pdb_prot1_pdb)
#chem1_pdb_prot1_pdb.set_upstream( )

chem1_pdb_prot2_pdb = BashOperator(
    task_id='chem1_pdb_prot2_pdb',
    bash_command="(cd /working-directory; virtualScreening.py -l chem1.pdb -o result -p prot2.pdb) ",
    dag=dag)

chem1_pdb_prot2_pdb_success_mail = EmailOperator(
    task_id="chem1_pdb_prot2_pdb_success_mail",
    to=[u'*****@*****.**'],
#     task_id='make_mansort_dir',
#     bash_command=as_user(make_mansort_dir_cmd, USER),
#     params={'mansortdir': MANSORT_DIR},
#     dag=dag)
# rsync_task = BashOperator(
#     task_id='rsync',
#     bash_command=as_user(rsync_command, USER),
#     params={'klustadir': KLUSTA_DIR,
#             'mansortdir': MANSORT_DIR,
#             'mansorthost': MANSORT_HOST},
#     dag=dag)

email_me = EmailOperator(
    task_id='email_me',
    to=default_args['email'],
    subject='%s is complete' % dag_id,
    html_content='You may now manually sort on NIAO',
    dag=dag)

make_kwd_task.set_upstream(make_klusta_dir_task)
phy_task.set_upstream(make_kwd_task)
#merge_events_task.set_upstream(phy_task)
clear_phy_task.set_upstream(phy_task)
make_kwik_bak_dir_task.set_upstream(phy_task)
mv_kwik_bak_task.set_upstream(make_kwik_bak_dir_task)
# make_mansort_dir_task.set_upstream(phy_task)
# rsync_task.set_upstream(clear_phy_task)
# rsync_task.set_upstream(mv_kwik_bak_task)
# rsync_task.set_upstream(make_mansort_dir_task)
# email_me.set_upstream(rsync_task)
email_me.set_upstream(mv_kwik_bak_task)