}

dag = DAG(
    dag_id='test_example_bash_operator',
    default_args=args,
    schedule_interval='0 0 * * *',
    dagrun_timeout=timedelta(minutes=60))

cmd = 'ls -l'
run_this_last = DummyOperator(task_id='run_this_last', dag=dag)

run_this = BashOperator(
    task_id='run_after_loop',
    bash_command='echo 1',
    dag=dag)
run_this.set_downstream(run_this_last)

for i in range(3):
    i = str(i)
    task = BashOperator(
        task_id='runme_' + i,
        bash_command='echo "{{ task_instance_key_str }}" && sleep 1',
        dag=dag)
    task.set_downstream(run_this)

task = BashOperator(
    task_id='also_run_this',
    bash_command='echo "run_id={{ run_id }} | dag_run={{ dag_run }}"',
    dag=dag)
task.set_downstream(run_this_last)

if __name__ == "__main__":
    dag.cli()
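To sanity-check the dependency graph this DAG builds (each runme_* task feeds run_after_loop, which feeds run_this_last), a minimal sketch like the following can load it through Airflow's DagBag and assert the downstream links. It assumes the file above sits in the configured DAGs folder; the test name is illustrative.

from airflow.models import DagBag

def test_example_bash_operator_structure():
    dagbag = DagBag()  # parses DAG files from the configured dags folder
    dag = dagbag.get_dag('test_example_bash_operator')
    assert dag is not None

    # run_after_loop must feed into run_this_last
    assert 'run_this_last' in dag.get_task('run_after_loop').downstream_task_ids

    # each runme_* task must feed into run_after_loop
    for i in range(3):
        assert 'run_after_loop' in dag.get_task('runme_' + str(i)).downstream_task_ids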
dag = DAG(
    dag_id='restart_streamer',
    default_args=args,
    schedule_interval='0 3 21 * *',
    dagrun_timeout=timedelta(minutes=60),
    user_defined_macros={
        'yesterday_year': compute_yesterday_year,
        'src_dir': '/dest/colditz/tweetstreamer',
        'src_host': 'vm049',
        'real_user': '******'  # must be able to sudo to this user
    })

kill_streamer = BashOperator(
    task_id='kill_streamer',
    bash_command='sudo -u {{real_user}} ssh {{src_host}} rm {{src_dir}}/DeleteToKill.txt ',
    dag=dag,
)

start_streamer = BashOperator(
    task_id='start_streamer',
    bash_command='sudo -u {{real_user}} ssh {{src_host}} "nohup bash {{src_dir}}/runstreamer.sh </dev/null > /dev/null 2>&1 & " ',
    dag=dag,
    on_success_callback=notify_success_email)

kill_streamer >> start_streamer

if __name__ == "__main__":
    dag.cli()
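This DAG references compute_yesterday_year and notify_success_email, whose definitions are not shown above. The sketch below is purely illustrative of what such helpers might look like: a user-defined macro is just a callable exposed to Jinja templates, and an on_success_callback receives the task-instance context dict. The recipient address and the macro's exact behavior are assumptions, not the original code.

from datetime import datetime, timedelta
from airflow.utils.email import send_email

def compute_yesterday_year():
    # Hypothetical macro: the year of yesterday's date, usable as {{ yesterday_year() }}.
    return (datetime.utcnow() - timedelta(days=1)).year

def notify_success_email(context):
    # Hypothetical success callback: Airflow passes the template context dict.
    subject = "Airflow task {} succeeded".format(context['task_instance_key_str'])
    send_email(to='ops@example.com', subject=subject, html_content=subject)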
args = {
    'owner': 'Airflow',
    'start_date': airflow.utils.dates.days_ago(1),
}

dag_prjt_main = DAG(
    dag_id=DAG_NAME,
    default_args=args,
    schedule_interval='* * * * *'  # "@once"
)

SQOOP_Task1 = BashOperator(
    task_id='Sqoop',
    bash_command='~/sqoop-1.4.7.bin__hadoop-2.6.0/bin/sqoop job --exec Sqoop_weblogdetails_test37',
    dag=dag_prjt_main)

hive_cmd = """use test1;
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.max.dynamic.partitions=1000;
insert into weblog_partiton_table partition(host)
select id, datevalue, ipaddress, url, responsecode, host
from weblog_external as a
where not exists(select b.id from weblog_partiton_table as b where a.id = b.id);"""

hive_part = HiveOperator(
    hive_cli_conn_id='hive_cli_default',
    hql=hive_cmd,
    task_id='Hive',
    dag=dag_prjt_main)

finish_task = DummyOperator(task_id='finaltask', dag=dag_prjt_main)

SQOOP_Task1 >> hive_part >> finish_task

if __name__ == '__main__':
    dag_prjt_main.cli()
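The session-level use/set statements inside hive_cmd can also be passed through HiveOperator's schema and hiveconfs parameters instead of being embedded in the HQL string. The sketch below is an alternative formulation under the assumptions that the same connection id and table names apply and that the Airflow 1.x import path is in use; it is not the original task.

from airflow.operators.hive_operator import HiveOperator

hive_part_alt = HiveOperator(
    task_id='Hive_alt',
    hive_cli_conn_id='hive_cli_default',
    schema='test1',  # replaces the "use test1;" statement
    hiveconfs={      # passed to the Hive CLI as -hiveconf key=value pairs
        'hive.exec.dynamic.partition': 'true',
        'hive.exec.dynamic.partition.mode': 'nonstrict',
        'hive.exec.max.dynamic.partitions': '1000',
    },
    hql="""insert into weblog_partiton_table partition(host)
select id, datevalue, ipaddress, url, responsecode, host
from weblog_external as a
where not exists(select b.id from weblog_partiton_table as b where a.id = b.id);""",
    dag=dag_prjt_main)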