# Airflow 1.x-era import paths (the snippet uses the pre-1.10 s3_conn_id argument).
from datetime import datetime, timedelta

from airflow import DAG
from airflow.operators.bash_operator import BashOperator
from airflow.operators.sensors import S3KeySensor

default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2016, 11, 1),
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 5,
    'retry_delay': timedelta(minutes=5),
}

dag = DAG('s3_dag_test', default_args=default_args, schedule_interval='@once')

t1 = BashOperator(
    task_id='bash_test',
    bash_command='echo "hello world" > s3_conn_test.txt',
    dag=dag)

sensor = S3KeySensor(
    task_id='check_s3_for_file_in_s3',
    bucket_key='*',
    wildcard_match=True,
    bucket_name='airflow-input-sprite',
    s3_conn_id='aws_default',
    timeout=18 * 60 * 60,
    poke_interval=120,
    dag=dag)

# The bash task runs only after the sensor sees a matching key.
t1.set_upstream(sensor)
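For reference, later Airflow releases renamed `s3_conn_id` to `aws_conn_id` and moved the sensor into the Amazon provider package. A minimal sketch of the equivalent sensor under Airflow 2.x, reusing the bucket and connection ids from the example above:

# Airflow 2.x equivalent; assumes apache-airflow-providers-amazon is installed.
from airflow.providers.amazon.aws.sensors.s3 import S3KeySensor

sensor = S3KeySensor(
    task_id='check_s3_for_file_in_s3',
    bucket_key='*',
    wildcard_match=True,
    bucket_name='airflow-input-sprite',
    aws_conn_id='aws_default',  # replaces the removed s3_conn_id argument
    timeout=18 * 60 * 60,
    poke_interval=120,
    dag=dag,
)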
dag = DAG(
    'ml-pipeline',
    default_args=default_args,
    concurrency=1,
    description='A simple ML data pipeline DAG',
    schedule_interval='@daily',
)

t_export_bq_to_s3 = PythonOperator(
    task_id='export_bq_to_s3',
    python_callable=bq_to_s3,
    dag=dag,
    retries=1)

check_s3_for_key = S3KeySensor(
    task_id='check_s3_for_key',
    bucket_key=OUTPUT_FILE_KEY,
    wildcard_match=True,
    bucket_name=BUCKET_NAME,
    s3_conn_id='aws_default',
    timeout=20,
    poke_interval=5,
    dag=dag)

t_check_dataset_group = BranchPythonOperator(
    task_id='check_dataset_group',
    provide_context=True,
    python_callable=check_dataset_group,
    retries=1,
    dag=dag,
)

t_init_personalize = DummyOperator(
    task_id="init_personalize",
    trigger_rule=TriggerRule.ALL_SUCCESS,
    dag=dag,
)
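A `BranchPythonOperator` callable must return the `task_id` (or list of task_ids) of the branch to follow; downstream tasks not on the returned branch are skipped. A hedged sketch of what a callable like `check_dataset_group` might look like; the XCom key and the `skip_init` task id are illustrative, not the project's actual logic:

# Illustrative only: the operator follows whatever task_id the callable returns.
def check_dataset_group(**context):
    # Assumed signal: an earlier task pushed the dataset group ARN via XCom.
    dataset_group_arn = context['ti'].xcom_pull(key='dataset_group_arn')
    if dataset_group_arn:
        return 'skip_init'       # hypothetical task id
    return 'init_personalize'    # matches the DummyOperator above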
# Import paths as of Airflow 1.10, which introduced the aws_conn_id argument used below.
from datetime import datetime, timedelta

from airflow import DAG
from airflow.operators.bash_operator import BashOperator
from airflow.sensors.s3_key_sensor import S3KeySensor

default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2016, 11, 1),
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 5,
    'retry_delay': timedelta(minutes=5),
}

dag = DAG('s3_dag_test', default_args=default_args, schedule_interval='@once')

t1 = BashOperator(
    task_id='bash_test',
    bash_command='echo "hello, it should work" > s3_conn_test.txt',
    dag=dag)

sensor = S3KeySensor(
    task_id='check_s3_for_file_in_s3',
    bucket_key='XXX/YYY/ZZZ.xml',
    wildcard_match=True,
    bucket_name='{BUCKET_NAME}',  # unexpanded placeholder in the source
    timeout=18 * 60 * 60,
    poke_interval=120,
    aws_conn_id='s3_connection',
    dag=dag)

t1.set_upstream(sensor)
# Airflow 1.x-era import paths (the snippet uses the pre-1.10 s3_conn_id argument).
from datetime import datetime, timedelta

from airflow import DAG
from airflow.operators.bash_operator import BashOperator
from airflow.operators.sensors import S3KeySensor

default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2016, 11, 1),
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 5,
    'retry_delay': timedelta(minutes=5),
}

dag = DAG('s3_dag_test', default_args=default_args, schedule_interval='@once')

t1 = BashOperator(
    task_id='bash_test',
    bash_command='echo "hello, it should work" > s3_conn_test.txt',
    dag=dag)

# Watch a MinIO-backed bucket for any key, then run the bash task.
sensor = S3KeySensor(
    task_id='check_s3_for_file_in_s3',
    bucket_key='*',
    wildcard_match=True,
    bucket_name='uploadonly',
    s3_conn_id='minio_connection',
    timeout=18 * 60 * 60,
    poke_interval=120,
    dag=dag)

t1.set_upstream(sensor)
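The `minio_connection` id above has to exist in Airflow's connection store before the sensor can poke. One way to register it programmatically; a sketch with placeholder credentials and endpoint, and note that the extra key carrying the endpoint has varied across releases (`host` in older ones, `endpoint_url` in newer providers):

# Sketch: register a MinIO-backed S3 connection in the metadata database.
# Access keys and endpoint below are placeholders.
import json

from airflow import settings
from airflow.models import Connection

conn = Connection(
    conn_id='minio_connection',
    conn_type='aws',  # older versions used the 's3' connection type
    login='MINIO_ACCESS_KEY',
    password='MINIO_SECRET_KEY',
    extra=json.dumps({'endpoint_url': 'http://minio:9000'}),
)
session = settings.Session()
session.add(conn)
session.commit()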
"raw-ingester-out", "manifests", table, # tested using specific arguments "20190704", "15", "completed.manifest" ) query_log = SqlUtils.load_query(stage_sql_path).split("---") sensor = S3KeySensor( task_id="s3_key_sensor_{}_task".format(table), #bucket_key="raw-ingester-out/manifests/*", bucket_key=KEY_PATH, wildcard_match=True, bucket_name=BUCKET_NAME, aws_conn_id=AWS_CONN_ID, timeout=18*60*60, poke_interval=120 ) stage_adlogs_hourly_job = SnowflakeOperator( task_id="stage_logs_{}_hourly".format(table), snowflake_conn_id=SF_CONN_ID, warehouse=SF_WAREHOUSE, database=SF_DATABASE, sql=query_log, params={ "env": ENV, "team_name": TEAM_NAME },
from datetime import datetime, timedelta

from airflow import DAG
from airflow.operators import SimpleHttpOperator, HttpSensor, \
    BashOperator, EmailOperator, S3KeySensor

default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2019, 10, 29),
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 5,
    'retry_delay': timedelta(minutes=0.5),
}

dag = DAG('s3_dag_test', default_args=default_args, schedule_interval='@once')

t1 = BashOperator(
    task_id='bash_test',
    bash_command='echo "Hello, Billionaire!" > s3_conn_test.txt',
    dag=dag)

# No connection id is given, so the sensor falls back to the default
# 'aws_default' connection.
sensor = S3KeySensor(
    task_id='check_for_file_in_s3',
    bucket_key='file-to-watch-*',
    wildcard_match=True,
    bucket_name='intellia-sensor-bucket',
    timeout=18 * 60 * 60,
    poke_interval=120,
    dag=dag)

t1.set_upstream(sensor)
templated_command = """
{% for i in range(5) %}
    echo "{{ ds }}"
    echo "{{ macros.ds_add(ds, 7) }}"
    echo "{{ params.my_param }}"
{% endfor %}
"""

t3 = BashOperator(
    task_id="templated",
    bash_command=templated_command,
    params={"my_param": "Parameter I passed in"},
    dag=dag,
)

s3sensor = S3KeySensor(
    task_id='new_s3_file_in_fanta-bucket',
    bucket_key='*',
    wildcard_match=True,
    bucket_name='airflow-input-fanta',
    s3_conn_id='My-funky-s3-connector',
    timeout=18 * 60 * 60,
    poke_interval=120,
    dag=dag)

t2.set_upstream(t1)
t3.set_upstream(t1)
s3sensor >> t1
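The last three lines mix both dependency styles Airflow supports: the `set_upstream`/`set_downstream` methods and the bitshift operators are interchangeable ways to declare the same edge.

# All three statements declare the same dependency: s3sensor runs before t1.
t1.set_upstream(s3sensor)
s3sensor.set_downstream(t1)
s3sensor >> t1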
    print(resp)
    return "OK"


with DAG(
    dag_id=DAG_ID,
    default_args=DEFAULT_ARGS,
    dagrun_timeout=timedelta(hours=2),
    start_date=days_ago(2),
    schedule_interval=None,
    tags=['athena', 'redshift'],
) as dag:

    check_s3_for_key = S3KeySensor(
        task_id='check_s3_for_key',
        bucket_key=s3_key,
        wildcard_match=True,
        bucket_name=s3_bucket_name,
        aws_conn_id='aws_default',  # this sensor takes aws_conn_id, not s3_conn_id
        timeout=20,
        poke_interval=5,
        dag=dag
    )

    files_to_s3 = PythonOperator(
        task_id="files_to_s3",
        python_callable=download_zip
    )

    create_athena_movie_table = AWSAthenaOperator(
        task_id="create_athena_movie_table",
        query=create_athena_movie_table_query,
        database=athena_db,
        output_location=f's3://{s3_bucket_name}/{athena_results}create_athena_movie_table')

    create_athena_ratings_table = AWSAthenaOperator(
        task_id="create_athena_ratings_table",
        query=create_athena_ratings_table_query,
        database=athena_db,
        output_location=f's3://{s3_bucket_name}/{athena_results}create_athena_ratings_table')

    create_athena_tags_table = AWSAthenaOperator(
        task_id="create_athena_tags_table",
        query=create_athena_tags_table_query,
        database=athena_db,
        output_location=f's3://{s3_bucket_name}/{athena_results}create_athena_tags_table')
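The excerpt ends before any dependencies are declared. One plausible wiring for these tasks, placed inside the `with DAG` block, would run the download first, wait for the key, then build the three Athena tables in parallel; this ordering is an assumption for illustration, not taken from the source:

    # Assumed ordering, for illustration only: the excerpt does not show
    # the project's actual dependency graph.
    files_to_s3 >> check_s3_for_key >> [
        create_athena_movie_table,
        create_athena_ratings_table,
        create_athena_tags_table,
    ]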