""" ### Create tables DAG This DAG creates the dim and fact tables required by the ETL and should be run first. """ from datetime import datetime from airflow import DAG from airflow.operators.postgres_operator import PostgresOperator default_args = {'owner': 'Tommi Ranta', 'start_date': datetime(2016, 1, 1)} dag = DAG( '01_taxi_create_tables', default_args=default_args, description='Create Taxi ETL tables in Redshift with Airflow', schedule_interval="@once", template_searchpath=['/home/workspace/airflow', '/usr/local/airflow/']) dag.doc_md = __doc__ create_tables_task = PostgresOperator(task_id='create_tables', dag=dag, sql='create_tables.sql', postgres_conn_id='redshift') create_tables_task.doc_md = """\ ### Create tables PostgreOperator that executes the `create_tables.sql` SQL script. """ create_tables_task
# Drop the weather/holiday staging tables now that all upstream tasks are done.
#
# NOTE: Airflow exposes a task's user-supplied ``params`` dict to Jinja under
# the ``params`` key, so the template must reference ``params.tables`` — a bare
# ``tables`` is undefined at render time and fails the task. ``IF EXISTS``
# makes the cleanup idempotent if a staging table was never created.
sql_query = """
{% for table, _ in params.tables.items() %}
DROP TABLE IF EXISTS staging_weather_{{ table }};
{% endfor %}
DROP TABLE IF EXISTS staging_weather_precipitation;
DROP TABLE IF EXISTS staging_holidays;
"""

staging_table_remove_task = PostgresOperator(
    task_id='staging_weather_remove',
    dag=dag,
    postgres_conn_id=db_conn_name,
    sql=sql_query,
    # Rendered into the Jinja template above as ``params.tables``.
    params={'tables': staging_weather_tables})
staging_table_remove_task.doc_md = """
### Remove staging tables
This task removes the staging tables now that we have \
finished all previous tasks.
DROP TABLE queries are dynamically created based on the \
keys in the `staging_weather_tables` dictionary in addition \
to the precipitation and holiday staging tables.
"""

# No-op terminal marker for the DAG.
end_operator = DummyOperator(task_id='stop_execution', dag=dag)

# Build the DAG task dependencies
start_operator >> staging_create_tasks
staging_create_tasks >> staging_to_copy_dummy
staging_to_copy_dummy >> staging_copy_tasks