"""
### Create tables DAG

This DAG creates the dim and fact tables
required by the ETL and should be run first.
"""
from datetime import datetime
from airflow import DAG
# Airflow 1.x import path; on Airflow 2.x the operator lives in
# airflow.providers.postgres.operators.postgres
from airflow.operators.postgres_operator import PostgresOperator

default_args = {'owner': 'Tommi Ranta', 'start_date': datetime(2016, 1, 1)}

dag = DAG(
    '01_taxi_create_tables',
    default_args=default_args,
    description='Create Taxi ETL tables in Redshift with Airflow',
    schedule_interval="@once",
    template_searchpath=['/home/workspace/airflow', '/usr/local/airflow/'])
dag.doc_md = __doc__

create_tables_task = PostgresOperator(task_id='create_tables',
                                      dag=dag,
                                      sql='create_tables.sql',
                                      postgres_conn_id='redshift')
create_tables_task.doc_md = """\
### Create tables
PostgresOperator that executes the `create_tables.sql` SQL script.
"""

# Single-task DAG, so there are no dependencies to declare.
create_tables_task
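
# A quick local sanity check (run separately, not part of the DAG): render
# `create_tables.sql` the way the operator would, by searching the same
# template_searchpath directories. A minimal sketch assuming the file
# exists in one of those directories; jinja2 ships with Airflow.
from jinja2 import Environment, FileSystemLoader

env = Environment(loader=FileSystemLoader(
    ['/home/workspace/airflow', '/usr/local/airflow/']))
print(env.get_template('create_tables.sql').render())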
Example #2
# Fragment from a larger DAG file: `dag`, `db_conn_name`,
# `staging_weather_tables`, `start_operator`, and the staging task lists
# wired up below are all defined earlier in that file.
# Add a task that drops the staging tables for all weather data.
sql_query = """
    {% for table, _ in params.tables.items() %}
    DROP TABLE staging_weather_{{ table }};
    {% endfor %}
    DROP TABLE staging_weather_precipitation;
    DROP TABLE staging_holidays;
"""
staging_table_remove_task = PostgresOperator(
    task_id='staging_weather_remove',
    dag=dag,
    postgres_conn_id=db_conn_name,
    sql=sql_query,
    params={'tables': staging_weather_tables})
staging_table_remove_task.doc_md = """
### Remove staging tables
This task removes the staging tables now that all \
previous tasks have finished.

The DROP TABLE statements are generated dynamically from the \
keys of the `staging_weather_tables` dictionary, in addition \
to the precipitation and holiday staging tables.
"""

end_operator = DummyOperator(task_id='stop_execution', dag=dag)

# Build the DAG task dependencies
start_operator >> staging_create_tasks
staging_create_tasks >> staging_to_copy_dummy
staging_to_copy_dummy >> staging_copy_tasks
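
# For context, a hypothetical sketch of how the staging task lists used in
# the chain above might be built from the same `staging_weather_tables`
# dict (task ids and SQL file names here are illustrative assumptions,
# not the original code):
#
#   staging_create_tasks = [
#       PostgresOperator(
#           task_id='staging_weather_create_{}'.format(table),
#           dag=dag,
#           postgres_conn_id=db_conn_name,
#           sql='create_staging_{}.sql'.format(table))
#       for table in staging_weather_tables]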