Esempio n. 1
0
### Load data to time dimension
This task populates the `dim_time` table. \
Dimension table is truncated before inserting data.

We run data validation that expects to find a total of \
24 rows for 366 days (2016 was a leap year).
"""

# Task that loads the weather fact table. The operator receives the
# Redshift connection id, the destination table name, and the SQL taken
# from SqlQueries.fact_weather_insert.
fact_weather_task = LoadFactOperator(
    task_id='load_weather_fact_table',
    dag=dag,
    dest_table="fact_weather",
    sql_query=SqlQueries.fact_weather_insert,
    redshift_conn_id=db_conn_name,
    provide_context=True,
)
# Markdown notes attached to the task through its doc_md attribute.
fact_weather_task.doc_md = (
    "### Load weather facts\n"
    "This task populates the `fact_weather` table with "
    "numerical data.\n"
)

# Data-quality task for the weather fact table. It is configured with the
# SqlQueries.row_count query, the table name, and the expected value;
# presumably the operator compares the query result to `equals` (verify
# against the DataQualityOperator implementation).
run_quality_checks = DataQualityOperator(
    task_id='run_data_quality_checks',
    dag=dag,
    table="fact_weather",
    sql_query=SqlQueries.row_count,
    # Hourly data for one 366-day year: 24 * 366 = 8784 rows expected.
    equals=24 * 366,
    redshift_conn_id=db_conn_name,
)
run_quality_checks.doc_md = """\
Esempio n. 2
0
This task populates the `dim_payment_types` table. \
Each task execution appends any rows that are not \
previously found in the table.
"""

# Task that loads the taxi trips fact table from a monthly staging table.
fact_trips_task = LoadFactOperator(
    task_id='load_trips_fact_table',
    dag=dag,
    dest_table="fact_trips",
    # Jinja template: reformats `yesterday_ds` from YYYY-MM-DD to YYYY_MM,
    # giving a name such as staging_trips_2016_01. Presumably rendered
    # because `source_table` is a templated field of the operator
    # (verify against LoadFactOperator).
    source_table=(
        'staging_trips_'
        '{{ macros.ds_format(yesterday_ds, "%Y-%m-%d", "%Y_%m") }}'
    ),
    sql_query=SqlQueries.fact_trips_insert,
    redshift_conn_id=db_conn_name,
    provide_context=True,
)
# Markdown notes attached to the task through its doc_md attribute.
fact_trips_task.doc_md = (
    "### Load taxi trips facts\n"
    "This task populates the `fact_trips` table with "
    "numerical data. The task writes the sum of "
    "rows in original table and staging table to a "
    "xcom called `expected_count`. This information "
    "is used in the next task when validating inserted data.\n"
)

# Validate data insert
run_quality_checks = DataQualityOperator(
    task_id='run_data_quality_checks',
    dag=dag,
    redshift_conn_id=db_conn_name,
    table="fact_trips",
    sql_query=SqlQueries.row_count,