# Populate the Austin/LA Airbnb fact table from the staged dimension data.
# FIX: the original passed `provided_context=True` — the Airflow kwarg is
# `provide_context` (as used by the other operators in this file); the
# misspelled name is rejected (or silently dropped) by BaseOperator.
load_table_fact_airbnb_austin_la = LoadFactOperator(
    task_id='Load_Fact_Airbnb_Austin_LA_Table',
    provide_context=True,
    dag=dag,
    aws_credentials_id='aws_credentials',
    redshift_conn_id='redshift',
    query=SqlQueries.load_fact_airbnb_austin_la_insert,
    operation='insert',
    table='FACT_AIRBNB_AUSTIN_LA')

# The fact table can only be (re)created once every dimension load has
# finished; set_upstream accepts a list, so wire all four in one call.
create_table_fact_airbnb.set_upstream([
    load_table_dim_hosts,
    load_table_dim_properties,
    load_table_dim_calendars,
    load_table_dim_reviews,
])

# Loading the fact table follows its creation.
load_table_fact_airbnb_austin_la.set_upstream(create_table_fact_airbnb)

## Run data quality checks to ensure records have been moved correctly
## through the platforms without any errors.
run_quality_checks = DataQualityOperator(
    task_id='Run_DATA_QUALITY_CHECKS',
    dag=dag,
    provide_context=True,
    redshift_conn_id='redshift',
    tables=[
        'DIM_HOSTS',
        'DIM_REVIEWS',
        'DIM_CALENDARS',
        'DIM_PROPERTIES',
        'FACT_AIRBNB_AUSTIN_LA',
    ])

# Quality checks run only after the fact table has been loaded.
run_quality_checks.set_upstream(load_table_fact_airbnb_austin_la)

# No-op terminal marker for this DAG.
end_operator = DummyOperator(dag=dag, task_id='END_TASK')

# Load the `time` dimension table, truncating any previous contents first
# so the load is idempotent.
load_time_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_time_dim_table',
    redshift_connection_id='redshift',
    table_target='time',
    query=SqlQueries.time_table_insert,
    truncate_before=True)

# Validate every star-schema table after the loads complete.
run_quality_checks = DataQualityOperator(
    dag=dag,
    task_id='Run_data_quality_checks',
    redshift_connection_id='redshift',
    tables=[
        'songplays',
        'users',
        'songs',
        'artists',
        'time',
    ])

# No-op terminal marker for this DAG.
end_operator = DummyOperator(dag=dag, task_id='Stop_execution')

# Task graph, expressed with Airflow's bitshift composition (a >> b is
# exactly b.set_upstream(a)):
#   start -> staging (events, songs) -> songplays fact
#         -> four dimension loads -> quality checks -> end
staging_tasks = [stage_events_to_redshift, stage_songs_to_redshift]
dimension_loads = [
    load_song_dimension_table,
    load_user_dimension_table,
    load_artist_dimension_table,
    load_time_dimension_table,
]

start_operator >> staging_tasks
staging_tasks >> load_songplays_table
load_songplays_table >> dimension_loads
dimension_loads >> run_quality_checks
run_quality_checks >> end_operator
# FIX: the scraped source lost this call's opening line (it was replaced by
# "Ejemplo n.º 3" / "0" artifacts), leaving a syntax error. The assignment
# target is reconstructed from the reference on the line that follows.
load_dim_calendars_table = LoadDimensionOperator(
    task_id='LOAD_DIM_CALENDARS_TABLE',
    dag=dag,
    query=SqlQueries.calendars_table_insert,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    operation="insert",
    table="DIM_CALENDARS")
# The calendars dimension is loaded only after its table has been created.
load_dim_calendars_table.set_upstream(create_dim_calendars_table)

# Create and load the Amsterdam Airbnb fact table in a single task.
create_load_fact_airbnb_amst_table = LoadFactOperator(
    dag=dag,
    task_id='Create_Load_FACT_AIRBNB_AMST_Table',
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    query=SqlQueries.CREATE_LOAD_FACT_AIRBNB_AMST)

# The fact task depends on all four dimension loads having finished;
# set_upstream accepts a list, so declare the fan-in in one call.
create_load_fact_airbnb_amst_table.set_upstream([
    load_dim_hosts_table,
    load_dim_reviews_table,
    load_dim_properties_table,
    load_dim_calendars_table,
])

## Run data quality checks to ensure that records have been moved correctly
## through the platforms without any errors.
run_quality_checks = DataQualityOperator(
    dag=dag,
    task_id='Run_DATA_QUALITY_Checks',
    redshift_conn_id="redshift")
run_quality_checks.set_upstream(create_load_fact_airbnb_amst_table)

## Dummy operator marking that the DAG has run successfully.
end_operator = DummyOperator(dag=dag, task_id='END_OPERATOR')
end_operator.set_upstream(run_quality_checks)