Exemple #1
0
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials")
# DAG edge: start_operator is upstream of create_neighbourhoods_staging_table.
start_operator.set_downstream(create_neighbourhoods_staging_table)

##Loading original events into STG Tables - DAGS

# Staging task for the listings data: targets table "STG_LISTINGS" and is
# pointed at key "listings_clean.csv" in bucket "capstone-project-airbnb",
# with method "csv".
# NOTE(review): the actual load logic lives in StageToRedshiftOperator, which
# is defined outside this section -- confirm how it interprets these arguments.
stage_listings_to_redshift = StageToRedshiftOperator(
    dag=dag,
    task_id="Stage_listings",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    s3_bucket="capstone-project-airbnb",
    s3_key="listings_clean.csv",
    method="csv",
    table="STG_LISTINGS",
)
# DAG edge: runs downstream of the create_listings_staging_table task.
create_listings_staging_table.set_downstream(stage_listings_to_redshift)

# Staging task for the calendars data: targets table "STG_CALENDARS" and is
# pointed at key "calendars_clean.csv" in bucket "capstone-project-airbnb",
# with method "csv".
# NOTE(review): the actual load logic lives in StageToRedshiftOperator, which
# is defined outside this section -- confirm how it interprets these arguments.
stage_calendars_to_redshift = StageToRedshiftOperator(
    dag=dag,
    task_id="Stage_calendars",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    s3_bucket="capstone-project-airbnb",
    s3_key="calendars_clean.csv",
    method="csv",
    table="STG_CALENDARS",
)
# DAG edge: runs downstream of the create_calendars_staging_table task.
create_calendars_staging_table.set_downstream(stage_calendars_to_redshift)

stage_reviews_to_redshift = StageToRedshiftOperator(
    task_id='Stage_reviews',
    dag=dag,
# DAG edge: start_operator is upstream of create_staging_reviews_table.
start_operator.set_downstream(create_staging_reviews_table)

## Loading original data into Staging Tables - DAGs

# Staging task for the listings data: targets table "staging_listings" and is
# pointed at path "airbnb_listing_austin_la.parquet" in bucket "udcdecapstone"
# (region "us-east-1"). No `method` is passed here, so the operator's own
# default applies -- NOTE(review): confirm that default suits a .parquet file.
stage_listings_to_redshift = StageToRedshiftOperator(
    dag=dag,
    task_id="Stage_Listings",
    provided_context=True,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    s3_bucket="udcdecapstone",
    s3_path="airbnb_listing_austin_la.parquet",
    region="us-east-1",
    table="staging_listings",
)

# DAG edge: runs downstream of the create_staging_listings_table task.
create_staging_listings_table.set_downstream(stage_listings_to_redshift)

# Staging task for the calendar data: targets table "staging_calendar" and is
# pointed at path "airbnb_calender_austin_la.csv" in bucket "udcdecapstone"
# (region "us-east-1"), with method "csv".
# NOTE(review): the actual load logic lives in StageToRedshiftOperator, which
# is defined outside this section -- confirm how it interprets these arguments.
stage_calendars_to_redshift = StageToRedshiftOperator(
    dag=dag,
    task_id="Stage_Calendar",
    provided_context=True,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    s3_bucket="udcdecapstone",
    s3_path="airbnb_calender_austin_la.csv",
    method="csv",
    region="us-east-1",
    table="staging_calendar",
)

# DAG edge: runs downstream of the create_staging_calendar_table task.
create_staging_calendar_table.set_downstream(stage_calendars_to_redshift)
# Dimension-load task for the "time" table, driven by the query
# SqlQueries.time_table_insert.
# NOTE(review): append_flag=False presumably selects a replace-style load
# rather than an append -- confirm against LoadDimensionOperator.
load_time_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id="Load_time_dim_table",
    redshift_conn_id="redshift",
    query=SqlQueries.time_table_insert,
    table="time",
    append_flag=False,
)

# Data-quality task: hands the `dq_checks` collection (defined outside this
# section) to DataQualityOperator, using the "redshift" connection id.
run_quality_checks = DataQualityOperator(
    dag=dag,
    task_id="Run_data_quality_checks",
    redshift_conn_id="redshift",
    dq_checks=dq_checks,
)

# No-op marker task that closes the DAG.
end_operator = DummyOperator(
    dag=dag,
    task_id="Stop_execution",
)

# Dependencies
#
# Resulting task graph:
#   start_operator
#     -> stage_events_to_redshift, stage_songs_to_redshift
#     -> load_songplays_table
#     -> load_{user,song,artist,time}_dimension_table
#     -> run_quality_checks
#     -> end_operator
#
# Fan-out edges are declared from the upstream task; fan-in edges from the
# downstream task.
start_operator.set_downstream(
    [stage_events_to_redshift, stage_songs_to_redshift])
load_songplays_table.set_upstream(
    [stage_events_to_redshift, stage_songs_to_redshift])
load_songplays_table.set_downstream([
    load_user_dimension_table,
    load_song_dimension_table,
    load_artist_dimension_table,
    load_time_dimension_table,
])
run_quality_checks.set_upstream([
    load_user_dimension_table,
    load_song_dimension_table,
    load_artist_dimension_table,
    load_time_dimension_table,
])
run_quality_checks.set_downstream([end_operator])