redshift_conn_id="redshift", aws_credentials_id="aws_credentials")
create_neighbourhoods_staging_table.set_upstream(start_operator)

# Staging loads: one StageToRedshiftOperator task per source file.
# Each task is given an S3 bucket/key (a *_clean.csv file), the STG_* table
# name it targets and method="csv", and is wired with set_upstream() so it
# runs only after the matching create_*_staging_table task (presumably the
# task that creates that table -- defined outside this section).

# listings_clean.csv -> STG_LISTINGS
stage_listings_to_redshift = StageToRedshiftOperator(
    task_id='Stage_listings',
    dag=dag,
    table="STG_LISTINGS",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    s3_bucket="capstone-project-airbnb",
    s3_key="listings_clean.csv",
    method="csv")
stage_listings_to_redshift.set_upstream(create_listings_staging_table)

# calendars_clean.csv -> STG_CALENDARS
stage_calendars_to_redshift = StageToRedshiftOperator(
    task_id='Stage_calendars',
    dag=dag,
    table="STG_CALENDARS",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    s3_bucket="capstone-project-airbnb",
    s3_key="calendars_clean.csv",
    method="csv")
stage_calendars_to_redshift.set_upstream(create_calendars_staging_table)

stage_reviews_to_redshift = StageToRedshiftOperator(
    task_id='Stage_reviews',
    dag=dag,
# create_staging_reviews_table runs after start_operator.
start_operator >> create_staging_reviews_table

# Staging loads: one StageToRedshiftOperator task per source file in the
# "udcdecapstone" bucket. Each load is placed downstream of the matching
# create_staging_*_table task (defined outside this section).
#
# NOTE(review): `provided_context` is forwarded exactly as written. Airflow's
# own keyword is spelled `provide_context` -- confirm that the custom
# StageToRedshiftOperator really expects this spelling.

# Listings: parquet file, no explicit `method` is passed.
stage_listings_to_redshift = StageToRedshiftOperator(
    dag=dag,
    task_id="Stage_Listings",
    table="staging_listings",
    s3_bucket="udcdecapstone",
    s3_path="airbnb_listing_austin_la.parquet",
    region="us-east-1",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    provided_context=True,
)
create_staging_listings_table >> stage_listings_to_redshift

# Calendar: CSV file, so method="csv" is passed explicitly.
stage_calendars_to_redshift = StageToRedshiftOperator(
    dag=dag,
    task_id="Stage_Calendar",
    table="staging_calendar",
    s3_bucket="udcdecapstone",
    s3_path="airbnb_calender_austin_la.csv",
    method="csv",
    region="us-east-1",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    provided_context=True,
)
create_staging_calendar_table >> stage_calendars_to_redshift
# Time dimension load: inserts into table "time" using
# SqlQueries.time_table_insert, with append_flag=False passed to the operator.
load_time_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id="Load_time_dim_table",
    table="time",
    query=SqlQueries.time_table_insert,
    append_flag=False,
    redshift_conn_id="redshift",
)

# Data-quality task; the checks themselves come from `dq_checks`, which is
# defined outside this section.
run_quality_checks = DataQualityOperator(
    dag=dag,
    task_id="Run_data_quality_checks",
    dq_checks=dq_checks,
    redshift_conn_id="redshift",
)

# Terminal no-op task marking the end of the DAG run.
end_operator = DummyOperator(dag=dag, task_id="Stop_execution")

# Dependencies, written upstream >> downstream:
#   start -> both staging loads -> songplays fact load
#         -> four dimension loads -> quality checks -> stop
start_operator >> [stage_events_to_redshift, stage_songs_to_redshift]
[stage_events_to_redshift, stage_songs_to_redshift] >> load_songplays_table
load_songplays_table >> [
    load_user_dimension_table,
    load_song_dimension_table,
    load_artist_dimension_table,
    load_time_dimension_table,
]
[
    load_user_dimension_table,
    load_song_dimension_table,
    load_artist_dimension_table,
    load_time_dimension_table,
] >> run_quality_checks
run_quality_checks >> end_operator