def load_dimensional_tables_dag(parent_dag_name, task_id, redshift_conn_id,
                                aws_credentials_id, table, sql_query,
                                *args, **kwargs):
    """Return a sub-DAG that loads one Redshift dimension table.

    The sub-DAG is named ``f"{parent_dag_name}.{task_id}"`` and holds a single
    ``LoadDimensionOperator`` task called ``load_<table>_dim_table``.

    :param parent_dag_name: name of the parent DAG (first part of the sub-DAG id)
    :param task_id: id of the sub-DAG task (second part of the sub-DAG id)
    :param redshift_conn_id: forwarded unchanged to ``LoadDimensionOperator``
    :param aws_credentials_id: forwarded unchanged to ``LoadDimensionOperator``
    :param table: dimension table name; forwarded to the operator and used
        to build the operator's task id
    :param sql_query: forwarded unchanged to ``LoadDimensionOperator``
        (presumably the insert statement fed from the staging tables --
        confirm against the operator)
    :param args: accepted for call compatibility; not used
    :param kwargs: forwarded to the ``DAG`` constructor
    :return: the sub-DAG containing the load task
    """
    dag = DAG(f"{parent_dag_name}.{task_id}", **kwargs)

    # The operator is attached to the sub-DAG through its ``dag`` argument,
    # so it does not need to be referenced again after construction.
    LoadDimensionOperator(
        task_id=f"load_{table}_dim_table",
        dag=dag,
        table=table,
        redshift_conn_id=redshift_conn_id,
        aws_credentials_id=aws_credentials_id,
        sql_query=sql_query,
    )

    return dag
def load_dimension_tables_dag(parent_dag_name, task_id, redshift_conn_id,
                              table, append_data, insert_sql_stmt,
                              *args, **kwargs):
    """
    Build a reusable subDAG wrapping the custom LoadDimensionOperator.

    The subDAG contains one task, ``Load_<table>_dim_table``.

    :param parent_dag_name: name of the parent DAG
    :type parent_dag_name: string
    :param task_id: id of the subDAG task inside the parent DAG
    :type task_id: string
    :param redshift_conn_id: forwarded to the operator as ``redshift_conn_id``
    :type redshift_conn_id: string
    :param table: dimension table name; forwarded to the operator and used
        in the operator's task id
    :type table: string
    :param append_data: forwarded to the operator as ``append_data``
        (per the original notes, True means rows are appended -- confirm
        against the operator)
    :type append_data: Boolean
    :param insert_sql_stmt: forwarded to the operator as ``sql``
    :type insert_sql_stmt: string
    :param args: accepted but not used
    :param kwargs: forwarded to the ``DAG`` constructor
    :return: the subDAG holding the load task
    """
    # SubDAG ids follow the "<parent>.<child>" naming convention.
    subdag_id = f"{parent_dag_name}.{task_id}"
    subdag = DAG(subdag_id, **kwargs)

    # Everything the operator needs, gathered in one place for readability.
    operator_settings = {
        "task_id": f'Load_{table}_dim_table',
        "dag": subdag,
        "redshift_conn_id": redshift_conn_id,
        "table": table,
        "append_data": append_data,
        "sql": insert_sql_stmt,
    }
    LoadDimensionOperator(**operator_settings)

    return subdag
def load_dim_subdag(
    parent_dag_name: str,
    task_id: str,
    redshift_conn_id: str,
    sql_statement: str,
    do_truncate: bool,
    table_name: str,
    **kwargs,
):
    """
    Subdag factory built around LoadDimensionOperator.

    The returned subdag is named f'{parent_dag_name}.{task_id}' and holds a
    single LoadDimensionOperator task whose task id is also `task_id`.

    Subdag related arguments:
    - parent_dag_name -- Parent DAG name
    - task_id -- Task ID of the subdag (reused as the operator's task id)

    Operator related arguments:
    redshift_conn_id -- forwarded to the operator as `redshift_conn_id`
    sql_statement -- forwarded to the operator as `sql_query`
    do_truncate -- forwarded to the operator as `do_truncate`
    table_name -- forwarded to the operator as `table_name`

    Any additional keyword arguments are forwarded to the DAG constructor.
    """
    subdag = DAG(f'{parent_dag_name}.{task_id}', **kwargs)

    # The dag= argument attaches the task to the subdag, so the operator
    # instance does not need to be kept in a variable.
    LoadDimensionOperator(
        dag=subdag,
        task_id=task_id,
        table_name=table_name,
        sql_query=sql_statement,
        do_truncate=do_truncate,
        redshift_conn_id=redshift_conn_id,
    )

    return subdag
def load_dimensions_subdag(parent_dag_name, task_id, redshift_conn_id,
                           dimension_tables_config, args, append=True,
                           **kwargs):
    """Create a subdag with one LoadDimensionOperator per configured table.

    Args:
        parent_dag_name (str): name of the parent DAG
        task_id (str): id of the subdag task in the parent DAG
        redshift_conn_id (str): Airflow connection name; handed to each
            operator as ``postgres_conn_id``
        dimension_tables_config (dict): maps table name to the insert sql
            statement for that table
        args: used as the subdag's ``default_args``
        append (bool, optional): forwarded to each operator as ``append``
            (per the original notes, False truncates before inserting --
            confirm against the operator). Defaults to True.
        **kwargs: accepted but not used.

    Returns:
        DAG: subdag named ``<parent_dag_name>.<task_id>``, scheduled
        ``@daily`` with ``start_date=days_ago(2)`` and ``max_active_runs=1``,
        holding a ``Load_<table>_dim_table`` task for every configured table
    """
    subdag = DAG(
        dag_id=f"{parent_dag_name}.{task_id}",
        default_args=args,
        start_date=days_ago(2),
        schedule_interval="@daily",
        max_active_runs=1,
    )

    # One load task per (table, statement) pair; dag= attaches each task.
    for table, insert_sql in dimension_tables_config.items():
        LoadDimensionOperator(
            task_id=f'Load_{table}_dim_table',
            dag=subdag,
            postgres_conn_id=redshift_conn_id,
            table=table,
            append=append,
            sql=insert_sql,
        )

    return subdag
columns= """airline_name,link,title,author,author_country,review_date,review_content,aircraft,seat_layout,date_flown, cabin_flown,type_traveller,overall_rating,seat_legroom_rating,seat_recline_rating,seat_width_rating, aisle_space_rating,viewing_tv_rating,power_supply_rating,seat_storage_rating,recommended""", redshift_conn_id='redshift', aws_credentials_id='aws_credentials', s3_bucket='skytrax-warehouse', s3_key='source-data/seat.csv', copy_extra= "FORMAT AS CSV REGION 'us-east-2' TRUNCATECOLUMNS EMPTYASNULL BLANKSASNULL ACCEPTANYDATE DATEFORMAT 'auto' IGNOREHEADER 1" ) load_passengers_dimension_table = LoadDimensionOperator( task_id='Load_passengers_dim_table', dag=dag, append_only=False, table='passengers', redshift_conn_id='redshift', sql=SqlQueries.passengers_table_insert) load_airports_dimension_table = LoadDimensionOperator( task_id='Load_airports_dim_table', dag=dag, append_only=False, table='airports', redshift_conn_id='redshift', sql=SqlQueries.airports_table_insert) load_airlines_dimension_table = LoadDimensionOperator( task_id='Load_airlines_dim_table', dag=dag,
load_weather_and_air_quality_data = LoadDataOperator( task_id="Load_weather_and_air_quality_data", dag=dag, postgres_conn_id="postgres", open_aq_conn="open_aq", open_weather_conn="open_weather", app_id=api_key, limit=60, date=datetime(2020, 7, 4, 0, 0), ) load_time_dimension_table = LoadDimensionOperator( task_id="Load_time_dim_table", dag=dag, postgres_conn_id="postgres", table="time_dim_table", sql=SqlQueries.insert_time, ) load_weather_dimension_table = LoadDimensionOperator( task_id="Load_weather_dim_table", dag=dag, postgres_conn_id="postgres", table="weather_dim_table", sql=SqlQueries.insert_weather, values=["main", "description"], ) load_measures_fact_table = LoadFactOperator( task_id="Load_measures_fact_table",
s3_bucket="udacity-dend", s3_key="song_data", region="us-west-2", ignore_headers=0, data_format="json", jsonpaths="") load_songplays_table = LoadFactOperator(task_id='Load_songplays_fact_table', dag=dag, redshift_conn_id="redshift", sql=SqlQueries.songplay_table_insert) load_user_dimension_table = LoadDimensionOperator( task_id='Load_user_dim_table', dag=dag, redshift_conn_id="redshift", table="users", sql=SqlQueries.user_table_insert, update_strategy="overwrite") load_song_dimension_table = LoadDimensionOperator( task_id='Load_song_dim_table', dag=dag, redshift_conn_id="redshift", table="songs", sql=SqlQueries.song_table_insert, update_strategy="overwrite") load_artist_dimension_table = LoadDimensionOperator( task_id='Load_artist_dim_table', dag=dag,
AWS='aws_credentials', table='staging_songs', s3_bucket='udacity-dend', s3_key='song_data') load_songplays_table = LoadFactOperator( task_id='Load_songplays_fact_table', dag=dag, conn_id='redshift', table="songplays", insert_sql=SqlQueries.songplay_table_insert) load_user_dimension_table = LoadDimensionOperator( task_id='Load_user_dim_table', dag=dag, table="users", conn_id='redshift', insert_sql=SqlQueries.user_table_insert, truncate=False, primary_key="userid") load_time_dimension_table = LoadDimensionOperator( task_id='Load_time_dim_table', dag=dag, conn_id='redshift', insert_sql=SqlQueries.time_table_insert, truncate=False, primary_key=None, table="time") load_song_dimension_table = LoadDimensionOperator( task_id='Load_song_dim_table',
, redshift_conn_id=redshift_conn_id , dag=dag , task_id='Load_Candidate_Fact_Table' ) city_fact_table = LoadFactOperator(table='city_fact' , sql_query=SqlQueries.city_fact_insert , redshift_conn_id=redshift_conn_id , dag=dag , task_id='Load_City_Fact_Table' ) # Load from stage to dimension candidate_dim_table = LoadDimensionOperator(table='candidate_dim' , sql_query=SqlQueries.candidate_dim_insert , redshift_conn_id=redshift_conn_id , dag=dag , task_id='Load_Candidate_Dimension_Table' ) student_dim_table = LoadDimensionOperator(table='student_dim' , sql_query=SqlQueries.student_dim_insert , redshift_conn_id=redshift_conn_id , dag=dag , task_id='Load_Student_Dimension_Table' ) special_dim_table = LoadDimensionOperator(table='special_dim' , sql_query=SqlQueries.special_dim_insert , redshift_conn_id=redshift_conn_id , dag=dag , task_id='Load_Special_Dimension_Table' ) city_dim_table = LoadDimensionOperator(table='city_dim'
table="staging_songs", json_option="auto", provide_context=True, dag=dag) load_songplays_table = LoadFactOperator( task_id='Load_songplays_fact_table', redshift_conn_id="redshift", table="songplays", sql_query=SqlQueries.songplays_table_insert, dag=dag) load_users_dimension_table = LoadDimensionOperator( task_id='Load_users_dim_table', redshift_conn_id="redshift", table="users", sql_query=SqlQueries.users_table_insert, mode='truncate', dag=dag) load_songs_dimension_table = LoadDimensionOperator( task_id='Load_songs_dim_table', redshift_conn_id="redshift", table="songs", sql_query=SqlQueries.songs_table_insert, mode='truncate', dag=dag) load_artists_dimension_table = LoadDimensionOperator( task_id='Load_artists_dim_table', redshift_conn_id="redshift",
s3_key='song_data', json_path_option='auto' ) load_songplays_table = LoadFactOperator( task_id='Load_songplays_fact_table', dag=dag, redshift_conn_id='redshift', table='songplays', sql_query=SqlQueries.songplay_table_insert ) load_user_dimension_table = LoadDimensionOperator( task_id='Load_user_dim_table', dag=dag, redshift_conn_id='redshift', table='users', sql_query=SqlQueries.user_table_insert, insert_mode='truncate' ) load_song_dimension_table = LoadDimensionOperator( task_id='Load_song_dim_table', dag=dag, redshift_conn_id='redshift', table='songs', sql_query=SqlQueries.song_table_insert, insert_mode='truncate' ) load_artist_dimension_table = LoadDimensionOperator( task_id='Load_artist_dim_table',
load_visitor_arrivals_mapped_staging_table = LoadFactOperator( task_id='Load_fact_visitor_arrivals_mapped_staging_table', dag=dag, redshift_conn_id='redshift', table='staging_visitor_arrivals_mapped', select_query=SqlQueries.visitor_arrival_mapped_staging_table_insert, truncate_insert=True ) staged_operator = DummyOperator(task_id='All_staged', dag=dag) load_port_dimension_table = LoadDimensionOperator( task_id='Load_dim_port_table', dag=dag, redshift_conn_id='redshift', table='dim_port', select_query=SqlQueries.port_table_insert, truncate_insert=True ) load_us_city_dimension_table = LoadDimensionOperator( task_id='Load_dim_us_city_table', dag=dag, redshift_conn_id='redshift', table='dim_us_city', select_query=SqlQueries.us_city_table_insert, truncate_insert=True ) load_us_state_dimension_table = LoadDimensionOperator( task_id='Load_dim_us_state_table',
aws_credentials_id=AIRFLOW_AWS_CREDENTIALS_ID, target_table=target_songs_table, s3_bucket=S3_BUCKET, s3_key=S3_SONGS_KEY, default_args=default_args)) load_songplays_table_task = LoadFactOperator( task_id='Load_songplays_fact_table', redshift_conn_id=AIRFLOW_REDSHIFT_CONN_ID, final_table=facts_songplays_table_name, dql_sql=SqlQueries.songplay_table_insert, dag=dag) load_user_dimension_table = LoadDimensionOperator( task_id='Load_user_dim_table', redshift_conn_id=AIRFLOW_REDSHIFT_CONN_ID, final_table=dim_users_table_name, dql_sql=SqlQueries.user_table_insert, dag=dag) load_song_dimension_table = LoadDimensionOperator( task_id='Load_song_dim_table', redshift_conn_id=AIRFLOW_REDSHIFT_CONN_ID, final_table=dim_songs_table_name, dql_sql=SqlQueries.song_table_insert, dag=dag) load_artist_dimension_table = LoadDimensionOperator( task_id='Load_artist_dim_table', redshift_conn_id=AIRFLOW_REDSHIFT_CONN_ID, final_table=dim_artists_table_name, dql_sql=SqlQueries.artist_table_insert,
s3_key='song_data/A/A/A', table='staging_songs', copy_json_option='auto', dag=dag) load_songplays_table = LoadFactOperator( task_id='Load_songplays_fact_table', redshift_conn_id='redshift', table='songplays', load_sql_stmt=SqlQueries.songplay_table_insert, dag=dag) load_user_dimension_table = LoadDimensionOperator( task_id='Load_user_dim_table', redshift_conn_id='redshift', table='users', truncate_table=True, load_sql_stmt=SqlQueries.user_table_insert, dag=dag) load_song_dimension_table = LoadDimensionOperator( task_id='Load_song_dim_table', redshift_conn_id='redshift', table='songs', truncate_table=True, load_sql_stmt=SqlQueries.song_table_insert, dag=dag) load_artist_dimension_table = LoadDimensionOperator( task_id='Load_artist_dim_table', redshift_conn_id='redshift',
target_table=table_name_staging_game_match, s3_bucket=S3_BUCKET, s3_key=S3_TRANSFORMED_RAW_MATCH_DATA_KEY, dag=dag, ) # load_summoner_dimension_table_task = LoadDimensionOperator( # task_id="Load_Summoner_Dimension_Table_Task", # redshift_conn_id=AWS_REDSHIFT_CONN_ID, # final_table="", # dql_sql=SqlDmls.summoner_table_insert, # dag=dag, # ) load_champion_dimension_table_task = LoadDimensionOperator( task_id="Load_Champion_Dimension_Table_Task", redshift_conn_id=AWS_REDSHIFT_CONN_ID, final_table="", dql_sql=SqlDmls.champion_table_insert, # dag=dag, ) load_item_dimension_table_task = LoadDimensionOperator( task_id="Load_Item_Dimension_Table_Task", redshift_conn_id=AWS_REDSHIFT_CONN_ID, final_table="", dql_sql=SqlDmls.item_table_insert, # dag=dag, ) load_fact_match_table_task = LoadFactOperator( task_id="Load_Fact_Tables_Task", redshift_conn_id=AWS_REDSHIFT_CONN_ID, final_table=table_name_fact_game_match, dql_sql=SqlDmls.match_table_insert,
# For every load task below, `delect_or_append` takes either 'append'
# (add rows to the existing data) or 'delete' (truncate the table first,
# then add the rows).
# NOTE(review): `delect_or_append` looks like a misspelling of
# "delete_or_append"; it is kept as written because it has to match the
# parameter name the operators declare -- confirm against their definitions.

# Fact table: rows are appended.
load_songplays_table = LoadFactOperator(
    dag=dag,
    task_id='Load_songplays_fact_table',
    redshift_conn_id='redshift',
    table='songplays',
    fact_query=SqlQueries.songplay_table_insert,
    delect_or_append='append',
)

# Dimension tables: each table is truncated and then reloaded.
load_user_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_user_dim_table',
    redshift_conn_id='redshift',
    table='users',
    dimension_query=SqlQueries.user_table_insert,
    delect_or_append='delete',
)

load_song_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_song_dim_table',
    redshift_conn_id='redshift',
    table='songs',
    dimension_query=SqlQueries.song_table_insert,
    delect_or_append='delete',
)
insert_stmt=SqlQueries.final_immigration_table_insert) run_quality_checks_final_immigration_data = DataQualityOperator( task_id="run_quality_checks_final_immigration_data", dag=dag, redshift_conn_id="redshift", dq_checks=[{ 'check_sql': "SELECT COUNT(*) FROM final_immigration WHERE cicid is null", 'expected_result': 0 }]) create_D_CITY_DEMO = LoadDimensionOperator( task_id="create_D_CITY_DEMO", dag=dag, redshift_conn_id="redshift", append_data=True, table="D_CITY_DEMO", create_stmt=SqlQueries.create_table_D_CITY_DEMO, insert_stmt=SqlQueries.D_CITY_DEMO_INSERT) create_d_airport = LoadDimensionOperator( task_id="create_d_airport", dag=dag, redshift_conn_id="redshift", append_data=True, table="D_AIRPORT", create_stmt=SqlQueries.create_table_D_AIRPORT, insert_stmt=SqlQueries.D_AIRPORT_INSERT) create_d_time = LoadDimensionOperator( task_id="create_d_time",
# Fact table load; it runs once both staging tasks are done (see the
# dependency wiring at the bottom).
load_songplays_table = LoadFactOperator(
    dag=dag,
    task_id='load_songplays_fact_table',
    redshift_conn_id="redshift",
    table="songplays",
    data_source=SqlQueries.songplay_table_insert,
)

# (table name, source query) pairs shared by the dimension load and the
# data quality check so both work on the same set of tables.
dim_tables_and_sources = [
    ("users", SqlQueries.user_table_insert),
    ("songs", SqlQueries.song_table_insert),
    ("artists", SqlQueries.artist_table_insert),
    ("time", SqlQueries.time_table_insert),
]

# A single task receives every dimension table in the list above.
load_dimension_tables = LoadDimensionOperator(
    dag=dag,
    task_id='load_dim_tables',
    redshift_conn_id="redshift",
    tables=dim_tables_and_sources,
)

run_quality_checks = DataQualityOperator(
    dag=dag,
    task_id='run_data_quality_checks',
    redshift_conn_id="redshift",
    tables=dim_tables_and_sources,
)

# NOTE(review): end_operator is not wired to any task in the statements
# visible here -- confirm it is connected further down the file.
end_operator = DummyOperator(dag=dag, task_id='stop_execution')

# Dependencies: start -> both staging tasks -> fact load -> dimension load
# -> quality checks. `a >> b` evaluates to `b`, so the chains below set the
# same edges as the pairwise form.
start_operator >> stage_events_to_redshift >> load_songplays_table
start_operator >> stage_songs_to_redshift >> load_songplays_table
load_songplays_table >> load_dimension_tables >> run_quality_checks
table='staging_songs', data='s3://' + Variable.get('s3_bucket') + '/' + Variable.get('songdata'), region=Variable.get('region'), json_option='auto', redshift_conn_id='redshift_conn_id', aws_conn_id='aws_conn_id') load_songplays_table = LoadFactOperator(task_id='Load_songplays_fact_table', dag=dag, table='songplays', sql=SqlQueries.songplays_table_insert, redshift_conn_id='redshift_conn_id') load_user_dimension_table = LoadDimensionOperator( task_id='Load_user_dim_table', dag=dag, table='users', sql=SqlQueries.users_table_insert, redshift_conn_id='redshift_conn_id') load_song_dimension_table = LoadDimensionOperator( task_id='Load_song_dim_table', dag=dag, table='songs', sql=SqlQueries.songs_table_insert, redshift_conn_id='redshift_conn_id') load_artist_dimension_table = LoadDimensionOperator( task_id='Load_artist_dim_table', dag=dag, table='artists', sql=SqlQueries.artists_table_insert,
redshift_conn_id='redshift', s3_bucket='s3://udacity-dend/song_data', aws_credentials_id='aws_credentials', copy_options="'auto'", dag=dag) load_songplays_table = LoadFactOperator( task_id='Load_songplays_fact_table', redshift_conn_id="redshift", destination_table="songplays", songplay_table_insert=SqlQueries.songplay_table_insert, dag=dag) load_user_dimension_table = LoadDimensionOperator( task_id='Load_user_dim_table', redshift_conn_id="redshift", destination_table="users", sql_query=SqlQueries.user_table_insert, dag=dag) load_song_dimension_table = LoadDimensionOperator( task_id='Load_song_dim_table', redshift_conn_id="redshift", destination_table="songs", sql_query=SqlQueries.song_table_insert, dag=dag) load_artist_dimension_table = LoadDimensionOperator( task_id='Load_artist_dim_table', redshift_conn_id="redshift", destination_table="artists", sql_query=SqlQueries.artist_table_insert,
copy_json_option='auto', region="us-west-2", data_format="JSON") load_songplays_table = LoadFactOperator( task_id='Load_songplays_fact_table', dag=dag, redshift_conn_id='redshift', sql_query=SqlQueries.songplay_table_insert, table_name="songplays", append_only=False) load_user_dimension_table = LoadDimensionOperator( task_id='Load_user_dim_table', dag=dag, redshift_conn_id='redshift', sql_query=SqlQueries.user_table_insert, table_name="users", append_only=False) load_song_dimension_table = LoadDimensionOperator( task_id='Load_song_dim_table', dag=dag, redshift_conn_id='redshift', sql_query=SqlQueries.song_table_insert, table_name="songs", append_only=False) load_artist_dimension_table = LoadDimensionOperator( task_id='Load_artist_dim_table', dag=dag,
create_query=SqlQueries.create_staging_bikes) wait_operator = DummyOperator(task_id='waiting_until_completion', dag=dag) load_rides_facts_table = LoadFactOperator( redshift_conn_id="redshift", table="rides", create_query=SqlQueries.create_rides, insert_query=SqlQueries.rides_table_insert, task_id='Load_rides_facts_table', dag=dag) load_stations_dimension_table = LoadDimensionOperator( redshift_conn_id="redshift", table="stations", create_query=SqlQueries.create_stations, insert_query=SqlQueries.stations_table_insert, task_id='Load_stations_dim_table', dag=dag) load_weather_dimension_table = LoadDimensionOperator( redshift_conn_id="redshift", table="weather", create_query=SqlQueries.create_weather, insert_query=SqlQueries.weather_table_insert, task_id='Load_weather_dim_table', dag=dag) run_quality_checks = DataQualityOperator(task_id='Run_data_quality_checks', redshift_conn_id="redshift", dag=dag)
s3_key="song_data", region="us-west-2", extra_params="json 'auto' compupdate off region 'us-west-2'", execution_date=start_date ) load_songplays_table = LoadFactOperator( task_id=load_songplays_fact_tabletask_id, dag=dag, provide_context=True, aws_credentials_id="aws_credentials", redshift_conn_id='redshift', sql_source=SqlQueries.songplay_table_insert ) load_user_dimension_table = LoadDimensionOperator( task_id=load_user_dimension_table_task_id' start_date= datetime(2018, 5, 1), redshift_conn_id="redshift", aws_credentials_id="aws_credentials", table="users", sql_source=SqlQueries.user_table_insert, dag=dag ) load_song_dimension_table = LoadDimensionOperator( task_id=load_song_dimension_table_task_id, redshift_conn_id="redshift", table="songs", aws_credentials_id="aws_credentials", start_date= datetime(2018, 5, 1), sql_source=SqlQueries.song_table_insert,
"songs_stage" }) load_songplays_table = LoadFactOperator( task_id='Load_songplays_fact_table', conn_id="redshift", sql=SqlQueries.songplay_table_insert, params={ 'table': 'songplays', 'truncate': True }) load_user_dimension_table = LoadDimensionOperator( task_id='Load_user_dim_table', conn_id="redshift", sql=SqlQueries.user_table_insert, params={ 'table': 'users', 'truncate': True }) load_song_dimension_table = LoadDimensionOperator( task_id='Load_song_dim_table', conn_id="redshift", sql=SqlQueries.song_table_insert, params={ 'table': 'songs', 'truncate': True }) load_artist_dimension_table = LoadDimensionOperator( task_id='Load_artist_dim_table',
task_id='Load_staging_songs_table', dag=dag, s3_bucket='udacity-dend', s3_prefix='song_data', table='staging_songs', copy_options="FORMAT AS JSON 'auto'") load_songplays_table = LoadFactOperator( task_id='Load_songplays_fact_table', dag=dag, table='songplays', select_sql=SqlQueries.insert_songplays_table) load_user_dimension_table = LoadDimensionOperator( task_id='Load_users_dim_table', dag=dag, table='users', select_sql=SqlQueries.insert_users_table, mode='truncate') load_song_dimension_table = LoadDimensionOperator( task_id='Load_songs_dim_table', dag=dag, table='songs', select_sql=SqlQueries.insert_songs_table, mode='truncate') load_artist_dimension_table = LoadDimensionOperator( task_id='Load_artists_dim_table', dag=dag, table='artists', select_sql=SqlQueries.insert_artists_table,
s3_key='song_data/A/A/A', file_format='CSV' ) load_songplays_table = LoadFactOperator( task_id='Load_songplays_fact_table', dag=dag, redshift_conn_id='redshift', target_table = "songplays", sql=SqlQueries.songplay_table_insert ) load_user_dimension_table = LoadDimensionOperator( task_id='Load_user_dim_table', dag=dag, redshift_conn_id='redshift', target_table = "users", sql=SqlQueries.user_table_insert ) load_song_dimension_table = LoadDimensionOperator( task_id='Load_song_dim_table', dag=dag, redshift_conn_id='redshift', target_table = "songs", sql=SqlQueries.song_table_insert ) load_artist_dimension_table = LoadDimensionOperator( task_id='Load_artist_dim_table',
table='staging_songs', s3_bucket='udacity-dend', s3_key='song_data/A/A/A', json_path='auto', region="us-west-2") load_songplays_table = LoadFactOperator(task_id='Load_songplays_fact_table', dag=dag, redshift_conn_id='redshift', table='songplays', sql=SqlQueries.songplay_table_insert) load_user_dimension_table = LoadDimensionOperator( task_id='Load_user_dim_table', dag=dag, redshift_conn_id='redshift', table='users', sql=SqlQueries.user_table_insert, append=False) load_song_dimension_table = LoadDimensionOperator( task_id='Load_song_dim_table', dag=dag, redshift_conn_id='redshift', table='songs', sql=SqlQueries.song_table_insert, append=False) load_artist_dimension_table = LoadDimensionOperator( task_id='Load_artist_dim_table', dag=dag,
sql_create=SqlQueries.staging_songs_table_create, sql_stage=SqlQueries.staging_songs_copy, json_path='auto') load_songplays_table = LoadFactOperator( task_id='Load_songplays_fact_table', dag=dag, table='songplays', redshift_conn_id='redshift', sql_create=SqlQueries.songplay_table_create, sql_select=SqlQueries.songplay_table_insert) load_user_dimension_table = LoadDimensionOperator( task_id='Load_user_dim_table', dag=dag, table='users', redshift_conn_id='redshift', sql_create=SqlQueries.user_table_create, sql_select=SqlQueries.user_table_insert) load_song_dimension_table = LoadDimensionOperator( task_id='Load_song_dim_table', dag=dag, table='songs', redshift_conn_id='redshift', sql_create=SqlQueries.song_table_create, sql_select=SqlQueries.song_table_insert) load_artist_dimension_table = LoadDimensionOperator( task_id='Load_artist_dim_table', dag=dag,
redshift_conn_id="redshift", aws_credentials_id="aws_credentials", s3_bucket="ctsprojbucket/", s3_key="countries", fileformat="parquet", truncate_flag='Y') Load_visitorsi94_fact = LoadFactOperator( task_id='Load_i94visitors_fact', dag=dag, redshift_conn_id="redshift", table_query=SqlQueries.visitors_fact_insert) load_dates_dim = LoadDimensionOperator(task_id='Load_dates_dim', dag=dag, redshift_conn_id="redshift", table_query=SqlQueries.dates_dim_insert, table="dates_dim", truncate_flag='Y') run_quality_check = DataQualityOperator( task_id='Run_data_quality_checks', dag=dag, redshift_conn_id="redshift", check_query= "select count(1) from public.i94visitors_fact where reasonforvisit is null", expected_count=10000) run_intg_check = IntegrityCheckOperator( task_id='Run_data_integrity_check', dag=dag, redshift_conn_id="redshift",
iam_role=Variable.get('iam_role'), json_format='auto', table_name='staging_songs' ) load_songplays_table = LoadFactOperator( task_id='Load_songplays_fact_table', dag=dag, insert_query=SqlQueries.songplays_table_insert, table_name='songplays', redshift_conn_id='redshift' ) load_user_dimension_table = LoadDimensionOperator( task_id='Load_user_dim_table', dag=dag, insert_query=SqlQueries.users_table_insert, table_name='users', ) load_song_dimension_table = LoadDimensionOperator( task_id='Load_song_dim_table', dag=dag, insert_query=SqlQueries.songs_table_insert, table_name='songs', ) load_artist_dimension_table = LoadDimensionOperator( task_id='Load_artist_dim_table', dag=dag, insert_query=SqlQueries.artists_table_insert, table_name='artists',