stage_songs_to_redshift = StageToRedshiftOperator( task_id='Stage_songs', dag=dag, s3_key="song_data", redshift_conn_id="redshift", aws_credentials_id="aws_credentials", table="staging_songs", s3_bucket="udacity-dend", json_path="auto", region="us-west-2", overwrite=True) load_songplays_table = LoadFactOperator(task_id='Load_songplays_fact_table', dag=dag, sql=SqlQueries.songplay_table_insert, redshift_conn_id="redshift", target_table="public.songplays") load_user_dimension_table = LoadDimensionOperator( task_id='Load_user_dim_table', dag=dag, sql=SqlQueries.user_table_insert, redshift_conn_id="redshift", target_table="public.users", overwrite=True) load_song_dimension_table = LoadDimensionOperator( task_id='Load_song_dim_table', dag=dag, sql=SqlQueries.song_table_insert,
stage_songs_to_redshift = StageToRedshiftOperator( task_id='Stage_songs', dag=dag, redshift_conn_id='redshift', aws_credentials_id='aws_credentials', table='staging_songs', s3_bucket="udacity-dend", s3_key="song_data", region='us-west-2', json_path='auto') #4. Use staging tables to populate fact table load_songplays_table = LoadFactOperator( task_id='Load_songplays_fact_table', dag=dag, redshift_conn_id="redshift", table="songplays", sql_query=SqlQueries.songplay_table_insert, delete_first=True) #5. Use staging tables to populate user table load_user_dimension_table = LoadDimensionOperator( task_id='Load_user_dim_table', dag=dag, redshift_conn_id="redshift", table="users", sql_query=SqlQueries.user_table_insert, delete_first=True) #6. Use staging tables to populate song_table table load_song_dimension_table = LoadDimensionOperator(
redshift_conn_id="redshift", aws_credentials_id="aws_credentials", s3_bucket="udacity-dend", s3_key="song_data/A/A/A", json="auto" ) """ connecting to redshift running the LoadFactOperator operator with sql_queries.py """ load_songplays_table = LoadFactOperator( task_id='Load_songplays_fact_table', redshift_conn_id = 'redshift', table="songplays", sql_query = SqlQueries.songplay_table_insert, dag=dag, append_only=False ) """ connecting to redshift running the LoadDimensionOperator operator with sql_queries.py """ load_user_dimension_table = LoadDimensionOperator( task_id='Load_user_dim_table', redshift_conn_id = 'redshift', table="users", sql_query = SqlQueries.user_table_insert,
redshift_conn_id='redshift', s3_bucket='udacity-dend', s3_key='song_data/', aws_credentials={ 'key': AWS_KEY, 'secret': AWS_SECRET }, region='us-west-2') load_songplays_table = LoadFactOperator( task_id='Load_songplays_fact_table', dag=dag, source_table='songplays', target_table='songplays', redshift_conn_id='redshift', append_data=True, aws_credentials={ 'key': AWS_KEY, 'secret': AWS_SECRET }, region='us-west-2', sql_statement=SqlQueries.songplays_table_insert, provide_context=True) load_users_dimension_table = LoadDimensionOperator( task_id='Load_users_dim_table', dag=dag, target_table='users', redshift_conn_id='redshift', append_data=False, aws_credentials={ 'key': AWS_KEY,
json_format='s3://udacity-dend/log_json_path.json') stage_songs_to_redshift = StageToRedshiftOperator( task_id='Stage_songs', dag=dag, table='staging_songs', redshift_conn_id='redshift', aws_credentials_id='aws_credentials', s3_bucket='udacity-dend', s3_key= 'song_data', # load a small portion of song data with 'song_data/A/A/A' json_path='auto') load_songplays_table = LoadFactOperator(task_id='Load_songplays_fact_table', dag=dag, conn_id='redshift', sql=SqlQueries.songplay_table_insert, target_table='songplays') load_user_dimension_table = LoadDimensionOperator( task_id='Load_user_dim_table', dag=dag, conn_id='redshift', sql=SqlQueries.user_table_insert, target_table='users', delete_first=True) load_song_dimension_table = LoadDimensionOperator( task_id='Load_song_dim_table', dag=dag, conn_id='redshift',
stage_songs_to_redshift = StageToRedshiftOperator( task_id='Stage_songs', dag=dag, table="staging_songs", redshift_conn_id="redshift", aws_credentials_id="aws_credentials", s3_bucket="udacity-dend", s3_key="song_data", json_format="'auto'", ) load_songplays_table = LoadFactOperator( task_id='Load_songplays_fact_table', dag=dag, table='songplays', redshift_conn_id="redshift", truncate_table=False, select_sql=SqlQueries.songplay_table_insert, ) load_user_dimension_table = LoadDimensionOperator( task_id='Load_users_dim_table', dag=dag, table='users', redshift_conn_id="redshift", truncate_table=True, select_sql=SqlQueries.user_table_insert, ) load_song_dimension_table = LoadDimensionOperator( task_id='Load_songs_dim_table',
aws_credentials_id='aws_credentials', s3_bucket='dend-bucket-oregon-123', s3_key='capstone_immigration/trans_mode') stage_visa_code_to_redshift = StageToRedshiftOperator( task_id='Stage_visa_code', dag=dag, table='staging_visa_code', redshift_conn_id='redshift', aws_credentials_id='aws_credentials', s3_bucket='dend-bucket-oregon-123', s3_key='capstone_immigration/visa_code') load_arrivals_table = LoadFactOperator(task_id='Load_arrivals_fact_table', dag=dag, conn_id='redshift', sql=SqlQueries.arrivals_table_insert, target_table='arrivals') load_admissions_table = LoadFactOperator( task_id='Load_admissions_dim_table', dag=dag, conn_id='redshift', sql=SqlQueries.admissions_table_insert, target_table='admissions') load_time_table = LoadFactOperator(task_id='Load_time_dim_table', dag=dag, conn_id='redshift', sql=SqlQueries.time_table_insert, target_table='time')
def create_fact_tables(parent_dag_name, child_dag_name, start_date, end_date,
                       schedule_interval, redshift_conn_id, degree_list,
                       origin_table_format, destination_table_format, sql,
                       upstream_subdag_id, *args, **kwargs):
    """
    Build the sub-DAG that loads one fact table per degree.

    For every entry in `degree_list` the sub-DAG gets a chain of four tasks:
    a start marker, an upstream staging dependency check, the fact table
    load, and a data quality check on the loaded table.

    Keyword Arguments:
    parent_dag_name -- Parent DAG name defined in `main_dag.py` dag object
    child_dag_name -- Child DAG name used to define subdag ID
    start_date -- DAG start date
    end_date -- DAG end date
    schedule_interval -- (e.g. '@monthly', '@weekly', etc.)
    redshift_conn_id -- Redshift connection ID (str)
    degree_list -- List of degree names (list)
    origin_table_format -- Mapping of table labels to staging table name
                           templates; each template is formatted with the
                           degree name (dict)
    destination_table_format -- Fact table name to be formatted with degree
                                name (str)
    sql -- Fact table query (str)
    upstream_subdag_id -- Forwarded unchanged to `upstream_staging_check`
                          through `op_kwargs`
    *args -- Accepted for call compatibility; not used
    **kwargs -- Extra keyword arguments forwarded to the `DAG` constructor

    Returns:
    The populated DAG, whose dag_id is
    "<parent_dag_name>.<child_dag_name>".
    """
    dag = DAG(dag_id=f"{parent_dag_name}.{child_dag_name}",
              start_date=start_date,
              end_date=end_date,
              schedule_interval=schedule_interval,
              **kwargs)

    for degree in degree_list:
        destination_table = destination_table_format.format(degree=degree)
        origin_tables = {
            table: name.format(degree=degree)
            for table, name in origin_table_format.items()
        }

        start_task = DummyOperator(task_id=f'{degree}', dag=dag)

        # Attach the check to the DAG explicitly, like every other task here,
        # instead of relying on `>>` to infer it from a neighbouring task.
        upstream_check_task = PythonOperator(
            task_id=f'check_{destination_table}_dependencies',
            dag=dag,
            python_callable=upstream_staging_check,
            op_kwargs={
                'origin_tables': origin_tables,
                'upstream_subdag_id': upstream_subdag_id
            },
            provide_context=True)

        create_task = LoadFactOperator(task_id=destination_table,
                                       dag=dag,
                                       sql=sql,
                                       redshift_conn_id=redshift_conn_id,
                                       destination_table=destination_table,
                                       origin_tables=origin_tables,
                                       provide_context=True)

        # The task id must be unique within the DAG. A constant id would
        # collide on the second degree, so it is derived from the table name.
        check_task = DataQualityOperator(
            task_id=f'{destination_table}_data_quality_check',
            dag=dag,
            redshift_conn_id=redshift_conn_id,
            table=destination_table,
            provide_context=True)

        start_task >> upstream_check_task >> create_task >> check_task

    return dag
json="s3://udacity-dend/log_json_path.json", ) stage_songs_to_redshift = StageToRedshiftOperator( task_id='Stage_songs', dag=dag, redshift_conn_id="redshift", aws_credentials_id="aws_credentials", s3_bucket="udacity-dend", s3_key="song_data/A/A/A", table="staging_songs", json="auto") load_songplays_table = LoadFactOperator( task_id='Load_songplays_fact_table', dag=dag, redshift_conn_id="redshift", destination_table="songplays", facts_sql=SqlQueries.songplay_table_insert) load_user_dimension_table = LoadDimensionOperator( task_id='Load_user_dim_table', dag=dag, redshift_conn_id="redshift", destination_table="users", dim_sql=SqlQueries.user_table_insert, append_mode=False) load_song_dimension_table = LoadDimensionOperator( task_id='Load_song_dim_table', dag=dag, redshift_conn_id="redshift",
stage_songs_to_redshift = StageToRedshiftOperator( task_id='Stage_songs', dag=dag, target_table="staging_songs", sql_table_create=SqlQueries.staging_songs_table_create, redshift_conn_id="redshift", aws_credentials_id="aws_credentials", s3_bucket="udacity-dend", s3_key="song_data", json_file="auto", region="us-west-2") load_songplays_table = LoadFactOperator( task_id='Load_songplays_fact_table', dag=dag, target_table="songplays", sql_table_create=SqlQueries.songplay_table_create, sql_table_insert=SqlQueries.songplay_table_insert, redshift_conn_id="redshift", mode="") load_user_dimension_table = LoadDimensionOperator( task_id='Load_user_dim_table', dag=dag, target_table="users", sql_table_create=SqlQueries.user_table_create, sql_table_insert=SqlQueries.user_table_insert, redshift_conn_id="redshift") load_song_dimension_table = LoadDimensionOperator( task_id='Load_song_dim_table', dag=dag,
schema='public', table='gov', s3_bucket='udacity-capstone-cg', s3_key='staging', copy_options=[ 'CSV', 'IGNOREHEADER 1', 'FILLRECORD', 'COMPUPDATE OFF', 'STATUPDATE OFF', 'TRUNCATECOLUMNS' ], dag=dag) get_ready_to_load = DummyOperator(task_id='Get_ready_to_load', dag=dag) load_tweets_table = LoadFactOperator(task_id='Load_tweets_fact_table', dag=dag, aws_credentials_id="aws_credentials", table="tweets", sql_query=SqlQueries.tweets_table_insert) load_user_dimension_table = LoadDimensionOperator( task_id='Load_user_dim_table', dag=dag, table='"user"', sql_query=SqlQueries.user_table_insert) load_geo_dimension_table = LoadDimensionOperator( task_id='Load_geo_dim_table', dag=dag, table="geo", sql_query=SqlQueries.geo_table_insert) run_basic_checks = DataQualityOperator(
redshift_conn_id="redshift", aws_credentials_id="aws_credentials", s3_bucket="udacity-dend", s3_key="log_data/2018/11/2018-11-01-events.json") stage_songs_to_redshift = StageToRedshiftOperator( task_id='Stage_songs', dag=dag, table="staging_songs", redshift_conn_id="redshift", aws_credentials_id="aws_credentials", s3_bucket="udacity-dend", s3_key="song_data/A/B/C/TRABCEI128F424C983.json") load_songplays_table = LoadFactOperator(task_id='Load_songplays_fact_table', dag=dag, redshift_conn_id="redshift", destination_table="songplays") load_user_dimension_table = LoadDimensionOperator( task_id='Load_user_dim_table', dag=dag, redshift_conn_id="redshift", sql_statement=sql_queries.user_table_insert, table_name='users') load_song_dimension_table = LoadDimensionOperator( task_id='Load_song_dim_table', dag=dag, redshift_conn_id="redshift", sql_statement=sql_queries.song_table_insert, table_name='songs')
aws_credentials_id="aws_credentials", json="s3://udacity-dend/log_json_path.json") stage_songs_to_redshift = StageToRedshiftOperator( task_id='Stage_songs', dag=dag, table='public.songs', redshift_conn_id='redshift', aws_credentials_id='aws_credentials', json='s3://udacity-dend/song_data', s3_bucket='udacity-dend', s3_key='song_data') load_songplays_table = LoadFactOperator(task_id='Load_songplays_fact_table', dag=dag, aws_credentials='aws_credentials', table='public.songplays', truncate_table=True, query=SqlQueries.songplay_table_insert) load_user_dimension_table = LoadDimensionOperator( task_id='Load_user_dim_table', dag=dag, aws_credentials='aws_credentials', table='public.users', truncate_table=True, query=SqlQueries.user_table_insert) load_song_dimension_table = LoadDimensionOperator( task_id='Load_song_dim_table', dag=dag, aws_credentials='aws_credentials',
json="s3://udacity-dend/log_json_path.json") stage_songs_to_redshift = StageToRedshiftOperator( task_id='Stage_songs', dag=dag, redshift_conn_id="redshift", aws_credentials_id="aws_credentials", table="staging_songs", s3_bucket="udacity-dend", s3_key="song_data/", aws_region="us-west-2", json="auto") load_songplays_table = LoadFactOperator( task_id='Load_songplays_fact_table', dag=dag, redshift_conn_id="redshift", sql_insert=SqlQueries.songplay_table_insert, destination_table="public.songplays") load_user_dimension_table = LoadDimensionOperator( task_id='Load_user_dim_table', dag=dag, redshift_conn_id="redshift", sql_insert=SqlQueries.user_table_insert, destination_table="public.users", delete=True) load_song_dimension_table = LoadDimensionOperator( task_id='Load_song_dim_table', dag=dag, redshift_conn_id="redshift",
jsonpath='log_json_path.json') stage_songs_to_redshift = StageToRedshiftOperator( task_id='Stage_songs', dag=dag, table='staging_songs', redshift_conn_id='redshift', aws_credentials_id='aws_credentials', s3_bucket='udacity-dend', s3_key='song_data', aws_region='us-west-2') load_songplays_table = LoadFactOperator( task_id='Load_songplays_fact_table', dag=dag, redshift_conn_id='redshift', append_data=append_data, query=SqlQueries.songplay_table_insert, ) load_user_dimension_table = LoadDimensionOperator( task_id='Load_user_dim_table', dag=dag, redshift_conn_id='redshift', append_data=append_data, table='users', query=SqlQueries.user_table_insert, ) load_song_dimension_table = LoadDimensionOperator( task_id='Load_song_dim_table',
# Staging task for the song data (S3 source -> staging_songs table).
stage_songs_to_redshift = StageToRedshiftOperator(
    dag=dag,
    task_id='Stage_songs',
    s3_bucket='udacity-dend',
    s3_key='song-data/A/A',
    json_path="auto",
    aws_credential_id='aws_credentials',
    redshift_connection_id='redshift',
    table_name='staging_songs',
)

# Fact table task: songplays, populated with SqlQueries.songplay_table_insert.
load_songplays_table = LoadFactOperator(
    dag=dag,
    task_id='Load_songplays_fact_table',
    redshift_connection_id='redshift',
    target_table='songplays',
    sql_statement=SqlQueries.songplay_table_insert,
)

# Dimension table task: users, populated with SqlQueries.user_table_insert.
load_user_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_user_dim_table',
    redshift_connection_id='redshift',
    target_table='users',
    sql_statement=SqlQueries.user_table_insert,
    truncate=False,
)

# Calling LoadDimensionOperator to load the data into songs dimension table
) stage_songs_to_redshift = StageToRedshiftOperator( task_id='Stage_songs', dag=dag, table="staging_songs", redshift_conn_id="redshift", s3_bucket="udacity-dend", s3_key="song_data", aws_credentials_id='aws_credentials' ) load_songplays_table = LoadFactOperator( task_id='Load_songplays_fact_table', dag=dag, redshift_conn_id='redshift', table='songplays', sql_query=SqlQueries.songplay_table_insert ) load_user_dimension_table = LoadDimensionOperator( task_id='Load_user_dim_table', dag=dag, redshift_conn_id='redshift', table='users', sql_query=SqlQueries.user_table_insert ) load_song_dimension_table = LoadDimensionOperator( task_id='Load_song_dim_table', dag=dag,
create_or_delete='delete', staging_or_dwh='dwh', redshift_conn_id='redshift_conn_id', ) create_dwh_tables = CreateOrDeleteOperator( task_id='create_dwh_tables', dag=dag, create_or_delete='create', staging_or_dwh='dwh', redshift_conn_id='redshift_conn_id', ) load_artists_table = LoadFactOperator(task_id='load_artists_fact_table', redshift_conn_id="redshift_conn_id", table='artists', append=True, dag=dag) load_concerts_table = LoadDimensionOperator( task_id='load_concerts_dimension_table', redshift_conn_id="redshift_conn_id", table='concerts', append=True, dag=dag) load_songs_table = LoadDimensionOperator(task_id='load_songs_dimension_table', redshift_conn_id="redshift_conn_id", table='songs', append=True, dag=dag)
for dimension_item in dimension_items: load_dimension_table = LoadDimensionOperator( task_id=f"load_{dimension_item['item']}_dimension_table", dag=dag, redshift_conn_id="redshift", table=dimension_item["item"], query=dimension_item["query"], append=False) load_dimension_tables.append(load_dimension_table) ### Build fact table milestone_2 = DummyOperator(task_id='milestone_2', dag=dag) Load_sales_fact_table = LoadFactOperator(task_id='Load_sales_fact_table', dag=dag, redshift_conn_id="redshift", table="sales", query=SqlQueries.sales_table_insert) ### Quality checks tables = [ 'sales', 'sales', 'sales', 'sales', 'magasin', 'utilisateur', 'cours' ] columns = [ 'id_ville', 'id_temps', 'id_famille_produit', 'id_magasin', 'id_enseigne', 'id_profil', 'id_devise' ] null_quality_checks = CheckNullOperator(task_id='null_quality_checks', dag=dag, redshift_conn_id="redshift",
format_json=Variable.get('json_event_format', default_var=default_json_event_format)) stage_songs_to_redshift = StageToRedshiftOperator( task_id='Stage_songs', dag=dag, table="staging_songs", redshift_conn_id="redshift", aws_credentials_id="aws_credentials", s3_bucket="udacity-dend", s3_key="song_data/", format_json=Variable.get('json_song_format', default_var=default_json_song_format)) load_songplays_table = LoadFactOperator(task_id='Load_songplays_fact_table', redshift_conn_id="redshift", table='songplays', dag=dag) load_user_dimension_table = LoadDimensionOperator( task_id='Load_user_dim_table', redshift_conn_id="redshift", table='users', params={ 'append_flag': Variable.get('append_flag', default_var=default_append_flag) }, dag=dag) load_song_dimension_table = LoadDimensionOperator( task_id='Load_song_dim_table', redshift_conn_id="redshift",
) stage_songs_to_redshift = StageToRedshiftOperator( task_id='Stage_songs', dag=dag, redshift_conn_id="redshift", aws_credentials_id="aws_credentials", target_table_name="staging_songs", s3_data_path="s3://udacity-dend/song_data", json_schema="auto", ) load_songplays_table = LoadFactOperator( task_id='Load_songplays_fact_table', dag=dag, redshift_conn_id="redshift", sql_query=SqlQueries.songplay_table_insert, filter_expr="WHERE page='NextSong'" ) load_user_dimension_table = LoadDimensionOperator( task_id='Load_user_dim_table', dag=dag, redshift_conn_id="redshift", sql_query=SqlQueries.user_table_insert, filter_expr="WHERE page='NextSong'" ) load_song_dimension_table = LoadDimensionOperator( task_id='Load_song_dim_table', dag=dag,
task_id='Stage_songs', dag=dag, redshift_conn='redshift', aws_credentials='aws_credentials', table='staging_songs', s3_bucket='udacity-dend', s3_key='song_data', json_path='auto', sql=SqlQueries.staging_table_copy, provide_context=True, ) load_songplays_table = LoadFactOperator( task_id='Load_songplays_fact_table', dag=dag, redshift_conn='redshift', table='songplays', sql=SqlQueries.songplay_table_insert, provide_context=False, ) load_user_dimension_table = LoadDimensionOperator( task_id='Load_user_dim_table', dag=dag, redshift_conn='redshift', table='users', sql=SqlQueries.user_table_insert, provide_context=False, ) load_song_dimension_table = LoadDimensionOperator( task_id='Load_song_dim_table',
stage_demography_to_redshift = StageRedshiftFromS3Operator( task_id='Stage_demography', dag=dag, redshift_conn_id="redshift", aws_credentials_id="aws_credentials", target_table_name="staging_demographics", s3_data_path="s3://dend-veegaaa-capstone/us-cities-demographics.csv", ignore_header=1, delimiter=';', ) load_immigration_facts_table = LoadFactOperator( task_id='Load_immigration_facts_fact_table', dag=dag, redshift_conn_id="redshift", sql_query=insert_queries['immigration_facts'], filter_expr="WHERE cicid is not null", ) load_states_dimension_table = LoadDimensionOperator( task_id='Load_states_dim_table', dag=dag, redshift_conn_id="redshift", sql_query=insert_queries['states'], filter_expr="", mode='append') load_cities_dimension_table = LoadDimensionOperator( task_id='Load_cities_dim_table', dag=dag,
airport_codes_staging = StageJsonToRedshiftOperator( task_id='airport_codes_staging', dag=dag, redshift_conn_id='redshift', aws_credentials_id='aws_credentials', table='staging_airport_code_data', s3_bucket="beppe-udacity-capstone", s3_key="capstone/airport_code_") end_staging = DummyOperator(task_id='end_staging', dag=dag) # Load Fact table load_fact_table = LoadFactOperator( task_id='load_fact_table', dag=dag, redshift_conn_id="redshift", table_name="fact_temperature", sql_insert_stmt=SqlQueries.fact_table_insert, truncate=False) # Load Dimension table load_time_dimension_table = LoadDimensionOperator( task_id='load_time_dimension_table', dag=dag, redshift_conn_id="redshift", table_name="time", sql_insert_stmt=SqlQueries.time_table_insert, truncate=False) # Load Dimension table load_airport_dimension_table = LoadDimensionOperator(
stage_songs = StageToRedshiftOperator( task_id='staging_songs', dag=dag, create_table_sql=create_tables.staging_songs, s3_bucket='udacity-dend', s3_key='song_data', schema='public', table='staging_songs', redshift_conn_id='redshift', aws_conn_id='aws_credentials', copy_options=["JSON 'auto ignorecase'"]) load_songplays = LoadFactOperator( task_id='load_fact_songplays', dag=dag, insert_table_sql=insert_tables.songplays, redshift_conn_id='redshift') load_users = LoadDimensionOperator( task_id='load_dim_users', dag=dag, insert_table_sql=insert_tables.users, schema='public', table='users', truncate=False, redshift_conn_id='redshift') load_songs = LoadDimensionOperator( task_id='load_dim_songs', dag=dag,
) stage_weather_stations_info_from_s3_to_redshift = LoadS3ToRedshiftOperator( task_id='stage_weather_stations_info_from_s3_to_redshift', aws_credentials_id='aws_credentials_id', redshift_conn_id='redshift', table='staging_weather_station_info', s3_bucket='udacity-dend-alex-ho', s3_key='weather_sg/weather_stations_info_{{ ts_nodash }}.csv', dag=dag ) load_temperature_table = LoadFactOperator( task_id='load_temperature_events_fact_table', redshift_conn_id="redshift", table='temperature_events', append=True, dag=dag ) load_rainfall_table = LoadFactOperator( task_id='load_rainfall_events_fact_table', redshift_conn_id="redshift", table='rainfall_events', append=True, dag=dag ) load_carpark_availability_table = LoadFactOperator( task_id='load_carpark_availability_fact_table', redshift_conn_id="redshift",
stage_songs_to_redshift = StageToRedshiftOperator( task_id='Stage_songs', dag=dag, table="staging_songs", json_path="auto", file_type='json', redshift_conn_id='redshift', aws_conn_id="aws_credentials", s3_bucket="udacity-dend", s3_key="song_data/A/A/A", ) load_songplays_table = LoadFactOperator( task_id='Load_songplays_fact_table', dag=dag, table='songplays', redshift_conn_id='redshift', aws_conn_id='aws_credentials', insert_sql_qry=SqlQueries.songplay_table_insert) load_user_dimension_table = LoadDimensionOperator( task_id='Load_user_dim_table', dag=dag, table='users', redshift_conn_id='redshift', aws_conn_id='aws_credentials', insert_sql_qry=SqlQueries.user_table_insert) load_song_dimension_table = LoadDimensionOperator( task_id='Load_song_dim_table', dag=dag,
dag=dag, table="staging_songs", redshift_conn_id="redshift", aws_credentials_id="aws_credentials", s3_bucket=Variable.get('s3_bucket'), s3_key=Variable.get('s3_key_song_data'), log_json_path='auto', depends_on_past=False, retries=3, retry_delay=timedelta(minutes=5), email_on_retry=False) load_songplays_table = LoadFactOperator(task_id='Load_songplays_fact_table', dag=dag, table="songplays", redshift_conn_id='redshift', depends_on_past=False, retries=3, retry_delay=timedelta(minutes=5), email_on_retry=False) load_user_dimension_table = LoadDimensionOperator( task_id='Load_user_dim_table', dag=dag, table="users", redshift_conn_id='redshift', depends_on_past=False, retries=3, retry_delay=timedelta(minutes=5), email_on_retry=False) load_song_dimension_table = LoadDimensionOperator(
s3_bucket=S3_BUCKET, s3_key="raw/i94_immigration_labels_description/visa_code.csv") copy_country_code = StageCSVToRedshiftOperator( task_id="copy_country_code_description", dag=dag, redshift_conn_id=REDSHIFT_CONN_ID, schema=SCHEMA_NAME, table="i94country_code", s3_bucket=S3_BUCKET, s3_key="raw/i94_immigration_labels_description/country_code.csv") load_usa_travelers_info = LoadFactOperator( task_id="load_usa_travelers_info", dag=dag, redshift_conn_id=REDSHIFT_CONN_ID, schema=SCHEMA_NAME, table="city_state_travelers_entry", insert_sql=SqlQueries.city_state_travelers_entry_insert) load_arrival_date = LoadDimensionOperator( task_id="load_arrival_date", dag=dag, redshift_conn_id=REDSHIFT_CONN_ID, schema=SCHEMA_NAME, table="arrival_date", insert_sql=SqlQueries.arrival_date_insert) # Data Quality # get the dq_checks_settings for data quality # file: [airflow_file]/plugins/helpers/dq_check_settings.json
stage_covid_to_redshift = StageToRedshiftOperator( task_id='Stage_covid', dag=dag, table="staging_covid", redshift_conn_id="redshift", aws_credentials_id="aws_s3_connection", s3_bucket="udacity-data-lake", s3_key="covid19/staging", region="us-west-2", extra_params="delimiter ';'" ) load_covid_cases_fact_table = LoadFactOperator( task_id='Load_covid_cases_fact_table', dag=dag, table='fact_covid_cases', redshift_conn_id="redshift", load_sql_stmt=SqlQueries.covid_cases_insert ) load_dim_location_table = LoadDimensionOperator( task_id='Load_dim_location_table', dag=dag, table='dim_location', redshift_conn_id="redshift", truncate_table=True, load_sql_stmt=SqlQueries.location_table_insert ) load_dim_date_table = LoadDimensionOperator( task_id='Load_dim_date_table',