stage_songs_to_redshift = StageToRedshiftOperator( task_id='Stage_songs', dag=dag, redshift_conn_id='redshift', aws_credentials_id='aws_credentials', target_table='staging_songs', s3_bucket='udacity-dend', s3_key='song_data/{year}/{month}/{full_date}-events.json', format_option='json_path', provide_context=True ) load_songplays_table = LoadFactOperator( task_id='Load_songplays_fact_table', dag=dag, redshift_conn_id='redshift', target_table='songplays', sql_query=SqlQueries.songplay_table_insert, ) load_user_dimension_table = LoadDimensionOperator( task_id='Load_user_dim_table', dag=dag, redshift_conn_id='redshift', target_table='users', sql_query=SqlQueries.user_table_insert, delete_records_before_load=False, ) load_song_dimension_table = LoadDimensionOperator( task_id='Load_song_dim_table',
stage_songs_to_redshift = StageToRedshiftOperator( task_id='load_songs_from_s3_to_redshift', redshift_conn_id='redshift', aws_credentials_id='aws_credentials', region_name="us-west-2", s3_bucket='{{ var.json.s3.bucket }}', s3_key='{{ var.json.s3.song_key }}', table_name='staging_songs', json_format="auto", dag=dag) load_songplays_table = LoadFactOperator( task_id='Load_songplays_fact_table', redshift_conn_id='redshift', load_mode=LoadMode.TRUNCATE, clear_table_sql=SqlQueries.clear_table.format("songplays"), load_data_sql=SqlQueries.songplay_table_insert, dag=dag) load_user_dimension_table = LoadDimensionOperator( task_id='Load_user_dim_table', redshift_conn_id='redshift', load_mode=LoadMode.TRUNCATE, clear_table_sql=SqlQueries.clear_table.format("users"), load_data_sql=SqlQueries.user_table_insert, dag=dag) load_song_dimension_table = LoadDimensionOperator( task_id='Load_song_dim_table', redshift_conn_id='redshift',
task_id='Stage_dependencies', provide_context=True, dag=dag) stage_projects_to_redshift = StageToRedshiftOperator( aws_conn_id='aws_credentials', redshift_conn_id='redshift', table='staging_projects', s3_addr='s3://dend-capstone-lkj/projects_with_repository.csv', task_id='Stage_projects', provide_context=True, dag=dag) load_repositories_fact_table = LoadFactOperator( redshift_conn_id='redshift', table='repository_fact', task_id='Load_repositories_fact_table', provide_context=True, dag=dag) load_projects_dimension_table = LoadDimensionOperator( redshift_conn_id='redshift', table='project_dim', task_id='Load_projects_dim_table', provide_context=True, dag=dag) load_versions_dimension_table = LoadDimensionOperator( redshift_conn_id='redshift', table='version_dim', task_id='Load_versions_dim_table', provide_context=True,
stage_songs_to_redshift = StageToRedshiftOperator( task_id='stage_songs', dag=dag, create_table_sql=SqlQueries.create_songs_stage, stage_table_sql=SqlQueries.staging_table_copy_template, redshift_conn_id="redshift", s3_bucket="udacity-dend", s3_key="song_data", table="songs_stage", s3_region="us-west-2") load_songplays_table = LoadFactOperator( task_id='load_songplays_fact_table', dag=dag, redshift_conn_id="redshift", table="songplays", create_table_sql=SqlQueries.create_songplays, insert_table_sql=SqlQueries.songplay_table_insert) load_user_dimension_table = LoadDimensionOperator( task_id='load_user_dim_table', dag=dag, redshift_conn_id="redshift", table="songplays", create_table_sql=SqlQueries.create_users, insert_table_sql=SqlQueries.user_table_insert, delete_existing_records=False) load_song_dimension_table = LoadDimensionOperator( task_id='load_song_dim_table',
redshift_conn_id='redshift', ) stage_songs_to_redshift = StageToRedshiftOperator( task_id='Stage_songs', dag=dag, table='staging_songs', aws_credentials_id='aws_credentials', s3_key='song_data/', s3_bucket='udacity-dend', redshift_conn_id='redshift', ) load_songplays_table = LoadFactOperator( task_id='Load_songplays_fact_table', dag=dag, redshift_conn_id='redshift', sql_stmt=SqlQueries.songplay_table_insert) load_user_dimension_table = LoadDimensionOperator( task_id='Load_user_dim_table', dag=dag, redshift_conn_id='redshift', table='users', load_option='delete-load', sql_stmt=SqlQueries.user_table_insert) load_song_dimension_table = LoadDimensionOperator( task_id='Load_song_dim_table', dag=dag, redshift_conn_id='redshift',
task_id='Stage_songs', dag=dag, table='staging_songs', time_format='epochmillisecs', region='us-west-2', format_type='auto', s3_bucket='udacity-dend', s3_key='song_data/A/A/A/', use_partitioning=False, execution_date='{{ execution_date }}', redshift_conn_id='redshift', aws_credentials_id='aws_credentials') load_songplays_table = LoadFactOperator(task_id='Load_songplays_fact_table', dag=dag, redshift_conn_id='redshift', table='songplays', append_data='False', sql=SqlQueries.songplay_table_insert) user_table_task_id = "Load_user_dim_table" load_user_dim_task = SubDagOperator(subdag=load_dimension_tables_dag( "sparkify_etl_dag", user_table_task_id, "redshift", "users", "False", SqlQueries.user_table_insert, start_date=start_date), task_id=user_table_task_id, dag=dag)
s3_key="log_data", path="s3://udacity-dend/log_json_path.json", dag=dag) stage_songs_to_redshift = StageToRedshiftOperator( task_id='Stage_songs', redshift_conn_id="redshift", aws_credentials_id="aws_credentials", table="staging_songs", s3_bucket="udacity-dend", s3_key="song_data", dag=dag) load_songplays_table = LoadFactOperator( task_id='Load_songplays_fact_table', redshift_conn_id="redshift", table="songplays", sql_stmt=SqlQueries.songplay_table_insert, dag=dag) load_user_dimension_table = LoadDimensionOperator( task_id='Load_user_dim_table', redshift_conn_id="redshift", table="songs", sql_stmt=SqlQueries.song_table_insert, dag=dag) load_song_dimension_table = LoadDimensionOperator( task_id='Load_song_dim_table', redshift_conn_id="redshift", table="users", sql_stmt=SqlQueries.user_table_insert,
table='staging_events', copy_options="JSON 's3://udacity-dend/log_json_path.json'") stage_songs_to_redshift = StageToRedshiftOperator( task_id='Stage_songs', dag=dag, s3_bucket='udacity-dend', s3_prefix='song_data', table='staging_songs', copy_options="FORMAT AS JSON 'auto'") # Insert fact and dimension tables load_songplays_table = LoadFactOperator( task_id='Load_songplays_fact_table', dag=dag, table='songplays', select_sql=SqlQueries.songplays_table_insert) load_user_dimension_table = LoadDimensionOperator( task_id='Load_users_dim_table', dag=dag, table='users', select_sql=SqlQueries.users_table_insert, mode='truncate') load_song_dimension_table = LoadDimensionOperator( task_id='Load_songs_dim_table', dag=dag, table='songs', select_sql=SqlQueries.songs_table_insert,