Exemple #1
0
    task_id='Stage_events',
    dag=dag,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    table="staging_events",
    s3_bucket=bucket_name,
    s3_key=
    "log_data/{execution_date.year}/{execution_date.month}/{ds}-events.json",
    region="us-west-2",
    json_format="s3://udacity-dend/log_json_path.json",
    time_format="epochmillisecs")

# Dimension task for the "songs" table: hands SqlQueries.song_table_insert
# to LoadDimensionOperator with insert_truncate switched off.
load_songs = LoadDimensionOperator(
    task_id="load_songs",
    dag=dag,
    table="songs",
    query=SqlQueries.song_table_insert,
    insert_truncate=False,
    redshift_conn_id="redshift",
)

# Dimension task for the "artists" table, configured the same way but with
# SqlQueries.artist_table_insert as the query.
load_artists = LoadDimensionOperator(
    task_id="load_artists",
    dag=dag,
    table="artists",
    query=SqlQueries.artist_table_insert,
    insert_truncate=False,
    redshift_conn_id="redshift",
)

# songplays fact table insert
load_songplays = LoadFactOperator(task_id="load_songplays",
                                  dag=dag,
                                  redshift_conn_id="redshift",
Exemple #2
0
    table="staging_songs",
    s3_bucket='udacity-dend',
    s3_key="song_data",
    parameters="JSON 'auto'")

# Fact task: passes the songplay insert SQL for table "songplays".
load_songplays_table = LoadFactOperator(
    task_id='Load_songplays_fact_table',
    dag=dag,
    table="songplays",
    sql_code=SqlQueries.songplay_table_insert,
    redshift_conn_id="redshift",
)

# Dimension tasks. delete_parameter=True is forwarded to the operator
# (presumably clears the table before inserting -- confirm in the operator).
load_user_dimension_table = LoadDimensionOperator(
    task_id='Load_user_dim_table',
    dag=dag,
    table="users",
    sql_code=SqlQueries.user_table_insert,
    delete_parameter=True,
    redshift_conn_id="redshift",
)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    dag=dag,
    table="songs",
    sql_code=SqlQueries.song_table_insert,
    delete_parameter=True,
    redshift_conn_id="redshift",
)

load_artist_dimension_table = LoadDimensionOperator(
    task_id='Load_artist_dim_table',
    dag=dag,
Exemple #3
0
    table="staging_songs",
    create_stmt=SqlQueries.create_table_staging_songs)

# Fact task: receives both a CREATE statement and the insert query for
# songplays, plus the Redshift and AWS credential connection ids.
load_songplays_table = LoadFactOperator(
    task_id='Load_songplays_fact_table',
    dag=dag,
    create_stmt=SqlQueries.create_table_songplays,
    sql_query=SqlQueries.songplay_table_insert,
    provide_context=True,
    redshift_conn_id='redshift',
    aws_credentials_id="aws_credentials",
)

# Dimension task for users: same shape, with the users CREATE/insert SQL.
load_user_dimension_table = LoadDimensionOperator(
    task_id='Load_user_dim_table',
    dag=dag,
    create_stmt=SqlQueries.create_table_users,
    sql_query=SqlQueries.user_table_insert,
    provide_context=True,
    redshift_conn_id='redshift',
    aws_credentials_id="aws_credentials",
)

# Dimension task for songs: same shape, with the songs CREATE/insert SQL.
load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    dag=dag,
    create_stmt=SqlQueries.create_table_songs,
    sql_query=SqlQueries.song_table_insert,
    provide_context=True,
    redshift_conn_id='redshift',
    aws_credentials_id="aws_credentials",
)

load_artist_dimension_table = LoadDimensionOperator(
    task_id='Load_artist_dim_table',
    dag=dag,
    table_name="staging_events",
    aws_credentials_id='aws_credentials',
    redshift_conn_id="redshift",
    s3_bucket="udacity-dend",
    s3_key="log_data/{execution_date.year}/{execution_date.month}/")

# Fact task: only a connection id and the songplay insert statement are given.
load_songplays_table = LoadFactOperator(
    task_id='Load_songplays_fact_table',
    dag=dag,
    sql_statement=SqlQueries.songplay_table_insert,
    redshift_conn_id="redshift",
)

# Dimension tasks: one per table, each with its own insert statement.
load_user_dimension_table = LoadDimensionOperator(
    task_id='Load_user_dim_table',
    dag=dag,
    sql_statement=SqlQueries.user_table_insert,
    redshift_conn_id="redshift",
)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    dag=dag,
    sql_statement=SqlQueries.song_table_insert,
    redshift_conn_id="redshift",
)

# NOTE(review): this task is named for the artist table but is given
# SqlQueries.time_table_insert -- confirm whether that is intentional.
load_artist_dimension_table = LoadDimensionOperator(
    task_id='Load_artist_dim_table',
    dag=dag,
    sql_statement=SqlQueries.time_table_insert,
    redshift_conn_id="redshift",
)
# Fact task: songplay insert query targeting table "songplays".
load_songplays_table = LoadFactOperator(
    task_id='Load_songplays_fact_table',
    dag=dag,
    table="songplays",
    sql=SqlQueries.songplay_table_insert,
    redshift_conn_id="redshift",
)

#
# Dimension-table loads.
#      The 'truncate' option lets the operator clean the table before loading.
#

load_user_dimension_table = LoadDimensionOperator(
    task_id='Load_user_dim_table',
    dag=dag,
    table="users",
    sql=SqlQueries.user_table_insert,
    truncate=True,
    redshift_conn_id="redshift",
)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    dag=dag,
    table="songs",
    sql=SqlQueries.song_table_insert,
    truncate=True,
    redshift_conn_id="redshift",
)

load_artist_dimension_table = LoadDimensionOperator(
    task_id='Load_artist_dim_table',
    redshift_conn_id="redshift",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    destination_table="staging_songs",
    load_stagging_table=SqlQueries.COPY_SONGS_SQL.format())

# Fact task: the insert SQL is run through str.format() with no arguments
# before being handed to the operator.
load_songplays_table = LoadFactOperator(
    task_id='Load_songplays_fact_table',
    dag=dag,
    load_fact_table=SqlQueries.songplay_table_insert.format(),
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
)

# Dimension tasks: each gets a TRUNCATE statement formatted with its table
# name, the insert statement, and truncate_table=True.
load_user_dimension_table = LoadDimensionOperator(
    task_id='Load_user_dim_table',
    dag=dag,
    truncate_table=True,
    truncate_table_query=SqlQueries.TRUNCATE_TABLE.format("users"),
    load_dimension_table=SqlQueries.user_table_insert.format(),
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    dag=dag,
    truncate_table=True,
    truncate_table_query=SqlQueries.TRUNCATE_TABLE.format("songs"),
    load_dimension_table=SqlQueries.song_table_insert.format(),
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
)

load_artist_dimension_table = LoadDimensionOperator(
    task_id='Load_artist_dim_table',
Exemple #7
0
    copy_json_option='auto',
    region='us-west-2',
    redshift_conn_id='redshift',
    aws_credentials_id='aws_credentials',
    dag=dag)

# Fact task: SELECT-style query for table 'songplays'.
load_songplays_table = LoadFactOperator(
    task_id='Load_songplays_fact_table',
    dag=dag,
    redshift_conn_id='redshift',
    table='songplays',
    select_sql=SqlQueries.songplay_table_insert,
)

# Dimension tasks: same keyword set, one per dimension table.
load_user_dimension_table = LoadDimensionOperator(
    task_id='Load_user_dim_table',
    dag=dag,
    redshift_conn_id='redshift',
    table='users',
    select_sql=SqlQueries.user_table_insert,
)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    dag=dag,
    redshift_conn_id='redshift',
    table='songs',
    select_sql=SqlQueries.song_table_insert,
)

load_artist_dimension_table = LoadDimensionOperator(
    task_id='Load_artist_dim_table',
    table='artists',
    select_sql=SqlQueries.artist_table_insert,
    redshift_conn_id='redshift',
    json="auto",
    dag=dag
)

# Fact task: schema-qualified target table plus the songplay SQL template.
load_songplays_table = LoadFactOperator(
    task_id='Load_songplays_fact_table',
    dag=dag,
    table="public.songplays",
    sql_template=SqlQueries.songplay_table_insert,
    redshift_conn_id="redshift",
)

# Dimension tasks: identical keyword set, one per "public.<table>".
load_user_dimension_table = LoadDimensionOperator(
    task_id='Load_user_dim_table',
    dag=dag,
    table="public.users",
    sql_template=SqlQueries.user_table_insert,
    redshift_conn_id="redshift",
)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    dag=dag,
    table="public.songs",
    sql_template=SqlQueries.song_table_insert,
    redshift_conn_id="redshift",
)

load_artist_dimension_table = LoadDimensionOperator(
    task_id='Load_artist_dim_table',
    redshift_conn_id="redshift",
Exemple #9
0
    s3_bucket="dend",
    s3_key="song_data",
    file_format="JSON")

# Fact-table load: passes the songplay insert statement for table 'songplays'.
# The task_id strings below are plain literals; the original f-prefixes had no
# placeholders to interpolate and were dropped (the values are unchanged).
load_songplays_table = LoadFactOperator(
    task_id='load_songplays',
    redshift_conn_id="redshift",
    table='songplays',
    sql_stmt=SqlQueries.songplay_table_insert,
    dag=dag)

# Dimension-table loads. truncate=True is forwarded to the operator
# (presumably empties the table before inserting -- confirm in the operator).
load_users_table = LoadDimensionOperator(
    task_id='load_users',
    redshift_conn_id="redshift",
    table='users',
    truncate=True,
    sql_stmt=SqlQueries.user_table_insert,
    dag=dag)

load_songs_table = LoadDimensionOperator(
    task_id='load_songs',
    redshift_conn_id="redshift",
    table='songs',
    truncate=True,
    sql_stmt=SqlQueries.song_table_insert,
    dag=dag)

load_artists_table = LoadDimensionOperator(
    task_id=f'load_artists',
    redshift_conn_id="redshift",
    table='artists',
    truncate=True,
# Staging task: points the operator at the song_data S3 location and the
# 'staging_songs' destination table, with json_path set to 'auto'.
stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    source_s3bucket='s3://udacity-dend/song_data',
    destination_table='staging_songs',
    json_path='auto',
)

# Fact task: only the songplay insert query is supplied.
load_songplays_table = LoadFactOperator(
    dag=dag,
    task_id='Load_songplays_fact_table',
    sql_query=SqlQueries.songplay_table_insert,
)

# Dimension tasks: each receives a truncate statement and an insert statement;
# append_data=False is passed through (presumably selects the
# truncate-then-insert path -- confirm in the operator).
load_user_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_user_dim_table',
    sql_truncate=SqlQueries.user_table_truncate,
    sql_insert=SqlQueries.user_table_insert,
    append_data=False,
)

load_song_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_song_dim_table',
    sql_truncate=SqlQueries.song_table_truncate,
    sql_insert=SqlQueries.song_table_insert,
    append_data=False,
)

load_artist_dimension_table = LoadDimensionOperator(
    task_id='Load_artist_dim_table',
    dag=dag,
    sql_insert=SqlQueries.artist_table_insert,
    sql_truncate=SqlQueries.artist_table_truncate,
Exemple #11
0
    data_conversion_kwargs=None,
    data_load_args=None,
    data_load_kwargs={'STATUPDATE': 'OFF'},
    del_existing=True)

# Fact task: destination table 'songplays' fed by the songplay select SQL.
load_songplays_table = LoadFactOperator(
    task_id='Load_songplays_fact_table',
    dag=dag,
    dest_table='songplays',
    select_sql=SqlQueries.songplay_table_insert,
    redshift_conn_id='redshift',
)

# Dimension tasks: append=False is forwarded to the operator.
load_user_dimension_table = LoadDimensionOperator(
    task_id='Load_user_dim_table',
    dag=dag,
    dest_table='users',
    select_sql=SqlQueries.user_table_insert,
    append=False,
    redshift_conn_id='redshift',
)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    dag=dag,
    dest_table='songs',
    select_sql=SqlQueries.song_table_insert,
    append=False,
    redshift_conn_id='redshift',
)

load_artist_dimension_table = LoadDimensionOperator(
    task_id='Load_artist_dim_table',
    dag=dag,
    is_json_path = 1,
    create_table_query = SqlQueries.staging_songs_table_create
)

# Fact task: schema-qualified destination and the songplay insert query.
load_songplays_table = LoadFactOperator(
    task_id='Load_songplays_fact_table',
    dag=dag,
    destination_table='public.songplays',
    sql_query=SqlQueries.songplay_table_insert,
    redshift_conn_id='redshift',
)

# Dimension tasks: same keyword set, one per destination table.
load_user_dimension_table = LoadDimensionOperator(
    task_id='Load_user_dim_table',
    dag=dag,
    destination_table='public.users',
    sql_query=SqlQueries.user_table_insert,
    redshift_conn_id='redshift',
)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    dag=dag,
    destination_table='public.songs',
    sql_query=SqlQueries.song_table_insert,
    redshift_conn_id='redshift',
)

load_artist_dimension_table = LoadDimensionOperator(
    task_id='Load_artist_dim_table',
    dag=dag,
Exemple #13
0
        'key': AWS_KEY,
        'secret': AWS_SECRET
    })

# Fact task: table name, connection id and the songplay insert statement.
load_songplays_table = LoadFactOperator(
    dag=dag,
    task_id='Load_songplays_fact_table',
    redshift_conn_id='redshift',
    table_name='songplays',
    sql_statement=SqlQueries.songplay_table_insert)

# Dimension tasks: same keyword set for the users and songs tables.
load_user_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_user_dim_table',
    redshift_conn_id='redshift',
    table_name='users',
    sql_statement=SqlQueries.user_table_insert)

load_song_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_song_dim_table',
    redshift_conn_id='redshift',
    table_name='songs',
    sql_statement=SqlQueries.song_table_insert)

load_artist_dimension_table = LoadDimensionOperator(
    task_id='Load_artist_dim_table',
    dag=dag,
    table="staging_demographic",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_default",
    s3_bucket="udacity-dend-dalal",
    s3_key="demographic_data",
    file_type="CSV")

# Fact task for the city table: no table keyword is passed here, only the
# connection id and the insert query.
load_city_table = LoadFactOperator(
    task_id='Load_city_fact_table',
    dag=dag,
    sql_insert_query=SqlQueries.city_table_insert,
    redshift_conn_id="redshift",
)

# Dimension tasks: table name plus the matching insert query.
load_immigrant_dimension_table = LoadDimensionOperator(
    task_id='Load_immigrant_dim_table',
    dag=dag,
    sql_insert_query=SqlQueries.immigrant_table_insert,
    table="immigrant",
    redshift_conn_id="redshift",
)

load_temperature_dimension_table = LoadDimensionOperator(
    task_id='Load_temperature_dim_table',
    dag=dag,
    sql_insert_query=SqlQueries.temperature_table_insert,
    table="temperature",
    redshift_conn_id="redshift",
)

load_demographic_dimension_table = LoadDimensionOperator(
    task_id='Load_demographic_dim_table',
    dag=dag,
    table="demographic",
    redshift_conn_id="redshift",
Exemple #15
0
    sql_statement=SqlQueries.create_staging_urbanization_rate_table,
    format=Variable.get('format'))

# Fact task: supplies CREATE and INSERT SQL for "vaccinations_fact" and asks
# for mode "append-only".
load_vaccinations_fact_table = LoadFactOperator(
    task_id='load_vaccinations_fact_table',
    dag=dag,
    table="vaccinations_fact",
    mode="append-only",
    sql_create=SqlQueries.create_vaccinations_fact_table,
    sql_insert=SqlQueries.vaccinations_fact_table_insert,
    redshift_conn_id="redshift",
)

# Dimension tasks: same keyword set, but with mode "delete-load".
load_country_region_dimension_table = LoadDimensionOperator(
    task_id='Load_country_region_dim_table',
    dag=dag,
    table="country_region_dim",
    mode="delete-load",
    sql_create=SqlQueries.create_country_region_dimension_table,
    sql_insert=SqlQueries.country_region_dimension_table_insert,
    redshift_conn_id="redshift",
)

load__time_dimension_table = LoadDimensionOperator(
    task_id='Load_time_dim_table',
    dag=dag,
    table="time_dim",
    mode="delete-load",
    sql_create=SqlQueries.create_time_dimension_table,
    sql_insert=SqlQueries.time_dimension_table_insert,
    redshift_conn_id="redshift",
)

load__vaccines_dimension_table = LoadDimensionOperator(
    task_id='Load_vaccines_dim_table',
Exemple #16
0
    dag=dag
)

# Fact task: delete_data=True is passed in addition to the select statement.
load_songplays_table = LoadFactOperator(
    task_id='Load_songplays_fact_table',
    dag=dag,
    table="songplays",
    select_statement=SqlQueries.songplay_table_insert,
    delete_data=True,
    conn_id="redshift",
)

# Dimension tasks: no delete_data keyword is given for these two.
load_user_dimension_table = LoadDimensionOperator(
    task_id='Load_user_dim_table',
    dag=dag,
    table="users",
    select_statement=SqlQueries.user_table_insert,
    conn_id="redshift",
)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    dag=dag,
    table="songs",
    select_statement=SqlQueries.song_table_insert,
    conn_id="redshift",
)

load_artist_dimension_table = LoadDimensionOperator(
    task_id='Load_artist_dim_table',
    conn_id="redshift",
    task_id='Stage_country',
    dag=dag,
    table="Country_Stg",
    s3_bucket="udend-capstone-immig",
    s3_key="countryCodes/")

# Staging task: S3 bucket/key of the US demographics data and the staging
# table it is associated with.
stage_usdemo_to_redshift = StageToRedshiftOperator(
    task_id='Stage_us_demographics',
    dag=dag,
    s3_bucket="udend-capstone-immig",
    s3_key="us_demographics/",
    table="us_demographics_stg",
)

# Dimension tasks: each pairs a dim_* table with its insert query.
load_country_dimension_table = LoadDimensionOperator(
    task_id='Load_country_dim_table',
    dag=dag,
    sql=SqlQueries.country_table_insert,
    table="dim_country",
)

load_state_dimension_table = LoadDimensionOperator(
    task_id='Load_state_dim_table',
    dag=dag,
    sql=SqlQueries.state_table_insert,
    table="dim_state",
)

load_i94mode_dimension_table = LoadDimensionOperator(
    task_id='Load_i94mode_dim_table',
    dag=dag,
    sql=SqlQueries.i94mode_table_insert,
    table="dim_i94mode",
)

load_i94visa_dimension_table = LoadDimensionOperator(
    table="staging_songs",
    s3_bucket=S3_BUCKET,
    s3_key=SONG_DATA,
    region=REGION)

# Fact task: connection id comes from the REDSHIFT_CONN_ID constant;
# overwrite=True is forwarded to the operator.
load_songplays_table = LoadFactOperator(
    task_id='Load_songplays_fact_table',
    dag=dag,
    table="songplays",
    sql=SqlQueries.songplay_table_insert,
    overwrite=True,
    redshift_conn_id=REDSHIFT_CONN_ID,
)

# Dimension tasks: same keyword set for users and songs.
load_user_dimension_table = LoadDimensionOperator(
    task_id='Load_user_dim_table',
    dag=dag,
    table="users",
    sql=SqlQueries.user_table_insert,
    overwrite=True,
    redshift_conn_id=REDSHIFT_CONN_ID,
)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    dag=dag,
    table="songs",
    sql=SqlQueries.song_table_insert,
    overwrite=True,
    redshift_conn_id=REDSHIFT_CONN_ID,
)

load_artist_dimension_table = LoadDimensionOperator(
    task_id='Load_artist_dim_table',
    dag=dag,
Exemple #19
0
    table="staging_songs",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    s3_bucket="udacity-dend",
    s3_key="song_data")

# Fact task: no mode keyword is passed for the fact table.
load_songplays_table = LoadFactOperator(
    task_id='Load_songplays_fact_table',
    dag=dag,
    redshift_conn_id="redshift",
    table="songplays",
    sql=SqlQueries.songplay_table_insert,
)

# Dimension tasks: mode="overwrite" is forwarded to the operator.
load_user_dimension_table = LoadDimensionOperator(
    task_id='Load_user_dim_table',
    dag=dag,
    redshift_conn_id="redshift",
    table="users",
    sql=SqlQueries.user_table_insert,
    mode="overwrite",
)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    dag=dag,
    redshift_conn_id="redshift",
    table="songs",
    sql=SqlQueries.song_table_insert,
    mode="overwrite",
)

load_artist_dimension_table = LoadDimensionOperator(
    task_id='Load_artist_dim_table',
    dag=dag,
    aws_credentials_id="aws_credentials",
    s3_bucket="udacity-dend",
    s3_key="song_data/A/A/A",
    json="auto")

# Fact task: truncate_table=True is passed for the fact table as well.
load_songplays_table = LoadFactOperator(
    task_id='Load_songplays_fact_table',
    dag=dag,
    redshift_conn_id="redshift",
    table="public.songplays",
    query=SqlQueries.songplay_table_insert,
    truncate_table=True,
)

# Dimension tasks: same keyword set for public.users and public.songs.
load_user_dimension_table = LoadDimensionOperator(
    task_id='Load_user_dim_table',
    dag=dag,
    redshift_conn_id="redshift",
    table="public.users",
    query=SqlQueries.user_table_insert,
    truncate_table=True,
)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    dag=dag,
    redshift_conn_id="redshift",
    table="public.songs",
    query=SqlQueries.song_table_insert,
    truncate_table=True,
)

load_artist_dimension_table = LoadDimensionOperator(
    task_id='Load_artist_dim_table',
    dag=dag,
Exemple #21
0
    s3_key='song-data/A/B/G/',
    region='US-WEST-2')

# Fact task for public.songplays (connection id 'redshift_conn').
load_songplays_table = LoadFactOperator(
    task_id='Load_songplays_fact_table',
    dag=dag,
    facts_table_name='public.songplays',
    insert_facts_query=SqlQueries.songplay_table_insert,
    redshift_conn_id='redshift_conn',
)

# Dimension task for public.users; append_only_flag=False is forwarded.
load_user_dimension_table = LoadDimensionOperator(
    task_id='Load_user_dim_table',
    dag=dag,
    dim_table_name='public.users',
    select_dim_query=SqlQueries.user_table_insert,
    append_only_flag=False,
    redshift_conn_id='redshift_conn',
)

# Dimension task for public.songs; append_only_flag=False is forwarded.
load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    dag=dag,
    dim_table_name='public.songs',
    select_dim_query=SqlQueries.song_table_insert,
    append_only_flag=False,
    redshift_conn_id='redshift_conn',
)

# Load artists dimension table in redshift.
load_artist_dimension_table = LoadDimensionOperator(
    data_format="JSON"
)

# NOTE(review): in all three tasks `sql` is a plain string holding a query
# name, not a SqlQueries attribute -- presumably the operator resolves it by
# name; confirm against the operator implementation.
load_songplays_table = LoadFactOperator(
    task_id='Load_songplays_fact_table',
    dag=dag,
    table="songplay",
    sql="songplay_table_insert",
    append_only=False,
    redshift_conn_id="redshift",
)

# NOTE(review): the user and song dimension tasks below also target table
# "songplay" -- looks like a copy-paste leftover; verify the intended tables.
load_user_dimension_table = LoadDimensionOperator(
    task_id='Load_user_dim_table',
    dag=dag,
    table="songplay",
    sql="user_table_insert",
    append_only=False,
    redshift_conn_id="redshift",
)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    dag=dag,
    table="songplay",
    sql="song_table_insert",
    append_only=False,
    redshift_conn_id="redshift",
)

load_artist_dimension_table = LoadDimensionOperator(
    task_id='Load_artist_dim_table',
    redshift='redshift',
    s3_key='song_data/',
    s3_bucket='dend',
    table='staging_songs',
)

# Fact task: connection name, target table and the songplay insert query.
# The original omitted the comma after `dag=dag`, so the call did not parse.
load_songplays_table = LoadFactOperator(
    task_id='Load_songplays_fact_table',
    dag=dag,
    redshift='redshift',
    table='songplays',
    sql_query=SqlQueries.songplay_table_insert
)

# Dimension task for users: also passes provide_context and the AWS
# credentials name. The same missing comma after `dag=dag` is restored here.
load_user_dimension_table = LoadDimensionOperator(
    task_id='Load_user_dim_table',
    dag=dag,
    provide_context=True,
    table='users',
    aws_credentials='aws_credentials',
    redshift='redshift',
    sql_query=SqlQueries.user_table_insert
)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    dag=dag
    provide_context=True,
    table='songs',
    aws_credentials='aws_credentials',
    redshift='redshift',
Exemple #24
0
                                            'table':
                                            'songplays',
                                            'fields': [
                                                'start_time', 'userid',
                                                'level', 'songid', 'artistid',
                                                'sessionid', 'location',
                                                'user_agent', 'playid'
                                            ]
                                        },
                                        sql=SqlQueries.songplay_table_insert)

# Dimension task for users: the target table and its column list travel in
# the `params` dict rather than as separate keywords.
load_user_dimension_table = LoadDimensionOperator(
    task_id='Load_user_dim_table',
    dag=dag,
    sql=SqlQueries.user_table_insert,
    params={
        'table': 'users',
        'fields': [
            'userid',
            'first_name',
            'last_name',
            'gender',
            'level',
        ],
    },
    redshift_conn_id="redshift",
)

# Dimension task for songs: same layout with the songs table and columns.
load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    dag=dag,
    sql=SqlQueries.song_table_insert,
    params={
        'table': 'songs',
        'fields': [
            'songid',
            'title',
            'artistid',
            'year',
            'duration',
        ],
    },
    redshift_conn_id="redshift",
)
    region='us-west-1',
    data_format='JSON',
    dag=dag)

# Fact task: append_only=True is forwarded for the fact table.
load_songplays_table = LoadFactOperator(
    task_id='Load_songplays_fact_table',
    dag=dag,
    table="public.songplays",
    select_sql=SqlQueries.songplay_table_insert,
    append_only=True,
    redshift_conn_id='redshift',
)

# Dimension tasks: mode='truncate' is forwarded instead of append_only.
load_user_dimension_table = LoadDimensionOperator(
    task_id='Load_user_dim_table',
    dag=dag,
    table="public.users",
    select_sql=SqlQueries.user_table_insert,
    mode='truncate',
    redshift_conn_id='redshift',
)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    dag=dag,
    table="public.songs",
    select_sql=SqlQueries.song_table_insert,
    mode='truncate',
    redshift_conn_id='redshift',
)

load_artist_dimension_table = LoadDimensionOperator(
    task_id='Load_artist_dim_table',
    redshift_conn_id='redshift',
    table='public.staging_songs',
    redshift_conn_id='redshift',
    aws_credentials_id='aws_credentials',
    s3_bucket='udacity-dend',
    s3_key='song_data')

# Fact task: no append_mode keyword is passed for the fact table.
load_songplays_table = LoadFactOperator(
    task_id='Load_songplays_fact_table',
    dag=dag,
    redshift_conn_id='redshift',
    table='public.songplays',
    sql=SqlQueries.songplay_table_insert,
)

# Dimension tasks: append_mode=False is forwarded to the operator.
load_user_dimension_table = LoadDimensionOperator(
    task_id='Load_user_dim_table',
    dag=dag,
    table='public.users',
    sql=SqlQueries.user_table_insert,
    append_mode=False,
    redshift_conn_id='redshift',
)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    dag=dag,
    table='public.songs',
    sql=SqlQueries.song_table_insert,
    append_mode=False,
    redshift_conn_id='redshift',
)

load_artist_dimension_table = LoadDimensionOperator(
    task_id='Load_artist_dim_table',
    dag=dag,
Exemple #27
0
    table="staging_songs",
    s3_bucket="udacity-dend",
    s3_key="song_data/A/A/A",
    json_path="auto",
    file_type="json")

# Fact task: target table 'songplays' and the songplay insert query.
load_songplays_table = LoadFactOperator(
    task_id='Load_songplays_fact_table',
    dag=dag,
    table='songplays',
    sql_query=SqlQueries.songplay_table_insert,
    redshift_conn_id="redshift",
)

# Dimension tasks: same keyword set for the users and songs tables.
load_user_dimension_table = LoadDimensionOperator(
    task_id='Load_user_dim_table',
    dag=dag,
    table='users',
    sql_query=SqlQueries.user_table_insert,
    redshift_conn_id="redshift",
)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    dag=dag,
    table='songs',
    sql_query=SqlQueries.song_table_insert,
    redshift_conn_id="redshift",
)

load_artist_dimension_table = LoadDimensionOperator(
    task_id='Load_artist_dim_table',
    dag=dag,
    redshift_conn_id="redshift",
    table='artists',
Exemple #28
0
    table_name='dim_review_table',
    redshift_conn_id='redshift',
    sql_statement=create_tables.CREATE_DIM_TABLE_SQL,
)

# Table-creation task: hands the fact-table CREATE SQL from `create_tables`
# to CreateTableOperator under table name 'fact_price_table'.
create_all_fact_table = CreateTableOperator(
    dag=dag,
    task_id='create_all_fact_table',
    redshift_conn_id='redshift',
    table_name='fact_price_table',
    sql_statement=create_tables.CREATE_FACT_TABLE_SQL)

# Dimension loads: each pairs a dim_* table name with its insert query from
# `insert_queries`.
load_review_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='load_review_dimension_table',
    redshift_conn_id='redshift',
    table_name='dim_review_table',
    sql_statement=insert_queries.dim_review_table_insert,
)

load_property_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='load_property_dimension_table',
    redshift_conn_id='redshift',
    table_name='dim_property_table',
    sql_statement=insert_queries.dim_property_table_insert,
)

load_listing_dimension_table = LoadDimensionOperator(
    task_id='load_listing_dimension_table',
    dag=dag,
    table_name='dim_lisiting_table',
    redshift_conn_id='redshift',
Exemple #29
0
)

# Fact task: no truncate keyword is passed for the fact table.
load_songplays_table = LoadFactOperator(
    task_id='Load_songplays_fact_table',
    dag=dag,
    table='songplays',
    insert_sql=SqlQueries.songplay_table_insert,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
)

# Dimension tasks: truncate=True is forwarded to the operator
# (presumably empties the table before inserting -- confirm in the operator).
load_user_dimension_table = LoadDimensionOperator(
    task_id='Load_user_dim_table',
    dag=dag,
    table='users',
    insert_sql=SqlQueries.user_table_insert,
    truncate=True,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    dag=dag,
    table='songs',
    insert_sql=SqlQueries.song_table_insert,
    truncate=True,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
)
Exemple #30
0
    table="public.cities_demographics_from_bucket",
    s3_bucket="temperature-processed-data",
    s3_key="city_demographics")

# Staging task: S3 bucket/key of the airport-code data, the target staging
# table, and the Redshift / AWS credential connection ids.
airport_code_to_redshift = StageToRedshiftOperator(
    task_id='airport_code_data',
    dag=dag,
    s3_bucket="temperature-processed-data",
    s3_key="airport_code",
    table="public.airport_code_from_bucket",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
)

# Dimension tasks: each pairs a public.* table with its insert query.
load_city_temp_dimension_table = LoadDimensionOperator(
    task_id='Load_city_temp_dim_table',
    dag=dag,
    table="public.city_temp",
    query=SqlQueries.city_temp_table_insert,
    redshift_conn_id="redshift",
)

load_immigration_dimension_table = LoadDimensionOperator(
    task_id='Load_immigration_dim_table',
    dag=dag,
    table="public.immigration",
    query=SqlQueries.immigration_table_insert,
    redshift_conn_id="redshift",
)

load_city_demographics_dimension_table = LoadDimensionOperator(
    task_id='Load_city_demographics_dim_table',
    dag=dag,
    redshift_conn_id="redshift",
    table="public.cities_demographics",