s3_key="log_data",
    json_path="s3://udacity-dend/log_json_path.json")

# Staging task for the song data: bucket "udacity-dend", key "song_data",
# table "staging_songs".
stage_songs_to_redshift = StageToRedshiftOperator(
    task_id="Stage_songs",
    dag=dag,
    table="staging_songs",
    s3_bucket="udacity-dend",
    s3_key="song_data",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
)

# Fact-table task driven by SqlQueries.songplay_table_insert.
# NOTE(review): append_data is the *string* 'True', not the boolean True --
# confirm which of the two the operator expects.
load_songplays_table = LoadFactOperator(
    task_id="Load_songplays_fact_table",
    dag=dag,
    table="songplays",
    sql_query=SqlQueries.songplay_table_insert,
    append_data="True",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
)

# Dimension-table task for "users"; same string-valued append_data flag as
# the fact task above.
load_user_dimension_table = LoadDimensionOperator(
    task_id="Load_user_dim_table",
    dag=dag,
    table="users",
    sql_query=SqlQueries.user_table_insert,
    append_data="True",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
# Stage-songs task built on the project's StageToRedshiftOperator.
# The S3 key names one specific song file rather than the song_data prefix.
stage_songs_to_redshift = StageToRedshiftOperator(
    task_id="Stage_songs",
    dag=dag,
    conn_id="redshift",
    aws_credentials_id="aws_credentials",
    table="public.staging_songs",
    s3_bucket="udacity-dend",
    s3_key="song_data/A/A/A/TRAAAAK128F9318786.json",
    json="auto",
)

# Songplays fact task: runs SqlQueries.songplay_table_insert against
# public.songplays through the "redshift" connection.
load_songplays_table = LoadFactOperator(
    task_id="Load_songplays_fact_table",
    dag=dag,
    conn_id="redshift",
    table="public.songplays",
    sql_statement=SqlQueries.songplay_table_insert,
)

# Users dimension task: runs SqlQueries.user_table_insert against
# public.users through the "redshift" connection.
load_user_dimension_table = LoadDimensionOperator(
    task_id="Load_user_dim_table",
    dag=dag,
    conn_id="redshift",
    table="public.users",
    sql_statement=SqlQueries.user_table_insert,
)

# load the songs redshift table using the user defined operator
load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    dag=dag,
# Staging task for song data; connection ids and the bucket come from
# module-level constants defined outside this excerpt.
stage_songs_to_redshift = StageToRedshiftOperator(
    task_id="Stage_songs",
    dag=dag,
    provide_context=True,
    conn_id=REDSHIFT_CONN_ID,
    aws_credentials_id=AWS_CREDENTIALS_ID,
    table="staging_songs",
    s3_bucket=INPUT_BUCKET,
    s3_key="song_data",
    file_format="JSON 'auto'",
)

# fact_table_name_and_query is indexed as [0] = table name, [1] = SQL; the
# table name is also embedded in the task id.
load_songplays_table = LoadFactOperator(
    task_id=f"Load_{fact_table_name_and_query[0]}_fact_table",
    dag=dag,
    conn_id=REDSHIFT_CONN_ID,
    table=fact_table_name_and_query[0],
    sql=fact_table_name_and_query[1],
)

# One LoadDimensionOperator per (table name, query) entry of
# dim_tables_name_to_query, in the mapping's iteration order.
dim_operators = [
    LoadDimensionOperator(
        task_id=f"Load_{name}_dim_table",
        dag=dag,
        conn_id=REDSHIFT_CONN_ID,
        table=name,
        sql=query,
    )
    for name, query in dim_tables_name_to_query.items()
]

run_quality_checks = DataQualityOperator(
Example #4
0
# Staging task restricted to the "song_data/A/A/A/" key prefix; passes an
# extra "aws_arn" id alongside the credentials id.
stage_songs_to_redshift = StageToRedshiftOperator(
    task_id="Stage_songs",
    dag=dag,
    table="staging_songs",
    s3_bucket="udacity-dend",
    s3_key="song_data/A/A/A/",
    json_format="auto",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    aws_arn_id="aws_arn",
)

# Fact task: truncate_data is off and append_data is on.
load_songplays_table = LoadFactOperator(
    task_id="Load_songplays_fact_table",
    dag=dag,
    table="songplays",
    sql_query=SqlQueries.songplay_table_insert,
    truncate_data=False,
    append_data=True,
)

# Users dimension task: truncate_data is on.
load_user_dimension_table = LoadDimensionOperator(
    task_id="Load_user_dim_table",
    dag=dag,
    table="users",
    sql_query=SqlQueries.user_table_insert,
    truncate_data=True,
)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    dag=dag,
    table="songs",
Example #5
0
# Staging task for song data; extra_params carries the text "JSON 'auto'".
stage_songs_to_redshift = StageToRedshiftOperator(
    task_id="Stage_songs",
    dag=dag,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    table="staging_songs",
    s3_bucket="udacity-dend",
    s3_key="song_data",
    region="us-west-2",
    extra_params="JSON 'auto'",
)

# Fact task with append_only enabled.
load_songplays_table = LoadFactOperator(
    task_id="Load_songplays_fact_table",
    dag=dag,
    redshift_conn_id="redshift",
    table="songplays",
    sql=SqlQueries.songplay_table_insert,
    append_only=True,
)

# Users dimension task with append_only disabled.
load_user_dimension_table = LoadDimensionOperator(
    task_id="Load_user_dim_table",
    dag=dag,
    redshift_conn_id="redshift",
    table="users",
    sql=SqlQueries.user_table_insert,
    append_only=False,
)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    dag=dag,
    table="public.staging_songs",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    s3_bucket="udacity-dend",
    s3_key="song_data/A/A/A",
    region = "us-west-2",
    #https://knowledge.udacity.com/questions/253565
    json_format = "auto",
    create_table = create_tables.staging_songs_table,
    provide_context = True
)

# Fact task: receives both a create-table statement (create_tables module)
# and the insert statement (SqlQueries) for public.songplays.
load_songplays_table = LoadFactOperator(
    task_id="Load_songplays_fact_table",
    dag=dag,
    table="public.songplays",
    redshift_conn_id="redshift",
    load_table=SqlQueries.songplay_table_insert,
    create_table=create_tables.songplay_table_create,
)

# Users dimension task: same pair of statements, plus truncate_insert=True.
load_user_dimension_table = LoadDimensionOperator(
    task_id="Load_user_dim_table",
    dag=dag,
    table="public.users",
    redshift_conn_id="redshift",
    load_table=SqlQueries.user_table_insert,
    create_table=create_tables.user_table_create,
    truncate_insert=True,
)

load_song_dimension_table = LoadDimensionOperator(
    file_format='csv')

# Staging task for songs; connection ids, bucket and key are taken from
# variables defined outside this excerpt. file_format is 'csv'.
stage_songs_to_redshift = StageToRedshiftOperator(
    task_id="Stage_songs",
    dag=dag,
    target_table="staging_songs",
    s3_bucket=s3_bucket,
    s3_key=s3_songs,
    file_format="csv",
    redshift_conn_id=redshift_credentials_id,
    aws_credentials_id=aws_credentials_id,
    provide_context=True,
)

# Fact task targeting "songplays".
load_songplays_table = LoadFactOperator(
    task_id="Load_songplays_fact_table",
    dag=dag,
    target_table="songplays",
    sql=SqlQueries.songplay_table_insert,
    redshift_conn_id=redshift_credentials_id,
)

# Users dimension task with insert_mode 'truncate'.
load_user_dimension_table = LoadDimensionOperator(
    task_id="Load_user_dim_table",
    dag=dag,
    target_table="users",
    sql=SqlQueries.user_table_insert,
    insert_mode="truncate",
    redshift_conn_id=redshift_credentials_id,
    provide_context=True,
)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    dag=dag,
Example #8
0
# Staging task for song data; also receives the CREATE statement for the
# staging table via `sql`.
# NOTE(review): the keyword is spelled `file_typ` -- presumably that is the
# operator's parameter name; verify against the operator definition.
stage_songs_to_redshift = StageToRedshiftOperator(
    task_id="Stage_songs",
    dag=dag,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    table="staging_songs",
    s3_bucket="udacity-dend",
    s3_key="song_data",
    file_typ="json",
    sql=SqlQueries.create_staging_songs_table,
)

# Fact task in "append" mode, given create and insert SQL for "songplays".
load_songplays_table = LoadFactOperator(
    task_id="Load_songplays_fact_table",
    dag=dag,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    target_table="songplays",
    mode="append",
    create_table_sql=SqlQueries.create_songplays_table,
    insert_table_sql=SqlQueries.songplay_table_insert,
)

# Users dimension task in "overwrite" mode.
load_user_dimension_table = LoadDimensionOperator(
    task_id="Load_user_dim_table",
    dag=dag,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    target_table="users",
    mode="overwrite",
    create_table_sql=SqlQueries.create_users_table,
    insert_table_sql=SqlQueries.user_table_insert,
)
Example #9
0
    s3_data_category_prefix='log_data/',
    execution_date=None)

# Staging task for the "song_data/" prefix of bucket "udacity-dend".
stage_songs_to_redshift = StageToRedshiftOperator(
    # task_id, dag and provide_context are BaseOperator arguments
    task_id="copy_data_from_s3_song_data_prefix_to_staging_songs_table",
    dag=elt_dag,
    provide_context=True,
    # operator-specific arguments
    s3_bucket="udacity-dend",
    s3_data_category_prefix="song_data/",
    redshift_table="staging_songs",
    execution_date=None,
)

# Move data from the staging tables into the star-schema tables.
load_songplays_fact_table = LoadFactOperator(
    task_id="Load_songplays_fact_table",
    dag=elt_dag,
    sql_query=SqlQueries.songplay_table_insert,
)

# Both dimension tasks below pass append_data_boolean=False.
load_user_dimension_table = LoadDimensionOperator(
    task_id="Load_user_dim_table",
    dag=elt_dag,
    table="public.dim_users",
    sql_query=SqlQueries.user_table_insert,
    append_data_boolean=False,
)

load_song_dimension_table = LoadDimensionOperator(
    task_id="Load_song_dim_table",
    dag=elt_dag,
    table="public.dim_songs",
    sql_query=SqlQueries.song_table_insert,
    append_data_boolean=False,
)
    context=True,
    s3_bucket="udacity-dend/log_data/2018/11/2018-11-12-events.json",
    execution_date=start_date)

# Staging task for the song data (table "staging_songs").
# The task id was 'Stage_events', copied from the events staging task; it is
# now 'Stage_songs' so the id matches what this task loads and cannot clash
# with the events task inside the same DAG.
stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    table="staging_songs",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    region="us-west-2",
    context=True,
    # NOTE(review): s3_bucket holds a full "s3://bucket/prefix" URL here --
    # confirm that is the form the operator expects.
    s3_bucket="s3://udacity-dend/song_data",
    execution_date=start_date)

# The remaining tasks are created with only a task id and the DAG; any
# further configuration presumably comes from operator defaults.
load_songplays_table = LoadFactOperator(task_id='Load_songplays_fact_table',
                                        dag=dag)

load_user_dimension_table = LoadDimensionOperator(
    task_id='Load_user_dim_table', dag=dag)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table', dag=dag)

load_artist_dimension_table = LoadDimensionOperator(
    task_id='Load_artist_dim_table', dag=dag)

load_time_dimension_table = LoadDimensionOperator(
    task_id='Load_time_dim_table', dag=dag)

run_quality_checks = DataQualityOperator(task_id='Run_data_quality_checks',
                                         dag=dag)
    s3_key="log_data",
    json_format="s3://udacity-dend/log_json_path.json",
    provide_context=True,
    dag=dag)
# Staging task for song data (bucket "udacity-dend", key "song_data").
stage_songs_to_redshift = StageToRedshiftOperator(
    task_id="Stage_songs",
    dag=dag,
    connection_id="redshift",
    aws_credentials_id="aws_credentials",
    table="staging_songs",
    s3_bucket="udacity-dend",
    s3_key="song_data",
)

# Fact task for "songplays".
load_songplays_table = LoadFactOperator(
    task_id="Load_songplays_fact_table",
    dag=dag,
    fact_table="songplays",
    sql_statement=SqlQueries.songplay_table_insert,
)

# Users dimension task in 'truncate-insert' mode.
load_user_dimension_table = LoadDimensionOperator(
    task_id="Load_user_dim_table",
    dag=dag,
    dimension_table="users",
    sql_statement=SqlQueries.user_table_insert,
    mode="truncate-insert",
)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    dimension_table='songs',
    sql_statement=SqlQueries.song_table_insert,
    mode='truncate-insert',
Example #12
0
    end_date=datetime(2018, 11, 30, hour=23),
    table='staging_events',
    redshift_conn_id='redshift',
    aws_credentials_id='aws_credentials',
    s3_bucket='udacity-dend',
    copy_sql=SqlQueries.copy_staging_events,
    params={'log_path':'s3://udacity-dend/log_json_path.json'},
    # s3_key='log_data/'
    s3_key='log_data/{{macros.ds_format(ds,"%Y-%m-%d","%Y")}}/{{macros.ds_format(ds,"%Y-%m-%d","%m")}}/{{ds}}-events.json'
)

# Fact task for "songplays", attached to dag_3.
load_songplays_table = LoadFactOperator(
    task_id="Load_songplays_fact_table",
    dag=dag_3,
    table="songplays",
    sql_query=SqlQueries.songplay_table_insert,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
)

# Songs dimension task with truncate_insert enabled.
load_song_dimension_table = LoadDimensionOperator(
    task_id="Load_song_dim_table",
    dag=dag_3,
    table="songs",
    sql_query=SqlQueries.song_table_insert,
    truncate_insert=True,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
)
Example #13
0
    dag=dag,
    provide_context=True,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    table="staging_songs",
    s3_bucket="udacity-dend",
    s3_key="song_data/",
    region="us-west-2",
    file_format="JSON",
    json_path="",
    file_dated=False)

# Fact task for "songplays"; provide_context is switched on.
load_songplays_table = LoadFactOperator(
    task_id="Load_songplays_fact_table",
    dag=dag,
    table="songplays",
    sql_query=SqlQueries.songplay_table_insert,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    provide_context=True,
)

# Users dimension task, configured the same way as the fact task.
load_user_dimension_table = LoadDimensionOperator(
    task_id="Load_user_dim_table",
    dag=dag,
    table="users",
    sql_query=SqlQueries.user_table_insert,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    provide_context=True,
)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
# Staging task limited to the "song_data/A/A/A" key.
stage_songs_to_redshift = StageToRedshiftOperator(
    task_id="Stage_songs",
    dag=dag,
    table="staging_songs",
    s3_bucket="udacity-dend",
    s3_key="song_data/A/A/A",
    file_type="json",
    json_parameter="auto",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
)

# Fact task; delete_data is set to True.
load_songplays_table = LoadFactOperator(
    task_id="Load_songplays_fact_table",
    dag=dag,
    table="songplays",
    source_sql_command=SqlQueries.songplay_table_insert,
    delete_data=True,
    redshift_conn_id="redshift",
)

# Users dimension task; delete_data is not passed here.
load_user_dimension_table = LoadDimensionOperator(
    task_id="Load_user_dim_table",
    dag=dag,
    table="users",
    source_sql_command=SqlQueries.user_table_insert,
    redshift_conn_id="redshift",
)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    dag=dag,
    redshift_conn_id="redshift",
        'target_table': 'staging_visit',
        's3_bucket': 's3://cap-proj-ds/visits',
        'aws_credentials': {
            'AWS_KEY': AWS_KEY,
            'AWS_SECRET': AWS_SECRET
        },
        'format': 'json'
    })

# Fact task for "time_window_met". All load settings travel in `params`:
# the target table, the insert SQL and the list of insert field names.
load_time_window_met_table = LoadFactOperator(
    task_id="time_window_met_fact_table",
    dag=dag,
    provide_context=True,
    params={
        "target_table": "time_window_met",
        "sql": SqlQueries.time_window_met_table_insert,
        "insert_fields": [
            "time_window_id",
            "checkout_time_id",
            "checkout_time",
            "account_id",
            "visit_id",
            "met",
        ],
    },
)

load_time_window_table = LoadDimensionOperator(
    task_id='time_window_fact_table',
    dag=dag,
    provide_context=True,
    params={
        'target_table':
        'time_window',
        'sql':
# Staging task limited to the "song_data/A/A/A" key; json_path is "auto".
stage_songs_to_redshift = StageToRedshiftOperator(
    dag=dag,
    task_id="stage_songs_to_redshift",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    s3_bucket="udacity-dend",
    s3_key="song_data/A/A/A",
    json_path="auto",
    table="staging_songs",
)

# Fact task for "songplays".
load_songplays_table = LoadFactOperator(
    dag=dag,
    task_id="load_songplays_table",
    redshift_conn_id="redshift",
    load_sql_stmt=SqlQueries.songplay_table_insert,
    table="songplays",
)

# Dimension task for "users".
load_user_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id="load_user_dimension_table",
    redshift_conn_id="redshift",
    load_sql_stmt=SqlQueries.user_table_insert,
    table="users",
)

load_song_dimension_table = LoadDimensionOperator(
    task_id="load_song_dimension_table",
    dag=dag,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    table="staging_songs",
    s3_bucket="udacity-dend",
    s3_key="song_data",
    ignore_headers="0",
    data_format="json",
    task_id='Stage_songs',
    provide_context=True,
    dag=dag)

# Load data into the fact and dimension tables.

# Fact task writing to destination table "songplays".
load_songplays_table = LoadFactOperator(
    task_id="Load_songplays_fact_table",
    dag=dag,
    redshift_conn_id="redshift",
    destination_table="songplays",
    sql_statement=SqlQueries.songplay_table_insert,
)

# Users dimension task; update_mode is "insert".
load_user_dimension_table = LoadDimensionOperator(
    task_id="Load_user_dim_table",
    dag=dag,
    redshift_conn_id="redshift",
    destination_table="users",
    sql_statement=SqlQueries.user_table_insert,
    update_mode="insert",
)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    redshift_conn_id="redshift",
    destination_table="songs",
# Staging task for the "song_data/" prefix (JSON files, json_path "auto").
stage_songs_to_redshift = StageToRedshiftOperator(
    task_id="stage_songs_task",
    dag=dag,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    table="staging_songs",
    s3_bucket="udacity-dend",
    s3_key="song_data/",
    file_type="json",
    json_path="auto",
)

# Load from the staging tables into the Redshift fact/dimension tables.

load_songplays_table = LoadFactOperator(
    task_id="load_songplays_fact_table_task",
    dag=dag,
    table="songplays",
    sql_source=SqlQueries.songplay_table_insert,
    redshift_conn_id="redshift",
)

load_user_dimension_table = LoadDimensionOperator(
    task_id="load_user_dim_table_task",
    dag=dag,
    table="users",
    sql_source=SqlQueries.user_table_insert,
    redshift_conn_id="redshift",
)

load_song_dimension_table = LoadDimensionOperator(
    task_id='load_song_dim_table_task',
    dag=dag,
    redshift_conn_id="redshift",
    table='songs',
Example #19
0
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    region='ap-south-1b',
    table="staging_songs",
    provide_context=True,
    s3_bucket="udacity-dend",
    s3_key="song_data",
    file_typ="json",
    json_path="auto",
)

# Fact task driven by SqlQueries.songplay_table_insert.
# NOTE(review): 'ap-south-1b' has the shape of an availability-zone name
# (region 'ap-south-1' plus zone suffix 'b') -- confirm the operator really
# wants this value for `region`.
load_songplays_table = LoadFactOperator(
    task_id="Load_songplays_fact_table",
    dag=dag,
    provide_context=True,
    sql_query=SqlQueries.songplay_table_insert,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    region="ap-south-1b",
)

# Users dimension task; same connection/region settings as the fact task.
load_user_dimension_table = LoadDimensionOperator(
    task_id="Load_user_dim_table",
    dag=dag,
    provide_context=True,
    sql_query=SqlQueries.user_table_insert,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    region="ap-south-1b",
)
Example #20
0
    s3_key='song_data',
    s3_format='json',
    s3_format_mode='auto',
    region='us-west-2',
    target_table='staging_songs',
    aws_credentials_id='aws_credentials',
    redshift_conn_id='redshift',
    render_s3_key=False,
    truncate=True)

# Fact table load: truncate is off. target_columns is a parenthesised,
# comma-separated column list passed as a single string.
load_songplays_table = LoadFactOperator(
    task_id="Load_songplays_fact_table",
    dag=dag,
    redshift_conn_id="redshift",
    target_table="songplays",
    target_columns=(
        "(playid, start_time, userid, level, songid, "
        "artistid, sessionid, location, user_agent)"
    ),
    sql_transform_command=SqlQueries.songplay_table_insert,
    truncate=False,
)

# Dimension table load: truncate is on.
load_user_dimension_table = LoadDimensionOperator(
    task_id="Load_user_dim_table",
    dag=dag,
    redshift_conn_id="redshift",
    target_table="users",
    target_columns="(userid, first_name, last_name, gender, level)",
    sql_transform_command=SqlQueries.user_table_insert,
    truncate=True,
)
Example #21
0
# Staging task limited to the "song_data/A/A/A" key. format_file bundles
# the JSON mode, compupdate setting and region into one option string.
stage_songs_to_redshift = StageToRedshiftOperator(
    task_id="Stage_songs",
    dag=dag,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    table="staging_songs",
    s3_bucket="udacity-dend",
    s3_key="song_data/A/A/A",
    type_file="json",
    format_file="json 'auto' compupdate off region 'us-west-2'",
)

# Fact task for "songplays".
load_songplays_table = LoadFactOperator(
    task_id="Load_songplays_fact_table",
    dag=dag,
    redshift_conn_id="redshift",
    table="songplays",
    load_sql_script=SqlQueries.songplay_table_insert,
)

# Dimension task for "users".
load_user_dimension_table = LoadDimensionOperator(
    task_id="Load_user_dim_table",
    dag=dag,
    redshift_conn_id="redshift",
    table="users",
    load_sql_script=SqlQueries.user_table_insert,
)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    dag=dag,
    table="songs",
    redshift_conn_id="redshift",
Example #22
0
# Staging task for song data, targeting public.staging_songs.
stage_songs_to_redshift = StageToRedshiftOperator(
    task_id="Stage_songs",
    dag=dag,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    s3_bucket="udacity-dend",
    s3_key="song_data",
    json="auto",
    target_table="public.staging_songs",
)

# Fact and dimension tasks below both set truncate_target_table=True.
load_songplays_table = LoadFactOperator(
    task_id="Load_songplays_fact_table",
    dag=dag,
    redshift_conn_id="redshift",
    query=SqlQueries.songplay_table_insert,
    target_table="public.songplays",
    truncate_target_table=True,
)

load_user_dimension_table = LoadDimensionOperator(
    task_id="Load_user_dim_table",
    dag=dag,
    redshift_conn_id="redshift",
    query=SqlQueries.user_table_insert,
    target_table="public.users",
    truncate_target_table=True,
)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    task_id='Stage_aggregations',
    provide_context=True,
    dag=dag,
    table="Staging_aggregations",
    conn_id="redshift",
    aws_credentials_id="aws_credentials",
    s3_bucket="social-system-test/",
    s3_key="temp/post_agg_{run_id}.csv",
    region="us-east-1",
    file_type="csv")

# Fact task for the "history" table, fed by SqlQueries.get_profile_history
# with truncate enabled.
load_history_table = LoadFactOperator(
    task_id="Load_history_fact_table",
    dag=dag,
    conn_id="redshift",
    table="history",
    query=SqlQueries.get_profile_history,
    truncate=True,
    provide_context=True,
)

# Data-quality task checking the single table "history".
run_quality_checks = DataQualityOperator(
    task_id="Run_data_quality_checks",
    dag=dag,
    conn_id="redshift",
    tables=["history"],
    provide_context=True,
)

# Placeholder task with id "Stop_execution".
end_operator = DummyOperator(dag=dag, task_id="Stop_execution")

# Task ordering: start_operator precedes stage_users_to_redshift and
# get_ES_data; get_ES_data precedes stage_aggregations_to_redshift.
start_operator >> stage_users_to_redshift
start_operator >> get_ES_data >> stage_aggregations_to_redshift
    options=["json 's3://udacity-dend/log_json_path.json'"])

# Staging task for song data: origin bucket/prefix in, public.staging_songs
# as schema/table target; `options` is a list of option strings.
stage_songs_to_redshift = StageToRedshiftOperator(
    task_id="Stage_songs",
    dag=dag,
    aws_connection_id="aws_credentials",
    redshift_connection_id="redshift",
    s3_origin="udacity-dend",
    s3_prefix="song_data",
    schema_target="public",
    table_target="staging_songs",
    options=["json 'auto'"],
)

# Fact and dimension tasks below both set truncate_before=True.
load_songplays_table = LoadFactOperator(
    task_id="Load_songplays_fact_table",
    dag=dag,
    redshift_connection_id="redshift",
    table_target="songplays",
    query=SqlQueries.songplay_table_insert,
    truncate_before=True,
)

load_user_dimension_table = LoadDimensionOperator(
    task_id="Load_user_dim_table",
    dag=dag,
    redshift_connection_id="redshift",
    table_target="users",
    query=SqlQueries.user_table_insert,
    truncate_before=True,
)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    table_target='songs',
    dag=dag,
Example #25
0
    s3_key="log_data",
    file_type="JSON 's3://udacity-dend/log_json_path.json'")

# Staging task for the song data.
# Two fixes relative to the previous version:
#   * file_type pointed at log_json_path.json, the JSONPaths file written
#     for the *log* (events) data; song files do not follow that layout, so
#     the song load now uses "JSON 'auto'".
#   * the table was written as "[public].staging_songs"; square-bracket
#     quoting is not Redshift identifier syntax, so the plain schema-qualified
#     name is used instead.
# NOTE(review): assumes file_type and table are interpolated into the COPY
# statement by the operator -- confirm against the operator definition.
stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    table="public.staging_songs",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    s3_bucket="udacity-dend",
    s3_key="song_data",
    file_type="JSON 'auto'")

# Fact task for "songplays"; no truncate flag is passed.
load_songplays_table = LoadFactOperator(
    task_id='Load_songplays_fact_table',
    dag=dag,
    redshift_conn_id="redshift",
    table="songplays",
    select_query=SqlQueries.songplay_table_insert)

# Users dimension task with truncate_table enabled.
load_user_dimension_table = LoadDimensionOperator(
    task_id='Load_user_dim_table',
    dag=dag,
    redshift_conn_id="redshift",
    table="users",
    truncate_table=True,
    select_query=SqlQueries.user_table_insert)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    dag=dag,
    redshift_conn_id="redshift",
Example #26
0
    json_path='s3://udacity-dend/log_json_path.json',
    provide_context=True)

# Staging task for song data; the Redshift connection id is passed under
# the keyword postgres_conn_id.
stage_songs_to_redshift = StageToRedshiftOperator(
    task_id="Stage_songs",
    dag=dag,
    table="staging_songs",
    s3_bucket="udacity-dend",
    s3_key="song_data",
    postgres_conn_id="redshift",
    aws_credentials_id="aws_credentials",
)

# Fact task with append=True.
load_songplays_table = LoadFactOperator(
    task_id="Load_songplays_fact_table",
    dag=dag,
    table="songplays",
    sql_statement=SqlQueries.songplay_table_insert,
    append=True,
    postgres_conn_id="redshift",
)

# Users dimension task; no append flag is passed.
load_user_dimension_table = LoadDimensionOperator(
    task_id="Load_user_dim_table",
    dag=dag,
    table="users",
    sql_statement=SqlQueries.user_table_insert,
    postgres_conn_id="redshift",
)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    dag=dag,
    postgres_conn_id='redshift',
    table='staging_events',
    s3_addr='s3://udacity-dend/log_data',
    task_id='Stage_events',
    provide_context=True,
    dag=dag)

# Staging task for song data; the S3 location is one full URL in s3_addr.
stage_songs_to_redshift = StageToRedshiftOperator(
    task_id="Stage_songs",
    dag=dag,
    table="staging_songs",
    s3_addr="s3://udacity-dend/song_data",
    aws_conn_id="aws_credentials",
    redshift_conn_id="redshift",
)

# Fact task for "songplays"; no SQL is passed at this call site.
load_songplays_table = LoadFactOperator(
    task_id="Load_songplays_fact_table",
    dag=dag,
    table="songplays",
    redshift_conn_id="redshift",
)

# Dimension tasks below both pass replace=True.
load_user_dimension_table = LoadDimensionOperator(
    task_id="Load_user_dim_table",
    dag=dag,
    table="users",
    replace=True,
    redshift_conn_id="redshift",
)

load_song_dimension_table = LoadDimensionOperator(
    task_id="Load_song_dim_table",
    dag=dag,
    table="songs",
    replace=True,
    redshift_conn_id="redshift",
)
# Staging task for song data in region 'us-west-2', json_path 'auto'.
stage_songs_to_redshift = StageToRedshiftOperator(
    task_id="Stage_songs",
    dag=dag,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    table="staging_songs",
    s3_bucket="udacity-dend",
    s3_key="song_data",
    region="us-west-2",
    json_path="auto",
    provide_context=True,
)

# Fact task for "songplays"; provide_context is switched on.
load_songplays_fact_table = LoadFactOperator(
    task_id="Load_songplays_fact_table",
    dag=dag,
    redshift_conn_id="redshift",
    table="songplays",
    sql_query=SqlQueries.songplay_table_insert,
    provide_context=True,
)

# Users dimension task; provide_context is not passed here.
load_user_dimension_table = LoadDimensionOperator(
    task_id="Load_user_dim_table",
    dag=dag,
    redshift_conn_id="redshift",
    table="users",
    sql_query=SqlQueries.user_table_insert,
)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    dag=dag,
    redshift_conn_id='redshift',
    region='eu-north-1')

# Staging task for song data from bucket "udac001", key "song_asif".
# The table name previously carried two trailing spaces ("staging_songs  ");
# they are removed so the identifier matches the real table name wherever
# the operator compares or logs it.
stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    table="staging_songs",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    s3_bucket="udac001",
    s3_key="song_asif",
    copy_json_option='auto',
    region='eu-north-1')

# Fact task for "songplays"; no truncate flag is passed.
load_songplays_table = LoadFactOperator(
    task_id='Load_songplays_fact_table',
    dag=dag,
    redshift_conn_id='redshift',
    table='songplays',
    select_sql=SqlQueries.songplay_table_insert)

# Users dimension task with truncate enabled.
load_user_dimension_table = LoadDimensionOperator(
    task_id='Load_user_dim_table',
    dag=dag,
    redshift_conn_id='redshift',
    table='users',
    truncate=True,
    select_sql=SqlQueries.user_table_insert)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    dag=dag,
    redshift_conn_id='redshift',
# Staging task limited to the "song_data/A/A/A" key (JSON, json_path "auto").
stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='stage_songs'
    , dag=dag
    , redshift_conn_id="redshift"
    , aws_credentials_id="aws_credentials"
    , table="staging_songs"
    , s3_bucket="udacity-dend"
    , s3_key="song_data/A/A/A"
    , json_path="auto"
    , file_type="json"
)

# Fact task for "songplays".
load_songplays_table = LoadFactOperator(
    task_id='load_songplays_fact_table'
    , dag=dag
    , redshift_conn_id="redshift"
    , table='songplays'
    , sql_stmt=SqlQueries.songplay_table_insert
)

# Dimension task for "users".
# The first argument used to be preceded by a comma ("(, task_id=..."),
# which is a SyntaxError; in this comma-first layout the first argument
# must not have a leading comma.
load_user_dimension_table = LoadDimensionOperator(
    task_id='load_user_dim_table'
    , dag=dag
    , redshift_conn_id="redshift"
    , table='users'
    , sql_stmt=SqlQueries.user_table_insert
)

load_song_dimension_table = LoadDimensionOperator(
    task_id='load_song_dim_table'
    , dag=dag