コード例 #1
0
    s3_bucket="udacity-dend",
    s3_key="song_data/",
    aws_region="us-west-2",
    json="auto")

# Task 'Load_songplays_fact_table': fact load into public.songplays.
load_songplays_table = LoadFactOperator(
    dag=dag,
    task_id='Load_songplays_fact_table',
    destination_table="public.songplays",
    sql_insert=SqlQueries.songplay_table_insert,
    redshift_conn_id="redshift",
)

# Task 'Load_user_dim_table': dimension load into public.users (delete=True).
load_user_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_user_dim_table',
    destination_table="public.users",
    sql_insert=SqlQueries.user_table_insert,
    redshift_conn_id="redshift",
    delete=True,
)

# Task 'Load_song_dim_table': dimension load into public.songs (delete=True).
load_song_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_song_dim_table',
    destination_table="public.songs",
    sql_insert=SqlQueries.song_table_insert,
    redshift_conn_id="redshift",
    delete=True,
)

load_artist_dimension_table = LoadDimensionOperator(
    task_id='Load_artist_dim_table',
    dag=dag,
    json_path='auto')

# Step 4: fill the songplays fact table from the staging tables.
load_songplays_table = LoadFactOperator(
    dag=dag,
    task_id='Load_songplays_fact_table',
    table="songplays",
    sql_query=SqlQueries.songplay_table_insert,
    redshift_conn_id="redshift",
    delete_first=True,
)

# Step 5: fill the users dimension table from the staging tables.
load_user_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_user_dim_table',
    table="users",
    sql_query=SqlQueries.user_table_insert,
    redshift_conn_id="redshift",
    delete_first=True,
)

# Step 6: fill the songs dimension table from the staging tables.
load_song_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_song_dim_table',
    table="songs",
    sql_query=SqlQueries.song_table_insert,
    redshift_conn_id="redshift",
    delete_first=True,
)

#7. Use staging tables to populate artist table
load_artist_dimension_table = LoadDimensionOperator(
コード例 #3
0
    redshift_conn_id = 'redshift',
    table="songplays",
    sql_query = SqlQueries.songplay_table_insert,
    dag=dag,
    append_only=False
)

# Connects to Redshift and runs LoadDimensionOperator with the users insert
# statement from sql_queries.py.
load_user_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_user_dim_table',
    table="users",
    sql_query=SqlQueries.user_table_insert,
    redshift_conn_id='redshift',
    append_only=False,
)

"""
connecting to redshift
running the LoadDimensionOperator operator with sql_queries.py
"""
load_song_dimension_table = LoadDimensionOperator(

    task_id='Load_song_dim_table',
    redshift_conn_id = 'redshift',
    table="songs",
    sql_query = SqlQueries.song_table_insert,
    append_data=True,
    aws_credentials={
        'key': AWS_KEY,
        'secret': AWS_SECRET
    },
    region='us-west-2',
    sql_statement=SqlQueries.songplays_table_insert,
    provide_context=True)

# Task 'Load_users_dim_table': dimension load into users (append_data=False).
load_users_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_users_dim_table',
    redshift_conn_id='redshift',
    aws_credentials={'key': AWS_KEY, 'secret': AWS_SECRET},
    region='us-west-2',
    target_table='users',
    sql_statement=SqlQueries.users_table_insert,
    append_data=False,
    provide_context=True,
)

load_songs_dimension_table = LoadDimensionOperator(
    task_id='Load_songs_dim_table',
    dag=dag,
    target_table='songs',
    redshift_conn_id='redshift',
    append_data=False,
    aws_credentials={
        'key': AWS_KEY,
コード例 #5
0
    aws_credentials_id='aws_credentials',
    s3_bucket='udacity-dend',
    s3_key=
    'song_data',  # load a small portion of song data with 'song_data/A/A/A'
    json_path='auto')

# Task 'Load_songplays_fact_table': fact load into songplays.
load_songplays_table = LoadFactOperator(
    dag=dag,
    task_id='Load_songplays_fact_table',
    target_table='songplays',
    sql=SqlQueries.songplay_table_insert,
    conn_id='redshift',
)

# Task 'Load_user_dim_table': dimension load into users (delete_first=True).
load_user_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_user_dim_table',
    target_table='users',
    sql=SqlQueries.user_table_insert,
    conn_id='redshift',
    delete_first=True,
)

# Task 'Load_song_dim_table': dimension load into songs (delete_first=True).
load_song_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_song_dim_table',
    target_table='songs',
    sql=SqlQueries.song_table_insert,
    conn_id='redshift',
    delete_first=True,
)

load_artist_dimension_table = LoadDimensionOperator(
    task_id='Load_artist_dim_table',
    dag=dag,
コード例 #6
0
    s3_key="song_data",
    aws_credentials_id='aws_credentials'
)

# Task 'Load_songplays_fact_table': fact load into songplays.
load_songplays_table = LoadFactOperator(
    dag=dag,
    task_id='Load_songplays_fact_table',
    table='songplays',
    sql_query=SqlQueries.songplay_table_insert,
    redshift_conn_id='redshift',
)

# Task 'Load_user_dim_table': dimension load into users.
load_user_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_user_dim_table',
    table='users',
    sql_query=SqlQueries.user_table_insert,
    redshift_conn_id='redshift',
)

# Task 'Load_song_dim_table': dimension load into songs.
load_song_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_song_dim_table',
    table='songs',
    sql_query=SqlQueries.song_table_insert,
    redshift_conn_id='redshift',
)

load_artist_dimension_table = LoadDimensionOperator(
    task_id='Load_artist_dim_table',
    dag=dag,
コード例 #7
0
ファイル: global_dag.py プロジェクト: alex-toy/darties
}, {
    'item': "devise",
    "query": SqlQueries.devise_table_insert
}, {
    'item': "cours",
    "query": SqlQueries.cours_table_insert
}, {
    'item': "magasin",
    "query": SqlQueries.magasin_table_insert
}]

# One dimension-load task per entry of dimension_items; each task is collected
# in load_dimension_tables.
for dimension_item in dimension_items:
    load_dimension_table = LoadDimensionOperator(
        dag=dag,
        task_id=f"load_{dimension_item['item']}_dimension_table",
        redshift_conn_id="redshift",
        query=dimension_item["query"],
        table=dimension_item["item"],
        append=False,
    )
    load_dimension_tables.append(load_dimension_table)

### Build fact table
milestone_2 = DummyOperator(dag=dag, task_id='milestone_2')

# Task 'Load_sales_fact_table': fact load into sales.
Load_sales_fact_table = LoadFactOperator(
    dag=dag,
    task_id='Load_sales_fact_table',
    redshift_conn_id="redshift",
    query=SqlQueries.sales_table_insert,
    table="sales",
)

### Quality checks
コード例 #8
0
    s3_bucket="udacity-dend",
    s3_key="song_data/A/A/A",
    table="staging_songs",
    json="auto")

# Task 'Load_songplays_fact_table': fact load into songplays.
load_songplays_table = LoadFactOperator(
    dag=dag,
    task_id='Load_songplays_fact_table',
    destination_table="songplays",
    facts_sql=SqlQueries.songplay_table_insert,
    redshift_conn_id="redshift",
)

# Task 'Load_user_dim_table': dimension load into users (append_mode=False).
load_user_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_user_dim_table',
    destination_table="users",
    dim_sql=SqlQueries.user_table_insert,
    redshift_conn_id="redshift",
    append_mode=False,
)

# Task 'Load_song_dim_table': dimension load into songs (append_mode=False).
load_song_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_song_dim_table',
    destination_table="songs",
    dim_sql=SqlQueries.song_table_insert,
    redshift_conn_id="redshift",
    append_mode=False,
)

load_artist_dimension_table = LoadDimensionOperator(
    task_id='Load_artist_dim_table',
    dag=dag,
コード例 #9
0
)

# Fact load: LoadFactOperator targeting the songplays table.
load_songplays_table = LoadFactOperator(
    dag=dag,
    task_id='Load_songplays_fact_table',
    redshift_connection_id='redshift',
    target_table='songplays',
    sql_statement=SqlQueries.songplay_table_insert,
)

# Dimension load: LoadDimensionOperator targeting the users table.
load_user_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_user_dim_table',
    redshift_connection_id='redshift',
    target_table='users',
    sql_statement=SqlQueries.user_table_insert,
    truncate=False,
)

# Dimension load: LoadDimensionOperator targeting the songs table.
load_song_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_song_dim_table',
    redshift_connection_id='redshift',
    target_table='songs',
    sql_statement=SqlQueries.song_table_insert,
    truncate=False,
)

# Calling LoadDimensionOperator to load the data into artists dimension table
コード例 #10
0
    task_id='Stage_songs',
    dag=dag,
    aws_credentials_id="aws_credentials",
    iam_role="Redshift_Read_S3",
    redshift_conn_id="redshift",
    s3_json_structure_path="s3://udacity-redshift/song_paths.json",
    s3_data_path="s3://udacity-dend/song_data",
    table='staging_songs')

# Task 'Load_songplays_fact_table': only the Redshift connection is passed.
load_songplays_table = LoadFactOperator(
    dag=dag,
    task_id='Load_songplays_fact_table',
    redshift_conn_id="redshift",
)

# Task 'Load_user_dim_table': dimension load for the users table.
load_user_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_user_dim_table',
    table="users",
    redshift_conn_id="redshift",
)

# Task 'Load_song_dim_table': dimension load for the songs table.
load_song_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_song_dim_table',
    table="songs",
    redshift_conn_id="redshift",
)

# Task 'Load_artist_dim_table': dimension load for the artists table.
load_artist_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_artist_dim_table',
    table="artists",
    redshift_conn_id="redshift",
)
コード例 #11
0
    dag=dag)

# Task 'stage_reviews': stages reviews.csv from the podcast-project bucket
# into staging_reviews.
stage_reviews_to_redshift = StageToRedshiftOperator(
    dag=dag,
    task_id='stage_reviews',
    s3_bucket='podcast-project',
    s3_key='reviews.csv',
    table='staging_reviews',
    redshift_conn_id='redshift',
)

# Task 'load_podcast_agg_table': per-podcast review aggregates (average rating
# and number of reviews) written to podcast_agg_reviews.
#
# The review count is taken over sr.rating. The only aliases declared in the
# FROM clause are sp and sr, so counting over any other alias fails in the
# database. Because the WHERE clause keeps only rows with a non-null
# sr.rating, count(sr.rating) equals the number of joined review rows.
load_podcast_stats_table = LoadDimensionOperator(
    task_id='load_podcast_agg_table',
    redshift_conn_id='redshift',
    destination_table='podcast_agg_reviews',
    query_dimension="""  SELECT sp.podcast_id, sp.title
                     ,avg(sr.rating)
                      ,count(sr.rating)
                      FROM staging_podcast AS sp LEFT JOIN staging_reviews AS sr ON 
                      sp.podcast_id = sr.podcast_id
                      WHERE sr.rating IS NOT NULL 
                      GROUP BY sp.podcast_id, sp.title;""",
    dag=dag)

load_categories_stats_table = LoadDimensionOperator(
    task_id='load_categories_agg_table',
    redshift_conn_id='redshift',
    destination_table='categories_agg_reviews',
    query_dimension="""(category, total_podcast, category_avg_rating)  
                      SELECT sc.category,count(distinct sc.podcast_id),avg(sr.rating)
                      FROM staging_category AS sc LEFT JOIN staging_reviews AS sr ON 
                      sc.podcast_id = sr.podcast_id
                      WHERE sr.rating IS NOT NULL
コード例 #12
0
    json_file="auto",
    region="us-west-2")

# Task 'Load_songplays_fact_table': create + insert statements for songplays.
load_songplays_table = LoadFactOperator(
    dag=dag,
    task_id='Load_songplays_fact_table',
    redshift_conn_id="redshift",
    target_table="songplays",
    sql_table_create=SqlQueries.songplay_table_create,
    sql_table_insert=SqlQueries.songplay_table_insert,
    mode="",
)

# Task 'Load_user_dim_table': create + insert statements for users.
load_user_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_user_dim_table',
    redshift_conn_id="redshift",
    target_table="users",
    sql_table_create=SqlQueries.user_table_create,
    sql_table_insert=SqlQueries.user_table_insert,
)

# Task 'Load_song_dim_table': create + insert statements for songs.
load_song_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_song_dim_table',
    redshift_conn_id="redshift",
    target_table="songs",
    sql_table_create=SqlQueries.song_table_create,
    sql_table_insert=SqlQueries.song_table_insert,
)

load_artist_dimension_table = LoadDimensionOperator(
    task_id='Load_artist_dim_table',
    dag=dag,
コード例 #13
0
    task_id='Stage_songs',
    dag=dag,
    table="staging_songs",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    s3_bucket="udacity-dend",
    s3_key="song_data/A/B/C/TRABCEI128F424C983.json")

# Task 'Load_songplays_fact_table': fact load into songplays.
load_songplays_table = LoadFactOperator(
    dag=dag,
    task_id='Load_songplays_fact_table',
    destination_table="songplays",
    redshift_conn_id="redshift",
)

# Task 'Load_user_dim_table': dimension load into users.
load_user_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_user_dim_table',
    table_name='users',
    sql_statement=sql_queries.user_table_insert,
    redshift_conn_id="redshift",
)

# Task 'Load_song_dim_table': dimension load into songs.
load_song_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_song_dim_table',
    table_name='songs',
    sql_statement=sql_queries.song_table_insert,
    redshift_conn_id="redshift",
)

load_artist_dimension_table = LoadDimensionOperator(
    task_id='Load_artist_dim_table',
    dag=dag,
    redshift_conn_id="redshift",
    sql_statement=sql_queries.artist_table_insert,
コード例 #14
0
    aws_credentials_id='aws_credentials',
    json='s3://udacity-dend/song_data',
    s3_bucket='udacity-dend',
    s3_key='song_data')

# Task 'Load_songplays_fact_table': fact load into public.songplays.
load_songplays_table = LoadFactOperator(
    dag=dag,
    task_id='Load_songplays_fact_table',
    table='public.songplays',
    query=SqlQueries.songplay_table_insert,
    truncate_table=True,
    aws_credentials='aws_credentials',
)

# Task 'Load_user_dim_table': dimension load into public.users.
load_user_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_user_dim_table',
    table='public.users',
    query=SqlQueries.user_table_insert,
    truncate_table=True,
    aws_credentials='aws_credentials',
)

# Task 'Load_song_dim_table': dimension load into public.songs.
load_song_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_song_dim_table',
    table='public.songs',
    query=SqlQueries.song_table_insert,
    truncate_table=True,
    aws_credentials='aws_credentials',
)

load_artist_dimension_table = LoadDimensionOperator(
    task_id='Load_artist_dim_table',
    dag=dag,
    email_on_retry=False)

# Task 'Load_songplays_fact_table': 3 retries, 5 minutes apart, no retry e-mail.
load_songplays_table = LoadFactOperator(
    dag=dag,
    task_id='Load_songplays_fact_table',
    redshift_conn_id='redshift',
    table="songplays",
    depends_on_past=False,
    email_on_retry=False,
    retries=3,
    retry_delay=timedelta(minutes=5),
)

# Task 'Load_user_dim_table': same retry settings, users table.
load_user_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_user_dim_table',
    redshift_conn_id='redshift',
    table="users",
    depends_on_past=False,
    email_on_retry=False,
    retries=3,
    retry_delay=timedelta(minutes=5),
)

# Task 'Load_song_dim_table': same retry settings, songs table.
load_song_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_song_dim_table',
    redshift_conn_id='redshift',
    table="songs",
    depends_on_past=False,
    email_on_retry=False,
    retries=3,
    retry_delay=timedelta(minutes=5),
)
コード例 #16
0
from operators.load_dimension import LoadDimensionOperator
from helpers import SqlQueries 

def subdag(
    parent_dag_name,
    task_id,
    redshift_conn_id,
    aws_credentials_id,
    table,
    sql_query,
    *args, **kwargs):
    """Build a sub-DAG that inserts data into one dimension table.

    The sub-DAG holds a single LoadDimensionOperator task that populates
    a dimensional Redshift table from the staging tables.

    Args:
        parent_dag_name: dag_id of the parent DAG; used as the prefix of
            the sub-DAG id.
        task_id: id of the task that embeds this sub-DAG in the parent;
            used as the suffix of the sub-DAG id.
        redshift_conn_id: forwarded to LoadDimensionOperator.
        aws_credentials_id: forwarded to LoadDimensionOperator.
        table: target table name; also used to build the inner task id.
        sql_query: forwarded to LoadDimensionOperator.
        *args: accepted for call compatibility; not used.
        **kwargs: forwarded unchanged to the DAG constructor.

    Returns:
        The constructed DAG, named "<parent_dag_name>.<task_id>".
    """
    dag = DAG(
        f"{parent_dag_name}.{task_id}",
        **kwargs
    )

    # The operator is created inside the factory so it can see the factory's
    # parameters; it is tied to the sub-DAG through dag=dag.
    LoadDimensionOperator(
        task_id=f"load_{table}_dim_table",
        dag=dag,
        table=table,
        redshift_conn_id=redshift_conn_id,
        aws_credentials_id=aws_credentials_id,
        sql_query=sql_query
    )

    # The caller needs the DAG object itself (e.g. to hand it to a
    # SubDagOperator), so it must be returned.
    return dag

コード例 #17
0
    s3_bucket='udacity-dend',
    s3_key='song_data',
    aws_region='us-west-2')

# Task 'Load_songplays_fact_table': append behaviour comes from append_data.
load_songplays_table = LoadFactOperator(
    dag=dag,
    task_id='Load_songplays_fact_table',
    query=SqlQueries.songplay_table_insert,
    append_data=append_data,
    redshift_conn_id='redshift',
)

# Task 'Load_user_dim_table': dimension load into users.
load_user_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_user_dim_table',
    table='users',
    query=SqlQueries.user_table_insert,
    append_data=append_data,
    redshift_conn_id='redshift',
)

# Task 'Load_song_dim_table': dimension load into songs.
load_song_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_song_dim_table',
    table='songs',
    query=SqlQueries.song_table_insert,
    append_data=append_data,
    redshift_conn_id='redshift',
)

load_artist_dimension_table = LoadDimensionOperator(
    task_id='Load_artist_dim_table',
コード例 #18
0
    task_id='create_dwh_tables',
    dag=dag,
    create_or_delete='create',
    staging_or_dwh='dwh',
    redshift_conn_id='redshift_conn_id',
)

# Task 'load_artists_fact_table': artists table, append=True.
load_artists_table = LoadFactOperator(
    dag=dag,
    task_id='load_artists_fact_table',
    table='artists',
    append=True,
    redshift_conn_id="redshift_conn_id",
)

# Task 'load_concerts_dimension_table': concerts table, append=True.
load_concerts_table = LoadDimensionOperator(
    dag=dag,
    task_id='load_concerts_dimension_table',
    table='concerts',
    append=True,
    redshift_conn_id="redshift_conn_id",
)

# Task 'load_songs_dimension_table': songs table, append=True.
load_songs_table = LoadDimensionOperator(
    dag=dag,
    task_id='load_songs_dimension_table',
    table='songs',
    append=True,
    redshift_conn_id="redshift_conn_id",
)

check_data_quality = DataQualityOperator(
    task_id='check_data_quality',
    dag=dag,
    redshift_conn_id='redshift_conn_id',
    dq_checks=[
        {
コード例 #19
0
    json_format="'auto'",
)

# Task 'Load_songplays_fact_table': songplays, truncate_table=False.
load_songplays_table = LoadFactOperator(
    dag=dag,
    task_id='Load_songplays_fact_table',
    redshift_conn_id="redshift",
    table='songplays',
    select_sql=SqlQueries.songplay_table_insert,
    truncate_table=False,
)

# Task 'Load_users_dim_table': users, truncate_table=True.
load_user_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_users_dim_table',
    redshift_conn_id="redshift",
    table='users',
    select_sql=SqlQueries.user_table_insert,
    truncate_table=True,
)

# Task 'Load_songs_dim_table': songs, truncate_table=True.
load_song_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_songs_dim_table',
    redshift_conn_id="redshift",
    table='songs',
    select_sql=SqlQueries.song_table_insert,
    truncate_table=True,
)

load_artist_dimension_table = LoadDimensionOperator(
    task_id='Load_artists_dim_table',
コード例 #20
0
    aws_credentials_id="aws_credentials",
    s3_bucket="udacity-dend",
    s3_key="song_data/",
    format_json=Variable.get('json_song_format',
                             default_var=default_json_song_format))

# Task 'Load_songplays_fact_table': fact load for the songplays table.
load_songplays_table = LoadFactOperator(
    dag=dag,
    task_id='Load_songplays_fact_table',
    table='songplays',
    redshift_conn_id="redshift",
)

# Task 'Load_user_dim_table': 'append_flag' is read from the Airflow Variable
# of the same name, falling back to default_append_flag.
load_user_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_user_dim_table',
    table='users',
    redshift_conn_id="redshift",
    params={
        'append_flag': Variable.get(
            'append_flag', default_var=default_append_flag),
    },
)

# Task 'Load_song_dim_table': same 'append_flag' lookup, songs table.
load_song_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_song_dim_table',
    table='songs',
    redshift_conn_id="redshift",
    params={
        'append_flag': Variable.get(
            'append_flag', default_var=default_append_flag),
    },
)
コード例 #21
0
    s3_data_path="s3://udacity-dend/song_data",
    json_schema="auto",
)

# Task 'Load_songplays_fact_table': insert query plus a NextSong filter.
load_songplays_table = LoadFactOperator(
    dag=dag,
    task_id='Load_songplays_fact_table',
    sql_query=SqlQueries.songplay_table_insert,
    filter_expr="WHERE page='NextSong'",
    redshift_conn_id="redshift",
)

# Task 'Load_user_dim_table': insert query plus a NextSong filter.
load_user_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_user_dim_table',
    sql_query=SqlQueries.user_table_insert,
    filter_expr="WHERE page='NextSong'",
    redshift_conn_id="redshift",
)

# Task 'Load_song_dim_table': insert query with an empty filter expression.
load_song_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_song_dim_table',
    sql_query=SqlQueries.song_table_insert,
    filter_expr="",
    redshift_conn_id="redshift",
)

load_artist_dimension_table = LoadDimensionOperator(
    task_id='Load_artist_dim_table',
    dag=dag,
コード例 #22
0
    provide_context=True,
)

# Task 'Load_songplays_fact_table': fact load into songplays.
load_songplays_table = LoadFactOperator(
    dag=dag,
    task_id='Load_songplays_fact_table',
    table='songplays',
    sql=SqlQueries.songplay_table_insert,
    redshift_conn='redshift',
    provide_context=False,
)

# Task 'Load_user_dim_table': dimension load into users.
load_user_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_user_dim_table',
    table='users',
    sql=SqlQueries.user_table_insert,
    redshift_conn='redshift',
    provide_context=False,
)

# Task 'Load_song_dim_table': dimension load into songs.
load_song_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_song_dim_table',
    table='songs',
    sql=SqlQueries.song_table_insert,
    redshift_conn='redshift',
    provide_context=False,
)

load_artist_dimension_table = LoadDimensionOperator(
    task_id='Load_artist_dim_table',
コード例 #23
0
    ignore_header=1,
    delimiter=';',
)

# Task 'Load_immigration_facts_fact_table': filter keeps rows with a cicid.
load_immigration_facts_table = LoadFactOperator(
    dag=dag,
    task_id='Load_immigration_facts_fact_table',
    sql_query=insert_queries['immigration_facts'],
    filter_expr="WHERE cicid is not null",
    redshift_conn_id="redshift",
)

# Task 'Load_states_dim_table': no filter, mode 'append'.
load_states_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_states_dim_table',
    sql_query=insert_queries['states'],
    filter_expr="",
    mode='append',
    redshift_conn_id="redshift",
)

# Task 'Load_cities_dim_table': no filter, mode 'append'.
load_cities_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_cities_dim_table',
    sql_query=insert_queries['cities'],
    filter_expr="",
    mode='append',
    redshift_conn_id="redshift",
)

load_times_dimension_table = LoadDimensionOperator(
    task_id='Load_times_dim_table',
    dag=dag,
コード例 #24
0
end_staging = DummyOperator(dag=dag, task_id='end_staging')

# Fact table: fact_temperature.
load_fact_table = LoadFactOperator(
    dag=dag,
    task_id='load_fact_table',
    table_name="fact_temperature",
    sql_insert_stmt=SqlQueries.fact_table_insert,
    redshift_conn_id="redshift",
    truncate=False,
)

# Dimension table: time.
load_time_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='load_time_dimension_table',
    table_name="time",
    sql_insert_stmt=SqlQueries.time_table_insert,
    redshift_conn_id="redshift",
    truncate=False,
)

# Dimension table: airport.
load_airport_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='load_airport_dimension_table',
    table_name="airport",
    sql_insert_stmt=SqlQueries.airport_table_insert,
    redshift_conn_id="redshift",
    truncate=False,
)

# Load Dimension table
load_demographic_dimension_table = LoadDimensionOperator(
コード例 #25
0
    schema='public',
    table='staging_songs',
    redshift_conn_id='redshift',
    aws_conn_id='aws_credentials',
    copy_options=["JSON 'auto ignorecase'"])

# Task 'load_fact_songplays': insert statement from insert_tables.songplays.
load_songplays = LoadFactOperator(
    dag=dag,
    task_id='load_fact_songplays',
    redshift_conn_id='redshift',
    insert_table_sql=insert_tables.songplays,
)

# Task 'load_dim_users': public.users, truncate=False.
load_users = LoadDimensionOperator(
    dag=dag,
    task_id='load_dim_users',
    redshift_conn_id='redshift',
    schema='public',
    table='users',
    insert_table_sql=insert_tables.users,
    truncate=False,
)

# Task 'load_dim_songs': public.songs, truncate=False.
load_songs = LoadDimensionOperator(
    dag=dag,
    task_id='load_dim_songs',
    redshift_conn_id='redshift',
    schema='public',
    table='songs',
    insert_table_sql=insert_tables.songs,
    truncate=False,
)

load_artists = LoadDimensionOperator(
    task_id='load_dim_artists',
    append_data=True,
    aws_credentials={
        'key': AWS_KEY,
        'secret': AWS_SECRET
    },
    region='us-east-1',
    sql_statement=SqlQueries.pleasurevisits_table_insert,
    provide_context=True)

# Task 'Load_flights_dim_table': dimension load into flights
# (append_data=False).
load_flights_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_flights_dim_table',
    redshift_conn_id='redshift',
    aws_credentials={'key': AWS_KEY, 'secret': AWS_SECRET},
    region='us-east-1',
    target_table='flights',
    sql_statement=SqlQueries.flights_table_insert,
    append_data=False,
    provide_context=True,
)

load_cities_dimension_table = LoadDimensionOperator(
    task_id='Load_cities_dim_table',
    dag=dag,
    target_table='cities',
    redshift_conn_id='redshift',
    append_data=False,
    aws_credentials={
        'key': AWS_KEY,
コード例 #27
0
    s3_bucket="udacity-dend",
    s3_key="song_data/A/A/A",
)

# Task 'Load_songplays_fact_table': fact load into songplays.
load_songplays_table = LoadFactOperator(
    dag=dag,
    task_id='Load_songplays_fact_table',
    aws_conn_id='aws_credentials',
    redshift_conn_id='redshift',
    table='songplays',
    insert_sql_qry=SqlQueries.songplay_table_insert,
)

# Task 'Load_user_dim_table': dimension load into users.
load_user_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_user_dim_table',
    aws_conn_id='aws_credentials',
    redshift_conn_id='redshift',
    table='users',
    insert_sql_qry=SqlQueries.user_table_insert,
)

# Task 'Load_song_dim_table': dimension load into songs.
load_song_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_song_dim_table',
    aws_conn_id='aws_credentials',
    redshift_conn_id='redshift',
    table='songs',
    insert_sql_qry=SqlQueries.song_table_insert,
)

load_artist_dimension_table = LoadDimensionOperator(
    task_id='Load_artist_dim_table',
    dag=dag,
コード例 #28
0
        append=True,
        dag=dag
)

# Task 'load_carpark_availability_fact_table': carpark_availability,
# append=True.
load_carpark_availability_table = LoadFactOperator(
    dag=dag,
    task_id='load_carpark_availability_fact_table',
    table='carpark_availability',
    append=True,
    redshift_conn_id="redshift",
)

# Task 'load_carpark_info_dimension_table': carpark, append=False.
load_carpark_table = LoadDimensionOperator(
    dag=dag,
    task_id='load_carpark_info_dimension_table',
    table='carpark',
    append=False,
    redshift_conn_id="redshift",
)

# Task 'load_weather_stations_info_dimension_table': weather_stations,
# append=False.
load_weather_station_table = LoadDimensionOperator(
    dag=dag,
    task_id='load_weather_stations_info_dimension_table',
    table='weather_stations',
    append=False,
    redshift_conn_id="redshift",
)

load_time_table = LoadDimensionOperator(
        task_id='load_time_dimension_table',
        redshift_conn_id='redshift',
コード例 #29
0
    table="i94country_code",
    s3_bucket=S3_BUCKET,
    s3_key="raw/i94_immigration_labels_description/country_code.csv")

# Task "load_usa_travelers_info": fact load into city_state_travelers_entry.
load_usa_travelers_info = LoadFactOperator(
    dag=dag,
    task_id="load_usa_travelers_info",
    schema=SCHEMA_NAME,
    table="city_state_travelers_entry",
    insert_sql=SqlQueries.city_state_travelers_entry_insert,
    redshift_conn_id=REDSHIFT_CONN_ID,
)

# Task "load_arrival_date": dimension load into arrival_date.
load_arrival_date = LoadDimensionOperator(
    dag=dag,
    task_id="load_arrival_date",
    schema=SCHEMA_NAME,
    table="arrival_date",
    insert_sql=SqlQueries.arrival_date_insert,
    redshift_conn_id=REDSHIFT_CONN_ID,
)

# Data Quality
# Read the data-quality check definitions from
# [airflow_file]/plugins/helpers/dq_check_settings.json, where airflow_file is
# the directory two levels above this file.
airflow_file = pathlib.Path(__file__).parent.parent.absolute()
dq_check_settings = os.path.join(airflow_file, "plugins", "helpers",
                                 "dq_check_settings.json")
# Decode explicitly as UTF-8 so parsing does not depend on the platform's
# default locale encoding.
with open(dq_check_settings, encoding="utf-8") as json_file:
    # Only the list stored under the 'dq_checks' key is kept.
    dq_checks = json.load(json_file)['dq_checks']

run_quality_checks = DataQualityOperator(task_id="run_data_quality_checks",
コード例 #30
0
    table="staging_songs",
    s3_bucket="udacity-dend",
    json_path="auto",
    region="us-west-2",
    overwrite=True)

# Task 'Load_songplays_fact_table': fact load into public.songplays.
load_songplays_table = LoadFactOperator(
    dag=dag,
    task_id='Load_songplays_fact_table',
    redshift_conn_id="redshift",
    target_table="public.songplays",
    sql=SqlQueries.songplay_table_insert,
)

# Task 'Load_user_dim_table': public.users, overwrite=True.
load_user_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_user_dim_table',
    redshift_conn_id="redshift",
    target_table="public.users",
    sql=SqlQueries.user_table_insert,
    overwrite=True,
)

# Task 'Load_song_dim_table': public.songs, overwrite=True.
load_song_dimension_table = LoadDimensionOperator(
    dag=dag,
    task_id='Load_song_dim_table',
    redshift_conn_id="redshift",
    target_table="public.songs",
    sql=SqlQueries.song_table_insert,
    overwrite=True,
)

load_artist_dimension_table = LoadDimensionOperator(
    task_id='Load_artist_dim_table',
    dag=dag,