stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    s3_key="song_data",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    table="staging_songs",
    s3_bucket="udacity-dend",
    json_path="auto",
    region="us-west-2",
    overwrite=True)

load_songplays_table = LoadFactOperator(task_id='Load_songplays_fact_table',
                                        dag=dag,
                                        sql=SqlQueries.songplay_table_insert,
                                        redshift_conn_id="redshift",
                                        target_table="public.songplays")

load_user_dimension_table = LoadDimensionOperator(
    task_id='Load_user_dim_table',
    dag=dag,
    sql=SqlQueries.user_table_insert,
    redshift_conn_id="redshift",
    target_table="public.users",
    overwrite=True)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    dag=dag,
    sql=SqlQueries.song_table_insert,
    redshift_conn_id="redshift",
    target_table="public.songs",
    overwrite=True)

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    redshift_conn_id='redshift',
    aws_credentials_id='aws_credentials',
    table='staging_songs',
    s3_bucket="udacity-dend",
    s3_key="song_data",
    region='us-west-2',
    json_path='auto')

#4. Use staging tables to populate the fact table
load_songplays_table = LoadFactOperator(
    task_id='Load_songplays_fact_table',
    dag=dag,
    redshift_conn_id="redshift",
    table="songplays",
    sql_query=SqlQueries.songplay_table_insert,
    delete_first=True)

#5. Use staging tables to populate the users table
load_user_dimension_table = LoadDimensionOperator(
    task_id='Load_user_dim_table',
    dag=dag,
    redshift_conn_id="redshift",
    table="users",
    sql_query=SqlQueries.user_table_insert,
    delete_first=True)

#6. Use staging tables to populate the songs dimension table
load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    dag=dag,
    redshift_conn_id="redshift",
    table="songs",
    sql_query=SqlQueries.song_table_insert,
    delete_first=True)

    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    s3_bucket="udacity-dend",
    s3_key="song_data/A/A/A",
    json="auto"
)

"""
connecting to redshift
running the LoadFactOperator operator with sql_queries.py
"""
load_songplays_table = LoadFactOperator(
    task_id='Load_songplays_fact_table',
    redshift_conn_id='redshift',
    table="songplays",
    sql_query=SqlQueries.songplay_table_insert,
    dag=dag,
    append_only=False
)

"""
connecting to redshift
running the LoadDimensionOperator operator with sql_queries.py
"""
load_user_dimension_table = LoadDimensionOperator(
    task_id='Load_user_dim_table',
    redshift_conn_id='redshift',
    table="users",
    sql_query=SqlQueries.user_table_insert,
    dag=dag,
    append_only=False
)

                                                  redshift_conn_id='redshift',
                                                  s3_bucket='udacity-dend',
                                                  s3_key='song_data/',
                                                  aws_credentials={
                                                      'key': AWS_KEY,
                                                      'secret': AWS_SECRET
                                                  },
                                                  region='us-west-2')

load_songplays_table = LoadFactOperator(
    task_id='Load_songplays_fact_table',
    dag=dag,
    source_table='songplays',
    target_table='songplays',
    redshift_conn_id='redshift',
    append_data=True,
    aws_credentials={
        'key': AWS_KEY,
        'secret': AWS_SECRET
    },
    region='us-west-2',
    sql_statement=SqlQueries.songplays_table_insert,
    provide_context=True)

load_users_dimension_table = LoadDimensionOperator(
    task_id='Load_users_dim_table',
    dag=dag,
    target_table='users',
    redshift_conn_id='redshift',
    append_data=False,
    aws_credentials={
        'key': AWS_KEY,
Example #5
    json_format='s3://udacity-dend/log_json_path.json')

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    table='staging_songs',
    redshift_conn_id='redshift',
    aws_credentials_id='aws_credentials',
    s3_bucket='udacity-dend',
    s3_key='song_data',  # load a small portion of song data with 'song_data/A/A/A'
    json_path='auto')

load_songplays_table = LoadFactOperator(task_id='Load_songplays_fact_table',
                                        dag=dag,
                                        conn_id='redshift',
                                        sql=SqlQueries.songplay_table_insert,
                                        target_table='songplays')

load_user_dimension_table = LoadDimensionOperator(
    task_id='Load_user_dim_table',
    dag=dag,
    conn_id='redshift',
    sql=SqlQueries.user_table_insert,
    target_table='users',
    delete_first=True)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    dag=dag,
    conn_id='redshift',
    sql=SqlQueries.song_table_insert,
    target_table='songs',
    delete_first=True)

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    table="staging_songs",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    s3_bucket="udacity-dend",
    s3_key="song_data",
    json_format="'auto'",
)

load_songplays_table = LoadFactOperator(
    task_id='Load_songplays_fact_table',
    dag=dag,
    table='songplays',
    redshift_conn_id="redshift",
    truncate_table=False,
    select_sql=SqlQueries.songplay_table_insert,
)

load_user_dimension_table = LoadDimensionOperator(
    task_id='Load_users_dim_table',
    dag=dag,
    table='users',
    redshift_conn_id="redshift",
    truncate_table=True,
    select_sql=SqlQueries.user_table_insert,
)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_songs_dim_table',
    dag=dag,
    table='songs',
    redshift_conn_id="redshift",
    truncate_table=True,
    select_sql=SqlQueries.song_table_insert,
)

    aws_credentials_id='aws_credentials',
    s3_bucket='dend-bucket-oregon-123',
    s3_key='capstone_immigration/trans_mode')

stage_visa_code_to_redshift = StageToRedshiftOperator(
    task_id='Stage_visa_code',
    dag=dag,
    table='staging_visa_code',
    redshift_conn_id='redshift',
    aws_credentials_id='aws_credentials',
    s3_bucket='dend-bucket-oregon-123',
    s3_key='capstone_immigration/visa_code')

load_arrivals_table = LoadFactOperator(task_id='Load_arrivals_fact_table',
                                       dag=dag,
                                       conn_id='redshift',
                                       sql=SqlQueries.arrivals_table_insert,
                                       target_table='arrivals')

load_admissions_table = LoadFactOperator(
    task_id='Load_admissions_dim_table',
    dag=dag,
    conn_id='redshift',
    sql=SqlQueries.admissions_table_insert,
    target_table='admissions')

load_time_table = LoadFactOperator(task_id='Load_time_dim_table',
                                   dag=dag,
                                   conn_id='redshift',
                                   sql=SqlQueries.time_table_insert,
                                   target_table='time')
def create_fact_tables(parent_dag_name, child_dag_name, start_date, end_date,
                       schedule_interval, redshift_conn_id, degree_list,
                       origin_table_format, destination_table_format, sql,
                       upstream_subdag_id, *args, **kwargs):
    """
    Check that upstream staging dependencies were successful, load data into the fact table, and finally run a data quality check.

    Keyword Arguments:
    parent_dag_name -- Parent DAG name defined in `main_dag.py` dag object
    child_dag_name -- Child DAG name used to define subdag ID
    start_date -- DAG start date
    end_date -- DAG end date
    schedule_interval -- DAG schedule interval (e.g. '@monthly', '@weekly')
    redshift_conn_id   -- Redshift connection ID (str)
    degree_list -- List of degree names (list)
    origin_table_format -- Dictionary of table labels and staging table names used for fact table sql mapping (str)
    destination_table_format -- Fact table name to be formatted with degree name (str)
    sql -- Fact table query (str)
    upstream_subdag_id -- ID of the upstream staging subdag whose success is checked before loading
    """

    dag = DAG(dag_id=f"{parent_dag_name}.{child_dag_name}",
              start_date=start_date,
              end_date=end_date,
              schedule_interval=schedule_interval,
              **kwargs)

    #help
    # upstream_subdag_id = kwargs['task_instance'].upstream_task_ids

    for degree in degree_list:
        destination_table = destination_table_format.format(degree=degree)
        origin_tables = {
            table: name.format(degree=degree)
            for (table, name) in origin_table_format.items()
        }

        start_task = DummyOperator(task_id=f'{degree}', dag=dag)

        upstream_check_task = PythonOperator(
            task_id=f'check_{destination_table}_dependencies',
            python_callable=upstream_staging_check,
            op_kwargs={
                'origin_tables': origin_tables,
                'upstream_subdag_id': upstream_subdag_id
            },
            provide_context=True)

        create_task = LoadFactOperator(task_id=destination_table,
                                       dag=dag,
                                       sql=sql,
                                       redshift_conn_id=redshift_conn_id,
                                       destination_table=destination_table,
                                       origin_tables=origin_tables,
                                       provide_context=True)

        check_task = DataQualityOperator(task_id=f'{destination_table}_data_quality_check',
                                         dag=dag,
                                         redshift_conn_id=redshift_conn_id,
                                         table=destination_table,
                                         provide_context=True)

        start_task >> upstream_check_task
        upstream_check_task >> create_task
        create_task >> check_task

    return dag
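
A minimal usage sketch for the subdag factory above, wired into a parent DAG with Airflow's SubDagOperator. The parent DAG, degree list, table name formats, SQL attribute, and upstream subdag ID below are illustrative assumptions, not values taken from the source.

from datetime import datetime

from airflow import DAG
from airflow.operators.subdag_operator import SubDagOperator

# Assumed parent DAG; its dag_id must equal parent_dag_name so the subdag's
# "{parent_dag_name}.{child_dag_name}" dag_id resolves correctly.
main_dag = DAG('main_dag',
               start_date=datetime(2019, 1, 1),
               end_date=datetime(2019, 12, 31),
               schedule_interval='@monthly')

load_fact_tables = SubDagOperator(
    task_id='create_fact_tables',  # must match child_dag_name below
    dag=main_dag,
    subdag=create_fact_tables(
        parent_dag_name='main_dag',
        child_dag_name='create_fact_tables',
        start_date=datetime(2019, 1, 1),
        end_date=datetime(2019, 12, 31),
        schedule_interval='@monthly',
        redshift_conn_id='redshift',
        degree_list=['bachelors'],                            # assumed degree names
        origin_table_format={'staging': 'staging_{degree}'},  # assumed label-to-table mapping
        destination_table_format='fact_{degree}_enrollment',  # assumed fact table name format
        sql=SqlQueries.enrollment_table_insert,               # hypothetical query attribute
        upstream_subdag_id='create_staging_tables',           # assumed upstream staging subdag
    ),
)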
Example #9
    json="s3://udacity-dend/log_json_path.json",
)

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    s3_bucket="udacity-dend",
    s3_key="song_data/A/A/A",
    table="staging_songs",
    json="auto")

load_songplays_table = LoadFactOperator(
    task_id='Load_songplays_fact_table',
    dag=dag,
    redshift_conn_id="redshift",
    destination_table="songplays",
    facts_sql=SqlQueries.songplay_table_insert)

load_user_dimension_table = LoadDimensionOperator(
    task_id='Load_user_dim_table',
    dag=dag,
    redshift_conn_id="redshift",
    destination_table="users",
    dim_sql=SqlQueries.user_table_insert,
    append_mode=False)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    dag=dag,
    redshift_conn_id="redshift",
Example #10
stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    target_table="staging_songs",
    sql_table_create=SqlQueries.staging_songs_table_create,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    s3_bucket="udacity-dend",
    s3_key="song_data",
    json_file="auto",
    region="us-west-2")

load_songplays_table = LoadFactOperator(
    task_id='Load_songplays_fact_table',
    dag=dag,
    target_table="songplays",
    sql_table_create=SqlQueries.songplay_table_create,
    sql_table_insert=SqlQueries.songplay_table_insert,
    redshift_conn_id="redshift",
    mode="")

load_user_dimension_table = LoadDimensionOperator(
    task_id='Load_user_dim_table',
    dag=dag,
    target_table="users",
    sql_table_create=SqlQueries.user_table_create,
    sql_table_insert=SqlQueries.user_table_insert,
    redshift_conn_id="redshift")

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    dag=dag,
Example #11
                                 schema='public',
                                 table='gov',
                                 s3_bucket='udacity-capstone-cg',
                                 s3_key='staging',
                                 copy_options=[
                                     'CSV', 'IGNOREHEADER 1', 'FILLRECORD',
                                     'COMPUPDATE OFF', 'STATUPDATE OFF',
                                     'TRUNCATECOLUMNS'
                                 ],
                                 dag=dag)

get_ready_to_load = DummyOperator(task_id='Get_ready_to_load', dag=dag)

load_tweets_table = LoadFactOperator(task_id='Load_tweets_fact_table',
                                     dag=dag,
                                     aws_credentials_id="aws_credentials",
                                     table="tweets",
                                     sql_query=SqlQueries.tweets_table_insert)
load_user_dimension_table = LoadDimensionOperator(
    task_id='Load_user_dim_table',
    dag=dag,
    table='"user"',
    sql_query=SqlQueries.user_table_insert)

load_geo_dimension_table = LoadDimensionOperator(
    task_id='Load_geo_dim_table',
    dag=dag,
    table="geo",
    sql_query=SqlQueries.geo_table_insert)

run_basic_checks = DataQualityOperator(
Example #12
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    s3_bucket="udacity-dend",
    s3_key="log_data/2018/11/2018-11-01-events.json")

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    table="staging_songs",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    s3_bucket="udacity-dend",
    s3_key="song_data/A/B/C/TRABCEI128F424C983.json")

load_songplays_table = LoadFactOperator(task_id='Load_songplays_fact_table',
                                        dag=dag,
                                        redshift_conn_id="redshift",
                                        destination_table="songplays")

load_user_dimension_table = LoadDimensionOperator(
    task_id='Load_user_dim_table',
    dag=dag,
    redshift_conn_id="redshift",
    sql_statement=sql_queries.user_table_insert,
    table_name='users')

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    dag=dag,
    redshift_conn_id="redshift",
    sql_statement=sql_queries.song_table_insert,
    table_name='songs')
Example #13
    aws_credentials_id="aws_credentials",
    json="s3://udacity-dend/log_json_path.json")

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    table='public.songs',
    redshift_conn_id='redshift',
    aws_credentials_id='aws_credentials',
    json='s3://udacity-dend/song_data',
    s3_bucket='udacity-dend',
    s3_key='song_data')

load_songplays_table = LoadFactOperator(task_id='Load_songplays_fact_table',
                                        dag=dag,
                                        aws_credentials='aws_credentials',
                                        table='public.songplays',
                                        truncate_table=True,
                                        query=SqlQueries.songplay_table_insert)

load_user_dimension_table = LoadDimensionOperator(
    task_id='Load_user_dim_table',
    dag=dag,
    aws_credentials='aws_credentials',
    table='public.users',
    truncate_table=True,
    query=SqlQueries.user_table_insert)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    dag=dag,
    aws_credentials='aws_credentials',
    table='public.songs',
    truncate_table=True,
    query=SqlQueries.song_table_insert)

    json="s3://udacity-dend/log_json_path.json")

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    table="staging_songs",
    s3_bucket="udacity-dend",
    s3_key="song_data/",
    aws_region="us-west-2",
    json="auto")

load_songplays_table = LoadFactOperator(
    task_id='Load_songplays_fact_table',
    dag=dag,
    redshift_conn_id="redshift",
    sql_insert=SqlQueries.songplay_table_insert,
    destination_table="public.songplays")

load_user_dimension_table = LoadDimensionOperator(
    task_id='Load_user_dim_table',
    dag=dag,
    redshift_conn_id="redshift",
    sql_insert=SqlQueries.user_table_insert,
    destination_table="public.users",
    delete=True)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    dag=dag,
    redshift_conn_id="redshift",
Example #15
    jsonpath='log_json_path.json')

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    table='staging_songs',
    redshift_conn_id='redshift',
    aws_credentials_id='aws_credentials',
    s3_bucket='udacity-dend',
    s3_key='song_data',
    aws_region='us-west-2')

load_songplays_table = LoadFactOperator(
    task_id='Load_songplays_fact_table',
    dag=dag,
    redshift_conn_id='redshift',
    append_data=append_data,
    query=SqlQueries.songplay_table_insert,
)

load_user_dimension_table = LoadDimensionOperator(
    task_id='Load_user_dim_table',
    dag=dag,
    redshift_conn_id='redshift',
    append_data=append_data,
    table='users',
    query=SqlQueries.user_table_insert,
)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
Example #16
stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    redshift_connection_id='redshift',
    table_name='staging_songs',
    aws_credential_id='aws_credentials',
    s3_bucket='udacity-dend',
    s3_key='song-data/A/A',
    json_path="auto",
    dag=dag
)

# Calling LoadFactOperator to load the data into the songplays fact table
load_songplays_table = LoadFactOperator(
    task_id='Load_songplays_fact_table',
    sql_statement=SqlQueries.songplay_table_insert,
    target_table='songplays',
    redshift_connection_id='redshift',
    dag=dag
)

# Calling LoadDimensionOperator to load the data into the users dimension table
load_user_dimension_table = LoadDimensionOperator(
    task_id='Load_user_dim_table',
    target_table='users',
    redshift_connection_id='redshift',
    sql_statement=SqlQueries.user_table_insert,
    truncate=False,
    dag=dag
)

# Calling LoadDimensionOperator to load the data into the songs dimension table
load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    target_table='songs',
    redshift_connection_id='redshift',
    sql_statement=SqlQueries.song_table_insert,
    truncate=False,
    dag=dag
)

)

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    table="staging_songs",
    redshift_conn_id="redshift",
    s3_bucket="udacity-dend",
    s3_key="song_data",
    aws_credentials_id='aws_credentials'
)

load_songplays_table = LoadFactOperator(
    task_id='Load_songplays_fact_table',
    dag=dag,
    redshift_conn_id='redshift',
    table='songplays',
    sql_query=SqlQueries.songplay_table_insert
)

load_user_dimension_table = LoadDimensionOperator(
    task_id='Load_user_dim_table',
    dag=dag,
    redshift_conn_id='redshift',
    table='users',
    sql_query=SqlQueries.user_table_insert
)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    dag=dag,
    redshift_conn_id='redshift',
    table='songs',
    sql_query=SqlQueries.song_table_insert
)

    create_or_delete='delete',
    staging_or_dwh='dwh',
    redshift_conn_id='redshift_conn_id',
)

create_dwh_tables = CreateOrDeleteOperator(
    task_id='create_dwh_tables',
    dag=dag,
    create_or_delete='create',
    staging_or_dwh='dwh',
    redshift_conn_id='redshift_conn_id',
)

load_artists_table = LoadFactOperator(task_id='load_artists_fact_table',
                                      redshift_conn_id="redshift_conn_id",
                                      table='artists',
                                      append=True,
                                      dag=dag)

load_concerts_table = LoadDimensionOperator(
    task_id='load_concerts_dimension_table',
    redshift_conn_id="redshift_conn_id",
    table='concerts',
    append=True,
    dag=dag)

load_songs_table = LoadDimensionOperator(task_id='load_songs_dimension_table',
                                         redshift_conn_id="redshift_conn_id",
                                         table='songs',
                                         append=True,
                                         dag=dag)
Example #19
for dimension_item in dimension_items:
    load_dimension_table = LoadDimensionOperator(
        task_id=f"load_{dimension_item['item']}_dimension_table",
        dag=dag,
        redshift_conn_id="redshift",
        table=dimension_item["item"],
        query=dimension_item["query"],
        append=False)
    load_dimension_tables.append(load_dimension_table)

### Build fact table
milestone_2 = DummyOperator(task_id='milestone_2', dag=dag)

Load_sales_fact_table = LoadFactOperator(task_id='Load_sales_fact_table',
                                         dag=dag,
                                         redshift_conn_id="redshift",
                                         table="sales",
                                         query=SqlQueries.sales_table_insert)

### Quality checks
tables = [
    'sales', 'sales', 'sales', 'sales', 'magasin', 'utilisateur', 'cours'
]
columns = [
    'id_ville', 'id_temps', 'id_famille_produit', 'id_magasin', 'id_enseigne',
    'id_profil', 'id_devise'
]

null_quality_checks = CheckNullOperator(task_id='null_quality_checks',
                                        dag=dag,
                                        redshift_conn_id="redshift",
                                        tables=tables,    # lists built above; keyword names assumed
                                        columns=columns)

    format_json=Variable.get('json_event_format',
                             default_var=default_json_event_format))

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    table="staging_songs",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    s3_bucket="udacity-dend",
    s3_key="song_data/",
    format_json=Variable.get('json_song_format',
                             default_var=default_json_song_format))

load_songplays_table = LoadFactOperator(task_id='Load_songplays_fact_table',
                                        redshift_conn_id="redshift",
                                        table='songplays',
                                        dag=dag)

load_user_dimension_table = LoadDimensionOperator(
    task_id='Load_user_dim_table',
    redshift_conn_id="redshift",
    table='users',
    params={
        'append_flag': Variable.get('append_flag',
                                    default_var=default_append_flag)
    },
    dag=dag)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    redshift_conn_id="redshift",
Example #21
)

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    target_table_name="staging_songs",
    s3_data_path="s3://udacity-dend/song_data",
    json_schema="auto",
)

load_songplays_table = LoadFactOperator(
    task_id='Load_songplays_fact_table',
    dag=dag,
    redshift_conn_id="redshift",
    sql_query=SqlQueries.songplay_table_insert,
    filter_expr="WHERE page='NextSong'"
)

load_user_dimension_table = LoadDimensionOperator(
    task_id='Load_user_dim_table',
    dag=dag,
    redshift_conn_id="redshift",
    sql_query=SqlQueries.user_table_insert,
    filter_expr="WHERE page='NextSong'"
)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    dag=dag,
Example #22
    task_id='Stage_songs',
    dag=dag,
    redshift_conn='redshift',
    aws_credentials='aws_credentials',
    table='staging_songs',
    s3_bucket='udacity-dend',
    s3_key='song_data',
    json_path='auto',
    sql=SqlQueries.staging_table_copy,
    provide_context=True,
)

load_songplays_table = LoadFactOperator(
    task_id='Load_songplays_fact_table',
    dag=dag,
    redshift_conn='redshift',
    table='songplays',
    sql=SqlQueries.songplay_table_insert,
    provide_context=False,
)

load_user_dimension_table = LoadDimensionOperator(
    task_id='Load_user_dim_table',
    dag=dag,
    redshift_conn='redshift',
    table='users',
    sql=SqlQueries.user_table_insert,
    provide_context=False,
)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    dag=dag,
    redshift_conn='redshift',
    table='songs',
    sql=SqlQueries.song_table_insert,
    provide_context=False,
)

stage_demography_to_redshift = StageRedshiftFromS3Operator(
    task_id='Stage_demography',
    dag=dag,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    target_table_name="staging_demographics",
    s3_data_path="s3://dend-veegaaa-capstone/us-cities-demographics.csv",
    ignore_header=1,
    delimiter=';',
)

load_immigration_facts_table = LoadFactOperator(
    task_id='Load_immigration_facts_fact_table',
    dag=dag,
    redshift_conn_id="redshift",
    sql_query=insert_queries['immigration_facts'],
    filter_expr="WHERE cicid is not null",
)

load_states_dimension_table = LoadDimensionOperator(
    task_id='Load_states_dim_table',
    dag=dag,
    redshift_conn_id="redshift",
    sql_query=insert_queries['states'],
    filter_expr="",
    mode='append')

load_cities_dimension_table = LoadDimensionOperator(
    task_id='Load_cities_dim_table',
    dag=dag,
Example #24
airport_codes_staging = StageJsonToRedshiftOperator(
    task_id='airport_codes_staging',
    dag=dag,
    redshift_conn_id='redshift',
    aws_credentials_id='aws_credentials',
    table='staging_airport_code_data',
    s3_bucket="beppe-udacity-capstone",
    s3_key="capstone/airport_code_")

end_staging = DummyOperator(task_id='end_staging', dag=dag)

# Load Fact table
load_fact_table = LoadFactOperator(
    task_id='load_fact_table',
    dag=dag,
    redshift_conn_id="redshift",
    table_name="fact_temperature",
    sql_insert_stmt=SqlQueries.fact_table_insert,
    truncate=False)

# Load Dimension table
load_time_dimension_table = LoadDimensionOperator(
    task_id='load_time_dimension_table',
    dag=dag,
    redshift_conn_id="redshift",
    table_name="time",
    sql_insert_stmt=SqlQueries.time_table_insert,
    truncate=False)

# Load Dimension table
load_airport_dimension_table = LoadDimensionOperator(
Example #25
stage_songs = StageToRedshiftOperator(
    task_id='staging_songs',
    dag=dag,
    create_table_sql=create_tables.staging_songs,
    s3_bucket='udacity-dend',
    s3_key='song_data',
    schema='public',
    table='staging_songs',
    redshift_conn_id='redshift',
    aws_conn_id='aws_credentials',
    copy_options=["JSON 'auto ignorecase'"])

load_songplays = LoadFactOperator(
    task_id='load_fact_songplays',
    dag=dag,
    insert_table_sql=insert_tables.songplays,
    redshift_conn_id='redshift')

load_users = LoadDimensionOperator(
    task_id='load_dim_users',
    dag=dag,
    insert_table_sql=insert_tables.users,
    schema='public',
    table='users',
    truncate=False,
    redshift_conn_id='redshift')

load_songs = LoadDimensionOperator(
    task_id='load_dim_songs',
    dag=dag,
Example #26
)

stage_weather_stations_info_from_s3_to_redshift = LoadS3ToRedshiftOperator(
        task_id='stage_weather_stations_info_from_s3_to_redshift',
        aws_credentials_id='aws_credentials_id',
        redshift_conn_id='redshift',
        table='staging_weather_station_info',
        s3_bucket='udacity-dend-alex-ho',
        s3_key='weather_sg/weather_stations_info_{{ ts_nodash }}.csv',
        dag=dag
)

load_temperature_table = LoadFactOperator(
        task_id='load_temperature_events_fact_table',
        redshift_conn_id="redshift",
        table='temperature_events',
        append=True,
        dag=dag
)

load_rainfall_table = LoadFactOperator(
        task_id='load_rainfall_events_fact_table',
        redshift_conn_id="redshift",
        table='rainfall_events',
        append=True,
        dag=dag
)

load_carpark_availability_table = LoadFactOperator(
        task_id='load_carpark_availability_fact_table',
        redshift_conn_id="redshift",
        table='carpark_availability_events',  # table name assumed, following the *_events fact tables above
        append=True,
        dag=dag
)

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    table="staging_songs",
    json_path="auto",
    file_type='json',
    redshift_conn_id='redshift',
    aws_conn_id="aws_credentials",
    s3_bucket="udacity-dend",
    s3_key="song_data/A/A/A",
)

load_songplays_table = LoadFactOperator(
    task_id='Load_songplays_fact_table',
    dag=dag,
    table='songplays',
    redshift_conn_id='redshift',
    aws_conn_id='aws_credentials',
    insert_sql_qry=SqlQueries.songplay_table_insert)

load_user_dimension_table = LoadDimensionOperator(
    task_id='Load_user_dim_table',
    dag=dag,
    table='users',
    redshift_conn_id='redshift',
    aws_conn_id='aws_credentials',
    insert_sql_qry=SqlQueries.user_table_insert)

load_song_dimension_table = LoadDimensionOperator(
    task_id='Load_song_dim_table',
    dag=dag,
    table='songs',
    redshift_conn_id='redshift',
    aws_conn_id='aws_credentials',
    insert_sql_qry=SqlQueries.song_table_insert)

    dag=dag,
    table="staging_songs",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    s3_bucket=Variable.get('s3_bucket'),
    s3_key=Variable.get('s3_key_song_data'),
    log_json_path='auto',
    depends_on_past=False,
    retries=3,
    retry_delay=timedelta(minutes=5),
    email_on_retry=False)

load_songplays_table = LoadFactOperator(task_id='Load_songplays_fact_table',
                                        dag=dag,
                                        table="songplays",
                                        redshift_conn_id='redshift',
                                        depends_on_past=False,
                                        retries=3,
                                        retry_delay=timedelta(minutes=5),
                                        email_on_retry=False)

load_user_dimension_table = LoadDimensionOperator(
    task_id='Load_user_dim_table',
    dag=dag,
    table="users",
    redshift_conn_id='redshift',
    depends_on_past=False,
    retries=3,
    retry_delay=timedelta(minutes=5),
    email_on_retry=False)

load_song_dimension_table = LoadDimensionOperator(
Example #29
    s3_bucket=S3_BUCKET,
    s3_key="raw/i94_immigration_labels_description/visa_code.csv")

copy_country_code = StageCSVToRedshiftOperator(
    task_id="copy_country_code_description",
    dag=dag,
    redshift_conn_id=REDSHIFT_CONN_ID,
    schema=SCHEMA_NAME,
    table="i94country_code",
    s3_bucket=S3_BUCKET,
    s3_key="raw/i94_immigration_labels_description/country_code.csv")

load_usa_travelers_info = LoadFactOperator(
    task_id="load_usa_travelers_info",
    dag=dag,
    redshift_conn_id=REDSHIFT_CONN_ID,
    schema=SCHEMA_NAME,
    table="city_state_travelers_entry",
    insert_sql=SqlQueries.city_state_travelers_entry_insert)

load_arrival_date = LoadDimensionOperator(
    task_id="load_arrival_date",
    dag=dag,
    redshift_conn_id=REDSHIFT_CONN_ID,
    schema=SCHEMA_NAME,
    table="arrival_date",
    insert_sql=SqlQueries.arrival_date_insert)

# Data Quality
# get the dq_checks_settings for data quality
# file: [airflow_file]/plugins/helpers/dq_check_settings.json
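
The dq_check_settings.json file referenced above is not shown in the source; a minimal sketch, assuming it lives at the path in the comment and holds a list of check definitions, of how it might be read before being passed to the data quality task:

import json
import os

# Assumed path, mirroring the comment above ([airflow_file]/plugins/helpers/dq_check_settings.json).
DQ_SETTINGS_PATH = os.path.join(os.path.dirname(__file__), '..', 'plugins', 'helpers',
                                'dq_check_settings.json')

with open(DQ_SETTINGS_PATH) as settings_file:
    # Assumed shape: a list of checks, e.g. {"check_sql": ..., "expected_result": ...}.
    dq_checks_settings = json.load(settings_file)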
Example #30
stage_covid_to_redshift = StageToRedshiftOperator(
    task_id='Stage_covid',
    dag=dag,
    table="staging_covid",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_s3_connection",
    s3_bucket="udacity-data-lake",
    s3_key="covid19/staging",
    region="us-west-2",
    extra_params="delimiter ';'"
)

load_covid_cases_fact_table = LoadFactOperator(
    task_id='Load_covid_cases_fact_table',
    dag=dag,
    table='fact_covid_cases',
    redshift_conn_id="redshift",
    load_sql_stmt=SqlQueries.covid_cases_insert
)

load_dim_location_table = LoadDimensionOperator(
    task_id='Load_dim_location_table',
    dag=dag,
    table='dim_location',
    redshift_conn_id="redshift",
    truncate_table=True,
    load_sql_stmt=SqlQueries.location_table_insert
)

load_dim_date_table = LoadDimensionOperator(
    task_id='Load_dim_date_table',