)

upload_home_to_s3 = UploadToS3Operator(
    task_id="Upload_Home_to_S3",
    dag=dag,
    s3_id=s3_id,
    bucket=s3_bucket,
    key=s3_home_key,
    directory=output_home_dir
)

stage_stock_to_redshift = StageToRedshiftOperator(
    task_id='Stage_stock_data',
    dag=dag,
    redshift_conn_id=redshift_id,
    aws_credentials_id=aws_id,
    staging_table=SqlQueries.staging_stocks,
    create_params=SqlQueries.staging_stock_etf_create,
    s3_path=s3_stock_dir,
    is_json=False
)

stage_etf_to_redshift = StageToRedshiftOperator(
    task_id='Stage_etf_data',
    dag=dag,
    redshift_conn_id=redshift_id,
    aws_credentials_id=aws_id,
    staging_table=SqlQueries.staging_etfs,
    create_params=SqlQueries.staging_stock_etf_create,
    s3_path=s3_etf_dir,
    is_json=False
)
dag = DAG('udac_example_dag',
          default_args=default_args,
          description='Load and transform data in Redshift with Airflow',
          schedule_interval='@hourly',
          catchup=True,
          max_active_runs=5
          )

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    dag=dag,
    redshift_conn_id='redshift',
    aws_credentials_id='aws_credentials',
    s3_bucket=S3_BUCKET,
    table="staging_events",
    s3_key='log_data/{{ ds }}-events.csv',
    f_type='csv',
    ignore_headers=1,
    provide_context=True,
)

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    redshift_conn_id='redshift',
    aws_credentials_id='aws_credentials',
    table='staging_songs',
    s3_bucket=S3_BUCKET,
    s3_key='song_data/',
    f_type='json',
create_tables_task = PostgresOperator(
    task_id='create_tables',
    dag=dag,
    sql='create_tables.sql',
    postgres_conn_id="redshift"
)

# Four different operators will stage the data, transform it, and run data quality checks.

# Task to stage event data from S3 to Redshift
stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    dag=dag,
    provide_context=True,
    aws_conn_id='aws_credentials',
    redshift_conn_id='redshift',
    table='staging_events',
    s3_bucket='udacity-dend',
    s3_key='log_data',
    log_json_path='s3://udacity-dend/log_json_path.json'
)

# Task to stage song data from S3 to Redshift
stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    provide_context=True,
    aws_conn_id='aws_credentials',
    redshift_conn_id='redshift',
    table='staging_songs',
    s3_bucket='udacity-dend',
    'retry_delay': timedelta(minutes=5),
    'catchup': False
}

dag = DAG('udac_example_dag',
          default_args=default_args,
          description='Load and transform data in Redshift with Airflow',
          schedule_interval='0 * * * *')

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    dag=dag,
    redshift_conn_id='redshift',
    aws_credentials_id='aws_credentials',
    table='staging_events',
    s3_bucket='udacity-dend',
    s3_key='log_data',
    data_format='json')

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    redshift_conn_id='redshift',
    aws_credentials_id='aws_credentials',
    table='staging_songs',
    s3_bucket='udacity-dend',
    s3_key='song_data',
    data_format='json')
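# Every excerpt in this section relies on a project-specific
# StageToRedshiftOperator whose implementation is not shown. The following is
# only a minimal sketch of how such an operator is commonly written (Airflow
# 1.x-style imports); the parameter names mirror the excerpt directly above,
# and all of it is an assumption rather than the actual project code.
from airflow.contrib.hooks.aws_hook import AwsHook
from airflow.hooks.postgres_hook import PostgresHook
from airflow.models import BaseOperator
from airflow.utils.decorators import apply_defaults


class StageToRedshiftOperator(BaseOperator):
    """Copy data from S3 into a Redshift staging table."""

    # Allows Jinja templating (e.g. '{{ ds }}') inside s3_key.
    template_fields = ("s3_key",)
    copy_sql = """
        COPY {table}
        FROM '{s3_path}'
        ACCESS_KEY_ID '{access_key}'
        SECRET_ACCESS_KEY '{secret_key}'
        FORMAT AS {data_format}
    """

    @apply_defaults
    def __init__(self, redshift_conn_id="", aws_credentials_id="", table="",
                 s3_bucket="", s3_key="", data_format="JSON 'auto'",
                 *args, **kwargs):
        super(StageToRedshiftOperator, self).__init__(*args, **kwargs)
        self.redshift_conn_id = redshift_conn_id
        self.aws_credentials_id = aws_credentials_id
        self.table = table
        self.s3_bucket = s3_bucket
        self.s3_key = s3_key
        self.data_format = data_format

    def execute(self, context):
        # Fetch AWS credentials from the Airflow connection and open Redshift.
        credentials = AwsHook(self.aws_credentials_id).get_credentials()
        redshift = PostgresHook(postgres_conn_id=self.redshift_conn_id)
        # Render any {execution_date...}/{ds}-style placeholders in the key.
        rendered_key = self.s3_key.format(**context)
        s3_path = "s3://{}/{}".format(self.s3_bucket, rendered_key)
        self.log.info("Copying %s into %s", s3_path, self.table)
        redshift.run(StageToRedshiftOperator.copy_sql.format(
            table=self.table,
            s3_path=s3_path,
            access_key=credentials.access_key,
            secret_key=credentials.secret_key,
            data_format=self.data_format,
        ))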
dag = DAG('data-pipeline',
          default_args=default_args,
          description='Load and transform data from S3 to Redshift',
          schedule_interval='0 * * * *',
          max_active_runs=3
          )

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    dag=dag,
    redshift_conn_id="redshift_conn_id",
    aws_credentials_id="aws_credentials",
    table="public.staging_events",
    s3_bucket="udacity-dend",
    s3_key="log_data",
    json_path="s3://udacity-dend/log_json_path.json",
)

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    redshift_conn_id="redshift_conn_id",
    aws_credentials_id="aws_credentials",
    table="public.staging_songs",
    s3_bucket="udacity-dend",
    s3_key="song_data/A",
    json_path="auto",
    default_args=default_args,
    description='Load and transform data in Redshift with Airflow',
    schedule_interval='0 * * * *',
    catchup=False)

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

create_tables_task = PostgresOperator(task_id="create_tables",
                                      dag=dag,
                                      sql='create_tables.sql',
                                      postgres_conn_id="redshift")

stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    dag=dag,
    origin=log_data,
    destination='public.staging_events',
    redshift_conn_id='redshift',
    aws_credentials_id='aws_credentials',
    json_format="s3://udacity-dend/log_json_path.json")

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    origin=song_data,
    destination='public.staging_songs',
    redshift_conn_id='redshift',
    aws_credentials_id='aws_credentials')

load_songplays_table = LoadFactOperator(task_id='Load_songplays_fact_table',
                                        dag=dag,
                                        query=SqlQueries.songplay_table_insert,
start_operator = DummyOperator(
    task_id='Begin_execution',
    dag=dag
)

'''
create_tables = PostgresOperator(
    task_id="create_tables",
    dag=dag,
    postgres_conn_id="redshift",
    sql='create_tables.sql'
)
'''

# Operator to stage the events table.
stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    dag=dag,
    table="public.staging_events",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    s3_bucket="s3://udacity-dend/log_data"
)

# Operator to stage the songs table.
stage_songs_to_redshift = StageToRedshiftOperatorOne(
    task_id='Stage_songs',
    dag=dag,
    table="public.staging_songs",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    s3_bucket="s3://udacity-dend/song_data/A/A/"
)
dag = DAG('etl_redshift_dag',
          default_args=default_args,
          description='Load and transform data in Redshift with Airflow',
          schedule_interval='0 0 * * *',
          catchup=False)

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

create_tables = CreateTablesOperator(task_id='Create_tables',
                                     redshift_conn_id="redshift",
                                     dag=dag)

stage_demographics_to_redshift = StageToRedshiftOperator(
    task_id='Stage_demographics',
    redshift_conn_id='redshift',
    destination_table='staging_demographics',
    s3_data='s3://jehofman-udacity-dend-capstone-project/demographics',
    aws_credentials_id="aws_credentials",
    s3_jsonpath='auto',
    dag=dag)

stage_immigration_to_redshift = StageToRedshiftOperator(
    task_id='Stage_immigration',
    redshift_conn_id='redshift',
    destination_table='staging_immigration',
    s3_data='s3://jehofman-udacity-dend-capstone-project/immigration',
    aws_credentials_id="aws_credentials",
    s3_jsonpath='auto',
    dag=dag)

load_immigrant_table = LoadDimensionOperator(
    task_id='Load_immigrant_table',
    max_active_runs=1)

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

get_movie_details = GetMovieDetails(
    task_id="prepare_data",
    dag=dag,
    s3_bucket=s3_bucket,
    s3_key=input_data,
)

stage_movies_to_redshift = StageToRedshiftOperator(
    task_id='Stage_movies',
    table_name="staging_movies",
    s3_bucket=s3_bucket,
    s3_key="movies",
    redshift_conn_id="redshift",
    aws_credential_id="aws_credentials",
    dag=dag,
    provide_context=True)

stage_director_to_redshift = StageToRedshiftOperator(
    task_id='Stage_director',
    table_name="staging_director",
    s3_bucket=s3_bucket,
    s3_key="director",
    redshift_conn_id="redshift",
    aws_credential_id="aws_credentials",
    dag=dag,
    provide_context=True)
dag = DAG('sparkify_dag',
          default_args=default_args,
          description='Load and transform data in Redshift with Airflow',
          schedule_interval='0 0 * * *',
          max_active_runs=1
          )

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    provide_context=False,
    dag=dag,
    table="staging_events",
    s3_path="s3://udacity-dend/log_data",
    redshift_credentials="redshift",
    aws_credentials="aws_credentials",
    region="us-west-2",
    data_format="JSON"
)

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    provide_context=False,
    dag=dag,
    table="staging_songs",
    s3_path="s3://udacity-dend/song_data",
    redshift_credentials="redshift",
    aws_credentials="aws_credentials",
    region="us-west-2",
    'email_on_retry': False,
}

dag = DAG('udac_example_dag',
          default_args=default_args,
          description='Load and transform data in Redshift with Airflow',
          schedule_interval='0 * * * *'
          )

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    dag=dag,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    s3_bucket="udacity-dend",
    s3_key="log_data/{execution_date.year}/{execution_date.month}/",
    table="staging_events",
    jsonpaths="s3://udacity-dend/log_json_path.json",
    provide_context=True,
)

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    s3_bucket="udacity-dend",
    s3_key="song_data",
    table="staging_songs",
    ignore_headers="0",
# Create tables using an operator
create_redshift_tables = CreateTablesOperator(
    task_id='Create_tables',
    dag=dag,
    redshift_conn_id="redshift"
)

# Stage events (log) data to Redshift
stage_events_to_redshift = StageToRedshiftOperator(
    task_id='stage_events',
    dag=dag,
    provide_context=True,
    table="staging_events",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    s3_bucket="udacity-dend",
    s3_key="log_data",
    region="us-west-2",
    file_format="JSON",
    execution_date=start_date
)

# Stage songs data to Redshift
stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='stage_songs',
    dag=dag,
    provide_context=True,
    table="staging_songs",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
dag = DAG(
    'etl_dag',
    default_args=default_args,
    description='Load and transform data in Redshift with Airflow',
    schedule_interval='0 * * * *',
)

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    dag=dag,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    table="staging_events",
    s3_bucket=config['S3']['BUCKET_NAME'],
    s3_key=config['S3']['LOG_DATA'],
    json_format=config['S3']['LOG_PATH'],
    provide_context=True,
)

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    table="staging_songs",
    s3_bucket=config['S3']['BUCKET_NAME'],
    s3_key=config['S3']['SONG_DATA'],
    json_format="'auto'",
dag = DAG('data_pipelines_dag',
          default_args=default_args,
          description='Load and transform data in Redshift with Airflow',
          schedule_interval='0 * * * *',
          catchup=False
          )

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    # python_callable=list_keys,
    dag=dag,
    redshift_conn_id='redshift',
    s3_conn_id='aws_credentials',
    table='staging_events',
    region='us-west-2',
    s3_path='log_data',
    s3_bucket='udacity-dend',
)

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    redshift_conn_id='redshift',
    s3_conn_id='aws_credentials',
    table='staging_songs',
    region='us-west-2',
    s3_path='song_data',
dag = DAG(
    dag_id=DAG_ID,
    default_args=default_args,
    description='Load and transform data in Redshift with Airflow',
    schedule_interval='@hourly',
)

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    dag=dag,
    redshift_conn_id=REDSHIFT_CONN_ID,
    aws_credentials_id=AWS_CREDENTIALS_ID,
    table='staging_events',
    s3_bucket=S3_BUCKET,
    s3_key=S3_LOG_KEY,
    region=REGION,
    truncate=False,
    data_format=f"JSON '{LOG_JSON_PATH}'",
)

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    redshift_conn_id=REDSHIFT_CONN_ID,
    aws_credentials_id=AWS_CREDENTIALS_ID,
    table='staging_songs',
    s3_bucket=S3_BUCKET,
    s3_key=S3_SONG_KEY,
    region=REGION,
)

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

# Since we have set provide_context=True in the default args, we have access to
# {execution_date.year}, {execution_date.month}, and {ds}.
# On S3, the data is partitioned in this format:
#   s3://udacity-dend/log_data/2018/11/2018-11-01-events.json
#   s3://udacity-dend/log_data/2018/11/2018-11-30-events.json
# Finally, we also pass in the json_format.
copy_events_from_s3_to_redshift = StageToRedshiftOperator(
    task_id="copy_events_from_s3_to_redshift",
    dag=dag,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    table="staging_events",
    s3_bucket="udacity-dend",
    s3_key="log_data/{execution_date.year}/{execution_date.month}/{ds}-events.json",
    arn_iam_role="arn:aws:iam::506140549518:role/dwhRole",
    region="us-west-2",
    json_format="s3://udacity-dend/log_json_path.json"
)

copy_songs_from_s3_to_redshift = StageToRedshiftOperator(
    task_id="copy_songs_from_s3_to_redshift",
    dag=dag,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    table="staging_songs",
    s3_bucket="udacity-dend",
    s3_key="song_data",
    arn_iam_role="arn:aws:iam::506140549518:role/dwhRole",
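# A quick check of how the partitioned key used above resolves, assuming (as in
# the StageToRedshiftOperator sketch earlier) that the operator renders s3_key
# with str.format(**context). The values below are hypothetical:
from datetime import datetime

context = {"execution_date": datetime(2018, 11, 1), "ds": "2018-11-01"}
key = "log_data/{execution_date.year}/{execution_date.month}/{ds}-events.json"
print("s3://udacity-dend/" + key.format(**context))
# -> s3://udacity-dend/log_data/2018/11/2018-11-01-events.json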
    sql=sql_tables.CREATE_TABLE_STAGING_ACCIDENTS,
    dag=dag
)

create_table_stage_cities_task = PostgresOperator(
    task_id='create_stage_cities_table',
    postgres_conn_id="redshift",
    sql=sql_tables.CREATE_TABLE_STAGING_CITIES,
    dag=dag
)

stage_accidents_to_redshift_task = StageToRedshiftOperator(
    task_id="Stage_accidents",
    conn_id="redshift",
    aws_credentials="aws_credentials",
    table="staging_accidents",
    s3_bucket='davidhidalgo-udacity',
    s3_key="accidents.csv",
    dag=dag
)

stage_cities_to_redshift_task = StageToRedshiftOperator(
    task_id='Stage_cities',
    conn_id="redshift",
    aws_credentials="aws_credentials",
    table="staging_cities",
    s3_bucket="davidhidalgo-udacity",
    s3_key="cities.csv",
    dag=dag
)
    'email_on_failure': False,
    'email_on_retry': False,
}

dag = DAG('udac_example_dag',
          default_args=default_args,
          description='Load and transform data in Redshift with Airflow',
          schedule_interval='0 * * * *')

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    dag=dag,
    redshift_conn_id='redshift',
    aws_credentials_id='aws_credentials',
    table="Stage_events",
    s3_bucket=" ",
    s3_key="log_data",
    data_format='JSON',
    schema='auto')

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    redshift_conn_id='redshift',
    aws_credentials_id='aws_credentials',
    table="Stage_songs",
    s3_bucket=" ",
    s3_key="song_data",
    data_format='JSON',
    schema='auto')
    max_active_runs=1)

# DAG tasks
start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

# `s3_key` supports template strings and could be filled like this:
#
#   s3_key='log_data/{{ execution_date.strftime("%Y-%m-%d") }}-events.json'
#
# to load a single file of logs for a particular day.
# But for the sake of the demo, in case we only have a limited amount of test
# data, we will load the entire dataset.
stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    dag=dag,
    redshift_conn_id='redshift',
    aws_credentials_id='aws',
    aws_region='us-west-2',
    table='staging_events',
    s3_bucket='udacity-dend',
    s3_key='log_data/*')

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    redshift_conn_id='redshift',
    aws_credentials_id='aws',
    aws_region='us-west-2',
    table='staging_songs',
    s3_bucket='udacity-dend',
    s3_key='song_data/*/*/*/*')

load_songplays_table = LoadFactOperator(task_id='Load_songplays_fact_table',
                                        dag=dag,
dag = DAG(
    'github_repo_popularity_etl_v1',
    default_args=default_args,
    description='Full ETL pipeline combining GitHub and Hacker News data',
    schedule_interval=timedelta(days=1),
    catchup=False,
    max_active_runs=1,
)

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

stage_gh_repos_to_redshift = StageToRedshiftOperator(
    task_id='Stage_gh_repos',
    dag=dag,
    table='staging_github_repos',
    create_table_sql=SqlQueries.create_staging_github_repos,
    s3_key='github-repositories.csv',
)

stage_hn_posts_to_redshift = StageToRedshiftOperator(
    task_id='Stage_hn_posts',
    dag=dag,
    table='staging_hacker_news_posts',
    create_table_sql=SqlQueries.create_staging_hacker_news_posts,
    s3_key='hn.csv',
)

load_github_repos_table = LoadTableOperator(
    task_id='Load_github_repos_table',
    dag=dag,
dag = DAG(
    'udac_example_dag',
    default_args=default_args,
    description='Load and transform data in Redshift with Airflow',
    schedule_interval='0 * * * *',
    catchup=False  # catchup is turned off
)

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    dag=dag,
    table="[public].staging_events",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    s3_bucket="udacity_dend",
    s3_key="log_data",
    file_type="JSON 's3://udacity-dend/log_json_path.json'")

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    table="[public].staging_songs",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    s3_bucket="udacity_dend",
    s3_key="song_data",
    file_type="JSON 'auto'")
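# The file_type values above splice either a JSONPaths file or 'auto' into the
# COPY command. A JSONPaths file is just a JSON document that maps table
# columns to fields in the source records; an illustrative (not the actual)
# fragment of what s3://udacity-dend/log_json_path.json might contain:
log_jsonpaths_example = {
    "jsonpaths": [
        "$['artist']",
        "$['auth']",
        "$['firstName']",
    ]
}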
    'start_date': datetime(2019, 1, 12),
}

dag = DAG('udac_example_dag',
          default_args=default_args,
          catchup=False,
          description='Load and transform data in Redshift with Airflow',
          schedule_interval='0 * * * *')

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    dag=dag,
    redshift_conn_id="redshift_ag",
    aws_credentials_id="aws_credentials_ag",
    redshift_sink_table="staging_events",
    s3_origin_bucket="udacity-dend",
    s3_key="log_data",
    data_format="JSON")

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    redshift_conn_id="redshift_ag",
    aws_credentials_id="aws_credentials_ag",
    redshift_sink_table="staging_songs",
    s3_origin_bucket="udacity-dend",
    s3_key="song_data",
    data_format="JSON")
}

dag = DAG(
    'scoreindicators_dag',
    default_args=default_args,
    description='Load and transform data in Redshift with Airflow',
    # schedule_interval='0 * * * *'
)

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    dag=dag,
    conn_id="redshift",
    aws_credentials_id="aws_credentials",
    table="staging_events",
    s3_bucket="lcf-udacity-de-bucket",
    s3_key="data/",
    delimiter=",")

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    conn_id="redshift",
    aws_credentials_id="aws_credentials",
    table="staging_songs",
    s3_bucket="lcf-udacity-de-bucket",
    s3_key="data/happiness/",
    delimiter=",")
    description='Load and transform data in Redshift with Airflow',
    schedule_interval="@hourly",
    max_active_runs=1)

start_operator = DummyOperator(task_id='start_execution', dag=dag)

table_creation = PostgresOperator(task_id='tables_creation',
                                  dag=dag,
                                  postgres_conn_id='redshift',
                                  sql='/create_tables.sql')

stage_events_to_redshift = StageToRedshiftOperator(
    task_id='stage_events',
    dag=dag,
    table="staging_events",
    redshift_conn_id='redshift',
    aws_credentials_id="aws_credentials",
    s3_bucket='udacity-dend',
    s3_key="log-data/{execution_date.year}/{execution_date.month:02d}",
    file_format='JSON \'s3://udacity-dend/log_json_path.json\'')

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='stage_songs',
    dag=dag,
    table="staging_songs",
    redshift_conn_id='redshift',
    aws_credentials_id="aws_credentials",
    s3_bucket='udacity-dend',
    s3_key='song_data/A/A',
    file_format='JSON \'auto\'')
    'start_date': datetime(2019, 1, 12)
}

dag = DAG('airflow_dag',
          default_args=default_args,
          description='Load and transform data in Redshift with Airflow',
          schedule_interval='0 * * * *'
          # start_date=datetime.utcnow()
          )

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    dag=dag,
    redshift_conn_id='redshift',
    aws_credentials_id='aws_credentials',
    table='public.staging_events',
    source_path='s3://udacity-dend/log_data',
    JSON_path='s3://udacity-dend/log_json_path.json')

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    redshift_conn_id='redshift',
    aws_credentials_id='aws_credentials',
    table='public.staging_songs',
    source_path='s3://udacity-dend/song_data',
    JSON_path='auto')

load_songplays_table = LoadFactOperator(
    task_id='Load_songplays_fact_table',
    'email_on_retry': False,
    'schedule_interval': '@hourly'
}

dag = DAG('udac_example_dag',
          default_args=default_args,
          description='Load and transform data in Redshift with Airflow',
          schedule_interval='0 * * * *')

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    dag=dag,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    table="staging_events",
    s3_bucket='udacity-dend',
    s3_key="log_data/",
    extra_params="format as json 's3://udacity-dend/log_json_path.json'")

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    table="staging_songs",
    s3_bucket='udacity-dend',
    s3_key="song_data",
    extra_params="json 'auto' compupdate off region 'us-west-2'")
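# For reference, the extra_params strings above are appended to the COPY
# statement the operator builds, so the songs load would expand to roughly the
# following (credentials elided; an assumed expansion, not captured output):
copy_songs_example = """
    COPY staging_songs
    FROM 's3://udacity-dend/song_data'
    ACCESS_KEY_ID '<access-key>'
    SECRET_ACCESS_KEY '<secret-key>'
    json 'auto' compupdate off region 'us-west-2'
"""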
    'email_on_retry': False
}

dag = DAG('airflow_dag',
          default_args=default_args,
          description='Load and transform data in Redshift with Airflow',
          schedule_interval='0 * * * *',
          max_active_runs=3)

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    dag=dag,
    provide_context=True,
    aws_credentials_id="aws_credentials",
    redshift_conn_id='redshift',
    s3_bucket="udacity-dend-airflow-test",
    s3_key="log_data",
    table="staging_events",
    create_stmt=sql_queries.create_table_staging_events)

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    provide_context=True,
    aws_credentials_id="aws_credentials",
    redshift_conn_id='redshift',
    s3_bucket="udacity-dend-airflow-test",
    s3_key="song_data",
    table="staging_songs",
    create_stmt=sql_queries.create_table_staging_songs)
"email_on_retry": False } dag = DAG("udac_example_dag", default_args=default_args, description="Load and transform data in Redshift with Airflow", schedule_interval="0 * * * *", catchup=False) start_operator = DummyOperator(task_id="Begin_execution", dag=dag) stage_events_to_redshift = StageToRedshiftOperator( task_id="Stage_events", dag=dag, table="staging_events", redshift_conn_id="redshift", aws_credentials_id="aws_credentials", s3_bucket="udacity-dend", s3_key="log_data", file_type="JSON") stage_songs_to_redshift = StageToRedshiftOperator( task_id="Stage_songs", dag=dag, table="staging_songs", redshift_conn_id="redshift", aws_credentials_id="aws_credentials", s3_bucket="udacity-dend", s3_key="song_data/A/A/A", file_type="JSON")
    'catchup': False
}

dag = DAG('dag',
          default_args=default_args,
          description='Load and transform data in Redshift with Airflow',
          schedule_interval='0 * * * *')

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    dag=dag,
    table='public.staging_events',
    redshift_conn_id='redshift',
    redshift_iam_role=v.get('redshift_iam_role'),
    s3_bucket='udacity-dend',
    s3_prefix='log_data',
    json_location='s3://udacity-dend/log_json_path.json',
    region='us-west-2',
    provide_context=True)

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    table='public.staging_songs',
    redshift_conn_id='redshift',
    redshift_iam_role=v.get('redshift_iam_role'),
    s3_bucket='udacity-dend',
    s3_prefix='song_data',
    region='us-west-2',
}

dag = DAG(
    'sparkify',
    default_args=default_args,
    description='Load and transform Sparkify data in Redshift with Airflow',
    start_date=datetime.utcnow(),
    schedule_interval='@daily',
    catchup=False)

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    dag=dag,
    table='staging_events',
    aws_credentials_id='aws_credentials',
    redshift_conn_id='redshift',
    s3_data_location='s3://udacity-dend/log_data',
    json_path_location='s3://udacity-dend/log_json_path.json')

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    table='staging_songs',
    aws_credentials_id='aws_credentials',
    redshift_conn_id='redshift',
    s3_data_location='s3://udacity-dend/song_data')

load_songplays_table = LoadFactOperator(
    task_id='Load_songplays_fact_table',
    dag=dag,
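# None of the excerpts above gets as far as wiring the tasks together. For the
# Sparkify-style DAGs shown here the ordering is typically expressed with
# Airflow's bitshift operators; a sketch using the task names from the last
# excerpt (the downstream fact-load task is cut off above, so this is assumed):
start_operator >> stage_events_to_redshift
start_operator >> stage_songs_to_redshift
[stage_events_to_redshift, stage_songs_to_redshift] >> load_songplays_table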