    'retries': 3,
    'retry_delay': timedelta(minutes=5),
}

dag = DAG(
    'udac_example_dag',
    default_args=default_args,
    description='Load and transform data in Redshift with Airflow',
    schedule_interval='0 * * * *',
    max_active_runs=1
)

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

create_tables = CreateTablesOperator(
    task_id='create_tables',
    dag=dag,
    redshift_conn_id='redshift'
)

stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    dag=dag,
    aws_credentials_id='aws_credentials',
    redshift_conn_id='redshift',
    table_name='staging_events',
    s3_bucket='udacity-dend',
    s3_key='log_data',
    region='us-west-2',
    json_path='s3://udacity-dend/log_json_path.json',
    provide_context=True
)
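# The StageToRedshiftOperator used above is a custom operator whose definition is
# not part of these excerpts. A minimal sketch of one possible implementation,
# assuming Airflow 1.10-style hooks and the keyword names used in the call above
# (table_name, s3_bucket, s3_key, region, json_path), could look like this:

from airflow.contrib.hooks.aws_hook import AwsHook
from airflow.hooks.postgres_hook import PostgresHook
from airflow.models import BaseOperator
from airflow.utils.decorators import apply_defaults


class StageToRedshiftOperator(BaseOperator):
    """Copies JSON data from S3 into a Redshift staging table (illustrative sketch)."""

    template_fields = ('s3_key',)

    @apply_defaults
    def __init__(self, redshift_conn_id='', aws_credentials_id='',
                 table_name='', s3_bucket='', s3_key='',
                 region='us-west-2', json_path='auto', *args, **kwargs):
        super(StageToRedshiftOperator, self).__init__(*args, **kwargs)
        self.redshift_conn_id = redshift_conn_id
        self.aws_credentials_id = aws_credentials_id
        self.table_name = table_name
        self.s3_bucket = s3_bucket
        self.s3_key = s3_key
        self.region = region
        self.json_path = json_path

    def execute(self, context):
        # Pull AWS credentials and the Redshift connection from Airflow.
        credentials = AwsHook(self.aws_credentials_id).get_credentials()
        redshift = PostgresHook(postgres_conn_id=self.redshift_conn_id)

        # Empty the staging table so reruns are idempotent, then COPY from S3.
        redshift.run("TRUNCATE TABLE {}".format(self.table_name))
        s3_path = "s3://{}/{}".format(self.s3_bucket, self.s3_key)
        copy_sql = """
            COPY {table}
            FROM '{s3_path}'
            ACCESS_KEY_ID '{access_key}'
            SECRET_ACCESS_KEY '{secret_key}'
            REGION '{region}'
            FORMAT AS JSON '{json_path}'
        """.format(
            table=self.table_name,
            s3_path=s3_path,
            access_key=credentials.access_key,
            secret_key=credentials.secret_key,
            region=self.region,
            json_path=self.json_path,
        )
        redshift.run(copy_sql)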
    'retries': 3,
    'retry_delay': timedelta(minutes=5),
}

dag = DAG(
    'udac_example_dag',
    default_args=default_args,
    description='Load and transform data in Redshift with Airflow',
    schedule_interval='0 * * * *',
    catchup=False  # catchup is a DAG-level argument, not a default_arg
)

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

create_tables = CreateTablesOperator(
    task_id='Create_tables',
    dag=dag,
    conn_id="redshift",
    create_query_list=create_table_queries_list
)

stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    dag=dag,
    table='staging_events',
    conn_id="redshift",
    aws_credentials_id="aws_credentials",
    s3_bucket="udacity-dend",
    s3_key="log_data",
    region="us-west-2",
    file_format='JSON',
    optional_path='s3://udacity-dend/log_json_path.json'
)
    'retry_delay': timedelta(minutes=5),
    'email_on_retry': False
}

dag_name = 'sparkify_dend_dag'

dag = DAG(
    dag_name,
    default_args=default_args,
    description='Load and transform data in Redshift with Airflow',
    schedule_interval='0 * * * *',
    max_active_runs=3,
    catchup=False  # catchup belongs on the DAG, not in default_args
)

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

create_redshift_tables = CreateTablesOperator(
    task_id='Create_tables',
    dag=dag,
    redshift_conn_id="redshift"
)

stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    dag=dag,
    provide_context=True,
    table="events",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    s3_bucket="udacity-dend",
    s3_key="log_data",
    region="us-west-2",
    file_format="JSON",
    execution_date=start_date
)
}

dag = DAG(
    'udac_sparkify_dag_1',
    default_args=default_args,
    description='Load and transform data from S3 to Redshift with Airflow',
    schedule_interval='@hourly',
    # schedule_interval='@monthly',
    max_active_runs=1
)

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

create_staging_event_table = CreateTablesOperator(
    task_id='Create_staging_event_table',
    dag=dag,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    sql=CreateTableStaments.CREATE_STAGING_EVENTS_TABLE_SQL
)

create_staging_song_table = CreateTablesOperator(
    task_id='Create_staging_songs_table',
    dag=dag,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    sql=CreateTableStaments.CREATE_STAGING_SONGS_TABLE_SQL
)

create_fact_songplays_table = CreateTablesOperator(
    task_id='Create_fact_songplays_table',
    dag=dag,
    'start_date': datetime(2019, 7, 27),
    'depends_on_past': False,
    'email_on_retry': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=5),
}

dag = DAG(
    'udac_example_dag',
    default_args=default_args,
    description='Load and transform data in Redshift with Airflow',
    schedule_interval='0 * * * *',
    catchup=False
)

start_operator = CreateTablesOperator(
    task_id='Begin_execution',
    dag=dag,
    redshift_conn_id='redshift',
    sql_file='/home/workspace/airflow/create_tables.sql'
)

json_path = "s3://{}/{}".format('udacity-dend', 'log_json_path.json')

stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    dag=dag,
    table='staging_events',
    redshift_conn_id='redshift',
    aws_credentials_id='aws_credentials',
    s3_bucket='udacity-dend',
    s3_key='log_data/2018/11/',
    copy_extra="FORMAT AS JSON '{}' REGION 'us-west-2'".format(json_path)
)

stage_songs_to_redshift = StageToRedshiftOperator(
    'retry_delay': timedelta(minutes=5),
}

dag = DAG(
    'udac_capstone_dag',
    default_args=default_args,
    description='Load and transform data in Redshift with Airflow',
    schedule_interval='@monthly',
    max_active_runs=1,
    catchup=False  # catchup is a DAG-level argument, not a default_arg
)

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

create_staging_table = CreateTablesOperator(
    task_id='Create_staging_table',
    dag=dag,
    provide_context=True,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    queries=SqlQueries.create_staging_table_queries
)

create_target_table = CreateTablesOperator(
    task_id='Create_target_table',
    dag=dag,
    provide_context=True,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    queries=SqlQueries.create_target_table_queries
)

consolidate_operator_1 = DummyOperator(task_id='Consolidate_execution_1', dag=dag)
    'email_on_retry': False
}

dag = DAG(
    'movie_recommendation_dag',
    default_args=default_args,
    description='Load and transform Movie Recommendation Data from S3 to Redshift with Airflow',
    schedule_interval="@monthly"
)

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)
end_operator = DummyOperator(task_id='Stop_execution', dag=dag)

create_tables_task = CreateTablesOperator(
    task_id="Create_tables",
    dag=dag,
    redshift_conn_id="redshift"
)

stage_ratings_task = StageToRedshiftOperator(
    task_id="Stage_ratings",
    dag=dag,
    table="staging_ratings",
    redshift_conn_id="redshift",
    aws_credentials="aws_credentials",
    s3_bucket="spark-out-data",
    s3_key="ratings",
    data_format="PARQUET"
)

stage_movies_task = StageToRedshiftOperator(
    task_id="Stage_movies",
    dag=dag,
    table="staging_movies",
    redshift_conn_id="redshift",
    'retry_delay': timedelta(minutes=5),
    'email_on_retry': False,
}

dag = DAG(
    'udacity-pipeline',
    default_args=default_args,
    description='Load and transform data in Redshift with Airflow',
    schedule_interval='@hourly',
    catchup=False  # catchup is a DAG-level argument, not a default_arg
)

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

# Create the songplays fact table
create_fact_tables = CreateTablesOperator(
    task_id='Create_fact_tables',
    tables=['songplays'],
    redshift_conn_id='redshift',
    sql_queries=[SqlQueries.songplay_table_create],
    dag=dag)

# Create the dimension tables: artists, songs, time, and users
create_dimension_tables = CreateTablesOperator(
    task_id='Create_dimension_tables',
    tables=['artists', 'songs', 'time', 'users'],
    redshift_conn_id='redshift',
    sql_queries=[
        SqlQueries.artist_table_create,
        SqlQueries.song_table_create,
        SqlQueries.time_table_create,
        SqlQueries.user_table_create
    ],
    dag=dag)

# Create the two staging tables: staging events and staging songs
dag = DAG(
    'udac_example_dag',
    default_args=default_args,
    description='Load and transform data in Redshift with Airflow',
    schedule_interval='0 * * * *'
)

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

create_tables = CreateTablesOperator(
    task_id='Create_tables',
    dag=dag,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    tables={
        "staging_events": SqlQueries.create_staging_events,
        "staging_songs": SqlQueries.create_staging_songs,
        "songplays": SqlQueries.create_songplays_table,
        "users": SqlQueries.create_users_table,
        "artists": SqlQueries.create_artist_table,
        "songs": SqlQueries.create_songs_table,
        "time": SqlQueries.create_time_table
    }
)

stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    dag=dag,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    table="staging_events",
    s3_bucket=s3_bucket,
    s3_key=events_key,
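# The CreateTablesOperator above receives a dict that maps each table name to its
# CREATE statement. Its definition is not part of these excerpts; a minimal sketch
# of such an operator, assuming the standard Airflow 1.10 PostgresHook and the
# keyword names used in the call above, might be:

from airflow.hooks.postgres_hook import PostgresHook
from airflow.models import BaseOperator
from airflow.utils.decorators import apply_defaults


class CreateTablesOperator(BaseOperator):
    """Runs the CREATE statement for every entry in `tables` (illustrative sketch)."""

    @apply_defaults
    def __init__(self, redshift_conn_id='', aws_credentials_id='',
                 tables=None, *args, **kwargs):
        super(CreateTablesOperator, self).__init__(*args, **kwargs)
        self.redshift_conn_id = redshift_conn_id
        self.aws_credentials_id = aws_credentials_id  # kept for interface parity; not needed for DDL
        self.tables = tables or {}

    def execute(self, context):
        redshift = PostgresHook(postgres_conn_id=self.redshift_conn_id)
        for table_name, create_sql in self.tables.items():
            self.log.info("Creating table %s if it does not exist", table_name)
            redshift.run(create_sql)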
dag = DAG(
    'capstone_pipeline',
    default_args=default_arguments,
    description='Use Airflow to load New York car crash and weather data from S3 to Redshift, perform data quality checks and SQL queries',
    max_active_runs=3)

# Dummy operator marking the beginning of the DAG
start_operator = DummyOperator(task_id='begin_execution', dag=dag)

# CreateTablesOperator to create tables on Redshift
create_tables_in_redshift = CreateTablesOperator(
    redshift_conn_id='redshift',
    task_id='create_tables_in_redshift',
    dag=dag)

# StageToRedshiftOperator to stage crash and weather data to Redshift
bucket = 'mh-udacity-dend'
region = 'eu-central-1'

stage_crash_data_to_redshift = StageToRedshiftOperator(
    redshift_conn_id='redshift',
    aws_credentials_id='aws_credentials',
    table='staging_crashes',
    s3_bucket=bucket,
    s3_key='capstone_project/crash_data',
    region=region,
    file_format='JSON',
    'depends_on_past': False,
    'retry_delay': timedelta(minutes=5),
    'retries': 3,
}

dag = DAG(
    'udac_example_dag',
    default_args=default_args,
    description='Load and transform data in Redshift with Airflow',
    schedule_interval='0 * * * *',
    catchup=False)

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

create_tables_redshift = CreateTablesOperator(
    task_id='Create_tables',
    dag=dag,
    redshift_conn_id="redshift",
    file_path="/home/workspace/airflow/create_tables.sql")

stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    dag=dag,
    redshift_conn_id="redshift",
    table="staging_events",
    aws_credentials_id="aws_credentials",
    s3_bucket=s3_bucket,
    s3_key=log_s3_key,
    log_json_file=log_json_file,
    provide_context=True)

stage_songs_to_redshift = StageToRedshiftOperator(
# This list is ordered so that the `songplays` table is created after all other tables.
initialize_tables = [
    SqlQueries.create_staging_events,
    SqlQueries.create_staging_songs,
    SqlQueries.create_table_users,
    SqlQueries.create_table_time,
    SqlQueries.create_table_artists,
    SqlQueries.create_table_songs,
    SqlQueries.create_table_songplays,
]

create_tables = CreateTablesOperator(
    task_id='create_tables',
    dag=dag,
    redshift_conn_id="redshift",
    queries_to_run=initialize_tables,
    # list of tables to create
    table_names=[
        "staging_events", "staging_songs", "users", "time",
        "artists", "songs", "songplays"
    ])

stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    dag=dag,
    table="public.staging_events",
    # create_sql_stmt=SqlQueries.create_staging_events,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    s3_bucket="udacity-dend",
    s3_key="log_data",
    region="us-west-2",
dag = DAG(
    'udac_example_dag',
    default_args=default_args,
    description='Load and transform data in Redshift with Airflow',
    max_active_runs=1)
# schedule_interval="@hourly"
# schedule_interval='0 0 * * *'

start_operator = DummyOperator(task_id='START_OPERATOR', dag=dag)

# Creating STG (staging) tables
create_listings_staging_table = CreateTablesOperator(
    task_id='Create_Listings_STG_Table',
    dag=dag,
    query=SqlQueries.create_staging_listings,
    table="STG_LISTINGS",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials")
create_listings_staging_table.set_upstream(start_operator)

create_calendars_staging_table = CreateTablesOperator(
    task_id='Create_Calendars_STG_Table',
    dag=dag,
    query=SqlQueries.create_staging_calendars,
    table="STG_CALENDARS",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials")
create_calendars_staging_table.set_upstream(start_operator)

create_reviews_staging_table = CreateTablesOperator(
target_table="covidcases") drop_table_masternode = DropTablesOperator(task_id='drop_table_masternode', dag=dag, target_table="masternode") drop_table_hospital = DropTablesOperator(task_id='drop_table_hospital', dag=dag, target_table="hospital") #create_tables create_tables_operator = DummyOperator(task_id='create_tables', dag=dag) create_tables_covidcases_stage = CreateTablesOperator( task_id='create_tables_covidcases_stage', dag=dag, sql=SqlQueries.table_create_covidcases_stage, table='table_create_covidcases_stage') create_tables_masternode_stage = CreateTablesOperator( task_id='create_tables_masternode_stage', dag=dag, sql=SqlQueries.table_create_masternode_stage, table='table_create_masternode_stage') create_tables_hospital_stage = CreateTablesOperator( task_id='create_tables_hospital_stage', dag=dag, sql=SqlQueries.table_create_hospital_stage, table='table_create_hospital_stage')