Example #1
)

upload_home_to_s3 = UploadToS3Operator(
    task_id="Upload_Home_to_S3",
    dag=dag,
    s3_id=s3_id,
    bucket=s3_bucket,
    key=s3_home_key,
    directory=output_home_dir
)

stage_stock_to_redshift = StageToRedshiftOperator(
    task_id='Stage_stock_data',
    dag=dag,
    redshift_conn_id=redshift_id,
    aws_credentials_id=aws_id,
    staging_table=SqlQueries.staging_stocks,
    create_params=SqlQueries.staging_stock_etf_create,
    s3_path=s3_stock_dir,
    is_json=False
)

stage_etf_to_redshift = StageToRedshiftOperator(
    task_id='Stage_etf_data',
    dag=dag,
    redshift_conn_id=redshift_id,
    aws_credentials_id=aws_id,
    staging_table=SqlQueries.staging_etfs,
    create_params=SqlQueries.staging_stock_etf_create,
    s3_path=s3_etf_dir,
    is_json=False
)
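# None of the snippets on this page include the implementation behind
# StageToRedshiftOperator. The class below is only a minimal illustrative
# sketch of such a custom operator, assuming Airflow 1.10-style hooks
# (AwsHook, PostgresHook) and a Redshift COPY of JSON data; the argument
# names are assumptions for illustration, not the source of any project above.
from airflow.contrib.hooks.aws_hook import AwsHook
from airflow.hooks.postgres_hook import PostgresHook
from airflow.models import BaseOperator
from airflow.utils.decorators import apply_defaults


class StageToRedshiftOperator(BaseOperator):
    # Let Airflow render Jinja templates (e.g. "log_data/{{ ds }}-events.json")
    # in the S3 key before execute() runs.
    template_fields = ("s3_key",)

    copy_sql = """
        COPY {table}
        FROM '{s3_path}'
        ACCESS_KEY_ID '{access_key}'
        SECRET_ACCESS_KEY '{secret_key}'
        FORMAT AS JSON '{json_path}'
    """

    @apply_defaults
    def __init__(self, redshift_conn_id="", aws_credentials_id="", table="",
                 s3_bucket="", s3_key="", json_path="auto", *args, **kwargs):
        super(StageToRedshiftOperator, self).__init__(*args, **kwargs)
        self.redshift_conn_id = redshift_conn_id
        self.aws_credentials_id = aws_credentials_id
        self.table = table
        self.s3_bucket = s3_bucket
        self.s3_key = s3_key
        self.json_path = json_path

    def execute(self, context):
        credentials = AwsHook(self.aws_credentials_id).get_credentials()
        redshift = PostgresHook(postgres_conn_id=self.redshift_conn_id)
        # Also support "{execution_date.year}"-style keys used by several
        # snippets below by formatting the key with the task context.
        rendered_key = self.s3_key.format(**context)
        s3_path = "s3://{}/{}".format(self.s3_bucket, rendered_key)
        self.log.info("Copying %s into %s", s3_path, self.table)
        redshift.run(StageToRedshiftOperator.copy_sql.format(
            table=self.table,
            s3_path=s3_path,
            access_key=credentials.access_key,
            secret_key=credentials.secret_key,
            json_path=self.json_path,
        ))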
dag = DAG('udac_example_dag',
          default_args=default_args,
          description='Load and transform data in Redshift with Airflow',
          schedule_interval='@hourly',
          catchup=True,
          max_active_runs=5
          )

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    dag=dag,
    redshift_conn_id='redshift',
    aws_credentials_id='aws_credentials',
    s3_bucket=S3_BUCKET,
    table="staging_events",
    s3_key='log_data/{{ ds }}-events.csv',
    f_type='csv',
    ignore_headers=1,
    provide_context=True,
)

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    redshift_conn_id='redshift',
    aws_credentials_id='aws_credentials',
    table='staging_songs',
    s3_bucket=S3_BUCKET,
    s3_key='song_data/',
    f_type='json',
create_tables_task = PostgresOperator(
    task_id='create_tables',
    dag=dag,
    sql='create_tables.sql',
    postgres_conn_id="redshift"
)

# Four different operators will stage the data, transform the data and run
# checks on data quality (an illustrative wiring sketch follows the events
# staging task below).

# Task to stage event data from S3 to Redshift
stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    dag=dag,
    provide_context=True,
    aws_conn_id='aws_credentials',
    redshift_conn_id='redshift',
    table='staging_events',
    s3_bucket= 'udacity-dend',
    s3_key='log_data', 
    log_json_path='s3://udacity-dend/log_json_path.json'
)
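# The comment above describes staging, transformation and quality checks, but
# this excerpt is cut off before the task wiring. The self-contained toy DAG
# below only illustrates the usual ordering with Airflow's bitshift operators;
# every task name in it is an assumption, not taken from the project above.
from datetime import datetime
from airflow import DAG
from airflow.operators.dummy_operator import DummyOperator

with DAG('wiring_sketch', start_date=datetime(2019, 1, 12),
         schedule_interval=None) as sketch_dag:
    begin = DummyOperator(task_id='Begin_execution')
    create_tables = DummyOperator(task_id='Create_tables')
    stage_events = DummyOperator(task_id='Stage_events')
    stage_songs = DummyOperator(task_id='Stage_songs')
    load_fact = DummyOperator(task_id='Load_songplays_fact_table')
    quality_checks = DummyOperator(task_id='Run_data_quality_checks')
    end = DummyOperator(task_id='Stop_execution')

    # Staging runs in parallel after table creation, then the fact load and
    # the quality checks close out the run.
    begin >> create_tables >> [stage_events, stage_songs]
    [stage_events, stage_songs] >> load_fact >> quality_checks >> end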

# Task to stage song data from S3 to Redshift
stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    provide_context=True,
    aws_conn_id='aws_credentials',
    redshift_conn_id='redshift',
    table='staging_songs',
    s3_bucket='udacity-dend',
    'retry_delay': timedelta(minutes=5),
    'catchup': False
}

dag = DAG('udac_example_dag',
          default_args=default_args,
          description='Load and transform data in Redshift with Airflow',
          schedule_interval='0 * * * *')

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    dag=dag,
    redshift_conn_id='redshift',
    aws_credentials_id='aws_credentials',
    table='staging_events',
    s3_bucket='udacity-dend',
    s3_key='log_data',
    data_format='json')

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    redshift_conn_id='redshift',
    aws_credentials_id='aws_credentials',
    table='staging_songs',
    s3_bucket='udacity-dend',
    s3_key='song_data',
    data_format='json')
dag = DAG('data-pipeline',
          default_args=default_args,
          description='Load and transform data from S3 to Redshift',
          schedule_interval='0 * * * *',
          max_active_runs=3
        )

start_operator = DummyOperator(task_id='Begin_execution',  dag=dag)

stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    dag=dag,
    redshift_conn_id="redshift_conn_id",
    aws_credentials_id="aws_credentials",
    table="public.staging_events",
    s3_bucket="udacity-dend",
    s3_key="log_data",
    json_path="s3://udacity-dend/log_json_path.json",
)

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    redshift_conn_id="redshift_conn_id",
    aws_credentials_id="aws_credentials",
    table="public.staging_songs",
    s3_bucket = "udacity-dend",
    s3_key = "song_data/A",
    json_path="auto",
Example #6
          default_args=default_args,
          description='Load and transform data in Redshift with Airflow',
          schedule_interval='0 * * * *',
          catchup=False)

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

create_tables_task = PostgresOperator(task_id="create_tables",
                                      dag=dag,
                                      sql='create_tables.sql',
                                      postgres_conn_id="redshift")

stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    dag=dag,
    origin=log_data,
    destination='public.staging_events',
    redshift_conn_id='redshift',
    aws_credentials_id='aws_credentials',
    json_format="s3://udacity-dend/log_json_path.json")

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    origin=song_data,
    destination='public.staging_songs',
    redshift_conn_id='redshift',
    aws_credentials_id='aws_credentials')

load_songplays_table = LoadFactOperator(task_id='Load_songplays_fact_table',
                                        dag=dag,
                                        query=SqlQueries.songplay_table_insert,
start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

'''
create_tables = PostgresOperator(
    task_id="create_tables",
    dag=dag,
    postgres_conn_id="redshift",
    sql='create_tables.sql'
)
'''

# Operator to stage the events table.
stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    dag=dag,
    table="public.staging_events",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    s3_bucket="s3://udacity-dend/log_data"
)
 

# Operator to stage the songs table.
stage_songs_to_redshift = StageToRedshiftOperatorOne(
    task_id='Stage_songs',
    dag=dag,
    table="public.staging_songs",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    s3_bucket="s3://udacity-dend/song_data/A/A/"
) 
dag = DAG('etl_redshift_dag',
          default_args=default_args,
          description='Load and transform data in Redshift with Airflow',
          schedule_interval='0 0 * * *',
          catchup=False)

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

create_tables = CreateTablesOperator(task_id='Create_tables',
                                     redshift_conn_id="redshift",
                                     dag=dag)

stage_demographics_to_redshift = StageToRedshiftOperator(
    task_id='Stage_demographics',
    redshift_conn_id='redshift',
    destination_table='staging_demographics',
    s3_data='s3://jehofman-udacity-dend-capstone-project/demographics',
    aws_credentials_id="aws_credentials",
    s3_jsonpath='auto',
    dag=dag)

stage_immigration_to_redshift = StageToRedshiftOperator(
    task_id='Stage_immigration',
    redshift_conn_id='redshift',
    destination_table='staging_immigration',
    s3_data='s3://jehofman-udacity-dend-capstone-project/immigration',
    aws_credentials_id="aws_credentials",
    s3_jsonpath='auto',
    dag=dag)

load_immigrant_table = LoadDimensionOperator(
    task_id='Load_immigrant_table',
Example #9
          max_active_runs=1)

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

get_movie_details = GetMovieDetails(
    task_id="prepare_data",
    dag=dag,
    s3_bucket=s3_bucket,
    s3_key=input_data,
)

stage_movies_to_redshift = StageToRedshiftOperator(
    task_id='Stage_movies',
    table_name="staging_movies",
    s3_bucket=s3_bucket,
    s3_key="movies",
    redshift_conn_id="redshift",
    aws_credential_id="aws_credentials",
    dag=dag,
    provide_context=True)

stage_director_to_redshift = StageToRedshiftOperator(
    task_id='Stage_director',
    table_name="staging_director",
    s3_bucket=s3_bucket,
    s3_key="director",
    redshift_conn_id="redshift",
    aws_credential_id="aws_credentials",
    dag=dag,
    provide_context=True)
Example #10
dag = DAG('sparkify_dag',
          default_args=default_args,
          description='Load and transform data in Redshift with Airflow',
          schedule_interval='0 0 * * *',
          max_active_runs=1
        )


start_operator = DummyOperator(task_id='Begin_execution',  dag=dag)

stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    provide_context=False,
    dag=dag,
    table = "staging_events",
    s3_path = "s3://udacity-dend/log_data",
    redshift_credentials="redshift",
    aws_credentials="aws_credentials",
    region="us-west-2",
    data_format="JSON"
)

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    provide_context=False,
    dag=dag,
    table = "staging_songs",
    s3_path = "s3://udacity-dend/song_data",
    redshift_credentials="redshift",
    aws_credentials="aws_credentials",
    region="us-west-2",
    'email_on_retry': False,
}

dag = DAG('udac_example_dag',
          default_args=default_args,
          description='Load and transform data in Redshift with Airflow',
          schedule_interval='0 * * * *'
        )
start_operator = DummyOperator(task_id='Begin_execution',  dag=dag)

stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    dag=dag,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    s3_bucket="udacity-dend",
    s3_key="log_data/{execution_date.year}/{execution_date.month}/",
    table= "staging_events",
    jsonpaths= "s3://udacity-dend/log_json_path.json",
    provide_context=True,  
)

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    s3_bucket="udacity-dend",
    s3_key="song_data",
    table= "staging_songs",
    ignore_headers="0",
# Create tables using an operator
create_redshift_tables = CreateTablesOperator(
    task_id='Create_tables',
    dag=dag,
    redshift_conn_id="redshift"
)

# Stage events (log) data to Redshift
stage_events_to_redshift = StageToRedshiftOperator(
    task_id='stage_events',
    dag=dag,
    provide_context=True,
    table="staging_events",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    s3_bucket="udacity-dend",
    s3_key="log_data",
    region="us-west-2",
    file_format="JSON",
    execution_date=start_date
)

# Stage songs data to Redshift
stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='stage_songs',
    dag=dag,
    provide_context=True,
    table="staging_songs",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
Example #13
dag = DAG(
    'etl_dag',
    default_args=default_args,
    description='Load and transform data in Redshift with Airflow',
    schedule_interval='0 * * * *',
)

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    dag=dag,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    table="staging_events",
    s3_bucket=config['S3']['BUCKET_NAME'],
    s3_key=config['S3']['LOG_DATA'],
    json_format=config['S3']['LOG_PATH'],
    provide_context=True,
)

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    table="staging_songs",
    s3_bucket=config['S3']['BUCKET_NAME'],
    s3_key=config['S3']['SONG_DATA'],
    json_format="'auto'",
Example #14
dag = DAG('data_pipelines_dag',
          default_args=default_args,
          description='Load and transform data in Redshift with Airflow',
          schedule_interval='0 * * * *',
          catchup=False          
        )

start_operator = DummyOperator(task_id='Begin_execution',  dag=dag)
 
stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    #python_callable=list_keys,
    dag=dag,
    redshift_conn_id='redshift',
    s3_conn_id='aws_credentials',
    table='staging_events',
    region='us-west-2',
    s3_path='log_data',
    s3_bucket='udacity-dend',   
    )
 
 
stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    redshift_conn_id='redshift',
    s3_conn_id='aws_credentials', 
    table='staging_songs',
    region='us-west-2',
    s3_path='song_data',
Example #15
dag = DAG(
    dag_id=DAG_ID,
    default_args=default_args,
    description='Load and transform data in Redshift with Airflow',
    schedule_interval='@hourly',
)

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    dag=dag,
    redshift_conn_id=REDSHIFT_CONN_ID,
    aws_credentials_id=AWS_CREDENTIALS_ID,
    table='staging_events',
    s3_bucket=S3_BUCKET,
    s3_key=S3_LOG_KEY,
    region=REGION,
    truncate=False,
    data_format=f"JSON '{LOG_JSON_PATH}'",
)

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    redshift_conn_id=REDSHIFT_CONN_ID,
    aws_credentials_id=AWS_CREDENTIALS_ID,
    table='staging_songs',
    s3_bucket=S3_BUCKET,
    s3_key=S3_SONG_KEY,
    region=REGION,
         )

start_operator = DummyOperator(task_id='Begin_execution',  dag=dag)

# Since we set provide_context=True in the default args, we have access to
# {execution_date.year}, {execution_date.month} and {ds} in the s3_key.
# On S3, the data is partitioned in this format:
#   s3://udacity-dend/log_data/2018/11/2018-11-01-events.json
#   s3://udacity-dend/log_data/2018/11/2018-11-30-events.json
# Finally, we also pass in the json_format (a small rendering sketch follows
# this task).
copy_events_from_s3_to_redshift = StageToRedshiftOperator(
    task_id="copy_events_from_s3_to_redshift",
    dag=dag,
    redshift_conn_id = "redshift",
    aws_credentials_id = "aws_credentials",
    table = "staging_events",
    s3_bucket = "udacity-dend",
    s3_key = "log_data/{execution_date.year}/{execution_date.month}/{ds}-events.json",
    arn_iam_role = "arn:aws:iam::506140549518:role/dwhRole",
    region = "us-west-2",
    json_format = "s3://udacity-dend/log_json_path.json"
)
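# A tiny, self-contained sketch of how the templated s3_key above could be
# rendered from the task context described in the comment. The operator's
# internals are not shown on this page, so this is illustrative only:
from datetime import datetime

s3_key = "log_data/{execution_date.year}/{execution_date.month}/{ds}-events.json"
context = {"execution_date": datetime(2018, 11, 1), "ds": "2018-11-01"}
print(s3_key.format(**context))
# -> log_data/2018/11/2018-11-01-events.json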

copy_songs_from_s3_to_redshift = StageToRedshiftOperator(
    task_id="copy_songs_from_s3_to_redshift",
    dag=dag,
    redshift_conn_id = "redshift",
    aws_credentials_id = "aws_credentials",
    table = "staging_songs",
    s3_bucket = "udacity-dend",
    s3_key = "song_data",
    arn_iam_role = "arn:aws:iam::506140549518:role/dwhRole",
Example #17
    sql=sql_tables.CREATE_TABLE_STAGING_ACCIDENTS,
    dag=dag
)

create_table_stage_cities_task = PostgresOperator(
    task_id='create_stage_cities_table',
    postgres_conn_id="redshift",
    sql=sql_tables.CREATE_TABLE_STAGING_CITIES,
    dag=dag
)

stage_accidents_to_redshift_task = StageToRedshiftOperator(
    task_id="Stage_accidents",
    conn_id="redshift",
    aws_credentials="aws_credentials",
    table="staging_accidents",
    s3_bucket='davidhidalgo-udacity',
    s3_key="accidents.csv",
    dag=dag
)

stage_cities_to_redshift_task = StageToRedshiftOperator(
    task_id='Stage_cities',
    conn_id="redshift",
    aws_credentials="aws_credentials",
    table="staging_cities",
    s3_bucket="davidhidalgo-udacity",
    s3_key="cities.csv",
    dag=dag
)
Example #18
    'email_on_failure': False,
    'email_on_retry': False,
}

dag = DAG('udac_example_dag',
          default_args=default_args,
          description='Load and transform data in Redshift with Airflow',
          schedule_interval='0 * * * *')

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    dag=dag,
    redshift_conn_id='redshift',
    aws_credentials_id='aws_credentials',
    table="Stage_events",
    s3_bucket=" ",
    s3_key="log_data",
    data_format='JSON',
    schema='auto')

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    redshift_conn_id='redshift',
    aws_credentials_id='aws_credentials',
    table="Stage_songs",
    s3_bucket=" ",
    s3_key="song_data",
    data_format='JSON',
    schema='auto')
    max_active_runs=1)

# DAG tasks
start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

# `s3_key` supports template strings and could be filled like this:
#
#   s3_key='log_data/{{ execution_date.strftime("%Y-%m-%d") }}-events.json'
#
# to load a single file of logs for a particular day.
# But for the sake of the demo, in case we have only a limited amount of test
# data, we will load the entire dataset (a short rendering sketch follows the
# two staging tasks below).
stage_events_to_redshift = StageToRedshiftOperator(task_id='Stage_events',
                                                   dag=dag,
                                                   redshift_conn_id='redshift',
                                                   aws_credentials_id='aws',
                                                   aws_region='us-west-2',
                                                   table='staging_events',
                                                   s3_bucket='udacity-dend',
                                                   s3_key='log_data/*')

stage_songs_to_redshift = StageToRedshiftOperator(task_id='Stage_songs',
                                                  dag=dag,
                                                  redshift_conn_id='redshift',
                                                  aws_credentials_id='aws',
                                                  aws_region='us-west-2',
                                                  table='staging_songs',
                                                  s3_bucket='udacity-dend',
                                                  s3_key='song_data/*/*/*/*')
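# How the Jinja-templated s3_key mentioned in the comment before these staging
# tasks would be rendered. Airflow only renders {{ ... }} for fields listed in
# the operator's template_fields; the standalone jinja2 call below (jinja2
# ships with Airflow) is just to show the resulting key:
from datetime import datetime
from jinja2 import Template

key = 'log_data/{{ execution_date.strftime("%Y-%m-%d") }}-events.json'
print(Template(key).render(execution_date=datetime(2018, 11, 4)))
# -> log_data/2018-11-04-events.json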

load_songplays_table = LoadFactOperator(task_id='Load_songplays_fact_table',
                                        dag=dag,
dag = DAG(
    'github_repo_popularity_etl_v1',
    default_args=default_args,
    description='Full ETL pipeline combining GitHub and Hacker News data',
    schedule_interval=timedelta(days=1),
    catchup=False,
    max_active_runs=1,
)

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

stage_gh_repos_to_redshift = StageToRedshiftOperator(
    task_id='Stage_gh_repos',
    dag=dag,
    table='staging_github_repos',
    create_table_sql=SqlQueries.create_staging_github_repos,
    s3_key='github-repositories.csv',
)

stage_hn_posts_to_redshift = StageToRedshiftOperator(
    task_id='Stage_hn_posts',
    dag=dag,
    table='staging_hacker_news_posts',
    create_table_sql=SqlQueries.create_staging_hacker_news_posts,
    s3_key='hn.csv',
)

load_github_repos_table = LoadTableOperator(
    task_id='Load_github_repos_table',
    dag=dag,
dag = DAG(
    'udac_example_dag',
    default_args=default_args,
    description='Load and transform data in Redshift with Airflow',
    schedule_interval='0 * * * *',
    catchup=False  #catchup is turned off
)

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    dag=dag,
    table="[public].staging_events",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    s3_bucket="udacity_dend",
    s3_key="log_data",
    file_type="JSON 's3://udacity-dend/log_json_path.json'")

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    table="[public].staging_songs",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    s3_bucket="udacity_dend",
    s3_key="song_data",
    file_type="JSON 'auto'")
Example #22
    'start_date': datetime(2019, 1, 12),
}

dag = DAG('udac_example_dag',
          default_args=default_args,
          catchup=False,
          description='Load and transform data in Redshift with Airflow',
          schedule_interval='0 * * * *')

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    dag=dag,
    redshift_conn_id="redshift_ag",
    aws_credentials_id="aws_credentials_ag",
    redshift_sink_table="staging_events",
    s3_origin_bucket="udacity-dend",
    s3_key="log_data",
    data_format="JSON")

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    redshift_conn_id="redshift_ag",
    aws_credentials_id="aws_credentials_ag",
    redshift_sink_table="staging_songs",
    s3_origin_bucket="udacity-dend",
    s3_key="song_data",
    data_format="JSON")
}

dag = DAG(
    'scoreindicators_dag',
    default_args=default_args,
    description='Load and transform data in Redshift with Airflow',
    #schedule_interval='0 * * * *'
)

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    dag=dag,
    conn_id="redshift",
    aws_credentials_id="aws_credentials",
    table="staging_events",
    s3_bucket="lcf-udacity-de-bucket",
    s3_key="data/",
    delimiter=",")

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    conn_id="redshift",
    aws_credentials_id="aws_credentials",
    table="staging_songs",
    s3_bucket="lcf-udacity-de-bucket",
    s3_key="data/happiness/",
    delimiter=",")
          description='Load and transform data in Redshift with Airflow',
          schedule_interval="@hourly",
          max_active_runs=1)

start_operator = DummyOperator(task_id='start_execution', dag=dag)

table_creation = PostgresOperator(task_id='tables_creation',
                                  dag=dag,
                                  postgres_conn_id='redshift',
                                  sql='/create_tables.sql')

stage_events_to_redshift = StageToRedshiftOperator(
    task_id='stage_events',
    dag=dag,
    table="staging_events",
    redshift_conn_id='redshift',
    aws_credentials_id="aws_credentials",
    s3_bucket='udacity-dend',
    s3_key="log-data/{execution_date.year}/{execution_date.month:02d}",
    file_format='JSON \'s3://udacity-dend/log_json_path.json\'')

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='stage_songs',
    dag=dag,
    table="staging_songs",
    redshift_conn_id='redshift',
    aws_credentials_id="aws_credentials",
    s3_bucket='udacity-dend',
    s3_key='song_data/A/A',
    file_format='JSON \'auto\'')
Example #25
    'start_date': datetime(2019, 1, 12)
}

dag = DAG('airflow_dag',
          default_args=default_args,
          description='Load and transform data in Redshift with Airflow',
          schedule_interval='0 * * * *'
          #start_date=datetime.utcnow()
          )

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    dag=dag,
    redshift_conn_id='redshift',
    aws_credentials_id='aws_credentials',
    table='public.staging_events',
    source_path='s3://udacity-dend/log_data',
    JSON_path='s3://udacity-dend/log_json_path.json')

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    redshift_conn_id='redshift',
    aws_credentials_id='aws_credentials',
    table='public.staging_songs',
    source_path='s3://udacity-dend/song_data',
    JSON_path='auto')

load_songplays_table = LoadFactOperator(
    task_id='Load_songplays_fact_table',
Example #26
    'email_on_retry': False,
    'schedule_interval': '@hourly'
}

dag = DAG('udac_example_dag',
          default_args=default_args,
          description='Load and transform data in Redshift with Airflow',
          schedule_interval='0 * * * *')

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    dag=dag,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    table="staging_events",
    s3_bucket='udacity-dend',
    s3_key="log_data/",
    extra_params="format as json 's3://udacity-dend/log_json_path.json'")

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    table="staging_songs",
    s3_bucket='udacity-dend',
    s3_key="song_data",
    extra_params="json 'auto' compupdate off region 'us-west-2'")
Example #27
File: dag.py  Project: danvargg/dend
    'email_on_retry': False
}

dag = DAG('airflow_dag',
          default_args=default_args,
          description='Load and transform data in Redshift with Airflow',
          schedule_interval='0 * * * *',
          max_active_runs=3)

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    dag=dag,
    provide_context=True,
    aws_credentials_id="aws_credentials",
    redshift_conn_id='redshift',
    s3_bucket="udacity-dend-airflow-test",
    s3_key="log_data",
    table="staging_events",
    create_stmt=sql_queries.create_table_staging_events)

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    provide_context=True,
    aws_credentials_id="aws_credentials",
    redshift_conn_id='redshift',
    s3_bucket="udacity-dend-airflow-test",
    s3_key="song_data",
    table="staging_songs",
    create_stmt=sql_queries.create_table_staging_songs)
    "email_on_retry": False
}

dag = DAG("udac_example_dag",
          default_args=default_args,
          description="Load and transform data in Redshift with Airflow",
          schedule_interval="0 * * * *",
          catchup=False)

start_operator = DummyOperator(task_id="Begin_execution", dag=dag)

stage_events_to_redshift = StageToRedshiftOperator(
    task_id="Stage_events",
    dag=dag,
    table="staging_events",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    s3_bucket="udacity-dend",
    s3_key="log_data",
    file_type="JSON")

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id="Stage_songs",
    dag=dag,
    table="staging_songs",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    s3_bucket="udacity-dend",
    s3_key="song_data/A/A/A",
    file_type="JSON")
Example #29
    'catchup': False
}

dag = DAG('dag',
          default_args=default_args,
          description='Load and transform data in Redshift with Airflow',
          schedule_interval='0 * * * *')

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    dag=dag,
    table='public.staging_events',
    redshift_conn_id='redshift',
    redshift_iam_role=v.get('redshift_iam_role'),
    s3_bucket='udacity-dend',
    s3_prefix='log_data',
    json_location='s3://udacity-dend/log_json_path.json',
    region='us-west-2',
    provide_context=True)

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    table='public.staging_songs',
    redshift_conn_id='redshift',
    redshift_iam_role=v.get('redshift_iam_role'),
    s3_bucket='udacity-dend',
    s3_prefix='song_data',
    region='us-west-2',
Example #30
}

dag = DAG(
    'sparkify',
    default_args=default_args,
    description='Load and transform Sparkify data in Redshift with Airflow',
    start_date=datetime.utcnow(),
    schedule_interval='@daily',
    catchup=False)

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

stage_events_to_redshift = StageToRedshiftOperator(
    task_id='Stage_events',
    dag=dag,
    table='staging_events',
    aws_credentials_id='aws_credentials',
    redshift_conn_id='redshift',
    s3_data_location='s3://udacity-dend/log_data',
    json_path_location='s3://udacity-dend/log_json_path.json')

stage_songs_to_redshift = StageToRedshiftOperator(
    task_id='Stage_songs',
    dag=dag,
    table='staging_songs',
    aws_credentials_id='aws_credentials',
    redshift_conn_id='redshift',
    s3_data_location='s3://udacity-dend/song_data')

load_songplays_table = LoadFactOperator(
    task_id='Load_songplays_fact_table',
    dag=dag,