def get_s3_to_redshift_dag(
        parent_dag_name,
        task_id,
        redshift_conn_id,
        aws_credentials_id,
        table,
        create_sql_stmt,
        s3_bucket,
        s3_key,
        *args, **kwargs):
    dag = DAG(
        f"{parent_dag_name}.{task_id}",
        **kwargs
    )

    create_task = PostgresOperator(
        task_id=f"create_{table}_table",
        dag=dag,
        postgres_conn_id=redshift_conn_id,
        sql=create_sql_stmt
    )

    copy_task = S3ToRedshiftOperator(
        task_id=f"load_{table}_from_s3_to_redshift",
        dag=dag,
        table=table,
        redshift_conn_id=redshift_conn_id,
        aws_credentials_id=aws_credentials_id,
        s3_bucket=s3_bucket,
        s3_key=s3_key
    )

    #
    # TODO: Move the HasRowsOperator task here from the DAG
    # Done
    check_task = HasRowsOperator(
        task_id=f"check_{table}_has_rows",
        dag=dag,
        redshift_conn_id=redshift_conn_id,
        table=table
    )

    #
    # TODO: Use DAG ordering to place the check task
    # Done
    create_task >> copy_task >> check_task

    return dag
def get_s3_to_redshift_dag(
        parent_dag_name,
        task_id,
        redshift_conn_id,
        aws_credentials_id,
        table,
        create_sql_stmt,
        s3_bucket,
        s3_key,
        *args, **kwargs):
    dag = DAG(
        f"{parent_dag_name}.{task_id}",
        **kwargs
    )

    create_task = PostgresOperator(
        task_id=f"create_{table}_table",
        dag=dag,
        postgres_conn_id=redshift_conn_id,
        sql=create_sql_stmt
    )

    copy_task = S3ToRedshiftOperator(
        task_id=f"load_{table}_from_s3_to_redshift",
        dag=dag,
        table=table,
        redshift_conn_id=redshift_conn_id,
        aws_credentials_id=aws_credentials_id,
        s3_bucket=s3_bucket,
        s3_key=s3_key
    )

    check_task = HasRowsOperator(
        task_id=f"check_{table}_data",
        dag=dag,
        redshift_conn_id=redshift_conn_id,
        table=table
    )

    create_task >> copy_task
    copy_task >> check_task

    return dag
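#
# A minimal usage sketch (not part of the exercise files): the factory above
# returns a DAG meant to be embedded in a parent DAG via Airflow's
# SubDagOperator (Airflow 1.x import paths assumed). The task id below is an
# illustrative placeholder; the connection ids, bucket, and key mirror the
# parent-DAG snippets elsewhere in these exercises, and `sql` is assumed to be
# the project's SQL statements helper module.
#
import datetime

from airflow import DAG
from airflow.operators.subdag_operator import SubDagOperator

import sql_statements as sql  # assumed name of the project's SQL helper module

parent_dag = DAG(
    "lesson3.exercise3",
    start_date=datetime.datetime.utcnow()
)

trips_task_id = "trips_subdag"  # placeholder task id
trips_subdag_task = SubDagOperator(
    # The factory builds the subdag's dag_id as f"{parent_dag_name}.{task_id}",
    # which is what SubDagOperator expects.
    subdag=get_s3_to_redshift_dag(
        "lesson3.exercise3",
        trips_task_id,
        "redshift",
        "aws_credentials",
        "trips",
        sql.CREATE_TRIPS_TABLE_SQL,
        s3_bucket="udacity-dend",
        s3_key="data-pipelines/divvy/unpartitioned/divvy_trips_2018.csv",
        start_date=datetime.datetime.utcnow()
    ),
    task_id=trips_task_id,
    dag=parent_dag
)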
# Fragment from the parent DAG: these are the closing arguments of the
# stations subdag task; the opening lines of that call are not included here.
    task_id=stations_task_id,
    dag=dag,
)

#
# TODO: Migrate Create and Copy tasks to the Subdag
#
create_trips_table = PostgresOperator(
    task_id="create_trips_table",
    dag=dag,
    postgres_conn_id="redshift",
    sql=sql.CREATE_TRIPS_TABLE_SQL
)

copy_trips_task = S3ToRedshiftOperator(
    task_id="load_trips_from_s3_to_redshift",
    dag=dag,
    table="trips",
    redshift_conn_id="redshift",
    aws_credentials_id="aws_credentials",
    s3_bucket="udacity-dend",
    s3_key="data-pipelines/divvy/unpartitioned/divvy_trips_2018.csv"
)

check_trips = HasRowsOperator(
    task_id="check_trips_data",
    dag=dag,
    redshift_conn_id="redshift",
    table="trips"
)

create_stations_table = PostgresOperator(
    task_id="create_stations_table",
    dag=dag,
    postgres_conn_id="redshift",
    sql=sql.CREATE_STATIONS_TABLE_SQL,
)
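#
# One plausible wiring for the trips tasks above. The original fragment ends
# before any dependencies are declared, so this ordering is an assumption
# (create, then load from S3, then check for rows), not the exercise solution.
#
create_trips_table >> copy_trips_task
copy_trips_task >> check_trips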
#
# TODO: Create a DAG which performs the following functions:
#
#       1. Loads Trip data from S3 to RedShift
#       2. Performs a data quality check on the Trips table in RedShift
#       3. Uses the FactsCalculatorOperator to create a Facts table in Redshift
#           a. **NOTE**: to complete this step you must complete the FactsCalculatorOperator
#              skeleton defined in plugins/operators/facts_calculator.py
#
dag = DAG("lesson3.exercise4", start_date=datetime.datetime.utcnow())

copy_trips_task = S3ToRedshiftOperator(
    redshift_conn_id='redshift',
    aws_credentials_id='aws_credentials',
    table='trips',
    s3_bucket='udacity-dend',
    s3_key='data-pipelines/divvy/unpartitioned/divvy_trips_2018.csv',
    task_id='copy_trips_data_from_s3_to_redshift',
    dag=dag
)

check_trips_task = HasRowsOperator(
    redshift_conn_id='redshift',
    table='trips',
    task_id='check_trips_table_data',
    dag=dag
)

calculate_facts_task = FactsCalculatorOperator(
    redshift_conn_id='redshift',
    origin_table='trips',
    destination_table='trips_facts',
    fact_column='tripduration',
    groupby_column='bikeid',
    task_id='calculate_facts_trips',
    dag=dag
)
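#
# Sketch of the task ordering implied by the TODO above (load, then quality
# check, then build the facts table). The original snippet is cut off before
# any dependencies are declared, so treat this as an assumption.
#
copy_trips_task >> check_trips_task
check_trips_task >> calculate_facts_task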
    dag = DAG(
        f"{parent_dag_name}.{task_id}",
        **kwargs
    )

    create_task = PostgresOperator(
        task_id=f"create_{table}_table",
        dag=dag,
        postgres_conn_id=redshift_conn_id,
        sql=create_sql_stmt
    )

    copy_task = S3ToRedshiftOperator(
        task_id=f"load_{table}_from_s3_to_redshift",
        dag=dag,
        table=table,
        redshift_conn_id=redshift_conn_id,
        aws_credentials_id=aws_credentials_id,
        s3_bucket=s3_bucket,
        s3_key=s3_key
    )

    #
    # TODO: Move the HasRowsOperator task here from the DAG
    #
    create_task >> copy_task

    #
    # TODO: Use DAG ordering to place the check task
    #

    return dag