# Run the Sqoop import shell script, passing it the templated per-run
# cluster name ("ephemeral-spark-cluster-" + the run's ds_nodash) as argument.
submit_sqoop = BashOperator(
    task_id="sqoop_full_table_import",
    bash_command=(
        "bash /home/airflow/gcs/plugins/sqoop_simple_table_imports_for_airflow.sh "
        "ephemeral-spark-cluster-{{ds_nodash}}"
    ),
)

# Load every Avro file under sqoop-output/flights/ in the spark-etl-1 bucket
# into the flight_delays BigQuery table, creating the table if needed and
# appending to whatever rows it already holds.
bq_load_flight_delays = GoogleCloudStorageToBigQueryOperator(
    task_id="bq_load_flight_delays",
    bucket="spark-etl-1",
    source_objects=["sqoop-output/flights/*.avro"],
    source_format="AVRO",
    autodetect=True,
    skip_leading_rows=0,
    max_bad_records=0,
    destination_project_dataset_table=(
        "bigdata-etl-20201027.data_analysis.flight_delays"
    ),
    create_disposition="CREATE_IF_NEEDED",
    write_disposition="WRITE_APPEND",
)

# Tear down the per-run Dataproc cluster. ALL_DONE lets this task run once
# its upstream tasks have finished, whether they succeeded or failed.
delete_cluster = DataprocClusterDeleteOperator(
    task_id="delete_dataproc_cluster",
    cluster_name="ephemeral-spark-cluster-{{ds_nodash}}",
    region="us-central1",
    trigger_rule=TriggerRule.ALL_DONE,
)

# `create_cluster` and `dag` are defined outside this section; the DAG is
# attached to the head task before the linear pipeline is wired up.
create_cluster.dag = dag

# create cluster -> sqoop import -> BigQuery load -> delete cluster
create_cluster >> submit_sqoop >> bq_load_flight_delays >> delete_cluster
# Force-remove the target BigQuery table through the `bq` CLI
# (project:dataset.table form) before it is reloaded.
bo_borrar_bq_vdg_rpt_siniestros_personas_morales = BashOperator(
    task_id="bo_borrar_bq_vdg_rpt_siniestros_personas_morales",
    bash_command=(
        "bq rm -f -t "
        + PROJECT_ID_BQ + ":" + DATASET_BQ + "." + TABLE_NAME_BQ
    ),
)

# Rebuild the same table from the Parquet object(s) in SOURCE_BUCKET,
# truncating any existing contents on write.
gcs2bqo_crear_bq_vdg_rpt_siniestros_personas_morales = GoogleCloudStorageToBigQueryOperator(
    task_id="gcs2bqo_crear_bq_vdg_rpt_siniestros_personas_morales",
    bucket=SOURCE_BUCKET,
    source_objects=[SOURCE_OBJECTS],
    source_format="PARQUET",
    destination_project_dataset_table=(
        PROJECT_ID_BQ + "." + DATASET_BQ + "." + TABLE_NAME_BQ
    ),
    write_disposition="WRITE_TRUNCATE",
)

# Delete the Dataproc cluster. ALL_DONE lets this task run once its upstream
# tasks have finished, whether they succeeded or failed.
dpcdo_vdg_delete_cluster = DataprocClusterDeleteOperator(
    task_id="dpcdo_vdg_delete_cluster",
    cluster_name=CLUSTER_NAME,
    project_id=PROJECT_ID,
    depends_on_past=False,
    trigger_rule=TriggerRule.ALL_DONE,
)

# Terminal marker task ("FIN") closing the pipeline.
END_DAG = DummyOperator(task_id="FIN", depends_on_past=False)

# The cluster-creation operator, the three Dataproc job operators and
# DAG_VDG_RESERVAS are defined outside this section; the DAG is attached
# to the head task before wiring.
dpcco_vdg_create_cluster.dag = DAG_VDG_RESERVAS

# Create the cluster, fan out to the two source jobs, join on the report job,
# drop and reload the BigQuery table, delete the cluster, then finish.
(
    dpcco_vdg_create_cluster
    >> [dpso_vdg_polizas_certificado, dpso_vdg_siniestros]
    >> dpso_vdg_rpt_siniestros_personas_morales
    >> bo_borrar_bq_vdg_rpt_siniestros_personas_morales
    >> gcs2bqo_crear_bq_vdg_rpt_siniestros_personas_morales
    >> dpcdo_vdg_delete_cluster
    >> END_DAG
)