Example #1
0
    # Run the Sqoop table-import shell script located under
    # /home/airflow/gcs/plugins. The script receives one argument: the name
    # ephemeral-spark-cluster-{{ds_nodash}}, where {{ds_nodash}} is a template
    # expression rendered by Airflow before the command is executed.
    submit_sqoop = BashOperator(
        bash_command=(
            "bash /home/airflow/gcs/plugins/"
            "sqoop_simple_table_imports_for_airflow.sh "
            "ephemeral-spark-cluster-{{ds_nodash}}"
        ),
        task_id="sqoop_full_table_import",
    )

    # Load every object matching sqoop-output/flights/*.avro in bucket
    # spark-etl-1 into the flight_delays BigQuery table.
    bq_load_flight_delays = GoogleCloudStorageToBigQueryOperator(
        task_id="bq_load_flight_delays",
        # Source: Avro objects in GCS.
        bucket="spark-etl-1",
        source_objects=["sqoop-output/flights/*.avro"],
        source_format="AVRO",
        skip_leading_rows=0,
        # Destination: create the table when missing, otherwise append rows.
        destination_project_dataset_table=(
            "bigdata-etl-20201027.data_analysis.flight_delays"
        ),
        create_disposition="CREATE_IF_NEEDED",
        write_disposition="WRITE_APPEND",
        # Schema is auto-detected; a single bad record fails the load.
        autodetect=True,
        max_bad_records=0,
    )

    # Delete the Dataproc cluster named ephemeral-spark-cluster-{{ds_nodash}}
    # in us-central1. TriggerRule.ALL_DONE lets this task run once all upstream
    # tasks have finished, regardless of whether they succeeded or failed.
    delete_cluster = DataprocClusterDeleteOperator(
        task_id="delete_dataproc_cluster",
        region="us-central1",
        cluster_name="ephemeral-spark-cluster-{{ds_nodash}}",
        trigger_rule=TriggerRule.ALL_DONE,
    )

    # Attach the first task to the DAG explicitly, then declare the pipeline
    # as one linear chain:
    # create cluster -> sqoop import -> BigQuery load -> delete cluster.
    create_cluster.dag = dag
    (
        create_cluster
        >> submit_sqoop
        >> bq_load_flight_delays
        >> delete_cluster
    )
    # Remove the BigQuery table through the bq command-line tool. The command
    # has the form: bq rm -f -t <PROJECT_ID_BQ>:<DATASET_BQ>.<TABLE_NAME_BQ>
    bo_borrar_bq_vdg_rpt_siniestros_personas_morales = BashOperator(
        task_id="bo_borrar_bq_vdg_rpt_siniestros_personas_morales",
        bash_command="".join((
            "bq rm -f -t ",
            PROJECT_ID_BQ, ":", DATASET_BQ, ".", TABLE_NAME_BQ,
        )),
    )

    # Load the Parquet object(s) identified by SOURCE_OBJECTS in SOURCE_BUCKET
    # into the table <PROJECT_ID_BQ>.<DATASET_BQ>.<TABLE_NAME_BQ>.
    # WRITE_TRUNCATE replaces whatever data the table already holds.
    gcs2bqo_crear_bq_vdg_rpt_siniestros_personas_morales = GoogleCloudStorageToBigQueryOperator(
        task_id="gcs2bqo_crear_bq_vdg_rpt_siniestros_personas_morales",
        bucket=SOURCE_BUCKET,
        source_objects=[SOURCE_OBJECTS],
        source_format="PARQUET",
        destination_project_dataset_table=".".join(
            (PROJECT_ID_BQ, DATASET_BQ, TABLE_NAME_BQ)
        ),
        write_disposition="WRITE_TRUNCATE",
    )

    # Delete the Dataproc cluster CLUSTER_NAME in project PROJECT_ID.
    # TriggerRule.ALL_DONE lets the task run once every upstream task has
    # finished, whatever its outcome; depends_on_past=False means it does not
    # wait on the state of this task in the previous DAG run.
    dpcdo_vdg_delete_cluster = DataprocClusterDeleteOperator(
        task_id="dpcdo_vdg_delete_cluster",
        project_id=PROJECT_ID,
        cluster_name=CLUSTER_NAME,
        trigger_rule=TriggerRule.ALL_DONE,
        depends_on_past=False,
    )

    # No-op marker task with id "FIN"; it does no work of its own.
    END_DAG = DummyOperator(
        task_id="FIN",
        depends_on_past=False,
    )

# Bind the cluster-creation task to the DAG explicitly.
dpcco_vdg_create_cluster.dag = DAG_VDG_RESERVAS

# Task ordering. The two tasks in the list both run after cluster creation,
# with no dependency between them, and both must finish before
# dpso_vdg_rpt_siniestros_personas_morales starts. The rest is a linear chain
# ending at END_DAG.
(
    dpcco_vdg_create_cluster
    >> [dpso_vdg_polizas_certificado, dpso_vdg_siniestros]
    >> dpso_vdg_rpt_siniestros_personas_morales
    >> bo_borrar_bq_vdg_rpt_siniestros_personas_morales
    >> gcs2bqo_crear_bq_vdg_rpt_siniestros_personas_morales
    >> dpcdo_vdg_delete_cluster
    >> END_DAG
)