from pyspark.sql.functions import col

from constants import dict_dbs_locations, dict_dbs_names
# FIX: initialize_spark_session was called in __main__ but never imported,
# which raised NameError at runtime.
from helper_functions.initialize_spark_session import initialize_spark_session


def load_l_airport(spark, integration_layer_loc, landing_zone_name):
    """Upsert the L_AIRPORT lookup from the landing zone into the
    integration-layer Delta table, keyed on CODE.

    :param spark: active SparkSession with Delta Lake support configured
    :param integration_layer_loc: root storage path of the integration layer;
        the L_AIRPORT table lives at <integration_layer_loc>/L_AIRPORT
    :param landing_zone_name: Spark SQL database name of the landing zone
    """
    # DeltaTable comes from `from delta.tables import *` executed in __main__
    # after the Spark session exists; it is a module-level global by the time
    # this function runs.
    delta_l_airport = DeltaTable.forPath(
        spark, integration_layer_loc + '/L_AIRPORT')
    df_lz_l_airport = spark.sql(f"""
        SELECT CODE
              ,DESCRIPTION
        FROM {landing_zone_name}.L_AIRPORT
        """)
    # Merge semantics: update DESCRIPTION for existing codes, insert new ones.
    delta_l_airport.alias("oldData") \
        .merge(df_lz_l_airport.alias("newData"),
               "oldData.CODE = newData.CODE") \
        .whenMatchedUpdate(set={"DESCRIPTION": col("newData.DESCRIPTION")}) \
        .whenNotMatchedInsert(values={
            "CODE": col("newData.CODE"),
            "DESCRIPTION": col("newData.DESCRIPTION")}) \
        .execute()


if __name__ == '__main__':
    spark = initialize_spark_session('load_l_airport')
    # Delta imports are deferred until after the session is initialized.
    from delta.tables import *
    integration_layer_loc = dict_dbs_locations.get('INTEGRATION_LAYER_LOC')
    landing_zone_name = dict_dbs_names.get('LANDING_ZONE_NAME')
    load_l_airport(spark, integration_layer_loc, landing_zone_name)
# FIX: logging was used throughout but never imported in this script;
# dict_dbs_names / dict_dbs_locations likewise (canonical import shown in
# the sibling create_landing_zone script).
import logging
import os

from pyspark.sql.functions import col

from constants import dict_dbs_locations, dict_dbs_names
from helper_functions.initialize_spark_session import initialize_spark_session

logging.basicConfig(level=logging.INFO,
                    format="%(asctime)s: %(levelname)s: %(message)s ")

if __name__ == '__main__':
    spark = initialize_spark_session('create_presentation_layer')
    # Delta imports are deferred until after the session is initialized.
    from delta.tables import *

    # Creating the presentation_layer database in spark sql.
    # FIX: db_name is referenced in the except handler; define it up front so
    # a failure on the first try-line cannot raise NameError inside except.
    db_name = None
    try:
        db_name = dict_dbs_names.get('PRESENTATION_LAYER_NAME')
        db_loc = dict_dbs_locations.get('PRESENTATION_LAYER_LOC')
        # NOTE(review): ddl_create_presentation_layer_db must be imported from
        # the project's sql_queries package (module path not visible in this
        # chunk) — confirm the import exists at the top of the real file.
        spark.sql(
            ddl_create_presentation_layer_db.format(
                presentation_layer_db_name=db_name,
                presentation_layer_db_loc=db_loc))
        spark.sql(f'USE {db_name}')
        logging.info(f'{db_name} has been created.')
    except Exception as e:
        logging.error(f'Failed to create the {db_name} db in spark sql,{e}')
        spark.stop()
        raise Exception(f'Failed to create the {db_name}, {e}')
        # --- tail of load_pl_city_demographics: the function definition and
        # its merge logic open above this chunk; only the success log and the
        # error handling of its try-block are visible here.
        logging.info(
            'CITY_DEMOGRAPHICS has been loaded in the Presentation layer')
    except Exception as e:
        # Best-effort cleanup: log, stop the session, then surface the error.
        logging.error(
            'Failed to load CITY_DEMOGRAPHICS in the Presentation Layer')
        spark.stop()
        raise Exception(
            f'Failed to load CITY_DEMOGRAPHICS in the Presentation Layer,{e}')


if __name__ == '__main__':
    # Entry point: build the Spark session, resolve database names/locations
    # from project constants, then run the CITY_DEMOGRAPHICS load.
    spark = initialize_spark_session('load_pl_city_demographics')
    # Delta imports are deferred until after the session is initialized.
    from delta.tables import *
    try:
        presentation_layer_loc = dict_dbs_locations.get(
            'PRESENTATION_LAYER_LOC')
        presentation_layer_name = dict_dbs_names.get('PRESENTATION_LAYER_NAME')
        integration_layer_name = dict_dbs_names.get('INTEGRATION_LAYER_NAME')
    except Exception as e:
        # NOTE(review): message says "Environment variables" but the values
        # come from constants dicts — presumably those wrap env vars; confirm.
        logging.error('Failed to retrieve Environment variables')
        spark.stop()
        raise Exception(
            f'Failed to load CITY_DEMOGRAPHICS in the Presentation Layer,{e}')
    load_pl_city_demographics(spark, presentation_layer_name,
                              presentation_layer_loc, integration_layer_name)
import logging
from sql_queries.landing_zone_ddl import ddl_create_land_zone_db, dict_landing_zone_ddls
from constants import dict_dbs_locations, dict_dbs_names
from helper_functions.initialize_spark_session import initialize_spark_session

logging.basicConfig(level=logging.INFO,
                    format="%(asctime)s: %(levelname)s: %(message)s ")

if __name__ == '__main__':
    # Entry point: create the landing-zone database, then (below, truncated
    # in this chunk) its tables from dict_landing_zone_ddls.
    spark = initialize_spark_session('create_landing_zone')

    # Creating the landing_zone database in spark sql
    try:
        db_name = dict_dbs_names.get('LANDING_ZONE_NAME')
        db_loc = dict_dbs_locations.get('LANDING_ZONE_LOC')
        spark.sql(
            ddl_create_land_zone_db.format(landing_zone_db_name=db_name,
                                           landing_zone_db_loc=db_loc))
        logging.info(f'{db_name} has been created.')
    except Exception as e:
        # NOTE(review): if the first try-line itself raised, db_name is
        # unbound here and this handler would raise NameError — confirm.
        logging.error(f'Failed to create the {db_name} db in spark sql,{e}')
        spark.stop()
        raise Exception(f'Failed to create the {db_name}, {e}')

    # creating landing zone tables
    # (chunk truncated: the body of this try continues past the visible text)
    try:
            # --- tail of load_pl_state: the Delta merge statement opens above
            # this chunk; these are the remaining whenNotMatchedInsert column
            # mappings from the staged "newData" alias.
            "STATE_ABR": col("newData.STATE_ABR"),
            "STATE_FIPS": col("newData.STATE_FIPS"),
            "STATE_NAME": col("newData.STATE_NAME"),
            "WAC_CODE": col("newData.WAC_CODE")
            }) \
            .execute()
        logging.info('STATE has been loaded in the Presentation layer')
    except Exception as e:
        # Best-effort cleanup: log, stop the session, then surface the error.
        logging.error('Failed to load STATE in the Presentation Layer')
        spark.stop()
        raise Exception(f'Failed to load STATE in the Presentation Layer,{e}')


if __name__ == '__main__':
    # Entry point: build the Spark session, resolve the presentation-layer
    # location and integration-layer name, then run the STATE load.
    spark = initialize_spark_session('load_pl_state')
    # Delta imports are deferred until after the session is initialized.
    from delta.tables import *
    try:
        presentation_layer_loc = dict_dbs_locations.get(
            'PRESENTATION_LAYER_LOC')
        integration_layer_name = dict_dbs_names.get('INTEGRATION_LAYER_NAME')
    except Exception as e:
        logging.error('Failed to retrieve Environment variables')
        spark.stop()
        raise Exception(f'Failed to load STATE in the Presentation Layer,{e}')
    load_pl_state(spark, presentation_layer_loc, integration_layer_name)
# FIX: logging was used throughout but never imported in this script;
# dict_dbs_names / dict_dbs_locations likewise (canonical import shown in
# the sibling create_landing_zone script).
import logging
import os

from pyspark.sql.functions import col

from constants import dict_dbs_locations, dict_dbs_names
from helper_functions.initialize_spark_session import initialize_spark_session

logging.basicConfig(level=logging.INFO,
                    format="%(asctime)s: %(levelname)s: %(message)s ")

if __name__ == '__main__':
    spark = initialize_spark_session('create_integration_layer')
    # Delta imports are deferred until after the session is initialized.
    from delta.tables import *

    # Creating the integration_layer database in spark sql.
    # FIX: db_name is referenced in the except handler; define it up front so
    # a failure on the first try-line cannot raise NameError inside except.
    db_name = None
    try:
        db_name = dict_dbs_names.get('INTEGRATION_LAYER_NAME')
        db_loc = dict_dbs_locations.get('INTEGRATION_LAYER_LOC')
        # NOTE(review): ddl_create_integration_layer_db must be imported from
        # the project's sql_queries package (module path not visible in this
        # chunk) — confirm the import exists at the top of the real file.
        spark.sql(
            ddl_create_integration_layer_db.format(
                integration_layer_db_name=db_name,
                integration_layer_db_loc=db_loc))
        spark.sql(f'USE {db_name}')
        logging.info(f'{db_name} has been created.')
    except Exception as e:
        logging.error(f'Failed to create the {db_name} db in spark sql,{e}')
        spark.stop()
        raise Exception(f'Failed to create the {db_name}, {e}')