import os
import logging
from pyspark.sql.types import StructField, StructType, StringType
from constants import dict_dbs_locations, edge_node_path
from helper_functions.initialize_spark_session import initialize_spark_session


def load_lz_standard_lookup(table_name):
    landing_zone_location = dict_dbs_locations.get('LANDING_ZONE_LOC')

    # Initializing a Spark session
    spark = initialize_spark_session('load_lz' + table_name)

    # Loading the standard lookups with the same schema in landing_zone
    try:

        # Standard schema for the standard lookups
        schema_lookups_schema = StructType([
            StructField("Code", StringType(), True),
            StructField("Description", StringType(), True)
        ])

        df_file = spark \
            .read \
            .schema(schema_lookups_schema) \
            .option("header", "true") \
            .csv(os.path.join(edge_node_path, table_name, '*.csv'))

        df_file.write.format("csv") \
            .mode("overwrite") \
            .option("sep", ",") \
            .option('header', 'true') \
            .save(os.path.join(landing_zone_location, table_name))

        logging.info(f'{table_name} has been loaded in the landing zone.')

    except Exception as e:
        logging.error(f"Failed to load {table_name} in the landing zone,{e}")
from pyspark.sql.functions import col
from constants import dict_dbs_locations, dict_dbs_names
from helper_functions.initialize_spark_session import initialize_spark_session


def load_l_airport(spark, integration_layer_loc, landing_zone_name):
    delta_l_airport = DeltaTable.forPath(spark,
                                         integration_layer_loc + '/L_AIRPORT')

    df_LZ_l_airport = spark.sql(f"""
        SELECT 
        CODE
        ,DESCRIPTION
        FROM {landing_zone_name}.L_AIRPORT
    """)

    delta_l_airport.alias("oldData") \
        .merge(df_LZ_l_airport.alias("newData"), "oldData.CODE = newData.CODE") \
        .whenMatchedUpdate(set={"DESCRIPTION": col("newData.DESCRIPTION")}) \
        .whenNotMatchedInsert(values={"CODE": col("newData.CODE"), "DESCRIPTION": col("newData.DESCRIPTION")}) \
        .execute()
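

# A shorthand variant (a sketch, not in the original example): updateAll and
# insertAll copy every column whose name matches across both sides, which here
# is exactly CODE and DESCRIPTION, so this behaves the same as the explicit
# merge in load_l_airport above.
def load_l_airport_update_all(spark, integration_layer_loc, landing_zone_name):
    delta_l_airport = DeltaTable.forPath(spark,
                                         integration_layer_loc + '/L_AIRPORT')

    df_lz_l_airport = spark.sql(
        f'SELECT CODE, DESCRIPTION FROM {landing_zone_name}.L_AIRPORT')

    delta_l_airport.alias("oldData") \
        .merge(df_lz_l_airport.alias("newData"), "oldData.CODE = newData.CODE") \
        .whenMatchedUpdateAll() \
        .whenNotMatchedInsertAll() \
        .execute()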


if __name__ == '__main__':
    spark = initialize_spark_session('load_l_airport')
    from delta.tables import *

    integration_layer_loc = dict_dbs_locations.get('INTEGRATION_LAYER_LOC')
    landing_zone_name = dict_dbs_names.get('LANDING_ZONE_NAME')

    load_l_airport(spark, integration_layer_loc, landing_zone_name)
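
# For reference, a plausible shape for the constants consumed above (an
# assumption; the real values live in constants.py and depend on the
# deployment, so the paths below are purely illustrative):
#
# dict_dbs_locations = {
#     'LANDING_ZONE_LOC': 'hdfs:///data/landing_zone',
#     'INTEGRATION_LAYER_LOC': 'hdfs:///data/integration_layer',
#     'PRESENTATION_LAYER_LOC': 'hdfs:///data/presentation_layer',
# }
# dict_dbs_names = {
#     'LANDING_ZONE_NAME': 'landing_zone',
#     'INTEGRATION_LAYER_NAME': 'integration_layer',
#     'PRESENTATION_LAYER_NAME': 'presentation_layer',
# }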
import os
import logging
from pyspark.sql.functions import col
from constants import dict_dbs_locations, dict_dbs_names
from helper_functions.initialize_spark_session import initialize_spark_session
# Assumed module path, mirroring sql_queries.landing_zone_ddl used elsewhere.
from sql_queries.presentation_layer_ddl import ddl_create_presentation_layer_db

logging.basicConfig(level=logging.INFO,
                    format="%(asctime)s: %(levelname)s: %(message)s ")

if __name__ == '__main__':
    spark = initialize_spark_session('create_presentation_layer')

    from delta.tables import *

    # Creating the presentation_layer database in spark sql
    try:

        db_name = dict_dbs_names.get('PRESENTATION_LAYER_NAME')
        db_loc = dict_dbs_locations.get('PRESENTATION_LAYER_LOC')

        spark.sql(
            ddl_create_presentation_layer_db.format(
                presentation_layer_db_name=db_name,
                presentation_layer_db_loc=db_loc))

        spark.sql(f'USE {db_name}')

        logging.info(f'{db_name} has been created.')

    except Exception as e:
        logging.error(f'Failed to create the {db_name} db in spark sql: {e}')
        spark.stop()
        raise Exception(f'Failed to create the {db_name}: {e}')
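

# For reference, a plausible shape for the DDL template formatted above (an
# assumption; the real string lives in the sql_queries package and is not
# shown in this example):
#
# ddl_create_presentation_layer_db = """
#     CREATE DATABASE IF NOT EXISTS {presentation_layer_db_name}
#     LOCATION '{presentation_layer_db_loc}'
# """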


def load_pl_city_demographics(spark, presentation_layer_name,
                              presentation_layer_loc, integration_layer_name):
    try:
        # (The loading logic was cut off in the original example; it
        # presumably merges the integration layer data into the
        # CITY_DEMOGRAPHICS Delta table, as in the L_AIRPORT example above.)
        ...

        logging.info(
            'CITY_DEMOGRAPHICS has been loaded in the Presentation layer')

    except Exception as e:
        logging.error(
            'Failed to load CITY_DEMOGRAPHICS in the Presentation Layer')
        spark.stop()
        raise Exception(
            f'Failed to load CITY_DEMOGRAPHICS in the Presentation Layer: {e}')


if __name__ == '__main__':
    spark = initialize_spark_session('load_pl_city_demographics')
    from delta.tables import *

    try:

        presentation_layer_loc = dict_dbs_locations.get(
            'PRESENTATION_LAYER_LOC')
        presentation_layer_name = dict_dbs_names.get('PRESENTATION_LAYER_NAME')
        integration_layer_name = dict_dbs_names.get('INTEGRATION_LAYER_NAME')

    except Exception as e:
        logging.error('Failed to retrieve Environment variables')
        spark.stop()
        raise Exception(
            f'Failed to retrieve the environment variables: {e}')

    load_pl_city_demographics(spark, presentation_layer_name,
                              presentation_layer_loc, integration_layer_name)
import logging
import os
from pyspark.sql.types import StructField, StructType, StringType
from constants import dict_dbs_locations, edge_node_path
from sql_queries.landing_zone_ddl import list_landing_zone_standard_lookups
from helper_functions.zip_csv_to_gzip_parquet import zip_csv_to_gzip_parquet
from helper_functions.loop_files import loop_files
from helper_functions.initialize_spark_session import initialize_spark_session

logging.basicConfig(level=logging.INFO,
                    format="%(asctime)s: %(levelname)s: %(message)s ")

if __name__ == '__main__':

    # Initializing a Spark session
    spark = initialize_spark_session('load_landing_zone')

    landing_zone_location = dict_dbs_locations.get('LANDING_ZONE_LOC')

    # Loading the standard lookups with the same schema in landing_zone
    try:

        # Standard schema for the standard lookups
        schema_lookups_schema = StructType([
            StructField("Code", StringType(), True),
            StructField("Description", StringType(), True)
        ])

        # Loops over all the standard lookups to load them
        for table_name in list_landing_zone_standard_lookups:
            df_file = spark \
                .read \
                .schema(schema_lookups_schema) \
                .option("header", "true") \
                .csv(os.path.join(edge_node_path, table_name, '*.csv'))
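            # Plausible write step (a sketch; the original loop body was cut
            # off here, and may instead use the zip_csv_to_gzip_parquet helper
            # imported above):
            df_file.write.format("csv") \
                .mode("overwrite") \
                .option("sep", ",") \
                .option('header', 'true') \
                .save(os.path.join(landing_zone_location, table_name))

            logging.info(f'{table_name} has been loaded in the landing zone.')

    # Handler reconstructed to close the try above, mirroring the
    # single-table loader earlier in this listing.
    except Exception as e:
        logging.error(f'Failed to load the standard lookups in the landing '
                      f'zone: {e}')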
import logging
from sql_queries.landing_zone_ddl import ddl_create_land_zone_db, dict_landing_zone_ddls
from constants import dict_dbs_locations, dict_dbs_names
from helper_functions.initialize_spark_session import initialize_spark_session

logging.basicConfig(level=logging.INFO,
                    format="%(asctime)s: %(levelname)s: %(message)s ")

if __name__ == '__main__':

    spark = initialize_spark_session('create_landing_zone')

    # Creating the landing_zone database in spark sql
    try:

        db_name = dict_dbs_names.get('LANDING_ZONE_NAME')
        db_loc = dict_dbs_locations.get('LANDING_ZONE_LOC')

        spark.sql(
            ddl_create_land_zone_db.format(landing_zone_db_name=db_name,
                                           landing_zone_db_loc=db_loc))

        logging.info(f'{db_name} has been created.')

    except Exception as e:
        logging.error(f'Failed to create the {db_name} db in spark sql: {e}')
        spark.stop()
        raise Exception(f'Failed to create the {db_name}: {e}')

    # Creating the landing zone tables
    try:
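        # Sketch of the elided loop body (an assumption; the original example
        # was cut off here). Each table DDL registered in
        # dict_landing_zone_ddls is presumably executed via spark.sql; any
        # format parameters the DDLs expect are not visible in this scrape.
        for table_name, ddl in dict_landing_zone_ddls.items():
            spark.sql(ddl)
            logging.info(f'{table_name} has been created.')

    except Exception as e:
        logging.error(f'Failed to create the landing zone tables: {e}')
        spark.stop()
        raise Exception(f'Failed to create the landing zone tables, {e}')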
import os
import logging
from pyspark.sql.functions import col
from constants import dict_dbs_locations, dict_dbs_names
from helper_functions.initialize_spark_session import initialize_spark_session
# Assumed module path, mirroring sql_queries.landing_zone_ddl used above.
from sql_queries.integration_layer_ddl import ddl_create_integration_layer_db

logging.basicConfig(level=logging.INFO,
                    format="%(asctime)s: %(levelname)s: %(message)s ")

if __name__ == '__main__':
    spark = initialize_spark_session('create_integration_layer')

    from delta.tables import *

    # Creating the integration_layer database in spark sql
    try:

        db_name = dict_dbs_names.get('INTEGRATION_LAYER_NAME')
        db_loc = dict_dbs_locations.get('INTEGRATION_LAYER_LOC')

        spark.sql(
            ddl_create_integration_layer_db.format(
                integration_layer_db_name=db_name,
                integration_layer_db_loc=db_loc))

        spark.sql(f'USE {db_name}')

        logging.info(f'{db_name} has been created.')

    except Exception as e:
        logging.error(f'Failed to create the {db_name} db in spark sql: {e}')
        spark.stop()
        raise Exception(f'Failed to create the {db_name}: {e}')
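

# For context, a minimal sketch of the initialize_spark_session helper used by
# every example above (an assumption; the real helper lives in
# helper_functions and is not shown in this scrape). Wiring the Delta
# extension into the session config would also explain why the examples defer
# `from delta.tables import *` until after the session exists.
from pyspark.sql import SparkSession


def initialize_spark_session_sketch(app_name):
    return SparkSession.builder \
        .appName(app_name) \
        .config("spark.sql.extensions",
                "io.delta.sql.DeltaSparkSessionExtension") \
        .config("spark.sql.catalog.spark_catalog",
                "org.apache.spark.sql.delta.catalog.DeltaCatalog") \
        .enableHiveSupport() \
        .getOrCreate()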