from typing import Any, Union
from unittest.mock import Mock

from pyspark.sql import DataFrame, SparkSession

from etl_logger import EtlLogger
# create_spark_session() builds the project's SparkSession; its module is
# not shown in this excerpt.


class EtlProcess:
    """ EtlProcess orchestrates the ETL process """
    spark: SparkSession
    extractor: Any     # better: create abstract base class Extractor
    transformer: Any
    loader: Any
    logger: EtlLogger

    def __init__(self, extractor, transformer, loader,
                 spark: Union[SparkSession, Mock, None] = None) -> None:
        """ Initialize the EtlProcess """
        self.logger = EtlLogger()
        # Create the session lazily: a default of create_spark_session()
        # would be evaluated once at import time, which defeats injecting
        # a Mock session in tests.
        self.spark = spark if spark is not None else create_spark_session()
        self.extractor = extractor
        self.transformer = transformer
        self.loader = loader

    def run(self) -> None:
        """ Run the ETL process """
        self.logger.debug('starting run')
        try:
            # Call the methods the collaborators actually define
            # (extract/transform/load), so the names line up with the
            # ExtractorCsv, TransformerTopFiveCust, and LoaderCsv classes below.
            initial_df: DataFrame = self.extractor.extract(self.spark)
            transformed_df: DataFrame = \
                self.transformer.transform(self.spark, initial_df)
            self.loader.load(self.spark, transformed_df)
        finally:
            self.spark.stop()
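The `Any` annotations and the inline comment above point at the missing abstraction: the three collaborators share no declared interface. A minimal sketch of what those base classes could look like, using `typing.Protocol` for structural typing (the names `Extractor`, `Transformer`, and `Loader` are assumptions, not part of the original code):

from typing import Protocol

from pyspark.sql import DataFrame, SparkSession


class Extractor(Protocol):
    def extract(self, spark: SparkSession) -> DataFrame: ...


class Transformer(Protocol):
    def transform(self, spark: SparkSession, df: DataFrame) -> DataFrame: ...


class Loader(Protocol):
    def load(self, spark: SparkSession, df: DataFrame) -> None: ...

With these in place, the `Any` annotations in EtlProcess could be tightened to `Extractor`, `Transformer`, and `Loader`; because Protocol matching is structural, the concrete classes below would satisfy them without inheriting from anything.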
from typing import Any, ClassVar, Dict, List

from pyspark.sql import DataFrame, SparkSession

from etl_logger import EtlLogger


class LoaderCsv:
    """ Loader implements the "load" process of ETL """
    output_cols: ClassVar[List[str]] = ['Customer ID', 'Total Orders']
    logger: EtlLogger

    def __init__(self, config: Dict[str, Any]):
        self.path = config['path']
        self.logger = EtlLogger()

    def load(self, spark: SparkSession, df: DataFrame):
        self.logger.info(f'Load: {self.path}')
        # Convert the Spark DataFrame to a Pandas DataFrame, because Pandas
        # can write a single, plain CSV file instead of a distributed HDFS
        # directory of part files (for demo purposes only; at scale the
        # distributed write is usually the right way to go)
        df.toDF(*LoaderCsv.output_cols) \
            .toPandas() \
            .to_csv(self.path, header=True, index=False)
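For contrast with the Pandas shortcut above, here is a sketch of the distributed write that the comment alludes to, framed as a hypothetical extra method on LoaderCsv (the method name load_distributed is an assumption, and coalesce(1) is only there to force a single part file):

    def load_distributed(self, spark: SparkSession, df: DataFrame):
        # Let Spark itself write the CSV: the path becomes a directory of
        # part files. coalesce(1) funnels all rows through one task to get
        # a single part file, at the cost of write parallelism.
        df.toDF(*LoaderCsv.output_cols) \
            .coalesce(1) \
            .write \
            .mode('overwrite') \
            .csv(self.path, header=True)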
from typing import Any, ClassVar, Dict

from pyspark.sql import DataFrame, SparkSession

from etl_logger import EtlLogger


class ExtractorCsv:
    """ Extractor implements the "extract" process of ETL """
    input_schema: ClassVar[str] = \
        '`Customer ID` integer, `Order ID` integer, `Order Total` double'
    path: str
    logger: EtlLogger

    def __init__(self, config: Dict[str, Any]):
        self.path = config['path']
        self.logger = EtlLogger()

    def extract(self, spark: SparkSession) -> DataFrame:
        self.logger.info(f'Extract: {self.path}')
        df: DataFrame = spark.read.csv(self.path, header=True,
                                       schema=ExtractorCsv.input_schema)
        return df
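The `input_schema` string is Spark's DDL shorthand; supplying a schema up front spares Spark an extra pass over the file to infer column types. For reference, an equivalent explicit schema (a sketch for illustration, not used by the code above) would be:

from pyspark.sql.types import DoubleType, IntegerType, StructField, StructType

input_schema_explicit = StructType([
    StructField('Customer ID', IntegerType(), nullable=True),
    StructField('Order ID', IntegerType(), nullable=True),
    StructField('Order Total', DoubleType(), nullable=True),
])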
from pyspark.sql import DataFrame, SparkSession
from pyspark.sql import functions as F

from etl_logger import EtlLogger


class TransformerTopFiveCust:
    """ Transformer implements the "transform" process of ETL """
    logger: EtlLogger

    def __init__(self):
        self.logger = EtlLogger()

    def transform(self, spark: SparkSession, df: DataFrame) -> DataFrame:
        """ Apply transformations to a DataFrame """
        self.logger.info('Transform: top 5 customer totals')
        result_limit = 5
        # Use pyspark.sql.functions.sum (here F.sum), not the Python
        # builtin sum, which cannot aggregate a Spark column.
        result_df = df.toDF('cust_id', 'order_id', 'amount') \
            .select('cust_id', 'amount') \
            .groupBy('cust_id') \
            .agg(F.sum('amount').alias('total')) \
            .orderBy('total', ascending=False) \
            .limit(result_limit)
        return result_df
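Because every dependency is injected, the transformer can be tested in isolation with a small local session, which is what the Mock-friendly EtlProcess signature is angling at. A minimal pytest-style sketch (the test name, sample data, and local[1] master are assumptions):

from pyspark.sql import SparkSession

from transformer import TransformerTopFiveCust


def test_transform_returns_top_five():
    spark = SparkSession.builder \
        .master('local[1]') \
        .appName('transformer-test') \
        .getOrCreate()
    try:
        # Seven customers, one order each, amount equal to the customer id.
        rows = [(cust_id, 100 + cust_id, float(cust_id))
                for cust_id in range(1, 8)]
        df = spark.createDataFrame(
            rows, 'cust integer, order_no integer, amount double')
        result = TransformerTopFiveCust().transform(spark, df)
        assert result.count() == 5                # only the top five survive
        assert result.first()['total'] == 7.0     # largest total sorts first
    finally:
        spark.stop()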
""" Driver for an ETL process that extracts a CSV file and loads a CSV file. """ from etl_process import EtlProcess from extractor import ExtractorCsv from transformer import TransformerTopFiveCust from loader import LoaderCsv from etl_logger import EtlLogger if __name__ == '__main__': logger = EtlLogger() logger.info('ETL Process starting') extract_file = 'customer-orders.csv' # path: str = f'file://{Path().absolute()}/{file}' # read local file extract_path = f'hdfs://localhost:9000/user/sutter/data/{extract_file}' # read from Hadoop server extractor = ExtractorCsv({'path': extract_path}) transformer = TransformerTopFiveCust() load_path = './customer-orders-totals.csv' loader = LoaderCsv({'path': load_path}) etl_process = EtlProcess(extractor, transformer, loader) etl_process.run() logger.info('ETL Process complete')