""" ATD Data Lake 'ready' bucket for GRIDSMART, with aggregation @author Kenneth Perrine, Nadia Florez """ import pandas as pd import numpy as np import arrow import _setpath from atd_data_lake.support import etl_app, last_update from atd_data_lake.util import date_util APP_DESCRIPTION = etl_app.AppDescription( appName="gs_ready_agg.py", appDescr="Aggregates 'ready' Data Lake bucket GRIDSMART counts") class GSReadyAggApp(etl_app.ETLApp): """ Application functions and special behavior around GRIDSMART aggregation """ def __init__(self, args): """ Initializes application-specific variables """ super().__init__("gs", APP_DESCRIPTION, args=args, purposeSrc="ready", purposeTgt="ready", perfmetStage="Aggregate")
""" Publish Bluetooth "Ready" Data Lake data @author Kenneth Perrine, Nadia Florez """ import hashlib import arrow import _setpath from atd_data_lake.support import etl_app, last_update from atd_data_lake import config # This sets up application information: APP_DESCRIPTION = etl_app.AppDescription( appName="bt_extract_soc.py", appDescr="Extracts Bluetooth files from the 'Ready' bucket to Socrata") class BTPublishApp(etl_app.ETLApp): """ Application functions and special behavior around Bluetooth exporting to Socrata. """ def __init__(self, args): """ Initializes application-specific variables """ super().__init__("bt", APP_DESCRIPTION, args=args, purposeSrc="ready", perfmetStage="Publish") self.publishers = None
""" Publish Wavetronix "Ready" Data Lake data @author Kenneth Perrine """ import hashlib import arrow import _setpath from atd_data_lake.support import etl_app, last_update from atd_data_lake import config # This sets up application information: APP_DESCRIPTION = etl_app.AppDescription( appName="wt_extract_soc.py", appDescr="Extracts Wavetronix from the 'Ready' bucket to Socrata") class GSAggPublishApp(etl_app.ETLApp): """ Application functions and special behavior around Wavetronix exporting to Socrata. """ def __init__(self, args): """ Initializes application-specific variables """ super().__init__("wt", APP_DESCRIPTION, args=args, purposeSrc="ready",
""" Publish GRIDSMART Aggregated "Ready" Data Lake data @author Kenneth Perrine, Nadia Florez """ import hashlib import arrow import _setpath from atd_data_lake.support import etl_app, last_update from atd_data_lake import config # This sets up application information: APP_DESCRIPTION = etl_app.AppDescription( appName="gs_agg_extract_soc.py", appDescr="Extracts GRIDSMART aggregates from the 'Ready' bucket to Socrata" ) class GSAggPublishApp(etl_app.ETLApp): """ Application functions and special behavior around GRIDSMART exporting to Socrata. """ def __init__(self, args): """ Initializes application-specific variables """ super().__init__("gs", APP_DESCRIPTION, args=args, purposeSrc="ready",
""" Bluetooth sensor JSON standardization translates between the "raw" and "rawjson" layers. @author Kenneth Perrine, Nadia Florez """ import csv, datetime, os import _setpath from atd_data_lake.support import etl_app, last_update, perfmet from atd_data_lake.util import date_util from atd_data_lake import config # This sets up application information: APP_DESCRIPTION = etl_app.AppDescription( appName="bt_json_standard.py", appDescr= "Performs JSON canonicalization for Bluetooth data between the raw and rawjson Data Lake buckets" ) class BTJSONStandardApp(etl_app.ETLApp): """ Application functions and special behavior around Bluetooth JSON canonicalization. """ def __init__(self, args): """ Initializes application-specific variables """ super().__init__("bt", APP_DESCRIPTION, args=args,
""" Movement of GRIDSMART data files to S3 "raw" layer. @author Kenneth Perrine, Nadia Florez """ import os import _setpath from atd_data_lake.support import etl_app, last_update from atd_data_lake import config from atd_data_lake.drivers.devices import last_upd_gs, gs_support # This sets up application information: APP_DESCRIPTION = etl_app.AppDescription( appName="gs_insert_lake.py", appDescr="Inserts GRIDSMART data from field devices into the Raw Data Lake" ) class GSInsertLakeApp(etl_app.ETLApp): """ Application functions and special behavior around GRIDSMART ingestion. """ def __init__(self, args): """ Initializes application-specific variables """ self.deviceFilter = None super().__init__("gs", APP_DESCRIPTION, args=args,
""" Bluetooth sensor ingestion takes files from the AWAM share and places them into the Data Lake "raw" layer. @author Kenneth Perrine """ import _setpath from atd_data_lake.support import etl_app, last_update from atd_data_lake.config import config_app from atd_data_lake.util import date_dirs from atd_data_lake.drivers import last_upd_fs # This sets up application information: APP_DESCRIPTION = etl_app.AppDescription( appName="bt_insert_lake.py", appDescr="Inserts Bluetooth data from AWAM share into the Raw Data Lake") # This defines the valid filename formats that exist in the AWAM directory: DIR_DEFS = [date_dirs.DateDirDef(prefix=config_app.UNIT_LOCATION + "_bt_", dateFormat="%m-%d-%Y", postfix=".txt"), date_dirs.DateDirDef(prefix=config_app.UNIT_LOCATION + "_btmatch_", dateFormat="%m-%d-%Y", postfix=".txt"), date_dirs.DateDirDef(prefix=config_app.UNIT_LOCATION + "_bt_summary_15_", dateFormat="%m-%d-%Y", postfix=".txt")] class BTLastUpdateProv(last_upd_fs.LastUpdFileProv): """ Overrides the default file provider so as to generate the correct identifier for AWAM files
Wavetronix sensor JSON preparation translates between the "rawjson" and "ready" layers. @author Kenneth Perrine, Nadia Florez """ import hashlib import pandas as pd import _setpath from atd_data_lake.support import etl_app, last_update from atd_data_lake import config # This sets up application information: APP_DESCRIPTION = etl_app.AppDescription( appName="wt_ready.py", appDescr= "Performs JSON enrichment for Wavetronix data between the 'rawjson' and 'ready' Data Lake buckets" ) class WTReadyApp(etl_app.ETLApp): """ Application functions and special behavior around Wavetronix JSON final data enrichment. """ def __init__(self, args): """ Initializes application-specific variables """ super().__init__("wt", APP_DESCRIPTION, args=args,
""" import os, datetime, json import pandas as pd import pytz import _setpath from atd_data_lake.support import etl_app, last_update, perfmet from atd_data_lake import config from atd_data_lake.drivers.devices import gs_investigate from atd_data_lake.util import date_util # This sets up application information: APP_DESCRIPTION = etl_app.AppDescription( appName="gs_json_standard.py", appDescr= "Performs JSON canonicalization for GRIDSMART data between the 'raw' and 'rawjson' Data Lake buckets" ) class GSJSONStandardApp(etl_app.ETLApp): """ Application functions and special behavior around GRIDSMART JSON canonicalization. """ def __init__(self, args): """ Initializes application-specific variables """ super().__init__("gs", APP_DESCRIPTION, args=args,