def on_http_reload_lems(request):
    gcp_logger = init_gcp_logger()
    request_args = request.args
    gcp_logger.info('on_http_reload_lems(), args=%s', request_args)
    year = request_args['year'] if request_args and 'year' in request_args else datetime.now().year
    storage_client = init_storage_client()
    bucket = storage_client.get_bucket(GCP_STORAGE_BUCKET_ID)
    already_fetched = [
        x.get('name') for x in get_already_fetched(
            storage_client, bucket, LEMS_STORAGE_PATH_PREFIX, 1024)
    ]
    file_name_mask = f"lems_data_{year}"
    to_reload = [af for af in already_fetched if file_name_mask in af]
    dfs = [_blob_to_df(bucket, gcp_logger, blob_name) for blob_name in to_reload]
    df_all_dates = pd.concat(dfs)
    df_unique = df_all_dates.loc[~df_all_dates.index.duplicated(keep='first')]
    merge_df_to_db(NMI, df_unique, 'sites', gcp_logger)
    return ('', 200)


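# The helpers `_blob_to_df` and `merge_df_to_db` are defined elsewhere in this
# codebase. As a rough illustration of the contract assumed above, a minimal
# sketch of the blob-to-DataFrame step might look like the following.
# `_blob_to_df_sketch` is a hypothetical name; the real implementation may pick
# the index column and dtypes differently.
def _blob_to_df_sketch(bucket, gcp_logger, blob_name):
    """Hypothetical sketch: download a CSV blob and parse it into a DataFrame."""
    import io

    gcp_logger.info('Loading blob %s', blob_name)
    csv_text = bucket.blob(blob_name).download_as_string().decode('utf-8')
    # index_col=0 assumes the first CSV column is the timestamp index written
    # by DataFrame.to_csv() in the fetch functions elsewhere in this module.
    return pd.read_csv(io.StringIO(csv_text), index_col=0, parse_dates=True)

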
def on_http_reload_enlighten(request):
    gcp_logger = init_gcp_logger()
    request_args = request.args
    gcp_logger.info('on_http_reload_enlighten(), args=%s', request_args)
    year = request_args['year'] if request_args and 'year' in request_args else datetime.now().year
    storage_client = init_storage_client()
    bucket = storage_client.get_bucket(GCP_STORAGE_BUCKET_ID)
    already_fetched = [
        x.get('name') for x in get_already_fetched(
            storage_client, bucket, ENLIGHTEN_STORAGE_PATH_PREFIX, 1024)
        if f"/{year}/" in x.get('name')
    ]
    dfs = [_blob_to_df(bucket, gcp_logger, blob_name) for blob_name in already_fetched]
    df_all_dates = pd.concat(dfs)
    merge_df_to_db(NMI, df_all_dates, 'sites', gcp_logger)
    return ('', 200)


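# For local smoke-testing of the HTTP entry points, one option is to drive them
# with a Flask test request context, mirroring how the Functions Framework
# passes the request in. This is a sketch only: `_smoke_test_reload_enlighten`
# is a hypothetical helper and it still needs GCP credentials in the
# environment to run end to end.
def _smoke_test_reload_enlighten():
    import flask

    app = flask.Flask(__name__)
    # Build a fake request carrying ?year=2019 and hand it to the function.
    with app.test_request_context('/?year=2019'):
        return on_http_reload_enlighten(flask.request)

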
def on_http_reload_nem12(request):
    gcp_logger = init_gcp_logger()
    gcp_logger.info('on_http_reload_nem12(), args=%s', request.args)
    storage_client = init_storage_client()
    blob_name = f"nem12/merged/nem12_{NMI}.csv"
    bucket = storage_client.get_bucket(GCP_STORAGE_BUCKET_ID)
    handle_nem12_blob_merged(None, None, storage_client, bucket, blob_name,
                             'sites', gcp_logger)
    return ('', 200)


def on_http_fetch_dailies(request):
    gcp_logger = init_gcp_logger()
    gcp_logger.info('on_http_fetch_dailies(), args=%s', request.args)
    storage_client = init_storage_client()
    fdb = init_firestore_client()
    columns = [
        'meter_consumptions_kwh', 'meter_generations_kwh',
        'solar_generations_kwh', 'solar_mean_powrs_kw',
        'solar_devices_reportings', 'capacities_kw', 'charge_quantities_kwh',
        'deterioration_states_pct', 'discharge_quantities_kwh',
        'power_at_charges_kw', 'residual_capacities_pct',
        'total_charge_quantities_kwh', 'total_discharge_quantities_kwh',
        'min_temperature_c', 'max_temperature_c'
    ]
    df_all_dailies = pd.DataFrame(index=pd.DatetimeIndex([]), columns=columns)
    missing_values = np.zeros(48)
    for doc in fdb.collection(f"sites/{NMI}/dailies").order_by(
            'interval_date', direction='ASCENDING').stream():
        doc_fields = list(doc.to_dict().keys())
        if 'interval_date' not in doc_fields:
            gcp_logger.info('Missing field interval_date')
            continue
        gcp_logger.info('Processing interval_date=%s', doc.get('interval_date'))
        doc_dict = {}
        for column in columns:
            doc_dict[column] = np.array(
                doc.get(column)) if column in doc_fields else missing_values
        df_all_dailies.loc[doc.get('interval_date')] = doc_dict
    pkl_file_name = f"dailies_{NMI}.pkl"
    pkl_file_path = f"/tmp/{pkl_file_name}"
    df_all_dailies.to_pickle(pkl_file_path)
    bucket = storage_client.get_bucket(GCP_STORAGE_BUCKET_ID)
    blob = Blob(pkl_file_name, bucket)
    with open(pkl_file_path, "rb") as pkl_file:
        blob.upload_from_file(pkl_file)
    os.remove(pkl_file_path)
    return ('', 200)


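# Downstream analysis can read the uploaded pickle back out of the bucket.
# A minimal sketch of that step follows; `_load_dailies_df_sketch` is a
# hypothetical helper name and the local path is illustrative.
def _load_dailies_df_sketch(storage_client):
    """Hypothetical sketch: download dailies_<NMI>.pkl and load it with pandas."""
    bucket = storage_client.get_bucket(GCP_STORAGE_BUCKET_ID)
    pkl_file_name = f"dailies_{NMI}.pkl"
    local_path = f"/tmp/{pkl_file_name}"
    bucket.blob(pkl_file_name).download_to_filename(local_path)
    return pd.read_pickle(local_path)

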
def on_storage_blob(data, context):
    """Background Cloud Function to be triggered by Cloud Storage.

    This generic function logs relevant data when a file is changed.

    Args:
        data (dict): The Cloud Functions event payload.
        context (google.cloud.functions.Context): Metadata of triggering event.
    Returns:
        None; the output is written to Stackdriver Logging
    """
    gcp_logger = init_gcp_logger()
    gcp_logger.info('on_storage_blob()')
    storage_client = init_storage_client()
    event_id = context.event_id
    event_type = context.event_type
    blob_created = data.get('timeCreated')
    blob_updated = data.get('updated')
    blob_name = data.get('name')
    bucket_name = data.get('bucket')
    gcp_logger.info(
        'event_id=%s, event_type=%s, bucket=%s, name=%s, metageneration=%s, created=%s, updated=%s',
        event_id, event_type, bucket_name, blob_name,
        data.get('metageneration'), blob_created, blob_updated)
    bucket = storage_client.get_bucket(bucket_name)
    if blob_name.startswith(NEM12_STORAGE_PATH_IN):
        handle_nem12_blob_in(data, context, storage_client, bucket, blob_name,
                             gcp_logger)
    elif blob_name.startswith(NEM12_STORAGE_PATH_MERGED):
        handle_nem12_blob_merged(data, context, storage_client, bucket,
                                 blob_name, 'sites', gcp_logger)
    elif blob_name.startswith(ENLIGHTEN_STORAGE_PATH_PREFIX):
        handle_enlighten_blob(data, context, storage_client, bucket, blob_name,
                              'sites', gcp_logger)
    elif blob_name.startswith(LEMS_STORAGE_PATH_PREFIX):
        handle_lems_blob(data, context, storage_client, bucket, blob_name,
                         'sites', gcp_logger)
    else:
        gcp_logger.debug('Skipping storage event event_id=%s, event_type=%s',
                         context.event_id, context.event_type)
    return ('', 200)


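# on_storage_blob can be exercised locally with a hand-built event payload.
# Only event_id and event_type are read from the context here, so a
# SimpleNamespace can stand in for google.cloud.functions.Context. This is a
# sketch: `_simulate_storage_event` is a hypothetical helper and the named blob
# must already exist in the bucket for the downstream handlers to succeed.
def _simulate_storage_event(blob_name):
    from types import SimpleNamespace

    data = {
        'bucket': GCP_STORAGE_BUCKET_ID,
        'name': blob_name,
        'metageneration': '1',
        'timeCreated': '2019-04-07T00:00:00Z',
        'updated': '2019-04-07T00:00:00Z',
    }
    context = SimpleNamespace(event_id='local-test',
                              event_type='google.storage.object.finalize')
    return on_storage_blob(data, context)

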
def test_handle_nem12_blob_merged():
    # setup
    storage_client = init_storage_client()
    fdb = init_firestore_client()
    logger = logging.getLogger()

    # given
    blob_name = 'nem12/merged/nem12_6408091979_small.csv'
    bucket = storage_client.get_bucket(GCP_STORAGE_BUCKET_ID)

    # when
    handle_nem12_blob_merged(None, None, storage_client, bucket, blob_name,
                             'test_sites', logger)

    # then
    dailies = list(
        fdb.collection('test_sites').document('6408091979').collection(
            'dailies').stream())
    assert len(dailies) > 100


def on_http_get_lems_data(request):
    """Responds to any HTTP request.

    Args:
        request (flask.Request): HTTP request object.
    Returns:
        The response text or any set of values that can be turned into a
        Response object using `make_response
        <http://flask.pocoo.org/docs/1.0/api/#flask.Flask.make_response>`.
    """
    gcp_logger = init_gcp_logger()
    gcp_logger.info('on_http_get_lems_data(), args=%s', request.args)
    storage_client = init_storage_client()
    bucket = storage_client.get_bucket(GCP_STORAGE_BUCKET_ID)
    min_date = datetime.combine(date.fromisoformat(LEMS_DATA_MIN_DATE),
                                datetime.min.time())
    yesterday = datetime.combine(date.today(),
                                 datetime.min.time()) - timedelta(days=1)
    already_fetched = get_already_fetched(storage_client, bucket,
                                          LEMS_STORAGE_PATH_PREFIX,
                                          ALREADY_FETCHED_SIZE_THRESHOLD_BYTES)
    for as_of_date in idate_range(min_date, yesterday):
        blob_name = f"{LEMS_STORAGE_PATH_PREFIX}/{as_of_date.year}/lems_data_{as_of_date.strftime('%Y%m%d')}.csv"
        blob_exists = next(
            (i for i in already_fetched if i.get('name') == blob_name),
            None) is not None
        if not blob_exists:
            gcp_logger.info('blob %s does not exist, downloading.', blob_name)
            resp = get_lems_data_resp(LEMS_USER, LEMS_PASSWORD,
                                      LEMS_BATTERY_ID, as_of_date)
            dfm = pd.DataFrame(resp.json())
            new_blob = bucket.blob(blob_name)
            new_blob.upload_from_string(dfm.to_csv())
        else:
            gcp_logger.debug('blob %s already exists, skipping.', blob_name)
    return ('', 200)


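# `idate_range` is defined elsewhere in this codebase. The sketch below shows
# the behaviour assumed by the loops above: one datetime per calendar day.
# `_idate_range_sketch` is a hypothetical name, and the real helper may treat
# the end date as exclusive rather than inclusive.
def _idate_range_sketch(start_date, end_date):
    """Hypothetical day-by-day generator, inclusive of both endpoints."""
    current = start_date
    while current <= end_date:
        yield current
        current += timedelta(days=1)

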
def on_http_get_enlighten_data(request):
    """Responds to any HTTP request.

    Args:
        request (flask.Request): HTTP request object.
    Returns:
        The response text or any set of values that can be turned into a
        Response object using `make_response
        <http://flask.pocoo.org/docs/1.0/api/#flask.Flask.make_response>`.
    """
    gcp_logger = init_gcp_logger()
    gcp_logger.info('on_http_get_enlighten_data(), args=%s', request.args)
    storage_client = init_storage_client()
    bucket = storage_client.get_bucket(GCP_STORAGE_BUCKET_ID)
    min_date = datetime.combine(date.fromisoformat(ENLIGHTEN_DATA_MIN_DATE),
                                datetime.min.time())
    yesterday = datetime.combine(date.today(),
                                 datetime.min.time()) - timedelta(days=1)
    already_fetched = get_already_fetched(storage_client, bucket,
                                          ENLIGHTEN_STORAGE_PATH_PREFIX,
                                          ALREADY_FETCHED_SIZE_THRESHOLD_BYTES)
    fetched_counter = 0
    for as_of_date in idate_range(min_date, yesterday):
        if fetched_counter >= MAX_FETCHED_BATCH_SIZE:
            break
        blob_name = f"{ENLIGHTEN_STORAGE_PATH_PREFIX}/{as_of_date.year}/enlighten_stats_{as_of_date.strftime('%Y%m%d')}.json"
        blob_exists = next(
            (i for i in already_fetched if i.get('name') == blob_name),
            None) is not None
        if not blob_exists:
            gcp_logger.info('blob %s does not exist, downloading.', blob_name)
            resp = get_enlighten_stats_resp(ENLIGHTEN_API_KEY,
                                            ENLIGHTEN_USER_ID,
                                            ENLIGHTEN_SYSTEM_ID, as_of_date)
            new_blob = bucket.blob(blob_name)
            new_blob.upload_from_string(resp.text)
            fetched_counter += 1
        else:
            gcp_logger.debug('blob %s already exists, skipping.', blob_name)
    return ('', 200)


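# `get_already_fetched` is defined elsewhere in this codebase. The sketch below
# captures the contract the callers above rely on: list blobs under a prefix and
# keep only those whose size exceeds the threshold (i.e. downloads considered
# complete), returning dicts with at least a 'name' key.
# `_get_already_fetched_sketch` is a hypothetical name.
def _get_already_fetched_sketch(storage_client, bucket, path_prefix,
                                size_threshold_bytes):
    """Hypothetical sketch of get_already_fetched."""
    return [{
        'name': blob.name,
        'size': blob.size,
    } for blob in storage_client.list_blobs(bucket, prefix=path_prefix)
            if blob.size is not None and blob.size >= size_threshold_bytes]

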
def test_handle_enlighten_blob():
    # setup
    fdb = init_firestore_client()
    storage_client = init_storage_client()
    logger = logging.getLogger()

    # given
    blob_name = 'enlighten/2019/enlighten_stats_20190407.json'
    bucket = storage_client.get_bucket(GCP_STORAGE_BUCKET_ID)

    # when
    handle_enlighten_blob(None, None, storage_client, bucket, blob_name,
                          'test_sites', logger)

    # then
    doc = fdb.collection('test_sites/6408091979/dailies').document(
        '20190407').get()
    assert doc.exists

    # tear down
    fdb.collection('test_sites/6408091979/dailies').document(
        '20190407').delete()


def test_handle_lems_blob():
    # setup
    fdb = init_firestore_client()
    storage_client = init_storage_client()
    logger = logging.getLogger()

    # given
    blob_name = 'lems/2019/lems_data_20190406.csv'
    bucket = storage_client.get_bucket(GCP_STORAGE_BUCKET_ID)

    # when
    handle_lems_blob(None, None, storage_client, bucket, blob_name,
                     'test_sites', logger)

    # then
    yesterday_doc = fdb.collection('test_sites/6408091979/dailies').document(
        '20190405').get()
    assert yesterday_doc.exists

    # tear down
    fdb.collection('test_sites/6408091979/dailies').document(
        '20190405').delete()


def _min_max_storage_csv_to_df(min_max_filenames, bucket, min_date):
    # The opening lines of this helper were truncated in the source; the
    # signature and the df_min call are reconstructed from its call sites and
    # from the symmetric df_max call below.
    min_file_name, max_file_name = min_max_filenames
    df_min = _storage_csv_to_df(min_file_name, bucket,
                                'Minimum temperature (Degree C)',
                                'min_temperature_c', min_date)
    df_max = _storage_csv_to_df(max_file_name, bucket,
                                'Maximum temperature (Degree C)',
                                'max_temperature_c', min_date)
    df_result = df_min.join(df_max)
    return df_result


# %%
scoresby_min_filename = 'SCORESBY_MIN_TEMP_IDCJAC0011_086104_1800_Data.csv'
scoresby_max_filename = 'SCORESBY_MAX_TEMP_IDCJAC0010_086104_1800_Data.csv'
viewbank_min_filename = 'VIEWBANK_MIN_TEMP_IDCJAC0011_086068_1800_Data.csv'
viewbank_max_filename = 'VIEWBANK_MAX_TEMP_IDCJAC0010_086068_1800_Data.csv'

storage_client = init_storage_client()
bucket = storage_client.get_bucket(GCP_STORAGE_BUCKET_ID)
nem12_min_date = datetime.strptime(NEM12_DATA_MIN_DATE, '%Y-%m-%d')

df_viewbank = _min_max_storage_csv_to_df(
    (viewbank_min_filename, viewbank_max_filename), bucket, nem12_min_date)
df_scoresby = _min_max_storage_csv_to_df(
    (scoresby_min_filename, scoresby_max_filename), bucket, nem12_min_date)

# Fill gaps in the Viewbank series with the Scoresby readings.
df_viewbank['min_temperature_c'].fillna(df_scoresby['min_temperature_c'],
                                        inplace=True)
df_viewbank['max_temperature_c'].fillna(df_scoresby['max_temperature_c'],
                                        inplace=True)
df_viewbank

# %%
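# %%
# `_storage_csv_to_df` used above is defined elsewhere in this codebase. The
# sketch below illustrates the assumed behaviour for these BOM daily climate
# CSVs, assuming they carry Year/Month/Day columns alongside the value column.
# `_storage_csv_to_df_sketch` is a hypothetical name, not the real helper.
def _storage_csv_to_df_sketch(file_name, bucket, source_column, target_column,
                              min_date):
    """Hypothetical sketch: load a BOM daily CSV blob into a dated DataFrame."""
    import io

    csv_text = bucket.blob(file_name).download_as_string().decode('utf-8')
    df = pd.read_csv(io.StringIO(csv_text))
    # Assemble a DatetimeIndex from the Year/Month/Day columns.
    df.index = pd.to_datetime(
        df[['Year', 'Month', 'Day']].rename(columns=str.lower))
    df = df[[source_column]].rename(columns={source_column: target_column})
    return df[df.index >= min_date]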