Example 1
def on_http_reload_lems(request):
    gcp_logger = init_gcp_logger()
    request_args = request.args
    gcp_logger.info('on_http_reload_lems(), args=%s', request_args)

    if request_args and 'year' in request_args:
        year = request_args['year']
    else:
        year = datetime.now().year
    storage_client = init_storage_client()
    bucket = storage_client.get_bucket(GCP_STORAGE_BUCKET_ID)

    already_fetched = [
        x.get('name') for x in get_already_fetched(
            storage_client, bucket, LEMS_STORAGE_PATH_PREFIX, 1024)
    ]
    file_name_mask = f"lems_data_{year}"
    to_reload = [af for af in already_fetched if file_name_mask in af]

    dfs = [
        _blob_to_df(bucket, gcp_logger, blob_name) for blob_name in to_reload
    ]

    df_all_dates = pd.concat(dfs)
    df_unique = df_all_dates.loc[~df_all_dates.index.duplicated(keep='first')]
    merge_df_to_db(NMI, df_unique, 'sites', gcp_logger)

    return ('', 200)
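The `_blob_to_df` helper used above (and in the next example) is not part of the listing. A minimal sketch of what it might look like, assuming each blob holds a CSV whose first column is the datetime index; the parsing details are assumptions, not the original implementation:

from io import StringIO

import pandas as pd


def _blob_to_df(bucket, gcp_logger, blob_name):
    # Hypothetical sketch: download the CSV blob and parse it into a
    # DataFrame indexed by its first column, parsed as datetimes.
    gcp_logger.info('_blob_to_df(), blob_name=%s', blob_name)
    csv_text = bucket.blob(blob_name).download_as_string().decode('utf-8')
    return pd.read_csv(StringIO(csv_text), index_col=0, parse_dates=True)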
Example 2
def on_http_reload_enlighten(request):
    gcp_logger = init_gcp_logger()
    request_args = request.args
    gcp_logger.info('on_http_reload_enlighten(), args=%s', request_args)

    if request_args and 'year' in request_args:
        year = request_args['year']
    else:
        year = datetime.now().year
    storage_client = init_storage_client()
    bucket = storage_client.get_bucket(GCP_STORAGE_BUCKET_ID)

    already_fetched = [
        x.get('name') for x in get_already_fetched(
            storage_client, bucket, ENLIGHTEN_STORAGE_PATH_PREFIX, 1024)
        if f"/{str(year)}/" in x.get('name')
    ]

    dfs = [
        _blob_to_df(bucket, gcp_logger, blob_name)
        for blob_name in already_fetched
    ]

    df_all_dates = pd.concat(dfs)
    merge_df_to_db(NMI, df_all_dates, 'sites', gcp_logger)

    return ('', 200)
Example 3
def on_http_reload_nem12(request):
    gcp_logger = init_gcp_logger()
    gcp_logger.info('on_http_reload_nem12(), args=%s', request.args)

    storage_client = init_storage_client()
    blob_name = f"nem12/merged/nem12_{NMI}.csv"
    bucket = storage_client.get_bucket(GCP_STORAGE_BUCKET_ID)

    handle_nem12_blob_merged(None, None, storage_client, bucket, blob_name,
                             'sites', gcp_logger)

    return ('', 200)
Example 4
def on_http_fetch_dailies(request):
    gcp_logger = init_gcp_logger()
    gcp_logger.info('on_http_fetch_dailies(), args=%s', request.args)
    storage_client = init_storage_client()
    fdb = init_firestore_client()

    columns = [
        'meter_consumptions_kwh', 'meter_generations_kwh',
        'solar_generations_kwh', 'solar_mean_powrs_kw',
        'solar_devices_reportings', 'capacities_kw', 'charge_quantities_kwh',
        'deterioration_states_pct', 'discharge_quantities_kwh',
        'power_at_charges_kw', 'residual_capacities_pct',
        'total_charge_quantities_kwh', 'total_discharge_quantities_kwh',
        'min_temperature_c', 'max_temperature_c'
    ]

    df_all_dailies = pd.DataFrame(index=pd.DatetimeIndex([]), columns=columns)
    missing_values = np.zeros(48)

    for doc in fdb.collection(f"sites/{NMI}/dailies").order_by(
            'interval_date', direction='ASCENDING').stream():
        doc_fields = list(doc.to_dict().keys())
        if 'interval_date' not in doc_fields:
            gcp_logger.info('Missing field interval_date')
            continue

        gcp_logger.info('Processing interval_date=%s',
                        doc.get('interval_date'))

        doc_dict = {}
        for column in columns:
            doc_dict[column] = (np.array(doc.get(column))
                                if column in doc_fields else missing_values)

        df_all_dailies.loc[doc.get('interval_date')] = doc_dict

    pkl_file_name = f"dailies_{NMI}.pkl"
    pkl_file_path = f"/tmp/{pkl_file_name}"
    df_all_dailies.to_pickle(pkl_file_path)

    bucket = storage_client.get_bucket(GCP_STORAGE_BUCKET_ID)
    blob = Blob(pkl_file_name, bucket)
    with open(pkl_file_path, "rb") as pkl_file:
        blob.upload_from_file(pkl_file)

    os.remove(pkl_file_path)

    return ('', 200)
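For reference, a hedged sketch of how the uploaded pickle could be read back elsewhere; `load_dailies_df` is a hypothetical helper name, while `download_to_filename` and `pd.read_pickle` are standard google-cloud-storage and pandas calls:

import pandas as pd


def load_dailies_df(storage_client, nmi):
    # Sketch: fetch the pickled dailies DataFrame written by
    # on_http_fetch_dailies() and load it back into memory.
    bucket = storage_client.get_bucket(GCP_STORAGE_BUCKET_ID)
    pkl_file_path = f"/tmp/dailies_{nmi}.pkl"
    bucket.blob(f"dailies_{nmi}.pkl").download_to_filename(pkl_file_path)
    return pd.read_pickle(pkl_file_path)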
Example 5
def on_storage_blob(data, context):
    """Background Cloud Function to be triggered by Cloud Storage.
       This generic function logs relevant data when a file is changed.

    Args:
        data (dict): The Cloud Functions event payload.
        context (google.cloud.functions.Context): Metadata of triggering event.
    Returns:
        None; the output is written to Stackdriver Logging
    """
    gcp_logger = init_gcp_logger()
    gcp_logger.info('on_storage_blob()')
    storage_client = init_storage_client()

    event_id = context.event_id
    event_type = context.event_type
    blob_created = data.get('timeCreated')
    blob_updated = data.get('updated')
    blob_name = data.get('name')
    bucket_name = data.get('bucket')
    gcp_logger.info(
        'event_id=%s, event_type=%s, bucket=%s, name=%s, metageneration=%s, created=%s, updated=%s',
        event_id, event_type, bucket_name, blob_name,
        data.get('metageneration'), blob_created, blob_updated)

    bucket = storage_client.get_bucket(bucket_name)

    if blob_name.startswith(NEM12_STORAGE_PATH_IN):
        handle_nem12_blob_in(data, context, storage_client, bucket, blob_name,
                             gcp_logger)
    elif blob_name.startswith(NEM12_STORAGE_PATH_MERGED):
        handle_nem12_blob_merged(data, context, storage_client, bucket,
                                 blob_name, 'sites', gcp_logger)
    elif blob_name.startswith(ENLIGHTEN_STORAGE_PATH_PREFIX):
        handle_enlighten_blob(data, context, storage_client, bucket, blob_name,
                              'sites', gcp_logger)
    elif blob_name.startswith(LEMS_STORAGE_PATH_PREFIX):
        handle_lems_blob(data, context, storage_client, bucket, blob_name,
                         'sites', gcp_logger)
    else:
        gcp_logger.debug('Skipping storage event event_id=%s, event_type=%s',
                         context.event_id, context.event_type)

    return ('', 200)
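A minimal sketch of how `on_storage_blob` could be exercised locally with a hand-built event payload; `FakeContext` is purely illustrative, the event id is a placeholder, and the blob name reuses the merged NEM12 path from Example 3:

from collections import namedtuple

# Illustrative stand-in for google.cloud.functions.Context.
FakeContext = namedtuple('FakeContext', ['event_id', 'event_type'])

sample_data = {
    'name': f"nem12/merged/nem12_{NMI}.csv",
    'bucket': GCP_STORAGE_BUCKET_ID,
    'timeCreated': '2019-04-07T00:00:00Z',
    'updated': '2019-04-07T00:00:00Z',
    'metageneration': '1',
}
on_storage_blob(sample_data,
                FakeContext('0000000000000000',
                            'google.storage.object.finalize'))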
Example 6
def test_handle_nem12_blob_merged():
    # setup
    storage_client = init_storage_client()
    fdb = init_firestore_client()
    logger = logging.getLogger()

    # given
    blob_name = 'nem12/merged/nem12_6408091979_small.csv'
    bucket = storage_client.get_bucket(GCP_STORAGE_BUCKET_ID)

    # when
    handle_nem12_blob_merged(None, None, storage_client, bucket, blob_name,
                             'test_sites', logger)

    # then
    dailies = [
        d for d in fdb.collection('test_sites').document(
            '6408091979').collection('dailies').stream()
    ]
    assert len(dailies) > 100
Example 7
def on_http_get_lems_data(request):
    """Responds to any HTTP request.
    Args:
        request (flask.Request): HTTP request object.
    Returns:
        The response text or any set of values that can be turned into a
        Response object using
        `make_response <http://flask.pocoo.org/docs/1.0/api/#flask.Flask.make_response>`.
    """
    gcp_logger = init_gcp_logger()
    gcp_logger.info('on_http_get_lems_data(), args=%s', request.args)

    storage_client = init_storage_client()

    bucket = storage_client.get_bucket(GCP_STORAGE_BUCKET_ID)

    min_date = datetime.combine(date.fromisoformat(LEMS_DATA_MIN_DATE),
                                datetime.min.time())
    yesterday = datetime.combine(date.today(),
                                 datetime.min.time()) - timedelta(days=1)
    already_fetched = get_already_fetched(
        storage_client, bucket, LEMS_STORAGE_PATH_PREFIX,
        ALREADY_FETCHED_SIZE_THRESHOLD_BYTES)

    for as_of_date in idate_range(min_date, yesterday):
        blob_name = f"{LEMS_STORAGE_PATH_PREFIX}/{str(as_of_date.year)}/lems_data_{as_of_date.strftime('%Y%m%d')}.csv"
        blob_exists = any(
            i.get('name') == blob_name for i in already_fetched)
        if not blob_exists:
            gcp_logger.info('blob %s does not exist, downloading.', blob_name)
            resp = get_lems_data_resp(LEMS_USER, LEMS_PASSWORD,
                                      LEMS_BATTERY_ID, as_of_date)
            dfm = pd.DataFrame(resp.json())

            new_blob = bucket.blob(blob_name)
            new_blob.upload_from_string(dfm.to_csv())
        else:
            gcp_logger.debug('blob %s already exists, skipping.', blob_name)

    return ('', 200)
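`idate_range` is used here and in the next example but is not part of the listing. A minimal sketch, assuming it yields one datetime per day from the start date to the end date inclusive:

from datetime import timedelta


def idate_range(start_date, end_date):
    # Assumed behaviour: generate each day from start_date through end_date.
    current = start_date
    while current <= end_date:
        yield current
        current += timedelta(days=1)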
Example 8
def on_http_get_enlighten_data(request):
    """Responds to any HTTP request.
    Args:
        request (flask.Request): HTTP request object.
    Returns:
        The response text or any set of values that can be turned into a
        Response object using
        `make_response <http://flask.pocoo.org/docs/1.0/api/#flask.Flask.make_response>`.
    """
    gcp_logger = init_gcp_logger()
    gcp_logger.info('on_http_get_enlighten_data(), args=%s', request.args)
    storage_client = init_storage_client()

    bucket = storage_client.get_bucket(GCP_STORAGE_BUCKET_ID)

    min_date = datetime.combine(date.fromisoformat(
        ENLIGHTEN_DATA_MIN_DATE), datetime.min.time())
    yesterday = datetime.combine(
        date.today(), datetime.min.time()) - timedelta(days=1)
    already_fetched = get_already_fetched(
        storage_client, bucket, ENLIGHTEN_STORAGE_PATH_PREFIX, ALREADY_FETCHED_SIZE_THRESHOLD_BYTES)
    fetched_counter = 0

    for as_of_date in idate_range(min_date, yesterday):
        if fetched_counter >= MAX_FETCHED_BATCH_SIZE:
            break

        blob_name = f"{ENLIGHTEN_STORAGE_PATH_PREFIX}/{str(as_of_date.year)}/enlighten_stats_{as_of_date.strftime('%Y%m%d')}.json"
        blob_exists = any(
            i.get('name') == blob_name for i in already_fetched)
        if not blob_exists:
            gcp_logger.info('blob %s does not exist, downloading.', blob_name)
            resp = get_enlighten_stats_resp(
                ENLIGHTEN_API_KEY, ENLIGHTEN_USER_ID, ENLIGHTEN_SYSTEM_ID, as_of_date)
            new_blob = bucket.blob(blob_name)
            new_blob.upload_from_string(resp.text)
            fetched_counter += 1
        else:
            gcp_logger.debug('blob %s already exists, skipping.', blob_name)

    return ('', 200)
Example 9
def test_handle_enlighten_blob():
    # setup
    fdb = init_firestore_client()
    storage_client = init_storage_client()
    logger = logging.getLogger()

    # given
    blob_name = 'enlighten/2019/enlighten_stats_20190407.json'
    bucket = storage_client.get_bucket(GCP_STORAGE_BUCKET_ID)

    # when
    handle_enlighten_blob(None, None, storage_client,
                          bucket, blob_name, 'test_sites', logger)

    # then
    doc = fdb.collection(
        'test_sites/6408091979/dailies').document('20190407').get()
    assert doc.exists

    # tear down
    fdb.collection(
        'test_sites/6408091979/dailies').document('20190407').delete()
Example 10
def test_handle_lems_blob():
    # setup
    fdb = init_firestore_client()
    storage_client = init_storage_client()
    logger = logging.getLogger()

    # given
    blob_name = 'lems/2019/lems_data_20190406.csv'
    bucket = storage_client.get_bucket(GCP_STORAGE_BUCKET_ID)

    # when
    handle_lems_blob(None, None, storage_client,
                     bucket, blob_name, 'test_sites', logger)

    # then
    yesterday_doc = fdb.collection(
        'test_sites/6408091979/dailies').document('20190405').get()
    assert yesterday_doc.exists

    # tear down
    fdb.collection(
        'test_sites/6408091979/dailies').document('20190405').delete()
Example 11
def _min_max_storage_csv_to_df(file_names, bucket, min_date):
    min_file_name, max_file_name = file_names
    df_min = _storage_csv_to_df(min_file_name, bucket,
                                'Minimum temperature (Degree C)',
                                'min_temperature_c', min_date)
    df_max = _storage_csv_to_df(max_file_name, bucket,
                                'Maximum temperature (Degree C)',
                                'max_temperature_c', min_date)
    df_result = df_min.join(df_max)

    return df_result


# %%
scoresby_min_filename = 'SCORESBY_MIN_TEMP_IDCJAC0011_086104_1800_Data.csv'
scoresby_max_filename = 'SCORESBY_MAX_TEMP_IDCJAC0010_086104_1800_Data.csv'
viewbank_min_filename = 'VIEWBANK_MIN_TEMP_IDCJAC0011_086068_1800_Data.csv'
viewbank_max_filename = 'VIEWBANK_MAX_TEMP_IDCJAC0010_086068_1800_Data.csv'

storage_client = init_storage_client()
bucket = storage_client.get_bucket(GCP_STORAGE_BUCKET_ID)
nem12_min_date = datetime.strptime(NEM12_DATA_MIN_DATE, '%Y-%m-%d')

df_viewbank = _min_max_storage_csv_to_df(
    (viewbank_min_filename, viewbank_max_filename), bucket, nem12_min_date)
df_scoresby = _min_max_storage_csv_to_df(
    (scoresby_min_filename, scoresby_max_filename), bucket, nem12_min_date)

df_viewbank['min_temperature_c'] = df_viewbank['min_temperature_c'].fillna(
    df_scoresby['min_temperature_c'])
df_viewbank['max_temperature_c'] = df_viewbank['max_temperature_c'].fillna(
    df_scoresby['max_temperature_c'])
df_viewbank

# %%
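`_storage_csv_to_df` is not shown in the listing. A minimal sketch, assuming the BOM daily-temperature CSVs carry `Year`/`Month`/`Day` columns alongside the named temperature column; the parsing details are assumptions, not the original implementation:

from io import StringIO

import pandas as pd


def _storage_csv_to_df(file_name, bucket, source_column, target_column,
                       min_date):
    # Sketch: read the CSV blob, build a daily DatetimeIndex from the assumed
    # Year/Month/Day columns, keep only the temperature column renamed to
    # target_column, and drop rows earlier than min_date.
    csv_text = bucket.blob(file_name).download_as_string().decode('utf-8')
    df = pd.read_csv(StringIO(csv_text))
    df.index = pd.to_datetime(
        df[['Year', 'Month', 'Day']].rename(columns=str.lower))
    df = df[[source_column]].rename(columns={source_column: target_column})
    return df[df.index >= min_date]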