Example #1
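
# The examples below rely on names defined elsewhere in the original test
# module. The imports and constants here are a reconstruction: the
# iotfunctions module paths and the constants' string values are assumptions.
import logging

import pandas as pd
from nose.tools import assert_true
from sklearn.metrics import r2_score
from sqlalchemy import Column, Float

from iotfunctions.anomaly import (FFTbasedGeneralizedAnomalyScoreV2, GBMForecaster, GBMRegressor,
                                  KMeansAnomalyScoreV2, SaliencybasedGeneralizedAnomalyScoreV2,
                                  SpectralAnomalyScoreExt)
from iotfunctions.dbtables import FileModelStore
from iotfunctions.enginelog import EngineLogging

# Data-column and score-column names used throughout the tests (string values assumed).
Temperature = 'Temperature'
Humidity = 'Humidity'
KW = 'KW'
spectral = 'TemperatureSpectralScore'
spectralinv = 'TemperatureSpectralScoreInv'
fft = 'TemperatureFFTScore'
sal = 'SaliencyAnomalyScore'
kmeans = 'TemperatureKmeansScore'
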
class DatabaseDummy:
    """Minimal stand-in for iotfunctions' Database object, carrying only the
    attributes the functions under test read."""
    tenant_id = '###_IBM_###'
    db_type = 'db2'
    model_store = FileModelStore('./data')

    def __init__(self):  # was '_init', which would never be called
        pass
def test_anomaly_scores():

    # silence numba's chatty debug logging
    numba_logger = logging.getLogger('numba')
    numba_logger.setLevel(logging.ERROR)

    ####
    print('Create dummy database')
    db_schema = None
    db = DatabaseDummy()
    print(db.model_store)

    #####

    jobsettings = {'db': db, '_db_schema': 'public'}
    EngineLogging.configure_console_logging(logging.DEBUG)

    # Run on the good pump first
    # Get stuff in
    print('Read Anomaly Sample data in')
    df_i = pd.read_csv('./data/AzureAnomalysample.csv', index_col=False, parse_dates=['timestamp'])

    df_i['entity'] = 'MyRoom'
    df_i[Temperature] = df_i['value'] + 20
    df_i = df_i.drop(columns=['value'])

    # and sort it by timestamp
    df_i = df_i.sort_values(by='timestamp')
    df_i = df_i.set_index(['entity', 'timestamp']).dropna()

    for i in range(0, df_i.index.nlevels):
        print(str(df_i.index.get_level_values(i)))

    #####
    print('Use scaling model generated with sklearn 0.21.3')

    print('Compute Saliency Anomaly Score')
    sali = SaliencybasedGeneralizedAnomalyScoreV2(Temperature, 12, True, sal)
    et = sali._build_entity_type(columns=[Column(Temperature, Float())], **jobsettings)
    sali._entity_type = et
    df_i = sali.execute(df=df_i)

    print('Compute FFT Anomaly Score')
    ffti = FFTbasedGeneralizedAnomalyScoreV2(Temperature, 12, True, fft)
    et = ffti._build_entity_type(columns=[Column(Temperature, Float())], **jobsettings)
    ffti._entity_type = et
    df_i = ffti.execute(df=df_i)

    print('Compute K-Means Anomaly Score')
    kmi = KMeansAnomalyScoreV2(Temperature, 12, True, kmeans)
    et = kmi._build_entity_type(columns=[Column(Temperature, Float())], **jobsettings)
    kmi._entity_type = et
    df_comp = kmi.execute(df=df_i)

    print("Executed Anomaly functions on sklearn 0.21.3")

    print("Now generate new scalings with recent sklearn")
    db.model_store = FileModelStore('/tmp')

    print('Compute Spectral Anomaly Score')
    spsi = SpectralAnomalyScoreExt(Temperature, 12, spectral, spectralinv)
    et = spsi._build_entity_type(columns=[Column(Temperature, Float())], **jobsettings)
    spsi._entity_type = et
    df_i = spsi.execute(df=df_i)

    print('Compute Saliency Anomaly Score')
    sali = SaliencybasedGeneralizedAnomalyScoreV2(Temperature, 12, True, sal)
    et = sali._build_entity_type(columns=[Column(Temperature, Float())], **jobsettings)
    sali._entity_type = et
    df_i = sali.execute(df=df_i)

    print('Compute FFT Anomaly Score')
    ffti = FFTbasedGeneralizedAnomalyScoreV2(Temperature, 12, True, fft)
    et = ffti._build_entity_type(columns=[Column(Temperature, Float())], **jobsettings)
    ffti._entity_type = et
    df_i = ffti.execute(df=df_i)

    print('Compute K-Means Anomaly Score')
    kmi = KMeansAnomalyScoreV2(Temperature, 12, True, kmeans)
    et = kmi._build_entity_type(columns=[Column(Temperature, Float())], **jobsettings)
    kmi._entity_type = et
    df_comp = kmi.execute(df=df_i)

    print("Executed Anomaly functions")

    # df_comp.to_csv('./data/AzureAnomalysampleOutputV2.csv')
    df_o = pd.read_csv('./data/AzureAnomalysampleOutputV2.csv')

    # print('Compare Scores - Linf')

    print('Compare scores - R2')

    comp2 = {spectral: r2_score(df_o[spectralinv].values, df_comp[spectralinv].values),
             fft: r2_score(df_o[fft].values, df_comp[fft].values),
             sal: r2_score(df_o[sal].values, df_comp[sal].values),
             kmeans: r2_score(df_o[kmeans].values, df_comp[kmeans].values)}

    print(comp2)

    # assert_true(comp2[spectral] > 0.9)
    assert_true(comp2[fft] > 0.9)
    assert_true(comp2[sal] > 0.9)
    # assert_true(comp2[kmeans] > 0.9)

    df_agg = df_i.copy()

    # put the time index on an explicit per-minute frequency
    # ('min' replaces the 'T' alias, which is deprecated in recent pandas)
    df_agg = df_agg.reset_index().set_index(['timestamp']).asfreq(freq='min')
    df_agg['site'] = 'Munich'
    df_agg = df_agg.reset_index().set_index(['entity', 'timestamp', 'site']).dropna()

    print('Compute Spectral Anomaly Score - aggr')
    spsi = SpectralAnomalyScoreExt(Temperature, 12, spectral, spectralinv)
    et = spsi._build_entity_type(columns=[Column(Temperature, Float())], **jobsettings)
    spsi._entity_type = et
    df_agg = spsi.execute(df=df_agg)

    print('Compute K-Means Anomaly Score - aggr')
    kmi = KMeansAnomalyScoreV2(Temperature, 12, True, kmeans)
    et = kmi._build_entity_type(columns=[Column(Temperature, Float())], **jobsettings)
    kmi._entity_type = et
    df_agg = kmi.execute(df=df_agg)

    print('Compute Saliency Anomaly Score - aggr')
    sali = SaliencybasedGeneralizedAnomalyScoreV2(Temperature, 12, True, sal)
    et = sali._build_entity_type(columns=[Column(Temperature, Float())], **jobsettings)
    sali._entity_type = et
    df_agg = sali.execute(df=df_agg)

    print('Compute FFT Anomaly Score - aggr')
    ffti = FFTbasedGeneralizedAnomalyScoreV2(Temperature, 12, True, fft)
    et = ffti._build_entity_type(columns=[Column(Temperature, Float())], **jobsettings)
    ffti._entity_type = et
    df_agg = ffti.execute(df=df_agg)

    print(df_agg.describe())

    comp3 = {spectral: r2_score(df_o[spectralinv].values, df_agg[spectralinv].values),
             fft: r2_score(df_o[fft].values, df_agg[fft].values),
             sal: r2_score(df_o[sal].values, df_agg[sal].values),
             kmeans: r2_score(df_o[kmeans].values, df_agg[kmeans].values)}

    print(comp3)

    print("Executed Anomaly functions on aggregation data")

Example #2

def test_light_gbm():

    numba_logger = logging.getLogger('numba')
    numba_logger.setLevel(logging.ERROR)

    # Run on the good pump first
    # Get stuff in
    print('Read Regressor Sample data in')
    df_i = pd.read_csv('./data/RegressionTestData.csv', index_col=False, parse_dates=['DATETIME'])
    df_i = df_i.rename(columns={'DATETIME': 'timestamp'})

    df_i['entity'] = 'MyShop'
    df_i[Temperature] = pd.to_numeric(df_i[Temperature], errors='coerce')
    df_i[Humidity] = pd.to_numeric(df_i[Humidity], errors='coerce')

    # and sort it by timestamp
    df_i = df_i.sort_values(by='timestamp')
    df_i = df_i.set_index(['entity', 'timestamp']).dropna()

    for i in range(0, df_i.index.nlevels):
        print(str(df_i.index.get_level_values(i)))

    EngineLogging.configure_console_logging(logging.DEBUG)

    #####
    print('Create dummy database')
    db_schema = None
    db = DatabaseDummy()
    print(db.model_store)

    #####

    print('lightGBM regressor - testing training pipeline with sklearn 0.21.3')
    db.model_store = FileModelStore('/tmp')

    jobsettings = {'db': db, '_db_schema': 'public'}

    brgi = GBMRegressor(features=[Temperature, Humidity], targets=[KW], predictions=['KW_pred'],
                        n_estimators=500, num_leaves=40, learning_rate=0.2, max_depth=-1)

    brgi.stop_auto_improve_at = 0.4
    brgi.active_models = dict()

    et = brgi._build_entity_type(columns=[Column(Temperature, Float())], **jobsettings)
    brgi._entity_type = et

    df_i = brgi.execute(df=df_i)

    print('lightGBM regressor - testing training pipeline with recent sklearn and lightgbm')

    print('lightGBM regressor - first time training')
    jobsettings = {'db': db, '_db_schema': 'public'}

    brgi = GBMRegressor(features=[Temperature, Humidity], targets=[KW], predictions=['KW_pred'],
                        n_estimators=500, num_leaves=40, learning_rate=0.2, max_depth=-1)

    brgi.stop_auto_improve_at = 0.4
    brgi.active_models = dict()

    et = brgi._build_entity_type(columns=[Column(Temperature, Float())], **jobsettings)
    brgi._entity_type = et

    df_i = brgi.execute(df=df_i)
    print('lightGBM regressor done')

    mtrc = brgi.active_models['model.TEST_ENTITY_FOR_GBMREGRESSOR.GBMRegressor.KW.MyShop'][0].eval_metric_test
    print('Trained model r2', mtrc)
    assert_true(mtrc > 0.4)

    print('lightGBM regressor - testing training pipeline done')


    #####

    print('lightGBM regressor - inference')

    print('lightGBM regressor - scoring with the previously trained model')
    jobsettings = {'db': db, '_db_schema': 'public'}  # , 'save_trace_to_file': True

    brgi = GBMRegressor(features=[Temperature, Humidity], targets=[KW], predictions=['KW_pred'])
    brgi.stop_auto_improve_at = 0.4
    brgi.active_models = dict()

    et = brgi._build_entity_type(columns=[Column(Temperature, Float())], **jobsettings)

    brgi._entity_type = et
    df_i = brgi.execute(df=df_i)
    print('lightGBM regressor done')

    mtrc = brgi.active_models['model.TEST_ENTITY_FOR_GBMREGRESSOR.GBMRegressor.KW.MyShop'][0].eval_metric_test
    print('Trained model r2', mtrc)
    assert_true(mtrc > 0.4)

    print('lightGBM regressor - inference done')

    #####

    print('lightGBM regressor - enforce retraining')

    print('lightGBM regressor - retraining forced by a stricter quality target')
    jobsettings = {'db': db, '_db_schema': 'public'}  # , 'save_trace_to_file': True

    brgi = GBMRegressor(features=[Temperature, Humidity], targets=[KW], predictions=['KW_pred'])
    brgi.stop_auto_improve_at = mtrc + 2  # force retrain as r2 metric is considered bad now
    brgi.active_models = dict()

    et = brgi._build_entity_type(columns=[Column(Temperature, Float())], **jobsettings)
    brgi._entity_type = et
    df_i = brgi.execute(df=df_i)
    print('lightGBM regressor done')

    mtrc = brgi.active_models['model.TEST_ENTITY_FOR_GBMREGRESSOR.GBMRegressor.KW.MyShop'][0].eval_metric_test
    print('Trained model r2', mtrc)
    assert_true(mtrc > 0.4)

    print('lightGBM regressor - enforce retraining done')

    #####

    print('lightGBM forecaster - first time training')
    jobsettings = {'db': db, '_db_schema': 'public'}  # , 'save_trace_to_file': True

    brgei = GBMForecaster(features=[Temperature, Humidity], targets=[KW], predictions=['KW_pred'], lags=[1,3,7])
    brgei.stop_auto_improve_at = mtrc + 2  # force retrain as r2 metric is considered bad now
    brgei.active_models = dict()

    et = brgei._build_entity_type(columns=[Column(Temperature, Float())], **jobsettings)
    brgei._entity_type = et
    df_i = brgei.execute(df=df_i)
    print('lightGBM forecaster done')

    mtrc = brgei.active_models['model.TEST_ENTITY_FOR_GBMFORECASTER.GBMForecaster.KW.MyShop'][0].eval_metric_test
    print('Trained model r2', mtrc)
    assert_true(mtrc > 0.4)

    print('lightGBM forecaster - training done')


Example #3

#!/usr/bin/python3
# Import packages and libraries

import datetime as dt
import json
import pandas as pd
import numpy as np
from sqlalchemy import Column, Integer, String, Float, DateTime, Boolean, func
import iotfunctions.bif as bif
from iotfunctions.metadata import EntityType, LocalEntityType
from iotfunctions.db import Database
from iotfunctions.dbtables import FileModelStore

# Connect to the service
with open('credentials_as_monitor_demo.json', encoding='utf-8') as F:
    credentials = json.loads(F.read())
db_schema = None
db = Database(credentials=credentials)

# Write the function


def f(df, parameters=None):
    adjusted_distance = df['distance'] * 0.9
    return adjusted_distance
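
# Note: the (df, parameters) signature follows the convention iotfunctions
# uses when invoking stored Python functions (e.g. via bif.PythonFunction),
# which is presumably why the example defines f this way.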


# Save the function to a local model store
model_store = FileModelStore()
model_store.store_model('adjusted_distance', f)
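
# Round-trip check (a minimal sketch, not part of the original example): load
# the stored function back and apply it to a small made-up DataFrame. It
# assumes FileModelStore.retrieve_model() is the counterpart to store_model().
sample_df = pd.DataFrame({'distance': [100.0, 250.0, 80.0]})
g = model_store.retrieve_model('adjusted_distance')
print(g(sample_df))  # each distance scaled by 0.9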