def main(argv):
    sys.path.append(os.path.realpath(os.path.join(os.path.dirname(__file__), '..')))

    credPath = os.path.join(os.path.dirname(__file__),
                            f"credentials_as_{os.environ['USERNAME']}.json")
    print(f"Loading credentials from {credPath}")
    with io.open(credPath, encoding='utf-8') as F:
        credentials = json.loads(F.read())

    db_schema = None
    db = Database(credentials=credentials)

    from goodvibrations.predictStatus import PredictCondition

    print("Registering function")
    db.unregister_functions(["PredictCondition"])
    try:
        db.register_functions([PredictCondition])
    except Exception as exc:
        print(exc)

    fn = PredictCondition(condition='predStatus')
    df = fn.execute_local_test(db=db, db_schema=db_schema, generate_days=1, to_csv=True)
    print(df)
def add_functions(json_payload, credentials=None):
    """
    adds KPI functions to a given entity type

    Uses the following APIs:
        POST /api/kpi/v1/{orgId}/entityType/{entityTypeName}/kpiFunction

    :param credentials: dict analytics-service dev credentials
    :param json_payload:
    ```
    {
        "entity_type_name": "sample_entity_type_name",
        "functions": [
            {
                "name": "RandomUniform",  # a valid catalog function name
                # PARAMETERS REQUIRED FOR THE FUNCTION
                # For example bif.RandomUniform needs these additional parameters
                "parameters": {
                    "min_value": 0.1,
                    "max_value": 0.2,
                    "output_item": "discharge_perc"
                }
            }
        ]
    }
    ```
    :return:
    """
    # 1. INPUT CHECKING
    logger.debug('Performing Input Checking')
    payload = validateJSON(json_payload)  # input is valid json
    validate(instance=payload, schema=create_kpifunction_schema)  # input has valid schema

    # 2. INPUT PARSING
    if 'entity_type_name' not in payload:
        raise Exception('No Entity Type was specified')

    functions = None
    if 'functions' in payload:
        functions = payload['functions']
        functions = parse_input_functions(functions, credentials=credentials)

    # 3. DATABASE CONNECTION
    # :description: to access Watson IOT Platform Analytics DB.
    logger.debug('Connecting to Database')
    db = Database(credentials=credentials)

    # 4. CREATE CUSTOM ENTITY FROM JSON
    # 4.a Instantiate a custom entity type
    entity_type = BaseCustomEntityType(name=payload['entity_type_name'],
                                       db=db,
                                       functions=functions)

    # 4.b Publish kpis to register kpis and constants so they appear in the UI
    entity_type.publish_kpis()

    # 5. CLOSE DB CONNECTION
    db.release_resource()

    return
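# A minimal usage sketch for add_functions(). The payload mirrors the docstring
# example above; the entity type name and credentials file path are assumptions.
import json

with open('credentials_as_dev.json', encoding='utf-8') as F:
    credentials = json.loads(F.read())

sample_payload = json.dumps({
    "entity_type_name": "sample_entity_type_name",
    "functions": [
        {
            "name": "RandomUniform",
            "parameters": {
                "min_value": 0.1,
                "max_value": 0.2,
                "output_item": "discharge_perc"
            }
        }
    ]
})

add_functions(sample_payload, credentials=credentials)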
def __init__(self, entity_type_name=None, entity_name=None):
    # replace with valid table and column names
    self.entity_type_name = entity_type_name
    self.entity_name = entity_name
    self.db_schema = "public"  # only required if you are not using the default
    self.table_name = entity_type_name.upper()  # change to a valid entity time series table name
    self.dim_table_name = "DM_" + self.table_name  # change to a valid entity dimension table name
    self.timestamp = 'evt_timestamp'
    self.credentials = settings.CREDENTIALS
    # logging.info('username %s' % self.credentials['db2']['username'])
    # logging.info('password %s' % self.credentials['db2']['password'])
    # logging.info('host %s' % self.credentials['db2']['host'])
    # logging.info('port %s' % self.credentials['db2']['port'])
    # logging.info('databaseName %s' % self.credentials['db2']['databaseName'])
    self.db = Database(credentials=self.credentials)
def query_db(query, dbtype='db2'):
    # use '==' for string comparison; 'is' tests object identity and is unreliable here
    if dbtype == 'db2':
        with open('./dev_resources/credentials_as_dev.json', encoding='utf-8') as F:
            credentials = json.loads(F.read())
    elif dbtype == 'postgres':
        with open('./dev_resources/credentials_as_postgre.json', encoding='utf-8') as F:
            credentials = json.loads(F.read())

    db = Database(credentials=credentials)
    df = pd.read_sql_query(sql=text(query), con=db.connection)

    csvname = dbtype + 'results' + time.strftime("%Y%m%d-%H%M%S")
    df.to_csv('data/' + csvname)
    db.connection.dispose()
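# A minimal usage sketch for query_db(). The schema and table name in the query
# are assumptions; point it at an existing entity time series table.
query_db("SELECT * FROM BLUADMIN.BUILDINGS FETCH FIRST 100 ROWS ONLY", dbtype='db2')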
def __init__(self, entity_type_name=None):
    self.entity_type_name = entity_type_name
    logging.info(self.entity_type_name)
    self.db_schema = "public"  # only required if you are not using the default
    # self.table_name = entity_type_name.upper()  # change to a valid entity time series table name
    # self.dim_table_name = "DM_" + self.table_name  # change to a valid entity dimension table name
    # self.timestamp = 'evt_timestamp'
    self.credentials = settings.CREDENTIALS
    # logging.info('username %s' % self.credentials['db2']['username'])
    # logging.info('password %s' % self.credentials['db2']['password'])
    # logging.info('host %s' % self.credentials['db2']['host'])
    # logging.info('port %s' % self.credentials['db2']['port'])
    # logging.info('databaseName %s' % self.credentials['db2']['databaseName'])
    self.db = Database(credentials=self.credentials)
    self.entity_names = self.get_entity_names
    if entity_type_name is not None:
        self.table_name = entity_type_name.upper()  # change to a valid entity time series table name
        self.dim_table_name = "DM_" + self.table_name  # change to a valid entity dimension table name
        self.timestamp = 'evt_timestamp'
    self.http = urllib3.PoolManager()
# with open('credentials_dev.json', encoding='utf-8') as F:
with open('credentials.json', encoding='utf-8') as F:
    credentials = json.loads(F.read())

'''
Developing Test Pipelines
-------------------------
When creating a set of functions you can test how these functions will work
together by creating a test pipeline.
'''

'''
Create a database object to access Watson IOT Platform Analytics DB.
'''
db = Database(credentials=credentials)
db_schema = None  # set if you are not using the default

'''
To do anything with IoT Platform Analytics, you will need one or more entity
types. You can create entity types through the IoT Platform or using the
python API as shown below. The database schema is only needed if you are not
using the default schema. You can also rename the timestamp.
'''
entity_name = settings.ENTITY_NAME or 'buildings'
db_schema = settings.DB_SCHEMA or "BLUADMIN"  # replace if you are not using the default schema
db.drop_table(entity_name, schema=db_schema)

# Credentials to access Building Insights API.
USERNAME = settings.USERNAME
PASSWORD = settings.PASSWORD
# TENANT_ID = settings.TENANT_ID
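# A minimal sketch of creating an entity type with the python API, assuming the
# iotfunctions EntityType interface; the metric column names here are placeholder
# assumptions, not part of the original script.
from sqlalchemy import Column, Float
from iotfunctions.metadata import EntityType

entity = EntityType(entity_name, db,
                    Column('temperature', Float()),
                    Column('humidity', Float()),
                    **{'_timestamp': 'evt_timestamp',
                       '_db_schema': db_schema})
# register the entity type so its time series table is created and it appears in the UI
entity.register(raise_error=False)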
def load_metrics_data_from_csv(entity_type_name, file_path, credentials=None, **kwargs):
    """
    reads metrics data from csv and stores it in the entity type metrics table

    Note: make sure 'deviceid' and 'evt_timestamp' columns are present in the csv
    the 'evt_timestamp' column will be inferred to be the current time if none is present

    :param entity_type_name: str name of entity we want to load data for
    :param file_path: str path to csv file
    :param credentials: dict analytics-service dev credentials
    :param **kwargs {
        db_schema str if no schema is provided will use the default schema
        if_exists str default: append
    }
    :return:
    """
    # load csv into a dataframe
    df = pd.read_csv(file_path)

    # map the lowering function to all column names
    # required columns are lower case
    df.columns = map(str.lower, df.columns)

    # DATABASE CONNECTION
    # :description: to access Watson IOT Platform Analytics DB.
    logger.debug('Connecting to Database')
    db = Database(credentials=credentials, entity_type=entity_type_name)

    # check if entity type table exists
    db_schema = None
    if 'db_schema' in kwargs:
        db_schema = kwargs['db_schema']

    # get the entity type to add data to
    entity_type_metadata = db.entity_type_metadata.get(entity_type_name)
    logger.debug(entity_type_metadata)
    if entity_type_metadata is None:
        raise RuntimeError(
            f'No entity type {entity_type_name} found. '
            f'Make sure you create the entity type before loading data using csv. '
            f'Refer to create_custom_entitytype() to create the entity type first')

    # find required columns
    timestamp_col_name = entity_type_metadata['metricTimestampColumn']
    logical_name = entity_type_metadata['name']
    table_name = db_table_name(entity_type_metadata['metricTableName'], db.db_type)
    deviceid_col = 'deviceid'

    required_cols = db.get_column_names(table=table_name, schema=db_schema)
    missing_cols = list(set(required_cols) - set(df.columns))
    logger.debug(f'missing_cols : {missing_cols}')

    # add data for missing columns that are required
    # required columns that can't be NULL: {'evt_timestamp', 'deviceid', 'updated_utc', 'devicetype', 'rcv_timestamp_utc'}
    for m in missing_cols:
        if m == timestamp_col_name or m == 'rcv_timestamp_utc':
            # get possible timestamp columns and select the first one from all candidates
            df_timestamp = df.filter(like='_timestamp')
            if not df_timestamp.empty:
                df_timestamp_columns = df_timestamp.columns
                timestamp_col = df_timestamp_columns[0]
                df[m] = pd.to_datetime(df_timestamp[timestamp_col])
                logger.debug(f'Inferred column {timestamp_col} as missing column {m}')
            else:
                df[m] = dt.datetime.utcnow() - dt.timedelta(seconds=15)
                logger.debug(f'Adding data: current time to missing column {m}')
        elif m == 'devicetype':
            df[m] = logical_name
            logger.debug(f'Adding data: {logical_name} to missing column {m}')
        elif m == 'updated_utc':
            logger.debug(f'Adding data: current time to missing column {m}')
            df[m] = dt.datetime.utcnow() - dt.timedelta(seconds=15)
        elif m == deviceid_col:
            raise RuntimeError(f'Missing required column {m}')
        else:
            df[m] = None

    # DATA CHECKS
    # 1. Check pd.DataFrame data types against entitytype/database data types
    #    coerce data frame object data types to the corresponding database data types
    #    add None for missing columns (not added to the db)
    logger.debug(f'Dataframe columns before data check 1. {df.columns}')
    entity_type_columns = entity_type_metadata['dataItemDto']
    df = change_df_dtype_to_db_dtype(df, entity_type_columns)
    logger.debug(f'Dataframe columns after data check 1. {df.columns}')

    # 2. Allowed deviceid names: alpha-numeric + hyphen + underscore + period, with length between 1 and 36
    #    drop rows with disallowed deviceid names
    logger.debug(f'Dataframe has {len(df.index)} rows of data before data check 2')
    df = df[df[deviceid_col].str.contains(r'^[A-Za-z0-9._-]+$')]
    df = df[df[deviceid_col].str.len() <= 36]
    logger.warning('This function will ignore rows where deviceid has values that are not allowed')
    logger.warning('(NOTE) Allowed characters in the deviceid string are: alpha-numeric/hyphen/underscore/period '
                   'with a length of 1 to 36 characters')
    logger.debug(f'Dataframe has {len(df.index)} rows of data after data check 2')

    # remove columns that are not required / not in the entity type definition
    logger.debug(f'Updating columns: {required_cols}')
    df = df[required_cols]
    logger.debug(f'Top 5 elements of the df written to the db: \n{df.head(5)}')

    # write the dataframe to the database table
    db.write_frame(df=df, table_name=table_name)
    logger.debug(f'Generated {len(df.index)} rows of data and inserted into {table_name}')

    # CLOSE DB CONNECTION
    db.release_resource()
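# A minimal usage sketch for load_metrics_data_from_csv(). The entity type
# name, csv path, schema, and credentials file are assumptions.
import json

with open('credentials_as_dev.json', encoding='utf-8') as F:
    credentials = json.loads(F.read())

load_metrics_data_from_csv(entity_type_name='sample_entity_type_name',
                           file_path='data/sample_metrics.csv',
                           credentials=credentials,
                           db_schema='BLUADMIN')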
def create_custom_entitytype(json_payload, credentials=None, **kwargs):
    """
    creates an entity type using the given json payload

    Uses the following APIs:
        POST /meta/v1/{orgId}/entityType
        POST /api/kpi/v1/{orgId}/entityType/{entity_type_name}/kpiFunctions/import
        POST /api/constants/v1/{orgId}

    :param json_payload: JSON describing the metadata required for creating the desired entity type
    expected json schema is as follows:
    ```
        example_schema = {
            "type": "object",
            "properties": {
                "entity_type_name": {"type": "string"},
                "metrics": {"type": "array", "items": {"type": "object"}},
                "constants": {"type": "array", "items": {"type": "object"}},
                "dimensions": {"type": "array", "items": {"type": "object"}},
                "functions": {"type": "array", "items": {"type": "object"}},
                "metric_timestamp_column_name": {"type": "string"}
            },
            "required": ["entity_type_name"]
        }
    ```
    example example_schema.metrics/dimensions property
    ```
    [{
        'name': 'metric_a',
        'datatype': 'str'
        # allowed column types: number, boolean, literal/string, timestamp
        # accepted datatypes: 'str'/'string', 'int'/'integer', 'number'/'float', 'datetime', 'bool'/'boolean'
    }]
    ```
    example example_schema.constants property
    ```
    [{
        'name': 'sample_constant_name',
        'datatype': 'number',
        'value': 0.3,
        'default': 0.3,
        'description': 'optional'
        # accepted datatypes: 'str'/'string', 'int'/'integer', 'number'/'float', 'datetime', 'bool'/'boolean'
    }]
    ```
    example example_schema.functions property
    ```
    [{
        'name': 'RandomUniform',  # a valid catalog function name
        # PARAMETERS REQUIRED FOR THE FUNCTION
        # For example bif.RandomUniform needs these additional parameters
        'parameters': {
            'min_value': 0.1,
            'max_value': 0.2,
            'output_item': 'discharge_perc'
        }
    }]
    ```
    :param credentials: dict analytics-service dev credentials
    :param **kwargs {
        drop_existing bool delete existing table and rebuild the entity type table in the db
        db_schema str if no schema is provided will use the default schema
    }
    :return:
    """
    # 1. INPUT CHECKING
    logger.debug('Performing Input Checking')
    payload = validateJSON(json_payload)  # input is valid json
    validate(instance=payload, schema=create_custom_schema)  # input has valid schema

    # 2. INPUT PARSING
    metrics = None
    constants = None
    dimensions = None
    functions = None
    if 'metrics' in payload:
        metrics = payload['metrics']
        metrics = parse_input_columns(metrics)
    if 'constants' in payload:
        constants = payload['constants']
        constants = parse_input_constants(constants)
    if 'dimensions' in payload:
        dimensions = payload['dimensions']
        dimensions = parse_input_columns(dimensions)
    if 'functions' in payload:
        functions = payload['functions']
        functions = parse_input_functions(functions, credentials=credentials)

    # 3. DATABASE CONNECTION
    # :description: to access Watson IOT Platform Analytics DB.
    logger.debug('Connecting to Database')
    db = Database(credentials=credentials)

    # 4. CREATE CUSTOM ENTITY FROM JSON
    # 4.a Instantiate a custom entity type
    # overrides the default _timestamp='evt_timestamp'
    if 'metric_timestamp_column_name' in payload.keys():
        BaseCustomEntityType._timestamp = payload['metric_timestamp_column_name']
    # TODO: BaseCustomEntityType.timestamp = add user defined timestamp column
    entity_type = BaseCustomEntityType(name=payload['entity_type_name'],
                                       db=db,
                                       columns=metrics,
                                       constants=constants,
                                       dimension_columns=dimensions,
                                       functions=functions,
                                       **kwargs)
    # 4.b Register entity_type so that it creates a table for input data and appears in the UI
    #     Publish kpis to register kpis and constants so they appear in the UI
    entity_type.register(publish_kpis=True)

    # 5. CLOSE DB CONNECTION
    db.release_resource()
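# A minimal usage sketch for create_custom_entitytype(). The payload follows the
# schema documented in the docstring above; every name and value here is an
# illustrative assumption, and credentials are loaded as in the other examples.
import json

sample_payload = json.dumps({
    "entity_type_name": "sample_entity_type_name",
    "metrics": [{"name": "pressure", "datatype": "number"},
                {"name": "temperature", "datatype": "number"}],
    "constants": [{"name": "sample_constant_name", "datatype": "number",
                   "value": 0.3, "default": 0.3, "description": "optional"}],
    "dimensions": [{"name": "manufacturer", "datatype": "str"}],
    "functions": [{"name": "RandomUniform",
                   "parameters": {"min_value": 0.1, "max_value": 0.2,
                                  "output_item": "discharge_perc"}}]
})

create_custom_entitytype(sample_payload, credentials=credentials,
                         drop_existing=False, db_schema='BLUADMIN')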
import logging

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

import pandas as pd
import json

# supply credentials
with open('credentials.json', encoding='utf-8') as F:
    credentials = json.loads(F.read())

# import classes needed from iotfunctions
from iotfunctions.db import Database
from iotfunctions.ui import UISingle, UIMulti

# Connect to Analytic Service
db = Database(credentials=credentials, tenant_id=credentials['tennant_id'])

'''
Constants are defined by identifying the UI control that will manage them.
Constants may either be scalars or arrays. Single valued constants (scalars)
are managed using a single line edit control, which is derived from the
UISingle class.
'''
gamma = UISingle(name='gamma',
                 description='Sample single valued parameter',
                 datatype=float)

'''
Arrays are managed using a multi-select control.
'''
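# A hedged sketch of an array-valued constant using the multi-select control,
# followed by registering both constants so they appear in the UI. The UIMulti
# arguments and the register_constants() call mirror the UISingle example above
# and common iotfunctions usage; treat them as assumptions.
zeta = UIMulti(name='zeta',
               description='Sample array parameter',
               datatype=float)

db.register_constants([gamma, zeta])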
def main(argv):
    # entityType = 'Clients04'
    entityType = ''
    featureC = 'pressure'
    targetC = 'temperature'
    predictC = 'predict'
    startTime = None
    endTime = None
    startTimeV = dt.datetime.utcnow()
    endTimeV = dt.datetime.utcnow()

    helpString = ('train.py -E <entityType> -f <feature column> -t <target column> '
                  '-p <prediction column> -s <starttime> -e <endtime>')

    try:
        opts, args = getopt.getopt(
            argv, "hf:t:p:s:e:E:",
            ["featureC=", "targetC=", "predictC=", "startTime=", "endTime=", "entityType="])
    except getopt.GetoptError:
        print(helpString)
        sys.exit(2)

    for opt, arg in opts:
        if opt == '-h':
            print(helpString)
            sys.exit()
        elif opt in ("-E", "--entityType"):
            entityType = arg
        elif opt in ("-f", "--feature"):
            featureC = arg
        elif opt in ("-t", "--target"):
            targetC = arg
        elif opt in ("-p", "--predict"):
            predictC = arg
        elif opt in ("-s", "--starttime"):
            startTime = arg
        elif opt in ("-e", "--endtime"):
            endTime = arg

    print('EntityType "', entityType)
    print('Feature Column (X) "', featureC)
    print('Target Column (Y) "', targetC)
    print('Predictor Column "', predictC)
    print('StartTime "', startTime)
    print('EndTime "', endTime)

    if entityType == '':
        print('entityType name is missing')
        print(helpString)
        sys.exit(3)

    # endTime == None means now
    if startTime is None:
        print('startTime is missing, please specify relative to endTime (-3 means 3 days before endTime)')
        print(helpString)
        sys.exit(4)
    else:
        startTimeV = dt.datetime.utcnow() - dt.timedelta(days=int(startTime))

    # db_schema = None
    db = Database(credentials=credentials)
    print(db)

    meta = db.get_entity_type(entityType)
    logger.info('Connected to database')

    est = estimator.SimpleRegressor(features=[featureC], targets=[targetC], predictions=[predictC])
    est.delete_existing_models = True
    meta._functions = [est]
    logger.info('Created Regressor')

    # make sure the results of the python expression are saved to the derived metrics table
    meta._data_items.append({'columnName': predictC, 'columnType': 'NUMBER', 'kpiFunctionId': 22856,
                             'kpiFunctionDto': {'output': {'name': predictC}},
                             'name': predictC, 'parentDataItemName': None,
                             'sourceTableName': 'DM_CLIENTS04', 'tags': {},
                             'transient': True, 'type': 'DERIVED_METRIC'})

    jobsettings = {'_production_mode': False,
                   '_start_ts_override': dt.datetime.utcnow() - dt.timedelta(days=10),
                   '_end_ts_override': (dt.datetime.utcnow() - dt.timedelta(days=1)),  # .strftime('%Y-%m-%d %H:%M:%S'),
                   '_db_schema': 'BLUADMIN',
                   'save_trace_to_file': True}

    logger.info('Instantiated training job')

    job = pp.JobController(meta, **jobsettings)
    job.execute()

    logger.info('Model trained')

    return
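# A hedged invocation sketch for the training script above; the script name,
# entity type, and column names are assumptions.
#
#   python train.py -E Clients04 -f pressure -t temperature -p predict -s 3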
def main(argv):
    global db, db_connection, entityType, featureC, targetC, predictC, metric, \
        startTime, endTime, startTimeV, endTimeV, helpString

    get_options(argv)

    # endTime == None means now
    if endTime is None:
        endTimeV = 0
    else:
        endTimeV = ast.literal_eval(endTime)
    startTimeV = ast.literal_eval(startTime) + endTimeV

    # db_schema = None
    db = Database(credentials=credentials)
    print(db)

    # establish a native connection to db2 to store the model
    db_connection = ibm_db.connect(DB2ConnString, '', '')
    print(db_connection)

    model_store = DBModelStore(credentials['tenantId'], entityType,
                               credentials['db2']['username'], db_connection, 'db2')
    db.model_store = model_store

    # with open('output.json', 'w+', encoding='utf-8') as G:
    #     json.dump(db.entity_type_metadata, G)

    logger.info('Connected to database - SQL alchemy and native')

    meta = None
    try:
        meta = db.get_entity_type(entityType)
        print('Entity is ', meta)
    except Exception as e:
        logger.error('Failed to retrieve information about entityType ' + str(entityType) +
                     ' from the database because of ' + str(e))

    # make sure the results of the python expression are saved to the derived metrics table
    if metric == '':
        # take the first suitable choice if there is no metric
        sourceTableName = ''
        for di in meta['dataItemDto']:
            sourceTableName = di['sourceTableName']
            if len(sourceTableName) > 0:
                break
        if len(sourceTableName) > 0:
            meta._data_items.append({
                'columnName': predictC,
                'columnType': 'NUMBER',
                'kpiFunctionId': 22856,
                'kpiFunctionDto': {'output': {'name': predictC}},
                'name': predictC,
                'parentDataItemName': None,
                'sourceTableName': sourceTableName,
                'tags': {},
                'transient': True,
                'type': 'DERIVED_METRIC'
            })
        else:
            logger.error('No suitable derived metric table found')
            return
    else:
        found = False
        try:
            for di in meta['dataItemDto']:
                if di.name == metric:
                    found = True
                    predictC = di.columnName
                    break
            if not found:
                logger.error('Metric does not exist')
                return
        except Exception:
            pass

    print('Feature ', featureC, 'targets ', targetC)

    gbm = GBMRegressor(features=[featureC], targets=[targetC], predictions=[predictC],
                       max_depth=20, num_leaves=40, n_estimators=4000, learning_rate=0.001)
    setattr(gbm, 'n_estimators', 4000)
    setattr(gbm, 'max_depth', 20)
    setattr(gbm, 'num_leaves', 40)
    setattr(gbm, 'learning_rate', 0.001)
    gbm.delete_existing_models = True
    logger.info('Created Regressor')

    jobsettings = {
        'db': db,
        '_production_mode': False,
        '_start_ts_override': (dt.datetime.utcnow() - dt.timedelta(days=startTimeV)),
        '_end_ts_override': (dt.datetime.utcnow() - dt.timedelta(days=endTimeV)),
        '_db_schema': credentials['db2']['username'],
        'save_trace_to_file': True
    }

    if meta is not None:
        meta._functions = [gbm]
    else:
        logger.error('No valid entity')
        return

    logger.info('Instantiated training job')

    job = pp.JobController(meta, **jobsettings)
    job.execute()

    logger.info('Model trained')

    return
from iotfunctions.enginelog import EngineLogging
from iotfunctions.metadata import Granularity

EngineLogging.configure_console_logging(logging.DEBUG)

'''
You can test functions locally before registering them on the server to
understand how they work.

Supply credentials by pasting them from the usage section into the UI.
Place your credentials in a separate file that you don't check into the repo.
'''
with open('../dev_resources/credentials_as_dev.json', encoding='utf-8') as F:
    credentials = json.loads(F.read())

db_schema = None
db = Database(credentials=credentials, entity_type='shraddha_boiler')


def local_func_execute(fn):
    '''
    The local test will generate data instead of using server data.
    By default it will assume that the input data items are numeric.
    Required data items will be inferred from the function inputs.
    The function below executes an expression involving a column called x1.
    The local test will generate a dataframe containing the column x1.
    '''
    df = fn.execute_local_test(db=db, db_schema=db_schema, to_csv=False)
    print(df)
    print('-------------------------END OF LOCAL FUNC EXECUTE-------------------------------')
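# A hedged usage sketch for local_func_execute(). PythonExpression is a catalog
# function from iotfunctions.bif; the column name x1 matches the docstring above,
# while the expression and output name are illustrative assumptions.
from iotfunctions.bif import PythonExpression

fn = PythonExpression(expression='df["x1"] * 0.9', output_name='x1_scaled')
local_func_execute(fn)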
def load_metrics_data_from_csv(entity_type_name, file_path, credentials=None, **kwargs):
    """
    reads metrics data from csv and stores it in the entity type metrics table

    Note: make sure 'deviceid' and 'evt_timestamp' columns are present in the csv
    the 'evt_timestamp' column will be inferred to be the current time if none is present

    :param entity_type_name: str name of entity we want to load data for
    :param file_path: str path to csv file
    :param credentials: dict analytics-service dev credentials
    :param **kwargs {
        db_schema str if no schema is provided will use the default schema
        if_exists str default: append
    }
    :return:
    """
    # load csv into a dataframe
    df = pd.read_csv(file_path)

    # map the lowering function to all column names
    # required columns are lower case
    df.columns = map(str.lower, df.columns)

    # DATABASE CONNECTION
    # :description: to access Watson IOT Platform Analytics DB.
    logger.debug('Connecting to Database')
    db = Database(credentials=credentials)

    # check if entity type table exists
    db_schema = None
    if 'db_schema' in kwargs:
        db_schema = kwargs['db_schema']

    # get the entity type to add data to
    try:
        entity_type = db.get_entity_type(entity_type_name)
    except Exception:
        raise Exception(
            f'No entity type {entity_type_name} found. '
            f'Make sure you create the entity type before loading data using csv. '
            f'Refer to create_custom_entitytype() to create the entity type first')

    # find required columns
    required_cols = db.get_column_names(table=entity_type.name, schema=db_schema)
    missing_cols = list(set(required_cols) - set(df.columns))
    logger.debug(f'missing_cols : {missing_cols}')

    # add data for missing columns that are required
    # required columns that can't be NULL: {'evt_timestamp', 'deviceid', 'updated_utc', 'devicetype'}
    for m in missing_cols:
        if m == entity_type._timestamp:
            # get possible timestamp columns and select the first one from all candidates
            df_timestamp = df.filter(like='_timestamp')
            if not df_timestamp.empty:
                df_timestamp_columns = df_timestamp.columns
                timestamp_col = df_timestamp_columns[0]
                df[m] = pd.to_datetime(df_timestamp[timestamp_col])
                logger.debug(f'Inferred column {timestamp_col} as missing column {m}')
            else:
                df[m] = dt.datetime.utcnow() - dt.timedelta(seconds=15)
                logger.debug(f'Adding data: current time to missing column {m}')
        elif m == 'devicetype':
            df[m] = entity_type.logical_name
            logger.debug(f'Adding data: {entity_type.logical_name} to missing column {m}')
        elif m == 'updated_utc':
            logger.debug(f'Adding data: current time to missing column {m}')
            df[m] = dt.datetime.utcnow() - dt.timedelta(seconds=15)
        elif m == entity_type._entity_id:
            raise Exception(f'Missing required column {m}')
        else:
            df[m] = None

    # remove columns that are not required
    df = df[required_cols]

    # write the dataframe to the database table
    db.write_frame(df=df, table_name=entity_type.name)
    logger.debug(f'Generated {len(df.index)} rows of data and inserted into {entity_type.name}')

    # CLOSE DB CONNECTION
    db.release_resource()

    return