def data_gen_process_env(*args, **kwargs):
    # logger
    log = kwargs['logger']
    try:
        # read the data from the database
        df = kwargs['df'].copy()
        # smooth the data
        # df = a_utils.dfsmoothing(df=df, column_names=list(df.columns))
        df.clip(lower=0, inplace=True)  # remove <0 values for all columns as a result of smoothing

        # aggregate data
        rolling_sum_target, rolling_mean_target = [], []
        for col_name in df.columns:
            if kwargs['agg'][col_name] == 'sum':
                rolling_sum_target.append(col_name)
            else:
                rolling_mean_target.append(col_name)
        df[rolling_sum_target] = a_utils.window_sum(
            df, window_size=6, column_names=rolling_sum_target)
        df[rolling_mean_target] = a_utils.window_mean(
            df, window_size=6, column_names=rolling_mean_target)
        df = a_utils.dropNaNrows(df)

        # sample the data at period intervals
        df = a_utils.sample_timeseries_df(df, period=6)

        # scale the columns: here we will use min-max
        df[df.columns] = kwargs['scaler'].minmax_scale(df, df.columns, df.columns)

        # creating sat-oat for the data
        df['sat-oat'] = df['sat'] - df['oat']

        # create avg_stpt column
        stpt_cols = [ele for ele in df.columns if 'vrf' in ele]
        df['avg_stpt'] = df[stpt_cols].mean(axis=1)
        # drop individual set point cols
        df.drop(columns=stpt_cols, inplace=True)

        # select retrain range of the data
        time_start_of_train = df.index[-1] - timedelta(
            weeks=kwargs['retrain_range_rl_weeks'])
        df = df.loc[time_start_of_train:, :]

        # save the data frame
        df.to_pickle(kwargs['save_path'] + 'env_data/env_data.pkl')

    except Exception as e:
        log.error('ENV Data Generator Module: %s', str(e))
        log.debug(e, exc_info=True)
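
# --- Hypothetical usage sketch (illustration only, not called anywhere) ---
# A minimal example of how data_gen_process_env might be wired up. The keyword
# names mirror the kwargs read inside the function; the caller-side objects
# (df, agg_map, scaler) and the retraining window are assumptions that would
# come from the surrounding pipeline in practice.
def _example_data_gen_process_env(df, agg_map, scaler, save_path='results/'):
    import logging
    log = logging.getLogger('env_data_gen_example')
    data_gen_process_env(
        logger=log,                 # logger used for error reporting
        df=df,                      # raw time-series DataFrame
        agg=agg_map,                # column name -> 'sum' or 'mean'
        scaler=scaler,              # object exposing minmax_scale(df, cols, cols)
        retrain_range_rl_weeks=12,  # hypothetical retraining window
        save_path=save_path,        # must end in '/' and contain an 'env_data/' subdirectory
    )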
def get_real_obs(client, time_stamp, meta_data_: dict, obs_space_vars: list,
                 scaler, period, log, lookback_dur_min, measurement,
                 hist_stpt_backup):
    try:
        log.info('Deploy Control Module: Getting Data from TSDB')
        result_obj = client.query(
            "select * from {} where time >= '{}' - {}m and time <= '{}'".format(
                measurement, str(time_stamp), lookback_dur_min, str(time_stamp)),
            dropna=False)

        if len(result_obj.keys()) != 0:  # data available for this measurement
            df_ = result_obj[measurement]
            df_ = df_.drop(columns=['data_cleaned', 'aggregated', 'time-interval'])
            if (df_.empty) | (df_.isnull().any().any()) | (df_.shape[0] < 6):
                log.info(
                    'Deploy Control Module: TSDB returned data with incorrect info; using backup data')
                df_ = read_pickle('data/trend_data/backup_tsdb.pkl')
            else:
                df_.to_pickle('data/trend_data/backup_tsdb.pkl')
        else:  # no data available
            log.info(
                'Deploy Control Module: TSDB returned empty data; using backup data')
            df_ = read_pickle('data/trend_data/backup_tsdb.pkl')

        # pathological case where the historical set point is unavailable from TSDB
        if 'sat_stpt' not in df_.columns:
            log.info(
                'Deploy Control Module: TSDB has no sat_stpt; adding last backup value as column')
            df_['sat_stpt'] = hist_stpt_backup

        # clip less than 0 values
        df_.clip(lower=0, inplace=True)

        # aggregate data
        rolling_sum_target, rolling_mean_target = [], []
        for col_name in df_.columns:
            if meta_data_['column_agg_type'][col_name] == 'sum':
                rolling_sum_target.append(col_name)
            else:
                rolling_mean_target.append(col_name)
        df_[rolling_sum_target] = a_utils.window_sum(
            df_, window_size=6, column_names=rolling_sum_target)
        df_[rolling_mean_target] = a_utils.window_mean(
            df_, window_size=6, column_names=rolling_mean_target)
        df_ = a_utils.dropNaNrows(df_)

        # collect current set point
        hist_stpt = df_.loc[df_.index[-1], ['sat_stpt']].to_numpy().copy().flatten()

        # sample the last half hour of data
        df_ = df_.iloc[[-1], :]

        # also need an unscaled version of the observation for logging
        df_unscaled = df_.copy()

        # scale the columns: here we will use min-max
        df_[df_.columns] = scaler.minmax_scale(df_, df_.columns, df_.columns)

        # collect scaled historical set point for reward calculation
        hist_stpt_scaled = df_.loc[df_.index[-1], ['sat_stpt']].to_numpy().copy().flatten()

        # create avg_stpt column
        stpt_cols = [ele for ele in df_.columns if 'vrf' in ele]
        df_['avg_stpt'] = df_[stpt_cols].mean(axis=1)
        # drop individual set point cols
        df_.drop(columns=stpt_cols, inplace=True)
        vars_next = df_.copy()
        log.debug('df scaled and all: %s', list(df_.columns))

        # rearrange observation cols
        df_ = df_[obs_space_vars]

        # create avg_stpt column for the unscaled copy
        stpt_cols = [ele for ele in df_unscaled.columns if 'vrf' in ele]
        df_unscaled['avg_stpt'] = df_unscaled[stpt_cols].mean(axis=1)
        # drop individual set point cols
        df_unscaled.drop(columns=stpt_cols, inplace=True)
        # rearrange observation cols
        df_unscaled = df_unscaled[obs_space_vars]
        log.debug('df scaled and for observed: %s', list(df_.columns))

        return df_, df_unscaled, hist_stpt, hist_stpt_scaled, vars_next

    except Exception as e:
        log.error('Deploy Control Module: %s', str(e))
        log.debug(e, exc_info=True)
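
# --- Hypothetical usage sketch (illustration only, not called anywhere) ---
# A minimal example of calling the TSDB-based get_real_obs above. The client
# is assumed to behave like influxdb.DataFrameClient (query() returning a dict
# of DataFrames keyed by measurement); meta_data_, obs_space_vars, scaler and
# hist_stpt_backup come from the surrounding deployment code. The default
# argument binds the TSDB variant at definition time, because the name
# get_real_obs is redefined below by the API-based variant.
def _example_get_real_obs_tsdb(client, meta_data_, obs_space_vars, scaler,
                               hist_stpt_backup, _get_obs=get_real_obs):
    import logging
    from datetime import datetime
    import pytz
    log = logging.getLogger('deploy_example')
    return _get_obs(
        client=client,
        time_stamp=datetime.now(tz=pytz.utc),  # query window ends now
        meta_data_=meta_data_,
        obs_space_vars=obs_space_vars,
        scaler=scaler,
        period=6,                              # hypothetical sampling period
        log=log,
        lookback_dur_min=30,                   # hypothetical half-hour lookback
        measurement='alumni_hall',             # hypothetical measurement name
        hist_stpt_backup=hist_stpt_backup,
    )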
def get_real_obs(api_args: dict, meta_data_: dict, obs_space_vars: list,
                 scaler, period, log):
    try:
        # arguments for the api query
        time_args = {'trend_id': '2681',
                     'save_path': 'data/trend_data/alumni_data_deployment.csv'}
        start_fields = ['start_' + i for i in
                        ['year', 'month', 'day', 'hour', 'minute', 'second']]
        end_fields = ['end_' + i for i in
                      ['year', 'month', 'day', 'hour', 'minute', 'second']]
        end_time = datetime.now(tz=pytz.utc)
        time_gap_minutes = int((period + 3) * 5)
        start_time = end_time - timedelta(minutes=time_gap_minutes)
        for idx, i in enumerate(start_fields):
            time_args[i] = start_time.timetuple()[idx]
        for idx, i in enumerate(end_fields):
            time_args[i] = end_time.timetuple()[idx]
        api_args.update(time_args)

        # pull the data into a csv file
        try:
            dp.pull_online_data(**api_args)
            log.info('Deploy Control Module: Deployment Data obtained from API')
        except Exception:
            log.info('Deploy Control Module: BdX API could not get data; will reuse old data')

        # get the dataframe from the csv
        df_ = read_csv('data/trend_data/alumni_data_deployment.csv')
        df_['time'] = to_datetime(df_['time'])
        to_zone = tz.tzlocal()
        # convert time to the local timezone
        df_['time'] = df_['time'].apply(lambda x: x.astimezone(to_zone))
        df_.set_index(keys='time', inplace=True, drop=True)
        df_ = a_utils.dropNaNrows(df_)

        # add wet bulb temperature to the data
        log.info('Deploy Control Module: Start of Wet Bulb Data Calculation')
        rh = df_['WeatherDataProfile humidity'] / 100
        rh = rh.to_numpy()
        t_db = 5 * (df_['AHU_1 outdoorAirTemp'] - 32) / 9 + 273.15  # F -> K
        t_db = t_db.to_numpy()
        T = HAPropsSI('T_wb', 'R', rh, 'T', t_db, 'P', 101325)
        t_f = 9 * (T - 273.15) / 5 + 32  # K -> F
        df_['wbt'] = t_f
        log.info('Deploy Control Module: Wet Bulb Data Calculated')

        # rename the columns
        new_names = []
        for i in df_.columns:
            new_names.append(meta_data_["reverse_col_alias"][i])
        df_.columns = new_names

        # collect current set point
        hist_stpt = df_.loc[df_.index[-1], ['sat_stpt']].to_numpy().copy().flatten()

        # clean the data
        df_cleaned = dp.online_data_clean(meta_data_=meta_data_, df=df_)

        # clip less than 0 values
        df_cleaned.clip(lower=0, inplace=True)

        # aggregate data
        rolling_sum_target, rolling_mean_target = [], []
        for col_name in df_cleaned.columns:
            if meta_data_['column_agg_type'][col_name] == 'sum':
                rolling_sum_target.append(col_name)
            else:
                rolling_mean_target.append(col_name)
        df_cleaned[rolling_sum_target] = a_utils.window_sum(
            df_cleaned, window_size=6, column_names=rolling_sum_target)
        df_cleaned[rolling_mean_target] = a_utils.window_mean(
            df_cleaned, window_size=6, column_names=rolling_mean_target)
        df_cleaned = a_utils.dropNaNrows(df_cleaned)

        # sample the last half hour of data
        df_cleaned = df_cleaned.iloc[[-1], :]

        # also need an unscaled version of the observation for logging
        df_unscaled = df_cleaned.copy()

        # scale the columns: here we will use min-max
        df_cleaned[df_cleaned.columns] = scaler.minmax_scale(
            df_cleaned, df_cleaned.columns, df_cleaned.columns)

        # create avg_stpt column
        stpt_cols = [ele for ele in df_cleaned.columns if 'vrf' in ele]
        df_cleaned['avg_stpt'] = df_cleaned[stpt_cols].mean(axis=1)
        # drop individual set point cols
        df_cleaned.drop(columns=stpt_cols, inplace=True)
        # rearrange observation cols
        df_cleaned = df_cleaned[obs_space_vars]

        # create avg_stpt column for the unscaled copy
        stpt_cols = [ele for ele in df_unscaled.columns if 'vrf' in ele]
        df_unscaled['avg_stpt'] = df_unscaled[stpt_cols].mean(axis=1)
        # drop individual set point cols
        df_unscaled.drop(columns=stpt_cols, inplace=True)
        # rearrange observation cols
        df_unscaled = df_unscaled[obs_space_vars]

        return df_cleaned, df_unscaled, hist_stpt

    except Exception as e:
        log.error('Deploy Control Module: %s', str(e))
        log.debug(e, exc_info=True)
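
# --- Hypothetical usage sketch (illustration only, not called anywhere) ---
# A minimal example of calling the API-based get_real_obs defined above.
# api_args is assumed to hold the BdX API credentials/arguments expected by
# dp.pull_online_data; meta_data_, obs_space_vars and scaler come from the
# surrounding deployment code.
def _example_get_real_obs_api(api_args, meta_data_, obs_space_vars, scaler):
    import logging
    log = logging.getLogger('deploy_example')
    df_scaled, df_unscaled, hist_stpt = get_real_obs(
        api_args=api_args,
        meta_data_=meta_data_,
        obs_space_vars=obs_space_vars,
        scaler=scaler,
        period=6,  # hypothetical: ~5-minute samples over a half-hour window
        log=log,
    )
    return df_scaled, df_unscaled, hist_stpt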
def data_gen_process_vlv(*args, **kwargs):
    # logger
    log = kwargs['logger']
    try:
        # read the data from the database
        df = kwargs['df'].copy()
        # smooth the data
        # df = a_utils.dfsmoothing(df=df, column_names=list(df.columns))
        df.clip(lower=0, inplace=True)  # remove <0 values for all columns as a result of smoothing

        # aggregate data
        rolling_sum_target, rolling_mean_target = [], []
        for col_name in df.columns:
            if kwargs['agg'][col_name] == 'sum':
                rolling_sum_target.append(col_name)
            else:
                rolling_mean_target.append(col_name)
        df[rolling_sum_target] = a_utils.window_sum(
            df, window_size=6, column_names=rolling_sum_target)
        df[rolling_mean_target] = a_utils.window_mean(
            df, window_size=6, column_names=rolling_mean_target)
        df = a_utils.dropNaNrows(df)

        # sample the data at period intervals
        df = a_utils.sample_timeseries_df(df, period=6)

        # scale the columns: here we will use min-max
        df[df.columns] = kwargs['scaler'].minmax_scale(df, df.columns, df.columns)

        # creating sat-oat for the data
        df['sat-oat'] = df['sat'] - df['oat']

        # add binary classification column
        df['vlv'] = 1.0
        df.loc[df['hwe'] <= 0.001, ['vlv']] = 0

        # determine split point: the last 10 weeks are held out as test data
        t_train_end = df.index[-1] - timedelta(weeks=10)
        test_df = df.loc[t_train_end:, :]
        splitvalue = test_df.shape[0]

        # create train and test/validate data
        X_test, X_train, y_test, y_train = a_utils.df_2_arrays(
            df=df,
            predictorcols=['oat', 'oah', 'wbt', 'sat-oat'],
            outputcols=['vlv'],
            lag=0,
            scaling=False,
            scaler=None,
            scaleX=True,
            scaleY=True,
            split=splitvalue,
            shuffle=False,
            reshaping=True,
            input_timesteps=1,
            output_timesteps=1,
        )
        y_train = to_categorical(y_train)
        y_test = to_categorical(y_test)

        # save test ids for later plots
        # idx_end = -max(X_test.shape[1], y_test.shape[1])
        # idx_start = idx_end - X_test.shape[0] + 1
        # test_idx = df.index[[i for i in range(idx_start, idx_end + 1, 1)]]
        # test_info = {'test_idx': [str(i) for i in test_idx],
        #              'year_num': kwargs['year_num'], 'week_num': kwargs['week_num']}
        # with open(kwargs['save_path'] + 'vlv_data/vlv_test_info.txt', 'a') as ifile:
        #     ifile.write(json.dumps(test_info) + '\n')

        np.save(kwargs['save_path'] + 'vlv_data/vlv_X_train.npy', X_train)
        np.save(kwargs['save_path'] + 'vlv_data/vlv_X_val.npy', X_test)
        np.save(kwargs['save_path'] + 'vlv_data/vlv_y_train.npy', y_train)
        np.save(kwargs['save_path'] + 'vlv_data/vlv_y_val.npy', y_test)

    except Exception as e:
        log.error('VLV Data Generator Module: %s', str(e))
        log.debug(e, exc_info=True)
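
# --- Hypothetical usage sketch (illustration only, not called anywhere) ---
# A minimal example of how data_gen_process_vlv might be invoked; it mirrors
# the env-data example above. df, agg_map and scaler are assumptions supplied
# by the surrounding pipeline, and save_path must contain a 'vlv_data/'
# subdirectory for the .npy outputs.
def _example_data_gen_process_vlv(df, agg_map, scaler, save_path='results/'):
    import logging
    log = logging.getLogger('vlv_data_gen_example')
    data_gen_process_vlv(
        logger=log,       # logger used for error reporting
        df=df,            # raw time-series DataFrame with 'hwe', 'sat', 'oat', etc.
        agg=agg_map,      # column name -> 'sum' or 'mean'
        scaler=scaler,    # object exposing minmax_scale(df, cols, cols)
        save_path=save_path,
    )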