def nilm_naive(home, season, folder_path):
    """Predict occupancy for one home from the raw power transients.

    Loads ``<folder_path>/<season>/<home>.csv``, keeps the 06:00-22:00
    window, keeps the last half of the remaining rows and marks every
    timestamp present in the transient frame returned by
    ``find_steady_states`` as occupied.

    Parameters
    ----------
    home : int or str
        Home identifier; ``str(home) + ".csv"`` is the file name.
    season : str
        Sub-directory of `folder_path` that holds the CSV.
    folder_path : str
        Root directory of the data.

    Returns
    -------
    pd.Series
        Series named ``"occupancy"`` resampled to 15-minute bins using the
        per-bin maximum.

    Notes
    -----
    The scoring code that used to follow the ``return`` statement could
    never run (and referenced an undefined ``tp``); it has been removed.
    The returned value is unchanged.
    """
    from nilmtk.feature_detectors.steady_states import find_steady_states

    home_path = os.path.join(folder_path, season, str(home) + ".csv")
    df = pd.read_csv(home_path, index_col=0)
    df.index = pd.to_datetime(df.index)
    df = df.between_time("06:00", "22:00")

    # Floor division keeps the row count an integer on Python 3 as well.
    df = df.tail(len(df) // 2)

    _, tr = find_steady_states(df[["power"]])

    pred = pd.Series(np.zeros(len(df)), name="occupancy", index=df.index)
    pred.ix[tr.index] = 1

    # Legacy pandas API (`.ix`, `how=`) is kept deliberately: on the pandas
    # versions that accept it, `.resample(rule).max()` would mean something
    # different.
    return pred.resample("15T", how="max")
def nilm_naive(df):
    """Predict occupancy from the raw power transients of `df`.

    Every timestamp present in the transient frame returned by
    ``find_steady_states(df[["power"]])`` is marked as occupied.

    Parameters
    ----------
    df : pd.DataFrame
        Frame with a ``"power"`` column; its index becomes the index of
        the prediction before resampling.

    Returns
    -------
    pd.DataFrame
        Single-column frame (``"occupancy"``) resampled to 15-minute bins
        using the per-bin maximum.

    Notes
    -----
    The scoring code that used to follow the ``return`` statement was
    unreachable and has been removed; the returned value is unchanged.
    """
    from nilmtk.feature_detectors.steady_states import find_steady_states

    _, tr = find_steady_states(df[["power"]])

    pred = pd.DataFrame({"occupancy": np.zeros(len(df))}, index=df.index)
    pred.ix[tr.index] = 1

    # Legacy pandas API kept deliberately (see `how=` semantics on old pandas).
    return pred.resample("15T", how="max")
def nilm_pair(home, season, folder_path, freq="15", flag=0):
    """Predict occupancy for one home from paired power transients.

    Loads ``<folder_path>/<season>/<home>.csv``, keeps the 06:00-22:00
    window and the last half of the remaining rows, then marks as occupied
    (1) every transient timestamp and (2) every sample between the
    ``'T1 Time'`` and ``'T2 Time'`` of each pair produced by
    ``Hart85.pair``.

    Side effect: the filtered frame is stored in the module-level
    ``gt_power[season][home]``.

    Parameters
    ----------
    home : int or str
        Home identifier; ``str(home) + ".csv"`` is the file name.
    season : str
        Sub-directory of `folder_path` that holds the CSV.
    folder_path : str
        Root directory of the data.
    freq : str or int, optional
        Resampling period in minutes (``str(freq) + "T"`` is the rule).
    flag : int, optional
        Unused. It was only read by code placed after the ``return``
        statement, which could never run; kept for call compatibility.

    Returns
    -------
    pd.Series
        Series named ``"occupancy"`` resampled with the per-bin maximum.
    """
    from copy import deepcopy

    from nilmtk.feature_detectors.steady_states import find_steady_states
    from nilmtk.disaggregate.hart_85 import Hart85

    home_path = os.path.join(folder_path, season, str(home) + ".csv")
    df = pd.read_csv(home_path, index_col=0)
    df.index = pd.to_datetime(df.index)
    df = df.between_time("06:00", "22:00")

    # Floor division keeps the row count an integer on Python 3 as well.
    df = df.tail(len(df) // 2)
    gt_power[season][home] = df

    _, tr = find_steady_states(df[["power"]])

    pred = pd.Series(np.zeros(len(df)), name="occupancy", index=df.index)
    # Every transient counts as occupancy, whether or not it gets paired.
    pred.ix[tr.index] = 1

    h = Hart85()
    h.transients = deepcopy(tr)
    pair_df = h.pair(buffer_size=20, min_tolerance=100,
                     percent_tolerance=0.035, large_transition=1000)

    # Fill the whole interval spanned by each pair.
    for _, row in pair_df.iterrows():
        pred[row['T1 Time']:row['T2 Time']] = 1

    # Legacy pandas API kept deliberately (see `how=` semantics on old pandas).
    return pred.resample(str(freq) + "T", how="max")
def nilm_pair(df, freq="15", flag=0):
    """Predict occupancy from paired power transients of `df`.

    Marks as occupied (1) every transient timestamp returned by
    ``find_steady_states(df[["power"]])`` and (2) every sample between the
    ``'T1 Time'`` and ``'T2 Time'`` of each pair produced by
    ``Hart85.pair``.

    Parameters
    ----------
    df : pd.DataFrame
        Frame with a ``"power"`` column; its index becomes the index of
        the prediction before resampling.
    freq : str or int, optional
        Resampling period in minutes (``str(freq) + "T"`` is the rule).
    flag : int, optional
        Unused. It was only read by code placed after the ``return``
        statement, which could never run; kept for call compatibility.

    Returns
    -------
    pd.DataFrame
        Single-column frame (``"occupancy"``) resampled with the per-bin
        maximum.
    """
    from copy import deepcopy

    from nilmtk.feature_detectors.steady_states import find_steady_states
    from nilmtk.disaggregate.hart_85 import Hart85

    _, tr = find_steady_states(df[["power"]])

    pred = pd.DataFrame({"occupancy": np.zeros(len(df))}, index=df.index)
    # Every transient counts as occupancy, whether or not it gets paired.
    pred.ix[tr.index] = 1

    h = Hart85()
    h.transients = deepcopy(tr)
    pair_df = h.pair(buffer_size=20, min_tolerance=100,
                     percent_tolerance=0.035, large_transition=1000)

    # Fill the whole interval spanned by each pair.
    for _, row in pair_df.iterrows():
        pred[row['T1 Time']:row['T2 Time']] = 1

    # Legacy pandas API kept deliberately (see `how=` semantics on old pandas).
    return pred.resample(str(freq) + "T", how="max")
clf.train(train_elec.mains()) d = (clf.centroids - fridge_power.mean()).abs() fridge_num = d[('power','active')].argmin() fridge_identifier_tuple = ('unknown', fridge_num) else: clf.train(top_k_train_elec, num_states_dict=num_states_dict) fridge_instance = fridges.meters[f_id].appliances[0].identifier.instance fridge_identifier_tuple = ('fridge', fridge_instance) print("-"*80) print("Disaggregating") print("-"*80) test_mains_df = test_mains.load().next() if clf_name=="Hart": [_, transients] = find_steady_states(test_mains_df, clf.cols, clf.state_threshold, clf.noise_level) pred_df_fridge = clf.disaggregate_chunk(test_mains_df, {}, transients)[[fridge_num]] pred_ser_fridge = pred_df_fridge.squeeze() pred_ser_fridge.name="Hart" out[f_id][clf_name]=pred_ser_fridge elif clf_name=="CO": pred_df = clf.disaggregate_chunk(test_mains_df) pred_df.columns = [clf.model[i]['training_metadata'] for i in pred_df.columns] pred_df_fridge = pred_df[[find_specific_appliance('fridge', fridge_instance, pred_df.columns.tolist())]] pred_ser_fridge = pred_df_fridge.squeeze() pred_ser_fridge.name="CO" out[f_id][clf_name]=pred_ser_fridge
def nilm_pair_remove_fridge(home, season, folder_path):
    """Predict occupancy for one home while ignoring fridge-sized events.

    Loads ``<folder_path>/<season>/<home>.csv``, keeps the 06:00-22:00
    window and the last half of the remaining rows. A transient is treated
    as a fridge event when its magnitude lies within 30 of the
    module-level ``fridge_power[home]``. Occupancy is set to 1:

    * from the start of each day up to its first non-fridge transient and
      from its last non-fridge transient to the end of the day;
    * at every non-fridge transient;
    * between ``'T1 Time'`` and ``'T2 Time'`` of every non-fridge pair
      returned by ``Hart85.pair`` that spans less than 24 hours.

    Side effects: stores the pair frame in ``pair_df_dict[season][home]``
    and the transients in ``transients_dict[season][home]`` (module-level
    names).

    Parameters
    ----------
    home : int or str
        Home identifier; also the key into ``fridge_power``.
    season : str
        Sub-directory of `folder_path` that holds the CSV.
    folder_path : str
        Root directory of the data.

    Returns
    -------
    pd.Series
        Series named ``"occupancy"`` resampled to 15-minute bins using the
        per-bin maximum.

    Notes
    -----
    Fixes relative to the previous revision:

    * The per-transient loop assigned ``pred.ix[tr.index]`` (every
      transient) as soon as one non-fridge transient was seen, so the
      fridge filter had no effect. Only non-fridge transients are marked
      now, and the magnitude is compared so that fridge switch-off edges
      are excluded too, matching the per-day filter.
    * Chained assignments ``pred[day][:t] = 1`` were replaced by a single
      ``.loc`` slice assignment so the write cannot land on a temporary.
    * Unreachable scoring code after ``return`` was removed.
    """
    from copy import deepcopy

    from nilmtk.feature_detectors.steady_states import find_steady_states
    from nilmtk.disaggregate.hart_85 import Hart85

    home_path = os.path.join(folder_path, season, str(home) + ".csv")
    df = pd.read_csv(home_path, index_col=0)
    df.index = pd.to_datetime(df.index)
    df = df.between_time("06:00", "22:00").copy()

    # Floor division keeps the row count an integer on Python 3 as well.
    df = df.tail(len(df) // 2)

    fridge_min = fridge_power[home] - 30
    fridge_max = fridge_power[home] + 30

    _, tr = find_steady_states(df[["power"]])
    pred = pd.Series(np.zeros(len(df)), name="occupancy", index=df.index)

    for day in pd.DatetimeIndex(np.unique(df.index.date)):
        day_str = day.strftime("%Y-%m-%d")
        # An explicit slice yields an empty frame for a day without
        # transients instead of raising.
        events_day = tr.loc[day_str:day_str].abs()
        event_df = events_day[
            (events_day['active transition'] <= fridge_min)
            | (events_day['active transition'] >= fridge_max)]
        if len(event_df) > 0:
            # Day start .. first non-fridge event, and last one .. day end.
            pred.loc[day_str:event_df.index[0]] = 1
            pred.loc[event_df.index[-1]:day_str] = 1

    magnitude = tr['active transition'].abs()
    non_fridge = tr.index[
        ((magnitude < fridge_min) | (magnitude > fridge_max)).values]
    if len(non_fridge) > 0:
        pred.ix[non_fridge] = 1

    h = Hart85()
    h.transients = deepcopy(tr)
    pair_df = h.pair(buffer_size=20, min_tolerance=100,
                     percent_tolerance=0.035, large_transition=1000)
    pair_df_dict[season][home] = pair_df
    transients_dict[season][home] = h.transients

    for _, row in pair_df.iterrows():
        start = row['T1 Time']
        end = row['T2 Time']
        if fridge_min <= row['T1 Active'] <= fridge_max:
            continue
        time_delta = (end - start) / np.timedelta64(1, 'h')
        if time_delta < 24:
            pred[start:end] = 1
        else:
            # NOTE(review): the flattened source does not show which `if`
            # this `else` belonged to; it is attached to the duration
            # check here - confirm.
            print("*" * 80)

    # Legacy pandas API kept deliberately (see `how=` semantics on old pandas).
    return pred.resample("15T", how="max")
def nilm_pair_remove_fridge(df, start_event_remove=True):
    """Predict occupancy from the light and power streams, ignoring
    fridge- and geyser-sized power events.

    The function works on the module-level frames ``df_light_copy`` and
    ``df_power_copy`` and on the module-level ``fridge_power`` and
    ``geyser_power`` values (none of them are defined in this block). A
    power value is "background" when it lies within 100 of
    ``fridge_power`` or of ``geyser_power``.

    For each stream (light first, then power) occupancy is set to 1:

    * from the last transient of each day to the end of that day and,
      when `start_event_remove` is true, from the start of the day to the
      first transient;
    * at every light transient and at every non-background power
      transient;
    * between ``'T1 Time'`` and ``'T2 Time'`` of every light pair and of
      every non-background power pair returned by ``Hart85.pair``.

    Parameters
    ----------
    df : pd.DataFrame
        Unused. The previous revision only filtered it and read it in
        code placed after the ``return`` statement; kept for call
        compatibility.
    start_event_remove : bool, optional
        Whether the start of each day up to the first transient is marked
        as occupied.
        NOTE(review): the flattened source does not show whether the
        end-of-day fill was also guarded by this flag; it is treated as
        unconditional here - confirm.

    Returns
    -------
    pd.DataFrame
        Single-column frame (``"occupancy"``) on a fixed 1-second index
        from 2015-02-01 to 2015-02-13, resampled to 15-minute bins using
        the per-bin maximum.

    Notes
    -----
    Fixes relative to the previous revision:

    * The power branch assigned ``pred.ix[tr.index]`` (every transient)
      as soon as one non-background transient was seen; only the
      non-background transients are marked now, and the magnitude is
      compared so switch-off edges are filtered too.
    * Chained assignments ``pred[day][:t] = 1`` were replaced by ``.loc``
      slice assignments so the write cannot land on a temporary.
    * A redundant ``find_steady_states(df_power_copy)`` call before the
      loop, the unused ``global a`` and the unreachable scoring code after
      ``return`` were removed.
    """
    from copy import deepcopy

    from nilmtk.feature_detectors.steady_states import find_steady_states
    from nilmtk.disaggregate.hart_85 import Hart85

    h = Hart85()
    geyser_min = geyser_power - 100
    geyser_max = geyser_power + 100
    fridge_min = fridge_power - 100
    fridge_max = fridge_power + 100

    def is_background(value):
        # True when `value` falls in the fridge band or the geyser band.
        return (fridge_min <= value <= fridge_max
                or geyser_min <= value <= geyser_max)

    pred_index = pd.DatetimeIndex(start="2015-02-01", end="2015-02-13",
                                  freq='1s')
    pred = pd.DataFrame({"occupancy": np.zeros(len(pred_index))},
                        index=pred_index)

    for num, df_data in enumerate([df_light_copy, df_power_copy]):
        is_light = num == 0
        _, tr = find_steady_states(df_data)

        for day in pd.DatetimeIndex(np.unique(tr.index.date)):
            day_str = day.strftime("%Y-%m-%d")
            event_df = tr.loc[day_str:day_str]
            if len(event_df) == 0:
                continue
            if start_event_remove:
                pred.loc[day_str:event_df.index[0]] = 1
            pred.loc[event_df.index[-1]:day_str] = 1

        if is_light:
            # Light: every transient counts as occupancy.
            if len(tr) > 0:
                pred.ix[tr.index] = 1
        else:
            # Power: ignore fridge and geyser transients.
            active = []
            for ix, row in tr.iterrows():
                if not is_background(abs(row['active transition'])):
                    print("%s A" % (row,))
                    active.append(ix)
            if active:
                pred.ix[active] = 1

        h.transients = deepcopy(tr)
        pair_df = h.pair(buffer_size=20, min_tolerance=100,
                         percent_tolerance=0.035, large_transition=1000)
        for _, row in pair_df.iterrows():
            start = row['T1 Time']
            end = row['T2 Time']
            if is_light:
                pred[start:end] = 1
            elif not is_background(row['T1 Active']):
                print("-----Filling between---- %s" % (row,))
                pred[start:end] = 1
            else:
                print("********Fridge or geyser********** %s" % (row,))

    # Legacy pandas API kept deliberately (see `how=` semantics on old pandas).
    return pred.resample("15T", how="max")
def our_approach(df):
    """Predict occupancy by combining light and non-background power events.

    The function works on the module-level frames ``df_light_copy`` and
    ``df_power_copy`` and on the module-level ``fridge_power`` and
    ``geyser_power`` values (none of them are defined in this block). A
    power transient is "background" when its magnitude lies within 40 of
    ``fridge_power`` or within 200 of ``geyser_power``.

    Occupancy is set to 1:

    * from the later of (last non-background power transient, last light
      transient) of each day to the end of that day;
    * between ``'T1 Time'`` and ``'T2 Time'`` of every pair that
      ``Hart85.pair`` builds from the light transients;
    * between ``'T1 Time'`` and ``'T2 Time'`` of every pair built from the
      non-background power transients;
    * at every light transient.

    Parameters
    ----------
    df : pd.DataFrame
        Unused. The previous revision only filtered it and never read the
        result; kept for call compatibility.

    Returns
    -------
    pd.DataFrame
        Single-column frame (``"occupancy"``) on a fixed 1-second index
        from 2015-02-01 to 2015-02-13, resampled to 15-minute bins using
        the per-bin maximum.

    Notes
    -----
    Fixes relative to the previous revision:

    * The non-background mask OR-ed all four comparisons, which is true
      for every value whenever the fridge and geyser bands do not overlap.
      A transient is now kept only if it is outside the fridge band AND
      outside the geyser band.
    * The overall power mask is evaluated on magnitudes (as the per-day
      one already was) so switch-off edges are filtered too; the signed
      transients are still what gets paired.
    * The second pairing loop iterated ``pair_df_light`` again instead of
      ``pair_df_power``.
    * Per-day lookups use explicit slices, replacing a bare ``except:``
      and an unguarded lookup on the power stream; chained assignments
      were replaced by ``.loc`` slice assignments.
    """
    from copy import deepcopy

    from nilmtk.feature_detectors.steady_states import find_steady_states
    from nilmtk.disaggregate.hart_85 import Hart85

    h = Hart85()
    geyser_min = geyser_power - 200
    geyser_max = geyser_power + 200
    fridge_min = fridge_power - 40
    fridge_max = fridge_power + 40

    def non_background(magnitude):
        # Boolean mask: outside the fridge band and outside the geyser band.
        outside_fridge = (magnitude < fridge_min) | (magnitude > fridge_max)
        outside_geyser = (magnitude < geyser_min) | (magnitude > geyser_max)
        return outside_fridge & outside_geyser

    pred_index = pd.DatetimeIndex(start="2015-02-01", end="2015-02-13",
                                  freq='1s')
    pred = pd.DataFrame({"occupancy": np.zeros(len(pred_index))},
                        index=pred_index)

    _, tr_power = find_steady_states(df_power_copy)
    days_power = pd.DatetimeIndex(np.unique(tr_power.index.date))
    _, tr_light = find_steady_states(df_light_copy)
    days_light = pd.DatetimeIndex(np.unique(tr_light.index.date))
    days = pd.DatetimeIndex(np.union1d(days_light, days_power))

    # Mark from the last relevant event of each day to the end of that day.
    for day in days:
        day_str = day.strftime("%Y-%m-%d")
        # Explicit slices give an empty frame when a stream has no event
        # on this day.
        tr_day_light = tr_light.loc[day_str:day_str]
        tr_day_power = tr_power.loc[day_str:day_str]
        tr_day_power_non_bg = tr_day_power[
            non_background(tr_day_power['active transition'].abs())]

        last_events = []
        if len(tr_day_power_non_bg) > 0:
            last_events.append(tr_day_power_non_bg.index[-1])
        if len(tr_day_light) > 0:
            last_events.append(tr_day_light.index[-1])
        if last_events:
            pred.loc[max(last_events):day_str] = 1

    # Pair everything in light and signal occupancy between them.
    h.transients = deepcopy(tr_light)
    pair_df_light = h.pair(buffer_size=20, min_tolerance=100,
                           percent_tolerance=0.035, large_transition=1000)
    for _, row in pair_df_light.iterrows():
        pred[row['T1 Time']:row['T2 Time']] = 1

    # Pair non-background power loads.
    tr_non_bg_power_overall = tr_power[
        non_background(tr_power['active transition'].abs())]
    h.transients = deepcopy(tr_non_bg_power_overall)
    pair_df_power = h.pair(buffer_size=20, min_tolerance=100,
                           percent_tolerance=0.035, large_transition=1000)
    for _, row in pair_df_power.iterrows():
        pred[row['T1 Time']:row['T2 Time']] = 1

    pred.ix[tr_light.index] = 1

    # Legacy pandas API kept deliberately (see `how=` semantics on old pandas).
    return pred.resample("15T", how="max")
def disaggregate_chunk(self, test_mains):
    """Disaggregate every mains chunk into per-appliance columns.

    For each chunk, transients are detected with ``find_steady_states``,
    each transient is assigned to the nearest entry of ``self.centroids``
    (switching that entry on for a positive transient, off otherwise) and
    the resulting state table is converted to power with
    ``self.assign_power_from_states``. The columns chosen during training
    (``self.best_matches``) are then copied out under the appliance names.

    Side effects: ``self.power_dict`` and ``self.chunk_index`` are
    overwritten for every chunk.

    Parameters
    ----------
    test_mains : iterable of pd.DataFrame
        Mains power chunks.

    Returns
    -------
    list of pd.DataFrame
        One frame per chunk, with one column per name in
        ``self.appliances``.

    Notes
    -----
    Fixes relative to the previous revision:

    * Transients were always computed from ``test_mains[0]``, so every
      chunk after the first was matched against the wrong transients.
    * The 1-D/2-D decision read the loop variable after the loop and
      raised ``NameError`` when a chunk had no transients; it is now
      derived from the transient frame's columns.
    * ``states.loc[t][col] = v`` (chained assignment) was replaced by
      ``states.loc[t, col] = v`` so the write cannot land on a temporary.
    """
    test_predictions_list = []

    for chunk in test_mains:
        _, transients = find_steady_states(
            chunk, state_threshold=self.state_threshold,
            noise_level=self.noise_level)
        # `itertuples()` yields (index, col1, ...): one data column <=> 1-D.
        is_1d = len(transients.columns) < 2

        # Every learnt meter starts in the unknown state (-1).
        states = pd.DataFrame(-1, index=chunk.index,
                              columns=self.centroids.index.values)

        for transient_tuple in transients.itertuples():
            when = transient_tuple[0]
            if when < chunk.index[0] or when > chunk.index[-1]:
                # Transient lies outside the chunk; nothing to record.
                continue

            abs_value = np.abs(transient_tuple[1:])
            positive = transient_tuple[1] > 0
            delta = pd.DataFrame((self.centroids - abs_value).abs())
            if is_1d:
                index_least_delta = delta.idxmin().values[0]
            else:
                # 2-D data: squared Euclidean distance over both columns.
                delta_columns = delta.columns
                delta["multidim"] = (delta[delta_columns[0]] ** 2
                                     + delta[delta_columns[1]] ** 2)
                index_least_delta = delta["multidim"].idxmin()

            # 1 = turned on, 0 = turned off.
            states.loc[when, index_least_delta] = 1 if positive else 0

        prev = states.iloc[-1].to_dict()
        power_chunk_dict = self.assign_power_from_states(states, prev)
        self.power_dict = power_chunk_dict
        self.chunk_index = chunk.index

        # Convert the power dict to a frame (1-D or 2-D data).
        if is_1d:
            temp_df = pd.DataFrame(power_chunk_dict, index=chunk.index)
        else:
            n_centroids = len(self.centroids.index.values)
            tuples = [[i, j] for i in range(n_centroids) for j in range(0, 2)]
            multi_columns = pd.MultiIndex.from_tuples(tuples)
            temp_df = pd.DataFrame(power_chunk_dict, index=chunk.index,
                                   columns=multi_columns)
            # NOTE(review): kept as in the previous revision. `.iloc` with
            # three indexers and `power_chunk_dict[j, i, k]` look wrong for
            # a 2-D frame / dict - confirm the 2-D path against
            # `assign_power_from_states` before relying on it.
            for i in range(len(chunk.index)):
                for j in range(n_centroids):
                    for k in range(0, 2):
                        temp_df.iloc[i, j, k] = power_chunk_dict[j, i, k]

        temp_df = temp_df.fillna(0)

        temp = pd.DataFrame()
        for appliance in self.appliances:
            matched_col = self.best_matches[appliance]
            temp[appliance] = temp_df[matched_col]
        test_predictions_list.append(temp)

    return test_predictions_list
def partial_fit(self, train_main, train_appliances, buffer_size=20, noise_level=70, state_threshold=15, min_tolerance=100, percent_tolerance=0.035, large_transition=1000, **kwargs):
    """
    Train using Hart85. Places the learnt model in `model` attribute.

    Steady states and transients are detected on the first mains chunk,
    transients are paired, the pairs are clustered into centroids, the
    training chunk is disaggregated with those centroids and every
    appliance is matched to the centroid column returned by
    ``self.min_rmse_column``.

    Parameters
    ----------
    train_main : list
        Mains chunks; only ``train_main[0]`` is used. Its first column is
        recorded as the measurement column.
    train_appliances : list of tuples
        ``[('appliance', [df, ...]), ...]``; for each entry the ``'power'``
        column of the first frame is compared with the disaggregated
        output.
    buffer_size : int, optional
        size of the buffer to use for finding edges
    noise_level : int, optional
        passed to ``find_steady_states``
    state_threshold : int, optional
        passed to ``find_steady_states``
    min_tolerance : int, optional
        variance in power draw allowed for pairing a match
    percent_tolerance : float, optional
        if transition is greater than large_transition, then use percent
        of large_transition
    large_transition : float, optional
        power draw of a Large transition
    **kwargs
        Accepted and ignored.

    Notes
    -----
    Fixes relative to the previous revision:

    * The local ``columns`` was overwritten inside the 2-D branches, so
      ``self.model['columns']`` no longer held the measurement columns;
      the intermediate values now use their own names.
    * The 1-D/2-D decision read the loop variable after the loop and
      raised ``NameError`` when there were no transients; it is now
      derived from the transient frame's columns.
    * ``states.loc[t][col] = v`` (chained assignment) was replaced by
      ``states.loc[t, col] = v`` so the write cannot land on a temporary.
    """
    # Train_appliances : list of tuples [('appliance',df),('appliance',df)]
    self.appliances = [row[0] for row in train_appliances]

    print(
        "...........................Hart_85 Partial Fit Running..............."
    )

    train_main = train_main[0]
    columns = [train_main.columns[0]]
    self.columns = columns
    self.state_threshold = state_threshold
    self.noise_level = noise_level
    [self.steady_states, self.transients] = find_steady_states(
        train_main, noise_level=noise_level, state_threshold=state_threshold)
    self.pair_df = self.pair(buffer_size, min_tolerance, percent_tolerance,
                             large_transition)
    self.centroids = hart85_means_shift_cluster(self.pair_df, columns)

    print(
        '..............................Predicting Centroid Matching..........................'
    )

    chunk = train_main
    transients = self.transients
    # `itertuples()` yields (index, col1, ...): one data column <=> 1-D.
    is_1d = len(transients.columns) < 2

    # Every learnt meter starts in the unknown state (-1).
    states = pd.DataFrame(-1, index=chunk.index,
                          columns=self.centroids.index.values)

    for transient_tuple in transients.itertuples():
        when = transient_tuple[0]
        if when < chunk.index[0] or when > chunk.index[-1]:
            # Transient lies outside the chunk; nothing to record.
            continue

        abs_value = np.abs(transient_tuple[1:])
        positive = transient_tuple[1] > 0
        delta = pd.DataFrame((self.centroids - abs_value).abs())
        if is_1d:
            index_least_delta = delta.idxmin().values[0]
        else:
            # 2-D data: squared Euclidean distance over both columns.
            delta_columns = delta.columns
            delta["multidim"] = (delta[delta_columns[0]] ** 2
                                 + delta[delta_columns[1]] ** 2)
            index_least_delta = delta["multidim"].idxmin()

        # 1 = turned on, 0 = turned off.
        states.loc[when, index_least_delta] = 1 if positive else 0

    prev = states.iloc[-1].to_dict()
    power_chunk_dict = self.assign_power_from_states(states, prev)
    self.power_dict = power_chunk_dict
    self.chunk_index = chunk.index

    # Convert the power dict to a frame (1-D or 2-D data).
    if is_1d:
        temp_df = pd.DataFrame(power_chunk_dict, index=chunk.index)
    else:
        n_centroids = len(self.centroids.index.values)
        tuples = [[i, j] for i in range(n_centroids) for j in range(0, 2)]
        multi_columns = pd.MultiIndex.from_tuples(tuples)
        temp_df = pd.DataFrame(power_chunk_dict, index=chunk.index,
                               columns=multi_columns)
        # NOTE(review): kept as in the previous revision. `.iloc` with three
        # indexers and `power_chunk_dict[j, i, k]` look wrong for a 2-D
        # frame / dict - confirm the 2-D path against
        # `assign_power_from_states` before relying on it.
        for i in range(len(chunk.index)):
            for j in range(n_centroids):
                for k in range(0, 2):
                    temp_df.iloc[i, j, k] = power_chunk_dict[j, i, k]

    self.best_matches = {}
    temp_df = temp_df.fillna(0)
    best_matches = {}
    for row in train_appliances:
        appliance = row[0]
        appliance_df = row[1][0]
        matched_col = self.min_rmse_column(temp_df, appliance_df['power'])
        best_matches[appliance] = matched_col
    self.best_matches = best_matches

    print(
        '...................................End Centroid Matching............................'
    )

    self.model = dict(
        best_matches=best_matches,
        columns=columns,
        state_threshold=state_threshold,
        noise_level=noise_level,
        steady_states=self.steady_states,
        transients=self.transients,
        # pair_df=self.pair_df,
        centroids=self.centroids)