def run_experiment(dataset, INPUT_MEAN, INPUT_STD, SOURCE_TYPES,
                   VALIDATION_SOURCE_TYPES, DOWNSAMPLE_FACTOR, SEQ_LENGTH,
                   TARGET_SEQ_LENGTH, MAX_TARGET_POWER, TARGET_APPLIANCE,
                   TRAINING_SEED, VERBOSE_TRAINING, LEARNING_RATE,
                   NUM_SEQ_PER_BATCH, EPOCHS, STEPS_PER_EPOCH, USE_CUDA,
                   CHECKPOINT_BEST_MSE, CHECKPOINTING_EVERY_N_EPOCHS,
                   TEST_DISAGGREGATE_EVERY_N_EPOCHS, _run):
    torch.manual_seed(TRAINING_SEED)

    OUTPUT_FOLDER = os.path.join(ex.get_experiment_info()['name'], "output")
    for observer in _run.observers:
        if type(observer) is FileStorageObserver:
            OUTPUT_FOLDER = os.path.join(observer.basedir, str(_run._id))
            VERBOSE_TRAINING = 0
    os.makedirs(OUTPUT_FOLDER, exist_ok=True)
    writer = SummaryWriter(log_dir=OUTPUT_FOLDER)

    # From dataset ingredient
    TRAIN_BUILDINGS = dataset["TRAIN_BUILDINGS"]
    ON_POWER_THRESHOLD = dataset["ON_POWER_THRESHOLD"]

    ##########################################################################
    # PREPARE DATASET (DATALOADERs)
    ##########################################################################
    running_data_processes = []  # stop these at the end

    sources, validation_sources = get_sources(
        training_source_names=SOURCE_TYPES,
        validation_source_names=VALIDATION_SOURCE_TYPES,
        seq_length=SEQ_LENGTH,
        sources_seed=TRAINING_SEED,
        validation_stride=128)

    input_processing_steps = [
        Add(-INPUT_MEAN), DivideBy(INPUT_STD), Transpose((0, 2, 1))
    ]
    target_processing_steps = [
        Add(-INPUT_MEAN), DivideBy(INPUT_STD), Transpose((0, 2, 1))
    ]
    if DOWNSAMPLE_FACTOR > 1:
        downsample_rng = np.random.RandomState(TRAINING_SEED)
        input_processing_steps_training = [
            DownSample(DOWNSAMPLE_FACTOR, downsample_rng)
        ] + input_processing_steps
    else:
        input_processing_steps_training = input_processing_steps

    validation_pipeline = DataPipeline(
        sources=validation_sources,
        num_seq_per_batch=NUM_SEQ_PER_BATCH,
        input_processing=input_processing_steps_training,
        target_processing=target_processing_steps)
    validation_batches = get_validation_batches(validation_pipeline)
    print("appliance {} has {} validation batches".format(
        TARGET_APPLIANCE,
        sum([len(v) for k, v in validation_batches.items()])))

    data_pipeline = DataPipeline(
        sources=sources,
        num_seq_per_batch=NUM_SEQ_PER_BATCH,
        input_processing=input_processing_steps_training,
        target_processing=target_processing_steps)
    data_thread = DataProcess(data_pipeline)
    data_thread.start()
    running_data_processes.append(data_thread)

    net = _Net(SEQ_LENGTH)
    print(net)

    metrics_accu = MetricsAccumulator(
        on_power_threshold=ON_POWER_THRESHOLD, max_power=MAX_TARGET_POWER)

    # note: MSE - Mean Squared Error
    criterion = torch.nn.MSELoss()

    stop_training = False
    best_mse = None

    # PREPARE TEST DISAGGREGATOR
    if TEST_DISAGGREGATE_EVERY_N_EPOCHS is not None:
        test_disaggregator = Disaggregator(
            EVALUATION_DATA_PATH=dataset['EVALUATION_DATA_PATH'],
            TARGET_APPLIANCE=TARGET_APPLIANCE,
            ON_POWER_THRESHOLD=ON_POWER_THRESHOLD,
            MAX_TARGET_POWER=MAX_TARGET_POWER,
            pad_mains=True,
            pad_appliance=False,
            disagg_func=disag_seq2seq,
            downsample_factor=DOWNSAMPLE_FACTOR,
            disagg_kwargs=dict(
                model=net,
                input_processing=input_processing_steps,
                target_processing=target_processing_steps,
                n_seq_per_batch=NUM_SEQ_PER_BATCH,
                seq_length=SEQ_LENGTH,
                target_seq_length=TARGET_SEQ_LENGTH,
                USE_CUDA=USE_CUDA,
                stride=1))

    # PREPARE TENSORS, WHICH WILL BE FED DURING TRAINING AND VALIDATION
    input = torch.FloatTensor(NUM_SEQ_PER_BATCH, 1, SEQ_LENGTH)
    target = torch.FloatTensor(NUM_SEQ_PER_BATCH, 1, TARGET_SEQ_LENGTH)

    if USE_CUDA:
        # note: push to GPU
        net.cuda()
        criterion.cuda()
        input, target = input.cuda(), target.cuda()

    # setup optimizer. TODO: Should we use 'Adam' for the disaggregator?
    optimizer = optim.Adam(net.parameters(), lr=LEARNING_RATE, betas=(0.9, 0.999))
    #optimizer = optim.SGD(net.parameters(), momentum=0.9, nesterov=True, lr=LEARNING_RATE)
    scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[50, 75], gamma=0.1)

    history = {}
    csvpath = os.path.join(OUTPUT_FOLDER, "history.csv")
    if os.path.exists(csvpath):
        print("Already exists: {}".format(csvpath))
        return -1

    progbar_epoch = tqdm(desc="Epoch", total=EPOCHS, unit="epoch",
                         disable=(not VERBOSE_TRAINING))
    for epoch in range(EPOCHS):
        # TRAINING
        metrics_log = {'training': {}}
        training_loss = 0.0
        progbar = tqdm(desc="Train", total=STEPS_PER_EPOCH, leave=False,
                       disable=(not VERBOSE_TRAINING))
        for i in range(STEPS_PER_EPOCH):
            net.zero_grad()

            batch = data_thread.get_batch()
            while batch is None:
                batch = data_thread.get_batch()
            qsize = data_thread._queue.qsize()

            aggregated_signal = torch.from_numpy(batch.after_processing.input)
            target_signal = torch.from_numpy(batch.after_processing.target)
            if USE_CUDA:
                aggregated_signal = aggregated_signal.cuda()
                target_signal = target_signal.cuda()
            input.resize_as_(aggregated_signal).copy_(aggregated_signal)
            target.resize_as_(target_signal).copy_(target_signal)
            inputv = Variable(input, requires_grad=False)
            targetv = Variable(target, requires_grad=False)

            output = net(inputv)
            loss = criterion(output, targetv)
            loss.backward()
            optimizer.step()

            training_loss += loss.item()
            progbar.set_postfix(dict(loss="{:.4f}".format(loss.item()), qsize=qsize),
                                refresh=False)
            progbar.update()

        metrics_log['training']['loss'] = float(training_loss / STEPS_PER_EPOCH)
        metrics_log['training']['lr'] = optimizer.param_groups[0]['lr']

        # VALIDATION
        #pr_num_thresholds = 127
        for fold in validation_batches:
            # `fold` is a tuple; its two elements are used as keys in `metrics_log` below.
            metrics_accu.reset_accumulator()
            #accumulated_pr = {}
            #for cl in ["tp", "tn", "fp", "fn"]:
            #    accumulated_pr[cl] = torch.LongTensor(pr_num_thresholds).zero_()
            for batch in validation_batches[fold]:
                aggregated_signal = torch.from_numpy(batch.after_processing.input)
                target_signal = torch.from_numpy(batch.after_processing.target)
                if USE_CUDA:
                    aggregated_signal = aggregated_signal.cuda()
                    target_signal = target_signal.cuda()
                input.resize_as_(aggregated_signal).copy_(aggregated_signal)
                target.resize_as_(target_signal).copy_(target_signal)
                with torch.no_grad():
                    inputv = Variable(input)
                    targetv = Variable(target)
                    output = net(inputv)
                    val_loss = criterion(output, targetv)
                loss_value = val_loss.item()

                # other metrics
                pred_y = data_pipeline.apply_inverse_processing(
                    output.cpu().data.numpy(), 'target')
                true_y = batch.before_processing.target
                metrics_accu.accumulate_metrics(true_y, pred_y, val_loss=loss_value)
                #calculate_pr_curve_torch(accumulated_pr, MAX_TARGET_POWER, true_y,
                #                         pred_y, num_thresholds=pr_num_thresholds)

            for key, value in metrics_accu.finalize_metrics().items():
                metrics_log.setdefault(fold[0], {}).setdefault(key, {})[fold[1]] = value

            #precision = accumulated_pr["tp"] / (accumulated_pr["tp"] + accumulated_pr["fp"])
            #recall = accumulated_pr["tp"] / (accumulated_pr["tp"] + accumulated_pr["fn"])
            #writer.add_pr_curve_raw("pr_{}/{}".format(fold[0], fold[1]),
            #                        true_positive_counts=accumulated_pr["tp"],
            #                        false_positive_counts=accumulated_pr["fp"],
            #                        true_negative_counts=accumulated_pr["tn"],
            #                        false_negative_counts=accumulated_pr["fn"],
            #                        precision=precision, recall=recall,
            #                        global_step=(epoch + 1) * STEPS_PER_EPOCH,
            #                        num_thresholds=pr_num_thresholds)

        # LR Scheduler
        val_loss = metrics_log['unseen_activations']['val_loss']['rss']
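        # NOTE: the MultiStepLR scheduler configured above decays the learning rate on
        # epoch count alone (milestones 50 and 75); the commented-out call below would
        # step on this validation loss instead.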
        #val_loss = metrics_log['mean_squared_error']['unseen_activations']['rss']
        #scheduler.step(val_loss)
        scheduler.step()

        # PRINT STATS
        if not VERBOSE_TRAINING:
            print('[{:d}/{:d}] {}'.format(epoch + 1, EPOCHS, metrics_log['training']))
        else:
            progbar_epoch.set_postfix(dict(loss=metrics_log['training']['loss']),
                                      refresh=False)
        progbar_epoch.update()
        progbar.close()

        # store in history / tensorboard
        for fold, metrics_for_fold in metrics_log.items():
            for metric_name, value in metrics_for_fold.items():
                if type(value) == dict:
                    SW_add_scalars2(writer, "{}/{}".format(fold, metric_name),
                                    value, (epoch + 1) * STEPS_PER_EPOCH)
                    for k, v in value.items():
                        name = "{}/{}/{}".format(fold, metric_name, k)
                        history.setdefault(name, []).append(v)
                else:
                    name = "{}/{}".format(fold, metric_name)
                    writer.add_scalar(name, value, (epoch + 1) * STEPS_PER_EPOCH)
                    history.setdefault(name, []).append(value)

        # CHECKPOINTING
        if CHECKPOINT_BEST_MSE:
            mse = val_loss
            if best_mse is None:
                best_mse = mse
            if best_mse > mse:
                msg = ("[{:d}/{:d}] MSE improved from {:.4f} to {:.4f} (d={:f}), "
                       "saving model...").format(epoch + 1, EPOCHS, best_mse, mse,
                                                 best_mse - mse)
                if not VERBOSE_TRAINING:
                    print(msg)
                else:
                    progbar_epoch.write(msg)
                torch.save(
                    {
                        'epoch': epoch + 1,
                        'step': (epoch + 1) * STEPS_PER_EPOCH,
                        'mse': mse,
                        'model': net.state_dict()
                    }, '{}/net_best_mse.pth.tar'.format(OUTPUT_FOLDER))
                best_mse = mse

        if CHECKPOINTING_EVERY_N_EPOCHS is not None:
            if (epoch + 1) % CHECKPOINTING_EVERY_N_EPOCHS == 0:
                torch.save(
                    net.state_dict(),
                    '{}/net_step_{:06d}.pth'.format(OUTPUT_FOLDER,
                                                    (epoch + 1) * STEPS_PER_EPOCH))

        if TEST_DISAGGREGATE_EVERY_N_EPOCHS is not None:
            if (epoch + 1) % TEST_DISAGGREGATE_EVERY_N_EPOCHS == 0:
                scores = test_disaggregator.calculate_metrics()
                scores_by_metric = {}
                for building_i, building in scores.items():
                    for metric, value in building.items():
                        scores_by_metric.setdefault(metric, {})[building_i] = value
                for metric, building_d in scores_by_metric.items():
                    SW_add_scalars2(writer, "test_score/{}".format(metric),
                                    building_d, (epoch + 1) * STEPS_PER_EPOCH)

        if stop_training:
            break

    # CHECKPOINTING at end
    torch.save(
        {
            'epoch': epoch + 1,
            'step': (epoch + 1) * STEPS_PER_EPOCH,
            'model': net.state_dict(),
            'optimizer': optimizer.state_dict(),
            #'scheduler': scheduler.state_dict()  # TODO: scheduler is not saved this way,
            #                                     # scheduler.state_dict() does not exist
        }, '{}/net_step_{:06d}.pth.tar'.format(OUTPUT_FOLDER,
                                               (epoch + 1) * STEPS_PER_EPOCH))

    df = pd.DataFrame(history)
    df.to_csv(csvpath)

    for p in running_data_processes:
        p.stop()
    writer.close()

    #return 42
    return metrics_log['training']['loss']
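
# Illustrative sketch, not part of the pipeline: the block-average resampling that
# Disaggregator._resample_mains (below) applies to the evaluation mains, shown as a
# small standalone function. Only `numpy as np` is assumed, as elsewhere in this module.
def _example_block_average(signal, factor):
    """Average `signal` over non-overlapping blocks of `factor` samples.

    The tail is zero-padded up to a multiple of `factor` (mirroring
    `_resample_mains`, so the last block mean includes the padded zeros) and the
    result keeps the original length.

    Example: _example_block_average(np.arange(10.0), 4)
             -> [1.5]*4 + [5.5]*4 + [4.25]*2
    """
    n = len(signal)
    n_blocks = int(np.ceil(n / factor))
    padded = np.pad(np.asarray(signal, dtype=float),
                    (0, n_blocks * factor - n), mode='constant')
    means = padded.reshape(n_blocks, factor).mean(axis=1)
    return np.repeat(means, factor)[:n]
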
class Disaggregator():

    def __init__(self, EVALUATION_DATA_PATH, TARGET_APPLIANCE, ON_POWER_THRESHOLD,
                 MAX_TARGET_POWER, disagg_func, disagg_kwargs,
                 remove_vampire_power=False, pad_mains=True, pad_appliance=False,
                 downsample_factor=1):
        self.EVALUATION_DATA_PATH = EVALUATION_DATA_PATH
        self.BUILDINGS = []
        self.TARGET_APPLIANCE = TARGET_APPLIANCE
        if TARGET_APPLIANCE == "dish washer":
            self.ON_POWER_THRESHOLD = 15
        else:
            self.ON_POWER_THRESHOLD = ON_POWER_THRESHOLD
        self.MAX_TARGET_POWER = MAX_TARGET_POWER
        self.PAD_WIDTH = 1536
        self.pad_mains = pad_mains
        self.pad_appliance = pad_appliance
        self.disagg_func = disagg_func
        self.disagg_kwargs = disagg_kwargs
        self.downsample_factor = downsample_factor
        self.metrics = MetricsAccumulator(self.ON_POWER_THRESHOLD, 4200)
        self._init_data(remove_vampire_power)

    def _init_data(self, remove_vampire_power):
        re_building_filename = re.compile(r"^((.*)_(.*))\.pkl$")
        self.mains = {}
        self.appliance_y_true = {}
        for filename in os.listdir(self.EVALUATION_DATA_PATH):
            re_match = re_building_filename.match(filename)
            if re_match:
                building_i = re_match.group(1)
                mains, y_true = self._load_data(
                    filename, remove_vampire_power,
                    pad_mains=self.pad_mains,
                    pad_appliance=self.pad_appliance)
                if mains is None:
                    # file does not contain the target appliance
                    continue
                self.BUILDINGS.append(building_i)
                self.mains[building_i] = mains
                self.appliance_y_true[building_i] = y_true

    def _load_data(self, filename, remove_vampire_power, pad_mains=True,
                   pad_appliance=False):
        # Load mains
        filename = os.path.join(self.EVALUATION_DATA_PATH, filename)
        df = pd.read_pickle(filename)
        if self.TARGET_APPLIANCE not in df:
            return None, None
        mains = df['mains'].values
        if remove_vampire_power:
            vampire_power = df['mains'].quantile(0.0002)
            mains = np.clip(mains - vampire_power, 0, None)
        if self.downsample_factor > 1:
            mains = self._resample_mains(mains)
        # Pad
        if pad_mains:
            mains = np.pad(mains, pad_width=(self.PAD_WIDTH, self.PAD_WIDTH),
                           mode='constant')
        y_true = df[self.TARGET_APPLIANCE].values
        if pad_appliance:
            y_true = np.pad(y_true, pad_width=(self.PAD_WIDTH, self.PAD_WIDTH),
                            mode='constant')
        return mains, y_true

    def _resample_mains(self, mains):
        # Block-average the mains: every `downsample_factor` consecutive samples are
        # replaced by their mean, keeping the original length.
        mains_length_odd = len(mains)
        downsample_factor = self.downsample_factor
        n_samples_new = int(np.ceil(mains_length_odd / downsample_factor))
        mains_length_even = n_samples_new * downsample_factor
        mains_resampled = np.pad(
            mains, pad_width=(0, mains_length_even - mains_length_odd),
            mode='constant')
        mains_resampled = mains_resampled.reshape((n_samples_new, downsample_factor))
        mains_resampled[:, :] = mains_resampled.mean(axis=1)[:, np.newaxis]
        return mains_resampled.reshape((-1))[:mains_length_odd]

    def get_mains(self, building_i):
        if self.pad_mains:
            return self.mains[building_i][self.PAD_WIDTH:-self.PAD_WIDTH]
        else:
            return self.mains[building_i]

    def disaggregate(self, building_i, return_sequences=False):
        kwargs = dict(
            mains=self.mains[building_i],
            target=self.appliance_y_true[building_i],
            max_target_power=self.MAX_TARGET_POWER,
            building_i=building_i,
            return_sequences=return_sequences)
        kwargs.update(self.disagg_kwargs)
        if return_sequences:
            estimates, sequences = self.disagg_func(**kwargs)
        else:
            estimates = self.disagg_func(**kwargs)
        if self.pad_mains and not self.pad_appliance:
            # remove padding
            estimates = estimates[self.PAD_WIDTH:-self.PAD_WIDTH]
        estimates = np.round(estimates).astype(int)
        if return_sequences:
            return estimates, sequences
        else:
            return estimates
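    # Note on padding: with `pad_mains` set (the default), `_load_data` pads the mains
    # signal with PAD_WIDTH zeros on both sides so that sliding windows near the edges
    # of a recording can still be disaggregated. `disaggregate` strips that padding from
    # the estimates again (unless the appliance signal was padded too), and
    # `calculate_metrics` below truncates prediction and ground truth to a common length
    # before scoring.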
    def calculate_metrics(self, return_sequences=False):
        scores = {}
        estimates = {}
        sequences = {}
        for building_i in self.BUILDINGS:
            mains = self.mains[building_i]
            y_true = self.appliance_y_true[building_i]
            if return_sequences:
                y_pred, y_sequences = self.disaggregate(building_i,
                                                        return_sequences=True)
            else:
                y_pred = self.disaggregate(building_i)
            if self.pad_appliance:
                # remove padding
                y_true = y_true[self.PAD_WIDTH:-self.PAD_WIDTH]
                y_pred = y_pred[self.PAD_WIDTH:-self.PAD_WIDTH]
            # Truncate to a common length
            n = min(len(y_true), len(y_pred))
            y_true = y_true[:n]
            y_pred = y_pred[:n]
            #np.savez("building_{}".format(building_i), y_true=y_true, y_pred=y_pred, mains=mains)
            scores[building_i] = self.metrics.run_metrics(y_true, y_pred, mains)
            if return_sequences:
                sequences[building_i] = y_sequences
            estimates[building_i] = y_pred
        if return_sequences:
            return scores, estimates, sequences
        else:
            return scores

    def save_disaggregation_data(self, model_name, estimates, sequences,
                                 SAVETO_DIR='./disaggregation_data/'):
        model_name = model_name.replace(" ", "_")
        SAVETO_DIR = os.path.join(SAVETO_DIR, model_name)
        for building_i in sequences:
            SAVETO_PartialPATH = os.path.join(SAVETO_DIR, str(building_i))
            os.makedirs(SAVETO_PartialPATH, exist_ok=True)
            SAVETO_PATHs = [
                os.path.join(SAVETO_PartialPATH, "estimate_windows.npz"),
                os.path.join(SAVETO_PartialPATH, "estimate_average.npy"),
                os.path.join(SAVETO_PartialPATH, "mains.npy"),
                os.path.join(SAVETO_PartialPATH, "appliance.npy")
            ]
            for SAVETO_PATH in SAVETO_PATHs:
                assert not os.path.exists(SAVETO_PATH), \
                    "ERROR: File {} already exists!".format(SAVETO_PATH)
            np.savez(SAVETO_PATHs[0], **sequences[building_i])
            np.save(SAVETO_PATHs[1], estimates[building_i])
            np.save(SAVETO_PATHs[2], self.mains[building_i])
            np.save(SAVETO_PATHs[3], self.appliance_y_true[building_i])
            print("INFO: saved estimate windows to {}".format(SAVETO_PATHs[0]))
            print("INFO: saved estimate averages to {}".format(SAVETO_PATHs[1]))
            print("INFO: saved mains to {}".format(SAVETO_PATHs[2]))
            print("INFO: saved appliance_y_true to {}".format(SAVETO_PATHs[3]))


def load_disaggregation_data(PATH):
    return np.load(PATH)
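
# Usage sketch (hypothetical settings): evaluate a Disaggregator with a trivial
# stand-in disaggregation function. The real experiment passes `disag_seq2seq`
# together with the trained network (see run_experiment above); the path, appliance
# name and power values here are placeholders only.
def _example_disaggregator_usage():
    def zero_disagg(mains, target, max_target_power, building_i,
                    return_sequences=False, **kwargs):
        # Stand-in model: predict "always off".
        return np.zeros_like(mains)

    disaggregator = Disaggregator(
        EVALUATION_DATA_PATH='./evaluation_data',  # placeholder directory of *.pkl files
        TARGET_APPLIANCE='kettle',                 # placeholder appliance
        ON_POWER_THRESHOLD=2000,                   # placeholder threshold [W]
        MAX_TARGET_POWER=3100,                     # placeholder max power [W]
        disagg_func=zero_disagg,
        disagg_kwargs={})
    # Returns one score dict per building, as produced by calculate_metrics above.
    return disaggregator.calculate_metrics()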