def __init__(
    self,
    path_data: str,
    subset: str = "train",
    input_len: int = 510,
    border: int = 15,
    buildings: dict = None,
    train_size: float = 0.8,
    valid_size: float = 0.1,
    random_split: bool = False,
    random_seed: int = 0,
    threshold: dict = None,
    **kwargs,
):
    """
    Stores the dataset configuration, lists the data files and builds the
    threshold object.

    Parameters
    ----------
    path_data : str
        Forwarded to `self._list_files`.
    subset : str
        Stored as `self.subset`; also the key used to pick the entry
        of `buildings`.
    input_len : int
        Stored as `self.length`.
    border : int
        Stored as `self.border`.
    buildings : dict
        Mapping indexed by `subset`. When None, `self.buildings` is an
        empty dict.
    train_size, valid_size : float
        Stored as `self.train_size` and `self.validation_size`.
    random_split : bool
        Stored as `self.random_split`.
    random_seed : int
        Stored as `self.random_seed`.
    threshold : dict
        Keyword arguments forwarded to `Threshold`. None means no extra
        arguments.
    kwargs
        Ignored; their names are only written to the debug log.
    """
    self.subset = subset
    self.border = border
    self.length = input_len
    if buildings is None:
        self.buildings = {}
    else:
        self.buildings = buildings[subset]
    self.train_size = train_size
    self.validation_size = valid_size
    self.random_split = random_split
    self.random_seed = random_seed

    # These helpers run after the attributes above are in place.
    # NOTE(review): presumably they populate `self.appliances`, which is
    # read right below - confirm against their definitions.
    self._list_files(path_data)
    self._get_parameters_from_file()

    # Build the threshold handler from the optional configuration
    threshold_kwargs = threshold if threshold is not None else {}
    self.threshold = Threshold(appliances=self.appliances, **threshold_kwargs)

    # Report the keyword arguments that were accepted but not used
    unused = ", ".join(kwargs.keys())
    logger.debug(f"Dataset received extra kwargs, not used:\n {unused}")
def __init__(
    self,
    border: int = 15,
    input_len: int = 510,
    regression_w: float = 1,
    classification_w: float = 1,
    batch_size: int = 32,
    shuffle: bool = True,
    reg_loss_avg: float = 0.68,
    class_loss_avg: float = 0.0045,
    name: str = "Model",
    epochs: int = 300,
    patience: int = 300,
    appliances: list = None,
    init_features: int = 32,
    dropout: float = 0.1,
    learning_rate: float = 1e-4,
    power_scale: int = 2000,
    threshold: dict = None,
    **kwargs,
):
    """
    Selects the torch device, stores the model configuration and builds
    the threshold object.

    Parameters
    ----------
    border : int
        Stored as `self.border` and `self._limit`.
    input_len : int
        Stored as `self.input_len`; `self.output_len` is
        `input_len - 2 * border`.
    regression_w, classification_w : float
        Stored as `self.pow_w` and `self.act_w`.
    batch_size : int
        Stored as `self.batch_size`.
    shuffle : bool
        Stored as `self.shuffle`.
    reg_loss_avg, class_loss_avg : float
        Stored as `self.pow_loss_avg` and `self.act_loss_avg`.
    name : str
        Stored as `self.name`.
    epochs, patience : int
        Stored as `self.epochs` and `self.patience`.
    appliances : list
        Stored sorted as `self.appliances`; None gives an empty list.
    init_features : int
        Stored as `self.init_features`.
    dropout : float
        Stored as `self.dropout`.
    learning_rate : float
        Stored as `self.learning_rate`.
    power_scale : int
        Stored as `self.power_scale`.
    threshold : dict
        Keyword arguments forwarded to `Threshold`. None means no extra
        arguments.
    kwargs
        Ignored; their names are only written to the debug log.
    """
    # Use the GPU whenever CUDA is available, otherwise the CPU
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    logger.debug(f"Using device: {self.device}")

    # Window geometry: the output drops `border` points on each side
    self.border = border
    self._limit = self.border
    self.input_len = input_len
    self.output_len = self.input_len - 2 * self.border

    # Loss weights and loss averages
    self.pow_w = regression_w
    self.act_w = classification_w
    self.batch_size = batch_size
    self.shuffle = shuffle
    self.pow_loss_avg = reg_loss_avg
    self.act_loss_avg = class_loss_avg

    # Training schedule and model name
    self.epochs = epochs
    self.patience = patience
    self.name = name

    # Appliances are kept sorted; each one gets a matching status label
    if appliances is None:
        self.appliances = []
    else:
        self.appliances = sorted(appliances)
    self.status = [label + "_status" for label in self.appliances]
    self.num_apps = len(self.appliances)

    # Remaining hyperparameters
    self.init_features = init_features
    self.dropout = dropout
    self.learning_rate = learning_rate
    self.power_scale = power_scale

    # Build the threshold handler from the optional configuration
    threshold_kwargs = threshold if threshold is not None else {}
    self.threshold = Threshold(appliances=self.appliances, **threshold_kwargs)

    # Report the keyword arguments that were accepted but not used
    unused = ", ".join(kwargs.keys())
    logger.debug(f"Received extra kwargs, not used:\n {unused}")
def test_many_models(path_data: str, path_output: str, config: dict):
    """
    Scores several stored models on the test subset.

    For every index i below config["num_models"], a fresh model is
    initialized, its weights are loaded from "model_{i}.pth" and its
    scores are written to "scores_{i}.txt". The scores averaged over all
    models are written to "scores.txt". All files live in the model
    parameters output folder.
    """
    # Resolve the output folders
    model_dir = generate_path_output_model(path_output, config["name"])
    params_dir = generate_path_output_model_params(model_dir, config)

    # Test data, served in a fixed order
    test_loader = DataLoader(path_data, subset="test", shuffle=False, **config)

    all_scores = []
    for idx in range(config["num_models"]):
        logger.debug(f"\nModel {idx + 1}\n")

        # Rebuild the model and restore its stored weights
        model = initialize_model(config)
        weights_path = os.path.join(params_dir, f"model_{idx}.pth")
        model.load(weights_path)

        # Predict, score and store the scores of this model
        predictions = model.predictions_to_dictionary(test_loader)
        scores = score_dict_predictions(predictions)
        all_scores.append(scores)
        store_scores(
            config,
            scores,
            path_output=os.path.join(params_dir, f"scores_{idx}.txt"),
        )

        # Drop the reference before the next model is created
        del model

    # Collapse the per-model scores into one dictionary and store it
    mean_scores = average_list_dict_scores(all_scores)
    store_scores(
        config,
        mean_scores,
        path_output=os.path.join(params_dir, "scores.txt"),
    )
def compute_thresholds(self):
    """
    Load the threshold of each appliance, computing them when missing.

    The thresholds are first read from `self.path_threshold`. If that
    raises ConfigError, one threshold per appliance in
    `self.dataset.appliances` is updated from the appliance power series,
    and the result is written back to `self.path_threshold`.

    When the fallback runs on a subset other than "train", an error is
    logged and the thresholds are still computed from the current subset.
    """
    threshold = self.dataset.threshold

    # First try to load the thresholds; nothing else to do on success
    try:
        threshold.read_config(self.path_threshold)
    except ConfigError:
        if self.subset != "train":
            # NOTE(review): the message asks for the train subset, yet the
            # computation below still runs - confirm this is intended.
            logger.error(
                "Threshold values not found. "
                "Please compute them first with the train subset!"
            )
        logger.debug("Threshold values not found. Computing them...")
    else:
        return

    # Update the threshold of each appliance from its power series
    for app_idx, app in enumerate(self.dataset.appliances):
        ser = self.get_appliance_power_series(app_idx)
        threshold.update_appliance_threshold(ser, app)

    # Write the config file so later runs can load it
    threshold.write_config(self.path_threshold)
def train_many_models(path_data: str, path_output: str, config: dict):
    """
    Trains several models under the same configuration.

    For every index i below config["num_models"], a fresh model is
    initialized, trained with the train and validation dataloaders, saved
    as "model_{i}.pth" and scored on the test subset, with scores written
    to "scores_{i}.txt". The averaged scores and the mean value returned
    by `model.train` are written to "scores.txt", and plots are stored
    from the predictions of the last trained model. The "temp_train" and
    "temp_valid" directories are removed at the end.
    """
    # Resolve the output folders
    model_dir = generate_path_output_model(path_output, config["name"])
    params_dir = generate_path_output_model_params(model_dir, config)

    # One dataloader per subset; only the test one keeps a fixed order
    train_loader = DataLoader(path_data, subset="train", shuffle=True, **config)
    valid_loader = DataLoader(
        path_data, subset="validation", shuffle=True, **config
    )
    test_loader = DataLoader(path_data, subset="test", shuffle=False, **config)
    generate_temporal_data(train_loader, path="temp_train")
    generate_temporal_data(valid_loader, path="temp_valid")

    all_scores = []
    train_times = []
    # Stays empty when no model is trained; otherwise holds the
    # predictions of the last model, which are the ones plotted below
    predictions = {}
    for idx in range(config["num_models"]):
        logger.debug(f"\nModel {idx + 1}\n")

        # Build and train a fresh model
        model = initialize_model(config)
        elapsed = model.train(
            train_loader,
            valid_loader,
        )
        train_times.append(elapsed)

        # Persist the trained model
        weights_path = os.path.join(params_dir, f"model_{idx}.pth")
        model.save(weights_path)

        # Predict, score and store the scores of this model
        predictions = model.predictions_to_dictionary(test_loader)
        scores = score_dict_predictions(predictions)
        all_scores.append(scores)
        store_scores(
            config,
            scores,
            time_elapsed=elapsed,
            path_output=os.path.join(params_dir, f"scores_{idx}.txt"),
        )

    # Collapse the per-model scores into one dictionary and store it
    mean_scores = average_list_dict_scores(all_scores)
    store_scores(
        config,
        mean_scores,
        time_elapsed=np.mean(train_times),
        path_output=os.path.join(params_dir, "scores.txt"),
    )

    store_plots(predictions, path_output=params_dir)

    # Clean up the temporary data directories
    remove_directory("temp_train")
    remove_directory("temp_valid")