def __init__(
        self,
        path_data: str,
        subset: str = "train",
        input_len: int = 510,
        border: int = 15,
        buildings: dict = None,
        train_size: float = 0.8,
        valid_size: float = 0.1,
        random_split: bool = False,
        random_seed: int = 0,
        threshold: dict = None,
        **kwargs,
    ):
        self.subset = subset
        self.border = border
        self.length = input_len
        self.buildings = {} if buildings is None else buildings[subset]
        self.train_size = train_size
        self.validation_size = valid_size
        self.random_split = random_split
        self.random_seed = random_seed
        self._list_files(path_data)
        self._get_parameters_from_file()

        # Set the parameters according to the given threshold method
        param_thresh = {} if threshold is None else threshold
        self.threshold = Threshold(appliances=self.appliances, **param_thresh)

        logger.debug(
            f"Dataset received extra kwargs that are not used:\n     {', '.join(kwargs.keys())}"
        )
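The constructor above indexes the buildings argument with the chosen subset, so the dictionary is expected to carry one entry per split. A minimal usage sketch follows; the class name NilmDataset and the per-subset contents are assumptions for illustration (the real inner structure is whatever _list_files expects), while the "train"/"validation"/"test" keys and the parameters come from the snippets in this section.

# Illustrative sketch only: class name and per-subset contents are hypothetical
buildings = {
    "train": {"building_1", "building_2"},
    "validation": {"building_1"},
    "test": {"building_5"},
}
dataset_train = NilmDataset(        # hypothetical class name
    path_data="path/to/data",
    subset="train",                 # buildings["train"] is selected internally
    input_len=510,
    border=15,
    buildings=buildings,
    threshold=None,                 # or a dict of kwargs forwarded to Threshold
)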
Example #2
    def __init__(
        self,
        border: int = 15,
        input_len: int = 510,
        regression_w: float = 1,
        classification_w: float = 1,
        batch_size: int = 32,
        shuffle: bool = True,
        reg_loss_avg: float = 0.68,
        class_loss_avg: float = 0.0045,
        name: str = "Model",
        epochs: int = 300,
        patience: int = 300,
        appliances: list = None,
        init_features: int = 32,
        dropout: float = 0.1,
        learning_rate: float = 1e-4,
        power_scale: int = 2000,
        threshold: dict = None,
        **kwargs,
    ):
        self.device = (
            torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
        )
        logger.debug(f"Using device: {self.device}")

        # Parameters expected to be found in the configuration dictionary
        self.border = border
        self._limit = self.border
        self.input_len = input_len
        self.output_len = self.input_len - 2 * self.border
        self.pow_w = regression_w
        self.act_w = classification_w
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.pow_loss_avg = reg_loss_avg
        self.act_loss_avg = class_loss_avg
        self.epochs = epochs
        self.patience = patience
        self.name = name
        self.appliances = [] if appliances is None else sorted(appliances)
        self.status = [app + "_status" for app in self.appliances]
        self.num_apps = len(self.appliances)
        self.init_features = init_features
        self.dropout = dropout
        self.learning_rate = learning_rate
        self.power_scale = power_scale

        # Set the parameters according to the given threshold method
        param_thresh = {} if threshold is None else threshold
        self.threshold = Threshold(appliances=self.appliances, **param_thresh)

        logger.debug(f"Received extra kwargs that are not used:\n   {', '.join(kwargs.keys())}")
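The only derived parameter in this constructor is the output window length: border samples are trimmed from each side of the input window. With the defaults shown above, the arithmetic is simply:

input_len = 510
border = 15
output_len = input_len - 2 * border   # 510 - 2 * 15 = 480 samples predicted per window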
Example #3
def test_many_models(path_data: str, path_output: str, config: dict):
    """
    Runs several models with the same conditions.
    Stores plots and the average scores of those models.
    """
    # Set output path
    path_out_model = generate_path_output_model(path_output, config["name"])
    path_out_model_params = generate_path_output_model_params(path_out_model, config)

    # Load dataloader
    dataloader_test = DataLoader(path_data, subset="test", shuffle=False, **config)

    # Initialize the list of per-model score dictionaries
    list_dict_scores = [{} for _ in range(config["num_models"])]

    for i in range(config["num_models"]):
        logger.debug(f"\nModel {i + 1}\n")

        model = initialize_model(config)

        # Load the model
        path_model = os.path.join(path_out_model_params, f"model_{i}.pth")
        model.load(path_model)

        dict_pred = model.predictions_to_dictionary(dataloader_test)
        list_dict_scores[i] = score_dict_predictions(dict_pred)

        store_scores(
            config,
            list_dict_scores[i],
            path_output=os.path.join(path_out_model_params, f"scores_{i}.txt"),
        )

        del model

    # List of dicts to unique dict scores
    dict_scores = average_list_dict_scores(list_dict_scores)

    # Store scores and plot
    store_scores(
        config,
        dict_scores,
        path_output=os.path.join(path_out_model_params, "scores.txt"),
    )
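test_many_models reads only config["name"] and config["num_models"] directly; the rest of the dictionary is forwarded as keyword arguments to DataLoader and to initialize_model, so in practice it also carries the hyperparameters of the two __init__ methods above. A hedged sketch of such a configuration (the appliance names and values are placeholders, not taken from the source):

config = {
    "name": "Model",
    "num_models": 5,
    "input_len": 510,
    "border": 15,
    "batch_size": 32,
    "epochs": 300,
    "appliances": ["dish_washer", "fridge", "washing_machine"],  # placeholder appliances
    "threshold": None,
}
test_many_models("path/to/data", "path/to/output", config)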
Example #4
    def compute_thresholds(self):
        """Compute the thresholds of each appliance"""
        # First try to load the thresholds
        try:
            self.dataset.threshold.read_config(self.path_threshold)
            return
        # If not possible, compute them
        except ConfigError:
            if self.subset != "train":
                logger.error(
                    "Threshold values not found. "
                    "Please compute them first with the train subset!"
                )
            logger.debug("Threshold values not found. Computing them...")
            # Loop through each appliance
            for app_idx, app in enumerate(self.dataset.appliances):
                ser = self.get_appliance_power_series(app_idx)
                # Concatenate all values and update the threshold
                self.dataset.threshold.update_appliance_threshold(ser, app)
            # Write the config file
            self.dataset.threshold.write_config(self.path_threshold)
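The error branch above implies a call order: thresholds are computed and written once from the train subset, after which the other subsets only read the stored config. A minimal sketch of that order, assuming train_loader and test_loader are instances of the class this method belongs to (the class itself is not shown in this section):

train_loader.compute_thresholds()   # computes the thresholds and writes path_threshold
test_loader.compute_thresholds()    # finds the config file and simply reads it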
Example #5
def train_many_models(path_data: str, path_output: str, config: dict):
    """
    Runs several models with the same conditions.
    Stores plots and the average scores of those models.
    """
    # Set output path
    path_out_model = generate_path_output_model(path_output, config["name"])
    path_out_model_params = generate_path_output_model_params(path_out_model, config)

    # Load dataloader
    dataloader_train = DataLoader(path_data, subset="train", shuffle=True, **config)
    dataloader_validation = DataLoader(
        path_data, subset="validation", shuffle=True, **config
    )
    dataloader_test = DataLoader(path_data, subset="test", shuffle=False, **config)

    generate_temporal_data(dataloader_train, path="temp_train")
    generate_temporal_data(dataloader_validation, path="temp_valid")

    # Initialize the lists of per-model scores and training times
    list_dict_scores = [{} for _ in range(config["num_models"])]
    time_elapsed = [0.0] * config["num_models"]
    # dict_pred keeps the predictions of the last trained model, used for the plots below
    dict_pred = {}

    for i in range(config["num_models"]):
        logger.debug(f"\nModel {i + 1}\n")

        # Initialize the model
        model = initialize_model(config)

        # Train
        time_elapsed[i] = model.train(
            dataloader_train,
            dataloader_validation,
        )

        # Store the model
        path_model = os.path.join(path_out_model_params, f"model_{i}.pth")
        model.save(path_model)

        dict_pred = model.predictions_to_dictionary(dataloader_test)
        list_dict_scores[i] = score_dict_predictions(dict_pred)

        store_scores(
            config,
            list_dict_scores[i],
            time_elapsed=time_elapsed[i],
            path_output=os.path.join(path_out_model_params, f"scores_{i}.txt"),
        )

    # List of dicts to unique dict scores
    dict_scores = average_list_dict_scores(list_dict_scores)

    # Store scores and plot
    store_scores(
        config,
        dict_scores,
        time_elapsed=np.mean(time_elapsed),
        path_output=os.path.join(path_out_model_params, "scores.txt"),
    )
    store_plots(dict_pred, path_output=path_out_model_params)

    remove_directory("temp_train")
    remove_directory("temp_valid")
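Since train_many_models and test_many_models derive the same output folder from config["name"] and the same model filenames model_{i}.pth, a driver can train first and later re-score the stored models with an identical config. A hypothetical pairing, reusing the config dictionary sketched after Example #3:

train_many_models("path/to/data", "path/to/output", config)  # trains and saves model_{i}.pth
test_many_models("path/to/data", "path/to/output", config)   # reloads the saved models and scores them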