Beispiel #1
0
    def read_selected_appliances(self, appliances: List, start: str, end: str, sample_period=6, building=1,
                                 include_mains=True) -> Tuple[DataFrame, MeterGroup]:
        """
        Loads the data of the specified appliances.

        Args:
            appliances (List): A list of appliances to read their records.
            start (str): The starting date in the format "{month}-{day of month}-{year}" e.g. "05-30-2012".
            end (str): The final date in the format "{month}-{day of month}-{year}" e.g. "08-30-2012".
            sample_period (int): The sample period of the records.
            building (int): The building to read the records from.
            include_mains (bool): True if should include main meters.

        Returns:
            A tuple ``(df, metergroup)``: the DataFrame built from the selected meters (missing values
            are replaced with 0 in place) and the MeterGroup the data were read from.
        """
        debug(f" read_selected_appliances {appliances}, {building}, {start}, {end}, {include_mains}")

        # NOTE: get_selected_metergroup takes (appliances, building, end, start, include_mains).
        selected_metergroup = self.get_selected_metergroup(appliances, building, end, start, include_mains)

        # Always take the timestamp: the elapsed time below is computed eagerly, so a
        # conditional ``None`` start time would raise TypeError whenever TIMING is off.
        start_time = time.time()
        df = selected_metergroup.dataframe_of_meters(sample_period=sample_period)
        timing('NILMTK converting specified appliances to dataframe: {}'.format(round(time.time() - start_time, 2)))

        debug(f"Length of data of read_selected_appliances {len(df)}")
        df.fillna(0, inplace=True)
        return df, selected_metergroup
Beispiel #2
0
    def train(self, appliances: list, raw_data: bool = False):
        """
        Fit the multilabel classifier on the training data for the given appliances.

        Args:
            appliances (List): List of appliances to be recognized.
            raw_data (bool): True if the experiment uses raw data without any time series representation.

        Returns:
            A tuple ``(preprocess_time, fit_time)`` with the seconds spent in preprocessing and in fitting.
        """
        info("Prepossessing before training...")
        preprocess_started = time.time()
        features, labels = self._preprocess(
            self.train_df,
            self.train_labels_df,
            appliances,
            self.get_ts_len(),
            raw_data,
        )
        preprocess_time = time.time() - preprocess_started
        timing(f"preprocess time {preprocess_time}")

        # The classifier is fed 2-D input: collapse the two trailing axes of a 3-D array.
        if len(features.shape) == 3:
            n_samples, dim_a, dim_b = features.shape
            features = np.reshape(features, (n_samples, dim_a * dim_b))

        info("Training...")
        fit_started = time.time()
        self.multilabel_clf.fit(features, labels)
        fit_time = time.time() - fit_started
        timing(f"fit time {fit_time}")

        return preprocess_time, fit_time
Beispiel #3
0
def create_multilabels_from_meters(meters: DataFrame, meter_group: MeterGroup,
                                   labels2id: dict) -> DataFrame:
    """
    Creates multi labels from the given meter group using a dictionary as a lookup table.

    Args:
        meters (DataFrame): One column of readings per meter; the column names are keys of ``labels2id``.
        meter_group (MeterGroup): The group that is indexed with ``labels2id[col]`` to get each meter.
        labels2id (dict): Lookup table from a column name of ``meters`` to the key used on ``meter_group``.

    Returns:
        A DataFrame with one label column per meter. The ``SITE_METER`` column is skipped.
    """
    # Always take the timestamp: the elapsed time below is computed eagerly, so a
    # conditional ``None`` start time would raise TypeError whenever TIMING is off.
    start_time = time.time()
    labels = dict()
    for col in meters.columns:
        vals = meters[col].values.astype(float)
        # Skip the site meter before any lookup, so that it needs neither an entry in
        # ``labels2id`` nor an on-power threshold.
        if col == SITE_METER:
            loguru.logger.debug(f"Skipping {col} - {vals}")
            continue
        meter = meter_group[labels2id[col]]
        loguru.logger.info(
            f"Creating multilabels from meter {col}, "
            f"\nlabels2id[col] {labels2id[col]}"
            f"\nmetergroup[labels2id[col]] {meter}")
        threshold = meter.on_power_threshold()
        loguru.logger.debug(f"meters[col].values.astype(float) {col} - {vals}")
        labels[col] = create_labels(vals, threshold)
    timing('Create multilabels from meters {}'.format(
        round(time.time() - start_time, 2)))
    return DataFrame(labels)
Beispiel #4
0
    def discretize(self, data):
        """
        Run ``self.clf.predict`` on the data reshaped to a single column.

        Args:
            data: Array-like supporting ``len`` and ``reshape``.  # presumably a 1-D numpy array - confirm

        Returns:
            The sequence returned by ``self.clf.predict``.
        """
        debug('Length of data {}'.format(len(data)))
        started = time.time()

        # The classifier is given one feature per sample.
        single_column = data.reshape(-1, 1)
        pred = self.clf.predict(single_column)

        elapsed = round(time.time() - started, 2)
        timing('clf.predict: {}'.format(elapsed))
        debug('Length of predicted sequence {}'.format(len(pred)))
        debug('Type of discrete sequence {}'.format(type(pred)))

        return pred
Beispiel #5
0
    def get_selected_metergroup(self, appliances, building, end, start, include_mains) -> MeterGroup:
        """
        Gets a MeterGroup with the specified appliances for the given building during the given dates.

        Note that ``end`` precedes ``start`` in the positional parameter order.

        Args:
            appliances (List): A list of appliances to read their records.
            building (int): The building to read the records from.
            end (str): The final date in the format "{month}-{day of month}-{year}" e.g. "08-30-2012".
            start (str): The starting date in the format "{month}-{day of month}-{year}" e.g. "05-30-2012".
            include_mains (bool): True if should include main meters.

        Returns:
            A MeterGroup containing the specified appliances and, when requested, one mains meter.
        """
        # Always take the timestamp: the elapsed time below is computed eagerly, so a
        # conditional ``None`` start time would raise TypeError whenever TIMING is off.
        start_time = time.time()
        self.dataset.set_window(start=start, end=end)
        elec = self.dataset.buildings[building].elec
        appliances_with_one_meter = []
        appliances_with_more_meters = []
        for appliance in appliances:
            # Query this appliance alone; querying the whole list would classify every
            # appliance by the combined number of meters.
            metergroup = elec.select_using_appliances(type=appliance)
            if len(metergroup.meters) > 1:
                appliances_with_more_meters.append(appliance)
            else:
                appliances_with_one_meter.append(appliance)

        # For appliances with several meters keep a single instance.
        special_metergroup = None
        for appliance in appliances_with_more_meters:
            # Building 3 'sockets' use instance 4; everything else uses instance 1.
            inst = 4 if appliance == 'sockets' and building == 3 else 1
            instance_group = elec.select_using_appliances(type=appliance, instance=inst)
            if special_metergroup is None:
                special_metergroup = instance_group
            else:
                special_metergroup = special_metergroup.union(instance_group)

        selected_metergroup = elec.select_using_appliances(type=appliances_with_one_meter)
        # Nothing to merge when no appliance has more than one meter.
        if special_metergroup is not None:
            selected_metergroup = selected_metergroup.union(special_metergroup)
        if include_mains:
            mains_meter = self.dataset.buildings[building].elec.mains()
            if isinstance(mains_meter, MeterGroup):
                if len(mains_meter.meters) > 1:
                    # Several mains meters: keep only the first one.
                    mains_meter = mains_meter.meters[0]
                    mains_metergroup = MeterGroup(meters=[mains_meter])
                else:
                    mains_metergroup = mains_meter
            else:
                mains_metergroup = MeterGroup(meters=[mains_meter])
            selected_metergroup = selected_metergroup.union(mains_metergroup)
        timing('NILMTK select using appliances: {}'.format(round(time.time() - start_time, 2)))
        return selected_metergroup
Beispiel #6
0
    def test(self, appliances: list, raw_data: bool = False):
        """
        Evaluate the fitted multilabel classifier on the test data.

        Args:
            appliances (List): List of appliances to be recognized.
            raw_data (bool): True if the experiment uses raw data without any time series representation.

        Returns:
            A tuple ``(macro, micro, report, preprocess_time, predictions_time)``: the macro and micro
            F1 scores, the classification report as a dict, and the seconds spent in preprocessing
            and in prediction.

        Raises:
            Exception: If the test data or the test labels are None.
        """
        if self.test_df is None or self.test_labels_df is None:
            raise Exception('Test data or test target is None')

        info("Prepossessing before testing...")
        preprocess_started = time.time()
        # should_fit=False is passed through to the preprocessing step.
        features, labels = self._preprocess(
            self.test_df, self.test_labels_df, appliances,
            self.get_ts_len(), raw_data, should_fit=False)
        preprocess_time = time.time() - preprocess_started
        timing(f"preprocess time {preprocess_time}")

        # The classifier is fed 2-D input: collapse the two trailing axes of a 3-D array.
        if len(features.shape) == 3:
            n_samples, dim_a, dim_b = features.shape
            features = np.reshape(features, (n_samples, dim_a * dim_b))

        info("Testing...")
        predict_started = time.time()
        predictions = self.multilabel_clf.predict(features)
        predictions_time = time.time() - predict_started
        timing(f"predictions time {predictions_time}")

        micro = f1_score(labels, predictions, average='micro')
        macro = f1_score(labels, predictions, average='macro')
        info('F1 macro {}'.format(macro))
        info('F1 micro {}'.format(micro))

        report = classification_report(
            labels, predictions, target_names=appliances, output_dict=True)
        return macro, micro, report, preprocess_time, predictions_time
Beispiel #7
0
    def _preprocess(self,
                    data_df,
                    labels_df,
                    appliances,
                    ts_length,
                    raw_data,
                    should_fit: bool = True):
        """
        Turn readings and labels into windowed features and targets for the classifier.

        Args:
            data_df: Input passed to ``self.get_features``.
            labels_df: Input passed to ``self.get_multilabels``.
            appliances: Appliances passed to ``self.get_multilabels``.
            ts_length: Value passed to ``self.get_window`` to obtain the window size.
            raw_data: When truthy, the raw transformer type is used instead of the placed transformer's type.
            should_fit (bool): Forwarded to ``self.reduce_dimensions``.

        Returns:
            A tuple ``(data, target)`` of bucketized features and bucketized labels.

        Raises:
            Exception: If no multilabel classifier has been placed.
        """
        if self.multilabel_clf is None:
            raise Exception('Multilabel classifier has not been placed!')

        representation_type = TransformerType.raw if raw_data else self.get_type_of_transformer()
        debug(f"Type of transformer {representation_type}")

        features_started = time.time()
        features = self.get_features(data_df, representation_type)
        get_features_time = time.time() - features_started
        timing(f"get features time {get_features_time}")
        debug(f"Features \n {features[:10]}")

        labels = np.array(self.get_multilabels(labels_df, appliances).values)
        debug(f"Target \n {labels[:10]}")

        # Drop the trailing samples that do not fill a whole window.
        window = self.get_window(ts_length)
        leftover = len(features) % window
        if leftover > 0:
            features = features[:-leftover]
            labels = labels[:-leftover]

        labels = bucketize_target(labels, window)
        features = bucketize_data(features, window)

        needs_reduction = representation_type in (
            TransformerType.approximate,
            TransformerType.transform_and_approximate,
        )
        if needs_reduction:
            reduce_started = time.time()
            features = self.reduce_dimensions(features, window, labels, should_fit)
            reduce_dimensions_time = time.time() - reduce_started
            timing(f"reduce dimensions time {reduce_dimensions_time}")

        return features, labels
Beispiel #8
0
    def run(self):
        """
        Train and test every (transformer, classifier) pair ``self.repeat`` times and save each result.

        Transformers and classifiers are paired by position, so both lists must have the same length.

        Raises:
            Exception: If the lists of transformers and classifiers differ in length.
        """
        self.setup_environment()
        if len(self.transformers) != len(self.classifiers):
            raise Exception(
                "List of transformers doesn't have the same length with list of classifiers. "
                "It should be a 1-1 map")

        for transformer, clf in zip(self.transformers, self.classifiers):
            transformer_descr = str(transformer)
            clf_descr = str(clf)
            for _ in range(self.repeat):
                self.env.place_multilabel_classifier(clf)
                self.env.place_ts_transformer(transformer)

                train_started = time.time()
                preprocess_train_time, fit_time = self.env.train(self.train_appliances)
                training_time = time.time() - train_started
                timing(f"training time {training_time}")

                test_started = time.time()
                test_outcome = self.env.test(self.test_appliances)
                macro, micro, report, preprocess_time, prediction_time = test_outcome
                testing_time = time.time() - test_started
                timing(f"testing time {testing_time}")

                description = self.create_description(
                    type(clf).__name__,
                    clf_descr,
                    transformer.get_name(),
                    str(self.env.get_type_of_transformer()),
                    transformer_descr,
                    "train/test",
                    macro, None,
                    micro, None,
                    str(len(self.train_appliances)),
                    str(self.train_appliances),
                    str(report),
                    str(training_time),
                    str(testing_time),
                    str(preprocess_time),
                    str(prediction_time),
                    str(preprocess_train_time),
                    str(fit_time))

                # NOTE(review): ``reset_results`` is not defined in this method; presumably a
                # module-level name - confirm.
                self.save_experiment(description, reset_results, self.results_file)
Beispiel #9
0
def create_multilabels(appliances: dict, meter_group: MeterGroup) -> dict:
    """
    Creates labels from the given meter group for the given appliances.

    Args:
        appliances (dict): Readings per appliance, with keys such as ['oven', 'microwave', 'dish washer',
              'fridge freezer', 'kettle', 'washer dryer', 'toaster', 'boiler', 'television',
              'hair dryer', 'vacuum cleaner', 'light']. Each key is used to index the submeters.

        meter_group (MeterGroup): A MeterGroup object.

    Returns:
        A dictionary with labels per meter, keyed by the meter label followed by its instance.
    """
    # Always take the timestamp: the elapsed time below is computed eagerly, so a
    # conditional ``None`` start time would raise TypeError whenever TIMING is off.
    start_time = time.time()
    labels = dict()

    # The submeters do not depend on the loop variable; fetch them once.
    submeters = meter_group.submeters()
    for key, readings in appliances.items():
        meter = submeters[key]
        threshold = meter.on_power_threshold()
        labels[meter.label() + str(meter.instance())] = create_labels(
            readings, threshold)
        debug('{} threshold = {}'.format(meter.label(), threshold))

    timing('Create multilabels {}'.format(round(time.time() - start_time, 2)))
    return labels
Beispiel #10
0
    def read_mains(self, start, end, sample_period=6, building=1) -> Tuple[DataFrame, MeterGroup]:
        """
        Loads the data of the mains meters of the given building.

        Args:
            start (str): The starting date in the format "{month}-{day of month}-{year}" e.g. "05-30-2012".
            end (str): The final date in the format "{month}-{day of month}-{year}" e.g. "08-30-2012".
            sample_period (int): The sample period of the records.
            building (int): The building to read the records from.

        Returns:
            A tuple ``(df, mains_metergroup)``: the DataFrame built from the mains meters (missing values
            are replaced with 0 in place) and the MeterGroup the data were read from.
        """
        self.dataset.set_window(start=start, end=end)
        mains_meter = self.dataset.buildings[building].elec.mains()
        # mains() may give a single meter; wrap it so that dataframe_of_meters is available.
        if isinstance(mains_meter, MeterGroup):
            mains_metergroup = mains_meter
        else:
            mains_metergroup = MeterGroup(meters=[mains_meter])
        # Always take the timestamp: the elapsed time below is computed eagerly, so a
        # conditional ``None`` start time would raise TypeError whenever TIMING is off.
        start_time = time.time()
        df = mains_metergroup.dataframe_of_meters(sample_period=sample_period)
        timing('NILMTK converting mains to dataframe: {}'.format(round(time.time() - start_time, 2)))

        df.fillna(0, inplace=True)
        return df, mains_metergroup
Beispiel #11
0
    def read_all_meters(self, start: str, end: str, sample_period: int = 6, building: int = 1) \
            -> Tuple[DataFrame, MeterGroup]:
        """
        Read the records during the given start and end dates, for all the meters of the given building.

        Args:
            start (str): The starting date in the format "{month}-{day of month}-{year}" e.g. "05-30-2012".
            end (str): The final date in the format "{month}-{day of month}-{year}" e.g. "08-30-2012".
            sample_period (int): The sample period of the records.
            building (int): The building to read the records from.

        Returns:
            A tuple ``(df, elec)``: the DataFrame built from all meters (missing values are replaced
            with 0 in place) and the building's ``elec`` MeterGroup the data were read from.
        """
        # Always take the timestamps: the elapsed times below are computed eagerly, so a
        # conditional ``None`` start time would raise TypeError whenever TIMING is off.
        start_time = time.time()
        self.dataset.set_window(start=start, end=end)
        elec = self.dataset.buildings[building].elec
        timing('NILMTK selecting all meters: {}'.format(round(time.time() - start_time, 2)))

        start_time = time.time()
        df = elec.dataframe_of_meters(sample_period=sample_period)
        timing('NILMTK converting all meters to dataframe: {}'.format(round(time.time() - start_time, 2)))

        df.fillna(0, inplace=True)
        return df, elec
Beispiel #12
0
 def map_into_vectors(self, sequence):
     """
     Look up each element of the sequence in ``self.embedding``.

     Args:
         sequence: Iterable of items; each one is converted with ``str`` before the lookup.

     Returns:
         A list with one ``self.embedding`` entry per element, in the order of the sequence.
     """
     started = time.time()
     sequence_of_vectors = []
     for token in sequence:
         sequence_of_vectors.append(self.embedding[str(token)])
     elapsed = round(time.time() - started, 2)
     timing('Appending vectors to list : {}'.format(elapsed))
     return sequence_of_vectors
Beispiel #13
0
 def clean_nans(data):
     """
     Replace NaN and infinite values of ``data`` using ``np.nan_to_num`` without copying.

     Args:
         data: Array to clean.  # presumably a float numpy array, so the change lands in place - confirm

     Returns:
         None. The result of ``np.nan_to_num`` is not returned.
     """
     # Always take the timestamp: the elapsed time below is computed eagerly, so a
     # conditional ``None`` start time would raise TypeError whenever TIMING is off.
     start_time = time.time()
     # The second positional argument of nan_to_num is ``copy``; spell it out.
     np.nan_to_num(data, copy=False)
     timing('None to num: {}'.format(round(time.time() - start_time, 2)))