def read_selected_appliances(self, appliances: List, start: str, end: str, sample_period=6, building=1,
                             include_mains=True) -> Tuple[DataFrame, MeterGroup]:
    """
    Loads the data of the specified appliances.

    Args:
        appliances (List): A list of appliances to read their records.
        start (str): The starting date in the format "{month}-{day of month}-{year}" e.g. "05-30-2012".
        end (str): The final date in the format "{month}-{day of month}-{year}" e.g. "08-30-2012".
        sample_period (int): The sample period of the records.
        building (int): The building to read the records from.
        include_mains (bool): True if should include main meters.

    Returns:
        A tuple containing the respective DataFrame and MeterGroup of the data that are read.
        Missing values of the DataFrame are replaced with 0.
    """
    debug(f" read_selected_appliances {appliances}, {building}, {start}, {end}, {include_mains}")
    # NOTE: get_selected_metergroup takes `end` before `start`; the positional order below is intentional.
    selected_metergroup = self.get_selected_metergroup(appliances, building, end, start, include_mains)

    # Always take the timestamp: the elapsed time below is computed unconditionally,
    # so a None start (as used before when TIMING was off) raised a TypeError.
    start_time = time.time()
    df = selected_metergroup.dataframe_of_meters(sample_period=sample_period)
    timing('NILMTK converting specified appliances to dataframe: {}'.format(round(time.time() - start_time, 2)))
    debug(f"Length of data of read_selected_appliances {len(df)}")

    df.fillna(0, inplace=True)
    return df, selected_metergroup
def train(self, appliances: list, raw_data: bool = False):
    """
    Fit the multilabel classifier on the training data for the given appliances.

    Args:
        appliances (List): List of appliances to be recognized.
        raw_data (bool): True if the experiment uses raw data without any time series representation.

    Returns:
        A tuple (preprocess_time, fit_time) with the seconds spent on preprocessing and on fitting.
    """
    info("Prepossessing before training...")
    preprocess_started = time.time()
    features, labels = self._preprocess(self.train_df, self.train_labels_df, appliances,
                                        self.get_ts_len(), raw_data)
    preprocess_time = time.time() - preprocess_started
    timing(f"preprocess time {preprocess_time}")

    # The classifier is fed 2-D input, so the two trailing axes of 3-D features are merged.
    if len(features.shape) == 3:
        n_rows, n_a, n_b = features.shape
        features = np.reshape(features, (n_rows, n_a * n_b))

    info("Training...")
    fit_started = time.time()
    self.multilabel_clf.fit(features, labels)
    fit_time = time.time() - fit_started
    timing(f"fit time {fit_time}")

    return preprocess_time, fit_time
def create_multilabels_from_meters(meters: DataFrame, meter_group: MeterGroup, labels2id: dict) -> DataFrame:
    """
    Creates multi labels from the given meter group using a dictionary as a lookup table.

    Args:
        meters (DataFrame): The readings, one column per meter. Every column name has to be a key of labels2id.
        meter_group (MeterGroup): The group that is indexed with labels2id[col] to get the meter of a column.
        labels2id (dict): Lookup table from a column name to the key used to index meter_group.

    Returns:
        A DataFrame with the multi labels, one column per meter. The SITE_METER column is skipped.
    """
    # Always take the timestamp: the elapsed time below is computed unconditionally,
    # so a None start (as used before when TIMING was off) raised a TypeError.
    start_time = time.time()
    labels = dict()
    for col in meters.columns:
        meter_id = labels2id[col]
        meter = meter_group[meter_id]
        loguru.logger.info(
            f"Creating multilabels from meter {col}, "
            f"\nlabels2id[col] {meter_id}"
            f"\nmetergroup[labels2id[col]] {meter}")

        vals = meters[col].values.astype(float)
        # astype() always returns an array, so only the site meter has to be filtered out.
        if col == SITE_METER:
            loguru.logger.debug(f"Skipping {col} - {vals}")
            continue
        loguru.logger.debug(f"meters[col].values.astype(float) {col} - {vals}")

        # The threshold is only needed for the meters that actually get labels.
        threshold = meter.on_power_threshold()
        labels[col] = create_labels(vals, threshold)

    timing('Create multilabels from meters {}'.format(round(time.time() - start_time, 2)))
    return DataFrame(labels)
def discretize(self, data):
    """
    Map the given data to a discrete sequence using the predictions of self.clf.

    Args:
        data: The values to discretize; it is reshaped into a single column before prediction.

    Returns:
        The sequence returned by self.clf.predict.
    """
    debug('Length of data {}'.format(len(data)))

    began = time.time()
    single_column = data.reshape(-1, 1)
    discrete_sequence = self.clf.predict(single_column)
    elapsed = round(time.time() - began, 2)
    timing('clf.predict: {}'.format(elapsed))

    debug('Length of predicted sequence {}'.format(len(discrete_sequence)))
    debug('Type of discrete sequence {}'.format(type(discrete_sequence)))
    return discrete_sequence
def get_selected_metergroup(self, appliances, building, end, start, include_mains) -> MeterGroup:
    """
    Gets a MeterGroup with the specified appliances for the given building during the given dates.

    Appliances that match more than one meter are selected by instance (instance 4 for 'sockets'
    of building 3, instance 1 otherwise); the rest are selected by type only.

    Args:
        appliances (List): A list of appliances to read their records.
        building (int): The building to read the records from.
        end (str): The final date in the format "{month}-{day of month}-{year}" e.g. "08-30-2012".
        start (str): The starting date in the format "{month}-{day of month}-{year}" e.g. "05-30-2012".
        include_mains (bool): True if should include main meters.

    Returns:
        A MeterGroup containing the specified appliances.
    """
    # Always take the timestamp: the elapsed time below is computed unconditionally,
    # so a None start (as used before when TIMING was off) raised a TypeError.
    start_time = time.time()
    self.dataset.set_window(start=start, end=end)
    elec = self.dataset.buildings[building].elec

    # Classify each appliance on its own; querying with the whole list made every
    # appliance share the same meter count.
    appliances_with_one_meter = []
    appliances_with_more_meters = []
    for appliance in appliances:
        metergroup = elec.select_using_appliances(type=appliance)
        if len(metergroup.meters) > 1:
            appliances_with_more_meters.append(appliance)
        else:
            appliances_with_one_meter.append(appliance)

    special_metergroup = None
    for appliance in appliances_with_more_meters:
        inst = 4 if appliance == 'sockets' and building == 3 else 1
        # Use the chosen instance for every appliance, not only for the first one.
        instance_group = elec.select_using_appliances(type=appliance, instance=inst)
        if special_metergroup is None:
            special_metergroup = instance_group
        else:
            special_metergroup = special_metergroup.union(instance_group)

    selected_metergroup = elec.select_using_appliances(type=appliances_with_one_meter)
    # There is nothing to merge when no appliance has more than one meter.
    if special_metergroup is not None:
        selected_metergroup = selected_metergroup.union(special_metergroup)

    if include_mains:
        mains_meter = elec.mains()
        if isinstance(mains_meter, MeterGroup):
            if len(mains_meter.meters) > 1:
                # Only the first of several mains meters is kept.
                mains_metergroup = MeterGroup(meters=[mains_meter.meters[0]])
            else:
                mains_metergroup = mains_meter
        else:
            mains_metergroup = MeterGroup(meters=[mains_meter])
        selected_metergroup = selected_metergroup.union(mains_metergroup)

    timing('NILMTK select using appliances: {}'.format(round(time.time() - start_time, 2)))
    return selected_metergroup
def test(self, appliances: list, raw_data: bool = False):
    """
    Evaluate the fitted multilabel classifier on the test data for the given appliances.

    Args:
        appliances (List): List of appliances to be recognized.
        raw_data (bool): True if the experiment uses raw data without any time series representation.

    Returns:
        A tuple (macro F1, micro F1, classification report as dict, preprocess time, prediction time).

    Raises:
        Exception: If the test data or the test labels have not been set.
    """
    if self.test_df is None or self.test_labels_df is None:
        raise Exception('Test data or test target is None')

    info("Prepossessing before testing...")
    preprocess_started = time.time()
    features, truth = self._preprocess(self.test_df, self.test_labels_df, appliances,
                                       self.get_ts_len(), raw_data, should_fit=False)
    preprocess_time = time.time() - preprocess_started
    timing(f"preprocess time {preprocess_time}")

    # The classifier is fed 2-D input, so the two trailing axes of 3-D features are merged.
    if len(features.shape) == 3:
        n_rows, n_a, n_b = features.shape
        features = np.reshape(features, (n_rows, n_a * n_b))

    info("Testing...")
    predict_started = time.time()
    predictions = self.multilabel_clf.predict(features)
    predictions_time = time.time() - predict_started
    timing(f"predictions time {predictions_time}")

    micro = f1_score(truth, predictions, average='micro')
    macro = f1_score(truth, predictions, average='macro')
    info('F1 macro {}'.format(macro))
    info('F1 micro {}'.format(micro))

    report = classification_report(truth, predictions, target_names=appliances, output_dict=True)
    return macro, micro, report, preprocess_time, predictions_time
def _preprocess(self, data_df, labels_df, appliances, ts_length, raw_data, should_fit: bool = True):
    """
    Turn the given readings and labels into windowed features and targets.

    Args:
        data_df: The readings that are passed to self.get_features.
        labels_df: The labels that are passed to self.get_multilabels.
        appliances: The appliances whose labels are used as target.
        ts_length: The time series length that is passed to self.get_window.
        raw_data (bool): True to force the raw representation instead of the transformer's own type.
        should_fit (bool): Forwarded to self.reduce_dimensions.

    Returns:
        A tuple (data, target) after bucketizing and, for the approximate representations,
        after dimensionality reduction.

    Raises:
        Exception: If no multilabel classifier has been placed.
    """
    if self.multilabel_clf is None:
        raise Exception('Multilabel classifier has not been placed!')

    representation_type = TransformerType.raw if raw_data else self.get_type_of_transformer()
    debug(f"Type of transformer {representation_type}")

    features_started = time.time()
    data = self.get_features(data_df, representation_type)
    get_features_time = time.time() - features_started
    timing(f"get features time {get_features_time}")
    debug(f"Features \n {data[:10]}")

    multilabels = self.get_multilabels(labels_df, appliances)
    target = np.array(multilabels.values)
    debug(f"Target \n {target[:10]}")

    # Drop the trailing samples that do not fill a whole window.
    window = self.get_window(ts_length)
    leftover = len(data) % window
    if leftover > 0:
        data, target = data[:-leftover], target[:-leftover]

    target = bucketize_target(target, window)
    data = bucketize_data(data, window)

    is_approximate = representation_type == TransformerType.approximate
    if is_approximate or representation_type == TransformerType.transform_and_approximate:
        reduction_started = time.time()
        data = self.reduce_dimensions(data, window, target, should_fit)
        reduce_dimensions_time = time.time() - reduction_started
        timing(f"reduce dimensions time {reduce_dimensions_time}")

    return data, target
def run(self):
    """
    Train and test every (transformer, classifier) pair self.repeat times and save each result.

    Raises:
        Exception: If the lists of transformers and classifiers differ in length.
    """
    self.setup_environment()

    if len(self.transformers) != len(self.classifiers):
        raise Exception(
            "List of transformers doesn't have the same length with list of classifiers. "
            "It should be a 1-1 map")

    for transformer, clf in zip(self.transformers, self.classifiers):
        # The descriptions are taken once per pair, before any training happens.
        transformer_descr = str(transformer)
        clf_descr = str(clf)

        for _ in range(self.repeat):
            self.env.place_multilabel_classifier(clf)
            self.env.place_ts_transformer(transformer)

            train_started = time.time()
            preprocess_train_time, fit_time = self.env.train(self.train_appliances)
            training_time = time.time() - train_started
            timing(f"training time {training_time}")

            test_started = time.time()
            macro, micro, report, preprocess_time, prediction_time = self.env.test(self.test_appliances)
            testing_time = time.time() - test_started
            timing(f"testing time {testing_time}")

            description = self.create_description(
                type(clf).__name__,
                clf_descr,
                transformer.get_name(),
                str(self.env.get_type_of_transformer()),
                transformer_descr,
                "train/test",
                macro,
                None,
                micro,
                None,
                str(len(self.train_appliances)),
                str(self.train_appliances),
                str(report),
                str(training_time),
                str(testing_time),
                str(preprocess_time),
                str(prediction_time),
                str(preprocess_train_time),
                str(fit_time))

            # NOTE(review): reset_results is not defined in this method; presumably a
            # module-level name - confirm.
            self.save_experiment(description, reset_results, self.results_file)
def create_multilabels(appliances: dict, meter_group: MeterGroup) -> dict:
    """
    Creates labels from the given meter group for the given appliances.

    Args:
        appliances (dict): dict with keys ['oven', 'microwave', 'dish washer', 'fridge freezer', 'kettle',
            'washer dryer', 'toaster', 'boiler', 'television', 'hair dryer', 'vacuum cleaner', 'light'].
            Each key is used to index the submeters of meter_group and each value is passed
            to create_labels.
        meter_group (MeterGroup): A MeterGroup object.

    Returns:
        A dictionary with labels per meter, keyed by the meter's label followed by its instance.
    """
    # Always take the timestamp: the elapsed time below is computed unconditionally,
    # so a None start (as used before when TIMING was off) raised a TypeError.
    start_time = time.time()
    # The submeters do not depend on the loop variable, so they are fetched only once.
    submeters = meter_group.submeters()
    labels = dict()
    for key, readings in appliances.items():
        meter = submeters[key]
        threshold = meter.on_power_threshold()
        labels[meter.label() + str(meter.instance())] = create_labels(readings, threshold)
        debug('{} threshold = {}'.format(meter.label(), threshold))
    timing('Create multilabels {}'.format(round(time.time() - start_time, 2)))
    return labels
def read_mains(self, start, end, sample_period=6, building=1) -> Tuple[DataFrame, MeterGroup]:
    """
    Loads the data of the mains meters of a building.

    Args:
        start (str): The starting date in the format "{month}-{day of month}-{year}" e.g. "05-30-2012".
        end (str): The final date in the format "{month}-{day of month}-{year}" e.g. "08-30-2012".
        sample_period (int): The sample period of the records.
        building (int): The building to read the records from.

    Returns:
        A tuple containing the respective DataFrame and MeterGroup of the data that are read.
        Missing values of the DataFrame are replaced with 0.
    """
    self.dataset.set_window(start=start, end=end)
    mains_meter = self.dataset.buildings[building].elec.mains()
    # mains() may give a single meter; wrap it so that a MeterGroup is always returned.
    if isinstance(mains_meter, MeterGroup):
        mains_metergroup = mains_meter
    else:
        mains_metergroup = MeterGroup(meters=[mains_meter])

    # Always take the timestamp: the elapsed time below is computed unconditionally,
    # so a None start (as used before when TIMING was off) raised a TypeError.
    start_time = time.time()
    df = mains_metergroup.dataframe_of_meters(sample_period=sample_period)
    timing('NILMTK converting mains to dataframe: {}'.format(round(time.time() - start_time, 2)))

    df.fillna(0, inplace=True)
    return df, mains_metergroup
def read_all_meters(self, start: str, end: str, sample_period: int = 6, building: int = 1) \
        -> Tuple[DataFrame, MeterGroup]:
    """
    Read the records during the given start and end dates, for all the meters of the given building.

    Args:
        start (str): The starting date in the format "{month}-{day of month}-{year}" e.g. "05-30-2012".
        end (str): The final date in the format "{month}-{day of month}-{year}" e.g. "08-30-2012".
        sample_period (int): The sample period of the records.
        building (int): The building to read the records from.

    Returns:
        A tuple containing the respective DataFrame and MeterGroup of the data that are read.
        Missing values of the DataFrame are replaced with 0.
    """
    # Always take the timestamps: the elapsed times below are computed unconditionally,
    # so a None start (as used before when TIMING was off) raised a TypeError.
    start_time = time.time()
    self.dataset.set_window(start=start, end=end)
    elec = self.dataset.buildings[building].elec
    timing('NILMTK selecting all meters: {}'.format(round(time.time() - start_time, 2)))

    start_time = time.time()
    df = elec.dataframe_of_meters(sample_period=sample_period)
    timing('NILMTK converting all meters to dataframe: {}'.format(round(time.time() - start_time, 2)))

    df.fillna(0, inplace=True)
    return df, elec
def map_into_vectors(self, sequence):
    """
    Look up the vector of every item of the sequence in self.embedding.

    Args:
        sequence: An iterable of items; each item is converted to str and used as an embedding key.

    Returns:
        A list with one embedding entry per item, in the order of the sequence.
    """
    began = time.time()
    sequence_of_vectors = []
    for item in sequence:
        sequence_of_vectors.append(self.embedding[str(item)])
    elapsed = round(time.time() - began, 2)
    timing('Appending vectors to list : {}'.format(elapsed))
    return sequence_of_vectors
def clean_nans(data):
    """
    Replaces the NaN values of the given data using np.nan_to_num without asking for a copy.

    Nothing is returned, so the caller only sees the result through `data` itself.
    NOTE(review): that only works when numpy can operate on `data` without copying it
    (e.g. a float ndarray) - confirm for the callers.

    Args:
        data: The array to clean.
    """
    # Always take the timestamp: the elapsed time below is computed unconditionally,
    # so a None start (as used before when TIMING was off) raised a TypeError.
    start_time = time.time()
    np.nan_to_num(data, copy=False)
    timing('None to num: {}'.format(round(time.time() - start_time, 2)))