Example #1
    def train(self, metergroup, cols=[('power', 'active')],
              buffer_size=20, noise_level=70, state_threshold=15,
              min_tolerance=100, percent_tolerance=0.035,
              large_transition=1000, **kwargs):
        """
        Train using Hart85. Places the learnt model in `model` attribute.

        Parameters
        ----------
        metergroup : a nilmtk.MeterGroup object
        cols: nilmtk.Measurement, should be one of the following
            [('power','active')]
            [('power','apparent')]
            [('power','reactive')]
            [('power','active'), ('power', 'reactive')]
        buffer_size: int, optional
            size of the buffer to use for finding edges
        noise_level: int, optional
            transitions below this power level are ignored as noise
        state_threshold: int, optional
            maximum difference between highest and lowest value
            for them to still count as one steady state
        min_tolerance: int, optional
            variance in power draw allowed for pairing a match
        percent_tolerance: float, optional
            if transition is greater than large_transition,
            then use percent of large_transition
        large_transition: float, optional
            power draw of a Large transition
        """
        self.cols = cols
        self.state_threshold = state_threshold
        self.noise_level = noise_level
        [self.steady_states, self.transients] = find_steady_states_transients(
            metergroup, cols, noise_level, state_threshold, **kwargs)
        self.pair_df = self.pair(
            buffer_size, min_tolerance, percent_tolerance, large_transition)
        self.centroids = hart85_means_shift_cluster(self.pair_df, cols)
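A minimal usage sketch for this train() variant, assuming nilmtk is installed and a converted HDF5 dataset exists at the placeholder path; the import location of Hart85 differs between nilmtk releases:

from nilmtk import DataSet
from nilmtk.legacy.disaggregate import Hart85  # 'nilmtk.disaggregate' in older releases

ds = DataSet('/data/redd.h5')             # placeholder dataset path
elec = ds.buildings[1].elec               # building number is an assumption
h = Hart85()
h.train(elec.mains(), cols=[('power', 'active')])
print(h.centroids)                        # one learnt centroid per inferred appliance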
Example #2
    def disaggregate(self, mains, output_datastore, **load_kwargs):
        """Disaggregate mains according to the model learnt previously.

        Parameters
        ----------
        mains : nilmtk.ElecMeter or nilmtk.MeterGroup
        output_datastore : instance of nilmtk.DataStore subclass
            For storing power predictions from disaggregation algorithm.
        sample_period : number, optional
            The desired sample period in seconds.
        **load_kwargs : key word arguments
            Passed to `mains.power_series(**kwargs)`
        """
        load_kwargs = self._pre_disaggregation_checks(load_kwargs)

        load_kwargs.setdefault('sample_period', 60)
        load_kwargs.setdefault('sections', mains.good_sections())

        timeframes = []
        building_path = '/building{}'.format(mains.building())
        mains_data_location = building_path + '/elec/meter1'
        data_is_available = False

        [_, transients] = find_steady_states_transients(
            mains,
            columns=self.columns,
            state_threshold=self.state_threshold,
            noise_level=self.noise_level,
            **load_kwargs)

        # For now ignoring the first transient
        # transients = transients[1:]

        # Initially all appliances/meters are in unknown state (denoted by -1)
        prev = OrderedDict()
        learnt_meters = self.centroids.index.values
        for meter in learnt_meters:
            prev[meter] = -1

        # Now iterating over mains data and disaggregating chunk by chunk
        if len(self.columns) == 1:
            ac_type = self.columns[0][1]
        else:
            ac_type = ['active', 'reactive']

        for chunk in mains.power_series(**load_kwargs):
            # Record metadata

            timeframes.append(chunk.timeframe)
            measurement = chunk.name
            power_df, dimen = self.disaggregate_chunk(chunk, prev, transients)

            if dimen == 2:
                columns = pd.MultiIndex.from_tuples([chunk.name])

            else:
                tuples = list(self.columns)
                columns = pd.MultiIndex.from_tuples(tuples)

            for meter in learnt_meters:
                data_is_available = True
                df = power_df[[meter]]
                df.columns = columns
                df.columns.names = ['physical_quantity', 'type']
                key = '{}/elec/meter{:d}'.format(building_path, meter + 2)
                val = df.apply(pd.to_numeric).astype('float32')
                output_datastore.append(key, value=val)
            print('Next Chunk..')

        print('Appending mains data to datastore')

        # Append the mains data chunk by chunk, so the whole series is
        # stored rather than only the final chunk.
        for chunk_mains in mains.load(ac_type=ac_type):
            chunk_df = chunk_mains.apply(pd.to_numeric).astype('float32')
            output_datastore.append(key=mains_data_location, value=chunk_df)
        print('Done')
        # save metadata
        if data_is_available:
            self._save_metadata_for_disaggregation(
                output_datastore=output_datastore,
                sample_period=load_kwargs['sample_period'],
                measurement=measurement,
                timeframes=timeframes,
                building=mains.building(),
                supervised=False,
                num_meters=len(self.centroids))
        return power_df
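A hedged sketch of driving this disaggregate() against an HDF5 output store; the dataset path and output path are placeholders, and the trained instance h is assumed from the previous example:

from nilmtk import DataSet, HDFDataStore

test = DataSet('/data/redd.h5')                   # placeholder dataset path
test_mains = test.buildings[1].elec.mains()
output = HDFDataStore('/tmp/hart85_out.h5', 'w')
h.disaggregate(test_mains, output, sample_period=60)
output.close()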
Example #3
    def disaggregate_chunk(self, test_mains):
        """Disaggregate a list of mains chunks using the learnt model.

        Parameters
        ----------
        test_mains : list of pd.DataFrame
            mains power chunks

        Returns
        -------
        test_predictions_list : list of pd.DataFrame
            one prediction frame per chunk, with the same index as
            that chunk and one column per matched appliance.
        """
        #print(test_mains)
        test_predictions_list = []
        print(
            '...............................Disaggregator starts...............................'
        )
        for chunk in test_mains:

            [_, transients] = find_steady_states_transients(
                chunk,
                columns=self.columns,
                state_threshold=self.state_threshold,
                noise_level=self.noise_level)
            #print('Transients:',transients)
            # For now ignoring the first transient
            # transients = transients[1:]

            # Initially all appliances/meters are in unknown state (denoted by -1)
            prev = OrderedDict()
            learnt_meters = self.centroids.index.values
            for meter in learnt_meters:
                prev[meter] = -1
            states = pd.DataFrame(-1,
                                  index=chunk.index,
                                  columns=self.centroids.index.values)
            #print('STATES:',states)
            for transient_tuple in transients.itertuples():
                if transient_tuple[0] < chunk.index[0]:
                    # Transient occurs before chunk has started; do nothing
                    pass
                elif transient_tuple[0] > chunk.index[-1]:
                    # Transient occurs after chunk has ended; do nothing
                    pass
                else:
                    # Absolute value of transient
                    abs_value = np.abs(transient_tuple[1:])
                    positive = transient_tuple[1] > 0
                    abs_value_transient_minus_centroid = pd.DataFrame(
                        (self.centroids - abs_value).abs())
                    if len(transient_tuple) == 2:
                        # 1d data
                        index_least_delta = (abs_value_transient_minus_centroid
                                             .idxmin().values[0])
                    else:
                        # 2d data.
                        # Need to find absolute value before computing minimum
                        columns = abs_value_transient_minus_centroid.columns
                        abs_value_transient_minus_centroid["multidim"] = (
                            abs_value_transient_minus_centroid[columns[0]]**2 +
                            abs_value_transient_minus_centroid[columns[1]]**2)
                        index_least_delta = (
                            abs_value_transient_minus_centroid["multidim"].
                            idxmin())
                    if positive:
                        # Turned on
                        states.loc[transient_tuple[0], index_least_delta] = 1
                    else:
                        # Turned off
                        states.loc[transient_tuple[0], index_least_delta] = 0
            prev = states.iloc[-1].to_dict()
            power_chunk_dict = self.assign_power_from_states(states, prev)
            self.power_dict = power_chunk_dict
            self.chunk_index = chunk.index
            # Check whether 1d data or 2d data and converting dict to dataframe
            if len(transient_tuple) == 2:

                temp_df = pd.DataFrame(power_chunk_dict, index=chunk.index)

            else:
                tuples = []

                for i in range(len(self.centroids.index.values)):
                    for j in range(0, 2):
                        tuples.append([i, j])

                columns = pd.MultiIndex.from_tuples(tuples)

                temp_df = pd.DataFrame(power_chunk_dict,
                                       index=chunk.index,
                                       columns=columns)

                for i in range(len(chunk.index)):
                    for j in range(len(self.centroids.index.values)):
                        for k in range(0, 2):
                            temp_df.loc[chunk.index[i],
                                        (j, k)] = power_chunk_dict[j, i, k]

            temp_df = temp_df.fillna(0)

            temp = pd.DataFrame()
            for appliance in self.appliances:
                matched_col = self.best_matches[appliance]
                temp[appliance] = temp_df[matched_col]
            test_predictions_list.append(temp)

        print(
            '............................Disaggregator ends...........................'
        )
        return test_predictions_list
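This variant follows the nilmtk-contrib style API: test_mains is a list of DataFrames and one prediction frame is returned per chunk. A sketch, assuming a fitted instance h and an existing mains DataFrame mains_df:

predictions = h.disaggregate_chunk([mains_df])   # list in, list out
print(predictions[0].head())                     # columns are the appliance names from partial_fit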
Example #4
    def partial_fit(self,
                    train_main,
                    train_appliances,
                    buffer_size=20,
                    noise_level=70,
                    state_threshold=15,
                    min_tolerance=100,
                    percent_tolerance=0.035,
                    large_transition=1000,
                    **kwargs):
        """
        Train using Hart85. Places the learnt model in `model` attribute.

        Parameters
        ----------
        train_main : list of pd.DataFrame
            mains power readings; the measurement column is taken
            from the first DataFrame
        train_appliances : list of tuples
            [('appliance name', [df]), ...] with per-appliance readings
        buffer_size: int, optional
            size of the buffer to use for finding edges
        noise_level: int, optional
            transitions below this power level are ignored as noise
        state_threshold: int, optional
            maximum difference between highest and lowest value
            for them to still count as one steady state
        min_tolerance: int, optional
            variance in power draw allowed for pairing a match
        percent_tolerance: float, optional
            if transition is greater than large_transition,
            then use percent of large_transition
        large_transition: float, optional
            power draw of a Large transition
        """

        # Train_appliances : list of tuples [('appliance',df),('appliance',df)]

        self.appliances = [row[0] for row in train_appliances]
        print(
            "...........................Hart_85 Partial Fit Running..............."
        )

        train_main = train_main[0]
        columns = [train_main.columns[0]]
        self.columns = columns
        self.state_threshold = state_threshold
        self.noise_level = noise_level
        [self.steady_states, self.transients
         ] = find_steady_states_transients(train_main, columns, noise_level,
                                           state_threshold)
        self.pair_df = self.pair(buffer_size, min_tolerance, percent_tolerance,
                                 large_transition)
        self.centroids = hart85_means_shift_cluster(self.pair_df, columns)

        print(
            '..............................Predicting Centroid Matching..........................'
        )
        chunk = train_main

        transients = self.transients
        temp_df = pd.DataFrame()
        # For now ignoring the first transient
        # transients = transients[1:]

        # Initially all appliances/meters are in unknown state (denoted by -1)
        prev = OrderedDict()
        learnt_meters = self.centroids.index.values
        for meter in learnt_meters:
            prev[meter] = -1

        states = pd.DataFrame(-1,
                              index=chunk.index,
                              columns=self.centroids.index.values)

        for transient_tuple in transients.itertuples():
            if transient_tuple[0] < chunk.index[0]:
                # Transient occurs before chunk has started; do nothing
                pass
            elif transient_tuple[0] > chunk.index[-1]:
                # Transient occurs after chunk has ended; do nothing
                pass
            else:
                # Absolute value of transient
                abs_value = np.abs(transient_tuple[1:])
                positive = transient_tuple[1] > 0
                abs_value_transient_minus_centroid = pd.DataFrame(
                    (self.centroids - abs_value).abs())
                if len(transient_tuple) == 2:
                    # 1d data
                    index_least_delta = (
                        abs_value_transient_minus_centroid.idxmin().values[0])
                else:
                    # 2d data.
                    # Need to find absolute value before computing minimum
                    columns = abs_value_transient_minus_centroid.columns
                    abs_value_transient_minus_centroid["multidim"] = (
                        abs_value_transient_minus_centroid[columns[0]]**2 +
                        abs_value_transient_minus_centroid[columns[1]]**2)
                    index_least_delta = (
                        abs_value_transient_minus_centroid["multidim"].idxmin(
                        ))
                if positive:
                    # Turned on
                    states.loc[transient_tuple[0], index_least_delta] = 1
                else:
                    # Turned off
                    states.loc[transient_tuple[0], index_least_delta] = 0
        prev = states.iloc[-1].to_dict()
        power_chunk_dict = self.assign_power_from_states(states, prev)
        self.power_dict = power_chunk_dict
        self.chunk_index = chunk.index
        # Check whether 1d data or 2d data and converting dict to dataframe
        #print('LEN of Transient Tuple',len(transient_tuple))
        if len(transient_tuple) == 2:

            temp_df = pd.DataFrame(power_chunk_dict, index=chunk.index)
        else:
            tuples = []

            for i in range(len(self.centroids.index.values)):
                for j in range(0, 2):
                    tuples.append([i, j])

            columns = pd.MultiIndex.from_tuples(tuples)

            temp_df = pd.DataFrame(power_chunk_dict,
                                   index=chunk.index,
                                   columns=columns)

            for i in range(len(chunk.index)):
                for j in range(len(self.centroids.index.values)):
                    for k in range(0, 2):
                        temp_df.loc[chunk.index[i],
                                    (j, k)] = power_chunk_dict[j, i, k]

        temp_df = temp_df.fillna(0)
        best_matches = {}

        for row in train_appliances:
            appliance = row[0]
            appliance_df = row[1][0]
            matched_col = self.min_rmse_column(temp_df, appliance_df['power'])
            best_matches[appliance] = matched_col
        self.best_matches = best_matches

        print(
            '...................................End Centroid Matching............................'
        )

        self.model = dict(
            best_matches=best_matches,
            columns=columns,
            state_threshold=state_threshold,
            noise_level=noise_level,
            steady_states=self.steady_states,
            transients=self.transients,
            # pair_df=self.pair_df,
            centroids=self.centroids)
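A usage sketch for partial_fit(), following the list-of-DataFrames convention the method expects; mains_df, fridge_df, kettle_df and the appliance names are placeholders:

train_main = [mains_df]                       # list with one mains DataFrame
train_appliances = [('fridge', [fridge_df]),  # each appliance frame needs a 'power' column
                    ('kettle', [kettle_df])]
h = Hart85()
h.partial_fit(train_main, train_appliances)
print(h.best_matches)                         # appliance name -> matched centroid column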
Example #5
    def disaggregate(self, mains, output_datastore, **load_kwargs):
        """Disaggregate mains according to the model learnt previously.

        Parameters
        ----------
        mains : nilmtk.ElecMeter or nilmtk.MeterGroup
        output_datastore : instance of nilmtk.DataStore subclass
            For storing power predictions from disaggregation algorithm.
        sample_period : number, optional
            The desired sample period in seconds.
        **load_kwargs : key word arguments
            Passed to `mains.power_series(**kwargs)`
        """
        load_kwargs = self._pre_disaggregation_checks(load_kwargs)

        load_kwargs.setdefault('sample_period', 60)
        load_kwargs.setdefault('sections', mains.good_sections())

        timeframes = []
        building_path = '/building{}'.format(mains.building())
        mains_data_location = building_path + '/elec/meter1'
        data_is_available = False

        [_, transients] = find_steady_states_transients(
            mains, cols=self.cols, state_threshold=self.state_threshold,
            noise_level=self.noise_level, **load_kwargs)

        # For now ignoring the first transient
        # transients = transients[1:]

        # Initially all appliances/meters are in unknown state (denoted by -1)
        prev = OrderedDict()
        learnt_meters = self.centroids.index.values
        for meter in learnt_meters:
            prev[meter] = -1

        # Now iterating over mains data and disaggregating chunk by chunk
        for chunk in mains.power_series(**load_kwargs):
            # Record metadata
            timeframes.append(chunk.timeframe)
            measurement = chunk.name
            power_df = self.disaggregate_chunk(
                chunk, prev, transients)

            cols = pd.MultiIndex.from_tuples([chunk.name])

            for meter in learnt_meters:
                data_is_available = True
                df = power_df[[meter]]
                df.columns = cols
                key = '{}/elec/meter{:d}'.format(building_path, meter + 2)
                output_datastore.append(key, df)

            output_datastore.append(key=mains_data_location,
                                    value=pd.DataFrame(chunk, columns=cols))

        if data_is_available:
            self._save_metadata_for_disaggregation(
                output_datastore=output_datastore,
                sample_period=load_kwargs['sample_period'],
                measurement=measurement,
                timeframes=timeframes,
                building=mains.building(),
                supervised=False,
                num_meters=len(self.centroids)
            )
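Once this disaggregate() has run, the predictions can be read back like any other nilmtk dataset; a sketch, with the output path standing in for whatever file backs output_datastore:

from nilmtk import DataSet

result = DataSet('/tmp/hart85_out.h5')   # placeholder: the file behind output_datastore
res_elec = result.buildings[1].elec
print(res_elec.meters)                   # meter1 holds mains, meter2 onwards the predictions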
Example #6
    def disaggregate(self,
                     mains,
                     output_datastore=None,
                     exact_nilm_datastore=None,
                     **load_kwargs):
        """Disaggregate mains according to the model learnt previously.

        Parameters
        ----------
        mains : nilmtk.ElecMeter or nilmtk.MeterGroup
        output_datastore : instance of nilmtk.DataStore subclass
            For storing power predictions from disaggregation algorithm.
        sample_period : number, optional
            The desired sample period in seconds.
        **load_kwargs : key word arguments
            Passed to `mains.power_series(**kwargs)`
        """
        mains = mains.sitemeters()  # Only the main elements are interesting
        load_kwargs = self._pre_disaggregation_checks(mains, load_kwargs)

        load_kwargs.setdefault('sample_period', 2)
        load_kwargs.setdefault('sections', mains.good_sections())

        timeframes = []
        building_path = '/building{}'.format(mains.building() * 10)
        mains_data_location = building_path + '/elec/meter1'
        data_is_available = False

        [_, transients] = find_steady_states_transients(
            mains,
            columns=self.columns,
            state_threshold=self.state_threshold,
            noise_level=self.noise_level,
            **load_kwargs)

        # For now ignoring the first transient
        # transients = transients[1:]

        # Initially all appliances/meters are in unknown state (denoted by -1)
        prev = OrderedDict()
        learnt_meters = self.centroids.index.values
        for meter in learnt_meters:
            prev[meter] = -1

        timeframes = []
        disaggregation_overall = None
        # Now iterating over mains data and disaggregating chunk by chunk
        for chunk in mains.power_series(**load_kwargs):
            # Record metadata
            timeframes.append(chunk.timeframe)
            measurement = chunk.name
            power_df = self.disaggregate_chunk(chunk, prev, transients)

            columns = pd.MultiIndex.from_tuples([chunk.name])

            if False:  #output_datastore != None:
                for meter in learnt_meters:
                    data_is_available = True
                    df = power_df[[meter]]
                    df.columns = columns
                    key = '{}/elec/meter{:d}'.format(
                        building_path, meter + 2
                    )  # meter 0 does not exist and meter 1 is the undisaggregated mains
                    output_datastore.append(key, df)
                output_datastore.append(
                    key=mains_data_location,
                    value=pd.DataFrame(
                        chunk,
                        columns=columns))  # The mains series goes to meter 1.
            else:
                if disaggregation_overall is None:
                    disaggregation_overall = power_df
                else:
                    disaggregation_overall = pd.concat(
                        [disaggregation_overall, power_df])

        for column in disaggregation_overall:
            key = '{}/elec/meter{:d}'.format(
                building_path, column + 2)  # meter 0 does not exist; meter 1 is the rest
            tmp = disaggregation_overall[[column]]
            tmp.columns = (pd.MultiIndex.from_tuples(
                [('power', 'active')], names=['physical_quantity', 'type']))
            output_datastore.append(key, tmp)
            if exact_nilm_datastore is not None:
                exact_nilm_datastore.append(
                    key, self.model.appliances_detailed[[column]])
        #output_datastore.append('{}/elec/meter{:d}'.format(building_path, 1), self.model.overall_powerflow[phase])
        num_meters = [len(disaggregation_overall.columns)]
        stores = [(output_datastore, 300,
                   True)] if exact_nilm_datastore is None else [
                       (output_datastore, 300, True),
                       (exact_nilm_datastore, 0, False)
                   ]
        for store, res, rest_included in stores:
            self._save_metadata_for_disaggregation(
                output_datastore=store,
                sample_period=res,
                measurement=pd.MultiIndex.from_tuples(
                    [('power', 'active')], names=['physical_quantity',
                                                  'type']),
                timeframes=TimeFrameGroup([
                    TimeFrame(start=disaggregation_overall[0].index[0],
                              end=disaggregation_overall[0].index[-1])
                ]),
                building=mains.building(),
                supervised=False,
                num_meters=num_meters,
                original_building_meta=mains.meters[0].building_metadata,
                rest_powerflow_included=rest_included)
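A sketch of calling this fork with both stores; paths and the trained instance h are placeholders, and the exact-NILM ground-truth store is optional. Note disaggregate() calls sitemeters() itself, so the full elec MeterGroup is passed:

from nilmtk import HDFDataStore

pred_store = HDFDataStore('/tmp/pred.h5', 'w')
exact_store = HDFDataStore('/tmp/exact.h5', 'w')
h.disaggregate(dataset.buildings[1].elec, pred_store,
               exact_nilm_datastore=exact_store)
pred_store.close()
exact_store.close()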
Example #7
    def train(self, metergroup, **load_kwargs):
        """ Gets a site meter and trains the model based on it. 
        Goes chunkwise through the dataset and returns the events.
        In the end does a clustering for identifying the events.
        For signature description see basic class: It should get a sitemeter for unsupervised learning.

        Parameters
        ----------
        metergroup : a nilmtk.MeterGroup object
        For custom baranski (is unsupervised), this is a single site meter.
        """
        
        # Go through all parts and extract events
        # 1. Get events (this part already exists) -> should be extended -> GetSignatures
        # -> separate into the different possible signature types
        # --> one Separator base class, then several separators for the individual effect types:
        #       - Rising Spike
        #       - Falling Spike
        #       - Pulse
        #       - Fluctuation
        #       - Quick Vibrate
        #       - Gradual Falling
        #       - Flat
        # --> Work with masking: an event is masked when it is a RisingSpike,
        #     FallingSpike, or Pulse
        #
        # --> Each signature has its own specific properties
        # --> Some should contain a wildcard
        # The goal is to build a 3d pandas structure here
        #events = self._load_if_available()
        #if not events is None:
        #    self.events = events
        #    return

        events = pd.DataFrame()
        for elec in metergroup.all_meters():
            print("Find events for " + str(elec.metadata))
            transitions = find_steady_states_transients(
                elec, cols=self.cols, state_threshold=self.state_threshold,
                noise_level=self.noise_level, **load_kwargs)[1]
            # Mark as on- or off-event
            transitions['type'] = transitions.iloc[:, 0] >= 0
            transitions['meter'] = elec
            events = pd.concat([events, transitions])

        events.index.rename('time', inplace=True)
        events.set_index(['type', 'meter'], append=True, inplace=True)
        events = events.reorder_levels([2,1,0])
        events.sort_index(inplace=True)
        # Maybe also find the combinations here
        self.events = events
        
        #self._save(events)

        # 2. Cluster the events using different cluster methodologies
        #    (assignment happens automatically)
        # There is a predict after all: assign elements to clusters
        clusters = None  # self._load_if_available(what='cluster')
        if clusters is None:
            for curGroup, groupEvents in events.groupby(['meter', 'type']):
                centroids, assignments = self._cluster_events(
                    groupEvents, max_num_clusters=self.max_num_clusters,
                    method='kmeans')
                events.loc[curGroup, 'cluster'] = assignments
            #self._save(events, 'cluster')
        else:
            pass
            #events = clusters

        self.model = events
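After training, self.model holds the event table indexed by (meter, type, time) with a cluster label per event. A sketch of inspecting it, where the trainer instance h and the dataset are placeholders:

h.train(dataset.buildings[1].elec.sitemeters())   # unsupervised: site meter only
on_events = h.model.xs(True, level='type')        # rising (on) transitions
print(on_events.groupby('cluster').size())        # number of events per learnt cluster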