예제 #1
0
    def get_home_readings(self,
                          homeid,
                          merge_mains_clamps=True,
                          oem_mains_readings=True,
                          unusable_sensors=None,
                          appliance_readings=True,
                          cutoff_date=None):
        """Get processed and merged readings from locally stored reading data.

        Must run store_gold_elec_data_locally.py before calling this method.

        :param homeid: int
            homeid of the home for which to retrieve readings
        :param merge_mains_clamps: bool
            forwarded as ``merge`` to ``process_mains_clamp``
        :param oem_mains_readings: bool
            if True and the home has exactly one usable OEM mains sensor,
            its processed readings are joined as column ``mains_real`` and
            OEM flatline periods are replaced with NaN in all OEM columns
        :param unusable_sensors: collection of sensorids or None
            sensors to exclude. If None, 'anomalous_sensors.csv' (relative
            path) is read and every sensor listed there with neither a
            start time nor an end time is excluded. If given, the CSV is
            not read and ``None`` is passed to ``get_sensor_readings`` as
            the anomalous-sensor table.
        :param appliance_readings: bool
            if True, OEM and Z-Wave appliance readings are joined, one
            column per appliance type
        :param cutoff_date: value comparable with the ``time`` column, or None
            if given, only readings with ``time`` strictly greater than
            this value are kept
        :return: DataFrame
            processed readings for electrical mains and appliances
        """
        anomalous_sensors = None
        if unusable_sensors is None:
            anomalous_sensors = pd.read_csv(
                'anomalous_sensors.csv',
                dtype={
                    'homeid': np.int32,
                    'sensorid': np.int32,
                    'notes': str
                },
                parse_dates=['starttime', 'endtime'])
            # NaT never compares equal to anything (not even itself), so
            # missing timestamps have to be detected with isna().
            no_time_bounds = (anomalous_sensors.starttime.isna()
                              & anomalous_sensors.endtime.isna())
            unusable_sensors = \
                anomalous_sensors[no_time_bounds].sensorid.values

        # get metadata and readings store
        with MetaDataStore() as s:
            metadata = MetaData(s)

        reading_store = ReadingDataStore()

        # try/finally so the reading store is closed even if processing fails
        try:
            duplicated_sensors = [
                u for v in self.sensors_to_merge.values() for u in v
            ]

            sensors = metadata.sensor_merged()
            indices = sensors['sensorid'].isin(reading_store.get_sensorids())\
                & (sensors['homeid'] == homeid)\
                & ~sensors.sensorid.isin(duplicated_sensors)\
                & ~sensors.sensorid.isin(unusable_sensors)

            sensors = sensors.loc[indices]

            # get sensorids
            mains_30A_sensorid, mains_100A_sensorid = [
                sensors.sensorid[sensors.sensorid.isin(ids)] for ids in [
                    metadata.mains_30A_rms_sensors(),
                    metadata.mains_100A_rms_sensors()
                ]
            ]

            # empty placeholder used when a mains clamp is missing/ambiguous
            dummy_readings = pd.DataFrame(
                columns=['time', 'value', 'tenths_seconds_since_last_reading'])
            dummy_readings['time'] = dummy_readings['time'].astype(
                'datetime64[ns]')

            # get apparent power readings; a clamp is only read when exactly
            # one matching sensor exists for this home
            mains_30A_readings, mains_100A_readings = [
                self.get_sensor_readings(
                    int(sid.iloc[0]), reading_store.get_sensor_readings,
                    anomalous_sensors)
                if (sid.shape[0] == 1) else dummy_readings.copy()
                for sid in [mains_30A_sensorid, mains_100A_sensorid]
            ]

            if cutoff_date is not None:
                mains_30A_readings, mains_100A_readings = [
                    readings[readings.time > cutoff_date]
                    for readings in [mains_30A_readings, mains_100A_readings]
                ]

            readings_processed = self.process_mains_clamp(
                mains_30A_readings,
                mains_100A_readings,
                merge=merge_mains_clamps)

            del mains_30A_readings, mains_100A_readings

            # column names that hold OEM data (blanked during OEM flatlines)
            oem_sensors = []

            if appliance_readings:
                # get oem and zwave appliance readings
                oem_appliances = metadata.appliance_oem_sensors()
                indices = oem_appliances.sensorid.isin(sensors.sensorid)
                oem_appliances = oem_appliances[indices]

                oem_sensors.extend(list(oem_appliances.appliancetype.values))

                zwave_appliances = metadata.appliance_zwave_sensors()
                indices = zwave_appliances.sensorid.isin(sensors.sensorid)
                zwave_appliances = zwave_appliances[indices]

                for appliances, readings_processor in zip(
                    [oem_appliances, zwave_appliances], [
                        self.process_oem_appliance_readings,
                        self.process_zwave_readings
                    ]):

                    for index, row in appliances.iterrows():
                        appliance = row.appliancetype

                        readings = self.get_sensor_readings(
                            int(row.sensorid),
                            reading_store.get_sensor_readings,
                            anomalous_sensors)

                        if cutoff_date is not None:
                            readings = readings[readings.time > cutoff_date]

                        readings = readings_processor(readings)
                        readings.rename(columns={'power': appliance},
                                        inplace=True)

                        # merge multiple appliances of same type
                        if appliance in readings_processed.keys():
                            combined = readings_processed[appliance] + \
                                readings[appliance]
                            # fillna returns a new Series; it must be
                            # assigned back for the fill to take effect
                            readings_processed[appliance] = combined.fillna(
                                readings[appliance])
                            del combined
                        else:
                            readings_processed = readings_processed.join(
                                readings, how='left')
                        del readings

                        # free the per-sensor frames before loading the next
                        gc.collect()

            if oem_mains_readings:
                # get oem mains readings
                mains_oem_sensorid = sensors.sensorid[sensors.sensorid.isin(
                    metadata.mains_oem_sensors())]

                if len(mains_oem_sensorid) == 1:

                    mains_oem_readings = self.get_sensor_readings(
                        int(mains_oem_sensorid.iloc[0]),
                        reading_store.get_sensor_readings,
                        anomalous_sensors)

                    if cutoff_date is not None:
                        mains_oem_readings = mains_oem_readings[
                            mains_oem_readings.time > cutoff_date]

                    mains_oem_readings = self.process_oem_mains_readings(
                        mains_oem_readings)
                    readings_processed = readings_processed.join(
                        mains_oem_readings, how='left')
                    del mains_oem_readings
                    readings_processed.rename(columns={'power': 'mains_real'},
                                              inplace=True)

                    oem_sensors.append('mains_real')

                    # replace OEM flatlines with NAN
                    if readings_processed.shape[0] > 0:
                        oem_flat_periods = self.find_oem_flatline(
                            readings_processed.mains_real)

                        for start_time, period in oem_flat_periods.iterrows():
                            end_time = start_time + period.duration
                            readings_processed.loc[start_time:end_time,
                                                   oem_sensors] = np.nan

                    # wherever OEM mains is missing, blank all OEM columns
                    readings_processed.loc[
                        readings_processed.mains_real.isnull(),
                        oem_sensors] = np.nan

            return readings_processed
        finally:
            # close files
            reading_store.close()