Example #1
import argparse
import logging

import pandas as pd

# IdealCSV2Hdf5, MetaDataStore, MetaData, ReadingDataStore and LOCAL_DATA_DIR are
# project-specific helpers; import them from wherever they live in this repository.


def main():
    # set up logging
    logger = logging.getLogger('store_data_locally')
    logger.setLevel(logging.DEBUG)
    fh = logging.FileHandler('store_data_locally.log')
    fh.setLevel(logging.DEBUG)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    fh.setFormatter(formatter)
    logger.addHandler(fh)

    # get arguments
    parser = argparse.ArgumentParser(
        description='Query sensor readings from the IDEAL database '
        'and store locally.')
    parser.add_argument('--dataset_path',
                        help='directory of the original IDEAL dataset')
    parser.add_argument('--data_path',
                        default=LOCAL_DATA_DIR,
                        help='directory to store data')

    args = parser.parse_args()

    # store metadata locally
    converter = IdealCSV2Hdf5(args.dataset_path, data_dir=args.data_path)
    converter.store_metadata()

    with MetaDataStore(data_dir=args.data_path) as s:
        metadata = MetaData(s)

    # get relevant sensorids
    sensors = metadata.sensor_merged()
    indices = pd.Series([False] * sensors.shape[0], index=sensors.index.copy())
    indices = indices | sensors.sensorid.isin(metadata.electric_sensors())
    indices = indices & sensors.homeid.astype(int).isin(metadata.gold_homes())
    sensorids = sensors.sensorid[indices]
    sensorids_to_store = sensorids

    print('Query and store readings from {0} sensors'.format(
        len(sensorids_to_store)))

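    # convert each selected sensor's readings and store them in the local data store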
    for idx, sensorid in enumerate(sensorids_to_store):
        converter = IdealCSV2Hdf5(args.dataset_path, data_dir=args.data_path)

        logger.info('({0}/{1}) Sensorid: {2}'.format(idx + 1,
                                                     len(sensorids_to_store),
                                                     sensorid))

        converter.store_readings(sensorid)

    # try and read stored data
    readings_store = ReadingDataStore(data_dir=args.data_path)
    readings_count = 0

    for idx, sensorid in enumerate(sensorids):
        readings = readings_store.get_sensor_readings(sensorid)
        readings_count += len(readings)

    logger.info('Total readings : {0}'.format(readings_count))

    readings_store.close()


if __name__ == '__main__':
    main()
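
The script is driven by --dataset_path and --data_path; once it has run, the stored readings can be read back in a separate session. A minimal sketch, assuming ReadingDataStore is importable from the project (the import path is not shown here):

def read_back_first_sensor(data_dir):
    # data_dir should match the --data_path given to the script above;
    # ReadingDataStore comes from the same project module used above
    store = ReadingDataStore(data_dir=data_dir)
    try:
        sensorids = list(store.get_sensorids())
        readings = store.get_sensor_readings(sensorids[0])
        print('{0} readings for sensor {1}'.format(len(readings), sensorids[0]))
    finally:
        store.close()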
Example #2
    def get_home_readings(self,
                          homeid,
                          merge_mains_clamps=True,
                          oem_mains_readings=True,
                          unusable_sensors=None,
                          appliance_readings=True,
                          cutoff_date=None):
        """ get processed and merged readings from locally stored reading data.
        Must run store_gold_elec_data_locally.py before calling this method

        :param homeid: int
            homeid of the home for which to retrive readings
        :return: DataFrame
            processed readings for electrical mains and appliances
        """
        anomalous_sensors = None
        if unusable_sensors is None:
            anomalous_sensors = pd.read_csv(
                'anomalous_sensors.csv',
                dtype={
                    'homeid': np.int32,
                    'sensorid': np.int32,
                    'notes': str
                },
                parse_dates=['starttime', 'endtime'])
            # sensors flagged as anomalous over their entire record have no
            # start/end times recorded
            unusable_sensors = anomalous_sensors[
                anomalous_sensors.starttime.isna()
                & anomalous_sensors.endtime.isna()].sensorid.values

        # get metadata and readings store
        with MetaDataStore() as s:
            metadata = MetaData(s)

        reading_store = ReadingDataStore()

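        # flatten self.sensors_to_merge; these sensorids are excluded from the
        # selection below as duplicates of the merged sensors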
        duplicated_sensors = [
            u for v in self.sensors_to_merge.values() for u in v
        ]

        sensors = metadata.sensor_merged()
        indices = sensors['sensorid'].isin(reading_store.get_sensorids())\
            & (sensors['homeid'] == homeid)\
            & ~sensors.sensorid.isin(duplicated_sensors)

        indices = indices & ~sensors.sensorid.isin(unusable_sensors)

        sensors = sensors.loc[indices]

        # get sensorids
        mains_30A_sensorid, mains_100A_sensorid = [
            sensors.sensorid[sensors.sensorid.isin(ids)] for ids in [
                metadata.mains_30A_rms_sensors(),
                metadata.mains_100A_rms_sensors()
            ]
        ]

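        # empty frame used as a fallback when a home has no matching mains clamp sensor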
        dummy_readings = pd.DataFrame(
            columns=['time', 'value', 'tenths_seconds_since_last_reading'])
        dummy_readings['time'] = dummy_readings['time'].astype(
            'datetime64[ns]')

        # get apparent power readings
        mains_30A_readings, mains_100A_readings = [
            self.get_sensor_readings(
                int(sid), reading_store.get_sensor_readings, anomalous_sensors)
            if (sid.shape[0] == 1) else dummy_readings.copy()
            for sid in [mains_30A_sensorid, mains_100A_sensorid]
        ]

        if cutoff_date is not None:
            mains_30A_readings, mains_100A_readings = [
                readings[readings.time > cutoff_date]
                for readings in [mains_30A_readings, mains_100A_readings]
            ]

        readings_processed = self.process_mains_clamp(mains_30A_readings,
                                                      mains_100A_readings,
                                                      merge=merge_mains_clamps)

        del mains_30A_readings, mains_100A_readings

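        # column names derived from OEM sensors; used below to blank out OEM flatline periods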
        oem_sensors = []

        if appliance_readings:
            # get oem and zwave appliance readings
            oem_appliances = metadata.appliance_oem_sensors()
            indices = oem_appliances.sensorid.isin(sensors.sensorid)
            oem_appliances = oem_appliances[indices]

            oem_sensors.extend(list(oem_appliances.appliancetype.values))

            zwave_appliances = metadata.appliance_zwave_sensors()
            indices = zwave_appliances.sensorid.isin(sensors.sensorid)
            zwave_appliances = zwave_appliances[indices]

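            # pair each appliance table (OEM, zwave) with its readings processor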
            for appliances, readings_processor in zip(
                [oem_appliances, zwave_appliances], [
                    self.process_oem_appliance_readings,
                    self.process_zwave_readings
                ]):

                for index, row in appliances.iterrows():

                    readings = self.get_sensor_readings(
                        int(row.sensorid), reading_store.get_sensor_readings,
                        anomalous_sensors)

                    if cutoff_date is not None:
                        readings = readings[readings.time > cutoff_date]

                    readings = readings_processor(readings)
                    readings.rename(columns={'power': row.appliancetype},
                                    inplace=True)

                    # merge multiple appliances of same type
                    if row.appliancetype in readings_processed.keys():
                        readings_processed[row.appliancetype] = \
                            readings_processed[row.appliancetype] + \
                            readings[row.appliancetype]
                        readings_processed[row.appliancetype] = \
                            readings_processed[row.appliancetype].fillna(
                                readings[row.appliancetype])
                    else:
                        readings_processed = readings_processed.join(
                            readings, how='left')
                    del readings

                    gc.collect()

        if oem_mains_readings:
            # get oem mains readings
            mains_oem_sensorid = sensors.sensorid[sensors.sensorid.isin(
                metadata.mains_oem_sensors())]

            if len(mains_oem_sensorid) == 1:

                mains_oem_readings = self.get_sensor_readings(
                    int(mains_oem_sensorid), reading_store.get_sensor_readings,
                    anomalous_sensors)

                if cutoff_date is not None:
                    mains_oem_readings = mains_oem_readings[
                        mains_oem_readings.time > cutoff_date]

                mains_oem_readings = self.process_oem_mains_readings(
                    mains_oem_readings)
                readings_processed = readings_processed.join(
                    mains_oem_readings, how='left')
                del mains_oem_readings
                readings_processed.rename(columns={'power': 'mains_real'},
                                          inplace=True)

                oem_sensors.append('mains_real')

                # replace OEM flatlines with NAN
                if readings_processed.shape[0] > 0:
                    oem_flat_periods = self.find_oem_flatline(
                        readings_processed.mains_real)

                    for start_time, period in oem_flat_periods.iterrows():
                        end_time = start_time + period.duration
                        readings_processed.loc[start_time:end_time,
                                               oem_sensors] = np.nan

                readings_processed.loc[readings_processed.mains_real.isnull(),
                                       oem_sensors] = np.nan

        # close files
        reading_store.close()

        return readings_processed
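
A minimal usage sketch for the method above. It assumes `processor` is an instance of the (unshown) class that defines get_home_readings and that the local readings store has already been populated; the homeid and cutoff date are example values only:

import pandas as pd


def load_home(processor, homeid=105):
    # `processor` is an instance of the class defining get_home_readings;
    # homeid=105 and the cutoff below are placeholder values
    return processor.get_home_readings(
        homeid,
        merge_mains_clamps=True,
        oem_mains_readings=True,
        appliance_readings=True,
        cutoff_date=pd.Timestamp('2017-01-01'))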