Exemple #1
0
    def test_as_dataframe_columns(self):
        """Check that each measurement column can be selected from a DataFrame.

        A one-row frame is built whose cell in column ``k`` holds the value
        ``k``; selecting a column by its ``(physical_quantity, ac_type)``
        tuple must return a one-element Series named after that tuple whose
        sum equals the column's position.
        """
        # For every AC type: power, cumulative energy and plain energy
        # (same order as the columns are later checked in).
        columns = []
        for ac_type in measure.AC_TYPES:
            columns.extend([
                ('power', ac_type),
                ('cumulative energy', ac_type),
                ('energy', ac_type),
            ])
        columns.append(('voltage', ''))

        # Single row holding 0 .. N_COLS-1, one value per column
        N_COLS = len(columns)
        df = pd.DataFrame(np.arange(N_COLS).reshape((1, N_COLS)),
                          columns=measure.measurement_columns(columns))

        # Each column must come back as a Series holding its own position
        for i, column in enumerate(columns):
            series = df[column]
            self.assertIsInstance(series, pd.Series)
            self.assertEqual(series.name, column)
            self.assertEqual(series.shape, (1,))
            self.assertEqual(series.sum(), i)
Exemple #2
0
def create_random_df():
    """Return a DataFrame of random readings sampled once per second.

    Returns
    -------
    DataFrame
        10000 rows indexed from 2012-01-01 at one-second frequency, with one
        float32 column per entry of ``MEASUREMENTS`` (column labels come from
        ``measurement_columns``).  Values are random integers in [0, 1000).
    """
    # Must be an int: np.random.randint rejects a float in `size`, and
    # pandas does not accept a non-integer `periods`.
    N_PERIODS = 10000
    rng = pd.date_range('2012-01-01', freq='S', periods=N_PERIODS)
    data = np.random.randint(
        low=0, high=1000, size=(N_PERIODS, len(MEASUREMENTS)))
    return pd.DataFrame(data=data, index=rng, dtype=np.float32,
                        columns=measurement_columns(MEASUREMENTS))
    def test_as_dataframe_columns(self):
        """Check that each measurement column can be selected from a DataFrame.

        A one-row frame is built whose cell in column ``k`` holds the value
        ``k``; selecting a column by its ``(physical_quantity, ac_type)``
        tuple must return a one-element Series named after that tuple whose
        sum equals the column's position.
        """
        # For every AC type: power, cumulative energy and plain energy
        # (same order as the columns are later checked in).
        columns = []
        for ac_type in measure.AC_TYPES:
            columns.extend([
                ('power', ac_type),
                ('cumulative energy', ac_type),
                ('energy', ac_type),
            ])
        columns.append(('voltage', ''))

        # Single row holding 0 .. N_COLS-1, one value per column
        N_COLS = len(columns)
        df = pd.DataFrame(np.arange(N_COLS).reshape((1, N_COLS)),
                          columns=measure.measurement_columns(columns))

        # Each column must come back as a Series holding its own position
        for i, column in enumerate(columns):
            series = df[column]
            self.assertIsInstance(series, pd.Series)
            self.assertEqual(series.name, column)
            self.assertEqual(series.shape, (1, ))
            self.assertEqual(series.sum(), i)
def _wikienergy_dataframe_to_hdf(wikienergy_dataframe, store):
    """Append every non-empty feed of a WikiEnergy frame to an HDF store.

    Parameters
    ----------
    wikienergy_dataframe : DataFrame
        Must contain a 'localminute' column of timestamps and a 'dataid'
        column holding the building id; every other column is treated as
        one meter feed.  The input is not modified (a copy is used).
    store : HDFStore-like
        Object with an ``append(key, frame)`` method.

    Returns
    -------
    int
        Always 0.

    Notes
    -----
    Meter ids are the 1-based position of the feed column, so a feed keeps
    the same meter id whether or not other feeds are empty.
    """
    local_dataframe = wikienergy_dataframe.copy()

    # remove timezone information to avoid append errors
    local_dataframe['localminute'] = pd.DatetimeIndex(
        [i.replace(tzinfo=None) for i in local_dataframe['localminute']])
    # set timestamp as frame index
    local_dataframe = local_dataframe.set_index('localminute')

    # Column names for dataframe
    columns = measurement_columns([('power', 'active')])

    for building_id in local_dataframe['dataid'].unique():
        # keep only this building's rows; otherwise every building would
        # be written with the readings of all buildings
        building_rows = local_dataframe[
            local_dataframe['dataid'] == building_id]
        # remove building id column and convert from kW to W
        feeds_dataframe = building_rows.drop('dataid', axis=1).mul(1000)

        for meter_id, column in enumerate(feeds_dataframe.columns, start=1):
            feed = feeds_dataframe[column]
            if feed.notnull().sum() > 0:
                # Build the frame first and relabel afterwards: passing
                # `columns=` together with a named Series selects by label
                # and would leave an all-NaN column.
                feed_dataframe = pd.DataFrame(feed)
                feed_dataframe.columns = columns

                key = 'building{:d}/elec/meter{:d}'.format(building_id, meter_id)
                store.append(key, feed_dataframe)
    return 0
Exemple #5
0
def create_co_test_hdf5():
    """Write a small synthetic three-meter dataset to ``co_test.h5``.

    Meter 1 is the mains (site) meter; meters 2 and 3 are appliances
    recorded as submeters of meter 1.  Each meter repeats a four-sample
    pattern, and the mains pattern is the sample-wise sum of the two
    appliance patterns.  The file is created (overwriting any existing one)
    inside ``data_dir()`` and the store is always closed, even if writing
    fails part-way.
    """
    FILENAME = join(data_dir(), 'co_test.h5')
    N_METERS = 3
    chunk = 1000
    N_PERIODS = 4 * chunk
    rng = pd.date_range('2012-01-01', freq='S', periods=N_PERIODS)

    dfs = OrderedDict()
    data = OrderedDict()

    # mains meter data
    data[1] = np.array([0, 200, 1000, 1200] * chunk)

    # appliance 1 data
    data[2] = np.array([0, 200, 0, 200] * chunk)

    # appliance 2 data
    data[3] = np.array([0, 0, 1000, 1000] * chunk)

    # One single-column (active power) frame per meter
    for i in range(1, N_METERS + 1):
        dfs[i] = pd.DataFrame(data=data[i],
                              index=rng,
                              dtype=np.float32,
                              columns=measurement_columns([('power', 'active')
                                                           ]))

    store = pd.HDFStore(FILENAME, 'w', complevel=9, complib='zlib')
    try:
        elec_meter_metadata = {}
        for meter in range(1, N_METERS + 1):
            key = 'building1/elec/meter{:d}'.format(meter)
            print("Saving", key)
            store.put(key, dfs[meter], format='table')
            elec_meter_metadata[meter] = {
                'device_model': TEST_METER['model'],
                'submeter_of': 1,
                'data_location': key
            }

        # For mains meter, we need to specify that it is a site meter
        del elec_meter_metadata[1]['submeter_of']
        elec_meter_metadata[1]['site_meter'] = True

        # Save dataset-wide metadata
        store.root._v_attrs.metadata = {
            'meter_devices': {
                TEST_METER['model']: TEST_METER
            }
        }
        print(store.root._v_attrs.metadata)

        # Building metadata
        add_building_metadata(store, elec_meter_metadata)
        for key in store.keys():
            print(store[key])

        store.flush()
    finally:
        # Release the file handle even when a write above raised
        store.close()
Exemple #6
0
def power_data(simple=True):
    """
    Build a small synthetic DataFrame of power and energy readings.

    Parameters
    ----------
    simple : bool, optional
        If True (default) the 14 samples are evenly spaced 10 seconds
        apart; otherwise hand-picked, irregular second offsets are used.

    Returns
    -------
    DataFrame
        Indexed by timestamps offset from 2010-01-01.  Holds float32
        ('power', ac_type) data for active, reactive (0.9 x active) and
        apparent (1.1 x active) power, labelled via ``measurement_columns``.
        While iterating ``AC_TYPES``, an ('energy', 'reactive') column and a
        ('cumulative energy', 'apparent') column are added; no energy column
        is added for active power.
    """
    data = [0, 0, 0, 100, 100, 100, 150, 150, 200, 0, 0, 100, 5000, 0]
    if simple:
        STEP = 10
        secs = np.arange(start=0, stop=len(data) * STEP, step=STEP)
    else:
        secs = [
            0, 10, 20, 30, 200, 210, 220, 230, 240, 249, 260, 270, 290, 1000
        ]

    data = np.array(data, dtype=np.float32)
    active = data
    reactive = data * 0.9
    apparent = data * 1.1

    # int(): datetime.timedelta rejects NumPy integer scalars, which is
    # what iterating the np.arange above yields.
    index = [
        pd.Timestamp('2010-01-01') + timedelta(seconds=int(sec))
        for sec in secs
    ]
    column_tuples = [('power', ac_type)
                     for ac_type in ['active', 'reactive', 'apparent']]
    df = pd.DataFrame(np.array([active, reactive, apparent]).transpose(),
                      index=index,
                      dtype=np.float32,
                      columns=measurement_columns(column_tuples))

    # calculate energy
    # this is not cumulative energy
    # Gaps between samples are capped at MAX_SAMPLE_PERIOD seconds.
    timedelta_secs = np.diff(secs).clip(0,
                                        MAX_SAMPLE_PERIOD).astype(np.float32)

    for ac_type in AC_TYPES:
        # Each sample's power is held until the next sample; the last
        # sample has no successor, so it contributes zero energy.
        joules = timedelta_secs * df['power', ac_type].values[:-1]
        joules = np.concatenate([joules, [0]])
        kwh = joules / JOULES_PER_KWH
        if ac_type == 'reactive':
            df['energy', ac_type] = kwh
        elif ac_type == 'apparent':
            df['cumulative energy', ac_type] = kwh.cumsum()

    return df
def create_co_test_hdf5():
    """Write a small synthetic three-meter dataset to ``co_test.h5``.

    Meter 1 is the mains (site) meter; meters 2 and 3 are appliances
    recorded as submeters of meter 1.  Each meter repeats a four-sample
    pattern, and the mains pattern is the sample-wise sum of the two
    appliance patterns.  The file is created (overwriting any existing one)
    inside ``data_dir()`` and the store is always closed, even if writing
    fails part-way.
    """
    FILENAME = join(data_dir(), "co_test.h5")
    N_METERS = 3
    chunk = 1000
    N_PERIODS = 4 * chunk
    rng = pd.date_range("2012-01-01", freq="S", periods=N_PERIODS)

    dfs = OrderedDict()
    data = OrderedDict()

    # mains meter data
    data[1] = np.array([0, 200, 1000, 1200] * chunk)

    # appliance 1 data
    data[2] = np.array([0, 200, 0, 200] * chunk)

    # appliance 2 data
    data[3] = np.array([0, 0, 1000, 1000] * chunk)

    # One single-column (active power) frame per meter
    for i in range(1, N_METERS + 1):
        dfs[i] = pd.DataFrame(
            data=data[i], index=rng, dtype=np.float32, columns=measurement_columns([("power", "active")])
        )

    store = pd.HDFStore(FILENAME, "w", complevel=9, complib="zlib")
    try:
        elec_meter_metadata = {}
        for meter in range(1, N_METERS + 1):
            key = "building1/elec/meter{:d}".format(meter)
            print("Saving", key)
            store.put(key, dfs[meter], format="table")
            elec_meter_metadata[meter] = {"device_model": TEST_METER["model"], "submeter_of": 1, "data_location": key}

        # For mains meter, we need to specify that it is a site meter
        del elec_meter_metadata[1]["submeter_of"]
        elec_meter_metadata[1]["site_meter"] = True

        # Save dataset-wide metadata
        store.root._v_attrs.metadata = {"meter_devices": {TEST_METER["model"]: TEST_METER}}
        print(store.root._v_attrs.metadata)

        # Building metadata
        add_building_metadata(store, elec_meter_metadata)
        for key in store.keys():
            print(store[key])

        store.flush()
    finally:
        # Release the file handle even when a write above raised
        store.close()
def power_data(simple=True):
    """
    Build a small synthetic DataFrame of power and energy readings.

    Parameters
    ----------
    simple : bool, optional
        If True (default) the 14 samples are evenly spaced 10 seconds
        apart; otherwise hand-picked, irregular second offsets are used.

    Returns
    -------
    DataFrame
        Indexed by timestamps offset from 2010-01-01.  Holds float32
        ("power", ac_type) data for active, reactive (0.9 x active) and
        apparent (1.1 x active) power, labelled via ``measurement_columns``.
        While iterating ``AC_TYPES``, an ("energy", "reactive") column and a
        ("cumulative energy", "apparent") column are added; no energy column
        is added for active power.
    """
    data = [0, 0, 0, 100, 100, 100, 150, 150, 200, 0, 0, 100, 5000, 0]
    if simple:
        STEP = 10
        secs = np.arange(start=0, stop=len(data) * STEP, step=STEP)
    else:
        secs = [0, 10, 20, 30, 200, 210, 220, 230, 240, 249, 260, 270, 290, 1000]

    data = np.array(data, dtype=np.float32)
    active = data
    reactive = data * 0.9
    apparent = data * 1.1

    # int(): datetime.timedelta rejects NumPy integer scalars, which is
    # what iterating the np.arange above yields.
    index = [pd.Timestamp("2010-01-01") + timedelta(seconds=int(sec)) for sec in secs]
    column_tuples = [("power", ac_type) for ac_type in ["active", "reactive", "apparent"]]
    df = pd.DataFrame(
        np.array([active, reactive, apparent]).transpose(),
        index=index,
        dtype=np.float32,
        columns=measurement_columns(column_tuples),
    )

    # calculate energy
    # this is not cumulative energy
    # Gaps between samples are capped at MAX_SAMPLE_PERIOD seconds.
    timedelta_secs = np.diff(secs).clip(0, MAX_SAMPLE_PERIOD).astype(np.float32)

    for ac_type in AC_TYPES:
        # Each sample's power is held until the next sample; the last
        # sample has no successor, so it contributes zero energy.
        joules = timedelta_secs * df["power", ac_type].values[:-1]
        joules = np.concatenate([joules, [0]])
        kwh = joules / JOULES_PER_KWH
        if ac_type == "reactive":
            df["energy", ac_type] = kwh
        elif ac_type == "apparent":
            df["cumulative energy", ac_type] = kwh.cumsum()

    return df
def create_random_df():
    """Return a DataFrame of random readings sampled once per second.

    Returns
    -------
    DataFrame
        10000 rows indexed from 2012-01-01 at one-second frequency, with one
        float32 column per entry of ``MEASUREMENTS`` (column labels come from
        ``measurement_columns``).  Values are random integers in [0, 1000).
    """
    # Must be an int: np.random.randint rejects a float in `size`, and
    # pandas does not accept a non-integer `periods`.
    N_PERIODS = 10000
    rng = pd.date_range('2012-01-01', freq='S', periods=N_PERIODS)
    data = np.random.randint(low=0, high=1000, size=(N_PERIODS, len(MEASUREMENTS)))
    return pd.DataFrame(data=data, index=rng, dtype=np.float32,
                        columns=measurement_columns(MEASUREMENTS))