def test_as_dataframe_columns(self):
    """Check that every measurement column can be selected from a DataFrame.

    Builds a one-row DataFrame whose columns come from
    ``measure.measurement_columns`` and whose single row holds
    ``0 .. N_COLS - 1``.  Selecting each ``(physical_quantity, ac_type)``
    tuple must yield a one-element Series that carries the tuple as its
    name and the column's position as its only value.
    """
    # One power, one cumulative-energy and one energy column per AC type
    # (in that order), followed by a single voltage column.
    columns = []
    for ac_type in measure.AC_TYPES:
        columns.extend([
            ('power', ac_type),
            ('cumulative energy', ac_type),
            ('energy', ac_type),
        ])
    columns.append(('voltage', ''))

    # Create DataFrame: a single row where each cell holds its own position.
    N_COLS = len(columns)
    df = pd.DataFrame(np.arange(N_COLS).reshape((1, N_COLS)),
                      columns=measure.measurement_columns(columns))

    # Try accessing columns
    for position, column in enumerate(columns):
        series = df[column]
        self.assertIsInstance(series, pd.Series)
        self.assertEqual(series.name, column)
        self.assertEqual(series.shape, (1,))
        # The column has exactly one value, so its sum is that value.
        self.assertEqual(series.sum(), position)
def create_random_df():
    """Return a DataFrame of random readings on a one-second time index.

    The frame has 10,000 rows starting at 2012-01-01 and one column per
    entry of ``MEASUREMENTS`` (labelled through ``measurement_columns``).
    Values are random integers drawn from ``[0, 1000)`` and stored as
    float32.

    Returns
    -------
    DataFrame
    """
    # Must be an int: a float here is rejected by np.random.randint (in
    # ``size``) and is not a valid number of periods for pd.date_range.
    N_PERIODS = int(1E4)
    rng = pd.date_range('2012-01-01', freq='S', periods=N_PERIODS)
    data = np.random.randint(
        low=0, high=1000, size=(N_PERIODS, len(MEASUREMENTS)))
    return pd.DataFrame(data=data, index=rng, dtype=np.float32,
                        columns=measurement_columns(MEASUREMENTS))
def test_as_dataframe_columns(self):
    """Verify column access on a DataFrame built from measurement columns.

    For every AC type in ``measure.AC_TYPES`` the test creates 'power',
    'cumulative energy' and 'energy' columns, adds one ('voltage', '')
    column, and fills a single row with ``0 .. N_COLS - 1``.  Each column
    is then looked up by its tuple and checked to be a one-element Series
    named after the tuple and holding the column's position.
    """
    # Create columns using every permutation of ac_type and energy kind.
    # Order matters: the value stored in each column is its position.
    columns = []
    for ac_type in measure.AC_TYPES:
        for quantity in ('power', 'cumulative energy', 'energy'):
            columns.append((quantity, ac_type))
    columns.append(('voltage', ''))

    # Create DataFrame
    N_COLS = len(columns)
    values = np.arange(N_COLS).reshape((1, N_COLS))
    df = pd.DataFrame(values,
                      columns=measure.measurement_columns(columns))

    # Try accessing columns
    for expected_value, column in enumerate(columns):
        series = df[column]
        self.assertIsInstance(series, pd.Series)
        self.assertEqual(series.name, column)
        self.assertEqual(series.shape, (1, ))
        # Single-row frame, so the sum equals the one stored value.
        self.assertEqual(series.sum(), expected_value)
def _wikienergy_dataframe_to_hdf(wikienergy_dataframe, store):
    """Append every non-empty feed of a WikiEnergy table to an HDF store.

    Parameters
    ----------
    wikienergy_dataframe : DataFrame
        Must contain a 'localminute' column of datetime-like values (each
        must support ``.replace(tzinfo=None)``) and an integer 'dataid'
        column identifying the building.  Every other column is treated
        as one feed.  The input is copied, not modified.
    store : HDFStore-like
        Object exposing ``append(key, dataframe)``.

    Returns
    -------
    int
        Always 0.

    Notes
    -----
    Data is appended under ``building<dataid>/elec/meter<n>``.
    NOTE(review): ``n`` is incremented only for feeds that contain at
    least one non-null value -- confirm this numbering against callers.
    """
    local_dataframe = wikienergy_dataframe.copy()

    # remove timezone information to avoid append errors
    local_dataframe['localminute'] = pd.DatetimeIndex(
        [i.replace(tzinfo=None) for i in local_dataframe['localminute']])

    # set timestamp as frame index
    local_dataframe = local_dataframe.set_index('localminute')

    # Column names for dataframe
    columns = measurement_columns([('power', 'active')])

    for building_id in local_dataframe['dataid'].unique():
        # Keep only this building's rows; without the filter every
        # building key would receive the rows of all buildings.
        building_rows = local_dataframe[local_dataframe['dataid'] == building_id]

        # remove building id column
        feeds_dataframe = building_rows.drop('dataid', axis=1)
        # convert from kW to W (input unit assumed from the original comment)
        feeds_dataframe = feeds_dataframe.mul(1000)

        meter_id = 1
        for column in feeds_dataframe.columns:
            if feeds_dataframe[column].notnull().any():
                # Convert the timeseries into a one-column dataframe and
                # relabel it.  Passing ``columns=`` to the DataFrame
                # constructor with a named Series selects by label and
                # would produce an all-NaN column instead.
                feed_dataframe = feeds_dataframe[[column]].copy()
                feed_dataframe.columns = columns
                key = 'building{:d}/elec/meter{:d}'.format(building_id,
                                                           meter_id)
                store.append(key, feed_dataframe)
                meter_id += 1
    return 0
def create_co_test_hdf5():
    """Write the ``co_test.h5`` fixture into ``data_dir()``.

    The file holds three meters for building 1, each with 4000 one-second
    samples of ('power', 'active') stored as float32:

    * meter 1 -- mains, repeating 0, 200, 1000, 1200
    * meter 2 -- appliance 1, repeating 0, 200, 0, 200
    * meter 3 -- appliance 2, repeating 0, 0, 1000, 1000

    so the mains reading is always the sum of the two appliances.  Meter
    metadata is recorded through ``add_building_metadata`` and the
    dataset-wide ``meter_devices`` metadata is attached to the store root.
    Any existing file is overwritten (the store is opened in 'w' mode).
    """
    FILENAME = join(data_dir(), 'co_test.h5')
    N_METERS = 3
    chunk = 1000
    N_PERIODS = 4 * chunk
    rng = pd.date_range('2012-01-01', freq='S', periods=N_PERIODS)

    dfs = OrderedDict()
    data = OrderedDict()

    # mains meter data
    data[1] = np.array([0, 200, 1000, 1200] * chunk)

    # appliance 1 data
    data[2] = np.array([0, 200, 0, 200] * chunk)

    # appliance 2 data
    data[3] = np.array([0, 0, 1000, 1000] * chunk)

    for meter in range(1, N_METERS + 1):
        dfs[meter] = pd.DataFrame(
            data=data[meter], index=rng, dtype=np.float32,
            columns=measurement_columns([('power', 'active')]))

    store = pd.HDFStore(FILENAME, 'w', complevel=9, complib='zlib')
    # Close the file handle even if a write or metadata step fails.
    try:
        elec_meter_metadata = {}
        for meter in range(1, N_METERS + 1):
            key = 'building1/elec/meter{:d}'.format(meter)
            print("Saving", key)
            store.put(key, dfs[meter], format='table')
            elec_meter_metadata[meter] = {
                'device_model': TEST_METER['model'],
                'submeter_of': 1,
                'data_location': key
            }

        # For mains meter, we need to specify that it is a site meter
        del elec_meter_metadata[1]['submeter_of']
        elec_meter_metadata[1]['site_meter'] = True

        # Save dataset-wide metadata
        store.root._v_attrs.metadata = {
            'meter_devices': {
                TEST_METER['model']: TEST_METER
            }
        }
        print(store.root._v_attrs.metadata)

        # Building metadata
        add_building_metadata(store, elec_meter_metadata)

        for key in store.keys():
            print(store[key])

        store.flush()
    finally:
        store.close()
def power_data(simple=True):
    """Build a small DataFrame of synthetic power and energy readings.

    Parameters
    ----------
    simple : bool, optional
        If True, the 14 samples are spaced evenly, 10 seconds apart.
        Otherwise hand-picked, irregular second offsets are used.

    Returns
    -------
    DataFrame
        Indexed by timestamps counted from 2010-01-01, with float32
        'power' columns for 'active', 'reactive' (0.9 x active) and
        'apparent' (1.1 x active).  While iterating ``AC_TYPES``, an
        ('energy', 'reactive') column and a
        ('cumulative energy', 'apparent') column are added when those
        types are encountered.
    """
    # The same readings are used for both the regular and irregular index.
    readings = [0, 0, 0, 100, 100, 100, 150, 150, 200, 0, 0, 100, 5000, 0]
    if simple:
        STEP = 10
        secs = np.arange(start=0, stop=len(readings) * STEP, step=STEP)
    else:
        secs = [
            0, 10, 20, 30, 200, 210, 220, 230, 240, 249, 260, 270, 290, 1000
        ]

    active = np.array(readings, dtype=np.float32)
    reactive = active * 0.9
    apparent = active * 1.1

    start = pd.Timestamp('2010-01-01')
    index = [start + timedelta(seconds=sec) for sec in secs]

    ac_types = ['active', 'reactive', 'apparent']
    column_tuples = [('power', ac_type) for ac_type in ac_types]
    power_matrix = np.array([active, reactive, apparent]).T
    df = pd.DataFrame(power_matrix,
                      index=index,
                      dtype=np.float32,
                      columns=measurement_columns(column_tuples))

    # calculate energy
    # Gaps between samples are capped at MAX_SAMPLE_PERIOD seconds.
    interval_secs = np.diff(secs).clip(0, MAX_SAMPLE_PERIOD).astype(np.float32)

    for ac_type in AC_TYPES:
        watts = df['power', ac_type].values[:-1]
        # The last sample has no following interval, so it contributes 0.
        joules = np.concatenate([interval_secs * watts, [0]])
        kwh = joules / JOULES_PER_KWH
        if ac_type == 'reactive':
            # per-sample energy, this is not cumulative energy
            df['energy', ac_type] = kwh
        elif ac_type == 'apparent':
            df['cumulative energy', ac_type] = kwh.cumsum()

    return df
def create_co_test_hdf5():
    """Create the ``co_test.h5`` test file inside ``data_dir()``.

    Three meters are written under ``building1/elec/meter<n>``, each a
    float32 ("power", "active") table of 4000 one-second samples:

    * meter 1 (mains): 0, 200, 1000, 1200 repeated
    * meter 2 (appliance 1): 0, 200, 0, 200 repeated
    * meter 3 (appliance 2): 0, 0, 1000, 1000 repeated

    The mains signal equals the sum of the two appliance signals.  Meter 1
    is flagged as the site meter; meters 2 and 3 are recorded as submeters
    of meter 1.  The store is opened in "w" mode, so an existing file is
    replaced.
    """
    FILENAME = join(data_dir(), "co_test.h5")
    N_METERS = 3
    chunk = 1000
    N_PERIODS = 4 * chunk
    rng = pd.date_range("2012-01-01", freq="S", periods=N_PERIODS)

    dfs = OrderedDict()
    data = OrderedDict()

    # mains meter data
    data[1] = np.array([0, 200, 1000, 1200] * chunk)

    # appliance 1 data
    data[2] = np.array([0, 200, 0, 200] * chunk)

    # appliance 2 data
    data[3] = np.array([0, 0, 1000, 1000] * chunk)

    for meter in range(1, N_METERS + 1):
        dfs[meter] = pd.DataFrame(
            data=data[meter],
            index=rng,
            dtype=np.float32,
            columns=measurement_columns([("power", "active")]),
        )

    store = pd.HDFStore(FILENAME, "w", complevel=9, complib="zlib")
    # Guarantee the HDF5 handle is released if anything below raises.
    try:
        elec_meter_metadata = {}
        for meter in range(1, N_METERS + 1):
            key = "building1/elec/meter{:d}".format(meter)
            print("Saving", key)
            store.put(key, dfs[meter], format="table")
            elec_meter_metadata[meter] = {
                "device_model": TEST_METER["model"],
                "submeter_of": 1,
                "data_location": key,
            }

        # For mains meter, we need to specify that it is a site meter
        del elec_meter_metadata[1]["submeter_of"]
        elec_meter_metadata[1]["site_meter"] = True

        # Save dataset-wide metadata
        store.root._v_attrs.metadata = {"meter_devices": {TEST_METER["model"]: TEST_METER}}
        print(store.root._v_attrs.metadata)

        # Building metadata
        add_building_metadata(store, elec_meter_metadata)

        for key in store.keys():
            print(store[key])

        store.flush()
    finally:
        store.close()
def power_data(simple=True):
    """Return a DataFrame of synthetic power readings plus derived energy.

    Parameters
    ----------
    simple : bool, optional
        True gives 14 samples at a regular 10-second spacing; False uses
        a fixed list of irregular second offsets instead.

    Returns
    -------
    DataFrame
        Timestamps are offsets from 2010-01-01.  The float32 "power"
        columns are "active", "reactive" (active * 0.9) and "apparent"
        (active * 1.1).  For the "reactive" and "apparent" entries of
        ``AC_TYPES`` an ("energy", "reactive") column and a
        ("cumulative energy", "apparent") column are added respectively.
    """
    samples = [0, 0, 0, 100, 100, 100, 150, 150, 200, 0, 0, 100, 5000, 0]
    if not simple:
        secs = [0, 10, 20, 30, 200, 210, 220, 230, 240, 249, 260, 270, 290, 1000]
    else:
        STEP = 10
        secs = np.arange(start=0, stop=len(samples) * STEP, step=STEP)

    active = np.array(samples, dtype=np.float32)
    stacked = np.array([active, active * 0.9, active * 1.1])

    origin = pd.Timestamp("2010-01-01")
    index = [origin + timedelta(seconds=sec) for sec in secs]

    column_tuples = [("power", name) for name in ["active", "reactive", "apparent"]]
    df = pd.DataFrame(
        stacked.transpose(),
        index=index,
        dtype=np.float32,
        columns=measurement_columns(column_tuples),
    )

    # calculate energy
    # Time between consecutive samples, capped at MAX_SAMPLE_PERIOD.
    gaps = np.diff(secs)
    timedelta_secs = gaps.clip(0, MAX_SAMPLE_PERIOD).astype(np.float32)

    for ac_type in AC_TYPES:
        power = df["power", ac_type].values[:-1]
        # Pad with a trailing zero: the final sample has no interval after it.
        joules = np.concatenate([timedelta_secs * power, [0]])
        kwh = joules / JOULES_PER_KWH
        if ac_type == "reactive":
            # this is not cumulative energy
            df["energy", ac_type] = kwh
        elif ac_type == "apparent":
            df["cumulative energy", ac_type] = kwh.cumsum()

    return df
def create_random_df():
    """Create a DataFrame of random values indexed at one-second steps.

    Returns
    -------
    DataFrame
        10,000 rows beginning at 2012-01-01, one column per entry of
        ``MEASUREMENTS`` (labelled via ``measurement_columns``), holding
        random integers from ``[0, 1000)`` cast to float32.
    """
    # An integer is required here: the float literal 1E4 cannot be used
    # as an array dimension in np.random.randint(size=...) nor as the
    # number of periods for pd.date_range.
    N_PERIODS = 10000
    rng = pd.date_range('2012-01-01', freq='S', periods=N_PERIODS)
    shape = (N_PERIODS, len(MEASUREMENTS))
    data = np.random.randint(low=0, high=1000, size=shape)
    return pd.DataFrame(data=data,
                        index=rng,
                        dtype=np.float32,
                        columns=measurement_columns(MEASUREMENTS))