def convert_hipe(hipe_path, output_filename, format="HDF"):
    """Convert the HIPE dataset to the NILMTK format.

    This method works with both the 1 week and the 3 month data.

    Parameters
    ----------
    hipe_path : str
        The root path of the HIPE dataset.
    output_filename : str
        The destination filename (including path and suffix).
    format : str
        format of output. Either "HDF" or "CSV". Defaults to "HDF".
    """
    datastore = get_datastore(output_filename, format, mode="w")

    _convert(hipe_path, datastore, _hipe_measurement_mapping_func,
             "Europe/Berlin")

    # Relative path: the metadata folder is resolved against the
    # current working directory.
    metadata_path = "metadata"
    save_yaml_to_datastore(metadata_path, datastore)

    datastore.close()

    print("Done converting HIPE!")
def convert_refit(input_path, output_filename, format='HDF'):
    """
    Parameters
    ----------
    input_path : str
        The root path of the CSV files, e.g. House1.csv
    output_filename : str
        The destination filename (including path and suffix).
    format : str
        format of output. Either 'HDF' or 'CSV'. Defaults to 'HDF'
    """
    # Open DataStore
    store = get_datastore(output_filename, format, mode='w')

    # Convert raw data to DataStore
    _convert(input_path, store, 'Europe/London')

    # Add metadata
    save_yaml_to_datastore(join(get_module_directory(),
                                'dataset_converters',
                                'refit',
                                'metadata'),
                           store)
    store.close()

    print("Done converting REFIT to HDF5!")
def convert_lab(lab_path, output_filename, format='HDF'):
    """
    Parameters
    ----------
    lab_path : str
        The root path of the LAB dataset.
    output_filename : str
        The destination filename (including path and suffix).
    format : str
        format of output. Either 'HDF' or 'CSV'. Defaults to 'HDF'
    """
    # Previously this was "'apparent' if chan_id <= 2 else 'active'";
    # in the LAB dataset every channel reports active power.
    def _lab_measurement_mapping_func(house_id, chan_id):
        ac_type = 'active'
        return [('power', ac_type)]

    # Open DataStore
    store = get_datastore(output_filename, format, mode='w')

    # Convert raw data to DataStore
    _convert(lab_path, store, _lab_measurement_mapping_func,
             'America/Fortaleza')

    # Add metadata
    save_yaml_to_datastore(join(get_module_directory(),
                                'dataset_converters',
                                'lab',
                                'metadata'),
                           store)
    store.close()

    print("Done converting LAB to HDF5!")
def convert_enertalk(input_path, output_filename, format='HDF',
                     tz='Asia/Seoul'):
    """
    Parameters
    ----------
    input_path : str
        The root path of the ENERTALK dataset.
    output_filename : str
        The destination filename (including path and suffix).
    format : str
        format of output. Either 'HDF' or 'CSV'. Defaults to 'HDF'
    tz : str
        Timezone, e.g. 'Asia/Seoul'
    """
    # Open DataStore
    store = get_datastore(output_filename, format, mode='w')

    # Convert raw data to DataStore
    _convert(input_path, store, tz=tz)

    # Add metadata (relative path, resolved against the current
    # working directory)
    save_yaml_to_datastore('metadata/', store)

    store.close()
def convert_redd(redd_path, output_filename, format='HDF'):
    """
    Parameters
    ----------
    redd_path : str
        The root path of the low_freq dataset in REDD format.
    output_filename : str
        The destination filename (including path and suffix).
    format : str
        format of output. Either 'HDF' or 'CSV'. Defaults to 'HDF'
    """
    def _redd_measurement_mapping_func(house_id, chan_id):
        return [('power', 'active')]

    # Open DataStore
    store = get_datastore(output_filename, format, mode='w')

    # Convert raw data to DataStore
    _convert(redd_path, store, _redd_measurement_mapping_func, 'Asia/Taipei')

    # Add metadata (the metadata directory is expected inside the
    # dataset root)
    save_yaml_to_datastore(join(redd_path, 'metadata'), store)

    store.close()

    print("Done converting III to HDF5!")
def convert_redd(redd_path, output_filename, format="HDF"):
    """
    Parameters
    ----------
    redd_path : str
        The root path of the REDD low_freq dataset.
    output_filename : str
        The destination filename (including path and suffix).
    format : str
        format of output. Either 'HDF' or 'CSV'. Defaults to 'HDF'
    """
    def _redd_measurement_mapping_func(house_id, chan_id):
        ac_type = "apparent" if chan_id <= 2 else "active"
        return [("power", ac_type)]

    # Open DataStore
    store = get_datastore(output_filename, format, mode="w")

    # Convert raw data to DataStore
    _convert(redd_path, store, _redd_measurement_mapping_func, "US/Eastern")

    # Add metadata
    save_yaml_to_datastore(join(get_module_directory(),
                                "dataset_converters",
                                "redd",
                                "metadata"),
                           store)
    store.close()

    print("Done converting REDD to HDF5!")
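
# A minimal usage sketch, not part of the original converter: convert a
# local copy of REDD and reload it with nilmtk.DataSet. The paths below
# are hypothetical placeholders; point them at the extracted low_freq
# directory and the desired output file.
if __name__ == "__main__":
    from nilmtk import DataSet

    convert_redd("/data/redd/low_freq", "/data/redd/redd.h5")

    # Reload the converted store and inspect the mains of house 1.
    redd = DataSet("/data/redd/redd.h5")
    elec = redd.buildings[1].elec
    print(elec.mains())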
def convert_deddiag(connection, output_filename, format='HDF',
                    start_date=DEFAULT_START_DATE, end_date=DEFAULT_END_DATE,
                    tz=DEFAULT_TZ):
    """
    Parameters
    ----------
    connection : Connection
        Connection to the DEDDIAG database
        Example: connection = Connection(host="localhost", port="5432",
                                         db_name="postgres",
                                         user="******", password="******")
    output_filename : str
        The destination filename, including path and suffix
        Example: ./data/deddiag.h5
    format : str
        format of output. Either 'HDF' or 'CSV'. Defaults to 'HDF'
    """
    # Open DataStore
    # TODO: wrap in try/except
    dest_file = get_datastore(output_filename, format, mode='w')

    # Convert raw data to DataStore
    _convert(connection, dest_file, start_date, end_date, tz)

    path_to_metadata = join(get_module_directory(),
                            'dataset_converters',
                            'deddiag',
                            'metadata')

    # Add metadata
    save_yaml_to_datastore(path_to_metadata, dest_file)
    dest_file.close()

    print("Done converting DEDDIAG to HDF5!")
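
# A usage sketch, not part of the original module. It assumes the
# Connection class comes from the deddiag_loader package, as the
# docstring example suggests; host, port and credentials are
# placeholders for a locally restored DEDDIAG PostgreSQL dump.
if __name__ == "__main__":
    from deddiag_loader import Connection

    connection = Connection(host="localhost", port="5432",
                            db_name="postgres",
                            user="postgres", password="postgres")
    convert_deddiag(connection, "./data/deddiag.h5")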
def convert_alva(alva_path, output_filename, format='HDF'):
    """
    Parameters
    ----------
    alva_path : str
        The root path of the alva low_freq dataset.
    output_filename : str
        The destination filename (including path and suffix).
    format : str
        format of output. Either 'HDF' or 'CSV'. Defaults to 'HDF'
    """
    def _alva_measurement_mapping_func(house_id, chan_id):
        ac_type = 'apparent' if chan_id <= 2 else 'active'
        return [('power', ac_type)]

    # Open DataStore
    store = get_datastore(output_filename, format, mode='w')

    # Convert raw data to DataStore
    _convert(alva_path, store, _alva_measurement_mapping_func, 'US/Eastern')

    # Add metadata
    save_yaml_to_datastore(join(get_module_directory(),
                                'dataset_converters',
                                'alva',
                                'metadata'),
                           store)
    store.close()

    print("Done converting alva to HDF5!")
def convert_ideal(ideal_path, output_filename, format='HDF'):
    """
    Convert the IDEAL dataset to NILMTK HDF5 format.

    From https://datashare.ed.ac.uk/handle/10283/3647 download these zips:

    - household_sensors.zip (14.77 GB)
    - room_and_appliance_sensors.zip (9.317 GB)

    Both zips contain a folder called "sensordata". Create a new folder,
    e.g. called "ideal_dataset", and into it:

    - extract the "sensordata" folder of household_sensors.zip under the
      name household_sensordata
    - extract the "sensordata" folder of room_and_appliance_sensors.zip
      under the name rooms_appliance_sensordata

    Then run convert_ideal with ideal_path="ideal_dataset". A preparation
    sketch follows this function.

    Parameters
    ----------
    ideal_path : str
        The root path of the ideal low_freq dataset.
    output_filename : str
        The destination filename (including path and suffix).
    format : str
        format of output. Either 'HDF' or 'CSV'. Defaults to 'HDF'
    """
    def _ideal_measurement_mapping_func(house_id, chan_id, category_id):
        if category_id == "electric-appliance":
            ac_type = 'active'
        else:
            ac_type = 'apparent'
        return [('power', ac_type)]

    # Open DataStore
    store = get_datastore(output_filename, format, mode='w')

    # household_sensordata contains the mains readings;
    # rooms_appliance_sensordata contains the appliance readings
    folders = []
    for root, dirs, files in os.walk(ideal_path):
        for folder in dirs:
            if folder in ("household_sensordata", "rooms_appliance_sensordata"):
                folders.append(folder)

    # valid_home_id are home ids which contain both mains and
    # appliance readings
    valid_home_id = mains_plus_appliance_home_id(ideal_path, folders)

    for folder in folders:
        input_path = join(ideal_path, folder)
        # Convert raw data to DataStore
        _convert(input_path, store, _ideal_measurement_mapping_func,
                 'Europe/London', valid_home_id)

    metadata_path = join(get_module_directory(),
                         'dataset_converters',
                         'ideal',
                         'metadata')

    # Add metadata
    save_yaml_to_datastore(metadata_path, store)

    store.close()

    print("Done converting ideal to HDF5!")
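
# A sketch of the folder preparation the docstring above describes, not
# part of the original module. It assumes each zip unpacks to a single
# top-level "sensordata" folder; the zip filenames and the
# "ideal_dataset" root are placeholders taken from the docstring.
if __name__ == "__main__":
    import os
    import zipfile

    root = "ideal_dataset"
    for zip_name, target in [
            ("household_sensors.zip", "household_sensordata"),
            ("room_and_appliance_sensors.zip", "rooms_appliance_sensordata")]:
        with zipfile.ZipFile(zip_name) as zf:
            zf.extractall(root)  # creates root/sensordata
        # Rename to the folder name convert_ideal() searches for
        os.rename(os.path.join(root, "sensordata"),
                  os.path.join(root, target))

    convert_ideal(root, "ideal.h5")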
def convert_ps(ps_path, output_path, out_format="HDF"):
    """Convert the Pecan Street dataset to the NILMTK format.

    Parameters
    ----------
    ps_path : str
        The root path of the Pecan Street dataset (expects "data" and
        "meta" subdirectories).
    output_path : str
        The destination filename (including path and suffix).
    out_format : str
        format of output. Either "HDF" or "CSV". Defaults to "HDF".
    """
    # Open DataStore
    store = get_datastore(output_path, out_format, mode="w")

    # TODO: check 'US/Central'
    data_path = join(ps_path, "data")
    _convert_to_datastore(data_path, store, 'US/Central')

    # Add metadata
    meta_path = join(ps_path, "meta")
    save_yaml_to_datastore(meta_path, store)

    store.close()
    print("Done converting Pecan Street to HDF5")
def convert_unifei(redd_path, output_filename, format='HDF'):
    """
    Parameters
    ----------
    redd_path : str
        The root path of the low_freq dataset in REDD format.
    output_filename : str
        The destination filename (including path and suffix).
    format : str
        format of output. Either 'HDF' or 'CSV'. Defaults to 'HDF'
    """
    def _redd_measurement_mapping_func(house_id, chan_id):
        ac_type = 'active'
        return [('power', ac_type)]

    # Open DataStore
    store = get_datastore(output_filename, format, mode='w')

    # Convert raw data to DataStore
    _convert(redd_path, store, _redd_measurement_mapping_func,
             'America/Sao_Paulo')
    print("Done converting raw data...")

    # Add metadata (the path must point at this converter's
    # metadata directory)
    save_yaml_to_datastore(join(get_module_directory(),
                                'dataset_converters',
                                'unifei',
                                'metadata'),
                           store)
    store.close()

    print("Done converting UNIFEI to HDF5!")
pqenergy = get_datastore('pqenergy.h5', 'HDF', mode='w')

# One meter per channel: meter1 is the aggregate, the rest are submeters.
channels = ['aggr', 'aircon', 'hdryer', 'wboiler', 'ecooker', 'dehumid',
            'fridge', 'aheater', 'ciron', 'rcooker', 'tv', 'vhood', 'washer']
for i, name in enumerate(channels, start=1):
    pqenergy.put('/building1/elec/meter%d' % i,
                 convert_pq(name + '_p', name + '_q'))

save_yaml_to_datastore('metadata_pq/', pqenergy)
pqenergy.close()

#%%
def convert_s(s_index):
    dataset = pd.read_csv('export.csv', sep=',')
    dataset.drop_duplicates(subset=["timestamp"], inplace=True)
    dataset["timestamp"] = pd.to_datetime(dataset.timestamp.values,
                                          unit='s',
                                          utc=True).tz_convert(TIMEZONE)
    meter = dataset[['timestamp', s_index]].copy()
    meter.columns = pd.MultiIndex.from_tuples([
        ('physical_quantity', 'type'),
        ('power', 'active'),
        # ('power', 'apparent'),
def convert_deps(deps_path, input_filename, output_filename, format='HDF'):
    """
    Parameters
    ----------
    deps_path : str
        The root path of the DEPS dataset, e.g. 'C:/data/deps'
    input_filename : str
        The raw-data filename (including path and suffix),
        e.g. 'C:/data/rawdata.csv'
    output_filename : str
        The destination HDF5 filename (including path and suffix),
        e.g. 'C:/data/deps/DEPS_data.h5'
    format : str
        format of output. Either 'HDF' or 'CSV'. Defaults to 'HDF'

    Meters & Measurements
    ---------------------
    Measurement assignment (idMeasurement) in the raw data to REDD format:

        Measurement ids              Units        Meter name
        14011 14012                  W VAr        Main_RST
        14001 14007 14014 14017      V A W VAr    Main_R
        14002 14008 14015 14018      V A W VAr    Main_S
        14003 14009 14016 14019      V A W VAr    Main_T
        13001                        W            Lights_1
        13002                        W            Lights_2
        10003 10006 10014 10018      V A W VAr    HVAC_1
        10002 10005 10013 10017      V A W VAr    HVAC_2
        10001 10004 10012 10016      V A W VAr    HVAC_4
        21001 21002 21003 21005      V A W VAr    Rack

    Example
    -------
    raw_data.csv (input_filename):

        idMeasurement, UNIX_timestamp(tStampUTC), dataValue
        14011, 1583103600, 123
        14012, 1583103600, -416
        14011, 1583103601, 126
        14012, 1583103601, -416
        ...
        14011, 1583535599, 121
        14012, 1583535599, -411

    Output in REDD format, deps_path/classroom1/:

        channel_1.dat:
            1583103600 123 -416
            1583103601 126 -416
            ...
            1583535599 121 -411

        labels.dat:
            1 Main_RST

    Output HDF5 file: output_filename.h5
    """
    # Written by Andrés Arias Silva
    # Raw data converter to REDD format extracted from the DEPS SQL database
    _deps_to_redd_format(deps_path, input_filename)

    def _deps_measurement_mapping_func(classroom_id, chan_id):
        if chan_id == 1:
            # Main_RST
            meas = [('power', 'active'), ('power', 'reactive')]
        elif 1 < chan_id <= 4:
            # Main_R, Main_S, Main_T
            meas = [('voltage', ''), ('current', ''),
                    ('power', 'active'), ('power', 'reactive')]
        elif 4 < chan_id <= 6:
            # Lights_1, Lights_2
            meas = [('power', 'active')]
        elif 6 < chan_id <= 10:
            # HVAC_1, HVAC_2, HVAC_4, Rack
            meas = [('voltage', ''), ('current', ''),
                    ('power', 'active'), ('power', 'reactive')]
        else:
            raise NameError('incorrect channel number')
        return meas

    # Open DataStore
    store = get_datastore(output_filename, format, mode='w')

    # Convert raw data to DataStore
    _convert(deps_path, store, _deps_measurement_mapping_func, 'Europe/Madrid')

    # Add metadata
    save_yaml_to_datastore(join(get_module_directory(),
                                'dataset_converters',
                                'deps',
                                'metadata'),
                           store)
    store.close()

    print("Done converting DEPS data to HDF5!")
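
# A usage sketch, not part of the original module; the Windows-style
# paths are the hypothetical examples from the docstring above.
if __name__ == "__main__":
    convert_deps("C:/data/deps",
                 "C:/data/rawdata.csv",
                 "C:/data/deps/DEPS_data.h5")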
        else:
            f.write('  ' + str(instance + 1) + ': *generic\n')

    f.write('\nappliances:')
    for instance, app in enumerate(np.unique(appliances)):
        f.write('\n- original_name: ' + app + '\n')
        f.write('  type: unknown\n')
        f.write('  instance: ' + str(instance + 1) + '\n')
        f.write('  meters: [' + str(instance + 1) + ']\n')
    f.close()

    # dataset metadata
    f = open(pJoin(modelDir, 'train', 'dataset.yaml'), 'w')
    f.write('name: trainData\n')
    f.close()

    # meter_devices metadata
    f = open(pJoin(modelDir, 'train', 'meter_devices.yaml'), 'w')
    f.write('generic:\n')
    f.write('  model: generic\n')
    f.write('  sample_period: ' + samplePeriod + '\n')
    f.write('  max_sample_period: ' + samplePeriod + '\n')
    f.write('  measurements:\n')
    f.write('  - physical_quantity: power\n')
    f.write('    type: apparent\n')
    f.write('    upper_limit: 1000000000\n')
    f.write('    lower_limit: 0\n')
    f.close()

    # save data and metadata
    save_yaml_to_datastore(pJoin(modelDir, 'train'), store)
    store.close()
def convert_sortd(input_path, output_filename, format='HDF'):
    """Converts the dataset to NILMTK HDF5 format.

    For more information about the SOR test dataset, contact Samuel Marisa.

    Parameters
    ----------
    input_path : str
        The root path of the dataset. It is assumed that the YAML
        metadata is in 'input_path/metadata'.
    output_filename : str
        The destination filename (including path and suffix).
    format : str
        format of output. Either 'HDF' or 'CSV'. Defaults to 'HDF'

    Example usage:
    --------------
    convert('/sortd', 'store.h5')
    """
    print('Attempting to convert the SORTD dataset at %s into %s in NILMTK %s format...'
          % (input_path, output_filename, format))

    # Ensure that the input directory exists
    check_directory_exists(input_path)

    # Load the dataset metadata
    with open(join(input_path, 'metadata/dataset.yaml'), 'r') as stream:
        dataset_metadata = yaml.safe_load(stream)

    # Open the datastore
    store = get_datastore(output_filename, format, mode='w')

    # Iterate through all building metadata files found in the dataset
    for metadata_file in glob.glob(join(input_path,
                                        'metadata/building[0-9]*.yaml')):
        # Load the building metadata
        with open(metadata_file, 'r') as stream:
            metadata = yaml.safe_load(stream)
        building_id = int(metadata['instance'])
        print('==> Loading building %d defined at %s. Please wait...'
              % (building_id, metadata_file))
        for meter_id, meter_data in metadata['elec_meters'].items():
            meter_id = int(meter_id)
            key = Key(building=building_id, meter=meter_id)

            # Load the raw data from the data location
            print(' - Loading meter %s from %s...'
                  % (meter_id, meter_data['data_location']))
            columns = [('power', 'active')]
            df = pd.read_csv(join(input_path, meter_data['data_location']),
                             sep=',', names=columns,
                             dtype={m: np.float32 for m in columns})

            # Convert the timestamp index column to timezone-aware datetime
            df.index = pd.to_datetime(df.index.values, unit='s', utc=True)
            df = df.tz_convert(dataset_metadata['timezone'])
            print(df.info())
            print(df.head())

            print(" - Storing data under key %s in the datastore..."
                  % str(key))
            store.put(str(key), df)
        print(" - Building %s loaded!" % building_id)

    print("Adding the metadata into the store...")
    save_yaml_to_datastore(join(input_path, 'metadata'), store)

    print("Closing the store...")
    store.close()

    print("Done converting SORTD dataset to HDF5!")