Example 1
def get_csv(
        path="/Users/rishi/Documents/Master_folder/IIITD/6th_semester/BTP/NIOMTK",
        output_filename="occupancy",
        format='HDF'):
    csv_files = [i for i in listdir(path) if '_csv' in i]
    csv_list = []
    for i in csv_files:
        directory = join(path, i)
        file_list = [j for j in listdir(directory) if ".csv" in j]
        for j in file_list:
            csv_list.append(join(directory, j))
    print(csv_list)
    print(len(csv_list))
    store = get_datastore(output_filename, format, mode='w')
    for i in range(len(csv_list)):
        dataframe = pd.read_csv(csv_list[i])
        out = []
        print("The current file is:", csv_list[i])
        for j in range(len(dataframe)):
            out.append(dataframe.iloc[j].values[1:])
        out_1d = list(chain.from_iterable(out))
        index = pd.date_range(start=dataframe.values[0][0],
                              periods=len(out_1d),
                              freq="1s")
        df = pd.DataFrame(out_1d, index)
        # key = Key(building=1, meter=(i + 1))
        key = "/building" + str(i) + "/elec/meter" + str(i + 1)
        if "summer" in csv_list[i]:
            key = join(str(key), "summer")
        else:
            key = join(str(key), "winter")
        store.put(key, df)
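The commented-out Key line above hints at the cleaner way to build store keys. A minimal sketch (assuming nilmtk's Key helper, which the later examples import): str(Key(...)) yields the same '/buildingN/elec/meterM' strings that the loop above concatenates by hand.

# Sketch only: Key renders the canonical NILMTK store path.
from nilmtk.datastore import Key

key = Key(building=1, meter=2)
print(str(key))  # '/building1/elec/meter2'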
Example 2
def convert_combed(combed_path, output_filename, format='HDF'):
    """
    Parameters
    ----------
    combed_path : str
        The root path of the combed dataset.
    output_filename : str
        The destination HDF5 filename (including path and suffix).
    """

    check_directory_exists(combed_path)

    # Open store
    store = get_datastore(output_filename, format, mode='w')

    for building_name, building_mapping in overall_dataset_mapping.items():
        for load_name, load_mapping in building_mapping.items():
            for load_mapping_path, meter_number in load_mapping.items():
                building_number = building_number_mapping[building_name]
                key = Key(building=building_number, meter=meter_number)
                dfs = []
                for attribute in column_mapping.keys():
                    filename_attribute = join(combed_path, building_name, load_name, load_mapping_path, "%s.csv" %attribute)
                    print(filename_attribute)
                    dfs.append(pd.read_csv(filename_attribute, parse_dates=True, index_col=0, header=0, names=[attribute]))
                total = pd.concat(dfs, axis=1)
                total = total.tz_localize('UTC').tz_convert('Asia/Kolkata')
                total.rename(columns=lambda x: column_mapping[x], inplace=True)
                total.columns.set_names(LEVEL_NAMES, inplace=True)
                assert total.index.is_unique
                store.put(str(key), total)
    convert_yaml_to_hdf5(join(_get_module_directory(), 'metadata'),
                         output_filename)

    print("Done converting COMBED to HDF5!")
Example 3
def convert_lab(lab_path, output_filename, format='HDF'):
    """
    Parameters
    ----------
    lab_path : str
        The root path of the LAB dataset.
    output_filename : str
        The destination filename (including path and suffix).
    format : str
        format of output. Either 'HDF' or 'CSV'. Defaults to 'HDF'
    """

    # originally: ac_type = 'apparent' if chan_id <= 2 else 'active'
    def _lab_measurement_mapping_func(house_id, chan_id):
        ac_type = 'active'
        return [('power', ac_type)]

    # Open DataStore
    store = get_datastore(output_filename, format, mode='w')

    # Convert raw data to DataStore
    _convert(lab_path, store, _lab_measurement_mapping_func,
             'America/Fortaleza')

    # Add metadata
    save_yaml_to_datastore(
        join(get_module_directory(), 'dataset_converters', 'lab', 'metadata'),
        store)
    store.close()

    print("Done converting LAB to HDF5!")
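Example 4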
def convert_redd(redd_path, output_filename, format='HDF'):
    """
    Parameters
    ----------
    redd_path : str
        The root path of the REDD low_freq dataset.
    output_filename : str
        The destination filename (including path and suffix).
    format : str
        format of output. Either 'HDF' or 'CSV'. Defaults to 'HDF'
    """
    def _redd_measurement_mapping_func(house_id, chan_id):
        return [('power', 'active')]

    # Open DataStore
    store = get_datastore(output_filename, format, mode='w')

    # Convert raw data to DataStore
    _convert(redd_path, store, _redd_measurement_mapping_func, 'Asia/Taipei')

    s = join(redd_path, 'metadata')

    # Add metadata
    save_yaml_to_datastore(s, store)
    store.close()

    print("Done converting III to HDF5!")
Example 5
def convert_redd(redd_path, output_filename, format="HDF"):
    """
    Parameters
    ----------
    redd_path : str
        The root path of the REDD low_freq dataset.
    output_filename : str
        The destination filename (including path and suffix).
    format : str
        format of output. Either 'HDF' or 'CSV'. Defaults to 'HDF'
    """

    def _redd_measurement_mapping_func(house_id, chan_id):
        ac_type = "apparent" if chan_id <= 2 else "active"
        return [("power", ac_type)]

    # Open DataStore
    store = get_datastore(output_filename, format, mode="w")

    # Convert raw data to DataStore
    _convert(redd_path, store, _redd_measurement_mapping_func, "US/Eastern")

    # Add metadata
    save_yaml_to_datastore(join(get_module_directory(), "dataset_converters", "redd", "metadata"), store)
    store.close()

    print("Done converting REDD to HDF5!")
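A minimal usage sketch for the converter above; the paths are placeholders for wherever the REDD low_freq archive was extracted and where the HDF5 output should go.

# Hypothetical paths -- adjust to your local copy of REDD low_freq.
convert_redd('/data/redd/low_freq', '/data/redd.h5')
# The resulting file can then be opened with nilmtk, e.g.:
#   from nilmtk import DataSet
#   redd = DataSet('/data/redd.h5')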
Example 6
def convert_hipe(hipe_path, output_filename, format="HDF"):
    """Convert the HIPE data set to the NILMTK-format. This method works
    with the 1 week and the 3 month data.

    Parameters
    ----------
    hipe_path : str
        The root path of the HIPE dataset.
    output_filename : str
        The destination filename (including path and suffix).
    format : str
        format of output. Either "HDF" or "CSV". Defaults to "HDF".

    """

    datastore = get_datastore(output_filename, format, mode="w")

    _convert(hipe_path, datastore, _hipe_measurement_mapping_func,
             "Europe/Berlin")

    metadata_path = "metadata"

    save_yaml_to_datastore(metadata_path, datastore)

    datastore.close()

    print("Done converting HIPE!")
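The helper _hipe_measurement_mapping_func is referenced but not defined in this snippet. A plausible sketch by analogy with the other converters here (an assumption, not the HIPE converter's actual code):

# Assumed shape of the missing mapping function: every channel reports active power.
def _hipe_measurement_mapping_func(house_id, chan_id):
    return [("power", "active")]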
Example 7
def convert_enertalk(input_path,
                     output_filename,
                     format='HDF',
                     tz='Asia/Seoul'):
    """
    Parameters
    ----------
    input_path : str
        The root path of the ENERTALK dataset.
    output_filename : str
        The destination filename (including path and suffix).
    format : str
        format of output. Either 'HDF' or 'CSV'. Defaults to 'HDF'
    tz : str
        Timezone e.g. 'Asia/Seoul'
    """

    # Open DataStore
    store = get_datastore(output_filename, format, mode='w')

    # convert raw data to DataStore
    _convert(input_path, store, tz=tz)

    # Add metadata
    save_yaml_to_datastore('metadata/', store)
    store.close()
Example 8
def convert_refit(input_path, output_filename, format='HDF'):
    """
    Parameters
    ----------
    input_path : str
        The root path of the CSV files, e.g. House1.csv
    output_filename : str
        The destination filename (including path and suffix).
    format : str
        format of output. Either 'HDF' or 'CSV'. Defaults to 'HDF'
    """

    # Open DataStore
    store = get_datastore(output_filename, format, mode='w')

    # Convert raw data to DataStore
    _convert(input_path, store, 'Europe/London')

    # Add metadata
    save_yaml_to_datastore(
        join(get_module_directory(), 'dataset_converters', 'refit',
             'metadata'), store)
    store.close()

    print("Done converting REFIT to HDF5!")
Example 9
def convert_alva(alva_path, output_filename, format='HDF'):
    """
    Parameters
    ----------
    alva_path : str
        The root path of the alva low_freq dataset.
    output_filename : str
        The destination filename (including path and suffix).
    format : str
        format of output. Either 'HDF' or 'CSV'. Defaults to 'HDF'
    """

    def _alva_measurement_mapping_func(house_id, chan_id):
        ac_type = 'apparent' if chan_id <= 2 else 'active'
        return [('power', ac_type)]
        
    # Open DataStore
    store = get_datastore(output_filename, format, mode='w')

    # Convert raw data to DataStore
    _convert(alva_path, store, _alva_measurement_mapping_func, 'US/Eastern')

    # Add metadata
    save_yaml_to_datastore(join(get_module_directory(), 
                              'dataset_converters', 
                              'alva', 
                              'metadata'),
                         store)
    store.close()

    print("Done converting alva to HDF5!")
Example 10
def convert_deddiag(connection,
                    output_filename,
                    format='HDF',
                    start_date=DEFAULT_START_DATE,
                    end_date=DEFAULT_END_DATE,
                    tz=DEFAULT_TZ):
    """
    Parameters
    ----------
    connection: Connection
        Connection to the DEDDIAG database
        Example: connection = Connection(host="localhost", port="5432", db_name="postgres", user="******", password="******")
    output_filename : str
        The destination filename including path and suffix
        Example: ./data/deddiag.h5
    format : str
        format of output. Either 'HDF' or 'CSV'. Defaults to 'HDF'
    """

    # Open DataStore
    # todo try catch

    dest_file = get_datastore(output_filename, format, mode='w')

    # Convert raw data to DataStore
    _convert(connection, dest_file, start_date, end_date, tz)

    path_to_metadata = join(get_module_directory(), 'dataset_converters',
                            'deddiag', 'metadata')

    # Add metadata
    save_yaml_to_datastore(path_to_metadata, dest_file)

    print("Done converting DEDDIAG to HDF5!")
Example 11
def convert_lab(lab_path, output_filename, format='HDF'):
    """
    Parameters
    ----------
    lab_path : str
        The root path of the LAB dataset.
    output_filename : str
        The destination filename (including path and suffix).
    format : str
        format of output. Either 'HDF' or 'CSV'. Defaults to 'HDF'
    """

    # originally: ac_type = 'apparent' if chan_id <= 2 else 'active'
    def _lab_measurement_mapping_func(house_id, chan_id):
        ac_type = 'active'
        return [('power', ac_type)]
        
    # Open DataStore
    store = get_datastore(output_filename, format, mode='w')

    # Convert raw data to DataStore
    _convert(lab_path, store, _lab_measurement_mapping_func, 'America/Fortaleza')

    # Add metadata
    save_yaml_to_datastore(join(get_module_directory(), 
                              'dataset_converters', 
                              'lab', 
                              'metadata'),
                         store)
    store.close()

    print("Done converting LAB to HDF5!")
Example 12
def convert_ampds(input_path, output_filename, format='HDF'):
    """
    Convert AMPds R2013 as seen on Dataverse. Download the files
    as CSVs and put them in the `input_path` folder for conversion.
    
    Download URL: https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/MXB7VO
    
    Parameters: 
    -----------
    input_path: str
            The path of the directory where all the CSV
            files are stored
    output_filename: str
            The path of the HDF5 file where the standardized
            data will be written. The path must refer to a
            specific file, not a directory.
    format: str
        Defaults to HDF5
    Example usage:
    --------------
    convert('/AMPds/electricity', 'store.h5')    

    """
    check_directory_exists(input_path)
    files = [
        f for f in listdir(input_path)
        if isfile(join(input_path, f)) and '.csv' in f and '.swp' not in f
    ]
    # Sorting Lexicographically
    files.sort()

    # Remove Whole Home and put it at top
    files.remove("WHE.csv")
    files.insert(0, "WHE.csv")
    assert isdir(input_path)
    store = get_datastore(output_filename, format, mode='w')
    for i, csv_file in enumerate(files):
        key = Key(building=1, meter=(i + 1))
        print('Loading file #', (i + 1), ' : ', csv_file, '. Please wait...')
        df = pd.read_csv(join(input_path, csv_file))
        # Due to fixed width, column names have spaces :(
        df.columns = [x.replace(" ", "") for x in df.columns]
        df.index = pd.to_datetime(df[TIMESTAMP_COLUMN_NAME],
                                  unit='s',
                                  utc=True)
        df = df.drop(columns=TIMESTAMP_COLUMN_NAME)
        df = df.tz_convert(TIMEZONE)
        df.rename(columns=lambda x: columnNameMapping[x], inplace=True)
        df.columns.set_names(LEVEL_NAMES, inplace=True)
        df = df.apply(pd.to_numeric, errors='ignore')
        df = df.dropna()
        df = df.astype(np.float32)
        store.put(str(key), df)
        print("Done with file #", (i + 1))

    store.close()
    metadata_path = join(_get_module_directory(), 'metadata')
    print('Processing metadata...')
    convert_yaml_to_hdf5(metadata_path, output_filename)
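Example 13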
def convert_redd(redd_path, output_filename, format='HDF'):
    """
    Parameters
    ----------
    redd_path : str
        The root path of the REDD low_freq dataset.
    output_filename : str
        The destination filename (including path and suffix).
    format : str
        format of output. Either 'HDF' or 'CSV'. Defaults to 'HDF'
    """
    def _redd_measurement_mapping_func(house_id, chan_id):
        ac_type = 'apparent' if chan_id <= 2 else 'active'
        return [('power', ac_type)]

    # Open DataStore
    store = get_datastore(output_filename, format, mode='w')

    # Convert raw data to DataStore
    _convert(redd_path, store, _redd_measurement_mapping_func, 'US/Eastern')

    s = join(get_module_directory(), 'dataset_converters', 'redd', 'metadata')

    # Add metadata
    save_yaml_to_datastore(
        join(get_module_directory(), 'dataset_converters', 'redd', 'metadata'),
        store)
    store.close()

    print("Done converting REDD to HDF5!")
Example 14
def convert_refit(input_path, output_filename, format='HDF'):
    """
    Parameters
    ----------
    input_path : str
        The root path of the CSV files, e.g. House1.csv
    output_filename : str
        The destination filename (including path and suffix).
    format : str
        format of output. Either 'HDF' or 'CSV'. Defaults to 'HDF'
    """
        
    # Open DataStore
    store = get_datastore(output_filename, format, mode='w')

    # Convert raw data to DataStore
    _convert(input_path, store, 'Europe/London')

    # Add metadata
    save_yaml_to_datastore(join(get_module_directory(), 
                              'dataset_converters', 
                              'refit', 
                              'metadata'),
                         store)
    store.close()

    print("Done converting REFIT to HDF5!")
Example 15
def convert_ideal(ideal_path, output_filename, format='HDF'):
    """
    Convert the IDEAL dataset to NILMTK HDF5 format.
    From https://datashare.ed.ac.uk/handle/10283/3647 download these zips below:
        - household_sensors.zip (14.77Gb).
        - room_and_appliance_sensors.zip (9.317Gb).
    Both zips contain a folder called "sensorsdata".
    Create a new folder, e.g. called "ideal_dataset", and into it
        - Extract the folder "household_sensors.zip/sensordata" with the name 
          household_sensordata
        - Extract the folder "room_and_appliance_sensors/sensordata" with the
          name rooms_appliance_sensordata

    Then run the function convert_ideal with ideal_path="ideal_dataset".

    Parameters
    ----------
    ideal_path : str
        The root path of the ideal low_freq dataset.
    output_filename : str
        The destination filename (including path and suffix).
    format : str
        format of output. Either 'HDF' or 'CSV'. Defaults to 'HDF'
    """
    def _ideal_measurement_mapping_func(house_id, chan_id, category_id):
        if (category_id == "electric-appliance"):
            ac_type = 'active'
            return [('power', ac_type)]
        else:
            ac_type = 'apparent'
            return [('power', ac_type)]

    # Open DataStore
    store = get_datastore(output_filename, format, mode='w')

    #household_sensordata contains mains reading
    #rooms_appliance_sensordata contains appliance reading
    folders = []
    for root, dirs, files in os.walk(ideal_path):
        for folder in dirs:
            if (folder == "household_sensordata"
                    or folder == "rooms_appliance_sensordata"):
                folders.append(folder)
    #valid_home_id are home ids which contain both mains and appliance reading
    valid_home_id = mains_plus_appliance_home_id(ideal_path, folders)
    for folder in folders:
        input_path = join(ideal_path, folder)
        # Convert raw data to DataStore
        _convert(input_path, store, _ideal_measurement_mapping_func,
                 'Europe/London', valid_home_id)

    metadata_path = join(get_module_directory(), 'dataset_converters', 'ideal',
                         'metadata')

    # Add metadata
    save_yaml_to_datastore(metadata_path, store)
    store.close()

    print("Done converting ideal to HDF5!")
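A minimal usage sketch, assuming the two zips were extracted as described in the docstring into a folder named "ideal_dataset" (the paths are hypothetical):

# household_sensordata and rooms_appliance_sensordata must both sit under ideal_path.
convert_ideal('/data/ideal_dataset', '/data/ideal.h5')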
Example 16
def convert_combed(combed_path, output_filename, format='HDF'):
    """
    Parameters
    ----------
    combed_path : str
        The root path of the combed dataset.
    output_filename : str
        The destination HDF5 filename (including path and suffix).
    """

    check_directory_exists(combed_path)

    # Open store
    store = get_datastore(output_filename, format, mode='w')

    any_file_converted = False
    
    for building_name, building_mapping in iteritems(overall_dataset_mapping):
        for load_name, load_mapping in iteritems(building_mapping):
            for load_mapping_path, meter_number in iteritems(load_mapping):
                building_number = building_number_mapping[building_name]
                key = Key(building=building_number, meter=meter_number)
                dfs = []
                for attribute in column_mapping.keys():
                    filename_attribute = join(combed_path, building_name, load_name, load_mapping_path, "%s.csv" %attribute)
                    if not os.path.isfile(filename_attribute):
                        # File not found directly in the combed_path provided
                        # Try adding 'iiitd' to it
                        filename_attribute = join(combed_path, 'iiitd', building_name, load_name, load_mapping_path, "%s.csv" %attribute)
                    
                    if os.path.isfile(filename_attribute):
                        exists = True
                        print(filename_attribute)
                        df = pd.read_csv(filename_attribute, names=["timestamp", attribute])
                        df.index = pd.to_datetime(df["timestamp"], unit='ms')
                        df = df.drop(columns="timestamp")
                        dfs.append(df)
                    else:
                        exists = False
                        
                if exists:
                    total = pd.concat(dfs, axis=1)
                    total = total.tz_localize('UTC').tz_convert('Asia/Kolkata')
                    total.columns = pd.MultiIndex.from_tuples([column_mapping[x] for x in total.columns])
                    total.columns.set_names(LEVEL_NAMES, inplace=True)
                    assert total.index.is_unique
                    store.put(str(key), total)
                    any_file_converted = True
                    
    if not any_file_converted:
        raise RuntimeError('No files converted, did you specify the correct path?')
                    
    convert_yaml_to_hdf5(
        join(get_module_directory(), 'dataset_converters', 'combed', 'metadata'),
        output_filename
    )

    print("Done converting COMBED to HDF5!")
Example 17
def convert_combed(combed_path, output_filename, format='HDF'):
    """
    Parameters
    ----------
    combed_path : str
        The root path of the combed dataset.
    output_filename : str
        The destination HDF5 filename (including path and suffix).
    """

    check_directory_exists(combed_path)

    # Open store
    store = get_datastore(output_filename, format, mode='w')

    any_file_converted = False
    
    for building_name, building_mapping in iteritems(overall_dataset_mapping):
        for load_name, load_mapping in iteritems(building_mapping):
            for load_mapping_path, meter_number in iteritems(load_mapping):
                building_number = building_number_mapping[building_name]
                key = Key(building=building_number, meter=meter_number)
                dfs = []
                for attribute in column_mapping.keys():
                    filename_attribute = join(combed_path, building_name, load_name, load_mapping_path, "%s.csv" %attribute)
                    if not os.path.isfile(filename_attribute):
                        # File not found directly in the combed_path provided
                        # Try adding 'iiitd' to it
                        filename_attribute = join(combed_path, 'iiitd', building_name, load_name, load_mapping_path, "%s.csv" %attribute)
                    
                    if os.path.isfile(filename_attribute):
                        exists = True
                        print(filename_attribute)
                        df = pd.read_csv(filename_attribute, names=["timestamp", attribute])
                        df.index = pd.to_datetime(df["timestamp"], unit='ms')
                        df = df.drop(columns="timestamp")
                        dfs.append(df)
                    else:
                        exists = False
                        
                if exists:
                    total = pd.concat(dfs, axis=1)
                    total = total.tz_localize('UTC').tz_convert('Asia/Kolkata')
                    total.columns = pd.MultiIndex.from_tuples([column_mapping[x] for x in total.columns])
                    total.columns.set_names(LEVEL_NAMES, inplace=True)
                    assert total.index.is_unique
                    store.put(str(key), total)
                    any_file_converted = True
                    
    if not any_file_converted:
        raise RuntimeError('No files converted, did you specify the correct path?')
                    
    convert_yaml_to_hdf5(join(_get_module_directory(), 'metadata'),
                         output_filename)

    print("Done converting COMBED to HDF5!")
Example 18
def convert_ampds(input_path, output_filename, format='HDF'):
    """
    Convert AMPds R2013 as seen on Dataverse. Download the files
    as CSVs and put them in the `input_path` folder for conversion.
    
    Download URL: https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/MXB7VO
    
    Parameters: 
    -----------
    input_path: str
            The path of the directory where all the CSV
            files are stored
    output_filename: str
            The path of the HDF5 file where the standardized
            data will be written. The path must refer to a
            specific file, not a directory.
    format: str
        Defaults to HDF5
    Example usage:
    --------------
    convert('/AMPds/electricity', 'store.h5')    

    """
    check_directory_exists(input_path)
    files = [f for f in listdir(input_path) if isfile(join(input_path, f)) and
             '.csv' in f and '.swp' not in f]
    # Sorting Lexicographically
    files.sort()

    # Remove Whole Home and put it at top
    files.remove("WHE.csv")
    files.insert(0, "WHE.csv")
    assert isdir(input_path)
    store = get_datastore(output_filename, format, mode='w')
    for i, csv_file in enumerate(files):
        key = Key(building=1, meter=(i + 1))
        print('Loading file #', (i + 1), ' : ', csv_file, '. Please wait...')
        df = pd.read_csv(join(input_path, csv_file))
        # Due to fixed width, column names have spaces :(
        df.columns = [x.replace(" ", "") for x in df.columns]
        df.index = pd.to_datetime(df[TIMESTAMP_COLUMN_NAME], unit='s', utc=True)
        df = df.drop(columns=TIMESTAMP_COLUMN_NAME)
        df = df.tz_convert(TIMEZONE)
        df.rename(columns=lambda x: columnNameMapping[x], inplace=True)
        df.columns.set_names(LEVEL_NAMES, inplace=True)
        df = df.apply(pd.to_numeric, errors='ignore')
        df = df.dropna()
        df = df.astype(np.float32)
        store.put(str(key), df)
        print("Done with file #", (i + 1))
        
    store.close()
    metadata_path = join(get_module_directory(), 'dataset_converters', 'ampds', 'metadata')
    print('Processing metadata...')
    convert_yaml_to_hdf5(metadata_path, output_filename)
Example 19
def convert_ukdale(ukdale_path,
                   output_filename,
                   format='HDF',
                   drop_duplicates=True):
    """Converts the UK-DALE dataset to NILMTK HDF5 format.

    For more information about the UK-DALE dataset, and to download
    it, please see http://www.doc.ic.ac.uk/~dk3810/data/

    Parameters
    ----------
    ukdale_path : str
        The root path of the UK-DALE dataset.  It is assumed that the YAML
        metadata is in 'ukdale_path/metadata'.
    output_filename : str
        The destination filename (including path and suffix).
    format : str
        format of output. Either 'HDF' or 'CSV'. Defaults to 'HDF'
    drop_duplicates : bool
        Remove entries with duplicated timestamp (keeps the first value)
        Defaults to True.
    """
    ac_type_map = _get_ac_type_map(ukdale_path)

    def _ukdale_measurement_mapping_func(house_id, chan_id):
        ac_type = ac_type_map[(house_id, chan_id)][0]
        return [('power', ac_type)]

    # Open DataStore
    store = get_datastore(output_filename, format, mode='w')

    # Convert 6-second data
    _convert(ukdale_path,
             store,
             _ukdale_measurement_mapping_func,
             TZ,
             sort_index=False,
             drop_duplicates=drop_duplicates)
    store.close()

    # Add metadata
    if format == 'HDF':
        convert_yaml_to_hdf5(join(ukdale_path, 'metadata'), output_filename)

    # Convert 1-second data
    store.open(mode='a')
    _convert_one_sec_data(ukdale_path, store, ac_type_map, drop_duplicates)

    store.close()
    print("Done converting UK-DALE to HDF5!")
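A minimal usage sketch with placeholder paths; drop_duplicates is spelled out even though True is already the default.

convert_ukdale('/data/ukdale', '/data/ukdale.h5', drop_duplicates=True)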
Example 20
def convert_ampds(input_path, output_filename, format="HDF"):
    """
    Parameters: 
    -----------
    input_path: str
            The path of the directory where all the CSV
            files are stored
    output_filename: str
            The path of the HDF5 file where the standardized
            data will be written. The path must refer to a
            specific file, not a directory.
    format: str
        Defaults to HDF5
    Example usage:
    --------------
    convert('/AMPds/electricity', 'store.h5')    

    """
    check_directory_exists(input_path)
    files = [f for f in listdir(input_path) if isfile(join(input_path, f)) and ".csv" in f and ".swp" not in f]
    # Sorting Lexicographically
    files.sort()

    # Remove Whole Home and put it at top
    files.remove("WHE.csv")
    files.insert(0, "WHE.csv")
    assert isdir(input_path)
    store = get_datastore(output_filename, format, mode="w")
    for i, csv_file in enumerate(files):
        key = Key(building=1, meter=(i + 1))
        print("Loading file #", (i + 1), " : ", csv_file, ". Please wait...")
        df = pd.read_csv(join(input_path, csv_file))
        # Due to fixed width, column names have spaces :(
        df.columns = [x.replace(" ", "") for x in df.columns]
        df.index = pd.to_datetime(df[TIMESTAMP_COLUMN_NAME], unit="s", utc=True)
        df = df.drop(columns=TIMESTAMP_COLUMN_NAME)
        df = df.tz_convert(TIMEZONE)  # index is already UTC-aware, so just convert
        df.rename(columns=lambda x: columnNameMapping[x], inplace=True)
        df.columns.set_names(LEVEL_NAMES, inplace=True)
        df = df.apply(pd.to_numeric, errors="coerce")
        df = df.dropna()
        df = df.astype(np.float32)
        store.put(str(key), df)
        print("Done with file #", (i + 1))
    store.close()
    metadata_path = join(_get_module_directory(), "metadata")
    print("Processing metadata...")
    convert_yaml_to_hdf5(metadata_path, output_filename)
Example 21
def convert_ps(ps_path, output_path, out_format="HDF"):
    # open datastore
    store = get_datastore(output_path, out_format, mode="w")

    # TODO: check 'US/Central'
    data_path = join(ps_path, "data")
    _convert_to_datastore(data_path, store, 'US/Central')

    # add metadata
    meta_path = join(ps_path, "meta")
    save_yaml_to_datastore(meta_path, store)

    store.close()

    print("Done converting Pecan Street to HDF5")
Example 22
def convert_combed(combed_path, output_filename, format='HDF'):
    """
    Parameters
    ----------
    combed_path : str
        The root path of the combed dataset.
    output_filename : str
        The destination HDF5 filename (including path and suffix).
    """

    check_directory_exists(combed_path)

    # Open store
    store = get_datastore(output_filename, format, mode='w')

    for building_name, building_mapping in iteritems(overall_dataset_mapping):
        for load_name, load_mapping in iteritems(building_mapping):
            for load_mapping_path, meter_number in iteritems(load_mapping):
                building_number = building_number_mapping[building_name]
                key = Key(building=building_number, meter=meter_number)
                dfs = []
                for attribute in column_mapping.keys():
                    filename_attribute = join(combed_path, building_name,
                                              load_name, load_mapping_path,
                                              "%s.csv" % attribute)
                    if os.path.isfile(filename_attribute):
                        exists = True
                        print(filename_attribute)
                        df = pd.read_csv(filename_attribute,
                                         header=0,
                                         names=["timestamp", attribute])
                        df.index = pd.to_datetime(df["timestamp"], unit='ms')
                        df = df.drop(columns="timestamp")
                        dfs.append(df)
                    else:
                        exists = False
                if exists:
                    total = pd.concat(dfs, axis=1)
                    total = total.tz_localize('UTC').tz_convert('Asia/Kolkata')
                    total.rename(columns=lambda x: column_mapping[x],
                                 inplace=True)
                    total.columns.set_names(LEVEL_NAMES, inplace=True)
                    assert total.index.is_unique
                    store.put(str(key), total)
    convert_yaml_to_hdf5(join(_get_module_directory(), 'metadata'),
                         output_filename)

    print("Done converting COMBED to HDF5!")
Example 23
def convert_iawe(iawe_path, output_filename, format="HDF"):
    """
    Parameters
    ----------
    iawe_path : str
        The root path of the iawe dataset.
    output_filename : str
        The destination filename (including path and suffix).
    """

    check_directory_exists(iawe_path)
    idx = pd.date_range(start=START_DATETIME, end=END_DATETIME, freq=FREQ)
    idx = idx.tz_localize('GMT').tz_convert(TIMEZONE)

    # Open data store
    store = get_datastore(output_filename, format, mode='w')
    electricity_path = join(iawe_path, "electricity")

    # Mains data
    for chan in range(1, 12):
        key = Key(building=1, meter=chan)
        filename = join(electricity_path, "%d.csv" % chan)
        print('Loading ', chan)
        df = pd.read_csv(filename, dtype=np.float64, na_values='\\N')
        df.drop_duplicates(subset=["timestamp"], inplace=True)
        df.index = pd.to_datetime(df.timestamp.values, unit='s', utc=True)
        df = df.tz_convert(TIMEZONE)
        df = df.drop(columns=TIMESTAMP_COLUMN_NAME)
        df.columns = pd.MultiIndex.from_tuples(
            [column_mapping[x] for x in df.columns], names=LEVEL_NAMES)
        df = df.apply(pd.to_numeric, errors='ignore')
        df = df.dropna()
        df = df.astype(np.float32)
        df = df.sort_index()
        df = df.resample("1T").mean()
        df = reindex_fill_na(df, idx)
        assert df.isnull().sum().sum() == 0
        store.put(str(key), df)
    store.close()

    metadata_dir = join(get_module_directory(), 'dataset_converters', 'iawe',
                        'metadata')
    convert_yaml_to_hdf5(metadata_dir, output_filename)

    print("Done converting iAWE to HDF5!")
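reindex_fill_na is not defined in this snippet. A plausible sketch of what it might do, inferred from how it is used above (an assumption, not the actual iAWE helper): align the frame to the full 1-minute index and fill the gaps so the assert on missing values holds.

def reindex_fill_na(df, idx):
    # Sketch only: reindex onto the complete index, then fill the resulting NaNs.
    df = df.reindex(idx)
    df = df.ffill().bfill().fillna(0)
    return df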
Example 24
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('inpath', help='input directory (ANTgen output)', nargs='?', default='../output')
    parser.add_argument('outfile', help='output file (HDF5 file)', nargs='?', default='../output/ANTgen.h5')
    args = parser.parse_args()

    if not os.path.exists('metadata') or not os.path.isfile('metadata/building1.yaml'):
        print("No metadata found. Please run 'generate_metadata.py' before using this tool...")
        exit(1)

    print("Converting ANTgen output from '{}' to file '{}'".format(args.inpath, args.outfile))

    with open('metadata/building1.yaml', 'r') as f:
        yaml_dict = yaml.load(f, Loader=yaml.FullLoader)

    channel_list = ['total']  # pre-populate with aggregate data (total.csv)
    for app in yaml_dict['appliances']:
        channel_list.append(app['original_name'])

    store = get_datastore(args.outfile, 'HDF', mode='w')

    for i, app_name in enumerate(channel_list):
        print("Adding virtual meter ID {:02d}: {}".format(1+i, app_name))
        key = Key(building=1, meter=(i + 1))

        csvfile = os.path.join(args.inpath, str(app_name)+'.csv')
        try:
            df = pd.read_csv(csvfile, sep=';', encoding='utf-8', index_col=0)
            df.columns = pd.MultiIndex.from_tuples([('power', 'active') for x in df.columns], names=LEVEL_NAMES)
            df.index = pd.to_datetime(df.index)

            tz_naive = df.index
            tz_aware = tz_naive.tz_localize(tz='Europe/Vienna', ambiguous=True, nonexistent=pd.Timedelta('1H'))
            df.index = tz_aware

            df = df.tz_convert('Europe/Vienna')

            store.put(str(key), df)
        except FileNotFoundError:
            print("Input file '{}' not found - your HDF5 file will be incomplete!".format(csvfile))
            continue

    print('Adding metadata...')
    convert_yaml_to_hdf5('metadata/', args.outfile)
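A short usage note for the script above, assuming it is saved as convert_antgen.py and that generate_metadata.py has already written metadata/building1.yaml:

# Hypothetical invocation:
#   python convert_antgen.py ../output ../output/ANTgen.h5
if __name__ == '__main__':
    main()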
Example 25
def convert_caxe(file_path):
    '''
    Parameters
    ------------
    file_path : str
        The name of the input CSV file to be converted, as a string.
        The CSV columns must appear in this order:
        timestamp, reactive_power, apparent_power, current, frequency, voltage, active_power
    Converts the data to HDF5 format and saves it as test.h5.
    '''
    df = pd.read_csv(f'{file_path}',
                     names=['timestamp', 'R', 'A', 'C', 'F', 'V', 'T'])
    column_mapping = {
        'F': ('frequency', ""),
        'V': ('voltage', ""),
        'T': ('power', 'active'),
        'C': ('current', ''),
        'R': ('power', 'reactive'),
        'A': ('power', 'apparent'),
    }

    output_filename = 'test.h5'

    # Open data store
    store = get_datastore(output_filename, format='HDF', mode='w')
    key = Key(building=1, meter=1)
    print('Loading ', 1)
    df.index = pd.to_datetime(df.timestamp.values)
    df = df.tz_convert(
        TIMEZONE)  #  if error occurs use tz_localize for tz naive timestamps
    df = df.drop(columns=TIMESTAMP_COLUMN_NAME)
    df.index = pd.to_datetime(df.index.values)
    df.columns = pd.MultiIndex.from_tuples(
        [column_mapping[x] for x in df.columns], names=LEVEL_NAMES)
    df = df.apply(pd.to_numeric, errors='ignore')
    df = df.dropna()
    df = df.astype(np.float32)
    df = df.sort_index()
    df = df.resample("1T").mean()
    assert df.isnull().sum().sum() == 0
    store.put(str(key), df)
    store.close()
    convert_yaml_to_hdf5('./metadata', output_filename)

    print("Done converting test data to HDF5!")
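A minimal usage sketch with a hypothetical input file; the CSV must contain the columns in the order given in the docstring, with no header row, and the output is always written to test.h5.

convert_caxe('samples/power_log.csv')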
Example 26
def convert_iawe(iawe_path, output_filename, format="HDF"):
    """
    Parameters
    ----------
    iawe_path : str
        The root path of the iawe dataset.
    output_filename : str
        The destination filename (including path and suffix).
    """

    check_directory_exists(iawe_path)
    idx = pd.date_range(start=START_DATETIME, end=END_DATETIME, freq=FREQ)
    idx = idx.tz_localize('GMT').tz_convert(TIMEZONE)

    # Open data store
    store = get_datastore(output_filename, format, mode='w')
    electricity_path = join(iawe_path, "electricity")

    # Mains data
    for chan in range(1, 12):
        key = Key(building=1, meter=chan)
        filename = join(electricity_path, "%d.csv" % chan)
        print('Loading ', chan)
        df = pd.read_csv(filename, dtype=np.float64, na_values='\\N')
        df.drop_duplicates(subset=["timestamp"], inplace=True)
        df.index = pd.to_datetime(df.timestamp.values, unit='s', utc=True)
        df = df.tz_convert(TIMEZONE)
        df = df.drop(columns=TIMESTAMP_COLUMN_NAME)
        df.rename(columns=lambda x: column_mapping[x], inplace=True)
        df.columns.set_names(LEVEL_NAMES, inplace=True)
        df = df.apply(pd.to_numeric, errors='ignore')
        df = df.dropna()
        df = df.astype(np.float32)
        df = df.sort_index()
        df = df.resample("1T").mean()
        df = reindex_fill_na(df, idx)
        assert df.isnull().sum().sum() == 0
        store.put(str(key), df)
    store.close()
    
    metadata_dir = join(get_module_directory(), 'dataset_converters', 'iawe', 'metadata')
    convert_yaml_to_hdf5(metadata_dir, output_filename)

    print("Done converting iAWE to HDF5!")
Example 27
def convert_ukdale(ukdale_path, output_filename, format='HDF'):
    """Converts the UK-DALE dataset to NILMTK HDF5 format.

    For more information about the UK-DALE dataset, and to download
    it, please see http://www.doc.ic.ac.uk/~dk3810/data/

    Parameters
    ----------
    ukdale_path : str
        The root path of the UK-DALE dataset.  It is assumed that the YAML
        metadata is in 'ukdale_path/metadata'.
    output_filename : str
        The destination filename (including path and suffix).
    format : str
        format of output. Either 'HDF' or 'CSV'. Defaults to 'HDF'
    """
    ac_type_map = _get_ac_type_map(ukdale_path)

    def _ukdale_measurement_mapping_func(house_id, chan_id):
        ac_type = ac_type_map[(house_id, chan_id)][0]
        return [('power', ac_type)]

    # Open DataStore
    store = get_datastore(output_filename, format, mode='w')

    # Convert 6-second data
    _convert(ukdale_path, store, _ukdale_measurement_mapping_func, TZ,
             sort_index=False)
    store.close()

    # Add metadata
    if format == 'HDF':
        convert_yaml_to_hdf5(join(ukdale_path, 'metadata'), output_filename)

    # Convert 1-second data
    store.open(mode='a')
    _convert_one_sec_data(ukdale_path, store, ac_type_map)

    store.close()
    print("Done converting UK-DALE to HDF5!")
Example 28
def convert_unifei(redd_path, output_filename, format='HDF'):
    """
    Parameters
    ----------
    redd_path : str
        The root path of the REDD low_freq dataset.
    output_filename : str
        The destination filename (including path and suffix).
    format : str
        format of output. Either 'HDF' or 'CSV'. Defaults to 'HDF'
    """

    def _redd_measurement_mapping_func(house_id, chan_id):
        ac_type = 'active'
        return [('power', ac_type)]
        
    # Open DataStore
    store = get_datastore(output_filename, format, mode='w')
    # Convert raw data to DataStore
    _convert(redd_path, store, _redd_measurement_mapping_func, 'America/Sao_Paulo')
    print("Done convert...")
    
    # Here you need to set the path where the metadata is located
    print(get_module_directory())
    s=join(get_module_directory(),
                              'dataset_converters',
                              'unifei',
                              'metadata')
    print(s)

    # Add metadata
    # Here you also need to set the correct metadata path
    save_yaml_to_datastore(join(get_module_directory(), 
                              'dataset_converters', 
                              'unifei', 
                              'metadata'),
                         store)
    store.close()

    print("Done converting REDD to HDF5!")
Example 29
def convert_iawe(iawe_path, output_filename, format="HDF"):
    """
    Parameters
    ----------
    iawe_path : str
        The root path of the iawe dataset.
    output_filename : str
        The destination filename (including path and suffix).
    """

    check_directory_exists(iawe_path)

    # Open data store
    store = get_datastore(output_filename, format, mode='w')
    electricity_path = join(iawe_path, "electricity")

    # Mains data
    for chan in range(1, 13):
        key = Key(building=1, meter=chan)
        filename = join(electricity_path, "%d.csv" % chan)
        print('Loading ', chan)
        df = pd.read_csv(filename)
        df.drop_duplicates(subset=["timestamp"], inplace=True)
        df.index = pd.to_datetime(df.timestamp.values, unit='s', utc=True)
        df = df.tz_convert(TIMEZONE)
        df = df.drop(columns=TIMESTAMP_COLUMN_NAME)
        df.rename(columns=lambda x: column_mapping[x], inplace=True)
        df.columns.set_names(LEVEL_NAMES, inplace=True)
        df = df.apply(pd.to_numeric, errors='coerce')
        df = df.dropna()
        df = df.astype(np.float32)
        df = df.sort_index()
        store.put(str(key), df)
    store.close()
    convert_yaml_to_hdf5(join(_get_module_directory(), 'metadata'),
                         output_filename)

    print("Done converting iAWE to HDF5!")
Example 30
def convert_gjw(gjw_path, output_filename):
    """
    Parameters
    ----------
    gjw_path : str
        The root path of the gjw dataset.
    output_filename : str
        The destination filename (including path and suffix), will default if not specified
    directory and file structure
    nilm_gjw_data
        building<1>
            elec
                4-POWER_REAL_FINE <date> Dump.csv
                5-POWER_REACTIVE_STANDARD <date> Dump.csv
                ...
        ...
        building<n>
        HDF5
            nilm_gjw_data.hdf5
        metadata
            building1.yaml
            dataset.yaml
            meter_devices.yaml
        other files    
    """
    if gjw_path is None: gjw_path = home_dir
    check_directory_exists(gjw_path)
    os.chdir(gjw_path)
    gjw_path = os.getcwd()  # sort out potential issue with slashes or backslashes
    if output_filename is None:
        output_filename =join(home_dir,'HDF5','nilm_gjw_data.hdf5')
    # Open data store
    print( 'opening datastore', output_filename)
    store = get_datastore(output_filename, 'HDF', mode='w')
    # walk the directory tree from the dataset home directory
    #clear dataframe & add column headers
    df = pd.DataFrame(columns=[ACTIVE_COLUMN_NAME,REACTIVE_COLUMN_NAME])
    found = False
    for current_dir, _, files in os.walk(gjw_path):
        #unused second parameter of for dirs_in_current_dir
        if current_dir.find('.git')!=-1 or current_dir.find('.ipynb') != -1:
            #print( 'Skipping ', current_dir)
            continue
        print( 'checking', current_dir)
        m = bld_re.search(current_dir)
        if m: #The csv files may be further down the tree so this section may be repeated
            building_name = m.group()
            building_nbr = int(bld_nbr_re.search(building_name).group())
            meter_nbr = 1
            key = Key(building=building_nbr, meter=meter_nbr)
        for items in fnmatch.filter(files, "4*.csv"):
            # process any .CSV files found
            found = True
            ds = iso_date_re.search(items).group()
            # print( 'found files for date:', ds,end=" ")
            # found files to process
            df1 = _read_file_pair(current_dir,ds) # read two csv files into a dataframe    
            df = pd.concat([df,df1]) # concatenate the results into one long dataframe
        if found:
            found = False
            df = _prepare_data_for_toolkit(df)
            _summarise_dataframe(df,'Prepared for tool kit')
            store.put(str(key), df)
            #clear dataframe & add column headers
            #df = pd.DataFrame(columns=[ACTIVE_COLUMN_NAME,REACTIVE_COLUMN_NAME])
            break # only 1 folder with .csv files at present
    store.close()
    convert_yaml_to_hdf5(join(gjw_path, 'metadata'),output_filename)
    print("Done converting gjw to HDF5!")
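The regular expressions bld_re, bld_nbr_re and iso_date_re are defined elsewhere in this converter. A plausible sketch of what they might look like, inferred from how they are used above (an assumption, not the original definitions):

import re

bld_re = re.compile(r'building\d+')             # matches 'building<n>' directories
bld_nbr_re = re.compile(r'\d+')                 # pulls the building number out of that name
iso_date_re = re.compile(r'\d{4}-\d{2}-\d{2}')  # pulls the <date> part out of the CSV file names
Example 31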
        ('power', 'reactive')
    ])
    meter.set_index(('physical_quantity', 'type'), inplace=True, drop=True)
    meter.columns.set_names(('physical_quantity', 'type'), inplace=True)

    meter = meter.apply(pd.to_numeric, errors='coerce')
    meter = meter.dropna()
    meter = meter.astype(float)
    meter = meter.sort_index()
    #    meter = meter.resample("1S")
    #    meter = reindex_fill_na(meter, idx)
    assert meter.isnull().sum().sum() == 0
    return meter


pqenergy = get_datastore('pqenergy.h5', 'HDF', mode='w')
pqenergy.put('/building1/elec/meter1', convert_pq('aggr_p', 'aggr_q'))
pqenergy.put('/building1/elec/meter2', convert_pq('aircon_p', 'aircon_q'))
pqenergy.put('/building1/elec/meter3', convert_pq('hdryer_p', 'hdryer_q'))
pqenergy.put('/building1/elec/meter4', convert_pq('wboiler_p', 'wboiler_q'))
pqenergy.put('/building1/elec/meter5', convert_pq('ecooker_p', 'ecooker_q'))
pqenergy.put('/building1/elec/meter6', convert_pq('dehumid_p', 'dehumid_q'))
pqenergy.put('/building1/elec/meter7', convert_pq('fridge_p', 'fridge_q'))
pqenergy.put('/building1/elec/meter8', convert_pq('aheater_p', 'aheater_q'))
pqenergy.put('/building1/elec/meter9', convert_pq('ciron_p', 'ciron_q'))
pqenergy.put('/building1/elec/meter10', convert_pq('rcooker_p', 'rcooker_q'))
pqenergy.put('/building1/elec/meter11', convert_pq('tv_p', 'tv_q'))
pqenergy.put('/building1/elec/meter12', convert_pq('vhood_p', 'vhood_q'))
pqenergy.put('/building1/elec/meter13', convert_pq('washer_p', 'washer_q'))
save_yaml_to_datastore('metadata_pq/', pqenergy)
pqenergy.close()
Example 32
    if isMedal:
        START_DATE = np.datetime64(
            '2017-05-12T11:08:28') - np.timedelta64('2', 'h')
    else:
        START_DATE = np.datetime64(
            '2017-05-12T11:08:46') - np.timedelta64('2', 'h')
    time_indices = [START_DATE]
    for i in range(1, num_rows):
        time_indices.append(time_indices[i-1] + np.timedelta64('1', 's'))
    return time_indices


if not os.path.exists('../data/'):
    os.makedirs('../data/')

store = get_datastore("../data/converted_sum.hdf5", 'HDF', mode='w')

"""
Gets CLEAR and MEDAL data and puts them into the store
with the right key and instance numbers.
"""
frames = get_clear_data()
for phase in range(1, 4):
    key = Key(building=1, meter=phase)
    print('Adding phase {}'.format(phase))
    store.put(str(key), frames[phase-1])


for medal_id in range(1, 16):
    frames = get_summary_data(medal_id)
    for i in range(1, 7):
Example 33
def convert_hes(data_dir, output_filename, format='HDF', max_chunks=None):
    metadata = {
        'name': 'HES',
        'geographic_coordinates': (51.464462, -0.076544),  # London
        'timezone': 'Europe/London'
    }

    # Open DataStore
    store = get_datastore(output_filename, format, mode='w')

    # load list of appliances
    hes_to_nilmtk_appliance_lookup = pd.read_csv(
        join(get_module_directory(), 'dataset_converters', 'hes',
             'hes_to_nilmtk_appliance_lookup.csv'))

    # load list of houses
    hes_house_ids = load_list_of_house_ids(data_dir)
    nilmtk_house_ids = np.arange(1, len(hes_house_ids) + 1)
    hes_to_nilmtk_house_ids = dict(zip(hes_house_ids, nilmtk_house_ids))

    # array of hes_house_codes: nilmtk_building_code = house_codes.index(hes_house_code)
    house_codes = []

    # map
    house_appliance_codes = dict()

    # Create a temporary metadata dir
    original_metadata_dir = join(get_module_directory(), 'dataset_converters',
                                 'hes', 'metadata')
    tmp_dir = tempfile.mkdtemp()
    metadata_dir = join(tmp_dir, 'metadata')
    shutil.copytree(original_metadata_dir, metadata_dir)
    print("Using temporary dir for metadata:", metadata_dir)

    # Iterate over files
    for filename in FILENAMES:
        # Load appliance energy data chunk-by-chunk
        full_filename = join(data_dir, filename)
        print('Loading', full_filename)
        try:
            reader = pd.read_csv(full_filename,
                                 names=COL_NAMES,
                                 index_col=False,
                                 chunksize=CHUNKSIZE)
        except IOError as e:
            print(e, file=stderr)
            continue

        # Iterate over chunks in file
        chunk_i = 0
        for chunk in reader:
            if max_chunks is not None and chunk_i >= max_chunks:
                break

            print(' processing chunk', chunk_i, 'of', filename)
            # Convert date and time columns to np.datetime64 objects
            dt = chunk['date'] + ' ' + chunk['time']
            del chunk['date']
            del chunk['time']
            chunk['datetime'] = pd.to_datetime(dt,
                                               format='%Y-%m-%d %H:%M:%S',
                                               utc=True)

            # Data is either tenths of a Wh or tenths of a degree
            chunk['data'] *= 10
            chunk['data'] = chunk['data'].astype(np.float32)

            # Iterate over houses in chunk
            for hes_house_id, hes_house_id_df in chunk.groupby('house id'):
                if hes_house_id not in house_codes:
                    house_codes.append(hes_house_id)

                if hes_house_id not in house_appliance_codes.keys():
                    house_appliance_codes[hes_house_id] = []

                nilmtk_house_id = house_codes.index(hes_house_id) + 1

                # Iterate over appliances in house
                for appliance_code, appliance_df in chunk.groupby(
                        'appliance code'):
                    if appliance_code not in house_appliance_codes[
                            hes_house_id]:
                        house_appliance_codes[hes_house_id].append(
                            appliance_code)
                    nilmtk_meter_id = house_appliance_codes[
                        hes_house_id].index(appliance_code) + 1
                    _process_meter_in_chunk(nilmtk_house_id, nilmtk_meter_id,
                                            hes_house_id_df, store,
                                            appliance_code)

            chunk_i += 1

    print('houses with some data loaded:', house_appliance_codes.keys())

    store.close()

    # generate building yaml metadata
    for hes_house_id in house_codes:
        nilmtk_building_id = house_codes.index(hes_house_id) + 1
        building_metadata = {}
        building_metadata['instance'] = nilmtk_building_id
        building_metadata['original_name'] = int(
            hes_house_id)  # use python int
        building_metadata['elec_meters'] = {}
        building_metadata['appliances'] = []

        # initialise dict of instances of each appliance type
        instance_counter = {}

        for appliance_code in house_appliance_codes[hes_house_id]:
            nilmtk_meter_id = house_appliance_codes[hes_house_id].index(
                appliance_code) + 1
            # meter metadata
            if appliance_code in MAINS_CODES:
                meter_metadata = {
                    'device_model': 'multivoies',
                    'site_meter': True
                }
                break
            elif appliance_code in CIRCUIT_CODES:
                meter_metadata = {'device_model': 'multivoies'}
                break
            elif appliance_code in TEMPERATURE_CODES:
                break
            else:  # is appliance
                meter_metadata = {'device_model': 'wattmeter'}

            # only appliance meters at this point
            building_metadata['elec_meters'][nilmtk_meter_id] = meter_metadata
            # appliance metadata
            lookup_row = hes_to_nilmtk_appliance_lookup[
                hes_to_nilmtk_appliance_lookup.Code == appliance_code].iloc[0]
            appliance_metadata = {
                'original_name': lookup_row.Name,
                'meters': [nilmtk_meter_id]
            }
            # appliance type
            appliance_metadata.update({'type': lookup_row.nilmtk_name})
            # TODO appliance room

            # appliance instance number
            if instance_counter.get(lookup_row.nilmtk_name) is None:
                instance_counter[lookup_row.nilmtk_name] = 0
            instance_counter[lookup_row.nilmtk_name] += 1
            appliance_metadata['instance'] = instance_counter[
                lookup_row.nilmtk_name]

            building_metadata['appliances'].append(appliance_metadata)

        building = 'building{:d}'.format(nilmtk_building_id)

        yaml_full_filename = join(metadata_dir, building + '.yaml')

        with open(yaml_full_filename, 'w') as outfile:
            #print(building_metadata)
            outfile.write(yaml.dump(building_metadata))

    # write yaml metadata to hdf5
    convert_yaml_to_hdf5(metadata_dir, output_filename)

    # remove the temporary dir when finished
    shutil.rmtree(tmp_dir)
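A minimal usage sketch with placeholder paths; max_chunks caps how many chunks of each data file are processed, which keeps a test run short.

convert_hes('/data/hes', '/data/hes.h5', max_chunks=10)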
Example 34
def convert_deps(deps_path, input_filename, output_filename, format='HDF'):
    """
    Parameters
    ----------
    deps_path : str
        The root path of the DEPS dataset. 
        e.g 'C:/data/deps'
    input_filename : str
        The rawdata filename (including path and suffix).
        e.g 'C:/data/rawdata.csv'
    output_filename : str
        The destination HDF5 filename (including path and suffix).
        e.g 'C:/data/deps/DEPS_data.h5'
    format : str
        format of output. Either 'HDF' or 'CSV'. Defaults to 'HDF'
        
    Meters & Measurements :
    ----------
    Measurement assignment (idMeasurement) in rawdata to REDD format
    
    Measurements id's           Units           Meters Name
    14011 14012             --> W VAr       --> Main_RST 
    14001 14007 14014 14017 --> V A W VAr   --> Main_R 
    14002 14008 14015 14018 --> V A W VAr   --> Main_S
    14003 14009 14016 14019 --> V A W VAr   --> Main_T
    13001                   --> W           --> Lights_1
    13002                   --> W           --> Lights_2
    10003 10006 10014 10018 --> V A W VAr   --> HVAC_1 
    10002 10005 10013 10017 --> V A W VAr   --> HVAC_2
    10001 10004 10012 10016 --> V A W VAr   --> HVAC_4
    21001 21002 21003 21005 --> V A W VAr   --> Rack    
          
    Example
    ----------
    raw_data.csv (input_filename):
    --
    idMeasurement, UNIX_timestamp(tStampUTC), dataValue
    14011,         1583103600,                      123
    14012,         1583103600,                     -416
    14011,         1583103601,                      126
    14012,         1583103601,                     -416
    ...            ...                              ...
    14011,         1583535599,                      121
    14012,         1583535599,                     -411
    
    Outputs REDD format: deps_path/classroom1/ :
    --
    channel_1.dat: 
    1583103600 123 -416
    1583103600 126 -416
    ...        ...  ...  
    1583103600 121 -411
    --
    labels.dat:   
    1 Main_RST
    
    Output HDF5 file: output_filename.h5    
        
    """
    #--------------------------------------------------------------------
    # Written by Andrés Arias Silva
    # Converts the raw data extracted from the DEPS SQL database to REDD format
    _deps_to_redd_format(deps_path, input_filename)

    #--------------------------------------------------------------------

    def _deps_measurement_mapping_func(classroom_id, chan_id):
        # Map each DEPS channel to the physical quantities it records
        # (see the measurement table in the docstring above).
        if chan_id == 1:
            # Main_RST: active and reactive power only
            meas = [('power', 'active'), ('power', 'reactive')]
        elif 1 < chan_id <= 4:
            # Main_R / Main_S / Main_T: V, A, W, VAr
            meas = [('voltage', ''), ('current', ''),
                    ('power', 'active'), ('power', 'reactive')]
        elif 4 < chan_id <= 6:
            # Lights_1 / Lights_2: active power only
            meas = [('power', 'active')]
        elif 6 < chan_id <= 10:
            # HVAC_1 / HVAC_2 / HVAC_4 / Rack: V, A, W, VAr
            meas = [('voltage', ''), ('current', ''),
                    ('power', 'active'), ('power', 'reactive')]
        else:
            raise ValueError('incorrect channel number')
        return meas

    # Open DataStore
    store = get_datastore(output_filename, format, mode='w')

    # Convert raw data to DataStore
    _convert(deps_path, store, _deps_measurement_mapping_func, 'Europe/Madrid')

    # Add metadata
    save_yaml_to_datastore(
        join(get_module_directory(), 'dataset_converters', 'deps', 'metadata'),
        store)
    store.close()

    print("Done converting DEPS data to HDF5!")
Ejemplo n.º 35
0
def convert_hes(data_dir, output_filename, format='HDF', max_chunks=None):
    metadata = {
        'name': 'HES',
        'geographic_coordinates': (51.464462,-0.076544), # London
        'timezone': 'Europe/London'
    }
    
    # Open DataStore
    store = get_datastore(output_filename, format, mode='w')
    
    # load list of appliances
    hes_to_nilmtk_appliance_lookup = pd.read_csv(join(get_module_directory(), 
                                        'dataset_converters', 
                                        'hes', 
                                        'hes_to_nilmtk_appliance_lookup.csv'))

    # load list of houses
    hes_house_ids = load_list_of_house_ids(data_dir)
    nilmtk_house_ids = np.arange(1,len(hes_house_ids)+1)
    hes_to_nilmtk_house_ids = dict(zip(hes_house_ids, nilmtk_house_ids))

    # array of hes_house_codes: nilmtk_building_code = house_codes.index(hes_house_code)
    house_codes = []
    # map 
    house_appliance_codes = dict()

    # Iterate over files
    for filename in FILENAMES:
        # Load appliance energy data chunk-by-chunk
        full_filename = join(data_dir, filename)
        print('loading', full_filename)
        try:
            reader = pd.read_csv(full_filename, names=COL_NAMES, 
                                 index_col=False, chunksize=CHUNKSIZE)
        except IOError as e:
            print(e, file=stderr)
            continue

        # Iterate over chunks in file
        chunk_i = 0
        for chunk in reader:
            if max_chunks is not None and chunk_i >= max_chunks:
                break

            print(' processing chunk', chunk_i, 'of', filename)
            # Convert date and time columns to np.datetime64 objects
            dt = chunk['date'] + ' ' + chunk['time']
            del chunk['date']
            del chunk['time']
            chunk['datetime'] = dt.apply(datetime_converter)

            # Data is either tenths of a Wh or tenths of a degree
            chunk['data'] *= 10
            chunk['data'] = chunk['data'].astype(np.float32)

            # Iterate over houses in chunk
            for hes_house_id, hes_house_id_df in chunk.groupby('house id'):
                if hes_house_id not in house_codes:
                    house_codes.append(hes_house_id)
                    
                if hes_house_id not in house_appliance_codes.keys():
                    house_appliance_codes[hes_house_id] = []
                
                nilmtk_house_id = house_codes.index(hes_house_id)+1
                
                # Iterate over appliances in this house (group the house's
                # own rows, not the whole chunk, so appliance codes from
                # other houses are not attributed to this house)
                for appliance_code, appliance_df in hes_house_id_df.groupby('appliance code'):
                    if appliance_code not in house_appliance_codes[hes_house_id]:
                        house_appliance_codes[hes_house_id].append(appliance_code)
                    nilmtk_meter_id = house_appliance_codes[hes_house_id].index(appliance_code)+1
                    _process_meter_in_chunk(nilmtk_house_id, nilmtk_meter_id, hes_house_id_df, store, appliance_code)
                    
            chunk_i += 1
    print('houses with some data loaded:', house_appliance_codes.keys())
    
    store.close()
    
    # generate building yaml metadata
    for hes_house_id in house_codes:
        nilmtk_building_id = house_codes.index(hes_house_id)+1
        building_metadata = {}
        building_metadata['instance'] = nilmtk_building_id
        building_metadata['original_name'] = int(hes_house_id) # use python int
        building_metadata['elec_meters'] = {}
        building_metadata['appliances'] = []
        
        # initialise dict of instances of each appliance type
        instance_counter = {}
        
        for appliance_code in house_appliance_codes[hes_house_id]:
            nilmtk_meter_id = house_appliance_codes[hes_house_id].index(appliance_code)+1
            # meter metadata: register mains and circuit meters, then move on
            # to the next code ('break' here would skip every remaining
            # appliance for this house)
            if appliance_code in MAINS_CODES:
                building_metadata['elec_meters'][nilmtk_meter_id] = {
                    'device_model': 'multivoies', 'site_meter': True}
                continue
            elif appliance_code in CIRCUIT_CODES:
                building_metadata['elec_meters'][nilmtk_meter_id] = {
                    'device_model': 'multivoies'}
                continue
            elif appliance_code in TEMPERATURE_CODES:
                # temperature channels are not electricity meters
                continue
            else: # is appliance
                meter_metadata = {'device_model': 'wattmeter'}
                
            # only appliance meters at this point
            building_metadata['elec_meters'][nilmtk_meter_id] = meter_metadata
            # appliance metadata
            lookup_row = hes_to_nilmtk_appliance_lookup[hes_to_nilmtk_appliance_lookup.Code==appliance_code].iloc[0]
            appliance_metadata = {'original_name': lookup_row.Name, 
                                      'meters': [nilmtk_meter_id] }
            # appliance type
            appliance_metadata.update({'type': lookup_row.nilmtk_name})
            # TODO appliance room
            
            # appliance instance number
            if instance_counter.get(lookup_row.nilmtk_name) is None:
                instance_counter[lookup_row.nilmtk_name] = 0
            instance_counter[lookup_row.nilmtk_name] += 1 
            appliance_metadata['instance'] = instance_counter[lookup_row.nilmtk_name]
            
            building_metadata['appliances'].append(appliance_metadata)
        building = 'building{:d}'.format(nilmtk_building_id)
        yaml_full_filename = join(_get_module_directory(), 'metadata', building + '.yaml')
        with open(yaml_full_filename, 'w') as outfile:
            #print(building_metadata)
            outfile.write(yaml.dump(building_metadata))
            
    
    # write yaml metadata to hdf5
    convert_yaml_to_hdf5(join(_get_module_directory(), 'metadata'),
                         output_filename)
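
A minimal invocation sketch (the data directory and output path below are placeholders; max_chunks caps how many chunks are read from each file, which is useful for a quick test run):

convert_hes('/data/hes', '/data/hes.h5', format='HDF', max_chunks=2)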
Ejemplo n.º 36
0
# load the pickled readings, timestamps and appliance labels
# (wattsFile, timeStampsFile, appliancesFile and outputFilename are
# expected to be defined earlier in the original script)
with open(wattsFile, 'rb') as f:
    watts = pickle.load(f)
with open(timeStampsFile, 'rb') as f:
    timeStamps = pickle.load(f)
with open(appliancesFile, 'rb') as f:
    appliances = pickle.load(f)

watts = np.array(watts)
appliances = np.array(appliances)
timeStamps = np.array(timeStamps)

store = get_datastore(outputFilename, 'HDF', mode='w')

# break down the data by appliance and set every time point where
# the appliance wasn't used to 0
for instance, app in enumerate(np.unique(appliances)):

    # get the time points where a given appliance is on and
    # also where it is off
    appIndices = np.where(appliances == app)[0]
    nonAppIndices = np.where(appliances != app)[0]

    # keep only the data for when the appliance is on
    wattsFiltered = np.delete(np.copy(watts), nonAppIndices)
    timeFiltered = np.delete(np.copy(timeStamps), nonAppIndices)

    # create zeroed data when the appliance is off
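
The excerpt ends before the zero-filling step. The idea described in the comments above (keep an appliance's readings at its own time points and use zeros everywhere else) can be illustrated with a small self-contained sketch on synthetic arrays; the values below are made up for the example:

import numpy as np

# synthetic readings: three appliances interleaved over six time points
watts = np.array([100.0, 60.0, 100.0, 5.0, 60.0, 100.0])
timeStamps = np.array([0, 1, 2, 3, 4, 5])
appliances = np.array(['fridge', 'lamp', 'fridge', 'router', 'lamp', 'fridge'])

for instance, app in enumerate(np.unique(appliances)):
    # readings for this appliance stay; every other time point becomes zero
    series = np.where(appliances == app, watts, 0.0)
    print(app, series)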
Ejemplo n.º 37
0
def convert_gjw(gjw_path, output_filename):
    """
    Parameters
    ----------
    gjw_path : str
        The root path of the gjw dataset.
    output_filename : str
        The destination filename (including path and suffix), will default if not specified
    directory and file structure
    nilm_gjw_data
        building<1>
            elec
                4-POWER_REAL_FINE <date> Dump.csv
                5-POWER_REACTIVE_STANDARD <date> Dump.csv
                ...
        ...
        building<n>
        HDF5
            nilm_gjw_data.hdf5
        metadata
            building1.yaml
            dataset.yaml
            meter_devices.yaml
        other files    
    """
    if gjw_path is None: gjw_path = home_dir
    check_directory_exists(gjw_path)
    os.chdir(gjw_path)
    gjw_path = os.getcwd()  # sort out potential issue with slashes or backslashes
    if output_filename is None:
        output_filename = join(home_dir, 'HDF5', 'nilm_gjw_data.hdf5')
    # Open data store
    print('opening datastore', output_filename)
    store = get_datastore(output_filename, format, mode='w')
    # walk the directory tree from the dataset home directory
    #clear dataframe & add column headers
    df = pd.DataFrame(columns=[ACTIVE_COLUMN_NAME, REACTIVE_COLUMN_NAME])
    found = False
    for current_dir, _, files in os.walk(gjw_path):
        # the second value from os.walk (dirs in current dir) is unused
        if current_dir.find('.git') != -1 or current_dir.find('.ipynb') != -1:
            #print( 'Skipping ', current_dir)
            continue
        print('checking', current_dir)
        m = bld_re.search(current_dir)
        if m:  #The csv files may be further down the tree so this section may be repeated
            building_name = m.group()
            building_nbr = int(bld_nbr_re.search(building_name).group())
            meter_nbr = 1
            key = Key(building=building_nbr, meter=meter_nbr)
        for items in fnmatch.filter(files, "4*.csv"):
            # process any .CSV files found
            found = True
            ds = iso_date_re.search(items).group()
            # print( 'found files for date:', ds,end=" ")
            # found files to process
            # read two csv files into a dataframe
            df1 = _read_file_pair(current_dir, ds)
            # concatenate the results into one long dataframe
            df = pd.concat([df, df1])
        if found:
            found = False
            df = _prepare_data_for_toolkit(df)
            _summarise_dataframe(df, 'Prepared for tool kit')
            store.put(str(key), df)
            #clear dataframe & add column headers
            #df = pd.DataFrame(columns=[ACTIVE_COLUMN_NAME,REACTIVE_COLUMN_NAME])
            break  # only 1 folder with .csv files at present
    store.close()
    convert_yaml_to_hdf5(join(gjw_path, 'metadata'), output_filename)
    print("Done converting gjw to HDF5!")
Ejemplo n.º 38
0
def convert_sortd(input_path, output_filename, format='HDF'):
    """Converts the dataset to NILMTK HDF5 format.

    For more information about the SOR test dataset, contact Samuel Marisa.

    Parameters
    ----------
    input_path : str
        The root path of the dataset.  It is assumed that the YAML
        metadata is in 'input_path/metadata'.
    output_filename : str
        The destination filename (including path and suffix).
    format : str
        format of output. Either 'HDF' or 'CSV'. Defaults to 'HDF'

    Example usage:
    --------------
    convert_sortd('/sortd', 'store.h5')
    """
    print(
        'Attempting to convert the SORTD dataset at %s into %s in NILMTK %s format...'
        % (input_path, output_filename, format))
    # Ensure that the input directory exists
    check_directory_exists(input_path)
    # Load the dataset metadata
    with open(join(input_path, 'metadata/dataset.yaml'), 'r') as stream:
        dataset_metadata = yaml.safe_load(stream)
    # Open the datastore
    store = get_datastore(output_filename, format, mode='w')
    # Iterate through all building metadata files found in the dataset
    for metadata_file in glob.glob(
            join(input_path, 'metadata/building[0-9]*.yaml')):
        # Load the building metadata
        with open(metadata_file, 'r') as stream:
            metadata = yaml.safe_load(stream)
        building_id = int(metadata['instance'])
        print('==> Loading building %d defined at %s. Please wait...' %
              (building_id, metadata_file))
        for meter_id, meter_data in metadata['elec_meters'].items():
            meter_id = int(meter_id)
            key = Key(building=building_id, meter=meter_id)
            # Load the raw data from the data location
            print('  - Loading meter %s from %s...' %
                  (meter_id, meter_data['data_location']))
            columns = [('power', 'active')]
            df = pd.read_csv(join(input_path, meter_data['data_location']),
                             sep=',',
                             names=columns,
                             dtype={m: np.float32
                                    for m in columns})
            # Convert the timestamp index column to timezone-aware datetime
            df.index = pd.to_datetime(df.index.values, unit='s', utc=True)
            df = df.tz_convert(dataset_metadata['timezone'])
            #df = pd.read_csv(join(input_path, db_file), sep=';', names=('Datetime', 'P1', 'P2', 'P3'), dtype={'P1': np.float64, 'P2': np.float64, 'P3': np.float64}, parse_dates=[1])
            print(df.info())
            print(df.head())
            #print(df.tail())
            print("  - Storing data under key %s in the datastore..." %
                  (str(key)))
            store.put(str(key), df)
        print("  - Building %s loaded!" % (building_id))
    print("Adding the metadata into the store...")
    save_yaml_to_datastore(join(input_path, 'metadata'), store)
    print("Closing the store...")
    store.close()
    print("Done converting SORTD dataset to HDF5!")