Example #1
 def test_get_table_names(self):
     s_tables = psda.get_table_names('shared')
     c_tables = psda.get_table_names('curated')
     r_tables = psda.get_table_names('raw')
     self.assertIn('group1_disaggregated_2012_12', c_tables,
                   'curated schema has correct tables')
     self.assertIn('egauge_15min_2013', r_tables,
                   'raw schema has correct tables')
     self.assertIn('validated_01_2014', s_tables,
                   'shared schema has correct tables')
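# The test methods in this listing appear without their enclosing class. A
# minimal harness sketch, assuming a unittest.TestCase subclass (the class
# name and hard-coded URL below are assumptions, not from the source):
import unittest

import disaggregator.PecanStreetDatasetAdapter as psda


class PecanStreetAdapterTests(unittest.TestCase):

    def setUp(self):
        # a later example calls psda.set_url(settings.PECAN_STREET_DB_URL)
        psda.set_url('postgresql://*****:*****@db.wiki-energy.org:5432/postgres')


if __name__ == '__main__':
    unittest.main()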
Example #3
def get_type_from_dataset(device_name, table_num, limit=0):
    """
    Given a device name, look up the dataids in the shared schema that have
    real values for that device and return the appliance type generated from
    at most `limit` of them (a `limit` of 0 means all).
    """

    devices_types = {}
    devices_types_unsampled = {}
    db_url = "postgresql://*****:*****@db.wiki-energy.org:5432/postgres"
    psda.set_url(db_url)
    schema = "shared"
    table = "validated_0" + str(table_num) + "_2014"
    ids_for_device = psda.get_dataids_with_real_values(schema, table, device_name)
    if limit > len(ids_for_device) or limit == 0:
        limit = len(ids_for_device)
    device_type_orig = psda.generate_type_for_appliance_by_dataids(schema, table, device_name, ids_for_device[:limit])
    return device_type_orig
def create_dataset(schema, tables, ids, n_classes, which=None):
    all_instances = psda.generate_instances_for_appliances_by_dataids(
        schema, tables, ['use', 'air1', 'furnace1'], ids, sample_rate='15T')

    X_arrays = []
    y_arrays = []
    sorted_classes = np.linspace(0, 1, n_classes + 1)[:-1]
    for instances, dataid in zip(all_instances, ids):
        use = instances[0].traces[0]
        use.series.fillna(0, inplace=True)
        use.series = use.series.astype(float).clip(0.0000001)
        air1 = instances[1].traces[0]
        furnace1 = instances[2].traces[0]
        total_air = da.utils.aggregate_traces([air1, furnace1], {})
        total_air.series.fillna(0, inplace=True)
        total_air.series = total_air.series.astype(float)
        ratio_series = total_air.series / use.series
        ratios = da.appliance.ApplianceTrace(ratio_series, {})
        use_windows = use.get_windows(window_length, window_stride)
        ratio_windows = ratios.get_windows(window_length, window_stride)
        X_arrays.append(use_windows)
        ratio_windows = ratio_windows[:, prediction_index].clip(0, 1)
        classes = np.searchsorted(sorted_classes, ratio_windows,
                                  side='right') - 1
        y_arrays.append(classes_to_onehot(classes, n_classes))
    X = np.concatenate(X_arrays, axis=0)
    y = np.concatenate(y_arrays, axis=0)
    dataset = ds.DenseDesignMatrix(X=X, y=y)
    with open(os.path.join(args.data_dir, args.prefix + '_' + which + '.pkl'),
              'w') as f:
        pickle.dump(dataset, f)
def get_type_from_dataset(device_name, schema, table_num, limit=0):
    '''
    Given a device name, look up the dataids in the given schema that have
    real values for that device and return the appliance type generated from
    at most `limit` of them (a `limit` of 0 means all).
    '''

    devices_types = {}
    devices_types_unsampled = {}
    db_url = 'postgresql://*****:*****@db.wiki-energy.org:5432/postgres'
    psda.set_url(db_url)
    # note: this only builds single-digit month names ('01'-'09')
    table = 'group1_disaggregated_2013_0' + str(table_num)
    ids_for_device = psda.get_dataids_with_real_values(schema, table,
                                                       device_name)
    if limit > len(ids_for_device) or limit == 0:
        limit = len(ids_for_device)
    device_type_orig = psda.generate_type_for_appliance_by_dataids(
        schema, table, device_name, ids_for_device[:limit])
    return device_type_orig
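# Illustrative call only; the schema and month values are assumptions. The
# curated schema is where the group1_disaggregated tables live (see Example #1):
# device_type = get_type_from_dataset('air1', 'curated', 4, limit=10)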
Example #6
 def test_table_metadata(self):
     ids, cols = psda.get_table_dataids_and_column_names(
         'shared', 'validated_01_2014')
     self.assertIn(744, ids, 'shared table 01 2014 has dataid 744')
     self.assertIn('use', cols, 'shared table 01 2014 has column "use"')
     self.assertIn('air1', cols, 'shared table 01 2014 has column "air1"')
     pass
def create_dataset(schema, tables, ids, n_classes, which=None):
    all_instances = psda.generate_instances_for_appliances_by_dataids(
        schema, tables, ['use', 'air1', 'furnace1'], ids, sample_rate='15T')

    X_arrays = []
    y_arrays = []
    sorted_classes = np.linspace(0, 1, n_classes + 1)[:-1]
    for instances, dataid in zip(all_instances, ids):
        use = instances[0].traces[0]
        use.series.fillna(0, inplace=True)
        use.series = use.series.astype(float).clip(0.0000001)
        air1 = instances[1].traces[0]
        furnace1 = instances[2].traces[0]
        total_air = da.utils.aggregate_traces([air1, furnace1], {})
        total_air.series.fillna(0, inplace=True)
        total_air.series = total_air.series.astype(float)
        ratio_series = total_air.series / use.series
        ratios = da.appliance.ApplianceTrace(ratio_series, {})
        use_windows = use.get_windows(window_length, window_stride)
        ratio_windows = ratios.get_windows(window_length, window_stride)
        X_arrays.append(use_windows)
        ratio_windows = ratio_windows[:, prediction_index].clip(0, 1)
        classes = np.searchsorted(sorted_classes, ratio_windows,
                                  side='right') - 1
        y_arrays.append(classes_to_onehot(classes, n_classes))
    X = np.concatenate(X_arrays, axis=0)
    y = np.concatenate(y_arrays, axis=0)
    dataset = ds.DenseDesignMatrix(X=X, y=y)
    with open(os.path.join(args.data_dir, args.prefix + '_' + which + '.pkl'),
              'w') as f:
        pickle.dump(dataset, f)
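# A standalone sketch of the ratio-binning step above. classes_to_onehot is
# never defined in this listing, so the helper below is an assumed
# implementation matching its usage.
import numpy as np


def classes_to_onehot(classes, n_classes):
    onehot = np.zeros((classes.size, n_classes))
    onehot[np.arange(classes.size), classes] = 1
    return onehot


n_classes = 4
sorted_classes = np.linspace(0, 1, n_classes + 1)[:-1]  # [0.0, 0.25, 0.5, 0.75]
ratios = np.array([0.0, 0.1, 0.3, 0.74, 1.0])
classes = np.searchsorted(sorted_classes, ratios, side='right') - 1
print classes                               # [0 0 1 2 3]
print classes_to_onehot(classes, n_classes)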
Example #8
def get_type_from_dataset(device_name, schema, table_num, limit=0):
    '''
    Given a device name, look up the dataids in the given schema that have
    real values for that device and return the appliance type generated from
    at most `limit` of them (a `limit` of 0 means all).
    '''

    devices_types = {}
    devices_types_unsampled = {}
    db_url = 'postgresql://*****:*****@db.wiki-energy.org:5432/postgres'
    psda.set_url(db_url)
    table = 'group1_disaggregated_2013_0' + str(table_num)
    ids_for_device = psda.get_dataids_with_real_values(schema, table,
                                                       device_name)
    if (limit > len(ids_for_device) or limit == 0):
        limit = len(ids_for_device)
    device_type_orig = psda.generate_type_for_appliance_by_dataids(
        schema, table, device_name, ids_for_device[:limit])
    return device_type_orig
def create_dataset(schema, tables, ids, n_classes, which=None):
    all_instances = psda.generate_instances_for_appliances_by_dataids(
        schema, tables, ['use', 'air1', 'furnace1'], ids, sample_rate='15T')

    energy_arrays = []
    temperature_arrays = []
    time_arrays = []
    weekday_arrays = []
    target_arrays = []
    sorted_classes = np.linspace(0, 1, n_classes + 1)[:-1]
    for instances, dataid in zip(all_instances, ids):
        # format use correctly
        use = instances[0].traces[0]
        use.series.fillna(0, inplace=True)
        use.series = use.series.astype(float).clip(0.0000001)
        use_windows = use.get_windows(window_length, window_stride)

        # create features sources
        energy_arrays.append(use_windows)
        temperature_arrays.append(np.tile([70], (use_windows.shape[0], 1)))
        time_arrays.append(np.tile([12], (use_windows.shape[0], 1)))
        weekday_arrays.append(
            np.tile([1, 0, 0, 0, 0, 0, 0], (use_windows.shape[0], 1)))

        # determine targets
        air1 = instances[1].traces[0]
        furnace1 = instances[2].traces[0]
        total_air = da.utils.aggregate_traces([air1, furnace1], {})
        total_air.series.fillna(0, inplace=True)
        total_air.series = total_air.series.astype(float)
        ratio_series = total_air.series / use.series
        ratios = da.appliance.ApplianceTrace(ratio_series, {})
        ratio_windows = ratios.get_windows(window_length, window_stride)
        ratio_windows = ratio_windows[:, prediction_index].clip(0, 1)
        classes = np.searchsorted(sorted_classes, ratio_windows,
                                  side='right') - 1
        target_arrays.append(classes_to_onehot(classes, n_classes))

    # create data tuple
    energy_arrays = np.concatenate(energy_arrays, axis=0)[:, :, np.newaxis,
                                                          np.newaxis]
    temperature_arrays = np.concatenate(temperature_arrays, axis=0)
    time_arrays = np.concatenate(time_arrays, axis=0)
    weekday_arrays = csr_matrix(np.concatenate(weekday_arrays, axis=0))
    target_arrays = csr_matrix(np.concatenate(target_arrays, axis=0))
    data = (energy_arrays, temperature_arrays, time_arrays, weekday_arrays,
            target_arrays)

    # define the data specs
    space = CompositeSpace([
        Conv2DSpace(shape=[10, 1], num_channels=1),
        VectorSpace(dim=1),
        VectorSpace(dim=1),
        VectorSpace(dim=7, sparse=True),
        VectorSpace(dim=n_classes, sparse=True)
    ])
    source = ('features0', 'features1', 'features2', 'features3', 'targets')
    data_specs = (space, source)
    dataset = VectorSpacesDataset(data=data, data_specs=data_specs)
    with open(os.path.join(args.data_dir, args.prefix + '_' + which + '.pkl'),
              'w') as f:
        pickle.dump(dataset, f)
def get_training_arrays(schema, table, ids, column, sample_rate, window_length,
                        window_step, label):
    training_array = []
    for id_ in ids:
        trace = psda.generate_appliance_trace(schema, table, column, id_,
                                              sample_rate)
        id_array_chunk = trace_windows(trace, window_length, window_step)
        training_array.append(id_array_chunk)
    training_array = np.concatenate(training_array, axis=0)
    label_array = np.array([label for _ in xrange(training_array.shape[0])])
    return training_array, label_array
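# Illustrative use of get_training_arrays; the dataid and table come from the
# test examples in this listing, but the window sizes and label scheme are
# assumptions: positive windows from 'air1', negative windows from 'use'.
X_pos, y_pos = get_training_arrays('shared', 'validated_01_2014', [744],
                                   'air1', '15T', 96, 96, 1)
X_neg, y_neg = get_training_arrays('shared', 'validated_01_2014', [744],
                                   'use', '15T', 96, 96, 0)
X = np.concatenate([X_pos, X_neg], axis=0)
y = np.concatenate([y_pos, y_neg], axis=0)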
Example #12
def get_test_data(num_houses):
    devices_types_unsampled = {}
    ids_for_devices = {}
    db_url = 'postgresql://*****:*****@db.wiki-energy.org:5432/postgres'
    psda.set_url(db_url)
    schema = 'shared'
    tables = psda.get_table_names(schema)
    print tables
    table = tables[3]
    ids_device_name = 'air1'
    ids_for_devices[ids_device_name] = psda.get_dataids_with_real_values(
        schema, table, ids_device_name)

    device_name = 'air1'
    devices_types_unsampled[
        device_name] = psda.generate_type_for_appliance_by_dataids(
            schema, table, device_name,
            ids_for_devices[ids_device_name][:num_houses])
    device_name = 'use'
    devices_types_unsampled[
        device_name] = psda.generate_type_for_appliance_by_dataids(
            schema, table, device_name,
            ids_for_devices[ids_device_name][:num_houses])

    #Resamples the data
    devices_types = {}
    devices_types_unsplit = {}
    sample_rate = '1T'
    length = 'D'
    for key in devices_types_unsampled:
        devices_types_unsplit[key] = devices_types_unsampled[key].resample(
            sample_rate)
        #devices_types[key]=devices_types_unsplit[key].split_by(length)
        devices_types[key] = devices_types_unsplit[key]
        print "Resampled " + str(key)

    return devices_types
Example #13
def get_test_data(num_houses):
    devices_types_unsampled = {}
    ids_for_devices = {}
    db_url = 'postgresql://*****:*****@db.wiki-energy.org:5432/postgres'
    psda.set_url(db_url)
    schema = 'shared'
    tables = psda.get_table_names(schema)
    print tables
    table = tables[3]
    ids_device_name = 'air1'
    ids_for_devices[ids_device_name] = psda.get_dataids_with_real_values(
        schema, table, ids_device_name)

    device_name = 'air1'
    devices_types_unsampled[
        device_name] = psda.generate_type_for_appliance_by_dataids(
            schema, table, device_name,
            ids_for_devices[ids_device_name][:num_houses])
    device_name = 'use'
    devices_types_unsampled[
        device_name] = psda.generate_type_for_appliance_by_dataids(
            schema, table, device_name,
            ids_for_devices[ids_device_name][:num_houses])

    #Resamples the data
    devices_types = {}
    devices_types_unsplit = {}
    sample_rate = '1T'
    length = 'D'
    for key in devices_types_unsampled:
        devices_types_unsplit[key] = devices_types_unsampled[key].resample(
            sample_rate)
        #devices_types[key]=devices_types_unsplit[key].split_by(length)
        devices_types[key] = devices_types_unsplit[key]
        print "Resampled " + str(key)

    return devices_types
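# Illustrative call: 1-minute-resampled 'air1' and 'use' types for the first
# five houses (the house count is an assumption).
# devices_types = get_test_data(5)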
Example #14
    parser = argparse.ArgumentParser(
        description='create appliance detection datasets for pylearn2.')
    parser.add_argument('data_dir',
                        type=str,
                        help='directory in which to store data')
    parser.add_argument('prefix', type=str, help='prefix for dataset files')
    args = parser.parse_args()

    schema = 'shared'
    tables = [
        u'validated_01_2014',
        u'validated_02_2014',
        u'validated_03_2014',
        u'validated_04_2014',
        u'validated_05_2014',
    ]

    db_url = "postgresql:/USERNAME:[email protected]:5432/postgres"
    psda.set_url(db_url)

    window_length = 10
    window_stride = 1
    prediction_index = 6

    all_ids = []
    for month in range(5):
        air1_ids = psda.get_dataids_with_real_values(schema, tables[month],
                                                     'air1')
        furnace1_ids = psda.get_dataids_with_real_values(
            schema, tables[month], 'furnace1')
        all_ids.append(air1_ids)
        all_ids.append(furnace1_ids)
    common_ids = da.utils.get_common_ids(all_ids)
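    # get_common_ids presumably intersects the per-month lists, keeping only
    # homes with real air1 and furnace1 readings in every month, e.g.
    # (made-up values):
    # da.utils.get_common_ids([[1, 2, 3], [2, 3, 4], [2, 3]])  ->  [2, 3]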
Example #15
import sys
import os.path

sys.path.append(os.path.join(os.pardir, os.pardir))

import disaggregator as da
import disaggregator.PecanStreetDatasetAdapter as psda

import argparse

parser = argparse.ArgumentParser(description='Process some integers.')
parser.add_argument('appliance')
args = parser.parse_args()

db_url = "postgresql://*****:*****@db.wiki-energy.org:5432/postgres"
psda.set_url(db_url)

schema = 'shared'
tables = [
    u'validated_01_2014',
    u'validated_02_2014',
    u'validated_03_2014',
    u'validated_04_2014',
    u'validated_05_2014',
]

ids = []
for table in tables:
    ids.append(psda.get_dataids_with_real_values(schema, table,
                                                 args.appliance))

print sorted(da.utils.get_common_ids(ids))
Example #16
import sys
sys.path.append('../')
#print sys.path
'''

   An interface for adapting Pecan Street data.

'''

import disaggregator.PecanStreetDatasetAdapter as pecan
import pickle
import disaggregator.utils as utils

# Open db connection
db_url = "postgresql://*****:*****@db.wiki-energy.org:5432/postgres"
pecan.set_url(db_url)

schema_names = {
    'curated': '\"PecanStreet_CuratedSets\"',
    'raw': '\"PecanStreet_RawData\"',
    'shared': '\"PecanStreet_SharedData\"'
}

print '''There are two datasets you can view right now, one is called curated,
and one is called shared. The shared data set has one minute interval data
for Jan-May 2014, for about 200 homes.

The curated data set has 15 minute interval data for 2013 and 2012 (the
longest stretch runs from 12/12 to 11/13).

If you want to analyze longer term data, the curated set is recommended,
whereas if you want shorter but more frequent data the shared set
is recommended.'''
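# A small follow-on sketch: list what each schema actually contains before
# choosing one (get_table_names is used the same way in Example #1):
for schema in ('curated', 'shared'):
    print schema, pecan.get_table_names(schema)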
Example #17
 def setUp(self):
     psda.set_url(settings.PECAN_STREET_DB_URL)

dataids = [  # name assumed; the opening of this list is missing in the original
    2158, 2171, 2233, 2242, 2337, 2449, 2470, 2575, 2606, 2638, 2769, 2814,
    2829, 2864, 2945, 2953, 2974, 3092, 3192, 3221, 3263, 3367, 3394, 3456,
    3482, 3504, 3544, 3649, 3652, 3723, 3736, 3778, 3795, 3893, 3918, 4031,
    4135, 4154, 4298, 4313, 4447, 4505, 4526, 4641, 4732, 4767, 4874, 4922,
    4956, 4957, 4998, 5026, 5109, 5209, 5218, 5262, 5275, 5357, 5395, 5545,
    5568, 5677, 5785, 5814, 5874, 5938, 5949, 5972, 6139, 6412, 6636, 6673,
    6730, 6836, 6910, 6941, 7062, 7319, 7390, 7531, 7536, 7617, 7731, 7769,
    7788, 7800, 7850, 7863, 7875, 7940, 7951, 8046, 8079, 8084, 8142, 8197,
    8292, 8317, 8342, 8419, 8467, 8645, 8669, 8741, 8829, 8852, 8956, 9019,
    9036, 9121, 9160, 9343, 9356, 9484, 9555, 9578, 9609, 9643, 9654, 9701,
    9729, 9737, 9771, 9830, 9875, 9915, 9922, 9926, 9932, 9934, 9937, 9938,
    9939, 9982, 9983
]

db_url = "postgresql://*****:*****@db.wiki-energy.org:5432/postgres"
psda.set_url(db_url)

schema = 'shared'
tables = [
    u'validated_01_2014',
    u'validated_02_2014',
    u'validated_03_2014',
    u'validated_04_2014',
    u'validated_05_2014',
]


def trace_windows(trace, window_length, window_step):
    total_length = trace.series.size
    n_steps = int((total_length - window_length) / window_step)
    windows = []
    # completion assumed from the signature and usage; the original example
    # is truncated at this point
    for i in xrange(n_steps + 1):
        start = i * window_step
        windows.append(trace.series.values[start:start + window_length])
    return np.array(windows)
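# Shape note for the assumed completion above: a series of length L yields
# int((L - window_length) / window_step) + 1 windows, each window_length
# samples long.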
 def test_table_metadata(self):
     ids, cols = psda.get_table_dataids_and_column_names(
         'shared', 'validated_01_2014')
     self.assertIn(744, ids, 'shared table 01 2014 has dataid 744')
     self.assertIn('use', cols, 'shared table 01 2014 has column "use"')
     self.assertIn('air1', cols, 'shared table 01 2014 has column "air1"')
     pass
Example #20
#print sys.path

'''

   An interface for adapting Pecan Street data.

'''


import disaggregator.PecanStreetDatasetAdapter as pecan
import pickle
import disaggregator.utils as utils

# Open db connection
db_url = "postgresql://*****:*****@db.wiki-energy.org:5432/postgres"
pecan.set_url(db_url)

schema_names = {
    'curated': '\"PecanStreet_CuratedSets\"',
    'raw': '\"PecanStreet_RawData\"',
    'shared': '\"PecanStreet_SharedData\"'
}

print '''There are two datasets you can view right now, one is called curated,
and one is called shared. The shared data set has one minute interval data
for Jan-May 2014, for about 200 homes.

The curated data set has 15 minute interval data for 2013 and 2012 (the
longest stretch runs from 12/12 to 11/13).

If you want to analyze longer term data, the curated set is recommended,
whereas if you want shorter but more frequent data the shared set
is recommended.'''
    parser = argparse.ArgumentParser(description='create appliance detection datasets for pylearn2.')
    parser.add_argument('data_dir',type=str,
            help='directory in which to store data')
    parser.add_argument('prefix',type=str,
            help='prefix for dataset files')
    args = parser.parse_args()

    schema = 'shared'
    tables = [u'validated_01_2014',
              u'validated_02_2014',
              u'validated_03_2014',
              u'validated_04_2014',
              u'validated_05_2014',]

    db_url = "postgresql://*****:*****@db.wiki-energy.org:5432/postgres"
    psda.set_url(db_url)

    window_length = 10
    window_stride = 1
    prediction_index = 6

    all_ids = []
    for month in range(5):
        air1_ids = psda.get_dataids_with_real_values(schema,tables[month],'air1')
        furnace1_ids = psda.get_dataids_with_real_values(schema,tables[month],'furnace1')
        all_ids.append(air1_ids)
        all_ids.append(furnace1_ids)
    common_ids = da.utils.get_common_ids(all_ids)

    n = len(common_ids)
    n_train = n/2
Example #22
def create_dataset(schema, tables, ids, n_classes, which=None):
    all_instances = psda.generate_instances_for_appliances_by_dataids(
        schema, tables, ['use', 'air1', 'furnace1'], ids, sample_rate='15T')

    energy_arrays = []
    temperature_arrays = []
    time_arrays = []
    weekday_arrays = []
    target_arrays = []
    sorted_classes = np.linspace(0, 1, n_classes + 1)[:-1]
    for instances, dataid in zip(all_instances, ids):
        # format use correctly
        use = instances[0].traces[0]
        use.series.fillna(0, inplace=True)
        use.series = use.series.astype(float).clip(0.0000001)
        use_windows = use.get_windows(window_length, window_stride)

        # create features sources
        energy_arrays.append(use_windows)
        temperature_arrays.append(np.tile([70], (use_windows.shape[0], 1)))
        time_arrays.append(np.tile([12], (use_windows.shape[0], 1)))
        weekday_arrays.append(
            np.tile([1, 0, 0, 0, 0, 0, 0], (use_windows.shape[0], 1)))

        # determine targets
        air1 = instances[1].traces[0]
        furnace1 = instances[2].traces[0]
        total_air = da.utils.aggregate_traces([air1, furnace1], {})
        total_air.series.fillna(0, inplace=True)
        total_air.series = total_air.series.astype(float)
        ratio_series = total_air.series / use.series
        ratios = da.appliance.ApplianceTrace(ratio_series, {})
        ratio_windows = ratios.get_windows(window_length, window_stride)
        ratio_windows = ratio_windows[:, prediction_index].clip(0, 1)
        classes = np.searchsorted(sorted_classes, ratio_windows,
                                  side='right') - 1
        target_arrays.append(classes_to_onehot(classes, n_classes))

    # create data tuple
    energy_arrays = np.concatenate(energy_arrays, axis=0)[:, :, np.newaxis,
                                                          np.newaxis]
    temperature_arrays = np.concatenate(temperature_arrays, axis=0)
    time_arrays = np.concatenate(time_arrays, axis=0)
    weekday_arrays = csr_matrix(np.concatenate(weekday_arrays, axis=0))
    target_arrays = csr_matrix(np.concatenate(target_arrays, axis=0))
    data = (energy_arrays, temperature_arrays, time_arrays, weekday_arrays,
            target_arrays)

    # define the data specs
    space = CompositeSpace([
        Conv2DSpace(shape=[10, 1], num_channels=1),
        VectorSpace(dim=1),
        VectorSpace(dim=1),
        VectorSpace(dim=7, sparse=True),
        VectorSpace(dim=n_classes, sparse=True)
    ])
    source = ('features0', 'features1', 'features2', 'features3', 'targets')
    data_specs = (space, source)
    dataset = VectorSpacesDataset(data=data, data_specs=data_specs)
    with open(os.path.join(args.data_dir, args.prefix + '_' + which + '.pkl'),
              'w') as f:
        pickle.dump(dataset, f)
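# The data tuple above must line up slot-for-slot with the (space, source)
# pairs in data_specs; e.g. the sparse one-hot targets pair with
# VectorSpace(dim=n_classes, sparse=True) under the name 'targets'. A minimal
# standalone demonstration of that sparse one-hot format (values illustrative):
import numpy as np
from scipy.sparse import csr_matrix

onehot = np.eye(4)[np.array([0, 2, 3, 1])]  # 4 windows, 4 classes
targets = csr_matrix(onehot)
print targets.shape                         # (4, 4)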
    parser = argparse.ArgumentParser(
        description='create appliance detection datasets for pylearn2.')
    parser.add_argument('appliance',
                        type=str,
                        help='appliance to make the datasets around')
    parser.add_argument('data_dir',
                        type=str,
                        help='directory in which to store data')
    parser.add_argument('prefix', type=str, help='prefix for dataset files')
    args = parser.parse_args()

    schema = 'shared'
    tables = [
        u'validated_01_2014',
        u'validated_02_2014',
        u'validated_03_2014',
        u'validated_04_2014',
        u'validated_05_2014',
    ]

    db_url = "postgresql://*****:*****@db.wiki-energy.org:5432/postgres"
    psda.set_url(db_url)

    window_length = 24 * 4 * 7
    window_stride = 24 * 4
    train, valid, test = psda.get_appliance_detection_arrays(
        schema, tables, args.appliance, window_length, window_stride, 10)
    train_dataset = ds.DenseDesignMatrix(X=train[0], y=train[1])
    valid_dataset = ds.DenseDesignMatrix(X=valid[0], y=valid[1])
    test_dataset = ds.DenseDesignMatrix(X=test[0], y=test[1])

    with open(
            '{data_dir}/{prefix}_train.pkl'.format(data_dir=args.data_dir,
                                                   prefix=args.prefix),
            'w') as f:
        pickle.dump(train_dataset, f)
import sys
import os.path
sys.path.append(os.path.join(os.pardir,os.pardir))

import disaggregator as da
import disaggregator.PecanStreetDatasetAdapter as psda

import argparse
parser = argparse.ArgumentParser(description='Process some integers.')
parser.add_argument('appliance')
args = parser.parse_args()

db_url = "postgresql://*****:*****@db.wiki-energy.org:5432/postgres"
psda.set_url(db_url)

schema = 'shared'
tables = [u'validated_01_2014',
          u'validated_02_2014',
          u'validated_03_2014',
          u'validated_04_2014',
          u'validated_05_2014',]

ids = []
for table in tables:
    ids.append(psda.get_dataids_with_real_values(schema,table,args.appliance))

print sorted(da.utils.get_common_ids(ids))
 def setUp(self):
     psda.set_url(settings.PECAN_STREET_DB_URL)
import sys
import os.path

sys.path.append(os.path.abspath(os.path.join(os.pardir, os.pardir)))
import disaggregator as da
import disaggregator.PecanStreetDatasetAdapter as psda
import pickle

db_url = "postgresql://*****:*****@db.wiki-energy.org:5432/postgres"
psda.set_url(db_url)

schema = "shared"
table = "validated_01_2014"
dataid = "3893"
sample_rate = "15T"
appliance_set = psda.generate_set_by_table_and_dataid(schema, table, dataid, sample_rate)


appliance_set = appliance_set.generate_non_zero_set()

with open(os.path.join(os.pardir, os.pardir, "data", "home_3893_set_01_2014.pkl"), "w") as f:
    pickle.dump(appliance_set, f)
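# Illustrative round trip: reload the pickled set from the path written above.
import os.path
import pickle

with open(os.path.join(os.pardir, os.pardir, "data",
                       "home_3893_set_01_2014.pkl")) as f:
    appliance_set = pickle.load(f)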
Example #27
import sys
import os.path
sys.path.append(os.path.abspath(os.path.join(os.pardir,os.pardir)))
import disaggregator as da
import disaggregator.PecanStreetDatasetAdapter as psda
import pickle

db_url = "postgresql://*****:*****@db.wiki-energy.org:5432/postgres"
psda.set_url(db_url)

schema = 'shared'
table = 'validated_01_2014'
dataid = '3893'
sample_rate = '15T'
appliance_set = psda.generate_set_by_table_and_dataid(
    schema, table, dataid, sample_rate)

appliance_set = appliance_set.generate_non_zero_set()

with open(os.path.join(os.pardir, os.pardir, 'data',
                       'home_3893_set_01_2014.pkl'), 'w') as f:
    pickle.dump(appliance_set, f)

    parser = argparse.ArgumentParser(
            description='create appliance detection datasets for pylearn2.')
    parser.add_argument('appliance',type=str,
            help='appliance to make the datasets around')
    parser.add_argument('data_dir',type=str,
            help='directory in which to store data')
    parser.add_argument('prefix',type=str,
            help='prefix for dataset files')
    args = parser.parse_args()

    schema = 'shared'
    tables = [u'validated_01_2014',
              u'validated_02_2014',
              u'validated_03_2014',
              u'validated_04_2014',
              u'validated_05_2014',]

    db_url = "postgresql://*****:*****@db.wiki-energy.org:5432/postgres"
    psda.set_url(db_url)

    window_length=24*4*7
    window_stride=24*4
    train,valid,test = psda.get_appliance_detection_arrays(
        schema,tables,args.appliance,window_length,window_stride,10)
    train_dataset = ds.DenseDesignMatrix(X=train[0],y=train[1])
    valid_dataset = ds.DenseDesignMatrix(X=valid[0],y=valid[1])
    test_dataset = ds.DenseDesignMatrix(X=test[0],y=test[1])

    with open('{data_dir}/{prefix}_train.pkl'
            .format(data_dir=args.data_dir,prefix=args.prefix), 'w') as f:
        pickle.dump(train_dataset,f)

    with open('{data_dir}/{prefix}_valid.pkl'
            .format(data_dir=args.data_dir,prefix=args.prefix), 'w') as f:
        pickle.dump(valid_dataset,f)
import sys
import os.path
sys.path.append(os.path.abspath(os.path.join(os.pardir,os.pardir)))
import disaggregator as da
import disaggregator.PecanStreetDatasetAdapter as psda
import pickle
import numpy as np
import pylearn2
import pylearn2.datasets as ds

db_url = "postgresql://*****:*****@db.wiki-energy.org:5432/postgres"
psda.set_url(db_url)

schema = 'shared'
tables = [u'validated_01_2014',
          u'validated_02_2014',
          u'validated_03_2014',
          u'validated_04_2014',
          u'validated_05_2014',]

'''
all_car_ids = []
for table in tables:
    all_car_ids.append(psda.get_dataids_with_real_values(schema,table,'car1'))

common_car_ids = sorted(da.utils.get_common_ids(all_car_ids))

all_use_ids = []
for table in tables:
    all_use_ids.append(psda.get_dataids_with_real_values(schema,table,'use'))