def create_dataset(schema, tables, ids, n_classes, which=None):
    """Build a pylearn2 DenseDesignMatrix of 15-minute 'use' windows with
    one-hot air-conditioning-ratio class targets, and pickle it to disk.

    Args:
        schema: database schema, passed through to psda.
        tables: table list, passed through to psda.
        ids: dataids whose traces are loaded.
        n_classes: number of equal-width ratio bins covering [0, 1).
        which: dataset split label used in the output filename
            (e.g. 'train' or 'test'); must be a string.

    Side effects:
        Writes args.data_dir/args.prefix + '_' + which + '.pkl'.
    """
    # Fail fast before the expensive data pull: with which=None the original
    # code only crashed at filename-construction time, after all the work.
    if which is None:
        raise ValueError("'which' must be a split label string, e.g. 'train'")

    all_instances = psda.generate_instances_for_appliances_by_dataids(
        schema, tables, ['use', 'air1', 'furnace1'], ids, sample_rate='15T')

    X_arrays = []
    y_arrays = []
    # Left edges of n_classes equal-width bins over [0, 1).
    sorted_classes = np.linspace(0, 1, n_classes + 1)[:-1]
    for instances, dataid in zip(all_instances, ids):
        use = instances[0].traces[0]
        use.series.fillna(0, inplace=True)
        # Clip to a tiny positive floor so the ratio below never divides by 0.
        use.series = use.series.astype(float).clip(0.0000001)
        air1 = instances[1].traces[0]
        furnace1 = instances[2].traces[0]
        total_air = da.utils.aggregate_traces([air1, furnace1], {})
        total_air.series.fillna(0, inplace=True)
        total_air.series = total_air.series.astype(float)
        # Fraction of whole-home use attributable to air1 + furnace1.
        ratio_series = total_air.series / use.series
        ratios = da.appliance.ApplianceTrace(ratio_series, {})
        use_windows = use.get_windows(window_length, window_stride)
        ratio_windows = ratios.get_windows(window_length, window_stride)
        X_arrays.append(use_windows)
        # Keep only the prediction point of each window, bounded to [0, 1].
        ratio_windows = ratio_windows[:, prediction_index].clip(0, 1)
        # Map each ratio to its bin index (searchsorted returns the insertion
        # point to the right of equal edges, hence the -1).
        classes = np.searchsorted(sorted_classes, ratio_windows,
                                  side='right') - 1
        y_arrays.append(classes_to_onehot(classes, n_classes))
    X = np.concatenate(X_arrays, axis=0)
    y = np.concatenate(y_arrays, axis=0)
    dataset = ds.DenseDesignMatrix(X=X, y=y)
    # BUG FIX: pickle writes bytes, so the file must be opened in binary
    # mode; text mode ('w') raises TypeError on Python 3.
    path = os.path.join(args.data_dir, args.prefix + '_' + which + '.pkl')
    with open(path, 'wb') as f:
        pickle.dump(dataset, f)
def create_dataset(schema, tables, ids, n_classes, which=None):
    """Build and pickle a pylearn2 DenseDesignMatrix dataset.

    X holds sliding windows of whole-home 'use' traces; y holds one-hot
    classes derived from the (air1 + furnace1) / use ratio at the
    prediction index of each window.

    Args:
        schema: database schema, forwarded to psda.
        tables: table list, forwarded to psda.
        ids: dataids whose traces are loaded.
        n_classes: number of equal-width ratio bins covering [0, 1).
        which: split label used in the output filename; must be a string.

    Side effects:
        Writes args.data_dir/args.prefix + '_' + which + '.pkl'.
    """
    # Validate up front: which=None used to surface only as a TypeError when
    # building the output filename, after all the data processing.
    if which is None:
        raise ValueError("'which' must be a split label string, e.g. 'train'")

    all_instances = psda.generate_instances_for_appliances_by_dataids(
        schema, tables, ['use', 'air1', 'furnace1'], ids, sample_rate='15T')

    X_arrays = []
    y_arrays = []
    # Left edges of the n_classes ratio bins over [0, 1).
    sorted_classes = np.linspace(0, 1, n_classes + 1)[:-1]
    for instances, dataid in zip(all_instances, ids):
        use = instances[0].traces[0]
        use.series.fillna(0, inplace=True)
        # Tiny positive floor avoids division by zero in the ratio below.
        use.series = use.series.astype(float).clip(0.0000001)
        air1 = instances[1].traces[0]
        furnace1 = instances[2].traces[0]
        total_air = da.utils.aggregate_traces([air1, furnace1], {})
        total_air.series.fillna(0, inplace=True)
        total_air.series = total_air.series.astype(float)
        # Share of total use drawn by the air-conditioning circuits.
        ratio_series = total_air.series / use.series
        ratios = da.appliance.ApplianceTrace(ratio_series, {})
        use_windows = use.get_windows(window_length, window_stride)
        ratio_windows = ratios.get_windows(window_length, window_stride)
        X_arrays.append(use_windows)
        # Ratio at the prediction point only, clamped to [0, 1].
        ratio_windows = ratio_windows[:, prediction_index].clip(0, 1)
        # Bin index per window; side='right' with -1 maps edge values into
        # the bin whose left edge they equal.
        classes = np.searchsorted(sorted_classes, ratio_windows,
                                  side='right') - 1
        y_arrays.append(classes_to_onehot(classes, n_classes))
    X = np.concatenate(X_arrays, axis=0)
    y = np.concatenate(y_arrays, axis=0)
    dataset = ds.DenseDesignMatrix(X=X, y=y)
    # BUG FIX: open in binary mode — pickle.dump writes bytes and fails
    # with TypeError on a text-mode handle in Python 3.
    path = os.path.join(args.data_dir, args.prefix + '_' + which + '.pkl')
    with open(path, 'wb') as f:
        pickle.dump(dataset, f)
def create_dataset(schema, tables, ids, n_classes, which=None):
    """Build and pickle a pylearn2 VectorSpacesDataset with four feature
    sources (energy windows, temperature, time, weekday) and one-hot
    air-conditioning-ratio targets.

    Args:
        schema: database schema, forwarded to psda.
        tables: table list, forwarded to psda.
        ids: dataids whose traces are loaded.
        n_classes: number of equal-width ratio bins covering [0, 1).
        which: split label used in the output filename; must be a string.

    Side effects:
        Writes args.data_dir/args.prefix + '_' + which + '.pkl'.
    """
    # Fail fast: with which=None the original only crashed at filename
    # construction, after the full data pull and processing.
    if which is None:
        raise ValueError("'which' must be a split label string, e.g. 'train'")

    all_instances = psda.generate_instances_for_appliances_by_dataids(
        schema, tables, ['use', 'air1', 'furnace1'], ids, sample_rate='15T')

    energy_arrays = []
    temperature_arrays = []
    time_arrays = []
    weekday_arrays = []
    target_arrays = []
    # Left edges of the n_classes ratio bins over [0, 1).
    sorted_classes = np.linspace(0, 1, n_classes + 1)[:-1]
    for instances, dataid in zip(all_instances, ids):
        # Format use correctly: fill gaps, then clip to a tiny positive
        # floor so the ratio below never divides by zero.
        use = instances[0].traces[0]
        use.series.fillna(0, inplace=True)
        use.series = use.series.astype(float).clip(0.0000001)
        use_windows = use.get_windows(window_length, window_stride)

        # Create feature sources. Temperature (70), time (12) and weekday
        # (Monday one-hot) are placeholder constants — presumably to be
        # replaced with real per-window metadata; confirm before training.
        energy_arrays.append(use_windows)
        temperature_arrays.append(np.tile([70], (use_windows.shape[0], 1)))
        time_arrays.append(np.tile([12], (use_windows.shape[0], 1)))
        weekday_arrays.append(
            np.tile([1, 0, 0, 0, 0, 0, 0], (use_windows.shape[0], 1)))

        # Determine targets: class of the air-conditioning share of use at
        # the prediction point of each window.
        air1 = instances[1].traces[0]
        furnace1 = instances[2].traces[0]
        total_air = da.utils.aggregate_traces([air1, furnace1], {})
        total_air.series.fillna(0, inplace=True)
        total_air.series = total_air.series.astype(float)
        ratio_series = total_air.series / use.series
        ratios = da.appliance.ApplianceTrace(ratio_series, {})
        ratio_windows = ratios.get_windows(window_length, window_stride)
        ratio_windows = ratio_windows[:, prediction_index].clip(0, 1)
        classes = np.searchsorted(sorted_classes, ratio_windows,
                                  side='right') - 1
        target_arrays.append(classes_to_onehot(classes, n_classes))

    # Create the data tuple. Energy gets trailing singleton axes to match
    # the (batch, rows, cols, channels) layout of Conv2DSpace below.
    energy_arrays = np.concatenate(energy_arrays,
                                   axis=0)[:, :, np.newaxis, np.newaxis]
    temperature_arrays = np.concatenate(temperature_arrays, axis=0)
    time_arrays = np.concatenate(time_arrays, axis=0)
    weekday_arrays = csr_matrix(np.concatenate(weekday_arrays, axis=0))
    target_arrays = csr_matrix(np.concatenate(target_arrays, axis=0))
    data = (energy_arrays, temperature_arrays, time_arrays, weekday_arrays,
            target_arrays)

    # Define the data specs: one space per source, in tuple order.
    space = CompositeSpace([
        Conv2DSpace(shape=[10, 1], num_channels=1),
        VectorSpace(dim=1),
        VectorSpace(dim=1),
        VectorSpace(dim=7, sparse=True),
        VectorSpace(dim=n_classes, sparse=True)])
    source = ('features0', 'features1', 'features2', 'features3', 'targets')
    data_specs = (space, source)
    dataset = VectorSpacesDataset(data=data, data_specs=data_specs)
    # BUG FIX: pickle writes bytes — open in binary mode; text mode ('w')
    # raises TypeError on Python 3.
    path = os.path.join(args.data_dir, args.prefix + '_' + which + '.pkl')
    with open(path, 'wb') as f:
        pickle.dump(dataset, f)
# Example #4
def create_dataset(schema, tables, ids, n_classes, which=None):
    """Assemble a pylearn2 VectorSpacesDataset and pickle it to disk.

    Sources, in order: energy windows (Conv2DSpace), temperature, time,
    weekday (sparse one-hot), and sparse one-hot targets derived from the
    (air1 + furnace1) / use ratio.

    Args:
        schema: database schema, forwarded to psda.
        tables: table list, forwarded to psda.
        ids: dataids whose traces are loaded.
        n_classes: number of equal-width ratio bins covering [0, 1).
        which: split label used in the output filename; must be a string.

    Side effects:
        Writes args.data_dir/args.prefix + '_' + which + '.pkl'.
    """
    # Validate before the expensive data pull; previously which=None only
    # failed with a TypeError when the filename was built at the very end.
    if which is None:
        raise ValueError("'which' must be a split label string, e.g. 'train'")

    all_instances = psda.generate_instances_for_appliances_by_dataids(
        schema, tables, ['use', 'air1', 'furnace1'], ids, sample_rate='15T')

    energy_arrays = []
    temperature_arrays = []
    time_arrays = []
    weekday_arrays = []
    target_arrays = []
    # Left edges of the n_classes ratio bins over [0, 1).
    sorted_classes = np.linspace(0, 1, n_classes + 1)[:-1]
    for instances, dataid in zip(all_instances, ids):
        # Format use correctly; the positive floor prevents division by
        # zero in the ratio computation below.
        use = instances[0].traces[0]
        use.series.fillna(0, inplace=True)
        use.series = use.series.astype(float).clip(0.0000001)
        use_windows = use.get_windows(window_length, window_stride)

        # Create feature sources. The 70/12/Monday values are fixed
        # placeholders — presumably stand-ins for real per-window
        # temperature/time/weekday metadata; confirm before training.
        energy_arrays.append(use_windows)
        temperature_arrays.append(np.tile([70], (use_windows.shape[0], 1)))
        time_arrays.append(np.tile([12], (use_windows.shape[0], 1)))
        weekday_arrays.append(
            np.tile([1, 0, 0, 0, 0, 0, 0], (use_windows.shape[0], 1)))

        # Determine targets: bin the air-conditioning share of use at the
        # prediction point of each window.
        air1 = instances[1].traces[0]
        furnace1 = instances[2].traces[0]
        total_air = da.utils.aggregate_traces([air1, furnace1], {})
        total_air.series.fillna(0, inplace=True)
        total_air.series = total_air.series.astype(float)
        ratio_series = total_air.series / use.series
        ratios = da.appliance.ApplianceTrace(ratio_series, {})
        ratio_windows = ratios.get_windows(window_length, window_stride)
        ratio_windows = ratio_windows[:, prediction_index].clip(0, 1)
        classes = np.searchsorted(sorted_classes, ratio_windows,
                                  side='right') - 1
        target_arrays.append(classes_to_onehot(classes, n_classes))

    # Create the data tuple; energy gains trailing singleton axes so it
    # matches the Conv2DSpace layout declared below.
    energy_arrays = np.concatenate(energy_arrays, axis=0)[:, :, np.newaxis,
                                                          np.newaxis]
    temperature_arrays = np.concatenate(temperature_arrays, axis=0)
    time_arrays = np.concatenate(time_arrays, axis=0)
    weekday_arrays = csr_matrix(np.concatenate(weekday_arrays, axis=0))
    target_arrays = csr_matrix(np.concatenate(target_arrays, axis=0))
    data = (energy_arrays, temperature_arrays, time_arrays, weekday_arrays,
            target_arrays)

    # Define the data specs: one space per element of `data`, same order.
    space = CompositeSpace([
        Conv2DSpace(shape=[10, 1], num_channels=1),
        VectorSpace(dim=1),
        VectorSpace(dim=1),
        VectorSpace(dim=7, sparse=True),
        VectorSpace(dim=n_classes, sparse=True)
    ])
    source = ('features0', 'features1', 'features2', 'features3', 'targets')
    data_specs = (space, source)
    dataset = VectorSpacesDataset(data=data, data_specs=data_specs)
    # BUG FIX: pickle emits bytes; a text-mode ('w') handle raises
    # TypeError on Python 3, so open in binary mode.
    path = os.path.join(args.data_dir, args.prefix + '_' + which + '.pkl')
    with open(path, 'wb') as f:
        pickle.dump(dataset, f)