def get_pipeline(period, data_path, target_inclusion_prob, windows, appliances,
                 target_appliance, activations, seq_length, num_seq_per_batch):
    """Build a DataPipeline over a single RealAggregateSource.

    Standardisation divisors are estimated from one raw (unprocessed)
    batch drawn from the source.

    Args:
        period: sample period in seconds passed to the source.
        data_path: path to the NILMTK HDF5 data file.
        target_inclusion_prob: probability a sequence contains the target.
        windows: per-fold/building time windows.
        appliances: appliances used to filter the activations.
        target_appliance: appliance the pipeline disaggregates.
        activations: raw appliance activations to filter.
        seq_length: length of each generated sequence.
        num_seq_per_batch: sequences per batch produced by the pipeline.

    Returns:
        A configured DataPipeline instance.
    """
    filtered_activations = filter_activations(windows, appliances, activations)
    real_agg_source = RealAggregateSource(
        activations=filtered_activations,
        target_appliance=target_appliance,
        seq_length=seq_length,
        filename=data_path,
        windows=windows,
        sample_period=period,
        target_inclusion_prob=target_inclusion_prob)

    # Estimate standardisation constants from one unprocessed batch.
    # NOTE(review): the sample batch size here is `seq_length`, not
    # `num_seq_per_batch` as in the sibling pipelines — looks unintentional
    # but is preserved; confirm before changing.
    sample = next(real_agg_source.get_batch(num_seq_per_batch=seq_length))
    sample = sample.before_processing
    input_std = sample.input.flatten().std()
    target_std = sample.target.flatten().std()

    pipeline = DataPipeline(
        [real_agg_source],
        num_seq_per_batch=num_seq_per_batch,
        input_processing=[DivideBy(input_std), IndependentlyCenter()],
        target_processing=[DivideBy(target_std)])
    return pipeline
def get_pipeline(target_appliance, activations):
    """Build a DataPipeline (synthetic + real + stride sources) for one appliance.

    Args:
        target_appliance: one of 'kettle', 'microwave', 'washing machine',
            'fridge', 'dish washer'.
        activations: raw appliance activations to filter.

    Returns:
        A configured DataPipeline instance.

    Raises:
        ValueError: if `target_appliance` is not a configured appliance
            (the original fell through and crashed with NameError).
    """
    num_seq_per_batch = 64
    # Per-appliance (seq_length, train_buildings, unseen_buildings).
    appliance_config = {
        'kettle': (128, [1, 2, 4], [5]),
        'microwave': (288, [1, 2], [5]),
        'washing machine': (1024, [1, 5], [2]),
        'fridge': (512, [1, 2, 4], [5]),
        'dish washer': (1024 + 512, [1, 2], [5]),
    }
    try:
        seq_length, train_buildings, unseen_buildings = (
            appliance_config[target_appliance])
    except KeyError:
        raise ValueError(
            'Unknown target appliance: {!r}'.format(target_appliance))

    filtered_windows = select_windows(train_buildings, unseen_buildings)
    filtered_activations = filter_activations(filtered_windows, activations)

    synthetic_agg_source = SyntheticAggregateSource(
        activations=filtered_activations,
        target_appliance=target_appliance,
        seq_length=seq_length,
        sample_period=SAMPLE_PERIOD)
    real_agg_source = RealAggregateSource(
        activations=filtered_activations,
        target_appliance=target_appliance,
        seq_length=seq_length,
        filename=NILMTK_FILENAME,
        windows=filtered_windows,
        sample_period=SAMPLE_PERIOD)
    stride_source = StrideSource(
        target_appliance=target_appliance,
        seq_length=seq_length,
        filename=NILMTK_FILENAME,
        windows=filtered_windows,
        sample_period=SAMPLE_PERIOD,
        stride=STRIDE)

    # Standardisation constants from one large raw batch of real data.
    sample = next(real_agg_source.get_batch(num_seq_per_batch=1024))
    sample = sample.before_processing
    input_std = sample.input.flatten().std()
    target_std = sample.target.flatten().std()

    pipeline = DataPipeline(
        [synthetic_agg_source, real_agg_source, stride_source],
        num_seq_per_batch=num_seq_per_batch,
        input_processing=[DivideBy(input_std), IndependentlyCenter()],
        target_processing=[DivideBy(target_std)])
    return pipeline
def create_data_pipeline(conf, sample_period, num_seq_per_batch,
                         source_probabilities=(.5, .5),
                         windows_key='windows'):
    """Build a RectangleDataPipeline from a configuration dict.

    Args:
        conf: configuration mapping; keys used: 'distracting_appliances',
            'target_appliance', 'data_file', 'seq_length', 'input_std',
            'target_std', and the windows section under `windows_key`.
        sample_period: sample period in seconds.
        num_seq_per_batch: sequences per batch.
        source_probabilities: sampling probabilities for the
            (synthetic, real) sources.
        windows_key: key in `conf` holding the per-building windows.

    Returns:
        (pipeline, real_input_std, real_target_std, real_avg_power).
        The pipeline standardises with conf['input_std']/conf['target_std'];
        the freshly estimated statistics are only returned to the caller.
    """
    # Copy before appending so conf['distracting_appliances'] is not
    # mutated in place (the original appended the target to conf's list,
    # corrupting the config on repeated calls).
    appliances = list(conf['distracting_appliances'])
    appliances.append(conf['target_appliance'])

    # Resolve the data file relative to the project root unless absolute.
    data_file_path = conf['data_file'] if os.path.isabs(
        conf['data_file']) else os.path.join(
            os.path.dirname(__file__) + '/../', conf['data_file'])

    # House keys arrive as strings from the config file; sources expect ints.
    windows = {}
    for window_name, window in conf[windows_key].items():
        windows[window_name] = {}
        for house, window_selection in window.items():
            windows[window_name][int(house)] = window_selection

    appliance_activations = load_nilmtk_activations(
        appliances=appliances,
        filename=data_file_path,
        sample_period=sample_period,
        windows=windows)

    synthetic_agg_source = SyntheticAggregateSource(
        activations=appliance_activations,
        target_appliance=conf['target_appliance'],
        seq_length=conf['seq_length'],
        sample_period=sample_period)
    real_agg_source = RealAggregateSource(
        activations=appliance_activations,
        target_appliance=conf['target_appliance'],
        seq_length=conf['seq_length'],
        filename=data_file_path,
        windows=windows,
        sample_period=sample_period)

    # Estimate real-data statistics from one raw 1024-sequence batch.
    sample = next(real_agg_source.get_batch(num_seq_per_batch=1024))
    sample = sample.before_processing
    real_input_std = sample.input.flatten().std()
    real_target_std = sample.target.flatten().std()
    # Mean target power per time step across the sample batch.
    real_avg_power = sample.target.flatten().sum() / 1024 / conf['seq_length']

    pipeline = RectangleDataPipeline(
        [synthetic_agg_source, real_agg_source],
        num_seq_per_batch=num_seq_per_batch,
        source_probabilities=source_probabilities,
        input_processing=[DivideBy(conf['input_std']), IndependentlyCenter()],
        target_processing=[
            DivideBy(conf['target_std']),
            start_and_end_and_mean
        ])
    return pipeline, real_input_std, real_target_std, real_avg_power
def get_pipeline(target_appliance, activations):
    """Build a DataPipeline (synthetic + real + stride) — kettle only.

    Args:
        target_appliance: must be 'kettle'; no other appliance is configured.
        activations: raw appliance activations to filter.

    Returns:
        A configured DataPipeline instance.

    Raises:
        ValueError: for any appliance other than 'kettle' (the original
            fell through with `seq_length` undefined and crashed with
            NameError).
    """
    if target_appliance == 'kettle':
        seq_length = 128
        train_buildings = [1, 2, 4]
        unseen_buildings = [5]
    else:
        raise ValueError(
            "Only 'kettle' is configured, got: {!r}".format(target_appliance))
    num_seq_per_batch = 64

    filtered_windows = select_windows(train_buildings, unseen_buildings)
    filtered_activations = filter_activations(filtered_windows, activations)

    synthetic_agg_source = SyntheticAggregateSource(
        activations=filtered_activations,
        target_appliance=target_appliance,
        seq_length=seq_length,
        sample_period=SAMPLE_PERIOD
    )
    real_agg_source = RealAggregateSource(
        activations=filtered_activations,
        target_appliance=target_appliance,
        seq_length=seq_length,
        filename=NILMTK_FILENAME,
        windows=filtered_windows,
        sample_period=SAMPLE_PERIOD
    )
    stride_source = StrideSource(
        target_appliance=target_appliance,
        seq_length=seq_length,
        filename=NILMTK_FILENAME,
        windows=filtered_windows,
        sample_period=SAMPLE_PERIOD,
        stride=STRIDE
    )

    # Standardisation constants from one large raw batch of real data.
    sample = next(real_agg_source.get_batch(num_seq_per_batch=1024))
    sample = sample.before_processing
    input_std = sample.input.flatten().std()
    target_std = sample.target.flatten().std()

    pipeline = DataPipeline(
        [synthetic_agg_source, real_agg_source, stride_source],
        num_seq_per_batch=num_seq_per_batch,
        input_processing=[DivideBy(input_std), IndependentlyCenter()],
        target_processing=[DivideBy(target_std)]
    )
    return pipeline
# NOTE(review): fragment — this appears to be the body of a pipeline-building
# function whose header (defining target_appliance, seq_length,
# train_buildings, num_seq_per_batch, activations) is outside this view, and
# the trailing DataPipeline(...) call is truncated mid-arguments.
unseen_buildings = [5]
DATA_FOLD_NAMES = ('train', 'unseen_appliances',
                   'unseen_activations_of_seen_appliances')
filtered_windows = select_windows(train_buildings, unseen_buildings)
filtered_activations = filter_activations(filtered_windows, activations)
synthetic_agg_source = SyntheticAggregateSource(
    activations=filtered_activations,
    target_appliance=target_appliance,
    seq_length=seq_length,
    sample_period=SAMPLE_PERIOD)
real_agg_source = RealAggregateSource(activations=filtered_activations,
                                      target_appliance=target_appliance,
                                      seq_length=seq_length,
                                      filename=NILMTK_FILENAME,
                                      windows=filtered_windows,
                                      sample_period=SAMPLE_PERIOD)
# ------------
# needed to rescale the input aggregated data
# rescaling is done using the a first batch of num_seq_per_batch sequences
sample = real_agg_source.get_batch(num_seq_per_batch=1024).next()
sample = sample.before_processing
input_std = sample.input.flatten().std()
target_std = sample.target.flatten().std()
# ------------
pipeline = DataPipeline(
    [synthetic_agg_source, real_agg_source],
    num_seq_per_batch=num_seq_per_batch,
def getNILMbatches(period, filename, target_inclusion_prob, windows,
                   appliances, pTrain, pVal, pTest, num_seq_per_batch,
                   seq_length, numApp):
    """Load NILMTK activations and return standardised train/val/test batches.

    Args:
        period: sample period in seconds.
        filename: NILMTK HDF5 data file.
        target_inclusion_prob: probability a sequence contains the target.
        windows: per-fold/building time windows.
        appliances: list of appliance names.
        pTrain, pVal, pTest: fraction of num_seq_per_batch for each split.
        num_seq_per_batch: total sequences drawn per split before splitting.
        seq_length: length of each sequence.
        numApp: index into `appliances` selecting the target appliance, or
            -1 for multi-appliance mode (not fully implemented upstream).

    Returns:
        (totalX, totalY, input_std, target_std) where totalX/totalY map
        'train'/'val'/'test' to arrays, and the stds are the divisors used
        for standardisation (estimated from the raw training batch).
    """
    activations = load_nilmtk_activations(appliances=appliances,
                                          filename=filename,
                                          sample_period=period,
                                          windows=windows)
    filtered_activations = filter_activations(windows, appliances, activations)

    trainSize = int(num_seq_per_batch * pTrain)
    valSize = int(num_seq_per_batch * pVal)
    testSize = int(num_seq_per_batch * pTest)

    def _make_source(target_appliance):
        # One RealAggregateSource per target appliance.
        return RealAggregateSource(
            activations=filtered_activations,
            target_appliance=target_appliance,
            seq_length=seq_length,
            filename=filename,
            windows=windows,
            sample_period=period,
            target_inclusion_prob=target_inclusion_prob)

    def _draw_splits(source, sizes):
        # Draw raw train/val/test batches, estimate standardisation
        # constants from the raw training batch, apply them to all splits.
        nTrain, nVal, nTest = sizes
        rawTrain = next(source.get_batch(num_seq_per_batch=nTrain,
                                         fold='train',
                                         validation=False)).before_processing
        input_std = rawTrain.input.flatten().std()
        target_std = rawTrain.target.flatten().std()
        input_processing = [DivideBy(input_std), IndependentlyCenter()]
        target_processing = [DivideBy(target_std)]

        def _apply(X, Y):
            for step in input_processing:
                X = step(X)
            # BUG FIX: the original fed the *input* batch into the target
            # processing chain (`Ytrain = step(Xtrain)`).
            for step in target_processing:
                Y = step(Y)
            return X, Y

        Xtrain, Ytrain = _apply(rawTrain.input, rawTrain.target)

        rawVal = next(source.get_batch(num_seq_per_batch=nVal,
                                       fold='val',
                                       validation=True)).before_processing
        Xval, Yval = _apply(rawVal.input, rawVal.target)

        rawTest = next(source.get_batch(num_seq_per_batch=nTest,
                                        fold='test',
                                        validation=True)).before_processing
        Xtest, Ytest = _apply(rawTest.input, rawTest.target)

        totalX = {'train': np.squeeze(np.array(Xtrain)),
                  'val': np.squeeze(np.array(Xval)),
                  'test': np.squeeze(np.array(Xtest))}
        totalY = {'train': np.squeeze(np.array(Ytrain)),
                  'val': np.squeeze(np.array(Yval)),
                  'test': np.squeeze(np.array(Ytest))}
        return totalX, totalY, input_std, target_std

    if numApp == -1:
        # Multi-appliance mode was never finished upstream. The original
        # referenced an undefined `valSizee`, used `self.time_steps` inside
        # a plain function, produced float split sizes with `/`, and
        # overwrote totalX/totalY on every loop iteration. The obvious
        # errors are fixed; as before, only the LAST appliance's batches
        # are returned.
        print("not implemented")
        lenApps = len(appliances)
        sizes = (trainSize // lenApps, valSize // lenApps,
                 testSize // lenApps)
        totalX = totalY = None
        input_std = target_std = None
        for target_appliance in appliances:
            totalX, totalY, input_std, target_std = _draw_splits(
                _make_source(target_appliance), sizes)
    else:
        target_appliance = appliances[numApp]
        totalX, totalY, input_std, target_std = _draw_splits(
            _make_source(target_appliance), (trainSize, valSize, testSize))

    return totalX, totalY, input_std, target_std
# NOTE(review): fragment — the first line below is the tail of a tuple
# (presumably DATA_FOLD_NAMES = (...)) whose opening parenthesis is outside
# this view; train_buildings, unseen_buildings, target_appliance and
# seq_length are defined elsewhere.
    'train', 'unseen_appliances', 'unseen_activations_of_seen_appliances')
filtered_windows = select_windows(train_buildings, unseen_buildings)
filtered_activations = filter_activations(filtered_windows, activations)
synthetic_agg_source = SyntheticAggregateSource(
    activations=filtered_activations,
    target_appliance=target_appliance,
    seq_length=seq_length,
    sample_period=SAMPLE_PERIOD
)
real_agg_source = RealAggregateSource(
    activations=filtered_activations,
    target_appliance=target_appliance,
    seq_length=seq_length,
    filename=NILMTK_FILENAME,
    windows=filtered_windows,
    sample_period=SAMPLE_PERIOD
)
# ------------
# needed to rescale the input aggregated data
# rescaling is done using the a first batch of num_seq_per_batch sequences
sample = real_agg_source.get_batch(num_seq_per_batch=1024).next()
sample = sample.before_processing
input_std = sample.input.flatten().std()
target_std = sample.target.flatten().std()
# ------------
def get_pipeline(activations):
    """Build a multi-appliance DataPipeline plus its standardisation stats.

    Creates one RealAggregateSource per appliance in APPLIANCES (sequence
    length derived from SEQ_PERIODS), sampled with equal probability.
    Input/target standard deviations are loaded from a cached .npz file
    when present (and OVERRIDE is off), otherwise estimated from one
    1024-sequence raw batch per source and saved.

    Args:
        activations: raw appliance activations to filter per appliance.

    Returns:
        (pipeline, multi_input_std, multi_target_std).
    """
    global seq_length  # kept: module-level code reads the last value set here
    agg_source = []
    prob = []
    target_inclusion_prob = 0.48 + len(APPLIANCES) * 0.1
    for task_appliance in APPLIANCES:
        seq_period = SEQ_PERIODS[task_appliance]
        seq_length = seq_period // SAMPLE_PERIOD

        # buildings
        buildings = BUILDINGS[task_appliance]
        train_buildings = buildings['train_buildings']
        unseen_buildings = buildings['unseen_buildings']

        # windows
        filtered_windows = select_windows(train_buildings, unseen_buildings,
                                          WINDOWS)
        filtered_activations = filter_activations(filtered_windows,
                                                  activations,
                                                  BUILDINGS_APPLIANCES)

        # data sources: cap the target-inclusion probability, except the
        # fridge which is always present in the aggregate
        real_source_prob = min(0.82, target_inclusion_prob)
        if task_appliance == 'fridge':
            real_source_prob = 1.0
        agg_source.append(
            RealAggregateSource(activations=filtered_activations,
                                target_appliance=task_appliance,
                                appliances=APPLIANCES,
                                target_inclusion_prob=real_source_prob,
                                seq_length=seq_length,
                                filename=NILMTK_FILENAME,
                                windows=filtered_windows,
                                sample_period=SAMPLE_PERIOD))
        prob.append(1.0 / NUM_APPLIANCE)

    # look for existing processing parameters only when OVERRIDE is not on;
    # if none, generate new ones
    print('Looking for existing processing parameters ... ')
    # NOTE(review): '%Y-%m-%d_%H_%m' ends with *month* (%m), not minute
    # (%M); looks like a typo but is preserved because the same pattern is
    # used for both lookup and save — confirm before changing.
    proc_params_filename = path.join(
        dirs.MODELS_DIR,
        'proc_params_' + DATASET + '_[' + TARGET_APPLIANCE + ']_' +
        strftime('%Y-%m-%d_%H_%m') + '.npz')
    if not OVERRIDE and path.exists(proc_params_filename):
        print('Found; using them ...')
        # Load the archive once instead of twice as in the original.
        proc_params = np.load(proc_params_filename)
        multi_input_std = proc_params['multi_input_std']
        multi_target_std = proc_params['multi_target_std']
    else:
        if OVERRIDE:
            print('Overridden; generating new ones ...')
        else:
            print('Not found; generating new ones ...')
        multi_input_std = np.array([])
        multi_target_std = np.array([])
        for sample_source in agg_source:
            batch_size = 1024
            sample = next(sample_source.get_batch(
                num_seq_per_batch=batch_size))
            sample = sample.before_processing
            multi_input_std = np.append(multi_input_std,
                                        sample.input.flatten().std())
            multi_target_std = np.append(multi_target_std, [
                sample.target[:, idx].flatten().std()
                for idx in range(NUM_APPLIANCE)
            ])
        # Average per-source estimates: one scalar input std overall, one
        # target std per appliance.
        multi_input_std = np.mean(multi_input_std)
        multi_target_std = multi_target_std.reshape(-1, NUM_APPLIANCE)
        multi_target_std = np.mean(multi_target_std, axis=0)

        print('=' * 10)
        print('Input std = ', multi_input_std)
        for idx, appliance in enumerate(APPLIANCES):
            print(appliance, 'std = ', multi_target_std[idx])
        print('=' * 10)

        print('Saving the processing parameters ...')
        np.savez(proc_params_filename,
                 multi_input_std=[multi_input_std],
                 multi_target_std=multi_target_std)

    # generate pipeline
    pipeline = DataPipeline(
        agg_source,
        num_seq_per_batch=NUM_SEQ_PER_BATCH,
        input_processing=[DivideBy(multi_input_std), IndependentlyCenter()],
        target_processing=[DivideBy(multi_target_std)],
        source_probabilities=prob,
    )
    return pipeline, multi_input_std, multi_target_std