def getBatches(period, filename, target_inclusion_prob, windows, appliances,
               num_batches, num_seq_per_batch, seq_length, numApp):
    activations = load_nilmtk_activations(appliances=appliances,
                                          filename=filename,
                                          sample_period=period,
                                          windows=windows)
    list_of_Xbatches = []
    list_of_Ybatches = []
    if numApp == -1:
        for target_appliance in appliances:
            print("Getting batches for", target_appliance)
            pipeline = get_pipeline(period, filename, target_inclusion_prob,
                                    windows, appliances, target_appliance,
                                    activations, seq_length, num_seq_per_batch)
            for i in range(num_batches):
                # sequence length is defined in get_pipeline()
                batch = pipeline.get_batch()
                list_of_Xbatches.append(batch.input)
                list_of_Ybatches.append(batch.target)
    else:
        pipeline = get_pipeline(period, filename, target_inclusion_prob,
                                windows, appliances, appliances[numApp],
                                activations, seq_length, num_seq_per_batch)
        for i in range(num_batches):
            # sequence length is defined in get_pipeline()
            batch = pipeline.get_batch()
            list_of_Xbatches.append(batch.input)
            list_of_Ybatches.append(batch.target)
    return np.array(list_of_Xbatches), np.array(list_of_Ybatches)
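# Hedged usage sketch for getBatches(): the file path, window dates, appliance names
# and numeric values below are illustrative assumptions, not values taken from the
# experiments in this file. Windows follow the fold -> building -> (start, end)
# layout used elsewhere here.
EXAMPLE_WINDOWS = {
    'train': {1: ("2013-04-12", "2014-12-15")},
    'unseen_activations_of_seen_appliances': {1: ("2014-12-15", None)},
    'unseen_appliances': {2: ("2013-05-22", None)},
}
X, Y = getBatches(period=6,
                  filename='ukdale.h5',
                  target_inclusion_prob=0.5,
                  windows=EXAMPLE_WINDOWS,
                  appliances=['kettle', 'fridge'],
                  num_batches=10,
                  num_seq_per_batch=64,
                  seq_length=512,
                  numApp=0)   # 0 -> first appliance only; -1 -> one pipeline per appliance
# X stacks num_batches input batches; Y stacks the matching target batches.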
def run(root_experiment_name):
    activations = load_nilmtk_activations(appliances=APPLIANCES,
                                          filename=NILMTK_FILENAME,
                                          sample_period=SAMPLE_PERIOD,
                                          windows=WINDOWS)
    for get_net in [ae]:
        for target_appliance in ['kettle']:
            pipeline = get_pipeline(target_appliance, activations)

            # Build net
            batch = pipeline.get_batch()
            net = get_net(batch)

            # Trainer
            trainer = Trainer(
                net=net,
                data_pipeline=pipeline,
                experiment_id=[
                    root_experiment_name, get_net.__name__, target_appliance],
                metrics=Metrics(state_boundaries=[2]),  # was 3 up until 230000ish
                learning_rates={0: 1E-2},
                repeat_callbacks=[
                    (5000, Trainer.validate),
                    (5000, Trainer.save_params),
                    (5000, Trainer.plot_estimates)
                ])

            report = trainer.submit_report()
            print(report)

            # Run!
            trainer.fit(None)
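# Hedged usage note: run() expects the neuralnilm-style globals (APPLIANCES,
# NILMTK_FILENAME, SAMPLE_PERIOD, WINDOWS), get_pipeline() and the ae() net builder
# to be defined at module level. The experiment name below is an illustrative
# assumption; fit(None) keeps training until interrupted.
run(root_experiment_name='ae_kettle_example')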
def main():
    set_log_level()
    parse_args()
    load_config()

    # load the activations
    print('Loading activations ...')
    activations = load_nilmtk_activations(
        appliances=[TARGET_APPLIANCE],
        filename=NILMTK_FILENAME,
        sample_period=SAMPLE_PERIOD,
        windows=WINDOWS
    )

    # generate pipeline
    pipeline, input_std, target_std = get_pipeline(activations)

    # determine the input shape
    print('Determining input shape ... ', end='')
    batch = pipeline.get_batch()
    input_shape = batch.input.shape[1:]
    print(input_shape)

    # look for an existing model only when OVERRIDE is not on; if none, then
    # build a new one
    print('Looking for an existing model ... ', end='')
    model_filename = os.path.join(
        dirs.MODELS_DIR,
        DATASET + '_' + TARGET_APPLIANCE + '_' +
        strftime('%Y-%m-%d_%H_%M') + '.h5')
    if not OVERRIDE and os.path.exists(model_filename):
        print('Found; loading it ...')
        from keras.models import load_model
        model = load_model(model_filename)
    else:
        if OVERRIDE:
            print('Overridden; building a new one with the specified topology ...')
        else:
            print('Not found; building a new one with the specified topology ...')

        # define accuracy
        #import keras.backend as K
        #ON_POWER_THRESHOLD = DivideBy(target_std)(10)
        #def acc(y_true, y_pred):
        #    return K.mean(K.equal(K.greater_equal(y_true, ON_POWER_THRESHOLD),
        #                          K.greater_equal(y_pred, ON_POWER_THRESHOLD)))

        # build model
        topology_module = importlib.import_module(
            dirs.TOPOLOGIES_DIR + '.' + TOPOLOGY_NAME, __name__)
        model = topology_module.build_model(input_shape)
        print(model.summary())

    # train
    print('Preparing the training process ...')
    train(pipeline, model)

    # save the model
    print('Saving the model to ' + model_filename + ' ...')
    model.save(model_filename)
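# train() is called above but not shown in this file. A minimal sketch of what such a
# helper might look like, assuming the neuralnilm get_batch(fold=...) interface and a
# Keras model that maps batch.input to batch.target; the optimizer, loss and batch
# count are assumptions, not the original training configuration.
def train(pipeline, model, num_batches=10000):
    # assumes the topology module has not already compiled the model
    model.compile(optimizer='adam', loss='mse')
    for i in range(num_batches):
        batch = pipeline.get_batch(fold='train')
        loss = model.train_on_batch(batch.input, batch.target)
        if i % 1000 == 0:
            print('batch {}: loss={:.6f}'.format(i, loss))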
def create_data_pipeline(conf, sample_period, num_seq_per_batch,
                         source_probabilities=(.5, .5),
                         windows_key='windows'):
    appliances = conf['distracting_appliances']
    appliances.append(conf['target_appliance'])
    data_file_path = conf['data_file'] if os.path.isabs(conf['data_file']) \
        else os.path.join(os.path.dirname(__file__) + '/../', conf['data_file'])

    windows = {}
    for window_name, window in conf[windows_key].items():
        windows[window_name] = {}
        for house, window_selection in window.items():
            windows[window_name][int(house)] = window_selection

    appliance_activations = load_nilmtk_activations(
        appliances=appliances,
        filename=data_file_path,
        sample_period=sample_period,
        windows=windows)

    synthetic_agg_source = SyntheticAggregateSource(
        activations=appliance_activations,
        target_appliance=conf['target_appliance'],
        seq_length=conf['seq_length'],
        sample_period=sample_period)

    real_agg_source = RealAggregateSource(
        activations=appliance_activations,
        target_appliance=conf['target_appliance'],
        seq_length=conf['seq_length'],
        filename=data_file_path,
        windows=windows,
        sample_period=sample_period)

    sample = next(real_agg_source.get_batch(num_seq_per_batch=1024))
    sample = sample.before_processing
    real_input_std = sample.input.flatten().std()
    real_target_std = sample.target.flatten().std()
    real_avg_power = sample.target.flatten().sum() / 1024 / conf['seq_length']

    pipeline = RectangleDataPipeline(
        [synthetic_agg_source, real_agg_source],
        num_seq_per_batch=num_seq_per_batch,
        source_probabilities=source_probabilities,
        input_processing=[DivideBy(conf['input_std']), IndependentlyCenter()],
        target_processing=[DivideBy(conf['target_std']), start_and_end_and_mean])

    return pipeline, real_input_std, real_target_std, real_avg_power
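# Hedged usage sketch for create_data_pipeline(): the conf dictionary below only
# illustrates the keys the function reads; every value is an assumption.
example_conf = {
    'target_appliance': 'kettle',
    'distracting_appliances': ['fridge', 'microwave'],
    'data_file': 'ukdale.h5',
    'seq_length': 512,
    'input_std': 300.0,    # pre-computed normalisation constants
    'target_std': 700.0,
    'windows': {
        'train': {'1': ("2013-04-12", "2014-12-15")},
        'unseen_activations_of_seen_appliances': {'1': ("2014-12-15", None)},
        'unseen_appliances': {'2': ("2013-05-22", None)},
    },
}
pipeline, real_input_std, real_target_std, real_avg_power = create_data_pipeline(
    example_conf, sample_period=6, num_seq_per_batch=64)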
def run(root_experiment_name):
    activations = load_nilmtk_activations(
        appliances=APPLIANCES,
        filename=NILMTK_FILENAME,
        sample_period=SAMPLE_PERIOD,
        windows=WINDOWS
    )
    for get_net in [ae]:
        for target_appliance in APPLIANCES[2:]:
            print("Starting training for net {}, appliance {}."
                  .format(get_net.__name__, target_appliance))
            pipeline = get_pipeline(target_appliance, activations)

            # Build net
            batch = pipeline.get_batch()
            net = get_net(batch)

            # Trainer
            trainer = Trainer(
                net=net,
                data_pipeline=pipeline,
                experiment_id=[
                    root_experiment_name, get_net.__name__, target_appliance],
                metrics=Metrics(state_boundaries=[2.5]),
                learning_rates={
                    0: 1e-2,
                    200000: 1e-3
                },
                repeat_callbacks=[
                    (25000, Trainer.validate),
                    (25000, Trainer.save_params),
                    (25000, Trainer.plot_estimates)
                ]
            )

            report = trainer.submit_report()
            print(report)

            # Run!
            trainer.fit(300000)
6: ("2011-05-22", "2011-06-14"), }, 'unseen_activations_of_seen_appliances': { 1: ("2011-04-19", None), 2: ("2011-04-19", None), 3: ("2011-04-19", None), 6: ("2011-05-22", None), }, 'unseen_appliances': { 5: ("2011-04-19", None) } } # get the dictionary of activations for each appliance activations = load_nilmtk_activations(appliances=APPLIANCES, filename=NILMTK_FILENAME, sample_period=SAMPLE_PERIOD, windows=WINDOWS) # get pipeline for the fridge example num_seq_per_batch = 16 target_appliance = 'fridge' seq_length = 512 train_buildings = [1, 2, 3, 6] unseen_buildings = [5] DATA_FOLD_NAMES = ('train', 'unseen_appliances', 'unseen_activations_of_seen_appliances') filtered_windows = select_windows(train_buildings, unseen_buildings) filtered_activations = filter_activations(filtered_windows, activations) synthetic_agg_source = SyntheticAggregateSource(
def getNILMbatches(period, filename, target_inclusion_prob, windows, appliances,
                   pTrain, pVal, pTest, num_seq_per_batch, seq_length, numApp):
    activations = load_nilmtk_activations(appliances=appliances,
                                          filename=filename,
                                          sample_period=period,
                                          windows=windows)
    filtered_activations = filter_activations(windows, appliances, activations)

    list_of_Xbatches = []
    list_of_Ybatches = []

    trainSize = int(num_seq_per_batch * pTrain)
    valSize = int(num_seq_per_batch * pVal)
    testSize = int(num_seq_per_batch * pTest)

    if numApp == -1:
        print("not implemented")
        #return None, None
        # get_batch(enable_all_appliances=True)
        lenApps = len(appliances)
        # integer sequence counts per appliance
        trainSize = trainSize // lenApps
        valSize = valSize // lenApps
        testSize = testSize // lenApps

        totalX = {
            'train': np.empty([0, seq_length]),
            'val': np.empty([0, seq_length]),
            'test': np.empty([0, seq_length])
        }
        totalY = {
            'train': np.empty([0, seq_length, lenApps]),
            'val': np.empty([0, seq_length, lenApps]),
            'test': np.empty([0, seq_length, lenApps])
        }
        for target_appliance in appliances:
            real_agg_source = RealAggregateSource(
                activations=filtered_activations,
                target_appliance=target_appliance,
                seq_length=seq_length,
                filename=filename,
                windows=windows,
                sample_period=period,
                target_inclusion_prob=target_inclusion_prob)

            #print('train')
            sampleTrain = next(real_agg_source.get_batch(
                num_seq_per_batch=trainSize, fold='train', validation=False))
            Xtrain = sampleTrain.before_processing
            input_std = Xtrain.input.flatten().std()
            target_std = Xtrain.target.flatten().std()
            input_processing = [DivideBy(input_std), IndependentlyCenter()]
            target_processing = [DivideBy(target_std)]
            Xtrain, Ytrain = Xtrain.input, Xtrain.target
            for step in input_processing:
                Xtrain = step(Xtrain)
            for step in target_processing:
                Ytrain = step(Ytrain)

            #print('validate')
            sampleVal = next(real_agg_source.get_batch(
                num_seq_per_batch=valSize, fold='val', validation=True))
            Xval = sampleVal.before_processing
            Xval, Yval = Xval.input, Xval.target
            for step in input_processing:
                Xval = step(Xval)
            for step in target_processing:
                Yval = step(Yval)

            #print('test')
            sampleTest = next(real_agg_source.get_batch(
                num_seq_per_batch=testSize, fold='test', validation=True))
            Xtest = sampleTest.before_processing
            Xtest, Ytest = Xtest.input, Xtest.target
            for step in input_processing:
                Xtest = step(Xtest)
            for step in target_processing:
                Ytest = step(Ytest)

            '''
            pipeline = get_pipeline(period, filename, target_inclusion_prob,
                                    windows, appliances, appliances[numApp],
                                    activations, seq_length, num_seq_per_batch)
            # define sequence length in get_pipeline()
            batchTrain = pipeline.get_batch(fold='train', validation=False)
            batchVal = pipeline.get_batch(fold='val', validation=True)
            batchTest = pipeline.get_batch(fold='test', validation=True)
            '''
            '''
            print(Xtrain[0])
            print(Xtrain[499])
            print(Xval[0])
            print(Xval[249])
            print(Xtest[0])
            print(Xtest[249])
            '''
            totalX = {
                'train': np.squeeze(np.array(Xtrain)),
                'val': np.squeeze(np.array(Xval)),
                'test': np.squeeze(np.array(Xtest))
            }
            totalY = {
                'train': np.squeeze(np.array(Ytrain)),
                'val': np.squeeze(np.array(Yval)),
                'test': np.squeeze(np.array(Ytest))
            }
    else:
        target_appliance = appliances[numApp]
        real_agg_source = RealAggregateSource(
            activations=filtered_activations,
            target_appliance=target_appliance,
            seq_length=seq_length,
            filename=filename,
            windows=windows,
            sample_period=period,
            target_inclusion_prob=target_inclusion_prob)

        #print('train')
        sampleTrain = next(real_agg_source.get_batch(
            num_seq_per_batch=trainSize, fold='train', validation=False))
        Xtrain = sampleTrain.before_processing
        input_std = Xtrain.input.flatten().std()
        target_std = Xtrain.target.flatten().std()
        input_processing = [DivideBy(input_std), IndependentlyCenter()]
        target_processing = [DivideBy(target_std)]
        Xtrain, Ytrain = Xtrain.input, Xtrain.target
        for step in input_processing:
            Xtrain = step(Xtrain)
        for step in target_processing:
            Ytrain = step(Ytrain)

        #print('validate')
        sampleVal = next(real_agg_source.get_batch(
            num_seq_per_batch=valSize, fold='val', validation=True))
        Xval = sampleVal.before_processing
        Xval, Yval = Xval.input, Xval.target
        for step in input_processing:
            Xval = step(Xval)
        for step in target_processing:
            Yval = step(Yval)

        #print('test')
        sampleTest = next(real_agg_source.get_batch(
            num_seq_per_batch=testSize, fold='test', validation=True))
        Xtest = sampleTest.before_processing
        Xtest, Ytest = Xtest.input, Xtest.target
        for step in input_processing:
            Xtest = step(Xtest)
        for step in target_processing:
            Ytest = step(Ytest)

        '''
        pipeline = get_pipeline(period, filename, target_inclusion_prob,
                                windows, appliances, appliances[numApp],
                                activations, seq_length, num_seq_per_batch)
        # define sequence length in get_pipeline()
        batchTrain = pipeline.get_batch(fold='train', validation=False)
        batchVal = pipeline.get_batch(fold='val', validation=True)
        batchTest = pipeline.get_batch(fold='test', validation=True)
        '''
        '''
        print(Xtrain[0])
        print(Xtrain[499])
        print(Xval[0])
        print(Xval[249])
        print(Xtest[0])
        print(Xtest[249])
        '''
        totalX = {
            'train': np.squeeze(np.array(Xtrain)),
            'val': np.squeeze(np.array(Xval)),
            'test': np.squeeze(np.array(Xtest))
        }
        totalY = {
            'train': np.squeeze(np.array(Ytrain)),
            'val': np.squeeze(np.array(Yval)),
            'test': np.squeeze(np.array(Ytest))
        }

    return totalX, totalY, input_std, target_std
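# Hedged usage sketch for getNILMbatches(): file path, window dates, appliance names
# and split fractions are illustrative assumptions. numApp=1 requests the second
# appliance only; the function returns dicts keyed by 'train'/'val'/'test' plus the
# normalisation constants computed from the training fold.
totalX, totalY, input_std, target_std = getNILMbatches(
    period=6,
    filename='ukdale.h5',
    target_inclusion_prob=0.5,
    windows=EXAMPLE_WINDOWS,   # same fold -> building -> (start, end) layout as above
    appliances=['kettle', 'fridge'],
    pTrain=0.5, pVal=0.25, pTest=0.25,
    num_seq_per_batch=1000,
    seq_length=512,
    numApp=1)
print(totalX['train'].shape, totalY['train'].shape)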
def main():
    global BUILDINGS_APPLIANCES, seq_length

    set_log_level()
    parse_args()
    load_config()

    pipeline = None
    pipe_path = path.join(
        dirs.MODELS_DIR,
        'pipe_' + DATASET + '_[' + TARGET_APPLIANCE + ']' + '.pkl')

    if LOAD_PIPELINE:
        print('Loading pipeline ...')
        with open(pipe_path, 'rb') as fp:
            pipeline = cPickle.load(fp)
        seq_period = SEQ_PERIODS[APPLIANCES[0]]
        seq_length = seq_period // SAMPLE_PERIOD
    else:
        # load the activations
        print('Loading activations ...')
        BUILDINGS_APPLIANCES = BUILDINGS.keys()
        activations = load_nilmtk_activations(appliances=BUILDINGS_APPLIANCES,
                                              filename=NILMTK_FILENAME,
                                              sample_period=SAMPLE_PERIOD,
                                              windows=WINDOWS)
        # activations_processor = Preprocess(activations)
        # activations_processor.activations_pruning()
        # activations_processor.median_filter(k_factor=3)
        # activations = activations_processor.get_activations()

        # generate pipeline
        pipeline, input_std, target_std = get_pipeline(activations)
        with open(pipe_path, 'wb') as fp:
            cPickle.dump(pipeline, fp, True)

    # determine the input shape
    print('Determining input shape ... ', end='')
    batch = pipeline.get_batch()
    input_shape = batch.input.reshape(NUM_SEQ_PER_BATCH, seq_length, 1).shape[1:]
    print(input_shape)

    # look for an existing model only when OVERRIDE is not on; if none, then
    # build a new one
    print('Looking for an existing model ... ', end='')
    model_filename = path.join(
        dirs.MODELS_DIR,
        DATASET + '_[' + TARGET_APPLIANCE + ']_' +
        strftime('%Y-%m-%d_%H_%M') + '.h5')
    if not OVERRIDE and path.exists(model_filename):
        print('Found; loading it ...')
        from keras.models import load_model
        model = load_model(model_filename)
    else:
        if OVERRIDE:
            print('Overridden; building a new one with the specified topology ...')
        else:
            print('Not found; building a new one with the specified topology ...')

        # define accuracy
        #import keras.backend as K
        #ON_POWER_THRESHOLD = DivideBy(target_std)(10)
        #def acc(y_true, y_pred):
        #    return K.mean(K.equal(K.greater_equal(y_true, ON_POWER_THRESHOLD),
        #                          K.greater_equal(y_pred, ON_POWER_THRESHOLD)))

        # build model
        topology_module = importlib.import_module(
            dirs.TOPOLOGIES_DIR + '.' + TOPOLOGY_NAME, __name__)
        model = topology_module.build_model(input_shape, APPLIANCES)
        print(model.summary())

    # train
    print('Preparing the training process ...')
    train(pipeline, model)

    # save the model
    print('Saving the model to ' + model_filename + ' ...')
    model.save(model_filename)