# NOTE(review): this fragment starts mid training loop (`epochs += 5`); the
# enclosing loop header is outside this chunk.
epochs += 5
disaggregator.export_model("UKDALE-RNN-h{}-{}-{}epochs.h5".format(
    train_building, meter_key, epochs))
end = time.time()
print("Train =", end - start, "seconds.")

print("========== DISAGGREGATE ============")
# Disaggregate the test mains into the target appliance channel and write the
# result to an HDF5 datastore that NILMTK can read back.
disag_filename = "disag-out.h5"
output = HDFDataStore(disag_filename, 'w')
disaggregator.disaggregate(test_mains, output, train_meter,
                           sample_period=sample_period)
output.close()

print("========== RESULTS ============")
# Re-open the disaggregated output and score it against the ground truth.
result = DataSet(disag_filename)
res_elec = result.buildings[test_building].elec
rpaf = metrics.recall_precision_accuracy_f1(res_elec[meter_key],
                                            test_elec[meter_key])
print("============ Recall: {}".format(rpaf[0]))
print("============ Precision: {}".format(rpaf[1]))
print("============ Accuracy: {}".format(rpaf[2]))
# BUG FIX: F1 is the fourth element of the (recall, precision, accuracy, f1)
# tuple; this previously re-printed Accuracy (rpaf[2]).
print("============ F1 Score: {}".format(rpaf[3]))
print("============ Relative error in total energy: {}".format(
    metrics.relative_error_total_energy(res_elec[meter_key],
                                        test_elec[meter_key])))
print("============ Mean absolute error(in Watts): {}".format(
    metrics.mean_absolute_error(res_elec[meter_key], test_elec[meter_key])))
def main(_run, stock_file, days_back, days_forward, max_epochs,
         early_stopping_threshold, num_neurons, num_hidden_layers, seed,
         learning_rate, batch_size, activation, optimizer, kernel_init,
         regularization, loss, timesteps, use_sent_and_trends=False):
    """Train and evaluate an LSTM stock-price predictor, logging to Sacred.

    Builds a stacked-LSTM Keras model on Patel-style technical indicators,
    optionally augmented with news sentiment and Google-Trends features,
    trains it with early stopping and checkpointing, then logs both
    normalized and denormalized test metrics through ``_run.log_scalar``.

    Parameters
    ----------
    _run : Sacred run object used for metric logging.
    stock_file : path of the stocks CSV read by ``data.Stocks``.
    days_back : lookback window for the technical indicators.
    days_forward : prediction horizon; targets are shifted by this many days.
    max_epochs, early_stopping_threshold : training length / patience.
    num_neurons, num_hidden_layers : LSTM topology.
    seed : seed for the kernel initializer.
    learning_rate, batch_size, activation, optimizer, kernel_init, loss :
        usual Keras hyper-parameters (optimizer/kernel_init are string keys).
    regularization : accepted but unused in this body — TODO confirm intent.
    timesteps : LSTM sequence length.
    use_sent_and_trends : when True, merge sentiment * trends features into
        the stock dataframe (hard-coded MSFT file paths).

    Raises
    ------
    NotImplementedError
        For any ``kernel_init`` other than 'glorot_uniform' or any
        ``optimizer`` other than 'adagrad'/'adam'.
    """
    # Read the stocks csv into a dataframe
    stock = data.Stocks(stock_file)
    stock.calc_patel_TI(days_back)

    if use_sent_and_trends:
        # If we have a sentiment file add it to the stock df.
        # NOTE(review): file paths and the 'msft' column are hard-coded, so
        # this branch only works for the Microsoft dataset.
        sentiments = pd.read_csv(
            '../data/nytarticles/microsoft.2013-12-31.2018-12-31.imputed.sent.csv',
            index_col='date')
        trends = pd.read_csv(
            '../data/trends/msft.2013-12-31.2018-12-31.fixed.dates.csv',
            index_col='date')
        sent_trends = pd.merge(sentiments,
                               trends,
                               how='left',
                               left_index=True,
                               right_index=True)
        # Combined feature: sentiment weighted by search-trend volume.
        sent_trends[
            'sent_trends'] = sent_trends['sentiment'] * sent_trends['msft']
        import numpy as np
        # NOTE(review): a random column is injected (and the real features
        # dropped below) — looks like a leftover ablation experiment; confirm.
        sent_trends['randNumCol'] = np.random.randint(1, 100,
                                                      sent_trends.shape[0])
        stock.df = pd.merge(stock.df,
                            sent_trends,
                            how='left',
                            left_index=True,
                            right_index=True)
        stock.df.drop(['sentiment', 'msft', 'sent_trends'],
                      axis='columns',
                      inplace=True)

    # Shift targets by the prediction horizon.
    stock.shift(days_forward)

    # Create the model
    model = K.Sequential()

    # Create the kernel initializer with the seed
    if kernel_init == 'glorot_uniform':
        kernel_initializer = K.initializers.glorot_uniform(seed)
    else:
        raise NotImplementedError

    # Add the layers. Only the last LSTM layer may return a single vector;
    # all earlier layers must return full sequences.
    return_sequences = True
    if num_hidden_layers == 1:
        return_sequences = False
    # Number of input features per timestep.
    data_dim = stock.raw_values()['X'].shape[1]
    model.add(
        K.layers.LSTM(num_neurons,
                      input_shape=(timesteps, data_dim),
                      activation=activation,
                      return_sequences=return_sequences,
                      kernel_initializer=kernel_initializer))
    for i in range(num_hidden_layers - 1):
        # If not in the last layer return sequences
        if i != num_hidden_layers - 2:
            model.add(
                K.layers.LSTM(num_neurons,
                              activation=activation,
                              return_sequences=True,
                              kernel_initializer=kernel_initializer))
        else:
            model.add(
                K.layers.LSTM(num_neurons,
                              activation=activation,
                              kernel_initializer=kernel_initializer))
    # Add output layer: single linear unit for price regression.
    model.add(
        K.layers.Dense(1,
                       activation='linear',
                       kernel_initializer=kernel_initializer))

    # Define Root Mean Squared Relative Error metric.
    # Denominator is clipped at epsilon to avoid division by zero.
    def root_mean_squared_relative_error(y_true, y_pred):
        squared_relative_error = K.backend.square(
            (y_true - y_pred) /
            K.backend.clip(K.backend.abs(y_true), K.backend.epsilon(), None))
        mean_squared_relative_error = K.backend.mean(squared_relative_error,
                                                     axis=-1)
        return K.backend.sqrt(mean_squared_relative_error)

    # Define Direction Accuracy metric: fraction of samples where the
    # predicted price movement has the same sign as the true movement.
    def direction_accuracy(y_true, y_pred):
        # sign returns either -1 (if <0), 0 (if ==0), or 1 (if >0)
        true_signs = K.backend.sign(y_true[days_forward:] -
                                    y_true[:-days_forward])
        # NOTE(review): the right-hand side here is y_true, not y_pred —
        # i.e. predicted movement is measured relative to the last *known*
        # price. Possibly intentional, but confirm it isn't a typo for
        # y_pred[:-days_forward].
        pred_signs = K.backend.sign(y_pred[days_forward:] -
                                    y_true[:-days_forward])
        equal_signs = K.backend.equal(true_signs, pred_signs)
        return K.backend.mean(equal_signs, axis=-1)

    # Create the optimizer
    if optimizer == 'adagrad':
        optimizer = K.optimizers.Adagrad(learning_rate)
    elif optimizer == 'adam':
        optimizer = K.optimizers.Adam(learning_rate)
    else:
        raise NotImplementedError
    model.compile(optimizer=optimizer,
                  loss=loss,
                  metrics=[
                      'mean_absolute_percentage_error', 'mean_absolute_error',
                      root_mean_squared_relative_error, 'mean_squared_error',
                      direction_accuracy
                  ])

    # Create the logging callback
    # The metrics are logged in the run's metrics and at heartbeat events
    # every 10 secs they get written to mongodb
    def on_epoch_end_metrics_log(epoch, logs):
        for metric_name, metric_value in logs.items():
            # The validation set keys have val_ prepended to the metric,
            # add train_ to the training set keys
            if 'val' not in metric_name:
                metric_name = 'train_' + metric_name
            _run.log_scalar(metric_name, metric_value, epoch)

    metrics_log_callback = K.callbacks.LambdaCallback(
        on_epoch_end=on_epoch_end_metrics_log)

    # Early stopping on validation loss plus best-model checkpointing.
    callbacks_list = [
        K.callbacks.EarlyStopping(monitor='val_loss',
                                  patience=early_stopping_threshold),
        K.callbacks.ModelCheckpoint(filepath='../models/best_model.h5',
                                    monitor='val_loss',
                                    save_best_only=True),
        metrics_log_callback
    ]

    # Train on the normalized training split, validating on the val split.
    model.fit(stock.raw_values_lstm_wrapper(dataset='train',
                                            norm=True,
                                            timesteps=timesteps)['X'],
              stock.raw_values_lstm_wrapper(dataset='train',
                                            norm=True,
                                            timesteps=timesteps)['y'],
              epochs=max_epochs,
              batch_size=batch_size,
              verbose=0,
              callbacks=callbacks_list,
              validation_data=(stock.raw_values_lstm_wrapper(
                  dataset='val', norm=True, timesteps=timesteps)['X'],
                               stock.raw_values_lstm_wrapper(
                                   dataset='val',
                                   norm=True,
                                   timesteps=timesteps)['y']))

    # Calculate metrics for normalized values
    test_norm_metrics = model.evaluate(
        stock.raw_values_lstm_wrapper(dataset='test',
                                      norm=True,
                                      timesteps=timesteps)['X'],
        stock.raw_values_lstm_wrapper(dataset='test',
                                      norm=True,
                                      timesteps=timesteps)['y'],
        verbose=0)
    # Log the metrics from the normalized values
    for metric in zip(model.metrics_names, test_norm_metrics):
        _run.log_scalar('test_norm_' + metric[0], metric[1])

    # Now calculate and save the unnormalised metrics
    # Predict returns normalised values
    y_pred_norm = model.predict(
        stock.raw_values_lstm_wrapper(dataset='test',
                                      norm=True,
                                      timesteps=timesteps)['X'])
    # Scale the output back to the actual stock price
    y_pred = stock.denorm_predictions(y_pred_norm)
    # Calculate the unnormalized metrics
    y_true = stock.raw_values_lstm_wrapper(dataset='test',
                                           timesteps=timesteps)['y']
    # df1 = pd.DataFrame({'date': stock.df.index.values[-y_pred.shape[0]:], 'y_pred': y_pred.flatten(), 'y_true': y_true.flatten()})
    # df1.set_index('date', inplace=True)
    # df1.to_csv('plot_data_lstm.csv')
    # NOTE: these use the project's `metrics` module (NumPy-based), not the
    # Keras backend metrics defined above.
    test_metrics = {
        'test_loss':
        metrics.mean_squared_error(y_true, y_pred),
        'test_mean_absolute_percentage_error':
        metrics.mean_absolute_percentage_error(y_true, y_pred),
        'test_mean_absolute_error':
        metrics.mean_absolute_error(y_true, y_pred),
        'test_root_mean_squared_relative_error':
        metrics.root_mean_squared_relative_error(y_true, y_pred),
        'test_mean_squared_error':
        metrics.mean_squared_error(y_true, y_pred),
        'test_direction_accuracy':
        metrics.direction_accuracy(y_true, y_pred, days_forward)
    }
    # Save the metrics
    for metric_name, metric_value in test_metrics.items():
        _run.log_scalar(metric_name, metric_value)
# NOTE(review): this fragment starts mid checkpointing loop; `epochs`,
# `epochs_per_checkpoint`, `key_name`, `mmax`, etc. are defined above it.
model.fit(X_train,
          y_train,
          batch_size=128,
          epochs=epochs_per_checkpoint,
          shuffle=True)
# BUG FIX: Keras Model.save takes the filepath as its only required argument;
# the stray second positional `model` was silently consumed as the
# `overwrite` flag.
model.save("SYNTH-LOOKBACK-{}-ALL-{}epochs-1WIN.h5".format(
    key_name, epochs + epochs_per_checkpoint))

# ======= Disaggregation phase
mains, meter = opends(test_building, key_name)
X_test = mains
# Scale ground truth back to Watts — presumably `meter` was normalized by
# mmax upstream; TODO confirm.
y_test = meter * mmax

# Predict data
X_batch, Y_batch = gen_batch(X_test, y_test,
                             len(X_test) - input_window, 0, input_window)
pred = model.predict(X_batch) * mmax
# Negative power draw is physically impossible; clamp to zero.
pred[pred < 0] = 0
pred = np.transpose(pred)[0]

# Save results
np.save('pred.results', pred)

# Calculate and show metrics
print("============ Recall Precision Accurracy F1 {}".format(
    metrics.recall_precision_accuracy_f1(pred, Y_batch, threshold)))
print("============ relative_error_total_energy {}".format(
    metrics.relative_error_total_energy(pred, Y_batch)))
print("============ mean_absolute_error {}".format(
    metrics.mean_absolute_error(pred, Y_batch)))
print("========== TRAIN ============")
# Train in three checkpointed rounds of 5 epochs, exporting the model after
# each round so partial training is never lost.
epochs = 0
for i in range(3):
    gru.train(train_mains, train_meter, epochs=5,
              sample_period=sample_period)
    epochs += 5
    gru.export_model("REDD-GRU-h{}-{}-{}epochs.h5".format(
        train_building, meter_key, epochs))
    print("CHECKPOINT {}".format(epochs))
end = time.time()
print("Train =", end - start, "seconds.")

print("========== DISAGGREGATE ============")
# Write the disaggregated appliance channel to an HDF5 store for NILMTK.
disag_filename = 'disag-out.h5'
output = HDFDataStore(disag_filename, 'w')
gru.disaggregate(test_mains, output, train_meter,
                 sample_period=sample_period)
output.close()

print("========== RESULTS ============")
result = DataSet(disag_filename)
res_elec = result.buildings[test_building].elec
rpaf = metrics.recall_precision_accuracy_f1(res_elec[meter_key],
                                            test_elec[meter_key])
print("============ Recall: {}".format(rpaf[0]))
print("============ Precision: {}".format(rpaf[1]))
print("============ Accuracy: {}".format(rpaf[2]))
# BUG FIX: F1 is the fourth element of the (recall, precision, accuracy, f1)
# tuple; this previously re-printed Accuracy (rpaf[2]).
print("============ F1 Score: {}".format(rpaf[3]))
print("============ Relative error in total energy: {}".format(
    metrics.relative_error_total_energy(res_elec[meter_key],
                                        test_elec[meter_key])))
print("============ Mean absolute error(in Watts): {}".format(
    metrics.mean_absolute_error(res_elec[meter_key], test_elec[meter_key])))
def experiment(key_name, start_e, end_e):
    '''Trains a network and disaggregates the testset

    Displays the metrics for the disaggregated part

    Parameters
    ----------
    key_name : The string key of the appliance
    start_e : The starting number of epochs for Training
    end_e : The ending number of epochs for Training
    '''
    # ======= Open configuration file
    if key_name not in allowed_key_names:
        print(" Device {} not available".format(key_name))
        # BUG FIX: the message was printed as print(str, arg) — the
        # placeholder was never formatted — and execution fell through to
        # open a config file that cannot exist. Format properly and abort.
        print(" Available device names: {}".format(allowed_key_names))
        return
    conf_filename = "appconf/{}.json".format(key_name)
    with open(conf_filename) as data_file:
        conf = json.load(data_file)
    input_window = conf['lookback']
    threshold = conf['on_threshold']
    mamax = 5000  # mains normalization ceiling (Watts) — presumably; confirm
    memax = conf['memax']
    mean = conf['mean']
    std = conf['std']
    train_buildings = conf['train_buildings']
    test_building = conf['test_building']
    on_threshold = conf['on_threshold']
    meter_key = conf['nilmtk_key']
    save_path = conf['save_path']

    # ======= Training phase
    print("Training for device: {}".format(key_name))
    print(" train_buildings: {}".format(train_buildings))
    # Open train sets
    X_train = np.load("dataset/trainsets/X-{}.npy".format(key_name))
    X_train = normalize(X_train, mamax, mean, std)
    y_train = np.load("dataset/trainsets/Y-{}.npy".format(key_name))
    y_train = normalize(y_train, memax, mean, std)
    model = create_model(input_window)

    # Train model and save checkpoints
    if start_e > 0:
        # Resume from the checkpoint produced by a previous run.
        model = load_model(
            save_path + "CHECKPOINT-{}-{}epochs.hdf5".format(key_name, start_e))
    if end_e > start_e:
        filepath = save_path + "CHECKPOINT-" + key_name + "-{epoch:01d}epochs.hdf5"
        checkpoint = ModelCheckpoint(filepath, verbose=1, save_best_only=False)
        history = model.fit(X_train,
                            y_train,
                            batch_size=128,
                            epochs=end_e,
                            shuffle=True,
                            initial_epoch=start_e,
                            callbacks=[checkpoint])
        losses = history.history['loss']
        # BUG FIX: Model.save takes the filepath only; the stray `model`
        # positional argument was being consumed as the `overwrite` flag.
        model.save(
            "{}CHECKPOINT-{}-{}epochs.hdf5".format(save_path, key_name, end_e))
        # Save training loss per epoch, appending to any existing history.
        try:
            previous = np.loadtxt("{}losses.csv".format(save_path))
            losses = np.append(previous, losses)
        except OSError:
            # No losses.csv yet — start a fresh history. (Was a bare
            # `except: pass`, which also hid real errors.)
            pass
        np.savetxt("{}losses.csv".format(save_path), losses, delimiter=",")

    # ======= Disaggregation phase
    mains, meter = opends(test_building, key_name)
    X_test = normalize(mains, mamax, mean, std)
    y_test = meter

    # Predict data
    X_batch, Y_batch = gen_batch(X_test, y_test,
                                 len(X_test) - input_window, 0, input_window)
    pred = model.predict(X_batch)
    pred = denormalize(pred, memax, mean, std)
    # Negative power draw is physically impossible; clamp to zero.
    pred[pred < 0] = 0
    pred = np.transpose(pred)[0]

    # Save results
    np.save("{}pred-{}-epochs{}".format(save_path, key_name, end_e), pred)

    rpaf = metrics.recall_precision_accuracy_f1(pred, Y_batch, threshold)
    rete = metrics.relative_error_total_energy(pred, Y_batch)
    mae = metrics.mean_absolute_error(pred, Y_batch)

    print("============ Recall: {}".format(rpaf[0]))
    print("============ Precision: {}".format(rpaf[1]))
    print("============ Accuracy: {}".format(rpaf[2]))
    print("============ F1 Score: {}".format(rpaf[3]))
    print("============ Relative error in total energy: {}".format(rete))
    print("============ Mean absolute error(in Watts): {}".format(mae))

    # Persist the metrics as one comma-separated line. A context manager
    # guarantees the file is closed even if a write fails.
    with open("{}results-pred-{}-{}epochs".format(save_path, key_name, end_e),
              'w') as res_out:
        for r in rpaf:
            res_out.write(str(r))
            res_out.write(',')
        res_out.write(str(rete))
        res_out.write(',')
        res_out.write(str(mae))
# Open the disaggregation output and pull out the predicted and ground-truth
# channels for the appliance under evaluation.
disag_ds = DataSet(DISAG)
disag_elec = disag_ds.buildings[TEST_BUILDING].elec
predicted = disag_elec[APPLIANCE]
ground_truth = test_elec[APPLIANCE]

# Overlay the two power series for a visual comparison.
figure = plt.figure()
axis = plt.subplot(111)
axis.plot(ground_truth.power_series_all_data(), label='ground truth')
axis.plot(predicted.power_series_all_data(), label='predicted')
#plt.xlim('2017-10-08 00:00:00', '2017-10-08 01:00:00')
#plt.ylim(0, 300)
plt.xlabel('Time')
plt.ylabel('Power [W]')
plt.title(APPLIANCE + ' Disaggregation')
day_hour_formatter = mdates.DateFormatter('%d:%H:%M')
axis.xaxis.set_major_formatter(day_hour_formatter)
axis.legend()
plt.savefig(APPLIANCE + "_mlp.png")

import metrics

# Numeric error metrics between prediction and ground truth.
print("============ Relative error in total energy: {}".format(
    metrics.relative_error_total_energy(predicted, ground_truth)))
print("============ Mean absolute error(in Watts): {}".format(
    metrics.mean_absolute_error(predicted, ground_truth)))

# Daily breakdown of relative consumption over the test window.
print("============ List of percentages for every days\n")
day_index = pd.date_range(start=START_TEST, end=END_TEST, freq='D')
print(
    metrics.daily_relative_consume(predicted, ground_truth, test_mains,
                                   day_index))
def runExperiment(experiment: experimentInfo, metricsResFileName,
                  clearMetricsFile):
    """Run a stacked-ensemble disaggregation experiment and score it.

    Streams aligned chunks from every base-model test dataset, feeds them
    column-wise to the pre-fit ensemble classifier ``clf`` (module-level),
    writes the predictions into a NILMTK-compatible HDF5 datastore, then
    computes recall/precision/accuracy/F1, relative energy error, MAE and
    RMSE against the original ground-truth dataset and appends them to a
    results CSV.

    Parameters
    ----------
    experiment : experimentInfo
        Carries the test dataset paths, output filename, building id,
        meter key, original-dataset path and the on-power threshold.
    metricsResFileName : CSV file the metrics row is written to.
    clearMetricsFile : passed through to metrics.writeResultsToCSV —
        presumably truncates the file first; confirm in that module.
    """
    dsPathsList_Test = experiment.dsList
    outFileName = experiment.outName
    test_building = experiment.building
    meter_key = experiment.meter_key
    pathOrigDS = experiment.pathOrigDS
    meterTH = experiment.meterTH

    print('House ', test_building)

    # Load a "complete" dataset to have the test's timerange
    test = DataSet(dsPathsList_Test[0])
    test_elec = test.buildings[test_building].elec
    testRef_meter = test_elec.submeters(
    )[meter_key]  # will be used as reference to align all meters based on this

    # Align every test meter with testRef_meter as master
    test_series_list = []
    for path in dsPathsList_Test:
        test = DataSet(path)
        test_elec = test.buildings[test_building].elec
        test_meter = test_elec.submeters()[meter_key]
        # print('Stack test: ', test_meter.get_timeframe().start.date(), " - ", test_meter.get_timeframe().end.date())
        aligned_meters = align_two_meters(testRef_meter, test_meter)
        test_series_list.append(aligned_meters)

    # Init vars for the output
    MIN_CHUNK_LENGTH = 300  # Depends on the basemodels of the ensemble
    timeframes = []
    # NOTE: test_meter here is the last meter of the loop above.
    building_path = '/building{}'.format(test_meter.building())
    mains_data_location = building_path + '/elec/meter1'
    data_is_available = False
    disag_filename = outFileName
    # NOTE(review): output_datastore is never explicitly closed in this
    # function — relies on HDFDataStore cleanup; confirm.
    output_datastore = HDFDataStore(disag_filename, 'w')
    run = True
    chunkDataForOutput = None
    # -- Used to hold necessary data for saving the results using NILMTK
    # (e.g. timeframes).
    # -- (in case where chunks have different size (not in current
    # implementation), must use the chunk whose windowsSize is the least
    # (to have all the data))
    while run:
        try:
            testX = []
            columnInd = 0
            # Get Next chunk of each series; one column per base model.
            for testXGen in test_series_list:
                chunkALL = next(testXGen)
                chunk = chunkALL[
                    'slave']  # slave is the meter needed (master is only for aligning)
                chunk.fillna(0, inplace=True)
                if (columnInd == 0):
                    chunkDataForOutput = chunk  # Use 1st found chunk for its metadata
                if (testX == []):
                    testX = np.zeros(
                        [len(chunk), len(test_series_list)]
                    )  # Initialize the array that will hold all of the series as columns
                testX[:, columnInd] = chunk[:]
                columnInd += 1
            # scaler is module-level; must match the scaler used at fit time.
            testX = scaler.transform(testX)
        except:
            # NOTE(review): bare except is used as the loop terminator —
            # primarily StopIteration from the exhausted generators, but it
            # also swallows any real error (e.g. shape mismatch). Consider
            # narrowing to StopIteration.
            run = False
            break

        # Skip chunks too short to be meaningful for the base models.
        if len(chunkDataForOutput) < MIN_CHUNK_LENGTH:
            continue
        # print("New sensible chunk: {}".format(len(chunk)))

        startTime = chunkDataForOutput.index[0]
        endTime = chunkDataForOutput.index[
            -1]  # chunkDataForOutput.shape[0] - 1
        # print('Start:',startTime,'End:',endTime)
        timeframes.append(TimeFrame(
            startTime, endTime))  # info needed for output for use with NILMTK
        measurement = ('power', 'active')

        # Ensemble prediction for this chunk (one value per timestep).
        pred = clf.predict(testX)
        column = pd.Series(pred, index=chunkDataForOutput.index, name=0)
        appliance_powers_dict = {}
        appliance_powers_dict[0] = column
        appliance_power = pd.DataFrame(appliance_powers_dict)
        # Negative power is clamped to zero.
        appliance_power[appliance_power < 0] = 0

        # Append prediction to output
        data_is_available = True
        cols = pd.MultiIndex.from_tuples([measurement])
        meter_instance = test_meter.instance()
        df = pd.DataFrame(appliance_power.values,
                          index=appliance_power.index,
                          columns=cols,
                          dtype="float32")
        key = '{}/elec/meter{}'.format(building_path, meter_instance)
        output_datastore.append(key, df)

        # Append aggregate data to output
        mains_df = pd.DataFrame(chunkDataForOutput,
                                columns=cols,
                                dtype="float32")
        # Note (For later): not 100% right. Should be mains. But it won't be
        # used anywhere, so it doesn't matter in this case
        output_datastore.append(key=mains_data_location, value=mains_df)

    # Save metadata to output so NILMTK can read the datastore back.
    if data_is_available:
        disagr = Disaggregator()
        disagr.MODEL_NAME = 'Stacked model'
        disagr._save_metadata_for_disaggregation(
            output_datastore=output_datastore,
            sample_period=sample_period,
            measurement=measurement,
            timeframes=timeframes,
            building=test_meter.building(),
            meters=[test_meter])

    #======================== Calculate Metrics =====================================
    # Ground truth comes from the original dataset, windowed to the test
    # meter's timerange.
    testYDS = DataSet(pathOrigDS)
    testYDS.set_window(start=test_meter.get_timeframe().start.date(),
                       end=test_meter.get_timeframe().end.date())
    testY_elec = testYDS.buildings[test_building].elec
    testY_meter = testY_elec.submeters()[meter_key]
    test_mains = testY_elec.mains()

    result = DataSet(disag_filename)
    res_elec = result.buildings[test_building].elec
    # NOTE(review): meterTH is passed twice — presumably separate thresholds
    # for prediction and ground truth; confirm against the metrics module.
    rpaf = metrics.recall_precision_accuracy_f1(res_elec[meter_key],
                                                testY_meter, meterTH, meterTH)
    relError = metrics.relative_error_total_energy(res_elec[meter_key],
                                                   testY_meter)
    MAE = metrics.mean_absolute_error(res_elec[meter_key], testY_meter)
    RMSE = metrics.RMSE(res_elec[meter_key], testY_meter)

    print("============ Recall: {}".format(rpaf[0]))
    print("============ Precision: {}".format(rpaf[1]))
    print("============ Accuracy: {}".format(rpaf[2]))
    print("============ F1 Score: {}".format(rpaf[3]))
    print("============ Relative error in total energy: {}".format(relError))
    print("============ Mean absolute error(in Watts): {}".format(MAE))
    print("=== For docs: {:.4}\t{:.4}\t{:.4}\t{:.4}\t{:.4}\t{:.4}".format(
        rpaf[0], rpaf[1], rpaf[2], rpaf[3], relError, MAE))
    # print("============ RMSE: {}".format(RMSE))
    # print("============ TECA: {}".format(metrics.TECA([res_elec[meter_key]],[testY_meter],test_mains)))

    resDict = {
        'model': 'TEST',
        'building': test_building,
        'Appliance': meter_key,
        'Appliance_Type': 2,
        'Recall': rpaf[0],
        'Precision': rpaf[1],
        'Accuracy': rpaf[2],
        'F1': rpaf[3],
        'relError': relError,
        'MAE': MAE,
        'RMSE': RMSE
    }
    metrics.writeResultsToCSV(resDict, metricsResFileName, clearMetricsFile)
def run_test(x_train, x_test, y_train, y_test, classifier, sample_weight=None):
    """Fit *classifier* on the training split and score it on the test split.

    Returns a 3-tuple: (confusion matrix, per-class recall for the four
    label classes 0-3, mean absolute error of the predictions).
    """
    classifier.fit(x_train, y_train, sample_weight=sample_weight)
    predictions = classifier.predict(x_test)

    conf_mat = confusion_matrix(y_true=y_test, y_pred=predictions)
    per_class_recall = recall_score(y_true=y_test,
                                    y_pred=predictions,
                                    labels=np.arange(4),
                                    average=None)
    mae = mean_absolute_error(y_true=y_test, y_pred=predictions)

    return (conf_mat, per_class_recall, mae)