Ejemplo n.º 1
0
    epochs += 5
    disaggregator.export_model("UKDALE-RNN-h{}-{}-{}epochs.h5".format(
        train_building, meter_key, epochs))
# Report total training time (start is set before the training loop above).
end = time.time()
print("Train =", end - start, "seconds.")

print("========== DISAGGREGATE ============")
# Write the disaggregated appliance traces to an HDF5 datastore.
disag_filename = "disag-out.h5"
output = HDFDataStore(disag_filename, 'w')
disaggregator.disaggregate(test_mains,
                           output,
                           train_meter,
                           sample_period=sample_period)
output.close()

print("========== RESULTS ============")
# Re-open the disaggregation output and score it against ground truth.
result = DataSet(disag_filename)
res_elec = result.buildings[test_building].elec
rpaf = metrics.recall_precision_accuracy_f1(res_elec[meter_key],
                                            test_elec[meter_key])
print("============ Recall: {}".format(rpaf[0]))
print("============ Precision: {}".format(rpaf[1]))
print("============ Accuracy: {}".format(rpaf[2]))
# BUG FIX: F1 is the 4th element of the (recall, precision, accuracy, f1)
# tuple; index 2 was re-printing accuracy.
print("============ F1 Score: {}".format(rpaf[3]))

print("============ Relative error in total energy: {}".format(
    metrics.relative_error_total_energy(res_elec[meter_key],
                                        test_elec[meter_key])))
print("============ Mean absolute error(in Watts): {}".format(
    metrics.mean_absolute_error(res_elec[meter_key], test_elec[meter_key])))
Ejemplo n.º 2
0
def main(_run,
         stock_file,
         days_back,
         days_forward,
         max_epochs,
         early_stopping_threshold,
         num_neurons,
         num_hidden_layers,
         seed,
         learning_rate,
         batch_size,
         activation,
         optimizer,
         kernel_init,
         regularization,
         loss,
         timesteps,
         use_sent_and_trends=False):
    """Train and evaluate a stacked-LSTM stock-price model, logging via Sacred.

    Builds a Keras LSTM regressor from the given hyper-parameters, trains it
    on the stock data in ``stock_file`` (optionally merged with sentiment and
    Google-Trends features), evaluates on the test split and logs every
    metric through the Sacred ``_run`` object.

    Parameters
    ----------
    _run : Sacred run object; metrics are written with ``_run.log_scalar``.
    stock_file : path of the stocks CSV fed to ``data.Stocks``.
    days_back : window of past days for the technical indicators.
    days_forward : prediction horizon in days; also used to shift targets
        and inside the direction-accuracy metric.
    max_epochs : upper bound on epochs (early stopping may end sooner).
    early_stopping_threshold : patience (in epochs) for the val_loss stop.
    num_neurons, num_hidden_layers : LSTM width and depth.
    seed : seed for the kernel initializer.
    learning_rate, batch_size, activation, optimizer, kernel_init, loss,
    timesteps : usual Keras hyper-parameters; only 'glorot_uniform' and
        'adagrad'/'adam' are implemented.
    regularization : accepted but never used in this function.
    use_sent_and_trends : when True, merge sentiment/trends features into
        the stock dataframe (file paths are hard-coded to Microsoft data).
    """
    # Read the stocks csv into a dataframe
    stock = data.Stocks(stock_file)
    stock.calc_patel_TI(days_back)
    if use_sent_and_trends:
        # If we have a sentiment file add it to the stock df
        # NOTE(review): both feature paths are hard-coded to MSFT data.
        sentiments = pd.read_csv(
            '../data/nytarticles/microsoft.2013-12-31.2018-12-31.imputed.sent.csv',
            index_col='date')
        trends = pd.read_csv(
            '../data/trends/msft.2013-12-31.2018-12-31.fixed.dates.csv',
            index_col='date')
        # Left-join on the date index so every sentiment row keeps its trend.
        sent_trends = pd.merge(sentiments,
                               trends,
                               how='left',
                               left_index=True,
                               right_index=True)
        sent_trends[
            'sent_trends'] = sent_trends['sentiment'] * sent_trends['msft']
        # NOTE(review): function-scope import; file-top would be conventional.
        import numpy as np
        # Random column; after the drop() below it is the ONLY merged feature
        # that actually survives into stock.df.
        sent_trends['randNumCol'] = np.random.randint(1, 100,
                                                      sent_trends.shape[0])
        stock.df = pd.merge(stock.df,
                            sent_trends,
                            how='left',
                            left_index=True,
                            right_index=True)
        # Drop the intermediate columns; 'randNumCol' remains.
        stock.df.drop(['sentiment', 'msft', 'sent_trends'],
                      axis='columns',
                      inplace=True)

    # Shift targets so the model predicts days_forward into the future.
    stock.shift(days_forward)

    # Create the model
    model = K.Sequential()

    # Create the kernel initializer with the seed
    if kernel_init == 'glorot_uniform':
        kernel_initializer = K.initializers.glorot_uniform(seed)
    else:
        raise NotImplementedError

    # Add the layers
    # The first LSTM must return sequences whenever more LSTM layers follow.
    return_sequences = True
    if num_hidden_layers == 1:
        return_sequences = False
    data_dim = stock.raw_values()['X'].shape[1]
    model.add(
        K.layers.LSTM(num_neurons,
                      input_shape=(timesteps, data_dim),
                      activation=activation,
                      return_sequences=return_sequences,
                      kernel_initializer=kernel_initializer))

    for i in range(num_hidden_layers - 1):
        # If not in the last layer return sequences
        if i != num_hidden_layers - 2:
            model.add(
                K.layers.LSTM(num_neurons,
                              activation=activation,
                              return_sequences=True,
                              kernel_initializer=kernel_initializer))
        else:
            model.add(
                K.layers.LSTM(num_neurons,
                              activation=activation,
                              kernel_initializer=kernel_initializer))

    # Add output layer
    # A single linear unit: the model regresses the (normalized) price.
    model.add(
        K.layers.Dense(1,
                       activation='linear',
                       kernel_initializer=kernel_initializer))

    # Define Root Mean Squared Relative Error metric
    def root_mean_squared_relative_error(y_true, y_pred):
        # Clip |y_true| at epsilon so the division can never hit zero.
        squared_relative_error = K.backend.square(
            (y_true - y_pred) /
            K.backend.clip(K.backend.abs(y_true), K.backend.epsilon(), None))
        mean_squared_relative_error = K.backend.mean(squared_relative_error,
                                                     axis=-1)
        return K.backend.sqrt(mean_squared_relative_error)

    # Define Direction Accuracy metric
    def direction_accuracy(y_true, y_pred):
        # sign returns either -1 (if <0), 0 (if ==0), or 1 (if >0)
        true_signs = K.backend.sign(y_true[days_forward:] -
                                    y_true[:-days_forward])
        # NOTE(review): the baseline of the predicted move is
        # y_true[:-days_forward], i.e. the move is measured from the last
        # *actual* price. If the intent was to compare the two series' own
        # changes, the second term should be y_pred[:-days_forward] — confirm.
        pred_signs = K.backend.sign(y_pred[days_forward:] -
                                    y_true[:-days_forward])

        equal_signs = K.backend.equal(true_signs, pred_signs)
        return K.backend.mean(equal_signs, axis=-1)

    # Create the optimizer
    if optimizer == 'adagrad':
        optimizer = K.optimizers.Adagrad(learning_rate)
    elif optimizer == 'adam':
        optimizer = K.optimizers.Adam(learning_rate)
    else:
        raise NotImplementedError

    model.compile(optimizer=optimizer,
                  loss=loss,
                  metrics=[
                      'mean_absolute_percentage_error', 'mean_absolute_error',
                      root_mean_squared_relative_error, 'mean_squared_error',
                      direction_accuracy
                  ])

    # Create the logging callback
    # The metrics are logged in the run's metrics and at heartbeat events
    # every 10 secs they get written to mongodb
    def on_epoch_end_metrics_log(epoch, logs):
        for metric_name, metric_value in logs.items():
            # The validation set keys have val_ prepended to the metric,
            # add train_ to the training set keys
            if 'val' not in metric_name:
                metric_name = 'train_' + metric_name

            _run.log_scalar(metric_name, metric_value, epoch)

    metrics_log_callback = K.callbacks.LambdaCallback(
        on_epoch_end=on_epoch_end_metrics_log)

    # Early-stop on validation loss; keep the best weights on disk.
    callbacks_list = [
        K.callbacks.EarlyStopping(monitor='val_loss',
                                  patience=early_stopping_threshold),
        K.callbacks.ModelCheckpoint(filepath='../models/best_model.h5',
                                    monitor='val_loss',
                                    save_best_only=True), metrics_log_callback
    ]

    # Train on the normalized, windowed training split; validate on 'val'.
    model.fit(stock.raw_values_lstm_wrapper(dataset='train',
                                            norm=True,
                                            timesteps=timesteps)['X'],
              stock.raw_values_lstm_wrapper(dataset='train',
                                            norm=True,
                                            timesteps=timesteps)['y'],
              epochs=max_epochs,
              batch_size=batch_size,
              verbose=0,
              callbacks=callbacks_list,
              validation_data=(stock.raw_values_lstm_wrapper(
                  dataset='val', norm=True, timesteps=timesteps)['X'],
                               stock.raw_values_lstm_wrapper(
                                   dataset='val',
                                   norm=True,
                                   timesteps=timesteps)['y']))

    # Calculate metrics for normalized values
    test_norm_metrics = model.evaluate(
        stock.raw_values_lstm_wrapper(dataset='test',
                                      norm=True,
                                      timesteps=timesteps)['X'],
        stock.raw_values_lstm_wrapper(dataset='test',
                                      norm=True,
                                      timesteps=timesteps)['y'],
        verbose=0)

    # Log the metrics from the normalized values
    for metric in zip(model.metrics_names, test_norm_metrics):
        _run.log_scalar('test_norm_' + metric[0], metric[1])

    # Now calculate and save the unnormalised metrics
    # Predict returns normalised values
    y_pred_norm = model.predict(
        stock.raw_values_lstm_wrapper(dataset='test',
                                      norm=True,
                                      timesteps=timesteps)['X'])
    # Scale the output back to the actual stock price
    y_pred = stock.denorm_predictions(y_pred_norm)

    # Calculate the unnormalized metrics
    # Note: no norm=True here — targets are already in price units.
    y_true = stock.raw_values_lstm_wrapper(dataset='test',
                                           timesteps=timesteps)['y']

    # df1 = pd.DataFrame({'date': stock.df.index.values[-y_pred.shape[0]:], 'y_pred': y_pred.flatten(), 'y_true': y_true.flatten()})
    # df1.set_index('date', inplace=True)
    # df1.to_csv('plot_data_lstm.csv')
    test_metrics = {
        'test_loss':
        metrics.mean_squared_error(y_true, y_pred),
        'test_mean_absolute_percentage_error':
        metrics.mean_absolute_percentage_error(y_true, y_pred),
        'test_mean_absolute_error':
        metrics.mean_absolute_error(y_true, y_pred),
        'test_root_mean_squared_relative_error':
        metrics.root_mean_squared_relative_error(y_true, y_pred),
        'test_mean_squared_error':
        metrics.mean_squared_error(y_true, y_pred),
        'test_direction_accuracy':
        metrics.direction_accuracy(y_true, y_pred, days_forward)
    }

    # Save the metrics
    for metric_name, metric_value in test_metrics.items():
        _run.log_scalar(metric_name, metric_value)
Ejemplo n.º 3
0
    model.fit(X_train,
              y_train,
              batch_size=128,
              epochs=epochs_per_checkpoint,
              shuffle=True)
    model.save(
        "SYNTH-LOOKBACK-{}-ALL-{}epochs-1WIN.h5".format(
            key_name, epochs + epochs_per_checkpoint), model)

# ======= Disaggregation phase
# Load the test building's mains and appliance meter; scale the meter
# readings back to Watts with the normalization maximum mmax.
mains, meter = opends(test_building, key_name)
X_test = mains
y_test = meter * mmax

# Predict data
# Slide a window of length input_window across the whole test series.
X_batch, Y_batch = gen_batch(X_test, y_test,
                             len(X_test) - input_window, 0, input_window)
pred = model.predict(X_batch) * mmax  # back to Watts
pred[pred < 0] = 0  # power cannot be negative; clamp model noise
pred = np.transpose(pred)[0]
# Save results
np.save('pred.results', pred)

# Calculate and show metrics
print("============ Recall Precision Accurracy F1 {}".format(
    metrics.recall_precision_accuracy_f1(pred, Y_batch, threshold)))
print("============ relative_error_total_energy {}".format(
    metrics.relative_error_total_energy(pred, Y_batch)))
print("============ mean_absolute_error {}".format(
    metrics.mean_absolute_error(pred, Y_batch)))
Ejemplo n.º 4
0
print("========== TRAIN ============")
# Train in 5-epoch increments, exporting a checkpoint model after each one.
epochs = 0
for i in range(3):
    gru.train(train_mains, train_meter, epochs=5, sample_period=sample_period)
    epochs += 5
    gru.export_model("REDD-GRU-h{}-{}-{}epochs.h5".format(train_building,
                                                        meter_key,
                                                        epochs))
    print("CHECKPOINT {}".format(epochs))
end = time.time()
print("Train =", end-start, "seconds.")

print("========== DISAGGREGATE ============")
# Write the disaggregated appliance traces to an HDF5 datastore.
disag_filename = 'disag-out.h5'
output = HDFDataStore(disag_filename, 'w')
gru.disaggregate(test_mains, output, train_meter, sample_period=sample_period)
output.close()


print("========== RESULTS ============")
# Re-open the disaggregation output and score it against ground truth.
result = DataSet(disag_filename)
res_elec = result.buildings[test_building].elec
rpaf = metrics.recall_precision_accuracy_f1(res_elec[meter_key], test_elec[meter_key])
print("============ Recall: {}".format(rpaf[0]))
print("============ Precision: {}".format(rpaf[1]))
print("============ Accuracy: {}".format(rpaf[2]))
# BUG FIX: F1 is rpaf[3]; index 2 was re-printing accuracy.
print("============ F1 Score: {}".format(rpaf[3]))

print("============ Relative error in total energy: {}".format(metrics.relative_error_total_energy(res_elec[meter_key], test_elec[meter_key])))
print("============ Mean absolute error(in Watts): {}".format(metrics.mean_absolute_error(res_elec[meter_key], test_elec[meter_key])))
Ejemplo n.º 5
0
def experiment(key_name, start_e, end_e):
    '''Trains a network and disaggregates the testset.

	Displays the metrics for the disaggregated part and writes them,
	together with the raw predictions, under the configured save path.

	Parameters
	----------
	key_name : The string key of the appliance
	start_e : The starting number of epochs for Training
	         (0 trains from scratch, >0 resumes from the matching checkpoint)
	end_e: The ending number of epochs for Training
	'''

    # =======  Open configuration file
    if (key_name not in allowed_key_names):
        print("    Device {} not available".format(key_name))
        # BUG FIX: the message used print's extra-arg form with a literal
        # "{}"; format() substitutes the list properly.
        print("    Available device names: {}".format(allowed_key_names))
        # BUG FIX: bail out instead of crashing later on a missing config.
        return
    conf_filename = "appconf/{}.json".format(key_name)
    with open(conf_filename) as data_file:
        conf = json.load(data_file)

    input_window = conf['lookback']
    threshold = conf['on_threshold']
    mamax = 5000  # fixed normalization ceiling for the mains signal
    memax = conf['memax']
    mean = conf['mean']
    std = conf['std']
    train_buildings = conf['train_buildings']
    test_building = conf['test_building']
    on_threshold = conf['on_threshold']
    meter_key = conf['nilmtk_key']
    save_path = conf['save_path']

    # ======= Training phase
    print("Training for device: {}".format(key_name))
    print("    train_buildings: {}".format(train_buildings))

    # Open train sets
    X_train = np.load("dataset/trainsets/X-{}.npy".format(key_name))
    X_train = normalize(X_train, mamax, mean, std)
    y_train = np.load("dataset/trainsets/Y-{}.npy".format(key_name))
    y_train = normalize(y_train, memax, mean, std)
    model = create_model(input_window)

    # Resume from the checkpoint matching start_e, if any.
    if start_e > 0:
        model = load_model(
            save_path +
            "CHECKPOINT-{}-{}epochs.hdf5".format(key_name, start_e))

    if end_e > start_e:
        # Checkpoint after every epoch so long runs can be resumed.
        filepath = save_path + "CHECKPOINT-" + key_name + "-{epoch:01d}epochs.hdf5"
        checkpoint = ModelCheckpoint(filepath, verbose=1, save_best_only=False)
        history = model.fit(X_train,
                            y_train,
                            batch_size=128,
                            epochs=end_e,
                            shuffle=True,
                            initial_epoch=start_e,
                            callbacks=[checkpoint])
        losses = history.history['loss']

        # BUG FIX: Model.save() takes only the filepath here; the stray
        # second argument (the model itself) was being passed as `overwrite`.
        model.save(
            "{}CHECKPOINT-{}-{}epochs.hdf5".format(save_path, key_name, end_e))

        # Save training loss per epoch, appending to any previous history.
        try:
            previous = np.loadtxt("{}losses.csv".format(save_path))
            losses = np.append(previous, losses)
        except OSError:
            # No previous loss file yet -- start a fresh one.
            pass
        np.savetxt("{}losses.csv".format(save_path), losses, delimiter=",")

    # ======= Disaggregation phase
    mains, meter = opends(test_building, key_name)
    X_test = normalize(mains, mamax, mean, std)
    y_test = meter

    # Predict data
    # Slide a window of length input_window across the whole test series.
    X_batch, Y_batch = gen_batch(X_test, y_test,
                                 len(X_test) - input_window, 0, input_window)
    pred = model.predict(X_batch)
    pred = denormalize(pred, memax, mean, std)
    pred[pred < 0] = 0  # power cannot be negative; clamp model noise
    pred = np.transpose(pred)[0]
    # Save results
    np.save("{}pred-{}-epochs{}".format(save_path, key_name, end_e), pred)

    rpaf = metrics.recall_precision_accuracy_f1(pred, Y_batch, threshold)
    rete = metrics.relative_error_total_energy(pred, Y_batch)
    mae = metrics.mean_absolute_error(pred, Y_batch)

    print("============ Recall: {}".format(rpaf[0]))
    print("============ Precision: {}".format(rpaf[1]))
    print("============ Accuracy: {}".format(rpaf[2]))
    print("============ F1 Score: {}".format(rpaf[3]))

    print("============ Relative error in total energy: {}".format(rete))
    print("============ Mean absolute error(in Watts): {}".format(mae))

    # Persist the metrics as one comma-separated line; the context manager
    # guarantees the file is closed even if a write fails.
    with open(
            "{}results-pred-{}-{}epochs".format(save_path, key_name, end_e),
            'w') as res_out:
        for r in rpaf:
            res_out.write(str(r))
            res_out.write(',')
        res_out.write(str(rete))
        res_out.write(',')
        res_out.write(str(mae))
Ejemplo n.º 6
0
# Load the disaggregation output and pull the predicted / true meter objects.
result = DataSet(DISAG)
res_elec = result.buildings[TEST_BUILDING].elec
predicted = res_elec[APPLIANCE]
ground_truth = test_elec[APPLIANCE]

# Plot predicted vs ground-truth power and save the figure to disk.
fig = plt.figure()
ax = plt.subplot(111)
ax.plot(ground_truth.power_series_all_data(), label='ground truth')
ax.plot(predicted.power_series_all_data(), label='predicted')
#plt.xlim('2017-10-08 00:00:00', '2017-10-08 01:00:00')
#plt.ylim(0, 300)
plt.xlabel('Time')
plt.ylabel('Power [W]')
plt.title(APPLIANCE + ' Disaggregation')
myFmt = mdates.DateFormatter('%d:%H:%M')  # day:hour:minute tick labels
ax.xaxis.set_major_formatter(myFmt)
ax.legend()
plt.savefig(APPLIANCE + "_mlp.png")

# NOTE(review): local-module import placed mid-script; file top would be
# conventional.
import metrics
print("============ Relative error in total energy: {}".format(
    metrics.relative_error_total_energy(predicted, ground_truth)))
print("============ Mean absolute error(in Watts): {}".format(
    metrics.mean_absolute_error(predicted, ground_truth)))
print("============ List of percentages for every days\n")
# One entry per calendar day across the test window.
date_series = pd.date_range(start=START_TEST, end=END_TEST, freq='D')

print(
    metrics.daily_relative_consume(predicted, ground_truth, test_mains,
                                   date_series))
Ejemplo n.º 7
0
def runExperiment(experiment: experimentInfo, metricsResFileName,
                  clearMetricsFile):
    """Disaggregate one house/appliance with the stacked ensemble, log metrics.

    Streams aligned chunks of the base-model output datasets listed in
    ``experiment.dsList`` through the module-level ``scaler`` and classifier
    ``clf``, writes predictions into a NILMTK-compatible HDF5 datastore, then
    computes recall/precision/accuracy/F1, relative error, MAE and RMSE
    against the original dataset and appends a row to ``metricsResFileName``.

    Parameters
    ----------
    experiment : experimentInfo
        Bundle of dataset paths, output file name, test building id, meter
        key, path to the original dataset, and the appliance on-threshold.
    metricsResFileName : destination CSV for the result row.
    clearMetricsFile : forwarded to metrics.writeResultsToCSV; presumably
        truncates the file first -- confirm against that helper.
    """
    dsPathsList_Test = experiment.dsList
    outFileName = experiment.outName
    test_building = experiment.building
    meter_key = experiment.meter_key
    pathOrigDS = experiment.pathOrigDS
    meterTH = experiment.meterTH
    print('House ', test_building)

    # Load a "complete" dataset to have the test's timerange
    test = DataSet(dsPathsList_Test[0])
    test_elec = test.buildings[test_building].elec
    testRef_meter = test_elec.submeters(
    )[meter_key]  # will be used as reference to align all meters based on this

    # Align every test meter with testRef_meter as master
    test_series_list = []
    for path in dsPathsList_Test:
        test = DataSet(path)
        test_elec = test.buildings[test_building].elec
        test_meter = test_elec.submeters()[meter_key]
        # print('Stack test: ', test_meter.get_timeframe().start.date(), " - ", test_meter.get_timeframe().end.date())
        aligned_meters = align_two_meters(testRef_meter, test_meter)
        test_series_list.append(aligned_meters)

    # Init vars for the output
    MIN_CHUNK_LENGTH = 300  # Depends on the basemodels of the ensemble
    timeframes = []
    building_path = '/building{}'.format(test_meter.building())
    mains_data_location = building_path + '/elec/meter1'
    data_is_available = False
    disag_filename = outFileName
    output_datastore = HDFDataStore(disag_filename, 'w')

    run = True
    chunkDataForOutput = None
    # -- Used to hold necessary data for saving the results using NILMTK (e.g. timeframes).
    # -- (in case where chunks have different size (not in current implementation), must use the chunk whose windowsSize is the least (to have all the data))

    # Stream chunk-by-chunk: each iteration pulls the next chunk from every
    # aligned series, stacks them column-wise, scales, predicts, and appends
    # the prediction to the output datastore.
    while run:
        try:
            testX = []
            columnInd = 0
            # Get Next chunk of each series
            for testXGen in test_series_list:
                chunkALL = next(testXGen)
                chunk = chunkALL[
                    'slave']  # slave is the meter needed (master is only for aligning)
                chunk.fillna(0, inplace=True)
                if (columnInd == 0):
                    chunkDataForOutput = chunk  # Use 1st found chunk for it's metadata
                if (testX == []):
                    testX = np.zeros(
                        [len(chunk), len(test_series_list)]
                    )  # Initialize the array that will hold all of the series as columns
                testX[:, columnInd] = chunk[:]
                columnInd += 1
            testX = scaler.transform(testX)
        except:
            # NOTE(review): bare except is used as the loop exit when a
            # generator raises StopIteration, but it also swallows any real
            # error (e.g. a scaler failure) -- consider narrowing.
            run = False
            break

        # Skip chunks too short for the ensemble's base models.
        if len(chunkDataForOutput) < MIN_CHUNK_LENGTH:
            continue
        # print("New sensible chunk: {}".format(len(chunk)))

        startTime = chunkDataForOutput.index[0]
        endTime = chunkDataForOutput.index[
            -1]  # chunkDataForOutput.shape[0] - 1
        # print('Start:',startTime,'End:',endTime)
        timeframes.append(TimeFrame(
            startTime, endTime))  #info needed for output for use with NILMTK
        measurement = ('power', 'active')

        pred = clf.predict(testX)
        column = pd.Series(pred, index=chunkDataForOutput.index, name=0)
        appliance_powers_dict = {}
        appliance_powers_dict[0] = column
        appliance_power = pd.DataFrame(appliance_powers_dict)
        # Power cannot be negative; clamp model noise.
        appliance_power[appliance_power < 0] = 0

        # Append prediction to output
        data_is_available = True
        cols = pd.MultiIndex.from_tuples([measurement])
        meter_instance = test_meter.instance()
        df = pd.DataFrame(appliance_power.values,
                          index=appliance_power.index,
                          columns=cols,
                          dtype="float32")
        key = '{}/elec/meter{}'.format(building_path, meter_instance)
        output_datastore.append(key, df)

        # Append aggregate data to output
        mains_df = pd.DataFrame(chunkDataForOutput,
                                columns=cols,
                                dtype="float32")
        # Note (For later): not 100% right. Should be mains. But it won't be used anywhere, so it doesn't matter in this case
        output_datastore.append(key=mains_data_location, value=mains_df)

    # Save metadata to output
    if data_is_available:

        disagr = Disaggregator()
        disagr.MODEL_NAME = 'Stacked model'

        disagr._save_metadata_for_disaggregation(
            output_datastore=output_datastore,
            sample_period=sample_period,
            measurement=measurement,
            timeframes=timeframes,
            building=test_meter.building(),
            meters=[test_meter])

    #======================== Calculate Metrics =====================================
    # Reload the original dataset restricted to the test timeframe to serve
    # as ground truth.
    testYDS = DataSet(pathOrigDS)
    testYDS.set_window(start=test_meter.get_timeframe().start.date(),
                       end=test_meter.get_timeframe().end.date())
    testY_elec = testYDS.buildings[test_building].elec
    testY_meter = testY_elec.submeters()[meter_key]
    test_mains = testY_elec.mains()

    result = DataSet(disag_filename)
    res_elec = result.buildings[test_building].elec
    # NOTE(review): meterTH is passed twice here -- verify the metric's
    # signature (other call sites in this file pass a single threshold).
    rpaf = metrics.recall_precision_accuracy_f1(res_elec[meter_key],
                                                testY_meter, meterTH, meterTH)
    relError = metrics.relative_error_total_energy(res_elec[meter_key],
                                                   testY_meter)
    MAE = metrics.mean_absolute_error(res_elec[meter_key], testY_meter)
    RMSE = metrics.RMSE(res_elec[meter_key], testY_meter)
    print("============ Recall: {}".format(rpaf[0]))
    print("============ Precision: {}".format(rpaf[1]))
    print("============ Accuracy: {}".format(rpaf[2]))
    print("============ F1 Score: {}".format(rpaf[3]))
    print("============ Relative error in total energy: {}".format(relError))
    print("============ Mean absolute error(in Watts): {}".format(MAE))
    print("=== For docs: {:.4}\t{:.4}\t{:.4}\t{:.4}\t{:.4}\t{:.4}".format(
        rpaf[0], rpaf[1], rpaf[2], rpaf[3], relError, MAE))
    # print("============ RMSE: {}".format(RMSE))
    # print("============ TECA: {}".format(metrics.TECA([res_elec[meter_key]],[testY_meter],test_mains)))

    # Collect everything into one row for the results CSV.
    resDict = {
        'model': 'TEST',
        'building': test_building,
        'Appliance': meter_key,
        'Appliance_Type': 2,
        'Recall': rpaf[0],
        'Precision': rpaf[1],
        'Accuracy': rpaf[2],
        'F1': rpaf[3],
        'relError': relError,
        'MAE': MAE,
        'RMSE': RMSE
    }
    metrics.writeResultsToCSV(resDict, metricsResFileName, clearMetricsFile)
Ejemplo n.º 8
0
def run_test(x_train, x_test, y_train, y_test, classifier, sample_weight=None):
    """Fit *classifier* on the train split and score it on the test split.

    Returns a 3-tuple: (confusion matrix, per-class recall over labels 0-3,
    mean absolute error), all computed on the test predictions.
    """
    classifier.fit(x_train, y_train, sample_weight=sample_weight)
    predictions = classifier.predict(x_test)
    cm = confusion_matrix(y_true=y_test, y_pred=predictions)
    per_class_recall = recall_score(y_true=y_test,
                                    y_pred=predictions,
                                    labels=np.arange(4),
                                    average=None)
    mae = mean_absolute_error(y_true=y_test, y_pred=predictions)
    return cm, per_class_recall, mae