Example #1
def test(model: nn.Module, test_loader: DataLoader, loss_function, device, len_rotation=24) -> Dict:
    model.eval()
    test_loss = 0
    outputs = []
    targets = []
    with torch.no_grad():
        for idx, (x, y) in enumerate(test_loader):

            x = torch.Tensor(np.vstack(x)).to(device)
            y = torch.Tensor(y).to(device)

            output = model(x)

            test_loss += loss_function(output.view(-1), y.view(-1)).item()
            outputs.append(output.cpu().numpy().reshape(-1))
            targets.append(y.cpu().numpy().reshape(-1))

    targets = np.concatenate(targets).reshape(-1)
    outputs = np.concatenate(outputs).reshape(-1)

    test_loss /= len(test_loader.dataset)*len_rotation

    evaluation = {
        'test_loss': test_loss,
        'c_index': metrics.c_index(targets, outputs),
        'RMSE': metrics.RMSE(targets, outputs),
        'MAE': metrics.MAE(targets, outputs),
        'SD': metrics.SD(targets, outputs),
        'CORR': metrics.CORR(targets, outputs),
    }

    return evaluation
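The examples in this listing report their results through small project-specific metric helpers (named metrics, me, or m), whose signatures vary between projects: some take two arrays, others a fitted model plus inputs or a data loader. Below is a minimal sketch of plausible array-based versions of the helpers called in Example #1; the actual modules may differ in detail (in particular, SD is assumed here to be the residual standard deviation and c_index the pairwise concordance index).

# A plausible, minimal sketch of the array-based metric helpers used above
# (assumed definitions; the projects' own `metrics` modules may differ).
import numpy as np
from scipy import stats


def RMSE(y_true, y_pred):
    y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
    return float(np.sqrt(np.mean((y_true - y_pred) ** 2)))


def MAE(y_true, y_pred):
    y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
    return float(np.mean(np.abs(y_true - y_pred)))


def SD(y_true, y_pred):
    # assumed: standard deviation of the residuals
    return float(np.std(np.asarray(y_true) - np.asarray(y_pred)))


def CORR(y_true, y_pred):
    # Pearson correlation between targets and predictions
    return float(stats.pearsonr(np.asarray(y_true), np.asarray(y_pred))[0])


def c_index(y_true, y_pred):
    # assumed: pairwise concordance index, O(n^2) reference version
    y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
    pairs, concordant = 0, 0.0
    for i in range(len(y_true)):
        for j in range(i + 1, len(y_true)):
            if y_true[i] != y_true[j]:
                pairs += 1
                diff = (y_true[i] - y_true[j]) * (y_pred[i] - y_pred[j])
                concordant += 1.0 if diff > 0 else (0.5 if diff == 0 else 0.0)
    return concordant / pairs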
Example #2
def test(model: nn.Module, test_loader, loss_function, device, show):
    model.eval()
    test_loss = 0
    outputs = []
    targets = []
    with torch.no_grad():
        for idx, (*x, y) in tqdm(enumerate(test_loader), disable=not show, total=len(test_loader)):
            for i in range(len(x)):
                x[i] = x[i].to(device)
            y = y.to(device)

            y_hat = model(*x)

            test_loss += loss_function(y_hat.view(-1), y.view(-1)).item()
            outputs.append(y_hat.cpu().numpy().reshape(-1))
            targets.append(y.cpu().numpy().reshape(-1))

    targets = np.concatenate(targets).reshape(-1)
    outputs = np.concatenate(outputs).reshape(-1)

    test_loss /= len(test_loader.dataset)

    evaluation = {
        'loss': test_loss,
        'c_index': metrics.c_index(targets, outputs),
        'RMSE': metrics.RMSE(targets, outputs),
        'MAE': metrics.MAE(targets, outputs),
        'SD': metrics.SD(targets, outputs),
        'CORR': metrics.CORR(targets, outputs),
    }

    return evaluation
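A hypothetical call to the evaluator above might look as follows; all names are placeholders, and a sum-reduced loss matches the later division by len(test_loader.dataset).

# Hypothetical usage of the test() function above; `model` and `test_loader`
# are assumed to exist and to yield (*inputs, target) batches.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
loss_function = nn.MSELoss(reduction='sum')  # summed so dividing by the dataset size gives a mean
results = test(model, test_loader, loss_function, device, show=True)
print(results['loss'], results['RMSE'], results['c_index'])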
Example #3
def cluster_evaluation(mask):

    metric_list = []

    name = mask[0:6]

    sample_list = [1000, 3000, 5000, 7000, 9000, 11000, 14000]

    for i in tqdm(sample_list):
        try:
            xtrain, xval, _, ytrain, yval, _ = dp.areal_model(length=i,
                                                              mask=mask)
            m = gpm.multi_gp(xtrain, xval, ytrain, yval)

            training_R2 = me.R2(m, xtrain, ytrain)
            training_RMSE = me.RMSE(m, xtrain, ytrain)
            val_R2 = me.R2(m, xval, yval)
            val_RMSE = me.RMSE(m, xval, yval)

            metric_list.append(
                [i, training_R2, training_RMSE, val_R2, val_RMSE])

        except Exception:
            # retry with a slightly larger sample if data preparation or the model fit fails
            print(i + 100)
            xtrain, xval, _, ytrain, yval, _ = dp.areal_model(length=i + 100,
                                                              mask=mask)
            m = gpm.multi_gp(xtrain, xval, ytrain, yval)

            training_R2 = me.R2(m, xtrain, ytrain)
            training_RMSE = me.RMSE(m, xtrain, ytrain)
            val_R2 = me.R2(m, xval, yval)
            val_RMSE = me.RMSE(m, xval, yval)

            metric_list.append(
                [i, training_R2, training_RMSE, val_R2, val_RMSE])

    df = pd.DataFrame(
        metric_list,
        columns=[
            "samples", "training_R2", "training_RMSE", "val_R2", "val_RMSE"
        ],
    )
    df.to_csv(name + "-eval-2020-07-22.csv")
Example #4
def multi_gp(xtrain, xval, ytrain, yval, save=False):
    """ Returns simple GP model """

    # model construction
    k1 = gpflow.kernels.Periodic(
        gpflow.kernels.RBF(lengthscales=1, variance=1, active_dims=[0]))
    k1b = gpflow.kernels.RBF(lengthscales=2, variance=1, active_dims=[0])
    k2 = gpflow.kernels.RBF(lengthscales=np.ones(len(xval[0]) - 1),
                            active_dims=np.arange(1, len(xval[0])))
    # k3 = gpflow.kernels.White()

    k = k1 * k1b + k2  # +k

    # mean_function = gpflow.mean_functions.Linear(A=np.ones((len(xtrain[0]),
    # 1)), b=[1])

    # , mean_function=mean_function)
    m = gpflow.models.GPR(data=(xtrain, ytrain.reshape(-1, 1)), kernel=k)

    opt = gpflow.optimizers.Scipy()
    # , options=dict(maxiter=1000)
    opt.minimize(m.training_loss, m.trainable_variables)
    # print_summary(m)

    x_plot = np.concatenate((xtrain, xval))
    y_gpr, y_std = m.predict_y(x_plot)

    print(
        " {0:.3f} | {1:.3f} | {2:.3f} | {3:.3f} | {4:.3f} | {5:.3f} |".format(
            me.R2(m, xtrain, ytrain),
            me.RMSE(m, xtrain, ytrain),
            me.R2(m, xval, yval),
            me.RMSE(m, xval, yval),
            np.mean(y_gpr),
            np.mean(y_std),
        ))

    if save is not False:
        filepath = save_model(m, xval, save)
        print(filepath)

    return m
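The GP examples (this one, hybrid_gp, and the evaluation loops) score models through me.R2 and me.RMSE, which take a fitted GPflow model plus inputs and targets rather than two arrays. A minimal sketch of such helpers, assuming GPflow 2's predict_y interface, might look like this; the real me module may differ.

# Assumed model-based evaluation helpers in the spirit of `me.R2` / `me.RMSE`.
import numpy as np
from sklearn.metrics import r2_score


def R2(model, x, y):
    y_pred, _ = model.predict_y(x)  # predictive mean and variance
    return float(r2_score(np.asarray(y).reshape(-1), y_pred.numpy().reshape(-1)))


def RMSE(model, x, y):
    y_pred, _ = model.predict_y(x)
    residuals = np.asarray(y).reshape(-1) - y_pred.numpy().reshape(-1)
    return float(np.sqrt(np.mean(residuals ** 2)))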
Example #5
def perform_analysis():
    """



    :return:
    """
    parser = parse_arg()
    args = parser.parse_args()

    # if len(sys.argv) == 1:  # no arguments, so print help message
    #     print("""Usage: python script.py data_path program_input out_path""")
    #     return
    #
    # dir_in = os.getcwd()
    # dir_out = os.getcwd()
    #
    # try:
    #     dir_in = sys.argv[1]
    #     dir_out = sys.argv[2]
    # except:
    #     print("Parameters: path/to/simple/file  input/folder  output/folder")
    #     sys.exit(0)

    #df = pd.read_csv(args.dir_in)
    df = pd.read_csv(args.input)
    (cal_df, tst_df) = separating_data_set(df)

    (x_train, y_train) = splitting_dataset(cal_df)
    (x_test, y_test) = splitting_dataset(tst_df)

    print(x_train)
    print(y_train)
    print(x_test)
    print(y_test)

    model = build_fnn(x_train)

    model.summary()
    early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=25)
    random.seed(1)
    model.fit(x_train, y_train, batch_size=args.batchSize, epochs=args.epochSize, validation_data=(x_test, y_test), callbacks=[early_stop])

    fnn_losses = pd.DataFrame(model.history.history)
    create_losses_plot(fnn_losses)

    predictions = model.predict(x_test)

    print("MSE:", metrics.MSE(y_test, predictions))
    print("RMSE:", metrics.RMSE(y_test, predictions))
    print("R-square:", metrics.R2(y_test, predictions))
    print("RPD:", metrics.RPD(y_test, predictions))

    create_prediction_plot(y_test, predictions)
Example #6
def uib_evaluation(average=False):

    metric_list = []

    sample_list = [1000, 3000, 5000, 7000, 9000, 11000, 14000]

    for i in tqdm(sample_list):
        try:
            xtrain, xval, _, ytrain, yval, _ = dp.areal_model(
                length=i, EDA_average=average)
            m = gpm.multi_gp(xtrain, xval, ytrain, yval)

            training_R2 = me.R2(m, xtrain, ytrain)
            training_RMSE = me.RMSE(m, xtrain, ytrain)
            val_R2 = me.R2(m, xval, yval)
            val_RMSE = me.RMSE(m, xval, yval)

            metric_list.append(
                [i, training_R2, training_RMSE, val_R2, val_RMSE])

        except Exception:
            # retry with a slightly larger sample if data preparation or the model fit fails
            print(i + 100)
            xtrain, xval, _, ytrain, yval, _ = dp.areal_model(
                length=i + 100, EDA_average=average)
            m = gpm.multi_gp(xtrain, xval, ytrain, yval)

            training_R2 = me.R2(m, xtrain, ytrain)
            training_RMSE = me.RMSE(m, xtrain, ytrain)
            val_R2 = me.R2(m, xval, yval)
            val_RMSE = me.RMSE(m, xval, yval)

            metric_list.append(
                [i, training_R2, training_RMSE, val_R2, val_RMSE])

    df = pd.DataFrame(
        metric_list,
        columns=[
            "samples", "training_R2", "training_RMSE", "val_R2", "val_RMSE"
        ],
    )
    df.to_csv("uib-eval-2020-07-22.csv")
Example #7
def hybrid_gp(xtrain, xval, ytrain, yval, save=False):
    """ Returns whole basin or cluster GP model with hybrid kernel """

    dimensions = len(xtrain[0])

    k1 = gpflow.kernels.RBF(lengthscales=np.ones(dimensions),
                            active_dims=np.arange(0, dimensions))
    k2 = gpflow.kernels.RBF(lengthscales=np.ones(dimensions),
                            active_dims=np.arange(0, dimensions))

    alpha1 = hybrid_kernel(dimensions, 1)
    alpha2 = hybrid_kernel(dimensions, 2)

    k = alpha1 * k1 + alpha2 * k2

    m = gpflow.models.GPR(data=(xtrain, ytrain.reshape(-1, 1)), kernel=k)

    opt = gpflow.optimizers.Scipy()
    opt.minimize(m.training_loss, m.trainable_variables)
    # print_summary(m)

    x_plot = np.concatenate((xtrain, xval))
    y_gpr, y_std = m.predict_y(x_plot)

    print(
        " {0:.3f} | {1:.3f} | {2:.3f} | {3:.3f} | {4:.3f} | {5:.3f} |".format(
            me.R2(m, xtrain, ytrain),
            me.RMSE(m, xtrain, ytrain),
            me.R2(m, xval, yval),
            me.RMSE(m, xval, yval),
            np.mean(y_gpr),
            np.mean(y_std),
        ))

    if save is True:
        filepath = save_model(m, xval, "")
        print(filepath)

    return m
Example #8
    def predict(sess, XX, lagXX, YY, mYY, aYY, stepLen=params.decoder_length):

        y_size = len(YY[0])
        for yy in YY:
            assert len(yy) == y_size

        YY_pred = [[] for _ in range(len(YY))]
        SS_pred = [[] for _ in range(len(YY))]
        xItr = BatchGenerator(XX, batchSeqLen=params.sequence_length)
        lagXItr = BatchGenerator(lagXX, batchSeqLen=params.sequence_length)
        outputItr = BatchGenerator(YY, batchSeqLen=params.sequence_length)

        currItr = xItr.iterFinished
        total_loss = 0
        # loop until the generator's iterFinished counter changes,
        # i.e. until one full pass over the test data has been made
        while currItr == xItr.iterFinished:
            b_x1, _, _, _, _, _ = xItr.nextBatch(batchSize=params.batch_size,
                                                 stepLen=stepLen)
            b_lagx1, _, _, _, _, _ = lagXItr.nextBatch(
                batchSize=params.batch_size, stepLen=stepLen)
            b_y1, tsIndices_, _, _, _, mask = outputItr.nextBatch(
                batchSize=params.batch_size, stepLen=stepLen)
            #b_mYY = [mYY[tsInd] for tsInd in tsIndices_]

            dictionary = {
                inputPh: b_x1,
                lagPh: b_lagx1,
                outputPh: b_y1,
                keep_prob: 1.00,
                mode: 0.0,
                tsIndicesPh: tsIndices_,
                maskPh: mask
            }

            if params.is_probabilistic_pred:
                loss_ret, pred_ret, sigma_ret = sess.run(
                    [loss, pred, model_obj.sigma], feed_dict=dictionary)
            else:
                loss_ret, pred_ret = sess.run([loss, pred],
                                              feed_dict=dictionary)
                sigma_ret = -1.0 * np.ones_like(pred_ret)  # invalid sigma
            total_loss += loss_ret

            y_pred, sigma_pred = pred_ret, sigma_ret
            for i, tsInd in enumerate(tsIndices_):
                if mask[i] > 0:
                    YY_pred[tsInd] += y_pred[i].tolist()
                    SS_pred[tsInd] += sigma_pred[i].tolist()

        YY_gt = np.squeeze(np.array(YY)[:, params.encoder_length:, :], axis=-1)
        for yy, yy_pred in zip(YY_gt, YY_pred):
            assert len(yy) == len(yy_pred)
        YY_pred = np.array(YY_pred)
        SS_pred = np.array(SS_pred)
        assert YY_gt.shape[0] == YY_pred.shape[0]
        assert YY_gt.shape[1] == YY_pred.shape[1]

        if params.isNormalised:
            YY_gt = YY_gt * np.array(mYY)
            YY_pred = YY_pred * np.array(mYY)
        if params.deep_ar_normalize_seq:
            YY_gt = YY_gt * np.array(aYY)
            YY_pred = YY_pred * np.array(aYY)
        if params.isLogNormalised:
            YY_gt = np.exp(YY_gt)
            YY_pred = np.exp(YY_pred)

        error = np.square(YY_gt - YY_pred)
        test_len = YY_gt.shape[1]

        score = [
            metrics.RMSE(YY_gt, YY_pred),
            metrics.RMSE(YY_gt[:, :1], YY_pred[:, :1]),
            metrics.RMSE(YY_gt[:, :int(test_len / 2.0)],
                         YY_pred[:, :int(test_len / 2.0)]),
            metrics.RMSE(YY_gt[:, int(test_len / 2.0):],
                         YY_pred[:, int(test_len / 2.0):])
        ]

        print([
            metrics.ND(YY_gt, YY_pred),
            metrics.ND(YY_gt[:, :1], YY_pred[:, :1]),
            metrics.ND(YY_gt[:, :int(test_len / 2.0)],
                       YY_pred[:, :int(test_len / 2.0)]),
            metrics.ND(YY_gt[:, int(test_len / 2.0):],
                       YY_pred[:, int(test_len / 2.0):])
        ])

        return total_loss, score, YY_gt, YY_pred, SS_pred
Example #9
def perform_analysis():
    """



    :return:
    """
    parser = parse_arg()
    args = parser.parse_args()

    df = pd.read_csv(args.input)
    # df = df.apply(lambda x: preProcessing.scaling_y_data(x) if x.name == 'OC' else x)  # scaling OC data

    (cal_df, tst_df) = separating_data_set(df)

    (X_train, y_train) = splitting_dataset(cal_df)
    (X_test, y_test) = splitting_dataset(tst_df)

    # Scale the features
    X_train = preProcessing.scaler_min_max_x_data(X_train)
    X_test = preProcessing.scaler_min_max_x_data(X_test)

    y_train = preProcessing.scaler_min_max_y_data(y_train)
    y_test = preProcessing.scaler_min_max_y_data(y_test)

    print(X_train)
    print(y_train)
    print(X_test)
    print(y_test)

    print(X_train.shape)
    print(y_train.shape)

    if args.hiddenLayers == 5:
        model = build_fnn_5l(X_train)
    elif args.hiddenLayers == 4:
        model = build_fnn_4l(X_train)
    elif args.hiddenLayers == 3:
        model = build_fnn_3l(X_train)
    elif args.hiddenLayers == 2:
        model = build_fnn_2l(X_train)
    else:
        model = build_fnn_1l(X_train)

    model.summary()

    early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=25)
    random.seed(1)
    model.fit(X_train, y_train, batch_size=args.batchSize, epochs=args.epochSize, validation_data=(X_test, y_test),
              callbacks=[early_stop])

    fnn_losses = pd.DataFrame(model.history.history)
    create_losses_plot(fnn_losses)

    trained_model_save(model, "trained_model.h5")

    predictions = model.predict(X_test)

    print("MSE:", metrics.MSE(y_test, predictions))
    print("RMSE:", metrics.RMSE(y_test, predictions))
    print("R-square:", metrics.R2(y_test, predictions))
    print("RPD:", metrics.RPD(y_test, predictions))

    create_prediction_plot(y_test, predictions)
Example #10
def regression_report(y_true, y_pred):
    print('--------------------------------')
    print('MSE -', metrics.MSE(y_true, y_pred))
    print('RMSE -', metrics.RMSE(y_true, y_pred))
    print('MAN -', metrics.MAN(y_true, y_pred))
    print('--------------------------------')
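A hypothetical call with toy arrays, assuming metrics.MAN, like the other helpers, takes two same-length arrays:

# Hypothetical usage of regression_report() with toy data
import numpy as np

y_true = np.array([3.0, 2.5, 4.1, 5.0])
y_pred = np.array([2.8, 2.7, 3.9, 5.2])
regression_report(y_true, y_pred)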
Example #11
def runExperiment(experiment: experimentInfo, metricsResFileName,
                  clearMetricsFile):
    dsPathsList_Test = experiment.dsList
    outFileName = experiment.outName
    test_building = experiment.building
    meter_key = experiment.meter_key
    pathOrigDS = experiment.pathOrigDS
    meterTH = experiment.meterTH
    print('House ', test_building)

    # Load a "complete" dataset to have the test's timerange
    test = DataSet(dsPathsList_Test[0])
    test_elec = test.buildings[test_building].elec
    # will be used as the reference for aligning all the other meters
    testRef_meter = test_elec.submeters()[meter_key]

    # Align every test meter with testRef_meter as master
    test_series_list = []
    for path in dsPathsList_Test:
        test = DataSet(path)
        test_elec = test.buildings[test_building].elec
        test_meter = test_elec.submeters()[meter_key]
        # print('Stack test: ', test_meter.get_timeframe().start.date(), " - ", test_meter.get_timeframe().end.date())
        aligned_meters = align_two_meters(testRef_meter, test_meter)
        test_series_list.append(aligned_meters)

    # Init vars for the output
    MIN_CHUNK_LENGTH = 300  # Depends on the base models of the ensemble
    timeframes = []
    building_path = '/building{}'.format(test_meter.building())
    mains_data_location = building_path + '/elec/meter1'
    data_is_available = False
    disag_filename = outFileName
    output_datastore = HDFDataStore(disag_filename, 'w')

    run = True
    chunkDataForOutput = None
    # -- Holds the data needed for saving the results with NILMTK (e.g. timeframes).
    # -- If chunks ever had different sizes (not the case in this implementation), the chunk
    # -- with the smallest window size would have to be used so that all the data is covered.

    while run:
        try:
            testX = []
            columnInd = 0
            # Get Next chunk of each series
            for testXGen in test_series_list:
                chunkALL = next(testXGen)
                chunk = chunkALL['slave']  # 'slave' is the meter needed ('master' is only used for aligning)
                chunk.fillna(0, inplace=True)
                if columnInd == 0:
                    chunkDataForOutput = chunk  # use the first chunk for its metadata
                    # initialize the array that will hold all of the series as columns
                    testX = np.zeros([len(chunk), len(test_series_list)])
                testX[:, columnInd] = chunk[:]
                columnInd += 1
            testX = scaler.transform(testX)
        except Exception:
            # a series is exhausted (StopIteration) or a chunk could not be processed
            run = False
            break

        if len(chunkDataForOutput) < MIN_CHUNK_LENGTH:
            continue
        # print("New sensible chunk: {}".format(len(chunk)))

        startTime = chunkDataForOutput.index[0]
        endTime = chunkDataForOutput.index[-1]
        # print('Start:',startTime,'End:',endTime)
        timeframes.append(TimeFrame(startTime, endTime))  # timeframe info needed for the NILMTK output
        measurement = ('power', 'active')

        pred = clf.predict(testX)
        column = pd.Series(pred, index=chunkDataForOutput.index, name=0)
        appliance_powers_dict = {}
        appliance_powers_dict[0] = column
        appliance_power = pd.DataFrame(appliance_powers_dict)
        appliance_power[appliance_power < 0] = 0

        # Append prediction to output
        data_is_available = True
        cols = pd.MultiIndex.from_tuples([measurement])
        meter_instance = test_meter.instance()
        df = pd.DataFrame(appliance_power.values,
                          index=appliance_power.index,
                          columns=cols,
                          dtype="float32")
        key = '{}/elec/meter{}'.format(building_path, meter_instance)
        output_datastore.append(key, df)

        # Append aggregate data to output
        mains_df = pd.DataFrame(chunkDataForOutput,
                                columns=cols,
                                dtype="float32")
        # Note: not strictly correct (this should be the mains series), but the value is never used downstream, so it does not matter here.
        output_datastore.append(key=mains_data_location, value=mains_df)

    # Save metadata to output
    if data_is_available:

        disagr = Disaggregator()
        disagr.MODEL_NAME = 'Stacked model'

        disagr._save_metadata_for_disaggregation(
            output_datastore=output_datastore,
            sample_period=sample_period,
            measurement=measurement,
            timeframes=timeframes,
            building=test_meter.building(),
            meters=[test_meter])

    #======================== Calculate Metrics =====================================
    testYDS = DataSet(pathOrigDS)
    testYDS.set_window(start=test_meter.get_timeframe().start.date(),
                       end=test_meter.get_timeframe().end.date())
    testY_elec = testYDS.buildings[test_building].elec
    testY_meter = testY_elec.submeters()[meter_key]
    test_mains = testY_elec.mains()

    result = DataSet(disag_filename)
    res_elec = result.buildings[test_building].elec
    rpaf = metrics.recall_precision_accuracy_f1(res_elec[meter_key],
                                                testY_meter, meterTH, meterTH)
    relError = metrics.relative_error_total_energy(res_elec[meter_key],
                                                   testY_meter)
    MAE = metrics.mean_absolute_error(res_elec[meter_key], testY_meter)
    RMSE = metrics.RMSE(res_elec[meter_key], testY_meter)
    print("============ Recall: {}".format(rpaf[0]))
    print("============ Precision: {}".format(rpaf[1]))
    print("============ Accuracy: {}".format(rpaf[2]))
    print("============ F1 Score: {}".format(rpaf[3]))
    print("============ Relative error in total energy: {}".format(relError))
    print("============ Mean absolute error(in Watts): {}".format(MAE))
    print("=== For docs: {:.4}\t{:.4}\t{:.4}\t{:.4}\t{:.4}\t{:.4}".format(
        rpaf[0], rpaf[1], rpaf[2], rpaf[3], relError, MAE))
    # print("============ RMSE: {}".format(RMSE))
    # print("============ TECA: {}".format(metrics.TECA([res_elec[meter_key]],[testY_meter],test_mains)))

    resDict = {
        'model': 'TEST',
        'building': test_building,
        'Appliance': meter_key,
        'Appliance_Type': 2,
        'Recall': rpaf[0],
        'Precision': rpaf[1],
        'Accuracy': rpaf[2],
        'F1': rpaf[3],
        'relError': relError,
        'MAE': MAE,
        'RMSE': RMSE
    }
    metrics.writeResultsToCSV(resDict, metricsResFileName, clearMetricsFile)
Example #12
if __name__ == "__main__":
    data = pd.read_csv(FILENAME_READ)
    data = data.drop('Unnamed: 0', axis=1)
    columns = data.drop('Target', axis=1).columns

    weights, y_trains, X_trains, y_tests, X_tests = lr.cross_validation(data)

    data_write = pd.DataFrame(
        columns=["", "T1", "T2", "T3", "T4", "T5", "E", "STD"])

    for i in range(5):
        y_pred_test = lr.predict(X_tests[i], weights[i])
        y_pred_train = lr.predict(X_trains[i], weights[i])
        y_test = np.array(y_tests[i])
        y_train = np.array(y_trains[i])

        r2_test = m.R2(y_test, y_pred_test)
        r2_train = m.R2(y_train, y_pred_train)
        rmse_test = m.RMSE(y_test, y_pred_test)
        rmse_train = m.RMSE(y_train, y_pred_train)

        data_write["T" + str(i + 1)] = [
            r2_test, r2_train, rmse_test, rmse_train
        ] + list(weights[i].reshape(weights[i].shape[0], 1))

    data_write["E"] = data_write[["T1", "T2", "T3", "T4", "T5"]].mean(axis=1)
    data_write["STD"] = data_write[["T1", "T2", "T3", "T4", "T5"]].std(axis=1)

    data_write.index = ["R^2_test", "R^2_train", "RMSE_test", "RMSE_train"
                        ] + list(columns) + list(["1"])
    data_write.to_csv("result.csv")
Example #13
def single_loc_evaluation(location, perf_plot=False, hpar_plot=False):

    metric_list = []
    coord_list = sa.random_location_generator(location)
    n = len(coord_list)

    for i in tqdm(range(n)):
        try:
            xtrain, xval, _, ytrain, yval, _ = dp.point_model(
                coords=list(coord_list[i]))
            m = gpm.multi_gp(xtrain, xval, ytrain, yval)

            training_R2 = me.R2(m, xtrain, ytrain)
            training_RMSE = me.RMSE(m, xtrain, ytrain)
            val_R2 = me.R2(m, xval, yval)
            val_RMSE = me.RMSE(m, xval, yval)
            time_kernel_lengthscale = float(
                m.kernel.kernels[0].base_kernel.lengthscales.value())
            time_kernel_variance = float(
                m.kernel.kernels[0].base_kernel.variance.value())
            time_kernel_periodicity = float(m.kernel.kernels[0].period.value())
            N34_lengthscale = np.array(
                m.kernel.kernels[1].lengthscales.value())[2]
            d2m_lengthscale = np.array(
                m.kernel.kernels[1].lengthscales.value())[0]
            tcwv_lengthscale = np.array(
                m.kernel.kernels[1].lengthscales.value())[1]
            rbf_kernel_variance = float(m.kernel.kernels[1].variance.value())

            metric_list.append([
                coord_list[i, 0], coord_list[i, 1], training_R2, training_RMSE,
                val_R2, val_RMSE, time_kernel_lengthscale,
                time_kernel_variance, time_kernel_periodicity, N34_lengthscale,
                d2m_lengthscale, tcwv_lengthscale, rbf_kernel_variance
            ])

        except Exception:
            # skip locations where data preparation or the GP fit fails
            pass

    df = pd.DataFrame(
        metric_list,
        columns=[
            "latitude", "longitude", "training_R2", "training_RMSE", "val_R2",
            "val_RMSE", "time_kernel_lengthscale", "time_kernel_variance",
            "time_kernel_periodicity", "N34_lengthscale", "d2m_lengthscale",
            "tcwv_lengthscale", "rbf_kernel_variance"
        ],
    )

    now = datetime.datetime.now()
    df.to_csv("_Data/single-locations-eval-" + now.strftime("%Y-%m-%d") +
              ".csv")

    print(df.mean(axis=0))

    df_prep = df.set_index(["latitude", "longitude"])
    da = df_prep.to_xarray()

    if perf_plot is True:
        slm_perf_plots(da)

    if hpar_plot is True:
        slm_hpar_plots(da)
Example #14
def run(config):
    os.environ['DGLBACKEND'] = 'pytorch'
    if config['model']['gpu'] != 'any':
        os.environ['CUDA_VISIBLE_DEVICES'] = config['model']['gpu']
    torch.backends.cudnn.benchmark = True

    if not os.path.exists(config['model']['model_path']):
        os.makedirs(config['model']['model_path'])

    # ----------------------------------Prepare Dataset----------------------------------
    print('Prepare Dataset')
    sys.stdout.flush()
    feature_map = {}
    node_map = None
    user_bought = None
    if 'bipartite' in config['dataset'] and config['dataset']['bipartite']:
        node_map = {}
        user_bought = {}
    num_features = 0
    field_size = 0
    train_dataset, valid_dataset, test_dataset = None, None, None
    if config['dataset']['decoder'] == 'libfm':
        for file in config['dataset']['paths'].values():
            field_size = dataset.LibFMDataset.read_features(file, feature_map, field_size, node_map)
        num_features = len(feature_map)
        print("number of features:", num_features)
        train_dataset = dataset.LibFMDataset(config['dataset']['paths']['train'], feature_map, node_map, user_bought)
        if 'valid' in config['dataset']['paths']:
            valid_dataset = dataset.LibFMDataset(config['dataset']['paths']['valid'], feature_map)
        test_dataset = dataset.LibFMDataset(config['dataset']['paths']['test'], feature_map)
    train_loader = DataLoader(train_dataset, drop_last=True,
                              batch_size=config['dataset']['batch_size'], shuffle=True,
                              num_workers=config['dataset']['num_workers'])
    valid_loader = DataLoader(valid_dataset, batch_size=config['dataset']['batch_size'], shuffle=False, num_workers=0)
    test_loader = DataLoader(test_dataset, batch_size=config['dataset']['batch_size'], shuffle=False, num_workers=0)

    # ----------------------------------Load or Create Model----------------------------------
    model = models.load_model(config['model'])  # load
    if model is None:  # create
        model = models.create_model(config['model'], num_features, field_size, node_map, user_bought)
    assert model is not None
    model.cuda()

    # ----------------------------------Construct Optimizer----------------------------------
    print('Construct Optimizer')
    sys.stdout.flush()
    optimizer = None
    if config['model']['optimizer'] == 'Adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr=config['model']['learning_rate'],
                                  initial_accumulator_value=1e-8)
    elif config['model']['optimizer'] == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=config['model']['learning_rate'])
    elif config['model']['optimizer'] == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=config['model']['learning_rate'])
    elif config['model']['optimizer'] == 'Momentum':
        optimizer = optim.SGD(model.parameters(), lr=config['model']['learning_rate'], momentum=0.95)

    # ----------------------------------Construct Loss Function----------------------------------
    print('Construct Loss Function')
    sys.stdout.flush()
    criterion = None
    if config['model']['loss_type'] == 'square_loss':
        criterion = nn.MSELoss(reduction='sum')
    elif config['model']['loss_type'] == 'log_loss':  # log_loss
        criterion = nn.BCEWithLogitsLoss(reduction='sum')

    # ---------Model Initial Check---------
    if config['model']['evaluation']:
        model.eval()
        print("Model Initial Check: ", end='')
        if config['task'] == 'rating':
            train_result = metrics.RMSE(model, train_loader)
            test_result = metrics.RMSE(model, test_loader)
            if valid_dataset is not None:
                valid_result = metrics.RMSE(model, valid_loader)
                print("Train_RMSE: {:.3f},".format(train_result),
                      "Valid_RMSE: {:.3f}, Test_RMSE: {:.3f}".format(valid_result, test_result))
            else:
                print("Train_RMSE: {:.3f},".format(train_result), "Test_RMSE: {:.3f}".format(test_result))
        elif config['task'] == 'ranking':
            test_hr, test_ndcg = metrics.metrics(model, test_loader)
            test_result = test_hr
            print("Test_HR: {:.3f}, Test_NDCG: {:.3f}".format(test_hr, test_ndcg))
            sys.stdout.flush()

    # ----------------------------------Training----------------------------------
    print('Training...')
    if config['task'] == 'rating':
        best_result = 100
    elif config['task'] == 'ranking':
        best_result = (0, 0)
    else:
        best_result = None

    saved = False
    start_time = time.time()
    for epoch in range(config['model']['epochs']):
        model.train()
        loss = 0  # No effect, ignore this line
        for i, (features, feature_values, label) in enumerate(train_loader):
            features = features.cuda()
            feature_values = feature_values.cuda()
            label = label.cuda()
            if config['model']['loss_type'] == 'log_loss':
                label = label.clamp(min=0., max=1.)

            model.zero_grad()
            prediction = model(features, feature_values)
            loss = criterion(prediction, label)
            # ---------l2 regularization---------
            if model.l2 is not None:
                loss += model.l2_regularization()
            loss.backward()
            optimizer.step()
            # ---------checkpoint---------
            if i % config['model']['steps_per_checkpoint'] == 0:
                print(
                    "Running Epoch {:03d}/{:03d}".format(epoch + 1, config['model']['epochs']),
                    "loss:{:.3f}".format(float(loss)),
                    "costs:", time.strftime("%H: %M: %S", time.gmtime(time.time() - start_time)))
                start_time = time.time()
                sys.stdout.flush()
                # ----------------------------------Validation----------------------------------
                if config['model']['evaluation']:
                    model.eval()
                    save_flag = False
                    if config['task'] == 'rating':
                        # train_result = metrics.RMSE(model, train_loader)
                        test_result = metrics.RMSE(model, test_loader)
                        if valid_dataset is not None:
                            # valid_result = metrics.RMSE(model, valid_loader)
                            print("\tRunning Epoch {:03d}/{:03d}".format(epoch + 1, config['model']['epochs']),
                                  # "Train_RMSE: {:.3f},".format(train_result),
                                  # "Valid_RMSE: {:.3f},".format(valid_result),
                                  "Test_RMSE: {:.3f}".format(test_result))
                        else:
                            print("\tRunning Epoch {:03d}/{:03d}".format(epoch + 1, config['model']['epochs']),
                                  # "Train_RMSE: {:.3f},".format(train_result),
                                  "Test_RMSE: {:.3f}".format(test_result))

                        save_flag = test_result < best_result
                    elif config['task'] == 'ranking':
                        test_hr, test_ndcg = metrics.metrics(model, test_loader)
                        test_result = (test_hr, test_ndcg)
                        print("\tRunning Epoch {:03d}/{:03d}".format(epoch + 1, config['model']['epochs']),
                              "Test_HR: {:.3f}, Test_NDCG: {:.3f}".format(test_hr, test_ndcg))

                        save_flag = test_hr > best_result[0]
                    else:
                        test_result = best_result  # No effect, ignore this line

                    if save_flag:
                        if config['model']['save']:
                            if 'tag' in config:
                                torch.save(model, os.path.join(config['model']['model_path'],
                                                               '{}_{}.pth'.format(config['model']['name'], config['tag'])))
                            saved = True
                        best_result = test_result

                    sys.stdout.flush()

    # ----------------------------------Evaluation----------------------------------
    if config['model']['evaluation']:
        print('Evaluating...')
        model.eval()
        flag = False
        if config['task'] == 'rating':
            train_result = metrics.RMSE(model, train_loader)
            test_result = metrics.RMSE(model, test_loader)
            if valid_dataset is not None:
                valid_result = metrics.RMSE(model, valid_loader)
                print("Train_RMSE: {:.3f}, Valid_RMSE: {:.3f}, Test_RMSE: {:.3f}".format(train_result, valid_result,
                                                                                         test_result))
            else:
                print("Train_RMSE: {:.3f}, Test_RMSE: {:.3f}".format(train_result, test_result))

            flag = test_result < best_result
        elif config['task'] == 'ranking':
            train_result = metrics.RMSE(model, train_loader)
            test_hr, test_ndcg = metrics.metrics(model, test_loader)
            test_result = (test_hr, test_ndcg)
            print("Train_RMSE: {:.3f}, Test_HR: {:.3f}, Test_NDCG: {:.3f}".format(train_result, test_hr, test_ndcg))

            flag = test_hr > best_result[0]
        else:
            test_result = best_result  # No effect, ignore this line

        if flag:
            best_result = test_result
        print('------Best Result: ', best_result, '------', sep='')
        sys.stdout.flush()

    if not saved and config['model']['save']:
        if 'tag' in config:
            torch.save(model, os.path.join(config['model']['model_path'],
                                           '{}_{}.pth'.format(config['model']['name'], config['tag'])))