Code Example #1
def show_errors( time, Y_true, Y_predict, with_graphs=False ):

    mae  = utils.mean_absolute_error(            Y_true, Y_predict )
    mape = utils.mean_absolute_percentage_error( Y_true, Y_predict, epsilon=1.0 )
    mse  = utils.mean_squared_error(             Y_true, Y_predict )

    print( 'MSE   %f ' % mse.mean() )
    print( 'MAE   %f ' % mae.mean() )
    print( 'MAPE  %7.3f%% ' % mape.mean() )

    if with_graphs:
        pyplot.plot( time, Y_predict[:,0], color='blue',  lw=7, alpha=0.2 )
        pyplot.plot( time, Y_predict[:,1], color='green', lw=7, alpha=0.2 )
        pyplot.plot( time, Y_predict[:,2], color='red',   lw=7, alpha=0.2 )
        pyplot.plot( time, Y_true[:,0], color='blue',  lw=2 )
        pyplot.plot( time, Y_true[:,1], color='green', lw=2 )
        pyplot.plot( time, Y_true[:,2], color='red',   lw=2 )
        pyplot.grid()
        pyplot.show()

        pyplot.plot( time, mape, color='red',   lw=1 )
        pyplot.grid()
        pyplot.show()
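
The snippet assumes a project-local `utils` module whose metrics return per-sample arrays (otherwise `mape` could not be plotted against `time`), plus matplotlib's `pyplot`. A minimal sketch of how it might be driven, with a hypothetical `utils` shim and synthetic data standing in for the real project:

import numpy as np
from matplotlib import pyplot

class utils:  # hypothetical stand-in for the project's utils module
    @staticmethod
    def mean_absolute_error(Y_true, Y_predict):
        return np.abs(Y_true - Y_predict).mean(axis=1)  # per-sample MAE

    @staticmethod
    def mean_squared_error(Y_true, Y_predict):
        return ((Y_true - Y_predict) ** 2).mean(axis=1)  # per-sample MSE

    @staticmethod
    def mean_absolute_percentage_error(Y_true, Y_predict, epsilon=1.0):
        # epsilon guards against division by zero where Y_true is near 0
        return 100.0 * (np.abs(Y_true - Y_predict) / (np.abs(Y_true) + epsilon)).mean(axis=1)

time = np.linspace(0.0, 10.0, 200)
Y_true = np.stack([np.sin(time), np.cos(time), np.sin(2 * time)], axis=1)
Y_predict = Y_true + 0.1 * np.random.randn(*Y_true.shape)
show_errors(time, Y_true, Y_predict, with_graphs=True)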
Code Example #2
File: debug.py Project: katnoria/crowd-density
def generate_plot_from_csv(name,
                           dataset,
                           ds_type,
                           cropsize=224,
                           dirname="predictions/debug-448/"):
    """ Generate plots from CSV

    Parameters
    ---------
    name: Model name (vgg16baseline or vgg16decoder)
    dataset: Dataset name (SHHA or SHHB)
    ds_type: Set type (train or test)
    cropsize: Input image crop size
    """
    fname = f"{dirname}/{name}_{dataset}_{ds_type}_predictions_{cropsize}.csv"
    df = pd.read_csv(fname)
    df['diff'] = df.true_labels - df.predicted_labels

    scatter = alt.Chart(df).mark_circle().encode(
        alt.X("true_labels"), alt.Y("predicted_labels"),
        alt.Tooltip(["true_labels", "predicted_labels"]))
    line = alt.Chart(df).mark_line().encode(alt.X('true_labels', title="True"),
                                            alt.Y('true_labels',
                                                  title="Predicted"),
                                            color=alt.value('rgb(0,0,0)'))

    mse = mean_squared_error(df.true_labels.values, df.predicted_labels.values)

    mae = mean_absolute_error(df.true_labels.values,
                              df.predicted_labels.values)

    chart = (scatter + line).properties(
        title=(f"INPUT {cropsize}, {dataset}:{ds_type.upper()}, "
               f"MSE: {mse} | MAE: {mae}"))
    return chart
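
A hedged usage sketch; the model and dataset strings follow the docstring, the output filename is illustrative, and `alt`, `pd`, and the sklearn metric functions are assumed to be imported at module level as in the project:

chart = generate_plot_from_csv("vgg16baseline", "SHHA", "test")
chart.save("vgg16baseline_SHHA_test.html")  # Altair can export charts to HTML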
Code Example #3
                print('Warning: NaNs detected in training data')

            if o > 0:
                # updating predictors
                Tr_Xtr = Xtr[:, 4:]
                Tr_Xte = Xte[:, 4:]

                coeffs_tr = fit_pixel(Tr_Xtr, ytr)

                y_test_pred_tr = predict_pixel(Tr_Xte, coeffs_tr)
                y_train_pred_tr = predict_pixel(Tr_Xtr, coeffs_tr)

                mse_test_tr = mean_squared_error(y_test_pred_tr, yte)
                mse_train_tr = mean_squared_error(y_train_pred_tr, ytr)

                mae_test_tr = mean_absolute_error(y_test_pred_tr, yte)
                mae_train_tr = mean_absolute_error(y_train_pred_tr, ytr)

            ############# Fitting
            coeffs = fit_pixel(Xtr, ytr)

            y_test_pred = predict_pixel(Xte, coeffs)
            y_train_pred = predict_pixel(Xtr, coeffs)

            ################ Evaluation
            mse_test = mean_squared_error(y_test_pred, yte)
            mse_train = mean_squared_error(y_train_pred, ytr)

            mae_test = mean_absolute_error(y_test_pred, yte)
            mae_train = mean_absolute_error(y_train_pred, ytr)
Code Example #4
#-*- coding:utf-8 -*-
#author: wenzhu

import pandas as pd
from utils import mean_absolute_error, mean_squared_error, root_mean_squared_error, r2_score


#读取数据
pred = pd.read_csv("saved_results/model_conv2d/predict.csv")
real = pd.read_csv("saved_results/model_conv2d/real_data.csv")
pred = pred.values
real = real.values

#计算指标
mae = mean_absolute_error(real, pred)
mse = mean_squared_error(real, pred)
rmse = root_mean_squared_error(real, pred)
r2 = r2_score(real,pred)


print('mae:', mae)
print('mse:', mse)
print('rmse:', rmse)
print('r2_score:',r2)
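
The `utils` module imported above is not shown; a minimal NumPy sketch of what the four helpers plausibly compute (an assumption, not the project's actual code):

import numpy as np

def mean_absolute_error(y_true, y_pred):
    return np.abs(np.asarray(y_true) - np.asarray(y_pred)).mean()

def mean_squared_error(y_true, y_pred):
    return ((np.asarray(y_true) - np.asarray(y_pred)) ** 2).mean()

def root_mean_squared_error(y_true, y_pred):
    return np.sqrt(mean_squared_error(y_true, y_pred))

def r2_score(y_true, y_pred):
    # coefficient of determination: 1 - SS_res / SS_tot
    y_true = np.asarray(y_true)
    ss_res = ((y_true - np.asarray(y_pred)) ** 2).sum()
    ss_tot = ((y_true - y_true.mean()) ** 2).sum()
    return 1.0 - ss_res / ss_tot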
Code Example #5
File: main.py Project: hydrogeohc/DeepForecasting
    def fitness(self, **kwargs):
        original_kwargs = kwargs.copy()
        window_size = kwargs.pop('window_size')
        num_points_to_predict = kwargs.pop('num_points_to_predict')
        num_derivatives = kwargs.pop('num_derivatives')
        epochs = kwargs.pop('epochs')
        batch_size = kwargs.pop('batch_size')
        scaler_class = kwargs.pop('scaler_class', utils.StandardScaler)

        overall_y_true = []
        overall_y_pred = []

        # Training
        print('\n\nCreating model: \n\t{0}'.format(original_kwargs))
        model = create_model((window_size, num_derivatives + 1), num_points_to_predict, **kwargs)
        model.save_weights('initial_weights.h5')

        for train_t, test_t in utils.roll_cv(self.t_raw, folds=4, backtrack_padding=window_size-1):
            train_x, train_y = utils.as_sequences(train_t, window_size, num_derivatives, num_points_to_predict)
            test_x, test_y = utils.as_sequences(test_t, window_size, num_derivatives, num_points_to_predict)

            scaler_x = utils.SequenceScaler(scaler_class)
            scaler_y = scaler_class()

            train_x_scaled = scaler_x.fit_transform(train_x)
            train_y_scaled = scaler_y.fit_transform(train_y)

            test_x_scaled = scaler_x.transform(test_x)
            test_y_scaled = scaler_y.transform(test_y)

            print('Fitting')
            model.fit(train_x_scaled, train_y_scaled, epochs=epochs, batch_size=batch_size, verbose=0)

            pred_y_scaled = model.predict(test_x_scaled)
            pred_y = scaler_y.inverse_transform(pred_y_scaled)

            overall_y_true.append(test_y)
            overall_y_pred.append(pred_y)

            print('Reset weights')
            model.load_weights('initial_weights.h5')

        all_y_true = np.concatenate(overall_y_true)
        all_y_pred = np.concatenate(overall_y_pred)
        mse = utils.mean_squared_error(all_y_true, all_y_pred)
        mae = utils.mean_absolute_error(all_y_true, all_y_pred)

        print('MSE', mse)
        print('MAE', mae)

        fitness = -mse

        import matplotlib.pyplot as plt
        plt.plot(all_y_pred[:, 0], label='Predicted')
        plt.plot(all_y_true[:, 0], label='True')
        plt.grid()
        plt.legend()
        plt.show()

        log_kwargs = original_kwargs.copy()
        log_kwargs.update({'mse': mse, 'mae': mae})

        print('Trained: {0}'.format(log_kwargs))
        print('Fitness: {0:.3f}'.format(fitness))

        if not self.log_writer:
            self.log_file = open(self.log_path, 'a')
            self.log_writer = csv.DictWriter(self.log_file, fieldnames=sorted(log_kwargs.keys()))
            self.log_writer.writeheader()

        self.log_writer.writerow(log_kwargs)
        self.log_file.flush()

        return fitness
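
The fold loop uses a save-once/restore-per-fold trick so every cross-validation fold trains from the same initial weights. A self-contained sketch of that pattern with a toy Keras model (the project's `create_model` and `utils.roll_cv` are not shown; the data here is synthetic):

import numpy as np
from tensorflow import keras

model = keras.Sequential([keras.layers.Dense(1, input_shape=(8,))])
model.compile(optimizer='adam', loss='mse')
model.save_weights('initial_weights.h5')  # snapshot the random initialization once

rng = np.random.default_rng(0)
for fold in range(4):
    x = rng.normal(size=(64, 8))
    y = rng.normal(size=(64, 1))
    model.fit(x, y, epochs=2, verbose=0)      # train on this fold
    model.load_weights('initial_weights.h5')  # reset so folds start from identical weights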
Code Example #6
File: main.py Project: lileipisces/PETER
def generate(data):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    idss_predict = []
    context_predict = []
    rating_predict = []
    with torch.no_grad():
        while True:
            user, item, rating, seq, feature = data.next_batch()
            user = user.to(device)  # (batch_size,)
            item = item.to(device)
            bos = seq[:, 0].unsqueeze(0).to(device)  # (1, batch_size)
            feature = feature.t().to(device)  # (1, batch_size)
            if args.use_feature:
                text = torch.cat([feature, bos],
                                 0)  # (src_len - 1, batch_size)
            else:
                text = bos  # (1, batch_size)
            start_idx = text.size(0)
            for idx in range(args.words):
                # produce a word at each step
                if idx == 0:
                    log_word_prob, log_context_dis, rating_p, _ = model(
                        user, item, text, False
                    )  # (batch_size, ntoken) vs. (batch_size, ntoken) vs. (batch_size,)
                    rating_predict.extend(rating_p.tolist())
                    context = predict(log_context_dis,
                                      topk=args.words)  # (batch_size, words)
                    context_predict.extend(context.tolist())
                else:
                    log_word_prob, _, _, _ = model(
                        user, item, text, False, False,
                        False)  # (batch_size, ntoken)
                word_prob = log_word_prob.exp()  # (batch_size, ntoken)
                word_idx = torch.argmax(
                    word_prob, dim=1
                )  # (batch_size,), pick the one with the largest probability
                text = torch.cat([text, word_idx.unsqueeze(0)],
                                 0)  # (len++, batch_size)
            ids = text[start_idx:].t().tolist()  # (batch_size, seq_len)
            idss_predict.extend(ids)

            if data.step == data.total_step:
                break

    # rating
    predicted_rating = [
        (r, p) for (r, p) in zip(data.rating.tolist(), rating_predict)
    ]
    RMSE = root_mean_square_error(predicted_rating, corpus.max_rating,
                                  corpus.min_rating)
    print(now_time() + 'RMSE {:7.4f}'.format(RMSE))
    MAE = mean_absolute_error(predicted_rating, corpus.max_rating,
                              corpus.min_rating)
    print(now_time() + 'MAE {:7.4f}'.format(MAE))
    # text
    tokens_test = [
        ids2tokens(ids[1:], word2idx, idx2word) for ids in data.seq.tolist()
    ]
    tokens_predict = [
        ids2tokens(ids, word2idx, idx2word) for ids in idss_predict
    ]
    BLEU1 = bleu_score(tokens_test, tokens_predict, n_gram=1, smooth=False)
    print(now_time() + 'BLEU-1 {:7.4f}'.format(BLEU1))
    BLEU4 = bleu_score(tokens_test, tokens_predict, n_gram=4, smooth=False)
    print(now_time() + 'BLEU-4 {:7.4f}'.format(BLEU4))
    USR, USN = unique_sentence_percent(tokens_predict)
    print(now_time() + 'USR {:7.4f} | USN {:7}'.format(USR, USN))
    feature_batch = feature_detect(tokens_predict, feature_set)
    DIV = feature_diversity(feature_batch)  # time-consuming
    print(now_time() + 'DIV {:7.4f}'.format(DIV))
    FCR = feature_coverage_ratio(feature_batch, feature_set)
    print(now_time() + 'FCR {:7.4f}'.format(FCR))
    feature_test = [idx2word[i]
                    for i in data.feature.squeeze(1).tolist()]  # ids to words
    FMR = feature_matching_ratio(feature_batch, feature_test)
    print(now_time() + 'FMR {:7.4f}'.format(FMR))
    text_test = [' '.join(tokens) for tokens in tokens_test]
    text_predict = [' '.join(tokens) for tokens in tokens_predict]
    tokens_context = [
        ' '.join([idx2word[i] for i in ids]) for ids in context_predict
    ]
    ROUGE = rouge_score(text_test, text_predict)  # a dictionary
    for (k, v) in ROUGE.items():
        print(now_time() + '{} {:7.4f}'.format(k, v))
    text_out = ''
    for (real, ctx, fake) in zip(text_test, tokens_context, text_predict):
        text_out += '{}\n{}\n{}\n\n'.format(real, ctx, fake)
    return text_out
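Code Example #7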
            predict_pixel_class = torch.max(
                nn.functional.softmax(predict_map_valid, dim=1),
                dim=1).indices
            predict_pixel_class = predict_pixel_class.cpu().numpy()  # batch_size*height*width

            mask_valid = mask_valid.numpy()  # batch_size*height*width

            dice = dice_coeff_multiclass(predict_pixel_class, mask_valid,
                                         num_class)
            dice_sum += dice

            # -------- calculate mean absolute error of direct area estimation
            predict_area_valid = predict_area_valid.cpu().detach().numpy()  # batch_size*num_class
            gt_area_valid = area_vector(mask_valid, num_class)  # batch_size*num_class
            estimate_mae = mean_absolute_error(predict_area_valid,
                                               gt_area_valid)
            estimate_mae_sum += estimate_mae

            # -------- calculate mean absolute error of area by segmentation
            segment_area_valid = area_vector(predict_pixel_class, num_class)
            segment_mae = mean_absolute_error(segment_area_valid,
                                              gt_area_valid)
            segment_mae_sum += segment_mae

        print("average validate dice ", dice_sum / (step + 1),
              "average validate mae ", estimate_mae_sum / (step + 1),
              segment_mae_sum / (step + 1))
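
`dice_coeff_multiclass` and `area_vector` are project helpers. Judging from how it is used above, `area_vector` likely counts pixels per class for each image; a NumPy sketch under that assumption:

import numpy as np

def area_vector(label_map, num_class):
    # label_map: (batch_size, height, width) integer class labels
    # returns:   (batch_size, num_class) pixel count of each class per image
    return np.stack([(label_map == c).sum(axis=(1, 2)) for c in range(num_class)], axis=1)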
Code Example #8
def train_ar_model(transform=False,
                   bias=False,
                   sig=False,
                   order=0,
                   overwrite_results=True):
    if transform and bias:
        raise ValueError('Not a valid model config: transform and bias cannot be combined')

    lagr_path = '/uio/lagringshotell/geofag/students/metos/hannasv/'
    path_transform = '{}results/stats/2014-01-01_2018-12-31'.format(lagr_path)

    path = '{}ar_data/'.format(lagr_path)
    path_ar_results = '{}/results/ar/'.format(lagr_path)

    SPATIAL_RESOLUTION = 0.25

    latitudes = np.arange(30.0,
                          50.0 + SPATIAL_RESOLUTION,
                          step=SPATIAL_RESOLUTION)
    longitudes = np.arange(-15,
                           25 + SPATIAL_RESOLUTION,
                           step=SPATIAL_RESOLUTION)
    base = '{}/results/stats/2014-01-01_2018-12-31/'.format(lagr_path)

    if transform:
        ds_tcc = xr.open_dataset(base + 'stats_pixel_tcc_all.nc')
        ds_r = xr.open_dataset(base + 'stats_pixel_r_all.nc')
        ds_q = xr.open_dataset(base + 'stats_pixel_q_all.nc')
        ds_t2m = xr.open_dataset(base + 'stats_pixel_t2m_all.nc')
        ds_sp = xr.open_dataset(base + 'stats_pixel_sp_all.nc')

        stats_data = {
            'q': ds_q,
            't2m': ds_t2m,
            'r': ds_r,
            'sp': ds_sp,
            'tcc': ds_tcc
        }

    explanation = ['q', 't2m', 'r', 'sp']
    tr_e = []
    tr_index = 4

    if bias:
        explanation.append('bias')
        tr_e.append('bias')

    full_name = generate_model_name('AR', bias, transform, sig, order)
    config = get_config_from_model_name(full_name)

    full_name_tr = generate_model_name('TR', bias, transform, sig, order)
    tr_config = get_config_from_model_name(full_name_tr)

    for latitude in latitudes:
        for longitude in longitudes:

            explain = explanation.copy()
            tr_explain = tr_e.copy()

            for o in range(0, order + 1):
                name = full_name + '-L{}'.format(o)
                tr_name = full_name_tr + '-L{}'.format(o)

                w_filename = '{}weights_{}_{}_{}.nc'.format(
                    path_ar_results, name, longitude, latitude)
                p_filename = '{}performance_{}_{}_{}.nc'.format(
                    path_ar_results, name, longitude, latitude)

                if not (os.path.exists(w_filename)
                        and os.path.exists(p_filename)) or overwrite_results:
                    fil = 'all_vars_lat_lon_{}_{}.nc'.format(
                        latitude, longitude)
                    data = xr.open_dataset(path + fil)
                    if o > 0:
                        explain.append('O{}'.format(o))
                        tr_explain.append('O{}'.format(o))
                    start_time = timeit()

                    X_train, y_train = dataset_to_numpy_order(
                        dataset=data.sel(time=slice('2004', '2013')),
                        order=order,
                        bias=bias)
                    X_test, y_test = dataset_to_numpy_order(
                        dataset=data.sel(time=slice('2014', '2018')),
                        order=order,
                        bias=bias)
                    if transform:
                        X_train = transform_X(X_train,
                                              lat=latitude,
                                              lon=longitude,
                                              data=stats_data,
                                              order=o)
                        X_test = transform_X(X_test,
                                             lat=latitude,
                                             lon=longitude,
                                             data=stats_data,
                                             order=o)

                    if sig:
                        y_train = inverse_sigmoid(y_train)
                        y_test = inverse_sigmoid(y_test)

                    name = full_name + '-o{}'.format(o)
                    tr_name = full_name_tr + '-o{}'.format(o)

                    eval_dict = {}
                    eval_tr_dict = {}
                    weights_dict = {}
                    weights_tr_dict = {}

                    Xtr, ytr = drop_nans(X_train[:, :int(tr_index + o)],
                                         y_train)
                    Xte, yte = drop_nans(X_test[:, :int(tr_index + o)], y_test)

                    if sig:
                        yte = sigmoid(yte)
                        ytr = sigmoid(ytr)

                    if np.isnan(yte).any():
                        print('Warning: NaNs detected in test data')

                    if np.isnan(ytr).any():
                        print('Warning: NaNs detected in training data')

                    if o > 0:
                        # updating predictors
                        Tr_Xtr = Xtr[:, tr_index:]
                        Tr_Xte = Xte[:, tr_index:]

                        coeffs_tr = fit_pixel(Tr_Xtr, ytr)

                        y_test_pred_tr = predict_pixel(Tr_Xte, coeffs_tr)
                        y_train_pred_tr = predict_pixel(Tr_Xtr, coeffs_tr)

                        mse_test_tr = mean_squared_error(y_test_pred_tr, yte)
                        mse_train_tr = mean_squared_error(y_train_pred_tr, ytr)

                        mae_test_tr = mean_absolute_error(y_test_pred_tr, yte)
                        mae_train_tr = mean_absolute_error(
                            y_train_pred_tr, ytr)

                    ############# Fitting
                    coeffs = fit_pixel(Xtr, ytr)

                    y_test_pred = predict_pixel(Xte, coeffs)
                    y_train_pred = predict_pixel(Xtr, coeffs)

                    ################ Evaluation
                    mse_test = mean_squared_error(y_test_pred, yte)
                    mse_train = mean_squared_error(y_train_pred, ytr)

                    mae_test = mean_absolute_error(y_test_pred, yte)
                    mae_train = mean_absolute_error(y_train_pred, ytr)

                    ##################### Adding the autoregressive model
                    weights_dict['coeffs'] = (['weights'], coeffs.flatten())

                    eval_dict['mse_test'] = mse_test[0]
                    eval_dict['mse_train'] = mse_train[0]

                    eval_dict['mae_test'] = mae_test[0]
                    eval_dict['mae_train'] = mae_train[0]

                    num_test_samples = len(yte)
                    num_train_samples = len(ytr)

                    eval_dict['num_test_samples'] = num_test_samples
                    eval_dict['num_train_samples'] = num_train_samples

                    eval_dict.update(config)
                    weights_dict.update(config)

                    ###################### Adding traditional model
                    if o > 0:
                        weights_tr_dict['coeffs'] = (['weights'], coeffs_tr.flatten())

                        eval_tr_dict['mse_test'] = mse_test_tr[0]
                        eval_tr_dict['mse_train'] = mse_train_tr[0]

                        eval_tr_dict['mae_test'] = mae_test_tr[0]
                        eval_tr_dict['mae_train'] = mae_train_tr[0]

                        num_test_samples = len(yte)
                        num_train_samples = len(ytr)

                        eval_tr_dict['num_test_samples'] = num_test_samples
                        eval_tr_dict['num_train_samples'] = num_train_samples

                        eval_tr_dict.update(tr_config)
                        weights_tr_dict.update(tr_config)

                        w_tr_filename = '{}/weights_{}_{}_{}.nc'.format(
                            path_ar_results, tr_name, longitude, latitude)
                        p_tr_filename = '{}/performance_{}_{}_{}.nc'.format(
                            path_ar_results, tr_name, longitude, latitude)

                        ds = xr.Dataset(
                            weights_tr_dict,
                            coords={
                                'latitude': (['latitude'], [latitude]),
                                'longitude': (['longitude'], [longitude]),
                                'weights': (['weights'], tr_explain)
                            })
                        ds.to_netcdf(w_tr_filename)

                        ds = xr.Dataset(
                            eval_tr_dict,
                            coords={
                                'latitude': (['latitude'], [latitude]),
                                'longitude': (['longitude'], [longitude])
                            })
                        ds.to_netcdf(p_tr_filename)

                    stop_time = timeit()
                    eval_dict['time_elapsed_seconds'] = stop_time - start_time

                    w_filename = '{}weights_{}_{}_{}.nc'.format(
                        path_ar_results, name, longitude, latitude)
                    p_filename = '{}performance_{}_{}_{}.nc'.format(
                        path_ar_results, name, longitude, latitude)
                    ds = xr.Dataset(
                        weights_dict,
                        coords={
                            'latitude': (['latitude'], [latitude]),
                            'longitude': (['longitude'], [longitude]),
                            'weights': (['weights'], explain)
                        })
                    ds.to_netcdf(w_filename)

                    ds = xr.Dataset(
                        eval_dict,
                        coords={
                            'latitude': (['latitude'], [latitude]),
                            'longitude': (['longitude'], [longitude])
                        })
                    ds.to_netcdf(p_filename)
                    print(
                        'finished calibrating bias {}, sigmoid {}, Transform {}, order/Lag {} - ({}, {})'
                        .format(bias, sig, transform, o, longitude, latitude))
                else:
                    print(
                        'Model config already calibrated bias {}, sigmoid {}, Transform {}, order/Lag {} - ({}, {})'
                        .format(bias, sig, transform, o, longitude, latitude))
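
`fit_pixel` and `predict_pixel` are project helpers that are not shown. Given how their results are used (a flattened coefficient vector per pixel, linear predictions on train and test), a plausible ordinary-least-squares sketch (an assumption, not the project's code):

import numpy as np

def fit_pixel(X, y):
    # least-squares coefficients minimizing ||X @ coeffs - y||^2
    coeffs, *_ = np.linalg.lstsq(X, y, rcond=None)
    return coeffs

def predict_pixel(X, coeffs):
    return X @ coeffs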
Code Example #9
def run_model(model_type,
              appliances,
              interval,
              test_dataset,
              experiment_name,
              train_denorm=True,
              plot_results=False,
              return_time=False,
              export_predictions=False,
              verbose=False):
    if appliances:
        appliance_list = appliances
    else:
        appliance_list = APPLIANCES

    train_appliances = {}
    for app in appliance_list:
        train_appliances[app] = load_df(app,
                                        interval,
                                        col=app,
                                        dataset="train",
                                        denorm=train_denorm)
    train_mains = load_df("fridge",
                          interval,
                          col="mains",
                          dataset="train",
                          denorm=train_denorm)

    if model_type == "CO":
        model = CO({})
    elif model_type == "AFHMM":
        model = AFHMM({})
    else:
        raise ValueError(
            f"Model type {model_type} not understood. Available currently are only 'CO' and 'AFHMM'."
        )

    train_start_time = time.time()
    model.partial_fit(train_main=[train_mains],
                      train_appliances=train_appliances)
    train_time = time.time() - train_start_time

    # Average test time across datasets
    test_time = 0
    test_appliances = {}
    for app in appliance_list:
        try:
            test_appliances[app] = load_df(app,
                                           interval,
                                           col=app,
                                           dataset=test_dataset,
                                           denorm=train_denorm)
        except Exception:
            # this appliance may be missing from the test dataset; skip it
            pass

    if model_type == "AFHMM" and test_dataset == "ECO":
        raise ValueError(
            "Do not use AFHMM with ECO. It is not currently implemented due to long testing times."
        )

    test_time_agg = 0
    eval_counter = 0

    if model_type == "AFHMM":
        num_workers = cpu_count()

        # hardcoded fix for now
        chunk_length = 720
        test_mains = load_df(appliance_list[0],
                             interval,
                             col="mains",
                             dataset=test_dataset,
                             denorm=train_denorm)

        test_mains = test_mains.values.flatten().reshape((-1, 1))
        n = len(test_mains)
        n_chunks = int(math.ceil(len(test_mains) / chunk_length))

        n_iter = math.ceil(n_chunks / num_workers)
        results = []
        test_start_time = time.time()

        print(f"Starting disaggregation for {n_iter} chunks.")
        for i in tqdm(range(n_iter)):
            start = i * num_workers * chunk_length
            mains = test_mains[start:start + num_workers * chunk_length]
            results.append(model.disaggregate_chunk(mains)[0])
            pd.concat(results, axis=0).to_csv(
                f"quicksaves/checkpoint{i}_{interval}.csv", sep=";")
        test_time = time.time() - test_start_time
        results = pd.concat(results, axis=0)[:n]

    for app in appliance_list:
        try:
            if model_type == "CO":
                test_mains = load_df(app,
                                     interval,
                                     col="mains",
                                     dataset=test_dataset,
                                     denorm=train_denorm)
                n = len(test_mains)
                test_start_time = time.time()
                results = model.disaggregate_chunk(
                    mains=pd.Series([test_mains[:n]]))[0]
                test_time = time.time() - test_start_time

            if train_denorm:
                true_apps = np.array(test_appliances[app][:n])
                pred_apps = np.array(results[app])
            else:
                true_apps = utils.denormalize(test_appliances[app][:n], app)
                pred_apps = utils.denormalize(results[app], app)

            mse = utils.mean_squared_error(true_apps, pred_apps)
            mae = utils.mean_absolute_error(true_apps, pred_apps)
            sae = utils.normalised_signal_aggregate_error(true_apps, pred_apps)
            mr = utils.match_rate(true_apps, pred_apps)

            log_file_dir = f"Nilmtk/logs/{experiment_name}/{model_type}_{app}.log"

            # In Python 3.8 we can just add force=True to the basic config, but project is written in 3.7
            # so clear and reset path manually (there's probably a better way)
            for handler in logging.root.handlers[:]:
                logging.root.removeHandler(handler)
            logging.basicConfig(filename=log_file_dir,
                                format='%(message)s',
                                level=logging.INFO)

            test_log = f"Test dataset: {test_dataset}"
            logging.info(test_log)

            metric_string = f"MSE: {mse}" \
                            f" MAE: {mae}" \
                            f" SAE: {sae}" \
                            f" Match Rate: {mr}\n"
            logging.info(metric_string)

            if export_predictions:
                utils.check_dir(f"Nilmtk/model_predictions/{experiment_name}/")
                results_path = f"Nilmtk/model_predictions/{experiment_name}/{model_type}_{app}_{test_dataset}.csv"
                pd.DataFrame(pred_apps).to_csv(results_path, sep=";")

            test_time_agg += test_time
            eval_counter += 1
        except Exception as e:
            if verbose:
                print(app, e)

    test_time_agg /= eval_counter

    if return_time:
        return train_time, test_time_agg
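
A hedged usage sketch; "fridge" and "ECO" appear in the code above, while the interval and experiment name are illustrative values, not taken from the project:

train_time, test_time = run_model("CO",
                                  appliances=["fridge"],
                                  interval="1min",
                                  test_dataset="ECO",
                                  experiment_name="co_baseline",
                                  return_time=True)
print(f"train: {train_time:.1f}s, mean test: {test_time:.1f}s")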