Code example #1
import os
import re
from functools import partial

import pandas as pd

# dp (dataprep), ev and the metric functions smae, srmse, smape, mase come
# from this project's own modules and are assumed to be imported alongside.


def ev_dir(
    pred_dir,
    true,
    measures={
        'SMAE': smae,
        'SRMSE': srmse,
        'SMAPE': smape,
        'MASE': partial(mase, shift=7 * 48)
    }):
    if isinstance(true, str) and os.listdir(true):  # true is a directory of per-series test sets
        files = [
            os.path.splitext(file)[0] for file in os.listdir(pred_dir)
            if os.path.isfile(true + os.path.splitext(file)[0] + '/test.csv')
        ]  # files in both directories
        result = pd.concat(
            [
                ev(pred=dp.load(path=pred_dir + file + '.csv', idx='date', dates=True),
                   true=dp.load(path=true + file + '/test.csv', idx='date', dates=True),
                   label=file,
                   parse_label=False,
                   measures=measures)
                for file in files
            ],
            axis=0,
            join='outer')  # merge results
    else:
        result = pd.concat(
            [
                ev(pred=dp.load(path=pred_dir + name, idx='date', dates=True),
                   true=true,
                   label=re.sub(r',?[^,]*.csv', '', name),
                   parse_label=True,
                   measures=measures)
                for name in os.listdir(pred_dir)
            ],
            axis=0,
            join='outer')  # merge results
    result = result.fillna(value=False)  # replace nans with False
    return result
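
A minimal usage sketch, with hypothetical directory paths; it assumes the predictions directory holds one CSV per series and the test sets are laid out as ev_dir expects:

pred_dir = 'results/predictions/'  # hypothetical: one <series>.csv per forecast
true_dir = 'data/series/'          # hypothetical: one <series>/test.csv per series
scores = ev_dir(pred_dir=pred_dir, true=true_dir)
print(scores)
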
Code example #2
File: snn.py Project: lulzzz/mtsg
import numpy as np

import dataprep as dp  # project module (assumed to be importable)


# NOTE: the excerpt starts mid-signature; the function name and the parameters
# before `loss` are reconstructed here so that the snippet is self-contained.
def create_model(n_in, n_hidden, n_out,
                 activation='relu',
                 loss='mean_squared_error',
                 optimizer='adam'):
    from keras.models import Sequential
    from keras.layers.core import Dense
    model = Sequential()  # FFN
    model.add(Dense(n_hidden, input_dim=n_in,
                    activation=activation))  # input & hidden layers
    #model.add(Dropout({{uniform(0, 1)}})) # randomly set a number of inputs to 0 to prevent overfitting
    model.add(Dense(n_out))  # output layer
    model.compile(loss=loss, optimizer=optimizer)  # assemble network
    return model


np.random.seed(0)  # fix seed for reproducibility
path = 'C:/Users/SABA/Google Drive/mtsg/data/household_power_consumption.csv'  # data path
load = dp.load(path)  # load data
load_with_nans = load.apply(
    axis=1,
    func=lambda x: np.nan if x.isnull().sum() > 0 else x.mean()
).unstack()  # row-wise mean that propagates NaN: any missing value in a row gives NaN
# set grid search parameters and ranges
grid_space = {
    'n_hidden': [10, 20, 30],
    'nb_epoch': [500, 1000, 1500, 2000],
    'batch_size': [1, 5, 10, 20]
}

for i in range(1, 6):  # optimize for number of time steps
    X, Y = dp.split_X_Y(
        dp.shift(load_with_nans, n_shifts=i, shift=1).dropna()
    )  # create patterns & targets in the correct format
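
A sketch of how the search over grid_space might be driven for each number of time steps, using the reconstructed create_model above; scikit-learn's ParameterGrid stands in here for whatever search machinery the original file uses (the commented-out Dropout template hints at hyperas):

from sklearn.model_selection import ParameterGrid

for i in range(1, 6):  # number of time steps, as above
    X, Y = dp.split_X_Y(dp.shift(load_with_nans, n_shifts=i, shift=1).dropna())
    X_arr = np.asarray(X)
    Y_arr = np.asarray(Y).reshape(len(Y), -1)  # ensure 2-D targets
    for params in ParameterGrid(grid_space):
        model = create_model(n_in=X_arr.shape[1],
                             n_hidden=params['n_hidden'],
                             n_out=Y_arr.shape[1])
        hist = model.fit(X_arr, Y_arr,
                         epochs=params['nb_epoch'],  # 'nb_epoch' is the old Keras name for epochs
                         batch_size=params['batch_size'],
                         verbose=0)
        print(i, params, hist.history['loss'][-1])  # final training loss for this configuration
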
Code example #3
            if interval > 0 and i % interval == 0:
                print(
                    'Epoch: {} | Batch: {}/{} ({:.0f}%) | G Loss: {:.6f} | C Loss: {:.6f}'
                    .format(
                        epoch, batch_size * i, len(train_loader.dataset),
                        100. * (batch_size * i) / len(train_loader.dataset),
                        g_loss.item(), c_loss.item()))

    g_train_loss /= g_batches
    c_train_loss /= len(train_loader)
    print('* (Train) Epoch: {} | G Loss: {:.4f} | C Loss: {:.4f}'.format(
        epoch, g_train_loss, c_train_loss))
    return (g_train_loss, c_train_loss)


train_loader, vocab = load(batch_size, seq_len)
autoencoder = Autoencoder(enc_hidden_dim, dec_hidden_dim, embedding_dim,
                          latent_dim, vocab.size(), dropout, seq_len)
autoencoder.load_state_dict(
    torch.load('autoencoder.th', map_location=lambda x, y: x))
generator = Generator(n_layers, block_dim)
critic = Critic(n_layers, block_dim)

g_optimizer = optim.Adam(generator.parameters(), lr=lr)
c_optimizer = optim.Adam(critic.parameters(), lr=lr)
if cuda:
    autoencoder = autoencoder.cuda()
    generator = generator.cuda()
    critic = critic.cuda()

best_loss = np.inf
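
A sketch of the per-epoch driver that the snippet implies; `epochs` and the name `train` for the truncated function above are assumptions, and best_loss is used to checkpoint on the generator loss:

for epoch in range(1, epochs + 1):  # `epochs` assumed to be defined with the other hyperparameters
    g_train_loss, c_train_loss = train(epoch)
    if g_train_loss < best_loss:  # keep the weights achieving the lowest generator loss so far
        best_loss = g_train_loss
        torch.save(generator.state_dict(), 'generator.th')
        torch.save(critic.state_dict(), 'critic.th')
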
Code example #4
File: nn.py Project: h83s/load_forecast
                             prep=prep,
                             postp=postp)  # evaluate network
        pred = pd.concat([pred, new_pred], axis=0)  # add new predictions
        train_loss = pd.concat([train_loss, tl], axis=0,
                               ignore_index=True)  # append to old loss
        val_loss = pd.concat([val_loss, l], axis=0,
                             ignore_index=True)  # append to old loss
    return pred, train_loss, val_loss


# SLNs
np.random.seed(0)  # fix seed for reproducibility
data_dir = 'C:/Users/SABA/Google Drive/mtsg/data/train_test/'  # directory containing data
exp_dir = 'C:/Users/SABA/Google Drive/mtsg/data/sln/'  # directory containing results of experiments

true = dp.load(path=data_dir + 'test.csv', idx='date',
               dates=True)  # observations to forecast
measures = {
    'SRMSE': pf.srmse,
    'MASE': partial(pf.mase, shift=48 * 7),
    'SMAPE': pf.smape,
    'SMAE': pf.smae,
}  # performance to consider
train = dp.load(path=data_dir + 'train.csv', idx='date',
                dates=True)  # load train set
test = dp.load(path=data_dir + 'test.csv', idx='date',
               dates=True)  # load test set
weather_train = {
    name: dp.load(path=data_dir + name + '_train.csv', idx='date', dates=True)
    for name in ['temp', 'hum', 'wind']
}  # load weather characteristics for train set
weather_test = {
    name: dp.load(path=data_dir + name + '_test.csv', idx='date', dates=True)
    for name in ['temp', 'hum', 'wind']
}  # load weather characteristics for test set (reconstructed by analogy with weather_train)
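
A minimal sketch, not part of the original file, of one way the weather regressors could be joined onto the load series (the column prefixes are hypothetical):

train_ext = pd.concat(
    [train] + [frame.add_prefix(name + '_') for name, frame in weather_train.items()],
    axis=1)  # load plus temp/hum/wind columns, aligned on the date index
test_ext = pd.concat(
    [test] + [frame.add_prefix(name + '_') for name, frame in weather_test.items()],
    axis=1)
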
Code example #5
File: ma.py Project: lulzzz/mtsg
import os
import numpy as np
import sklearn
import matplotlib.pyplot as plt
import pandas as pd
import dataprep as dp
import patsy
import importlib
from sklearn.metrics import r2_score
from sklearn import multioutput

np.random.seed(0)  # fix seed for reproducibility
path = 'C:/Users/SABA/Google Drive/mtsg/data/household_power_consumption.csv'  # data path
load_raw = dp.load(path)  # load data
targets = load_raw.apply(
    axis=1,
    func=lambda x: np.nan if x.isnull().sum() > 0 else x.mean()
).unstack()  # row-wise mean that propagates NaN: any missing value in a row gives NaN

# moving average
for i in range(1, 50):
    pred = targets.rolling(window=i).mean().shift(1)
    load = pd.concat({'pred': pred, 'targets': targets}, axis=1)
    load.dropna(inplace=True)
    print(
        r2_score(y_pred=load['pred'],
                 y_true=load['targets'],
                 multioutput='uniform_average'))
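
A small extension of the same loop, shown only as an illustration, that keeps the scores and reports the best window size:

scores = {}
for i in range(1, 50):
    pred = targets.rolling(window=i).mean().shift(1)
    load = pd.concat({'pred': pred, 'targets': targets}, axis=1).dropna()
    scores[i] = r2_score(y_pred=load['pred'],
                         y_true=load['targets'],
                         multioutput='uniform_average')
best_window = max(scores, key=scores.get)
print('best window: {} (R^2 = {:.4f})'.format(best_window, scores[best_window]))
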
Code example #6
File: workspace.py Project: h83s/load_forecast
data = imp.imp(
    data,
    alg=impts.na_seadec,
    freq=1440,
    **{
        'algorithm': 'ma',
        'weighting': 'linear',
        'k': 2
    })  # impute the whole dataset with seasonal-decomposition imputation (na_seadec, linear-weighted moving average)
dp.save(data, path=data_dir + 'data_imp.csv',
        idx='datetime')  # save imputed data

# AGGREGATE DATA & CREATE TRAIN & TEST SETS
exp_dir = 'C:/Users/SABA/Google Drive/mtsg/data/train_test/'  # directory for the results
data = dp.load(path=data_dir + 'data_imp.csv',
               idx='datetime',
               cols='load',
               dates=True)  # load imputed data

data = dp.resample(data, freq=1440)  # aggregate minutes to half-hours
train, test = dp.train_test(data=data, test_size=0.255,
                            base=7)  # split into train & test sets
dp.save(data=train, path=exp_dir + 'train.csv', idx='date')  # save train set
dp.save(data=test, path=exp_dir + 'test.csv', idx='date')  # save test set
dp.save_dict(
    dic=dp.split(train, nsplits=7), path=exp_dir + 'train_', idx='date'
)  # split train set according to weekdays and save each into a separate file
dp.save_dict(
    dic=dp.split(test, nsplits=7), path=exp_dir + 'test_', idx='date'
)  # split test set according to weekdays and save each into a separate file
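
A quick sanity check, illustrative only, that the split lost no rows and that the saved train set round-trips through dp.load:

assert len(train) + len(test) == len(data)  # no rows lost in the split
reloaded = dp.load(path=exp_dir + 'train.csv', idx='date', dates=True)
print(reloaded.shape, train.shape)  # the shapes should match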

# WEATHER DATA