Example No. 1
def main():
  if sys.argv[1] == 'daily':
    print('Using daily data...')
    path_to_dataset = '../data/household_power_consumption_daily.csv'
    model, y_test, predictions = run(path_to_dataset, 10, 50, 1.0)
  elif sys.argv[1] == 'monthly':
    print('Using monthly data...')
    path_to_dataset = '../data/household_power_consumption_monthly.csv'
    model, y_test, predictions = run(path_to_dataset, 30, 5, 1.0)
  elif sys.argv[1] == 'hourly':
    print('Using hourly data...')
    path_to_dataset = '../data/household_power_consumption_hourly.csv'
    model, y_test, predictions = run(path_to_dataset, 30, 50, 1.0)
  else:
    print('Using minute data...')
    path_to_dataset = '../data/household_power_consumption.csv'
    model, y_test, predictions = run(path_to_dataset)

  # save for later use
  model.save_weights('../output/lstm.h5', overwrite=True)
  # model.load_weights('../output/lstm.h5')

  graph_utils.plot('lstm', predictions, y_test)

  print('RMSE: %.4f' % metrics.rmse(predictions, y_test))
  print('MAPE: %.4f' % metrics.mape(predictions, y_test))
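The metrics module used throughout these examples is project code that is not shown on this page. A minimal NumPy sketch of the two calls above, under the conventional definitions of RMSE and MAPE (the definitions are assumed, not taken from the project):

import numpy as np

def rmse(predictions, targets):
    # root mean squared error
    predictions, targets = np.asarray(predictions, float), np.asarray(targets, float)
    return np.sqrt(np.mean((predictions - targets) ** 2))

def mape(predictions, targets):
    # mean absolute percentage error, in percent; assumes nonzero targets
    predictions, targets = np.asarray(predictions, float), np.asarray(targets, float)
    return np.mean(np.abs((targets - predictions) / targets)) * 100.0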
Example No. 2
def input_metrics(targetxml, referencexml):
    rvars = pull_tune_variables(referencexml)
    tvars = pull_tune_variables(targetxml, referencexml)
    rvars.variables.sort(key=lambda x: x.group)
    tvars.variables.sort(key=lambda x: x.group)
    data = {'target': [], 'reference': [], 'min': [], 'max': [], 'key': []}
    for r, t in zip(rvars.variables, tvars.variables):
        if r.group == t.group:
            key = ';'.join([r.idfclass, r.idfobject, r.idffield])
            data['target'].append(float(t.value))
            data['reference'].append(float(r.value))
            data['min'].append(r.minimum)
            data['max'].append(r.maximum)
            data['key'].append(key)
    paes = metrics.pae(data['target'], data['reference'], data['min'],
                       data['max'])
    m = {
        'pae': {},
        'rmse': {},
        'cvrmse': {},
        'mbe': {},
        'nmbe': {},
        'mape': {}
    }
    for k, p in zip(data['key'], paes):
        m['pae'][k] = p
    m['rmse']['all inputs'] = metrics.rmse(data['target'], data['reference'])
    m['cvrmse']['all inputs'] = metrics.cvrmse(data['target'],
                                               data['reference'])
    m['mbe']['all inputs'] = metrics.mbe(data['target'], data['reference'])
    m['nmbe']['all inputs'] = metrics.nmbe(data['target'], data['reference'])
    m['mape']['all inputs'] = metrics.mape(data['target'], data['reference'])
    return m
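metrics.pae is also project code. Since it receives each variable's value pair plus its (min, max) tuning bounds, one plausible reading is a per-variable absolute error normalized by the allowed range; the sketch below is hypothetical in name, signature, and normalization:

import numpy as np

def pae(target, reference, minimum, maximum):
    # hypothetical: absolute error scaled by each variable's tuning range
    t, r = np.asarray(target, float), np.asarray(reference, float)
    lo, hi = np.asarray(minimum, float), np.asarray(maximum, float)
    return np.abs(t - r) / (hi - lo)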
Example No. 3
def test_HaLRTC():
    dense_mat = pd.read_csv('./datasets/Seattle-data-set/mat.csv', index_col=0)
    rm = pd.read_csv('./datasets/Seattle-data-set/RM_mat.csv', index_col=0)
    dense_mat = dense_mat.values
    rm = rm.values

    binary_mat2 = np.round(rm + 0.5 - 0.2)
    nan_mat2 = binary_mat2.copy()

    nan_mat2[nan_mat2 == 0] = np.nan

    sparse_mat2 = np.multiply(nan_mat2, dense_mat)

    pos2 = np.where((dense_mat != 0) & (binary_mat2 == 0))

    sparse_tensor2 = sparse_mat2.reshape([sparse_mat2.shape[0], 28, 288])
    # sparse_tensor_ori, rank = 30, time_lags = (1, 2, 24),
    # burn_iter = 1100, gibbs_iter = 100
    HaLRTC_res2 = HaLRTC(sparse_tensor2, rho=1e-5, epsilon=1e-4,
                         maxiter=200).reshape(dense_mat.shape)

    HaLRTC_res2_mape2 = mape(dense_mat[pos2], HaLRTC_res2[pos2])
    HaLRTC_res2_rmse2 = rmse(dense_mat[pos2], HaLRTC_res2[pos2])

    print("HaLRTC_res2_mape2", HaLRTC_res2_mape2)
    print("HaLRTC_res2_rmse2", HaLRTC_res2_rmse2)
Example No. 4
def test_BTRMF():
    dense_mat = pd.read_csv('./datasets/Seattle-data-set/mat.csv', index_col=0)
    rm = pd.read_csv('./datasets/Seattle-data-set/RM_mat.csv', index_col=0)
    dense_mat = dense_mat.values
    rm = rm.values

    binary_mat2 = np.round(rm + 0.5 - 0.2)
    nan_mat2 = binary_mat2.copy()

    nan_mat2[nan_mat2 == 0] = np.nan

    sparse_mat2 = np.multiply(nan_mat2, dense_mat)

    pos2 = np.where((dense_mat != 0) & (binary_mat2 == 0))

    # sparse_tensor2 = sparse_mat2.reshape([sparse_mat2.shape[0], 28, 288])

    BTRMF_res2 = BTRMF(sparse_mat2,
                       rank=50,
                       time_lags=(1, 2, 288),
                       burn_iter=100,
                       gibbs_iter=20)

    BTRMF_res2_mape2 = mape(dense_mat[pos2], BTRMF_res2[pos2])
    BTRMF_res2_rmse2 = rmse(dense_mat[pos2], BTRMF_res2[pos2])

    print("BTRMF_res2_mape2", BTRMF_res2_mape2)
    print("BTRMF_res2_rmse2", BTRMF_res2_rmse2)
Example No. 5
def test_TRTF():
    dense_mat = pd.read_csv('./datasets/Seattle-data-set/mat.csv', index_col=0)
    rm = pd.read_csv('./datasets/Seattle-data-set/RM_mat.csv', index_col=0)
    dense_mat = dense_mat.values
    rm = rm.values

    binary_mat2 = np.round(rm + 0.5 - 0.2)
    nan_mat2 = binary_mat2.copy()

    nan_mat2[nan_mat2 == 0] = np.nan

    sparse_mat2 = np.multiply(nan_mat2, dense_mat)

    pos2 = np.where((dense_mat != 0) & (binary_mat2 == 0))

    sparse_tensor2 = sparse_mat2.reshape([sparse_mat2.shape[0], 28, 288])
    # sparse_tensor_ori, rank = 30, time_lags = (1, 2, 24),
    # burn_iter = 1100, gibbs_iter = 100

    # TRTF(sparse_tensor_ori, rank=30, time_lags=(1, 2, 24),
    #      lambda_u=500, lambda_v=500, lambda_ar=500,
    #      eta=2e-2, lambda_theta=100, maxiter=1000)
    TRTF_res2 = TRTF(sparse_tensor2,
                     rank=50,
                     time_lags=(1, 2, 288),
                     maxiter=200).reshape(dense_mat.shape)

    TRTF_res2_mape2 = mape(dense_mat[pos2], TRTF_res2[pos2])
    TRTF_res2_rmse2 = rmse(dense_mat[pos2], TRTF_res2[pos2])

    print("TRTF_res2_mape2", TRTF_res2_mape2)
    print("TRTF_res2_rmse2", TRTF_res2_rmse2)
Example No. 6
def input_metrics(targetxml, referencexml):
    rvars = pull_tune_variables(referencexml)
    tvars = pull_tune_variables(targetxml, referencexml)
    rvars.variables.sort(key=lambda x: x.group)
    tvars.variables.sort(key=lambda x: x.group)
    data = {'target': [], 'reference': [], 'min': [], 'max': [], 'key': []}
    for r, t in zip(rvars.variables, tvars.variables):
        if r.group == t.group:
            key = ';'.join([r.idfclass, r.idfobject, r.idffield])
            data['target'].append(float(t.value))
            data['reference'].append(float(r.value))
            data['min'].append(r.minimum)
            data['max'].append(r.maximum)
            data['key'].append(key)
    paes = metrics.pae(data['target'], data['reference'], data['min'], data['max'])
    m = {'pae': {},
         'rmse': {},
         'cvrmse': {},
         'mbe': {},
         'nmbe': {},
         'mape': {}}
    for k, p in zip(data['key'], paes):
        m['pae'][k] = p
    m['rmse']['all inputs'] = metrics.rmse(data['target'], data['reference'])
    m['cvrmse']['all inputs'] = metrics.cvrmse(data['target'], data['reference'])
    m['mbe']['all inputs'] = metrics.mbe(data['target'], data['reference'])
    m['nmbe']['all inputs'] = metrics.nmbe(data['target'], data['reference'])
    m['mape']['all inputs'] = metrics.mape(data['target'], data['reference'])
    return m
Example No. 7
def test_TRMF():
    dense_mat = pd.read_csv('./datasets/Seattle-data-set/mat.csv', index_col=0)
    rm = pd.read_csv('./datasets/Seattle-data-set/RM_mat.csv', index_col=0)
    dense_mat = dense_mat.values
    rm = rm.values

    binary_mat2 = np.round(rm + 0.5 - 0.2)
    nan_mat2 = binary_mat2.copy()

    nan_mat2[nan_mat2 == 0] = np.nan

    sparse_mat2 = np.multiply(nan_mat2, dense_mat)

    pos2 = np.where((dense_mat != 0) & (binary_mat2 == 0))

    # sparse_tensor2 = sparse_mat2.reshape([sparse_mat2.shape[0], 28, 288])
    # def TRMF(sparse_mat, lambda_w=500,
    #          lambda_x=500,
    #          lambda_theta=500,
    #          eta=0.03, time_lags=(1, 2, 144), maxiter=200)

    TRMF_res2 = TRMF(sparse_mat2,
                     lambda_w=500,
                     lambda_x=500,
                     lambda_theta=500,
                     eta=0.03,
                     time_lags=(1, 2, 3, 4, 144),
                     maxiter=200)
    # print(TRMF_res2)
    # print(dense_mat)
    TRMF_res2_mape2 = mape(dense_mat[pos2], TRMF_res2[pos2])
    TRMF_res2_rmse2 = rmse(dense_mat[pos2], TRMF_res2[pos2])

    print("TRMF_res2_mape2", TRMF_res2_mape2)
    print("TRMF_res2_rmse2", TRMF_res2_rmse2)
Example No. 8
def main():
  # minute
  y_test, predictions = run()
  # hourly
  # y_test, predictions = run(50, 1.0)
  # daily
  # y_test, predictions = run(50, 1.0)

  graph_utils.plot('linear', predictions, y_test)

  print('RMSE: %.4f'% metrics.rmse(predictions, y_test))

  print('MAPE: %.4f'% metrics.mape(predictions, y_test))
Example No. 9
def output_metrics(estresults, actresults):
    m = {'rmse': {}, 'cvrmse': {}, 'mbe': {}, 'nmbe': {}, 'mape': {}}
    estres = column_vectors(estresults)
    actres = column_vectors(actresults)
    for col in actres:
        try:
            m['rmse'][col] = metrics.rmse(estres[col], actres[col])
            m['cvrmse'][col] = metrics.cvrmse(estres[col], actres[col])
            m['mbe'][col] = metrics.mbe(estres[col], actres[col])
            m['nmbe'][col] = metrics.nmbe(estres[col], actres[col])
            m['mape'][col] = metrics.mape(estres[col], actres[col])
        except Exception:
            # Skip any column whose metrics cannot be computed.
            pass
    return m
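column_vectors is a project helper that is not shown. Judging from its use, it maps each result column to a list of values; a hypothetical stand-in that accepts an iterable of row dicts:

def column_vectors(results):
    # hypothetical: build {column name: [values...]} from row dicts
    columns = {}
    for row in results:
        for name, value in row.items():
            columns.setdefault(name, []).append(float(value))
    return columns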
Example No. 10
def main():
  # minute
  model, y_test, predictions = run()
  # hourly
  # model, y_test, predictions = run(30, 50, 1.0)
  # daily
  # model, y_test, predictions = run(100, 50, 1.0)

  # save for later use
  model.save_weights('../output/lstm.h5', overwrite=True)
  # model.load_weights('../output/lstm.h5')

  graph_utils.plot('lstm', predictions, y_test)

  print('RMSE: %.4f' % metrics.rmse(predictions, y_test))
  print('MAPE: %.4f' % metrics.mape(predictions, y_test))
Example No. 11
def output_metrics(estresults, actresults):
    m = {'rmse': {}, 'cvrmse': {}, 'mbe': {}, 'nmbe': {}, 'mape': {}}
    estres = column_vectors(estresults)
    actres = column_vectors(actresults)
    for col in actres:
        try:
            m['rmse'][col] = metrics.rmse(estres[col], actres[col])
            m['cvrmse'][col] = metrics.cvrmse(estres[col], actres[col])
            m['mbe'][col] = metrics.mbe(estres[col], actres[col])
            m['nmbe'][col] = metrics.nmbe(estres[col], actres[col])
            m['mape'][col] = metrics.mape(estres[col], actres[col])
        except Exception:
            # Skip any column whose metrics cannot be computed.
            pass
    return m
Example No. 12
def main():
  if sys.argv[1] == 'daily':
    print('Using daily data...')
    path_to_dataset = '../data/household_power_consumption_daily.csv'
    y_test, predictions = run(path_to_dataset, 50, 1.0)
  elif sys.argv[1] == 'monthly':
    print('Using monthly data...')
    path_to_dataset = '../data/household_power_consumption_monthly.csv'
    y_test, predictions = run(path_to_dataset, 5, 1.0)
  elif sys.argv[1] == 'hourly':
    print('Using hourly data...')
    path_to_dataset = '../data/household_power_consumption_hourly.csv'
    y_test, predictions = run(path_to_dataset, 50, 1.0)
  else:
    print('Using minute data...')
    path_to_dataset = '../data/household_power_consumption.csv'
    y_test, predictions = run(path_to_dataset)

  graph_utils.plot('linear', predictions, y_test)

  print('RMSE: %.4f' % metrics.rmse(predictions, y_test))

  print('MAPE: %.4f' % metrics.mape(predictions, y_test))
Example No. 13
def test_PPCA():
    dense_mat = pd.read_csv('./datasets/Seattle-data-set/mat.csv', index_col=0)
    rm = pd.read_csv('./datasets/Seattle-data-set/RM_mat.csv', index_col=0)
    dense_mat = dense_mat.values
    rm = rm.values

    binary_mat2 = np.round(rm + 0.5 - 0.2)
    nan_mat2 = binary_mat2.copy()

    nan_mat2[nan_mat2 == 0] = np.nan

    sparse_mat2 = np.multiply(nan_mat2, dense_mat)

    pos2 = np.where((dense_mat != 0) & (binary_mat2 == 0))

    # sparse_tensor2 = sparse_mat2.reshape([sparse_mat2.shape[0], 28, 288])

    PPCA_res2 = PPCA(sparse_mat2, 20)

    PPCA_res2_mape2 = mape(dense_mat[pos2], PPCA_res2[pos2])
    PPCA_res2_rmse2 = rmse(dense_mat[pos2], PPCA_res2[pos2])

    print("PPCA_res2_mape2", PPCA_res2_mape2)
    print("PPCA_res2_rmse2", PPCA_res2_rmse2)
Example No. 14
            true_x.append(row['timestamp'])
            true_y.append(row['downstream'])
            pred_y.append(preds.inferences["multiStepBestPredictions"][1])
            pred_x.append(row['timestamp'] + timedelta(minutes=5))

        np.savez("pred_data/{}-htm-pred-data".format(fname),
                 true_x=true_x,
                 true_y=true_y,
                 pred_x=pred_x,
                 pred_y=pred_y)
        np_tx = np.array(true_x)[1:]
        np_ty = np.array(true_y)[1:]
        np_py = np.array(pred_y)[:-1]
        print()
        print("GEH:  ", geh(np_ty, np_py))
        print("MAPE: ", mape(np_ty, np_py))
        print("RMSE: ", rmse(np_ty, np_py))

        print()

        print("True x:", len(true_x))
        print("True y:", len(true_x))
        print("Pred y:", len(true_x))
        plt.plot(true_x, true_y, 'b-', label='Readings')
        plt.plot(pred_x, pred_y, 'r-', label='Predictions')

        plt.legend(prop={'size': 23})
        plt.grid(b=True, which='major', color='black', linestyle='-')
        plt.grid(b=True, which='minor', color='black', linestyle='dotted')
        df = "%A %d %B, %Y"
        plt.title("3002: Traffic Flow from {} to {}".format(
Example No. 15
    def calculate_train_and_forecast_metrics(
            self, train: pd.DataFrame, oos: pd.DataFrame, target_index: int,
            hps: dict, horizon: int,
            mae_rmse_ignore_when_actual_and_pred_are_zero: bool,
            mape_ignore_when_actual_is_zero: bool):

        train_dataset = TrainDataset(train_df=train,
                                     target_index=target_index,
                                     hyperparams=hps,
                                     horizon=horizon)
        train_loader = DataLoader(train_dataset, batch_size=1, num_workers=1)
        inputs, train_actual = next(iter(train_loader))
        inputs = inputs.to(device=self.device)
        self.net = self.net.to(device=self.device)

        train_pred = self.net(inputs.float())
        train_actual = train_actual[0, 0, :].cpu().numpy()
        train_pred = train_pred[0, 0, :].cpu().detach().numpy()
        forecast_actual = oos.iloc[:horizon, target_index].values
        forecast_pred = self.predict(train, target_index, hps, horizon)

        assert (train_actual.shape == train_pred.shape)
        assert (forecast_actual.shape == forecast_pred.shape)

        train_dict = {
            'mae':
            metrics.mae(train_actual, train_pred,
                        mae_rmse_ignore_when_actual_and_pred_are_zero),
            'rmse':
            metrics.rmse(train_actual, train_pred,
                         mae_rmse_ignore_when_actual_and_pred_are_zero),
            'mape':
            metrics.mape(train_actual, train_pred,
                         mape_ignore_when_actual_is_zero),
            'presence_accuracy':
            metrics.presence_accuracy(train_actual, train_pred),
            'peak_accuracy':
            metrics.peak_accuracy(train_actual, train_pred),
            'total_volume':
            int(metrics.total_actual_volume(train_actual)),
            'num_timestamps_predicted_on':
            int(train_pred.shape[0])
        }

        forecast_dict = {
            'mae':
            metrics.mae(forecast_actual, forecast_pred,
                        mae_rmse_ignore_when_actual_and_pred_are_zero),
            'rmse':
            metrics.rmse(forecast_actual, forecast_pred,
                         mae_rmse_ignore_when_actual_and_pred_are_zero),
            'mape':
            metrics.mape(forecast_actual, forecast_pred,
                         mape_ignore_when_actual_is_zero),
            'presence_accuracy':
            metrics.presence_accuracy(forecast_actual, forecast_pred),
            'peak_accuracy':
            metrics.peak_accuracy(forecast_actual, forecast_pred),
            'total_volume':
            int(metrics.total_actual_volume(forecast_actual)),
            'num_time_stamps_predicted_on':
            int(forecast_pred.shape[0])
        }

        train_metrics = pd.DataFrame.from_dict(
            train_dict, columns=[None], orient='index').iloc[:, 0].round(3)

        forecast_metrics = pd.DataFrame.from_dict(
            forecast_dict, columns=[None], orient='index').iloc[:, 0].round(3)

        return train_metrics, forecast_metrics
Example No. 16
        predictions = {
            k: np.array(v[split_idx:]) for k, v in predictions.items()
        }
        print()

        table = []
        print(' & '.join(['step', 'geh', 'mape', 'rmse'])+' \\\\')
        for step in steps:
            # true values
            stepped_vals = flow_values[step:len(predictions)]
            # predicted  values
            pred_vals = predictions[step][:-step] + eps
            table.append(OrderedDict([
                ('steps', step),
                ('geh',  geh(stepped_vals, pred_vals)),
                ('mape', mape(stepped_vals, pred_vals)),
                ('rmse', rmse(stepped_vals, pred_vals))
            ]))
        print(tabulate.tabulate(table, 'keys', 'latex'))

        print("Loading matplotlib")
        import matplotlib.pyplot as plt

        true_y = []
        true_x = []
        pred_y = []
        print("Predicting data rows: {}".format(data_len - row_count))

        progress = pyprind.ProgBar(data_len - row_count, width=50, stream=1)
        for row in it:
            progress.update()
Example No. 17
    adfuller(train_set_diff)
    acf_pacf(train_set_diff)
    # the series is stationary; we can move on to searching for optimal parameters
    # best_params.append(sarima_best_params(train_set, 7, 1, 0, 7, 3))
    # build the model
    model = sm.tsa.SARIMAX(train_set,
                           order=(best_params[j][0], 1, best_params[j][1]),
                           seasonal_order=(best_params[j][2], 0,
                                           best_params[j][3],
                                           7)).fit(disp=False)
    # print the model summary
    print(model.summary())
    # check the model residuals for randomness
    ljungbox(model.resid)
    acf_pacf(model.resid)
    # SARIMA forecast for the specific hour
    hour_pred = model.forecast(2)[-1]
    # append the hourly forecast to the overall forecast
    total_pred.append(hour_pred)
# evaluate the forecast with metrics
nnf_val = nnfmetrics(total_pred, test_set, plan_set)
mse_val = mse(test_set, total_pred)  # renamed so mse()/mape() are not shadowed
mape_val = mape(test_set, total_pred)
acc_val = accuracy(test_set, total_pred)
print('NNFMETRICS score = ', nnf_val)
print('MSE score = ', mse_val)
print('MAPE score = ', mape_val)
print('Forecast accuracy = ', acc_val)
# plot the results
plot_results(pred_date.strftime('%d-%m-%Y'), test_set, total_pred, plan_set)
Example No. 18
                   activation='relu',
                   solver='lbfgs',
                   max_iter=10000,
                   learning_rate='constant')

mlp.fit(x_train, y_train)
predict = mlp.predict(x_test)

top_test = [y_test[i][0] for i in range(len(y_test))]
bottom_test = [y_test[i][1] for i in range(len(y_test))]

top_pred = [predict[i][0] for i in range(len(predict))]
bottom_pred = [predict[i][1] for i in range(len(predict))]

mse_high = MSE(top_test, top_pred)
mape_high = mape(top_test, top_pred)

mse_low = MSE(bottom_test, bottom_pred)
mape_low = mape(bottom_test, bottom_pred)

# ---------------------------------- FIGURE ----------------------------------

plt.figure(1)
plt.plot(top_test, label="High -- Test", color='green', linewidth=2)
plt.plot(top_pred, label='High -- Prediction', linewidth=1)
plt.plot(bottom_test, label="Low -- Test", color='red', linewidth=2)
plt.plot(bottom_pred, label='Low -- Prediction', linewidth=1)
plt.legend()
plt.title("PETR4 High and Low\nPrice predictions")
plt.ylabel('Price')
plt.xlabel('Days')
Example No. 19
def main():
    # get the model
    model = lstm()
    adam = Adam(lr=lr)
    model.compile(loss='mse',
                  optimizer=adam,
                  metrics=[metrics.rmse, metrics.mape, metrics.ma])
    all_scores_train = []
    all_scores_test = []
    #model.summary()
    #get data
    data = load_data_()  #nodes x slots
    ts1 = time.time()

    trueY_train = []
    predictY_train = []
    trueY_test = []
    predictY_test = []

    for i in range(len(data)):
        print('grid %d.....' % (i))
        ts = time.time()
        # make the dataset
        testslots = T * days_test
        trainx, trainy = makedataset(data[i, :-testslots])
        testx, testy = makedataset(data[i, -testslots:])
        print('trainx shape:', (trainx.shape))
        print('trainy shape:', (trainy.shape))
        print('testx shape:', (testx.shape))
        print('testy shape:', (testy.shape))

        #scaler
        print(trainy, testy)
        mmn = MinMaxScaler(feature_range=(-1, 1))
        trainlen = len(trainy)
        Y = np.concatenate([trainy, testy], axis=0)
        Y = mmn.fit_transform(Y.reshape(-1, 1))
        trainy, testy = Y[:trainlen], Y[trainlen:]
        print(trainy.shape, testy.shape)
        #train
        adam = Adam(lr=lr)
        model.compile(loss='mse',
                      optimizer=adam,
                      metrics=[metrics.rmse, metrics.mape, metrics.ma])
        early_stopping = EarlyStopping(monitor='val_rmse',
                                       patience=patience,
                                       mode='min')
        history = model.fit(trainx,
                            trainy,
                            epochs=nb_epoch,
                            batch_size=batch_size,
                            validation_split=0.1,
                            callbacks=[early_stopping],
                            verbose=0)
        # evaluate
        predict_y_train = model.predict([trainx],
                                        batch_size=batch_size,
                                        verbose=0)[:, 0:1]
        score = model.evaluate(trainx,
                               trainy,
                               batch_size=batch_size,
                               verbose=0)
        print(
            'Train score: %.6f rmse (norm): %.6f  rmse (real): %.6f nrmse : %.6f mape: %.6f ma: %.6f'
            % (score[0], score[1], score[1] *
               (mmn._max - mmn._min) / 2., score[1] *
               (mmn._max - mmn._min) / 2. /
               mmn.inverse_transform(np.mean(trainy)), score[2], score[3]))
        predict_y_test = model.predict([testx],
                                       batch_size=batch_size,
                                       verbose=0)[:, 0:1]
        score = model.evaluate(testx, testy, batch_size=batch_size, verbose=0)
        print(
            'Test score: %.6f rmse (norm): %.6f  rmse (real): %.6f nrmse : %.6f mape: %.6f ma: %.6f'
            % (score[0], score[1], score[1] *
               (mmn._max - mmn._min) / 2., score[1] *
               (mmn._max - mmn._min) / 2. /
               mmn.inverse_transform(np.mean(testy)), score[2], score[3]))

        predictY_train.append(
            mmn.inverse_transform(predict_y_train).reshape(-1).tolist())
        predictY_test.append(
            mmn.inverse_transform(predict_y_test).reshape(-1).tolist())
        trueY_train.append(mmn.inverse_transform(trainy).reshape(-1).tolist())
        trueY_test.append(mmn.inverse_transform(testy).reshape(-1).tolist())
        print("\nestimate on grid%d ,elapsed time (eval): %.3f seconds\n" %
              (i, time.time() - ts))
    #all_scores_train = np.asarray(all_scores_train)
    #all_scores_train = np.mean(all_scores_train, axis = 0)
    #all_scores_test = np.asarray(all_scores_test)
    #all_Scores_test = np.mean(all_scores_test,axis = 0)
    print('\n\n')
    evaluate = lambda y1, y2: (metrics.rmse(y1, y2),
                               metrics.rmse(y1, y2) / np.mean(y1),
                               metrics.mape(y1, y2),
                               metrics.ma(y1, y2))
    print('All Train rmse (real): %.6f nrmse : %.6f mape: %.6f ma: %.6f' %
          (evaluate(trueY_train, predictY_train)))
    print('All Test rmse (real): %.6f nrmse : %.6f mape: %.6f ma: %.6f' %
          (evaluate(trueY_test, predictY_test)))
    print('elapsed time: %3f seconds\n' % (time.time() - ts1))
Example No. 20
# autocorrelations and partial autocorrelations
acf_pacf(remainder)
# search for the best parameters
# sarima_best_params(remainder, 24, 0, 0, 4, 3)
# parameters found for 27.11.2019:
best_params = [2, 3, 1, 2]
# build the model
model = sm.tsa.SARIMAX(remainder,
                       order=(best_params[0], 0, best_params[1]),
                       seasonal_order=(best_params[2], 0, best_params[3],
                                       24)).fit(disp=False)
# print the model summary
print(model.summary())
# check the model residuals for randomness
ljungbox(model.resid)
acf_pacf(model.resid)
# SARIMA forecast of the residuals
remainder_pred = model.forecast(48)
# final forecast
total_pred = trend_pred + seasonal_pred + remainder_pred[24:]
# evaluate the forecast with metrics
nnf_val = nnfmetrics(total_pred, test_set, plan_set)
mse_val = mse(test_set, total_pred)
mape_val = mape(test_set, total_pred)
acc_val = accuracy(test_set, total_pred)
print('NNFMETRICS score = ', nnf_val)
print('MSE score = ', mse_val)
print('MAPE score = ', mape_val)
print('Forecast accuracy = ', acc_val)
# plot the results
plot_results(pred_date.strftime('%d-%m-%Y'), test_set, total_pred, plan_set)
Example No. 21
            ]]
            # the original applied fit_transform twice; refitting the scaler
            # on its own output is a no-op, so one pass suffices
            in_row = scaler.fit_transform(in_row)
            pred = model.predict(np.array([in_row]))
            # flow_val = pred[0][0]
            pred_y.append(scaler.inverse_transform([0, 0, 0, 0, 0,
                                                    pred[0][0]]))
        true_x = true_x[1:]
        true_y = true_y[1:]
        pred_y = pred_y[:-1]
        pred_y = np.array(pred_y, dtype=np.float32)
        true_y_max = np.copy(true_y)
        true_y_max[true_y_max == 0] = 1
        print(
            "GEH:  ",
            np.sqrt(2 * np.power(pred_y - true_y_max, 2) /
                    (pred_y + true_y_max)).mean(axis=0))
        print("MAPE: ", mape(true_y_max, pred_y))
        print("RMSE: ", np.sqrt(((pred_y - true_y_max)**2).mean(axis=0)))

        import matplotlib.pyplot as plt
        plt.plot(true_x, true_y, 'b-', label='Readings')
        plt.plot(true_x, pred_y, 'r-', label='Predictions')
        df = "%A %d %B, %Y"
        plt.title("3002: Traffic Flow from {} to {}".format(
            true_x[0].strftime(df), true_x[-1].strftime(df)))
        plt.legend()

        plt.ylabel("Vehicles/ 5 min")
        plt.xlabel("Time")
        plt.show()
Example No. 22
plan = dataset.plan
# length of the seasonal period
p = 168
# training dataset train_set, data from 14.01.2019 to 25.11.2019
train_start_date = datetime.datetime(2019, 1, 14, 0, 0, 0)
train_end_date = datetime.datetime(2019, 11, 25, 23, 0, 0)
train_set = fact[train_start_date:train_end_date]
# test dataset test_set, data for the forecast day 27.11.2019
pred_date = datetime.date(2019, 11, 27)
pred_end_date = train_end_date + datetime.timedelta(days=2)
pred_start_date = pred_end_date - datetime.timedelta(hours=23)
test_set = fact[pred_start_date:pred_end_date]
# company plan plan_set for comparison, data for the forecast day 27.11.2019
plan_set = plan[pred_start_date:pred_end_date]
# choose the method
method = additiveHoltWinters
# smoothing constants found earlier
best_params = [[0.06846567, 0.00032291, 0.26071966]]
# forecasting
pred = method(best_params[0], train_set, p, 48)[24:]
# evaluate the forecast with metrics
nnf_val = nnfmetrics(pred, test_set, plan_set)
mse_val = mse(test_set, pred)
mape_val = mape(test_set, pred)
acc_val = accuracy(test_set, pred)
print('NNFMETRICS score = ', nnf_val)
print('MSE score = ', mse_val)
print('MAPE score = ', mape_val)
print('Forecast accuracy = ', acc_val)
# plot the results
plot_results(pred_date.strftime('%d-%m-%Y'), test_set, pred, plan_set)
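additiveHoltWinters is project code; from the call above its signature appears to be (params, series, season_length, n_preds). A minimal additive Holt-Winters sketch under that signature, with simplified initialization (assumes at least two full seasons of data; not the project's implementation):

import numpy as np

def additive_holt_winters(params, series, slen, n_preds):
    # params = (alpha, beta, gamma) smoothing constants
    alpha, beta, gamma = params
    series = np.asarray(series, float)
    level = series[:slen].mean()
    trend = (series[slen:2 * slen] - series[:slen]).mean() / slen
    seasonals = series[:slen] - level  # crude initial seasonal indices
    for i, value in enumerate(series):
        s = i % slen
        last_level = level
        level = alpha * (value - seasonals[s]) + (1 - alpha) * (level + trend)
        trend = beta * (level - last_level) + (1 - beta) * trend
        seasonals[s] = gamma * (value - level) + (1 - gamma) * seasonals[s]
    return np.array([level + m * trend + seasonals[(len(series) + m - 1) % slen]
                     for m in range(1, n_preds + 1)])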
Example No. 23
    true_x = true_xy[:, 0]
    pred_x = np.reshape(pred_xy[:, 0], (-1, 1))
    pred_y = np.reshape(pred_xy[:, 1].astype(dtype=np.float32), (-1, 1))
    true_y = true_xy[:, 1].astype(np.float32)
    true_y_max = np.copy(true_y)[:-1]
    true_y_max[true_y_max == 0] = 1
    np.savez('pred_data/3002-no-reset-on-error-all-sensor',
             true_x=true_x,
             true_y=true_y,
             pred_x=pred_x,
             pred_y=pred_y)
    true_y_max = true_y_max.reshape((true_y_max.shape[0], 1))
    # print ("true_y_max", true_y_max.shape)
    # print("pred_y", pred_y.shape)
    print("GEH:  ", geh(true_y_max, pred_y[:-1]))
    print("MAPE: ", mape(true_y_max, pred_y[:-1]))
    print("RMSE: ", rmse(true_y_max, pred_y[:-1]))

    font = {'size': 30}
    import matplotlib

    matplotlib.rc('font', **font)

    import matplotlib.pyplot as plt
    plt.plot(true_x, true_y, 'b-', label='Readings')
    plt.plot(pred_x, pred_y, 'r-', label='LSTM-Online Predictions')
    df = "%A %d %B, %Y"
    plt.title("3002: Traffic Flow from {} to {}".format(
        true_x[0].strftime(df), true_x[-1].strftime(df)))
    plt.legend()
Example No. 24
sales = data['Sales']
elements = seasonal_decompose(sales, model='multiplicative')
data.plot()
elements.plot()
plt.show()

print(
    tabulate(
        [[
            "MSE",
            mse(data['Sales'].iloc[3:], data['Moving Average'].iloc[3:]),
            mse(data['Sales'], data['α=0.1']),
            mse(data['Sales'], data['α=0.5'])
        ],
         [
             "MAPE",
             mape(data['Sales'].iloc[3:], data['Moving Average'].iloc[3:]),
             mape(data['Sales'], data['α=0.1']),
             mape(data['Sales'], data['α=0.5'])
         ],
         [
             "LAD",
             lad(data['Sales'].iloc[3:], data['Moving Average'].iloc[3:]),
             lad(data['Sales'], data['α=0.1']),
             lad(data['Sales'], data['α=0.5'])
         ]],
        headers=["Moving Average", "α=0.1", "α=0.5"],
        tablefmt='fancy_grid',
        floatfmt='.2f'))
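lad is not a standard library function; the name suggests least absolute deviations, i.e. a summed absolute error. A sketch under that assumption:

import numpy as np

def lad(actual, forecast):
    # assumed: least absolute deviations = sum of absolute errors
    actual, forecast = np.asarray(actual, float), np.asarray(forecast, float)
    return np.abs(actual - forecast).sum()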
Example No. 25
def regression_report(y_true, y_pred):
    print("MSE", metrics.mse(y_true, y_pred).item())
    print("RMSE", metrics.rmse(y_true, y_pred).item())
    print("MAPE", metrics.mape(y_true, y_pred).item())
    print("MPE", metrics.mpe(y_true, y_pred).item())
    print("R2", metrics.r2(y_true, y_pred).item())
Example No. 26
def do_model(all_data, steps, run_model=True):
    _steps = steps
    print("steps:", _steps)

    if not run_model:
        return None, None
    features = all_data[:-_steps]
    labels = all_data[_steps:, -1:]
    tts = train_test_split(features, labels, test_size=0.4)
    X_train = tts[0]
    X_test = tts[1]
    Y_train = tts[2].astype(np.float64)
    Y_test = tts[3].astype(np.float64)


    optimiser = 'adam'
    # hidden_neurons = 300
    # loss_function = 'mse'
    # batch_size = 105
    # dropout = 0.056
    # inner_hidden_neurons = 269
    # dropout_inner = 0.22

    if steps == 1:
        hidden_neurons = 332
        loss_function = 'mse'
        batch_size = 128
        dropout = 0.0923
        inner_hidden_neurons = 269
        dropout_inner = 0.2269
    elif steps == 3:
        hidden_neurons = 256
        loss_function = 'mse'
        batch_size = 105
        dropout = 0.0923
        inner_hidden_neurons = 72
        dropout_inner = 0.001
    else:
        hidden_neurons = 332
        loss_function = 'mse'
        batch_size = 105
        dropout = 0.0042
        inner_hidden_neurons = 329
        dropout_inner = 0.1314

    # the stateful LSTM below uses batch_input_shape=(1, 1, ...), so force
    # a batch size of 1 and a single epoch for the online training loop
    batch_size = 1
    nb_epochs = 1

    X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
    X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])
    print("X train shape:\t", X_train.shape)
    print("X test shape:\t", X_test.shape)
    # print("Y train shape:\t", Y_train.shape)
    # print("Y test shape:\t", Y_test.shape)
    # print("Steps:\t", _steps)
    in_neurons = X_train.shape[2]

    out_neurons = 1

    model = Sequential()
    gpu_cpu = 'cpu'
    best_weight = BestWeight()
    reset_state = ResetStatesCallback()
    model.add(LSTM(output_dim=hidden_neurons,
                   input_dim=in_neurons,
                   batch_input_shape=(1, 1, in_neurons),
                   return_sequences=True,
                   init='uniform',
                   consume_less=gpu_cpu,
                   stateful=True))
    model.add(Dropout(dropout))

    dense_input = inner_hidden_neurons
    model.add(LSTM(output_dim=dense_input,
                   input_dim=hidden_neurons,
                   return_sequences=False,
                   consume_less=gpu_cpu,
                   stateful=True))
    model.add(Dropout(dropout_inner))
    model.add(Activation('relu'))

    model.add(Dense(output_dim=out_neurons, input_dim=dense_input))
    model.add(Activation('relu'))

    model.compile(loss=loss_function, optimizer=optimiser)
    # run through all the training data
    # learning training set
    print("Learning training set")
    # progress = pyprind.ProgBar(len(X_train)/batch_size +1, width=50, stream=1)
    for epoch in range(nb_epochs):  # xrange is Python 2 only
        mean_tr_loss = []
        print("Epoch {}".format(epoch))
        for x_chunk, y_chunk in chunks(X_train, Y_train, batch_size):
            tr_loss = model.train_on_batch(x_chunk, y_chunk)
            mean_tr_loss.append(tr_loss)
            model.reset_states()

        print("Training Loss: {}".format(np.mean(mean_tr_loss)))

        geh_l = []
        rmse_l = []
        mape_l = []
        training_done = 0
        # progress = pyprind.ProgBar(len(X_test) / batch_size +1, width=50, stream=1)
        for x_chunk, y_chunk in chunks(X_test, Y_test, batch_size):

            # start collecting stats
            predicted = model.predict_on_batch(x_chunk) + EPS
            model.reset_states()
            model.train_on_batch(x_chunk, y_chunk)
            model.reset_states()

            geh_l.append(geh(y_chunk, predicted))
            rmse_l.append(rmse(y_chunk, predicted))
            mape_l.append(mape(y_chunk, predicted))

            # progress.update()

        print("Testing RMSE: {} GEH: {} MAPE: {}".format(np.mean(rmse_l), np.mean(geh_l), np.mean(mape_l)))
    print()

    # per-chunk test stats collected above are averaged into the report below
    metrics = OrderedDict([
        ('online', True),
        ('hidden', hidden_neurons),
        ('steps', _steps),
        ('geh', np.mean(geh_l)),
        ('rmse', np.mean(rmse_l)),
        ('mape', np.mean(mape_l)),
        # ('smape', smape(predicted, _Y_test)),
        # ('median_pe', median_percentage_error(predicted, Y_test)),
        # ('mase', MASE(_Y_train, _Y_test, predicted)),
        # ('mae', mean_absolute_error(y_true=Y_test, y_pred=predicted)),
        ('batch_size', batch_size),
        # ('optimiser', optimiser),
        ('dropout', dropout),
        ('extra_layer_dropout', dropout_inner),
        ('extra_layer_neurons', inner_hidden_neurons),
        # ('loss function', loss_function)
        # 'history': history.history
    ])

    # print (tabulate.tabulate([metrics], tablefmt='latex', headers='keys'))

    return metrics, model
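chunks, BestWeight, and ResetStatesCallback are project utilities that are not shown. A plausible chunks consistent with the stateful training loop above (an assumption, not the project's code):

def chunks(X, Y, batch_size):
    # hypothetical: yield aligned (x, y) batches, dropping any trailing
    # partial batch so the stateful LSTM always sees a fixed batch size
    n = (len(X) // batch_size) * batch_size
    for start in range(0, n, batch_size):
        yield X[start:start + batch_size], Y[start:start + batch_size]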