コード例 #1
0
ファイル: utilities.py プロジェクト: yiyuan1840/Autotune
def input_metrics(targetxml, referencexml):
    """Compare tuned (target) input variables against reference ones.

    Pairs up variables by group, then returns a dict of metric name ->
    {key -> value}: per-variable 'pae' entries keyed by
    'idfclass;idfobject;idffield', and aggregate rmse/cvrmse/mbe/nmbe/mape
    under the single key 'all inputs'.
    """
    ref = pull_tune_variables(referencexml)
    tgt = pull_tune_variables(targetxml, referencexml)
    # Sort both sides by group so zip() pairs matching variables.
    ref.variables.sort(key=lambda var: var.group)
    tgt.variables.sort(key=lambda var: var.group)

    targets, references, minima, maxima, keys = [], [], [], [], []
    for rvar, tvar in zip(ref.variables, tgt.variables):
        if rvar.group != tvar.group:
            continue
        keys.append(';'.join([rvar.idfclass, rvar.idfobject, rvar.idffield]))
        targets.append(float(tvar.value))
        references.append(float(rvar.value))
        minima.append(rvar.minimum)
        maxima.append(rvar.maximum)

    paes = metrics.pae(targets, references, minima, maxima)
    m = {name: {} for name in ('pae', 'rmse', 'cvrmse', 'mbe', 'nmbe', 'mape')}
    m['pae'] = dict(zip(keys, paes))
    m['rmse']['all inputs'] = metrics.rmse(targets, references)
    m['cvrmse']['all inputs'] = metrics.cvrmse(targets, references)
    m['mbe']['all inputs'] = metrics.mbe(targets, references)
    m['nmbe']['all inputs'] = metrics.nmbe(targets, references)
    m['mape']['all inputs'] = metrics.mape(targets, references)
    return m
コード例 #2
0
def main():
  """Train the LSTM at the granularity named by sys.argv[1], save weights,
  plot the predictions and print RMSE/MAPE.

  Improvement over the original if/elif ladder: the per-granularity
  configuration is a single lookup table, so adding a granularity is one
  new entry instead of a new branch.
  """
  # (status message, dataset path, extra run() arguments) per granularity.
  datasets = {
      'daily': ('Using daily data...',
                '../data/household_power_consumption_daily.csv',
                (10, 50, 1.0)),
      'monthly': ('Using monthly data...',
                  '../data/household_power_consumption_monthly.csv',
                  (30, 5, 1.0)),
      'hourly': ('Using hourly data...',
                 '../data/household_power_consumption_hourly.csv',
                 (30, 50, 1.0)),
  }
  try:
    message, path_to_dataset, extra = datasets[sys.argv[1]]
  except KeyError:
    # Any unrecognized selector falls back to the full minute-level data.
    message = 'Using minute data...'
    path_to_dataset = '../data/household_power_consumption.csv'
    extra = ()
  print(message)
  model, y_test, predictions = run(path_to_dataset, *extra)

  # save for later use
  model.save_weights('../output/lstm.h5', overwrite=True)
  # model.load_weights('../output/lstm.h5')

  graph_utils.plot('lstm', predictions, y_test)

  print('RMSE: %.4f' % metrics.rmse(predictions, y_test))
  print('MAPE: %.4f' % metrics.mape(predictions, y_test))
コード例 #3
0
def test_TRTF():
    """Smoke-test TRTF imputation on the Seattle data set and print its
    MAPE/RMSE on the artificially removed entries."""
    dense = pd.read_csv('./datasets/Seattle-data-set/mat.csv',
                        index_col=0).values
    random_mask = pd.read_csv('./datasets/Seattle-data-set/RM_mat.csv',
                              index_col=0).values

    # Threshold the random mask: 1 = kept entry, 0 = held out.
    keep = np.round(random_mask + 0.5 - 0.2)
    keep_nan = keep.copy()
    keep_nan[keep_nan == 0] = np.nan

    # Held-out entries become NaN in the corrupted matrix.
    corrupted = np.multiply(keep_nan, dense)

    # Evaluate only on non-zero entries that were removed.
    eval_idx = np.where((dense != 0) & (keep == 0))

    tensor = corrupted.reshape([corrupted.shape[0], 28, 288])
    recovered = TRTF(tensor,
                     rank=50,
                     time_lags=(1, 2, 288),
                     maxiter=200).reshape(dense.shape)

    print("TRTF_res2_mape2", mape(dense[eval_idx], recovered[eval_idx]))
    print("TRTF_res2_rmse2", rmse(dense[eval_idx], recovered[eval_idx]))
コード例 #4
0
ファイル: utilities.py プロジェクト: ORNL-BTRIC/Autotune
def input_metrics(targetxml, referencexml):
    """Compute per-variable PAE plus aggregate error metrics between the
    target and reference tuning variables.

    Returns a dict of metric name -> {key -> value}; aggregate metrics use
    the single key 'all inputs'.
    """
    rvars = pull_tune_variables(referencexml)
    tvars = pull_tune_variables(targetxml, referencexml)
    # Align both variable lists by group before pairing them up.
    rvars.variables.sort(key=lambda var: var.group)
    tvars.variables.sort(key=lambda var: var.group)

    # One row per matched pair: (key, target, reference, min, max).
    rows = [(';'.join([r.idfclass, r.idfobject, r.idffield]),
             float(t.value), float(r.value), r.minimum, r.maximum)
            for r, t in zip(rvars.variables, tvars.variables)
            if r.group == t.group]
    keys = [row[0] for row in rows]
    target = [row[1] for row in rows]
    reference = [row[2] for row in rows]
    lows = [row[3] for row in rows]
    highs = [row[4] for row in rows]

    return {'pae': dict(zip(keys, metrics.pae(target, reference, lows, highs))),
            'rmse': {'all inputs': metrics.rmse(target, reference)},
            'cvrmse': {'all inputs': metrics.cvrmse(target, reference)},
            'mbe': {'all inputs': metrics.mbe(target, reference)},
            'nmbe': {'all inputs': metrics.nmbe(target, reference)},
            'mape': {'all inputs': metrics.mape(target, reference)}}
コード例 #5
0
ファイル: cross_validation.py プロジェクト: mayukh18/reco
def cross_val_score(model=None, data=None, cv=10, scorer=rmse):
    """K-fold cross-validation score for *model* on *data*.

    param model: estimator exposing fit(), predict() and a
        ``formatizer['value']`` column index locating the target column.
    param data: 2-D array-like of rating rows.
    param cv: number of folds.
    param scorer: metric called as ``scorer(y_true, y_pred)``; defaults to
        rmse.
    return: mean score across the ``cv`` folds.
    """
    data = np.array(data)
    print(data.shape)
    chunks = chunk(data, cv)
    score = list()

    # BUG FIX: the fold loop previously iterated range(10) regardless of
    # ``cv``, and always scored with rmse even when a different ``scorer``
    # was supplied. Both parameters are now honoured (defaults unchanged).
    for i in range(cv):

        # Training data is every chunk except the held-out fold i.
        iter_data = list()
        for j in range(len(chunks)):
            if j != i:
                iter_data.extend(chunks[j])

        pred_data = np.array(chunks[i])
        iter_data = np.array(iter_data)

        model.fit(iter_data)
        pred = model.predict(pred_data)
        score.append(scorer(pred_data[:, model.formatizer['value']], pred))
        print(score[i])

    return np.mean(score)
コード例 #6
0
def test_HaLRTC():
    """Smoke-test HaLRTC tensor completion on the Seattle data set and
    print its MAPE/RMSE on the artificially removed entries."""
    dense = pd.read_csv('./datasets/Seattle-data-set/mat.csv',
                        index_col=0).values
    random_mask = pd.read_csv('./datasets/Seattle-data-set/RM_mat.csv',
                              index_col=0).values

    # Threshold the random mask: 1 = kept entry, 0 = held out.
    keep = np.round(random_mask + 0.5 - 0.2)
    keep_nan = keep.copy()
    keep_nan[keep_nan == 0] = np.nan

    # Held-out entries become NaN in the corrupted matrix.
    corrupted = np.multiply(keep_nan, dense)

    # Evaluate only on non-zero entries that were removed.
    eval_idx = np.where((dense != 0) & (keep == 0))

    tensor = corrupted.reshape([corrupted.shape[0], 28, 288])
    completed = HaLRTC(tensor, rho=1e-5, epsilon=1e-4,
                       maxiter=200).reshape(dense.shape)

    print("HaLRTC_res2_mape2", mape(dense[eval_idx], completed[eval_idx]))
    print("HaLRTC_res2_rmse2", rmse(dense[eval_idx], completed[eval_idx]))
コード例 #7
0
def test_TRMF():
    """Smoke-test TRMF matrix factorization on the Seattle data set and
    print its MAPE/RMSE on the artificially removed entries."""
    dense = pd.read_csv('./datasets/Seattle-data-set/mat.csv',
                        index_col=0).values
    random_mask = pd.read_csv('./datasets/Seattle-data-set/RM_mat.csv',
                              index_col=0).values

    # Threshold the random mask: 1 = kept entry, 0 = held out.
    keep = np.round(random_mask + 0.5 - 0.2)
    keep_nan = keep.copy()
    keep_nan[keep_nan == 0] = np.nan

    # Held-out entries become NaN in the corrupted matrix.
    corrupted = np.multiply(keep_nan, dense)

    # Evaluate only on non-zero entries that were removed.
    eval_idx = np.where((dense != 0) & (keep == 0))

    # TRMF works directly on the 2-D matrix (no tensor reshape needed).
    recovered = TRMF(corrupted,
                     lambda_w=500,
                     lambda_x=500,
                     lambda_theta=500,
                     eta=0.03,
                     time_lags=(1, 2, 3, 4, 144),
                     maxiter=200)

    print("TRMF_res2_mape2", mape(dense[eval_idx], recovered[eval_idx]))
    print("TRMF_res2_rmse2", rmse(dense[eval_idx], recovered[eval_idx]))
コード例 #8
0
def test_BTRMF():
    """Smoke-test Bayesian TRMF on the Seattle data set and print its
    MAPE/RMSE on the artificially removed entries."""
    dense = pd.read_csv('./datasets/Seattle-data-set/mat.csv',
                        index_col=0).values
    random_mask = pd.read_csv('./datasets/Seattle-data-set/RM_mat.csv',
                              index_col=0).values

    # Threshold the random mask: 1 = kept entry, 0 = held out.
    keep = np.round(random_mask + 0.5 - 0.2)
    keep_nan = keep.copy()
    keep_nan[keep_nan == 0] = np.nan

    # Held-out entries become NaN in the corrupted matrix.
    corrupted = np.multiply(keep_nan, dense)

    # Evaluate only on non-zero entries that were removed.
    eval_idx = np.where((dense != 0) & (keep == 0))

    # BTRMF works directly on the 2-D matrix (no tensor reshape needed).
    recovered = BTRMF(corrupted,
                      rank=50,
                      time_lags=(1, 2, 288),
                      burn_iter=100,
                      gibbs_iter=20)

    print("BTRMF_res2_mape2", mape(dense[eval_idx], recovered[eval_idx]))
    print("BTRMF_res2_rmse2", rmse(dense[eval_idx], recovered[eval_idx]))
コード例 #9
0
def val_model(model, criterion):
    """Run one validation pass of *model* over the module-level ``val_loader``.

    Parameters
    ----------
    model : callable network; evaluated with inputs moved to CUDA.
    criterion : callable
        Loss function taking (outputs, labels); the loss is accumulated in
        ``running_loss`` but not returned.

    Returns
    -------
    tuple
        (val_score, val_rmse) computed by the module-level ``score`` and
        ``rmse`` helpers over all collected predictions/labels.

    NOTE(review): there is no torch.no_grad() guard here, so autograd
    tracking stays on during validation — confirm this is intended.
    """
    dset_sizes = len(val_dataset)  # NOTE(review): computed but never used
    model.eval()
    running_loss = 0.0
    running_corrects = 0  # NOTE(review): never updated or returned
    cont = 0
    outPre = []
    outLabel = []
    pres_list = []
    labels_list = []
    for data in val_loader:
        # Batch layout: data['X']['x'] inputs, data['Y'] labels,
        # data['X']['m'] month — ``month`` is unpacked but unused here.
        inputs, labels, month = data['X']['x'], data['Y'], data['X']['m']
        x = inputs[0]
        labels = labels.type(torch.float).cuda()
        inputs = [x.cuda()]
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        # Accumulate predictions/labels both as concatenated CPU tensors
        # (outPre/outLabel — NOTE(review): built but never used afterwards)
        # and as plain Python lists (pres_list/labels_list).
        if cont == 0:
            outPre = outputs.data.cpu()
            outLabel = labels.data.cpu()
        else:
            outPre = torch.cat((outPre, outputs.data.cpu()), 0)
            outLabel = torch.cat((outLabel, labels.data.cpu()), 0)
        pres_list += outputs.cpu().numpy().tolist()
        labels_list += labels.data.cpu().numpy().tolist()
        # Weight the batch loss by batch size.
        running_loss += loss.item() * outputs.size(0)
        cont += 1
    #
    labels_arr = np.array(labels_list)
    pre_arr = np.array(pres_list)
    val_score = score(labels_arr, pre_arr)
    val_rmse = rmse(labels_arr, pre_arr)
    return val_score, val_rmse
コード例 #10
0
    def crossval_and_predict(self, n_folds: int, df: pd.DataFrame,
                             df_test: pd.DataFrame, feature_col: list,
                             target_col: str, model_params: dict):
        """Run n-fold LightGBM cross-validation.

        Returns (out-of-fold RMSE over *df*, test predictions averaged
        across the folds). ``model_params`` has ``n_estimators`` and
        ``learning_rate`` overridden in place.
        """
        oof = np.zeros((len(df)))
        cv_preds = np.zeros((len(df_test)))
        splitter = KFold(n_splits=n_folds,
                         random_state=self.random_state,
                         shuffle=True)
        features = df[feature_col]
        targets = df[target_col]
        for fold_train, fold_valid in splitter.split(df):
            X_train = features.iloc[fold_train]
            y_train = targets.iloc[fold_train]
            X_valid = features.iloc[fold_valid]
            y_valid = targets.iloc[fold_valid]

            # Fixed training budget; early stopping trims the estimators.
            model_params['n_estimators'] = 5000
            model_params['learning_rate'] = 1e-2

            regressor = LGBMRegressor(**model_params)
            regressor.fit(X_train,
                          y_train,
                          eval_set=((X_valid, y_valid)),
                          early_stopping_rounds=500,
                          verbose=0)
            oof[fold_valid] = regressor.predict(X_valid)
            # Each fold contributes an equal share of the test prediction.
            cv_preds += regressor.predict(df_test[feature_col]) / n_folds

        rmse_score = rmse(df[target_col], oof)
        return rmse_score, cv_preds
コード例 #11
0
def prediction_pipeline(model, X_train, X_test, y_train, y_test):
    """Fit *model* on the training split and score it on both splits.

    param model: Estimator exposing fit() and predict().
    param X_train: Train dataframe without target.
    param X_test: Test dataframe without target.
    param y_train: Train target.
    param y_test: Test target.
    return: (train predictions, test predictions, train RMSE, test RMSE)
    """
    model.fit(X_train, y_train)

    predict_train = model.predict(X_train)
    score_train = rmse(y_train, predict_train)

    predict_test = model.predict(X_test)
    score_test = rmse(y_test, predict_test)

    return predict_train, predict_test, score_train, score_test
コード例 #12
0
    def update(self, test_predictions, val_predictions=None, year=None):
        """Append new predictions and (best-effort) refresh metric tables.

        Parameters
        ----------
        test_predictions : appended to ``self.test_predictions``; scored
            into a per-season column of ``self.test_metrics``.
        val_predictions : optional; appended to ``self.val_predictions``
            and scored into ``self.val_metrics`` when given.
        year : int, optional
            Season offset: test columns are labelled year+2014/year+15,
            validation columns year+2013/year+14. When ``year == 3`` an
            'Average' column is added to the test metrics.
        """
        self.test_predictions = self.test_predictions.append(test_predictions)

        try:
            # One column per season holding [CRPS, NLL, MAE, RMSE, sMAPE,
            # corr, mean of masked MB-log, SDP].
            self.test_metrics[str(year + 2014) + '/' + str(year + 15)] = [
                metrics.crps(test_predictions),
                metrics.nll(test_predictions),
                metrics.mae(test_predictions),
                metrics.rmse(test_predictions),
                metrics.smape(test_predictions),
                metrics.corr(test_predictions),
                np.ma.masked_invalid(metrics.mb_log(test_predictions)).mean(),
                metrics.sdp(test_predictions)
            ]
        except:
            # NOTE(review): bare except silently drops any scoring error.
            pass
        if year == 3:
            # Final season: add a cross-season average column; SDP is
            # averaged on absolute values separately.
            self.test_metrics['Average'] = self.test_metrics.mean(1)
            self.test_metrics['Average'].loc['SDP'] = np.abs(
                self.test_metrics.loc['SDP'].values[-1]).mean()

        try:
            self.val_predictions = self.val_predictions.append(val_predictions)
            self.val_metrics[str(year + 2013) + '/' + str(year + 14)] = [
                metrics.crps(val_predictions),
                metrics.nll(val_predictions),
                metrics.mae(val_predictions),
                metrics.rmse(val_predictions),
                metrics.smape(val_predictions),
                metrics.corr(val_predictions),
                metrics.mb_log(val_predictions).mean(),
                metrics.sdp(val_predictions)
            ]
        except:
            pass

            # NOTE(review): the four statements below sit inside this
            # 'except' block after 'pass', so they only execute when the
            # validation scoring above raised an exception — this looks
            # unintentional; confirm before relying on it.
            self.val_metrics['Average'] = self.val_metrics.mean(1)
            self.val_metrics['Average'].loc['SDP'] = np.abs(
                self.val_metrics.loc['SDP'].values[-1]).mean()
            self.test_metrics['Average'] = self.test_metrics.mean(1)
            self.test_metrics['Average'].loc['SDP'] = np.abs(
                self.test_metrics.loc['SDP'].values[-1]).mean()
コード例 #13
0
    def crossval_and_predict(self, n_folds: int, df: pd.DataFrame,
                             df_test: pd.DataFrame, feature_col: list,
                             target_col: str, model_params: dict):
        """Pretrain + train a TabNet regressor per fold.

        Parameters
        ----------
        n_folds : int
            Number of KFold splits.
        df, df_test : pd.DataFrame
            Training data (features + target) and test data (features).
        feature_col : list
            Feature column names.
        target_col : str
            Target column name.
        model_params : dict
            Tuned hyperparameters (n_d, gamma, momentum, n_steps,
            n_shared, n_independent); n_a is set from n_d below.

        Returns
        -------
        tuple
            (RMSE of the out-of-fold predictions over *df*,
             test predictions averaged across folds).
        """
        oof = np.zeros((len(df)))
        cv_preds = np.zeros((len(df_test)))
        kfold = KFold(n_splits=n_folds,
                      random_state=self.random_state,
                      shuffle=True)
        for train_idx, valid_idx in kfold.split(df):
            # TabNet expects dense arrays; targets reshaped to (n, 1).
            X_train, y_train = df[feature_col].values[train_idx], df[
                target_col].values[train_idx].reshape(-1, 1)
            X_valid, y_valid = df[feature_col].values[valid_idx], df[
                target_col].values[valid_idx].reshape(-1, 1)
            X_test = df_test[feature_col].values

            # Overlay the tuned values on top of the shared defaults.
            params = self.default_params()
            params['seed'] = self.random_state
            params['n_d'] = model_params['n_d']
            # n_a is set from n_d (both widths kept equal here).
            params['n_a'] = model_params['n_d']
            params['gamma'] = model_params['gamma']
            params['momentum'] = model_params['momentum']
            params['n_steps'] = model_params['n_steps']
            params['n_shared'] = model_params['n_shared']
            params['n_independent'] = model_params['n_independent']

            logging.info(
                f'Parameters used for TabNet supervised training: {params}')

            # Self-supervised pretraining, then supervised fine-tuning
            # warm-started from it via from_unsupervised.
            unsupervised_model = TabNetPretrainer(**params)
            unsupervised_model.fit(X_train=X_train,
                                   eval_set=[X_valid],
                                   pretraining_ratio=0.5,
                                   max_epochs=20)

            model = TabNetRegressor(**params)
            model.fit(X_train=X_train,
                      y_train=y_train,
                      eval_set=[(X_valid, y_valid)],
                      eval_name=['valid'],
                      eval_metric=['rmse'],
                      max_epochs=100,
                      patience=10,
                      batch_size=1024,
                      from_unsupervised=unsupervised_model)

            oof[valid_idx] = model.predict(X_valid).squeeze()
            # Each fold contributes an equal share of the test prediction.
            cv_preds += model.predict(X_test).squeeze() / n_folds
            logging.info(
                f'Finished fold with score {rmse(y_valid, oof[valid_idx])}')

        rmse_score = rmse(df[target_col], oof)
        return rmse_score, cv_preds
コード例 #14
0
def main():
  """Evaluate the linear baseline on minute-level data, plot the fit and
  print RMSE/MAPE."""
  # minute-level run; the commented alternatives below show the arguments
  # for other granularities.
  y_test, predictions = run()
  # hourly
  # y_test, predictions = run(50, 1.0)
  # daily
  # y_test, predictions = run(50, 1.0)

  graph_utils.plot('linear', predictions, y_test)

  print('RMSE: %.4f' % metrics.rmse(predictions, y_test))
  print('MAPE: %.4f' % metrics.mape(predictions, y_test))
コード例 #15
0
ファイル: utilities.py プロジェクト: yiyuan1840/Autotune
def output_metrics(estresults, actresults):
    """Column-wise error metrics between estimated and actual results.

    Returns a dict of metric name -> {column -> value}. Columns whose
    metric computation fails are skipped silently (best effort).
    """
    # Insertion order matches the original computation order.
    metric_fns = {'rmse': metrics.rmse,
                  'cvrmse': metrics.cvrmse,
                  'mbe': metrics.mbe,
                  'nmbe': metrics.nmbe,
                  'mape': metrics.mape}
    m = {name: {} for name in metric_fns}
    estres = column_vectors(estresults)
    actres = column_vectors(actresults)
    for col in actres:
        try:
            for name, fn in metric_fns.items():
                m[name][col] = fn(estres[col], actres[col])
        except:
            # If anything crashes it here, just ignore the column in the output.
            pass
    return m
コード例 #16
0
def main():
  """Train the LSTM on minute-level data, persist its weights, plot the
  predictions and print RMSE/MAPE."""
  # minute-level run; commented alternatives show the other granularities.
  model, y_test, predictions = run()
  # hourly
  # model, y_test, predictions = run(30, 50, 1.0)
  # daily
  # model, y_test, predictions = run(100, 50, 1.0)

  # Persist the trained weights so later runs can load_weights() them.
  model.save_weights('../output/lstm.h5', overwrite=True)
  # model.load_weights('../output/lstm.h5')

  graph_utils.plot('lstm', predictions, y_test)

  print('RMSE: %.4f' % metrics.rmse(predictions, y_test))
  print('MAPE: %.4f' % metrics.mape(predictions, y_test))
コード例 #17
0
ファイル: main.py プロジェクト: angusan/tensorflow-DeepFM
def _run_base_model_dfm(dfTrain, dfTest, folds, dfm_params):
    """Train a DeepFM/FM/DNN across CV folds and write a submission file.

    Parameters
    ----------
    dfTrain, dfTest : pd.DataFrame
        Raw train/test frames, parsed into index/value lists by DataParser.
    folds : iterable of (train_idx, valid_idx)
        Precomputed cross-validation index pairs.
    dfm_params : dict
        DeepFM constructor kwargs; 'feature_size' and 'field_size' are
        filled in here, and 'use_fm'/'use_deep'/'epoch' are read.

    Returns
    -------
    tuple
        (y_train_meta, y_test_meta): out-of-fold and fold-averaged test
        predictions, both shaped (n, 1).
    """
    fd = FeatureDictionary(dfTrain=dfTrain, dfTest=dfTest,
                           numeric_cols=config.NUMERIC_COLS,
                           ignore_cols=config.IGNORE_COLS)
    data_parser = DataParser(feat_dict=fd)
    Xi_train, Xv_train, y_train = data_parser.parse(df=dfTrain, has_label=True)
    Xi_test, Xv_test, ids_test = data_parser.parse(df=dfTest)

    dfm_params["feature_size"] = fd.feat_dim
    dfm_params["field_size"] = len(Xi_train[0])

    y_train_meta = np.zeros((dfTrain.shape[0], 1), dtype=float)
    y_test_meta = np.zeros((dfTest.shape[0], 1), dtype=float)
    # Select list elements by a list of indices.
    _get = lambda x, l: [x[i] for i in l]
    # NOTE(review): despite the 'gini_' names, the fold score stored below
    # is computed with rmse.
    gini_results_cv = np.zeros(len(folds), dtype=float)
    gini_results_epoch_train = np.zeros((len(folds), dfm_params["epoch"]), dtype=float)
    gini_results_epoch_valid = np.zeros((len(folds), dfm_params["epoch"]), dtype=float)
    for i, (train_idx, valid_idx) in enumerate(folds):
        Xi_train_, Xv_train_, y_train_ = _get(Xi_train, train_idx), _get(Xv_train, train_idx), _get(y_train, train_idx)
        Xi_valid_, Xv_valid_, y_valid_ = _get(Xi_train, valid_idx), _get(Xv_train, valid_idx), _get(y_train, valid_idx)

        dfm = DeepFM(**dfm_params)
        dfm.fit(Xi_train_, Xv_train_, y_train_, Xi_valid_, Xv_valid_, y_valid_)

        y_train_meta[valid_idx,0] = dfm.predict(Xi_valid_, Xv_valid_)
        # Accumulate test predictions; averaged over folds below.
        y_test_meta[:,0] += dfm.predict(Xi_test, Xv_test)

        gini_results_cv[i] = rmse(y_valid_, y_train_meta[valid_idx])
        gini_results_epoch_train[i] = dfm.train_result
        gini_results_epoch_valid[i] = dfm.valid_result

    y_test_meta /= float(len(folds))

    # save result
    if dfm_params["use_fm"] and dfm_params["use_deep"]:
        clf_str = "DeepFM"
    elif dfm_params["use_fm"]:
        clf_str = "FM"
    elif dfm_params["use_deep"]:
        clf_str = "DNN"
    # NOTE(review): if both use_fm and use_deep are False, clf_str is
    # never bound and the print below raises NameError — confirm callers
    # always set at least one of the two flags.
    print("%s: %.5f (%.5f)"%(clf_str, gini_results_cv.mean(), gini_results_cv.std()))
    filename = "%s_Mean%.5f_Std%.5f.csv"%(clf_str, gini_results_cv.mean(), gini_results_cv.std())
    _make_submission(ids_test, y_test_meta, filename)

    _plot_fig(gini_results_epoch_train, gini_results_epoch_valid, clf_str)

    return y_train_meta, y_test_meta
コード例 #18
0
def main():
    """Load song-play triplets, build the neighbour model and print the
    cross-validation metrics."""
    # Each dataset row looks like [user_id, song_id, play_count].
    triplets_path = 'train_triplets.txt'

    print("Loading data...")
    load_data(triplets_path)

    print("Starting evaluation...")
    calc_neighbours()
    print("Finished evaluations.")

    # Show a sample recommendation before scoring.
    print_top_songs_for_user(1)

    print("Starting cross validation...")
    print("RMSE result: ", str(rmse(train_set, test_set)))
    print("MAE result: ", str(mae(train_set, test_set)))
    print("NDCG result: ", str(ndcg(train_set, test_set)))
コード例 #19
0
ファイル: simulation.py プロジェクト: bzohidov/TomoRain
    def evaluate(self, observed, estimated, zone, res = 0.5):
        '''
        Returns evaluation between observed and estimated rainfall maps
        by computing following metrics:
            ['BIAS', 'CORRELATION', 'Nash-Sutcliffe', 'RMSE', 'MAE',
                                                      'MEAN_OBS','MEAN_EST']
        Inputs:
            observed  -  2D array.    Observed rainfall map.
            estimated -  2D array.    Estimated rainfall map.
            zone      -  (2,2) tuple. Evaluation study zone [km x km]
        Optional:
            res       -  scalar.      Resolution for comparison, [km].
        Outputs:
            metrics   -  dictionary.  Statistical metrics:
                                     ['bias', 'corr', 'nash', 'rmse', 'mae',
                                                      'mean_obs','mean_est']
        '''
        # We neglect area that is not estimated for comparison purpose.
        # (Both arrays are flagged -999 wherever the estimate is invalid;
        # note this mutates the caller's arrays in place.)
        estimated[estimated <= -999] = -999
        observed[estimated <= -999] = -999

        ((x0, x1), (y0, y1)) = zone
        # Cut the zone for evaluation
        t1, t2, t3, t4 = int(y0/res), int(y1/res), int(x0/res), int(x1/res)
        observed = observed[t1:t2, t3:t4]
        estimated = estimated[t1:t2, t3:t4]

        # BUG FIX: the Python 2-only '<>' operator was removed in Python 3
        # (SyntaxError); '!=' is equivalent and valid in both versions.
        est = estimated[estimated != -999].flatten()
        obs = observed[observed != -999].flatten()
        stats = dict()
        stats['bias'] = metrics.bias(obs, est)
        stats['corr'] = metrics.corr(obs, est)
        stats['nash'] = metrics.nash(obs, est)
        stats['rmse'] = metrics.rmse(obs, est)
        stats['mae'] = metrics.mae(obs, est)
        stats['mean_obs'] = metrics.average(obs)
        stats['mean_est'] = metrics.average(est)
        # additional metrics can be added
        ##stats['likelihood'] = metrics.likelihood(obs, est)
        ##stats['mape'] = metrics.mape(obs, est)
        ##stats['mse'] = metrics.mse(obs, est)
        ##stats['mspe'] = metrics.mspe(obs, est)
        ##stats['rmspe'] = metrics.rmspe(obs, est)
        return stats
コード例 #20
0
ファイル: utilities.py プロジェクト: ORNL-BTRIC/Autotune
def output_metrics(estresults, actresults):
    """Column-wise error metrics between estimated and actual results.

    Returns a dict mapping metric name -> {column name -> value}; columns
    whose metric computation fails are skipped silently (best effort).
    """
    # (name, function) pairs, in the original computation order.
    pairs = [('rmse', metrics.rmse),
             ('cvrmse', metrics.cvrmse),
             ('mbe', metrics.mbe),
             ('nmbe', metrics.nmbe),
             ('mape', metrics.mape)]
    m = dict((name, {}) for name, _ in pairs)
    estres = column_vectors(estresults)
    actres = column_vectors(actresults)
    for col in actres:
        try:
            for name, fn in pairs:
                m[name][col] = fn(estres[col], actres[col])
        except:
            # If anything crashes it here, just ignore the column in the output.
            pass
    return m
コード例 #21
0
    def forward(self, u, v, r_matrix):
        """Forward pass of the graph-convolutional rating model.

        Parameters
        ----------
        u, v : index tensors selecting the batch users/items.
        r_matrix : rating tensor; indexed below along dims 1 (users) and
            2 (items) — assumed (ratings, users, items), TODO confirm.

        Returns
        -------
        tuple
            (output, loss, rmse_loss): decoder logits, softmax
            cross-entropy against the batch ratings, and RMSE of the
            reconstructed ratings ``m_hat``.
        """
        # Two graph-convolution layers: the first over all users/items,
        # the second restricted to the batch indices u, v.
        u_z, v_z = self.gcl1(self.u_features, self.v_features,
                             range(self.num_users), range(self.num_items), r_matrix)
        u_z, v_z = self.gcl2(u_z, v_z, u, v, r_matrix)

        # Dense transforms of the side-information features.
        u_f = torch.relu(self.denseu1(self.u_features_side[u]))
        v_f = torch.relu(self.densev1(self.v_features_side[v]))

        # Concatenate graph and side embeddings, apply dropout, project.
        u_h = self.denseu2(F.dropout(torch.cat((u_z, u_f), 1), self.dropout))
        v_h = self.densev2(F.dropout(torch.cat((v_z, v_f), 1), self.dropout))

        # Bilinear decoder produces logits and reconstructed ratings.
        output, m_hat = self.bilin_dec(u_h, v_h, u, v)

        # Slice the ground-truth ratings for this batch of (u, v) pairs.
        r_mx = r_matrix.index_select(1, u).index_select(2, v)
        loss = softmax_cross_entropy(output, r_mx.float())
        rmse_loss = rmse(m_hat, r_mx.float())

        return output, loss, rmse_loss
コード例 #22
0
ファイル: models.py プロジェクト: suen049/quac
 def lambdas_cross_validate(self, lambdas, fold_ct):
     """Cross-validate the penalty strengths in *lambdas*.

     For each candidate ``lambda_``, fit on ``fold_ct`` KFold splits of
     the training features/incidence and record the held-out-fold RMSE.
     Stores the per-(lambda, fold) series in ``self.lambda_cv`` and the
     fold-averaged means in ``self.lambda_cv_means``.
     """
     # Use cross-validation to test the lambdas. Set self.lambda_cv to a
     # series with index (lambda, fold index) and one column RMSE.
     index = pd.MultiIndex.from_product([lambdas, range(fold_ct)],
                                        names=["lambda", "fold"])
     out = pd.Series(index=index, name="RMSE", dtype=np.float64)
     for lambda_ in lambdas:
         # shuffle=False gives deterministic, order-preserving folds, so
         # every lambda is evaluated on identical splits.
         folds =  sklearn.model_selection.KFold(fold_ct, shuffle=False) \
                 .split(self.features_train, self.incidence_train)
         for (fold_idx, (cv_train_is, cv_test_is)) in enumerate(folds):
             X_train = self.features_train.iloc[cv_train_is]
             y_train = self.incidence_train.iloc[cv_train_is]
             X_test = self.features_train.iloc[cv_test_is]
             y_test = self.incidence_train.iloc[cv_test_is]
             betas = linear_fit(X_train, y_train, self.penalty, lambda_)
             y_predicted = linear_predict(X_test, betas, y_test.index)
             out.loc[lambda_, fold_idx] = metrics.rmse(y_test, y_predicted)
     self.lambda_cv = out
     self.lambda_cv_means = out.groupby(["lambda"
                                         ]).mean()  # avg across folds
コード例 #23
0
def main():
  """Evaluate the linear model at the granularity named by sys.argv[1],
  plot the predictions and print RMSE/MAPE."""
  # (status message, dataset path, extra run() arguments) per granularity.
  datasets = {
      'daily': ('Using daily data...',
                '../data/household_power_consumption_daily.csv', (50, 1.0)),
      'monthly': ('Using monthly data...',
                  '../data/household_power_consumption_monthly.csv', (5, 1.0)),
      'hourly': ('Using hourly data...',
                 '../data/household_power_consumption_hourly.csv', (50, 1.0)),
  }
  try:
    message, path_to_dataset, extra = datasets[sys.argv[1]]
  except KeyError:
    # Any unrecognized selector falls back to the full minute-level data.
    message = 'Using minute data...'
    path_to_dataset = '../data/household_power_consumption.csv'
    extra = ()
  print(message)
  y_test, predictions = run(path_to_dataset, *extra)

  graph_utils.plot('linear', predictions, y_test)

  print('RMSE: %.4f' % metrics.rmse(predictions, y_test))

  print('MAPE: %.4f' % metrics.mape(predictions, y_test))
コード例 #24
0
def test_PPCA():
    """Smoke-test probabilistic PCA imputation on the Seattle data set and
    print its MAPE/RMSE on the artificially removed entries."""
    dense = pd.read_csv('./datasets/Seattle-data-set/mat.csv',
                        index_col=0).values
    random_mask = pd.read_csv('./datasets/Seattle-data-set/RM_mat.csv',
                              index_col=0).values

    # Threshold the random mask: 1 = kept entry, 0 = held out.
    keep = np.round(random_mask + 0.5 - 0.2)
    keep_nan = keep.copy()
    keep_nan[keep_nan == 0] = np.nan

    # Held-out entries become NaN in the corrupted matrix.
    corrupted = np.multiply(keep_nan, dense)

    # Evaluate only on non-zero entries that were removed.
    eval_idx = np.where((dense != 0) & (keep == 0))

    # PPCA works directly on the 2-D matrix with 20 components.
    recovered = PPCA(corrupted, 20)

    print("PPCA_res2_mape2", mape(dense[eval_idx], recovered[eval_idx]))
    print("PPCA_res2_rmse2", rmse(dense[eval_idx], recovered[eval_idx]))
コード例 #25
0
    def __call__(self, trial):
        """Optuna objective: fit a TabNet regressor with trial-sampled
        hyperparameters and return its validation RMSE (to be minimized).

        Parameters
        ----------
        trial : Optuna trial object providing suggest_int / suggest_float.
        """
        # Hold out 10% of the data for validation.
        df_train, df_valid = train_test_split(self.df,
                                              test_size=0.1,
                                              random_state=self.random_state)
        # TabNet expects dense arrays; targets reshaped to (n, 1).
        X_train, y_train = df_train[self.feature_col].values, df_train[
            self.target_col].values.reshape(-1, 1)
        X_valid, y_valid = df_valid[self.feature_col].values, df_valid[
            self.target_col].values.reshape(-1, 1)
        logging.info(
            f'Train/valid split: {X_train.shape[0]} for training, {X_valid.shape[0]} for validation'
        )

        n_d = trial.suggest_int('n_d', 8, 64)

        # Overlay sampled values on the shared defaults; n_a is tied to n_d.
        # NOTE(review): self.default_params is used without copying, so
        # these assignments persist across trials — confirm intended.
        params = self.default_params
        params['n_d'] = n_d
        params['n_a'] = n_d
        params['seed'] = self.random_state
        params['n_steps'] = trial.suggest_int('n_steps', 3, 10)
        params['n_shared'] = trial.suggest_int('n_shared', 2, 5)
        params['n_independent'] = trial.suggest_int('n_independent', 2, 5)
        params['momentum'] = trial.suggest_float('momentum', 0.01, 0.4)
        params['gamma'] = trial.suggest_float('gamma', 1.0, 2.0)

        model = TabNetRegressor(**params)

        model.fit(X_train=X_train,
                  y_train=y_train,
                  eval_set=[(X_valid, y_valid)],
                  eval_metric=['rmse'],
                  max_epochs=20,
                  patience=10,
                  batch_size=1024)

        # Score on the held-out split; Optuna minimizes this value.
        score = rmse(y_valid, model.predict(X_valid).squeeze())
        return score
コード例 #26
0
def cross_val_score(model=None, data=None, cv=10, scorer=rmse):
    """Run k-fold cross validation of ``model`` on ``data``.

    Args:
        model: estimator exposing ``fit``, ``predict`` and a
            ``formatizer['value']`` column index for the target.
        data: 2-D array-like of samples.
        cv: number of folds (``data`` is partitioned by ``chunk``).
        scorer: metric taking ``(y_true, y_pred)``; defaults to ``rmse``.

    Returns:
        float: mean score over the folds.
    """
    data = np.array(data)
    print(data.shape)
    chunks = chunk(data, cv)
    score = list()

    # Fixed: iterate over the actual folds instead of a hard-coded
    # range(10), which crashed (or silently mis-scored) whenever cv != 10.
    for i in range(len(chunks)):

        # Training data is every chunk except the held-out fold i.
        iter_data = list()
        for j in range(len(chunks)):
            if j != i:
                iter_data.extend(chunks[j])

        pred_data = np.array(chunks[i])
        iter_data = np.array(iter_data)

        model.fit(iter_data)
        pred = model.predict(pred_data)
        # Fixed: use the caller-supplied `scorer` (was hard-coded to rmse,
        # silently ignoring the parameter).
        score.append(scorer(pred_data[:, model.formatizer['value']], pred))
        print(score[i])

    return np.mean(score)
コード例 #27
0
    utilMat = svd(utilMat,k=15)

    pred = [] #to store the predicted ratings

    for _,row in test.iterrows():
        user = row['userId']
        item = row['movieId']

        if user in user_index:
            u_index = user_index[user]
            if item in item_index:
                i_index = item_index[item]
                pred_rating = utilMat[u_index, i_index]
            else:
                pred_rating = np.mean(utilMat[u_index, :])
        else:
            if item in item_index:
                i_index = item_index[item]
                pred_rating = np.mean(utilMat[:, i_index])
            else:
                pred_rating = np.mean(utilMat[:, :])

        pred.append(pred_rating)

    error = rmse(test['rating'], pred)
    print(error)

    errors.append(error)
    del error, pred

print np.mean(errors)
コード例 #28
0
ファイル: xnn.py プロジェクト: jkhlot/tensorflow-XNN
    def fit(self, X, y, validation_data=None):
        """Train the network with warm-restart LR decay and optional snapshot
        ensembling; track the best validation RMSLE seen.

        Args:
            X: training features, consumed through ``self._get_feed_dict``.
            y: training targets; reshaped to a column vector below.
            validation_data: optional ``(X_valid, y_valid)`` pair evaluated
                every ``eval_every_num_update`` steps.

        Returns:
            tuple: ``(rmsle_best_, epoch_best_)`` — best validation RMSLE and
            the (1-based) epoch it was achieved in. NOTE(review): if
            ``validation_data`` is None these stay at their initial
            placeholders (10., 4).
        """
        y = y.reshape(-1, 1)
        start_time = time.time()
        l = y.shape[0]
        train_idx_shuffle = np.arange(l)
        # Placeholders returned unchanged when no validation step ever runs.
        epoch_best_ = 4
        rmsle_best_ = 10.
        cycle_num = 0
        decay_steps = self.params["first_decay_steps"]
        # global_step drives snapshot cycles; global_step_exp drives the LR
        # schedule; global_step_total drives validation cadence. They are
        # reset at different times, hence three separate counters.
        global_step = 0
        global_step_exp = 0
        global_step_total = 0
        snapshot_num = 0
        learning_rate_need_big_jump = False
        # Exponential moving average of the (rescaled) training RMSE.
        total_rmse = 0.
        rmse_decay = 0.9
        for epoch in range(self.params["epoch"]):
            print("epoch: %d" % (epoch + 1))
            # Seeding per epoch makes the shuffle order reproducible.
            np.random.seed(epoch)
            if snapshot_num >= self.params["snapshot_before_restarts"] and self.params["shuffle_with_replacement"]:
                train_idx_shuffle = np.random.choice(np.arange(l), l)
            else:
                np.random.shuffle(train_idx_shuffle)
            batches = self._get_batch_index(train_idx_shuffle, self.params["batch_size_train"])
            for i, idx in enumerate(batches):
                if snapshot_num >= self.params["max_snapshot_num"]:
                    break
                # After a snapshot, jump the LR back up (warm restart).
                if learning_rate_need_big_jump:
                    learning_rate = self.params["lr_jump_rate"] * self.params["max_lr_exp"]
                    learning_rate_need_big_jump = False
                else:
                    learning_rate = self.params["max_lr_exp"]
                lr = _exponential_decay(learning_rate=learning_rate,
                                        global_step=global_step_exp,
                                        decay_steps=decay_steps,  # self.params["num_update_each_epoch"],
                                        decay_rate=self.params["lr_decay_each_epoch_exp"])
                # NOTE(review): training=False here is immediately overridden
                # by feed_dict[self.training] = True below — confirm the
                # _get_feed_dict flag is intentionally a no-op in this path.
                feed_dict = self._get_feed_dict(X, idx, dropout=0.1, training=False)
                feed_dict[self.target] = y[idx]
                feed_dict[self.learning_rate] = lr
                feed_dict[self.training] = True
                rmse_, opt = self.sess.run((self.rmse, self.train_op), feed_dict=feed_dict)
                if self.params["RUNNING_MODE"] != "submission":
                    # scaling rmsle' = (1/scale_) * (raw rmsle)
                    # raw rmsle = scaling rmsle' * scale_
                    total_rmse = rmse_decay * total_rmse + (1. - rmse_decay) * rmse_ * (self.target_scaler.scale_)
                    self.logger.info("[batch-%d] train-rmsle=%.5f, lr=%.5f [%.1f s]" % (
                        i + 1, total_rmse,
                        lr, time.time() - start_time))
                # save model
                global_step += 1
                global_step_exp += 1
                global_step_total += 1
                if self.params["enable_snapshot_ensemble"]:
                    # A "cycle" completes every decay_steps updates; snapshot
                    # the weights every snapshot_every_num_cycle cycles.
                    if global_step % decay_steps == 0:
                        cycle_num += 1
                        if cycle_num % self.params["snapshot_every_num_cycle"] == 0:
                            snapshot_num += 1
                            print("snapshot num: %d" % snapshot_num)
                            self._save_state()
                            self.logger.info("[model-%d] cycle num=%d, current lr=%.5f [%.5f]" % (
                                snapshot_num, cycle_num, lr, time.time() - start_time))
                            # reset global_step and first_decay_steps
                            decay_steps = self.params["first_decay_steps"]
                            if self.params["lr_jump_exp"] or snapshot_num >= self.params["snapshot_before_restarts"]:
                                learning_rate_need_big_jump = True
                        if snapshot_num >= self.params["snapshot_before_restarts"]:
                            # Restart schedule: lengthen each cycle by t_mul.
                            global_step = 0
                            global_step_exp = 0
                            decay_steps *= self.params["t_mul"]

                if validation_data is not None and global_step_total % self.params["eval_every_num_update"] == 0:
                    y_pred = self._predict(validation_data[0])
                    # Metrics are computed in the original (inverse-scaled)
                    # target space.
                    y_valid_inv = self.target_scaler.inverse_transform(validation_data[1])
                    y_pred_inv = self.target_scaler.inverse_transform(y_pred)
                    rmsle = rmse(y_valid_inv, y_pred_inv)
                    self.logger.info("[step-%d] train-rmsle=%.5f, valid-rmsle=%.5f, lr=%.5f [%.1f s]" % (
                        global_step_total, total_rmse, rmsle, lr, time.time() - start_time))
                    if rmsle < rmsle_best_:
                        rmsle_best_ = rmsle
                        epoch_best_ = epoch + 1

        return rmsle_best_, epoch_best_
コード例 #29
0
ファイル: operators.py プロジェクト: ORNL-BTRIC/Autotune
 os.close(cfile)
 runner = eplus.EnergyPlus()
 eplus_data = runner.run(candidate_filepath, eplus_weather, eplus_schedule, eplus_params['output_directory'])
 if eplus_data is None or user_data is None: 
     if eplus_data is None:
         logger.error('evaluator() :: EnergyPlus output is None.')
     elif user_data is None:
         logger.error('evaluator() :: User data is None.')
     fitness = WORST_FITNESS
 else:
     ep = utilities.column_vectors(eplus_data)
     ud = utilities.column_vectors(user_data)
     errors = {}
     for key in ud:
         if 'Date/Time' not in key:
             errors[key] = metrics.rmse(ep[key], ud[key])
     if eplus_tune_keys is None or len(eplus_tune_keys) == 0:
         fitness = sum([errors[k] for k in errors if errors[k] is not None])
     else:
         fitness = 0
         for k in eplus_tune_keys:
             k = k.strip()
             try:
                 fitness += errors[k]
             except KeyError:
                 logger.warning('evaluator() :: Tune key {} does not exist in model output.'.format(k))
             except TypeError:
                 logger.warning('evaluator() :: Tune key {} has error value None and is excluded from fitness.'.format(k))
 try:
     os.remove(candidate_filepath)
 except:
コード例 #30
0

if __name__ == "__main__":
    # Matrix of movie ratings
    train_data, test_data = get_movie_matrix()

    # Get all user mean values
    user_mean = get_user_mean(train_data)

    start = time.time()
    # Build a prediction column per movie, indexed like the training matrix.
    prediction_matrix = pd.DataFrame(index=train_data.index)
    # Fixed: DataFrame.iteritems() was removed in pandas 2.0; .items()
    # iterates the same (column_name, Series) pairs on all supported versions.
    for name, data in train_data.items():
        prediction_matrix[name] = main(train_data, name, k)
        # break
    logging.info("Process done in: {0:.2f} seconds".format(time.time() -
                                                           start))

    # Restrict the comparison to movies present in both prediction and test
    # matrices, dropping users with no predictions/ratings at all.
    inter_columns = np.intersect1d(prediction_matrix.columns.values,
                                   test_data.columns.values)
    small_pred = prediction_matrix[inter_columns].dropna(how='all')
    small_test = test_data.loc[:, inter_columns].dropna(how='all')
    print("Test Matrix\n", small_test)
    print("Predicted Matrix\n", small_pred.loc[small_test.index, :])

    logging.info('\nMetric Calculations RMSE and MAE')
    rmse_value = metrics.rmse(test_data, prediction_matrix)
    print(f'RMSE:\t{rmse_value}')

    mae_value = metrics.mae(test_data, prediction_matrix)
    print(f'MAE:\t{mae_value}')
コード例 #31
0
def main():
    """Train one LSTM (shared weights, re-compiled per cell) on every grid
    cell's time series, then print per-grid and overall rmse/nrmse/mape/ma
    scores for both the train and held-out test windows.
    """
    #getmodel
    model = lstm()
    adam = Adam(lr=lr)
    model.compile(loss='mse',
                  optimizer=adam,
                  metrics=[metrics.rmse, metrics.mape, metrics.ma])
    all_scores_train = []
    all_scores_test = []
    #model.summary()
    #get data
    data = load_data_()  #nodes x slots
    ts1 = time.time()

    trueY_train = []
    predictY_train = []
    trueY_test = []
    predictY_test = []

    for i in range(len(data)):
        print('grid %d.....' % (i))
        ts = time.time()
        #makedata set
        # The last T * days_test slots of each grid series are held out.
        testslots = T * days_test
        trainx, trainy = makedataset(data[i, :-testslots])
        testx, testy = makedataset(data[i, -testslots:])
        print('trainx shape:', (trainx.shape))
        print('trainy shape:', (trainy.shape))
        print('testx shape:', (testx.shape))
        print('testy shape:', (testy.shape))

        #scaler
        print(trainy, testy)
        # Fit one scaler on train+test targets jointly so both splits share
        # the same [-1, 1] mapping for this grid cell.
        mmn = MinMaxScaler(feature_range=(-1, 1))
        trainlen = len(trainy)
        Y = np.concatenate([trainy, testy], axis=0)
        Y = mmn.fit_transform(Y.reshape(-1, 1))
        trainy, testy = Y[:trainlen], Y[trainlen:]
        print(trainy.shape, testy.shape)
        #train
        # Fresh optimizer state per grid cell (weights carry over).
        adam = Adam(lr=lr)
        model.compile(loss='mse',
                      optimizer=adam,
                      metrics=[metrics.rmse, metrics.mape, metrics.ma])
        early_stopping = EarlyStopping(monitor='val_rmse',
                                       patience=patience,
                                       mode='min')
        history = model.fit(trainx,
                            trainy,
                            epochs=nb_epoch,
                            batch_size=batch_size,
                            validation_split=0.1,
                            callbacks=[early_stopping],
                            verbose=0)
        #evalute
        predict_y_train = model.predict([trainx],
                                        batch_size=batch_size,
                                        verbose=0)[:, 0:1]
        score = model.evaluate(trainx,
                               trainy,
                               batch_size=batch_size,
                               verbose=0)
        # Fixed: this referenced the undefined name `y_train` (NameError);
        # the scaled train targets in scope are `trainy`.
        print(
            'Train score: %.6f rmse (norm): %.6f  rmse (real): %.6f nrmse : %.6f mape: %.6f ma: %.6f'
            % (score[0], score[1], score[1] *
               (mmn._max - mmn._min) / 2., score[1] *
               (mmn._max - mmn._min) / 2. /
               mmn.inverse_transform(np.mean(trainy)), score[2], score[3]))
        predict_y_test = model.predict([testx],
                                       batch_size=batch_size,
                                       verbose=0)[:, 0:1]
        score = model.evaluate(testx, testy, batch_size=batch_size, verbose=0)
        # Fixed: same NameError here — `y_test` replaced with `testy`.
        # NOTE(review): mmn appears to be a project-local scaler (it exposes
        # _max/_min and accepts a scalar in inverse_transform) — confirm.
        print(
            'Test score: %.6f rmse (norm): %.6f  rmse (real): %.6f nrmse : %.6f mape: %.6f ma: %.6f'
            % (score[0], score[1], score[1] *
               (mmn._max - mmn._min) / 2., score[1] *
               (mmn._max - mmn._min) / 2. /
               mmn.inverse_transform(np.mean(testy)), score[2], score[3]))

        # Collect inverse-scaled predictions/targets for the overall metrics.
        predictY_train.append(
            mmn.inverse_transform(predict_y_train).reshape(-1).tolist())
        predictY_test.append(
            mmn.inverse_transform(predict_y_test).reshape(-1).tolist())
        trueY_train.append(mmn.inverse_transform(trainy).reshape(-1).tolist())
        trueY_test.append(mmn.inverse_transform(testy).reshape(-1).tolist())
        print("\nestimate on grid%d ,elapsed time (eval): %.3f seconds\n" %
              (i, time.time() - ts))
    #all_scores_train = np.asarray(all_scores_train)
    #all_scores_train = np.mean(all_scores_train, axis = 0)
    #all_scores_test = np.asarray(all_scores_test)
    #all_Scores_test = np.mean(all_scores_test,axis = 0)
    print('\n\n')
    # (rmse, nrmse, mape, ma) over the concatenated per-grid series.
    evaluate = lambda y1, y2: (metrics.rmse(y1, y2), metrics.rmse(
        y1, y2) / np.mean(y1), metrics.mape(y1, y2), metrics.ma(y1, y2))
    print('All Train rmse (real): %.6f nrmse : %.6f mape: %.6f ma: %.6f' %
          (evaluate(trueY_train, predictY_train)))
    print('All Test rmse (real): %.6f nrmse : %.6f mape: %.6f ma: %.6f' %
          (evaluate(trueY_test, predictY_test)))
    print('elapsed time: %3f seconds\n' % (time.time() - ts1))
コード例 #32
0
    x_test = x_test[-days_test:, :, -30:]
    # y_test = y_test[-days_test:,:]

    x_train = x_train[-days_train:, :, -30:]
    y_train = y_train[-days_train:, :]

    model = model_builder(x_train, y_train, args)

    model.fit(x_train, y_train)
    pred = model.predict(x_test, y_test)

    results[str(fold_num + 2013) + '/' + str(fold_num + 14)] = [
        metrics.crps(pred),
        metrics.nll(pred),
        metrics.mae(pred),
        metrics.rmse(pred),
        metrics.smape(pred),
        metrics.corr(pred),
        metrics.mb_log(pred),
        metrics.sdp(pred)
    ]
    tf.keras.backend.clear_session()

results['Average'] = results.mean(1)
results['Average'].loc['SDP'] = np.abs(results.loc['SDP'].values[-1]).mean()

plt.plot(pred.index, pred['True'], color='black')
plt.plot(pred.index, pred['Pred'], color='red')
plt.fill_between(pred.index,
                 pred['Pred'] - pred['Std'],
                 pred['Pred'] + pred['Std'],
コード例 #33
0
def do_model(all_data, steps, run_model=True):
    """Fit a stacked-LSTM regressor predicting the last column of
    ``all_data`` ``steps`` rows ahead.

    Args:
        all_data: 2-D array of features; the final column is the target.
        steps: forecast horizon in rows.
        run_model: when False, only fit and return the scaler.

    Returns:
        tuple: ``(metrics, model, scaler)``; ``metrics`` and ``model`` are
        None when ``run_model`` is False.
    """
    _steps = steps
    print("steps:", _steps)

    # Scale all columns; the fitted scaler is returned so callers can
    # invert predictions later.
    scaler = MinMaxScaler()
    all_data = scaler.fit_transform(all_data)
    if not run_model:
        return None, None, scaler

    # Align features at time t with the target column at time t + steps.
    features = all_data[:-_steps]
    labels = all_data[_steps:, -1:]
    X_train, X_test, raw_y_train, raw_y_test = train_test_split(
        features, labels, test_size=0.4)
    Y_train = raw_y_train.astype(np.float64)
    Y_test = raw_y_test.astype(np.float64)

    # Previously-tuned hyperparameters.
    optimiser = 'adam'
    hidden_neurons = 200
    loss_function = 'mse'
    batch_size = 105
    dropout = 0.056
    inner_hidden_neurons = 269
    dropout_inner = 0.22

    # LSTM input is (samples, timesteps, features) with a single timestep.
    X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
    X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])
    print("X train shape:\t", X_train.shape)
    print("X test shape:\t", X_test.shape)
    in_neurons = X_train.shape[2]
    out_neurons = 1

    gpu_cpu = 'cpu'
    best_weight = BestWeight()

    # Two stacked LSTM layers feeding a single-unit ReLU output head.
    model = Sequential()
    model.add(
        LSTM(output_dim=hidden_neurons,
             input_dim=in_neurons,
             return_sequences=True,
             init='uniform',
             consume_less=gpu_cpu))
    model.add(Dropout(dropout))
    model.add(
        LSTM(output_dim=inner_hidden_neurons,
             input_dim=hidden_neurons,
             return_sequences=False,
             consume_less=gpu_cpu))
    model.add(Dropout(dropout_inner))
    model.add(Activation('relu'))
    model.add(Dense(output_dim=out_neurons, input_dim=inner_hidden_neurons))
    model.add(Activation('relu'))
    model.compile(loss=loss_function, optimizer=optimiser)

    model.fit(X_train,
              Y_train,
              verbose=0,
              batch_size=batch_size,
              nb_epoch=30,
              validation_split=0.3,
              shuffle=False,
              callbacks=[best_weight])

    # Roll back to the best validation weights seen during training.
    model.set_weights(best_weight.get_best())

    # EPS keeps later percentage-error metrics away from exact zeros.
    predicted = model.predict(X_test) + EPS
    test_rmse = rmse(Y_test, predicted)
    scores = OrderedDict([
        ('steps', _steps),
        ('geh', geh(Y_test, predicted)),
        ('rmse', test_rmse),
        ('mape', mean_absolute_percentage_error(Y_test, predicted)),
    ])

    return scores, model, scaler
コード例 #34
0
def get_scores(y_true, y_predict):
    """Return the six evaluation metrics for a prediction as a tuple:
    (dice, sensitivity, specificity, mean surface distance, mutual
    information, rmse)."""
    metric_fns = (dice_coef, sensitivity, specificity,
                  MeanSurfaceDistance, mutual_information, rmse)
    return tuple(fn(y_true, y_predict) for fn in metric_fns)
コード例 #35
0
# Preprocessing
# Features are every column but the last; the target is the last column.
X = shuffled.iloc[:, :-1].squeeze()
y = (shuffled.iloc[:, -1:]).T.squeeze()
len_estate = len(y)

# Splitting data
# NOTE(review): `split` looks like a train fraction, so split*len_estate is a
# float label for .loc — confirm this slicing works with the DataFrame's
# index type (recent pandas rejects float labels on integer indexes).
X_train, y_train = X.loc[:split*len_estate], y.loc[:split*len_estate]
X_test, y_test = X.loc[split*len_estate+1:].reset_index(
    drop=True), y.loc[split*len_estate+1:].reset_index(drop=True)

# Learning tree
print("Please wait for some time, it takes time, you can change max depth if it takes too long time.")
tree = DecisionTree(criterion="information_gain", max_depth=max_depth)
tree.fit(X_train, y_train)
tree.plot()

# Printing accuracies for different depths
# Re-predict with the same fitted tree truncated at each depth 2..max_depth.
for depth in range(2, max_depth+1):
    y_hat = tree.predict(X_test, max_depth=depth)
    print("Depth: ", depth)
    print('\tRMSE: ', rmse(y_hat, y_test))
    print('\tMAE: ', mae(y_hat, y_test))

# Decision Tree Regressor from Sci-kit learn
# Baseline comparison against sklearn's regressor on the same split.
dt = DecisionTreeRegressor(random_state=0)
dt.fit(X_train, y_train)
y_hat = pd.Series(dt.predict(X_test))

print('Sklearn RMSE: ', rmse(y_hat, y_test))
print('Sklearn MAE: ', mae(y_hat, y_test))
コード例 #36
0
            k: np.array(v[split_idx:]) for k, v in predictions.items()
        }
        print()

        table = []
        print(' & '.join(['step', 'geh', 'mape', 'rmse'])+' \\\\')
        for step in steps:
            # true values
            stepped_vals = flow_values[step:len(predictions)]
            # predicted  values
            pred_vals = predictions[step][:-step] + eps
            table.append(OrderedDict([
                ('steps', step),
                ('geh',  geh(stepped_vals, pred_vals)),
                ('mape', mape(stepped_vals, pred_vals)),
                ('rmse', rmse(stepped_vals, pred_vals))
            ]))
        print(tabulate.tabulate(table, 'keys', 'latex'))

        print("Loading matplotlib")
        import matplotlib.pyplot as plt

        true_y = []
        true_x = []
        pred_y = []
        print("Predicting data rows: {}".format(data_len - row_count))

        progress = pyprind.ProgBar(data_len - row_count, width=50, stream=1)
        for row in it:
            progress.update()
            preds = model.run(row)
コード例 #37
0
    def calculate_train_and_forecast_metrics(
            self, train: pd.DataFrame, oos: pd.DataFrame, target_index: int,
            hps: dict, horizon: int,
            mae_rmse_ignore_when_actual_and_pred_are_zero: bool,
            mape_ignore_when_actual_is_zero: bool):
        """Compute in-sample (train) and out-of-sample (forecast) metrics.

        Args:
            train: training data frame fed to the fitted network.
            oos: out-of-sample frame; its first ``horizon`` rows of the
                target column are the forecast actuals.
            target_index: column index of the target series.
            hps: hyperparameter dict forwarded to the dataset/predict calls.
            horizon: number of forecast steps.
            mae_rmse_ignore_when_actual_and_pred_are_zero: flag forwarded to
                the mae/rmse metric implementations.
            mape_ignore_when_actual_is_zero: flag forwarded to mape.

        Returns:
            tuple: (train_metrics, forecast_metrics) as rounded pd.Series.
        """
        train_dataset = TrainDataset(train_df=train,
                                     target_index=target_index,
                                     hyperparams=hps,
                                     horizon=horizon)
        train_loader = DataLoader(train_dataset, batch_size=1, num_workers=1)
        inputs, train_actual = next(iter(train_loader))
        inputs = inputs.to(device=self.device)
        self.net = self.net.to(device=self.device)

        train_pred = self.net(inputs.float())
        # Strip the (batch, channel) dims to plain 1-D numpy arrays.
        train_actual = train_actual[0, 0, :].cpu().numpy()
        train_pred = train_pred[0, 0, :].cpu().detach().numpy()
        forecast_actual = oos.iloc[:horizon, target_index].values
        # Fixed: this passed the undefined name `train_df` (NameError);
        # the parameter in scope is `train`.
        forecast_pred = self.predict(train, target_index, hps, horizon)

        assert (train_actual.shape == train_pred.shape)
        assert (forecast_actual.shape == forecast_pred.shape)

        train_dict = {
            'mae':
            metrics.mae(train_actual, train_pred,
                        mae_rmse_ignore_when_actual_and_pred_are_zero),
            'rmse':
            metrics.rmse(train_actual, train_pred,
                         mae_rmse_ignore_when_actual_and_pred_are_zero),
            'mape':
            metrics.mape(train_actual, train_pred,
                         mape_ignore_when_actual_is_zero),
            'presence_accuracy':
            metrics.presence_accuracy(train_actual, train_pred),
            'peak_accuracy':
            metrics.peak_accuracy(train_actual, train_pred),
            'total_volume':
            int(metrics.total_actual_volume(train_actual)),
            'num_timestamps_predicted_on':
            int(train_pred.shape[0])
        }

        forecast_dict = {
            'mae':
            metrics.mae(forecast_actual, forecast_pred,
                        mae_rmse_ignore_when_actual_and_pred_are_zero),
            'rmse':
            metrics.rmse(forecast_actual, forecast_pred,
                         mae_rmse_ignore_when_actual_and_pred_are_zero),
            'mape':
            metrics.mape(forecast_actual, forecast_pred,
                         mape_ignore_when_actual_is_zero),
            'presence_accuracy':
            metrics.presence_accuracy(forecast_actual, forecast_pred),
            'peak_accuracy':
            metrics.peak_accuracy(forecast_actual, forecast_pred),
            'total_volume':
            int(metrics.total_actual_volume(forecast_actual)),
            # NOTE(review): key differs from the train dict's
            # 'num_timestamps_predicted_on' — kept as-is because downstream
            # consumers may depend on it; worth unifying in a coordinated change.
            'num_time_stamps_predicted_on':
            int(forecast_pred.shape[0])
        }

        train_metrics = pd.DataFrame.from_dict(train_dict,
                                               columns=[None],
                                               orient='index').iloc[:,
                                                                    0].round(3)

        forecast_metrics = pd.DataFrame.from_dict(
            forecast_dict, columns=[None], orient='index').iloc[:, 0].round(3)

        return train_metrics, forecast_metrics
コード例 #38
0
print('Items factorization matrix is:')  # item factorization matrix
print(pd.DataFrame(algo.qi))
print()

# predict a single score
# algo.predict(192, 302, 4, verbose=True)

# show predicted score and actual score side by side
# randomly choose first 20 records with percentage 20/100000(# of records in total)
print('Randomly choose 1-10 records to compare side by side:')

# Sample up to `limit` ratings by accepting each record with probability
# limit/100000 (the acceptance probability shrinks as limit counts down).
limit = 10
for uid, iid, r, timestamp in data.raw_ratings:
    if random.random() > limit / 100000:
        continue
    if limit == 0:
        break
    algo.predict(uid, iid, r, verbose=True)
    limit -= 1

print()

# show rmse
print('RMSE value of all rated scores is:')

# NOTE(review): each entry is a (true_rating, Prediction) tuple — confirm
# metrics.rmse accepts this shape (surprise's accuracy.rmse expects a flat
# list of Prediction objects instead).
predictions = []
for uid, iid, r, timestamp in data.raw_ratings:
    prediction = (r, algo.predict(uid, iid, r, verbose=False))
    predictions.append(prediction)
metrics.rmse(predictions, verbose=True)
コード例 #39
0
import metrics
import numpy as np
import tensorflow as tf
import math
import tensorflow.keras.backend as K
from tensorflow.keras.layers import Reshape

# Smoke test for the depth-metric implementations: y_pred is built from the
# same array as y_true (a perfect prediction), so every error metric below
# should evaluate to (near) zero.
y_true = np.random.rand(1, 55, 74)
# y_pred = np.random.rand(1, 55, 74)

y_true_tensor = tf.constant(y_true, dtype='float32')
# Deliberately reuses y_true; swap in the commented random y_pred above for
# a non-trivial check.
y_pred_tensor = tf.constant(y_true, dtype='float32')

print("y_true_tensor: " + str(y_true_tensor))
print("y_pred_tensor: " + str(y_pred_tensor))

abs_relative_diff = metrics.abs_relative_diff(y_true_tensor, y_pred_tensor)
squared_relative_diff = metrics.squared_relative_diff(y_true_tensor,
                                                      y_pred_tensor)
# NOTE: `rmse` here shadows any imported metrics.rmse name at module level.
rmse = metrics.rmse(y_true_tensor, y_pred_tensor)
rmse_log = metrics.rmse_log(y_true_tensor, y_pred_tensor)
rmse_scale_invariance_log = metrics.rmse_scale_invariance_log(
    y_true_tensor, y_pred_tensor)

print("abs_relative_diff: " + str(abs_relative_diff))
print("squared_relative_diff: " + str(squared_relative_diff))
print("rmse: " + str(rmse))
print("rmse_log: " + str(rmse_log))
print("rmse_scale_invariance_log: " + str(rmse_scale_invariance_log))
コード例 #40
0
ファイル: main.py プロジェクト: wuyou33/microgrid-mpc
def main():
    """Main function for the MPC scheme with receding horizon.

    Loads PV/load data, repeatedly solves the NLP over a sliding horizon
    using the configured prediction strategy, simulates the resulting SOC,
    accumulates grid/battery costs, and finally plots and logs the run.
    """
    conf = utils.parse_config()

    logpath = None
    log = input("Do you wish to log this run? ")

    if log in ["y", "yes", "Yes"]:
        foldername = input("Do you wish to name logfolder? (enter to skip)")
        logpath = utils.create_logs_folder(conf["logpath"], foldername)

    openloop = False

    # Prediction strategy: "constant", "arima", "data", "scaled_mean",
    # "best", or anything else for perfect-foresight predictions.
    predictions = conf["predictions"]
    print("Using {} predictions.".format(predictions))

    actions_per_hour = conf["actions_per_hour"]
    horizon = conf["simulation_horizon"]
    simulation_horizon = horizon * actions_per_hour

    start_time = time.time()
    step_time = start_time

    PV, PV_pred, PL, PL_pred, grid_buy, grid_sell = utils.load_data()

    # T: prediction horizon in hours; N: the same horizon in control steps.
    T = conf["prediction_horizon"]
    N = conf["prediction_horizon"] * actions_per_hour

    # xk: optimizer's state (SOC); xk_sim: independently simulated state.
    xk = conf["x_inital"]
    xk_sim = conf["x_inital"]
    x_opt = np.asarray([xk])
    x_sim = np.asarray([xk])
    # Control trajectories: u0/u1 battery charge/discharge, u2/u3 grid buy/sell
    # (presumably — confirm against OptiSolver; the plot legends below agree).
    u0 = np.asarray([])
    u1 = np.asarray([])
    u2 = np.asarray([])
    u3 = np.asarray([])

    solver = OptiSolver(N)

    x, lbx, ubx, lbg, ubg = solver.build_nlp(
        T,
        N,
    )

    net_cost_grid = 0
    net_cost_bat = 0
    J = 0

    pv_preds = [PV[0]]
    pl_preds = [PL[0]]

    pv_error = []
    pl_error = []

    plt.figure()

    if predictions in ["arima", "best"]:
        pv_model = Arima("PV", order=(3, 1, 2))
        pl_model = Arima("PL", order=(1, 1, 4), seasonal_order=(0, 0, 0, 0))

    for step in range(simulation_horizon - N):
        # Update NLP parameters
        x[0] = xk
        lbx[0] = xk
        ubx[0] = xk

        PV_true = PV[step:step + N]
        PL_true = PL[step:step + N]

        if predictions == "constant":  # Predicted values equal to measurement
            pv_ref = np.ones(N) * PV[step]
            pl_ref = np.ones(N) * PL[step]

        elif predictions == "arima":  # Estimate using ARIMA
            pv_model.update(PV[step])
            pl_model.update(PL[step])

            pv_ref = pv_model.predict(T)
            pl_ref = pl_model.predict(T)

        elif predictions == "data":
            pv_ref = PV_pred[step:step + N]
            pl_ref = PL_pred[step:step + N]

        elif predictions == "scaled_mean":
            # Rescale the stored forecast so it matches the current measurement.
            pv_ref = (PV[step] / PV_pred[step]) * PV_pred[step:step + N]
            pl_ref = (PL[step] / PL_pred[step]) * PL_pred[step:step + N]
        elif predictions == "best":
            # ARIMA for PV, scaled-mean for load.
            pv_model.update(PV[step])

            pv_ref = pv_model.predict(T)
            pl_ref = (PL[step] / PL_pred[step]) * PL_pred[step:step + N]

        else:  # Use true predictions
            pv_ref = PV_true
            pl_ref = PL_true

        pv_preds.append(pv_ref[1])
        pl_preds.append(pl_ref[1])

        # Prediction error over the first 4 steps of the horizon.
        pv_error.append(metrics.rmse(PV_true[0:4], pv_ref[0:4]))
        pl_error.append(metrics.rmse(PL_true[0:4], pl_ref[0:4]))

        plt.plot(range(step, step + N), pv_ref, c="b")
        plt.plot(range(step, step + N), PV_true, c="r")

        xk_opt, Uk_opt, J_opt = solver.solve_nlp([x, lbx, ubx, lbg, ubg],
                                                 vertcat(pv_ref, pl_ref))
        J += J_opt
        x_opt = np.append(x_opt, xk_opt[1])

        # Propagate the SOC with the *measured* PV/PL, not the forecast.
        xk_sim, Uk_sim = simulate_SOC(
            xk_sim,
            Uk_opt,
            PV[step],
            PL[step],
            solver.F,
        )

        x_sim = np.append(x_sim, xk_sim)

        if openloop:
            xk = xk_opt[1]  # xk is optimal
        else:
            xk = xk_sim

        # Apply only the first control of the optimal sequence (receding horizon).
        uk = [u[0] for u in Uk_opt]
        u0 = np.append(u0, uk[0])
        u1 = np.append(u1, uk[1])
        u2 = np.append(u2, uk[2])
        u3 = np.append(u3, uk[3])

        net_cost_grid += metrics.net_spending_grid(uk, 1.5, actions_per_hour)
        net_cost_bat += metrics.net_cost_battery(
            uk, conf["system"]["battery_cost"], actions_per_hour)

        if step % 50 == 0:
            print("\nFinshed iteration step {}. Current step took {}s".format(
                step, np.around(time.time() - step_time, 2)))
            print("xsim {}%, x_opt {}%".format(np.around(xk_sim, 2),
                                               np.around(xk_opt[1], 2)))
            step_time = time.time()

    # NOTE(review): 70 looks like a peak-power tariff constant — confirm and
    # consider moving it into the config alongside battery_cost.
    peak_power = np.around(np.max(u2), 2) * 70

    # Value the net change in stored battery energy at the grid buy price.
    E_start = conf["x_inital"] * conf["system"]["C_MAX"]
    E_end = xk * conf["system"]["C_MAX"]
    battery_change = np.around(grid_buy * (E_end - E_start), 2)

    print()
    print("Error PV prediction:", np.mean(pv_error))
    print("Error PL prediction:", np.mean(pl_error))

    print("Net spending grid: {} kr".format(np.around(net_cost_grid, 2)))
    print("Peak power cost: {} kr".format(peak_power))
    print("Net spending battery: {} kr".format(np.around(net_cost_bat, 2)))
    print("Grid + battery spending: {} kr".format(
        np.around(net_cost_grid + net_cost_bat, 2), ))
    print("Change in battery energy {} kr".format(battery_change))
    print("Total spending:",
          net_cost_grid + net_cost_bat - battery_change + peak_power)

    # Plotting
    u = np.asarray([-u0, u1, u2, -u3])
    u_bat = np.asarray([-u0, u1])
    u_grid = np.asarray([u2, -u3])

    p.plot_control_actions(u, horizon - T, actions_per_hour, logpath)

    p.plot_control_actions(
        u_bat,
        horizon - T,
        actions_per_hour,
        logpath,
        title="Battery Controls",
        legends=["Battery Charge", "Battery Discharge"],
    )

    p.plot_control_actions(
        u_grid,
        horizon - T,
        actions_per_hour,
        logpath,
        title="Grid Controls",
        legends=["Grid Buy", "Grid Sell"],
    )

    p.plot_SOC(x_sim, horizon - T, logpath)

    p.plot_data(
        [x_opt, x_sim],
        logpath=logpath,
        legends=["SOC optimal", "SOC simulated"],
        title="Simulated vs optimal SOC",
    )

    p.plot_data(
        [PV[:simulation_horizon - N], PL[:simulation_horizon - N]],
        logpath=logpath,
        legends=["PV Production", "Load Demands"],
        title="PV Production & Load Demands",
    )

    p.plot_SOC_control_subplots(x_sim, u, horizon - T, logpath=logpath)
    stop = time.time()
    print("\nFinished optimation in {}s".format(np.around(
        stop - start_time, 2)))
    utils.save_datafile(
        [x_opt, x_sim, u0, u1, u2, u3, PV, PV_pred, PL, PL_pred],
        names=[
            "x_opt",
            "x_sim",
            "u0",
            "u1",
            "u2",
            "u3",
            "PV",
            "PV_pred",
            "PL",
            "PL_pred",
        ],
        logpath=logpath,
    )

    print("One-step PV RMSE:", metrics.rmse_predictions(PV, pv_preds))
    print("One-step Load RMSE:", metrics.rmse_predictions(PL, pl_preds))
    if conf["plot_predictions"]:
        p.plot_predictions_subplots(PV, pv_preds, PL, pl_preds, logpath)
    plt.show(block=True)
    plt.ion()
    plt.close("all")