Example #1
def train_stage0():
    """
    Warm up the weights for 5 epochs.
    """
    ds = ReadingImageProvider(TiffImageType,
                              paths,
                              fn_mapping,
                              image_suffix='RGB')

    folds = get_folds(ds, 5)
    num_workers = 0 if os.name == 'nt' else 8  # no worker processes on Windows ('nt')
    train(ds,
          folds,
          config,
          num_workers=num_workers,
          transforms=augment_flips_color)
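None of these examples show get_folds itself. A minimal sketch of the
(dataset, fold count) variant used here, assuming it simply wraps sklearn's
KFold over the provider's item indices (the real helper may stratify or
group differently):

from sklearn.model_selection import KFold

def get_folds(ds, n_folds):
    # Split the provider's item indices into n_folds (train, val) index pairs.
    kf = KFold(n_splits=n_folds, shuffle=True, random_state=42)
    return list(kf.split(range(len(ds))))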
Example #2
def train_stage2(sal_map: bool, three=False):
    """
    Train with a different loss function.
    three=True => use only RGB for training
    """
    im_type = TiffDemImageType
    if sal_map:
        im_type = SalImageType
    if three:
        im_type = TiffImageType
    ds = ReadingImageProvider(im_type, paths, fn_mapping, image_suffix='RGB')

    folds = get_folds(ds, 5)
    num_workers = 0 if os.name == 'nt' else 8
    train(ds,
          folds,
          config,
          num_workers=num_workers,
          transforms=augment_flips_color)
Example #3
def train_stage1(sal_map: bool, three=False):
    """
    Main training stage with DTM/DSM data.
    three=True => use only RGB for training
    Expands the channels from the RGB-only warm start to the final number
    of channels in config.num_channels.
    """
    im_type = TiffDemImageType
    if sal_map:
        im_type = SalImageType
    if three:
        im_type = TiffImageType
    ds = ReadingImageProvider(im_type, paths, fn_mapping, image_suffix='RGB')

    folds = get_folds(ds, 5)
    num_workers = 0 if os.name == 'nt' else 8
    train(ds,
          folds,
          config,
          num_workers=num_workers,
          transforms=augment_flips_color,
          num_channels_changed=not three)
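The three stages form a pipeline: a short RGB-only warm-up, the main run
with elevation data, then a fine-tune under a different loss. A hypothetical
driver (the argument values are illustrative):

train_stage0()                  # warm up on RGB only
train_stage1(sal_map=False)     # expand channels, train with DTM/DSM data
train_stage2(sal_map=False)     # fine-tune with the other loss function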
Example #4
# Assumed setup (not shown in the original snippet): imports and ground-truth
# labels. y_true is inferred from the pos_label=2 call below; plot_roc is
# assumed to come from scikit-plot; df, X, y, and get_folds are defined
# elsewhere in the original notebook.
import numpy as np
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.model_selection import KFold, train_test_split
from scikitplot.metrics import plot_roc

y_true = np.array([1, 1, 2, 2])

# The score predicted for each sample by a classifier:
y_scores = np.array([0.1, 0.4, 0.35, 0.8])

# The predicted probability of each class returned by a classifier:
y_probas = np.array([[1, 0], [1, 0], [1, 0], [0, 1]])

# Computing the AUC (Area Under the Curve) score:
roc_auc_score(y_true, y_scores)

# Computing the points of the ROC (Receiver Operating Characteristic) curve:
fpr, tpr, thresholds = roc_curve(y_true, y_scores, pos_label=2)

# Plotting the ROC curve:
plot_roc(y_true, y_probas)

# Using the function to produce the indices of a 5-fold cross-validation.
for fold in get_folds(list(df.index.values)):
    print("Training indices:", fold[0], "Test indices:", fold[1])

# 5-fold cross-validation using sklearn
kf = KFold(n_splits=5, random_state=42, shuffle=True)
for train_index, test_index in kf.split(X):
    print("Training indices:", train_index, "Test indices:", test_index)
    #X_train, X_test = X[train_index], X[test_index]
    #y_train, y_test = y[train_index], y[test_index]

# Splitting the dataset into a training set (80%) and a test set (20%)
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=42)
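With the labels assumed above (positive class = 2), three of the four
(positive, negative) score pairs are ranked correctly, so the AUC comes out
to 3/4:

print(roc_auc_score(y_true, y_scores))  # 0.75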
Example #5
# Assumed loop header (the original snippet starts mid-loop): label-encode
# each categorical column on the union of train and test values.
from sklearn.preprocessing import LabelEncoder

for c in cat_cols:
    le = LabelEncoder()
    train_vals = list(train[c].values.astype(str))
    test_vals = list(test[c].values.astype(str))
    le.fit(train_vals + test_vals)
    train[c] = le.transform(train_vals)
    test[c] = le.transform(test_vals)

train[target_cols] = train[target_cols].fillna(0).astype('float')
train_idx = train['fullVisitorId']
test_idx = test['fullVisitorId']

## Modeling
TARGET = np.log1p(train['totals_transactionRevenue'].values)
train_X = train[num_cols + cat_cols].copy()
test_X = test[num_cols + cat_cols].copy()

folds = utils.get_folds(train, n_splits=5)

oof_prediction = np.zeros(train_X.shape[0])
sub_prediction = np.zeros(test_X.shape[0])
oof_scores = []

# LightGBM
lgb_params = {"objective" : "regression",
              "boosting_type" : "dart",
              "metric" : "rmse",
              "num_leaves" : 15,
              "learning_rate" : 0.1,
              "max_depth" : 7,
              "bagging_fraction" : 0.9,
              "feature_fraction" : 0.9,
              "number_boosting_rounds" : 100,
Example #6
def run(config):

    models = config['models']

    total_mse = [0] * len(models)
    total_rmse = [0] * len(models)
    total_r2 = [0] * len(models)
    total_adj_r2 = [0] * len(models)

    total_accuracy = [0] * len(models)
    total_balanced_accuracy = [0] * len(models)

    print("training and validating")

    for i, model in enumerate(models):

        print(model)
        temp_config = get_config({**config, 'model': model})
        temp_config['print'] = config['print']
        all_folds, all_folds_baseline = get_folds(temp_config)

        for fold, fold_base in zip(all_folds, all_folds_baseline):

            if model == 'baseline':
                mse, rmse, r2, adj_r2, accuracy, balanced_accuracy = run_model(
                    temp_config, fold, fold_base)
            else:
                mse, rmse, r2, adj_r2, accuracy, balanced_accuracy = run_model(
                    temp_config, fold)

            total_mse[i] += mse
            total_rmse[i] += rmse
            total_r2[i] += r2
            total_adj_r2[i] += adj_r2

            total_accuracy[i] += accuracy
            total_balanced_accuracy[i] += balanced_accuracy

    # Average the accumulated metrics over the folds (this assumes every
    # model ran the same number of folds, since len(all_folds) comes from
    # the last model's run)
    mses = [mse / len(all_folds) for mse in total_mse]
    rmses = [rmse / len(all_folds) for rmse in total_rmse]
    r2s = [r2 / len(all_folds) for r2 in total_r2]
    adj_r2s = [adj_r2 / len(all_folds) for adj_r2 in total_adj_r2]

    accuracies = [accuracy / len(all_folds) for accuracy in total_accuracy]
    balanced_accuracies = [
        balanced_accuracy / len(all_folds)
        for balanced_accuracy in total_balanced_accuracy
    ]

    # Print the results in a table
    table = [['mse'] + mses, ['root_mse'] + rmses, ['r2_score'] + r2s,
             ['adj_r2_score'] + adj_r2s, ['accuracy'] + accuracies,
             ['bal_accuracy'] + balanced_accuracies]

    print(tabulate(table, headers=['metrics'] + models,
                   tablefmt="fancy_grid"))  # alternative: "plain"

    if config['test']:
        run_test(config)
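run_model is not shown, but it evidently returns the six metrics being
accumulated. For the regression half, a sketch of how those values are
conventionally computed (helper name and signature are hypothetical):

import numpy as np
from sklearn.metrics import mean_squared_error, r2_score

def regression_metrics(y_true, y_pred, n_features):
    # Hypothetical helper, not the original run_model.
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)
    n = len(y_true)
    # Adjusted R^2 penalizes R^2 for the number of predictors used:
    adj_r2 = 1 - (1 - r2) * (n - 1) / (n - n_features - 1)
    return mse, rmse, r2, adj_r2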
Example #7
def run_models(config, subset_names, subset_indices):

    models = config['models']
    folder = config['dataset']['save_folder']

    for name, indices in zip(subset_names, subset_indices):

        total_mse = [0] * len(models)
        total_rmse = [0] * len(models)
        total_r2 = [0] * len(models)
        total_adj_r2 = [0] * len(models)

        total_accuracy = [0] * len(models)
        total_balanced_accuracy = [0] * len(models)

        for i, model in enumerate(models):

            temp_config = get_config({**config, 'model': model})
            temp_config['data_path'] = folder + '/subdata_' + name + '.pkl'
            temp_config['print'] = config['print']

            if model in ['NN', 'LSTM', 'BiLSTM']:
                temp_config['in_dim'] = indices[1]
            if model == "NN":
                if name in {'pr_su_bf_ma_tsfp', 'pr_su_bf_ma_tsfp_tsfd'}:
                    temp_config['lr'] = 0.0001

            all_folds, all_folds_baseline = get_folds(temp_config)

            for fold, fold_base in zip(all_folds, all_folds_baseline):

                if model == 'baseline':
                    mse, rmse, r2, adj_r2, accuracy, balanced_accuracy = run_model(
                        temp_config, fold, fold_base)
                else:
                    mse, rmse, r2, adj_r2, accuracy, balanced_accuracy = run_model(
                        temp_config, fold)

                total_mse[i] += mse
                total_rmse[i] += rmse
                total_r2[i] += r2
                total_adj_r2[i] += adj_r2

                total_accuracy[i] += accuracy
                total_balanced_accuracy[i] += balanced_accuracy

        # Average the accumulated metrics over the folds (len(all_folds)
        # again comes from the last model's run)
        mses = [mse / len(all_folds) for mse in total_mse]
        rmses = [rmse / len(all_folds) for rmse in total_rmse]
        r2s = [r2 / len(all_folds) for r2 in total_r2]
        adj_r2s = [adj_r2 / len(all_folds) for adj_r2 in total_adj_r2]

        accuracies = [accuracy / len(all_folds) for accuracy in total_accuracy]
        balanced_accuracies = [
            balanced_accuracy / len(all_folds)
            for balanced_accuracy in total_balanced_accuracy
        ]

        # Print the results in a table
        table = [['mse'] + mses, ['root_mse'] + rmses, ['r2_score'] + r2s,
                 ['adj_r2_score'] + adj_r2s, ['accuracy'] + accuracies,
                 ['bal_accuracy'] + balanced_accuracies]
        if not os.path.exists("results"):
            os.makedirs("results")
        pd.DataFrame(table, columns=["metrics"] +
                     models).to_csv("results/results_" + name + ".csv")
        # okay nice, so can I now rerun everything from scratch? Yes indeed,
        # and we also need to revert that change from just now

        print('dataset: ' + name)
        print(
            tabulate(table,
                     headers=['metrics'] + models,
                     tablefmt="fancy_grid"))  # alternative: "plain"