def train_stage0():
    """Warm-up stage: heat up the weights on RGB images.

    The original note says this runs for 5 epochs; the epoch count itself is
    not set inside this function.
    """
    # No loader workers on Windows (os.name == 'nt'), eight everywhere else.
    if os.name == 'nt':
        worker_count = 0
    else:
        worker_count = 8

    provider = ReadingImageProvider(
        TiffImageType, paths, fn_mapping, image_suffix='RGB')
    fold_split = get_folds(provider, 5)
    train(
        provider,
        fold_split,
        config,
        num_workers=worker_count,
        transforms=augment_flips_color,
    )
def train_stage2(sal_map: bool, three=False):
    """Run the stage-2 training pass.

    The original note describes this stage as "train with other loss
    function"; the loss is not chosen inside this function.

    Args:
        sal_map: When truthy (and ``three`` is falsy) images are read as
            ``SalImageType`` instead of the default ``TiffDemImageType``.
        three: When truthy images are read as ``TiffImageType`` (RGB only,
            per the original note). Takes precedence over ``sal_map``.
    """
    # `three` wins over `sal_map`, which wins over the DEM default.
    if three:
        image_type = TiffImageType
    elif sal_map:
        image_type = SalImageType
    else:
        image_type = TiffDemImageType

    # No loader workers on Windows (os.name == 'nt'), eight everywhere else.
    worker_count = 8 if os.name != 'nt' else 0

    provider = ReadingImageProvider(
        image_type, paths, fn_mapping, image_suffix='RGB')
    fold_split = get_folds(provider, 5)
    train(
        provider,
        fold_split,
        config,
        num_workers=worker_count,
        transforms=augment_flips_color,
    )
def train_stage1(sal_map: bool, three=False):
    """Run the main training stage.

    The original note describes this as the main stage with dtm/dsm data, in
    which the channels of an RGB-only warm start are updated to the final
    number of channels in ``config.num_channels``. Here that is signalled to
    ``train`` through ``num_channels_changed``, which is the negation of
    ``three``.

    Args:
        sal_map: When truthy (and ``three`` is falsy) images are read as
            ``SalImageType`` instead of the default ``TiffDemImageType``.
        three: When truthy images are read as ``TiffImageType`` (RGB only,
            per the original note) and ``num_channels_changed`` is False.
            Takes precedence over ``sal_map``.
    """
    # `three` wins over `sal_map`, which wins over the DEM default.
    if three:
        image_type = TiffImageType
    elif sal_map:
        image_type = SalImageType
    else:
        image_type = TiffDemImageType

    # No loader workers on Windows (os.name == 'nt'), eight everywhere else.
    worker_count = 8 if os.name != 'nt' else 0

    provider = ReadingImageProvider(
        image_type, paths, fn_mapping, image_suffix='RGB')
    fold_split = get_folds(provider, 5)
    train(
        provider,
        fold_split,
        config,
        num_workers=worker_count,
        transforms=augment_flips_color,
        num_channels_changed=not three,
    )
y_scores = np.array([0.1, 0.4, 0.35, 0.8]) # A probabilidade de predição de cada classe retornada por um classificador: y_probas = np.array([[1, 0], [1, 0], [1, 0], [0, 1]]) # Calculando os valores da curva AUC (Area Under Curve) roc_auc_score(y_true, y_scores) # Calculando os pontos da curva ROC (Receiver Operating Characteristic): fpr, tpr, thresholds = roc_curve(y_true, y_scores, pos_label=2) # Plotando os pontos da curva ROC: plot_roc(y_true, y_probas) # Usando a função para definir os indices de uma validação cruzada com 5 folds. for fold in get_folds(list(df.index.values)): print("Indices de Treinamento:", fold[0], "Indices de Testes:", fold[1]) # Calidação cruzada com 5 folds usando o sklearn kf = KFold(n_splits=5, random_state=42, shuffle=True) for train_index, test_index in kf.split(X): print("Indices de Treinamento:", train_index, "Indices de Testes:", test_index) #X_train, X_test = X[train_index], X[test_index] #y_train, y_test = y[train_index], y[test_index] # Dividindo o dataset no conjunto de treinamento (80%) e testes (20%) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
train_vals = list(train[c].values.astype(str)) test_vals = list(test[c].values.astype(str)) le.fit(train_vals + test_vals) train[c] = le.transform(train_vals) test[c] = le.transform(test_vals) train[target_cols] = train[target_cols].fillna(0).astype('float') train_idx = train['fullVisitorId'] test_idx = test['fullVisitorId'] ## Modeling TARGET = np.log1p(train['totals_transactionRevenue'].values) train_X = train[num_cols + cat_cols].copy() test_X = test[num_cols + cat_cols].copy() folds = utils.get_folds(train, n_splits = 5) oof_prediction = np.zeros(train_X.shape[0]) sub_prediction = np.zeros(test_X.shape[0]) oof_scores = [] # LightGBM lgb_params = {"objective" : "regression", "boosting_type" : "dart", "metric" : "rmse", "num_leaves" : 15, "learning_rate" : 0.1, "max_depth" : 7, "bagging_fraction" : 0.9, "feature_fraction" : 0.9, "number_boosting_rounds" : 100,
def run(config):
    """Cross-validate every configured model and print their mean metrics.

    For each name in ``config['models']`` a model-specific config is built
    with ``get_config``, the folds are fetched with ``get_folds`` and every
    fold is scored with ``run_model``. The six metrics returned by
    ``run_model`` are averaged per model over the folds that were actually
    scored and printed as a table. If ``config['test']`` is truthy,
    ``run_test(config)`` is called afterwards.

    The average is computed from a per-model fold counter rather than from
    ``len(all_folds)`` of whichever model happened to run last, so an empty
    model list no longer raises ``NameError`` and models with differing fold
    counts are averaged correctly. A model with no folds is reported as NaN.

    Args:
        config: Mapping providing at least the keys ``'models'`` (list of
            model names), ``'print'`` and ``'test'``.
    """
    models = config['models']
    # Row labels of the result table, in the order run_model returns them.
    metric_names = ['mse', 'root_mse', 'r2_score', 'adj_r2_score',
                    'accuracy', 'bal_accuracy']
    totals = {metric: [0] * len(models) for metric in metric_names}
    fold_counts = [0] * len(models)

    print("training and validating")
    for model_pos, model in enumerate(models):
        print(model)
        temp_config = get_config({**config, 'model': model})
        temp_config['print'] = config['print']
        all_folds, all_folds_baseline = get_folds(temp_config)
        for fold, fold_base in zip(all_folds, all_folds_baseline):
            # Only the baseline model receives the baseline fold.
            if model == 'baseline':
                mse, rmse, r2, adj_r2, accuracy, balanced_accuracy = run_model(
                    temp_config, fold, fold_base)
            else:
                mse, rmse, r2, adj_r2, accuracy, balanced_accuracy = run_model(
                    temp_config, fold)
            fold_scores = (mse, rmse, r2, adj_r2, accuracy, balanced_accuracy)
            for metric, score in zip(metric_names, fold_scores):
                totals[metric][model_pos] += score
            fold_counts[model_pos] += 1

    # Average every metric over the folds scored for that model.
    table = [
        [metric] + [
            total / count if count else float('nan')
            for total, count in zip(totals[metric], fold_counts)
        ]
        for metric in metric_names
    ]

    # Print the results in a table
    print(tabulate(table, headers=['metrics'] + models,
                   tablefmt="fancy_grid"))  # plain
    if config['test']:
        run_test(config)
def run_models(config, subset_names, subset_indices):
    """Cross-validate every configured model on each data subset.

    For each ``(name, indices)`` pair the models in ``config['models']`` are
    evaluated on ``<save_folder>/subdata_<name>.pkl``. The six metrics
    returned by ``run_model`` are averaged per model over the folds that were
    actually scored, written to ``results/results_<name>.csv`` and printed as
    a table.

    The average is computed from a per-model fold counter rather than from
    ``len(all_folds)`` of whichever model happened to run last, so an empty
    model list no longer raises ``NameError`` and models with differing fold
    counts are averaged correctly. A model with no folds is reported as NaN.

    Args:
        config: Mapping providing at least ``'models'`` (list of model
            names), ``'print'`` and ``config['dataset']['save_folder']``.
        subset_names: Names of the data subsets; used in the input and
            output file names.
        subset_indices: One entry per subset name; element ``[1]`` of each
            entry is used as ``in_dim`` for the NN, LSTM and BiLSTM models.
    """
    models = config['models']
    folder = config['dataset']['save_folder']
    # Row labels of the result table, in the order run_model returns them.
    metric_names = ['mse', 'root_mse', 'r2_score', 'adj_r2_score',
                    'accuracy', 'bal_accuracy']
    models_with_in_dim = ('NN', 'LSTM', 'BiLSTM')
    # Subsets for which the NN model gets a learning rate of 0.0001.
    nn_low_lr_subsets = {'pr_su_bf_ma_tsfp', 'pr_su_bf_ma_tsfp_tsfd'}

    for name, indices in zip(subset_names, subset_indices):
        totals = {metric: [0] * len(models) for metric in metric_names}
        fold_counts = [0] * len(models)

        for model_pos, model in enumerate(models):
            temp_config = get_config({**config, 'model': model})
            temp_config['data_path'] = folder + '/subdata_' + name + '.pkl'
            temp_config['print'] = config['print']
            if model in models_with_in_dim:
                temp_config['in_dim'] = indices[1]
            if model == "NN" and name in nn_low_lr_subsets:
                temp_config['lr'] = 0.0001

            all_folds, all_folds_baseline = get_folds(temp_config)
            for fold, fold_base in zip(all_folds, all_folds_baseline):
                # Only the baseline model receives the baseline fold.
                if model == 'baseline':
                    mse, rmse, r2, adj_r2, accuracy, balanced_accuracy = run_model(
                        temp_config, fold, fold_base)
                else:
                    mse, rmse, r2, adj_r2, accuracy, balanced_accuracy = run_model(
                        temp_config, fold)
                fold_scores = (mse, rmse, r2, adj_r2, accuracy,
                               balanced_accuracy)
                for metric, score in zip(metric_names, fold_scores):
                    totals[metric][model_pos] += score
                fold_counts[model_pos] += 1

        # Average every metric over the folds scored for that model.
        table = [
            [metric] + [
                total / count if count else float('nan')
                for total, count in zip(totals[metric], fold_counts)
            ]
            for metric in metric_names
        ]

        # exist_ok avoids the check-then-create race of a separate
        # os.path.exists test.
        os.makedirs("results", exist_ok=True)
        pd.DataFrame(table, columns=["metrics"] + models).to_csv(
            "results/results_" + name + ".csv")
        # NOTE (translated from a leftover Dutch remark): everything has to
        # be re-run from scratch, and a recent change still has to be
        # reverted.
        print('dataset: ' + name)
        print(
            tabulate(table, headers=['metrics'] + models,
                     tablefmt="fancy_grid"))  # plain