from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from sklearn.model_selection import train_test_split


def training_curve(X, y, model):
    def mlp_databalancing(_X, _y, sampling_strategy):
        # Resample according to the chosen strategy; NONE falls through unchanged
        if sampling_strategy == SamplingStrategy.UNDERSAMPLING:
            _X, _y = RandomUnderSampler(random_state=SEED).fit_resample(_X, _y)
        elif sampling_strategy == SamplingStrategy.OVERSAMPLING:
            _X, _y = SMOTE(random_state=SEED, n_jobs=-1).fit_resample(_X, _y)
        return _X, _y

    SPLIT_AND_VALIDATE = True  # If False, train with all data
    SAMPLING_STRATEGY = SamplingStrategy.OVERSAMPLING

    with timing():
        # Custom data-balancing code for the MLP, since the imblearn pipeline
        # doesn't return the Keras training history.
        if SPLIT_AND_VALIDATE:
            X_train, X_val, y_train, y_val = train_test_split(
                X, y, test_size=0.1, random_state=SEED)
            if SAMPLING_STRATEGY != SamplingStrategy.NONE:
                # Resample only the training split, never the validation data
                X_train, y_train = mlp_databalancing(
                    X_train, y_train, SAMPLING_STRATEGY)
            history = model.fit(X_train, y_train,
                                epochs=num_epochs,
                                batch_size=batch_size,
                                validation_data=(X_val, y_val))
        else:
            X, y = mlp_databalancing(X, y, SAMPLING_STRATEGY)
            history = model.fit(X, y,
                                epochs=num_epochs,
                                batch_size=batch_size,
                                verbose=0)

    plot_learning_curve_keras(history)
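# SamplingStrategy is referenced above but not defined in this section.
# A minimal sketch of what it could look like, assuming a plain Enum with
# the three members used here (hypothetical, not the author's definition):
from enum import Enum, auto


class SamplingStrategy(Enum):
    NONE = auto()
    UNDERSAMPLING = auto()
    OVERSAMPLING = auto()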
def evaluate(X, y, model):
    with timing():
        scores, averages = evaluate_model(X, y, model)
        print(scores)
        print(f'Averages: {averages}')
        confusion_matrix(X, y, model, XGBOOST_VISUALIZATION_PATH)
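# evaluate_model is defined elsewhere; a minimal sketch of one plausible
# implementation using sklearn's cross_validate (hypothetical: the metrics
# and fold count are assumptions, and the gpu_mode flag seen in the MLP
# version below is omitted here):
from sklearn.model_selection import cross_validate


def evaluate_model(X, y, model, cv=5):
    metrics = ['accuracy', 'f1_macro']
    results = cross_validate(model, X, y, cv=cv, scoring=metrics)
    scores = {m: results[f'test_{m}'] for m in metrics}
    averages = {m: values.mean() for m, values in scores.items()}
    return scores, averages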
def evaluate(X, y):
    with timing():
        # Wrap the Keras model so it can be used in a sklearn-style pipeline
        model_for_evaluation = KerasClassifier(build_fn=create_model,
                                               epochs=num_epochs,
                                               batch_size=batch_size,
                                               verbose=0)
        model = create_pipeline(model_for_evaluation,
                                sampling_strategy=SamplingStrategy.OVERSAMPLING,
                                y=y)
        scores, averages = evaluate_model(X, y, model, gpu_mode=True)
        print('\n\n', scores)
        print(f'Averages: {averages}')
        confusion_matrix(X, y, model, MLP_VISUALIZATION_PATH)
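# create_pipeline is defined elsewhere; a minimal sketch of one plausible
# implementation, assuming it chains an imblearn sampler with the classifier
# so that resampling is applied only to the training folds during
# cross-validation (hypothetical, not the author's helper; the unused y
# parameter is kept only to match the call above):
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline
from imblearn.under_sampling import RandomUnderSampler


def create_pipeline(model, sampling_strategy, y=None):
    steps = []
    if sampling_strategy == SamplingStrategy.UNDERSAMPLING:
        steps.append(('sampler', RandomUnderSampler(random_state=SEED)))
    elif sampling_strategy == SamplingStrategy.OVERSAMPLING:
        steps.append(('sampler', SMOTE(random_state=SEED)))
    steps.append(('model', model))
    return Pipeline(steps)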
def training_curve(X, y, model):
    with timing():
        plot_learning_curve(X, y, model, XGBOOST_VISUALIZATION_PATH)
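# plot_learning_curve is defined elsewhere; a minimal sketch of one plausible
# implementation built on sklearn's learning_curve (hypothetical: the scoring
# default, fold count, and file naming are assumptions):
import os

import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import learning_curve


def plot_learning_curve(X, y, model, output_path):
    train_sizes, train_scores, val_scores = learning_curve(
        model, X, y, cv=5, train_sizes=np.linspace(0.1, 1.0, 5))
    plt.plot(train_sizes, train_scores.mean(axis=1), label='Training score')
    plt.plot(train_sizes, val_scores.mean(axis=1), label='Cross-validation score')
    plt.xlabel('Training set size')
    plt.ylabel('Score')
    plt.legend()
    plt.savefig(os.path.join(output_path, 'learning_curve.png'))
    plt.close()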