def test_valid_input(self):
    st = StackingMLR(cvxopt=False)
    start = default_timer()
    print(self.basemodels_preds.shape, "test", self.test_preds.shape)
    st.fit(self.basemodels_preds, self.train_y)
    pred = st.predict(self.test_preds)
    print("pred", pred, "shape", pred.shape)
    print(default_timer() - start)
    print("Test for Stacking with Multi-Response Linear Regression")
    print("######################################################")
    print("precision:",
          calculate_score(self.test_y, pred, BINARY_CLASSIFICATION, precision))
    print("roc_auc:",
          calculate_score(self.test_y, pred, BINARY_CLASSIFICATION, roc_auc))
    print("log_loss:",
          calculate_score(self.test_y, pred, BINARY_CLASSIFICATION, log_loss))
    print("recall:",
          calculate_score(self.test_y, pred, BINARY_CLASSIFICATION, recall))
    print("accuracy:",
          calculate_score(self.test_y, pred, BINARY_CLASSIFICATION, accuracy))
def benchmark_best_model(args):
    task_type = STRING_TO_TASK_TYPES[args.task_type]
    path = args.data_path
    dataset = get_datasetname(path)
    metric = CLASSIFICATION_METRICS[args.metric]
    name = "SINGLE BEST"
    X, y, X_test, y_test = load_data(path)
    n_basemodels = X.shape[0]
    # np.argmax needs a sequence, not a generator, so collect the scores first
    scores = [calculate_score(y, X[m, :, :], task_type, metric)
              for m in range(n_basemodels)]
    best_model = np.argmax(scores)
    perf = calculate_score(y_test, X_test[best_model, :, :], task_type, metric)
    return {dataset: {name: {str(metric): perf}}}
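# Hedged usage sketch (not from the repository): benchmark_best_model() only
# reads task_type, data_path and metric from `args`, so a bare namespace is
# enough to drive it. The literal values below are illustrative assumptions;
# in particular the metric key must exist in CLASSIFICATION_METRICS.
#
#     from argparse import Namespace
#     result = benchmark_best_model(Namespace(task_type="binary.classification",
#                                             data_path="/path/to/dataset",
#                                             metric="acc"))
#     # -> {<dataset>: {"SINGLE BEST": {<metric>: <test score>}}}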
def test_valid_input(self):
    ba = BayesianAverage()
    ba.fit(self.basemodels_preds, self.train_y)
    pred = ba.predict(self.test_preds)
    print("preds", pred.shape, "test_y", self.test_y.shape,
          "test_x", self.test_preds.shape)
    # the error metric is an accuracy measure
    print("Test for Stacking with Bayesian Averaging")
    print("######################################################")
    print("Accuracy:",
          calculate_score(self.test_y, pred, BINARY_CLASSIFICATION, accuracy))
    print("roc_auc:",
          calculate_score(self.test_y, pred, BINARY_CLASSIFICATION, roc_auc))
def prune(X, y, X_test, task_type, metric, p=0.8):
    """
    Preprocessing step for ensemble methods.

    Scores every base model on the level-zero validation data, sorts the
    models by that score, and keeps only the best fraction p of them (with
    the default p=0.8, the worst 20% of models are pruned away).

    Parameters
    ----------
    X : {array-like} of shape = [n_basemodels, n_datapoints, n_targets]
        Predicted class probabilities of base models trained on level-zero
        validation data
    y : array-like, shape = [n_datapoints]
        The true target values.
    X_test : {array-like} of shape = [n_basemodels, n_datapoints, n_targets]
        Predicted class probabilities on level-zero test data
    p : float, default=0.8
        Fraction of base models to keep.

    Returns
    -------
    Pruned training set and pruned test set.
    """
    n_basemodels = X.shape[0]
    N = int(p * n_basemodels)
    perf = np.zeros(n_basemodels)
    for idx, basemodel in enumerate(X):
        perf[idx] = calculate_score(y, basemodel, task_type, metric)
    # indices of the N best-scoring base models (metric: higher is better)
    indices = np.argsort(perf)[-N:]
    X_pruned = X[indices]
    X_test_pruned = X_test[indices]
    return X_pruned, X_test_pruned
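# Illustration (assumption-flagged helper, not part of the original module):
# the core of prune() is the argsort-based top-p% selection. A self-contained
# toy run with made-up scores and a higher-is-better metric:
def _prune_selection_example():
    scores = np.array([0.61, 0.85, 0.40, 0.78, 0.92])  # one score per base model
    p = 0.8
    n_keep = int(p * len(scores))         # keep the best 80% -> 4 models
    kept = np.argsort(scores)[-n_keep:]   # ascending sort, take the tail
    return kept                           # array([0, 3, 1, 4]); model 2 is pruned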
def apply_model(args, X, y, X_test, y_test, task_type, metric, model, name,
                dataset):
    print("X_train:", X.shape, "y_train", y.shape,
          "X_test:", X_test.shape, "y_test:", y_test.shape)
    if args.prune and args.bagging:
        start = default_timer()
        # prune the worst 20% of models away, then bag the ensemble method
        X_pruned, X_test_pruned = prune(X, y, X_test, task_type, metric)
        pred = bagging(X_pruned, y, X_test_pruned, model, task_type, metric)
        name = "pruned_bagged_" + name
    elif args.prune:
        start = default_timer()
        # prune the worst 20% of models away
        X_pruned, X_test_pruned = prune(X, y, X_test, task_type, metric)
        model.fit(X_pruned, y)
        pred = model.predict(X_test_pruned)
        name = "pruned_" + name
    elif args.bagging:
        start = default_timer()
        # predictions of bagged ensembles
        pred = bagging(X, y, X_test, model, task_type, metric)
        name = "bagged_" + name
    else:
        start = default_timer()
        model.fit(X, y)
        pred = model.predict(X_test)
    perf = calculate_score(y_test, pred, task_type, metric)
    runtime = default_timer() - start
    # log the runtime of each job
    with open("runtime.txt", "a") as f:
        f.write(name + ": " + str(runtime) + "\n")
    return {dataset: {name: {str(metric): perf}}}
def basemodels_perf(X, y):
    """
    Print the accuracy of every base model.

    Args:
        X: array of shape [n_basemodels, n_datapoints, n_targets] holding
            the base models' predicted class probabilities.
        y: array of shape [n_datapoints] holding the true targets.
    """
    n_datapoints = X.shape[1]
    X = X.transpose()  # -> [n_targets, n_datapoints, n_basemodels]
    n_basemodels = X.shape[2]
    print("basemodels accuracy")
    print("######################################################")
    for m in range(n_basemodels):
        classes = np.asarray(
            [np.argmax(X[:, i, m]) for i in range(n_datapoints)])
        print("basemodel_%d_Accuracy:" % (m + 1),
              calculate_score(y, classes, BINARY_CLASSIFICATION, accuracy))
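# Hedged aside (illustrative helper, not in the original module): the
# per-datapoint comprehension in basemodels_perf() can be vectorized away;
# a single argmax over the target axis yields every model's predicted class
# for every datapoint at once.
def _predicted_classes(X_transposed):
    # X_transposed: [n_targets, n_datapoints, n_basemodels]
    # returns [n_datapoints, n_basemodels]; column m holds model m's classes
    return np.argmax(X_transposed, axis=0)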
def test_valid_input(self):
    st = StackingLogit()
    self.l1train = tensor2matrix(self.basemodels_preds)
    self.l1test = tensor2matrix(self.test_preds)
    st.fit(self.l1train, self.train_y)
    pred = st.predict(self.l1test)
    print("Test for Stacking with Unregularized Logistic Regression")
    print("######################################################")
    print("Accuracy:",
          calculate_score(self.test_y, pred, BINARY_CLASSIFICATION, accuracy))
    # error_metric is an accuracy measure, so higher is better
    self.assertGreaterEqual(st.error_metric(pred, self.test_y), 0.5)
def test_small_valid_input(self):
    es = EnsembleSelection(task_type=BINARY_CLASSIFICATION,
                           metric=accuracy,
                           no_iterations=100,
                           with_replacements=False,
                           sorted_initialization=False,
                           n_init=None)
    es.fit(self.basemodels_preds, self.train_y)
    pred = es.predict(self.test_preds)
    print("Test for Ensemble Selection Standard")
    print("######################################################")
    score = calculate_score(self.test_y, pred, BINARY_CLASSIFICATION, accuracy)
    print("Accuracy:", score)
    self.assertGreaterEqual(score, 0.5)
def test_kept_models_are_best(self):
    # XXX: Check that the models we keep are actually the ones with the
    # best performance
    task = "binary.classification"
    task_type = STRING_TO_TASK_TYPES[task]
    metric = accuracy
    lst_perf = []
    for model in self.basemodels_preds:
        perf = calculate_score(solution=self.train_y,
                               prediction=model,
                               task_type=task_type,
                               metric=metric)
        lst_perf.append(perf)
    # rank the base models by their validation score (ascending)
    ranking = np.argsort(lst_perf)
    # TODO: assert that the kept models correspond to ranking[-N:]
def test_valid_input(self):
    st = StackingLogitReg()
    st.fit(self.l1train, self.train_y)
    pred = st.predict(self.l1test)
    print("Test for Stacking with Regularized Logistic Regression")
    print("######################################################")
    print("Accuracy:",
          calculate_score(self.test_y, pred, BINARY_CLASSIFICATION, accuracy))
    # error_metric is an accuracy measure, so higher is better
    self.assertGreaterEqual(
        st.error_metric(np.argmax(pred, axis=1), self.test_y), 0.5)
def test_small_valid_input(self):
    es = EnsembleSelection(task_type=BINARY_CLASSIFICATION,
                           metric=accuracy,
                           no_iterations=100,
                           with_replacements=True,
                           sorted_initialization=False,
                           n_init=None)
    es.fit(self.basemodels_preds, self.train_y)
    pred = es.predict(self.test_preds)
    # Regression check against a pickled reference ensemble, currently disabled:
    # with open("./models/with_replacement_models.pkl", 'wb') as f:
    #     pickle.dump(es, f)
    # with open("./models/with_replacement_models.pkl", 'rb') as f:
    #     old_ensemble = pickle.load(f)
    # self.assertTrue(ensemble_selections_equal(old_ensemble, es))
    score = calculate_score(self.test_y, pred, BINARY_CLASSIFICATION, accuracy)
    print("Accuracy:", score)
    self.assertGreaterEqual(score, 0.5)
def test_small_valid_input(self):
    """
    Checks that the ensemble is initialized by sorting the base models
    with respect to performance. For zero iterations the best-performing
    model is returned; for more iterations the ensemble is initialized
    with the n_init best-performing models.
    """
    es = EnsembleSelection(task_type=BINARY_CLASSIFICATION,
                           metric=accuracy,
                           no_iterations=100,
                           with_replacements=False,
                           sorted_initialization=True,
                           n_init=100)
    es.fit(self.basemodels_preds, self.train_y)
    pred = es.predict(self.test_preds)
    print("Test for Ensemble Selection with Sorted Initialization")
    print("######################################################")
    score = calculate_score(self.test_y, pred, BINARY_CLASSIFICATION, accuracy)
    print("Accuracy:", score)
    self.assertGreaterEqual(score, 0.5)
def fit(self, base_models_predictions, true_targets):
    """
    Build an ensemble from base_models_predictions.

    Parameters
    ----------
    base_models_predictions : {array-like} of shape = [n_basemodels, n_datapoints, n_targets]
        Predicted class probabilities of base models trained on level-zero
        validation data
    true_targets : array-like, shape = [n_datapoints]
        The target values.

    Returns
    -------
    None. The learned weights are stored in self.weight_vector,
    shape = [n_basemodels].
    """
    base_models_predictions = base_models_predictions.transpose()
    # after the transpose: [n_targets, n_datapoints, n_basemodels]
    n_datapoints = base_models_predictions.shape[1]
    n_base_models = base_models_predictions.shape[2]
    self.weight_vector = np.zeros(n_base_models)
    for _ in range(self.n_bootstraps):
        # resample the datapoints with replacement; reusing the full index
        # range every time would make all bootstrap iterations identical
        index_list = np.random.randint(0, n_datapoints, size=n_datapoints)
        risk_vector = np.zeros(n_base_models)
        for model in range(n_base_models):
            pred = base_models_predictions[:, index_list, model]
            target = true_targets[index_list]
            score = calculate_score(target, pred.transpose(),
                                    self.task_type, self.metric)
            risk_vector[model] = 1 - score
        # the model with the lowest risk on this bootstrap gets one vote
        self.weight_vector[np.argmin(risk_vector)] += 1

    def normalize_proba(probs):
        return probs / float(np.sum(probs))

    self.weight_vector = normalize_proba(self.weight_vector)
    print("AgnosticBayesianWeights:", self.weight_vector)
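# Hedged sketch: the matching predict() is not shown in this section. Given
# the bootstrap-vote weights learned above, a plausible combination rule is
# a weighted average of the base models' probability tensors; the helper
# below is illustrative only, not the repository's API.
def _weighted_average_predict(base_models_predictions, weight_vector):
    # base_models_predictions: [n_basemodels, n_datapoints, n_targets]
    # weight_vector: [n_basemodels], non-negative, summing to one
    return np.average(base_models_predictions, axis=0, weights=weight_vector)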
def test_small_valid_input(self):
    """
    Checks the LibraryBagging wrapper around EnsembleSelection: fits
    bagged ensembles on the base model predictions and verifies that the
    combined prediction is better than chance.
    """
    # TODO: automate this to call instances of all classes and test
    # for all of them at once
    es = EnsembleSelection(BINARY_CLASSIFICATION, accuracy)
    bg = LibraryBagging(es)
    # bag_gen = bg.generate_random_bag(self.basemodels_preds)
    # next(bag_gen)
    bg.fit(self.basemodels_preds, self.train_y, self.test_preds)
    pred = bg.predict()
    print("Test for Wrapper: Library_Pruning")
    print("######################################################")
    score = calculate_score(self.test_y, pred, BINARY_CLASSIFICATION, accuracy)
    print("Accuracy:", score)
    self.assertGreaterEqual(score, 0.5)
def test_valid_input(self):
    bm = BayesianAverageMCMC()
    bm.fit(self.basemodels_preds.transpose(), self.train_y)
    pred = bm.predict(self.test_preds.transpose())
    print("pred", pred.shape)
    # error_metric is an accuracy measure, so higher is better
    self.basemodels_preds = self.basemodels_preds.transpose()
    n_datapoints = self.basemodels_preds.shape[1]
    n_basemodels = self.basemodels_preds.shape[2]
    print("Test for Stacking with Bayesian Averaging MCMC")
    print("######################################################")
    for m in range(n_basemodels):
        classes = np.asarray([np.argmax(self.basemodels_preds[:, i, m])
                              for i in range(n_datapoints)])
        print("basemodel_%d_Accuracy:" % (m + 1),
              calculate_score(self.train_y, classes,
                              BINARY_CLASSIFICATION, accuracy))
    print("Accuracy:",
          calculate_score(self.test_y, pred, BINARY_CLASSIFICATION, accuracy))
    self.assertGreaterEqual(
        bm.error_metric(np.argmax(pred, axis=1), self.test_y), 0.5)
def fit(self, base_models_predictions, true_targets,
        model_identifiers=None):
    """
    Build an ensemble from base_models_predictions.

    Parameters
    ----------
    base_models_predictions : {array-like} of shape = [n_basemodels, n_datapoints, n_targets]
        Predicted class probabilities of base models trained on level-zero
        validation data
    true_targets : array-like, shape = [n_datapoints]
        The target values.

    Returns
    -------
    self : object
        Returns self.
    """
    ensemble = []
    n_basemodels = base_models_predictions.shape[0]
    self.no_iterations = min(n_basemodels, self.no_iterations)
    active = np.ones(n_basemodels, dtype=bool)
    if not self.sorted_initialization and self.n_init is not None:
        raise ValueError("You specified the number of initial models "
                         "'n_init' but did not choose "
                         "'sorted_initialization'. This parameter "
                         "combination is not supported!")
    if self.sorted_initialization:
        if self.n_init is None:
            raise ValueError("Please specify the number of models to "
                             "initialize the ensemble with via n_init")
        # seed the ensemble with the n_init best-scoring base models
        perf = np.zeros(n_basemodels)
        for idx, pred in enumerate(base_models_predictions):
            perf[idx] = calculate_score(true_targets, pred,
                                        self.task_type, self.metric)
        indices = np.argsort(perf)[-self.n_init:]
        ensemble = [base_models_predictions[idx] for idx in indices]
        if not self.with_replacements:
            for idx in indices:
                active[idx] = False
    self.weights = np.zeros((n_basemodels,))
    for _ in range(self.no_iterations):
        best_index = -1
        best_score = -np.inf
        temp = list(ensemble)
        # greedily pick the active model that maximizes the score of the
        # averaged ensemble
        for basemodel in range(n_basemodels):
            if active[basemodel]:
                if not ensemble:
                    score = calculate_score(
                        true_targets,
                        base_models_predictions[basemodel, :, :],
                        self.task_type, self.metric)
                else:
                    avg_temp = np.mean(
                        temp + [base_models_predictions[basemodel, :, :]],
                        axis=0)
                    score = calculate_score(true_targets, avg_temp,
                                            self.task_type, self.metric)
                if score >= best_score:
                    best_index = basemodel
                    best_score = score
        ensemble.append(base_models_predictions[best_index, :, :])
        self.weights[best_index] += 1
        if not self.with_replacements:
            active[best_index] = False
    # normalize once, after all iterations, so the weights sum to one
    self.weights = self.weights / np.sum(self.weights)
    print("EnsembleSelection Weights: ", self.weights)
    return self
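# Self-contained illustration (helper not in the original module) of one
# greedy step from fit() above, using plain accuracy on toy data instead of
# the repository's calculate_score: the candidate whose addition maximizes
# the averaged ensemble's score is appended.
def _greedy_step_example():
    rng = np.random.RandomState(0)
    y = rng.randint(0, 2, size=50)               # toy binary targets
    preds = rng.rand(5, 50, 2)                   # 5 base models' probabilities
    preds /= preds.sum(axis=2, keepdims=True)    # normalize to valid probas

    ensemble = []

    def averaged_accuracy(candidate):
        avg = np.mean(ensemble + [candidate], axis=0)
        return np.mean(np.argmax(avg, axis=1) == y)

    best = max(range(len(preds)), key=lambda m: averaged_accuracy(preds[m]))
    ensemble.append(preds[best])
    return best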
def test_invalid_inputs(self):
    """Checks that a ValueError is raised when the input is invalid."""
    es = EnsembleSelection(BINARY_CLASSIFICATION, accuracy)
    self.assertRaises(ValueError, es.fit, self.validation_x,
                      self.validation_y)