def evaluate_no_cv(self, train_index, test_index):
    """Train and score a random forest once, without any grid search.

    The values stored under the ``*_range`` keys of
    ``self._algorithm_params`` are forwarded unchanged to
    ``self._launch_random_forest`` (presumably single values rather than
    ranges in this mode -- TODO confirm with callers).

    Args:
        train_index: Index selecting the training rows of ``self._x`` and
            ``self._y``.
        test_index: Index selecting the test rows of ``self._x`` and
            ``self._y``.

    Returns:
        dict with the keys ``best_parameter`` (the parameters used plus the
        test ``balanced_accuracy``), ``evaluation`` and ``evaluation_train``
        (outputs of ``utils.evaluate_prediction``), ``y_hat``,
        ``y_hat_train``, ``y``, ``y_train``, ``y_index``, ``x_index`` and
        ``auc``.
    """
    x_train, y_train = self._x[train_index], self._y[train_index]
    x_test, y_test = self._x[test_index], self._y[test_index]

    params = self._algorithm_params
    best_parameter = {
        'n_estimators': params['n_estimators_range'],
        'max_depth': params['max_depth_range'],
        'min_samples_split': params['min_samples_split_range'],
        'max_features': params['max_features_range'],
    }

    _, y_hat, auc, y_hat_train = self._launch_random_forest(
        x_train,
        x_test,
        y_train,
        y_test,
        best_parameter['n_estimators'],
        best_parameter['max_depth'],
        best_parameter['min_samples_split'],
        best_parameter['max_features'],
    )

    test_scores = utils.evaluate_prediction(y_test, y_hat)
    # The test-set score is recorded next to the parameters that produced it.
    best_parameter['balanced_accuracy'] = test_scores['balanced_accuracy']
    train_scores = utils.evaluate_prediction(y_train, y_hat_train)

    return {
        'best_parameter': best_parameter,
        'evaluation': test_scores,
        'evaluation_train': train_scores,
        'y_hat': y_hat,
        'y_hat_train': y_hat_train,
        'y': y_test,
        'y_train': y_train,
        'y_index': test_index,
        'x_index': train_index,
        'auc': auc,
    }
def evaluate_no_cv(self, train_index, test_index):
    """Train and score an XGBoost model once, without any grid search.

    The hyper-parameters are read from the ``self._max_depth_range``,
    ``self._learning_rate_range``, ``self._n_estimators_range`` and
    ``self._colsample_bytree_range`` attributes and passed unchanged to
    ``self._launch_xgboost`` (presumably single values in this mode --
    TODO confirm with callers).

    Args:
        train_index: Index selecting the training rows of ``self._x`` and
            ``self._y``.
        test_index: Index selecting the test rows of ``self._x`` and
            ``self._y``.

    Returns:
        dict with the keys ``best_parameter`` (the parameters used plus the
        test ``balanced_accuracy``), ``evaluation`` and ``evaluation_train``
        (outputs of ``utils.evaluate_prediction``), ``y_hat``,
        ``y_hat_train``, ``y``, ``y_train``, ``y_index``, ``x_index`` and
        ``auc``.
    """
    x_train, y_train = self._x[train_index], self._y[train_index]
    x_test, y_test = self._x[test_index], self._y[test_index]

    best_parameter = {
        'max_depth': self._max_depth_range,
        'learning_rate': self._learning_rate_range,
        'n_estimators': self._n_estimators_range,
        'colsample_bytree': self._colsample_bytree_range,
    }

    _, y_hat, auc, y_hat_train = self._launch_xgboost(
        x_train,
        x_test,
        y_train,
        y_test,
        self._max_depth_range,
        self._learning_rate_range,
        self._n_estimators_range,
        self._colsample_bytree_range,
    )

    test_scores = utils.evaluate_prediction(y_test, y_hat)
    # The test-set score is recorded next to the parameters that produced it.
    best_parameter['balanced_accuracy'] = test_scores['balanced_accuracy']
    train_scores = utils.evaluate_prediction(y_train, y_hat_train)

    return {
        'best_parameter': best_parameter,
        'evaluation': test_scores,
        'evaluation_train': train_scores,
        'y_hat': y_hat,
        'y_hat_train': y_hat_train,
        'y': y_test,
        'y_train': y_train,
        'y_index': test_index,
        'x_index': train_index,
        'auc': auc,
    }
def evaluate(self, train_index, test_index):
    """Run an inner grid search on the training split, then score on the test split.

    The training samples are split with a shuffled ``StratifiedKFold``
    (``grid_search_folds`` splits). For every inner fold and every
    combination of ``max_depth_range`` x ``learning_rate_range`` x
    ``n_estimators_range`` x ``colsample_bytree_range``,
    ``self._grid_search`` is submitted to a thread pool. The combination
    chosen by ``self._select_best_parameter`` is then used to call
    ``self._launch_xgboost`` on the whole training split and predict the
    test split.

    Args:
        train_index: Index selecting the training rows of ``self._x`` and
            ``self._y``.
        test_index: Index selecting the test rows of ``self._x`` and
            ``self._y``.

    Returns:
        dict with the keys ``best_parameter``, ``evaluation`` and
        ``evaluation_train`` (outputs of ``utils.evaluate_prediction``),
        ``y_hat``, ``y_hat_train``, ``y``, ``y_train``, ``y_index``,
        ``x_index`` and ``auc``.
    """
    params = self._algorithm_params
    n_folds = params['grid_search_folds']

    x_train = self._x[train_index]
    y_train = self._y[train_index]

    # Build the inner folds before any thread is started, so that a failure
    # here (e.g. too few samples per class) cannot leave a pool behind.
    skf = StratifiedKFold(n_splits=n_folds, shuffle=True)
    inner_cv = list(skf.split(np.zeros(len(y_train)), y_train))

    parameters_combinations = list(
        itertools.product(
            params['max_depth_range'],
            params['learning_rate_range'],
            params['n_estimators_range'],
            params['colsample_bytree_range'],
        )
    )

    # async_result[fold][parameters] -> AsyncResult of self._grid_search
    async_result = {fold: {} for fold in range(n_folds)}

    # The context manager guarantees the worker threads are torn down even
    # if submitting a job raises; on the normal path close()/join() wait
    # for every submitted job to finish before the pool is terminated.
    with ThreadPool(params['n_threads']) as inner_pool:
        for fold, (inner_train_index, inner_test_index) in enumerate(inner_cv):
            x_train_inner = x_train[inner_train_index]
            x_test_inner = x_train[inner_test_index]
            y_train_inner = y_train[inner_train_index]
            y_test_inner = y_train[inner_test_index]

            for parameters in parameters_combinations:
                async_result[fold][parameters] = inner_pool.apply_async(
                    self._grid_search,
                    (
                        x_train_inner,
                        x_test_inner,
                        y_train_inner,
                        y_test_inner,
                        *parameters,
                    ),
                )
        inner_pool.close()
        inner_pool.join()

    best_parameter = self._select_best_parameter(async_result)

    x_test = self._x[test_index]
    y_test = self._y[test_index]

    _, y_hat, auc, y_hat_train = self._launch_xgboost(
        x_train,
        x_test,
        y_train,
        y_test,
        best_parameter['max_depth'],
        best_parameter['learning_rate'],
        best_parameter['n_estimators'],
        best_parameter['colsample_bytree'],
    )

    return {
        'best_parameter': best_parameter,
        'evaluation': utils.evaluate_prediction(y_test, y_hat),
        'evaluation_train': utils.evaluate_prediction(y_train, y_hat_train),
        'y_hat': y_hat,
        'y_hat_train': y_hat_train,
        'y': y_test,
        'y_train': y_train,
        'y_index': test_index,
        'x_index': train_index,
        'auc': auc,
    }
def evaluate(self, train_index, test_index):
    """Run an inner grid search over ``c`` on the training split, then score on the test split.

    ``self._kernel`` is restricted to the training samples (rows and
    columns). Those samples are split with a shuffled ``StratifiedKFold``
    (``grid_search_folds`` splits), and for every inner fold and every
    value in ``c_range`` ``self._grid_search`` is submitted to a thread
    pool. The value chosen by ``self._select_best_parameter`` is then used
    to call ``self._launch_svc`` on the whole training kernel and predict
    the test samples.

    Args:
        train_index: Index selecting the training samples in
            ``self._kernel`` and ``self._y``.
        test_index: Index selecting the test samples in ``self._kernel``
            and ``self._y``.

    Returns:
        dict with the keys ``best_parameter``, ``evaluation`` and
        ``evaluation_train`` (outputs of ``utils.evaluate_prediction``),
        ``y_hat``, ``y_hat_train``, ``y``, ``y_train``, ``y_index``,
        ``x_index`` and ``auc``.
    """
    params = self._algorithm_params
    n_folds = params["grid_search_folds"]

    # Train-vs-train block of the kernel matrix.
    outer_kernel = self._kernel[train_index, :][:, train_index]
    y_train = self._y[train_index]

    # Build the inner folds before any thread is started, so that a failure
    # here (e.g. too few samples per class) cannot leave a pool behind.
    skf = StratifiedKFold(n_splits=n_folds, shuffle=True)
    inner_cv = list(skf.split(np.zeros(len(y_train)), y_train))

    # async_result[fold][c] -> AsyncResult of self._grid_search
    async_result = {fold: {} for fold in range(n_folds)}

    # The context manager guarantees the worker threads are torn down even
    # if submitting a job raises; on the normal path close()/join() wait
    # for every submitted job to finish before the pool is terminated.
    with ThreadPool(params["n_threads"]) as inner_pool:
        for fold, (inner_train_index, inner_test_index) in enumerate(inner_cv):
            inner_kernel = outer_kernel[inner_train_index, :][:, inner_train_index]
            # Rows: inner test samples; columns: inner training samples.
            x_test_inner = outer_kernel[inner_test_index, :][:, inner_train_index]
            y_train_inner = y_train[inner_train_index]
            y_test_inner = y_train[inner_test_index]

            for c in params["c_range"]:
                async_result[fold][c] = inner_pool.apply_async(
                    self._grid_search,
                    (inner_kernel, x_test_inner, y_train_inner, y_test_inner, c),
                )
        inner_pool.close()
        inner_pool.join()

    best_parameter = self._select_best_parameter(async_result)

    # Rows: test samples; columns: training samples.
    x_test = self._kernel[test_index, :][:, train_index]
    y_test = self._y[test_index]

    _, y_hat, auc, y_hat_train = self._launch_svc(
        outer_kernel, x_test, y_train, y_test, best_parameter["c"]
    )

    return {
        "best_parameter": best_parameter,
        "evaluation": utils.evaluate_prediction(y_test, y_hat),
        "evaluation_train": utils.evaluate_prediction(y_train, y_hat_train),
        "y_hat": y_hat,
        "y_hat_train": y_hat_train,
        "y": y_test,
        "y_train": y_train,
        "y_index": test_index,
        "x_index": train_index,
        "auc": auc,
    }
def evaluate(self, train_index, test_index):
    """Run an inner grid search over ``c`` on the training split, then score on the test split.

    ``self._kernel`` is restricted to the training samples (rows and
    columns). Those samples are split with a shuffled ``StratifiedKFold``
    (``self._grid_search_folds`` splits), and for every inner fold and
    every value in ``self._c_range`` ``self._grid_search`` is submitted to
    a thread pool of ``self._n_threads`` workers. The value chosen by
    ``self._select_best_parameter`` is then used to call
    ``self._launch_svc`` on the whole training kernel and predict the test
    samples.

    Args:
        train_index: Index selecting the training samples in
            ``self._kernel`` and ``self._y``.
        test_index: Index selecting the test samples in ``self._kernel``
            and ``self._y``.

    Returns:
        dict with the keys ``best_parameter``, ``evaluation`` and
        ``evaluation_train`` (outputs of ``utils.evaluate_prediction``),
        ``y_hat``, ``y_hat_train``, ``y``, ``y_train``, ``y_index``,
        ``x_index`` and ``auc``.
    """
    # Train-vs-train block of the kernel matrix.
    outer_kernel = self._kernel[train_index, :][:, train_index]
    y_train = self._y[train_index]

    # Build the inner folds before any thread is started, so that a failure
    # here (e.g. too few samples per class) cannot leave a pool behind.
    skf = StratifiedKFold(n_splits=self._grid_search_folds, shuffle=True)
    inner_cv = list(skf.split(np.zeros(len(y_train)), y_train))

    # async_result[fold][c] -> AsyncResult of self._grid_search
    async_result = {fold: {} for fold in range(self._grid_search_folds)}

    # The context manager guarantees the worker threads are torn down even
    # if submitting a job raises; on the normal path close()/join() wait
    # for every submitted job to finish before the pool is terminated.
    with ThreadPool(self._n_threads) as inner_pool:
        for fold, (inner_train_index, inner_test_index) in enumerate(inner_cv):
            inner_kernel = outer_kernel[inner_train_index, :][:, inner_train_index]
            # Rows: inner test samples; columns: inner training samples.
            x_test_inner = outer_kernel[inner_test_index, :][:, inner_train_index]
            y_train_inner = y_train[inner_train_index]
            y_test_inner = y_train[inner_test_index]

            for c in self._c_range:
                async_result[fold][c] = inner_pool.apply_async(
                    self._grid_search,
                    (inner_kernel, x_test_inner, y_train_inner, y_test_inner, c),
                )
        inner_pool.close()
        inner_pool.join()

    best_parameter = self._select_best_parameter(async_result)

    # Rows: test samples; columns: training samples.
    x_test = self._kernel[test_index, :][:, train_index]
    y_test = self._y[test_index]

    _, y_hat, auc, y_hat_train = self._launch_svc(
        outer_kernel, x_test, y_train, y_test, best_parameter['c']
    )

    return {
        'best_parameter': best_parameter,
        'evaluation': utils.evaluate_prediction(y_test, y_hat),
        'evaluation_train': utils.evaluate_prediction(y_train, y_hat_train),
        'y_hat': y_hat,
        'y_hat_train': y_hat_train,
        'y': y_test,
        'y_train': y_train,
        'y_index': test_index,
        'x_index': train_index,
        'auc': auc,
    }
def _grid_search(self, kernel_train, x_test, y_train, y_test, c):
    """Score one value of ``c`` on a single inner fold.

    Calls ``self._launch_svc`` with the given training kernel, test data,
    labels and ``c``, then evaluates the returned test predictions with
    ``utils.evaluate_prediction``.

    Returns:
        The ``'balanced_accuracy'`` entry of the evaluation result.
    """
    # Only the second element of the launcher's 4-tuple (test predictions)
    # is needed here.
    _, predictions, _, _ = self._launch_svc(
        kernel_train, x_test, y_train, y_test, c
    )
    scores = utils.evaluate_prediction(y_test, predictions)
    return scores['balanced_accuracy']
def _grid_search(self, x_train, x_test, y_train, y_test, max_depth,
                 learning_rate, n_estimators, colsample_bytree):
    """Score one hyper-parameter combination on a single inner fold.

    Calls ``self._launch_xgboost`` with the given data and parameters, then
    evaluates the returned test predictions with
    ``utils.evaluate_prediction``.

    Returns:
        The ``'balanced_accuracy'`` entry of the evaluation result.
    """
    # Only the second element of the launcher's 4-tuple (test predictions)
    # is needed here.
    _, predictions, _, _ = self._launch_xgboost(
        x_train,
        x_test,
        y_train,
        y_test,
        max_depth,
        learning_rate,
        n_estimators,
        colsample_bytree,
    )
    scores = utils.evaluate_prediction(y_test, predictions)
    return scores['balanced_accuracy']
def _grid_search(self, x_train, x_test, y_train, y_test, n_estimators,
                 max_depth, min_samples_split, max_features):
    """Score one hyper-parameter combination on a single inner fold.

    Calls ``self._launch_random_forest`` with the given data and
    parameters, then evaluates the returned test predictions with
    ``utils.evaluate_prediction``.

    Returns:
        The ``"balanced_accuracy"`` entry of the evaluation result.
    """
    # Only the second element of the launcher's 4-tuple (test predictions)
    # is needed here.
    _, predictions, _, _ = self._launch_random_forest(
        x_train, x_test, y_train, y_test,
        n_estimators, max_depth, min_samples_split, max_features,
    )
    scores = utils.evaluate_prediction(y_test, predictions)
    return scores["balanced_accuracy"]
def _grid_search(self, x_train, x_test, y_train, y_test, c):
    """Score one value of ``c`` on a single inner fold.

    Calls ``self._launch_logistic_reg`` with the given data and ``c``, then
    evaluates the returned test predictions with
    ``utils.evaluate_prediction``.

    Returns:
        The ``"balanced_accuracy"`` entry of the evaluation result.
    """
    # Only the second element of the launcher's 4-tuple (test predictions)
    # is needed here.
    _, predictions, _, _ = self._launch_logistic_reg(
        x_train, x_test, y_train, y_test, c
    )
    scores = utils.evaluate_prediction(y_test, predictions)
    return scores["balanced_accuracy"]
def evaluate_no_cv(self, train_index, test_index):
    """Train and score an XGBoost model once, without any grid search.

    The values stored under the ``*_range`` keys of
    ``self._algorithm_params`` are forwarded unchanged to
    ``self._launch_xgboost`` (presumably single values rather than ranges
    in this mode -- TODO confirm with callers).

    Args:
        train_index: Index selecting the training rows of ``self._x`` and
            ``self._y``.
        test_index: Index selecting the test rows of ``self._x`` and
            ``self._y``.

    Returns:
        dict with the keys ``best_parameter`` (the parameters used plus the
        test ``balanced_accuracy``), ``evaluation`` and ``evaluation_train``
        (outputs of ``utils.evaluate_prediction``), ``y_hat``,
        ``y_hat_train``, ``y``, ``y_train``, ``y_index``, ``x_index`` and
        ``auc``.
    """
    x_train, y_train = self._x[train_index], self._y[train_index]
    x_test, y_test = self._x[test_index], self._y[test_index]

    params = self._algorithm_params
    best_parameter = {
        "max_depth": params["max_depth_range"],
        "learning_rate": params["learning_rate_range"],
        "n_estimators": params["n_estimators_range"],
        "colsample_bytree": params["colsample_bytree_range"],
    }

    _, y_hat, auc, y_hat_train = self._launch_xgboost(
        x_train,
        x_test,
        y_train,
        y_test,
        best_parameter["max_depth"],
        best_parameter["learning_rate"],
        best_parameter["n_estimators"],
        best_parameter["colsample_bytree"],
    )

    test_scores = utils.evaluate_prediction(y_test, y_hat)
    # The test-set score is recorded next to the parameters that produced it.
    best_parameter["balanced_accuracy"] = test_scores["balanced_accuracy"]
    train_scores = utils.evaluate_prediction(y_train, y_hat_train)

    return {
        "best_parameter": best_parameter,
        "evaluation": test_scores,
        "evaluation_train": train_scores,
        "y_hat": y_hat,
        "y_hat_train": y_hat_train,
        "y": y_test,
        "y_train": y_train,
        "y_index": test_index,
        "x_index": train_index,
        "auc": auc,
    }
def _compute_average_test_accuracy(self, y_list, yhat_list):
    """Return the ``'balanced_accuracy'`` entry of ``evaluate_prediction``.

    Args:
        y_list: Reference labels, passed as the first argument to
            ``evaluate_prediction``.
        yhat_list: Predicted labels, passed as the second argument.
    """
    # Imported at call time, as in the original implementation.
    from clinica.pipelines.machine_learning.ml_utils import evaluate_prediction

    scores = evaluate_prediction(y_list, yhat_list)
    return scores['balanced_accuracy']
def evaluate(self, train_index, test_index):
    """Run an inner grid search on the training split, then score on the test split.

    The training samples are split with a shuffled ``StratifiedKFold``
    (``grid_search_folds`` splits). For every inner fold and every
    combination of ``n_estimators_range`` x ``max_depth_range`` x
    ``min_samples_split_range`` x ``max_features_range``,
    ``self._grid_search`` is submitted to a thread pool. The combination
    chosen by ``self._select_best_parameter`` is then used to call
    ``self._launch_random_forest`` on the whole training split and predict
    the test split.

    Args:
        train_index: Index selecting the training rows of ``self._x`` and
            ``self._y``.
        test_index: Index selecting the test rows of ``self._x`` and
            ``self._y``.

    Returns:
        dict with the keys ``best_parameter``, ``evaluation`` and
        ``evaluation_train`` (outputs of ``utils.evaluate_prediction``),
        ``y_hat``, ``y_hat_train``, ``y``, ``y_train``, ``y_index``,
        ``x_index`` and ``auc``.
    """
    params = self._algorithm_params
    n_folds = params["grid_search_folds"]

    x_train = self._x[train_index]
    y_train = self._y[train_index]

    # Build the inner folds before any thread is started, so that a failure
    # here (e.g. too few samples per class) cannot leave a pool behind.
    skf = StratifiedKFold(n_splits=n_folds, shuffle=True)
    inner_cv = list(skf.split(np.zeros(len(y_train)), y_train))

    parameters_combinations = list(
        itertools.product(
            params["n_estimators_range"],
            params["max_depth_range"],
            params["min_samples_split_range"],
            params["max_features_range"],
        )
    )

    # async_result[fold][parameters] -> AsyncResult of self._grid_search
    async_result = {fold: {} for fold in range(n_folds)}

    # The context manager guarantees the worker threads are torn down even
    # if submitting a job raises; on the normal path close()/join() wait
    # for every submitted job to finish before the pool is terminated.
    with ThreadPool(params["n_threads"]) as inner_pool:
        for fold, (inner_train_index, inner_test_index) in enumerate(inner_cv):
            x_train_inner = x_train[inner_train_index]
            x_test_inner = x_train[inner_test_index]
            y_train_inner = y_train[inner_train_index]
            y_test_inner = y_train[inner_test_index]

            for parameters in parameters_combinations:
                async_result[fold][parameters] = inner_pool.apply_async(
                    self._grid_search,
                    (
                        x_train_inner,
                        x_test_inner,
                        y_train_inner,
                        y_test_inner,
                        *parameters,
                    ),
                )
        inner_pool.close()
        inner_pool.join()

    best_parameter = self._select_best_parameter(async_result)

    x_test = self._x[test_index]
    y_test = self._y[test_index]

    _, y_hat, auc, y_hat_train = self._launch_random_forest(
        x_train,
        x_test,
        y_train,
        y_test,
        best_parameter["n_estimators"],
        best_parameter["max_depth"],
        best_parameter["min_samples_split"],
        best_parameter["max_features"],
    )

    return {
        "best_parameter": best_parameter,
        "evaluation": utils.evaluate_prediction(y_test, y_hat),
        "evaluation_train": utils.evaluate_prediction(y_train, y_hat_train),
        "y_hat": y_hat,
        "y_hat_train": y_hat_train,
        "y": y_test,
        "y_train": y_train,
        "y_index": test_index,
        "x_index": train_index,
        "auc": auc,
    }