Example #1
    def evaluate_no_cv(self, train_index, test_index):

        x_train = self._x[train_index]
        y_train = self._y[train_index]
        x_test = self._x[test_index]
        y_test = self._y[test_index]

        # No inner CV here: the supplied parameter ranges are recorded as-is
        # and passed straight to the launcher.
        best_parameter = dict()
        best_parameter['n_estimators'] = self._algorithm_params['n_estimators_range']
        best_parameter['max_depth'] = self._algorithm_params['max_depth_range']
        best_parameter['min_samples_split'] = self._algorithm_params['min_samples_split_range']
        best_parameter['max_features'] = self._algorithm_params['max_features_range']

        _, y_hat, auc, y_hat_train = self._launch_random_forest(x_train, x_test, y_train, y_test,
                                                                self._algorithm_params['n_estimators_range'],
                                                                self._algorithm_params['max_depth_range'],
                                                                self._algorithm_params['min_samples_split_range'],
                                                                self._algorithm_params['max_features_range'])
        result = dict()
        result['best_parameter'] = best_parameter
        result['evaluation'] = utils.evaluate_prediction(y_test, y_hat)
        best_parameter['balanced_accuracy'] = result['evaluation']['balanced_accuracy']
        result['evaluation_train'] = utils.evaluate_prediction(y_train, y_hat_train)
        result['y_hat'] = y_hat
        result['y_hat_train'] = y_hat_train
        result['y'] = y_test
        result['y_train'] = y_train
        result['y_index'] = test_index
        result['x_index'] = train_index
        result['auc'] = auc

        return result
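All of the `evaluate_no_cv` variants take two integer index arrays into the instance's stored `_x` and `_y`. A minimal sketch of a caller, with the class name `RandomForestAlgorithm` and its constructor assumed for illustration (the library's real wrapper may differ):

import numpy as np
from sklearn.model_selection import train_test_split

x = np.random.default_rng(0).normal(size=(50, 8))
y = np.random.default_rng(1).integers(0, 2, size=50)

# A single stratified train/test split, expressed as index arrays.
train_index, test_index = train_test_split(
    np.arange(len(y)), test_size=0.2, stratify=y, random_state=0)

# Hypothetical call; constructor arguments are assumptions, not the real API.
# algorithm = RandomForestAlgorithm(x, y, algorithm_params={...})
# result = algorithm.evaluate_no_cv(train_index, test_index)
# print(result['evaluation']['balanced_accuracy'], result['auc'])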
Example #2
    def evaluate_no_cv(self, train_index, test_index):

        x_train = self._x[train_index]
        y_train = self._y[train_index]
        x_test = self._x[test_index]
        y_test = self._y[test_index]

        best_parameter = dict()
        best_parameter['max_depth'] = self._max_depth_range
        best_parameter['learning_rate'] = self._learning_rate_range
        best_parameter['n_estimators'] = self._n_estimators_range
        best_parameter['colsample_bytree'] = self._colsample_bytree_range

        _, y_hat, auc, y_hat_train = self._launch_xgboost(
            x_train, x_test, y_train, y_test, self._max_depth_range,
            self._learning_rate_range, self._n_estimators_range,
            self._colsample_bytree_range)
        result = dict()
        result['best_parameter'] = best_parameter
        result['evaluation'] = utils.evaluate_prediction(y_test, y_hat)
        best_parameter['balanced_accuracy'] = result['evaluation'][
            'balanced_accuracy']
        result['evaluation_train'] = utils.evaluate_prediction(
            y_train, y_hat_train)
        result['y_hat'] = y_hat
        result['y_hat_train'] = y_hat_train
        result['y'] = y_test
        result['y_train'] = y_train
        result['y_index'] = test_index
        result['x_index'] = train_index
        result['auc'] = auc

        return result
Example #3
    def evaluate(self, train_index, test_index):

        # A thread pool evaluates every (fold, parameter) pair of the inner
        # grid search asynchronously.
        inner_pool = ThreadPool(self._algorithm_params['n_threads'])
        async_result = {}
        for i in range(self._algorithm_params['grid_search_folds']):
            async_result[i] = {}

        x_train = self._x[train_index]
        y_train = self._y[train_index]

        skf = StratifiedKFold(
            n_splits=self._algorithm_params['grid_search_folds'], shuffle=True)
        inner_cv = list(skf.split(np.zeros(len(y_train)), y_train))

        # Cartesian product of all hyperparameter ranges to grid-search over.
        parameters_combinations = list(
            itertools.product(
                self._algorithm_params['max_depth_range'],
                self._algorithm_params['learning_rate_range'],
                self._algorithm_params['n_estimators_range'],
                self._algorithm_params['colsample_bytree_range']))

        for i in range(len(inner_cv)):
            inner_train_index, inner_test_index = inner_cv[i]

            x_train_inner = x_train[inner_train_index]
            x_test_inner = x_train[inner_test_index]
            y_train_inner = y_train[inner_train_index]
            y_test_inner = y_train[inner_test_index]

            for parameters in parameters_combinations:
                async_result[i][parameters] = inner_pool.apply_async(
                    self._grid_search,
                    (x_train_inner, x_test_inner, y_train_inner, y_test_inner,
                     parameters[0], parameters[1], parameters[2],
                     parameters[3]))
        inner_pool.close()
        inner_pool.join()
        # Aggregate the inner-fold scores and keep the best combination.
        best_parameter = self._select_best_parameter(async_result)
        x_test = self._x[test_index]
        y_test = self._y[test_index]

        _, y_hat, auc, y_hat_train = self._launch_xgboost(
            x_train, x_test, y_train, y_test, best_parameter['max_depth'],
            best_parameter['learning_rate'], best_parameter['n_estimators'],
            best_parameter['colsample_bytree'])

        result = dict()
        result['best_parameter'] = best_parameter
        result['evaluation'] = utils.evaluate_prediction(y_test, y_hat)
        result['evaluation_train'] = utils.evaluate_prediction(
            y_train, y_hat_train)
        result['y_hat'] = y_hat
        result['y_hat_train'] = y_hat_train
        result['y'] = y_test
        result['y_train'] = y_train
        result['y_index'] = test_index
        result['x_index'] = train_index
        result['auc'] = auc

        return result
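The pattern above (an inner StratifiedKFold, a ThreadPool scoring every fold/parameter pair asynchronously, then one final fit with the winning combination) can be reproduced standalone. A self-contained toy of the inner loop, using sklearn's GradientBoostingClassifier as a stand-in so the sketch runs without xgboost; all names here are illustrative:

import itertools
import numpy as np
from multiprocessing.pool import ThreadPool
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import balanced_accuracy_score
from sklearn.model_selection import StratifiedKFold

rng = np.random.default_rng(0)
x, y = rng.normal(size=(60, 4)), rng.integers(0, 2, size=60)

def score(x_tr, x_te, y_tr, y_te, max_depth, learning_rate):
    clf = GradientBoostingClassifier(max_depth=max_depth,
                                     learning_rate=learning_rate)
    return balanced_accuracy_score(y_te, clf.fit(x_tr, y_tr).predict(x_te))

grid = list(itertools.product([2, 3], [0.1, 0.3]))
skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=0)
pool = ThreadPool(4)
# One async task per (inner fold, parameter combination) pair.
async_result = {
    i: {p: pool.apply_async(score, (x[tr], x[te], y[tr], y[te], *p))
        for p in grid}
    for i, (tr, te) in enumerate(skf.split(np.zeros(len(y)), y))
}
pool.close()
pool.join()
# Keep the combination with the best mean balanced accuracy across folds.
best = max(grid, key=lambda p: np.mean([async_result[i][p].get()
                                        for i in async_result]))
print(best)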
Example #4
    def evaluate(self, train_index, test_index):

        inner_pool = ThreadPool(self._algorithm_params["n_threads"])
        async_result = {}
        for i in range(self._algorithm_params["grid_search_folds"]):
            async_result[i] = {}

        outer_kernel = self._kernel[train_index, :][:, train_index]
        y_train = self._y[train_index]

        skf = StratifiedKFold(
            n_splits=self._algorithm_params["grid_search_folds"], shuffle=True)
        inner_cv = list(skf.split(np.zeros(len(y_train)), y_train))

        for i in range(len(inner_cv)):
            inner_train_index, inner_test_index = inner_cv[i]

            # Slice the precomputed Gram matrix: train rows x train columns
            # for fitting, test rows x train columns for prediction.
            inner_kernel = outer_kernel[inner_train_index, :][:, inner_train_index]
            x_test_inner = outer_kernel[inner_test_index, :][:, inner_train_index]
            y_train_inner = y_train[inner_train_index]
            y_test_inner = y_train[inner_test_index]

            for c in self._algorithm_params["c_range"]:
                async_result[i][c] = inner_pool.apply_async(
                    self._grid_search,
                    (inner_kernel, x_test_inner, y_train_inner, y_test_inner,
                     c),
                )
        inner_pool.close()
        inner_pool.join()

        best_parameter = self._select_best_parameter(async_result)
        x_test = self._kernel[test_index, :][:, train_index]
        y_test = self._y[test_index]  # y_train was already set above

        _, y_hat, auc, y_hat_train = self._launch_svc(outer_kernel, x_test,
                                                      y_train, y_test,
                                                      best_parameter["c"])

        result = dict()
        result["best_parameter"] = best_parameter
        result["evaluation"] = utils.evaluate_prediction(y_test, y_hat)
        result["evaluation_train"] = utils.evaluate_prediction(
            y_train, y_hat_train)
        result["y_hat"] = y_hat
        result["y_hat_train"] = y_hat_train
        result["y"] = y_test
        result["y_train"] = y_train
        result["y_index"] = test_index
        result["x_index"] = train_index
        result["auc"] = auc

        return result
Example #5
    def evaluate(self, train_index, test_index):

        inner_pool = ThreadPool(self._n_threads)
        async_result = {}
        for i in range(self._grid_search_folds):
            async_result[i] = {}

        outer_kernel = self._kernel[train_index, :][:, train_index]
        y_train = self._y[train_index]

        skf = StratifiedKFold(n_splits=self._grid_search_folds, shuffle=True)
        inner_cv = list(skf.split(np.zeros(len(y_train)), y_train))

        for i in range(len(inner_cv)):
            inner_train_index, inner_test_index = inner_cv[i]

            inner_kernel = outer_kernel[inner_train_index, :][:, inner_train_index]
            x_test_inner = outer_kernel[inner_test_index, :][:, inner_train_index]
            y_train_inner = y_train[inner_train_index]
            y_test_inner = y_train[inner_test_index]

            for c in self._c_range:
                async_result[i][c] = inner_pool.apply_async(
                    self._grid_search, (inner_kernel, x_test_inner,
                                        y_train_inner, y_test_inner, c))
        inner_pool.close()
        inner_pool.join()

        best_parameter = self._select_best_parameter(async_result)
        x_test = self._kernel[test_index, :][:, train_index]
        y_test = self._y[test_index]  # y_train was already set above

        _, y_hat, auc, y_hat_train = self._launch_svc(outer_kernel, x_test,
                                                      y_train, y_test,
                                                      best_parameter['c'])

        result = dict()
        result['best_parameter'] = best_parameter
        result['evaluation'] = utils.evaluate_prediction(y_test, y_hat)
        result['evaluation_train'] = utils.evaluate_prediction(
            y_train, y_hat_train)
        result['y_hat'] = y_hat
        result['y_hat_train'] = y_hat_train
        result['y'] = y_test
        result['y_train'] = y_train
        result['y_index'] = test_index
        result['x_index'] = train_index
        result['auc'] = auc

        return result
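Examples #4 and #5 operate on a precomputed Gram matrix rather than a feature matrix: rows select the samples to score, columns always select the training samples. A minimal self-contained sketch of that indexing with sklearn's SVC(kernel="precomputed") on a toy linear kernel; the library's actual kernel and launcher may differ:

import numpy as np
from sklearn.svm import SVC

rng = np.random.default_rng(0)
x = rng.normal(size=(40, 5))
y = rng.integers(0, 2, size=40)
kernel = x @ x.T                      # Gram matrix over all samples

train_index, test_index = np.arange(30), np.arange(30, 40)

svc = SVC(kernel="precomputed", C=1.0)
# Fit on the train-by-train block, predict on the test-by-train block.
svc.fit(kernel[train_index, :][:, train_index], y[train_index])
y_hat = svc.predict(kernel[test_index, :][:, train_index])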
Example #6
    def _grid_search(self, kernel_train, x_test, y_train, y_test, c):

        _, y_hat, _, _ = self._launch_svc(kernel_train, x_test, y_train,
                                          y_test, c)
        res = utils.evaluate_prediction(y_test, y_hat)

        return res['balanced_accuracy']
Example #7
    def _grid_search(self, x_train, x_test, y_train, y_test, max_depth, learning_rate, n_estimators, colsample_bytree):

        _, y_hat, _, _ = self._launch_xgboost(x_train, x_test, y_train, y_test, max_depth, learning_rate, n_estimators,
                                              colsample_bytree)
        res = utils.evaluate_prediction(y_test, y_hat)

        return res['balanced_accuracy']
Example #8
    def _grid_search(
        self,
        x_train,
        x_test,
        y_train,
        y_test,
        n_estimators,
        max_depth,
        min_samples_split,
        max_features,
    ):

        _, y_hat, _, _ = self._launch_random_forest(
            x_train,
            x_test,
            y_train,
            y_test,
            n_estimators,
            max_depth,
            min_samples_split,
            max_features,
        )
        res = utils.evaluate_prediction(y_test, y_hat)

        return res["balanced_accuracy"]
Example #9
    def _grid_search(self, x_train, x_test, y_train, y_test, c):

        _, y_hat, _, _ = self._launch_logistic_reg(x_train, x_test, y_train,
                                                   y_test, c)
        res = utils.evaluate_prediction(y_test, y_hat)

        return res["balanced_accuracy"]
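Each `_grid_search` variant returns a single balanced accuracy, so the `async_result[fold][parameter]` mapping built in `evaluate` holds one AsyncResult per fold/parameter pair. A hedged sketch of the kind of aggregation `_select_best_parameter` could perform (this is an assumption, not the library's code): average over the inner folds, then keep the argmax:

import numpy as np

def select_best_c(async_result, c_range):
    # Mean balanced accuracy over the inner folds for each candidate c.
    mean_acc = {
        c: np.mean([async_result[fold][c].get() for fold in async_result])
        for c in c_range
    }
    best_c = max(mean_acc, key=mean_acc.get)
    return {'c': best_c, 'balanced_accuracy': mean_acc[best_c]}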
Example #10
    def evaluate_no_cv(self, train_index, test_index):

        x_train = self._x[train_index]
        y_train = self._y[train_index]
        x_test = self._x[test_index]
        y_test = self._y[test_index]

        best_parameter = dict()
        best_parameter["max_depth"] = self._algorithm_params["max_depth_range"]
        best_parameter["learning_rate"] = self._algorithm_params[
            "learning_rate_range"]
        best_parameter["n_estimators"] = self._algorithm_params[
            "n_estimators_range"]
        best_parameter["colsample_bytree"] = self._algorithm_params[
            "colsample_bytree_range"]

        _, y_hat, auc, y_hat_train = self._launch_xgboost(
            x_train,
            x_test,
            y_train,
            y_test,
            self._algorithm_params["max_depth_range"],
            self._algorithm_params["learning_rate_range"],
            self._algorithm_params["n_estimators_range"],
            self._algorithm_params["colsample_bytree_range"],
        )
        result = dict()
        result["best_parameter"] = best_parameter
        result["evaluation"] = utils.evaluate_prediction(y_test, y_hat)
        best_parameter["balanced_accuracy"] = result["evaluation"][
            "balanced_accuracy"]
        result["evaluation_train"] = utils.evaluate_prediction(
            y_train, y_hat_train)
        result["y_hat"] = y_hat
        result["y_hat_train"] = y_hat_train
        result["y"] = y_test
        result["y_train"] = y_train
        result["y_index"] = test_index
        result["x_index"] = train_index
        result["auc"] = auc

        return result
Example #11
    def _compute_average_test_accuracy(self, y_list, yhat_list):

        from clinica.pipelines.machine_learning.ml_utils import evaluate_prediction

        return evaluate_prediction(y_list, yhat_list)['balanced_accuracy']
Example #12
    def evaluate(self, train_index, test_index):

        inner_pool = ThreadPool(self._algorithm_params["n_threads"])
        async_result = {}
        for i in range(self._algorithm_params["grid_search_folds"]):
            async_result[i] = {}

        x_train = self._x[train_index]
        y_train = self._y[train_index]

        skf = StratifiedKFold(
            n_splits=self._algorithm_params["grid_search_folds"], shuffle=True)
        inner_cv = list(skf.split(np.zeros(len(y_train)), y_train))

        parameters_combinations = list(
            itertools.product(
                self._algorithm_params["n_estimators_range"],
                self._algorithm_params["max_depth_range"],
                self._algorithm_params["min_samples_split_range"],
                self._algorithm_params["max_features_range"],
            ))

        for i in range(len(inner_cv)):
            inner_train_index, inner_test_index = inner_cv[i]

            x_train_inner = x_train[inner_train_index]
            x_test_inner = x_train[inner_test_index]
            y_train_inner = y_train[inner_train_index]
            y_test_inner = y_train[inner_test_index]

            for parameters in parameters_combinations:
                async_result[i][parameters] = inner_pool.apply_async(
                    self._grid_search,
                    (
                        x_train_inner,
                        x_test_inner,
                        y_train_inner,
                        y_test_inner,
                        parameters[0],
                        parameters[1],
                        parameters[2],
                        parameters[3],
                    ),
                )
        inner_pool.close()
        inner_pool.join()
        best_parameter = self._select_best_parameter(async_result)
        x_test = self._x[test_index]
        y_test = self._y[test_index]

        _, y_hat, auc, y_hat_train = self._launch_random_forest(
            x_train,
            x_test,
            y_train,
            y_test,
            best_parameter["n_estimators"],
            best_parameter["max_depth"],
            best_parameter["min_samples_split"],
            best_parameter["max_features"],
        )

        result = dict()
        result["best_parameter"] = best_parameter
        result["evaluation"] = utils.evaluate_prediction(y_test, y_hat)
        result["evaluation_train"] = utils.evaluate_prediction(
            y_train, y_hat_train)
        result["y_hat"] = y_hat
        result["y_hat_train"] = y_hat_train
        result["y"] = y_test
        result["y_train"] = y_train
        result["y_index"] = test_index
        result["x_index"] = train_index
        result["auc"] = auc

        return result