Example #1
    def _run_multiclass(self, multiclass, xval_params, expected_best):

        xval_splitter = CDataSplitter.create(
            'kfold', num_folds=3, random_state=50000)

        # Estimate the best parameters (they are also set inside the classifier)
        best_params = multiclass.estimate_parameters(
            self.tr, xval_params, xval_splitter, 'accuracy',
            perf_evaluator='xval-multiclass', n_jobs=1)

        self.logger.info(
            "Multiclass SVM has now the following parameters: {:}".format(
                multiclass.get_params()))

        for clf_idx, clf in enumerate(multiclass._binary_classifiers):
            self.assertEqual(
                clf.C, expected_best['C'][clf_idx])
            self.assertEqual(
                clf.kernel.gamma, expected_best['kernel.gamma'][clf_idx])

        # Final test: fit using best parameters
        multiclass.fit(self.tr)

        for clf_idx, clf in enumerate(multiclass._binary_classifiers):
            for param in best_params:
                self.assertEqual(clf.get_params()[param],
                                 best_params[param][clf_idx])
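A minimal, self-contained sketch of the setup this helper expects. The module paths, the OVA wrapper and the toy dataset below are assumptions based on the public secml layout, not taken from the test suite:

from secml.data.loader import CDLRandomBlobs
from secml.data.splitter import CDataSplitter
from secml.ml.classifiers import CClassifierSVM
from secml.ml.classifiers.multiclass import CClassifierMulticlassOVA
from secml.ml.kernels import CKernelRBF  # secml.ml.kernel in older releases

# Toy 3-class dataset (hypothetical stand-in for self.tr)
tr = CDLRandomBlobs(n_samples=60, centers=3, random_state=0).load()
multiclass = CClassifierMulticlassOVA(CClassifierSVM, kernel=CKernelRBF())

xval_splitter = CDataSplitter.create('kfold', num_folds=3, random_state=50000)
xval_params = {'C': [0.1, 1, 10], 'kernel.gamma': [0.1, 1]}

# estimate_parameters runs the cross-validation, sets the best values
# inside the binary classifiers and returns them as a dict of
# per-classifier lists (hence best_params[param][clf_idx] above)
best_params = multiclass.estimate_parameters(
    tr, xval_params, xval_splitter, 'accuracy',
    perf_evaluator='xval-multiclass', n_jobs=1)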
Example #2
    def test_nan_metric_value(self):

        # Change the default parameters to be sure they are not used
        self.svm.set_params({'C': 25, 'kernel.gamma': 1e-1})
        xval_parameters = {'C': [1, 10, 100], 'kernel.gamma': [1, 50]}

        # Run xval to choose the best parameters
        xval_splitter = CDataSplitter.create('kfold',
                                             num_folds=5,
                                             random_state=50000)

        self.logger.info("Testing metric with some nan")

        some_nan_metric = CMetricFirstNan()

        # Evaluate the parameters with an evaluator built on the nan-returning metric
        perf_eval = CPerfEvaluatorXVal(xval_splitter, some_nan_metric)
        perf_eval.verbose = 1

        best_params, best_score = perf_eval.evaluate_params(
            self.svm, self.training_dataset, xval_parameters, pick='last')

        self.logger.info("best score : {:}".format(best_score))

        # The xval should select the only actual value (the others are nan)
        self.assertEqual(best_score, 1.)

        self.logger.info("Testing metric with all nan")

        # This test case involves an all-nan slice
        self.logger.filterwarnings(action="ignore",
                                   message="All-NaN slice encountered",
                                   category=RuntimeWarning)

        all_nan_metric = CMetricAllNan()

        # Evaluate again, this time with an evaluator built on the all-nan metric
        perf_eval = CPerfEvaluatorXVal(xval_splitter, all_nan_metric)
        perf_eval.verbose = 1

        with self.assertRaises(ValueError):
            perf_eval.evaluate_params(self.svm,
                                      self.training_dataset,
                                      xval_parameters,
                                      pick='last')
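CMetricFirstNan and CMetricAllNan are test-only metrics defined elsewhere in the suite. A rough sketch of how such a metric could look, assuming secml metrics subclass CMetric and implement a _performance_score hook (both assumptions; the real test doubles may differ):

import numpy as np
from secml.ml.peval.metrics import CMetric

class CMetricFirstNanSketch(CMetric):
    """Hypothetical metric: a real score on the first call, nan afterwards."""
    best_value = 1.0

    def __init__(self):
        self._calls = 0

    def _performance_score(self, y_true, score):
        # Assumed hook: only the first evaluation receives an actual
        # value, so xval must pick that single non-nan candidate
        self._calls += 1
        return 1.0 if self._calls == 1 else np.nan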
Example #3
    def test_parameters_setting(self):

        # Change the default parameters to be sure they are not used
        self.svm.set_params({'C': 25, 'kernel.gamma': 1e-1, 'n_jobs': 2})

        xval_parameters = {'C': [1, 10, 100], 'kernel.gamma': [1, 50]}

        # Run xval to choose the best parameters
        xval_splitter = CDataSplitter.create('kfold',
                                             num_folds=5,
                                             random_state=50000)

        # Set the best parameters inside the classifier
        self.svm.estimate_parameters(self.training_dataset, xval_parameters,
                                     xval_splitter, 'accuracy')

        self.logger.info("SVM has now the following parameters: {:}".format(
            self.svm.get_params()))

        self.assertEqual(self.svm.get_params()['C'], 1)
        self.assertEqual(self.svm.get_params()['kernel.gamma'], 50)

        # Now we compare the parameters chosen before with a new evaluator
        perf_eval = CPerfEvaluatorXVal(xval_splitter,
                                       CMetric.create('accuracy'))
        perf_eval.verbose = 1

        best_params, best_score = perf_eval.evaluate_params(
            self.svm, self.training_dataset, xval_parameters)

        for param in xval_parameters:
            self.logger.info("Best '{:}' is: {:}".format(
                param, best_params[param]))
            self.assertEqual(best_params[param], self.svm.get_params()[param])

        self.svm.verbose = 0

        parameters_combination = [[1, 1], [1, 50], [10, 1], [10, 50], [100, 1],
                                  [100, 50]]
        par_comb_score = CArray.zeros(len(parameters_combination))
        for comb in range(len(parameters_combination)):

            this_fold_score = []
            num_xval_fold = len(xval_splitter.tr_idx)

            for f in range(num_xval_fold):
                self.svm.set("C", parameters_combination[comb][0])
                self.svm.kernel.gamma = parameters_combination[comb][1]

                self.svm.fit(
                    self.training_dataset[xval_splitter.tr_idx[f], :].X,
                    self.training_dataset[xval_splitter.tr_idx[f], :].Y)

                this_fold_predicted = self.svm.predict(
                    self.training_dataset[xval_splitter.ts_idx[f], :].X)

                this_fold_accuracy = skm.accuracy_score(
                    self.training_dataset[
                        xval_splitter.ts_idx[f], :].Y.get_data(),
                    this_fold_predicted.get_data())
                this_fold_score.append(this_fold_accuracy)

            par_comb_score[comb] = np.mean(this_fold_score)
            self.logger.info("mean score for this combination: {:}".format(
                par_comb_score[comb]))

        max_combination_score = par_comb_score.max()
        better_param_comb = parameters_combination[par_comb_score.argmax()]
        self.logger.info("max combination score found here: {:}".format(
            max_combination_score))
        self.logger.info(
            "max comb score found during xval: {:}".format(best_score))

        self.assertEqual(max_combination_score, best_score)

        # Set the parameters found by xval and check they match those chosen here
        self.logger.info("the parameters selected by our xval are:")
        self.svm.set_params(best_params)
        self.logger.info("C: {:}".format(self.svm.C))
        self.logger.info("kernel.gamma: {:}".format(self.svm.kernel.gamma))
        # check C
        self.assertEqual(better_param_comb[0], self.svm.C)
        # check gamma
        self.assertEqual(better_param_comb[1], self.svm.kernel.gamma)
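For reference, the evaluator used in these tests builds its splitter and metric through the class factories; its constructor (from the evaluator base class, by the look of it) is shown below.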
    def __init__(self, splitter, metric):

        # Accept either ready-made objects or registered creator names
        # (e.g. 'kfold', 'accuracy'), thanks to the class factories
        self.splitter = CDataSplitter.create(splitter)
        self.metric = CMetric.create(metric)
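Since both arguments go through CDataSplitter.create and CMetric.create, the evaluator can presumably be built either from ready-made objects, as in the tests above, or directly from registered creator names. A sketch, assuming 'kfold' and 'accuracy' are such names:

from secml.data.splitter import CDataSplitter
from secml.ml.peval import CPerfEvaluatorXVal
from secml.ml.peval.metrics import CMetric

# Equivalent constructions (assumed): creator names vs. instances,
# which the factories should pass through unchanged
ev_from_names = CPerfEvaluatorXVal('kfold', 'accuracy')
ev_from_objects = CPerfEvaluatorXVal(
    CDataSplitter.create('kfold', num_folds=5, random_state=50000),
    CMetric.create('accuracy'))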