def test_sigmoid_calibration():
    """Test calibration values with Platt sigmoid model"""
    exF = np.array([5, -4, 1.0])
    exY = np.array([1, -1, -1])
    # computed from my python port of the C++ code in LibSVM
    AB_lin_libsvm = np.array([-0.20261354391187855, 0.65236314980010512])
    assert_array_almost_equal(AB_lin_libsvm, _sigmoid_calibration(exF, exY), 3)
    lin_prob = 1.0 / (1.0 + np.exp(AB_lin_libsvm[0] * exF + AB_lin_libsvm[1]))
    sk_prob = _SigmoidCalibration().fit(exF, exY).predict(exF)
    assert_array_almost_equal(lin_prob, sk_prob, 6)

    # check that _SigmoidCalibration().fit only accepts 1d array or 2d column
    # arrays
    with pytest.raises(ValueError):
        _SigmoidCalibration().fit(np.vstack((exF, exF)), exY)
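
For reference, the Platt model exercised by this test maps a decision value f to 1 / (1 + exp(A * f + B)), where A and B are the two numbers `_sigmoid_calibration` returns. A minimal standalone sketch, assuming the private `sklearn.calibration` helpers the test uses are importable:

import numpy as np
from sklearn.calibration import _SigmoidCalibration, _sigmoid_calibration

scores = np.array([5.0, -4.0, 1.0])  # decision-function values
labels = np.array([1, -1, -1])       # +1 / -1 targets

A, B = _sigmoid_calibration(scores, labels)    # Platt's A and B
manual = 1.0 / (1.0 + np.exp(A * scores + B))  # sigmoid applied by hand
fitted = _SigmoidCalibration().fit(scores, labels).predict(scores)
# `manual` and `fitted` agree to ~6 decimals, which is what the test asserts.
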
@pytest.mark.parametrize("method", ["sigmoid", "isotonic"])
def test_calibration_ensemble_false(data, method):
    # Test that `ensemble=False` is the same as using predictions from
    # `cross_val_predict` to train calibrator.
    X, y = data
    clf = LinearSVC(random_state=7)

    cal_clf = CalibratedClassifierCV(clf, method=method, cv=3, ensemble=False)
    cal_clf.fit(X, y)
    cal_probas = cal_clf.predict_proba(X)

    # Get probas manually
    unbiased_preds = cross_val_predict(clf,
                                       X,
                                       y,
                                       cv=3,
                                       method="decision_function")
    if method == "isotonic":
        calibrator = IsotonicRegression(out_of_bounds="clip")
    else:
        calibrator = _SigmoidCalibration()
    calibrator.fit(unbiased_preds, y)
    # Use `clf` fit on all data
    clf.fit(X, y)
    clf_df = clf.decision_function(X)
    manual_probas = calibrator.predict(clf_df)
    assert_allclose(cal_probas[:, 1], manual_probas)
Example #4
    def fit(self, T, y, sample_weight=None):
        """Fit using `T`, `y` as training data.

        Parameters
        ----------
        * `T` [array-like, shape=(n_samples,)]:
            Training data.

        * `y` [array-like, shape=(n_samples,)]:
            Training target.

        * `sample_weight` [array-like, shape=(n_samples,), optional]:
            Weights. If set to `None`, all weights will be set to 1.

        Returns
        -------
        * `self` [object]:
            `self`.
        """
        # Check input
        T = column_or_1d(T)

        # Fit
        self.calibrator_ = _SigmoidCalibration()
        self.calibrator_.fit(T, y, sample_weight=sample_weight)

        return self
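
The wrapper above would typically pair with a `predict` that delegates to the fitted calibrator; a minimal sketch under that assumption (the method is not shown in the snippet):

    def predict(self, T):
        """Map decision values `T` to calibrated probabilities."""
        T = column_or_1d(T)
        return self.calibrator_.predict(T)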
Example #6
    def train(self, y_true, y_prob):
        if self.calibrator_type == 'isotonic':
            self.calibrator = IsotonicRegression(out_of_bounds='clip')
        elif self.calibrator_type == 'platt':
            self.calibrator = _SigmoidCalibration()

        self.calibrator.fit(y_prob, y_true)

        self.save()
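
One detail worth noting in the isotonic branch: `out_of_bounds='clip'` controls what happens at prediction time when a score falls outside the range seen during fitting; instead of raising an error, the score is clamped to the boundary probability. A small self-contained illustration:

import numpy as np
from sklearn.isotonic import IsotonicRegression

iso = IsotonicRegression(out_of_bounds='clip')
iso.fit([0.1, 0.4, 0.6, 0.9], [0, 0, 1, 1])
print(iso.predict([-5.0, 0.5, 5.0]))  # out-of-range ends clip to 0 and 1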
Example #7
    def get_model(self, model, ytrue, probabilities):
        if self.calibration_method == CalibratedLearner.Sigmoid:
            fitter = _SigmoidCalibration()
        else:
            fitter = IsotonicRegression(out_of_bounds='clip')
        probabilities[np.isinf(probabilities)] = 1
        # Fit one calibrator per class column. Clone the fitter each time
        # (sklearn.base.clone): fit() returns self, so reusing a single
        # instance would leave every list entry pointing at the same object,
        # fit only on the last column.
        calibrators = [clone(fitter).fit(cls_probs, ytrue)
                       for cls_probs in probabilities.T]
        return CalibratedClassifier(model, calibrators)
Example #8
    def fit(self, p_input, y):
        if self.method == 'isotonic':
            calibrator = IsotonicRegression(out_of_bounds='clip')
        elif self.method == 'sigmoid':
            calibrator = _SigmoidCalibration()
        calibrator.fit(p_input, y)
        if self.method == 'sigmoid':
            self.a = calibrator.a_
            self.b = calibrator.b_
        self.calibrator = calibrator

        return self
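
Storing `a_` and `b_` as done above lets the sigmoid be applied later without keeping the calibrator object around; `_SigmoidCalibration.predict` computes exactly this transform. A sketch of the equivalent standalone function:

import numpy as np

def platt_apply(scores, a, b):
    # Platt sigmoid: P(y=1 | f) = 1 / (1 + exp(a * f + b))
    return 1.0 / (1.0 + np.exp(a * np.asarray(scores, dtype=float) + b))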
Example #9
    def fit(self, X, y, sample_weight=None):
        """Calibrate the fitted model

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data.

        y : array-like, shape (n_samples,)
            Target values.

        sample_weight : array-like, shape = [n_samples] or None
            Sample weights. If None, then samples are equally weighted.

        Returns
        -------
        self : object
            Returns an instance of self.
        """

        self.label_encoder_ = LabelEncoder()
        if self.classes is None:
            self.label_encoder_.fit(y)
        else:
            self.label_encoder_.fit(self.classes)

        self.classes_ = self.label_encoder_.classes_
        Y = label_binarize(y, classes=self.classes_)

        df, idx_pos_class = self._preproc(X)
        self.calibrators_ = []
        for k, this_df in zip(idx_pos_class, df.T):
            if self.method == 'isotonic':
                calibrator = IsotonicRegression(out_of_bounds='clip')
            elif self.method == 'sigmoid':
                calibrator = _SigmoidCalibration()
            elif self.method == 'euler':
                calibrator = _EulerSigmoidCalibration()
            elif self.method == 'beta':
                calibrator = BetaCalibration()
            elif self.method in ['rocch', 'convex']:
                calibrator = _ROCCHCalibration()
            elif isinstance(self.method, BaseEstimator):
                calibrator = self.method
            else:
                raise ValueError('method should be "sigmoid", "isotonic", '
                                 '"euler", "beta", "rocch"/"convex" or an '
                                 'estimator instance. Got %s.' % self.method)
            calibrator.fit(this_df, Y[:, k], sample_weight)
            self.calibrators_.append(calibrator)

        return self
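
The matching `predict_proba` is not shown; it would typically apply each per-class calibrator to its decision column and renormalize, along the lines of sklearn's own `_CalibratedClassifier`. A sketch under that assumption:

    def predict_proba(self, X):
        n_classes = len(self.classes_)
        proba = np.zeros((X.shape[0], n_classes))
        df, idx_pos_class = self._preproc(X)
        for k, this_df, calibrator in zip(idx_pos_class, df.T,
                                          self.calibrators_):
            proba[:, k] = calibrator.predict(this_df)
        if n_classes == 2:
            proba[:, 0] = 1.0 - proba[:, 1]  # binary case: one calibrator
        else:
            proba /= np.sum(proba, axis=1)[:, np.newaxis]  # renormalize rows
        return proba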
Example #10
    def fit(self, X, y=None):
        """
        Fit the underlying outlier model, then fit a sigmoid on its
        decision-function scores so that probabilities can be estimated.

        :param X: array-like, shape=(n_samples, n_features), training data.
        :param y: array-like, shape=(n_samples,), training targets.
        :return: Returns an instance of self.
        """
        X, y = check_X_y(X, y, estimator=self)
        if not self._is_outlier_model():
            raise ValueError("Passed model does not detect outliers!")
        if not hasattr(self.model, 'decision_function'):
            raise ValueError(
                f'Passed model {self.model} does not have a `decision_function` '
                f'method. This is required for `predict_proba` estimation.')
        self.estimator_ = self.model.fit(X, y)
        self.classes_ = np.array([0, 1])

        # fit sigmoid function for `predict_proba`
        decision_function_scores = self.estimator_.decision_function(X)
        self._predict_proba_sigmoid = _SigmoidCalibration().fit(
            decision_function_scores, y)

        return self
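
`predict_proba` itself is not shown; given `classes_ = [0, 1]` and the sigmoid fitted on decision scores above, it plausibly reduces to the following sketch (assumed, not the library's verbatim code):

    def predict_proba(self, X):
        scores = self.estimator_.decision_function(X)
        p_outlier = self._predict_proba_sigmoid.predict(scores)
        # Column order follows self.classes_ = [0, 1].
        return np.column_stack([1 - p_outlier, p_outlier])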
Example #11
    def predict(self,
                pos_genes,
                neg_genes,
                predict_all=False,
                best_params=False,
                prob_fit='SIGMOID',
                cv_folds=5):

        logger.info("Running %i fold SVM", cv_folds)

        # Group training genes
        train_genes = [
            g for g in (pos_genes | neg_genes)
            if self._dab.get_index(g) is not None
        ]
        train_genes_idx = [self._dab.get_index(g) for g in train_genes]

        # Subset training matrix and labels
        if predict_all:
            X_all = self._dab_matrix()  # full matrix, scored in each fold
            X = X_all[train_genes_idx]
            y = np.array([1 if g in pos_genes else -1 for g in train_genes])
        else:
            X = np.empty([len(train_genes), self._dab.get_size()])
            y = np.empty(len(train_genes))
            for i, g in enumerate(train_genes):
                X[i] = self._dab.get(g)
                y[i] = 1 if g in pos_genes else -1

        params = NetworkSVM.default_params

        if best_params:
            # Set the parameters by cross-validation
            score = 'average_precision'
            clf = GridSearchCV(LinearSVC(),
                               NetworkSVM.tuned_parameters,
                               cv=3,
                               n_jobs=10,
                               scoring=score)
            clf.fit(X, y)
            params = clf.best_params_

        train_scores = np.full(len(train_genes), np.nan)
        train_probs = np.full(len(train_genes), np.nan)
        scores, probs = None, None

        kf = StratifiedKFold(n_splits=cv_folds)
        for cv, (train, test) in enumerate(kf.split(X, y)):
            X_train, X_test = X[train], X[test]
            y_train, y_test = y[train], y[test]

            logger.info('Learning SVM')
            clf = LinearSVC(**params)
            clf.fit(X_train, y_train)

            logger.info('Predicting SVM')
            if predict_all:
                scores_cv = clf.decision_function(X_all)
                scores = scores_cv if scores is None else np.column_stack(
                    (scores, scores_cv))

                for idx in test:
                    train_scores[idx] = scores_cv[train_genes_idx[idx]]
            else:
                scores_cv = clf.decision_function(X_test)
                for i, idx in enumerate(test):
                    train_scores[idx] = scores_cv[i]

        Y = label_binarize(y, classes=[-1, 1])
        if prob_fit == 'ISO':
            calibrator = IsotonicRegression(out_of_bounds='clip')
        else:
            calibrator = _SigmoidCalibration()
        calibrator.fit(train_scores, Y[:, 0])
        train_probs = calibrator.predict(train_scores)

        if predict_all:
            scores = np.median(scores, axis=1)
            for i, idx in enumerate(train_genes_idx):
                scores[idx] = train_scores[i]

            # Per-fold probabilities are not stacked, so apply the fitted
            # calibrator to the aggregated scores, then keep the
            # cross-validated probabilities for the training genes.
            probs = calibrator.predict(scores)
            for i, idx in enumerate(train_genes_idx):
                probs[idx] = train_probs[i]

            genes = self._dab.gene_list
        else:
            scores = train_scores
            genes = train_genes
            probs = train_probs

        self._predictions = sorted(zip(genes, scores, probs),
                                   key=itemgetter(1),
                                   reverse=True)

        return self._predictions
Example #12
            for idx in test:
                train_scores[idx] = scores_cv[train_genes_idx[idx]]
        else:
            scores_cv = clf.decision_function(X_test)
            for i, idx in enumerate(test):
                train_scores[idx] = scores_cv[i]

    if args.prob_fit == 'ISO':
        ir = IsotonicRegression(out_of_bounds='clip')
        Y = label_binarize(y, classes=[-1, 1])
        ir.fit(train_scores, Y[:, 0])
        train_probs = ir.predict(train_scores)
    elif args.prob_fit == 'SIGMOID':
        Y = label_binarize(y, classes=[-1, 1])
        sc = _SigmoidCalibration()
        sc.fit(train_scores, Y[:, 0])
        train_probs = sc.predict(train_scores)

    if args.all:
        scores = np.median(scores, axis=1)
        for i, idx in enumerate(train_genes_idx):
            scores[idx] = train_scores[i]

        probs = np.median(probs, axis=1)
        for i, idx in enumerate(train_genes_idx):
            probs[idx] = train_probs[i]

        genes = dab.gene_list
    else:
        scores = train_scores