def test_sigmoid_calibration():
    """Test calibration values with Platt sigmoid model"""
    decision_values = np.array([5, -4, 1.0])
    labels = np.array([1, -1, -1])

    # Reference (A, B) coefficients computed with a python port of the
    # C++ sigmoid-fitting code shipped in LibSVM.
    ab_libsvm = np.array([-0.20261354391187855, 0.65236314980010512])
    assert_array_almost_equal(
        ab_libsvm, _sigmoid_calibration(decision_values, labels), 3
    )

    # Probabilities implied by the reference coefficients must match the
    # estimator's predictions.
    expected_prob = 1.0 / (
        1.0 + np.exp(ab_libsvm[0] * decision_values + ab_libsvm[1])
    )
    fitted_prob = (
        _SigmoidCalibration().fit(decision_values, labels).predict(decision_values)
    )
    assert_array_almost_equal(expected_prob, fitted_prob, 6)

    # _SigmoidCalibration().fit only accepts 1d arrays or 2d column arrays.
    with pytest.raises(ValueError):
        _SigmoidCalibration().fit(
            np.vstack((decision_values, decision_values)), labels
        )
def test_calibration_ensemble_false(data, method):
    # With `ensemble=False`, CalibratedClassifierCV must behave exactly like
    # training the calibrator on `cross_val_predict` out-of-fold scores.
    X, y = data
    base_clf = LinearSVC(random_state=7)

    calibrated = CalibratedClassifierCV(base_clf, method=method, cv=3, ensemble=False)
    calibrated.fit(X, y)
    probas_auto = calibrated.predict_proba(X)

    # Rebuild the calibrator by hand from unbiased (out-of-fold) scores.
    oof_scores = cross_val_predict(base_clf, X, y, cv=3, method="decision_function")
    if method == "isotonic":
        manual_cal = IsotonicRegression(out_of_bounds="clip")
    else:
        manual_cal = _SigmoidCalibration()
    manual_cal.fit(oof_scores, y)

    # The final underlying model is refit on the full data set.
    base_clf.fit(X, y)
    probas_manual = manual_cal.predict(base_clf.decision_function(X))
    assert_allclose(probas_auto[:, 1], probas_manual)
def test_sigmoid_calibration():
    """Test calibration values with Platt sigmoid model"""
    df = np.array([5, -4, 1.0])
    target = np.array([1, -1, -1])

    # Reference (A, B) computed with a python port of the C++ code in LibSVM.
    ab_ref = np.array([-0.20261354391187855, 0.65236314980010512])
    assert_array_almost_equal(ab_ref, _sigmoid_calibration(df, target), 3)

    # The estimator's predictions must match the probabilities implied by
    # the reference coefficients.
    ref_prob = 1. / (1. + np.exp(ab_ref[0] * df + ab_ref[1]))
    model_prob = _SigmoidCalibration().fit(df, target).predict(df)
    assert_array_almost_equal(ref_prob, model_prob, 6)

    # _SigmoidCalibration().fit only accepts 1d arrays or 2d column arrays.
    assert_raises(ValueError, _SigmoidCalibration().fit,
                  np.vstack((df, df)), target)
def fit(self, T, y, sample_weight=None):
    """Fit using `T`, `y` as training data.

    Parameters
    ----------
    * `T` [array-like, shape=(n_samples,)]:
        Training data.

    * `y` [array-like, shape=(n_samples,)]:
        Training target.

    * `sample_weight` [array-like, shape=(n_samples,), optional]:
        Weights. If set to `None`, all weights will be set to 1.

    Returns
    -------
    * `self` [object]:
        `self`.
    """
    # Validate the input: only 1d arrays (or 2d column arrays) are accepted.
    T = column_or_1d(T)

    # Delegate the actual Platt scaling to scikit-learn's calibrator.
    calibrator = _SigmoidCalibration()
    calibrator.fit(T, y, sample_weight=sample_weight)
    self.calibrator_ = calibrator

    return self
def train(self, y_true, y_prob):
    # Instantiate the calibration backend selected at construction time:
    # Platt's sigmoid or clipped isotonic regression.
    if self.calibrator_type == 'platt':
        self.calibrator = _SigmoidCalibration()
    elif self.calibrator_type == 'isotonic':
        self.calibrator = IsotonicRegression(out_of_bounds='clip')

    # Learn the mapping from raw probabilities to calibrated ones,
    # then persist the fitted calibrator.
    self.calibrator.fit(y_prob, y_true)
    self.save()
def get_model(self, model, ytrue, probabilities):
    """Build a CalibratedClassifier with one fitted calibrator per class.

    Parameters: `model` is the underlying classifier, `ytrue` the target
    labels, `probabilities` an (n_samples, n_classes) array of predicted
    probabilities (one column per class).
    Returns a CalibratedClassifier wrapping `model` and the per-class
    calibrators.
    """
    # Isotonic regression cannot handle infinite inputs; clamp them to 1.
    probabilities[np.isinf(probabilities)] = 1

    calibrators = []
    for cls_probs in probabilities.T:
        # BUG FIX: the original created ONE fitter outside the loop and
        # re-fitted it for every column, so `calibrators` held N references
        # to the same object, trained only on the LAST class. A fresh
        # calibrator must be created per class column.
        if self.calibration_method == CalibratedLearner.Sigmoid:
            fitter = _SigmoidCalibration()
        else:
            fitter = IsotonicRegression(out_of_bounds='clip')
        calibrators.append(fitter.fit(cls_probs, ytrue))

    return CalibratedClassifier(model, calibrators)
def fit(self, p_input, y):
    """Fit the configured calibrator on scores `p_input` and targets `y`.

    For the sigmoid method, Platt's scalar coefficients are exposed as
    ``self.a`` / ``self.b`` after fitting.  Returns ``self``.

    Raises
    ------
    ValueError
        If ``self.method`` is neither 'isotonic' nor 'sigmoid'.  (The
        original code fell through to a NameError on `calibrator` in
        that case.)
    """
    if self.method == 'isotonic':
        calibrator = IsotonicRegression(out_of_bounds='clip')
    elif self.method == 'sigmoid':
        calibrator = _SigmoidCalibration()
    else:
        raise ValueError(
            "method should be 'isotonic' or 'sigmoid'. Got %r." % (self.method,))

    calibrator.fit(p_input, y)

    if self.method == 'sigmoid':
        # Expose Platt's fitted slope/intercept for inspection.
        self.a = calibrator.a_
        self.b = calibrator.b_

    self.calibrator = calibrator
    return self
def fit(self, X, y, sample_weight=None):
    """Calibrate the fitted model

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training data.

    y : array-like, shape (n_samples,)
        Target values.

    sample_weight : array-like, shape = [n_samples] or None
        Sample weights. If None, then samples are equally weighted.

    Returns
    -------
    self : object
        Returns an instance of self.
    """
    # Encode classes either from the data or from the explicit list
    # supplied at construction time.
    self.label_encoder_ = LabelEncoder()
    if self.classes is None:
        self.label_encoder_.fit(y)
    else:
        self.label_encoder_.fit(self.classes)

    self.classes_ = self.label_encoder_.classes_
    Y = label_binarize(y, self.classes_)

    df, idx_pos_class = self._preproc(X)
    self.calibrators_ = []

    # Fit one calibrator per (binarized) class column of the decision
    # values.
    for k, this_df in zip(idx_pos_class, df.T):
        if self.method == 'isotonic':
            calibrator = IsotonicRegression(out_of_bounds='clip')
        elif self.method == 'sigmoid':
            calibrator = _SigmoidCalibration()
        elif self.method == 'euler':
            calibrator = _EulerSigmoidCalibration()
        elif self.method == 'beta':
            calibrator = BetaCalibration()
        elif self.method in ['rocch', 'convex']:
            calibrator = _ROCCHCalibration()
        elif isinstance(self.method, BaseEstimator):
            # NOTE(review): a single estimator instance is shared across
            # all class columns here, so each column's fit overwrites the
            # previous one — consider cloning per column; verify intent.
            calibrator = self.method
        else:
            # BUG FIX: the original message claimed only "sigmoid" and
            # "isotonic" were accepted, contradicting the branches above.
            raise ValueError('method should be "sigmoid", "isotonic", '
                             '"euler", "beta", "rocch"/"convex" or a '
                             'BaseEstimator instance. Got %s.' % self.method)
        calibrator.fit(this_df, Y[:, k], sample_weight)
        self.calibrators_.append(calibrator)

    return self
def fit(self, X, y=None):
    """
    Fit the data after adapting the same weight.

    :param X: array-like, shape=(n_columns, n_samples,) training data.
    :param y: array-like, shape=(n_samples,) training data.
    :return: Returns an instance of self.
    """
    X, y = check_X_y(X, y, estimator=self)

    # Guard clauses: the wrapped model must be an outlier detector that
    # exposes decision_function (needed for probability estimation below).
    if not self._is_outlier_model():
        raise ValueError("Passed model does not detect outliers!")
    if not hasattr(self.model, 'decision_function'):
        raise ValueError(
            f'Passed model {self.model} does not have a `decision_function` '
            f'method. This is required for `predict_proba` estimation.')

    self.estimator_ = self.model.fit(X, y)
    self.classes_ = np.array([0, 1])

    # Map raw outlier scores onto probabilities with Platt scaling so
    # that `predict_proba` can be supported.
    raw_scores = self.estimator_.decision_function(X)
    self._predict_proba_sigmoid = _SigmoidCalibration().fit(raw_scores, y)

    return self
def predict(self, pos_genes, neg_genes, predict_all=False, best_params=False,
            prob_fit='SIGMOID', cv_folds=5):
    """Run a cross-validated linear SVM over the network matrix and rank genes.

    Trains an SVM with `cv_folds`-fold stratified CV on the genes in
    `pos_genes` (label +1) and `neg_genes` (label -1), fits a probability
    calibrator ('SIGMOID' Platt scaling or 'ISO' isotonic regression) on the
    out-of-fold decision scores, and stores/returns `self._predictions` as
    (gene, score, prob) tuples sorted by descending score.
    When `predict_all` is true, scores for every gene in the network are
    produced (median over folds); otherwise only training genes are scored.
    When `best_params` is true, SVM parameters are tuned by grid search first.
    """
    logger.info("Running %i fold SVM", cv_folds)

    # Group training genes: keep only genes present in the network (DAB).
    train_genes = [
        g for g in (pos_genes | neg_genes)
        if self._dab.get_index(g) is not None
    ]
    train_genes_idx = [self._dab.get_index(g) for g in train_genes]

    # Subset training matrix and labels
    if predict_all:
        X = self._dab_matrix()[train_genes_idx]
        y = np.array([1 if g in pos_genes else -1 for g in train_genes])
    else:
        X = np.empty([len(train_genes), self._dab.get_size()])
        y = np.empty(len(train_genes))
        for i, g in enumerate(train_genes):
            X[i] = self._dab.get(g)
            y[i] = 1 if g in pos_genes else -1

    params = NetworkSVM.default_params
    if best_params:
        # Set the parameters by cross-validation
        score = 'average_precision'
        clf = GridSearchCV(LinearSVC(), NetworkSVM.tuned_parameters,
                           cv=3, n_jobs=10, scoring=score)
        clf.fit(X, y)
        params = clf.best_params_

    # Out-of-fold score/probability holders for the training genes,
    # initialized to NaN so unfilled entries are detectable.
    train_scores, train_probs = np.empty(len(train_genes)), np.empty(
        len(train_genes))
    train_scores[:], train_probs[:] = np.NAN, np.NAN
    scores, probs = None, None

    kf = StratifiedKFold(n_splits=cv_folds)
    for cv, (train, test) in enumerate(kf.split(X, y)):
        X_train, X_test, y_train, y_test = X[train], X[test], y[train], y[
            test]
        logger.info('Learning SVM')
        clf = LinearSVC(**params)
        clf.fit(X_train, y_train)
        logger.info('Predicting SVM')
        if predict_all:
            # NOTE(review): `X_all` is not defined anywhere in this method —
            # presumably it should be the full network matrix
            # (self._dab_matrix()); this branch would raise NameError as-is.
            # Verify against the original script version of this code.
            scores_cv = clf.decision_function(X_all)
            # Accumulate per-fold scores column-wise for the median below.
            scores = scores_cv if scores is None else np.column_stack(
                (scores, scores_cv))
            for idx in test:
                train_scores[idx] = scores_cv[train_genes_idx[idx]]
        else:
            scores_cv = clf.decision_function(X_test)
            for i, idx in enumerate(test):
                train_scores[idx] = scores_cv[i]

    # Calibrate out-of-fold decision scores into probabilities.
    if prob_fit == 'ISO':
        ir = IsotonicRegression(out_of_bounds='clip')
        Y = label_binarize(y, [-1, 1])
        ir.fit(train_scores, Y[:, 0])
        train_probs = ir.predict(train_scores)
    else:
        Y = label_binarize(y, [-1, 1])
        sc = _SigmoidCalibration()
        # NOTE(review): the isotonic branch passes Y[:, 0] but this passes
        # the full binarized 2d array Y — confirm _SigmoidCalibration
        # accepts it, or whether Y[:, 0] was intended here too.
        sc.fit(train_scores, Y)
        train_probs = sc.predict(train_scores)

    if predict_all:
        # Median score per gene across folds; training genes then get their
        # out-of-fold values instead.
        scores = np.median(scores, axis=1)
        for i, idx in enumerate(train_genes_idx):
            scores[idx] = train_scores[i]
        # NOTE(review): `probs` is never stacked in the fold loop above and
        # is still None here, so np.median(probs, ...) would fail — per-fold
        # probability accumulation appears to be missing; verify.
        probs = np.median(probs, axis=1)
        for i, idx in enumerate(train_genes_idx):
            probs[idx] = train_probs[i]
        # NOTE(review): `dab` is referenced here while the rest of the
        # method uses `self._dab` — presumably self._dab.gene_list; confirm.
        genes = dab.gene_list
    else:
        scores = train_scores
        genes = train_genes
        probs = train_probs

    # Rank genes by decision score, highest first.
    self._predictions = sorted(zip(genes, scores, probs),
                               key=itemgetter(1),
                               reverse=True)
    return self._predictions
for idx in test: train_scores[idx] = scores_cv[train_genes_idx[idx]] else: scores_cv = clf.decision_function(X_test) for i, idx in enumerate(test): train_scores[idx] = scores_cv[i] if args.prob_fit == 'ISO': ir = IsotonicRegression(out_of_bounds='clip') Y = label_binarize(y, [-1, 1]) ir.fit(train_scores, Y[:, 0]) train_probs = ir.predict(train_scores) elif args.prob_fit == 'SIGMOID': Y = label_binarize(y, [-1, 1]) sc = _SigmoidCalibration() sc.fit(train_scores, Y) train_probs = sc.predict(train_scores) if args.all: scores = np.median(scores, axis=1) for i, idx in enumerate(train_genes_idx): scores[idx] = train_scores[i] probs = np.median(probs, axis=1) for i, idx in enumerate(train_genes_idx): probs[idx] = train_probs[i] genes = dab.gene_list else: scores = train_scores