Example #1
    def test_requires_classifier(self):
        """
        Assert requires a classifier
        """
        message = "requires a probabilistic binary classifier"
        assert not is_classifier(Ridge)

        with pytest.raises(yb.exceptions.YellowbrickError, match=message):
            DiscriminationThreshold(Ridge())
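For contrast, a probabilistic classifier passes the same type check. A minimal sketch of the accepted case (not part of the original test suite; LogisticRegression is an illustrative choice):

    from sklearn.linear_model import LogisticRegression
    from yellowbrick.classifier import DiscriminationThreshold

    # LogisticRegression is a classifier that exposes predict_proba, so the
    # constructor's type check passes and no exception is raised.
    viz = DiscriminationThreshold(LogisticRegression())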
Example #3
    def test_requires_probabilistic_classifier(self):
        """
        Assert requires probabilistic classifier
        """
        message = "requires a probabilistic binary classifier"
        assert is_classifier(RadiusNeighborsClassifier)
        assert not is_probabilistic(RadiusNeighborsClassifier)

        with pytest.raises(yb.exceptions.YellowbrickError, match=message):
            DiscriminationThreshold(RadiusNeighborsClassifier())
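RadiusNeighborsClassifier is a classifier, but (at least in the scikit-learn versions these tests target) it exposes neither predict_proba nor decision_function. A rough sketch of what is_probabilistic most likely checks (an assumption about the helper, not Yellowbrick's verbatim source):

    def is_probabilistic_sketch(model):
        # Duck-type check: a model counts as probabilistic if it can produce
        # class probabilities or decision scores.
        return any(
            hasattr(model, attr) for attr in ("predict_proba", "decision_function")
        )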
Example #5
    def test_accepts_decision_function(self):
        """
        Will accept classifiers with decision function
        """
        model = LinearSVC
        assert is_classifier(model)
        assert is_probabilistic(model)
        assert hasattr(model, "decision_function")
        assert not hasattr(model, "predict_proba")

        try:
            DiscriminationThreshold(model())
        except YellowbrickTypeError:
            pytest.fail("did not accept decision function model")
Example #7
    def __init__(
        self,
        estimator,
        ax=None,
        n_trials=50,
        cv=0.1,
        fbeta=1.0,
        argmax="fscore",
        exclude=None,
        quantiles=QUANTILES_MEDIAN_80,
        random_state=None,
        is_fitted="auto",
        force_model=False,
        **kwargs
    ):

        # Perform some quick type checking to help users avoid errors.
        if not force_model and (
            not is_classifier(estimator) or not is_probabilistic(estimator)
        ):
            raise YellowbrickTypeError(
                "{} requires a probabilistic binary classifier".format(
                    self.__class__.__name__
                )
            )

        # Check the various inputs
        self._check_quantiles(quantiles)
        self._check_cv(cv)
        self._check_exclude(exclude)

        # Initialize the ModelVisualizer
        super(DiscriminationThreshold, self).__init__(
            estimator, ax=ax, is_fitted=is_fitted, **kwargs
        )

        # Set params
        self.n_trials = n_trials
        self.cv = cv
        self.fbeta = fbeta
        self.argmax = argmax
        self.exclude = exclude
        self.quantiles = quantiles
        self.random_state = random_state
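A minimal end-to-end usage sketch for this constructor (the dataset and parameter values are illustrative assumptions, not from the source):

    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from yellowbrick.classifier import DiscriminationThreshold

    # DiscriminationThreshold only supports binary classification problems
    X, y = make_classification(n_classes=2, random_state=42)

    viz = DiscriminationThreshold(
        LogisticRegression(), n_trials=10, cv=0.2, random_state=42
    )
    viz.fit(X, y)  # runs the shuffle-split trials across candidate thresholds
    viz.show()     # renders the threshold plot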
Example #8
    def test_requires_probabilistic_classifier(self):
        """
        Assert requires probabilistic classifier
        """
        message = "requires a probabilistic binary classifier"

        class RoboClassifier(ClassifierMixin):
            """
            Dummy Non-Probabilistic Classifier
            """
            def fit(self, X, y):
                self.classes_ = [0, 1]
                return self

        assert is_classifier(RoboClassifier)
        assert not is_probabilistic(RoboClassifier)

        with pytest.raises(yb.exceptions.YellowbrickError, match=message):
            DiscriminationThreshold(RoboClassifier())
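By the same duck-typing logic, adding a probabilistic method to the dummy classifier should let it pass the check. A hedged, self-contained sketch (an illustration, not part of the original tests):

    import numpy as np
    from sklearn.base import ClassifierMixin
    from yellowbrick.utils import is_probabilistic
    from yellowbrick.classifier import DiscriminationThreshold

    class ProbaRoboClassifier(ClassifierMixin):
        """
        The dummy classifier from above, plus a probabilistic interface
        """
        def fit(self, X, y):
            self.classes_ = [0, 1]
            return self

        def predict_proba(self, X):
            # Uninformative 50/50 probabilities, enough to satisfy the check
            return np.full((len(X), 2), 0.5)

    assert is_probabilistic(ProbaRoboClassifier)
    DiscriminationThreshold(ProbaRoboClassifier())  # should not raise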
Example #9
    def __init__(self,
                 model,
                 ax=None,
                 n_trials=50,
                 cv=0.1,
                 fbeta=1.0,
                 argmax='fscore',
                 exclude=None,
                 quantiles=QUANTILES_MEDIAN_80,
                 random_state=None,
                 **kwargs):

        # Perform some quick type checking to help users avoid errors.
        if not is_classifier(model) or not is_probabilistic(model):
            raise YellowbrickTypeError(
                "{} requires a probabilistic binary classifier".format(
                    self.__class__.__name__))

        # Check the various inputs
        self._check_quantiles(quantiles)
        self._check_cv(cv)
        self._check_exclude(exclude)

        # Initialize the ModelVisualizer
        super(DiscriminationThreshold, self).__init__(model, ax=ax, **kwargs)

        # Set params
        self.set_params(
            n_trials=n_trials,
            cv=cv,
            fbeta=fbeta,
            argmax=argmax,
            exclude=exclude,
            quantiles=quantiles,
            random_state=random_state,
        )
Example #10
    def __init__(self, model, ax=None, n_trials=50, cv=0.1, fbeta=1.0,
                 argmax='fscore', exclude=None, quantiles=QUANTILES_MEDIAN_80,
                 random_state=None, **kwargs):

        # Perform some quick type checking to help users avoid errors.
        if not is_classifier(model) or not is_probabilistic(model):
            raise YellowbrickTypeError(
                "{} requires a probabilistic binary classifier".format(
                self.__class__.__name__
            ))

        # Check the various inputs
        self._check_quantiles(quantiles)
        self._check_cv(cv)
        self._check_exclude(exclude)

        # Initialize the ModelVisualizer
        super(DiscriminationThreshold, self).__init__(model, ax=ax, **kwargs)

        # Set params
        self.set_params(
            n_trials=n_trials, cv=cv, fbeta=fbeta, argmax=argmax,
            exclude=exclude, quantiles=quantiles, random_state=random_state,
        )
    def fit(self, X, y=None, **kwargs):
        """
        Fits the estimator to discover the feature importances described by
        the data, then draws those importances as a bar plot.

        Parameters
        ----------
        X : ndarray or DataFrame of shape n x m
            A matrix of n instances with m features

        y : ndarray or Series of length n
            An array or series of target or class values

        kwargs : dict
            Keyword arguments passed to the fit method of the estimator.

        Returns
        -------
        self : visualizer
            The fit method must always return self to support pipelines.
        """
        super(FeatureImportances, self).fit(X, y, **kwargs)

        # Get the feature importances from the model
        self.feature_importances_ = self._find_importances_param()

        # Get the classes from the model
        if is_classifier(self):
            self.classes_ = self._find_classes_param()
        else:
            self.classes_ = None
            self.stack = False

        # If stack=False and the feature importances are a multidimensional
        # array of shape (n_classes, n_features), flatten them by taking the
        # mean over the class axis to get shape (n_features,)
        # (see LogisticRegression)
        if not self.stack and self.feature_importances_.ndim > 1:
            self.feature_importances_ = np.mean(self.feature_importances_,
                                                axis=0)

        # Apply absolute value filter before normalization
        if self.absolute:
            self.feature_importances_ = np.abs(self.feature_importances_)

        # Normalize features relative to the maximum
        if self.relative:
            maxv = np.abs(self.feature_importances_).max()
            self.feature_importances_ /= maxv
            self.feature_importances_ *= 100.0

        # Create labels for the feature importances
        # NOTE: this code is duplicated from MultiFeatureVisualizer
        if self.labels is None:
            # Use column names if a dataframe
            if is_dataframe(X):
                self.features_ = np.array(X.columns)

            # Otherwise use the column index as the labels
            else:
                _, ncols = X.shape
                self.features_ = np.arange(0, ncols)
        else:
            self.features_ = np.array(self.labels)

        # Sort the features and their importances
        if self.stack:
            sort_idx = np.argsort(np.mean(self.feature_importances_, 0))
            self.features_ = self.features_[sort_idx]
            self.feature_importances_ = self.feature_importances_[:, sort_idx]
        else:
            sort_idx = np.argsort(self.feature_importances_)
            self.features_ = self.features_[sort_idx]
            self.feature_importances_ = self.feature_importances_[sort_idx]

        # Draw the feature importances
        self.draw()
        return self
Example #12
    def fit(self, X, y=None, **kwargs):
        """
        Fits the estimator to discover the feature importances described by
        the data, then draws those importances as a bar plot.

        Parameters
        ----------
        X : ndarray or DataFrame of shape n x m
            A matrix of n instances with m features

        y : ndarray or Series of length n
            An array or series of target or class values

        kwargs : dict
            Keyword arguments passed to the fit method of the estimator.

        Returns
        -------
        self : visualizer
            The fit method must always return self to support pipelines.
        """
        # Super call fits the underlying estimator if it's not already fitted
        super(FeatureImportances, self).fit(X, y, **kwargs)

        # Get the feature importances from the model
        self.feature_importances_ = self._find_importances_param()

        # Get the classes from the model
        if is_classifier(self):
            self.classes_ = self._find_classes_param()
        else:
            self.classes_ = None
            self.stack = False

        # If stack=False and the feature importances are a multidimensional
        # array of shape (n_classes, n_features), flatten them by taking the
        # mean over the class axis to get shape (n_features,)
        # (see LogisticRegression)
        if not self.stack and self.feature_importances_.ndim > 1:
            self.feature_importances_ = np.mean(self.feature_importances_,
                                                axis=0)
            warnings.warn(
                ("detected multi-dimensional feature importances but stack=False, "
                 "using mean to aggregate them."),
                YellowbrickWarning,
            )

        # Apply absolute value filter before normalization
        if self.absolute:
            self.feature_importances_ = np.abs(self.feature_importances_)

        # Normalize features relative to the maximum
        if self.relative:
            maxv = np.abs(self.feature_importances_).max()
            self.feature_importances_ /= maxv
            self.feature_importances_ *= 100.0

        # Create labels for the feature importances
        # NOTE: this code is duplicated from MultiFeatureVisualizer
        if self.labels is None:
            # Use column names if a dataframe
            if is_dataframe(X):
                self.features_ = np.array(X.columns)

            # Otherwise use the column index as the labels
            else:
                _, ncols = X.shape
                self.features_ = np.arange(0, ncols)
        else:
            self.features_ = np.array(self.labels)

        # Sort the features and their importances
        if self.stack:
            sort_idx = np.argsort(np.mean(self.feature_importances_, 0))
            self.features_ = self.features_[sort_idx]
            self.feature_importances_ = self.feature_importances_[:, sort_idx]
        else:
            sort_idx = np.argsort(self.feature_importances_)
            self.features_ = self.features_[sort_idx]
            self.feature_importances_ = self.feature_importances_[sort_idx]

        # Draw the feature importances
        self.draw()
        return self
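A minimal usage sketch for this fit method (the dataset and model are illustrative assumptions; the import path is per recent Yellowbrick versions):

    from sklearn.datasets import make_classification
    from sklearn.ensemble import RandomForestClassifier
    from yellowbrick.model_selection import FeatureImportances

    X, y = make_classification(n_features=8, n_informative=4, random_state=42)

    # fit() trains the forest, pulls its feature_importances_, normalizes them
    # relative to the maximum (the default relative=True), and draws the plot
    viz = FeatureImportances(RandomForestClassifier(random_state=42))
    viz.fit(X, y)
    viz.show()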