Esempio n. 1
0
def get_model_name(model):
    """
    Return the class name of a Scikit-Learn estimator or pipeline.

    Parameters
    ----------
    model: class or instance
        The object to name. For an estimator the name of its class is
        returned; for a Pipeline the lookup is repeated on the last element
        of its last step, so the name of the final transformer or estimator
        is returned.

    Returns
    -------
    name : string
        The name of the model or pipeline.

    Raises
    ------
    YellowbrickTypeError
        If ``model`` does not pass the ``is_estimator`` check.
    """
    # Guard clause: anything that is not an estimator cannot be named.
    if not is_estimator(model):
        template = "Cannot detect the model name for non estimator: '{}'"
        raise YellowbrickTypeError(template.format(type(model)))

    # A pipeline delegates to its last step; recursing also unwraps a
    # pipeline that is itself the final step of another pipeline.
    if isinstance(model, Pipeline):
        last_step = model.steps[-1]
        return get_model_name(last_step[-1])

    return model.__class__.__name__
Esempio n. 2
0
    def __init__(
        self,
        estimator,
        ax=None,
        fig=None,
        classes=None,
        encoder=None,
        is_fitted="auto",
        force_model=False,
        **kwargs
    ):
        """
        Check the estimator type, initialize the parent visualizer and store
        the classification-specific options.

        Parameters
        ----------
        estimator : object
            The model to wrap; must pass ``isclassifier`` unless
            ``force_model`` is set.
        ax, fig, is_fitted, kwargs
            Forwarded unchanged to the parent initializer.
        classes, encoder
            Stored as-is on ``self.classes`` and ``self.encoder``.
        force_model : bool, default: False
            When truthy the classifier check is skipped entirely.

        Raises
        ------
        YellowbrickTypeError
            If the estimator is not a classifier and ``force_model`` is falsy.
        """
        # The estimator is acceptable when the caller forces it or when it
        # really is a classifier (the check is skipped when forced).
        acceptable = force_model or isclassifier(estimator)
        if not acceptable:
            raise YellowbrickTypeError(
                "This estimator is not a classifier; "
                "try a regression or clustering score visualizer instead!")

        # Hand the common arguments to the parent class.
        parent = super(ClassificationScoreVisualizer, self)
        parent.__init__(
            estimator, ax=ax, fig=fig, is_fitted=is_fitted, **kwargs
        )

        # Options specific to this visualizer.
        self.classes = classes
        self.encoder = encoder
        self.force_model = force_model
Esempio n. 3
0
    def __init__(
        self, model, ax=None, alphas=None, cv=None, scoring=None, **kwargs
    ):
        """
        Set up alpha selection for a model that does not search alphas itself.

        Parameters
        ----------
        model : object
            The model to wrap; its class name must not end in "CV".
        ax, kwargs
            Forwarded to the initializer reached through
            ``super(AlphaSelection, self)``.
        alphas : array-like, optional
            Alpha values to use. When None, ``np.logspace(-10, -2, 200)``
            is used.
        cv, scoring
            Bound as keyword arguments of ``cross_val_score`` to build
            ``self.score_method``.

        Raises
        ------
        YellowbrickTypeError
            If the class name of ``model`` ends with "CV".
        """
        # Models whose class name ends in "CV" are rejected here.
        model_name = model.__class__.__name__
        if model_name.endswith("CV"):
            template = ("'{}' is a CV regularization model;"
                        " try AlphaSelection instead.")
            raise YellowbrickTypeError(template.format(model_name))

        # Initialize through the class that follows AlphaSelection in the MRO.
        super(AlphaSelection, self).__init__(model, ax=ax, **kwargs)

        # Manual alpha selection parameters.
        self.alphas = (
            alphas if alphas is not None else np.logspace(-10, -2, 200)
        )
        self.errors = None
        self.score_method = partial(cross_val_score, cv=cv, scoring=scoring)
Esempio n. 4
0
 def __init__(self, estimator, ax=None, fig=None, force_model=False, **kwargs):
     """
     Check that the estimator is a clusterer, then initialize the parent.

     ``force_model`` skips the ``isclusterer`` check when truthy and is
     stored on the instance before the parent initializer runs. ``ax``,
     ``fig`` and any extra keyword arguments are forwarded to the parent.

     Raises
     ------
     YellowbrickTypeError
         If the estimator is not a clusterer and ``force_model`` is falsy.
     """
     # Acceptable when forced by the caller or when it really is a clusterer.
     acceptable = force_model or isclusterer(estimator)
     if not acceptable:
         message = (
             "The supplied model is not a clustering estimator; try a "
             "classifier or regression score visualizer instead!"
         )
         raise YellowbrickTypeError(message)

     self.force_model = force_model

     parent = super(ClusteringScoreVisualizer, self)
     parent.__init__(estimator, ax=ax, fig=fig, **kwargs)
Esempio n. 5
0
    def _find_importances_param(self):
        """
        Searches the wrapped model for the feature importances parameter.
        """
        for attr in ("feature_importances_", "coef_"):
            try:
                return getattr(self.estimator, attr)
            except AttributeError:
                continue

        raise YellowbrickTypeError(
            "could not find feature importances param on {}".format(
                self.estimator.__class__.__name__))
Esempio n. 6
0
    def _find_classes_param(self):
        """
        Searches the wrapped model for the classes_ parameter.
        """
        for attr in ["classes_"]:
            try:
                return getattr(self.estimator, attr)
            except AttributeError:
                continue

        raise YellowbrickTypeError(
            "could not find classes_ param on {}".format(
                self.estimator.__class__.__name__))
Esempio n. 7
0
def get_model_name(model):
    """
    Detects the model name for a Scikit-Learn model or pipeline.

    Parameters
    ----------
    model : class or instance
        The object to determine the name for. For an estimator the name of
        its class is returned; for a Pipeline the lookup is repeated on the
        last element of its last step, so a pipeline nested as the final
        step is unwrapped as well instead of being reported as "Pipeline".

    Returns
    -------
    name : string
        The name of the model or pipeline.

    Raises
    ------
    YellowbrickTypeError
        If ``model`` does not pass the ``is_estimator`` check.
    """
    if not is_estimator(model):
        raise YellowbrickTypeError(
            "Cannot detect the model name for non estimator: '{}'".format(
                type(model)))

    if isinstance(model, Pipeline):
        # Recurse rather than reading the class name directly so that a
        # Pipeline used as the final step resolves to its own final step.
        return get_model_name(model.steps[-1][-1])

    return model.__class__.__name__
Esempio n. 8
0
    def __init__(self, model, ax=None, is_fitted="auto", **kwargs):
        """
        Validate that the model is a "CV" regularization model and
        initialize the parent visualizer.

        Parameters
        ----------
        model : object
            The model to wrap; its class name must end in "CV".
        ax : optional
            Forwarded to the parent initializer.
        is_fitted : default: "auto"
            Forwarded to the parent initializer. Previously this argument was
            accepted but silently dropped.
        kwargs : dict
            Extra keyword arguments forwarded to the parent initializer.

        Raises
        ------
        YellowbrickTypeError
            If the class name of ``model`` does not end with "CV".
        """
        # Check to make sure this is a "RegressorCV"
        name = model.__class__.__name__
        if not name.endswith("CV"):
            raise YellowbrickTypeError(
                ("'{}' is not a CV regularization model;"
                 " try ManualAlphaSelection instead.").format(name))

        # Turn on store_cv_values for models that expose that parameter
        # (note that this mutates the model passed in by the caller).
        if "store_cv_values" in model.get_params():
            model.set_params(store_cv_values=True)

        # Call super to initialize the class, passing is_fitted through so
        # the caller's choice is honoured.
        super(AlphaSelection, self).__init__(
            model, ax=ax, is_fitted=is_fitted, **kwargs
        )
Esempio n. 9
0
    def __init__(
        self,
        estimator,
        ax=None,
        n_trials=50,
        cv=0.1,
        fbeta=1.0,
        argmax="fscore",
        exclude=None,
        quantiles=QUANTILES_MEDIAN_80,
        random_state=None,
        is_fitted="auto",
        force_model=False,
        **kwargs
    ):
        """
        Validate the estimator and inputs, initialize the parent visualizer
        and store the parameters on the instance.

        ``quantiles``, ``cv`` and ``exclude`` are passed through
        ``_check_quantiles``, ``_check_cv`` and ``_check_exclude`` (in that
        order) before the parent initializer runs. ``ax``, ``is_fitted`` and
        extra keyword arguments are forwarded to the parent; the remaining
        parameters except ``force_model`` are stored as attributes of the
        same name.

        Raises
        ------
        YellowbrickTypeError
            If ``force_model`` is falsy and the estimator fails either the
            ``is_classifier`` or the ``is_probabilistic`` check.
        """
        # A forced model bypasses the type check; otherwise the estimator
        # must be a classifier that is also probabilistic.
        suitable = force_model or (
            is_classifier(estimator) and is_probabilistic(estimator)
        )
        if not suitable:
            owner = self.__class__.__name__
            raise YellowbrickTypeError(
                "{} requires a probabilistic binary classifier".format(owner)
            )

        # Input validation happens before anything is stored.
        self._check_quantiles(quantiles)
        self._check_cv(cv)
        self._check_exclude(exclude)

        # Initialize the parent visualizer.
        parent = super(DiscriminationThreshold, self)
        parent.__init__(estimator, ax=ax, is_fitted=is_fitted, **kwargs)

        # Keep the parameters on the instance.
        self.n_trials = n_trials
        self.cv = cv
        self.fbeta = fbeta
        self.argmax = argmax
        self.exclude = exclude
        self.quantiles = quantiles
        self.random_state = random_state
Esempio n. 10
0
    def __init__(
        self,
        ax=None,
        features=None,
        classes=None,
        normalize=None,
        sample=1.0,
        color=None,
        colormap=None,
        vlines=True,
        vlines_kwds=None,
        **kwargs
    ):
        """
        Initialize the parent visualizer, then validate and store the
        ``normalize`` and ``sample`` options and the vertical-line settings.

        Raises
        ------
        YellowbrickValueError
            If ``normalize`` is neither None nor a member of
            ``self.normalizers``, or if ``sample`` is an int below 1 or a
            float outside the interval (0, 1].
        YellowbrickTypeError
            If ``sample`` is neither an int nor a float.
        """
        parent = super(ParallelCoordinates, self)
        parent.__init__(ax, features, classes, color, colormap, **kwargs)

        # 'normalize' must be a known normalizer or None.
        recognized = normalize in self.normalizers or normalize is None
        if not recognized:
            raise YellowbrickValueError(
                "'{}' is an unrecognized normalization method".format(
                    normalize))
        self.normalize = normalize

        # 'sample' is validated according to its type.
        if isinstance(sample, int):
            # NOTE(review): sample == 1 passes this check although the
            # message reads "greater than 1".
            if sample < 1:
                raise YellowbrickValueError(
                    "`sample` parameter of type `int` must be greater than 1")
        elif isinstance(sample, float):
            if sample <= 0 or sample > 1:
                raise YellowbrickValueError(
                    "`sample` parameter of type `float` must be between 0 and 1"
                )
        else:
            raise YellowbrickTypeError(
                "`sample` parameter must be int or float")
        self.sample = sample

        # Visual parameters; a falsy vlines_kwds selects the default style.
        self.show_vlines = vlines
        default_vlines_style = {'linewidth': 1, 'color': 'black'}
        self.vlines_kwds = vlines_kwds or default_vlines_style
Esempio n. 11
0
    def __init__(
        self,
        model,
        n_trials=50,
        test_size_percent=0.1,
        quantiles=(0.1, 0.5, 0.9),
        random_state=None,
        **kwargs
    ):
        """
        Check that the model is a classifier, initialize the parent
        visualizer and store the parameters on the instance.

        ``model`` and extra keyword arguments are forwarded to the parent
        initializer; afterwards ``model`` is also assigned to
        ``self.estimator`` and the remaining parameters are stored as
        attributes of the same name. ``self.plot_data`` starts as None.

        Raises
        ------
        YellowbrickTypeError
            If ``model`` does not pass the ``isclassifier`` check.
        """
        # Only classifiers are supported by this visualizer.
        if not isclassifier(model):
            message = (
                "This estimator is not a classifier; "
                "try a regression or clustering score visualizer instead!"
            )
            raise YellowbrickTypeError(message)

        super(ThresholdVisualizer, self).__init__(model, **kwargs)

        self.estimator = model
        self.n_trials = n_trials
        self.test_size_percent = test_size_percent
        self.quantiles = quantiles
        self.random_state = random_state

        # Placeholder; starts empty and is expected to be set later.
        self.plot_data = None
Esempio n. 12
0
    def __init__(
        self,
        model,
        ax=None,
        n_trials=50,
        cv=0.1,
        fbeta=1.0,
        argmax='fscore',
        exclude=None,
        quantiles=QUANTILES_MEDIAN_80,
        random_state=None,
        **kwargs
    ):
        """
        Validate the model and inputs, initialize the parent visualizer and
        register the parameters through ``set_params``.

        ``quantiles``, ``cv`` and ``exclude`` are passed through
        ``_check_quantiles``, ``_check_cv`` and ``_check_exclude`` (in that
        order) before the parent initializer runs. ``ax`` and extra keyword
        arguments are forwarded to the parent.

        Raises
        ------
        YellowbrickTypeError
            If the model fails either the ``is_classifier`` or the
            ``is_probabilistic`` check.
        """
        # The model must be a classifier that is also probabilistic.
        suitable = is_classifier(model) and is_probabilistic(model)
        if not suitable:
            owner = self.__class__.__name__
            raise YellowbrickTypeError(
                "{} requires a probabilistic binary classifier".format(owner))

        # Input validation happens before anything is stored.
        self._check_quantiles(quantiles)
        self._check_cv(cv)
        self._check_exclude(exclude)

        # Initialize the parent visualizer.
        super(DiscriminationThreshold, self).__init__(model, ax=ax, **kwargs)

        # Collect the parameters and set them in a single call.
        params = {
            "n_trials": n_trials,
            "cv": cv,
            "fbeta": fbeta,
            "argmax": argmax,
            "exclude": exclude,
            "quantiles": quantiles,
            "random_state": random_state,
        }
        self.set_params(**params)
Esempio n. 13
0
    def fit(self, X, y=None, **kwargs):
        """
        The fit method is the primary drawing input for the decision boundaries
        visualization since it has both the X and y data required for the
        viz and the transform method does not.

        It fits the wrapped estimator, sets the axes limits, builds a mesh
        over the first two selected feature columns and stores the
        estimator's predictions on that mesh in ``self.Z_shape``.

        Parameters
        ----------
        X : ndarray or DataFrame of shape n x m
            A matrix of n instances with m features

        y : ndarray or Series of length n
            An array or series of target or class values

        kwargs : dict
            Accepted for API compatibility; not used by this method.

        Returns
        -------
        self : instance
            Returns the instance of the visualizer

        Raises
        ------
        YellowbrickTypeError
            If classes were supplied and their number differs from the number
            of distinct values in ``y``.
        """
        X = self._select_feature_columns(X)

        # Distinct target values in sorted order. A deterministic order keeps
        # the pairing of targets with user-supplied class names stable from
        # run to run (iteration order of a plain set is not guaranteed).
        targets = np.unique(y)

        # Assign each class a unique number for drawing
        if self.classes_ is None:
            self.classes_ = {
                label: str(kls_num)
                for kls_num, label in enumerate(targets)
            }
            self.class_labels = None
        elif len(targets) == len(self.classes_):
            class_names = list(self.classes_)
            self.classes_ = {
                label: str(kls_num)
                for kls_num, label in enumerate(class_names)
            }
            self.class_labels = dict(zip(targets, class_names))
        else:
            raise YellowbrickTypeError(
                """Number of classes must be the same length of number of
                target y""")

        # Fit the wrapped estimator on the selected feature columns only.
        self.estimator.fit(X, y)

        def padded_limits(column):
            # Widen the range by 1% of each bound's magnitude. Using the
            # absolute value keeps the padding pointing away from the data
            # when a bound is negative; for positive bounds the result is
            # unchanged from the plain "bound * .01" padding.
            low, high = column.min(), column.max()
            return low - abs(low) * .01, high + abs(high) * .01

        # Plot the decision boundary. For that, we will assign a color to each
        # point in the mesh [x_min, x_max]x[y_min, y_max].
        x_min, x_max = padded_limits(X[:, 0])
        y_min, y_max = padded_limits(X[:, 1])

        self.ax.set_xlim([x_min, x_max])
        self.ax.set_ylim([y_min, y_max])

        # set the step increment for drawing the boundary graph
        x_step = (x_max - x_min) * self.step_size
        y_step = (y_max - y_min) * self.step_size

        self.xx, self.yy = np.meshgrid(np.arange(x_min, x_max, x_step),
                                       np.arange(y_min, y_max, y_step))

        # Predict a class for every mesh point and restore the mesh shape.
        Z = self.estimator.predict(np.c_[self.xx.ravel(), self.yy.ravel()])
        self.Z_shape = Z.reshape(self.xx.shape)
        return self
Esempio n. 14
0
    def __init__(
        self,
        ax=None,
        features=None,
        classes=None,
        normalize=None,
        sample=1.0,
        random_state=None,
        shuffle=False,
        colors=None,
        colormap=None,
        alpha=None,
        fast=False,
        vlines=True,
        vlines_kwds=None,
        **kwargs
    ):
        """
        Initialize the parent visualizer, then validate and store the
        normalization, sampling, shuffling and drawing options.

        ``target_type`` defaults to "discrete" when the caller does not
        supply it in ``kwargs``. A random number generator is only created
        or stored when ``shuffle`` is True; otherwise ``self._rng`` is None
        and ``random_state`` is not inspected.

        Raises
        ------
        YellowbrickValueError
            If ``normalize`` is neither None nor a member of
            ``self.NORMALIZERS``, or if ``sample`` is an int below 1 or a
            float outside the interval (0, 1].
        YellowbrickTypeError
            If ``sample`` is neither an int nor a float, if ``shuffle`` is
            not a bool, or if shuffling is requested and ``random_state`` is
            not None, an int or a RandomState.
        """
        if "target_type" not in kwargs:
            kwargs["target_type"] = "discrete"

        parent = super(ParallelCoordinates, self)
        parent.__init__(
            ax=ax,
            features=features,
            classes=classes,
            colors=colors,
            colormap=colormap,
            **kwargs
        )

        # 'normalize' must be a known normalizer or None.
        recognized = normalize in self.NORMALIZERS or normalize is None
        if not recognized:
            raise YellowbrickValueError(
                "'{}' is an unrecognized normalization method".format(
                    normalize))
        self.normalize = normalize

        # 'sample' is validated according to its type.
        if isinstance(sample, int):
            # NOTE(review): sample == 1 passes this check although the
            # message reads "greater than 1".
            if sample < 1:
                raise YellowbrickValueError(
                    "`sample` parameter of type `int` must be greater than 1")
        elif isinstance(sample, float):
            if sample <= 0 or sample > 1:
                raise YellowbrickValueError(
                    "`sample` parameter of type `float` must be between 0 and 1"
                )
        else:
            raise YellowbrickTypeError(
                "`sample` parameter must be int or float")
        self.sample = sample

        # 'shuffle' has to be a real boolean.
        if not isinstance(shuffle, bool):
            raise YellowbrickTypeError("`shuffle` parameter must be boolean")
        self.shuffle = shuffle

        # Choose the random number generator; none is needed unless shuffling.
        if not self.shuffle:
            self._rng = None
        elif (random_state is None) or isinstance(random_state, int):
            self._rng = RandomState(random_state)
        elif isinstance(random_state, RandomState):
            self._rng = random_state
        else:
            raise YellowbrickTypeError(
                "`random_state` must be None, int, or np.random.RandomState"
            )

        # Visual and drawing parameters; a falsy vlines_kwds selects the
        # default line style.
        self.fast = fast
        self.alpha = alpha
        self.show_vlines = vlines
        default_vlines_style = {"linewidth": 1, "color": "black"}
        self.vlines_kwds = vlines_kwds or default_vlines_style

        # Internal properties, both starting out as None.
        self._increments = None
        self._colors = None
Esempio n. 15
0
 def ax(self, ax):
     """
     Reject any attempt to assign an axes object.

     Raises
     ------
     YellowbrickTypeError
         Always; the ``ax`` argument is never used.
     """
     message = "cannot set new axes objects on multiple visualizers"
     raise YellowbrickTypeError(message)