예제 #1
0
    def fit(
            self,
            doc,  # type: str
            predict_proba,  # type: Callable[[Any], Any]
    ):
        # type: (...) -> TextExplainer
        """
        Fit a local surrogate classifier that mimics ``predict_proba``
        in the neighbourhood of ``doc``, following the LIME approach.

        Once fitted, obtain the explanation via :meth:`show_prediction`,
        :meth:`show_weights`, :meth:`explain_prediction` or
        :meth:`explain_weights`.

        The call stores its intermediate results on the instance:
        ``doc_``, ``vec_``, ``samples_``, ``similarity_``, ``X_``,
        ``y_proba_``, ``clf_`` and ``metrics_``.

        Parameters
        ----------
        doc : str
            The text whose prediction should be explained.
        predict_proba : callable
            The black-box classification pipeline. It is called once with
            the generated samples (a list of documents) and should return
            a matrix of shape ``(n_samples, n_classes)`` holding
            probabilities: one row per document, one column per output
            label.

        Returns
        -------
        TextExplainer
            ``self``, so that calls can be chained.
        """
        self.doc_ = doc
        sample_count = self.n_samples

        if not self.position_dependent:
            # Fit a fresh copy of the configured vectorizer on the document
            # alone, then vectorize the samples drawn around it.
            vectorizer = clone(self.vec).fit([doc])
            self.vec_ = vectorizer
            neighbours, weights = self.sampler.sample_near(
                doc=doc, n_samples=sample_count)
            features = vectorizer.transform(neighbours)
        else:
            # The sampler also reports a mask; its inverse is used directly
            # as the feature matrix (presumably one column per token
            # position -- confirm against the sampler implementation).
            neighbours, weights, mask, _text = (
                self.sampler.sample_near_with_mask(
                    doc=doc, n_samples=sample_count))
            self.vec_ = SingleDocumentVectorizer(
                token_pattern=self.token_pattern,
            ).fit([doc])
            features = ~mask

        sigma = self.rbf_sigma
        if sigma is not None:
            # Re-weight the samples: ``1 - similarity`` is passed through
            # the RBF kernel with the configured sigma.
            weights = rbf(1 - weights, sigma=sigma)

        self.samples_, self.similarity_, self.X_ = (
            neighbours, weights, features)

        # Query the black box only after the sample attributes are stored.
        probabilities = predict_proba(neighbours)
        self.y_proba_ = probabilities

        surrogate = clone(self.clf)
        self.clf_ = surrogate

        self.metrics_ = _train_local_classifier(
            estimator=surrogate,
            samples=features,
            similarity=weights,
            y_proba=probabilities,
            expand_factor=self.expand_factor,
            random_state=self.rng_,
        )
        return self
예제 #2
0
 def _similarity(self, doc, samples):
     """
     Return the RBF-kernel similarity between ``doc`` and ``samples``.

     Distances are computed with ``_distances`` using ``self.metric`` and
     then passed through ``rbf`` with ``self.sigma_`` as its sigma.
     """
     dist_to_doc = _distances(doc, samples, metric=self.metric)
     kernel_sigma = self.sigma_
     return rbf(dist_to_doc, sigma=kernel_sigma)