def fit(self,
        doc,             # type: str
        predict_proba,   # type: Callable[[Any], Any]
        ):
    # type: (...) -> TextExplainer
    """
    Build a local explanation of ``predict_proba`` around ``doc``.

    Following the LIME approach, texts similar to ``doc`` are sampled,
    the black-box ``predict_proba`` is evaluated on them, and a local
    classification pipeline is fitted to these predictions. After fitting,
    use :meth:`show_prediction`, :meth:`show_weights`,
    :meth:`explain_prediction` or :meth:`explain_weights` to get
    the explanation.

    Parameters
    ----------
    doc : str
        Text to explain.

    predict_proba : callable
        Black-box classification pipeline. It should be a function
        which takes a list of strings (documents) and returns a matrix
        of shape ``(n_samples, n_classes)`` with probability values -
        a row per document and a column per output label.

    Returns
    -------
    self
        The explainer itself, with ``doc_``, ``vec_``, ``samples_``,
        ``similarity_``, ``X_``, ``y_proba_``, ``clf_`` and ``metrics_``
        attributes set.
    """
    self.doc_ = doc

    if not self.position_dependent:
        # Features come from a fresh copy of the configured vectorizer,
        # fitted on the explained document only.
        self.vec_ = clone(self.vec).fit([doc])
        generated, closeness = self.sampler.sample_near(
            doc=doc,
            n_samples=self.n_samples,
        )
        features = self.vec_.transform(generated)
    else:
        # The sampler also returns a mask; its elementwise inverse is
        # used directly as the feature matrix.
        sampled = self.sampler.sample_near_with_mask(
            doc=doc,
            n_samples=self.n_samples,
        )
        generated, closeness, mask, _text = sampled
        vectorizer = SingleDocumentVectorizer(
            token_pattern=self.token_pattern,
        )
        self.vec_ = vectorizer.fit([doc])
        features = ~mask

    sigma = self.rbf_sigma
    if sigma is not None:
        # Convert similarity to a distance (1 - similarity) and pass it
        # through the RBF kernel with the user-supplied width.
        closeness = rbf(1 - closeness, sigma=sigma)

    # Keep the assignment order: if ``predict_proba`` raises, the sampling
    # results are already stored on the instance, as before.
    self.samples_ = generated
    self.similarity_ = closeness
    self.X_ = features
    self.y_proba_ = predict_proba(generated)
    self.clf_ = clone(self.clf)

    self.metrics_ = _train_local_classifier(
        estimator=self.clf_,
        samples=features,
        similarity=closeness,
        y_proba=self.y_proba_,
        expand_factor=self.expand_factor,
        random_state=self.rng_,
    )
    return self
def _similarity(self, doc, samples):
    """
    Return the similarity between ``doc`` and ``samples``.

    Distances are computed by ``_distances`` using ``self.metric`` and
    then passed through the ``rbf`` kernel with width ``self.sigma_``.
    """
    return rbf(
        _distances(doc, samples, metric=self.metric),
        sigma=self.sigma_,
    )