Example #1
 def explain_instance(self,
                      timeseries,
                      classifier_fn,
                      training_set,
                      num_slices,
                      labels=(1, ),
                      top_labels=None,
                      num_features=10,
                      num_samples=5000,
                      distance_metric='cosine',
                      model_regressor=None,
                      replacement_method='mean'):
     """Generates explanations for a prediction.
     Args:
         time_series: Time Series to be explained.
         classifier_fn: classifier prediction probability function
         num_slices: Defines into how many slices the series will be split up
         labels: iterable with labels to be explained.
         top_labels: if not None, ignore labels and produce explanations for
         the K labels with highest prediction probabilities, where K is
         this parameter.
         num_features: maximum number of features present in explanation
         num_samples: size of the neighborhood to learn the linear model
         distance_metric: the distance metric to use for sample weighting,
         defaults to cosine similarity
         model_regressor: sklearn regressor to use in explanation. Defaults
         to Ridge regression in LimeBase. Must have model_regressor.coef_
         and 'sample_weight' as a parameter to model_regressor.fit()
     Returns:
         An Explanation object (see explanation.py) with the corresponding
         explanations.
    """
     domain_mapper = explanation.DomainMapper()
     data, yss, distances = self.__data_labels_distances(
         timeseries, classifier_fn, num_samples, num_slices, training_set,
         replacement_method)
     if self.class_names is None:
         self.class_names = [str(x) for x in range(yss[0].shape[0])]
     ret_exp = explanation.Explanation(domain_mapper=domain_mapper,
                                       class_names=self.class_names)
     ret_exp.predict_proba = yss[0]
     if top_labels:
         labels = np.argsort(yss[0])[-top_labels:]
         ret_exp.top_labels = list(labels)
         ret_exp.top_labels.reverse()
     for label in labels:
         (ret_exp.intercept[int(label)], ret_exp.local_exp[int(label)],
          ret_exp.score,
          ret_exp.local_pred) = self.base.explain_instance_with_data(
              data,
              yss,
              distances,
              label,
              num_features,
              model_regressor=model_regressor,
              feature_selection=self.feature_selection)
     ret_exp.local_exp = {
         k: [(int(j1), float(j2)) for j1, j2 in v]
         for k, v in ret_exp.local_exp.items()
     }
     return ret_exp
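
A minimal usage sketch for this variant. The explainer class name, the fitted model, and the arrays below are illustrative assumptions, not part of the snippet above:

import numpy as np

# Hypothetical: an explainer class exposing the method above and a fitted
# classifier `model` with predict_proba.
explainer = LimeTimeSeriesExplainer()    # assumed constructor
X_train = np.random.rand(50, 100)        # 50 dummy series of length 100

exp = explainer.explain_instance(X_train[0],
                                 model.predict_proba,
                                 training_set=X_train,
                                 num_slices=10,
                                 num_features=5,
                                 replacement_method='mean')
print(exp.as_list(label=1))              # (slice index, weight) pairs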
Example #2
 def explain_instance(self,
                      text_instance,
                      classifier_fn,
                      labels=(1, ),
                      top_labels=None,
                      num_features=10,
                      num_samples=5000,
                      distance_metric='cosine',
                      model_regressor=None):
     """This basically just a copy of :class:`LimeTextExplainer` with our custom
        implementation of :class:`IndexedString`.
     """
     indexed_string = IndexedString(text_instance,
                                    bow=self.bow,
                                    split_expression=self.split_expression)
     domain_mapper = TextDomainMapper(indexed_string)
     data, yss, distances = self.__data_labels_distances(
         indexed_string,
         classifier_fn,
         num_samples,
         distance_metric=distance_metric)
     if self.class_names is None:
         self.class_names = [str(x) for x in range(yss[0].shape[0])]
     ret_exp = explanation.Explanation(domain_mapper=domain_mapper,
                                       class_names=self.class_names,
                                       random_state=self.random_state)
     ret_exp.predict_proba = yss[0]
     if top_labels:
         labels = np.argsort(yss[0])[-top_labels:]
         ret_exp.top_labels = list(labels)
         ret_exp.top_labels.reverse()
     for label in labels:
         (ret_exp.intercept[label], ret_exp.local_exp[label], ret_exp.score,
          ret_exp.local_pred) = self.base.explain_instance_with_data(
              data,
              yss,
              distances,
              label,
              num_features,
              model_regressor=model_regressor,
              feature_selection=self.feature_selection)
     return ret_exp
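
Since this is a near-copy of LimeTextExplainer, the upstream API illustrates the call pattern. A self-contained sketch using scikit-learn on a toy corpus (the corpus and pipeline are illustrative, not from the snippet):

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from lime.lime_text import LimeTextExplainer

train_texts = ["good movie", "great film", "terrible plot", "awful acting"]
train_labels = [1, 1, 0, 0]
pipe = make_pipeline(TfidfVectorizer(), LogisticRegression())
pipe.fit(train_texts, train_labels)

explainer = LimeTextExplainer(class_names=["negative", "positive"])
exp = explainer.explain_instance("a great but terrible movie",
                                 pipe.predict_proba,
                                 num_features=4,
                                 num_samples=500)
print(exp.as_list())   # (word, weight) pairs for label 1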
Example #3
    def explain_instance(self,
                         timeseries_instance,
                         classifier_fn,
                         num_slices,
                         labels=(1, ),
                         top_labels=None,
                         num_features=10,
                         num_samples=5000,
                         model_regressor=None,
                         replacement_method='mean'):
        """Generates explanations for a prediction.

        First, we generate neighborhood data by randomly hiding features from
        the instance (see __data_labels_distance_mapping). We then learn
        locally weighted linear models on this neighborhood data to explain
        each of the classes in an interpretable way (see lime_base.py).
        As distance function DTW metric is used.

        Args:
            time_series_instance: time series to be explained.
            classifier_fn: classifier prediction probability function,
                which takes a list of d arrays with time series values
                and outputs a (d, k) numpy array with prediction
                probabilities, where k is the number of classes.
                For ScikitClassifiers , this is classifier.predict_proba.
            num_slices: Defines into how many slices the time series will
                be split up
            labels: iterable with labels to be explained.
            top_labels: if not None, ignore labels and produce explanations for
            the K labels with highest prediction probabilities, where K is
            this parameter.
            num_features: maximum number of features present in explanation
            num_samples: size of the neighborhood to learn the linear model
            distance_metric: the distance metric to use for sample weighting,
                defaults to cosine similarity
            model_regressor: sklearn regressor to use in explanation. Defaults
                to Ridge regression in LimeBase. Must have
                model_regressor.coef_ and 'sample_weight' as a parameter to
                model_regressor.fit()
        Returns:
            An Explanation object (see explanation.py) with the corresponding
            explanations.
       """

        permutations, predictions, distances = self.__data_labels_distances(
            timeseries_instance, classifier_fn, num_samples, num_slices,
            replacement_method)

        is_multivariate = len(timeseries_instance.shape) > 1

        if self.class_names is None:
            self.class_names = [str(x) for x in range(predictions[0].shape[0])]

        domain_mapper = TSDomainMapper(self.signal_names, num_slices,
                                       is_multivariate)
        ret_exp = explanation.Explanation(domain_mapper=domain_mapper,
                                          class_names=self.class_names)
        ret_exp.predict_proba = predictions[0]

        if top_labels:
            labels = np.argsort(predictions[0])[-top_labels:]
            ret_exp.top_labels = list(labels)
            ret_exp.top_labels.reverse()
        for label in labels:
            (ret_exp.intercept[int(label)], ret_exp.local_exp[int(label)],
             ret_exp.score,
             ret_exp.local_pred) = self.base.explain_instance_with_data(
                 permutations,
                 predictions,
                 distances,
                 label,
                 num_features,
                 model_regressor=model_regressor,
                 feature_selection=self.feature_selection)
        return ret_exp
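
A usage sketch for this DTW-weighted variant. The explainer object and model are illustrative assumptions; per the is_multivariate check above, a 2d array would be treated as multivariate:

import numpy as np

# Assumed: `explainer` exposes the method above and `model` is a fitted
# classifier with predict_proba; DTW weighting happens inside the explainer.
series = np.random.rand(100)    # univariate dummy series
exp = explainer.explain_instance(series,
                                 model.predict_proba,
                                 num_slices=20,
                                 top_labels=1,
                                 num_features=5)
print(exp.as_list(label=exp.top_labels[0]))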
Example #4
    def explain_instance(self,
                         data_row,
                         predict_fn,
                         labels=(1, ),
                         top_labels=None,
                         num_features=10,
                         num_samples=5000,
                         distance_metric='euclidean',
                         model_regressor=None):
        """Generates explanations for a prediction.

        First, we generate neighborhood data by randomly perturbing features
        from the instance (see __data_inverse). We then learn locally weighted
        linear models on this neighborhood data to explain each of the classes
        in an interpretable way (see lime_base.py).

        Args:
            data_row: 1d numpy array or scipy.sparse matrix, corresponding to a row
            predict_fn: prediction function. For classifiers, this should be a
                function that takes a numpy array and outputs prediction
                probabilities. For regressors, this takes a numpy array and
                returns the predictions. For ScikitClassifiers, this is
                `classifier.predict_proba()`. For ScikitRegressors, this
                is `regressor.predict()`. The prediction function needs to work
                on multiple feature vectors (the vectors randomly perturbed
                from the data_row).
            labels: iterable with labels to be explained.
            top_labels: if not None, ignore labels and produce explanations for
                the K labels with highest prediction probabilities, where K is
                this parameter.
            num_features: maximum number of features present in explanation
            num_samples: size of the neighborhood to learn the linear model
            distance_metric: the distance metric to use for weights.
            model_regressor: sklearn regressor to use in explanation. Defaults
                to Ridge regression in LimeBase. Must have model_regressor.coef_
                and 'sample_weight' as a parameter to model_regressor.fit()

        Returns:
            An Explanation object (see explanation.py) with the corresponding
            explanations.
        """
        if sp.sparse.issparse(
                data_row) and not sp.sparse.isspmatrix_csr(data_row):
            # Preventative code: if sparse, convert to csr format if not in csr format already
            data_row = data_row.tocsr()
        data, inverse = self.__data_inverse(data_row, num_samples)
        if sp.sparse.issparse(data):
            # Note in sparse case we don't subtract mean since data would become dense
            scaled_data = data.multiply(self.scaler.scale_)
            # Multiplying with csr matrix can return a coo sparse matrix
            if not sp.sparse.isspmatrix_csr(scaled_data):
                scaled_data = scaled_data.tocsr()
        else:
            scaled_data = (data - self.scaler.mean_) / self.scaler.scale_
        distances = sklearn.metrics.pairwise_distances(
            scaled_data, scaled_data[0].reshape(1, -1),
            metric=distance_metric).ravel()

        yss = predict_fn(inverse)

        # for classification, the model needs to provide a list of tuples - classes
        # along with prediction probabilities
        if self.mode == "classification":
            if len(yss.shape) == 1:
                raise NotImplementedError(
                    "LIME does not currently support "
                    "classifier models without probability "
                    "scores. If this conflicts with your "
                    "use case, please let us know: "
                    "https://github.com/datascienceinc/lime/issues/16")
            elif len(yss.shape) == 2:
                if self.class_names is None:
                    self.class_names = [str(x) for x in range(yss[0].shape[0])]
                else:
                    self.class_names = list(self.class_names)
                if not np.allclose(yss.sum(axis=1), 1.0):
                    warnings.warn("""
                    Prediction probabilities do not sum to 1, and
                    thus do not constitute a probability space.
                    Check that your classifier outputs probabilities
                    (not log probabilities or actual class predictions).
                    """)
            else:
                raise ValueError("Your model outputs "
                                 "arrays with {} dimensions".format(
                                     len(yss.shape)))

        # for regression, the output should be a one-dimensional array of predictions
        else:
            try:
                if len(yss.shape) != 1 and len(yss[0].shape) == 1:
                    yss = np.array([v[0] for v in yss])
                assert isinstance(yss, np.ndarray) and len(yss.shape) == 1
            except AssertionError:
                raise ValueError(
                    "Your model needs to output single-dimensional "
                    "numpy arrays, not arrays of shape {}".format(yss.shape))

            predicted_value = yss[0]
            min_y = min(yss)
            max_y = max(yss)

            # add a dimension to be compatible with downstream machinery
            yss = yss[:, np.newaxis]

        feature_names = copy.deepcopy(self.feature_names)
        if feature_names is None:
            feature_names = [str(x) for x in range(data_row.shape[0])]

        if sp.sparse.issparse(data_row):
            values = self.convert_and_round(data_row.data)
            feature_indexes = data_row.indices
        else:
            values = self.convert_and_round(data_row)
            feature_indexes = None

        for i in self.categorical_features:
            if self.discretizer is not None and i in self.discretizer.lambdas:
                continue
            name = int(data_row[i])
            if i in self.categorical_names:
                name = self.categorical_names[i][name]
            feature_names[i] = '%s=%s' % (feature_names[i], name)
            values[i] = 'True'
        categorical_features = self.categorical_features

        discretized_feature_names = None
        if self.discretizer is not None:
            categorical_features = range(data.shape[1])
            discretized_instance = self.discretizer.discretize(data_row)
            discretized_feature_names = copy.deepcopy(feature_names)
            for f in self.discretizer.names:
                discretized_feature_names[f] = self.discretizer.names[f][int(
                    discretized_instance[f])]

        domain_mapper = TableDomainMapper(
            feature_names,
            values,
            scaled_data[0],
            categorical_features=categorical_features,
            discretized_feature_names=discretized_feature_names,
            feature_indexes=feature_indexes)
        ret_exp = explanation.Explanation(domain_mapper,
                                          mode=self.mode,
                                          class_names=self.class_names)
        if self.mode == "classification":
            ret_exp.predict_proba = yss[0]
            if top_labels:
                labels = np.argsort(yss[0])[-top_labels:]
                ret_exp.top_labels = list(labels)
                ret_exp.top_labels.reverse()
        else:
            ret_exp.predicted_value = predicted_value
            ret_exp.min_value = min_y
            ret_exp.max_value = max_y
            labels = [0]
        for label in labels:
            (ret_exp.intercept[label], ret_exp.local_exp[label], ret_exp.score,
             ret_exp.local_pred) = self.base.explain_instance_with_data(
                 scaled_data,
                 yss,
                 distances,
                 label,
                 num_features,
                 model_regressor=model_regressor,
                 feature_selection=self.feature_selection)

        if self.mode == "regression":
            ret_exp.intercept[1] = ret_exp.intercept[0]
            ret_exp.local_exp[1] = [x for x in ret_exp.local_exp[0]]
            ret_exp.local_exp[0] = [(i, -1 * j)
                                    for i, j in ret_exp.local_exp[1]]

        self.lime_preds = yss

        return ret_exp
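
This variant tracks the upstream lime.lime_tabular API closely (apart from the custom self.lime_preds attribute), so a standard upstream call shows the method end to end:

from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from lime.lime_tabular import LimeTabularExplainer

iris = load_iris()
clf = RandomForestClassifier(n_estimators=100, random_state=0)
clf.fit(iris.data, iris.target)

explainer = LimeTabularExplainer(iris.data,
                                 feature_names=iris.feature_names,
                                 class_names=list(iris.target_names),
                                 discretize_continuous=True)
exp = explainer.explain_instance(iris.data[0],
                                 clf.predict_proba,
                                 num_features=4,
                                 top_labels=1)
print(exp.as_list(label=exp.top_labels[0]))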
Example #5
    def explain_instance(self,
                         instance,
                         rec_model,
                         neighborhood_entity,
                         labels=(1, ),
                         num_features=10,
                         num_samples=50,
                         distance_metric='cosine',
                         model_regressor=None):
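        """Generates an explanation for a single recommendation (LIME-RS).

        A neighborhood is generated around `instance`, distances are computed
        in the one-hot (pyFM) interpretable space, predictions come from
        `rec_model`, and a local linear model is fitted on that neighborhood.
        Only regression mode is supported; classification raises
        NotImplementedError.

        Returns:
            An Explanation object (see explanation.py).
        """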

        # get neighborhood
        neighborhood_df = self.generate_neighborhood(instance,
                                                     neighborhood_entity,
                                                     num_samples)

        # compute distance based on interpretable format
        data, _ = Dataset.convert_to_pyfm_format(
            neighborhood_df, columns=rec_model.one_hot_columns)
        distances = sklearn.metrics.pairwise_distances(
            data, data[0].reshape(1, -1), metric=distance_metric).ravel()

        # get predictions from original complex model
        yss = np.array(rec_model.predict(neighborhood_df))

        # classification is not supported by LIME-RS
        if self.mode == "classification":
            raise NotImplementedError(
                "LIME-RS does not currently support classifier models.")
        # for regression, the output should be a one-dimensional array of predictions
        else:
            try:
                assert isinstance(yss, np.ndarray) and len(yss.shape) == 1
            except AssertionError:
                raise ValueError(
                    "Your model needs to output single-dimensional "
                    "numpy arrays, not arrays of shape {}".format(yss.shape))

            predicted_value = yss[0]
            min_y = min(yss)
            max_y = max(yss)

            # add a dimension to be compatible with downstream machinery
            yss = yss[:, np.newaxis]

        ret_exp = explanation.Explanation(domain_mapper=None,
                                          mode=self.mode,
                                          class_names=self.class_names)
        if self.mode == "classification":
            raise NotImplementedError(
                "LIME-RS does not currently support classifier models.")
        else:
            ret_exp.predicted_value = predicted_value
            ret_exp.min_value = min_y
            ret_exp.max_value = max_y
            labels = [0]

        for label in labels:
            (ret_exp.intercept[label], ret_exp.local_exp[label], ret_exp.score,
             ret_exp.local_pred) = self.base.explain_instance_with_data(
                 data,
                 yss,
                 distances,
                 label,
                 num_features,
                 model_regressor=model_regressor,
                 feature_selection=self.feature_selection)

        return ret_exp
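
A hedged usage sketch; the explainer instance, rec_model, and the neighborhood_entity value are placeholders inferred from the signature, not defined by the snippet:

# Assumed: a LIME-RS style explainer in regression mode and a trained
# recommender `rec_model` exposing predict() and one_hot_columns.
instance = ...   # one (user, item) row in the recommender's input format
exp = explainer.explain_instance(instance,
                                 rec_model,
                                 neighborhood_entity="item",  # placeholder
                                 num_features=10,
                                 num_samples=50)
print(exp.predicted_value)
print(exp.local_exp[0][:5])   # top (feature, weight) pairs for label 0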
Example #6
    def explain_instance(self,
                         text_instance,
                         classifier_fn,
                         labels=(1, ),
                         top_labels=None,
                         num_features=10,
                         num_samples=5000,
                         distance_metric='cosine',
                         model_regressor=None,
                         care_words=None,
                         spans=(2, ),
                         include_original_feature=True):
        """Generates explanations for a prediction.

        First, we generate neighborhood data by randomly hiding features from
        the instance (see __data_labels_distance_mapping). We then learn
        locally weighted linear models on this neighborhood data to explain
        each of the classes in an interpretable way (see lime_base.py).

        Args:
            text_instance: raw text string to be explained.
            classifier_fn: classifier prediction probability function, which
                takes a list of d strings and outputs a (d, k) numpy array with
                prediction probabilities, where k is the number of classes.
                For ScikitClassifiers, this is classifier.predict_proba.
            labels: iterable with labels to be explained.
            top_labels: if not None, ignore labels and produce explanations for
                the K labels with highest prediction probabilities, where K is
                this parameter.
            num_features: maximum number of features present in explanation.
            num_samples: size of the neighborhood to learn the linear model.
            distance_metric: the distance metric to use for sample weighting,
                defaults to cosine similarity.
            model_regressor: sklearn regressor to use in explanation. Defaults
                to Ridge regression in LimeBase. Must have model_regressor.coef_
                and 'sample_weight' as a parameter to model_regressor.fit().
            care_words, spans, include_original_feature: custom parameters for
                this variant; they are stored on the explainer instance before
                the neighborhood data is generated.

        Returns:
            An Explanation object (see explanation.py) with the corresponding
            explanations.
        """

        self.care_words = care_words
        self.spans = spans
        self.include_original_feature = include_original_feature
        indexed_string = (IndexedCharacters(
            text_instance, bow=self.bow, mask_string=self.mask_string)
                          if self.char_level else IndexedString(
                              text_instance,
                              bow=self.bow,
                              split_expression=self.split_expression,
                              mask_string=self.mask_string))
        domain_mapper = TextDomainMapper(indexed_string)
        data, yss, distances = self.__data_labels_distances(
            indexed_string,
            classifier_fn,
            num_samples,
            distance_metric=distance_metric)
        if self.class_names is None:
            self.class_names = [str(x) for x in range(yss[0].shape[0])]
        ret_exp = explanation.Explanation(domain_mapper=domain_mapper,
                                          class_names=self.class_names,
                                          random_state=self.random_state)
        ret_exp.predict_proba = yss[0]
        if top_labels:
            labels = np.argsort(yss[0])[-top_labels:]
            ret_exp.top_labels = list(labels)
            ret_exp.top_labels.reverse()
        for label in labels:
            (ret_exp.intercept[label], ret_exp.local_exp[label], ret_exp.score,
             ret_exp.local_pred) = self.base.explain_instance_with_data(
                 data,
                 yss,
                 distances,
                 label,
                 num_features,
                 model_regressor=model_regressor,
                 feature_selection=self.feature_selection)
        return ret_exp
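
A usage sketch for this extended text explainer. The explainer instance and classifier below are assumptions; care_words, spans, and include_original_feature are passed exactly as the signature above defines them:

# Assumed: `explainer` is an instance of this extended class and
# `pipe.predict_proba` is a text classifier probability function.
exp = explainer.explain_instance("an example sentence to explain",
                                 pipe.predict_proba,
                                 num_features=6,
                                 care_words=None,                # custom param
                                 spans=(2,),                     # custom param
                                 include_original_feature=True)  # custom param
print(exp.as_list())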