def generate_neighborhood_data(self, sample, predict_fn,
                               distance_metric='euclidean',
                               n_samples=500, seed=1, **kwargs):
    """Generate neighborhood data for a given point (currently using LIME).

    A ``LimeTabularExplainer`` is fitted on ``self.train_data`` and used to
    draw ``n_samples`` perturbed points around ``sample``.

    Args:
        sample: Observed sample to generate a neighborhood for.
        predict_fn: Black box predictor; forwarded to ``self._data``.
        distance_metric: Distance metric used for weights; forwarded to
            ``self._data``.
        n_samples: Number of samples to generate.
        seed: Seed handed to LIME as ``random_state`` so that repeated
            calls with the same seed draw the same neighborhood. ``None``
            leaves LIME on its default (unseeded) random state.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        A tuple made of every value returned by ``self._data(...)``
        followed by ``sample``. NOTE(review): presumably ``self._data``
        yields (neighbor_data, weights, neighbor_data_labels) -- confirm
        against its definition.
    """
    from lime.lime_tabular import LimeTabularExplainer

    # Seed the explainer: previously ``seed`` was accepted but ignored,
    # which made the generated neighborhood non-reproducible.
    explainer = LimeTabularExplainer(
        self.train_data,
        categorical_features=self.categorical_features,
        discretize_continuous=False,
        random_state=seed)

    # NOTE: ``__data_inverse`` is a name-mangled private method of LIME, so
    # this call is tied to the installed LIME version.
    _, neighbor_data = explainer._LimeTabularExplainer__data_inverse(
        sample, n_samples)

    # Standardise the samples with the mean/scale of the explainer's scaler.
    scaled_data = ((neighbor_data - explainer.scaler.mean_)
                   / explainer.scaler.scale_)

    return (*self._data(neighbor_data, scaled_data,
                        distance_metric, predict_fn),
            sample)
def generate_neighborhood_data(self, sample, predict_fn,
                               distance_metric='euclidean',
                               n_samples=500, seed=1, **kwargs):
    """Build a LIME-sampled neighborhood around ``sample``.

    Args:
        sample: Observed sample to generate a neighborhood for.
        predict_fn: Black box predictor; forwarded to ``self._data``.
        distance_metric: Distance metric used for weights; forwarded to
            ``self._data``.
        n_samples: Number of samples to generate.
        seed: Seed converted with ``check_random_state`` and given to the
            LIME explainer as its ``random_state``.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        A tuple made of every value returned by ``self._data(...)``
        followed by ``sample``. NOTE(review): presumably ``self._data``
        yields (neighbor_data, weights, neighbor_data_labels) -- confirm
        against its definition.
    """
    from lime.lime_tabular import LimeTabularExplainer

    # Expand each categorical feature into the entries that
    # ``self.feature_map`` lists for it, flattened into a single list.
    if self.categorical_features is None:
        categorical_columns = None
    else:
        categorical_columns = [
            column
            for feature in self.categorical_features
            for column in self.feature_map[feature]
        ]

    rng = check_random_state(seed)
    explainer = LimeTabularExplainer(
        self.train_data,
        categorical_features=categorical_columns,
        discretize_continuous=False,
        random_state=rng,
    )

    # NOTE: ``__data_inverse`` is a name-mangled private method of LIME, so
    # this call is tied to the installed LIME version.
    sampled = explainer._LimeTabularExplainer__data_inverse(sample, n_samples)
    neighbor_data = sampled[1]

    # Standardise the samples with the mean/scale of the explainer's scaler.
    scaler = explainer.scaler
    scaled_data = (neighbor_data - scaler.mean_) / scaler.scale_

    # Decoded copy of the samples, passed to ``self._data`` alongside the raw
    # and scaled versions.
    predict_data = self._apply_decode(neighbor_data)

    neighborhood = self._data(neighbor_data, scaled_data, predict_data,
                              distance_metric, predict_fn)
    return (*neighborhood, sample)