def train(self):
    """Fit ``self.fm`` on the dataset's training interactions.

    When ``self.uses_features`` is truthy, the training frame is left-joined
    with ``self.dataset.item_features`` on ``item_id`` first; otherwise a
    copy of the training frame is used as-is.

    Side effects:
        Sets ``self.one_hot_columns`` to the column layout returned by
        ``Dataset.convert_to_pyfm_format`` and fits ``self.fm`` against
        ``self.dataset.y_train``.
    """
    if not self.uses_features:
        # Work on a copy so the dataset's own training frame is not handed
        # to the converter directly.
        frame = self.dataset.training_df.copy()
    else:
        frame = pd.merge(
            self.dataset.training_df,
            self.dataset.item_features,
            on="item_id",
            how="left",
        )

    encoded, encoded_columns = Dataset.convert_to_pyfm_format(frame)

    # Remember the encoded column layout produced for the training data.
    self.one_hot_columns = encoded_columns
    self.fm.fit(encoded, self.dataset.y_train)
def predict(self, df, num_chunks=10):
    """Return model predictions for every row of ``df``.

    The frame is processed in consecutive row slices to keep the size of
    each encoded matrix small (the original motivation was avoiding memory
    errors).

    Args:
        df: Frame of rows to score. When ``self.uses_features`` is truthy it
            is left-joined with ``self.dataset.item_features`` on
            ``item_id`` before encoding.
        num_chunks: Upper bound on the number of slices the frame is split
            into. Defaults to 10, the value that used to be hard-coded.

    Returns:
        A list with one prediction per row, in row order, each rounded to
        3 decimals. An empty frame yields an empty list.

    Raises:
        ValueError: If ``num_chunks`` is smaller than 1.
    """
    if self.uses_features:
        df = pd.merge(df, self.dataset.item_features, on="item_id", how="left")

    if num_chunks < 1:
        raise ValueError("num_chunks must be a positive integer")

    n_rows = len(df)
    if n_rows == 0:
        return []

    # Ceiling division: at most ``num_chunks`` slices and never an empty
    # one, even when the frame has fewer rows than ``num_chunks``.
    rows_per_chunk = -(-n_rows // num_chunks)

    all_predictions = []
    for start in range(0, n_rows, rows_per_chunk):
        chunk = df.iloc[start:start + rows_per_chunk]

        # Encode against the column layout stored by ``train`` so that every
        # chunk uses the same feature space (explain_instance passes the
        # same ``columns=`` argument). Without it each chunk would be
        # encoded independently of the training layout.
        test_data, _ = Dataset.convert_to_pyfm_format(
            chunk, columns=self.one_hot_columns)

        preds = self.fm.predict(test_data)
        all_predictions.extend(preds.round(3))

    return all_predictions
def explain_instance(self, instance, rec_model, neighborhood_entity,
                     labels=(1, ), num_features=10, num_samples=50,
                     distance_metric='cosine', model_regressor=None):
    """Build a local explanation for ``instance`` from a sampled neighborhood.

    A neighborhood is generated around the instance, scored with
    ``rec_model``, weighted by distance to the first neighborhood row, and
    handed to ``self.base.explain_instance_with_data``.

    Args:
        instance: The instance to explain; forwarded to
            ``self.generate_neighborhood``.
        rec_model: Model being explained. Must expose ``predict(df)`` and
            ``one_hot_columns``.
        neighborhood_entity: Forwarded to ``self.generate_neighborhood``.
        labels: Accepted for interface compatibility but currently ignored;
            the single label ``0`` is always used.
        num_features: Forwarded to ``explain_instance_with_data``.
        num_samples: Forwarded to ``self.generate_neighborhood``.
        distance_metric: Metric name passed to
            ``sklearn.metrics.pairwise_distances``.
        model_regressor: Forwarded to ``explain_instance_with_data``.

    Returns:
        An ``explanation.Explanation`` with ``predicted_value``,
        ``min_value``, ``max_value``, ``intercept[0]``, ``local_exp[0]``,
        ``score`` and ``local_pred`` populated.

    Raises:
        NotImplementedError: If ``self.mode`` is ``"classification"``.
        ValueError: If the model's predictions are not one-dimensional.
    """
    # Fail fast: classification is unsupported, so do not generate a
    # neighborhood or query the model before rejecting it.
    if self.mode == "classification":
        raise NotImplementedError(
            "LIME-RS does not currently support classifier models.")

    neighborhood_df = self.generate_neighborhood(
        instance, neighborhood_entity, num_samples)

    # Distances are computed on the encoded representation, using the
    # model's stored column layout. Row 0 is the reference point
    # (presumably the instance itself -- confirm in generate_neighborhood).
    data, _ = Dataset.convert_to_pyfm_format(
        neighborhood_df, columns=rec_model.one_hot_columns)
    distances = sklearn.metrics.pairwise_distances(
        data, data[0].reshape(1, -1), metric=distance_metric).ravel()

    # Predictions of the original (complex) model for the neighborhood.
    yss = np.array(rec_model.predict(neighborhood_df))

    # Explicit check instead of ``assert`` so it survives ``python -O``.
    if yss.ndim != 1:
        raise ValueError(
            "Your model needs to output single-dimensional numpy arrays, "
            "not arrays of shape {}".format(yss.shape))

    predicted_value = yss[0]
    min_y = yss.min()
    max_y = yss.max()

    # Add a trailing axis to be compatible with the downstream machinery.
    yss = yss[:, np.newaxis]

    ret_exp = explanation.Explanation(domain_mapper=None,
                                      mode=self.mode,
                                      class_names=self.class_names)
    ret_exp.predicted_value = predicted_value
    ret_exp.min_value = min_y
    ret_exp.max_value = max_y

    # Only label 0 is explained; the ``labels`` argument is not consulted.
    label = 0
    (ret_exp.intercept[label],
     ret_exp.local_exp[label],
     ret_exp.score,
     ret_exp.local_pred) = self.base.explain_instance_with_data(
        data, yss, distances, label, num_features,
        model_regressor=model_regressor,
        feature_selection=self.feature_selection)

    return ret_exp