Example 1
    def train(self):
        # join item features onto the training interactions when the model uses them
        if self.uses_features:
            df = pd.merge(self.dataset.training_df,
                          self.dataset.item_features,
                          on="item_id",
                          how="left")
        else:
            df = self.dataset.training_df.copy()

        # one-hot encode into the sparse format expected by pyFM and keep the
        # resulting columns so later conversions can use the same feature space
        training_data, training_columns = Dataset.convert_to_pyfm_format(df)
        self.one_hot_columns = training_columns

        # fit the factorization machine on the encoded training data
        self.fm.fit(training_data, self.dataset.y_train)
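For context, the one-hot conversion and FM fit above can be reproduced end to end with the pyFM package (pylibfm) and scikit-learn's DictVectorizer. The sketch below is illustrative only: the toy DataFrame, column names, and hyperparameters are assumptions, and Dataset.convert_to_pyfm_format is approximated by a plain DictVectorizer rather than the project's own helper.

    import numpy as np
    import pandas as pd
    from sklearn.feature_extraction import DictVectorizer
    from pyfm import pylibfm

    # toy interactions; column names mirror the snippet above but the data is made up
    toy_df = pd.DataFrame({
        "user_id": ["u1", "u1", "u2", "u3"],
        "item_id": ["i1", "i2", "i2", "i3"],
    })
    y_train = np.array([5.0, 3.0, 4.0, 1.0])

    # one-hot encode user/item ids into a sparse matrix, the input format pyFM expects
    vectorizer = DictVectorizer()
    training_data = vectorizer.fit_transform(toy_df.to_dict(orient="records"))
    one_hot_columns = vectorizer.get_feature_names_out()

    # fit a small factorization machine in regression mode
    fm = pylibfm.FM(num_factors=4, num_iter=5, task="regression", verbose=False)
    fm.fit(training_data, y_train)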
Example 2
    def predict(self, df):
        # join item features onto the rows to score, as in train()
        if self.uses_features:
            df = pd.merge(df,
                          self.dataset.item_features,
                          on="item_id",
                          how="left")

        all_predictions = list()

        # split into chunks to avoid memory errors on large inputs
        n_chunks = 10
        chunks = np.array_split(df, n_chunks)
        for chunk in chunks:
            # convert the chunk to pyFM's one-hot format
            test_data, _ = Dataset.convert_to_pyfm_format(chunk)

            # get predictions from the fitted factorization machine
            preds = self.fm.predict(test_data)
            all_predictions.extend(preds.round(3))

        return all_predictions
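One detail worth noting in the chunking above: np.array_split's second argument is the number of pieces, not the rows per piece, which is why the variable is named n_chunks. A quick illustrative check on toy data (names assumed):

    import numpy as np
    import pandas as pd

    toy_df = pd.DataFrame({"item_id": range(25)})

    # 25 rows split into 10 near-equal chunks: five of size 3, five of size 2
    chunks = np.array_split(toy_df, 10)
    print([len(c) for c in chunks])  # [3, 3, 3, 3, 3, 2, 2, 2, 2, 2]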
Example 3
    def explain_instance(self,
                         instance,
                         rec_model,
                         neighborhood_entity,
                         labels=(1, ),
                         num_features=10,
                         num_samples=50,
                         distance_metric='cosine',
                         model_regressor=None):

        # get neighborhood
        neighborhood_df = self.generate_neighborhood(instance,
                                                     neighborhood_entity,
                                                     num_samples)

        # compute distance based on interpretable format
        data, _ = Dataset.convert_to_pyfm_format(
            neighborhood_df, columns=rec_model.one_hot_columns)
        distances = sklearn.metrics.pairwise_distances(
            data, data[0].reshape(1, -1), metric=distance_metric).ravel()

        # get predictions from original complex model
        yss = np.array(rec_model.predict(neighborhood_df))

        # for classification, the model needs to provide a list of tuples - classes along with prediction probabilities
        if self.mode == "classification":
            raise NotImplementedError(
                "LIME-RS does not currently support classifier models.")
        # for regression, the output should be a one-dimensional array of predictions
        else:
            try:
                assert isinstance(yss, np.ndarray) and len(yss.shape) == 1
            except AssertionError:
                raise ValueError(
                    "Your model needs to output one-dimensional numpy arrays, "
                    "not arrays of shape {}".format(yss.shape))

            predicted_value = yss[0]
            min_y = min(yss)
            max_y = max(yss)

            # add a dimension to be compatible with downstream machinery
            yss = yss[:, np.newaxis]

        # build the explanation container returned to the caller
        ret_exp = explanation.Explanation(domain_mapper=None,
                                          mode=self.mode,
                                          class_names=self.class_names)
        if self.mode == "classification":
            raise NotImplementedError(
                "LIME-RS does not currently support classifier models.")
        else:
            ret_exp.predicted_value = predicted_value
            ret_exp.min_value = min_y
            ret_exp.max_value = max_y
            labels = [0]

        # fit a weighted local surrogate model for each requested label
        for label in labels:
            (ret_exp.intercept[label], ret_exp.local_exp[label], ret_exp.score,
             ret_exp.local_pred) = self.base.explain_instance_with_data(
                 data,
                 yss,
                 distances,
                 label,
                 num_features,
                 model_regressor=model_regressor,
                 feature_selection=self.feature_selection)

        return ret_exp
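The call to self.base.explain_instance_with_data delegates to lime's LimeBase, which fits a weighted linear surrogate on the one-hot neighborhood. The sketch below is a simplified approximation of that step, assuming the standard exponential kernel and a ridge regressor; kernel_width and the helper name are illustrative, not the actual LIME internals.

    import numpy as np
    from sklearn.linear_model import Ridge

    def local_surrogate_sketch(data, yss, distances, num_features=10, kernel_width=0.25):
        # exponential kernel: samples close to the explained instance get weights near 1
        weights = np.sqrt(np.exp(-(distances ** 2) / kernel_width ** 2))

        # weighted linear model on the interpretable (one-hot) representation
        surrogate = Ridge(alpha=1.0)
        surrogate.fit(data, yss.ravel(), sample_weight=weights)

        # keep the largest-magnitude coefficients as the local explanation
        coefs = surrogate.coef_
        top = np.argsort(np.abs(coefs))[::-1][:num_features]
        local_exp = list(zip(top.tolist(), coefs[top].tolist()))
        return surrogate.intercept_, local_exp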