def compare_lime(self):
        """Score a LIME surrogate against the explained classifier.

        A surrogate is obtained from ``self.create_lime_surrogate`` for the
        explainer's last instance. Its predictions are then compared, via
        ``accuracy_score``, with the classifier's predictions on two
        neighbourhoods built by ``construct_test_data_around_instance``:
        one centred on the first touchpoint and one centred on the last
        instance. The two scores are stored in ``self.lime_score_db`` and
        ``self.lime_score_instance`` and printed.
        """
        explainer = self.explainer
        lime_model = self.create_lime_surrogate(explainer.last_instance,
                                                explainer.dataset,
                                                explainer.clf)

        def agreement_around(center):
            # Same sequence for both neighbourhoods: build the data, ask the
            # classifier first, then the surrogate, then score the match.
            neighbourhood = construct_test_data_around_instance(
                explainer.dataset,
                center,
                max_distance=self.max_distance)
            classifier_labels = explainer.clf.predict(neighbourhood)
            surrogate_labels = lime_model.predict(neighbourhood)
            return accuracy_score(surrogate_labels, classifier_labels)

        self.lime_score_db = agreement_around(explainer.touchpoints[0])
        self.lime_score_instance = agreement_around(explainer.last_instance)

        print('LIME surrogate around DB', self.lime_score_db)
        print('LIME surrogate around instance', self.lime_score_instance)
        print('----------------------------- \n')
    def export_decision_tree(self):
        """Fit, export and score a depth-3 decision tree surrogate.

        The tree is trained on a neighbourhood of the first touchpoint,
        labelled with the explained classifier's own predictions (80/20
        train/test split, fixed ``random_state``). The method then:

        * stores the names of the (up to) ten features with the largest
          absolute importance in ``self.surrogate_features``;
        * exports the tree to ``exports/db_tree.pdf`` through ``export_tree``;
        * stores the fitted tree in ``self.tree_surrogate``;
        * stores and prints the agreement with the classifier on the held-out
          split (``self.tree_score_db``) and on a neighbourhood of the last
          instance (``self.tree_score_instance``).
        """
        explainer = self.explainer

        data = construct_test_data_around_instance(
            explainer.dataset,
            explainer.touchpoints[0],
            max_distance=self.max_distance)
        clf_pred = explainer.clf.predict(data)
        X_train, X_test, Y_train, Y_test = train_test_split(data,
                                                            clf_pred,
                                                            test_size=0.2,
                                                            random_state=1000)

        tree = DecisionTreeClassifier(max_depth=3)
        tree.fit(X_train, Y_train)

        # Rank the features once (largest absolute importance first) and reuse
        # the ranking for both the stored names and the printed report.
        importances = tree.feature_importances_
        top_idx = np.flip(np.argsort(np.abs(importances)))[0:10]
        feature_names = np.array(self.feature_names)
        self.surrogate_features = feature_names[top_idx]

        # The neighbourhood used to be rebuilt here into a variable that was
        # never read; that dead call has been removed.
        tree_pred = tree.predict(X_test)
        clf_pred = explainer.clf.predict(X_test)

        export_tree(tree, 'exports/db_tree.pdf', self.feature_names)
        self.tree_surrogate = tree
        self.tree_score_db = accuracy_score(tree_pred, clf_pred)
        print('accuracy tree around DB', self.tree_score_db)

        print(
            'LOCAL tree feature importance ',
            list(zip(feature_names[top_idx], importances[top_idx])))

        data = construct_test_data_around_instance(
            explainer.dataset,
            explainer.last_instance,
            max_distance=self.max_distance)
        tree_pred = tree.predict(data)
        clf_pred = explainer.clf.predict(data)
        self.tree_score_instance = accuracy_score(tree_pred, clf_pred)
        print('accuracy tree around instance', self.tree_score_instance)
        print('----------------------------- \n')
 def lars_features_local(self):
     """Print the primary features found around the last explained instance.

     A neighbourhood of the last instance is built, labelled with the
     classifier's predictions and passed to ``get_primary_features``
     together with ``self.explainer.num_features``. The result is used to
     index ``self.feature_names`` and the selected names are printed.
     """
     explainer = self.explainer
     neighbourhood = construct_test_data_around_instance(
         explainer.dataset,
         explainer.last_instance,
         max_distance=self.max_distance)
     classifier_labels = explainer.clf.predict(neighbourhood)
     selected = get_primary_features(
         neighbourhood,
         classifier_labels,
         num_features=explainer.num_features)

     print('FEATURE IMPORTANCE LARS locally around instance')
     all_names = np.array(self.feature_names)
     print(all_names[selected])
    def compare_surrogate(self):
        """Score the explainer's linear surrogate against the classifier.

        The surrogate (``self.explainer.sg.surrogate``) is compared with the
        classifier on a neighbourhood of the first touchpoint and on a
        neighbourhood of the last instance. The surrogate and both agreement
        scores are stored in ``self.linear_surrogate``,
        ``self.linear_score_db`` and ``self.linear_score_instance``. The (up
        to) ten coefficients of ``coef_[0]`` with the largest magnitude are
        printed with their feature names, followed by both scores.
        """
        explainer = self.explainer
        # (Alternative sampling, disabled:
        #  sample_normal(self.explainer.touchpoints, 500, 2))

        # Agreement around the decision boundary.
        boundary_data = construct_test_data_around_instance(
            explainer.dataset,
            explainer.touchpoints[0],
            max_distance=self.max_distance)
        boundary_labels = explainer.clf.predict(boundary_data)
        linear_model = explainer.sg.surrogate
        boundary_guess = linear_model.predict(boundary_data)

        self.linear_surrogate = linear_model
        self.linear_score_db = accuracy_score(boundary_guess, boundary_labels)

        # Indices of the largest-magnitude coefficients, strongest first.
        weights = linear_model.coef_[0]
        strongest = np.flip(np.argsort(np.abs(weights))[-10:])
        print(
            'LOCAL LINEAR feature importance ',
            list(zip(np.array(self.feature_names)[strongest],
                     weights[strongest])))

        # Agreement around the original instance.
        instance_data = construct_test_data_around_instance(
            explainer.dataset,
            explainer.last_instance,
            max_distance=self.max_distance)
        instance_labels = explainer.clf.predict(instance_data)
        instance_guess = linear_model.predict(instance_data)
        self.linear_score_instance = accuracy_score(instance_guess,
                                                    instance_labels)

        print('accuracy surrogate around DB', self.linear_score_db)
        print('accuracy surrogate around instance', self.linear_score_instance)
        print('----------------------------- \n')
Exemple #5
0
    def sample_around_instance_from_dataset(self, border_touchpoints, num_samples, max_distance=0.5):
        """
        Collect dataset samples from the neighbourhood of every touchpoint.

        The touchpoints themselves come first in the result, followed by the
        neighbourhood obtained for each touchpoint, in iteration order.

        :param border_touchpoints: sequence of points to sample around
        :param num_samples: total sample budget; it is divided evenly
            (rounded towards zero) between the touchpoints and passed on as
            ``size``
        :param max_distance: forwarded unchanged to
            ``utils.construct_test_data_around_instance``
        :return: array stacking the touchpoints and all neighbourhoods along
            axis 0; when ``border_touchpoints`` is empty, the (empty) array
            of touchpoints is returned as is
        """
        touchpoints = np.array(border_touchpoints)

        # No touchpoints means nothing to sample around; returning early also
        # avoids dividing the budget by zero below.
        if len(border_touchpoints) == 0:
            return touchpoints

        num_per_point = int(num_samples / len(border_touchpoints))

        # Gather every piece first and concatenate once, instead of growing
        # the array with a fresh copy on every iteration.
        pieces = [touchpoints]
        pieces.extend(
            utils.construct_test_data_around_instance(
                self.dataset, point, max_distance=max_distance, size=num_per_point)
            for point in border_touchpoints)

        return np.concatenate(pieces, axis=0)
    def support_with_random_sampling(self, instance, counterfactual, num_support=10, target_value=1):
        """
        Return ``num_support`` nearby samples that the classifier assigns to
        ``target_value``.

        The neighbourhood of ``instance`` is built with a radius that starts
        at 0.3 and grows by 0.3 per round until it contains at least
        ``num_support`` samples predicted as ``target_value``.

        :param instance: point the neighbourhood is centred on
        :param counterfactual: not used by this method; kept so existing
            callers keep working
        :param num_support: number of samples to return
        :param target_value: predicted label a sample must have to be kept
            (defaults to 1, the previously hard-coded value)
        :return: the first ``num_support`` matching samples of the
            neighbourhood
        """
        max_distance = 0.3
        # NOTE(review): the radius has no upper bound. If the data can never
        # yield ``num_support`` matching samples this loop does not terminate.
        while True:
            sample = construct_test_data_around_instance(self.data, instance, max_distance=max_distance)
            if len(sample) > 0:
                pred = self.clf.predict(sample)
                sample = sample[pred == target_value]
                # Exactly ``num_support`` matches is already enough for the
                # slice below, so accept equality as well.
                if len(sample) >= num_support:
                    return sample[0:num_support]
            max_distance += 0.3
Exemple #7
0
    def random(self, instance, target_value=1):
        """
        Search the neighbourhood of ``instance`` for a sample that the
        classifier predicts as ``target_value``.

        The search radius starts at 0.3 and is printed at the start of every
        round. Whenever the neighbourhood is empty or holds no matching
        sample, the radius grows by 0.3 and the search is repeated.

        :param instance: point the neighbourhood is centred on
        :param target_value: predicted label the returned sample must have
        :return: the first matching sample of the first neighbourhood that
            contains one
        """
        radius = 0.3

        while True:
            print(radius)
            candidates = construct_test_data_around_instance(
                self.data, instance, max_distance=radius)

            if len(candidates) != 0:
                labels = self.clf.predict(candidates)
                matching = candidates[labels == target_value]
                if len(matching) > 0:
                    return matching[0]

            # Nothing usable at this radius: widen and retry.
            radius += 0.3
    def feature_importance(self):
        """
        Train a local surrogate random forest and print its strongest
        per-feature contributions for the last instance.

        The forest (100 estimators) is fitted on a neighbourhood of the first
        touchpoint (``max_distance=0.6``), labelled with the classifier's
        predictions. ``ti.predict`` is then called for the last instance, and
        the ten contributions of largest magnitude are printed with their
        feature names.

        :return: None; the contributions are only printed
        """
        explainer = self.explainer

        neighbourhood = construct_test_data_around_instance(
            explainer.dataset,
            explainer.touchpoints[0],
            max_distance=0.6)
        labels = explainer.clf.predict(neighbourhood)
        forest = RandomForestClassifier(n_estimators=100)
        forest.fit(neighbourhood, labels)

        # Only the third value returned by ti.predict is used; its first
        # entry belongs to the single row passed in.
        _, _, contributions = ti.predict(
            forest, explainer.last_instance.reshape(1, -1))
        per_feature = contributions[0]
        print('FEATURE IMPORTANCES RF around DB: \n')

        # Column 0 is ranked -- presumably the first class; confirm against
        # the label order of the forest.
        ranked = sorted(zip(per_feature[:, 0], self.feature_names),
                        key=lambda pair: -abs(pair[0]))
        for weight, feature in ranked[0:10]:
            print(feature, weight)
        print('------------------------- \n')