Ejemplo n.º 1
0
def get_growing_sphere_from_class(opponent_class, growing_sphere_enemies,
                                  raw_data, clf, vectorizer,
                                  continuous_features, categorical_features,
                                  categorical_values, text, obs):
    """Fit a growing-sphere counterfactual search and report its distances.

    A ``cf.CounterfactualExplanation`` (method ``'GS'``) is built around
    ``raw_data`` using ``clf.predict`` and fitted with fixed search settings
    (2000 instances per layer, first radius 0.1, radius decrease factor 10).
    The euclidean distance between ``obs`` and the first / last entries of
    ``growing_sphere.onevsrest`` is then computed and printed.

    Args:
        opponent_class: Class the counterfactual should belong to, or
            ``None``; forwarded as ``target_class``.
        growing_sphere_enemies: Unused; kept so existing callers keep working.
        raw_data: Instance handed to the counterfactual explainer.
        clf: Classifier exposing ``predict``.
        vectorizer: Forwarded to the counterfactual explainer.
        continuous_features: Forwarded to the counterfactual explainer.
        categorical_features: Forwarded to the counterfactual explainer.
        categorical_values: Forwarded to the counterfactual explainer.
        text: Forwarded to ``growing_sphere.fit``.
        obs: Reference observation the distances are measured from.

    Returns:
        Tuple ``(enemy, largest, smallest, onevsrest, radius, iteration)``
        where ``largest`` / ``smallest`` are the euclidean distances from
        ``obs`` to ``onevsrest[-1]`` / ``onevsrest[0]`` and the remaining
        items are read straight from the fitted explainer.
    """
    growing_sphere = cf.CounterfactualExplanation(
        raw_data,
        clf.predict,
        method='GS',
        target_class=opponent_class,
        vectorizer=vectorizer,
        continuous_features=continuous_features,
        categorical_features=categorical_features,
        categorical_values=categorical_values)
    growing_sphere.fit(n_in_layer=2000,
                       first_radius=0.1,
                       dicrease_radius=10,
                       sparse=True,
                       verbose=True,
                       text=text)

    # NOTE(review): presumably `onevsrest` is ordered from closest to
    # farthest, which is why index 0 / -1 are labelled smallest / largest.
    candidates = growing_sphere.onevsrest
    largest = get_distances(candidates[-1], obs)["euclidean"]
    smallest = get_distances(candidates[0], obs)["euclidean"]

    # Identity check: `== None` may be overridden or evaluated element-wise.
    if opponent_class is None:
        print("classe la plus proche : ",
              clf.predict(candidates[0].reshape(1, -1)))
    print("largest distance from class ", opponent_class, " : ", largest)
    print("smallest distance from class ", opponent_class, " : ", smallest)
    return (growing_sphere.enemy, largest, smallest, candidates,
            growing_sphere.radius, growing_sphere.iteration)
Ejemplo n.º 2
0
    def explain_instance(self, instance, opponent_class=None, n_instance_per_layer=2000, first_radius=0.1, nb_features_employed=4,
                        dicrease_radius=10, all_explanations_model=False, user_experiments=False, lime_vs_local_surrogate=False,
                        local_surrogate_experiment=False, illustrative_results=False):
        """
        Returns either an explanation from anchors or lime along with one or multiple counter factual explanation.

        The method (1) stores the black box prediction for ``instance`` in ``self.target_class``, (2) searches the closest
        counterfactual with a growing sphere, then the closest instance of the target class starting from that
        counterfactual (stored in ``self.closest_counterfactual``), (3) collects/generates instances inside the sphere and
        repeats the unimodality test with a growing sample size until it succeeds, and (4) dispatches on the
        experiment flags or, by default, on ``self.multimodal_results``.

        Args: instance: Target instance to explain
              opponent_class: Class of the desired counterfactual instance (None lets the search pick one)
              n_instance_per_layer: Number of instances require in each layer for growing field
              first_radius: Radius of the initial field for growing field
              nb_features_employed: Indicate how many features will be used as explanation for the linear explanation (used also for experiments)
              dicrease_radius: Ratio of dicreasing the radius of the growing field
              all_explanations_model: generate explanation with multiple explanation models (for experiments)
              user_experiments: return features employed by linear and rule based explanation (for experiments)
              lime_vs_local_surrogate: Return features employed by Lime and LS (for experiments)
              local_surrogate_experiment: Compute multiple local surrogate explanations and return precision, coverage and F1 (for experiments)
              illustrative_results: return the result of ``ape_illustrative_results`` (for experiments)
        Return: When no experiment flag is set:
                    APE's coverage
                    APE's precision
                    APE's F1
                    Indicate whether counter factual instances are multimodal: 1 or unimodal: 0
                When ``local_surrogate_experiment`` is set: local surrogate precision, coverage and F1.
                When another experiment flag is set: whatever the corresponding experiment function returns.
        """
        self.target_class = self.black_box_predict(instance.reshape(1, -1))[0]

        # Computes the distance to the farthest instance from the training dataset to bound generating instances.
        # get_distances is similar to pairwise distance (i.e: it is the same results for euclidean distance)
        # but it adds a sparsity distance computation (i.e: number of same values).
        farthest_distance = 0
        for training_instance in self.train_data:
            # Computed once per instance (the distance was previously evaluated twice).
            distance = get_distances(training_instance, instance, categorical_features=self.categorical_features)["euclidean"]
            if distance > farthest_distance:
                farthest_distance = distance

        # Both growing sphere searches share the same feature description and fitting parameters.
        sphere_description = dict(method='GS', continuous_features=self.continuous_features,
                                  categorical_features=self.categorical_features, categorical_values=self.categorical_values)
        fit_parameters = dict(n_in_layer=n_instance_per_layer, first_radius=first_radius, dicrease_radius=dicrease_radius,
                              sparse=True, verbose=self.verbose, feature_variance=self.feature_variance,
                              farthest_distance_training_dataset=farthest_distance,
                              probability_categorical_feature=self.probability_categorical_feature,
                              min_counterfactual_in_sphere=self.nb_min_instance_per_class_in_sphere)

        growing_sphere = cf.CounterfactualExplanation(instance, self.black_box_predict, target_class=opponent_class,
                                                      **sphere_description)
        growing_sphere.fit(**fit_parameters)
        first_closest_counterfactual = growing_sphere.enemy

        # After searching for the closest counterfactual, we take the closest from this point from the same class as the target instance to explain
        second_growing_sphere = cf.CounterfactualExplanation(first_closest_counterfactual, self.black_box_predict,
                                                             target_class=self.target_class, **sphere_description)
        second_growing_sphere.fit(**fit_parameters)
        self.closest_counterfactual = second_growing_sphere.enemy

        if self.verbose:
            print("The farthest instance from the training dataset is ", farthest_distance, " away from the target.")
            if opponent_class is None:
                opponent_class = self.black_box_predict(growing_sphere.enemy.reshape(1, -1))[0]
                print("Class of the closest counterfactual: ", opponent_class)
            print("radius of the hyperfield:", growing_sphere.radius)
            print("The target instance to explain is ", instance)
            print("The associated closest counterfactual is ", self.closest_counterfactual)

        # Generates or store instances in the area of the hyperfield and their corresponding labels
        min_instance_per_class = self.nb_min_instance_per_class_in_sphere
        position_instances_in_sphere, nb_training_instance_in_sphere = self.instances_from_dataset_inside_sphere(
            self.closest_counterfactual, growing_sphere.radius)

        def _generate_instances(min_per_class):
            # Samples instances (and their libfolding representation) inside the sphere.
            return self.generate_instances_inside_sphere(growing_sphere.radius, self.closest_counterfactual,
                                                         farthest_distance, min_per_class,
                                                         position_instances_in_sphere, nb_training_instance_in_sphere,
                                                         libfolding=True)

        def _libfolding_test(instances, instances_libfolding):
            # Runs the unimodality test; returns (counterfactual instances, test outcome).
            # len() is used instead of `!= []`: comparing an array with a list is ambiguous
            # and raises on recent NumPy versions.
            if len(instances_libfolding) > 0:
                # In case of categorical data, we transform categorical values into probability distribution (continuous values for libfolding)
                index = self.store_counterfactual_instances_in_sphere(instances, self.target_class, libfolding=True)
                counterfactuals = instances[index]
                outcome = self.check_test_unimodal_data(np.array(counterfactuals), instances, growing_sphere.radius,
                                                        counterfactual_libfolding=instances_libfolding[index])
            else:
                counterfactuals = self.store_counterfactual_instances_in_sphere(instances, self.target_class)
                outcome = self.check_test_unimodal_data(np.array(counterfactuals), instances, growing_sphere.radius)
            return counterfactuals, outcome

        instances_in_sphere, labels_in_sphere, percentage_distribution, instances_in_sphere_libfolding = _generate_instances(
            min_instance_per_class)

        if local_surrogate_experiment:
            local_surrogate_precision, local_surrogate_coverage, f1_local_surrogate = compute_local_surrogate_precision_coverage(self,
                                                instance, growing_sphere,
                                                instances_in_sphere, labels_in_sphere,
                                                position_instances_in_sphere, nb_training_instance_in_sphere, nb_features_employed)
            return local_surrogate_precision, local_surrogate_coverage, f1_local_surrogate

        # Compute the libfolding test to verify wheter instances in the area of the hyper sphere is multimodal or unimodal
        counterfactual_instances_in_sphere, unimodal_test = _libfolding_test(instances_in_sphere, instances_in_sphere_libfolding)

        nb = 0
        while not unimodal_test:
            # While the libfolding test is not able to declare that data are multimodal or unimodal we extend the number of instances that are generated
            min_instance_per_class *= 1.5
            instances_in_sphere, labels_in_sphere, percentage_distribution, instances_in_sphere_libfolding = _generate_instances(
                min_instance_per_class)
            counterfactual_instances_in_sphere, unimodal_test = _libfolding_test(instances_in_sphere,
                                                                                 instances_in_sphere_libfolding)
            if self.verbose:
                print("nb times libfolding is not able to determine wheter datas are unimodal or multimodal:", nb)
                print("There are ", len(counterfactual_instances_in_sphere), " instances in the datas given to libfolding.")
                print()
            nb += 1

        # Computes the labels for instances from the dataset to compute precision for explanation method
        labels_instance_train_data = self.black_box_predict(self.train_data)
        nb_instance_train_data_label_as_target = sum(x == self.target_class for x in labels_instance_train_data)

        # Different cases for experiments
        if user_experiments:
            return simulate_user_experiments(self, instance, nb_features_employed, farthest_distance, self.closest_counterfactual, growing_sphere,
                                            position_instances_in_sphere, nb_training_instance_in_sphere)

        if lime_vs_local_surrogate:
            # The experiment needs the instance, the number of features and the explainer itself;
            # calling it without arguments raised a TypeError.
            return simulate_user_experiments_lime_ls(instance, nb_features_employed, self)

        if all_explanations_model:
            return compute_all_explanation_method_precision(self, instance, growing_sphere, dicrease_radius, growing_sphere.radius,
                                                nb_training_instance_in_sphere, nb_instance_train_data_label_as_target,
                                                position_instances_in_sphere, instances_in_sphere, labels_in_sphere,
                                                farthest_distance, percentage_distribution)

        if illustrative_results:
            return ape_illustrative_results(self, instance, counterfactual_instances_in_sphere)

        if self.multimodal_results:
            # In case of multimodal data, we generate a rule based explanation and compute precision and coverage of this explanation model
            # NOTE(review): the original passed an undefined name `nb_instances_in_sphere` (NameError);
            # the number of instances sampled in the sphere is assumed to be what was meant - confirm.
            ape_precision, ape_coverage, ape_f1, ape_explanation = self.compute_anchor_precision_coverage(instance,
                                        labels_instance_train_data, len(instances_in_sphere),
                                        farthest_distance, percentage_distribution, nb_instance_train_data_label_as_target)

        else:
            # In case of unimodal data, we generate linear explanation and compute precision and coverage of this explanation model
            ape_precision, ape_coverage, ape_f1, ape_explanation = self.compute_lime_extending_precision_coverage(instances_in_sphere,
                                                labels_in_sphere, growing_sphere, nb_features_employed,
                                                farthest_distance, dicrease_radius, nb_instance_train_data_label_as_target)

        return ape_coverage, ape_precision, ape_f1, 1 if self.multimodal_results else 0
def simulate_user_experiments_lime_ls(instance, nb_features_employed,
                                      erl_tabular):
    """Compare the features used by LIME and by the local surrogate.

    The function stores the black box prediction for ``instance`` in
    ``erl_tabular.target_class``, searches the closest counterfactual with a
    growing sphere, samples instances inside that sphere and repeats the
    unimodality test (doubling the minimum number of instances per class)
    until it succeeds. When ``erl_tabular.multimodal_results`` is falsy it
    then builds a LIME explanation and a local surrogate explanation and
    maps both to feature lists through
    ``erl_tabular.generate_rule_and_data_for_anchors``.

    Args:
        instance: Target instance to explain; must support ``reshape``.
        nb_features_employed: Number of features requested from each linear
            explanation.
        erl_tabular: Explainer object providing the black box, the training
            data, the LIME explainer and the sphere sampling helpers.

    Returns:
        Tuple ``(features_employed_in_linear,
        features_employed_in_local_surrogate)``, both sorted in place, or
        ``([], [])`` when ``erl_tabular.multimodal_results`` is truthy.
    """
    target_class = erl_tabular.black_box_predict(instance.reshape(1, -1))[0]
    erl_tabular.target_class = target_class

    # Computes the distance to the farthest instance from the training dataset to bound generating instances
    farthest_distance = 0
    for training_instance in erl_tabular.train_data:
        # Computed once per instance (the distance was previously evaluated twice).
        distance = get_distances(training_instance, instance)["euclidean"]
        if distance > farthest_distance:
            farthest_distance = np.round(distance, decimals=3)

    def _fit_growing_sphere(start_instance, wanted_class):
        # Builds and fits a growing sphere search starting at `start_instance`.
        sphere = cf.CounterfactualExplanation(
            start_instance,
            erl_tabular.black_box_predict,
            method='GS',
            target_class=wanted_class,
            continuous_features=erl_tabular.continuous_features,
            categorical_features=erl_tabular.categorical_features,
            categorical_values=erl_tabular.categorical_values)
        sphere.fit(
            n_in_layer=2000,
            first_radius=0.1,
            dicrease_radius=10,
            sparse=True,
            verbose=erl_tabular.verbose,
            feature_variance=erl_tabular.feature_variance,
            farthest_distance_training_dataset=farthest_distance,
            probability_categorical_feature=erl_tabular.
            probability_categorical_feature,
            min_counterfactual_in_sphere=erl_tabular.
            nb_min_instance_per_class_in_sphere)
        return sphere

    growing_sphere = _fit_growing_sphere(instance, None)
    closest_counterfactual = growing_sphere.enemy

    # Generates or store instances in the area of the hypersphere and their corresponding labels
    min_instance_per_class = erl_tabular.nb_min_instance_per_class_in_sphere
    position_instances_in_sphere, nb_training_instance_in_sphere = erl_tabular.instances_from_dataset_inside_sphere(
        closest_counterfactual, growing_sphere.radius)

    def _sample_and_test(min_per_class):
        # Samples instances in the sphere, then runs the unimodality test.
        # Returns (counterfactual instances, test outcome).
        sampled, _labels, _distribution, sampled_libfolding = erl_tabular.generate_instances_inside_sphere(
            growing_sphere,
            growing_sphere.radius,
            closest_counterfactual,
            farthest_distance,
            min_per_class,
            position_instances_in_sphere,
            nb_training_instance_in_sphere,
            libfolding=True)
        # len() is used instead of `!= []`: comparing an array with a list
        # is ambiguous and raises on recent NumPy versions.
        if len(sampled_libfolding) > 0:
            source = sampled_libfolding
        else:
            source = sampled
        counterfactuals = erl_tabular.store_counterfactual_instances_in_sphere(
            source, target_class)
        outcome = erl_tabular.check_test_unimodal_data(
            np.array(counterfactuals), sampled, growing_sphere.radius)
        return counterfactuals, outcome

    # Compute the libfolding test to verify wheter instances in the area of the hyper sphere is multimodal or unimodal
    counterfactual_instances_in_sphere, unimodal_test = _sample_and_test(
        min_instance_per_class)
    nb = 0
    while not unimodal_test:
        # Retry with twice as many instances until the test can conclude.
        min_instance_per_class *= 2
        counterfactual_instances_in_sphere, unimodal_test = _sample_and_test(
            min_instance_per_class)
        print(
            "nb times libfolding is not able to determine wheter datas are unimodal or multimodal:",
            nb)
        print("There are ", len(counterfactual_instances_in_sphere),
              " instances in the datas given to libfolding.")
        print()
        nb += 1

    if erl_tabular.multimodal_results:
        # The linear comparison is only performed on unimodal data.
        return [], []

    lime_exp = erl_tabular.lime_explainer.explain_instance(
        instance,
        erl_tabular.black_box_predict_proba,
        num_features=nb_features_employed)
    # Only the first item of each explanation entry is kept.
    features_linear_employed = [entry[0] for entry in lime_exp.as_list()]
    _, _, features_employed_in_linear = erl_tabular.generate_rule_and_data_for_anchors(
        features_linear_employed,
        target_class,
        erl_tabular.train_data,
        simulated_user_experiment=True)

    # Closest instance of the target class, searched from the counterfactual.
    growing_sphere_closest_cf = _fit_growing_sphere(closest_counterfactual,
                                                    target_class)
    instance_local_surrogate = growing_sphere_closest_cf.enemy
    print("classe cible", target_class)
    print(
        "classe de l'instance donnée à LS",
        erl_tabular.black_box_predict(
            instance_local_surrogate.reshape(1, -1))[0])

    # NOTE(review): the explanation below is computed on `instance`, not on
    # `instance_local_surrogate`, although the message above suggests the
    # latter is what is given to LS - confirm which one is intended.
    local_surrogate_exp = erl_tabular.lime_explainer.explain_instance_training_dataset(
        instance,
        erl_tabular.black_box_predict_proba,
        num_features=nb_features_employed)
    features_local_surrogate_employed = [
        entry[0] for entry in local_surrogate_exp.as_list()
    ]
    _, _, features_employed_in_local_surrogate = erl_tabular.generate_rule_and_data_for_anchors(
        features_local_surrogate_employed,
        target_class,
        erl_tabular.train_data,
        simulated_user_experiment=True)

    features_employed_in_linear.sort()
    features_employed_in_local_surrogate.sort()
    return features_employed_in_linear, features_employed_in_local_surrogate
Ejemplo n.º 4
0
    def explain_instance(self,
                         instance,
                         opponent_class=None,
                         n_instance_per_layer=2000,
                         first_radius=0.1,
                         dicrease_radius=10,
                         all_explanations_model=False,
                         user_experiments=False,
                         lime_vs_local_surrogate=False,
                         nb_features_employed=2,
                         local_surrogate_experiment=False):

        self.target_class = self.black_box_predict(instance.reshape(1, -1))[0]

        # Computes the distance to the farthest instance from the training dataset to bound generating instances
        farthest_distance = 0
        for training_instance in self.train_data:
            if get_distances(training_instance,
                             instance,
                             categorical_features=self.categorical_features
                             )["euclidean"] > farthest_distance:
                farthest_distance = get_distances(
                    training_instance,
                    instance,
                    categorical_features=self.categorical_features
                )["euclidean"]

        growing_sphere = cf.CounterfactualExplanation(
            instance,
            self.black_box_predict,
            method='GS',
            target_class=opponent_class,
            continuous_features=self.continuous_features,
            categorical_features=self.categorical_features,
            categorical_values=self.categorical_values)
        growing_sphere.fit(
            n_in_layer=n_instance_per_layer,
            first_radius=first_radius,
            dicrease_radius=dicrease_radius,
            sparse=True,
            verbose=self.verbose,
            feature_variance=self.feature_variance,
            farthest_distance_training_dataset=farthest_distance,
            probability_categorical_feature=self.
            probability_categorical_feature,
            min_counterfactual_in_sphere=self.
            nb_min_instance_per_class_in_sphere)

        # get_distance is similar to pairwise distance (i.e: it is the same results for euclidean distance)
        # but it adds a sparsity distance computation (i.e: number of same values)
        first_closest_counterfactual = growing_sphere.enemy
        # After searching for the closest counterfactual, we take the closest from this point from the same class as the target instance to explain
        second_growing_sphere = cf.CounterfactualExplanation(
            first_closest_counterfactual,
            self.black_box_predict,
            method='GS',
            target_class=self.target_class,
            continuous_features=self.continuous_features,
            categorical_features=self.categorical_features,
            categorical_values=self.categorical_values)
        second_growing_sphere.fit(
            n_in_layer=n_instance_per_layer,
            first_radius=first_radius,
            dicrease_radius=dicrease_radius,
            sparse=True,
            verbose=self.verbose,
            feature_variance=self.feature_variance,
            farthest_distance_training_dataset=farthest_distance,
            probability_categorical_feature=self.
            probability_categorical_feature,
            min_counterfactual_in_sphere=self.
            nb_min_instance_per_class_in_sphere)

        # get_distance is similar to pairwise distance (i.e: it is the same results for euclidean distance)
        # but it adds a sparsity distance computation (i.e: number of same values)
        self.closest_counterfactual = second_growing_sphere.enemy

        if self.verbose:
            print("The farthest instance from the training dataset is ",
                  farthest_distance, " away from the target.")
            if opponent_class == None:
                opponent_class = self.black_box_predict(
                    growing_sphere.enemy.reshape(1, -1))[0]
                print("Class of the closest counterfactual: ", opponent_class)
            print("radius of the hypersphere:", growing_sphere.radius)
            print("The target instance to explain is ", instance)
            print("The associated closest counterfactual is ",
                  self.closest_counterfactual)
        """ Generates or store instances in the area of the hypersphere and their corresponding labels """
        min_instance_per_class = self.nb_min_instance_per_class_in_sphere
        position_instances_in_sphere, nb_training_instance_in_sphere = self.instances_from_dataset_inside_sphere(
            self.closest_counterfactual, growing_sphere.radius)

        instances_in_sphere, labels_in_sphere, percentage_distribution, instances_in_sphere_libfolding = self.generate_instances_inside_sphere(
            growing_sphere,
            growing_sphere.radius,
            self.closest_counterfactual,
            farthest_distance,
            min_instance_per_class,
            position_instances_in_sphere,
            nb_training_instance_in_sphere,
            libfolding=True)

        if local_surrogate_experiment:
            local_surrogate_precision, local_surrogate_coverage, f1_local_surrogate = compute_local_surrogate_precision_coverage(
                self, instance, growing_sphere, instances_in_sphere,
                labels_in_sphere, position_instances_in_sphere,
                nb_training_instance_in_sphere)
            return local_surrogate_precision, local_surrogate_coverage, f1_local_surrogate
        """ Compute the libfolding test to verify wheter instances in the area of the hyper sphere is multimodal or unimodal """

        if instances_in_sphere_libfolding != []:
            index_counterfactual_instances_in_sphere = self.store_counterfactual_instances_in_sphere(
                instances_in_sphere, self.target_class, libfolding=True)
            counterfactual_instances_in_sphere = instances_in_sphere[
                index_counterfactual_instances_in_sphere]
            counterfactual_libfolding = instances_in_sphere_libfolding[
                index_counterfactual_instances_in_sphere]
            unimodal_test = self.check_test_unimodal_data(
                np.array(counterfactual_instances_in_sphere),
                instances_in_sphere,
                growing_sphere.radius,
                counterfactual_libfolding=counterfactual_libfolding)
        else:
            counterfactual_instances_in_sphere = self.store_counterfactual_instances_in_sphere(
                instances_in_sphere, self.target_class)
            unimodal_test = self.check_test_unimodal_data(
                np.array(counterfactual_instances_in_sphere),
                instances_in_sphere, growing_sphere.radius)

        nb = 0
        while not unimodal_test:
            min_instance_per_class *= 1.5
            instances_in_sphere, labels_in_sphere, percentage_distribution, instances_in_sphere_libfolding = self.generate_instances_inside_sphere(
                growing_sphere,
                growing_sphere.radius,
                self.closest_counterfactual,
                farthest_distance,
                min_instance_per_class,
                position_instances_in_sphere,
                nb_training_instance_in_sphere,
                libfolding=True)

            if instances_in_sphere_libfolding != []:
                index_counterfactual_instances_in_sphere = self.store_counterfactual_instances_in_sphere(
                    instances_in_sphere, self.target_class, libfolding=True)
                counterfactual_instances_in_sphere = instances_in_sphere[
                    index_counterfactual_instances_in_sphere]
                counterfactual_libfolding = instances_in_sphere_libfolding[
                    index_counterfactual_instances_in_sphere]
                unimodal_test = self.check_test_unimodal_data(
                    np.array(counterfactual_instances_in_sphere),
                    instances_in_sphere,
                    growing_sphere.radius,
                    counterfactual_libfolding=counterfactual_libfolding)
            else:
                counterfactual_instances_in_sphere = self.store_counterfactual_instances_in_sphere(
                    instances_in_sphere, self.target_class)
                unimodal_test = self.check_test_unimodal_data(
                    np.array(counterfactual_instances_in_sphere),
                    instances_in_sphere, growing_sphere.radius)
            print(
                "nb times libfolding is not able to determine wheter datas are unimodal or multimodal:",
                nb)
            print("There are ", len(counterfactual_instances_in_sphere),
                  " instances in the datas given to libfolding.")
            print()
            nb += 1
        """ Computes the labels for instances from the dataset to compute precision for explanation method """
        labels_instance_train_data = self.black_box_predict(self.train_data)
        nb_instance_train_data_label_as_target = sum(
            x == self.target_class for x in labels_instance_train_data)

        if user_experiments:
            #return self.simulate_user_experiments(instance, nb_features_employed)
            return simulate_user_experiments(self, instance,
                                             nb_features_employed)

        elif all_explanations_model:
            return compute_all_explanation_method_precision(
                self, instance, growing_sphere, dicrease_radius,
                growing_sphere.radius, nb_training_instance_in_sphere,
                nb_instance_train_data_label_as_target,
                position_instances_in_sphere, instances_in_sphere,
                labels_in_sphere, farthest_distance, percentage_distribution)

        if self.multimodal_results:
            anchor_exp = self.anchor_explainer.explain_instance(
                instance,
                self.black_box_predict,
                threshold=self.threshold_precision,
                delta=0.1,
                tau=0.15,
                batch_size=100,
                max_anchor_size=None,
                stop_on_first=False,
                desired_label=None,
                beam_size=4)
            # Generate rules and data frame for applying anchors
            rules, training_instances_pandas_frame = self.generate_rule_and_data_for_anchors(
                anchor_exp.names(), self.target_class, self.train_data)
            # Apply anchors and returns his assiciated coverage and precision
            training_instances_in_anchor = self.get_base_model_data(
                rules, training_instances_pandas_frame)

            # Computes the number of instances from the training set that are classified as the target instance and validate the anchor rules.
            index_instances_train_data_labels_as_target = np.where(
                [x == self.target_class for x in labels_instance_train_data])
            instances_from_index = self.train_data[
                index_instances_train_data_labels_as_target]
            coverage_training_instances_in_anchor = training_instances_in_anchor.copy(
            )
            nb_train_instances_in_anchor = 0
            for instance_index in instances_from_index:
                matches = coverage_training_instances_in_anchor[(
                    coverage_training_instances_in_anchor == instance_index
                ).all(axis=1)]
                if len(matches) > 0:
                    nb_train_instances_in_anchor += 1

            # Generates artificial instances in the area of the anchor rules until there are as many instances as in the hypersphere
            instances_in_anchor = self.generate_artificial_instances_in_anchor(
                training_instances_in_anchor, len(instances_in_sphere),
                instance, rules, farthest_distance, percentage_distribution)
            labels_in_anchor = self.black_box_predict(instances_in_anchor)
            anchor_coverage = nb_train_instances_in_anchor / nb_instance_train_data_label_as_target
            anchor_precision = sum(
                labels_in_anchor == self.target_class) / len(labels_in_anchor)
            f1_anchor = (anchor_coverage + anchor_precision) / 2
            erl_precision = anchor_precision
            erl_coverage = anchor_coverage
            erl_f1 = f1_anchor
        else:
            """ Lime explanation and computation of precision inside the initial hypersphere """
            local_surrogate_raw_data = self.lime_explainer.explain_instance_training_dataset(
                self.closest_counterfactual,
                self.black_box_predict,
                num_features=4,
                model_regressor=LogisticRegression())
            prediction_inside_sphere = self.modify_instance_for_linear_model(
                local_surrogate_raw_data, instances_in_sphere)
            """ computation of the precision inside the sphere for linear model on training data """
            ls_extending_precision = sum(
                labels_in_sphere == prediction_inside_sphere) / len(
                    prediction_inside_sphere)

            radius = growing_sphere.radius
            while ls_extending_precision > self.threshold_precision and radius < farthest_distance:
                """ Extending the hypersphere radius until the precision inside the hypersphere is lower than the threshold 
                and the radius of the hyper sphere is not longer than the distances to the farthest instance from the dataset """
                instances_in_sphere, labels_in_sphere, percentage_distribution, _ = self.generate_instances_inside_sphere(
                    growing_sphere, radius, self.closest_counterfactual,
                    farthest_distance,
                    self.nb_min_instance_per_class_in_sphere,
                    position_instances_in_sphere,
                    nb_training_instance_in_sphere)
                radius += (dicrease_radius - 1) * radius / 5.0
                prediction_inside_sphere = self.modify_instance_for_linear_model(
                    local_surrogate_raw_data, instances_in_sphere)
                ls_extending_precision = sum(
                    labels_in_sphere == prediction_inside_sphere) / len(
                        prediction_inside_sphere)
            """ computation of the coverage inside the sphere for linear model on training data """
            if nb_training_instance_in_sphere > 0:
                # Check that there is at least one instance from the training dataset in the area of the hypersphere
                labels_training_instance_in_sphere = self.black_box_predict(
                    self.train_data[position_instances_in_sphere])
                nb_training_instance_in_sphere_label_as_target = sum(
                    y == self.target_class
                    for y in labels_training_instance_in_sphere)
            else:
                nb_training_instance_in_sphere_label_as_target = 1
            lime_extending_coverage = nb_training_instance_in_sphere_label_as_target / nb_instance_train_data_label_as_target
            f1_lime_extending = (ls_extending_precision +
                                 lime_extending_coverage) / 2
            erl_coverage = lime_extending_coverage
            erl_precision = ls_extending_precision
            erl_f1 = f1_lime_extending

        return erl_coverage, erl_precision, erl_f1, 1 if self.multimodal_results else 0
def _libfolding_unimodal_test(erl_tabular, instances_in_sphere,
                              instances_in_sphere_libfolding, radius):
    """Run the unimodality test on the counterfactual instances of a sphere.

    Args:
        erl_tabular: Explainer object providing
            ``store_counterfactual_instances_in_sphere``,
            ``check_test_unimodal_data`` and ``target_class``.
        instances_in_sphere: Instances located inside the hypersphere.
        instances_in_sphere_libfolding: Libfolding-specific representation of
            the same instances, or an empty container when there is none.
        radius: Radius of the hypersphere, forwarded to the unimodality test.

    Returns:
        A ``(unimodal_test, counterfactual_instances_in_sphere)`` tuple: the
        value returned by ``check_test_unimodal_data`` and the counterfactual
        instances that were given to it.
    """
    # Use an explicit length check instead of ``!= []``: the container is
    # fancy-indexed below, so it is a NumPy array when non-empty, and comparing
    # an array with an empty list is a shape mismatch (a warning on old NumPy
    # versions, an error on recent ones).
    if len(instances_in_sphere_libfolding) > 0:
        counterfactual_index = erl_tabular.store_counterfactual_instances_in_sphere(
            instances_in_sphere, erl_tabular.target_class, libfolding=True)
        counterfactual_instances_in_sphere = instances_in_sphere[
            counterfactual_index]
        counterfactual_libfolding = instances_in_sphere_libfolding[
            counterfactual_index]
        unimodal_test = erl_tabular.check_test_unimodal_data(
            np.array(counterfactual_instances_in_sphere),
            instances_in_sphere,
            radius,
            counterfactual_libfolding=counterfactual_libfolding)
    else:
        counterfactual_instances_in_sphere = erl_tabular.store_counterfactual_instances_in_sphere(
            instances_in_sphere, erl_tabular.target_class)
        unimodal_test = erl_tabular.check_test_unimodal_data(
            np.array(counterfactual_instances_in_sphere), instances_in_sphere,
            radius)
    return unimodal_test, counterfactual_instances_in_sphere


def simulate_user_experiments(erl_tabular, instance, nb_features_employed):
    """Collect the features used by each explanation method for ``instance``.

    The function predicts the class of ``instance``, stores it in
    ``erl_tabular.target_class``, searches a counterfactual with two successive
    growing spheres, samples instances around the closest counterfactual until
    the unimodality test succeeds, and finally asks the anchor and LIME
    explainers for their explanations.

    Args:
        erl_tabular: Explainer object holding the black box, the training
            data, the anchor / LIME explainers and the sampling parameters.
        instance: Target instance to explain (must support ``reshape``).
        nb_features_employed: Number of features requested from the linear
            (LIME) explanations.

    Returns:
        A tuple ``(features_employed_in_linear, features_employed_by_erl,
        features_employed_in_rule)``:
        * features derived from the LIME explanation computed on the closest
          counterfactual;
        * features retained by the combined method: those of the linear
          explanation trained on the sphere when
          ``erl_tabular.multimodal_results`` is falsy, otherwise those of the
          anchor rule;
        * features derived from the anchor explanation.
        Each element is the third value returned by
        ``generate_rule_and_data_for_anchors`` (exact format defined there).
    """
    target_class = erl_tabular.black_box_predict(instance.reshape(1, -1))[0]
    erl_tabular.target_class = target_class

    # Distance to the farthest training instance, used to bound the generation
    # of artificial instances. get_distances gives the same euclidean value as
    # a pairwise distance but also computes a sparsity distance.
    farthest_distance = 0
    for training_instance in erl_tabular.train_data:
        distance = get_distances(training_instance, instance)["euclidean"]
        if distance > farthest_distance:
            farthest_distance = np.round(distance, decimals=3)

    # Settings shared by both growing spheres.
    feature_description = dict(
        continuous_features=erl_tabular.continuous_features,
        categorical_features=erl_tabular.categorical_features,
        categorical_values=erl_tabular.categorical_values)
    fit_options = dict(
        n_in_layer=2000,
        first_radius=0.1,
        dicrease_radius=10,
        sparse=True,
        verbose=erl_tabular.verbose,
        feature_variance=erl_tabular.feature_variance,
        farthest_distance_training_dataset=farthest_distance,
        probability_categorical_feature=erl_tabular.
        probability_categorical_feature,
        min_counterfactual_in_sphere=erl_tabular.
        nb_min_instance_per_class_in_sphere)

    # First sphere: grown around the target instance, no class imposed.
    growing_sphere = cf.CounterfactualExplanation(
        instance,
        erl_tabular.black_box_predict,
        method='GS',
        target_class=None,
        **feature_description)
    growing_sphere.fit(**fit_options)

    # Second sphere: grown around the enemy found above, looking back for an
    # instance of the target class.
    first_growing_sphere = cf.CounterfactualExplanation(
        growing_sphere.enemy,
        erl_tabular.black_box_predict,
        method='GS',
        target_class=target_class,
        **feature_description)
    first_growing_sphere.fit(**fit_options)
    closest_counterfactual = first_growing_sphere.enemy

    # Generate or store the instances located in the hypersphere.
    min_instance_per_class = erl_tabular.nb_min_instance_per_class_in_sphere
    position_instances_in_sphere, nb_training_instance_in_sphere = erl_tabular.instances_from_dataset_inside_sphere(
        closest_counterfactual, growing_sphere.radius)

    instances_in_sphere, _, _, instances_in_sphere_libfolding = erl_tabular.generate_instances_inside_sphere(
        growing_sphere,
        growing_sphere.radius,
        closest_counterfactual,
        farthest_distance,
        min_instance_per_class,
        position_instances_in_sphere,
        nb_training_instance_in_sphere,
        libfolding=True)

    # Libfolding test: are the instances in the hypersphere unimodal or
    # multimodal?
    unimodal_test, counterfactual_instances_in_sphere = _libfolding_unimodal_test(
        erl_tabular, instances_in_sphere, instances_in_sphere_libfolding,
        growing_sphere.radius)

    # While the test is falsy, double the minimum number of instances per
    # class and sample again.
    # NOTE(review): there is no upper bound on the number of retries.
    nb = 0
    while not unimodal_test:
        min_instance_per_class *= 2
        instances_in_sphere, _, _, instances_in_sphere_libfolding = erl_tabular.generate_instances_inside_sphere(
            growing_sphere,
            growing_sphere.radius,
            closest_counterfactual,
            farthest_distance,
            min_instance_per_class,
            position_instances_in_sphere,
            nb_training_instance_in_sphere,
            libfolding=True)
        unimodal_test, counterfactual_instances_in_sphere = _libfolding_unimodal_test(
            erl_tabular, instances_in_sphere, instances_in_sphere_libfolding,
            growing_sphere.radius)
        print(
            "nb times libfolding is not able to determine wheter datas are unimodal or multimodal:",
            nb)
        print("There are ", len(counterfactual_instances_in_sphere),
              " instances in the datas given to libfolding.")
        print()
        nb += 1

    # Anchor explanation of the target instance and the features of its rule.
    anchor_exp = erl_tabular.anchor_explainer.explain_instance(
        instance,
        erl_tabular.black_box_predict,
        threshold=erl_tabular.threshold_precision,
        delta=0.1,
        tau=0.15,
        batch_size=100,
        max_anchor_size=None,
        stop_on_first=False,
        desired_label=None,
        beam_size=4)
    _, _, features_employed_in_rule = erl_tabular.generate_rule_and_data_for_anchors(
        anchor_exp.names(),
        erl_tabular.target_class,
        erl_tabular.train_data,
        simulated_user_experiment=True)

    if not erl_tabular.multimodal_results:
        # Linear explanation trained on the instances sampled in the sphere.
        ls_raw_data = erl_tabular.lime_explainer.explain_instance_training_dataset(
            instance,
            erl_tabular.black_box_predict_proba,
            num_features=nb_features_employed,
            instances_in_sphere=instances_in_sphere)
        sphere_linear_features = [
            feature_weight[0] for feature_weight in ls_raw_data.as_list()
        ]
        _, _, features_employed_by_erl = erl_tabular.generate_rule_and_data_for_anchors(
            sphere_linear_features,
            erl_tabular.target_class,
            erl_tabular.train_data,
            simulated_user_experiment=True)
    else:
        features_employed_by_erl = features_employed_in_rule

    # Plain LIME explanation centred on the closest counterfactual.
    local_surrogate = erl_tabular.lime_explainer.explain_instance(
        closest_counterfactual,
        erl_tabular.black_box_predict_proba,
        num_features=nb_features_employed)
    features_linear_employed = [
        feature_weight[0] for feature_weight in local_surrogate.as_list()
    ]
    _, _, features_employed_in_linear = erl_tabular.generate_rule_and_data_for_anchors(
        features_linear_employed,
        erl_tabular.target_class,
        erl_tabular.train_data,
        simulated_user_experiment=True)

    return features_employed_in_linear, features_employed_by_erl, features_employed_in_rule