Example #1
def predict_and_explain(x, model, exp, num_features, num_samples):
    '''
    Use the model to predict a single example and apply LIME to generate an explanation.
    :param x: Preprocessed image to predict
    :param model: The trained neural network model
    :param exp: A LimeImageExplainer object
    :param num_features: # of features to use in explanation
    :param num_samples: # of times to perturb the example to be explained
    :return: The LIME explanation for the instance and the model's predicted class probabilities
    '''
    def predict(x):
        '''
        Helper function for LIME explainer. Runs model prediction on perturbations of the example.
        :param x: Array of perturbed versions of the example
        :return: A numpy array of class probabilities for each perturbed example
        '''
        probs = predict_instance(x, model)
        return probs

    # Algorithm for superpixel segmentation. Parameters set to limit size of superpixels and promote border smoothness
    segmentation_fn = SegmentationAlgorithm('quickshift',
                                            kernel_size=2.25,
                                            max_dist=50,
                                            ratio=0.1,
                                            sigma=0.15)

    # Generate explanation for the example
    explanation = exp.explain_instance(x.astype(np.double),
                                       predict,
                                       num_features=num_features,
                                       num_samples=num_samples,
                                       segmentation_fn=segmentation_fn)
    probs = predict_instance(np.expand_dims(x, axis=0), model)
    return explanation, probs
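A minimal usage sketch for predict_and_explain (hedged: load_model and preprocess_image are hypothetical helpers, and predict_instance is assumed to be defined elsewhere in the same project):

import numpy as np
import matplotlib.pyplot as plt
from lime import lime_image
from skimage.segmentation import mark_boundaries

model = load_model('model.h5')         # hypothetical helper
x = preprocess_image('example.png')    # hypothetical helper; H x W x C float array
exp = lime_image.LimeImageExplainer()

explanation, probs = predict_and_explain(x, model, exp,
                                         num_features=10,
                                         num_samples=1000)
print('class probabilities:', probs)

# Overlay the explanation for the top predicted class
temp, mask = explanation.get_image_and_mask(explanation.top_labels[0],
                                            positive_only=False,
                                            num_features=10,
                                            hide_rest=False)
plt.imshow(mark_boundaries(temp, mask))
plt.show()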
Example #2
    def explain_image(self, model, data, class_to_explain):
        explainer = lime_image.LimeImageExplainer()
        if data.shape[1] < 50:
            segmenter = SegmentationAlgorithm(
                'quickshift', kernel_size=1, max_dist=200, ratio=0.2)

            explanation = explainer.explain_instance(data[0],
                                                     model.predict,
                                                     top_labels=self.top_labels,
                                                     # hide_color=0,
                                                     num_samples=self.num_samples,
                                                     segmentation_fn=segmenter)

        else:
            explanation = explainer.explain_instance(data[0],
                                                     model.predict,
                                                     top_labels=self.top_labels,
                                                     # hide_color=0,
                                                     num_samples=self.num_samples)

        temp, mask = explanation.get_image_and_mask(class_to_explain,
                                                    positive_only=False,
                                                    num_features=self.num_features,
                                                    hide_rest=False)

        return mark_boundaries(temp / 2 + 0.5, mask)
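Note on the return line: dividing temp by 2 and adding 0.5 assumes the images fed to the model were normalized to [-1, 1]; the division maps them back to [0, 1] for display. A short sketch of both cases (the preprocessing is an assumption, not stated in the snippet):

# If images were normalized to [-1, 1] (e.g., x / 127.5 - 1):
overlay = mark_boundaries(temp / 2 + 0.5, mask)
# If images are already in [0, 1], no rescaling is needed:
overlay = mark_boundaries(temp, mask)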
Example #3
 def __init__(self, model: nn.Module):
     self.model = model
     self.explainer = lime_image.LimeImageExplainer(verbose=False)
     self.segmenter = SegmentationAlgorithm('quickshift',
                                            kernel_size=1,
                                            max_dist=200,
                                            ratio=0.2)
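Since this constructor only stores a PyTorch nn.Module, the explainer still needs a classifier_fn that accepts LIME's numpy batches of shape (N, H, W, C). A minimal sketch of such a wrapper, assuming the model outputs raw logits (make_batch_predict is a hypothetical helper, not part of the original class):

import numpy as np
import torch
import torch.nn.functional as F

def make_batch_predict(model):
    # Builds a classifier_fn for LIME: numpy (N, H, W, C) -> class probabilities.
    def batch_predict(images):
        batch = torch.from_numpy(np.stack(images)).permute(0, 3, 1, 2).float()
        model.eval()
        with torch.no_grad():
            logits = model(batch)
        # Assumption: the model returns logits, so softmax yields probabilities.
        return F.softmax(logits, dim=1).cpu().numpy()
    return batch_predict

# usage: explainer.explain_instance(img, make_batch_predict(model), segmentation_fn=segmenter)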
Example #4
def limeify(image_to_explain, trained_pipeline, class_names):
    logger.info("Start a LIME explanation")
    lime_image_probabilities = trained_pipeline.predict(np.array([image_to_explain]))[0]
    image_probabilities = tuple(zip(class_names, lime_image_probabilities))
    logger.info(
        "Models predicted probabilities for image:\n" + pformat(image_probabilities)
    )
    plt.imshow(image_to_explain)
    plt.show()
    explainer = lime_image.LimeImageExplainer()
    segmenter = SegmentationAlgorithm(
        "quickshift", kernel_size=1, max_dist=200, ratio=0.2
    )
    explanation = explainer.explain_instance(
        image_to_explain,
        trained_pipeline.predict,
        top_labels=10,
        num_samples=1000,
        segmentation_fn=segmenter,
    )
    logger.info("Done with a LIME")
    temp, mask = explanation.get_image_and_mask(
        explanation.top_labels[0], positive_only=False, num_features=5, hide_rest=False
    )
    plt.imshow(mark_boundaries(temp, mask))
    plt.show()
Example #5
 def __init__(self, trainer, num_samples=256, num_features=3, kernel_size=1, batch_size=2):
     ExplainerBase.__init__(self, trainer)
     lime_image.LimeImageExplainer.__init__(self, verbose=False)
     self.segmenter = SegmentationAlgorithm('quickshift', kernel_size=kernel_size, max_dist=200, ratio=0.2)
     self.max_imgs_bs = 1
     self.num_samples = num_samples
     self.num_classes = trainer.options["num_classes"]
     self.num_features = num_features
     self.batch_size = batch_size
Example #6
    def test_instanciate_segmentation_algorithm(self):
        img = img_as_float(chelsea()[::2, ::2])

        # wrapped functions provide the same result
        fn = SegmentationAlgorithm('quickshift', kernel_size=3, max_dist=6,
                                   ratio=0.5, random_seed=133)
        fn_result = fn(img)
        original_result = quickshift(img, kernel_size=3, max_dist=6, ratio=0.5,
                                     random_seed=133)

        # same segments
        self.assertTrue(np.array_equal(fn_result, original_result))
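SegmentationAlgorithm simply looks up the named skimage segmenter and pre-binds its keyword arguments, so the same call pattern covers the other algorithms used on this page (the 'slic' and 'felzenszwalb' parameter values below are the ones appearing in the other examples):

from skimage.data import chelsea
from skimage.util import img_as_float
from lime.wrappers.scikit_image import SegmentationAlgorithm

img = img_as_float(chelsea()[::2, ::2])
slic_fn = SegmentationAlgorithm('slic', n_segments=100, compactness=1, sigma=1)
felzenszwalb_fn = SegmentationAlgorithm('felzenszwalb')  # library defaults
segments = slic_fn(img)  # same call interface as the wrapped quickshift above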
Example #7
File: model.py Project: zillow/kfserving
    def explain(self, request: Dict) -> Dict:
        instances = request["instances"]
        try:
            inputs = np.array(instances[0])
            logging.info("Calling explain on image of shape %s",
                         (inputs.shape, ))
        except Exception as err:
            raise Exception(
                "Failed to initialize NumPy array from inputs: %s, %s" %
                (err, instances))
        try:
            if str.lower(self.explainer_type) == "limeimages":
                explainer = LimeImageExplainer(verbose=False)
                segmenter = SegmentationAlgorithm(self.segmentation_alg,
                                                  kernel_size=1,
                                                  max_dist=200,
                                                  ratio=0.2)
                explanation = explainer.explain_instance(
                    inputs,
                    classifier_fn=self._predict,
                    top_labels=self.top_labels,
                    hide_color=0,
                    num_samples=self.num_samples,
                    segmentation_fn=segmenter)

                temp = []
                masks = []
                for i in range(0, self.top_labels):
                    temp, mask = explanation.get_image_and_mask(
                        explanation.top_labels[i],
                        positive_only=self.positive_only,
                        num_features=10,
                        hide_rest=False,
                        min_weight=self.min_weight)
                    masks.append(mask.tolist())

                return {
                    "explanations": {
                        "temp":
                        temp.tolist(),
                        "masks":
                        masks,
                        "top_labels":
                        np.array(explanation.top_labels).astype(
                            np.int32).tolist()
                    }
                }

        except Exception as err:
            raise Exception("Failed to explain %s" % err)
Example #8
 def get_model_image(self):
     read_image = cv2.imread(self.raw_file)
     read_image = cv2.cvtColor(read_image, cv2.COLOR_BGR2RGB)
     process_image = preprocess_input(read_image)
     explainer = lime_image.LimeImageExplainer()
     explanation = explainer.explain_instance(
         np.array(process_image),
         self.model.predict,
         top_labels=10,
         hide_color=0,
         num_samples=10,
         segmentation_fn=SegmentationAlgorithm('felzenszwalb'))
     temp, mask = explanation.get_image_and_mask(explanation.top_labels[0],
                                                 positive_only=False,
                                                 num_features=10,
                                                 hide_rest=True)
     model_image = mark_boundaries(temp / 2 + 0.5, mask)
     self.getCanvas(model_image)
Example #9
def lime(model, X, y):
    idx = [0, 299, 2, 7, 3, 15, 4]
    explainer = lime_image.LimeImageExplainer()
    segmenter = SegmentationAlgorithm('slic',
                                      n_segments=100,
                                      compactness=1,
                                      sigma=1)
    for i, j in enumerate(idx):
        explanation = explainer.explain_instance(
            X[j].reshape(48, 48),
            lambda x: model.predict(rgb2gray(x).reshape(-1, 48, 48, 1)),
            labels=(i, ),
            top_labels=7,
            hide_color=0,
            num_samples=1000,
            segmentation_fn=segmenter)
        temp, mask = explanation.get_image_and_mask(i,
                                                    positive_only=False,
                                                    num_features=10,
                                                    hide_rest=False)
        plt.imshow(temp / 2 + 0.5)
        plt.savefig(os.path.join(sys.argv[2], 'fig3_' + str(i) + '.jpg'))
        plt.close('all')
Example #10
def plotLime(folder, image_list):
    global seed
    segmenter = SegmentationAlgorithm('quickshift',
                                      kernel_size=1,
                                      max_dist=200,
                                      ratio=0.2,
                                      random_seed=seed)
    explainer = lime_image.LimeImageExplainer(random_state=seed)
    for cnt, idx in enumerate(image_list):
        sample = x_train_rgb[idx]
        explanation = explainer.explain_instance(
            sample,
            classifier_fn=prediction_function,
            top_labels=7,
            hide_color=1,
            num_samples=100,
            random_seed=seed,
            segmentation_fn=segmenter)
        temp, mask = explanation.get_image_and_mask(cnt,
                                                    positive_only=True,
                                                    num_features=3,
                                                    hide_rest=False)
        plt.imshow(mark_boundaries(temp, mask))
        plt.savefig(folder + "fig3_" + str(cnt))
Example #11
    def lime_interpret(self, x_tensor, clss):
        def predict_fun(x):
            # ====================================================================================
            # The original gray image is changed to an RGB image by LIME by default.
            # To use the original classifier, we need to remove the channels added by LIME.
            x = torch.tensor(x[:, :, :, 0],
                             device=config.DEVICE,
                             dtype=torch.float64).view(-1, self.var_num)
            # ====================================================================================
            rst = self.forward(x).detach().cpu().numpy()  # Output 1 * 2 array
            return rst

        # ====================================================================================
        # Each pixel is separated as a single segmentation
        segmenter = SegmentationAlgorithm("quickshift",
                                          kernel_size=1,
                                          max_dist=0.0001,
                                          ratio=0.2)
        # ====================================================================================
        var_num = x_tensor.size()[1]
        x_tensor = x_tensor.view(28, 28)
        explainer = lime_image.LimeImageExplainer(feature_selection='none')
        explanation = explainer.explain_instance(x_tensor.cpu().numpy(),
                                                 predict_fun,
                                                 top_labels=None,
                                                 hide_color=0,
                                                 num_samples=var_num + 2,
                                                 num_features=var_num,
                                                 segmentation_fn=segmenter,
                                                 labels=(clss, ))

        w_lime = sorted(explanation.local_exp[clss], key=lambda i: i[0])
        w_lime = torch.tensor([v for _, v in w_lime],
                              dtype=torch.float64,
                              device=config.DEVICE).unsqueeze(0)
        return w_lime
Example #12
def main():

    dataset = sys.argv[1]

    black_box = 'DNN'
    neigh_type = 'hrgp'

    if len(sys.argv) > 2:
        start_from = int(sys.argv[2])
    else:
        start_from = 0

    random_state = 0
    ae_name = 'aae'
    num_classes = 10

    nbr_experiments = 200

    if dataset not in ['mnist', 'cifar10', 'fashion']:
        print('unknown dataset %s' % dataset)
        return -1

    if black_box not in ['RF', 'AB', 'DNN']:
        print('unknown black box %s' % black_box)
        return -1

    if neigh_type not in ['rnd', 'gntp', 'hrgp']:
        print('unknown neigh type %s' % neigh_type)
        return -1

    path = './'
    path_models = path + 'models/'
    path_results = path + 'results/stability/'
    path_aemodels = path + 'aemodels/%s/%s/' % (dataset, ae_name)

    black_box_filename = path_models + '%s_%s' % (dataset, black_box)
    results_filename = path_results + 'sta_%s_%s_%s.json' % (
        dataset, black_box, neigh_type)

    _, _, X_test, Y_test, use_rgb = get_dataset(dataset)
    bb, transform = get_black_box(black_box,
                                  black_box_filename,
                                  use_rgb,
                                  return_model=True)
    bb_predict, bb_predict_proba = get_black_box(black_box, black_box_filename,
                                                 use_rgb)

    Y_pred = bb_predict(X_test)
    Y_pred_proba = bb_predict_proba(X_test)

    ae = get_autoencoder(X_test, ae_name, dataset, path_aemodels)
    ae.load_model()

    class_name = 'class'
    class_values = ['%s' % i for i in range(len(np.unique(Y_test)))]

    explainer = ILOREM(bb_predict,
                       class_name,
                       class_values,
                       neigh_type=neigh_type,
                       use_prob=True,
                       size=1000,
                       ocr=0.1,
                       kernel_width=None,
                       kernel=None,
                       autoencoder=ae,
                       use_rgb=use_rgb,
                       valid_thr=0.5,
                       filter_crules=True,
                       random_state=random_state,
                       verbose=False,
                       alpha1=0.5,
                       alpha2=0.5,
                       metric=neuclidean,
                       ngen=10,
                       mutpb=0.2,
                       cxpb=0.5,
                       tournsize=3,
                       halloffame_ratio=0.1,
                       bb_predict_proba=bb_predict_proba)

    lime_explainer = lime_image.LimeImageExplainer()
    segmenter = SegmentationAlgorithm('quickshift',
                                      kernel_size=1,
                                      max_dist=200,
                                      ratio=0.2)

    input_tensor = bb.layers[0].input
    last_layer = -2 if dataset == 'mnist' else -1
    bb_model = Model(inputs=input_tensor, outputs=bb.layers[last_layer].output)
    target_tensor = bb_model(input_tensor)
    de_list = ['grad*input', 'saliency', 'intgrad', 'elrp', 'occlusion']

    errors = open(
        path_results + 'errors_stability_%s_%s.csv' % (dataset, black_box),
        'w')

    with DeepExplain(session=K.get_session()) as de:

        for i2e in range(nbr_experiments):

            if i2e < start_from:
                continue

            try:

                print(
                    datetime.datetime.now(),
                    '[%s/%s] %s %s - checking stability' %
                    (i2e, nbr_experiments, dataset, black_box))

                expl_list = list()
                jrow_list = list()

                jrow_coh_o = {
                    'i2e': i2e,
                    'dataset': dataset,
                    'black_box': black_box
                }

                # Create random noise
                img = X_test[i2e]
                bbo = bb_predict(np.array([img]))
                bbop = Y_pred_proba[i2e]
                X_random_noise = generate_random_noise(img,
                                                       bb_predict,
                                                       bbo[0],
                                                       nbr_samples=20)
                # Y_pred_random_noise = bb_predict(X_random_noise)
                Y_pred_proba_random_noise = bb_predict_proba(X_random_noise)

                # plt.subplot(1, 3, 1)
                # plt.imshow(X_random_noise[0], cmap='gray')
                # plt.subplot(1, 3, 2)
                # plt.imshow(X_random_noise[1], cmap='gray')
                # plt.subplot(1, 3, 3)
                # plt.imshow(X_random_noise[2], cmap='gray')
                # plt.show()

                # Alore
                print(datetime.datetime.now(), 'calculating alore')
                exp = explainer.explain_instance(img,
                                                 num_samples=1000,
                                                 use_weights=True,
                                                 metric=neuclidean)
                _, diff = exp.get_image_rule(features=None, samples=100)
                expl_list.append(diff)

                # Lime
                print(datetime.datetime.now(), 'calculating lime')
                exp = lime_explainer.explain_instance(
                    img,
                    bb_predict_proba,
                    top_labels=1,
                    hide_color=0,
                    num_samples=1000,
                    segmentation_fn=segmenter)
                _, mask = exp.get_image_and_mask(bbo[0],
                                                 positive_only=False,
                                                 num_features=5,
                                                 hide_rest=False,
                                                 min_weight=0.01)
                expl_list.append(mask)

                # Deep Explain
                xs = transform(np.array([img]))
                ys = to_categorical(bbo, num_classes)

                for det in de_list:
                    print(datetime.datetime.now(), 'calculating %s' % det)
                    if det == 'shapley_sampling':
                        maps = de.explain(det,
                                          target_tensor,
                                          input_tensor,
                                          xs,
                                          ys=ys,
                                          samples=10)[0]
                    else:
                        maps = de.explain(det,
                                          target_tensor,
                                          input_tensor,
                                          xs,
                                          ys=ys)[0]
                    maps = np.mean(maps, axis=2)
                    expl_list.append(maps)

                lipschitz_list = defaultdict(list)
                lipschitz_list_bb = defaultdict(list)

                print(datetime.datetime.now(), 'calculating lipschitz')
                for i2e1 in range(len(X_random_noise)):
                    img1 = X_random_noise[i2e1]
                    bbo1 = bb_predict(np.array([img1]))
                    bbop1 = Y_pred_proba_random_noise[i2e1]
                    norm_bb = calculate_lipschitz_factor(bbop, bbop1)
                    norm_x = calculate_lipschitz_factor(img, img1)

                    # Alore
                    exp1 = explainer.explain_instance(img1,
                                                      num_samples=1000,
                                                      use_weights=True,
                                                      metric=neuclidean)
                    _, diff1 = exp1.get_image_rule(features=None, samples=100)

                    norm_exp = calculate_lipschitz_factor(expl_list[0], diff1)
                    lipschitz_list['alore'].append(norm_exp / norm_x)
                    lipschitz_list_bb['alore'].append(norm_exp / norm_bb)
                    print(datetime.datetime.now(), '\talore',
                          norm_exp / norm_x)

                    # Lime
                    exp1 = lime_explainer.explain_instance(
                        img1,
                        bb_predict_proba,
                        top_labels=1,
                        hide_color=0,
                        num_samples=1000,
                        segmentation_fn=segmenter)
                    _, mask1 = exp1.get_image_and_mask(bbo[0],
                                                       positive_only=False,
                                                       num_features=5,
                                                       hide_rest=False,
                                                       min_weight=0.01)
                    norm_exp = calculate_lipschitz_factor(expl_list[1], mask1)
                    lipschitz_list['lime'].append(norm_exp / norm_x)
                    lipschitz_list_bb['lime'].append(norm_exp / norm_bb)
                    print(datetime.datetime.now(), '\tlime', norm_exp / norm_x)

                    # DeepExplain
                    xs1 = transform(np.array([img1]))
                    ys1 = to_categorical(bbo1, num_classes)

                    for i, det in enumerate(de_list):
                        if det == 'shapley_sampling':
                            maps1 = de.explain(det,
                                               target_tensor,
                                               input_tensor,
                                               xs1,
                                               ys=ys1,
                                               samples=10)[0]
                        else:
                            maps1 = de.explain(det,
                                               target_tensor,
                                               input_tensor,
                                               xs1,
                                               ys=ys1)[0]
                        maps1 = np.mean(maps1, axis=2)
                        norm_exp = calculate_lipschitz_factor(
                            expl_list[i + 2], maps1)
                        lipschitz_list[det].append(norm_exp / norm_x)
                        lipschitz_list_bb[det].append(norm_exp / norm_bb)
                        print(datetime.datetime.now(), '\t%s' % det,
                              norm_exp / norm_x)

                for k in lipschitz_list:
                    jrow_coh = copy.deepcopy(jrow_coh_o)
                    jrow_coh['method'] = k
                    jrow_coh['mean'] = float(np.nanmean(lipschitz_list[k]))
                    jrow_coh['std'] = float(np.nanstd(lipschitz_list[k]))
                    jrow_coh['max'] = float(np.nanmax(lipschitz_list[k]))
                    jrow_coh['mean_bb'] = float(
                        np.nanmean(lipschitz_list_bb[k]))
                    jrow_coh['std_bb'] = float(np.nanstd(lipschitz_list_bb[k]))
                    jrow_coh['max_bb'] = float(np.nanmax(lipschitz_list_bb[k]))
                    jrow_list.append(jrow_coh)
                    print(
                        datetime.datetime.now(),
                        '[%s/%s] %s %s %s - mean: %.3f, max: %.3f' %
                        (i2e, nbr_experiments, dataset, black_box, k,
                         jrow_coh['mean'], jrow_coh['max']))

            except Exception:
                print('error instance to explain: %d' % i2e)
                errors.write('%d\n' % i2e)
                continue

            results = open(results_filename, 'a')
            for jrow in jrow_list:
                results.write('%s\n' % json.dumps(jrow))
            results.close()

    errors.close()
Example #13
def explain_lime(image,
                 label,
                 model,
                 num_superpixels=10,
                 save_name="lime",
                 save_dir="",
                 imagenet=True):
    """
    Creates an explanation using LIME.
    Arguments:
        image (np.array) :                          array representative of image (width x height x layers)
        label (np.array) :                          prediction of image by classifier (1 x 1)
        model (keras.model) :                       classifier model to explain  
        num_superpixels (int) :                     number of superpixels to highlight
        save_name (str) :                           name of file to save output in (ignore extension)
        save_dir (str) :                            directory to save outputs to
        imagenet (Boolean) :                        mode for more complex, larger images (e.g., imagenet as opposed to mnist)
    Returns:
        list of files (str) of images generated
    """

    if save_dir != "":
        if save_dir[-1] != "/":
            save_dir += "/"

    print("Creating Explainer...")
    explainer = lime_image.LimeImageExplainer()

    if imagenet:
        explanation = explainer.explain_instance(image, model.predict)
        min_weight = 0.0
    else:
        print("Segmenting...")
        #   higher max_dist = fewer clusters
        #   need to have a smaller kernel_size than the default (4) for smaller images
        segmenter = SegmentationAlgorithm('quickshift',
                                          kernel_size=1,
                                          max_dist=255,
                                          ratio=0.4)
        print("Explaining...")
        explanation = explainer.explain_instance(image,
                                                 model.predict,
                                                 segmentation_fn=segmenter)
        min_weight = 0.05

    labels = [0]  #   only 0 if binary
    positive_only = False
    hide_rest = False

    #   lower min_weight to the largest weight seen in the explanation so at least one superpixel is shown
    exp_max_weight = 0
    for label in labels:
        #   get the maximum weight value in the explanation
        exp = explanation.local_exp[label]
        for f, w in exp[:num_superpixels]:
            if w >= exp_max_weight:
                exp_max_weight = w
    if exp_max_weight < min_weight:
        min_weight = exp_max_weight
    print("Exp_max_weight: ", exp_max_weight)
    print("Min_weight: ", min_weight)

    filenames = []
    for label in labels:
        filename = plot_mask_for_label(explanation,
                                       label,
                                       positive_only,
                                       hide_rest,
                                       num_superpixels,
                                       min_weight,
                                       save_name=save_name + "_" + str(label),
                                       imagenet=imagenet,
                                       save_dir=save_dir)
        filenames.append(filename)

    return filenames
Example #14
    explanation = explainer.explain_instance(inputIMG,
                                            classifier_fn = get_probability, top_labels=2, 
                                            hide_color=0, num_samples=100, segmentation_fn=segmenter)

    # display top 5 features
    for i in range(5, 0, -1):
        temp, mask = explanation.get_image_and_mask(0, positive_only=False, num_features=i, hide_rest=False)
        plt.figure()
        plt.imshow(mark_boundaries(temp, mask))


# specify model input, compile, then load the weights
FRmodel= faceRecoModel(input_shape=(3, 96, 96))
FRmodel.compile(optimizer = 'adam', loss = triplet_loss, metrics = ['accuracy'])
load_weights_from_FaceNet(FRmodel)


# fill database
database = {}
fillDatabase('images', database)


# init LIME explainer and segmentation function
explainer = lime_image.LimeImageExplainer(verbose = False)
segmenter = SegmentationAlgorithm('slic', n_segments=50, compactness=1, sigma=1)


# see explanations
get_explanation("images/greg_positive.jpg")
get_explanation("images/maxim_positive.jpg")
Example #15
def explain(params=None):
    DCG, gen, disc, g_index, d_index, normalize_by_mean = params
    Generator = DCG.DCGANG_1
    Discriminator = DCG.DCGAND_1
    BATCH_SIZE = FLAGS.batch_size
    with tf.Graph().as_default() as graph:
        noise_tf = tf.convert_to_tensor(noise, dtype=tf.float32)
        fake_data = Generator(BATCH_SIZE)
        disc_fake, pre_fake = Discriminator(fake_data)
        gen_vars = lib.params_with_name('Generator')
        gen_saver = tf.train.Saver(gen_vars)
        disc_vars = lib.params_with_name("Discriminator")
        disc_saver = tf.train.Saver(disc_vars)
        ckpt_gen = tf.train.get_checkpoint_state(
            "./saved_models/" + gen + "/")
        ckpt_disc = tf.train.get_checkpoint_state(
            "./saved_models/" + disc + "/")
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.33)
        config = tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options)
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            sess.run(tf.global_variables_initializer())
            if ckpt_gen and ckpt_gen.model_checkpoint_path:
                gen_saver.restore(sess, ckpt_gen.model_checkpoint_path)
            else:
                print("Failed to load Generator", gen)
            if ckpt_disc and ckpt_disc.model_checkpoint_path:
                disc_saver.restore(
                    sess, ckpt_disc.model_checkpoint_path)

                def disc_prediction(image):
                    # make fake batch:
                    # Transform to -1 to 1:
                    if np.max(image) > 1.1 or np.min(image) > 0.0:
                        image = (image.astype(np.float32) * 2.0 / 255.0) - 1.0
                    if len(image.shape) == 4:
                        no_ims = image.shape[0]
                    else:
                        no_ims = 1
                    images_batch = np.zeros(
                        [256, 64, 64, 3]).astype(np.float32)
                    images_batch[0:no_ims] = image
                    prediction, _ = sess.run([Discriminator(images_batch)])[0]
                    # Map the raw discriminator score from (-inf, +inf) to a probability in (0, 1)
                    pred_array = np.zeros((no_ims, 2))
                    for i, x in enumerate(prediction[:no_ims]):
                        if normalize_by_mean:
                            bias = means_matrix[g_index][d_index]
                            pred_array[i, 1] = expit(x-bias)
                            pred_array[i, 0] = 1 - pred_array[i, 1]
                        else:
                            pred_array[i, 1] = expit(x)
                            pred_array[i, 0] = 1 - pred_array[i, 1]
                    return pred_array
                images_to_explain = sess.run(
                    [Generator(no_samples, noise=noise_tf)])[0]
                images_to_explain = (images_to_explain + 1.0) * 255.0 / 2.0
                images_to_explain = images_to_explain.astype(np.uint8)
                images_to_explain = np.reshape(
                    images_to_explain, [no_samples, 64, 64, 3])
                explanations = []
                explainer = lime_image.LimeImageExplainer(verbose=False)
                segmenter = SegmentationAlgorithm(
                    'slic', n_segments=100, compactness=1, sigma=1)
                for image_to_explain in tqdm(images_to_explain):
                    explanation = explainer.explain_instance(image_to_explain,
                                                             classifier_fn=disc_prediction, batch_size=256,
                                                             top_labels=2, hide_color=None, num_samples=no_perturbed_images,
                                                             segmentation_fn=segmenter)
                    explanations.append(explanation)
                make_figures(images_to_explain, explanations,
                             DCG.get_G_dim(), DCG.get_D_dim(), normalize_by_mean)
            else:
                print("Failed to load Discriminator", disc)
Example #16
 def __init__(self, var_num, img_size):
     # Make sure that each pixel is a segmentation
     self.segmenter = SegmentationAlgorithm("quickshift", kernel_size=1, max_dist=0.0001, ratio=0.2)
     self.var_num = var_num
     self.py = None
     self.img_size = img_size
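The kernel_size=1 / max_dist=0.0001 combination is intended to force quickshift to put every pixel in its own segment, so each pixel becomes an individual LIME feature. A quick sanity check of that assumption (shapes are illustrative):

import numpy as np
from lime.wrappers.scikit_image import SegmentationAlgorithm

img = np.random.rand(28, 28, 3)
segments = SegmentationAlgorithm("quickshift", kernel_size=1,
                                 max_dist=0.0001, ratio=0.2)(img)
# Expect one segment per pixel if the trick works as intended.
assert np.unique(segments).shape[0] == 28 * 28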
Example #17
	def process(self, file_path, perturbation=50, rnge=5):
		# Should take file_path and return maskLst, age prediction, and an overlay.

		# Get Downsized Image
		origImg = self.get_original_image(file_path)
		resizedImg = self.get_downsized_image(file_path)

		print("downsized image...")

		# Instantiate the Explainer and Segmenter
		explainer = lime_image.LimeImageExplainer(verbose = False)
		segmenter = SegmentationAlgorithm('slic', n_segments=100, compactness=1, sigma=1)

		print("generating explanation...")

		# Generate Explanation from LIME
		explanation = explainer.explain_instance(resizedImg, classifier_fn = self.SingleYearPredictor, top_labels=101, hide_color=0, num_samples=perturbation, segmentation_fn=segmenter)

		print("generating model predictions...")
		# Generate model predictions
		preds=self.SingleYearPredictor(np.asarray([resizedImg]))[0]
		specificAgePrediction = [i for i, j in enumerate(preds) if j == max(preds)][0]


		print("collecting masks...")
		# Collect all the masks from each age. Store in a List.
		maskLst=[]
		for i in range(101):
			temp, mask = explanation.get_image_and_mask(i, positive_only=True, num_features=5, hide_rest=False, min_weight=0.01)
			maskLst.append(mask)

		print("generating age range estimation of bounding box...")
		
		# Generate Age Estimation of the range
		vector=self.AreaAgeEstimatorVector(maskLst, (0,0), (63,63))
		rngeVec=self.AreaAgeEstimatorRange(vector, rnge=rnge)

		# Give the most representative range
		rangeMode = [i for i, j in enumerate(rngeVec) if j == max(rngeVec)][0]
		
		# Generate Tuple representing range
		predictionOfBox = (rangeMode, rangeMode+rnge)

		print("returning answer...")
		# Returns a tuple of representative Image+Mask and age range of box.
		# Example: (IMG, (21, 26))

		print("specificAgePrediction", specificAgePrediction)
		print("rngeVec", np.asarray(rngeVec))
		print("vector", vector)


		# New Addition:  Overlaying Mask onto originalSized Image.
		origDim = origImg.shape

		mask = maskLst[specificAgePrediction]

		reMask = imresize(mask, origDim)

		# Make Mask boolean 2D array
		for i in range(len(reMask)):
			for j in range(len(reMask[0])):
				if reMask[i][j] != 0:
					reMask[i][j] = 1

		grayImg = cv2.cvtColor(origImg, cv2.COLOR_RGB2GRAY)
		overlay = label2rgb(reMask,grayImg, bg_label = 0)

		facialFeatures = self.process_facial_feature(file_path, maskLst, specificAgePrediction)

		laymans = self.laymans_explanation(facialFeatures, specificAgePrediction)

		return (maskLst, overlay, specificAgePrediction, laymans)
Example #18
        # load the class label
        label_map = load_class_label()

    else:
        print('Invalid dataset!!')
        exit(0)

    pytorch_explainer = lime_image.LimeImageExplainer(
        random_state=args.lime_explainer_seed)
    slic_parameters = {
        'n_segments': args.lime_superpixel_num,
        'compactness': 30,
        'sigma': 3
    }
    segmenter = SegmentationAlgorithm('slic', **slic_parameters)
    pill_transf = get_pil_transform()

    #########################################################
    # Function to compute probabilities
    # Pytorch
    pytorch_preprocess_transform = get_pytorch_preprocess_transform()

    def pytorch_batch_predict(images):
        batch = torch.stack(tuple(
            pytorch_preprocess_transform(i) for i in images),
                            dim=0)
        batch = batch.to('cuda')

        if args.if_pre == 1:
            logits = pytorch_model(batch)
Example #19
def main():

    dataset = 'mnist'
    black_box = 'RF'

    path = './'
    path_models = path + 'models/'

    black_box_filename = path_models + '%s_%s' % (dataset, black_box)

    _, _, X_test, Y_test, use_rgb = get_dataset(dataset)
    bb_predict, bb_predict_proba = get_black_box(black_box, black_box_filename,
                                                 use_rgb)

    lime_explainer = lime_image.LimeImageExplainer()
    segmenter = SegmentationAlgorithm('quickshift',
                                      kernel_size=1,
                                      max_dist=200,
                                      ratio=0.2)

    i2e = 1
    img = X_test[i2e]

    exp = lime_explainer.explain_instance(img,
                                          bb_predict_proba,
                                          top_labels=1,
                                          hide_color=0,
                                          num_samples=1000,
                                          segmentation_fn=segmenter)
    print(exp.local_exp)
    print(exp.local_pred)

    # print(lime_explainer.Zlr)
    # print(lime_explainer.Zl)

    label = bb_predict(np.array([X_test[i2e]]))[0]
    print(label)

    # print(lime_explainer.Zl[:, label][0])
    # print(lime_explainer.lr.predict(lime_explainer.Zlr)[0])

    bb_probs = lime_explainer.Zl[:, label]
    lr_probs = lime_explainer.lr.predict(lime_explainer.Zlr)

    print(1 - np.sum(np.abs(np.round(bb_probs) - np.round(lr_probs))) /
          len(bb_probs))

    img2show, mask = exp.get_image_and_mask(Y_test[i2e],
                                            positive_only=False,
                                            num_features=5,
                                            hide_rest=False,
                                            min_weight=0.01)
    plt.imshow(label2rgb(mask, img2show, bg_label=0), interpolation='nearest')
    plt.show()

    img2show, mask = exp.get_image_and_mask(Y_test[i2e],
                                            positive_only=True,
                                            num_features=5,
                                            hide_rest=True,
                                            min_weight=0.01)
    plt.imshow(img2show.astype(int), cmap=None if use_rgb else 'gray')  # np.int was removed in NumPy 1.24
    plt.show()
Example #20
File: views.py Project: hojuly/classifier
def predict(request):
    """ Predict - Show Image(with lime) and Probabilities """
    import numpy as np
    import matplotlib.pyplot as plt

    from skimage.segmentation import mark_boundaries
    from keras.preprocessing import image
    from keras.models import load_model
    from lime.lime_image import LimeImageExplainer
    from lime.wrappers.scikit_image import SegmentationAlgorithm

    if request.method == 'POST' and request.FILES['test']:
        if not os.path.exists(os.path.join(STATIC_URL, 'img/test/')):
            os.mkdir(os.path.join(STATIC_URL, 'img/test/'))

        test = request.FILES['test']
        with open(os.path.join(STATIC_URL, 'img/test/', 'test.jpg'),
                  'wb+') as destination:
            for chunk in test.chunks():
                destination.write(chunk)

        img = image.load_img(os.path.join(STATIC_URL, 'img/test/', 'test.jpg'),
                             target_size=(128, 128))
        img = image.img_to_array(img)
        img = np.expand_dims(img, axis=0)
        o_img = img / 255

        t_img = o_img[0]  #for lime (4D -> 3D)
        t_img = t_img.astype('double')

        model = cnn_model()
        model.load_weights('./model/cnn_model.h5')
        guess = np.argmax(model.predict(o_img), axis=-1)
        out = 'dog' if guess == 1 else 'cat'

        lime_explainer = LimeImageExplainer()
        segmenter = SegmentationAlgorithm('slic',
                                          n_segments=100,
                                          compactness=1,
                                          sigma=1)
        explanation = lime_explainer.explain_instance(
            t_img, model.predict, segmentation_fn=segmenter)
        temp, mask = explanation.get_image_and_mask(
            model.predict(o_img).argmax(axis=1)[0],
            positive_only=True,
            hide_rest=False)

        fig = plt.figure()
        plt.imshow(mark_boundaries(temp, mask))
        plt.axis('off')
        plt.savefig(os.path.join(STATIC_URL, 'img/test/', 'lime.jpg'), )
        plt.close(fig)

        context = {
            'content': out,
            'prob_cat': model.predict(o_img)[0][0],
            'prob_dog': model.predict(o_img)[0][1],
        }

        return render(request, 'predict/predict.html', context)

    return render(request, 'predict/predict.html', {'content': 'wrong access'})
Example #21
    def explain_instance(self,
                         image,
                         classifier_fn,
                         labels=(1, ),
                         hide_color=None,
                         top_labels=5,
                         num_features=100000,
                         num_samples=1000,
                         batch_size=10,
                         segmentation_fn=None,
                         distance_metric='cosine',
                         model_regressor=None,
                         random_seed=None,
                         care_segments=None,
                         spans=(2, ),
                         include_original_feature=True):
        """Generates explanations for a prediction.

        First, we generate neighborhood data by randomly perturbing features
        from the instance (see __data_inverse). We then learn locally weighted
        linear models on this neighborhood data to explain each of the classes
        in an interpretable way (see lime_base.py).

        Args:
            image: 3 dimension RGB image. If this is only two dimensional,
                we will assume it's a grayscale image and call gray2rgb.
            classifier_fn: classifier prediction probability function, which
                takes a numpy array and outputs prediction probabilities.  For
                ScikitClassifiers , this is classifier.predict_proba.
            labels: iterable with labels to be explained.
            hide_color: TODO
            top_labels: if not None, ignore labels and produce explanations for
                the K labels with highest prediction probabilities, where K is
                this parameter.
            num_features: maximum number of features present in explanation
            num_samples: size of the neighborhood to learn the linear model
            batch_size: TODO
            distance_metric: the distance metric to use for weights.
            model_regressor: sklearn regressor to use in explanation. Defaults
            to Ridge regression in LimeBase. Must have model_regressor.coef_
            and 'sample_weight' as a parameter to model_regressor.fit()
            segmentation_fn: SegmentationAlgorithm, wrapped skimage
            segmentation function
            random_seed: integer used as random seed for the segmentation
                algorithm. If None, a random integer, between 0 and 1000,
                will be generated using the internal random number generator.

        Returns:
            An Explanation object (see explanation.py) with the corresponding
            explanations.
        """
        self.care_segments = care_segments
        self.spans = spans
        self.include_original_feature = include_original_feature

        if len(image.shape) == 2:
            image = gray2rgb(image)
        if random_seed is None:
            random_seed = self.random_state.randint(0, high=1000)

        if segmentation_fn is None:
            segmentation_fn = SegmentationAlgorithm('quickshift',
                                                    kernel_size=4,
                                                    max_dist=200,
                                                    ratio=0.2,
                                                    random_seed=random_seed)
        try:
            segments = segmentation_fn(image)
        except ValueError as e:
            raise e

        fudged_image = image.copy()
        if hide_color is None:
            for x in np.unique(segments):
                fudged_image[segments == x] = (np.mean(
                    image[segments == x][:, 0]),
                                               np.mean(
                                                   image[segments == x][:, 1]),
                                               np.mean(
                                                   image[segments == x][:, 2]))
        else:
            fudged_image[:] = hide_color

        top = labels

        data, labels = self.data_labels(image,
                                        fudged_image,
                                        segments,
                                        classifier_fn,
                                        num_samples,
                                        batch_size=batch_size)

        distances = sklearn.metrics.pairwise_distances(
            data, data[0].reshape(1, -1), metric=distance_metric).ravel()

        ret_exp = CLEImageExplanation(
            image,
            segments,
            self.all_combinations,
            care_segments=self.care_segments,
            spans=self.spans,
            include_original_feature=self.include_original_feature)
        if top_labels:
            top = np.argsort(labels[0])[-top_labels:]
            ret_exp.top_labels = list(top)
            ret_exp.top_labels.reverse()
        for label in top:
            (ret_exp.intercept[label], ret_exp.local_exp[label], ret_exp.score,
             ret_exp.local_pred) = self.base.explain_instance_with_data(
                 data,
                 labels,
                 distances,
                 label,
                 num_features,
                 model_regressor=model_regressor,
                 feature_selection=self.feature_selection)
        return ret_exp
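A hypothetical call to this variant (the class name CLEImageExplainer and model.predict are assumptions for illustration; the method above is assumed to live on a LimeImageExplainer subclass):

explainer = CLEImageExplainer()  # hypothetical subclass exposing explain_instance above
explanation = explainer.explain_instance(image,
                                         model.predict,
                                         top_labels=5,
                                         num_samples=1000,
                                         care_segments=None,
                                         spans=(2, ),
                                         include_original_feature=True)
print(explanation.top_labels)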
Example #22
    def data_labels(self, num_samples, classifier_fn, detection=False):
        """
        Steps of this function:
            1. generate perturbed text features and image features
            2. in a loop, 1) using these features to make instances of perturbed (text, image) pairs,
                          2) make predictions on these pairs, store labels into 'labels'
            3. concatenate text and image features, store into 'data',
                also append the original input and prediction of it
            4. calculate distances
            Arguments:
                classifier_fn: classification function to give predictions for given texts and images
                num_samples: size of the neighborhood to learn the linear model
                detection: Whether object detection method is invoked, default to be false
            Returns:
                data: dense num_samples * num_superpixels matrix
                labels: prediction probabilities matrix
                distances: combined distance, weighted by the text/image ratio, where
                    the text and image distances are cosine distances between the
                    original instance and each perturbed instance (computed in the
                    binary 'data' matrix), times 100
                doc_size: number of words in the indexed string (the string with various indexes)
                n_img_features: number of superpixels to include in the explanation
                segments: 2d numpy array with the output from skimage.segmentation
                domain_mapper: maps text feature ids to words or word-positions
                num_object_detection: number of detected objects to include in the explanation
                ori_label: numpy array of detected objects in the original image
                ratio_txt_img: weight ratio between text and image features
        """

        """ 1. make text features """
        indexed_string = IndexedString(
            self.text, bow=True, split_expression=r"\W+", mask_string=None
        )
        domain_mapper = TextDomainMapper(indexed_string)

        doc_size = indexed_string.num_words()
        sample = self.random_state.randint(
            1, doc_size + 1, num_samples
        )  # num_samples - 1
        data_txt = np.ones((num_samples, doc_size))
        # data[0] = np.ones(doc_size)
        features_range = range(doc_size)
        inverse_data_txt = []

        """ 1. make image features """
        random_seed = self.random_state.randint(0, high=1000)
        segmentation_fn = SegmentationAlgorithm(
            "quickshift",
            kernel_size=4,
            max_dist=200,
            ratio=0.2,
            random_seed=random_seed,
        )

        # segmentation_fn = SegmentationAlgorithm('felzenszwalb', scale=200, sigma=2, min_size=100)
        """segmentation_fn = SegmentationAlgorithm('slic', n_segments=60, compactness=10, sigma=1,
                     start_label=1)"""

        segments = segmentation_fn(self.image)  # get segmentation
        n_img_features = np.unique(segments).shape[0]  # get num of superpixel features
        data_img = self.random_state.randint(
            0, 2, n_img_features * num_samples
        ).reshape((num_samples, n_img_features))
        data_img_rows = tqdm(data_img)
        imgs = []

        """ 1. make object detection features 
        if detection:
            predictor, cfg = object_detection_predictor()
            ori_label = object_detection_obtain_label(predictor, cfg, self.image)
            num_object_detection = ori_label.shape[0]
            data_object_detection = np.zeros((num_samples,num_object_detection))"""

        # create fudged_image
        fudged_image = self.image.copy()
        for x in np.unique(segments):
            fudged_image[segments == x] = (
                np.mean(self.image[segments == x][:, 0]),
                np.mean(self.image[segments == x][:, 1]),
                np.mean(self.image[segments == x][:, 2]),
            )

        # img_features[0, :] = 1  # the first sample is the full image                                # num_samples

        """2. create data instances and make predictions"""
        labels = []
        for i, instance in enumerate(zip(sample, data_img_rows)):
            size_txt, row_img = instance

            # make text instance
            inactive = self.random_state.choice(features_range, size_txt, replace=False)
            data_txt[i, inactive] = 0
            inverse_data_txt.append(indexed_string.inverse_removing(inactive))

            # make image instance
            temp = copy.deepcopy(self.image)
            zeros = np.where(row_img == 0)[
                0
            ]  # get segment numbers that are turned off in this instance
            mask = np.zeros(segments.shape).astype(bool)
            for zero in zeros:
                mask[segments == zero] = True
            temp[mask] = fudged_image[mask]

            """if detection:
                label = object_detection_obtain_label(predictor, cfg, temp)
                label_diff = compare_labels(ori_label,label)
                data_object_detection[i] = label_diff"""
            imgs.append(temp)

            # make prediction and append result
            if len(imgs) == 10:
                preds = classifier_fn(self.pred_model, imgs, inverse_data_txt)
                labels.extend(preds)
                imgs = []
                inverse_data_txt = []

        if len(imgs) > 0:
            preds = classifier_fn(self.pred_model, imgs, inverse_data_txt)
            labels.extend(preds)

        """3. concatenate and append features"""
        data = np.concatenate((data_txt, data_img), axis=1)

        # append the original input to the last
        orig_img_f = np.ones((n_img_features,))
        orig_txt_f = np.ones(doc_size)

        """if detection:
            data = np.concatenate((data, data_object_detection),axis=1)
            orig_ot = np.ones(num_object_detection)
            data = np.vstack((data, np.concatenate((np.concatenate((orig_txt_f, orig_img_f)),orig_ot))))
        else:"""
        data = np.vstack((data, np.ones((data.shape[1]))))  ###

        labels.extend(classifier_fn(self.pred_model, [self.image], [self.text]))

        """4. compute distance# distances[:, :(doc_size-1)] *= 100
            use platt scaling t get relative importance of text and image modalities
        """

        labels = np.array(labels, dtype=float)

        # Modify MMF source code to zero out image / text attributes
        # dummy_label_image = np.array(classifier_fn([self.image], [self.text], zero_text=True))  # zero out text
        # dummy_label_text = np.array(classifier_fn([self.image], [self.text], zero_image=True))  # zero out image

        # perform calibration
        try:
            labels_for_calib = np.array(labels[:, 0] < 0.5, dtype=float)
            calibrated = CalibratedClassifierCV(cv=3)
            calibrated.fit(data[:, : doc_size + n_img_features], labels_for_calib)

            calib_data = np.ones((3, doc_size + n_img_features), dtype=float)
            calib_data[0][:doc_size] = 0  # zero out text
            calib_data[1][doc_size:] = 0  # zero out image
            calibrated_labels = calibrated.predict_proba(calib_data)

            delta_txt = abs(calibrated_labels[-1][0] - calibrated_labels[0][0])
            delta_img = abs(calibrated_labels[-1][0] - calibrated_labels[1][0])

            ratio_txt_img = max(min(100, delta_txt / delta_img), 0.01)
        except Exception:
            dummy_text = ""
            dummy_image = np.zeros_like(self.image)
            label_text_out = np.array(
                classifier_fn(
                    self.pred_model, [self.image], [self.text], zero_text=True
                )
            )  # zero out text
            label_image_out = np.array(
                classifier_fn(
                    self.pred_model, [self.image], [self.text], zero_image=True
                )
            )  # zero out image

            delta_txt = abs(labels[-1][0] - label_text_out[0][0])
            delta_img = abs(labels[-1][0] - label_image_out[0][0])
            ratio_txt_img = max(min(10, delta_txt / delta_img), 0.1)

        # calculate distances
        distances_img = sklearn.metrics.pairwise_distances(
            data[:, doc_size:], data[-1, doc_size:].reshape(1, -1), metric="cosine"
        ).ravel()

        def distance_fn(x):
            return sklearn.metrics.pairwise.pairwise_distances(
                x, x[-1], metric="cosine"
            ).ravel()

        distances_txt = distance_fn(sp.sparse.csr_matrix(data[:, :doc_size]))

        distances = (
            1 / (1 + ratio_txt_img) * distances_img
            + (1 - 1 / (1 + ratio_txt_img)) * distances_txt
        )

        # As required by lime_base, make the first element of data, labels, distances the original data point
        data[0] = data[-1]
        labels[0] = labels[-1]
        distances[0] = distances[-1]

        """if not detection:"""
        num_object_detection = 0
        ori_label = None

        return (
            data,
            labels,
            distances,
            doc_size,
            n_img_features,
            segments,
            domain_mapper,
            num_object_detection,
            ori_label,
            ratio_txt_img,
        )
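For reference, a hypothetical call sketching how the ten return values unpack (explainer_obj and multimodal_predict are assumed stand-ins for an instance of this class and a classifier_fn matching the signature used above):

(data, labels, distances, doc_size, n_img_features, segments,
 domain_mapper, num_object_detection, ori_label,
 ratio_txt_img) = explainer_obj.data_labels(num_samples=1000,
                                            classifier_fn=multimodal_predict)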
Example #23
def main():

    dataset = sys.argv[1]
    black_box = sys.argv[2]

    # dataset = 'mnist'
    # black_box = 'RF'

    nbr_experiments = 200

    if dataset not in ['mnist', 'cifar10', 'fashion']:
        print('unknown dataset %s' % dataset)
        return -1

    if black_box not in ['RF', 'AB', 'DNN']:
        print('unknown black box %s' % black_box)
        return -1

    path = './'
    path_models = path + 'models/'
    path_results = path + 'results/fcp/'
    path_neigh = './neigh/'

    black_box_filename = path_models + '%s_%s' % (dataset, black_box)
    results_filename = path_results + 'lime_p_%s_%s.json' % (dataset,
                                                             black_box)
    neigh_filename = path_neigh + 'lime_%s_%s.json' % (dataset, black_box)

    _, _, X_test, Y_test, use_rgb = get_dataset(dataset)
    bb, transform = get_black_box(black_box,
                                  black_box_filename,
                                  use_rgb,
                                  return_model=True)
    bb_predict, bb_predict_proba = get_black_box(black_box, black_box_filename,
                                                 use_rgb)

    lime_explainer = lime_image.LimeImageExplainer()
    segmenter = SegmentationAlgorithm('quickshift',
                                      kernel_size=1,
                                      max_dist=200,
                                      ratio=0.2)

    for i2e in range(nbr_experiments):
        img = X_test[i2e]

        start_time = datetime.datetime.now()
        exp = lime_explainer.explain_instance(img,
                                              bb_predict_proba,
                                              top_labels=1,
                                              hide_color=0,
                                              num_samples=1000,
                                              segmentation_fn=segmenter)
        run_time = (datetime.datetime.now() - start_time).total_seconds()

        label = bb_predict(np.array([X_test[i2e]]))[0]

        bb_probs = lime_explainer.Zl[:, label]
        lr_probs = lime_explainer.lr.predict(lime_explainer.Zlr)

        fidelity = 1 - np.sum(
            np.abs(bb_probs - lr_probs) < 0.01) / len(bb_probs)

        img_cdist = transform(np.array([img]))
        Z_cdist = transform(lime_explainer.Z)

        if black_box == 'DNN':
            img_cdist = np.array([x.ravel() for x in img_cdist])
            Z_cdist = np.array([x.ravel() for x in Z_cdist])

        rdist = cdist(img_cdist, Z_cdist, metric='euclidean')
        compact, compact_var = float(np.mean(rdist)), float(np.std(rdist))

        sdist = pairwise_distances(lime_explainer.Zlr,
                                   np.array([lime_explainer.Zlr[0]]),
                                   metric='cosine').ravel()
        lcompact, lcompact_var = float(np.mean(sdist)), float(np.std(sdist))

        X_test_cdist = transform(X_test)
        if black_box == 'DNN':
            X_test_cdist = np.array([x.ravel() for x in X_test_cdist])

        dist = cdist(img_cdist, X_test_cdist, metric='euclidean')
        nbr_real_instances = len(X_test)
        plausibility = calculate_plausibilities(rdist, dist,
                                                nbr_real_instances)

        print(
            datetime.datetime.now(),
            '[%s/%s] %s %s - f: %.2f, c: %.2f, lc: %.2f, p: %.2f' %
            (i2e, nbr_experiments, dataset, black_box, fidelity, compact,
             lcompact, plausibility[-2]))

        Z = lime_explainer.Z
        Zl = lime_explainer.Zlr

        store_fcpn(i2e, results_filename, neigh_filename, dataset, black_box,
                   fidelity, compact, compact_var, lcompact, lcompact_var,
                   plausibility, run_time, Z, Zl, 'rnd')
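
The fidelity score logged above is worth isolating: it is one minus the fraction of neighborhood points where the surrogate's prediction matches the black box to within 0.01. A self-contained sketch with synthetic probability arrays standing in for lime_explainer.Zl and the surrogate output:

import numpy as np

rng = np.random.RandomState(0)
bb_probs = rng.rand(1000)                          # black-box probabilities on the neighborhood
lr_probs = bb_probs + rng.normal(0, 0.005, 1000)   # surrogate predictions, mostly close

# same formula as above
fidelity = 1 - np.sum(np.abs(bb_probs - lr_probs) < 0.01) / len(bb_probs)
print(fidelity)  # small when the surrogate tracks the black box closely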
Example #24
    def data_labels(self, num_samples, classifier_fn, detection=False):
        '''
        Steps of this function:
            1. generate perturbed text features and image features
            2. in a loop, 1) using these features to make instances of perturbed (text, image) pairs,
                          2) make predictions on these pairs, store labels into 'labels'
            3. concatenate text and image features, store into 'data',
                also append the original input and prediction of it
            4. calculate distances

            TODO: add object detection: first run on original image, create feature components,
                    then run on perturbed images to get corresponding value

        :param num_samples: number of perturbed samples to generate
        :param classifier_fn: prediction function taking (model, images, texts)
        :param detection: whether to run object detection on the perturbed images (currently disabled)
        :return:
        '''

        ''' 1. make text features '''
        indexed_string = IndexedString(self.text, bow=True, split_expression=r'\W+', mask_string=None)
        domain_mapper = TextDomainMapper(indexed_string)

        doc_size = indexed_string.num_words()
        sample = self.random_state.randint(1, doc_size + 1, num_samples)  # number of words to switch off in each sample
        data_txt = np.ones((num_samples, doc_size))
        # data[0] = np.ones(doc_size)
        features_range = range(doc_size)
        inverse_data_txt = []

        ''' 1. make image features '''
        random_seed = self.random_state.randint(0, high=1000)
        segmentation_fn = SegmentationAlgorithm('quickshift', kernel_size=4,
                                                max_dist=200, ratio=0.2,
                                                random_seed=random_seed)

        #segmentation_fn = SegmentationAlgorithm('felzenszwalb', scale=200, sigma=2, min_size=100)
        '''segmentation_fn = SegmentationAlgorithm('slic', n_segments=60, compactness=10, sigma=1,
                     start_label=1)'''

        segments = segmentation_fn(self.image)  # get segmentation
        n_img_features = np.unique(segments).shape[0]  # get num of superpixel features
        data_img = self.random_state.randint(0, 2, n_img_features * num_samples).reshape(
            (num_samples, n_img_features))
        data_img_rows = tqdm(data_img)
        imgs = []

        ''' 1. make object detection features 
        if detection:
            predictor, cfg = object_detection_predictor()
            ori_label = object_detection_obtain_label(predictor, cfg, self.image)
            num_object_detection = ori_label.shape[0]
            data_object_detection = np.zeros((num_samples,num_object_detection))'''
        
        # create fudged_image
        fudged_image = self.image.copy()
        for x in np.unique(segments):
            fudged_image[segments == x] = (
                np.mean(self.image[segments == x][:, 0]),
                np.mean(self.image[segments == x][:, 1]),
                np.mean(self.image[segments == x][:, 2]))

        # img_features[0, :] = 1  # the first sample is the full image

        '''2. create data instances and make predictions'''
        labels = []
        for i, instance in enumerate(zip(sample, data_img_rows)):
            size_txt, row_img = instance

            # make text instance
            inactive = self.random_state.choice(features_range, size_txt,
                                                replace=False)
            data_txt[i, inactive] = 0
            inverse_data_txt.append(indexed_string.inverse_removing(inactive))

            # make image instance
            temp = copy.deepcopy(self.image)
            zeros = np.where(row_img == 0)[0]             # get segment numbers that are turned off in this instance
            mask = np.zeros(segments.shape).astype(bool)
            for zero in zeros:
                mask[segments == zero] = True
            temp[mask] = fudged_image[mask]

            '''if detection:
                label = object_detection_obtain_label(predictor, cfg, temp)
                label_diff = compare_labels(ori_label,label)
                data_object_detection[i] = label_diff'''
            imgs.append(temp)

            # make prediction and append result
            if len(imgs) == 10:
                preds = classifier_fn(self.pred_model, imgs, inverse_data_txt)
                labels.extend(preds)
                imgs = []
                inverse_data_txt = []

        if len(imgs) > 0:
            preds = classifier_fn(self.pred_model, imgs, inverse_data_txt)
            labels.extend(preds)

        '''3. concatenate and append features'''
        data = np.concatenate((data_txt, data_img), axis=1)

        # append the original input to the last
        orig_img_f = np.ones((n_img_features,))
        orig_txt_f = np.ones(doc_size)

        '''if detection:
            data = np.concatenate((data, data_object_detection),axis=1)
            orig_ot = np.ones(num_object_detection)
            data = np.vstack((data, np.concatenate((np.concatenate((orig_txt_f, orig_img_f)),orig_ot))))
        else:'''
        data = np.vstack((data, np.ones(data.shape[1])))  # append the unperturbed instance as the last row
            
        labels.extend(classifier_fn(self.pred_model, [self.image], [self.text]))


        '''4. compute distances;
            use Platt scaling to get the relative importance of the text and image modalities
        '''

        labels = np.array(labels, dtype=float)

        # Modify MMF source code to zero out image / text attributes
        #dummy_label_image = np.array(classifier_fn([self.image], [self.text], zero_text=True))  # zero out text
        #dummy_label_text = np.array(classifier_fn([self.image], [self.text], zero_image=True))  # zero out image

        # perform calibration
        try:
            labels_for_calib = np.array(labels[:, 0] < 0.5, dtype=float)
            calibrated = CalibratedClassifierCV(cv=3)
            calibrated.fit(data[:,:doc_size + n_img_features], labels_for_calib)

            calib_data = np.ones((3, doc_size + n_img_features), dtype=float)
            calib_data[0][:doc_size] = 0        # zero out text
            calib_data[1][doc_size:] = 0        # zero out image
            calibrated_labels = calibrated.predict_proba(calib_data)

            delta_txt = abs(calibrated_labels[-1][0] - calibrated_labels[0][0])
            delta_img = abs(calibrated_labels[-1][0] - calibrated_labels[1][0])

            ratio_txt_img = max(min(10, delta_txt/delta_img), 0.1)
        except Exception:
            dummy_text = ""
            dummy_image = np.zeros_like(self.image)
            try:
                label_text_out = np.array(classifier_fn(self.pred_model, [self.image], [self.text], zero_text=True))  # zero out text
                label_image_out = np.array(classifier_fn(self.pred_model, [self.image], [self.text], zero_image=True))  # zero out image
            except Exception:
                label_text_out = np.array(classifier_fn(self.pred_model, [self.image], [dummy_text]))
                label_image_out = np.array(classifier_fn(self.pred_model, [dummy_image], [self.text]))

            delta_txt = abs(labels[-1][0] - label_text_out[0][0])
            delta_img = abs(labels[-1][0] - label_image_out[0][0])
            ratio_txt_img = max(min(10, delta_txt / delta_img), 0.1)

        # calculate distances
        distances_img = sklearn.metrics.pairwise_distances(
            data[:, doc_size:],
            data[-1, doc_size:].reshape(1, -1),
            metric='cosine'
        ).ravel()

        def distance_fn(x):
            return sklearn.metrics.pairwise.pairwise_distances(
                x, x[-1], metric='cosine').ravel()

        distances_txt = distance_fn(sp.sparse.csr_matrix(data[:, :doc_size]))

        distances = 1/(1 + ratio_txt_img) * distances_img + (1 - 1/(1 + ratio_txt_img)) * distances_txt

        # As required by lime_base, make the first element of data, labels, distances the original data point
        data[0] = data[-1]
        labels[0] = labels[-1]
        distances[0] = distances[-1]

        # if not detection:
        num_object_detection = 0
        ori_label = None

        return data, labels, distances, doc_size, n_img_features, \
            segments, domain_mapper, num_object_detection, ori_label, ratio_txt_img
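
The calibration step above carries the key idea: fit a calibrated linear model on the perturbation matrix, then probe it with one modality zeroed out to see how much each modality moves the prediction. A self-contained sketch on synthetic data (the labels are made up; shapes and clamping mirror the code):

import numpy as np
from sklearn.calibration import CalibratedClassifierCV

rng = np.random.RandomState(0)
doc_size, n_img = 5, 4
X = rng.randint(0, 2, size=(200, doc_size + n_img)).astype(float)
y = (X[:, :doc_size].mean(axis=1) > 0.5).astype(float)  # depends mostly on the text block

calibrated = CalibratedClassifierCV(cv=3)
calibrated.fit(X, y)

probe = np.ones((3, doc_size + n_img))
probe[0, :doc_size] = 0   # zero out text
probe[1, doc_size:] = 0   # zero out image
p = calibrated.predict_proba(probe)

delta_txt = abs(p[-1][0] - p[0][0])   # prediction shift when text is removed
delta_img = abs(p[-1][0] - p[1][0])   # prediction shift when image is removed
ratio_txt_img = max(min(10, delta_txt / delta_img), 0.1)
print(ratio_txt_img)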
Example #25
def main():

    dataset = sys.argv[1]

    # dataset = 'mnist'

    black_box = 'RF'
    neigh_type = 'hrgp'

    random_state = 0
    ae_name = 'aae'
    num_classes = 10

    nbr_experiments = 200

    if dataset not in ['mnist', 'cifar10', 'fashion']:
        print('unknown dataset %s' % dataset)
        return -1

    if black_box not in ['RF', 'AB', 'DNN']:
        print('unknown black box %s' % black_box)
        return -1

    if neigh_type not in ['rnd', 'gntp', 'hrgp']:
        print('unknown neigh type %s' % neigh_type)
        return -1

    path = './'
    path_models = path + 'models/'
    path_results = path + 'results/coherence/'
    path_aemodels = path + 'aemodels/%s/%s/' % (dataset, ae_name)

    black_box_filename = path_models + '%s_%s' % (dataset, black_box)
    results_filename = path_results + 'coh_%s_%s_%s.json' % (
        dataset, black_box, neigh_type)

    _, _, X_test, Y_test, use_rgb = get_dataset(dataset)
    bb, transform = get_black_box(black_box,
                                  black_box_filename,
                                  use_rgb,
                                  return_model=True)
    bb_predict, bb_predict_proba = get_black_box(black_box, black_box_filename,
                                                 use_rgb)

    Y_pred = bb_predict(X_test)
    Y_pred_proba = bb_predict_proba(X_test)

    X_test_comp = X_test[nbr_experiments:]
    Y_pred_comp = Y_pred[nbr_experiments:]
    Y_pred_proba_comp = Y_pred_proba[nbr_experiments:]

    ae = get_autoencoder(X_test, ae_name, dataset, path_aemodels)
    ae.load_model()

    class_name = 'class'
    class_values = ['%s' % i for i in range(len(np.unique(Y_test)))]

    explainer = ILOREM(bb_predict,
                       class_name,
                       class_values,
                       neigh_type=neigh_type,
                       use_prob=True,
                       size=1000,
                       ocr=0.1,
                       kernel_width=None,
                       kernel=None,
                       autoencoder=ae,
                       use_rgb=use_rgb,
                       valid_thr=0.5,
                       filter_crules=True,
                       random_state=random_state,
                       verbose=False,
                       alpha1=0.5,
                       alpha2=0.5,
                       metric=neuclidean,
                       ngen=10,
                       mutpb=0.2,
                       cxpb=0.5,
                       tournsize=3,
                       halloffame_ratio=0.1,
                       bb_predict_proba=bb_predict_proba)

    lime_explainer = lime_image.LimeImageExplainer()
    segmenter = SegmentationAlgorithm('quickshift',
                                      kernel_size=1,
                                      max_dist=200,
                                      ratio=0.2)

    errors = open(
        path_results + 'errors_coherence_%s_%s.csv' % (dataset, black_box),
        'w')

    for i2e in range(nbr_experiments):

        try:

            expl_list = list()
            jrow_list = list()

            jrow_coh_o = {
                'i2e': i2e,
                'dataset': dataset,
                'black_box': black_box
            }

            # Finding the Lipschitz neighborhood
            img = X_test[i2e]
            bbo = bb_predict(np.array([img]))
            bbop = Y_pred_proba[i2e]

            X_idx = np.where(Y_pred_comp == bbo[0])[0]

            scaler = MinMaxScaler()
            x0 = scaler.fit_transform(img.ravel().reshape(-1, 1))
            Xj = scaler.fit_transform([x.ravel() for x in X_test_comp[X_idx]])
            dist = cdist(x0.reshape(1, -1), Xj)[0]
            eps = np.percentile(dist, 5)
            X_idx_eps = X_idx[np.where(dist <= eps)]

            # Alore
            exp = explainer.explain_instance(img,
                                             num_samples=1000,
                                             use_weights=True,
                                             metric=neuclidean)
            _, diff = exp.get_image_rule(features=None, samples=100)
            expl_list.append(diff)

            # Lime
            exp = lime_explainer.explain_instance(img,
                                                  bb_predict_proba,
                                                  top_labels=1,
                                                  hide_color=0,
                                                  num_samples=1000,
                                                  segmentation_fn=segmenter)
            _, mask = exp.get_image_and_mask(bbo[0],
                                             positive_only=False,
                                             num_features=5,
                                             hide_rest=False,
                                             min_weight=0.01)
            expl_list.append(mask)

            lipschitz_list = defaultdict(list)
            lipschitz_list_bb = defaultdict(list)

            print(
                datetime.datetime.now(), '[%s/%s] %s %s - checking coherence' %
                (i2e, nbr_experiments, dataset, black_box))

            for i2e1 in X_idx_eps[:20]:
                img1 = X_test_comp[i2e1]
                bbo1 = bb_predict(np.array([img1]))
                bbop1 = Y_pred_proba_comp[i2e1]
                norm_bb = calculate_lipschitz_factor(bbop, bbop1)
                norm_x = calculate_lipschitz_factor(img, img1)

                # Alore
                exp1 = explainer.explain_instance(img1,
                                                  num_samples=1000,
                                                  use_weights=True,
                                                  metric=neuclidean)
                _, diff1 = exp1.get_image_rule(features=None, samples=100)

                norm_exp = calculate_lipschitz_factor(expl_list[0], diff1)
                lipschitz_list['alore'].append(norm_exp / norm_x)
                lipschitz_list_bb['alore'].append(norm_exp / norm_bb)
                print(datetime.datetime.now(), '\talore', norm_exp / norm_x)

                # Lime
                exp1 = lime_explainer.explain_instance(
                    img1,
                    bb_predict_proba,
                    top_labels=1,
                    hide_color=0,
                    num_samples=1000,
                    segmentation_fn=segmenter)
                _, mask1 = exp1.get_image_and_mask(bbo[0],
                                                   positive_only=False,
                                                   num_features=5,
                                                   hide_rest=False,
                                                   min_weight=0.01)
                norm_exp = calculate_lipschitz_factor(expl_list[1], mask1)
                lipschitz_list['lime'].append(norm_exp / norm_x)
                lipschitz_list_bb['lime'].append(norm_exp / norm_bb)
                print(datetime.datetime.now(), '\tlime', norm_exp / norm_x)

            for k in lipschitz_list:
                jrow_coh = copy.deepcopy(jrow_coh_o)
                jrow_coh['method'] = k
                jrow_coh['mean'] = float(np.nanmean(lipschitz_list[k]))
                jrow_coh['std'] = float(np.nanstd(lipschitz_list[k]))
                jrow_coh['max'] = float(np.nanmax(lipschitz_list[k]))
                jrow_coh['mean_bb'] = float(np.nanmean(lipschitz_list_bb[k]))
                jrow_coh['std_bb'] = float(np.nanstd(lipschitz_list_bb[k]))
                jrow_coh['max_bb'] = float(np.nanmax(lipschitz_list_bb[k]))
                jrow_list.append(jrow_coh)
                print(
                    datetime.datetime.now(),
                    '[%s/%s] %s %s %s - mean: %.3f, max: %.3f' %
                    (i2e, nbr_experiments, dataset, black_box, k,
                     jrow_coh['mean'], jrow_coh['max']))

        except Exception:
            print('error instance to explain: %d' % i2e)
            errors.write('%d\n' % i2e)
            continue

        results = open(results_filename, 'a')
        for jrow in jrow_list:
            results.write('%s\n' % json.dumps(jrow))
        results.close()
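
The coherence check above is a local Lipschitz estimate: how much the explanation moves relative to how much the input (or the black-box output) moves between an instance and its neighbors. A sketch under the assumption that calculate_lipschitz_factor is essentially the L2 norm of the difference between two arrays (its actual definition is not shown here):

import numpy as np

def lipschitz_factor(a, b):
    # assumed stand-in for calculate_lipschitz_factor
    return np.linalg.norm(np.asarray(a, float).ravel() - np.asarray(b, float).ravel())

rng = np.random.RandomState(0)
x, x1 = rng.rand(28, 28), rng.rand(28, 28)   # instance and a close neighbor
e, e1 = rng.rand(28, 28), rng.rand(28, 28)   # their explanation masks

# explanation change per unit of input change, as in the loop above
print(lipschitz_factor(e, e1) / lipschitz_factor(x, x1))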
Example #26
    def test_LIME(self):

        # test invocation of lime explainer on tabular data
        iris = sklearn.datasets.load_iris()
        train, test, labels_train, labels_test = sklearn.model_selection.train_test_split(
            iris.data, iris.target, train_size=0.80)
        rf = sklearn.ensemble.RandomForestClassifier(n_estimators=500)
        rf.fit(train, labels_train)

        sklearn.metrics.accuracy_score(labels_test, rf.predict(test))

        explainer = LimeTabularExplainer(train,
                                         feature_names=iris.feature_names,
                                         class_names=iris.target_names,
                                         discretize_continuous=True)

        i = 19
        explanation = explainer.explain_instance(test[i],
                                                 rf.predict_proba,
                                                 num_features=2,
                                                 top_labels=1)
        print(i, explanation.as_map())
        print('Invoked Tabular explainer\n')

        # test invocation of lime explainer on text data

        newsgroups_train = fetch_20newsgroups(subset='train')
        newsgroups_test = fetch_20newsgroups(subset='test')

        # making class names shorter
        class_names = [
            x.split('.')[-1] if 'misc' not in x else '.'.join(
                x.split('.')[-2:]) for x in newsgroups_train.target_names
        ]
        class_names[3] = 'pc.hardware'
        class_names[4] = 'mac.hardware'

        print(','.join(class_names))

        vectorizer = sklearn.feature_extraction.text.TfidfVectorizer(
            lowercase=False)
        train_vectors = vectorizer.fit_transform(newsgroups_train.data)
        test_vectors = vectorizer.transform(newsgroups_test.data)

        nb = MultinomialNB(alpha=.01)
        nb.fit(train_vectors, newsgroups_train.target)

        pred = nb.predict(test_vectors)
        sklearn.metrics.f1_score(newsgroups_test.target,
                                 pred,
                                 average='weighted')

        c = make_pipeline(vectorizer, nb)
        print(c.predict_proba([newsgroups_test.data[0]]).round(3))

        explainer = LimeTextExplainer(class_names=class_names)

        idx = 1340
        exp = explainer.explain_instance(newsgroups_test.data[idx],
                                         c.predict_proba,
                                         num_features=6,
                                         labels=[0, 17])
        print('Document id: %d' % idx)
        print('Predicted class =',
              class_names[nb.predict(test_vectors[idx]).reshape(1, -1)[0, 0]])
        print('True class: %s' % class_names[newsgroups_test.target[idx]])

        print('Explanation for class %s' % class_names[0])
        print('\n'.join(map(str, exp.as_list(label=0))))
        print()
        print('Explanation for class %s' % class_names[17])
        print('\n'.join(map(str, exp.as_list(label=17))))

        print('Invoked Text explainer\n')

        # test invocation of lime explainer on Image data
        mnist = fetch_openml('mnist_784', as_frame=False)  # as_frame=False keeps .data a numpy array so reshape works

        # make each image color so lime_image works correctly
        X_vec = np.stack(
            [gray2rgb(iimg) for iimg in mnist.data.reshape((-1, 28, 28))], 0)
        y_vec = mnist.target.astype(np.uint8)

        class PipeStep(object):
            """
            Wrapper for turning functions into pipeline transforms (no-fitting)
            """
            def __init__(self, step_func):
                self._step_func = step_func

            def fit(self, *args):
                return self

            def transform(self, X):
                return self._step_func(X)

        makegray_step = PipeStep(
            lambda img_list: [rgb2gray(img) for img in img_list])
        flatten_step = PipeStep(
            lambda img_list: [img.ravel() for img in img_list])

        simple_rf_pipeline = Pipeline([
            ('Make Gray', makegray_step),
            ('Flatten Image', flatten_step),
            # ('Normalize', Normalizer()),
            # ('PCA', PCA(16)),
            ('RF', RandomForestClassifier())
        ])

        X_train, X_test, y_train, y_test = train_test_split(X_vec,
                                                            y_vec,
                                                            train_size=0.55)
        simple_rf_pipeline.fit(X_train, y_train)

        explainer = LimeImageExplainer(verbose=False)
        segmenter = SegmentationAlgorithm('quickshift',
                                          kernel_size=1,
                                          max_dist=200,
                                          ratio=0.2)

        explanation = explainer.explain_instance(
            X_test[0],
            classifier_fn=simple_rf_pipeline.predict_proba,
            top_labels=10,
            hide_color=0,
            num_samples=10000,
            segmentation_fn=segmenter)
        print('Invoked Image explainer\n')
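
The PipeStep wrapper defined in this test is a compact way to drop stateless functions into a scikit-learn Pipeline. A minimal usage sketch on dummy data (the class is repeated so the snippet runs on its own):

import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from skimage.color import rgb2gray

class PipeStep(object):
    """Wrapper for turning functions into pipeline transforms (no-fitting)"""
    def __init__(self, step_func):
        self._step_func = step_func
    def fit(self, *args):
        return self
    def transform(self, X):
        return self._step_func(X)

rng = np.random.RandomState(0)
imgs = rng.rand(20, 8, 8, 3)       # tiny stand-in images
labels = rng.randint(0, 2, 20)

pipe = Pipeline([
    ('Make Gray', PipeStep(lambda xs: [rgb2gray(x) for x in xs])),
    ('Flatten Image', PipeStep(lambda xs: [x.ravel() for x in xs])),
    ('RF', RandomForestClassifier(n_estimators=10)),
])
pipe.fit(imgs, labels)
print(pipe.predict(imgs[:3]))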
Example #27
    #('Normalize', Normalizer()),
    #('PCA', PCA(16)),
    ('RF', RandomForestClassifier())
])
X_train, X_test, y_train, y_test = train_test_split(X_vec,
                                                    y_vec,
                                                    train_size=0.55)
simple_rf_pipeline.fit(X_train, y_train)
try:
    import lime
except ImportError:
    sys.path.append(os.path.join('..', '..'))  # fall back to a source checkout two levels up
    import lime
explainer = lime_image.LimeImageExplainer(verbose=False)
segmenter = SegmentationAlgorithm('quickshift',
                                  kernel_size=1,
                                  max_dist=200,
                                  ratio=0.2)
explanation = explainer.explain_instance(
    X_test[0],
    classifier_fn=simple_rf_pipeline.predict_proba,
    top_labels=10,
    hide_color=0,
    num_samples=10000,
    segmentation_fn=segmenter)
temp, mask = explanation.get_image_and_mask(y_test[0],
                                            positive_only=True,
                                            num_features=10,
                                            hide_rest=False,
                                            min_weight=0.01)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(8, 4))
ax1.imshow(label2rgb(mask, temp, bg_label=0), interpolation='nearest')
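
The fragment ends before ax2 is used; in similar scripts the second axis shows the combined positive/negative view. A hedged sketch of that companion plot, assuming the same explanation object (the 3 - mask recoloring follows the common LIME MNIST tutorial convention):

temp2, mask2 = explanation.get_image_and_mask(y_test[0],
                                              positive_only=False,
                                              num_features=10,
                                              hide_rest=False,
                                              min_weight=0.01)
ax2.imshow(label2rgb(3 - mask2, temp2, bg_label=0), interpolation='nearest')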
Example #28
def compute_analysis(args):
    ############# DATA ##################

    image_size = settings.image_size
    #alpha = 3
    num_classes = settings.num_classes
    #n_test_examples = 2
    n = image_size * image_size
    k = n // 2
    ######################################

    print('\nLoading the model')
    model_path = os.path.abspath(args.model_path)[:-3]
    model = load_model(model_path + '.h5')
    print('Model Loaded\n')
    print('Model Summary')
    model.summary()

    if args.img_path is None:
        print('Creating a random image')
        resultFolder = args.out_path + 'random_' + time.strftime(
            "%Y%m%d-%H%M%S")

        if not settings.random_image_flag:
            print(
                'Generating the random image based on the seed from settings file'
            )
            rng = np.random.RandomState(settings.numpy_image_seed)
            original_img = rng.randint(num_classes,
                                       size=(image_size, image_size))
        else:
            original_img = np.random.randint(num_classes,
                                             size=(image_size, image_size))

        original_img = original_img.astype(np.float32)
        img = original_img

    else:
        ret = PIL.Image.open(args.img_path)
        ret = ret.resize((image_size, image_size))
        ret = ret.convert('L')
        img = np.asarray(ret, dtype=np.uint8).astype(np.float32)
        resultFolder = args.out_path + args.img_path.split('/')[-1].split(
            '.')[0]

    if resultFolder[-1] != '/':
        resultFolder = resultFolder + '/'

    if args.centre_pixel is not None:
        img[image_size // 2, image_size // 2] = args.centre_pixel

    #ipdb.set_trace()

    y = to_categorical(img[image_size // 2, image_size // 2],
                       num_classes=num_classes)
    y_index = np.argmax(y)
    print('\nTrue Label is:', y_index)

    img = img / 255
    data = img.flatten()
    data = np.expand_dims(data, axis=0)
    preds = model.predict(data)
    predict_indicies = np.argmax(preds)
    print('Predicted label is:', predict_indicies)
    #ipdb.set_trace()

    ## BTW, 2nd parameter (count starts from 0) is redundant and not used
    # Heatmapping Methods
    methods = [("input", {}, "Input")]

    if 'grad' in args.heatmap_methods:
        methods.append(("gradient", {}, "Gradient"))
    if 'gb' in args.heatmap_methods:
        methods.append(("guided_backprop", {}, "Guided Backprop "))
    if 'deconvnet' in args.heatmap_methods:
        methods.append(("deconvnet", {}, "Deconvnet"))
    if 'sg' in args.heatmap_methods:
        methods.append(
            ("smoothgrad", settings.smooth_grad_parameters, "SmoothGrad"))
    if 'inpgrad' in args.heatmap_methods:
        methods.append(("input_t_gradient", {}, "Input * Gradient"))
    if 'ig' in args.heatmap_methods:
        methods.append(
            ("integrated_gradients", settings.integrated_grad_parameters,
             "Integrated Gradients"))
    if 'lrp' in args.heatmap_methods:
        methods.append(("lrp.z", {}, "LRP-Z"))
        methods.append(
            ("lrp.epsilon", settings.lrp_epsilon_parameters, "LRP-Epsilon"))
        methods.append(("lrp.alpha_beta", {
            "alpha": 1,
            "beta": 0
        }, "LRP-alpha1_beta0"))
        methods.append(("lrp.alpha_beta", settings.lrp_alpha_beta_parameters,
                        "LRP-alpha2_beta1"))

    if 'occlusion' in args.heatmap_methods:
        methods.append(("occlusion", {}, "Occlusion"))
    if 'deeplift' in args.heatmap_methods:
        from deepexplain.tensorflow import DeepExplain
        methods.append(("deeplift", {}, "DeepLift"))
    if 'shapley' in args.heatmap_methods:
        methods.append(("shapley", {}, "Shapley Sampling"))
    if 'pda' in args.heatmap_methods:
        methods.append(("pda", {}, "Prediction Difference Analysis"))
    if 'lime' in args.heatmap_methods:
        methods.append(('lime', {}, "Lime"))
    if 'shap' in args.heatmap_methods:
        methods.append(('shap', {}, "Kernel_SHAP"))
    if 'mp' in args.heatmap_methods:
        methods.append(("mp", {}, "Meaningful_Perturbation"))

    model_wo_softmax = iutils.keras.graph.model_wo_softmax(model)

    ##############################################################
    # Create analyzers.
    analyzers = []
    for method in methods:
        #ipdb.set_trace()
        if method[0] == 'occlusion':
            from occlusion import occlusion_analysis
            kwargs = {
                'image': data,
                'model': model,
                'num_classes': num_classes,
                'img_size': image_size,
            }
            analyzer = occlusion_analysis(**kwargs)

        elif method[0] == 'deeplift':
            with DeepExplain(session=K.get_session()) as de:
                input_tensor = model.layers[0].input
                fModel = Model(inputs=input_tensor,
                               outputs=model.layers[-2].output)
                target_tensor = fModel(input_tensor)
                dl_bl = settings.deeplift_parameters['baseline'].flatten()
                analyzer = de.get_explainer('deeplift',
                                            target_tensor,
                                            input_tensor,
                                            baseline=dl_bl)

        elif method[0] == 'shapley':
            with DeepExplain(session=K.get_session()) as de:
                input_tensor = model.layers[0].input
                fModel = Model(inputs=input_tensor,
                               outputs=model.layers[-2].output)
                target_tensor = fModel(input_tensor)
                analyzer = de.get_explainer('shapley_sampling',
                                            target_tensor,
                                            input_tensor,
                                            samples=2)

        elif method[0] == 'pda':
            from pda import prediction_difference_analysis
            train_samples = settings.pda_parameters['train_samples']
            # ipdb.set_trace()
            kwargs = {
                'image': data,
                'model': model,
                'num_classes': num_classes,
                'img_size': image_size,
                'train_samples': train_samples
            }
            analyzer = prediction_difference_analysis(**kwargs)

        elif method[0] == 'mp':
            from mp import meaningful_perturbation

            #########################################
            # Need to convert this in (0-255) first
            mp_par = settings.mp_parameters
            mp_par['num_classes'] = num_classes
            mp_par['img_size'] = image_size
            #ipdb.set_trace()
            analyzer = meaningful_perturbation(
                (data.reshape((image_size, -1)) * 255).astype('uint8'),
                model_path + '.pt',
                resultFolder,
                **mp_par,
            )

        elif method[0] == 'lime':
            import lime
            from lime import lime_image
            from lime.wrappers.scikit_image import SegmentationAlgorithm
            from skimage.segmentation import mark_boundaries

            # Make the explainer object
            analyzer = lime_image.LimeImageExplainer()

        elif method[0] == 'shap':
            from shap_class import shap_analysis
            from skimage.segmentation import slic as slic_super_pixel
            # Make the explainer object
            analyzer = shap_analysis(
                np.repeat(np.expand_dims(data.reshape((image_size, -1)) * 255,
                                         axis=-1),
                          3,
                          axis=-1), model, resultFolder,
                **settings.shap_parameters)

        else:
            try:
                analyzer = innvestigate.create_analyzer(
                    method[0],  # analysis method identifier
                    model_wo_softmax,  # model without softmax output
                    neuron_selection_mode="index",
                    **method[1])  # optional analysis parameters

                if method[0] == "pattern.attribution":
                    analyzer.fit(data, batch_size=256, verbose=1)
            except innvestigate.NotAnalyzeableModelException:
                # Not all methods work with all models.
                analyzer = None
        analyzers.append(analyzer)

    ########## GENERATE HEATMAPS ###########
    IMG_ROWS = image_size
    IMG_COLS = image_size

    #analysis = np.zeros([len(data), len(analyzers), IMG_ROWS, IMG_COLS]) #Use analyzer for the analysis

    heatmap_grids = []
    extra_info = []
    for i, x in enumerate(data):
        # Add batch axis.
        x = np.expand_dims(x, axis=0)
        y_true = (x[:, k] * 255).astype('int64')[0]

        ### Model predictions
        pred = model.predict(x)  #256 probabilities
        pred_label = np.argmax(pred, axis=1)[0]
        pred_prob = np.amax(pred)
        neuron = pred_label

        if args.clamp_label is not None:
            neuron = args.clamp_label

        #print('pred shape', np.shape(pred))
        fiveClasses = np.argsort(-pred[0, :])[:5]
        #fiveProbs = np.zeros(np.shape(fiveClasses))
        fiveProbs = pred[0, fiveClasses]
        ##########################

        # data holds a single flattened image, so i is always 0 below
        analysis = np.zeros([1, len(analyzers), IMG_ROWS,
                             IMG_COLS])  # Use analyzer for the analysis

        for aidx, analyzer in enumerate(analyzers):

            print(f'Computing analysis for {methods[aidx][2]}')
            if methods[aidx][0] == "input":
                a = x
                a = a.reshape(image_size, -1)

            elif methods[aidx][0] in ['deeplift', 'shapley']:
                ys = to_categorical(neuron, num_classes=num_classes)
                if ys.shape[0] != x.shape[0]:
                    ys = np.expand_dims(ys, axis=0)
                a = analyzer.run(x, ys)
                a = a.reshape(image_size, -1)
                a = (a - np.mean(a)) / (np.std(a) + 1e-15)

            elif methods[aidx][0] == 'occlusion':
                #ipdb.set_trace()
                a = analyzer.explain(neuron, **settings.occlusion_parameters)
                a = a.reshape(image_size, -1)
                a = (a - np.mean(a)) / (np.std(a) + 1e-15)

            elif methods[aidx][0] == 'pda':
                print('PDA takes a lot time. Please wait...')
                num = settings.pda_parameters['num']
                a = analyzer.explain(neuron, num=num)
                a = a.reshape(image_size, -1)
                a = (a - np.mean(a)) / (np.std(a) + 1e-15)

            elif methods[aidx][0] == 'mp':
                print('MP takes some time. Please wait...')
                a = analyzer.explain(neuron)
                a = a.reshape(image_size, -1)
                a = (a - np.mean(a)) / (np.std(a) + 1e-15)

            elif methods[aidx][0] == 'lime':
                segmenter = SegmentationAlgorithm(
                    'quickshift', **settings.lime_segmenter_parameters)

                def lime_preprocess_input(im):
                    im = im[:, :, :, 0]
                    return np.reshape(im, (im.shape[0], -1))

                def lime_predict(x):
                    return model.predict(lime_preprocess_input(x))

                explanation = analyzer.explain_instance(
                    image=np.reshape(data, (image_size, -1)),
                    classifier_fn=lime_predict,
                    top_labels=settings.num_classes,
                    segmentation_fn=segmenter,
                    **settings.lime_explainer_parameters)

                temp, mask = explanation.get_image_and_mask(
                    label=neuron, **settings.lime_mask_parameters)

                bb = (mark_boundaries(temp, mask))
                eutils.save_lime_mask(bb, resultFolder)
                a = bb[:, :, 0]
                # ipdb.set_trace()

            elif methods[aidx][0] == 'shap':
                print('SHAP takes some time. Please wait...')
                segments_slic = slic_super_pixel(
                    PIL.Image.fromarray(np.uint8(analyzer.img_orig.copy())),
                    **settings.shap_slic_parameters)
                a = analyzer.explain(segments_slic, neuron)

            else:
                a = analyzer.analyze(x, neuron_selection=neuron)
                a = a.reshape(image_size, -1)
                a = (a - np.mean(a)) / (np.std(a) + 1e-15)

            analysis[i, aidx] = a
            print('Done')

        # Prepare the grid as a rectangular list
        grid = [[analysis[i, j] for j in range(analysis.shape[1])]
                for i in range(analysis.shape[0])]

        #ipdb.set_trace()
        pred_prob = round(pred_prob, 5)
        #ipdb.set_trace()
        row_labels_left = [('True Label: {}'.format(y_true),
                            'Pred Label: {}'.format(pred_label),
                            'clamped Neuron: {}'.format(neuron),
                            'Probability: ' + str(pred_prob))]

        row_labels_right = [(
            '\n\n\nClass: %d' % fiveClasses[0] + ' Prob: %.5f' % fiveProbs[0],
            '\nClass: %d' % fiveClasses[1] + ' Prob: %.5f' % fiveProbs[1],
            '\nClass: %d' % fiveClasses[2] + ' Prob: %.5f' % fiveProbs[2],
            '\nClass: %d' % fiveClasses[3] + ' Prob: %.5f' % fiveProbs[3],
            '\nClass: %d' % fiveClasses[4] + ' Prob: %.5f' % fiveProbs[4],
        )]

        col_labels = [''.join(method[2]) for method in methods]

        eutils.plot_image_grid(grid,
                               resultFolder,
                               row_labels_left,
                               row_labels_right,
                               col_labels,
                               file_name='heatmap_' +
                               time.strftime("%Y%m%d-%H%M%S") + '.png',
                               dpi=image_size)

        heatmap_grids.append(grid)
        extra_info.append([row_labels_left, row_labels_right])

    return heatmap_grids, extra_info
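
The 'lime' branch above bridges an interface mismatch: LimeImageExplainer hands the classifier batches of (H, W, 3) images, while this model consumes flat grayscale vectors. A sketch of that adapter in isolation (model and image_size are placeholders for the Keras model and settings value used above):

import numpy as np

def make_lime_predict(model, image_size):
    def lime_predict(batch):
        # keep one channel and flatten each image to the (batch, H*W)
        # shape the flat-input model expects
        assert batch.shape[1] == batch.shape[2] == image_size
        flat = batch[:, :, :, 0].reshape(batch.shape[0], -1)
        return model.predict(flat)
    return lime_predict

class StubModel:  # hypothetical stand-in for the loaded model
    def predict(self, x):
        return np.tile([0.1, 0.9], (x.shape[0], 1))

predict_fn = make_lime_predict(StubModel(), image_size=28)
print(predict_fn(np.zeros((2, 28, 28, 3))).shape)  # (2, 2)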
Example #29
def run_segment_activation(**args):

    #Set parameters for script
    sp = args.get("super_pixels")
    num_samples = args.get("num_samples")
    path_or_pkl = args.get("path_pkl").lower()
    tp_or_fn = args.get("tp_fn").upper()
    pth = f'/zhome/ca/6/92701/Desktop/Master_Thesis/Results/Lime/Good Performance/'

    #Get list of image names to use
    if path_or_pkl == 'path':
        path = f"{pth}/*.jpg"
        images = glob.glob(path)
    elif path_or_pkl == 'pkl':
        images = p.load(open('rejected_errors.pkl', 'rb'))
        for i, img in enumerate(images):
            images[i] = "/".join(
                ("./stylegan2/rl_images/256/Validation/rejected/", img[11:]))

    print(f"Loaded {len(images)} images")
    print(f"Using {sp} super pixels and loading from {path_or_pkl.lower()}")

    def get_image(path):
        with open(os.path.abspath(path), 'rb') as f:
            with Image.open(f) as img:
                return img.convert('RGB')

    #Load the model
    model = Inception.inception_v3()
    cp = torch.load(
        '/zhome/ca/6/92701/Desktop/Master_Thesis/Results/Inception/First/inception_data-augment.pth.tar'
    )

    state_dict = cp['state_dict']

    from collections import OrderedDict
    new_state_dict = OrderedDict()

    for k, v in state_dict.items():
        name = k[7:]  # strip the 'module.' prefix added by nn.DataParallel
        new_state_dict[name] = v

    model.load_state_dict(new_state_dict)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model.eval()
    model.to(device)

    #Transformations in PIL image
    def get_PIL_transform():
        transf = transforms.Compose([transforms.Pad((21, 22, 22, 21))])

        return transf

    #Transformations in numpy image
    def get_preprocess_transform():
        normalize = transforms.Normalize(mean=[0.1446, 0.1561, 0.0794],
                                         std=[0.1223, 0.1178, 0.0936])
        transf = transforms.Compose([transforms.ToTensor(), normalize])

        return transf

    pil_transf = get_PIL_transform()
    preprocess_transform = get_preprocess_transform()

    #Make batch and apply transforms
    def batch_predict(images):
        model.eval()

        batch = torch.stack(tuple(preprocess_transform(i) for i in images),
                            dim=0)
        batch = batch.to(device)
        std = torch.tensor([0.1223, 0.1178, 0.0936])
        mean = torch.tensor([0.1446, 0.1561, 0.0794])
        logits = model(batch * std[None, :, None, None].to(device) +
                       mean[None, :, None, None].to(device))

        probs0 = 1 - torch.sigmoid(logits)
        probs1 = torch.sigmoid(logits)

        probs = torch.stack((probs0, probs1), dim=1).squeeze()

        return probs.detach().cpu().numpy()

    #Define segmentation algorithm (SLIC; the quickshift variant is kept for reference)
    #segmentation_fn = SegmentationAlgorithm('quickshift', kernel_size=4,
    #                                        max_dist=200, ratio=0.2,
    #                                        random_seed=41)
    segmentation_fn = SegmentationAlgorithm('slic',
                                            n_segments=99,
                                            compactness=2,
                                            sigma=3)

    #Where to save output
    save_dir = "/".join(
        (os.getcwd(), "Boundry", "Handpicked", f"{sp}"))  #f"{tp_or_fn}",

    try:
        os.mkdir(save_dir)
    except Exception:
        pass

    #Clear output folder
    files = glob.glob("/".join((save_dir, '*')))
    for f in files:
        os.remove(f)

    #Iterate over every image
    for idx, img_name in enumerate(reversed(images)):
        print("Running boundries on images %d of %d" % (idx + 1, len(images)))
        img = get_image(img_name)
        explainer = lime_image.LimeImageExplainer()
        explanation = explainer.explain_instance(
            np.array(pil_transf(img)),
            batch_predict,
            top_labels=2,
            hide_color=0,
            num_samples=num_samples,
            segmentation_fn=segmentation_fn)

        img_name_split = img_name.split("/")
        file_name = img_name_split[-1]

        temp, mask = explanation.get_image_and_mask(1,
                                                    positive_only=False,
                                                    num_features=sp,
                                                    hide_rest=False)
        img_boundry = mark_boundaries(temp / 255.0, mask)

        try:
            os.mkdir(save_dir)
        except Exception:
            pass

        fig, ax = plt.subplots(1, 2, figsize=(8, 4), sharex=True, sharey=True)

        ax[0].imshow(img)
        ax[1].imshow(img_boundry[21:277, 22:278])

        for a in ax.ravel():
            a.set_axis_off()

        plt.tight_layout()
        plt.savefig("/".join((save_dir, file_name)))
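# The state_dict loop above slices a fixed seven characters off every key:
# nn.DataParallel checkpoints store parameters under a "module." prefix that
# must be stripped before loading into a plain model. A slightly more
# defensive sketch of the same fix-up (a generic helper, not part of the
# original script):
from collections import OrderedDict

def strip_module_prefix(state_dict):
    # drop the "module." prefix where present; leave other keys untouched
    return OrderedDict(
        (k[len('module.'):] if k.startswith('module.') else k, v)
        for k, v in state_dict.items())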
def explain(params=None):
    DCG, disc, images_to_explain, d_index, normalize_by_mean = params
    Discriminator = DCG.DCGAND_1
    BATCH_SIZE = FLAGS.batch_size
    with tf.Graph().as_default() as graph:
        train_data_list = helpers.get_dataset_files()
        real_data = input_pipeline(train_data_list, batch_size=BATCH_SIZE)
        # Normalize -1 to 1
        real_data = 2 * ((tf.cast(real_data, tf.float32) / 255.) - .5)
        disc_real, _ = Discriminator(real_data)
        disc_vars = lib.params_with_name("Discriminator")
        disc_saver = tf.train.Saver(disc_vars)
        ckpt_disc = tf.train.get_checkpoint_state(
            "./saved_models/" + disc + "/")
        config = tf.ConfigProto(allow_soft_placement=True)
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            sess.run(tf.global_variables_initializer())
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            # print('Queue runners started.')
            if ckpt_disc and ckpt_disc.model_checkpoint_path:
                # print("Restoring discriminator...", disc)
                disc_saver.restore(
                    sess, ckpt_disc.model_checkpoint_path)

                def disc_prediction(image):
                    # make fake batch:
                    # Transform to -1 to 1:
                    if np.max(image) > 1.1 or np.min(image) > 0.0:
                        image = (image.astype(np.float32) * 2.0 / 255.0) - 1.0
                    if len(image.shape) == 4:
                        no_ims = image.shape[0]
                    else:
                        no_ims = 1
                    images_batch = np.zeros(
                        [256, 64, 64, 3]).astype(np.float32)
                    images_batch[0:no_ims] = image
                    prediction, _ = sess.run([Discriminator(images_batch)])[0]
                    # Map the raw critic score from (-inf, +inf) to a probability in (0, 1)
                    pred_array = np.zeros((no_ims, 2))
                    # Normalize predictions to see what happens:
                    # prediction = (prediction-np.mean(prediction))/np.std(prediction)
                    for i, x in enumerate(prediction[:no_ims]):
                        if normalize_by_mean:
                            bias = marginalized_means[d_index]
                            pred_array[i, 1] = expit(x-bias)
                            pred_array[i, 0] = 1 - pred_array[i, 1]
                        else:
                            pred_array[i, 1] = expit(x)
                            pred_array[i, 0] = 1 - pred_array[i, 1]
                        # 1 == REAL; 0 == FAKE
                    return pred_array
                explanations = []
                explainer = lime_image.LimeImageExplainer(verbose=False)
                segmenter = SegmentationAlgorithm(
                    'slic', n_segments=100, compactness=1, sigma=1)
                try:
                    if not len(images_to_explain):
                        images_to_explain = sess.run(real_data)[:no_samples]
                        images_to_explain = (images_to_explain + 1.0) * 255.0 / 2.0
                        images_to_explain = images_to_explain.astype(np.uint8)
                        images_to_explain = np.reshape(
                            images_to_explain, [no_samples, 64, 64, 3])
                    for image_to_explain in tqdm(images_to_explain):
                        explanation = explainer.explain_instance(image_to_explain,
                                                                 classifier_fn=disc_prediction, batch_size=256,
                                                                 top_labels=2, hide_color=None, num_samples=no_perturbed_images,
                                                                 segmentation_fn=segmenter)
                        explanations.append(explanation)
                except KeyboardInterrupt:
                    print("Manual interrupt occurred.")
                finally:
                    coord.request_stop()
                    coord.join(threads)
                make_figures(images_to_explain, explanations,
                             DCG.get_G_dim(), DCG.get_D_dim(), normalize_by_mean)
                return images_to_explain
            else:
                print("Failed to load Discriminator", disc)