Ejemplo n.º 1
0
def main():
    """Plot the classifier pattern, one positively classified image and its ground truth.

    Builds a synthetic image classifier, draws a random test set around its
    pattern, picks the first image predicted as class 1 and renders the
    ground-truth pixel importance for that image. Each of the three figures
    is written as a PNG under ``../fig/`` and then shown.
    """
    image_shape = (32, 32, 3)
    cell_shape = (4, 4)

    def finish_figure(out_path):
        # Hide the ticks on both axes, save the current figure, then display it.
        plt.xticks(())
        plt.yticks(())
        plt.savefig(out_path, format='png', bbox_inches='tight')
        plt.show()

    sic = generate_synthetic_image_classifier(img_size=image_shape,
                                              cell_size=cell_shape,
                                              n_features=(20, 20),
                                              p_border=1.0)
    pattern = sic['pattern']
    predict = sic['predict']
    predict_proba = sic['predict_proba']  # looked up but not used below

    plt.imshow(pattern)
    finish_figure('../fig/pattern.png')

    dataset = generate_random_img_dataset(
        pattern,
        nbr_images=1000,
        pattern_ratio=0.4,
        img_size=image_shape,
        cell_size=cell_shape,
        min_nbr_cells=0.1,
        max_nbr_cells=0.3,
        colors_p=np.array([0.15, 0.7, 0.15]))
    labels = predict(dataset)

    # First image that the classifier assigns to class 1; indexing raises
    # IndexError when no image is predicted positive.
    positive_rows = np.where(labels == 1)[0]
    sample = dataset[positive_rows[0]]
    plt.imshow(sample)
    finish_figure('../fig/image.png')

    importance = get_pixel_importance_explanation(sample, sic)
    # Symmetric color limits so that zero sits at the center of the
    # diverging colormap.
    limit = np.nanpercentile(np.abs(importance), 99.9)
    plt.imshow(np.reshape(importance, image_shape[:2]),
               cmap='RdYlBu',
               vmin=-limit,
               vmax=limit,
               alpha=0.7)
    finish_figure('../fig/saliencymap.png')
def main():
    """Explain one synthetic test image with LIME and score it against the ground truth.

    Builds a synthetic image classifier and a random test set, explains the
    first test image with ``LimeImageExplainer`` (quickshift segmentation),
    shows the resulting LIME mask next to the ground-truth pixel importance
    and prints their pixel-based similarity.
    """
    n_features = (16, 16)
    img_size = (32, 32, 3)
    cell_size = (4, 4)
    colors_p = np.array([0.15, 0.7, 0.15])
    p_border = 1.0

    sic = generate_synthetic_image_classifier(img_size=img_size,
                                              cell_size=cell_size,
                                              n_features=n_features,
                                              p_border=p_border)

    pattern = sic['pattern']
    predict = sic['predict']
    predict_proba = sic['predict_proba']

    plt.imshow(pattern)
    plt.show()

    X_test = generate_random_img_dataset(pattern,
                                         nbr_images=1000,
                                         pattern_ratio=0.4,
                                         img_size=img_size,
                                         cell_size=cell_size,
                                         min_nbr_cells=0.1,
                                         max_nbr_cells=0.3,
                                         colors_p=colors_p)

    Y_test = predict(X_test)

    explainer = LimeImageExplainer()
    segmenter = SegmentationAlgorithm('quickshift',
                                      kernel_size=1,
                                      max_dist=10,
                                      ratio=0.5)

    # Only the first test image is explained; widen both slices to cover more.
    for x, y in zip(X_test[:1], Y_test[:1]):
        print(y)
        exp = explainer.explain_instance(x,
                                         predict_proba,
                                         top_labels=2,
                                         hide_color=0,
                                         num_samples=10000,
                                         segmentation_fn=segmenter)
        # The rendered image returned first is not needed; the mask is used
        # as the per-pixel feature-importance map.
        _, mask = exp.get_image_and_mask(y,
                                         positive_only=True,
                                         num_features=1000,
                                         hide_rest=False,
                                         min_weight=0.0)

        # Symmetric color limits so that zero sits at the center of the
        # diverging colormap.
        max_val = np.nanpercentile(np.abs(mask), 99.9)
        plt.imshow(mask, cmap='RdYlBu', vmin=-max_val, vmax=max_val, alpha=0.7)
        plt.show()

        gt_val = get_pixel_importance_explanation(x, sic)
        print(gt_val.shape)
        max_val = np.nanpercentile(np.abs(gt_val), 99.9)
        plt.imshow(np.reshape(gt_val, img_size[:2]),
                   cmap='RdYlBu',
                   vmin=-max_val,
                   vmax=max_val,
                   alpha=0.7)
        plt.show()

        print(pixel_based_similarity(mask.ravel(), gt_val))
def main():
    """Explain one synthetic test image with MAPLE and score it against the ground truth.

    Builds a synthetic image classifier and a random test set, fits MAPLE on
    the first ten flattened test images (the same records are passed as both
    of its data sets), explains the last test image, turns the coefficients
    into a binary per-pixel map and prints its pixel-based similarity to the
    ground-truth pixel importance.
    """
    image_shape = (32, 32, 3)
    cell_shape = (4, 4)

    sic = generate_synthetic_image_classifier(img_size=image_shape,
                                              cell_size=cell_shape,
                                              n_features=(8, 8),
                                              p_border=0.0)
    pattern = sic['pattern']
    predict = sic['predict']
    predict_proba = sic['predict_proba']  # looked up but not used below

    plt.imshow(pattern)
    plt.show()

    X_test = generate_random_img_dataset(
        pattern,
        nbr_images=1000,
        pattern_ratio=0.4,
        img_size=image_shape,
        cell_size=cell_shape,
        min_nbr_cells=0.1,
        max_nbr_cells=0.3,
        colors_p=np.array([0.15, 0.7, 0.15]))
    Y_test = predict(X_test)

    nbr_records = 10
    flat_images = np.array([img.ravel() for img in X_test[:nbr_records]])
    explainer = MAPLE(flat_images,
                      Y_test[:nbr_records],
                      flat_images,
                      Y_test[:nbr_records],
                      n_estimators=5,
                      max_features=0.5,
                      min_samples_leaf=5)

    x = X_test[-1]
    plt.imshow(x)
    plt.show()

    # NOTE(review): the last coefficient is dropped, presumably because it is
    # the intercept -- confirm against MAPLE's coefficient layout.
    raw_coefs = explainer.explain(x)['coefs'][:-1]
    print(raw_coefs)

    # Flag every strictly positive coefficient (per channel value) with 1.0.
    positive_flags = np.where(np.asarray(raw_coefs) > 0.0, 1.0, 0.0)
    print(positive_flags)
    print(np.unique(positive_flags, return_counts=True))
    print(positive_flags.shape)

    # A pixel counts as important when at least one of its channels is flagged.
    flagged_channels = np.sum(np.reshape(positive_flags, image_shape), axis=2)
    sv = np.where(flagged_channels > 0.0, 1.0, 0.0)
    print(sv)
    print(sv.shape)

    limit = np.nanpercentile(np.abs(sv), 99.9)
    plt.imshow(sv, cmap='RdYlBu', vmin=-limit, vmax=limit, alpha=0.7)
    plt.show()

    gt_val = get_pixel_importance_explanation(x, sic)
    print(gt_val.shape)
    limit = np.nanpercentile(np.abs(gt_val), 99.9)
    plt.imshow(np.reshape(gt_val, image_shape[:2]),
               cmap='RdYlBu',
               vmin=-limit,
               vmax=limit,
               alpha=0.7)
    plt.show()

    print(np.unique(gt_val, return_counts=True))
    print(np.unique(sv.ravel(), return_counts=True))
    print(pixel_based_similarity(sv.ravel(), gt_val))
Ejemplo n.º 4
0
def _binarize_channel_sum(flat_values, img_size):
    """Sum the per-channel values of each pixel and flag pixels whose sum is positive."""
    per_pixel = np.sum(np.reshape(flat_values, img_size), axis=2)
    binary = np.zeros(per_pixel.shape)
    binary[np.where(per_pixel > 0.0)] = 1.0
    return binary


def _show_saliency_map(values, title=None, out_path=None):
    """Draw a 2-D map with symmetric color limits; optionally title and save it, then show it."""
    # The limits are always derived from the map being drawn, so one
    # explainer's scale can never leak into another explainer's figure.
    max_val = np.nanpercentile(np.abs(values), 99.9)
    plt.imshow(values, cmap='RdYlBu', vmin=-max_val, vmax=max_val, alpha=0.7)
    plt.xticks(())
    plt.yticks(())
    if title is not None:
        plt.title(title, fontsize=20)
    if out_path is not None:
        plt.savefig(out_path, format='png', bbox_inches='tight')
    plt.show()


def main():
    """Compare LIME, SHAP and MAPLE explanations of one hand-drawn image.

    Builds a synthetic image classifier around a hand-drawn pattern, draws a
    hand-defined test image, and shows the ground-truth pixel importance
    followed by the LIME, SHAP and MAPLE maps (the latter three are saved
    under ``../fig/``). Finally prints F1, precision and recall of each
    explainer's map against the ground truth, one line per explainer.
    """
    n_features = (20, 20)
    img_size = (32, 32, 3)
    cell_size = (4, 4)
    colors_p = np.array([0.15, 0.7, 0.15])
    p_border = 1.0

    # One letter per cell; presumably color codes understood by
    # generate_img_defined ('k' black, 'g' green, 'b' blue, 'r' red) -- confirm.
    pattern_draft = np.array([
        ['k', 'k', 'k', 'k', 'k'],
        ['k', 'k', 'k', 'b', 'k'],
        ['k', 'k', 'g', 'g', 'k'],
        ['k', 'k', 'g', 'k', 'k'],
        ['k', 'k', 'k', 'k', 'k'],
    ])

    pattern = generate_img_defined(pattern_draft,
                                   img_size=(20, 20, 3),
                                   cell_size=cell_size)

    sic = generate_synthetic_image_classifier(img_size=img_size,
                                              cell_size=cell_size,
                                              n_features=n_features,
                                              p_border=p_border,
                                              pattern=pattern)

    pattern = sic['pattern']
    predict = sic['predict']
    predict_proba = sic['predict_proba']

    plt.imshow(pattern)
    plt.xticks(())
    plt.yticks(())
    plt.show()

    # The random data set is only used to fit MAPLE further down.
    X_test = generate_random_img_dataset(pattern,
                                         nbr_images=1000,
                                         pattern_ratio=0.4,
                                         img_size=img_size,
                                         cell_size=cell_size,
                                         min_nbr_cells=0.1,
                                         max_nbr_cells=0.3,
                                         colors_p=colors_p)

    Y_test = predict(X_test)

    # The explained image is hand-defined rather than sampled from X_test.
    img_draft = np.array([
        ['k', 'k', 'k', 'k', 'k', 'g', 'r', 'k'],
        ['g', 'k', 'k', 'k', 'k', 'k', 'k', 'g'],
        ['k', 'g', 'k', 'k', 'k', 'b', 'k', 'k'],
        ['k', 'g', 'k', 'k', 'g', 'g', 'k', 'b'],
        ['k', 'k', 'k', 'k', 'g', 'k', 'k', 'g'],
        ['g', 'k', 'k', 'k', 'k', 'k', 'k', 'k'],
        ['k', 'k', 'k', 'k', 'k', 'k', 'k', 'k'],
        ['k', 'k', 'k', 'k', 'g', 'k', 'k', 'k'],
    ])
    x = generate_img_defined(img_draft, img_size=img_size, cell_size=cell_size)
    plt.imshow(x)
    plt.xticks(())
    plt.yticks(())
    plt.show()

    # Ground truth.
    gt_val = get_pixel_importance_explanation(x, sic)
    _show_saliency_map(np.reshape(gt_val, img_size[:2]))

    # LIME: the mask for label 1 is used as the per-pixel importance map.
    lime_explainer = LimeImageExplainer()
    segmenter = SegmentationAlgorithm('quickshift',
                                      kernel_size=1,
                                      max_dist=10,
                                      ratio=0.5)
    tot_num_features = img_size[0] * img_size[1]

    lime_exp = lime_explainer.explain_instance(x,
                                               predict_proba,
                                               top_labels=2,
                                               hide_color=0,
                                               num_samples=10000,
                                               segmentation_fn=segmenter)
    _, lime_expl_val = lime_exp.get_image_and_mask(
        1,
        positive_only=True,
        num_features=tot_num_features,
        hide_rest=False,
        min_weight=0.0)
    _show_saliency_map(lime_expl_val,
                       title='lime',
                       out_path='../fig/saliencymap_lime.png')

    # SHAP: all-zero background; entry 1 of the returned values is kept
    # (presumably the attributions for class 1 -- confirm).
    background = np.array([np.zeros(img_size).ravel()] * 10)
    shap_explainer = KernelExplainer(predict_proba, background)

    shap_expl_val = shap_explainer.shap_values(x.ravel(), l1_reg='bic')[1]
    shap_expl_val = _binarize_channel_sum(shap_expl_val, img_size)
    _show_saliency_map(shap_expl_val,
                       title='shap',
                       out_path='../fig/saliencymap_shap.png')

    # MAPLE: fitted on the first ten flattened random images, which are
    # passed as both of its data sets.
    nbr_records = 10
    Xm_test = np.array([x.ravel() for x in X_test[:nbr_records]])
    maple_explainer = MAPLE(Xm_test,
                            Y_test[:nbr_records],
                            Xm_test,
                            Y_test[:nbr_records],
                            n_estimators=5,
                            max_features=0.5,
                            min_samples_leaf=5)

    maple_exp = maple_explainer.explain(x)
    # NOTE(review): the last coefficient is dropped, presumably because it is
    # the intercept -- confirm against MAPLE's coefficient layout.
    maple_expl_val = maple_exp['coefs'][:-1]
    maple_expl_val = _binarize_channel_sum(maple_expl_val, img_size)
    # Fix: color limits now come from the MAPLE map itself; they were
    # previously computed from the SHAP map.
    _show_saliency_map(maple_expl_val,
                       title='maple',
                       out_path='../fig/saliencymap_maple.png')

    lime_f1, lime_pre, lime_rec = pixel_based_similarity(lime_expl_val.ravel(),
                                                         gt_val,
                                                         ret_pre_rec=True)
    shap_f1, shap_pre, shap_rec = pixel_based_similarity(shap_expl_val.ravel(),
                                                         gt_val,
                                                         ret_pre_rec=True)
    maple_f1, maple_pre, maple_rec = pixel_based_similarity(
        maple_expl_val.ravel(), gt_val, ret_pre_rec=True)

    print(lime_f1, lime_pre, lime_rec)
    print(shap_f1, shap_pre, shap_rec)
    print(maple_f1, maple_pre, maple_rec)
Ejemplo n.º 5
0
def run(black_box, n_records, img_size, cell_size, n_features, p_border,
        colors_p, random_state, filename):
    """Score LIME, SHAP and MAPLE against the ground truth on a synthetic image set.

    Builds a synthetic image classifier and ``n_records`` random images,
    explains every image with the three explainers, compares each binary
    explanation map with the ground-truth pixel importance and writes one
    CSV row per image to ``filename`` (created with a header when missing,
    appended to without a header otherwise).

    ``black_box`` is only echoed in the progress log and in the output rows.
    ``random_state`` is forwarded to the classifier generator only; the data
    set generation and the MAPLE record sampling are not seeded here.
    """
    columns = [
        'black_box', 'n_records', 'img_size', 'cell_size', 'n_features',
        'random_state', 'idx', 'lime_f1', 'lime_pre', 'lime_rec', 'shap_f1',
        'shap_pre', 'shap_rec', 'maple_f1', 'maple_pre', 'maple_rec',
        'p_border'
    ]

    def positive_pixel_mask(flat_values):
        # Sum the channel values of each pixel and flag pixels whose sum is
        # strictly positive with 1.0 (0.0 elsewhere).
        per_pixel = np.sum(np.reshape(flat_values, img_size), axis=2)
        return np.where(per_pixel > 0.0, 1.0, 0.0)

    sic = generate_synthetic_image_classifier(img_size=img_size,
                                              cell_size=cell_size,
                                              n_features=n_features,
                                              p_border=p_border,
                                              random_state=random_state)
    pattern = sic['pattern']
    predict = sic['predict']
    predict_proba = sic['predict_proba']

    X_test = generate_random_img_dataset(pattern,
                                         nbr_images=n_records,
                                         pattern_ratio=0.5,
                                         img_size=img_size,
                                         cell_size=cell_size,
                                         min_nbr_cells=0.1,
                                         max_nbr_cells=0.3,
                                         colors_p=colors_p)
    Y_test_proba = predict_proba(X_test)
    Y_test = predict(X_test)

    # LIME setup.
    lime_explainer = LimeImageExplainer()
    segmenter = SegmentationAlgorithm('quickshift',
                                      kernel_size=1,
                                      max_dist=10,
                                      ratio=0.5)
    tot_num_features = img_size[0] * img_size[1]

    # SHAP setup: ten all-zero flattened images as background.
    background = np.array([np.zeros(img_size).ravel()] * 10)
    shap_explainer = KernelExplainer(predict_proba, background)

    # MAPLE setup: two independently drawn samples of ten records each
    # (the two samples may overlap), regressing on the class-1 probability.
    nbr_records_explainer = 10
    train_rows = np.random.choice(range(len(X_test)),
                                  size=nbr_records_explainer,
                                  replace=False)
    valid_rows = np.random.choice(range(len(X_test)),
                                  size=nbr_records_explainer,
                                  replace=False)
    Xm_train = np.array([img.ravel() for img in X_test[train_rows]])
    Xm_test = np.array([img.ravel() for img in X_test[valid_rows]])

    print(datetime.datetime.now(), 'build maple')
    maple_explainer = MAPLE(Xm_train,
                            Y_test_proba[train_rows][:, 1],
                            Xm_test,
                            Y_test_proba[valid_rows][:, 1],
                            n_estimators=100,
                            max_features=0.5,
                            min_samples_leaf=2)
    print(datetime.datetime.now(), 'build maple done')

    rows = []
    for idx, (x, y) in enumerate(zip(X_test, Y_test)):
        # Progress prefix; the scores printed at the end of the iteration
        # complete the same output line.
        print(datetime.datetime.now(),
              'seneca - image',
              'black_box %s' % black_box,
              'n_features %s' % str(n_features),
              'rs %s' % random_state,
              '%s/%s' % (idx, n_records),
              end=' ')

        gt_val = get_pixel_importance_explanation(x, sic)

        lime_exp = lime_explainer.explain_instance(x,
                                                   predict_proba,
                                                   top_labels=2,
                                                   hide_color=0,
                                                   num_samples=10000,
                                                   segmentation_fn=segmenter)
        # The LIME mask for the predicted label is the explanation map.
        _, lime_expl_val = lime_exp.get_image_and_mask(
            y,
            positive_only=True,
            num_features=tot_num_features,
            hide_rest=False,
            min_weight=0.0)

        shap_expl_val = positive_pixel_mask(
            shap_explainer.shap_values(x.ravel(), l1_reg='bic')[1])

        # NOTE(review): the last coefficient is dropped, presumably because
        # it is the intercept -- confirm against MAPLE's coefficient layout.
        maple_expl_val = positive_pixel_mask(
            maple_explainer.explain(x)['coefs'][:-1])

        lime_f1, lime_pre, lime_rec = pixel_based_similarity(
            lime_expl_val.ravel(), gt_val, ret_pre_rec=True)
        shap_f1, shap_pre, shap_rec = pixel_based_similarity(
            shap_expl_val.ravel(), gt_val, ret_pre_rec=True)
        maple_f1, maple_pre, maple_rec = pixel_based_similarity(
            maple_expl_val.ravel(), gt_val, ret_pre_rec=True)

        rows.append({
            'black_box': black_box,
            'n_records': n_records,
            # Tuples are stored as quoted strings in the CSV.
            'img_size': '"%s"' % str(img_size),
            'cell_size': '"%s"' % str(cell_size),
            'n_features': '"%s"' % str(n_features),
            'random_state': random_state,
            'idx': idx,
            'lime_f1': lime_f1,
            'lime_pre': lime_pre,
            'lime_rec': lime_rec,
            'shap_f1': shap_f1,
            'shap_pre': shap_pre,
            'shap_rec': shap_rec,
            'maple_f1': maple_f1,
            'maple_pre': maple_pre,
            'maple_rec': maple_rec,
            'p_border': p_border
        })
        print('lime %.2f' % lime_f1, 'shap %.2f' % shap_f1,
              'maple %.2f' % maple_f1)

    df = pd.DataFrame(data=rows)
    df = df[columns]

    # Append without a header when the file already exists.
    if os.path.isfile(filename):
        df.to_csv(filename, mode='a', index=False, header=False)
    else:
        df.to_csv(filename, index=False)