def main():
    """Build a synthetic image classifier, then plot and save its pattern,
    one positively-classified test image, and the ground-truth saliency map.

    Figures are written to ../fig/pattern.png, ../fig/image.png and
    ../fig/saliencymap.png.
    """
    n_features = (20, 20)
    img_size = (32, 32, 3)
    cell_size = (4, 4)
    # Sampling probabilities for the three cell colors used in random images.
    colors_p = np.array([0.15, 0.7, 0.15])
    p_border = 1.0

    sic = generate_synthetic_image_classifier(img_size=img_size, cell_size=cell_size,
                                              n_features=n_features, p_border=p_border)
    pattern = sic['pattern']
    predict = sic['predict']
    # NOTE: sic['predict_proba'] is not needed in this script.

    # The pattern the synthetic classifier looks for.
    plt.imshow(pattern)
    plt.xticks(())
    plt.yticks(())
    plt.savefig('../fig/pattern.png', format='png', bbox_inches='tight')
    plt.show()

    X_test = generate_random_img_dataset(pattern, nbr_images=1000, pattern_ratio=0.4,
                                         img_size=img_size, cell_size=cell_size,
                                         min_nbr_cells=0.1, max_nbr_cells=0.3,
                                         colors_p=colors_p)
    Y_test = predict(X_test)

    # First image predicted as containing the pattern.
    idx = np.where(Y_test == 1)[0][0]
    x = X_test[idx]
    plt.imshow(x)
    plt.xticks(())
    plt.yticks(())
    plt.savefig('../fig/image.png', format='png', bbox_inches='tight')
    plt.show()

    # Ground-truth pixel importance; clip the color scale at the 99.9th
    # percentile of |importance| so outliers do not wash out the map.
    gt_val = get_pixel_importance_explanation(x, sic)
    max_val = np.nanpercentile(np.abs(gt_val), 99.9)
    plt.imshow(np.reshape(gt_val, img_size[:2]), cmap='RdYlBu',
               vmin=-max_val, vmax=max_val, alpha=0.7)
    plt.xticks(())
    plt.yticks(())
    plt.savefig('../fig/saliencymap.png', format='png', bbox_inches='tight')
    plt.show()
def main():
    """Compare a LIME image explanation against the ground-truth saliency
    map of a synthetic image classifier on a single test image.

    Prints the label, shows both saliency maps, and prints the
    pixel-based similarity between LIME's mask and the ground truth.
    """
    n_features = (16, 16)
    img_size = (32, 32, 3)
    cell_size = (4, 4)
    # Sampling probabilities for the three cell colors used in random images.
    colors_p = np.array([0.15, 0.7, 0.15])
    p_border = 1.0

    sic = generate_synthetic_image_classifier(img_size=img_size, cell_size=cell_size,
                                              n_features=n_features, p_border=p_border)
    pattern = sic['pattern']
    predict = sic['predict']
    predict_proba = sic['predict_proba']

    plt.imshow(pattern)
    plt.show()

    X_test = generate_random_img_dataset(pattern, nbr_images=1000, pattern_ratio=0.4,
                                         img_size=img_size, cell_size=cell_size,
                                         min_nbr_cells=0.1, max_nbr_cells=0.3,
                                         colors_p=colors_p)
    Y_test = predict(X_test)

    explainer = LimeImageExplainer()
    # Quickshift with a small kernel keeps segments close to pixel size,
    # which suits the cell-based synthetic images.
    segmenter = SegmentationAlgorithm('quickshift', kernel_size=1, max_dist=10, ratio=0.5)

    for x, y in zip(X_test[:1], Y_test[:1]):
        print(y)
        exp = explainer.explain_instance(x, predict_proba, top_labels=2, hide_color=0,
                                         num_samples=10000, segmentation_fn=segmenter)
        _, mask = exp.get_image_and_mask(y, positive_only=True, num_features=1000,
                                         hide_rest=False, min_weight=0.0)

        # Use the LIME mask as the pixel-level feature importance; clip the
        # color scale at the 99.9th percentile of |importance|.
        max_val = np.nanpercentile(np.abs(mask), 99.9)
        plt.imshow(mask, cmap='RdYlBu', vmin=-max_val, vmax=max_val, alpha=0.7)
        plt.show()

        gt_val = get_pixel_importance_explanation(x, sic)
        print(gt_val.shape)
        max_val = np.nanpercentile(np.abs(gt_val), 99.9)
        plt.imshow(np.reshape(gt_val, img_size[:2]), cmap='RdYlBu',
                   vmin=-max_val, vmax=max_val, alpha=0.7)
        plt.show()

        print(pixel_based_similarity(mask.ravel(), gt_val))
def main():
    """Compare a MAPLE explanation against the ground-truth saliency map of
    a synthetic image classifier on a single test image.

    MAPLE is fit on raveled images; its coefficients are binarized
    (positive -> important), collapsed over the channel axis, binarized
    again at pixel level, plotted, and scored against the ground truth.
    """
    n_features = (8, 8)
    img_size = (32, 32, 3)
    cell_size = (4, 4)
    # Sampling probabilities for the three cell colors used in random images.
    colors_p = np.array([0.15, 0.7, 0.15])
    p_border = 0.0

    sic = generate_synthetic_image_classifier(img_size=img_size, cell_size=cell_size,
                                              n_features=n_features, p_border=p_border)
    pattern = sic['pattern']
    predict = sic['predict']
    predict_proba = sic['predict_proba']

    plt.imshow(pattern)
    plt.show()

    X_test = generate_random_img_dataset(pattern, nbr_images=1000, pattern_ratio=0.4,
                                         img_size=img_size, cell_size=cell_size,
                                         min_nbr_cells=0.1, max_nbr_cells=0.3,
                                         colors_p=colors_p)
    Y_test = predict(X_test)

    # MAPLE works on flat feature vectors, so ravel the images.
    nbr_records = 10
    Xm_test = np.array([x.ravel() for x in X_test[:nbr_records]])
    explainer = MAPLE(Xm_test, Y_test[:nbr_records], Xm_test, Y_test[:nbr_records],
                      n_estimators=5, max_features=0.5, min_samples_leaf=5)

    x = X_test[-1]
    plt.imshow(x)
    plt.show()

    exp = explainer.explain(x)
    expl_val = exp['coefs'][:-1]  # drop the intercept term
    print(expl_val)

    # Binarize: a strictly positive coefficient marks the feature important.
    expl_val = (np.asarray(expl_val) > 0.0).astype(float)
    print(expl_val)
    print(np.unique(expl_val, return_counts=True))
    print(expl_val.shape)

    # Collapse the channel axis, then binarize again at pixel level.
    sv = np.sum(np.reshape(expl_val, img_size), axis=2)
    sv = (sv > 0.0).astype(float)
    print(sv)
    print(sv.shape)

    max_val = np.nanpercentile(np.abs(sv), 99.9)
    plt.imshow(sv, cmap='RdYlBu', vmin=-max_val, vmax=max_val, alpha=0.7)
    plt.show()

    gt_val = get_pixel_importance_explanation(x, sic)
    print(gt_val.shape)
    max_val = np.nanpercentile(np.abs(gt_val), 99.9)
    plt.imshow(np.reshape(gt_val, img_size[:2]), cmap='RdYlBu',
               vmin=-max_val, vmax=max_val, alpha=0.7)
    plt.show()

    print(np.unique(gt_val, return_counts=True))
    print(np.unique(sv.ravel(), return_counts=True))
    print(pixel_based_similarity(sv.ravel(), gt_val))
def main():
    """Compare LIME, SHAP and MAPLE saliency maps against the ground truth
    of a synthetic image classifier on a hand-crafted image.

    Saves the three explainer saliency maps to ../fig/ and prints the
    F1 / precision / recall of each explainer against the ground truth.

    Fix: the MAPLE plot previously scaled its color map with the 99.9th
    percentile of the SHAP values (copy-paste bug); it now uses its own.
    """
    n_features = (20, 20)
    img_size = (32, 32, 3)
    cell_size = (4, 4)
    # Sampling probabilities for the three cell colors used in random images.
    colors_p = np.array([0.15, 0.7, 0.15])
    p_border = 1.0

    # Hand-crafted detection pattern ('k' = black, 'g' = green, 'b' = blue —
    # presumably; confirm against generate_img_defined).
    pattern_draft = np.array([
        ['k', 'k', 'k', 'k', 'k'],
        ['k', 'k', 'k', 'b', 'k'],
        ['k', 'k', 'g', 'g', 'k'],
        ['k', 'k', 'g', 'k', 'k'],
        ['k', 'k', 'k', 'k', 'k'],
    ])
    pattern = generate_img_defined(pattern_draft, img_size=(20, 20, 3), cell_size=cell_size)

    sic = generate_synthetic_image_classifier(img_size=img_size, cell_size=cell_size,
                                              n_features=n_features, p_border=p_border,
                                              pattern=pattern)
    pattern = sic['pattern']
    predict = sic['predict']
    predict_proba = sic['predict_proba']

    plt.imshow(pattern)
    plt.xticks(())
    plt.yticks(())
    plt.show()

    X_test = generate_random_img_dataset(pattern, nbr_images=1000, pattern_ratio=0.4,
                                         img_size=img_size, cell_size=cell_size,
                                         min_nbr_cells=0.1, max_nbr_cells=0.3,
                                         colors_p=colors_p)
    Y_test = predict(X_test)

    # Hand-crafted test image containing the pattern (rows 3-4, cols 4-6).
    img_draft = np.array([
        ['k', 'k', 'k', 'k', 'k', 'g', 'r', 'k'],
        ['g', 'k', 'k', 'k', 'k', 'k', 'k', 'g'],
        ['k', 'g', 'k', 'k', 'k', 'b', 'k', 'k'],
        ['k', 'g', 'k', 'k', 'g', 'g', 'k', 'b'],
        ['k', 'k', 'k', 'k', 'g', 'k', 'k', 'g'],
        ['g', 'k', 'k', 'k', 'k', 'k', 'k', 'k'],
        ['k', 'k', 'k', 'k', 'k', 'k', 'k', 'k'],
        ['k', 'k', 'k', 'k', 'g', 'k', 'k', 'k'],
    ])
    x = generate_img_defined(img_draft, img_size=img_size, cell_size=cell_size)
    plt.imshow(x)
    plt.xticks(())
    plt.yticks(())
    plt.show()

    # Ground-truth saliency; color scale clipped at the 99.9th percentile.
    gt_val = get_pixel_importance_explanation(x, sic)
    max_val = np.nanpercentile(np.abs(gt_val), 99.9)
    plt.imshow(np.reshape(gt_val, img_size[:2]), cmap='RdYlBu',
               vmin=-max_val, vmax=max_val, alpha=0.7)
    plt.xticks(())
    plt.yticks(())
    plt.show()

    # ---- LIME ----
    lime_explainer = LimeImageExplainer()
    segmenter = SegmentationAlgorithm('quickshift', kernel_size=1, max_dist=10, ratio=0.5)
    tot_num_features = img_size[0] * img_size[1]
    lime_exp = lime_explainer.explain_instance(x, predict_proba, top_labels=2, hide_color=0,
                                               num_samples=10000, segmentation_fn=segmenter)
    _, lime_expl_val = lime_exp.get_image_and_mask(
        1, positive_only=True, num_features=tot_num_features, hide_rest=False, min_weight=0.0)
    max_val = np.nanpercentile(np.abs(lime_expl_val), 99.9)
    plt.imshow(lime_expl_val, cmap='RdYlBu', vmin=-max_val, vmax=max_val, alpha=0.7)
    plt.xticks(())
    plt.yticks(())
    plt.title('lime', fontsize=20)
    plt.savefig('../fig/saliencymap_lime.png', format='png', bbox_inches='tight')
    plt.show()

    # ---- SHAP ---- (background: ten all-zero images)
    background = np.array([np.zeros(img_size).ravel()] * 10)
    shap_explainer = KernelExplainer(predict_proba, background)
    shap_expl_val = shap_explainer.shap_values(x.ravel(), l1_reg='bic')[1]
    # Collapse channels, then binarize: positive SHAP value -> important pixel.
    shap_expl_val = np.sum(np.reshape(shap_expl_val, img_size), axis=2)
    shap_expl_val = (shap_expl_val > 0.0).astype(float)
    max_val = np.nanpercentile(np.abs(shap_expl_val), 99.9)
    plt.imshow(shap_expl_val, cmap='RdYlBu', vmin=-max_val, vmax=max_val, alpha=0.7)
    plt.xticks(())
    plt.yticks(())
    plt.title('shap', fontsize=20)
    plt.savefig('../fig/saliencymap_shap.png', format='png', bbox_inches='tight')
    plt.show()

    # ---- MAPLE ---- (fit on raveled images)
    nbr_records = 10
    Xm_test = np.array([x.ravel() for x in X_test[:nbr_records]])
    maple_explainer = MAPLE(Xm_test, Y_test[:nbr_records], Xm_test, Y_test[:nbr_records],
                            n_estimators=5, max_features=0.5, min_samples_leaf=5)
    maple_exp = maple_explainer.explain(x)
    maple_expl_val = maple_exp['coefs'][:-1]  # drop the intercept term
    maple_expl_val = np.sum(np.reshape(maple_expl_val, img_size), axis=2)
    maple_expl_val = (maple_expl_val > 0.0).astype(float)
    # BUG FIX: scale with MAPLE's own values (was shap_expl_val).
    max_val = np.nanpercentile(np.abs(maple_expl_val), 99.9)
    plt.imshow(maple_expl_val, cmap='RdYlBu', vmin=-max_val, vmax=max_val, alpha=0.7)
    plt.xticks(())
    plt.yticks(())
    plt.title('maple', fontsize=20)
    plt.savefig('../fig/saliencymap_maple.png', format='png', bbox_inches='tight')
    plt.show()

    # ---- scoring against the ground truth ----
    lime_f1, lime_pre, lime_rec = pixel_based_similarity(
        lime_expl_val.ravel(), gt_val, ret_pre_rec=True)
    shap_f1, shap_pre, shap_rec = pixel_based_similarity(
        shap_expl_val.ravel(), gt_val, ret_pre_rec=True)
    maple_f1, maple_pre, maple_rec = pixel_based_similarity(
        maple_expl_val.ravel(), gt_val, ret_pre_rec=True)
    print(lime_f1, lime_pre, lime_rec)
    print(shap_f1, shap_pre, shap_rec)
    print(maple_f1, maple_pre, maple_rec)
def run(black_box, n_records, img_size, cell_size, n_features, p_border, colors_p,
        random_state, filename):
    """Run one SENECA image experiment and append per-image results as CSV.

    Builds a synthetic image classifier, generates `n_records` test images,
    explains each one with LIME, SHAP and MAPLE, scores every explanation
    against the ground-truth pixel importance (F1 / precision / recall), and
    appends one row per image to `filename` (header written only if the
    file does not yet exist).

    Parameters
    ----------
    black_box : identifier of the black box, stored in the results only.
    n_records : number of test images to generate and explain.
    img_size, cell_size, n_features, p_border, colors_p :
        forwarded to the synthetic classifier / dataset generators.
    random_state : seed for the classifier and for the explainer subsets.
    filename : path of the CSV results file.
    """
    sic = generate_synthetic_image_classifier(img_size=img_size, cell_size=cell_size,
                                              n_features=n_features, p_border=p_border,
                                              random_state=random_state)
    pattern = sic['pattern']
    predict = sic['predict']
    predict_proba = sic['predict_proba']

    X_test = generate_random_img_dataset(pattern, nbr_images=n_records, pattern_ratio=0.5,
                                         img_size=img_size, cell_size=cell_size,
                                         min_nbr_cells=0.1, max_nbr_cells=0.3,
                                         colors_p=colors_p)
    Y_test_proba = predict_proba(X_test)
    Y_test = predict(X_test)

    lime_explainer = LimeImageExplainer()
    segmenter = SegmentationAlgorithm('quickshift', kernel_size=1, max_dist=10, ratio=0.5)
    tot_num_features = img_size[0] * img_size[1]

    # SHAP background: ten all-zero (black) images.
    background = np.array([np.zeros(img_size).ravel()] * 10)
    shap_explainer = KernelExplainer(predict_proba, background)

    # FIX: sample the MAPLE train/test subsets from a seeded generator so the
    # experiment is reproducible for a given random_state (previously the
    # unseeded global np.random was used despite the random_state parameter).
    rnd = np.random.RandomState(random_state)
    nbr_records_explainer = 10
    idx_records_train_expl = rnd.choice(range(len(X_test)), size=nbr_records_explainer,
                                        replace=False)
    idx_records_test_expl = rnd.choice(range(len(X_test)), size=nbr_records_explainer,
                                       replace=False)
    Xm_train = np.array([x.ravel() for x in X_test[idx_records_train_expl]])
    Xm_test = np.array([x.ravel() for x in X_test[idx_records_test_expl]])

    print(datetime.datetime.now(), 'build maple')
    maple_explainer = MAPLE(Xm_train, Y_test_proba[idx_records_train_expl][:, 1],
                            Xm_test, Y_test_proba[idx_records_test_expl][:, 1],
                            n_estimators=100, max_features=0.5, min_samples_leaf=2)
    print(datetime.datetime.now(), 'build maple done')

    idx = 0
    results = list()
    for x, y in zip(X_test, Y_test):
        print(datetime.datetime.now(), 'seneca - image', 'black_box %s' % black_box,
              'n_features %s' % str(n_features), 'rs %s' % random_state,
              '%s/%s' % (idx, n_records), end=' ')

        gt_val = get_pixel_importance_explanation(x, sic)

        # LIME explanation for the predicted label.
        lime_exp = lime_explainer.explain_instance(x, predict_proba, top_labels=2,
                                                   hide_color=0, num_samples=10000,
                                                   segmentation_fn=segmenter)
        _, lime_expl_val = lime_exp.get_image_and_mask(
            y, positive_only=True, num_features=tot_num_features,
            hide_rest=False, min_weight=0.0)

        # SHAP: collapse channels, then binarize (positive -> important).
        shap_expl_val = shap_explainer.shap_values(x.ravel(), l1_reg='bic')[1]
        shap_expl_val = np.sum(np.reshape(shap_expl_val, img_size), axis=2)
        tmp = np.zeros(shap_expl_val.shape)
        tmp[np.where(shap_expl_val > 0.0)] = 1.0
        shap_expl_val = tmp

        # MAPLE: drop intercept, collapse channels, binarize.
        maple_exp = maple_explainer.explain(x)
        maple_expl_val = maple_exp['coefs'][:-1]
        maple_expl_val = np.sum(np.reshape(maple_expl_val, img_size), axis=2)
        tmp = np.zeros(maple_expl_val.shape)
        tmp[np.where(maple_expl_val > 0.0)] = 1.0
        maple_expl_val = tmp

        lime_f1, lime_pre, lime_rec = pixel_based_similarity(
            lime_expl_val.ravel(), gt_val, ret_pre_rec=True)
        shap_f1, shap_pre, shap_rec = pixel_based_similarity(
            shap_expl_val.ravel(), gt_val, ret_pre_rec=True)
        maple_f1, maple_pre, maple_rec = pixel_based_similarity(
            maple_expl_val.ravel(), gt_val, ret_pre_rec=True)

        res = {
            'black_box': black_box,
            'n_records': n_records,
            'img_size': '"%s"' % str(img_size),
            'cell_size': '"%s"' % str(cell_size),
            'n_features': '"%s"' % str(n_features),
            'random_state': random_state,
            'idx': idx,
            'lime_f1': lime_f1, 'lime_pre': lime_pre, 'lime_rec': lime_rec,
            'shap_f1': shap_f1, 'shap_pre': shap_pre, 'shap_rec': shap_rec,
            'maple_f1': maple_f1, 'maple_pre': maple_pre, 'maple_rec': maple_rec,
            'p_border': p_border
        }
        results.append(res)
        print('lime %.2f' % lime_f1, 'shap %.2f' % shap_f1, 'maple %.2f' % maple_f1)
        idx += 1

    # Fix the column order and append; write the header only on first write.
    df = pd.DataFrame(data=results)
    df = df[[
        'black_box', 'n_records', 'img_size', 'cell_size', 'n_features', 'random_state',
        'idx',
        'lime_f1', 'lime_pre', 'lime_rec',
        'shap_f1', 'shap_pre', 'shap_rec',
        'maple_f1', 'maple_pre', 'maple_rec',
        'p_border'
    ]]
    if not os.path.isfile(filename):
        df.to_csv(filename, index=False)
    else:
        df.to_csv(filename, mode='a', index=False, header=False)