def viz_cam(model_file, data_dir):
    """Write CAM visualisations for correctly classified validation images.

    For every validation sample whose top-1 prediction is right, the
    activation map of layer ``group3new/bnlast/Relu`` is weighted by the
    column of ``linearnew/W`` belonging to the ground-truth label, resized
    to the image size, colour-mapped and saved next to the input image and
    a 50/50 blend of both as ``cam<N>-<classname>.jpg`` in the current
    working directory.  The function returns after 500 images were written.

    Args:
        model_file: checkpoint handed to ``SmartInit``.
        data_dir: not used by this function; kept so existing callers
            keep working.
    """
    ds = get_data('val')
    pred_config = PredictConfig(
        model=Model(),
        session_init=SmartInit(model_file),
        input_names=['input', 'label'],
        output_names=['wrong-top1', 'group3new/bnlast/Relu', 'linearnew/W'],
        return_input=True)
    meta = dataset.ILSVRCMeta().get_synset_words_1000()

    pred = SimpleDatasetPredictor(pred_config, ds)
    cnt = 0
    for inp, outp in pred.get_result():
        images, labels = inp
        wrongs, convmaps, W = outp
        batch = wrongs.shape[0]
        for i in range(batch):
            if wrongs[i]:
                # Only visualise samples the network classified correctly.
                continue
            weight = W[:, [labels[i]]].T  # 1 x C after the transpose
            # Assumes channels-first feature maps (C x h x w), as the
            # original inline comment stated.
            convmap = convmaps[i, :, :, :]
            # Read the sizes from the data instead of hard-coding
            # 512 channels / 14x14 maps / 224x224 images.
            channels, height, width = convmap.shape
            mergedmap = np.matmul(
                weight, convmap.reshape((channels, -1))).reshape(height, width)
            img_h, img_w = images[i].shape[:2]
            # cv2.resize takes (width, height); the map must match the
            # image so the blend below is shape-compatible.
            mergedmap = cv2.resize(mergedmap, (img_w, img_h))
            heatmap = viz.intensity_to_rgb(mergedmap, normalize=True)
            blend = images[i] * 0.5 + heatmap * 0.5
            concat = np.concatenate((images[i], heatmap, blend), axis=1)

            classname = meta[labels[i]].split(',')[0]
            cv2.imwrite('cam{}-{}.jpg'.format(cnt, classname), concat)
            cnt += 1
            if cnt == 500:
                return
def viz_cam(model_file, data_dir):
    """Write CAM visualisations for correctly classified validation images.

    For every validation sample whose top-1 prediction is right, the
    activation map of layer ``group3new/bnlast/Relu`` is weighted by the
    column of ``linearnew/W`` belonging to the ground-truth label, resized
    to the image size, colour-mapped and saved next to the input image and
    a 50/50 blend of both as ``cam<N>-<classname>.jpg`` in the current
    working directory.  The function returns after 500 images were written.

    Args:
        model_file: checkpoint handed to ``get_model_loader``.
        data_dir: not used by this function; kept so existing callers
            keep working.
    """
    ds = get_data('val')
    pred_config = PredictConfig(
        model=Model(),
        session_init=get_model_loader(model_file),
        input_names=['input', 'label'],
        output_names=['wrong-top1', 'group3new/bnlast/Relu', 'linearnew/W'],
        return_input=True
    )
    meta = dataset.ILSVRCMeta().get_synset_words_1000()

    pred = SimpleDatasetPredictor(pred_config, ds)
    cnt = 0
    for inp, outp in pred.get_result():
        images, labels = inp
        wrongs, convmaps, W = outp
        batch = wrongs.shape[0]
        for i in range(batch):
            if wrongs[i]:
                # Only visualise samples the network classified correctly.
                continue
            weight = W[:, [labels[i]]].T  # 1 x C after the transpose
            # Assumes channels-first feature maps (C x h x w), as the
            # original inline comment stated.
            convmap = convmaps[i, :, :, :]
            # Read the sizes from the data instead of hard-coding
            # 512 channels / 14x14 maps / 224x224 images.
            channels, height, width = convmap.shape
            mergedmap = np.matmul(
                weight, convmap.reshape((channels, -1))).reshape(height, width)
            img_h, img_w = images[i].shape[:2]
            # cv2.resize takes (width, height); the map must match the
            # image so the blend below is shape-compatible.
            mergedmap = cv2.resize(mergedmap, (img_w, img_h))
            heatmap = viz.intensity_to_rgb(mergedmap, normalize=True)
            blend = images[i] * 0.5 + heatmap * 0.5
            concat = np.concatenate((images[i], heatmap, blend), axis=1)

            classname = meta[labels[i]].split(',')[0]
            cv2.imwrite('cam{}-{}.jpg'.format(cnt, classname), concat)
            cnt += 1
            if cnt == 500:
                return
def run(model_path, image_path):
    """Compute saliency maps for one image and write them as JPEG files.

    Loads the model from ``model_path``, feeds the image at ``image_path``
    (resized to ``IMAGE_SIZE`` x ``IMAGE_SIZE``, converted to RGB float32)
    and writes four files into the current working directory:
    ``pos.jpg``, ``neg.jpg``, ``abs-saliency.jpg`` and ``blended.jpg``.

    Args:
        model_path: checkpoint passed to ``tp.get_model_loader``.
        image_path: path of the image to analyse.

    Raises:
        AssertionError: if the image cannot be read or is not 3-dimensional.
    """
    predict_func = tp.OfflinePredictor(tp.PredictConfig(
        model=Model(),
        session_init=tp.get_model_loader(model_path),
        input_names=['image'],
        output_names=['saliency']))
    im = cv2.imread(image_path)
    assert im is not None and im.ndim == 3, image_path

    # resnet expect RGB inputs of 224x224x3
    im = cv2.resize(im, (IMAGE_SIZE, IMAGE_SIZE))
    im = im.astype(np.float32)[:, :, ::-1]

    saliency_images = predict_func([im])[0]

    abs_saliency = np.abs(saliency_images).max(axis=-1)
    pos_saliency = np.maximum(0, saliency_images)
    neg_saliency = np.maximum(0, -saliency_images)

    # NOTE(review): the saliency channels follow the RGB input order but are
    # written without flipping to BGR -- confirm whether that is intended.
    for fname, part in (('pos.jpg', pos_saliency), ('neg.jpg', neg_saliency)):
        part -= part.min()
        peak = part.max()
        # A constant map (e.g. no negative gradients at all) has peak 0;
        # dividing would turn the whole image into NaN.
        if peak > 0:
            part /= peak
        cv2.imwrite(fname, part * 255)

    abs_saliency = viz.intensity_to_rgb(abs_saliency, normalize=True)[:, :, ::-1]  # bgr
    cv2.imwrite("abs-saliency.jpg", abs_saliency)

    # `im` was flipped to RGB for the network; flip it back so both blend
    # operands are in BGR order before cv2.imwrite.
    rsl = im[:, :, ::-1] * 0.2 + abs_saliency * 0.8
    cv2.imwrite("blended.jpg", rsl)
def run(model_path, image_path):
    """Compute saliency maps for one image and write them as JPEG files.

    Loads the model from ``model_path``, feeds the image at ``image_path``
    (resized to ``IMAGE_SIZE`` x ``IMAGE_SIZE``, converted to RGB float32)
    and writes four files into the current working directory:
    ``pos.jpg``, ``neg.jpg``, ``abs-saliency.jpg`` and ``blended.jpg``.

    Args:
        model_path: checkpoint passed to ``tp.get_model_loader``.
        image_path: path of the image to analyse.

    Raises:
        AssertionError: if the image cannot be read or is not 3-dimensional.
    """
    predictor = tp.OfflinePredictor(tp.PredictConfig(
        model=Model(),
        session_init=tp.get_model_loader(model_path),
        input_names=['image'],
        output_names=['saliency']))
    im = cv2.imread(image_path)
    assert im is not None and im.ndim == 3, image_path

    # resnet expect RGB inputs of 224x224x3
    im = cv2.resize(im, (IMAGE_SIZE, IMAGE_SIZE))
    im = im.astype(np.float32)[:, :, ::-1]

    saliency_images = predictor(im)[0]

    abs_saliency = np.abs(saliency_images).max(axis=-1)
    pos_saliency = np.maximum(0, saliency_images)
    neg_saliency = np.maximum(0, -saliency_images)

    # NOTE(review): the saliency channels follow the RGB input order but are
    # written without flipping to BGR -- confirm whether that is intended.
    for fname, part in (('pos.jpg', pos_saliency), ('neg.jpg', neg_saliency)):
        part -= part.min()
        peak = part.max()
        # A constant map (e.g. no negative gradients at all) has peak 0;
        # dividing would turn the whole image into NaN.
        if peak > 0:
            part /= peak
        cv2.imwrite(fname, part * 255)

    abs_saliency = viz.intensity_to_rgb(abs_saliency, normalize=True)[:, :, ::-1]  # bgr
    cv2.imwrite("abs-saliency.jpg", abs_saliency)

    # `im` was flipped to RGB for the network; flip it back so both blend
    # operands are in BGR order before cv2.imwrite.
    rsl = im[:, :, ::-1] * 0.2 + abs_saliency * 0.8
    cv2.imwrite("blended.jpg", rsl)
def get_cam(index, averaged_gradients, convmaps, option):
    """Build the colour heat map for one sample of a batch.

    The feature maps of sample ``index`` are flattened per channel,
    combined with that sample's averaged gradients through a matrix
    product, reshaped back to the spatial grid, resized to
    ``option.final_size`` squared and colour-mapped.

    Args:
        index: position of the sample inside the batch.
        averaged_gradients: per-sample channel weights, indexed by ``index``.
        convmaps: 4-d feature maps unpacked as (batch, channel, height, width).
        option: object providing ``final_size``.

    Returns:
        The result of ``viz.intensity_to_rgb`` on the resized map.
    """
    _, n_channels, map_h, map_w = np.shape(convmaps)

    channel_weights = averaged_gradients[index]
    flat_features = convmaps[index, :, :, :].reshape((n_channels, -1))

    activation = np.matmul(channel_weights, flat_features)
    activation = activation.reshape(map_h, map_w)

    side = option.final_size
    activation = cv2.resize(activation, (side, side))
    return viz.intensity_to_rgb(activation, normalize=True)
def cam(model, option, gradcam=False, flag=None):
    """Evaluate CAM / Grad-CAM localisation on the validation set.

    For every validation image a heat map is built from the network's
    activation map, thresholded at ``option.locthr`` of its maximum, and the
    bounding box chosen by ``large_rect`` is compared with the ground-truth
    box via ``bb_IOU``.  The first 500 visualisations are written to
    ``train_log/<logdir>/result_{camrelu|norelu}/<int(locthr*100)>/``.
    Once ``valnum`` images were processed (50000 for imagenet, 5794 for cub)
    the GT-known localisation, top-1 localisation and top-1 accuracy are
    printed and written to ``Loc.txt`` in the same directory.

    Args:
        model: model description handed to ``PredictConfig``.
        option: options object; reads ``load``, ``data``, ``dataname``,
            ``imagenet``, ``cub``, ``locthr``, ``camrelu``, ``logdir``,
            ``chlast`` and ``final_size``.
        gradcam: if true, the fourth network output is treated as gradients
            and averaged over the spatial axes to obtain channel weights;
            otherwise it is used as the weights directly.
        flag: free-form label that is only echoed in the printed summary.

    Raises:
        ValueError: if neither ``option.imagenet`` nor ``option.cub`` is set
            (previously this surfaced later as an unbound-variable error).
    """
    model_file = option.load
    if option.imagenet:
        valnum = 50000
    elif option.cub:
        valnum = 5794
    else:
        raise ValueError('cam() needs option.imagenet or option.cub to be set')

    ds = get_data('val', option)
    pred_config = PredictConfig(
        model=model,
        session_init=get_model_loader(model_file),
        input_names=['input', 'label', 'bbox'],
        output_names=['wrong-top1', 'top5', 'actmap', 'grad'],
        return_input=True
    )

    if option.imagenet:
        meta_source = Imagenet.ImagenetMeta(dir=option.data)
    else:
        meta_source = CUB200.CUB200Meta(dir=option.data)
    meta = meta_source.get_synset_words_1000(option.dataname)
    meta_labels = meta_source.get_synset_1000(option.dataname)

    pred = SimpleDatasetPredictor(pred_config, ds)

    cnt = 0
    cnt_false = 0
    hit_known = 0
    hit_top1 = 0

    index = int(option.locthr * 100)
    result_kind = 'result_camrelu' if option.camrelu else 'result_norelu'
    # Single source of truth for every output path below.
    dirname = os.path.join('train_log', option.logdir, result_kind, str(index))
    if not os.path.isdir(dirname):
        mkdir_p(dirname)

    for inp, outp in pred.get_result():
        images, labels, bbox = inp

        if gradcam:
            wrongs, _top5, convmaps, grads_val = outp
            if option.chlast:
                # Channels-last activations: read the sizes, then move the
                # channel axis to the front for the matmul below.
                _, height, width, channels = np.shape(convmaps)
                convmaps = np.transpose(convmaps, [0, 3, 1, 2])
            else:
                _, channels, height, width = np.shape(convmaps)
                # Channels-first gradients: move the channel axis last so
                # the spatial mean below runs over axes 1 and 2.
                grads_val = np.transpose(grads_val, [0, 2, 3, 1])
            W = np.mean(grads_val, axis=(1, 2))
        else:
            wrongs, _top5, convmaps, W = outp
            _, channels, height, width = np.shape(convmaps)
        batch = wrongs.shape[0]

        for i in range(batch):
            gxa = int(bbox[i][0][0])
            gya = int(bbox[i][0][1])
            gxb = int(bbox[i][1][0])
            gyb = int(bbox[i][1][1])

            # generating heatmap
            weight = W[i]  # c x 1
            convmap = convmaps[i, :, :, :]  # c x h x w
            mergedmap = np.matmul(
                weight, convmap.reshape((channels, -1))).reshape(height, width)
            if option.camrelu:
                mergedmap = np.maximum(mergedmap, 0)
            mergedmap = cv2.resize(
                mergedmap, (option.final_size, option.final_size))
            heatmap = viz.intensity_to_rgb(mergedmap, normalize=True)
            blend = images[i] * 0.5 + heatmap * 0.5

            # astype() copies, so the rectangles drawn below do not touch
            # the predictor's input array.
            bbox_img = images[i].astype('uint8')
            heatmap = heatmap.astype('uint8')
            blend = blend.astype('uint8')

            # thresholding heatmap
            gray_heatmap = cv2.cvtColor(heatmap, cv2.COLOR_RGB2GRAY)
            th_value = np.max(gray_heatmap) * option.locthr
            _, thred_gray_heatmap = cv2.threshold(
                gray_heatmap, int(th_value), 255, cv2.THRESH_TOZERO)
            # findContours returns (image, contours, hierarchy) in OpenCV 3
            # but (contours, hierarchy) in OpenCV 2 and 4; the contours are
            # always the second-to-last item.
            contours = cv2.findContours(
                thred_gray_heatmap, cv2.RETR_TREE,
                cv2.CHAIN_APPROX_SIMPLE)[-2]

            # calculate bbox coordinates
            rect = [list(cv2.boundingRect(c)) for c in contours]
            if not rect:
                estimated_box = [0, 0, 1, 1]  # dummy
            else:
                x, y, w, h = large_rect(rect)
                estimated_box = [x, y, x + w, y + h]
                cv2.rectangle(bbox_img, (x, y), (x + w, y + h), (0, 255, 0), 2)

            cv2.rectangle(bbox_img, (gxa, gya), (gxb, gyb), (0, 0, 255), 2)
            gt_box = [gxa, gya, gxb, gyb]

            IOU_ = bb_IOU(estimated_box, gt_box)
            localized = IOU_ >= 0.5
            if localized:
                hit_known += 1
            if localized and not wrongs[i]:
                hit_top1 += 1
            if wrongs[i]:
                cnt_false += 1

            concat = np.concatenate((bbox_img, heatmap, blend), axis=1)
            classname = meta[meta_labels[labels[i]]].split(',')[0]

            if cnt < 500:
                cv2.imwrite(
                    os.path.join(dirname, 'cam{}-{}.jpg'.format(cnt, classname)),
                    concat)

            cnt += 1
            if cnt == valnum:
                acc_known = hit_known / cnt
                acc_top1 = hit_top1 / cnt
                top1_acc = 1 - cnt_false / cnt
                if option.camrelu:
                    print("\nGRADCAM (use relu)")
                else:
                    print("\nCAM (do not use relu)")
                print('Flag: {}\nCAM Threshold: {}\nGT-known Loc: {} '
                      '\nTop-1 Loc: {}\nTop-1 Acc: {}'
                      .format(flag, option.locthr, acc_known, acc_top1, top1_acc))
                line = 'GT-known Loc: {}\nTop-1 Loc: {}\nTop-1 Acc: {}'.format(
                    acc_known, acc_top1, top1_acc)
                # `with` closes the file even if the write fails.
                with open(os.path.join(dirname, 'Loc.txt'), 'w') as f:
                    f.write(line)
                return
def cam(model, option, gradcam=False):
    """Evaluate CAM / Grad-CAM localisation on the validation set.

    For every validation image a heat map is built from the ``actmap``
    output, thresholded at 20% of its maximum, and the bounding box chosen
    by ``large_rect`` is compared with the ground-truth box via ``bb_IOU``.
    The first 50 visualisations are written to ``result/<logdir>/``.  Once
    ``option.valnum`` images were processed, GT-known localisation, top-1
    localisation and top-1 accuracy are written to
    ``result/<logdir>/Loc.txt`` and the function returns.

    Args:
        model: model description handed to ``PredictConfig``.
        option: options object; reads ``load``, ``valnum``, ``data``,
            ``dataname``, ``final_size`` and ``logdir``.
        gradcam: if true, the ``grad`` output is averaged over axes 1 and 2
            to obtain per-sample channel weights and the map is clipped at
            zero; otherwise the column of ``linearnew/W`` for the
            ground-truth label is used.
    """
    model_file = option.load
    valnum = option.valnum

    ds = get_data('val', option)
    weight_output = 'grad' if gradcam else 'linearnew/W'
    pred_config = PredictConfig(
        model=model,
        session_init=get_model_loader(model_file),
        input_names=['input', 'label', 'xa', 'ya', 'xb', 'yb'],
        output_names=['wrong-top1', 'actmap', weight_output],
        return_input=True)

    meta_source = dataset.tinyImagenetHaSMeta(dir=option.data)
    meta = meta_source.get_synset_words_1000(option.dataname)
    meta_labels = meta_source.get_synset_1000(option.dataname)

    pred = SimpleDatasetPredictor(pred_config, ds)

    # Create the output directory once, including a missing `result/`
    # parent, instead of calling os.mkdir for every image.
    dirname = 'result/{}'.format(option.logdir)
    if not os.path.isdir(dirname):
        os.makedirs(dirname)

    cnt = 0
    cnt_false = 0
    hit_known = 0
    hit_top1 = 0

    for inp, outp in pred.get_result():
        images, labels, gxa, gya, gxb, gyb = inp
        wrongs, convmaps, weights_out = outp
        # Move the last axis to position 1 so each sample is c x h x w.
        convmaps = np.transpose(convmaps, [0, 3, 1, 2])
        batch = wrongs.shape[0]
        _, channels, height, width = np.shape(convmaps)
        if gradcam:
            W = np.mean(weights_out, axis=(1, 2))
        else:
            W = weights_out

        for i in range(batch):
            # generating heatmap
            if gradcam:
                weight = W[i]  # c x 1
            else:
                weight = W[:, [labels[i]]].T
            convmap = convmaps[i, :, :, :]  # c x h x w
            mergedmap = np.matmul(
                weight, convmap.reshape((channels, -1))).reshape(height, width)
            if gradcam:
                mergedmap = np.maximum(mergedmap, 0)
            mergedmap = cv2.resize(
                mergedmap, (option.final_size, option.final_size))
            heatmap = viz.intensity_to_rgb(mergedmap, normalize=True)
            blend = images[i] * 0.5 + heatmap * 0.5

            # astype() copies, so the rectangles drawn below do not touch
            # the predictor's input array.
            bbox_img = images[i].astype('uint8')
            heatmap = heatmap.astype('uint8')
            blend = blend.astype('uint8')

            # thresholding heatmap
            gray_heatmap = cv2.cvtColor(heatmap, cv2.COLOR_RGB2GRAY)
            th_value = np.max(gray_heatmap) * 0.2
            _, thred_gray_heatmap = cv2.threshold(
                gray_heatmap, int(th_value), 255, cv2.THRESH_TOZERO)
            # findContours returns (image, contours, hierarchy) in OpenCV 3
            # but (contours, hierarchy) in OpenCV 2 and 4; the contours are
            # always the second-to-last item.
            contours = cv2.findContours(
                thred_gray_heatmap, cv2.RETR_TREE,
                cv2.CHAIN_APPROX_SIMPLE)[-2]

            # calculate bbox coordinates
            rect = [list(cv2.boundingRect(c)) for c in contours]
            if rect:
                x, y, w, h = large_rect(rect)
                estimated_box = [x, y, x + w, y + h]
                cv2.rectangle(bbox_img, (x, y), (x + w, y + h), (0, 255, 0), 2)
            else:
                # No contour survived the threshold: fall back to a dummy
                # box rather than calling large_rect on an empty list.
                estimated_box = [0, 0, 1, 1]
            cv2.rectangle(bbox_img, (gxa[i], gya[i]), (gxb[i], gyb[i]),
                          (0, 0, 255), 2)

            # calculate IOU
            gt_box = [gxa[i], gya[i], gxb[i], gyb[i]]
            IOU_ = bb_IOU(estimated_box, gt_box)

            if IOU_ > 0.5:
                hit_known += 1
            if IOU_ > 0.5 and not wrongs[i]:
                hit_top1 += 1
            if wrongs[i]:
                cnt_false += 1

            concat = np.concatenate((bbox_img, heatmap, blend), axis=1)
            classname = meta[meta_labels[labels[i]]].split(',')[0]

            if cnt < 50:
                cv2.imwrite(
                    'result/{}/cam{}-{}.jpg'.format(option.logdir, cnt, classname),
                    concat)

            cnt += 1
            if cnt == valnum:
                fname = 'result/{}/Loc.txt'.format(option.logdir)
                acc_known = hit_known / cnt
                acc_top1 = hit_top1 / cnt
                top1_acc = 1 - cnt_false / cnt
                line = 'GT-known Loc: {}\nTop-1 Loc: {}\nTop-1 Acc: {}'.format(
                    acc_known, acc_top1, top1_acc)
                # `with` closes the file even if the write fails.
                with open(fname, 'w') as f:
                    f.write(line)
                return
def colorize(img, heatmap):
    """Overlay a jet-coloured heat map on an image with equal weights.

    img: bgr, [0,255]
    heatmap: [0,1]

    The heat map is colour-mapped with ``viz.intensity_to_rgb`` and its
    channel axis is reversed before it is averaged with ``img``.
    """
    coloured = viz.intensity_to_rgb(heatmap, cmap='jet')
    # Reverse the channel axis so the overlay matches the bgr image.
    coloured_bgr = coloured[:, :, ::-1]
    overlay = img * 0.5 + coloured_bgr * 0.5
    return overlay
def viz_CAM(model, sessinit, name, dataflow, CAM_dir, save_PKL=False, save_REP=False):
    """Write two-class CAM visualisations for every image of a dataflow.

    For each sample the activation map of ``group3/block1/ReLU_output`` is
    weighted once with column 0 (negative class) and once with column 1
    (positive class) of ``linear_C2/W``.  The input image, both heat maps
    and both 50/50 blends are concatenated and saved under
    ``<CAM_dir><name>/jpg/``.  Existing results in ``<CAM_dir><name>`` are
    removed first.

    Args:
        model: model description handed to ``PredictConfig``.
        sessinit: session initialiser handed to ``PredictConfig``.
        name: dataset split name; appended to ``CAM_dir`` and passed to
            ``dataset.AVA2012Meta().get_image_list``.
        dataflow: dataflow yielding ``input`` / ``label``.
            NOTE(review): results are matched to the image list by a running
            counter, so the dataflow presumably must not be shuffled -- confirm.
        CAM_dir: output directory prefix (concatenated with ``name`` without
            a separator).
        save_PKL: also pickle the label and both low-resolution CAMs into
            ``pkl/``.
        save_REP: also pickle the activation map, ``W`` and the label into
            ``rep/``.

    Raises:
        AssertionError: if a label from the dataflow differs from the label
            in the image list at the same position.
    """
    # Function-scope import: the file's import block is outside this view.
    import shutil

    # set the configuration during the prediction process
    # and apply the SimpleDatasetPredictor to extract the output_names
    pred_config = PredictConfig(
        model=model,
        session_init=sessinit,
        # NOTE: the names in input_names & output_names depends on the
        # definitions in the loaded model
        input_names=['input', 'label'],
        output_names=[
            'wrong-top1', 'group3/block1/ReLU_output', 'linear_C2/W'
        ],
        return_input=True)
    pred = SimpleDatasetPredictor(pred_config, dataflow)

    def ensure_dir(path):
        # Equivalent of `mkdir -p` without going through a shell.
        if not os.path.isdir(path):
            os.makedirs(path)

    # create or clear CAM_dir for the output of results of CAM visualization
    CAM_dir = '{}{}'.format(CAM_dir, name)
    if os.path.isdir(CAM_dir):
        print('--> clear the existing results in the directory {}'.format(
            CAM_dir))
        # shutil instead of os.system('rm -r ...'): no shell, so paths with
        # spaces or metacharacters are handled and failures raise.
        shutil.rmtree(CAM_dir)
    ensure_dir(CAM_dir)

    # jpgs, pkls and reps are saved into three different directories
    print('--> during the viz_CAM, we will generate the jpgs', end='')
    ensure_dir(CAM_dir + '/jpg')
    if save_PKL:
        print(', pkl', end='')
        ensure_dir(CAM_dir + '/pkl')
    if save_REP:
        print(', rep', end='')
        ensure_dir(CAM_dir + '/rep')
    print(' files for furthre usage')

    # get the img_lab_list for proper formation of result recording
    img_lab_list = dataset.AVA2012Meta().get_image_list(name)[0]

    cnt = 0
    for inp, outp in pred.get_result():
        images, labels = inp
        wrongs, convmaps, W = outp
        batch = wrongs.shape[0]

        for i in range(batch):
            # Assumes channels-first maps (originally noted as 512 x 7 x 7);
            # sizes are read from the data instead of being hard-coded.
            convmap = convmaps[i, :, :, :]
            channels, map_h, map_w = convmap.shape
            flat_features = convmap.reshape((channels, -1))
            img_h, img_w = images[i].shape[:2]

            weight0 = W[:, 0].T  # weights of the negative class
            mergedmap0_small = np.matmul(
                weight0, flat_features).reshape(map_h, map_w)
            # cv2.resize takes (width, height)
            mergedmap0 = cv2.resize(mergedmap0_small, (img_w, img_h))
            heatmap0 = viz.intensity_to_rgb(mergedmap0)
            blend0 = images[i] * 0.5 + heatmap0 * 0.5

            weight1 = W[:, 1].T  # weights of the positive class
            mergedmap1_small = np.matmul(
                weight1, flat_features).reshape(map_h, map_w)
            mergedmap1 = cv2.resize(mergedmap1_small, (img_w, img_h))
            heatmap1 = viz.intensity_to_rgb(mergedmap1)
            blend1 = images[i] * 0.5 + heatmap1 * 0.5

            concat = np.concatenate(
                (images[i], heatmap0, blend0, heatmap1, blend1), axis=1)

            imgName, lab01 = img_lab_list[cnt]
            assert lab01 == labels[i], \
                '*** in viz_CAM: lab01 ({0}) != labels[i] ({1}) in image {2}'.format(lab01, labels[i], imgName)
            stem = os.path.splitext(imgName)[0]

            # save image of CAM visualization
            cv2.imwrite(
                '{0}/jpg/cam_{1}_{2}_{3}.jpg'.format(
                    CAM_dir, stem, lab01, int(wrongs[i])),
                concat)

            # add @20171123: for CAMCrop
            if save_PKL:
                with open('{0}/pkl/{1}.pkl'.format(CAM_dir, stem),
                          'wb') as output_stream:
                    pickle.dump(
                        {
                            "GT01": lab01,
                            "CAM0": mergedmap0_small,
                            "CAM1": mergedmap1_small
                        }, output_stream)

            if save_REP:
                with open('{0}/rep/{1}.rep'.format(CAM_dir, stem),
                          'wb') as output_stream:
                    pickle.dump({
                        "convmap": convmap,
                        "W": W,
                        "GT01": lab01
                    }, output_stream)

            cnt += 1

    print(
        '=== Finish CAM_viz on all the images in the validation dataset in AVA2012'
    )
# Vanilla-gradient saliency for a single image.
# `graph`, `sess` and `images` are defined outside this snippet.

# Select one logit of the first batch element; which one is decided at run
# time through the `neuron_selector` placeholder.
logits = graph.get_tensor_by_name('logits/BiasAdd:0')
neuron_selector = tf.placeholder(tf.int32)
y = logits[0][neuron_selector]

# Construct tensor for predictions
prediction = tf.argmax(logits, 1)

# Load an image
# NOTE(review): cv2.imread presumably returns None for an unreadable path;
# there is no check here -- confirm the file exists before running.
im = cv2.imread('all_db_fire/isolated_superpixels/test/310_rgb_sp25.png')

# Cast im as float32
im = np.float32(im)

# Make a prediction
prediction_class = sess.run(prediction, feed_dict={images: [im]})[0]
print("Prediction class: " + str(prediction_class))

# Construct the saliency object. This doesn't yet compute the saliency mask,
# it just sets up the necessary ops.
gradient_saliency = saliency.GradientSaliency(graph, sess, y, images)

# Compute the vanilla gradient mask for the predicted class (no smoothed
# mask is computed in this snippet).
vanilla_mask_3d = gradient_saliency.GetMask(
    im, feed_dict={neuron_selector: prediction_class})

# Generate saliency map and save: take the largest absolute value over the
# last axis, colour-map it and reverse the channel axis before writing.
abs_saliency = np.abs(vanilla_mask_3d).max(axis=-1)
abs_saliency = viz.intensity_to_rgb(
    abs_saliency, normalize=True)[:, :, ::-1]  # cv2 loads as BGR
cv2.imwrite("abs-saliency.jpg", abs_saliency)