def repro_fig_4(gpu=None, interp='bicubic'):
    """Show a test image next to its zebra and elephant heatmaps.

    Loads the GoogLeNet model from the hard-coded paths below, computes an
    'excitation_backprop' heatmap at layer 'pool2/3x3_s2' once for the zebra
    label and once for the African elephant label, and draws a 1x3 figure:
    the image, the zebra map, the elephant map.

    Args:
        gpu: forwarded unchanged to compute_heatmap.
        interp: interpolation name forwarded to overlay_map and to imshow.

    Side effects:
        Overwrites pylab.rcParams['figure.figsize'] and creates a figure.
    """
    net = caffe.Net(
        '/home/ruthfong/packages/caffe/models/bvlc_googlenet/deploy_force_backward.prototxt',
        '/home/ruthfong/packages/caffe/models/bvlc_googlenet/bvlc_googlenet.caffemodel',
        caffe.TEST)
    top_layer = 'loss3/classifier'
    bottom_layer = 'pool2/3x3_s2'
    zebra_i = 340
    elephant_i = 386  # African elephant; Indian elephant = 385
    transformer = get_ILSVRC_net_transformer(net)
    img_path = '/home/ruthfong/neural_coding/fnn_images/zeb-ele1.jpg'

    # Everything except the target label is identical for the two heatmaps.
    shared_kwargs = dict(
        net=net,
        transformer=transformer,
        paths=img_path,
        heatmap_type='excitation_backprop',
        topBlobName=top_layer,
        topLayerName=top_layer,
        outputBlobName=bottom_layer,
        outputLayerName=bottom_layer,
        gpu=gpu,
    )
    class_maps = [compute_heatmap(labels=label, **shared_kwargs)
                  for label in (zebra_i, elephant_i)]

    img = caffe.io.load_image(img_path)
    pylab.rcParams['figure.figsize'] = (12.0, 12.0)
    _, axes = plt.subplots(1, 3)
    axes[0].imshow(img)
    # Panel 1 is the zebra map, panel 2 the elephant map.
    for axis, class_map in zip(axes[1:], class_maps):
        axis.imshow(overlay_map(img, class_map, overlay=False, interp=interp),
                    interpolation=interp)
def repro_fig_3(gpu=None, interp='nearest'):
    """Show a tabby-cat image next to heatmaps taken at five pooling layers.

    Loads the VGG16 model from the hard-coded paths below and, for each of
    'pool5' down to 'pool1', computes an 'excitation_backprop' heatmap from
    'fc8' for label 281 and draws it in its own panel to the right of the
    input image.

    Args:
        gpu: forwarded unchanged to compute_heatmap.
        interp: interpolation name forwarded to overlay_map and to imshow.

    Side effects:
        Overwrites pylab.rcParams['figure.figsize'] and creates a figure.
    """
    net = caffe.Net(
        '/home/ruthfong/packages/caffe/models/vgg16/VGG_ILSVRC_16_layers_deploy_force_backward.prototxt',
        '/home/ruthfong/packages/caffe/models/vgg16/VGG_ILSVRC_16_layers.caffemodel',
        caffe.TEST)
    transformer = get_ILSVRC_net_transformer(net)
    top_layer = 'fc8'
    bottom_layers = ['pool5', 'pool4', 'pool3', 'pool2', 'pool1']
    tabby_i = 281
    # Alternative input: '/home/ruthfong/packages/caffe/examples/images/cat.jpg'
    img_path = '/home/ruthfong/neural_coding/images/tabby_cat_cropped.jpg'
    img = caffe.io.load_image(img_path)

    pylab.rcParams['figure.figsize'] = (12.0, 12.0)
    _, axes = plt.subplots(1, len(bottom_layers) + 1)
    axes[0].imshow(img)

    # One panel per output layer, in the order listed above.
    for axis, layer in zip(axes[1:], bottom_layers):
        layer_map = compute_heatmap(
            net=net,
            transformer=transformer,
            paths=img_path,
            labels=tabby_i,
            heatmap_type='excitation_backprop',
            topBlobName=top_layer,
            topLayerName=top_layer,
            outputBlobName=layer,
            outputLayerName=layer,
            gpu=gpu)
        axis.imshow(overlay_map(img, layer_map, overlay=False, interp=interp),
                    interpolation=interp)
def play_pointing_game(net, transformer, paths, labels, ann_paths, heatmap_type, labels_desc,
                       top_name='loss3/classifier-ft', bottom_name='data', norm_deg=np.inf,
                       batch_size=64, gpu=None):
    """Score how often a heatmap's maximum falls inside a target bounding box.

    Images are processed in batches of batch_size. For each image one point
    is chosen: the image centre when heatmap_type is 'center', otherwise
    whatever get_maximum_from_heatmap returns for that image's heatmap. The
    image counts as a hit when the point lies inside any box that load_objs
    lists for the image's label. An image is "difficult" when its annotation
    has more than one object key and the summed area of the target boxes
    visited is below a quarter of the image area.

    Args:
        net, transformer, norm_deg, gpu: forwarded to compute_heatmap.
        paths: image paths; indexed with a list of indices, so presumably a
            numpy array -- confirm against callers.
        labels: per-image class indices, compared element-wise against ints
            and indexed like paths.
        ann_paths: per-image annotation paths handed to load_objs.
        heatmap_type: heatmap method name, or 'center' for the baseline.
        labels_desc: maps a class index to the key used in load_objs' result.
        top_name, bottom_name: used as both blob and layer names.
        batch_size: number of images per compute_heatmap call.

    Returns:
        (accs, num_hits, num_total, diff_accs, num_diff_hits, num_diff_total),
        each indexed by class. The accuracies come from np.true_divide, so a
        class with a zero total is not special-cased here.
    """
    num_imgs = len(paths)
    assert (num_imgs == len(labels))
    assert (num_imgs == len(ann_paths))

    num_classes = len(labels_desc)
    num_hits = np.zeros(num_classes)
    num_total = np.array([np.sum(labels == cls) for cls in range(num_classes)])
    num_diff_hits = np.zeros(num_classes)
    num_diff_total = np.zeros(num_classes)
    num_batches = int(np.ceil(num_imgs / float(batch_size)))
    use_center = heatmap_type == 'center'

    print(heatmap_type)
    for batch in range(num_batches):
        tic = time.time()
        first = batch * batch_size
        # The final batch may be shorter than batch_size.
        idx = range(first, min(first + batch_size, num_imgs))

        if not use_center:
            heatmaps = compute_heatmap(net=net, transformer=transformer, paths=paths[idx],
                                       labels=labels[idx], heatmap_type=heatmap_type,
                                       topBlobName=top_name, topLayerName=top_name,
                                       outputBlobName=bottom_name, outputLayerName=bottom_name,
                                       norm_deg=norm_deg, gpu=gpu)

        for pos, img_i in enumerate(idx):
            c = labels[img_i]
            resize = caffe.io.load_image(paths[img_i]).shape[:2]
            if use_center:
                # Half of shape[1] first, then half of shape[0]
                # (presumably (x, y) -- confirm against load_objs' box layout).
                max_coords = (resize[1] / float(2), resize[0] / float(2))
            else:
                max_coords = get_maximum_from_heatmap(heatmaps[pos], resize=resize)

            objs = load_objs(ann_paths[img_i])
            target_objs = objs[labels_desc[c]]
            exists_distractor = len(np.unique(objs.keys())) > 1

            is_hit = False
            bb_area = 0
            for bb_coords in target_objs:
                # Indices 0/2 bound max_coords[0] and 1/3 bound max_coords[1].
                if not is_hit:
                    is_hit = (bb_coords[0] <= max_coords[0]
                              and bb_coords[1] <= max_coords[1]
                              and bb_coords[2] >= max_coords[0]
                              and bb_coords[3] >= max_coords[1])
                bb_area += (bb_coords[2] - bb_coords[0]) * (bb_coords[3] - bb_coords[1])
                # Without a distractor the area is never used, so stop early.
                if is_hit and not exists_distractor:
                    break

            is_diff = exists_distractor and bb_area < 0.25 * np.prod(resize)
            if is_hit:
                num_hits[c] += 1
                if is_diff:
                    num_diff_hits[c] += 1
            if is_diff:
                num_diff_total[c] += 1

        print('%d/%d: %.4f' % (batch, num_batches, time.time() - tic))

    accs = np.true_divide(num_hits, num_total)
    diff_accs = np.true_divide(num_diff_hits, num_diff_total)
    return (accs, num_hits, num_total, diff_accs, num_diff_hits, num_diff_total)
def evalPointingGame(cocoAnn, cat, caffeNet, imgDir, transformer, heatmapType,
                     topName='loss3/classifier', bottomName='data', normDeg=np.inf,
                     naiveMax=True, maxImgs=None, maskDir=None, gpu=None):
    """Evaluate one category by testing a single point per image against its masks.

    For every image that cocoAnn lists for `cat`, one location is chosen and
    the image counts as a hit when a radius-15 circle around that location
    intersects any segmentation (polygon or RLE) of the target category.

    The location depends on heatmapType:
      * 'center': the image centre.
      * 'mask': the maximum of 1 - np.load('<maskDir>/<image stem>_<label>.npy').
        If that file is missing, a message is printed and evaluation stops,
        keeping the scores accumulated so far.
      * 'contrast_excitation_backprop' while the module-level `use_orig_imp`
        is truthy: the maximum of doExcitationBackprop's output.
      * anything else: the maximum of compute_heatmap's output.
    Maps are resized to the image size before the maximum is taken.

    An image is "difficult" when the summed 'area' of all its annotations is
    below a quarter of the image area and it has more than one category.

    Args:
        cocoAnn: annotation object providing getImgIds, loadImgs, getAnnIds
            and loadAnns.
        cat: category dict; its 'id' and 'name' entries are read.
        caffeNet, transformer, normDeg, gpu: forwarded to compute_heatmap.
        imgDir: directory joined with each image's 'file_name'.
        heatmapType: see above.
        topName, bottomName: used as both blob and layer names.
        naiveMax: if True take the first argmax, otherwise the mean position
            of all maximal entries.
        maxImgs: optional cap on the number of images evaluated.
        maskDir: directory of .npy masks; required when heatmapType is 'mask'.

    Returns:
        (accuracy, accuracyDiff); an entry is None when no image contributed
        to it.
    """
    imgIds = cocoAnn.getImgIds(catIds=cat['id'])
    imgList = cocoAnn.loadImgs(ids=imgIds)
    hit = 0
    miss = 0
    hitDiff = 0
    missDiff = 0
    t0 = time.time()
    numImgs = len(imgList)
    if maxImgs is not None:
        numImgs = np.minimum(numImgs, maxImgs)
    accuracy = None
    accuracyDiff = None

    for i in range(numImgs):
        imgInfo = imgList[i]
        imgName = os.path.join(imgDir, imgInfo['file_name'])
        img = caffe.io.load_image(imgName)
        catLabel = tag2ID[cat['name']]

        if heatmapType == 'center':
            # choose center of image
            maxSub = (img.shape[0] / float(2), img.shape[1] / float(2))
        else:
            if heatmapType == 'mask':
                assert (maskDir is not None)
                # str.strip('.jpg') removes any of the characters . j p g from
                # BOTH ends of the string, so it can eat part of the name;
                # splitext removes exactly the extension.
                imgStem = os.path.splitext(os.path.basename(imgName))[0]
                mask_path = os.path.join(maskDir, '%s_%d.npy' % (imgStem, catLabel))
                if not os.path.exists(mask_path):
                    print('%d: %s does not exist' % (i, mask_path))
                    break
                attMap = 1 - np.load(mask_path)
            elif heatmapType == 'contrast_excitation_backprop' and use_orig_imp:
                if i < 10:
                    print('here')
                attMap = doExcitationBackprop(caffeNet, img, cat['name'])
            else:
                attMap = compute_heatmap(net=caffeNet, transformer=transformer, paths=imgName,
                                         labels=catLabel, heatmap_type=heatmapType,
                                         topBlobName=topName, topLayerName=topName,
                                         outputBlobName=bottomName, outputLayerName=bottomName,
                                         norm_deg=normDeg, gpu=gpu)

            # reshape to original image
            attMap = transform.resize(attMap, (img.shape[:2]), order=3, mode='nearest')

            if naiveMax:
                # naively take argmax
                maxSub = np.unravel_index(np.argmax(attMap), attMap.shape)
            else:
                # take center of max locations
                maxAtt = np.max(attMap)
                maxInd = np.where(attMap == maxAtt)
                maxSub = (np.mean(maxInd[0]), np.mean(maxInd[1]))

        # determine if it's a difficult image: 1) sum of the area of bounding
        # boxes is less than 1/4 of image area, 2) at least one distractor category
        allAnnList = cocoAnn.loadAnns(cocoAnn.getAnnIds(imgIds=imgInfo['id']))
        bbsArea = np.sum([a['area'] for a in allAnnList])
        imgArea = np.prod(img.shape[:2])
        numCats = len(np.unique([a['category_id'] for a in allAnnList]))
        isDiff = bbsArea < 0.25 * imgArea and numCats > 1

        # load annotations (for target category)
        annList = cocoAnn.loadAnns(
            cocoAnn.getAnnIds(imgIds=imgInfo['id'], catIds=cat['id']))

        # hit/miss?
        isHit = 0
        for ann in annList:
            # create a radius-15 circle around max location and see if it
            # intersects with segmentation mask
            if isinstance(ann['segmentation'], list):
                # polygon: a flat coordinate list reshaped into pairs. The
                # point is reversed here, unlike in the RLE branch below.
                for seg in ann['segmentation']:
                    # -1 lets numpy infer the row count; `len(seg) / 2` only
                    # yields an int under Python 2 division.
                    polyPts = np.array(seg).reshape((-1, 2))
                    poly = shapely.geometry.Polygon(polyPts)
                    circ = shapely.geometry.Point(maxSub[::-1]).buffer(15)
                    isHit += poly.intersects(circ)
            else:
                # RLE
                if isinstance(ann['segmentation']['counts'], list):
                    rle = mask.frPyObjects([ann['segmentation']],
                                           imgInfo['height'], imgInfo['width'])
                else:
                    rle = [ann['segmentation']]
                m = mask.decode(rle)
                m = m[:, :, 0]
                ind = np.where(m > 0)
                # Mask indices are paired in the same order as maxSub.
                mp = shapely.geometry.MultiPoint(list(zip(ind[0], ind[1])))
                circ = shapely.geometry.Point(maxSub).buffer(15)
                isHit += circ.intersects(mp)
            if isHit:
                break

        if isHit:
            hit += 1
            hitDiff += 1 if isDiff else 0
        else:
            miss += 1
            missDiff += 1 if isDiff else 0

        # Only an empty denominator is expected here; anything else should surface.
        try:
            accuracy = (hit + 0.0) / (hit + miss)
        except ZeroDivisionError:
            accuracy = None
        try:
            accuracyDiff = (hitDiff + 0.0) / (hitDiff + missDiff)
        except ZeroDivisionError:
            accuracyDiff = None

        # Progress report at most once every 10 seconds.
        if time.time() - t0 > 10:
            print(' '.join(str(part) for part in (
                cat['name'], '(', i, '/', numImgs, '): Hit =', hit, 'Miss =', miss,
                ' Acc =', accuracy, ' Diff Hit =', hitDiff, ' Diff Miss =', missDiff,
                ' Diff Acc =', accuracyDiff)))
            t0 = time.time()

    return (accuracy, accuracyDiff)
def main(argv):
    """Compute heatmaps for a slice of a dataset and save one .npy file per image.

    Parses `argv`, selects the Caffe device, loads the network and image list
    for the requested dataset/split, then runs compute_heatmap batch by batch
    over image indices [start, end) and writes '<results_dir>/<index>.npy' for
    every image. A batch is skipped when the file for its last index already
    exists.

    The output layer and norm_deg passed to compute_heatmap are picked from
    the heatmap type (see the if/elif chain below).

    Args:
        argv: list of command-line arguments, without the program name.

    Raises:
        SystemExit: via argparse when arguments cannot be parsed, when
            --results_dir is missing, or when the dataset / imagenet split
            is not supported.
    """
    parser = argparse.ArgumentParser(description='Save numpy files of heatmaps (use default settings).')
    # TODO make default settings a boolean flag
    parser.add_argument('dataset', default='imagenet', type=str, help="choose from ['imagenet', 'voc2007', 'COCO']")
    parser.add_argument('split', default='val', type=str, help="choose from ['train', 'train_heldout', 'val', 'test']")
    parser.add_argument('heatmap', default='saliency', type=str, help="choose from ['saliency', 'guided_backprop', 'excitation_backprop', 'contrast_excitation_backprop', 'grad_cam'")
    parser.add_argument('-r', '--results_dir', default=None, type=str, help="directory to save heatmaps")
    parser.add_argument('-g', '--gpu', default=None, type=int, help="zero-indexed gpu to use [i.e. 0-3]")
    parser.add_argument('-b', '--batch_size', default=64, type=int, help="batch size")
    parser.add_argument('-a', '--start', default=0, type=int, help="start index")
    parser.add_argument('-z', '--end', default=None, type=int, help="end index")

    args = parser.parse_args(argv)
    dataset = args.dataset
    split = args.split
    heatmap_type = args.heatmap
    results_dir = args.results_dir
    gpu = args.gpu
    batch_size = args.batch_size
    start = args.start
    end = args.end

    imagenet_imdbs = {
        'train_heldout': '/home/ruthfong/packages/caffe/data/ilsvrc12/annotated_train_heldout_imdb.txt',
        'val': '/home/ruthfong/packages/caffe/data/ilsvrc12/val_imdb.txt',
        'animal_parts': '/home/ruthfong/packages/caffe/data/ilsvrc12/animal_parts_require_both_min_10_imdb.txt',
    }

    # Reject unusable arguments before any model is loaded. Previously these
    # cases surfaced later as a failing os.path.join(None, ...), a bare
    # assert(False), or a NameError on `paths`.
    if results_dir is None:
        parser.error('--results_dir is required: it is where the heatmaps are saved')
    if dataset not in ('imagenet', 'voc2007'):
        parser.error('%s is not supported' % dataset)
    if dataset == 'imagenet' and split not in imagenet_imdbs:
        parser.error('%s is not supported' % split)

    if gpu is None:
        caffe.set_mode_cpu()
    else:
        caffe.set_device(gpu)
        caffe.set_mode_gpu()

    if dataset == 'imagenet':
        net = get_net('googlenet')
        top_name = 'loss3/classifier'
        transformer = get_ILSVRC_net_transformer(net)
        (paths, labels) = read_imdb(imagenet_imdbs[split])
        paths = np.array(paths)
        labels = np.array(labels)
    else:  # 'voc2007'
        net = get_net('googlenet_voc')
        top_name = 'loss3/classifier-ft'
        voc_dir = '/data/ruthfong/VOCdevkit/VOC2007/'
        transformer = get_VOC_net_transformer(net)
        (paths, labels) = read_imdb(os.path.join(voc_dir, 'caffe/%s.txt' % split))
        paths = np.array([os.path.join(voc_dir, 'JPEGImages', f) for f in paths])
        # labels is indexed with an index array below, like in the imagenet
        # branch, so it has to be an array as well (no-op if it already is).
        labels = np.asarray(labels)

    if not os.path.exists(results_dir):
        os.makedirs(results_dir)

    if end is None:
        end = len(paths)

    # The original tested an undefined name `heatmap` here; the parsed value
    # is `heatmap_type`.
    if heatmap_type == 'excitation_backprop':
        norm_deg = -1
        bottom_name = 'pool2/3x3_s2'
    elif heatmap_type == 'contrast_excitation_backprop':
        norm_deg = -2
        bottom_name = 'pool2/3x3_s2'
    elif heatmap_type == 'grad_cam':
        norm_deg = None
        bottom_name = 'inception_4e/output'
    else:
        norm_deg = np.inf
        bottom_name = 'data'

    # An index array can be sliced per batch and used directly to index
    # paths/labels; a plain list cannot be indexed with another list.
    img_idx = np.arange(start, end)
    num_imgs = len(img_idx)
    num_batches = int(np.ceil(num_imgs / float(batch_size)))

    for i in range(num_batches):
        start_time = time.time()
        # Slicing clamps at the end, so the last batch may be shorter.
        idx = img_idx[i * batch_size:(i + 1) * batch_size]

        # The file for the last index marks a batch as already done.
        out_file = os.path.join(results_dir, '%d.npy' % idx[-1])
        if os.path.exists(out_file):
            print('%s already exists; skipping batch from %d to %d' % (out_file, idx[0], idx[-1]))
            continue

        heatmaps = compute_heatmap(net, transformer, paths[idx], labels[idx], heatmap_type,
                                   top_name, top_name,
                                   outputBlobName=bottom_name, outputLayerName=bottom_name,
                                   norm_deg=norm_deg, gpu=gpu)

        for j in range(len(idx)):
            out_file = os.path.join(results_dir, '%d.npy' % idx[j])
            np.save(out_file, heatmaps[j])

        print('gpu %d - batch %d/%d complete [%d-%d] (time: %.4f s)' % (
            gpu if gpu is not None else -1, i, num_batches, idx[0], idx[-1],
            time.time() - start_time))