def tattoo_detection(net, image_name, args):
    """Detect object classes in an image using pre-computed object proposals."""
    im_in = cv2.imread(image_name)
    if im_in is None:
        print('cannot open %s for read' % image_name)
        exit(-1)

    rows, cols = im_in.shape[:2]
    print([rows, cols])

    scale = 1.0
    if rows >= cols:
        scale = float(args.longdim) / float(rows)
        im = cv2.resize(im_in, (int(0.5 + float(cols) * scale), args.longdim))
    else:
        scale = float(args.longdim) / float(cols)
        im = cv2.resize(im_in, (args.longdim, int(0.5 + float(rows) * scale)))

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    seconds = '%.3f' % timer.total_time
    print('Detection took {:.3f}s for '
          '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))

    max_scores = scores.max(axis=0)
    print(max_scores)
    print(boxes.shape)

    # Visualize detections for each class
    CONF_THRESH = args.threshold
    NMS_THRESH = args.nms_thresh
    tattoo_dets = []
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]

        inds = np.where(dets[:, -1] >= CONF_THRESH)[0]
        dets_filter = dets[inds]

        vis_detections(im, cls, dets_filter, thresh=CONF_THRESH)
        if cls == 'tattoo' and len(dets_filter) > 0:
            plt.savefig(os.path.join(
                args.output,
                os.path.splitext(os.path.basename(image_name))[0] + '_det.png'))
            tattoo_dets = dets_filter

        if args.inspect == 'v':
            plt.show()
        plt.clf()

    return tattoo_dets, max_scores, seconds, scale
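# The resize block in tattoo_detection() scales the image so that its longer
# side equals args.longdim while preserving aspect ratio. The same computation
# as a standalone helper (a sketch for illustration, not part of the source):
import cv2

def resize_long_side(im, longdim):
    """Resize so that max(height, width) == longdim; returns (image, scale)."""
    rows, cols = im.shape[:2]
    scale = float(longdim) / float(max(rows, cols))
    # cv2.resize expects (width, height)
    out = cv2.resize(im, (int(0.5 + cols * scale), int(0.5 + rows * scale)))
    return out, scale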
def detect_bboxes(net, im_names, subset_classes):
    """Detect object classes in an image using pre-computed object proposals."""
    df = cnn_utils.create_bbox_data_frame(with_object_index=False)

    for im_name in im_names:
        print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
        print 'Demo for {}'.format(im_name)

        # Load the input image.
        im_file = os.path.join(FLAGS.data_dir, 'images', im_name)
        im = cv2.imread(im_file)
        im_size_x = im.shape[1]
        im_size_y = im.shape[0]

        # Detect all object classes and regress object bounds.
        timer = Timer()
        timer.tic()
        scores, boxes = im_detect(net, im)
        timer.toc()
        print('Detection took {:.3f}s for '
              '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))

        # Detect for each class
        for subset_cls_ind in range(len(class_names_to_be_detected)):
            cls = class_names_to_be_detected[subset_cls_ind]
            try:
                cls_ind = CLASSES.index(cls)
            except ValueError:
                print('error: class does not exist in training data: '
                      '{0}'.format(cls))
                exit(-1)

            cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
            cls_scores = scores[:, cls_ind]
            dets = np.hstack((cls_boxes,
                              cls_scores[:, np.newaxis])).astype(np.float32)
            keep = nms(dets, FLAGS.nms_thresh)
            dets = dets[keep, :]

            inds = np.where(dets[:, -1] >= FLAGS.conf_thresh)[0]
            if len(inds) > 0:
                print('{} {}(s) are detected.'.format(len(inds), cls))

            for i in inds:
                # ['image_name', 'class_index', 'x1', 'y1', 'x2', 'y2', 'score']
                x1 = dets[i, 0]
                y1 = dets[i, 1]
                x2 = dets[i, 2]
                y2 = dets[i, 3]
                score = dets[i, -1]
                if FLAGS.ignore_bbox_on_boundary:
                    # Ignore bounding boxes on the frame boundary.
                    if x1 <= 0 or x2 >= (im_size_x - 1) or \
                            y1 <= 0 or y2 >= (im_size_y - 1):
                        continue
                # Append a row.
                df.loc[len(df)] = [
                    im_name, subset_cls_ind, x1, y1, x2, y2, score]

    return df
def get_ohem_minibatch(loss, rois, labels, bbox_targets=None,
                       bbox_inside_weights=None, bbox_outside_weights=None):
    """Given rois and their loss, construct a minibatch using OHEM."""
    loss = np.array(loss)

    if cfg.TRAIN.OHEM_USE_NMS:
        # Do NMS using loss for de-dup and diversity
        keep_inds = []
        nms_thresh = cfg.TRAIN.OHEM_NMS_THRESH
        source_img_ids = [roi[0] for roi in rois]
        for img_id in np.unique(source_img_ids):
            for label in np.unique(labels):
                sel_indx = np.where(np.logical_and(labels == label,
                                                   source_img_ids == img_id))[0]
                if not len(sel_indx):
                    continue
                boxes = np.concatenate((rois[sel_indx, 1:],
                                        loss[sel_indx][:, np.newaxis]),
                                       axis=1).astype(np.float32)
                keep_inds.extend(sel_indx[nms(boxes, nms_thresh)])

        hard_keep_inds = select_hard_examples(loss[keep_inds])
        hard_inds = np.array(keep_inds)[hard_keep_inds]
    else:
        hard_inds = select_hard_examples(loss)

    blobs = {'rois_hard': rois[hard_inds, :].copy(),
             'labels_hard': labels[hard_inds].copy()}
    if bbox_targets is not None:
        assert cfg.TRAIN.BBOX_REG
        blobs['bbox_targets_hard'] = bbox_targets[hard_inds, :].copy()
        blobs['bbox_inside_weights_hard'] = bbox_inside_weights[hard_inds, :].copy()
        blobs['bbox_outside_weights_hard'] = bbox_outside_weights[hard_inds, :].copy()
    return blobs
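# select_hard_examples() is called by get_ohem_minibatch() but is not defined
# in this corpus. A minimal sketch of the usual OHEM rule (keep the B
# highest-loss RoIs); the batch-size default of 128 is an assumption, not
# taken from the source:
import numpy as np

def select_hard_examples(loss, batch_size=128):
    """Return indices of the batch_size highest-loss examples, hardest first."""
    loss = np.asarray(loss).ravel()
    order = np.argsort(-loss)  # descending loss
    return order[:min(batch_size, len(order))]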
def detect(net, im):
    """Detect object classes in an image and visualize the results."""
    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print('Detection took {:.3f}s for '
          '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))

    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    res = 0
    global CLASSES
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        res += vis_detections(im, cls, dets, thresh=CONF_THRESH)
    return im, res
def reduce_boxes(self, scores, boxes):
    """Reduce the result boxes with per-class NMS and a global top-K limit."""
    box_classes = []
    box_scores = np.array([], dtype="float32")
    for j in xrange(1, self.num_classes + 1):
        # single-class NMS
        inds = np.where(scores[:, j] > self.score_thresh)[0]
        cls_scores = scores[inds, j]
        cls_boxes = boxes[inds, j*4:(j+1)*4]
        cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
            .astype(np.float32, copy=False)
        keep = nms(cls_dets, self.IoU_thresh, self.force_cpu)
        # use vstack or list
        box_classes += [j] * len(keep)
        cls_dets = cls_dets[keep, :]
        box_scores = np.append(box_scores, cls_scores[keep])
        if j == 1:
            all_dets = cls_dets
        else:
            all_dets = np.vstack((all_dets, cls_dets))
    box_classes = np.array(box_classes, dtype=int)

    # Limit to max_per_image detections *over all classes*
    if len(box_classes) > self.max_per_image:
        indexes = np.argsort(-box_scores)[:self.max_per_image]
        all_dets = all_dets[indexes, :]
        box_classes = box_classes[indexes]
    return (box_classes, all_dets)
def demo_detect(net, filename, blob_name='feat', threshold=0.5):
    """Detect persons in a gallery image and extract their features.

    Arguments:
        net (caffe.Net): trained network
        filename (str): path to a gallery image file (jpg or png)
        blob_name (str): feature blob name. Default 'feat'
        threshold (float): detection score threshold. Default 0.5

    Returns:
        boxes (ndarray): N x 5 detected boxes in format [x1, y1, x2, y2, score]
        features (ndarray): N x D features matrix
    """
    im = cv2.imread(filename)
    boxes, scores, feat_dic = _im_detect(net, im, None, [blob_name])

    j = 1  # only consider j = 1 (foreground class)
    inds = np.where(scores[:, j] > threshold)[0]
    cls_scores = scores[inds, j]
    cls_boxes = boxes[inds, j*4:(j+1)*4]
    boxes = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)

    keep = nms(boxes, cfg.TEST.NMS)
    boxes = boxes[keep]
    features = feat_dic[blob_name][inds][keep]

    if boxes.shape[0] == 0:
        return None, None

    features = features.reshape(features.shape[0], -1)
    return boxes, features
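# A hypothetical call to demo_detect() above; the net variable and gallery
# path are illustrative only, not taken from the source:
#
#   boxes, features = demo_detect(net, 'gallery/000001.jpg', threshold=0.5)
#   if boxes is None:
#       print('no person detected above the threshold')
#   else:
#       print('{} boxes, feature dim {}'.format(boxes.shape[0],
#                                               features.shape[1]))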
def detect_image(net, im):
    """Detect 'person' instances in an image using pre-computed object proposals."""
    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print(str(current_process().index) +
          ' Detection took {:.3f}s for '
          '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))

    # Keep only 'person' detections
    # CONF_THRESH = 0.0
    CONF_THRESH = 0.4
    NMS_THRESH = 0.1
    person_idx = CLASSES.index('person')
    person_boxes = boxes[:, 4*person_idx:4*(person_idx + 1)]
    person_scores = scores[:, person_idx]
    person_dets = np.hstack((person_boxes,
                             person_scores[:, np.newaxis])).astype(np.float32)
    person_keep = nms(person_dets, NMS_THRESH)
    person_dets = person_dets[person_keep, :]
    # inds = np.where(dets[:, -1] >= CONF_THRESH)[0]
    person_dets = person_dets[np.where(person_dets[:, -1] >= CONF_THRESH)]
    return person_dets
def demo(net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""
    # Load the demo image
    im_file = os.path.join(FLAGS.data_dir, 'images', image_name)
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print('Detection took {:.3f}s for '
          '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))
    print('Target class: {}'.format(FLAGS.target_class))

    # Write cropped detections for each class
    for cls_ind, cls in enumerate(CLASSES[1:]):
        # if cls != FLAGS.target_class: continue
        # cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, FLAGS.nms_thresh)
        dets = dets[keep, :]
        write_cropped_images(im, image_name, cls, dets, thresh=FLAGS.conf_thresh)
def demo(sess, net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""
    # Load the demo image
    im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name)
    #im_file = os.path.join('/home/corgi/Lab/label/pos_frame/ACCV/training/000001/', image_name)
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(sess, net, im)
    timer.toc()
    print('Detection took {:.3f}s for '
          '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))

    # Visualize detections for each class
    im = im[:, :, (2, 1, 0)]
    fig, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(im, aspect='equal')

    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        vis_detections(im, cls, dets, ax, thresh=CONF_THRESH)
def demo(net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""
    # Load the demo image
    im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name)
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print('Detection took {:.3f}s for '
          '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))

    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        vis_detections(im, cls, dets, thresh=CONF_THRESH)
def clean_detections(scores, boxes, thresh=CONF_THRESH):
    """Clean up scores and boxes and return the interesting objects in a
    handy format."""
    detections = []
    for detection_index in range(len(scores)):
        if scores[detection_index][0] > (1 - CONF_THRESH):
            continue
    for cls_ind, cls in enumerate(CLASSES[1:]):
        # There is probably a better way to do this.
        # print cls_ind, cls
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        inds = np.where(dets[:, -1] >= thresh)[0]
        for i in inds:
            bbox = dets[i, :4]
            score = dets[i, -1]
            detections.append(({"person": score}, tuple(bbox)))
    return detections
def demo(net, im, return_boxes):
    """Detect object classes in an image using pre-computed object proposals."""
    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print('Detection took {:.3f}s for '
          '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))

    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    classes = {}
    for cls_ind, cls in enumerate(CLASSES[1:]):
        try:
            cls_ind += 1  # because we skipped background
            cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
            cls_scores = scores[:, cls_ind]
            dets = np.hstack((cls_boxes,
                              cls_scores[:, np.newaxis])).astype(np.float32)
            keep = nms(dets, NMS_THRESH)
            dets = dets[keep, :]
            bboxes = vis_detections(im, cls, dets, return_boxes,
                                    thresh=CONF_THRESH)
            classes[cls] = bboxes
        except Exception as e:
            continue
    if not return_boxes:
        cv2.imshow("image", im)
    return classes
def apply_nms(all_boxes, thresh):
    """Apply non-maximum suppression to all predicted boxes output by the
    test_net method."""
    num_classes = len(all_boxes)
    num_images = len(all_boxes[0])
    nms_boxes = [[[] for _ in range(num_images)]
                 for _ in range(num_classes)]
    for cls_ind in range(num_classes):
        for im_ind in range(num_images):
            dets = all_boxes[cls_ind][im_ind]
            if dets == []:
                continue
            # CPU NMS is much faster than GPU NMS when the number of boxes
            # is relatively small (e.g., < 10k)
            # TODO(rbg): autotune NMS dispatch
            keep = nms(dets, thresh, force_cpu=True)
            if len(keep) == 0:
                continue
            nms_boxes[cls_ind][im_ind] = dets[keep, :].copy()
    return nms_boxes
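# nms() itself is imported from py-faster-rcnn's compiled module in these
# snippets. For reference, the pure-NumPy CPU version (py_cpu_nms) that it
# mirrors: dets is N x 5 as [x1, y1, x2, y2, score]; returns kept indices.
import numpy as np

def py_cpu_nms(dets, thresh):
    """Greedy non-maximum suppression on score-sorted boxes."""
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # Intersection of the current top box with the remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)
        # Keep only boxes whose overlap with the kept box is <= thresh
        order = order[np.where(ovr <= thresh)[0] + 1]
    return keep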
def demo(net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""
    # Load the demo image
    im_file = os.path.join(cfg.ROOT_DIR, 'data', 'demo', image_name)
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print('Detection took {:.3f}s for '
          '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))

    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        order = cls_scores.argsort()[::-1]
        sorted_dets = dets[order, :]
        keep = nms(dets, NMS_THRESH)
        with open('/home/xyy/Desktop/doing/Object Detection/py-faster-rcnn/test_python.txt', 'w') as f:
            dets = dets[keep, :]
            for i in dets:
                for j in i:
                    f.write(str(j) + ' ')
                f.write('\n')
        vis_detections(im, cls, dets, thresh=CONF_THRESH)
def demo(net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""
    # Load the demo image
    im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name)
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print('Detection took {:.3f}s for '
          '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))

    # Visualize detections for each class
    CONF_THRESH = 0.7
    NMS_THRESH = 0.3
    json_data_list = []
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        bbox, score = vis_detections(im, cls, dets, thresh=CONF_THRESH)
        if score:
            json_data_list.append({'class': cls, 'bbox': bbox, 'score': score})

    if len(json_data_list):
        with open('result/' + image_name + '.json', 'w') as f:
            json.dump(json_data_list, f, indent=2)
def demo(net, imagePathName, scoreThreshold):
    """Detect object classes in an image using pre-computed object proposals."""
    # Load the demo image
    im = cv2.imread(imagePathName)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    debug('Object detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time, boxes.shape[0]))

    # Visualize detections for each class
    path, imageFilename = os.path.split(imagePathName)
    catDir = os.path.split(path)[-1]
    imageName = catDir + '/' + imageFilename
    for i, cls in enumerate(CLASSES[1:]):
        i += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * i:4 * (i + 1)]
        cls_scores = scores[:, i]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESHOLD)
        dets = dets[keep, :]
        vis_detections(im, cls, imageName, dets, scoreThreshold)
def produce(self, ip):
    scores, bbox = im_detect(self.net_, ip)
    # Find the top class for each box
    bestClass = np.argmax(scores, axis=1)
    bestScore = np.max(scores, axis=1)
    allDet = edict()
    for cl in [self.prms_.targetClass]:
        clsIdx = self.cls_.index(cl)
        # Get all the boxes that belong to the desired class
        idx = bestClass == clsIdx
        clScore = bestScore[idx]
        clBox = bbox[idx, :]
        # Sort the boxes by the score
        sortIdx = np.argsort(-clScore)
        topK = min(len(sortIdx), self.prms_.topK)
        sortIdx = sortIdx[0:topK]
        # Get the desired output
        clScore = clScore[sortIdx]
        clBox = clBox[sortIdx]
        clBox = clBox[:, (clsIdx * 4):(clsIdx * 4 + 4)]
        # Stack detections and perform NMS
        dets = np.hstack((clBox, clScore[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, self.prms_.nmsThresh)
        dets = dets[keep, :]
        # Only keep detections with high confidence
        inds = np.where(dets[:, -1] >= self.prms_.confThresh)[0]
        allDet[cl] = copy.deepcopy(dets[inds])
    return allDet
def detect_person(net, im, cls_ind=1, cls='person', CONF_THRESH=0.8):
    """Detect object classes in an image using pre-computed object proposals."""
    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print('Detection took {:.3f}s for '
          '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))

    # Apply NMS to the requested class only
    NMS_THRESH = 0.3
    cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
    cls_scores = scores[:, cls_ind]
    dets = np.hstack((cls_boxes,
                      cls_scores[:, np.newaxis])).astype(np.float32)
    keep = nms(dets, NMS_THRESH)
    # Filter by confidence threshold as well
    keep = [ind for ind in keep if cls_scores[ind] > CONF_THRESH]
    if len(keep) > 1:
        sizes = np.zeros((len(keep),))
        for ind, curr_ind in enumerate(keep):
            bbox = dets[curr_ind, :4]
            sizes[ind] = (bbox[3] - bbox[1]) * (bbox[2] - bbox[0])
        # Retain only the biggest bounding box
        keep = keep[np.argmax(sizes)]
    dets = dets[keep, :]
    return (dets.reshape(1, -1), cls_scores[keep])
def _post_process(self, scores, boxes):
    obj_list = []
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, self.nms_thresh)
        dets = dets[keep, :]
        inds = np.where(dets[:, -1] >= self.conf_thresh)[0]
        if len(inds) == 0:
            continue
        for i in inds:
            obj = msg.object()
            obj.class_name = cls
            obj.score = dets[i, -1]
            bbox = dets[i, :4]
            obj.region.x_offset = bbox[0]
            obj.region.y_offset = bbox[1]
            obj.region.width = bbox[2] - bbox[0]
            obj.region.height = bbox[3] - bbox[1]
            obj.region.do_rectify = False
            obj_list.append(obj)
    return obj_list
def detect_objects(imgpath):
    """Detect object classes in an image using pre-computed object proposals."""
    print("in detect object")
    # Load the demo image
    im_file = os.path.join(imgpath)
    im = cv2.imread(im_file)
    print("read image")

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    print("im_detect")
    scores, boxes = im_detect(app.config['net'], im)
    timer.toc()
    print('Detection took {:.3f}s for '
          '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))

    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    results = dict()
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        results[cls] = detect_positions(im, cls, dets, thresh=CONF_THRESH)
    return results
def detect(self, image):
    '''
    :param image: image from which the objects should be detected.

    Per-category confidence thresholds come from self.CONF_THRESHOLD (one
    entry per category; entries of zero fall back to the default of 0.7),
    and the bounding-box overlap threshold from self.NMS_THRESHOLD (lower
    means less repetition, higher means more).

    :return: tuple of (list of detections, elapsed seconds); each detection
        is a dict of the form
        {'bbox': [x1, y1, x2, y2], 'category': cls_name, 'confidence': score}
    '''
    start = time.time()
    bbox_class_list = []
    scores, boxes = im_detect(self.model, image)
    for cls_ind, (cls, threshold) in enumerate(zip(self.cat, self.CONF_THRESHOLD)):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, self.NMS_THRESHOLD)
        dets = dets[keep, :]
        inds = np.where(dets[:, -1] >= threshold)[0]
        for i in inds:
            # x1, y1, x2, y2 = dets[i, :-1]
            bbox_class_list.append(
                {'bbox': dets[i, :-1].tolist(),
                 'category': cls,
                 'confidence': float(dets[i, -1])})
    end = time.time()
    return (bbox_class_list, end - start)
def demo(net, image_path):
    """Detect object classes in an image using pre-computed object proposals."""
    # Load the demo image
    im = cv2.imread(image_path)

    # Detect all object classes and regress object bounds
    started = time()
    scores, boxes = im_detect(net, im)
    elapsed = time() - started
    print('Detection took {:.3f}s for '
          '{:d} object proposals'.format(elapsed, boxes.shape[0]))

    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        vis_detections(im, cls, dets, thresh=CONF_THRESH)
def get_detections(net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""
    # Load the demo image
    im_file = image_name  # os.path.join(cfg.DATA_DIR, 'demo', image_name)
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    # timer = Timer()
    # timer.tic()
    scores, boxes, pose_a, pose_e = im_detect(net, im)
    # timer.toc()
    # print('Detection took {:.3f}s for '
    #       '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))
    # print "a=%s, e=%s" % (5*pose_a, 5*pose_e)

    # Visualize detections for each class
    # CONF_THRESH = 0.25  # 0.75
    # print 'threshold: {}'.format(CONF_THRESH)
    NMS_THRESH = 0.3
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = np.hstack((cls_boxes,
                          5 * pose_a[:, np.newaxis],
                          5 * pose_e[:, np.newaxis],
                          cls_scores[:, np.newaxis])).astype(np.float32)
        dets = dets[keep, :]
        # print "a=%s, e=%s" % (5*pose_a[keep], 5*pose_e[keep])
    return dets
def apply_nms(all_boxes, thresh):
    """Apply non-maximum suppression to all predicted boxes output by the
    test_net method."""
    num_classes = len(all_boxes)
    num_images = len(all_boxes[0])
    nms_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(num_classes)]
    for cls_ind in xrange(num_classes):
        if cls_ind == 0:
            thresh = 0.3
        else:
            thresh = 0.7
        for im_ind in xrange(num_images):
            dets = all_boxes[cls_ind][im_ind]
            if dets == []:
                continue
            keep = nms(dets, thresh)
            if len(keep) == 0:
                continue
            nms_boxes[cls_ind][im_ind] = dets[keep, :].copy()
    return nms_boxes
def get_topK_boxes(scores, boxes, K):
    keep_boxes = []
    keep_scores = []
    keep_class = []
    for cls_ind, cls in enumerate(CLASSES):
        if cls_ind == 0:
            continue  # skip background
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        keep_boxes.extend(cls_boxes[keep, :])
        keep_scores.extend(cls_scores[keep])
        keep_class.extend([cls_ind] * len(keep))
    mat_scores = np.array(keep_scores)
    mat_classes = np.array(keep_class)
    mat_boxes = np.array(keep_boxes, dtype='int16')
    # get top K
    order = np.argsort(keep_scores)[::-1]
    keep = order[0:K]
    return (mat_boxes[keep, :], mat_scores[keep], mat_classes[keep])
def demo(net, im_file):
    """Detect object classes in an image using pre-computed object proposals."""
    # Load the demo image as gray scale
    gim = cv2.imread(im_file, flags=cv2.CV_LOAD_IMAGE_GRAYSCALE)
    # convert to rgb repeated in each channel
    im = cv2.cvtColor(gim, cv2.COLOR_GRAY2BGR)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print('Detection took {:.3f}s for '
          '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))

    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        vis_detections(im, cls, dets, thresh=CONF_THRESH)
def test_single_frame(sess, net, image_name, mask, force_cpu, output_dir):
    """Detect object classes in an image using pre-computed object proposals."""
    #********************
    # Need change here
    #********************
    im_file = os.path.join(cfg.DATA_DIR, 'test/images/', image_name)
    #im_file = os.path.join('/home/corgi/Lab/label/pos_frame/ACCV/training/000001/', image_name)
    im_bgr = cv2.imread(im_file)
    im = np.zeros((im_bgr.shape[0], im_bgr.shape[1], 4))
    im[:, :, 0:3] = im_bgr
    im[:, :, 3] = mask

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    score, label, box, mask = im_detect(sess, net, im)
    timer.toc()
    print('Detection took {:.3f}s for '
          '{:d} object proposals'.format(timer.total_time, label.shape[0]))

    # Visualize detections for each class
    im_rgb = im_bgr[:, :, (2, 1, 0)]
    im_mask = np.zeros(im_rgb.shape).astype(im_rgb.dtype)
    fig, ax = plt.subplots(figsize=(12, 12))
    # ax.imshow(im_rgb, aspect='equal')

    CONF_THRESH = 0.85
    NMS_THRESH = 0.3
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        i = np.where(label == cls_ind)[0]
        cls_score = score[i]
        cls_box = box[i, :]
        cls_mask = mask[i, :, :]
        if cfg.DEBUG:
            print('i: ')
            print(i.shape)
            print('cls_score: ')
            print(cls_score)
            print('box.shape: ')
            print(box.shape)
            print('cls_box shape: ')
            print(cls_box.shape)
        dets = np.hstack((cls_box,
                          cls_score[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH, force_cpu)
        dets = dets[keep, :]
        segs = cls_mask[keep, :, :]
        print('After nms, {:d} object proposals'.format(dets.shape[0]))
        im_mask = vis_detections(im_rgb, im_mask, cls, dets, segs, ax,
                                 thresh=CONF_THRESH)

    # plt.savefig(os.path.join(output_dir, 'box_' + image_name))
    # im2 = cv2.imread(os.path.join(output_dir, 'box_' + image_name))
    im_rgb += im_mask / 2
    im_mask_grey = cv2.cvtColor(im_mask, cv2.COLOR_RGB2GRAY)
    im_mask_grey[np.where(im_mask_grey != 0)] = 255
    cv2.imwrite(os.path.join(output_dir, 'output_' + image_name),
                im_rgb[:, :, (2, 1, 0)])
    cv2.imwrite(os.path.join(output_dir, 'mask_' + image_name), im_mask_grey)
    return im_mask_grey
def detect(file_path, NMS_THRESH=0.3):
    im = cv2.imread(file_path)
    scores, boxes = im_detect(net, im)
    cls_scores = scores[:, 1]
    cls_boxes = boxes[:, 4:8]
    dets = np.hstack((cls_boxes,
                      cls_scores[:, np.newaxis])).astype(np.float32)
    keep = nms(dets, NMS_THRESH)
    return dets[keep, :]
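# Note on the slicing in detect(): im_detect() returns 4 box columns per
# class, so for a two-class (background + foreground) model the foreground
# class occupies scores[:, 1] and boxes[:, 4:8]. A hypothetical call, assuming
# a module-level `net` is already loaded:
#
#   dets = detect('images/example.jpg', NMS_THRESH=0.3)
#   for x1, y1, x2, y2, score in dets:
#       print('({:.0f}, {:.0f}, {:.0f}, {:.0f}) score={:.2f}'.format(
#           x1, y1, x2, y2, score))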
def demo(net, im_file, output_file):
    """Detect object classes in an image using pre-computed object proposals."""
    # Load the demo image
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print('Detection took {:.3f}s for '
          '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))

    # Visualize detections for each class
    CONF_THRESH = 0.3  # 0.8
    NMS_THRESH = 0.3   # 0.3

    # Write output to file
    output = open(output_file, 'w')
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        cls_boxes = cls_boxes.transpose()
        # cls_boxes is now 4 x N: iterate over columns (one column per box)
        for box_ind in range(cls_boxes.shape[1]):
            output.write(CLASSES[cls_ind] + '\n')
            output.write(str(cls_scores[box_ind]) + '\n')
            output.write(' '.join([str(x) for x in cls_boxes[:, box_ind]]) + '\n')
    output.close()

    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        vis_detections(im, cls, dets, thresh=CONF_THRESH)
def detectionMethod(self, im, im_copy, frame_gray, net, p, detectedItem, colorScheme):
    scores, boxes = im_detect(net, im)
    NMS_THRESH = 0.3
    detectedItemsInThisFrame = []
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)  # stack them together
        keep = nms(dets, NMS_THRESH)  # removes overlapping bounding boxes
        dets = dets[keep, :]
        # print "if cls == {0}: {1}".format(str(detectedItem), (cls == detectedItem))
        if cls == detectedItem:
            print "under else"
            inds = np.where(dets[:, -1] >= 0.5)[0]  # threshold applied to score values here
            im = im[:, :, (2, 1, 0)]
            for i in inds:
                print "running for loop"
                bbox = dets[i, :4]
                detectedBBox = bbox.astype(int)
                score = dets[i, -1]
                bboxCentroid = c.mathArrayCentroid(detectedBBox)
                cv2.circle(im_copy, bboxCentroid, 5, colorScheme, -1)  # bbox centroid of detectedItemArray
                # Calculate bbox centroid. Use it to determine if the item should
                # be added to detectedItemArray. Check if the centroid of the
                # detected box is within the designated traffic intersection area.
                if (str(detectedItem) == "car") or (str(detectedItem) == "bus"):
                    print "if statement, detected car or bus"
                    if p.contains_point(bboxCentroid) == 1:
                        print "within area"
                        # Calculate corners of interest within the bounding box
                        # area and add them all to the corner array
                        detectedPixels = frame_gray[bbox[1]:bbox[3], bbox[0]:bbox[2]]  # [y1:y2, x1:x2]
                        detectedPixelsColor = im_copy[bbox[1]:bbox[3], bbox[0]:bbox[2]]  # for show on colored image
                        corners = cv2.goodFeaturesToTrack(
                            detectedPixels, mask=detectedPixels,
                            **self.feature_params).reshape(-1, 2)
                        # for x, y in np.float32(corners).reshape(-1, 2):  # black
                        #     cv2.circle(detectedPixels, (x, y), 5, (0, 0, 0), -1)
                        #     cv2.circle(detectedPixelsColor, (x, y), 5, (0, 0, 0), -1)
                        detectedItemsInThisFrame.append([[detectedBBox, corners]])
                    else:
                        print "car/bus not added. Coordinates: ", bbox
                else:
                    print "else not car or bus detected"
                    detectedPixels = frame_gray[bbox[1]:bbox[3], bbox[0]:bbox[2]]  # [y1:y2, x1:x2]
                    detectedPixelsColor = im_copy[bbox[1]:bbox[3], bbox[0]:bbox[2]]  # for show on colored image
                    corners = cv2.goodFeaturesToTrack(
                        detectedPixels, mask=detectedPixels,
                        **self.feature_params).reshape(-1, 2)
    print "detectedItemsInThisFrame len: {0}-------------------------------------".format(len(detectedItemsInThisFrame))
    print "detectedItemsInThisFrame: ", detectedItemsInThisFrame
    return detectedItemsInThisFrame
def demo_tuples(net, image_name):
    """Detect objects, attributes and relations in an image using
    pre-computed object proposals."""
    # Load the demo image
    im_file = os.path.join(cfg.DATA_DIR, image_name)
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes, attr_scores, rel_scores = im_detect(net, im)
    if attr_scores is not None:
        print 'Found attribute scores'
    if rel_scores is not None:
        print 'Found relation scores'
        rel_scores = rel_scores[:, 1:]  # drop no relation
        rel_argmax = np.argmax(rel_scores, axis=1).reshape(
            (boxes.shape[0], boxes.shape[0]))
        rel_score = np.max(rel_scores, axis=1).reshape(
            (boxes.shape[0], boxes.shape[0]))
    timer.toc()
    print('Detection took {:.3f}s for '
          '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))

    # Visualize detections for each class
    CONF_THRESH = 0.1
    NMS_THRESH = 0.05
    ATTR_THRESH = 0.1

    im = im[:, :, (2, 1, 0)]
    fig, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(im)

    # Detections
    det_indices = []
    det_scores = []
    det_objects = []
    det_bboxes = []
    det_attrs = []

    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = np.array(nms(dets, NMS_THRESH))
        dets = dets[keep, :]
        inds = np.where(dets[:, -1] >= CONF_THRESH)[0]
        if len(inds) > 0:
            keep = keep[inds]
            for k in keep:
                det_indices.append(k)
                det_bboxes.append(cls_boxes[k])
                det_scores.append(cls_scores[k])
                det_objects.append(cls)
                if attr_scores is not None:
                    attr_inds = np.where(attr_scores[k][1:] >= ATTR_THRESH)[0]
                    det_attrs.append([ATTRS[ix] for ix in attr_inds])
                else:
                    det_attrs.append([])

    rel_score = rel_score[det_indices].T[det_indices].T
    rel_argmax = rel_argmax[det_indices].T[det_indices].T
    for i, (idx, score, obj, bbox, attr) in enumerate(
            zip(det_indices, det_scores, det_objects, det_bboxes, det_attrs)):
        ax.add_patch(
            plt.Rectangle((bbox[0], bbox[1]),
                          bbox[2] - bbox[0],
                          bbox[3] - bbox[1],
                          fill=False,
                          edgecolor='red',
                          linewidth=3.5))
        box_text = '{:s} {:.3f}'.format(obj, score)
        if len(attr) > 0:
            box_text += "(" + ",".join(attr) + ")"
        ax.text(bbox[0], bbox[1] - 2,
                box_text,
                bbox=dict(facecolor='blue', alpha=0.5),
                fontsize=14, color='white')
        # Outgoing
        score = np.max(rel_score[i])
        ix = np.argmax(rel_score[i])
        subject = det_objects[ix]
        relation = RELATIONS[rel_argmax[i][ix]]
        print 'Relation: %.2f %s -> %s -> %s' % (score, obj, relation, subject)
        # Incoming
        score = np.max(rel_score.T[i])
        ix = np.argmax(rel_score.T[i])
        subject = det_objects[ix]
        relation = RELATIONS[rel_argmax[ix][i]]
        print 'Relation: %.2f %s -> %s -> %s' % (score, subject, relation, obj)

    ax.set_title(('detections with '
                  'p(object|box) >= {:.1f}').format(CONF_THRESH),
                 fontsize=14)
    plt.axis('off')
    plt.tight_layout()
    plt.draw()
    plt.savefig('data/demo/' +
                im_file.split('/')[-1].replace(".jpg", "_demo.jpg"))
def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg_key,
                   _feat_stride=[16, ], anchor_scales=[8, 16, 32],
                   anchor_ratios=[0.5, 1, 2]):
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)

    #layer_params = yaml.load(self.param_str_)
    _anchors = generate_anchors(ratios=anchor_ratios,
                                scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]
    rpn_cls_prob_reshape = np.transpose(rpn_cls_prob_reshape, [0, 3, 1, 2])
    rpn_bbox_pred = np.transpose(rpn_bbox_pred, [0, 3, 1, 2])
    #rpn_cls_prob_reshape = np.transpose(np.reshape(rpn_cls_prob_reshape, [1, rpn_cls_prob_reshape.shape[0], rpn_cls_prob_reshape.shape[1], rpn_cls_prob_reshape.shape[2]]), [0, 3, 2, 1])
    #rpn_bbox_pred = np.transpose(rpn_bbox_pred, [0, 3, 2, 1])
    im_info = im_info[0]

    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'

    # cfg_key = str(self.phase)  # either 'TRAIN' or 'TEST'
    #cfg_key = 'TEST'
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    scores = rpn_cls_prob_reshape[:, _num_anchors:, :, :]
    bbox_deltas = rpn_bbox_pred
    #im_info = bottom[2].data[0, :]

    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    # 1. Generate proposals from bbox deltas and shifted anchors
    height, width = scores.shape[-2:]

    if DEBUG:
        print('score map size: {}'.format(scores.shape))

    # Enumerate all shifts
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    anchors = _anchors.reshape((1, A, 4)) + \
        shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # remove_option = 1
    # if ('TEST' == cfg_key and remove_option in [1, 2]):
    #     # get rid of boxes that are completely inside other boxes
    #     # with options as to which one to get rid of
    #     # 1. always the one with lower scores, 2. always the one inside
    #     new_proposals = []
    #     removed_indices = set()
    #     num_props = proposals.shape[0]
    #     for i in range(num_props):
    #         if (i in removed_indices):
    #             continue
    #         bxA = proposals[i, :]
    #         for j in range(num_props):
    #             if ((j == i) or (j in removed_indices)):
    #                 continue
    #             bxB = proposals[j, :]
    #             if (bbox_contains(bxA, bxB)):
    #                 if ((1 == remove_option) and (scores[i] != scores[j])):
    #                     if (scores[i] > scores[j]):
    #                         removed_indices.add(j)
    #                     else:
    #                         removed_indices.add(i)
    #                 else:  # remove_option == 2 or scores[i] == scores[j]
    #                     removed_indices.add(j)
    #     nr = len(removed_indices)
    #     if (nr > 0):
    #         new_proposals = sorted(set(range(num_props)) - removed_indices)
    #         proposals = proposals[new_proposals, :]
    #         scores = scores[new_proposals]
    #         # padding to make the total number of proposals == post_nms_topN
    #         proposals = np.vstack((proposals, [proposals[-1, :]] * nr))
    #         scores = np.vstack((scores, [scores[-1]] * nr))

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    # batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    # BUT we NOW (18-Sep-2017) abuse batch inds, and use it for carrying scores
    if ('TEST' == cfg_key):
        batch_inds = np.reshape(scores, [proposals.shape[0], 1])
    else:
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    if (DEBUG):
        print('blob shape: {0}'.format(blob.shape))
        print('proposal shape: {0}'.format(proposals.shape))
    return blob
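# bbox_transform_inv() and clip_boxes() are imported from py-faster-rcnn's
# bbox_transform module; their standard implementations are reproduced below
# as a reference sketch of the delta decoding used by proposal_layer():
import numpy as np

def bbox_transform_inv(boxes, deltas):
    """Decode (dx, dy, dw, dh) deltas relative to anchor boxes."""
    if boxes.shape[0] == 0:
        return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)
    boxes = boxes.astype(deltas.dtype, copy=False)
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights
    dx = deltas[:, 0::4]
    dy = deltas[:, 1::4]
    dw = deltas[:, 2::4]
    dh = deltas[:, 3::4]
    pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
    pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
    pred_w = np.exp(dw) * widths[:, np.newaxis]
    pred_h = np.exp(dh) * heights[:, np.newaxis]
    pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
    pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w  # x1
    pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h  # y1
    pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w  # x2
    pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h  # y2
    return pred_boxes

def clip_boxes(boxes, im_shape):
    """Clip boxes to image boundaries; im_shape is (height, width)."""
    boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)
    boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)
    boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)
    boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)
    return boxes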
def test_net(net, imdb, max_per_image=100, thresh=0.05, vis=False):
    """Test a Fast R-CNN network on an image database."""
    num_images = len(imdb.image_index)
    # all detections are collected into:
    #  all_boxes[cls][image] = N x 5 array of detections in
    #  (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(imdb, net)

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    if not cfg.TEST.HAS_RPN:
        roidb = imdb.roidb

    for i in xrange(num_images):
        # filter out any ground truth boxes
        if cfg.TEST.HAS_RPN:
            box_proposals = None
        else:
            # The roidb may contain ground-truth rois (for example, if the roidb
            # comes from the training or val split). We only want to evaluate
            # detection on the *non*-ground-truth rois. We select only the rois
            # that have the gt_classes field set to 0, which means there's no
            # ground truth.
            box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0]
            if cfg.TEST.BOXSCORE:
                boxscores = roidb[i]['boxscores'][roidb[i]['gt_classes'] == 0]

        im = cv2.imread(imdb.image_path_at(i))
        _t['im_detect'].tic()
        if cfg.TEST.BOXSCORE:
            scores, boxes = im_detect(net, im, box_proposals, boxscores)
        else:
            scores, boxes = im_detect(net, im, box_proposals)
        _t['im_detect'].toc()

        _t['misc'].tic()
        # skip j = 0, because it's the background class
        for j in xrange(1, imdb.num_classes):
            if scores.shape[1] == 20:
                newj = j - 1
            else:
                newj = j
            inds = np.where(scores[:, newj] > thresh)[0]
            cls_scores = scores[inds, newj]
            cls_boxes = boxes[inds, newj * 4:(newj + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            keep = nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep, :]
            if vis:
                vis_detections(im, imdb.classes[j], cls_dets)
            all_boxes[j][i] = cls_dets

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack([all_boxes[j][i][:, -1]
                                      for j in xrange(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        _t['misc'].toc()

        print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
            .format(i + 1, num_images, _t['im_detect'].average_time,
                    _t['misc'].average_time)

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    print 'Evaluating detections'
    imdb.evaluate_detections(all_boxes, output_dir)
def test_net(net, imdb, max_per_image=100, thresh=0.05, vis=False):
    """Test a Fast R-CNN network on an image database."""
    if vis:
        from datasets.kitti import kitti
        kitti = kitti("valsplit")
    num_images = len(imdb.image_index)
    # all detections are collected into:
    #  all_boxes[cls][image] = N x 5 array of detections in
    #  (x1, y1, x2, y2, score, [cfg.VIEWP_BINS x viewpoint prob. dist])
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(imdb, net)

    cache_file = os.path.join(output_dir, 'detections.pkl')
    if os.path.exists(cache_file):
        with open(cache_file, 'rb') as fid:
            all_boxes = cPickle.load(fid)
        #print '{} gt roidb loaded from {}'.format(self.name, cache_file)
        print 'Detections cache loaded'
        warnings.warn("PLEASE MAKE SURE THAT YOU REALLY WANT TO USE THE CACHE!",
                      UserWarning)
        #return roidb
    else:
        # timers
        _t = {'im_detect': Timer(), 'misc': Timer()}

        if not cfg.TEST.HAS_RPN:
            roidb = imdb.roidb

        ndetections = 0

        if cfg.SMOOTH_L1_ANGLE:
            viewp_bins = 1
        elif cfg.CONTINUOUS_ANGLE:
            viewp_bins = 1
        else:
            viewp_bins = cfg.VIEWP_BINS

        if cfg.SMOOTH_L1_ANGLE:
            allclasses_viewp_bins = imdb.num_classes
        elif cfg.CONTINUOUS_ANGLE:
            allclasses_viewp_bins = 1
        else:
            allclasses_viewp_bins = imdb.num_classes * cfg.VIEWP_BINS

        for i, img_file in enumerate(imdb.image_index):
            if vis:
                detts = np.empty([0, 6])

            # filter out any ground truth boxes
            if cfg.TEST.HAS_RPN:
                box_proposals = None
            else:
                # The roidb may contain ground-truth rois (for example, if the
                # roidb comes from the training or val split). We only want to
                # evaluate detection on the *non*-ground-truth rois. We select
                # only the rois that have the gt_classes field set to 0, which
                # means there's no ground truth.
                if cfg.TEST.GTPROPOSALS:
                    box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] > -1]
                else:
                    box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0]

            if box_proposals is not None and box_proposals.shape[0] <= 0:
                # if there are no proposals....
                scores = np.empty((0, imdb.num_classes), dtype=np.float32)
                boxes = np.empty((0, imdb.num_classes * 4), dtype=np.float32)
                if cfg.VIEWPOINTS:
                    assert cfg.CONTINUOUS_ANGLE == False and \
                        cfg.SMOOTH_L1_ANGLE == False, 'not implemented'
                    viewpoints = np.empty((0, allclasses_viewp_bins),
                                          dtype=np.float32)
            else:
                if cfg.TEST.FOURCHANNELS:
                    im = cv2.imread(imdb.image_path_at(i), cv2.IMREAD_UNCHANGED)
                else:
                    im = cv2.imread(imdb.image_path_at(i))

                _t['im_detect'].tic()
                if cfg.VIEWPOINTS:
                    scores, boxes, viewpoints = im_detect(net, im, box_proposals)
                else:
                    scores, boxes = im_detect(net, im, box_proposals)
                _t['im_detect'].toc()

            _t['misc'].tic()
            # skip j = 0, because it's the background class
            for j in xrange(1, imdb.num_classes):
                inds = np.where(scores[:, j] > thresh)[0]
                ndetections += len(inds)
                cls_scores = scores[inds, j]
                cls_boxes = boxes[inds, j*4:(j+1)*4]
                if cfg.VIEWPOINTS:
                    if cfg.SMOOTH_L1_ANGLE:
                        viewp = viewpoints[inds, j]
                        cls_dets = np.hstack((cls_boxes,
                                              cls_scores[:, np.newaxis],
                                              viewp[:, np.newaxis])) \
                            .astype(np.float32, copy=False)
                    elif cfg.CONTINUOUS_ANGLE:
                        viewp = viewpoints[inds]
                        cls_dets = np.hstack((cls_boxes,
                                              cls_scores[:, np.newaxis],
                                              viewp)) \
                            .astype(np.float32, copy=False)
                    else:
                        # Softmax is performed only over the class's
                        # VIEWP_BINS-sized "slot" (that is why we apply it
                        # outside Caffe)
                        cls_viewp = softmax(
                            viewpoints[inds, j*cfg.VIEWP_BINS:(j+1)*cfg.VIEWP_BINS])
                        # Assert that the result from softmax makes sense
                        assert all(abs(np.sum(cls_viewp, axis=1) - 1) < 0.1)
                        cls_dets = np.hstack((cls_boxes,
                                              cls_scores[:, np.newaxis],
                                              cls_viewp)) \
                            .astype(np.float32, copy=False)
                else:
                    cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                        .astype(np.float32, copy=False)

                if cfg.TEST.DO_NMS:
                    if cfg.USE_CUSTOM_NMS:
                        if cfg.VIEWPOINTS:
                            nms_returns = nms(cls_dets[:, :-viewp_bins],
                                              cfg.TEST.NMS, force_cpu=True)
                        else:
                            nms_returns = nms(cls_dets, cfg.TEST.NMS,
                                              force_cpu=True)
                        if nms_returns:
                            keep = nms_returns[0]
                            suppress = nms_returns[1]
                        else:
                            keep = []
                    elif cfg.TEST.SOFT_NMS > 0:
                        if cfg.VIEWPOINTS:
                            keep = soft_nms(cls_dets[:, :-viewp_bins],
                                            method=cfg.TEST.SOFT_NMS)
                        else:
                            keep = soft_nms(cls_dets, method=cfg.TEST.SOFT_NMS)
                    else:
                        if cfg.VIEWPOINTS:
                            keep = nms(cls_dets[:, :-viewp_bins], cfg.TEST.NMS)
                        else:
                            keep = nms(cls_dets, cfg.TEST.NMS)
                    cls_dets = cls_dets[keep, :]
                else:
                    if cfg.VIEWPOINTS:
                        cls_dets = cls_dets[cls_dets[:, -viewp_bins-1].argsort()[::-1], :]
                    else:
                        cls_dets = cls_dets[cls_dets[:, -1].argsort()[::-1], :]

                if vis:
                    pre_detts = np.hstack(
                        (np.array(cls_dets[:, :5]),
                         j * np.ones((np.array(cls_dets[:, :5]).shape[0], 1))))
                    detts = np.vstack((detts, pre_detts))

                all_boxes[j][i] = cls_dets

            if vis:
                gt_roidb = kitti._load_kitti_annotation(img_file)
                vis_detections(im, imdb.classes, detts, gt_roidb)

            # Limit to max_per_image detections *over all classes*
            if max_per_image > 0:
                if cfg.VIEWPOINTS:
                    image_scores = np.hstack(
                        [all_boxes[j][i][:, -viewp_bins-1]
                         for j in xrange(1, imdb.num_classes)])
                else:
                    image_scores = np.hstack(
                        [all_boxes[j][i][:, -1]
                         for j in xrange(1, imdb.num_classes)])
                if len(image_scores) > max_per_image:
                    # We usually don't want to do this
                    print "WARNING! Limiting the number of detections"
                    image_thresh = np.sort(image_scores)[-max_per_image]
                    for j in xrange(1, imdb.num_classes):
                        if cfg.VIEWPOINTS:
                            keep = np.where(
                                all_boxes[j][i][:, -viewp_bins-1] >= image_thresh)[0]
                        else:
                            keep = np.where(
                                all_boxes[j][i][:, -1] >= image_thresh)[0]
                        all_boxes[j][i] = all_boxes[j][i][keep, :]
            _t['misc'].toc()

            print 'im_detect: {:d}/{:d} - {:d} detections - {:.3f}s {:.3f}s' \
                .format(i + 1, num_images, ndetections,
                        _t['im_detect'].average_time, _t['misc'].average_time)

        det_file = os.path.join(output_dir, 'detections.pkl')
        with open(det_file, 'wb') as f:
            cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    print 'Evaluating detections'
    imdb.evaluate_detections(all_boxes, output_dir)
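# softmax() above is applied row-wise over each class's viewpoint-bin slice
# but is not defined in this corpus; a numerically stable row-wise sketch:
import numpy as np

def softmax(x):
    """Row-wise softmax for a 2-D array."""
    x = np.asarray(x, dtype=np.float64)
    x = x - x.max(axis=1, keepdims=True)  # stabilize against overflow
    e = np.exp(x)
    return e / e.sum(axis=1, keepdims=True)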
def test_net(net, imdb, max_per_image=100, thresh=0.05, vis=False):
    """Test a Fast R-CNN network on an image database."""
    num_images = len(imdb.image_index)
    # all detections are collected into:
    #  all_boxes[cls][image] = N x 5 array of detections in
    #  (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(imdb, net)

    if cfg.TEST.SEG:
        n_seg_classes = cfg.SEG_CLASSES
        confcounts = np.zeros((n_seg_classes, n_seg_classes))

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    if not cfg.TEST.HAS_RPN:
        roidb = imdb.roidb

    for i in xrange(num_images):
        # filter out any ground truth boxes
        if cfg.TEST.HAS_RPN:
            box_proposals = None
        else:
            # The roidb may contain ground-truth rois (for example, if the roidb
            # comes from the training or val split). We only want to evaluate
            # detection on the *non*-ground-truth rois. We select only the rois
            # that have the gt_classes field set to 0, which means there's no
            # ground truth.
            box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0]

        im = cv2.imread(imdb.image_path_at(i))
        _t['im_detect'].tic()
        if cfg.TEST.SEG:
            seg_gt = cv2.imread(
                get_seg_path(imdb._data_path, imdb.image_path_at(i)), -1)
            if seg_gt is None:
                print 'Could not read ', get_seg_path(imdb._data_path,
                                                      imdb.image_path_at(i))
            scores, boxes, seg_scores = im_detect(net, im, box_proposals)
        else:
            scores, boxes = im_detect(net, im, box_proposals)
        _t['im_detect'].toc()

        _t['misc'].tic()
        # skip j = 0, because it's the background class
        for j in xrange(1, imdb.num_classes):
            inds = np.where(scores[:, j] > thresh)[0]
            cls_scores = scores[inds, j]
            cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            keep = nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep, :]
            if vis:
                vis_detections(im, imdb.classes[j], cls_dets)
            all_boxes[j][i] = cls_dets

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        _t['misc'].toc()

        if cfg.TEST.SEG:
            # evaluate the segmentation
            seg_labels = np.argmax(seg_scores, axis=2).astype(int)
            seg_labels = cv2.resize(seg_labels,
                                    (seg_gt.shape[1], seg_gt.shape[0]),
                                    interpolation=cv2.INTER_NEAREST)
            sumim = seg_gt + seg_labels * n_seg_classes
            hs = np.bincount(sumim.flatten(),
                             minlength=n_seg_classes * n_seg_classes)
            confcounts += hs.reshape((n_seg_classes, n_seg_classes))

            print 'Segmentation evaluation'
            conf = 100.0 * np.divide(confcounts,
                                     1e-20 + confcounts.sum(axis=1))
            np.save(output_dir + '/seg_confusion.npy', conf)
            acc = np.zeros(n_seg_classes)
            for j in xrange(n_seg_classes):
                gtj = sum(confcounts[j, :])
                resj = sum(confcounts[:, j])
                gtresj = confcounts[j, j]
                acc[j] = 100.0 * gtresj / (gtj + resj - gtresj)
            print 'Accuracies', acc
            print 'Mean accuracy', np.mean(acc)

        print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
            .format(i + 1, num_images, _t['im_detect'].average_time,
                    _t['misc'].average_time)

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    print 'Evaluating detections'
    imdb.evaluate_detections(all_boxes, output_dir)
def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg_key,
                   _feat_stride=[16, ], anchor_scales=[8, 16, 32]):
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)

    #layer_params = yaml.load(self.param_str_)
    _anchors = generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]
    rpn_cls_prob_reshape = np.transpose(rpn_cls_prob_reshape, [0, 3, 1, 2])
    rpn_bbox_pred = np.transpose(rpn_bbox_pred, [0, 3, 1, 2])
    #rpn_cls_prob_reshape = np.transpose(np.reshape(rpn_cls_prob_reshape, [1, rpn_cls_prob_reshape.shape[0], rpn_cls_prob_reshape.shape[1], rpn_cls_prob_reshape.shape[2]]), [0, 3, 2, 1])
    #rpn_bbox_pred = np.transpose(rpn_bbox_pred, [0, 3, 2, 1])
    im_info = im_info[0]

    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'

    # cfg_key = str(self.phase)  # either 'TRAIN' or 'TEST'
    #cfg_key = 'TEST'
    if type(cfg_key) == bytes:
        cfg_key = cfg_key.decode('utf-8')
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    scores = rpn_cls_prob_reshape[:, _num_anchors:, :, :]
    bbox_deltas = rpn_bbox_pred
    #im_info = bottom[2].data[0, :]

    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    # 1. Generate proposals from bbox deltas and shifted anchors
    height, width = scores.shape[-2:]

    if DEBUG:
        print('score map size: {}'.format(scores.shape))

    # Enumerate all shifts
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    anchors = _anchors.reshape((1, A, 4)) + \
        shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob
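# _filter_boxes() is called by both proposal_layer() variants above but is not
# defined in this corpus. In py-faster-rcnn it keeps only proposals whose
# width and height are both at least min_size (the caller has already scaled
# min_size to the input image via im_info[2]):
import numpy as np

def _filter_boxes(boxes, min_size):
    """Keep boxes with both sides >= min_size; boxes are [x1, y1, x2, y2]."""
    ws = boxes[:, 2] - boxes[:, 0] + 1
    hs = boxes[:, 3] - boxes[:, 1] + 1
    keep = np.where((ws >= min_size) & (hs >= min_size))[0]
    return keep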
cls_scores = scores[inds, 1]
cls_boxes = boxes[inds, 4:8]
## curve
cls_infos_h = infos_h[inds, :14]
cls_infos_w = infos_w[inds, :14]
cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
    .astype(np.float32, copy=False)

# stack h and w pred.
cls_infos = np.zeros((cls_infos_h.shape[0], 28))
wh_stack_temp = np.dstack((cls_infos_w, cls_infos_h))
assert wh_stack_temp.shape[0] == cls_infos.shape[0], \
    'wh stack length mismatch.'
for ixstack, row_cls_infos in enumerate(cls_infos):
    cls_infos[ixstack] = wh_stack_temp[ixstack].ravel()

cls_dets_withInfo = np.hstack((cls_boxes, cls_scores[:, np.newaxis],
                               cls_infos)) \
    .astype(np.float32, copy=False)
cls_dets, cls_dets_withInfo = nps(cls_dets, cls_dets_withInfo)
if cfg.TEST.USE_PNMS:
    keep = pnms(cls_dets_withInfo, cfg.TEST.PNMS)
else:
    keep = nms(cls_dets, cfg.TEST.NMS)
cls_dets = cls_dets[keep, :]
cls_dets_withInfo = cls_dets_withInfo[keep, :]

vis(im, cls_dets_withInfo, 0.1)
def detect(self, im, im3d, detection_graph, trackingSess, adrfpFrame): """Detect object classes in an image using pre-computed object proposals.""" # Load the demo image #im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name) #im_file = os.path.join('/home/corgi/Lab/label/pos_frame/ACCV/training/000001/',image_name) #im = cv2.imread(im_file) # Detect all object classes and regress object bounds timer = Timer() timer.tic() scores, boxes = im_detect(self.sess, self.net, im) hboxes, hscores = detector_utils.detect_objects( im, detection_graph, trackingSess) timer.toc() print('Detection took {:.3f}s for ' '{:d} object proposals').format(timer.total_time, boxes.shape[0]) num_hands_detect = 1 score_thresh = 0.5 points = detector_utils.draw_box_on_image(num_hands_detect, score_thresh, hscores, hboxes, 320, 240, im, adrfpFrame) # Visualize detections for each class im = im[:, :, (2, 1, 0)] fig, ax = plt.subplots(figsize=(12, 12)) ax.imshow(im, aspect='equal') CONF_THRESH = 0.8 NMS_THRESH = 0.3 for cls_ind, cls in enumerate(CLASSES[1:]): if cls == "hand": continue cls_ind += 1 # because we skipped background cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)] cls_scores = scores[:, cls_ind] dets = np.hstack( (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, NMS_THRESH) dets = dets[keep, :] inds = np.where(dets[:, -1] >= 0.7)[0] if len(inds) == 0: #print "skipping "+str(dets[:, -1]) continue im = self.vis_detections(im, cls, dets, inds, im3d, adrfpFrame, points) img = im[:, :, :].copy() img[:, :, 2] = im[:, :, 0].copy() img[:, :, 0] = im[:, :, 2].copy() name = None if self.iterPic < 10: name = "0" + str(self.iterPic) else: name = str(self.iterPic) cv2.imwrite(name + '.png', img) self.iterPic += 1 return im
def test_net(net, imdb, max_per_image=400, thresh=-np.inf, vis=False): """Test a Fast R-CNN network on an image database.""" num_images = len(imdb.image_index) # all detections are collected into: # all_boxes[cls][image] = N x 5 array of detections in # (x1, y1, x2, y2, score) all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] output_dir = get_output_dir(imdb, net) # timers _t = {'im_detect' : Timer(), 'misc' : Timer()} # imdb=get_testing_roidb(imdb) if not cfg.TEST.HAS_RPN: roidb = imdb.roidb # width = imdb._get_widths() for i in xrange(num_images): # filter out any ground truth boxes if cfg.TEST.HAS_RPN: box_proposals = None else: # The roidb may contain ground-truth rois (for example, if the roidb # comes from the training or val split). We only want to evaluate # detection on the *non*-ground-truth rois. We select those the rois # that have the gt_classes field set to 0, which means there's no # ground truth. box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0] im = cv2.imread(imdb.image_path_at(i)) width = im.shape[1] _t['im_detect'].tic() scores, boxes = im_detect(net, im, box_proposals) if cfg.TEST.USE_FLIPPED: # image flipped flipped_im= cv2.flip(im,1) flip_scores,flip_boxes=im_detect(net, flipped_im, box_proposals) for k in xrange(flip_boxes.shape[1]/4): # if True: # vis_detections(flipped_im, imdb.classes[k], flip_boxes[:,k*4:k*4+4]) # # raw_input() oldx1 = flip_boxes[:, k*4].copy() oldx2 = flip_boxes[:, k*4+2].copy() assert (flip_boxes[:, k*4] >= 0).all() assert (flip_boxes[:, k*4+2] >= flip_boxes[:, k*4]).all() assert (width>= flip_boxes[:, k*4+2]).all() flip_boxes[:, k*4] = width - oldx2 flip_boxes[:,k*4+2] = width - oldx1 assert(flip_boxes[:, k*4+2]>=0).all() assert(flip_boxes[:,k*4]>=0).all() assert (width>= flip_boxes[:, k*4+2]).all() assert (flip_boxes[:, k*4+2] >= flip_boxes[:, k*4]).all() boxes = np.concatenate( (boxes, flip_boxes.copy()), axis = 0 ) scores = np.concatenate( (scores, flip_scores.copy()), axis = 0 ) _t['im_detect'].toc() _t['misc'].tic() # skip j = 0, because it's the background class for j in xrange(1, imdb.num_classes): inds = np.where(scores[:, j] > thresh)[0] cls_scores = scores[inds, j] if cfg.TEST.AGNOSTIC: cls_boxes = boxes[inds, 4:8] else: cls_boxes = boxes[inds, j*4:(j+1)*4] cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ .astype(np.float32, copy=False) keep = nms(cls_dets, cfg.TEST.NMS) cls_dets = cls_dets[keep, :] if vis: vis_detections(im, imdb.classes[j], cls_dets) all_boxes[j][i] = cls_dets # Limit to max_per_image detections *over all classes* if max_per_image > 0: image_scores = np.hstack([all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)]) if len(image_scores) > max_per_image: image_thresh = np.sort(image_scores)[-max_per_image] for j in xrange(1, imdb.num_classes): keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0] all_boxes[j][i] = all_boxes[j][i][keep, :] _t['misc'].toc() print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ .format(i + 1, num_images, _t['im_detect'].average_time, _t['misc'].average_time) det_file = os.path.join(output_dir, 'detections.pkl') with open(det_file, 'wb') as f: cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) print 'Evaluating detections' imdb.evaluate_detections(all_boxes, output_dir)
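The flipped-image branch in test_net mirrors every detection from the flipped frame back into the original one by swapping x-coordinates against the image width. The same transform in isolation, as a sketch (the helper name is ours; the coordinate convention follows the assertions in the loop above):

import numpy as np

def unflip_boxes(flip_boxes, width):
    # Boxes are (N, 4*K) with one (x1, y1, x2, y2) block per class; a point
    # at column x in the mirrored image sits at width - x in the original,
    # so x1' = width - x2 and x2' = width - x1.
    boxes = flip_boxes.copy()
    boxes[:, 0::4] = width - flip_boxes[:, 2::4]
    boxes[:, 2::4] = width - flip_boxes[:, 0::4]
    assert (boxes[:, 2::4] >= boxes[:, 0::4]).all()
    return boxes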
def demo(net, image_name): """Detect object classes in an image using pre-computed object proposals.""" # Load the demo image img_name = os.path.basename(image_name) # im_file = image_name # im = cv2.imread(im_file) im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name) im = cv2.imread(im_file) pimg = process_image(im) # cv2.imshow("Processed", pimg) # cv2.waitKey(0) im = pimg height, width = im.shape[:2] mid = width / 2.5 # print('height = {} and width/2.5 = {}'.format(height, mid)) # Detect all object classes and regress object bounds timer = Timer() timer.tic() scores, boxes = im_detect(net, im) timer.toc() # print ('Detection took {:.3f}s for ' # '{:d} object proposals').format(timer.total_time, boxes.shape[0]) # Visualize detections for each class CONF_THRESH = 0.8 NMS_THRESH = 0.3 for cls_ind, cls in enumerate(CLASSES[1:]): cls_ind += 1 # because we skipped background cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)] cls_scores = scores[:, cls_ind] dets = np.hstack( (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, NMS_THRESH) dets = dets[keep, :] # vis_detections(im, cls, dets, thresh=CONF_THRESH) font = cv2.FONT_HERSHEY_SIMPLEX # print 'class index is {}'.format(cls_ind) color = (0, 0, 255) inds = np.where(dets[:, -1] >= CONF_THRESH)[0] if len(inds) > 0: for i in inds: bbox = dets[i, :4] score = dets[i, -1] cv2.rectangle(im, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2) if bbox[0] < mid: cv2.putText(im, 'left {:s}'.format(cls), (bbox[0], (int)( (bbox[1] - 2))), font, 0.5, (255, 0, 0), 1) else: cv2.putText(im, 'right {:s}'.format(cls, score), (bbox[0], (int)( (bbox[1] - 2))), font, 0.5, (255, 0, 0), 1) # cv2.putText(im,'{:s} {:.3f}'.format(cls, score),(bbox[0], (int)((bbox[1]- 2))), font, 0.5, (255,255,255), 1) # Write the resulting frame # print 'Final image name is {}'.format(img_name) splitName = os.path.splitext(img_name)[0] # print (os.path.splitext(img_name)[0]) # print splitName # cv2.imwrite('{:s}_output.jpg'.format(splitName), im) ## Display output frame # cv2.imshow("output", im) # cv2.waitKey(0) ## Write output frame opDir = '/home/student/cmpe295-masters-project/faster-rcnn-resnet/data/output/' cv2.imwrite(os.path.join(opDir, img_name), im)
def test_net(net, imdb,cfg,max_per_image=100, thresh=0.05, vis=False,task = 'det',nmhead=''): """Test a Fast R-CNN network on an image database.""" num_images = len(imdb.image_index) # heuristic: keep an average of 40 detections per class per images prior # to NMS max_per_set = 40 * num_images # heuristic: keep at most 100 detection per class per image prior to NMS max_per_image = 100 # detection threshold for each class (this is adaptively set based on the # max_per_set constraint) thresh = -np.inf * np.ones(imdb.num_classes) # top_scores will hold one minheap of scores per class (used to enforce # the max_per_set constraint) top_scores = [[] for _ in xrange(imdb.num_classes)] # all detections are collected into: # all_boxes[cls][image] = N x 5 array of detections in # (x1, y1, x2, y2, score) all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] output_dir = get_output_dir(imdb, net) #print output_dir if not os.path.exists(output_dir): os.makedirs(output_dir) #if not os.path.exists(output_dir): # os.makedirs(output_dir) do_det = task=='det'; det_file = os.path.join(output_dir, 'detections.pkl') if (do_det and os.path.isfile(det_file)): all_boxes = cPickle.load(open(det_file,'r')); else: # timers _t = {'im_detect' : Timer(), 'misc' : Timer()} if not cfg.TRAIN['HAS_RPN']: roidb = imdb.roidb #only_seg = cfg.TRAIN['USE_SEG'] & (not cfg.TRAIN['USE_DET']) do_seg = task=='seg' do_edg = task=='edg' do_nrm = task=='nrm' do_sbd = task=='sbd' do_prt = task=='prt' do_sal = task=='sal' print "num_images = ",num_images for i in xrange(num_images): next_file = os.path.join(output_dir,imdb.image_index[i] + '.mat') next_file_png = os.path.join(output_dir,imdb.image_index[i] + '.png') if os.path.exists(next_file) or os.path.exists(next_file_png): continue im = cv2.imread(imdb.image_path_at(i)) #shape = im.shape #print im.shape _t['im_detect'].tic() scores, boxes = im_detect(net, im,cfg, None,do_det) _t['im_detect'].toc() _t['misc'].tic() # skip j = 0, because it's the background class if do_det: for j in xrange(1, imdb.num_classes): inds = np.where(scores[:, j] > 0.05)[0] cls_scores = scores[inds, j] if cfg.TEST.AGNOSTIC: cls_boxes = boxes[inds, 4:8] else: cls_boxes = boxes[inds, j*4:(j+1)*4] cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ .astype(np.float32, copy=False) keep = nms(cls_dets, cfg.TEST.NMS) cls_dets = cls_dets[keep, :] if vis: vis_detections(im, imdb.classes[j], cls_dets) all_boxes[j][i] = cls_dets # Limit to max_per_image detections *over all classes* if max_per_image > 0: image_scores = np.hstack([all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)]) if len(image_scores) > max_per_image: image_thresh = np.sort(image_scores)[-max_per_image] for j in xrange(1, imdb.num_classes): keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0] all_boxes[j][i] = all_boxes[j][i][keep, :] _t['misc'].toc() print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ .format(i + 1, num_images, _t['im_detect'].average_time, _t['misc'].average_time) #print "here!!" 
if do_edg: if nmhead[0:4]!='sigm': nmhead = 'sigmoid-'+nmhead #from IPython import embed; embed() if (do_seg or do_prt): data = net.blobs[nmhead].data.copy(); data= data.squeeze(axis=0) posteriors = data.argmax(axis=0) posteriors = posteriors.astype('uint8').copy(); from PIL import Image im = Image.fromarray(posteriors) im.save(next_file_png) if (do_sal or do_edg): #print "nmhead: ",nmhead data = net.blobs[nmhead].data.copy(); if nmhead=='mtn_result1': slice = 0; else: slice = 1; data= data.squeeze(axis=0) posteriors = 255.0*visualize_gray(data,slice); posteriors = posteriors.astype('uint8').copy(); from PIL import Image im = Image.fromarray(posteriors) im.save(next_file_png) if (do_nrm): data =net.blobs[nmhead].data.copy(); data = -128.*data.copy().squeeze(axis=0) + 128. data = np.transpose(data,(1,2,0)) matplotlib.image.imsave(next_file_png,data.astype('uint8').copy()) if do_sbd: d = {'res':net.blobs[nmhead].data.copy()}; scipy.io.savemat(next_file,d, oned_as='column') # end of for-loop over images if do_det: with open(det_file, 'wb') as f: cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) if do_det: print 'Evaluating detections' imdb.evaluate_detections(all_boxes, output_dir)
def test_net_mask_reload(net_proto, net_mask_proto, weights, imdb, max_per_image=400, thresh=-np.inf, vis=False, save_path="./output/"): """Test a Fast R-CNN network on an image database.""" num_images = len(imdb.image_index) # all detections are collected into: # all_boxes[cls][image] = N x 5 array of detections in # (x1, y1, x2, y2, score) caffe.set_mode_gpu() caffe.set_device(cfg.GPU_ID) all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] net = caffe.Net(net_proto, weights, caffe.TEST) net.name = os.path.splitext(os.path.basename(weights))[0] output_dir = get_output_dir(imdb, net) if not os.path.exists(save_path): os.makedirs(save_path) # timers _t = {'im_detect': Timer(), 'im_seg': Timer(), 'misc': Timer()} if not cfg.TEST.HAS_RPN: roidb = imdb.roidb del net for i in xrange(num_images): # filter out any ground truth boxes if cfg.TEST.HAS_RPN: box_proposals = None else: # The roidb may contain ground-truth rois (for example, if the roidb # comes from the training or val split). We only want to evaluate # detection on the *non*-ground-truth rois. We select those the rois # that have the gt_classes field set to 0, which means there's no # ground truth. box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0] net = caffe.Net(net_proto, weights, caffe.TEST) net.name = os.path.splitext(os.path.basename(weights))[0] im = cv2.imread(imdb.image_path_at(i)) print(im.shape) _t['im_detect'].tic() scores, boxes, feat = im_det(net, im, box_proposals) _t['im_detect'].toc() _t['misc'].tic() # skip j = 0, because it's the background class for j in xrange(1, imdb.num_classes): inds = np.where(scores[:, j] > thresh)[0] cls_scores = scores[inds, j] if cfg.TEST.AGNOSTIC: cls_boxes = boxes[inds, 4:8] else: cls_boxes = boxes[inds, j * 4:(j + 1) * 4] cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ .astype(np.float32, copy=False) keep = nms(cls_dets, cfg.TEST.NMS) cls_dets = cls_dets[keep, :] if vis: vis_detections(im, imdb.classes[j], cls_dets) all_boxes[j][i] = cls_dets # Limit to max_per_image detections *over all classes* if max_per_image > 0: image_scores = np.hstack( [all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)]) if len(image_scores) > max_per_image: image_thresh = np.sort(image_scores)[-max_per_image] for j in xrange(1, imdb.num_classes): keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0] all_boxes[j][i] = all_boxes[j][i][keep, :] print 'im_detection: {:d}/{:d} {:.3f}s {:.3f}s' \ .format(i + 1, num_images, _t['im_detect'].average_time, _t['misc'].average_time) del net net_mask = caffe.Net(net_mask_proto, weights, caffe.TEST) net_mask.name = os.path.splitext(os.path.basename(weights))[0] _t['im_seg'].tic() out_mask = np.zeros((im.shape[0], im.shape[1])) for j in xrange(1, imdb.num_classes): ins_index = 1 boxes_this_im = all_boxes[j][i][:, :-1] seg = im_seg(net_mask, im, feat, boxes_this_im) # print(seg.shape) for ii in xrange(seg.shape[0]): seg_now = seg[ii][0] # seg_now = np.transpose(seg_now) box_now = boxes_this_im[ii] box_now = box_now.astype(int) if box_now[2] == box_now[0] or box_now[3] == box_now[1]: continue # im_now=im[box_now[1]: box_now[3], box_now[0]:box_now[2]] # cv2.imshow("test", im_now) # cv2.waitKey() seg_org_size = cv2.resize( seg_now, (box_now[2] - box_now[0], box_now[3] - box_now[1]), interpolation=cv2.INTER_NEAREST) # print((seg_org_size*99999).shape) # cv2.imshow("seg", seg_org_size*99999) # cv2.waitKey() seg_org_size = seg_org_size * ins_index out_mask[box_now[1]:box_now[3], box_now[0]:box_now[2]] = 
seg_org_size ins_index += 1 _t['im_seg'].toc() mask_save_path = os.path.join( save_path, os.path.basename(imdb.image_path_at(i)).replace(".jpg", ".png")) cv2.imwrite(mask_save_path, out_mask * 10) print 'im_seg: {:d}/{:d} {:.3f}s' \ .format(i + 1, num_images, _t['im_seg'].average_time) del net_mask det_file = os.path.join(output_dir, 'detections.pkl') with open(det_file, 'wb') as f: cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) print 'Evaluating detections' imdb.evaluate_detections(all_boxes, output_dir)
def demo_tuples(net, image_name): """Detect objects, attributes and relations in an image using pre-computed object proposals.""" image_num = int(image_name.split(".")[0]) att_unique = np.unique(att_names[image_num * scale:(image_num * scale + scale)]) print(att_unique) att_unique_adv = np.unique( att_names_adv[image_num * scale:(image_num * scale + scale)]) cls_unique = np.unique(att_cls[image_num * scale:(image_num * scale + scale)]) print(cls_unique) cls_unique_adv = np.unique(att_cls_adv[image_num * scale:(image_num * scale + scale)]) # Load the demo image im_file = os.path.join( "/media/sadaf/e4da0f25-29be-4c9e-a432-3193ff5f5baf/Code/Pytorch_Code/transfer_learn/Analysis/CUB_clean", image_name) im = cv2.imread(im_file) print(im.shape) # Detect all object classes and regress object bounds timer = Timer() timer.tic() scores, boxes, attr_scores, rel_scores = im_detect(net, im) if attr_scores is not None: print 'Found attribute scores' """ if rel_scores is not None: print 'Found relation scores' rel_scores = rel_scores[:,1:] # drop no relation rel_argmax = np.argmax(rel_scores, axis=1).reshape((boxes.shape[0],boxes.shape[0])) rel_score = np.max(rel_scores, axis=1).reshape((boxes.shape[0],boxes.shape[0])) """ timer.toc() print('Detection took {:.3f}s for ' '{:d} object proposals').format(timer.total_time, boxes.shape[0]) # Visualize detections for each class CONF_THRESH = 0.2 NMS_THRESH = 0.05 ATTR_THRESH = 0.1 im = im[:, :, (2, 1, 0)] #im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) fig, ax = plt.subplots(figsize=(12, 12)) plt.imshow(im) # Detections det_indices = [] det_scores = [] det_objects = [] det_bboxes = [] det_attrs = [] for cls_ind, cls in enumerate(classes[1:]): cls_ind += 1 # because we skipped background cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)] cls_scores = scores[:, cls_ind] dets = np.hstack( (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = np.array(nms(dets, NMS_THRESH)) dets = dets[keep, :] inds = np.where(dets[:, -1] >= CONF_THRESH)[0] if len(inds) > 0: keep = keep[inds] for k in keep: det_indices.append(k) det_bboxes.append(cls_boxes[k]) det_scores.append(cls_scores[k]) det_objects.append(cls) if attr_scores is not None: attr_inds = np.where(attr_scores[k][1:] >= ATTR_THRESH)[0] det_attrs.append([attributes[ix] for ix in attr_inds]) else: det_attrs.append([]) #rel_score = rel_score[det_indices].T[det_indices].T #rel_argmax = rel_argmax[det_indices].T[det_indices].T for i, (idx, score, obj, bbox, attr) in enumerate( zip(det_indices, det_scores, det_objects, det_bboxes, det_attrs)): attr_s = [element for element in attr if element in att_unique] for i in range(len(attr)): if attr[i] in att_unique: if obj in cls_unique: box_text = obj if len(attr) > 0: box_text = box_text + " " + attr[i] ax.add_patch( plt.Rectangle((bbox[0], bbox[1]), bbox[2] - bbox[0], bbox[3] - bbox[1], fill=False, edgecolor='red', linewidth=2, alpha=0.5)) ax.text(bbox[0], bbox[1] - 2, '%s' % (box_text), bbox=dict(facecolor='blue', alpha=0.5), fontsize=10, color='white') plt.axis('off') plt.tight_layout() plt.draw() plt.savefig( '/media/sadaf/e4da0f25-29be-4c9e-a432-3193ff5f5baf/Code/Pytorch_Code/transfer_learn/Analysis/clean_bb/' + image_name)
def demoVideo(image): global count global cls_label global b_box count = count + 1 # print ('count before = {}'.format(count)) if (count % 10) > 0: im = process_image(image) height, width = im.shape[:2] mid = width / 2.5 if cls_label is not None: # print('saved label is = {}'.format(cls_label)) font = cv2.FONT_HERSHEY_SIMPLEX cv2.rectangle(im, (b_box[0], b_box[1]), (b_box[2], b_box[3]), (0, 0, 255), 2) if b_box[0] < mid: # cv2.putText(im,'left {:s}'.format(label),(b_box[0], (int)((b_box[1]- 2))), cv2.FONT_HERSHEY_PLAIN, fontScale=1.25, thickness=3, color=(255, 255, 255)) cv2.putText(im, 'left {:s}'.format(cls_label), (b_box[0], (int)( (b_box[1] - 2))), font, 0.5, (255, 0, 0), 1) else: cv2.putText(im, 'right {:s}'.format(cls_label), (b_box[0], (int)( (b_box[1] - 2))), font, 0.5, (255, 0, 0), 1) return im im = process_image(image) height, width = im.shape[:2] mid = width / 2.5 # print('height = {} and width/2.5 = {}'.format(height, mid)) # Detect all object classes and regress object bounds timer = Timer() timer.tic() scores, boxes = im_detect(default_net, im) timer.toc() # print ('Detection took {:.3f}s for ' # '{:d} object proposals').format(timer.total_time, boxes.shape[0]) # Visualize detections for each class CONF_THRESH = 0.8 NMS_THRESH = 0.3 cls_label = None for cls_ind, cls in enumerate(CLASSES[1:]): cls_ind += 1 # because we skipped background cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)] cls_scores = scores[:, cls_ind] dets = np.hstack( (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, NMS_THRESH) dets = dets[keep, :] font = cv2.FONT_HERSHEY_SIMPLEX color = (0, 0, 255) inds = np.where(dets[:, -1] >= CONF_THRESH)[0] if len(inds) > 0: for i in inds: bbox = dets[i, :4] b_box = bbox score = dets[i, -1] cls_label = cls cv2.rectangle(im, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2) if bbox[0] < mid: cv2.putText(im, 'left {:s}'.format(cls), (bbox[0], (int)( (bbox[1] - 2))), font, 0.5, (255, 0, 0), 1) else: cv2.putText(im, 'right {:s}'.format(cls, score), (bbox[0], (int)( (bbox[1] - 2))), font, 0.5, (255, 0, 0), 1) # cv2.putText(im,'{:s} {:.3f}'.format(cls, score),(bbox[0], (int)((bbox[1]- 2))), font, 0.5, (255,255,255), 1) return im
def forward(self, bottom, top): # Algorithm: # # for each (H, W) location i # generate A anchor boxes centered on cell i # apply predicted bbox deltas at cell i to each of the A anchors # clip predicted boxes to image # remove predicted boxes with either height or width < threshold # sort all (proposal, score) pairs by score from highest to lowest # take top pre_nms_topN proposals before NMS # apply NMS with threshold 0.7 to remaining proposals # take after_nms_topN proposals after NMS # return the top proposals (-> RoIs top, scores top) assert bottom[0].data.shape[0] == 1, \ 'Only single item batches are supported' cfg_key = str('TRAIN' if self.phase == 0 else 'TEST') # either 'TRAIN' or 'TEST' pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N nms_thresh = cfg[cfg_key].RPN_NMS_THRESH min_size = cfg[cfg_key].RPN_MIN_SIZE # the first set of _num_anchors channels are bg probs # the second set are the fg probs, which we want scores = bottom[0].data[:, self._num_anchors:, :, :] bbox_deltas = bottom[1].data im_info = bottom[2].data[0, :] if DEBUG: print 'im_size: ({}, {})'.format(im_info[0], im_info[1]) print 'scale: {}'.format(im_info[2]) # 1. Generate proposals from bbox deltas and shifted anchors height, width = scores.shape[-2:] if DEBUG: print 'score map size: {}'.format(scores.shape) # Enumerate all shifts shift_x = np.arange(0, width) * self._feat_stride shift_y = np.arange(0, height) * self._feat_stride shift_x, shift_y = np.meshgrid(shift_x, shift_y) shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose() # Enumerate all shifted anchors: # # add A anchors (1, A, 4) to # cell K shifts (K, 1, 4) to get # shift anchors (K, A, 4) # reshape to (K*A, 4) shifted anchors A = self._num_anchors K = shifts.shape[0] anchors = self._anchors.reshape((1, A, 4)) + \ shifts.reshape((1, K, 4)).transpose((1, 0, 2)) anchors = anchors.reshape((K * A, 4)) # Transpose and reshape predicted bbox transformations to get them # into the same order as the anchors: # # bbox deltas will be (1, 4 * A, H, W) format # transpose to (1, H, W, 4 * A) # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a) # in slowest to fastest order bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4)) if cfg_key == 'TRAIN' and cfg.TRAIN.RPN_NORMALIZE_TARGETS: bbox_deltas *= cfg.TRAIN.RPN_NORMALIZE_STDS bbox_deltas += cfg.TRAIN.RPN_NORMALIZE_MEANS # Same story for the scores: # # scores are (1, A, H, W) format # transpose to (1, H, W, A) # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a) scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1)) # Convert anchors into proposals via bbox transformations proposals = bbox_transform_inv(anchors, bbox_deltas) # 2. clip predicted boxes to image proposals = clip_boxes(proposals, im_info[:2]) # 3. remove predicted boxes with either height or width < threshold # (NOTE: convert min_size to input image scale stored in im_info[2]) keep = _filter_boxes(proposals, min_size * im_info[2]) proposals = proposals[keep, :] scores = scores[keep] # 4. sort all (proposal, score) pairs by score from highest to lowest # 5. take top pre_nms_topN (e.g. 6000) order = scores.ravel().argsort()[::-1] if pre_nms_topN > 0: order = order[:pre_nms_topN] proposals = proposals[order, :] scores = scores[order] # 6. apply nms (e.g. threshold = 0.7) # 7. take after_nms_topN (e.g. 300) # 8. 
return the top proposals (-> RoIs top) keep = nms(np.hstack((proposals, scores)), nms_thresh) if post_nms_topN > 0: keep = keep[:post_nms_topN] proposals = proposals[keep, :] scores = scores[keep] # Output rois blob # Our RPN implementation only supports a single input image, so all # batch inds are 0 batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32) blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False))) # print blob.shape top[0].reshape(*(blob.shape)) top[0].data[...] = blob if DEBUG_SHAPE: print 'ProposalLayer top[0] size: {}'.format(top[0].data.shape) # [Optional] output scores blob if len(top) > 1: top[1].reshape(*(scores.shape)) top[1].data[...] = scores if DEBUG_SHAPE: print 'ProposalLayer top[1] size: {}'.format(top[1].data.shape)
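For reference, the clip_boxes call in step 2 clamps every proposal to the image extent. A sketch consistent with how it is used here, where im_info[:2] holds (height, width):

import numpy as np

def clip_boxes(boxes, im_shape):
    # Clamp each (x1, y1, x2, y2) column block to [0, W-1] x [0, H-1].
    boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)
    boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)
    boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)
    boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)
    return boxes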
def demo(net, image_list): """Detect object classes in an image using pre-computed object proposals.""" # Load the demo image im_file = os.path.join(cfg.ROOT_DIR, 'data', image_list[0]) im = cv2.imread(im_file) # Detect all object classes and regress object bounds timer = Timer() timer.tic() scores, boxes = im_detect(net, im) timer.toc() print('Detection took {:.3f}s for ' '{:d} object proposals').format(timer.total_time, boxes.shape[0]) # Visualize detections for each class ind = 1 color_list = [(255, 0, 0), (0, 255, 0), (0, 0, 255)] color_cls = [(0, 255, 255), (255, 0, 255), (255, 255, 0)] for j in range(1, len(CLASSES)): num_objs = int(image_list[ind + 1]) for i in xrange(num_objs): x1 = int(float(image_list[ind + 2 + i * 4])) y1 = int(float(image_list[ind + 3 + i * 4])) x2 = int(float(image_list[ind + 4 + i * 4])) y2 = int(float(image_list[ind + 5 + i * 4])) rect_start = (x1, y1) rect_end = (x2, y2) #cv2.rectangle(im, rect_start, rect_end, color_list[j-1], 2) ind += 4 * num_objs + 1 thresh = 0.5 NMS_THRESH = 0.3 path = os.path.join(cfg.ROOT_DIR, 'data', 'results', 'show', image_list[0][17:]) index = 1 # counts classes with no detections; must be initialized once, before the class loop for cls_ind, cls in enumerate(CLASSES[1:]): cls_ind += 1 # because we skipped background cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)] cls_scores = scores[:, cls_ind] dets = np.hstack( (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, NMS_THRESH) dets = dets[keep, :] inds = np.where(dets[:, -1] >= thresh)[0] if len(inds) == 0 and index == len(CLASSES[1:]): cv2.imwrite(path, im) return elif len(inds) == 0 and index < len(CLASSES[1:]): index += 1 continue for i in inds: bbox = dets[i, :4] score = dets[i, -1] x = bbox[0] y = bbox[1] rect_start = (x, y) x1 = bbox[2] y1 = bbox[3] rect_end = (x1, y1) color_pred = color_cls[cls_ind - 1] cv2.rectangle(im, rect_start, rect_end, color_pred, 2) cv2.imwrite(path, im)
def test_net(net, imdb): """Test a Fast R-CNN network on an image database.""" num_images = len(imdb.image_index) # heuristic: keep an average of 40 detections per class per image prior # to NMS max_per_set = 40 * num_images # heuristic: keep at most 100 detections per class per image prior to NMS max_per_image = 100 # detection threshold for each class (this is adaptively set based on the # max_per_set constraint) thresh = -np.inf * np.ones(imdb.num_classes) # top_scores will hold one minheap of scores per class (used to enforce # the max_per_set constraint) top_scores = [[] for _ in xrange(imdb.num_classes)] # all detections are collected into: # all_boxes[cls][image] = N x 5 array of detections in # (x1, y1, x2, y2, score) all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] output_dir = get_output_dir(imdb, net) if not os.path.exists(output_dir): os.makedirs(output_dir) # timers _t = {'im_detect': Timer(), 'misc': Timer()} if not cfg.TEST.HAS_RPN: roidb = imdb.roidb for i in xrange(num_images): # filter out any ground truth boxes if cfg.TEST.HAS_RPN: box_proposals = None else: box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0] im = cv2.imread(imdb.image_path_at(i)) _t['im_detect'].tic() scores, boxes = im_detect(net, im, box_proposals) _t['im_detect'].toc() _t['misc'].tic() for j in xrange(1, imdb.num_classes): inds = np.where(scores[:, j] > thresh[j])[0] cls_scores = scores[inds, j] cls_boxes = boxes[inds, j * 4:(j + 1) * 4] top_inds = np.argsort(-cls_scores)[:max_per_image] cls_scores = cls_scores[top_inds] cls_boxes = cls_boxes[top_inds, :] # push new scores onto the minheap for val in cls_scores: heapq.heappush(top_scores[j], val) # if we've collected more than the max number of detections, # then pop items off the minheap and update the class threshold if len(top_scores[j]) > max_per_set: while len(top_scores[j]) > max_per_set: heapq.heappop(top_scores[j]) thresh[j] = top_scores[j][0] all_boxes[j][i] = \ np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ .astype(np.float32, copy=False) if 0: keep = nms(all_boxes[j][i], 0.3) vis_detections(im, imdb.classes[j], all_boxes[j][i][keep, :]) _t['misc'].toc() print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ .format(i + 1, num_images, _t['im_detect'].average_time, _t['misc'].average_time) for j in xrange(1, imdb.num_classes): for i in xrange(num_images): inds = np.where(all_boxes[j][i][:, -1] > thresh[j])[0] all_boxes[j][i] = all_boxes[j][i][inds, :] det_file = os.path.join(output_dir, 'detections.pkl') with open(det_file, 'wb') as f: cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) print 'Applying NMS to all detections' nms_dets = apply_nms(all_boxes, cfg.TEST.NMS) print 'Evaluating detections' imdb.evaluate_detections(nms_dets, output_dir)
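The max_per_set bookkeeping above maintains one min-heap of scores per class, so thresh[j] always equals the weakest score still worth keeping. The same idea in isolation (a sketch; the function name is ours):

import heapq
import numpy as np

def update_threshold(top_scores, new_scores, max_per_set):
    # Push the new scores, evict the weakest beyond max_per_set, and
    # return the heap root: the smallest score that is still retained.
    for val in new_scores:
        heapq.heappush(top_scores, float(val))
    while len(top_scores) > max_per_set:
        heapq.heappop(top_scores)
    return top_scores[0] if top_scores else -np.inf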
def get_detections_from_im(net, im_file, image_id, conf_thresh=0.2, visualize=False): """Load im_file and extract bottom-up features using Faster RCNN""" MIN_BOXES, MAX_BOXES=36,36 NMS_THRESH = 0.05 CONF_THRESH = 0.1 ATTR_THRESH = 0.1 im = cv2.imread(im_file) scores, boxes, attr_scores, rel_scores = im_detect(net, im) # Keep the original boxes, don't worry about the regression bbox outputs rois = net.blobs['rois'].data.copy() # unscale back to raw image space blobs, im_scales = _get_blobs(im, None) cls_boxes = rois[:, 1:5] / im_scales[0] cls_prob = net.blobs['cls_prob'].data pool5 = net.blobs['pool5_flat'].data # Keep only the best detections max_conf = np.zeros((rois.shape[0])) for cls_ind in range(1,cls_prob.shape[1]): cls_scores = scores[:, cls_ind] dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = np.array(nms(dets, cfg.TEST.NMS)) max_conf[keep] = np.where(cls_scores[keep] > max_conf[keep], cls_scores[keep], max_conf[keep]) keep_boxes = np.where(max_conf >= conf_thresh)[0] if len(keep_boxes) < MIN_BOXES: keep_boxes = np.argsort(max_conf)[::-1][:MIN_BOXES] elif len(keep_boxes) > MAX_BOXES: keep_boxes = np.argsort(max_conf)[::-1][:MAX_BOXES] #Normalize scores of best detections, sum their features to #obtain bottom-up attention features of im best_scores = max_conf[keep_boxes] best_feats = pool5[keep_boxes] scores_norm = np.expand_dims(np.exp(best_scores)/np.sum(np.exp(best_scores))+eps,axis=1) cumulative_feats = scores_norm.T.dot(best_feats) sum_feats = np.sum(best_feats,axis=0) if visualize: #To visualize the top scoring bounding boxes overlaid on the image im im = im[:, :, (2, 1, 0)] fig, ax = plt.subplots(figsize=(12, 12)) ax.imshow(im, aspect='equal') for cls_ind, cls in enumerate(CLASSES[1:]): cls_ind += 1 # because we skipped background cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)] cls_scores = scores[:, cls_ind] dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, NMS_THRESH) dets = dets[keep, :] if attr_scores is not None: attributes = attr_scores[keep] else: attributes = None # NOTE: rel_argmax and rel_score are not computed in this function; they must be derived from rel_scores upstream before this branch can run if rel_scores is not None: rel_argmax_c = rel_argmax[keep] rel_score_c = rel_score[keep] else: rel_argmax_c = None rel_score_c = None vis_detections(ax, cls, dets, attributes, rel_argmax_c, rel_score_c, thresh=CONF_THRESH) plt.savefig('./'+im_file.split('/')[-1].replace(".jpg", "_demo.png")) return { 'image_id': image_id, 'image_h': np.size(im, 0), 'image_w': np.size(im, 1), 'num_boxes' : len(keep_boxes), 'boxes': base64.b64encode(cls_boxes[keep_boxes]), 'features': base64.b64encode(pool5[keep_boxes]), 'cumulative_feats':cumulative_feats, #softmax normalized features of best roi's 'sum_feats':sum_feats # features of best roi's summed }
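The bottom-up feature pooling above weights the kept RoI features by a softmax over their best scores. A numerically stable version of that weighting, as a sketch (note the original adds eps after the division, which leaves the weights slightly unnormalized; the helper name is ours):

import numpy as np

def softmax_weighted_pool(best_scores, best_feats, eps=1e-8):
    # best_scores: (N,), best_feats: (N, D); returns a (1, D) weighted sum.
    z = best_scores - best_scores.max()        # stabilize the exponentials
    w = np.exp(z) / (np.sum(np.exp(z)) + eps)  # softmax weights over N RoIs
    return w[np.newaxis, :].dot(best_feats)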
def test_net(net, imdb, max_per_image=100, thresh=0.05, boxes_num_per_batch=0, vis=False, startIdx=0, endIdx=-1, saveMat=False, svm=False, use_wzctx=True): """Test a Fast R-CNN network on an image database.""" if use_wzctx: print "use use_wzctx!!!" num_images = len(imdb.image_index) # all detections are collected into: # all_boxes[cls][image] = N x 5 array of detections in # (x1, y1, x2, y2, score) #print "4" all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] output_dir = get_output_dir(imdb, net) # timers _t = {'im_detect' : Timer(), 'misc' : Timer()} #print "5" if not cfg.TEST.HAS_RPN: roidb = imdb.roidb if endIdx==-1: endIdx=num_images #print "6" for i in xrange(num_images): # filter out any ground truth boxes if i < startIdx or i>=endIdx: continue if cfg.TEST.HAS_RPN: box_proposals = None else: # The roidb may contain ground-truth rois (for example, if the roidb # comes from the training or val split). We only want to evaluate # detection on the *non*-ground-truth rois. We select those the rois # that have the gt_classes field set to 0, which means there's no # ground truth. #print "x" box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0] #print "y" im_name = imdb.image_path_at(i) im_name = im_name.split('/')[-1] im_name = im_name.split('.')[0] im = cv2.imread(imdb.image_path_at(i)) _t['im_detect'].tic() #print "boxes_num %d"%boxes_num_per_batch if boxes_num_per_batch > 0: num_boxes = box_proposals.shape[0] num_batch = (num_boxes + boxes_num_per_batch -1) / boxes_num_per_batch #print "zzz" #num_boxes = roidb[i]['boxes'].shape[0] #num_batch = math.ceil(num_boxes/boxes_num_per_batch) scores_batch = np.zeros((num_batch*boxes_num_per_batch, imdb.num_classes), dtype=np.float32) boxes_batch = np.zeros((num_batch*boxes_num_per_batch, 4*imdb.num_classes), dtype=np.float32) # replicate the first box num_batch*boxes_num_per_batch times for preallocation rois = np.tile(box_proposals[0, :], (num_batch*boxes_num_per_batch, 1)) #print "xx" # assign real boxes to rois rois[:num_boxes, :] = box_proposals #print "num_batch: %d"%num_batch for j in xrange(int(num_batch)): roi = rois[j*boxes_num_per_batch:(j+1)*boxes_num_per_batch, :] #print roi.shape score, box = im_detect(net, im, roi, svm, use_wzctx) scores_batch[j*boxes_num_per_batch:(j+1)*boxes_num_per_batch, :] = score# [:,:,0,0] boxes_batch[j*boxes_num_per_batch:(j+1)*boxes_num_per_batch, :] = box # print "6_%d"%j # discard duplicated results scores = scores_batch[:num_boxes, :] #print "kx" boxes = boxes_batch[:num_boxes, :] else: #print box_proposals.shape[0] scores, boxes = im_detect(net, im, box_proposals, svm, use_wzctx) mat_dir = os.path.join(output_dir, 'stage%s'%startIdx) if not os.path.exists(mat_dir): os.mkdir(mat_dir) if True: sio.savemat('%s/%s.mat' % (mat_dir,im_name + '_' + str(i) ), {'scores': scores, 'boxes': boxes}) _t['im_detect'].toc() _t['misc'].tic() # skip j = 0, because it's the background class #print "7" for j in xrange(1, imdb.num_classes): inds = np.where(scores[:, j] > thresh)[0] cls_scores = scores[inds, j] cls_boxes = boxes[inds, j*4:(j+1)*4] cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ .astype(np.float32, copy=False) keep = nms(cls_dets, cfg.TEST.NMS) if cfg.TEST.BBOX_VOTE: cls_dets_after_nms = cls_dets[keep, :] cls_dets = bbox_voting(cls_dets_after_nms, cls_dets, threshold=cfg.TEST.BBOX_VOTE_THRESH) else: cls_dets = cls_dets[keep, :] if vis: vis_detections(im, imdb.classes[j], cls_dets) all_boxes[j][i] = cls_dets # Limit to max_per_image detections *over all 
classes* if max_per_image > 0: image_scores = np.hstack([all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)]) if len(image_scores) > max_per_image: image_thresh = np.sort(image_scores)[-max_per_image] for j in xrange(1, imdb.num_classes): keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0] all_boxes[j][i] = all_boxes[j][i][keep, :] _t['misc'].toc() print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ .format(i + 1, num_images, _t['im_detect'].average_time, _t['misc'].average_time) #det_file = os.path.join(output_dir, 'detection_%sto%s.pkl' % (startIdx,endIdx)) #with open(det_file, 'wb') as f: # cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) print 'Evaluating detections' imdb.evaluate_detections(all_boxes, output_dir, startIdx, endIdx)
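The boxes_num_per_batch branch above pads the proposal list to a whole number of fixed-size batches by replicating the first box, then discards the padded rows after detection. The padding step on its own (a sketch; the helper name is ours):

import numpy as np

def pad_rois_to_batches(box_proposals, batch_size):
    # Replicate row 0 so the RoI count is a multiple of batch_size;
    # real boxes occupy the first num_boxes rows, padding fills the rest.
    num_boxes = box_proposals.shape[0]
    num_batch = (num_boxes + batch_size - 1) // batch_size
    rois = np.tile(box_proposals[0, :], (num_batch * batch_size, 1))
    rois[:num_boxes, :] = box_proposals
    return rois, num_batch, num_boxes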
numberImg += 1 im = cv2.imread("/nfs/zhengmeisong/wkspace/caffe_wk/py-faster-rcnn/data/test/"+filename) timer = Timer() timer.tic() scores, boxes = im_detect(net, im) timer.toc() print ('No.{:d} took {:.3f}s for ' '{:d} object proposals').format(numberImg, timer.total_time, boxes.shape[0]) CONF_THRESH = 0.8 NMS_THRESH = 0.3 for cls_ind, cls in enumerate(CLASSES[1:]): cls_ind += 1 # because we skipped background cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)] # 300x4 matrix: one box per proposal for this class cls_scores = scores[:, cls_ind] # 300 rows: one score per proposal dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, NMS_THRESH) dets = dets[keep, :] inds = np.where(dets[:, -1] >= CONF_THRESH)[0] if len(inds) == 0: continue print inds,dets[inds[0]] for i in inds: bbox = dets[i, :4] score = dets[i, -1] cv2.rectangle(im, (bbox[0], bbox[1]), (bbox[2],bbox[3]), (0,0,255),2) cv2.putText(im, '{:s} {:.3f}'.format(cls, score),(bbox[0], bbox[1]), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255),1,4,0) cv2.imwrite("/nfs/zhengmeisong/wkspace/caffe_wk/py-faster-rcnn/data/testOut/"+filename, im)
def nms_detections(pred_boxes, scores, nms_thresh, inds=None): """Apply NMS to scored boxes; return the kept boxes and scores, the remapped inds (when given), and the keep list.""" dets = np.hstack((pred_boxes, scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, nms_thresh) if inds is None: return pred_boxes[keep], scores[keep], keep return pred_boxes[keep], scores[keep], inds[keep], keep
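Everything in this section funnels detections through nms(dets, thresh). For reference, a pure-NumPy greedy NMS equivalent to the py-faster-rcnn CPU version (the shipped kernels are Cython/CUDA, so treat this as an illustrative sketch):

import numpy as np

def py_nms(dets, thresh):
    # Greedy non-maximum suppression over (x1, y1, x2, y2, score) rows.
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]   # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # intersection of the top box with every remaining box
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)
        # drop boxes overlapping the kept box by more than thresh
        order = order[np.where(ovr <= thresh)[0] + 1]
    return keep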
def vis_detections(im, wav_name, scores, boxes): """Draw detected bounding boxes.""" ''' im = im[:, :, (2, 1, 0)] im_size=im.shape fig, ax = plt.subplots(figsize=(im_size[1]/100.0, im_size[0]/100.0)) ax.imshow(im, aspect='equal') ''' result = open(data_folder + 'Result/task2_results.txt', 'at') f = open( data_folder + 'Result/estimate_txt/' + wav_name[:-4] + '_estimate.txt', 'wt') result.write(wav_name) write_result = False cls_boxes = np.zeros((boxes.shape[0], 4), boxes.dtype) cls_boxes[:, 1] = boxes[:, 2] cls_boxes[:, 2] = 511 cls_boxes[:, 3] = boxes[:, 3] for cls_ind, class_name in enumerate(CLASSES[1:]): cls_ind += 1 # because we skipped background cls_scores = scores[:, cls_ind] dets = np.hstack( (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, NMS_THRESH) #np.save('data/'+im_name+'_keep.npy',keep) #sio.savemat('data/'+im_name+'_keep.mat',{'keep':keep}) #dets = dets[keep, :] #dets=dets[np.argmax(dets[:,-1]),:] #dets=dets[np.newaxis,:] ''' for i in keep: if dets[i,-1]>(CONF_THRESH[cls_ind]+0.5*(1-scores[i,0])): ''' dets = dets[keep, :] inds = np.where(dets[:, -1] > CONF_THRESH[cls_ind])[0] if len(inds) == 0: continue else: for i in inds: onset = str(dets[i, 1] * (nfft - noverlap) / common_fs) offset = str(dets[i, 3] * (nfft - noverlap) / common_fs) if not write_result: result.write('\t' + onset + '\t' + offset + '\t' + name_transform[class_name]) write_result = True else: result.write('\n' + wav_name + '\t' + onset + '\t' + offset + '\t' + name_transform[class_name]) f.write(onset + '\t' + offset + '\t' + name_transform[class_name] + '\n') ''' ax.add_patch( plt.Rectangle((dets[i,0], dets[i,1]), dets[i,2] - dets[i,0], dets[i,3] - dets[i,1], fill=False, edgecolor='red', linewidth=3.5) ) ax.text(dets[i,0], dets[i,1] - 2, '{:s} {:.3f}'.format(class_name, dets[i,-1]), bbox=dict(facecolor='blue', alpha=0.5), fontsize=14, color='white') ''' result.write('\n') result.close() f.close()
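The onset/offset strings above convert spectrogram frame indices to seconds via frames * (nfft - noverlap) / fs. A worked example with assumed STFT parameters (the real nfft, noverlap, and common_fs are defined elsewhere in that script):

nfft, noverlap, common_fs = 1024, 512, 44100    # assumed STFT settings
hop = nfft - noverlap                           # samples advanced per frame
frame = 200                                     # a detected onset frame index
onset_seconds = frame * hop / float(common_fs)  # 200 * 512 / 44100 ~= 2.32 s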
def demo(net, image_name): """Detect object classes in an image using pre-computed object proposals.""" # Load the demo image im_file = os.path.join(cfg.DATA_DIR, image_name) im = cv2.imread(im_file) # Detect all object classes and regress object bounds timer = Timer() timer.tic() scores, boxes, attr_scores, rel_scores = im_detect(net, im) #print 'relations' #print rel_scores.shape #rel_argmax = np.argsort(rel_scores, axis=1).reshape((boxes.shape[0],boxes.shape[0])) #rel_score = np.max(rel_scores, axis=1).reshape((boxes.shape[0],boxes.shape[0])) #print rel_argmax.shape #print rel_score.shape #print np.min(rel_score) #print np.max(rel_score) #np.savetxt('rel_score.csv', rel_score, delimiter=',') #np.savetxt('rel_argmax.csv', rel_argmax, delimiter=',') #print fail timer.toc() print('Detection took {:.3f}s for ' '{:d} object proposals').format(timer.total_time, boxes.shape[0]) # Visualize detections for each class CONF_THRESH = 0.4 NMS_THRESH = 0.3 im = im[:, :, (2, 1, 0)] fig, ax = plt.subplots(figsize=(12, 12)) ax.imshow(im, aspect='equal') for cls_ind, cls in enumerate(CLASSES[1:]): cls_ind += 1 # because we skipped background cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)] cls_scores = scores[:, cls_ind] dets = np.hstack( (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, NMS_THRESH) dets = dets[keep, :] if attr_scores is not None: attributes = attr_scores[keep] else: attributes = None # NOTE: rel_argmax and rel_score are only built in the commented-out block above; recompute them before enabling relation output if rel_scores is not None: rel_argmax_c = rel_argmax[keep] rel_score_c = rel_score[keep] else: rel_argmax_c = None rel_score_c = None vis_detections(ax, cls, dets, attributes, rel_argmax_c, rel_score_c, thresh=CONF_THRESH) plt.savefig('data/demo/' + im_file.split('/')[-1].replace(".jpg", "_demo.jpg"))
def forward(self, bottom, top): # params cfg_key = self.phase # either 'TRAIN' or 'TEST' if cfg_key == 0: cfg_ = cfg.TRAIN else: cfg_ = cfg.TEST # corner params pt_thres = cfg_.PT_THRESH pt_max_num = cfg.PT_MAX_NUM pt_nms_range = cfg.PT_NMS_RANGE pt_nms_thres = cfg.PT_NMS_THRESH # proposal params ld_interval = cfg.LD_INTERVAL ld_um_thres = cfg.LD_UM_THRESH # rpn params # min_size = cfg_.RPN_MIN_SIZE nms_thresh = cfg_.RPN_NMS_THRESH pre_nms_topN = cfg_.RPN_PRE_NMS_TOP_N post_nms_topN = cfg_.RPN_POST_NMS_TOP_N im_info = bottom[0].data[0, :] score_tl = bottom[1].data[0, :].transpose((1, 2, 0)) score_tr = bottom[2].data[0, :].transpose((1, 2, 0)) score_br = bottom[3].data[0, :].transpose((1, 2, 0)) score_bl = bottom[4].data[0, :].transpose((1, 2, 0)) scores = np.concatenate([ score_tl[:, :, :, np.newaxis], score_tr[:, :, :, np.newaxis], score_br[:, :, :, np.newaxis], score_bl[:, :, :, np.newaxis] ], axis=3) map_info = scores.shape[:2] # 1. sample corner candidates from prob maps tl, tr, br, bl = _corner_sampling(scores, pt_thres, pt_max_num, pt_nms_range, pt_nms_thres) # 2. assemble corner candidates into proposals proposals = _proposal_sampling(tl, tr, br, bl, map_info, ld_interval, ld_um_thres) # 3. filter proposals = filter_quads(proposals) scores = proposals[:, 8] proposals = proposals[:, :8] # 4. rescale quads into raw image space proposals = proposals * self._feat_stride # 5. quadrilateral non-max suppression order = scores.ravel().argsort()[::-1] if pre_nms_topN > 0: order = order[:pre_nms_topN] proposals = proposals[order, :] scores = scores[order] keep = nms( np.hstack((proposals, scores[:, np.newaxis])).astype(np.float32, copy=False), nms_thresh) proposals = proposals[keep, :] scores = scores[keep] if post_nms_topN > 0: proposals = proposals[:post_nms_topN, :] scores = scores[:post_nms_topN] if proposals.shape[0] == 0: # add whole image to avoid error print 'NO PROPOSALS!' proposals = np.array( [[0, 0, im_info[1], 0, im_info[1], im_info[0], 0, im_info[0]]]) scores = np.array([0.0]) # output # top[0]: quads(x1, y1, x2, y2, x3, y3, x4, y4) # top[1]: rois(xmin, ymin, xmax, ymax, theta) # top[2]: scores batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32) blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False))) top[0].reshape(*blob.shape) top[0].data[...] = blob if len(top) > 1: if cfg.DUAL_ROI: rois = quad_2_obb(np.array(proposals, dtype=np.float32)) rois = dual_roi(rois) else: rois = quad_2_obb(np.array(proposals, dtype=np.float32)) batch_inds = np.zeros((rois.shape[0], 1), dtype=np.float32) blob = np.hstack((batch_inds, rois.astype(np.float32, copy=False))) top[1].reshape(*blob.shape) top[1].data[...] = blob if len(top) > 2: scores = np.vstack((scores, scores)).transpose() top[2].reshape(*scores.shape) top[2].data[...] = scores
def test_net(net, imdb, max_per_image=100, thresh=0.05, vis=False): """Test a Fast R-CNN network on an image database.""" num_images = len( imdb.image_index) # all test image names that the imdb subclass (e.g. pascal_voc.py) reads from its txt split file # all detections are collected into: # all_boxes[cls][image] = N x 5 array of detections in # (x1, y1, x2, y2, score) all_boxes = [ [[] for _ in xrange(num_images)] # structure: [num_classes, num_images, (N*5)] for _ in xrange(imdb.num_classes) ] output_dir = get_output_dir(imdb, net) # timers _t = {'im_detect': Timer(), 'misc': Timer()} if not cfg.TEST.HAS_RPN: roidb = imdb.roidb '''------- process each image in turn -------''' for i in xrange(num_images): # filter out any ground truth boxes if cfg.TEST.HAS_RPN: box_proposals = None else: # The roidb may contain ground-truth rois (for example, if the roidb # comes from the training or val split). We only want to evaluate # detection on the *non*-ground-truth rois. We select only the rois # that have the gt_classes field set to 0, which means there's no # ground truth. box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0] im = cv2.imread(imdb.image_path_at(i)) _t['im_detect'].tic() '''------- returned structure: scores(R, cls_num), boxes(R, 4 x cls_num) -------''' scores, boxes = im_detect(net, im, box_proposals) # -- the key detection step -- _t['im_detect'].toc() _t['misc'].tic() # skip j = 0, because it's the background class for j in xrange(1, imdb.num_classes): inds = np.where(scores[:, j] > thresh)[0] cls_scores = scores[inds, j] cls_boxes = boxes[inds, j * 4:(j + 1) * 4] cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ .astype(np.float32, copy=False) keep = nms(cls_dets, cfg.TEST.NMS) # apply non-maximum suppression (NMS) cls_dets = cls_dets[keep, :] if vis: vis_detections(im, imdb.classes[j], cls_dets) # store an (n, 5) array whose rows are (x1, y1, x2, y2, score): the class-j detections for image i all_boxes[j][i] = cls_dets # Limit to max_per_image detections *over all classes* if max_per_image > 0: image_scores = np.hstack( [all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)]) if len(image_scores) > max_per_image: image_thresh = np.sort(image_scores)[-max_per_image] for j in xrange(1, imdb.num_classes): keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0] all_boxes[j][i] = all_boxes[j][i][keep, :] _t['misc'].toc() print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ .format(i + 1, num_images, _t['im_detect'].average_time, _t['misc'].average_time) det_file = os.path.join( output_dir, 'detections.pkl') # detections.pkl does not appear to be used downstream; only all_boxes is with open(det_file, 'wb') as f: cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) # save the detection file print 'Evaluating detections' imdb.evaluate_detections(all_boxes, output_dir)
def demo(image_name, image_no, net): colors = [ "blue", "green", "red", "cyan", "magenta", "yellow", "black", "white", "darkblue", "orchid", "springgreen", "lime", "deepskyblue", "mediumvioletred", "maroon", "orangered", "navy", "olive", "orange", "orangered", "orchid", "pink", "plum", "purple", "salmon", "sienna", "silver", "tan", "teal", "tomato", "violet", "wheat", "yellow", "yellowgreen", "lavender", "palevioletred" ] conf_thresh = 0.3 min_boxes = 36 max_boxes = 36 indexes = [] cfg.TEST.NMS = 0.6 im = cv2.imread( os.path.join( "/media/sadaf/e4da0f25-29be-4c9e-a432-3193ff5f5baf/Code/Pytorch_Code/transfer_learn/Analysis/targeted_attacks_ep16/adv_images", image_name)) cls_append = [] scores, boxes, attr_scores, rel_scores = im_detect(net, im) print(image_no) # Keep the original boxes, don't worry about the regression bbox outputs rois = net.blobs['rois'].data.copy() # unscale back to raw image space blobs, im_scales = _get_blobs(im, None) cls_boxes = rois[:, 1:5] / im_scales[0] cls_prob = net.blobs['cls_prob'].data attr_prob = net.blobs['attr_prob'].data pool5 = net.blobs['pool5_flat'].data # Keep only the best detections max_conf = np.zeros((rois.shape[0])) for cls_ind in range(1, cls_prob.shape[1]): cls_scores = scores[:, cls_ind] dets = np.hstack( (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = np.array(nms(dets, cfg.TEST.NMS)) max_conf[keep] = np.where(cls_scores[keep] > max_conf[keep], cls_scores[keep], max_conf[keep]) keep_boxes = np.where(max_conf >= conf_thresh)[0] if len(keep_boxes) < min_boxes: keep_boxes = np.argsort(max_conf)[::-1][:min_boxes] elif len(keep_boxes) > max_boxes: keep_boxes = np.argsort(max_conf)[::-1][:max_boxes] ############################ att_unique = np.unique(att_names[image_no * scale:(image_no * scale + scale)]) att_unique_adv = np.unique(att_names_adv[image_no * scale:(image_no * scale + scale)]) cls_unique = np.unique(att_cls[image_no * scale:(image_no * scale + scale)]) cls_unique_adv = np.unique(att_cls_adv[image_no * scale:(image_no * scale + scale)]) im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) sizes = np.shape(im) height = float(sizes[0]) width = float(sizes[1]) fig = plt.figure() fig.set_size_inches(width / height, 1, forward=False) ax = plt.Axes(fig, [0., 0., 1., 1.]) ax.set_axis_off() fig.add_axes(ax) plt.imshow(im) #colors=["blue","green","red","cyan","magenta","yellow","black","white","darkblue","orchid","springgreen","lime","deepskyblue","mediumvioletred","maroon","orangered"] boxes = cls_boxes[keep_boxes] #print (boxes) #print (keep_boxes) objects = np.argmax(cls_prob[keep_boxes][:, 1:], axis=1) attr_thresh = 0.1 attr = np.argmax(attr_prob[keep_boxes][:, 1:], axis=1) attr_conf = np.max(attr_prob[keep_boxes][:, 1:], axis=1) count_box = 0 for i in range(len(keep_boxes)): bbox = boxes[i] if bbox[0] == 0: bbox[0] = 1 if bbox[1] == 0: bbox[1] = 1 #cls = classes[objects[i]+1] if attr_conf[i] > attr_thresh: #for k in range (len(att_unique)): # for l in range (len(cls_unique)): #if attributes[attr[i]+1]==att_unique[k]: # if classes[objects[i]+1] == cls_unique[l]: #if attributes[attr[i]+1] not in att_unique_adv: #if classes[objects[i]+1] not in cls_unique_adv: if attributes[attr[i] + 1] in att_unique_adv: if classes[objects[i] + 1] in cls_unique_adv: cls = attributes[attr[i] + 1] + " " + classes[objects[i] + 1] cls_append.append(cls) count = cls_append.count(cls) if count == 1: count_box = count_box + 1 print(cls) plt.gca().add_patch( plt.Rectangle((bbox[0], bbox[1]), bbox[2] - bbox[0], bbox[3] - bbox[1], fill=False, 
edgecolor=colors[i], linewidth=0.3, alpha=0.5)) plt.gca().text(bbox[0], bbox[1] - 2, '%s' % (cls), bbox=dict(facecolor='blue', alpha=0, linewidth=0.2), fontsize=2.5, color=colors[i]) #plt.suptitle((correct_cls[int(image_no)])+ " "+(wrong_cls[int(image_no)]),fontsize=2) plt.savefig( '/media/sadaf/e4da0f25-29be-4c9e-a432-3193ff5f5baf/Code/Pytorch_Code/transfer_learn/Analysis/targeted_attacks_ep16/adv_bb1/adv_bb{}.jpg' .format(image_no), dpi=1500) #plt.savefig('/media/sadaf/e4da0f25-29be-4c9e-a432-3193ff5f5baf/Code/Pytorch_Code/transfer_learn/Analysis/clean_bb/clean_bb{}_50.jpg'.format(image_no), dpi = 1500) #plt.tight_layout() plt.close()
def bag_demo_double(net, image_name, cat_ids, bboxes): """Detect object classes in an image using pre-computed object proposals.""" im = cv2.imread(image_name) # im = url_to_image(image_name) # Detect all object classes and regress object bounds timer = Timer() timer.tic() scores, boxes = im_detect(net, im) # print scores timer.toc() print('Detection took {:.3f}s for ' '{:d} object proposals').format(timer.total_time, boxes.shape[0]) # Visualize detections for each class im = im[:, :, (2, 1, 0)] ####### fig = plt.figure() ax1 = fig.add_subplot(1, 2, 1) ####### colors = plt.cm.hsv(np.linspace(0, 1, len(CLASSES))).tolist() ax1.imshow(im) currentAxis = plt.gca() CONF_THRESH = 0.6 NMS_THRESH = 0.3 for cls_ind, cls in enumerate(CLASSES[1:]): cls_ind += 1 # because we skipped background cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)] cls_scores = scores[:, cls_ind] dets = np.hstack( (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, NMS_THRESH) dets = dets[keep, :] keep_final = np.where(dets[:, 4] > CONF_THRESH)[0] for i in keep_final: xmin = dets[i, 0] ymin = dets[i, 1] xmax = dets[i, 2] ymax = dets[i, 3] score = dets[i, 4] label_name = cls display_txt = '%s: %.2f' % (label_name, score) coords = (xmin, ymin), xmax - xmin + 1, ymax - ymin + 1 color = colors[cls_ind] currentAxis.add_patch( plt.Rectangle(*coords, fill=False, edgecolor=color, linewidth=2)) currentAxis.text(xmin, ymin, display_txt, bbox={ 'facecolor': color, 'alpha': 0.5 }) ax2 = fig.add_subplot(1, 2, 2) ax2.imshow(im) currentAxis = plt.gca() for idx, cls_ind in enumerate(cat_ids): cls = CLASSES[cls_ind] xmin = bboxes[idx, 0] ymin = bboxes[idx, 1] xmax = xmin + bboxes[idx, 2] - 1 ymax = ymin + bboxes[idx, 3] - 1 label_name = cls display_txt = '%s: %.2f' % (label_name, 1) coords = (xmin, ymin), xmax - xmin + 1, ymax - ymin + 1 color = colors[cls_ind] currentAxis.add_patch( plt.Rectangle(*coords, fill=False, edgecolor=color, linewidth=2)) currentAxis.text(xmin, ymin, display_txt, bbox={ 'facecolor': color, 'alpha': 0.5 }) plt.show()
def demo(net, image_name): """Detect object classes in an image using pre-computed object proposals.""" #print image_name # Load the demo image #im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name) #im_file = os.path.join('/tmp/caffe_demos_uploads/', image_name,'.png') #im=image_name #print im_file im = cv2.imread(image_name) # Detect all object classes and regress object bounds timer = Timer() timer.tic() scores, boxes = im_detect(net, im) timer.toc() print('Detection took {:.3f}s for ' '{:d} object proposals').format(timer.total_time, boxes.shape[0]) print boxes.shape print scores.shape # Visualize detections for each class timer = Timer() timer.tic() CONF_THRESH = 0.8 NMS_THRESH = 0.3 im = im[:, :, (2, 1, 0)] fig, ax = plt.subplots(figsize=(12, 12)) ax.imshow(im, aspect='equal') for cls_ind, cls in enumerate(CLASSES[1:]): cls_ind += 1 # because we skipped background cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)] cls_scores = scores[:, cls_ind] dets = np.hstack( (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, NMS_THRESH) dets = dets[keep, :] #vis_detections(im, cls, dets, thresh=CONF_THRESH,) ##################################################### class_name = cls inds = np.where(dets[:, -1] >= 0.8)[0] if len(inds) == 0: continue #im = im[:, :, (2, 1, 0)] #fig, ax = plt.subplots(figsize=(12, 12)) #ax.imshow(im, aspect='equal') for i in inds: bbox = dets[i, :4] score = dets[i, -1] ax.add_patch( plt.Rectangle((bbox[0], bbox[1]), bbox[2] - bbox[0], bbox[3] - bbox[1], fill=False, edgecolor='red', linewidth=3.5)) ax.text( bbox[0], bbox[1] - 2, #'{:s} {:.3f}'.format(class_name, score), class_name, bbox=dict(facecolor='blue', alpha=0.5), fontsize=14, color='white', fontproperties=myfont) #ax.set_title(('{} detections with ' # 'p({} | box) >= {:.1f}').format(class_name, class_name, # thresh), # fontsize=14) plt.axis('off') plt.tight_layout() plt.draw() #ax.imwrite('123.jpg') plt.savefig(image_name, dpi=400, bbox_inches="tight") timer.toc() print('draw time {:.3f}s for ' '{:d} object proposals').format(timer.total_time, boxes.shape[0])
def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg_key,
                   _feat_stride=[16, ], anchor_scales=[8, 16, 32]):
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)

    _anchors = generate_anchors(
        scales=np.array(anchor_scales))  # (9, 4) anchor coordinates
    _num_anchors = _anchors.shape[0]
    rpn_cls_prob_reshape = np.transpose(
        rpn_cls_prob_reshape, [0, 3, 1, 2])  # softmax scores, e.g. (1, 18, 14, 14)
    rpn_bbox_pred = np.transpose(rpn_bbox_pred, [0, 3, 1, 2])  # e.g. (1, 36, 14, 14)
    im_info = im_info[0]  # [max_length, max_width, im_scale]

    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'

    # cfg_key is either 'TRAIN' or 'TEST'
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N    # e.g. 12000: boxes kept before NMS
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N  # e.g. 2000: boxes kept after NMS
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH         # e.g. 0.7: NMS threshold on proposals
    min_size = cfg[cfg_key].RPN_MIN_SIZE             # e.g. 16: min side at orig image scale

    # The first set of _num_anchors channels are bg probs,
    # the second set are the fg probs, which we want.
    scores = rpn_cls_prob_reshape[:, _num_anchors:, :, :]  # e.g. (1, 9, 14, 14)
    bbox_deltas = rpn_bbox_pred

    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    # 1. Generate proposals from bbox deltas and shifted anchors
    height, width = scores.shape[-2:]  # e.g. 14, 14

    if DEBUG:
        print('score map size: {}'.format(scores.shape))

    # Enumerate all shifts
    shift_x = np.arange(0, width) * _feat_stride   # [0, 16, 32, ..., 208]
    shift_y = np.arange(0, height) * _feat_stride  # [0, 16, 32, ..., 208]
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    anchors = _anchors.reshape((1, A, 4)) + \
              shifts.reshape((1, K, 4)).transpose((1, 0, 2))  # e.g. (196, 9, 4)
    anchors = anchors.reshape((K * A, 4))  # (14*14*9, 4)

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape(
        (-1, 4))  # (1, 36, 14, 14) -> (1, 14, 14, 36) -> (14*14*9, 4)

    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.transpose((0, 2, 3, 1)).reshape(
        (-1, 1))  # (1, 9, 14, 14) -> (1, 14, 14, 9) -> (14*14*9, 1)

    # Convert anchors into proposals via bbox transformations: each delta
    # (dx, dy, dw, dh) is an offset relative to its anchor (x1, y1, x2, y2),
    # so this yields predicted boxes mapped back onto the original image,
    # shape (14*14*9, 4).
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. clip predicted boxes to image boundaries
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2]);
    # _filter_boxes returns the indices of the boxes to keep
    keep = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob
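# proposal_layer relies on clip_boxes (step 2) and _filter_boxes (step 3).
# The sketch below follows the standard py-faster-rcnn definitions of these
# helpers; treat it as a reference, not necessarily this repo's exact code.
import numpy as np

def clip_boxes(boxes, im_shape):
    """Clip boxes (x1, y1, x2, y2) to lie inside the image."""
    boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)  # x1
    boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)  # y1
    boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)  # x2
    boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)  # y2
    return boxes

def _filter_boxes(boxes, min_size):
    """Return indices of boxes whose width and height are both >= min_size."""
    ws = boxes[:, 2] - boxes[:, 0] + 1
    hs = boxes[:, 3] - boxes[:, 1] + 1
    keep = np.where((ws >= min_size) & (hs >= min_size))[0]
    return keep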
def forward(self, bottom, top):
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)

    cfg_key = str(self.phase)  # either 'TRAIN' or 'TEST'
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE
    try:
        num_imgs = cfg[cfg_key].IMS_PER_BATCH
    except AttributeError:
        num_imgs = 1

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    scores = bottom[0].data[:, self._num_anchors:, :, :]
    bbox_deltas = bottom[1].data
    im_info = bottom[2].data

    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0][0], im_info[0][1]))
        print('scale: {}'.format(im_info[0][2]))

    # 1. Generate proposals from bbox deltas and shifted anchors
    height, width = scores.shape[-2:]

    if DEBUG:
        print('score map size: {}'.format(scores.shape))

    # Enumerate all shifts
    shift_x = np.arange(0, width) * self._feat_stride
    shift_y = np.arange(0, height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = self._num_anchors
    K = shifts.shape[0]
    anchors = self._anchors.reshape((1, A, 4)) + \
              shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    # Process each image in the mini-batch and collect its proposals.
    blob_all = []
    scores_all = []
    for im_i in range(num_imgs):
        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas_i = bbox_deltas[im_i][np.newaxis, ...]
        bbox_deltas_i = bbox_deltas_i.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores_i = scores[im_i][np.newaxis, ...]
        scores_i = scores_i.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas_i)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[im_i][:2])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = _filter_boxes(proposals, min_size * im_info[im_i][2])
        proposals = proposals[keep, :]
        scores_i = scores_i[keep]

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores_i.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores_i = scores_i[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        keep = nms(np.hstack((proposals, scores_i)), nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]
        scores_i = scores_i[keep]

        # Build this image's rois blob: the batch index column records which
        # image in the mini-batch each proposal came from.
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        batch_inds[:] = im_i
        blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
        if len(blob) > 0:
            blob_all.append(blob)
            scores_all.append(scores_i)

    blob_all = np.vstack(blob_all)
    top[0].reshape(*(blob_all.shape))
    top[0].data[...] = blob_all

    # [Optional] output scores blob, stacked over all images in the batch
    # (the original code exported only the last image's scores here)
    if len(top) > 1:
        scores_all = np.vstack(scores_all)
        top[1].reshape(*(scores_all.shape))
        top[1].data[...] = scores_all
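# The (1, A, 4) + (K, 1, 4) broadcast that both proposal functions use to
# enumerate shifted anchors can be checked in isolation. A toy sketch with a
# 2x2 score map, stride 16, and two made-up anchors (not generate_anchors
# output); assumes numpy is imported as np, as elsewhere in this module.
def _toy_anchor_broadcast():
    _anchors = np.array([[-8., -8., 8., 8.],
                         [-16., -16., 16., 16.]])
    height, width, feat_stride = 2, 2, 16
    shift_x, shift_y = np.meshgrid(np.arange(width) * feat_stride,
                                   np.arange(height) * feat_stride)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()  # (4, 4)
    A, K = _anchors.shape[0], shifts.shape[0]
    anchors = (_anchors.reshape((1, A, 4)) +
               shifts.reshape((1, K, 4)).transpose((1, 0, 2))).reshape((K * A, 4))
    print(anchors.shape)  # (8, 4): every anchor replicated at every cell
    print(anchors[2])     # [ 8. -8. 24.  8.]: first anchor shifted 16 px in x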