def build_feature_db(net, images_info, imdbs, out_obj):
    """Cache fc7 features for every image and stream them to out_obj via pickle."""
    paths1 = [os.path.basename(imdbs[0].image_path_at(i)) for i in range(imdbs[0].num_images)]
    paths2 = [os.path.basename(imdbs[1].image_path_at(i)) for i in range(imdbs[1].num_images)]
    paths = [paths1, paths2]
    #im_db = []
    _t = Timer()
    for i in range(len(images_info['image_name'])):
        print 'caching features for image {:d}/{:d}'.format(i + 1, len(images_info['image_name']))
        _t.tic()
        # Look the image up in either imdb by file name.
        if images_info['image_name'][i] in paths[0]:
            im = cv2.imread(imdbs[0].image_path_at(paths[0].index(images_info['image_name'][i])))
        elif images_info['image_name'][i] in paths[1]:
            im = cv2.imread(imdbs[1].image_path_at(paths[1].index(images_info['image_name'][i])))
        print 'Done running NN'
        # gt features: copy the blob, otherwise the second forward pass below
        # overwrites it in place and feat_pos would silently equal feat_neg.
        if 'gt' in images_info.keys():
            scores, boxes = im_detect(net, im, images_info['gt'][i])
            feat_pos = net.blobs['fc7'].data.copy()
        # roi features
        scores, boxes = im_detect(net, im, images_info['roi'][i])
        feat_neg = net.blobs['fc7'].data.copy()
        print 'Done extracting features from fc7'
        # generate image db entry and write it to disk
        if 'gt' in images_info.keys():
            im_reg = {'name': images_info['image_name'][i],
                      'roi_boxes': images_info['roi'][i],
                      'roi_features': feat_neg,
                      'gt_boxes': images_info['gt'][i],
                      'gt_features': feat_pos}
        else:
            im_reg = {'name': images_info['image_name'][i],
                      'roi_boxes': images_info['roi'][i],
                      'roi_features': feat_neg}
        pickle.dump(im_reg, out_obj)
        _t.toc()
        #im_db.append(im_reg)
        print 'Done in {}'.format(_t.average_time)
def demo(net, image_name): # get the proposals by using the shell to use c++ codes os.system( '/media/DataDisk/twwang/fast-rcnn/rcnn_test/proposals_for_python.sh' \ + ' ' + image_name) # Load computed Selected Search object proposals data = open('/home/twwang/temp_proposal', "rb").read() number_proposals = struct.unpack("i", data[0:4])[0] number_edge = struct.unpack("i", data[4:8])[0] assert number_edge == 4, 'The size is not matched!\n' + \ 'Note that the first two variables are the number of proposals\n' + \ ' and number of coordinates in a box, which is 4 by default\n' #cfg.NUM_PPS = 10 number_proposals = min(cfg.NUM_PPS, number_proposals) obj_proposals = np.asarray(struct.unpack( str(number_proposals * 4) + 'f', data[8: 8 + 16 * number_proposals])).reshape(number_proposals, 4) im = cv2.imread(image_name) #print im.shape #im = cv2.flip(im, 0) #im = cv2.transpose(im) # Detect all object classes and regress object bounds timer = Timer() timer.tic() if cfg.MULTI_LABEL: scores, boxes, multi_labels = im_detect(net, im, obj_proposals) else: scores, boxes = im_detect(net, im, obj_proposals) timer.toc() print ('Detection took {:.3f}s for ' '{:d} object proposals').format(timer.total_time, boxes.shape[0]) # Visualize detections for each class for cls in ['Upper', 'Lower', 'Whole']: cls_ind = CLASSES.index(cls) cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)] cls_scores = scores[:, cls_ind] dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, NMS_THRESH) dets = dets[keep, :] vis_detections(im, cls, dets, image_name, thresh=CONF_THRESH) print ('The demo image is save as {}').format("/home/twwang/demo_results/" + \ os.path.split(image_name)[1])
def generateHeatMap(net, image, box, label, mask_size=(5, 5), mask_stride=5):
    """Occlusion heatmap: slide a black mask over the box and record the score drop."""
    scores, _ = im_detect(net, image, np.array([box]))
    # Use the top-scoring class of the unmasked box as the reference label.
    label = np.argmax(scores[0])
    complete_score = scores[0, label]
    print label, complete_score
    heatmap = np.zeros((image.shape[0], image.shape[1]))
    x_range = np.arange(box[0], box[2] - mask_size[1], mask_stride)
    y_range = np.arange(box[1], box[3] - mask_size[0], mask_stride)
    count = 0
    for y in y_range:
        for x in x_range:
            masked = image.copy()
            cv2.rectangle(masked, (x, y), (x + mask_size[1], y + mask_size[0]), (0, 0, 0), -1)
            scores, _ = im_detect(net, masked, np.array([box]))
            masked_score = scores[0, label]
            delta_score = complete_score - masked_score
            heatmap[y:y + mask_size[0], x:x + mask_size[1]] += np.ones(mask_size) * delta_score
            count += 1
            perc = float(count) / (len(x_range) * len(y_range))
            print perc
            #cv2.imshow("Image", masked)
            #cv2.waitKey()
    negative_heatmap = heatmap.copy()
    negative_heatmap[heatmap > 0] = 0
    negative_heatmap *= -1.0
    negative_heatmap = cv2.normalize(negative_heatmap, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX)
    positive_heatmap = heatmap.copy()
    positive_heatmap[heatmap < 0] = 0
    positive_heatmap = cv2.normalize(positive_heatmap, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX)
    heatmap = np.zeros(image.shape).astype(np.uint8)
    heatmap[:, :, 1] = positive_heatmap
    heatmap[:, :, 2] = negative_heatmap
    overlay = cv2.addWeighted(image, 0.3, heatmap, 0.7, 0.0)
    cv2.imshow("Heatmap", heatmap)
    cv2.imshow("Overlay", overlay)
    cv2.waitKey()
def demo(net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""
    # Load the demo image
    im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name)
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print ('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        vis_detections(im, cls, dets, thresh=CONF_THRESH)
def detect(net, im):
    """Run the detector on an image, visualize all classes and return the image and detection count."""
    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print ('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    res = 0
    global CLASSES
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        res += vis_detections(im, cls, dets, thresh=CONF_THRESH)
    return im, res
def Detect(net, image_path):
    """Detect object classes in an image assuming the whole image is an object."""
    # Load the image
    im = cv2.imread(image_path)
    h, w, c = im.shape

    # TODO: Run selective search first
    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im, np.array([[0, 0, w, h]]))
    timer.toc()
    scores = scores[0]

    # get top 6 predictions
    pred_classes = [CLASSES[idx] for idx in ((-scores).argsort()[:6]).tolist()]
    conf = [(-1) * prob for prob in np.sort(-scores)[:6].tolist()]

    img_blob = {}
    img_blob['image_path'] = image_path
    img_blob['pred'] = {'text': pred_classes, 'conf': conf}
    img_blob['rcnn_time'] = timer.total_time
    return img_blob
def detect_bboxes(net, im_names, subset_classes): """Detect object classes in an image using pre-computed object proposals.""" df = cnn_utils.create_bbox_data_frame(with_object_index=False) for im_name in im_names: print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~' print 'Demo for {}'.format(im_name) # Load the input image. im_file = os.path.join(FLAGS.data_dir, 'images', im_name) im = cv2.imread(im_file) im_size_x = im.shape[1] im_size_y = im.shape[0] # Detect all object classes and regress object bounds. timer = Timer() timer.tic() scores, boxes = im_detect(net, im) timer.toc() print ('Detection took {:.3f}s for ' '{:d} object proposals').format( timer.total_time, boxes.shape[0]) # Detect for each class for subset_cls_ind in range(len(class_names_to_be_detected)): cls = class_names_to_be_detected[subset_cls_ind] try: cls_ind = CLASSES.index(cls) except: print('error: class does not exist in training data: ' '{0}'.format(cls)) exit(-1) cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)] cls_scores = scores[:, cls_ind] dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, FLAGS.nms_thresh) dets = dets[keep, :] inds = np.where(dets[:, -1] >= FLAGS.conf_thresh)[0] if len(inds) > 0: print ('{} {}(s) are detected.'.format(len(inds), cls)) for i in inds: # ['image_name', 'class_index', 'x1', 'y1', 'x2', 'y2', 'score'] x1 = dets[i, 0] y1 = dets[i, 1] x2 = dets[i, 2] y2 = dets[i, 3] score = dets[i, -1] if FLAGS.ignore_bbox_on_boundary: # Ignore bounding boxes on the frame boundary. if x1 <= 0 or x2 >= (im_size_x - 1) or \ y1 <= 0 or y2 >= (im_size_y - 1): continue # Append a row. df.loc[len(df)] = [ im_name, subset_cls_ind, x1, y1, x2, y2, score] return df
def get_pos_examples(self):
    counts = self._get_pos_counts()
    for i in xrange(len(counts)):
        self.trainers[i].alloc_pos(counts[i])

    _t = Timer()
    roidb = self.imdb.roidb
    num_images = len(roidb)
    # num_images = 100
    for i in xrange(num_images):
        im = cv2.imread(self.imdb.image_path_at(i))
        if roidb[i]['flipped']:
            im = im[:, ::-1, :]
        gt_inds = np.where(roidb[i]['gt_classes'] > 0)[0]
        gt_boxes = roidb[i]['boxes'][gt_inds]
        _t.tic()
        scores, boxes = im_detect(self.net, im, gt_boxes)
        _t.toc()
        feat = self.net.blobs[self.layer].data
        for j in xrange(1, self.imdb.num_classes):
            cls_inds = np.where(roidb[i]['gt_classes'][gt_inds] == j)[0]
            if len(cls_inds) > 0:
                cls_feat = feat[cls_inds, :]
                self.trainers[j].append_pos(cls_feat)
        print('get_pos_examples: {:d}/{:d} {:.3f}s'
              .format(i + 1, len(roidb), _t.average_time))
def train_with_hard_negatives(self):
    _t = Timer()
    roidb = self.imdb.roidb
    num_images = len(roidb)
    # num_images = 100
    for i in xrange(num_images):
        im = cv2.imread(self.imdb.image_path_at(i))
        if roidb[i]['flipped']:
            im = im[:, ::-1, :]
        _t.tic()
        scores, boxes = im_detect(self.net, im, roidb[i]['boxes'])
        _t.toc()
        feat = self.net.blobs[self.layer].data
        for j in xrange(1, self.imdb.num_classes):
            hard_inds = \
                np.where((scores[:, j] > self.hard_thresh) &
                         (roidb[i]['gt_overlaps'][:, j].toarray().ravel() <
                          self.neg_iou_thresh))[0]
            if len(hard_inds) > 0:
                hard_feat = feat[hard_inds, :].copy()
                new_w_b = \
                    self.trainers[j].append_neg_and_retrain(feat=hard_feat)
                if new_w_b is not None:
                    self.update_net(j, new_w_b[0], new_w_b[1])
        print(('train_with_hard_negatives: '
               '{:d}/{:d} {:.3f}s').format(i + 1, len(roidb),
                                           _t.average_time))
def demo(net, image_name, classes): """Detect object classes in an image using pre-computed object proposals.""" # Load pre-computed Selected Search object proposals box_file = os.path.join(cfg.ROOT_DIR, 'data', 'demo', image_name + '_boxes.mat') obj_proposals = sio.loadmat(box_file)['boxes'] # Load the demo image im_file = os.path.join(cfg.ROOT_DIR, 'data', 'demo', image_name + '.jpg') im = cv2.imread(im_file) # Detect all object classes and regress object bounds timer = Timer() timer.tic() scores, boxes = im_detect(net, im, obj_proposals) timer.toc() print ('Detection took {:.3f}s for ' '{:d} object proposals').format(timer.total_time, boxes.shape[0]) # Visualize detections for each class CONF_THRESH = 0.8 NMS_THRESH = 0.3 for cls in classes: cls_ind = CLASSES.index(cls) cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)] cls_scores = scores[:, cls_ind] dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, NMS_THRESH) dets = dets[keep, :] print 'All {} detections with p({} | box) >= {:.1f}'.format(cls, cls, CONF_THRESH) vis_detections(im, cls, dets, thresh=CONF_THRESH)
def _get_feature_scale(self, num_images=100):
    TARGET_NORM = 20.0  # Magic value from traditional R-CNN
    _t = Timer()
    roidb = self.imdb.roidb
    total_norm = 0.0
    count = 0.0
    inds = npr.choice(range(self.imdb.num_images), size=num_images,
                      replace=False)
    for i_, i in enumerate(inds):
        im = cv2.imread(self.imdb.image_path_at(i))
        if roidb[i]['flipped']:
            im = im[:, ::-1, :]
        _t.tic()
        scores, boxes = im_detect(self.net, im, roidb[i]['boxes'])
        _t.toc()
        feat = self.net.blobs[self.layer].data
        total_norm += np.sqrt((feat ** 2).sum(axis=1)).sum()
        count += feat.shape[0]
        print('{}/{}: avg feature norm: {:.3f}'.format(
              i_ + 1, num_images, total_norm / count))
    return TARGET_NORM * 1.0 / (total_norm / count)
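# Note (added; an assumption about intended usage, not code from the original
# project): since _get_feature_scale() returns TARGET_NORM / (average L2 norm),
# multiplying extracted features by the returned factor brings their average
# L2 norm up to TARGET_NORM before SVM training, e.g.:
#
#     feature_scale = self._get_feature_scale()
#     feat = self.net.blobs[self.layer].data * feature_scale  # mean L2 norm ~= 20.0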
def demo(net, im_file):
    """Detect object classes in an image using pre-computed object proposals."""
    # Load the demo image as gray scale
    gim = cv2.imread(im_file, flags=cv2.CV_LOAD_IMAGE_GRAYSCALE)
    # convert to rgb repeated in each channel
    im = cv2.cvtColor(gim, cv2.COLOR_GRAY2BGR)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print ('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        vis_detections(im, cls, dets, thresh=CONF_THRESH)
def demo(net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""
    # Load the demo image
    im_file = os.path.join(cfg.ROOT_DIR, 'data', 'demo', image_name)
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print ('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        order = cls_scores.argsort()[::-1]
        sorted_dets = dets[order, :]  # computed for reference; NMS below runs on the unsorted dets
        keep = nms(dets, NMS_THRESH)
        with open('/home/xyy/Desktop/doing/Object Detection/py-faster-rcnn/test_python.txt', 'w') as f:
            dets = dets[keep, :]
            for i in dets:
                for j in i:
                    f.write(str(j) + ' ')
                f.write('\n')
        vis_detections(im, cls, dets, thresh=CONF_THRESH)
def produce(self, ip): scores, bbox = im_detect(self.net_, ip) #Find the top class for each box bestClass = np.argmax(scores,axis=1) bestScore = np.max(scores, axis=1) allDet = edict() for cl in [self.prms_.targetClass]: clsIdx = self.cls_.index(cl) #Get all the boxes that belong to the desired class idx = bestClass == clsIdx clScore = bestScore[idx] clBox = bbox[idx,:] #Sort the boxes by the score sortIdx = np.argsort(-clScore) topK = min(len(sortIdx), self.prms_.topK) sortIdx = sortIdx[0:topK] #Get the desired output clScore = clScore[sortIdx] clBox = clBox[sortIdx] clBox = clBox[:, (clsIdx * 4):(clsIdx*4 + 4)] #Stack detections and perform NMS dets=np.hstack((clBox, clScore[:,np.newaxis])).astype(np.float32) keep = nms(dets, self.prms_.nmsThresh) dets = dets[keep, :] #Only keep detections with high confidence inds = np.where(dets[:, -1] >= self.prms_.confThresh)[0] allDet[cl] = copy.deepcopy(dets[inds]) return allDet
def demo(net, im, return_boxes): """Detect object classes in an image using pre-computed object proposals.""" # Detect all object classes and regress object bounds timer = Timer() timer.tic() scores, boxes = im_detect(net, im) timer.toc() print ('Detection took {:.3f}s for ' '{:d} object proposals').format(timer.total_time, boxes.shape[0]) # Visualize detections for each class CONF_THRESH = 0.8 NMS_THRESH = 0.3 classes = {} for cls_ind, cls in enumerate(CLASSES[1:]): try: cls_ind += 1 # because we skipped background cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)] cls_scores = scores[:, cls_ind] dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, NMS_THRESH) dets = dets[keep, :] bboxes = vis_detections(im, cls, dets, return_boxes, thresh=CONF_THRESH) classes[cls] = bboxes except Exception as e: continue if not return_boxes: cv2.imshow("image", im) return classes
def demo(net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""
    # Load the demo image
    im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name)
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print ('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    # Visualize detections for each class
    CONF_THRESH = 0.7
    NMS_THRESH = 0.3
    json_data_list = []
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        bbox, score = vis_detections(im, cls, dets, thresh=CONF_THRESH)
        if score:
            json_data_list.append({"class": cls, 'bbox': bbox, 'score': score})
    if len(json_data_list):
        # Use a context manager so the result file is closed after writing.
        with open("result/" + image_name + ".json", "w") as f:
            json.dump(json_data_list, f, indent=2)
def init_net(): cfg.TEST.HAS_RPN = True # Use RPN for proposals args = parse_args() # prototxt = os.path.join(cfg.MODELS_DIR, NETS[args.demo_net][0], # 'faster_rcnn_alt_opt', 'faster_rcnn_test.pt') prototxt = os.path.join('/home/dean/Documents/py-faster-rcnn/models/WIDER_FACE/VGG16/faster_rcnn_end2end', 'test.prototxt') # caffemodel = os.path.join(cfg.DATA_DIR, 'faster_rcnn_models', # NETS[args.demo_net][1]) caffemodel = os.path.join('/home/dean/Documents/py-faster-rcnn/output/faster_rcnn_end2end/voc_2007_train', 'vgg16_faster_rcnn_iter_50000.caffemodel') if not os.path.isfile(caffemodel): raise IOError(('{:s} not found.\nDid you run ./data/script/' 'fetch_faster_rcnn_models.sh?').format(caffemodel)) if args.cpu_mode: caffe.set_mode_cpu() else: caffe.set_mode_gpu() caffe.set_device(args.gpu_id) cfg.GPU_ID = args.gpu_id net = caffe.Net(prototxt, caffemodel, caffe.TEST) print '\n\nLoaded network {:s}'.format(caffemodel) im = 128 * np.ones((300, 500, 3), dtype=np.uint8) for i in xrange(2): _, _= im_detect(net, im) return net
def demoRest(net, image_name, classes, box_file, obj_proposals, im_file, im): # Detect all object classes and regress object bounds timer = Timer() timer.tic() scores, boxes = im_detect(net, im, obj_proposals) timer.toc() print ('Detection took {:.3f}s for ' '{:d} object proposals').format(timer.total_time, boxes.shape[0]) # Visualize detections for each class CONF_THRESH = 0.8 NMS_THRESH = 0.3 for cls in classes: cls_ind = CLASSES.index(cls) cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)] cls_scores = scores[:, cls_ind] keep = np.where(cls_scores >= CONF_THRESH)[0] cls_boxes = cls_boxes[keep, :] cls_scores = cls_scores[keep] dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, NMS_THRESH) dets = dets[keep, :] print 'All {} detections with p({} | box) >= {:.1f}'.format(cls, cls, CONF_THRESH) vis_detections(im, cls, dets, thresh=CONF_THRESH)
def detect(net, image_set, image_name, output_file):
    """Detect object classes in an image using pre-computed object proposals."""
    # Load pre-computed Selected Search object proposals
    #box_file = os.path.join(coco_root, 'boxes', image_set, image_name + '.mat')
    box_file = os.path.join(coco_root, 'boxes_full', image_set, image_name + '.mat')
    if not os.path.exists(box_file):
        print 'File does not exist', box_file
        return
    obj_proposals = sio.loadmat(box_file)['boxes']

    # Load the demo image
    im_file = os.path.join(coco_root, 'images', image_set, image_name + '.jpg')
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im, obj_proposals)
    timer.toc()
    print ('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    np.savez(output_file, scores=scores, boxes=boxes)
def detect(self, img): bbox = self.bbox(img) scores, boxes = im_detect(self.net, img, bbox) result = [] CONF_THRESH = 0.8 NMS_THRESH = 0.3 for cls in self.CLASSES[1:]: cls_ind = self.CLASSES.index(cls) cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)] cls_scores = scores[:, cls_ind] dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, NMS_THRESH) dets = dets[keep, :] inds = np.where(dets[:, -1] >= CONF_THRESH)[0] if len(inds) == 0: continue for i in inds: bbox = dets[i, :4] x1, y1, x2, y2 = map(int, bbox) result.append({ "label": cls, "bbox": [x1, y1, x2, y2] }) return result
def detect_person(net, im, cls_ind=1, cls='person', CONF_THRESH=0.8):
    """Detect object classes in an image using pre-computed object proposals."""
    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print ('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    # Visualize detections for each class
    NMS_THRESH = 0.3
    cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
    cls_scores = scores[:, cls_ind]
    dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
    keep = nms(dets, NMS_THRESH)
    # Filtering by confidence threshold as well
    keep = [ind for ind in keep if cls_scores[ind] > CONF_THRESH]
    if len(keep) > 1:
        sizes = np.zeros((len(keep),))
        for ind, curr_ind in enumerate(keep):
            bbox = dets[curr_ind, :4]
            sizes[ind] = (bbox[3] - bbox[1]) * (bbox[2] - bbox[0])
        # Retain only the biggest bounding box
        keep = keep[np.argmax(sizes)]
    dets = dets[keep, :]
    return (dets.reshape(1, -1), cls_scores[keep])
def detect_objects(imgpath): """Detect object classes in an image using pre-computed object proposals.""" print("in detect object") # Load the demo image im_file = os.path.join(imgpath) im = cv2.imread(im_file) print("read image") # Detect all object classes and regress object bounds timer = Timer() timer.tic() print("im_detect") scores, boxes = im_detect(app.config['net'], im) timer.toc() print ('Detection took {:.3f}s for ' '{:d} object proposals').format(timer.total_time, boxes.shape[0]) # Visualize detections for each class CONF_THRESH = 0.8 NMS_THRESH = 0.3 results = dict() for cls_ind, cls in enumerate(CLASSES[1:]): cls_ind += 1 # because we skipped background cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)] cls_scores = scores[:, cls_ind] dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, NMS_THRESH) dets = dets[keep, :] results[cls] = detect_positions(im, cls, dets, thresh=CONF_THRESH) return results
def demo(net, im, scale_factor, classes):
    """Detect object classes in an image using pre-computed object proposals."""
    im2 = cv2.resize(im, (0, 0), fx=1.0 / scale_factor, fy=1.0 / scale_factor)

    obj_proposals_in = []
    dlib.find_candidate_object_locations(im2, obj_proposals_in, min_size=70)

    obj_proposals = np.empty((len(obj_proposals_in), 4))
    for idx in range(len(obj_proposals_in)):
        obj_proposals[idx] = [obj_proposals_in[idx].left(),
                              obj_proposals_in[idx].top(),
                              obj_proposals_in[idx].right(),
                              obj_proposals_in[idx].bottom()]

    # Detect all object classes and regress object bounds
    scores, boxes = im_detect(net, im2, obj_proposals)

    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls in classes:
        cls_ind = CLASSES.index(cls)
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        # Note: returning inside the loop means only the first class in
        # `classes` is ever processed.
        return [im2, cls, dets, CONF_THRESH]
def detect(self, image):
    '''
    :param image: Image from which the objects should be detected.
    self.CONF_THRESHOLD: list of confidence thresholds, one per category.
        If None or empty, 0.7 is used for every category; if non-empty but
        zero for some entries, 0.7 is used for those entries.
    self.NMS_THRESHOLD: bounding-box overlap threshold; lower values allow
        less repetition, higher values allow more.
    :return: a tuple (detections, seconds), where detections is a list of
        dicts {'bbox': [x1, y1, x2, y2], 'category': cls, 'confidence': score}.
    '''
    start = time.time()
    bbox_class_list = []
    scores, boxes = im_detect(self.model, image)
    for cls_ind, (cls, threshold) in enumerate(zip(self.cat, self.CONF_THRESHOLD)):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, self.NMS_THRESHOLD)
        dets = dets[keep, :]
        inds = np.where(dets[:, -1] >= threshold)[0]
        for i in inds:
            # x1, y1, x2, y2 = dets[i, :-1]
            bbox_class_list.append({'bbox': dets[i, :-1].tolist(),
                                    'category': cls,
                                    'confidence': float(dets[i, -1])})
    end = time.time()
    return (bbox_class_list, end - start)
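# Usage sketch for the detect() method above (hypothetical names: the wrapper
# class and how self.model / self.cat / self.CONF_THRESHOLD / self.NMS_THRESHOLD
# are initialised are not shown in the original):
#
#     detector = ObjectDetector()            # hypothetical wrapper class
#     im = cv2.imread('example.jpg')         # any BGR image
#     detections, elapsed = detector.detect(im)
#     for det in detections:
#         print(det['category'], det['confidence'], det['bbox'])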
def get_detections(net, image_name): """Detect object classes in an image using pre-computed object proposals.""" # Load the demo image im_file = image_name#os.path.join(cfg.DATA_DIR, 'demo', image_name) im = cv2.imread(im_file) # Detect all object classes and regress object bounds #timer = Timer() #timer.tic() scores, boxes, pose_a, pose_e = im_detect(net, im) #timer.toc() #print ('Detection took {:.3f}s for ' # '{:d} object proposals').format(timer.total_time, boxes.shape[0]) #print "a=%s, e=%s"%(5*pose_a, 5*pose_e) # Visualize detections for each class #CONF_THRESH =0.25#0.75 #print 'threashold: {}'.format(CONF_THRESH) NMS_THRESH = 0.3 for cls_ind, cls in enumerate(CLASSES[1:]): cls_ind += 1 # because we skipped background cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)] cls_scores = scores[:, cls_ind] dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, NMS_THRESH) dets = np.hstack((cls_boxes, 5*pose_a[:,np.newaxis], 5*pose_e[:,np.newaxis], cls_scores[:, np.newaxis])).astype(np.float32) dets = dets[keep, :] #print "a=%s, e=%s"%(5*pose_a[keep], 5*pose_e[keep]) return dets
def detect(net, im):
    # Detect all object classes and regress object bounds
    ims = []
    ims.append(im)
    scores, boxes = im_detect(net, ims)
    scores = scores[0]
    boxes = boxes[0]

    # filter boxes according to prob scores
    keeps = np.where(scores[:, 0] > cfg.TEST.PROB)[0]
    scores = scores[keeps, :]
    boxes = boxes[keeps, :]

    # change boxes according to input size and the original image size
    im_shape = np.array(im.shape[0:2])
    im_scales = float(cfg.TEST.SCALES[0]) / im_shape
    boxes[:, 0::2] = boxes[:, 0::2] / im_scales[1]
    boxes[:, 1::2] = boxes[:, 1::2] / im_scales[0]

    # filter boxes with small sizes
    boxes = clip_boxes(boxes, im_shape)
    keeps = filter_boxes(boxes, cfg.TEST.RON_MIN_SIZE)
    scores = scores[keeps, :]
    boxes = boxes[keeps, :]

    scores = np.tile(scores[:, 0], (len(CLASSES), 1)).transpose() * scores
    return scores, boxes
def demo(sess, net, image_name): """Detect object classes in an image using pre-computed object proposals.""" # Load the demo image im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name) #im_file = os.path.join('/home/corgi/Lab/label/pos_frame/ACCV/training/000001/',image_name) im = cv2.imread(im_file) # Detect all object classes and regress object bounds timer = Timer() timer.tic() scores, boxes = im_detect(sess, net, im) timer.toc() print ('Detection took {:.3f}s for ' '{:d} object proposals').format(timer.total_time, boxes.shape[0]) # Visualize detections for each class im = im[:, :, (2, 1, 0)] fig, ax = plt.subplots(figsize=(12, 12)) ax.imshow(im, aspect='equal') CONF_THRESH = 0.8 NMS_THRESH = 0.3 for cls_ind, cls in enumerate(CLASSES[1:]): cls_ind += 1 # because we skipped background cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)] cls_scores = scores[:, cls_ind] dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, NMS_THRESH) dets = dets[keep, :] vis_detections(im, cls, dets, ax, thresh=CONF_THRESH)
def demo (net, imagePathName, scoreThreshold): """Detect object classes in an image using pre-computed object proposals.""" # Load the demo image im = cv2.imread(imagePathName) # Detect all object classes and regress object bounds timer = Timer() timer.tic() scores, boxes = im_detect(net, im) timer.toc() debug('Object detection took {:.3f}s for {:d} object proposals'.format(timer.total_time, boxes.shape[0])) # Visualize detections for each class path, imageFilename = os.path.split(imagePathName) catDir = os.path.split(path)[-1] imageName = catDir + '/' + imageFilename for i, cls in enumerate(CLASSES[1:]): i += 1 # because we skipped background cls_boxes = boxes[:, 4 * i:4 * (i + 1)] cls_scores = scores[:, i] dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, NMS_THRESHOLD) dets = dets[keep, :] vis_detections(im, cls, imageName, dets, scoreThreshold)
def demo(net, image_path):
    """Detect object classes in an image using pre-computed object proposals."""
    # Load the demo image
    im = cv2.imread(image_path)

    # Detect all object classes and regress object bounds
    started = time()
    scores, boxes = im_detect(net, im)
    elapsed = time() - started
    print ('Detection took {:.3f}s for '
           '{:d} object proposals').format(elapsed, boxes.shape[0])

    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        vis_detections(im, cls, dets, thresh=CONF_THRESH)
def tattoo_detection(net, image_name, args): """Detect object classes in an image using pre-computed object proposals.""" im_in = cv2.imread(image_name) if im_in is None: print('cannot open %s for read' % image_name ) exit(-1) rows,cols = im_in.shape[:2] print([rows,cols]) scale=1.0 if rows >= cols: scale = float(args.longdim) / float(rows) im = cv2.resize( im_in, (int(0.5 + float(cols)*scale), args.longdim) ) else: scale = float(args.longdim) / float(cols) im = cv2.resize( im_in, (args.longdim, int(0.5 + float(rows)*scale)) ) # Detect all object classes and regress object bounds timer = Timer() timer.tic() scores, boxes = im_detect(net, im) timer.toc() seconds = '%.3f' % timer.total_time print('Detection took {:.3f}s for ' '{:d} object proposals').format(timer.total_time, boxes.shape[0]) max_scores = scores.max(axis=0) print(max_scores) print(boxes.shape) # Visualize detections for each class CONF_THRESH = args.threshold NMS_THRESH = args.nms_thresh tattoo_dets=[] for cls_ind, cls in enumerate(CLASSES[1:]): cls_ind += 1 # because we skipped background cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)] cls_scores = scores[:, cls_ind] dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, NMS_THRESH) dets = dets[keep, :] inds = np.where(dets[:, -1] >= CONF_THRESH)[0] dets_filter = dets[inds] vis_detections(im, cls, dets_filter, thresh=CONF_THRESH) if cls == 'tattoo' and len(dets_filter)>0: plt.savefig(os.path.join(args.output, os.path.splitext(os.path.basename(image_name))[0] + '_det.png')) tattoo_dets = dets_filter if args.inspect == 'v': plt.show() plt.clf() return tattoo_dets, max_scores, seconds, scale
def demo(net): """Detect object classes in an image using pre-computed object proposals.""" # Load the demo image #im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name) ftest = open('recognition_imageList.txt', 'r') outputFile = open('RecognitionResult.txt', 'w') number = ftest.readline().strip() bbox_filename = '/home/zhusj/Github/py-faster-rcnn/data/CS674/Recognition/test1/' + 'boxes_' + number + '.txt' while bbox_filename: with open(bbox_filename, "r") as filestream: print bbox_filename im_file = '/home/zhusj/Github/py-faster-rcnn/data/CS674/Recognition/test1/' + 'image_' + number + '.png' # print im_file im = cv2.imread(im_file) # Detect all object classes and regress object bounds timer = Timer() timer.tic() scores, boxes = im_detect(net, im) timer.toc() print('Detection took {:.3f}s for ' '{:d} object proposals').format(timer.total_time, boxes.shape[0]) for i, line in enumerate(filestream): currentline = line.split(",") CONF_THRESH = 0.8 NMS_THRESH = 0.3 result = np.zeros(12) # print currentline cls_ind = int(currentline[1]) cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)] cls_scores = scores[:, cls_ind] dets = np.hstack( (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, NMS_THRESH) dets = dets[keep, :] inds = np.where(dets[:, -1] >= CONF_THRESH)[0] # if inds.shape[0]>0: # if inds.any: print "inds:", inds # print "dets:", dets # print "cls_boxes:", cls_boxes bbox = dets[0, :4] outputFile.write(currentline[0] + ',' + str(int(bbox[0])) + ',' + str(int(bbox[1])) + ',' + str(int(bbox[2])) + ',' + str(int(bbox[3]))) outputFile.write('\n') # else: # # cls_scores_sorted = sorted(cls_scores) # sort_index = np.argsort(cls_scores) # # print cls_scores_sorted[0],cls_scores_sorted[-1] # print cls_scores[sort_index[0]],cls_scores[sort_index[-1]] # bbox = cls_boxes[sort_index[-1], :4] # outputFile.write(currentline[0]+','+str(int(bbox[0]))+','+ str(int(bbox[1]))+','+ str(int(bbox[2]))+','+ str(int(bbox[3]))) # outputFile.write('\n') # else: # outputFile.write(currentline[0]+','+str(280)+','+ str(160)+','+ str(360)+','+ str(240)) # outputFile.write('\n') # print number number = ftest.readline().strip() if number: bbox_filename = '/home/zhusj/Github/py-faster-rcnn/data/CS674/Recognition/test1/' + 'boxes_' + number + '.txt' else: break
args.prototxt = os.path.join(this_dir, '..', "models/pascal_voc/Vehicle/faster_rcnn_end2end/test.prototxt") if args.caffemodel == '': args.caffemodel = os.path.join(this_dir, '..', "output/faster_rcnn_end2end/voc_2007_trainval/vehicle_faster_rcnn_iter_70000.caffemodel") caffemodel = args.caffemodel prototxt = args.prototxt args.input_data_path = '/media/mvn/Data/Dataset/Image/ITS/VehicleDataset/darknet/Test' args.input_data_path = '/media/mvn/Data/Dataset/Image/ITS/Video' if not os.path.isfile(caffemodel): raise IOError(('{:s} not found.\nDid you run ./data/script/' 'fetch_faster_rcnn_models.sh?').format(caffemodel)) if args.cpu_mode: caffe.set_mode_cpu() else: caffe.set_mode_gpu() caffe.set_device(args.gpu_id) cfg.GPU_ID = args.gpu_id net = caffe.Net(prototxt, caffemodel, caffe.TEST) print(('\n\nLoaded network {:s}'.format(caffemodel))) # Warmup on a dummy image im = 128 * np.ones((300, 500, 3), dtype=np.uint8) for i in range(2): _, _= im_detect(net, im) demo(net, args.input_data_path, args.save_data_path)
def demo_all(sess, snet, im_org, strEstPathname, extMat=None, FeatureDB=None, CoorDB=None, GeoDB=None): # scalefactor = 300. / float(min(im.shape[0], im.shape[1])) # tw = int(im.shape[1] * scalefactor) # th = int(im.shape[0] * scalefactor) # im = cv2.resize(im, (tw, th)) ret_list_forKIST = [] ret_list_BB = [] # Detect all object classes and regress object bounds timer = Timer() timer.tic() scores, boxes = im_detect(sess, net, im_org) timer.toc() fontFace = cv2.FONT_HERSHEY_PLAIN fontScale = 2 fontThickness = 2 if len(strEstPathname) > 0: tag_anno = Element('annotation') im = im_org.copy() # Visualize detections for each class for cls_ind, class_name in enumerate(CLASSES[1:]): cls_ind += 1 # because we skipped background cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)] cls_scores = scores[:, cls_ind] dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, NMS_THRESH) dets = dets[keep, :] inds = np.where(dets[:, -1] >= CONF_THRESH)[0] if len(inds) > 0: for i in inds: bbox = dets[i, :4] # [xmin, ymin, xmax, ymax] score = dets[i, -1] if class_name in Candidate_CLASSES: if score > 0.8: fontColor = (255,0,0) cv2.rectangle(im, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), fontColor, fontThickness) elif score > 0.6: fontColor = (0, 255, 0) cv2.rectangle(im, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), fontColor, fontThickness) else: fontColor = (255, 255, 255) cv2.rectangle(im, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), fontColor, fontThickness) cv2.putText(im, '{:s} {:.3f}'.format(class_name, score), (int(bbox[0]), int(bbox[1] - 2)), fontFace, fontScale, fontColor, thickness = fontThickness) ret_list_BB.append({'bbox': bbox, 'score': score, 'name': class_name}) # print('{:s} {:.3f} {:d}'.format(class_name, score, cls_ind)) if extMat is not None: FeatureDB2 = FeatureDB[Candidate_CLASSES.index(class_name)] CoorDB2 = CoorDB[Candidate_CLASSES.index(class_name)] GeoDB2 = GeoDB[Candidate_CLASSES.index(class_name)] width = np.min([int(bbox[2]) - int(bbox[0]),10*int(bbox[0])]) height = np.min([int(bbox[3]) - int(bbox[1]),10*int(bbox[1])]) cropbox_ly = int(bbox[1]-height*0.1) cropbox_ry = int(bbox[3]+height*0.1) cropbox_lx = int(bbox[0]-width*0.1) cropbox_rx = int(bbox[2]+width*0.1) cropimg = im_org[cropbox_ly:cropbox_ry, cropbox_lx:cropbox_rx, :] print('bbox:') print(bbox) init_coord = np.array([cropbox_lx, cropbox_ly, 0]) # init_coord[x, y, -], lefttop_point rmat, tvec = PoseEstimate(cropimg, FeatureDB2, CoorDB2, extMat, init_coord) print(rmat) print(tvec) if rmat.sum() == 0 or np.isnan(rmat).any() or np.isnan(tvec).any() == True: print('cannot find the pose information and fill with dummy values with all zeros') # return for KIST obj_info = {'object': class_name, 'score': score, 'RMat': rmat, 'TVec': tvec * 0.0, 'x_center': (bbox[0] + bbox[2]) / 2, 'y_center': (bbox[1] + bbox[3]) / 2, 'left': bbox[0], 'top': bbox[1], 'right': bbox[2], 'bottom': bbox[3] } ret_list_forKIST.append(obj_info) else: init_coord = np.array([0, 0, 0]) Result = cornerpointsTransform2(GeoDB2, rmat, tvec, extMat, init_coord) # return for KIST obj_info = {'object': class_name, 'score': score, 'RMat': rmat, 'TVec': tvec, 'x_center': (bbox[0] + bbox[2]) / 2, 'y_center': (bbox[1] + bbox[3]) / 2, 'left': bbox[0], 'top': bbox[1], 'right': bbox[2], 'bottom': bbox[3] } ret_list_forKIST.append(obj_info) print('\tRot info: ') print(rmat) # print('\tTrn info:\n\t\tx: %d\n\t\ty: %d\n\t\tz: %d' % (tvec[1] * 0.1, -tvec[0] * 0.1, tvec[2] * 0.1)) # *0.1 --> mm 
to cm print('\tTvec info: ') print(tvec) #print('\tTrn info:\n\t\tx: %d\n\t\ty: %d\n\t\tz: %d' % (tvec[1]/tvec[0], -tvec[0]//tvec[0], tvec[2]/tvec[0])) # draw axis drawLineIndeces = ((0, 1), (0, 2), (0, 3)) colorList = ((255, 0, 0), (0, 255, 0), (0, 0, 255)) # z, y, x for (idxStart, idxEnd), color in zip(drawLineIndeces, colorList): cv2.line(im, (int(Result[idxStart][0]), int(Result[idxStart][1])), (int(Result[idxEnd][0]), int(Result[idxEnd][1])), color, thickness=4) # draw point for ptDisp in Result: cv2.circle(im, (int(ptDisp[0]), int(ptDisp[1])), 5, (255,255,255,0), -1) # draw center position from a camera (mm -> cm by x 0.1) cv2.putText(im, '(%d, %d, %d)'%(tvec[1] * 0.1, -tvec[0] * 0.1, tvec[2] * 0.1), (int(Result[0][0]), int(Result[0][1])), fontFace, fontScale, fontColor, thickness=fontThickness) if len(strEstPathname) > 0: tag_object = Element('object') SubElement(tag_object, 'name').text = class_name SubElement(tag_object, 'score').text = str(score) tag_bndbox = Element('bndbox') SubElement(tag_bndbox, 'xmin').text = str(int(bbox[0])) SubElement(tag_bndbox, 'ymin').text = str(int(bbox[1])) SubElement(tag_bndbox, 'xmax').text = str(int(bbox[2])) SubElement(tag_bndbox, 'ymax').text = str(int(bbox[3])) tag_anno.append(tag_object) tag_object.append(tag_bndbox) if extMat is not None and rmat.sum() != 0 and np.isnan(rmat).any() != True: SubElement(tag_object, 'rotation_matrix').text = str(rmat); SubElement(tag_object, 'traslation_vector').text = str(tvec); xyz = rotationMatrixToEulerAngles(rmat) SubElement(tag_object, 'EulerAngles').text = str(xyz) myimshow('display', im) if len(strEstPathname) > 0: cv2.imwrite(strEstPathname + '_est.jpg', im) ElementTree(tag_anno).write(strEstPathname) return im, ret_list_forKIST, ret_list_BB
ftr = h5py.File(strTRSet, 'r') GeoDB.append(np.transpose(np.array(ftr['img']), [2, 1, 0])) tfArch = 'VGGnetslsv1_test' # prototxt tfmodel = '../models/VGGnet_fast_rcnn_iter_70000.ckpt' # init session sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True, device_count = {'GPU': 1})) tf.device('') # load network net = get_network(tfArch) # load model print ('Loading network {:s}... '.format(tfArch)) saver = tf.train.Saver() saver.restore(sess, tfmodel) print (' done.') # Warmup on a dummy image im = 128 * np.ones((300, 300, 3), dtype=np.uint8) for i in xrange(2): _, _ = im_detect(sess, net, im) # working as a server ''' Server Info ''' print('Server: waiting of client connection') ThreadedServer(Svr_IP, Svr_PORT).listen()
def run(): prototxt = 'models/sg_vrd/rel_iccv/test_iccv_gt.prototxt' obj_detector_model='data/models/vrd_rfcn/vrd_resnet50_rfcn_iter_70000.caffemodel' relation_model='output/sg_vrd_rfcn/psroi_context_tri_sum_cached_iter_75500.caffemodel' caffe.set_mode_gpu() caffe.set_device(0) net = caffe.Net(prototxt, caffe.TEST) net.copy_from(obj_detector_model) net.copy_from(relation_model) m = h5py.File('/home/zawlin/Dropbox/proj/sg_vrd_meta.h5', 'r') cnt = 0 root_img = '/home/zawlin/Dropbox/iccv17_hw/_results_from_zl/img/' root_cls = '/home/zawlin/Dropbox/iccv17_hw/_results_from_zl/cls/' import glog for k in m['gt/test'].keys(): if not os.path.exists(root_img+k): os.makedirs(root_img+k) cnt += 1 glog.info(cnt) if cnt >80: break rlp_labels = m['gt/test/%s/rlp_labels'%k][...] sub_boxes = m['gt/test/%s/sub_boxes'%k][...].astype(np.float) obj_boxes = m['gt/test/%s/obj_boxes'%k][...].astype(np.float) if sub_boxes.shape[0]>0: zeros = np.zeros((sub_boxes.shape[0],1), dtype=np.float) # first index is always zero since we do one image by one image sub_boxes = np.concatenate((zeros, sub_boxes),axis=1) obj_boxes = np.concatenate((zeros, obj_boxes),axis=1) im_path = C.get_sg_vrd_path_test(k) im = cv2.imread(im_path) for i in xrange(sub_boxes.shape[0]): # sb = sub_boxes[i][1:].astype(np.int) # ob = obj_boxes[i][1:].astype(np.int) rlp = rlp_labels[i] rel = zl.idx2name_cls(m,rlp[0])+' '+zl.idx2name_pre(m,rlp[1])+' '+zl.idx2name_cls(m,rlp[2]) x1,y1,x2,y2 = union(sub_boxes[i][1:],obj_boxes[i][1:]) cv2.rectangle(im,(x1,y1),(x2,y2),(255,0,0),2) cv2.putText(im,rel,(x1,y1),cv2.FONT_HERSHEY_SIMPLEX,1.0,(0,0,255),2) # cv2.rectangle(im,(sb[0],sb[1]),(sb[2],sb[3]),(255,0,0),2) # cv2.rectangle(im,(ob[0],ob[1]),(ob[2],ob[3]),(255,0,0),2) cv2.imshow('im',im) cv2.imwrite(root_img+k+'/_.orig.jpg',im) im_detect(net,im_path,sub_boxes,obj_boxes) rfcn_sub_rel = net.blobs['rfcn_sub_rel'].data[0] rfcn_obj_rel = net.blobs['rfcn_obj_rel'].data[0] rfcn_union_rel = net.blobs['rfcn_union_rel'].data[0] for pi in xrange(70): index = pi head, last = index * 9, (index + 1)*9 feat_sub = rfcn_sub_rel[head:last] feat_obj= rfcn_obj_rel[head:last] feat_union= rfcn_union_rel[head:last] im_vis_sub = get_vis(feat_sub) im_vis_obj = get_vis(feat_obj) im_vis_union = get_vis(feat_union) pre = zl.idx2name_pre(m,pi) cv2.imwrite(root_img+k+'/%s_sub.jpg'%pre,im_vis_sub) cv2.imwrite(root_img+k+'/%s_obj.jpg'%pre,im_vis_obj) cv2.imwrite(root_img+k+'/%s_union.jpg'%pre,im_vis_union) if not os.path.exists(root_cls+pre): os.makedirs(root_cls+pre) cv2.imwrite(root_cls+pre+'/%s_sub.jpg'%k,im_vis_sub) cv2.imwrite(root_cls+pre+'/%s_obj.jpg'%k,im_vis_obj) cv2.imwrite(root_cls+pre+'/%s_union.jpg'%k,im_vis_union) #cv2.imshow(pre+'sub',im_vis_sub) #cv2.imshow(pre+'obj',im_vis_obj) #cv2.imshow(pre+'union',im_vis_union) #if cv2.waitKey(0)==27: # exit(0) else: #todo #print nothing pass cv2.waitKey(0)
def demo(sess, net, im_file, vis_file, fits_fn, conf_thresh=0.8, eval_class=True, extra_vis_png=False): """ Detect object classes in an image using pre-computed object proposals. im_file: The "fused" image file path vis_file: The background image file on which detections are laid. Normallly, this is just the IR image file path fits_fn: The FITS file path eval_class: True - use traditional per class-based evaluation style False - use per RoI-based evaluation """ show_img_size = cfg.TEST.SCALES[0] if (not os.path.exists(im_file)): print('%s cannot be found' % (im_file)) return -1 im = cv2.imread(im_file) # Detect all object classes and regress object bounds timer = Timer() timer.tic() image_name = osp.basename(im_file) scores, boxes = im_detect(sess, net, im, save_vis_dir=None, img_name=os.path.splitext(image_name)[0]) boxes *= float(show_img_size) / float(im.shape[0]) timer.toc() sys.stdout.write('Done in {:.3f} secs'.format(timer.total_time)) sys.stdout.flush() print(scores) im = cv2.imread(vis_file) my_dpi = 100 fig = plt.figure() fig.set_size_inches(show_img_size / my_dpi, show_img_size / my_dpi) ax = plt.Axes(fig, [0., 0., 1., 1.]) ax.set_axis_off() fig.add_axes(ax) ax.set_xlim([0, show_img_size]) ax.set_ylim([show_img_size, 0]) #ax.set_aspect('equal') im = cv2.resize(im, (show_img_size, show_img_size)) im = im[:, :, (2, 1, 0)] ax.imshow(im, aspect='equal') if ((fits_fn is not None) and (not extra_vis_png)): patch_contour = fuse(fits_fn, im, None, sigma_level=4, mask_ir=False, get_path_patch_only=True) ax.add_patch(patch_contour) NMS_THRESH = cfg.TEST.NMS #cfg.TEST.RPN_NMS_THRESH # 0.3 tt_vis = 0 bbox_img = [] bscore_img = [] num_sources = 0 #if (eval_class): for cls_ind, cls in enumerate(CLASSES[1:]): cls_ind += 1 # because we skipped background cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)] cls_scores = scores[:, cls_ind] dets = np.hstack( (cls_boxes, cls_scores[:, np.newaxis])) #.astype(np.float32) keep = nms(dets, NMS_THRESH) dets = dets[keep, :] num_sources += vis_detections(im, cls, dets, ax, thresh=conf_thresh) #dets = np.hstack((dets, np.ones([dets.shape[0], 1]) * cls_ind)) # if (dets.shape[0] > 0): # bbox_img.append(dets) # bscore_img.append(np.reshape(dets[:, -2], [-1, 1])) # else: # for eoi_ind, eoi in enumerate(boxes): # eoi_scores = scores[eoi_ind, 1:] # skip background # cls_ind = np.argmax(eoi_scores) + 1 # add the background index back # cls_boxes = boxes[eoi_ind, 4 * cls_ind : 4 * (cls_ind + 1)] # cls_scores = scores[eoi_ind, cls_ind] # dets = np.hstack((np.reshape(cls_boxes, [1, -1]), # np.reshape(cls_scores, [-1, 1])))#.astype(np.float32) # dets = np.hstack((dets, np.ones([dets.shape[0], 1]) * cls_ind)) # bbox_img.append(dets) # bscore_img.append(np.reshape(dets[:, -2], [-1, 1])) # # boxes_im = np.vstack(bbox_img) # scores_im = np.vstack(bscore_img) # # #if (not eval_class): # # a numpy float is a C double, so need to use float32 # keep = nms(boxes_im[:, :-1].astype(np.float32), NMS_THRESH) # boxes_im = boxes_im[keep, :] # scores_im = scores_im[keep, :] # # keep_indices = range(boxes_im.shape[0]) #num_sources = vis_detections(im, None, boxes_im[keep_indices, :], ax, thresh=conf_thresh) print(', found %d sources' % num_sources) return 0
def demo(net, leader, label): """Detect object classes in an image using pre-computed object proposals.""" #print image_path # Load the demo image #im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name) img_name = label.split(';')[0] image_path = 'raw/{}-craw/{}.jpg'.format(leader, img_name) ### txt path gt = utils.get_bbox('txts/' + leader + '/', label, default_leader=leader) if gt is None: return # x,y,w,h, id, person, pose bbs = gt[0] for bb in bbs: GT[bb[5]] += 1 bbs_other = gt[1] GT['OTHERS'] += len(bbs_other) print(label) new_txt_file = open(save_path + leader + '/' + img_name + '.txt', 'w') im = cv2.imread(image_path) if im is None: image_path = image_path[:-4] + '.JPG' im = cv2.imread(image_path) if im is None: print(image_path + ' is None') return scores, boxes = im_detect(net, im) # Visualize detections for each class CONF_THRESH = 0.9 NMS_THRESH = 0.5 bbox = [] for cls_ind, cls in enumerate(CLASSES[1:]): cls_ind += 1 # because we skipped background cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)] cls_scores = scores[:, cls_ind] dets = np.hstack( (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, NMS_THRESH) dets = dets[keep, :] inds = np.where(dets[:, -1] >= CONF_THRESH)[0] for i in inds: bbox.append(dets[i, :4]) # cv2.rectangle(im, (dets[i,0], dets[i, 1]), (dets[i, 2], dets[i, 3]), (255, 0, 0)) # cv2.putText(im, str(dets[i, -1]), (int((dets[i,0]+dets[i,2])/2), int((dets[i, 1]+ dets[i, 3])/2)), 0, 1, (255,0,0)) for i, d in enumerate(bbox): pose = -1 # bbs: x,y,w,h, id, person, pose for j in range(len(bbs)): bb = bbs[j] bound2 = (float(bb[0]), float(bb[1]), float(bb[0]) + float(bb[2]), float(bb[1]) + float(bb[3])) if calculateIoU(d, bound2) >= 0.5: RT[bb[5]] += 1 f_img_path = fp_path + bb[ 5] + '/fro/' + img_name + '_{}.jpg'.format(i) if os.path.exists(f_img_path): pose = 0 p_img_path = fp_path + bb[ 5] + '/pro/' + img_name + '_{}.jpg'.format(i) if os.path.exists(p_img_path): pose = 1 q_img_path = fp_path + bb[ 5] + '/quarter/' + img_name + '_{}.jpg'.format(i) if os.path.exists(q_img_path): pose = 2 bbs[j][6] = pose break for j in range(len(bbs_other)): bb = bbs_other[j] bound2 = (float(bb[0]), float(bb[1]), float(bb[0]) + float(bb[2]), float(bb[1]) + float(bb[3])) if calculateIoU(d, bound2) >= 0.5: RT['OTHERS'] += 1 f_img_path = fp_path + 'OTHERS/fro/' + img_name + '_{}.jpg'.format( i) if os.path.exists(f_img_path): pose = 0 p_img_path = fp_path + 'OTHERS/pro/' + img_name + '_{}.jpg'.format( i) if os.path.exists(p_img_path): pose = 1 q_img_path = fp_path + bb[ 5] + '/quarter/' + img_name + '_{}.jpg'.format(i) if os.path.exists(q_img_path): pose = 2 bbs_other[j][6] = pose break for gt in (bbs + bbs_other): line = '{} {} {} {} {} {} {}\n'.format(gt[4], gt[0], gt[1], gt[2], gt[3], gt[5], gt[6]) new_txt_file.write(line)
def detect(self, im):
    scores, boxes = im_detect(self.sess, self.net, im)
    return (scores, boxes)
def image_callback(self, data): try: self.rgb_image = self.bridge.imgmsg_to_cv2(data, "bgr8") self.have_image = True t = rospy.Time.now() self.rgb_counter = self.rgb_counter + 1 #TODO your code here #=============================get cam_matrix fromo .yaml file=============================# camera_intrinsic = self.yaml_loader( "/home/xiaoqiangyan/work/catkin_ws/src/deep_pose_estimation/calibration/rgb_PS1080_PrimeSense.yaml" ) cam_matrix = camera_intrinsic['projection_matrix']['data'] #print (cam_matrix) principal = np.hstack((cam_matrix[2], cam_matrix[6])) focal_len = np.hstack((cam_matrix[0], cam_matrix[5])) print('********************** Image ' + str(self.rgb_counter) + ' **************************') # ===================== Detect all object classes and regress object bounds===================# start = time.time() im = self.rgb_image.copy() assert im is not None if not os.path.exists( '/home/xiaoqiangyan/Documents/MyData/RGBImage/{}'.format( self.now)): os.mkdir( '/home/xiaoqiangyan/Documents/MyData/RGBImage/{}'.format( self.now)) image_name = "/home/xiaoqiangyan/Documents/MyData/RGBImage/{}/".format( self.now) + str(self.rgb_counter) + "-raw.png" cv2.imwrite(image_name, im) w = np.size(im, 1) h = np.size(im, 0) print(w, h) resize_im = cv2.resize(im, (w, 960)) crop_im = resize_im[240:720, 320:960] #============================Read image in dataset and display in rviz(image).If we want to use camera to capture image, just commit this.=============================# crop_im = cv2.imread( '/data/YCB-dataset/YCB_Video_Dataset/data/0000/{:0>6}-color.png' .format(self.rgb_counter)) msg = cv_bridge.CvBridge().cv2_to_imgmsg(crop_im, encoding="bgr8") msg.header.frame_id = "kinect_optical_frame" msg.header.stamp = rospy.get_rostime() self.pub.publish(msg) #cv2.imshow('dataset_image', crop_im) #cv2.waitKey() show_im = crop_im.copy() #we need to set the gpu mode in this function caffe.set_mode_gpu() caffe.set_device(0) #caffe.set_device(args.gpu_id) #print (args) scores, boxes, tz, rot = im_detect(self.net, crop_im) #print(scores.shape, boxes.shape, tz.shape, rot.shape) end = time.time() print('Detection took {:.3f}s'.format(end - start)) all_dets = np.zeros((0, 6), dtype=np.float32) all_tz = np.zeros((0, 1), dtype=np.float32) all_rot = np.zeros((0, 4), dtype=np.float32) for cls_ind, cls in enumerate(CLASSES[1:]): #print('cls'.format(cls)) cls_ind += 1 # because we skipped background cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)] cls_scores = scores[:, cls_ind] dets = np.hstack( (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, NMS_THRESH) dets = dets[keep, :] # keep = np.where(dets[:, -1] >= CONF_THRESH)[0] # dets = dets[keep, :] # print(np.min(dets[:, -1]), np.max(dets[:, -1])) dets = np.hstack((dets, cls_ind * np.ones( (dets.shape[0], 1), dtype=np.float32))) all_dets = np.vstack((all_dets, dets)) all_tz = np.vstack((all_tz, tz[keep, :])) all_rot = np.vstack((all_rot, rot[keep, :])) show_im, center = self.draw_detections(show_im, all_dets, thresh=CONF_THRESH) #pick the proposals that scores are bigger than CONF_THRESH keep = np.where(all_dets[:, -2] >= CONF_THRESH)[0] all_dets = all_dets[keep, :] all_tz = all_tz[keep, :] all_rot = all_rot[keep, :] poses = np.zeros((all_tz.shape[0], 7), dtype=np.float64) if all_tz.shape[0] != 0: principal[0] /= 2 principal[1] = principal[1] * 480 / 1024 t0 = (center - principal) * all_tz tx = (t0[:, 0] / focal_len[0]).reshape(all_tz.shape) ty = (t0[:, 1] / focal_len[1]).reshape(all_tz.shape) translation = np.hstack((tx, ty, all_tz)) 
print('Objects Detected and the Predicted 3D Poses: ') print('') markerArray = MarkerArray() for idx in range(all_tz.shape[0]): cls_ind = int(all_dets[idx, -1]) robotMarker = Marker() robotMarker.header.frame_id = "kinect_optical_frame" h = rospy.get_param("height", 100) w = rospy.get_param("width", 100) robotMarker.header.stamp = rospy.get_rostime() robotMarker.ns = "Detection" robotMarker.id = 0 #robotMarker.type = 3 # cubes robotMarker.type = Marker.MESH_RESOURCE robotMarker.mesh_use_embedded_materials = True robotMarker.mesh_resource = "package://deep_pose_estimation/models/" + CLASSES[ cls_ind] + "/textured.obj" robotMarker.action = 0 robotMarker.pose.position.x = tx[idx] robotMarker.pose.position.y = ty[idx] robotMarker.pose.position.z = all_tz[idx] print(all_rot[idx]) # R = quaternion_matrix(all_rot[idx]) #==================X axes +90, y+90=============================== # print (euler_from_quaternion(all_rot[idx])) # euler = list(euler_from_quaternion(all_rot[idx])) # euler[0] += 90*np.pi/180 # euler[1] += 90*np.pi/180 # all_rot[idx] = quaternion_from_euler(euler[0],euler[1],euler[2]) #====================Read gt rotation===================================== # R = rotation_matrix(0.123, (1, 2, 3)) #initilization of R # rot = scipy.io.loadmat('/data/YCB-dataset/YCB_Video_Dataset/data/0000/{:0>6}-meta.mat'.format(self.rgb_counter))['poses'].astype(np.float32)[0:3,0:3,idx] # R[0:3,0:3] = rot # all_rot[idx] = quaternion_from_matrix(R) ## print (all_rot[idx]) robotMarker.pose.orientation.x = all_rot[idx, 0] robotMarker.pose.orientation.y = all_rot[idx, 1] robotMarker.pose.orientation.z = all_rot[idx, 2] robotMarker.pose.orientation.w = all_rot[idx, 3] robotMarker.id = idx robotMarker.scale.x = 1 robotMarker.scale.y = 1 robotMarker.scale.z = 1 #print (COLORS[cls_ind,:]) # robotMarker.color.r = float((idx)/10.0) *(idx)**2 # robotMarker.color.g = 1 #print (robotMarker.color.r, robotMarker.color.g) robotMarker.color.r = 0 robotMarker.color.g = 1 robotMarker.color.b = 0 robotMarker.color.a = 1 robotMarker.lifetime = rospy.Duration(1) markerArray.markers.append(robotMarker) # robotMarker.color.clear() print(CLASSES[cls_ind]) print('3D Translation: ', translation[idx]) print('3D Rotation: ', all_rot[idx]) print('') poses[idx] = np.hstack((translation[idx], all_rot[idx])) print('publish_markers') self.publisher.publish(markerArray.markers) else: print('******* There is no obejct detected ********') if not os.path.exists( '/home/xiaoqiangyan/Documents/MyData/Detection/{}'.format( self.now)): os.mkdir( '/home/xiaoqiangyan/Documents/MyData/Detection/{}'.format( self.now)) scipy.io.savemat( '/home/xiaoqiangyan/Documents/MyData/Detection/{}/{}.mat'. format(self.now, self.rgb_counter), { 'poses': poses, 'rois': all_dets[:, -1] }) Results_image_name = "/home/xiaoqiangyan/Documents/MyData/Detection/{}/".format( self.now) + str(self.rgb_counter) + ".png" cv2.imwrite(Results_image_name, show_im) rospy.sleep(1) except CvBridgeError, e: print(e)
def demoCombined(net, image_name): """Detect object classes in an image using pre-computed object proposals.""" # Load the demo image im_file = os.path.join(args.data_dir, image_name) im = cv2.imread(im_file) # Detect all object classes and regress object bounds timer = Timer() timer.tic() scores, boxes = im_detect(net, im) timer.toc() print('Detection took {:.3f}s for ' '{:d} object proposals').format(timer.total_time, boxes.shape[0]) # Visualize detections for each class CONF_THRESH = 0.7 NMS_THRESH = 0.3 im = im[:, :, (2, 1, 0)] _, ax = plt.subplots(figsize=(12, 12)) #ax.imshow(im, aspect='equal') plt.imshow(im) ax = plt.gca() #ax.set_autoscale_on(True) print image_name my_dpi = 96 CONTAIN_OBJ = False for cls_ind, class_name in enumerate(CLASSES[1:]): cls_ind += 1 # because we skipped background cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)] cls_scores = scores[:, cls_ind] dets = np.hstack( (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, NMS_THRESH) dets = dets[keep, :] #print dets inds = np.where(dets[:, -1] >= CONF_THRESH)[0] if len(inds) == 0: continue CONTAIN_OBJ = True print class_name print(inds) for i in inds: #print inds bbox = dets[i, :4] score = dets[i, -1] ax.add_patch( plt.Rectangle((bbox[0], bbox[1]), bbox[2] - bbox[0], bbox[3] - bbox[1], fill=False, edgecolor='red', linewidth=3.5)) ax.text(bbox[0], bbox[1] - 2, '{:s} {:.3f}'.format(class_name, score), bbox=dict(facecolor='blue', alpha=0.5), fontsize=14, color='white') ax.set_title(('object detections with ' 'p(object | box) >= {:.1f}').format(CONF_THRESH), fontsize=14) if CONTAIN_OBJ: plt.axis('off') plt.tight_layout() directory = args.model_dir + 'prediction' if not os.path.exists(directory): os.makedirs(directory) print('printing to {}'.format(directory + '/' + 'pred_' + image_name)) #plt.figure(figsize=(800 / my_dpi, 800 / my_dpi), dpi=my_dpi) plt.savefig(directory + '/' + 'pred_' + image_name) plt.close() #plt.draw() plt.close('all')
def demo(net, im): """Detect object classes in an image using pre-computed object proposals.""" # Load the demo image # Detect all object classes and regress object bounds timer = Timer() timer.tic() scores, boxes = im_detect(net, im) timer.toc() print('Detection took {:.3f}s for ' '{:d} object proposals').format(timer.total_time, boxes.shape[0]) # Visualize detections for each class CONF_THRESH = 0.6 NMS_THRESH = 0.3 index = 1 for cls_ind, cls in enumerate(CLASSES[1:]): cls_ind += 1 # because we skipped background cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)] cls_scores = scores[:, cls_ind] dets = np.hstack( (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, NMS_THRESH) dets = dets[keep, :] #im = im[:, :, (2, 1, 0)] inds = np.where(dets[:, -1] >= CONF_THRESH)[0] if len(inds) == 0 and index == len(CLASSES[1:]): #cv2.imwrite(path,im) video.write(im) return elif len(inds) == 0 and index < len(CLASSES[1:]): index += 1 continue for i in inds: bbox = dets[i, :4] score = dets[i, -1] x = bbox[0] y = bbox[1] rect_start = (x, y) x1 = bbox[2] y1 = bbox[3] rect_end = (x1, y1) color0 = (100, 100, 100) color1 = (255, 0, 0) xx1 = bbox[0] yy1 = int(bbox[1] - 10) point_start = (xx1, yy1) xx2 = bbox[0] + (bbox[2] - bbox[0]) * score yy2 = int(bbox[1] - 2) point_end = (xx2, yy2) color2 = (0, 0, 225) color3 = (0, 255, 0) if cls_ind == 1: cv2.rectangle(im, rect_start, rect_end, color1, 2) elif cls_ind == 2: cv2.rectangle(im, rect_start, rect_end, color3, 2) elif cls_ind == 3: cv2.rectangle(im, rect_start, rect_end, color0, 2) cv2.rectangle(im, point_start, point_end, color2, -1) cv2.namedWindow("Image") res = cv2.resize(im, (1080, 608), interpolation=cv2.INTER_CUBIC) cv2.imshow("Image", res) cv2.waitKey(1)
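# The demo() above writes frames to a module-level `video` writer that is not
# defined in this snippet. A minimal sketch of how such a writer could be set
# up before the per-frame loop; the output path, codec, fps and frame size are
# assumptions, and the size must match the frames actually passed to write().
# Note: cv2.VideoWriter_fourcc needs OpenCV 3+; older builds use cv2.cv.CV_FOURCC.
import cv2

fourcc = cv2.VideoWriter_fourcc(*'XVID')
video = cv2.VideoWriter('detections.avi', fourcc, 25.0, (1920, 1080))
# ... call demo(net, frame) for every captured frame ...
video.release()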
def detect_single(net, im, box_output_path,
                  visualise_window=False, visualise_output_path=None,
                  conf_thresh=0.7, nms_thresh=0.3, gpu=0):
    """Runs the detector on an image.

    Args:
        net: Caffe network to process the image
        im: an H x W x C BGR image (as returned by cv2.imread)
        box_output_path: Path to write a plain-text file with saved detections.
        visualise_window: Boolean. Visualise the detection result in a separate window.
        visualise_output_path: Path to write an image with detections visualised.
            Visualised result not written if this is None.
        conf_thresh: Threshold to use for filtering detections.
        nms_thresh: Threshold to use for non-maximal suppression.
        gpu: The GPU id to use.
    """
    im = im.astype(np.uint8)
    cfg.TEST.HAS_RPN = True
    cfg.GPU_ID = gpu if gpu >= 0 else -1
    scores, boxes = im_detect(net, im)
    box_output_file = begin_file_write(box_output_path)
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        # note: always reads columns 4:8 of the box output (the single
        # foreground class of a two-class model)
        cls_boxes = boxes[:, 4:8]
        cls_scores = scores[:, cls_ind]
        keep = np.where(cls_scores >= conf_thresh)[0]
        cls_boxes = cls_boxes[keep, :]
        cls_scores = cls_scores[keep]
        dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis]))
        dets = dets.astype(np.float32)
        keep = nms(dets, nms_thresh)
        dets = dets[keep, :]
        for i in range(dets.shape[0]):
            write_detection(box_output_file, cls_ind, dets[i][4], dets[i][0:4])
        if visualise_window or visualise_output_path:
            colour = voc.get_colour_map()
            idx = cls_ind
            colour = colour[3 * idx:3 * idx + 3]
            colour = [float(x) / 256.0 for x in colour]
            vis_detections(im, cls, dets, conf_thresh, visualise_output_path,
                           box_colour=colour, caption_bg_colour=colour)
            if visualise_window:
                plt.show()
    if visualise_window or visualise_output_path:
        plt.close('all')
    close_file_write(box_output_file)
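# Hypothetical driver for detect_single(); the prototxt/caffemodel paths and
# the output file names are placeholders, not taken from the original project.
import caffe
import cv2

net = caffe.Net('faster_rcnn_test.prototxt', 'model.caffemodel', caffe.TEST)
im = cv2.imread('example.jpg')  # BGR, H x W x C
detect_single(net, im,
              box_output_path='example_dets.txt',
              visualise_output_path='example_dets.png',
              conf_thresh=0.7, nms_thresh=0.3, gpu=0)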
#im_file = 'data/group/pos_34.jpg' for ite, subclass in enumerate(data_classes): for iname in sorted(os.listdir(os.path.join(data_dir, subclass))): print(iname) fname = iname.split('.') postfix_name = fname[-1] save_folder = iname[0:-(len(postfix_name) + 1)] save_path = os.path.join(save_dir, subclass, save_folder) if not os.path.exists(save_path): os.makedirs(save_path) im = cv2.imread(os.path.join(data_dir, subclass, iname)) scores, boxes, attr_scores, rel_scores = im_detect(net, im) # Keep the original boxes, don't worry about the regression bbox outputs rois = net.blobs['rois'].data.copy() # unscale back to raw image space blobs, im_scales = _get_blobs(im, None) cls_boxes = rois[:, 1:5] / im_scales[0] cls_prob = net.blobs['cls_prob'].data attr_prob = net.blobs['attr_prob'].data pool5 = net.blobs['pool5_flat'].data # Keep only the best detections max_conf = np.zeros((rois.shape[0])) for cls_ind in range(1, cls_prob.shape[1]): cls_scores = scores[:, cls_ind]
for i in xrange(num_images): # filter out any ground truth boxes if cfg.TEST.HAS_RPN: box_proposals = None else: # The roidb may contain ground-truth rois (for example, if the roidb # comes from the training or val split). We only want to evaluate # detection on the *non*-ground-truth rois. We select those the rois # that have the gt_classes field set to 0, which means there's no # ground truth. box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0] im = cv2.imread(imdb.image_path_at(i)) _t['im_detect'].tic() scores, boxes = frcnnt.im_detect(net, im, box_proposals) _t['im_detect'].toc() _t['misc'].tic() # skip j = 0, because it's the background class thresh = 0.05 for j in xrange(1, imdb.num_classes): inds = np.where(scores[:, j] > thresh)[0] cls_scores = scores[inds, j] cls_boxes = boxes[inds, j * 4:(j + 1) * 4] cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ .astype(np.float32, copy=False) keep = nms(cls_dets, cfg.TEST.NMS) cls_dets = cls_dets[keep, :] #if vis:
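# The thresholding + per-class NMS above is repeated, with small variations, in
# most of the demos in this file. A self-contained helper capturing that
# pattern; the nms import path assumes py-faster-rcnn's fast_rcnn package and
# may differ in other forks.
import numpy as np
from fast_rcnn.nms_wrapper import nms

def postprocess_detections(scores, boxes, num_classes,
                           score_thresh=0.05, nms_thresh=0.3):
    """Return {class_index: Nx5 [x1, y1, x2, y2, score]} for classes 1..num_classes-1."""
    results = {}
    for j in range(1, num_classes):  # skip j = 0, the background class
        inds = np.where(scores[:, j] > score_thresh)[0]
        cls_dets = np.hstack((boxes[inds, j * 4:(j + 1) * 4],
                              scores[inds, j][:, np.newaxis])).astype(np.float32)
        keep = nms(cls_dets, nms_thresh)
        results[j] = cls_dets[keep, :]
    return results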
def detect_faces(self, img, return_best=False):
    """
    Computes a list of faces detected in the input image, in the form of a
    list of bounding-boxes, one per detected face.
    Arguments:
        img: The image to be input to the Faster R-CNN model
        return_best: boolean indicating whether to return just the best
                     detection or the complete list of detections
    Returns:
        A list of lists. Each sublist contains the image coordinates of the
        corners of a bounding-box and the score of the detection in the form
        [x1, y1, x2, y2, score], where (x1, y1) are the integer coordinates of
        the top-left corner and (x2, y2) those of the bottom-right corner.
        The score is a floating-point number. When return_best is True, the
        returned list contains only one bounding-box.
    """
    if img is None:
        return None
    try:
        if not self.is_cuda_enable:
            caffe.set_mode_cpu()
        else:
            caffe.set_mode_gpu()
            caffe.set_device(cfg.GPU_ID)
        scores, boxes = im_detect(self.net, img)
        cls_ind = 1  # the face class; index 0 is the background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = numpy.hstack(
            (cls_boxes, cls_scores[:, numpy.newaxis])).astype(numpy.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        keep = numpy.where(dets[:, 4] > CONF_THRESH)
        dets = dets[keep]
        if len(dets) == 0:
            return None
        # dets is ordered by confidence dets[:, 4], so the first row is the best
        rows = [0] if return_best else xrange(dets.shape[0])
        det_list = []
        extend_factor = self.face_rect_expand_factor
        for j in rows:
            det = [int(dets[j, 0]), int(dets[j, 1]),
                   int(dets[j, 2]), int(dets[j, 3]), dets[j, 4]]
            # extend the detection by a fixed factor around its centre
            width = round(det[2] - det[0])
            height = round(det[3] - det[1])
            length = (width + height) / 2.0
            centrepoint = [round(det[0]) + width / 2.0,
                           round(det[1]) + height / 2.0]
            det[0] = centrepoint[0] - round((1 + extend_factor) * length / 2.0)
            det[1] = centrepoint[1] - round((1 + extend_factor) * length / 2.0)
            det[2] = centrepoint[0] + round((1 + extend_factor) * length / 2.0)
            det[3] = centrepoint[1] + round((1 + extend_factor) * length / 2.0)
            # prevent going off the image
            det[0] = int(max(det[0], 0))
            det[1] = int(max(det[1], 0))
            det[2] = int(min(det[2], img.shape[1]))
            det[3] = int(min(det[3], img.shape[0]))
            det_list.append(det)
        return det_list
    except Exception as e:
        print 'Exception in FaceDetectorFasterRCNN:', str(e)
    return None
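# Hypothetical usage of detect_faces(); the wrapper-class construction and the
# image path are placeholders (only the method itself is shown above).
import cv2

detector = FaceDetectorFasterRCNN()  # assumed constructor, arguments unknown
faces = detector.detect_faces(cv2.imread('group_photo.jpg'), return_best=False)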
def tattoo_detection(net, image_name, args): """Detect object classes in an image using pre-computed object proposals.""" im_in = cv2.imread(image_name) if im_in is None: print('cannot open %s for read' % image_name) exit(-1) rows, cols = im_in.shape[:2] print([rows, cols]) scale = 1.0 if rows >= cols: scale = float(args.longdim) / float(rows) im = cv2.resize(im_in, (int(0.5 + float(cols) * scale), args.longdim)) else: scale = float(args.longdim) / float(cols) im = cv2.resize(im_in, (args.longdim, int(0.5 + float(rows) * scale))) # Detect all object classes and regress object bounds timer = Timer() timer.tic() scores, boxes = im_detect(net, im) timer.toc() seconds = '%.3f' % timer.total_time print('Detection took {:.3f}s for ' '{:d} object proposals').format(timer.total_time, boxes.shape[0]) max_scores = scores.max(axis=0) print(max_scores) print(boxes.shape) # Visualize detections for each class CONF_THRESH = args.threshold NMS_THRESH = args.nms_thresh tattoo_dets = [] for cls_ind, cls in enumerate(CLASSES[1:]): cls_ind += 1 # because we skipped background cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)] cls_scores = scores[:, cls_ind] dets = np.hstack( (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, NMS_THRESH) dets = dets[keep, :] inds = np.where(dets[:, -1] >= CONF_THRESH)[0] dets_filter = dets[inds] vis_detections(im, cls, dets_filter, thresh=CONF_THRESH) if cls == 'tattoo' and len(dets_filter) > 0: plt.savefig( os.path.join( args.output, os.path.splitext(os.path.basename(image_name))[0] + '_det.png')) tattoo_dets = dets_filter if args.inspect == 'v': plt.show() plt.clf() return tattoo_dets, max_scores, seconds, scale
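# Hypothetical batch driver for tattoo_detection(); `args` is assumed to be the
# argparse namespace the function already expects (longdim, threshold,
# nms_thresh, output, inspect), and the image directory is a placeholder.
import glob
import os

all_dets = {}
for image_name in sorted(glob.glob(os.path.join('images', '*.jpg'))):
    dets, max_scores, seconds, scale = tattoo_detection(net, image_name, args)
    all_dets[os.path.basename(image_name)] = dets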
def demo_tuples(net, image_name): """Detect objects, attributes and relations in an image using pre-computed object proposals.""" # Load the demo image im_file = os.path.join(cfg.DATA_DIR, image_name) im = cv2.imread(im_file) # Detect all object classes and regress object bounds timer = Timer() timer.tic() scores, boxes, attr_scores, rel_scores = im_detect(net, im) if attr_scores is not None: print 'Found attribute scores' if rel_scores is not None: print 'Found relation scores' rel_scores = rel_scores[:, 1:] # drop no relation rel_argmax = np.argmax(rel_scores, axis=1).reshape( (boxes.shape[0], boxes.shape[0])) rel_score = np.max(rel_scores, axis=1).reshape( (boxes.shape[0], boxes.shape[0])) timer.toc() print('Detection took {:.3f}s for ' '{:d} object proposals').format(timer.total_time, boxes.shape[0]) # Visualize detections for each class CONF_THRESH = 0.1 NMS_THRESH = 0.05 ATTR_THRESH = 0.1 im = im[:, :, (2, 1, 0)] fig, ax = plt.subplots(figsize=(12, 12)) ax.imshow(im) # Detections det_indices = [] det_scores = [] det_objects = [] det_bboxes = [] det_attrs = [] for cls_ind, cls in enumerate(CLASSES[1:]): cls_ind += 1 # because we skipped background cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)] cls_scores = scores[:, cls_ind] dets = np.hstack( (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = np.array(nms(dets, NMS_THRESH)) dets = dets[keep, :] inds = np.where(dets[:, -1] >= CONF_THRESH)[0] if len(inds) > 0: keep = keep[inds] for k in keep: det_indices.append(k) det_bboxes.append(cls_boxes[k]) det_scores.append(cls_scores[k]) det_objects.append(cls) if attr_scores is not None: attr_inds = np.where(attr_scores[k][1:] >= ATTR_THRESH)[0] det_attrs.append([ATTRS[ix] for ix in attr_inds]) else: det_attrs.append([]) rel_score = rel_score[det_indices].T[det_indices].T rel_argmax = rel_argmax[det_indices].T[det_indices].T for i, (idx, score, obj, bbox, attr) in enumerate( zip(det_indices, det_scores, det_objects, det_bboxes, det_attrs)): ax.add_patch( plt.Rectangle((bbox[0], bbox[1]), bbox[2] - bbox[0], bbox[3] - bbox[1], fill=False, edgecolor='red', linewidth=3.5)) box_text = '{:s} {:.3f}'.format(obj, score) if len(attr) > 0: box_text += "(" + ",".join(attr) + ")" ax.text(bbox[0], bbox[1] - 2, box_text, bbox=dict(facecolor='blue', alpha=0.5), fontsize=14, color='white') # Outgoing score = np.max(rel_score[i]) ix = np.argmax(rel_score[i]) subject = det_objects[ix] relation = RELATIONS[rel_argmax[i][ix]] print 'Relation: %.2f %s -> %s -> %s' % (score, obj, relation, subject) # Incoming score = np.max(rel_score.T[i]) ix = np.argmax(rel_score.T[i]) subject = det_objects[ix] relation = RELATIONS[rel_argmax[ix][i]] print 'Relation: %.2f %s -> %s -> %s' % (score, subject, relation, obj) ax.set_title(('detections with ' 'p(object|box) >= {:.1f}').format(CONF_THRESH), fontsize=14) plt.axis('off') plt.tight_layout() plt.draw() plt.savefig('data/demo/' + im_file.split('/')[-1].replace(".jpg", "_demo.jpg"))
def __init__(self, classes, prototxt_file, caffemodel_file, args, class_properties=None): self.classes = classes self.current_scores = [] self.current_boxes = [] self.current_frame = None self.current_frame_timestamp = None self.current_frame_header = None self.frames_detected = 0 self.detection_start = time.time() self.args = args self.CONF_THRESH = args.conf_threshold # print ("THRESH" + str(self.CONF_THRESH)) self.cls_score_factors = {} self.set_cls_score_factors(class_properties) rospy.init_node("frcnn_detector") print("node initialized") cfg.TEST.HAS_RPN = True # Use RPN for proposals prototxt = prototxt_file caffemodel = caffemodel_file if not os.path.isfile(caffemodel): raise IOError( ('{:s} not found.\nDid you run ./download_caffemodels.sh?' ).format(caffemodel)) if not os.path.isfile(prototxt): raise IOError( ("{:s} not found.\nMaybe this model is incompatible with the " "respective network you chose.").format(caffemodel)) if args.cpu_mode: caffe.set_mode_cpu() print("Set caffe to CPU mode") else: caffe.set_mode_gpu() caffe.set_device(args.gpu_id) cfg.GPU_ID = args.gpu_id print("Set caffe to GPU mode, running on GPU {}".format( cfg.GPU_ID)) self.net = caffe.Net(prototxt, caffemodel, caffe.TEST) print '\n\nLoaded network {:s}'.format(caffemodel) # Warmup on a dummy image im = 128 * np.ones((300, 500, 3), dtype=np.uint8) for i in xrange(2): _, _ = im_detect(self.net, im) # Create bounding box publisher self.bb_pub = rospy.Publisher('frcnn/bb', Object_bb_list, queue_size=10) # self.bb_img_pub = rospy.Publisher('frcnn/bb_img', Image, queue_size=1) self.detection_start = time.time() self.sub_frames = rospy.Subscriber("/frcnn_input/image_raw", Image, self.cb_frame_rec, queue_size=10) rospy.spin()
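# The subscriber registered above expects a cb_frame_rec callback that is not
# part of this snippet. A minimal sketch of what it could look like; the real
# node may instead buffer frames and run im_detect in a separate loop.
from cv_bridge import CvBridge, CvBridgeError

def cb_frame_rec(self, msg):
    bridge = CvBridge()
    try:
        frame = bridge.imgmsg_to_cv2(msg, desired_encoding="bgr8")
    except CvBridgeError as e:
        rospy.logerr(e)
        return
    self.current_frame = frame
    self.current_frame_timestamp = msg.header.stamp
    self.current_frame_header = msg.header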
def demo(net, image_name, image_path, CONF_THRESH, NMS_THRESH, boxes_savepath, labels_savepath, images_savepath): """Detect object classes in an image using pre-computed object proposals.""" # Load the demo image im = cv2.imread(image_path) # Detect all object classes and regress object bounds timer = Timer() timer.tic() scores, boxes = im_detect(net, im) timer.toc() print ('Detection took {:.3f}s for ' '{:d} object proposals').format(timer.total_time, boxes.shape[0]) # Visualize detections for each class cand = [] for cls_ind, cls in enumerate(CLASSES[1:]): cls_ind += 1 # because we skipped background cls_boxes = boxes[:, 4:8] cls_scores = scores[:, cls_ind] dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, NMS_THRESH) dets = dets[keep, :] one = [cls, dets, CONF_THRESH] cand.append(one) rects, cas, rects_out = vis_detections(im, cand) header_rects = ['xmin','ymin','xmax','ymax'] header_cas = ['label','accuracy'] csvfileName_rects = boxes_savepath + '/' + image_name.split('.')[0] + '_boxes.csv' csvfileName_rects2 = boxes_savepath + '/' + image_name.split('.')[0] + '_boxes2.csv' csvfileName_cas = labels_savepath + '/' + image_name.split('.')[0] + '_label.csv' List2CSV(csvfileName_rects, rects_out, header_rects) List2CSV(csvfileName_rects2, rects, header_rects) List2CSV(csvfileName_cas, cas, header_cas) fig_save_name = images_savepath + '/new-' + image_name # path + fig_name of output fig, ax = plt.subplots(figsize=(12,12)) im = im[:, :, (2, 1, 0)] ax.imshow(im, aspect='equal') for ii in range(len(rects)): r = rects[ii] ax.add_patch( plt.Rectangle((r[0], r[1]), r[2], r[3] , fill=False, edgecolor='red', linewidth=3.5)) c = cas[ii] ax.text(r[0], r[1] - 2, '{:s} {:.3f}'.format(c[0], c[1]), bbox=dict(facecolor='blue', alpha=0.5), fontsize=16, color='white') plt.axis('off') plt.tight_layout() plt.draw() plt.savefig(fig_save_name) # save and output the labeled figure plt.close() return scores, boxes, timer
def demo(net, image_name): """Detect object classes in an image using pre-computed object proposals.""" # Load the demo image im_file = image_name # Vivek. Change the name of the video # print os.getcwd() cap = cv2.VideoCapture('TireWorks.mp4') count = 1 while(cap.isOpened()): ret, frame = cap.read() if count == 1: (h, w) = frame.shape[:2] zeros = np.zeros((h, w), dtype="uint8") count = 0 #gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) if ret: timer = Timer() timer.tic() scores, boxes = im_detect(net, frame) timer.toc() print ('Detection took {:.3f}s for ' '{:d} object proposals').format(timer.total_time, boxes.shape[0]) CONF_THRESH = 0.8 NMS_THRESH = 0.3 for cls_ind, cls in enumerate(CLASSES[1:]): cls_ind += 1 # because we skipped background cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)] cls_scores = scores[:, cls_ind] dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, NMS_THRESH) dets = dets[keep, :] inds = np.where(dets[:, -1] >= CONF_THRESH)[0] for i in inds: bbox = dets[i, :4] cv2.putText(frame, cls, (bbox[0], int(bbox[3] + 25)), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.CV_AA) cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255,255,0), 3) (B, G, R) = cv2.split(frame) R = cv2.merge([zeros, zeros, R]) G = cv2.merge([zeros, G, zeros]) B = cv2.merge([B, zeros, zeros]) output = np.zeros((h, w, 3), dtype="uint8") output = frame cv2.imshow('Deep Learning Demonstration', frame) if cv2.waitKey(1) & 0xFF == ord('q'): break else: break cap.release() #vw.release() #del vw cv2.destroyAllWindows()
def demo(image_name, image_no, image_index, net): colors = [ "blue", "green", "red", "cyan", "magenta", "yellow", "black", "white", "darkblue", "orchid", "springgreen", "lime", "deepskyblue", "mediumvioletred", "maroon", "orangered", "blue", "green", "red", "cyan", "magenta", "yellow", "black", "white", "darkblue", "orchid", "springgreen", "lime", "deepskyblue", "mediumvioletred", "maroon", "orangered", "orangered", "orangered", "yellow", "black", "white", "darkblue", "orchid", "springgreen", "orangered", "blue", "green", "red", "cyan", "magenta", "yellow", "black", "white", "darkblue", "orchid", "springgreen", "lime", "deepskyblue", "mediumvioletred", "maroon", "orangered", "orangered", "orangered", "yellow", "black", "white", "darkblue", "orchid", "springgreen", "orangered", "blue", "green", "red", "cyan", "magenta", "yellow", "black", "white", "darkblue", "orchid", "springgreen", "lime", "deepskyblue", "mediumvioletred", "maroon", "orangered", "orangered", "orangered", "yellow", "black", "white", "darkblue", "orchid", "springgreen", "orangered", "blue", "green", "red", "cyan", "magenta", "yellow", "black", "white", "darkblue", "orchid", "springgreen", "lime", "deepskyblue", "mediumvioletred", "maroon", "orangered", "orangered", "orangered", "yellow", "black", "white", "darkblue", "orchid", "springgreen" ] conf_thresh = 0.4 min_boxes = 36 max_boxes = 36 indexes = [] cfg.TEST.NMS = 0.6 im = cv2.imread( os.path.join( "/media/sadaf/e4da0f25-29be-4c9e-a432-3193ff5f5baf/Code/AWA_data/Animals_with_Attributes2/adv_images", image_name)) cls_append = [] scores, boxes, attr_scores, rel_scores = im_detect(net, im) # Keep the original boxes, don't worry about the regression bbox outputs rois = net.blobs['rois'].data.copy() # unscale back to raw image space blobs, im_scales = _get_blobs(im, None) cls_boxes = rois[:, 1:5] / im_scales[0] print(len(cls_boxes)) cls_prob = net.blobs['cls_prob'].data attr_prob = net.blobs['attr_prob'].data pool5 = net.blobs['pool5_flat'].data # Keep only the best detections max_conf = np.zeros((rois.shape[0])) for cls_ind in range(1, cls_prob.shape[1]): cls_scores = scores[:, cls_ind] dets = np.hstack( (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = np.array(nms(dets, cfg.TEST.NMS)) max_conf[keep] = np.where(cls_scores[keep] > max_conf[keep], cls_scores[keep], max_conf[keep]) keep_boxes = np.where(max_conf >= conf_thresh)[0] if len(keep_boxes) < min_boxes: keep_boxes = np.argsort(max_conf)[::-1][:min_boxes] elif len(keep_boxes) > max_boxes: keep_boxes = np.argsort(max_conf)[::-1][:max_boxes] ############################ att_unique = np.unique(att_names[image_index * scale:(image_index * scale + scale)]) att_unique_adv = np.unique( att_names_adv[image_index * scale:(image_index * scale + scale)]) # cls_unique=np.unique(att_cls[image_index*scale:(image_index*scale+scale)]) # cls_unique_adv=np.unique(att_cls_adv[image_index*scale:(image_index*scale+scale)]) im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) sizes = np.shape(im) height = float(sizes[0]) width = float(sizes[1]) fig = plt.figure() fig.set_size_inches(width / height, 1, forward=False) ax = plt.Axes(fig, [0., 0., 1., 1.]) ax.set_axis_off() fig.add_axes(ax) plt.imshow(im) boxes = cls_boxes[keep_boxes] #print (boxes) #print (keep_boxes) objects = np.argmax(cls_prob[keep_boxes][:, 1:], axis=1) attr_thresh = 0.1 attr = np.argmax(attr_prob[keep_boxes][:, 1:], axis=1) attr_conf = np.max(attr_prob[keep_boxes][:, 1:], axis=1) count_box = 0 print("image #", image_index) for i in range(len(keep_boxes)): bbox = 
boxes[i] if bbox[0] == 0: bbox[0] = 1 if bbox[1] == 0: bbox[1] = 1 #cls = classes[objects[i]+1] if attr_conf[i] > attr_thresh: #for k in range (len(att_unique)): # for l in range (len(cls_unique)): #if attributes[attr[i]+1]==att_unique[k]: # if classes[objects[i]+1] == cls_unique[l]: #if attributes[attr[i]+1] not in att_unique_adv: #if classes[objects[i]+1] not in cls_unique_adv: attributes[attr[i] + 1] = attributes[attr[i] + 1].replace( "longleg", "leg") attributes[attr[i] + 1] = attributes[attr[i] + 1].replace( "longneck", "neck") attributes[attr[i] + 1] = attributes[attr[i] + 1].replace( "patches", "patch") attributes[attr[i] + 1] = attributes[attr[i] + 1].replace( "bulbous", "round") attributes[attr[i] + 1] = attributes[attr[i] + 1].replace( "lean", "leaning") attributes[attr[i] + 1] = attributes[attr[i] + 1].replace( "chewteeth", "teeth") attributes[attr[i] + 1] = attributes[attr[i] + 1].replace( "meatteeth", "teeth") attributes[attr[i] + 1] = attributes[attr[i] + 1].replace( "buckteeth", "teeth") attributes[attr[i] + 1] = attributes[attr[i] + 1].replace( "strainteeth", "teeth") attributes[attr[i] + 1] = attributes[attr[i] + 1].replace( "flys", "flying") attributes[attr[i] + 1] = attributes[attr[i] + 1].replace( "swims", "swimming") attributes[attr[i] + 1] = attributes[attr[i] + 1].replace( "tunnels", "tunnel") attributes[attr[i] + 1] = attributes[attr[i] + 1].replace( "walks", "walking") if attributes[attr[i] + 1] in att_unique_adv: # if (attributes[attr[i]+1]=="patch"): cls = attributes[attr[i] + 1] cls = cls.replace("patch", "have patches") # cls = attributes[attr[i]+1] # elif (attributes[attr[i]+1]=="spots"): cls = cls.replace("spots", "have spots") # cls = attributes[attr[i]+1] # elif (attributes[attr[i]+1]=="stripes"): cls = cls.replace("stripes", "have stripes") # cls = attributes[attr[i]+1] # elif (attributes[attr[i]+1]=="furry"): cls = cls.replace("furry", "have fur") # cls = attributes[attr[i]+1] # elif (cls=="hands"): # elif (cls=="hands"): cls = cls.replace("hands", "have hands") # cls = cls # elif (cls=="pads"): cls = cls.replace("pads", "have pads") # cls = cls # elif (cls=="paws"): cls = cls.replace("paws", "have paws") # cls = cls # elif (cls=="leg"): cls = cls.replace("leg", "have leg") # cls = cls # elif (cls=="neck"): cls = cls.replace("neck", "have neck") # cls = cls # elif (cls=="tail"): cls = cls.replace("tail", "have tail") # cls = cls # elif (cls=="teeth"): cls = cls.replace("teeth", "have teeth") # cls = cls # elif (cls=="horns"): cls = cls.replace("horns", "have horns") # cls = cls # elif (cls=="claws"): cls = cls.replace("claws", "have claws") # cls = cls # elif (cls=="tusks"): cls = cls.replace("tusks", "have tusks") # cls = cls # elif (cls=="flying"): cls = cls.replace("flying", "is flying") # cls = cls # elif (cls=="swimming"): cls = cls.replace("swimming", "is swimming") # cls = cls # elif (cls=="tunnel"): cls = cls.replace("tunnel", "in tunnel") # cls = cls # elif (cls=="walking"): cls = cls.replace("walking", "is walking") # cls = cls # elif (cls=="fish"): cls = cls.replace("fish", "eats fish") # cls = cls # elif (cls=="meat"): cls = cls.replace("meat", "eats meat") # cls = cls # elif (cls=="desert"): cls = cls.replace("desert", "lives in desert") # cls = cls # elif (cls=="bush"): cls = cls.replace("bush", "lives in bush") # cls = cls # elif (cls=="plains"): cls = cls.replace("plains", "lives in plains") # cls = cls # elif (cls=="forest"): cls = cls.replace("forest", "lives in forest") # cls = cls # elif (cls=="fields"): cls = cls.replace("fields", 
"lives in fields") # cls = cls # elif (cls=="mountains"): cls = cls.replace("mountains", "lives in mountains") # cls = cls # elif (cls=="ocean"): cls = cls.replace("ocean", "lives in ocean") # cls = cls # elif (cls=="ground"): cls = cls.replace("ground", "lives in ground") # cls = cls # elif (cls=="water"): cls = cls.replace("water", "lives in water") # cls = cls # elif (cls=="tree"): cls = cls.replace("tree", "lives in tree") # cls = cls # elif (cls=="group"): cls = cls.replace("group", "lives in group") cls = cls.replace("black", "is black") # cls = cls # elif (str(cls)=="white"): cls = cls.replace("white", "is white") # cls = cls # elif (str(cls)=="blue"): cls = cls.replace("blue", "is blue") # cls = cls # elif (str(cls)=="brown"): cls = cls.replace("brown", "is brown") # cls = cls # elif (str(cls)=="gray"): cls = cls.replace("gray", "is gray") # cls = cls # elif (str(cls)=="orange"): cls = cls.replace("orange", "is orange") # cls = cls # elif (str(cls)=="yellow"): cls = cls.replace("yellow", "is yellow") # cls = cls # elif (str(cls)=="green"): cls = cls.replace("green", "is green") # cls = cls # elif (str(cls)=="red"): cls = cls.replace("red", "is red") # cls = cls # elif (cls=="furry"): cls = cls.replace("furry", "is furry") cls = cls.replace("spots", "have spots") # cls = cls # elif (cls=="stripes"): cls = cls.replace("stripes", "have stripes") cls = cls.replace("big", "is big") cls = cls.replace("small", "is small") # else: #cls = attributes[attr[i]+1] + " " + classes[objects[i]+1] # cls = attributes[attr[i]+1] + " " + correct_cls[image_index] #cls = attributes[attr[i]+1] # cls = cls.replace('brown','brown '+wrong_cls[image_no]) # cls = cls.replace('black','black '+wrong_cls[image_no]) # cls = cls.replace('white','white '+wrong_cls[image_no]) # cls = cls.replace('blue','blue '+wrong_cls[image_no]) # cls = cls.replace('gray','gray '+wrong_cls[image_no]) # cls = cls.replace('orange','orange '+wrong_cls[image_no]) # cls = cls.replace('red','red '+wrong_cls[image_no]) # cls = cls.replace('yellow','yellow '+wrong_cls[image_no]) # cls = cls.replace('patch','have patches') # cls = cls.replace('spots','have spots') cls_append.append(cls) count = cls_append.count(cls) if count == 1: count_box = count_box + 1 plt.gca().add_patch( plt.Rectangle((bbox[0], bbox[1]), bbox[2] - bbox[0], bbox[3] - bbox[1], fill=False, edgecolor=colors[i], linewidth=0.3, alpha=0.5)) plt.gca().text(bbox[0], bbox[1] - 2, '%s' % (cls), bbox=dict(facecolor='blue', alpha=0, linewidth=0.2), fontsize=2.5, color=colors[i]) plt.savefig( '/media/sadaf/e4da0f25-29be-4c9e-a432-3193ff5f5baf/Code/AWA_data/Animals_with_Attributes2/adv_bb1/Adv_bb{}.jpg' .format(image_no), dpi=1500) #plt.savefig('/media/sadaf/e4da0f25-29be-4c9e-a432-3193ff5f5baf/Code/Pytorch_Code/transfer_learn/pytorch-adversarial_box/plots_AT_NoAT/adv_bb_AT/adv_bb_AT{}_25.jpg'.format(image_no), dpi = 1500) plt.close()
def demo(net, image_name, save_path): """Detect object classes in an image using pre-computed object proposals.""" # Load the demo image im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name) im = cv2.imread(im_file) im_h, im_w, im_depth = im.shape[:3] # Detect all object classes and regress object bounds timer = Timer() timer.tic() scores, boxes = im_detect(net, im) timer.toc() print('Detection took {:.3f}s for ' '{:d} object proposals').format(timer.total_time, boxes.shape[0]) # Visualize detections for each class CONF_THRESH = 0.2 NMS_THRESH = 0.1 im = im[:, :, (2, 1, 0)] fig, ax = plt.subplots(figsize=(12, 12)) ax.imshow(im, aspect='equal') data = etree.Element("annotation") data.set('verified', 'no') # 1 folder interface_folder = etree.SubElement(data, 'folder') interface_folder.text = 'testXT' # 2 filename filename_txt = image_name filename = etree.SubElement(data, 'filename') filename.text = filename_txt # 3 path pathNode = etree.SubElement(data, 'path') pathNode.text = save_path + filename_txt + '.jpg' # 4 source source = etree.SubElement(data, 'source') database = etree.SubElement(source, 'database') database.text = 'Unknown' # 5 img size imgsize = etree.SubElement(data, 'size') img_width = etree.SubElement(imgsize, 'width') img_width.text = str(im_w) img_height = etree.SubElement(imgsize, 'height') img_height.text = str(im_h) img_depth = etree.SubElement(imgsize, 'depth') img_depth.text = str(im_depth) # 6 segmented segmented = etree.SubElement(data, 'segmented') segmented.text = '0' for cls_ind, cls in enumerate(CLASSES[1:]): cls_ind += 1 # because we skipped background cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)] cls_scores = scores[:, cls_ind] dets = np.hstack( (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, NMS_THRESH) dets = dets[keep, :] vis_detections(data, im_h, im_w, im_depth, image_name.split('/')[-1].split('.')[0], save_path, ax, cls, dets, thresh=CONF_THRESH) output_dir = os.path.join(cfg.ROOT_DIR, 'output_glass', image_name.split('/')[-1] + "_detect_rst.jpg") plt.savefig(output_dir)
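# The demo above fills a Pascal VOC style annotation tree but this snippet does
# not show it being written to disk. A sketch of that last step, assuming
# `etree` is lxml.etree (with xml.etree.ElementTree, drop the pretty_print
# argument).
def write_annotation(annotation, xml_path):
    with open(xml_path, 'w') as f:
        f.write(etree.tostring(annotation, pretty_print=True))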
if args.cpu_mode: caffe.set_mode_cpu() else: caffe.set_mode_gpu() caffe.set_device(args.gpu_id) cfg.GPU_ID = args.gpu_id NET = caffe.Net(prototxt, caffemodel, caffe.TEST) rospy.loginfo('Loaded network %s', caffemodel) rospy.loginfo('Running detection with these classes: %s', str(classes)) rospy.loginfo('Warmup started') im = 128 * np.ones((300, 500, 3), dtype=np.uint8) timer = Timer() timer.tic() for i in xrange(2): _, _= im_detect(NET, im) timer.toc() rospy.loginfo('Warmup done in %f seconds. Starting node', timer.total_time) rate = rospy.Rate(10) bridge = CvBridge() while not rospy.is_shutdown(): if (RUNNING): rate.sleep() cv_image = bridge.imgmsg_to_cv2(IMAGE) (scores, boxes) = detect(cv_image) detections = generateDetections(scores, boxes, classes, args.treshold) if (pub_single.get_num_connections() > 0): for msg in detections: pub_single.publish(msg)
def detection_to_file(target_path, v_num, file_list, detect, total_frames,
                      current_frames, max_proposal=100, thresh=0):
    timer = Timer()
    w = open("{}/{}.txt".format(target_path, v_num), "w")
    for file_index, file_path in enumerate(file_list):
        file_name = file_path.split("/")[-1]
        set_num, v_num, frame_num = file_name[:-4].split("_")
        frame_num = str(int(frame_num) + 1)
        im = cv2.imread(file_path)
        timer = Timer()
        timer.tic()
        _t = {'im_preproc': Timer(), 'im_net': Timer(),
              'im_postproc': Timer(), 'misc': Timer()}
        scores, boxes = im_detect(net, im, _t)
        timer.toc()
        print('Detection Time:{:.3f}s on {} {}/{} images'.format(
            timer.average_time, file_name,
            current_frames + file_index + 1, total_frames))
        NMS_THRESH = 0.3
        for cls_ind, cls in enumerate(CLASSES[1:]):
            cls_ind += 1  # because we skipped background
            cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
            cls_scores = scores[:, cls_ind]
            dets = np.hstack(
                (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
            keep = nms(dets, NMS_THRESH)
            dets = dets[keep, :]
            thresh = 0
            inds = np.where(dets[:, -1] > thresh)[0]
            for i in inds:
                bbox = dets[i, :4]
                score = dets[i, -1] * 100
                x = bbox[0]
                y = bbox[1]
                width = bbox[2] - bbox[0]
                height = bbox[3] - bbox[1]
                label = cls
                w.write("{},{},{},{},{},{},{}\n".format(
                    frame_num, label, x, y, width, height, score))
    w.close()
    print("Evaluation file {} has been written".format(w.name))
    return file_index + 1
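# Hypothetical driver for detection_to_file(): one result file per video
# sequence with a running frame counter. The directory layout and sequence
# names are assumptions.
import glob

video_ids = ['V000', 'V001']
frame_lists = {v: sorted(glob.glob('frames/{}/*.jpg'.format(v))) for v in video_ids}
total_frames = sum(len(f) for f in frame_lists.values())
current_frames = 0
for v in video_ids:
    current_frames += detection_to_file('results', v, frame_lists[v], None,
                                        total_frames, current_frames)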
def do_detections(self): cfg.TEST.HAS_RPN = True # Use RPN for proposals args = self.parse_args() this_dir = os.path.dirname(__file__) prototxt = os.path.join(this_dir, 'streetlight.prototxt') caffemodel = os.path.join(this_dir, 'streetlight.caffemodel') if not os.path.isfile(caffemodel): raise IOError(('{:s} not found.\nDid you run ./data/script/' 'fetch_faster_rcnn_models.sh?').format(caffemodel)) if args.cpu_mode: caffe.set_mode_cpu() else: caffe.set_mode_gpu() caffe.set_device(args.gpu_id) cfg.GPU_ID = args.gpu_id net = caffe.Net(prototxt, caffemodel, caffe.TEST) print '\n\nLoaded network {:s}'.format(caffemodel) #The color of the rectangle we draw around the face rectangleColor = (0,255,0) #variables holding the current frame number and the current faceid frameCounter = 0 currentFaceID = 0 #Variables holding the correlation trackers and the name per faceid faceTrackers = {} faceNames = {} labelNames = {} #start here #labels = [] #score1 = [] self.ready=True isdimset=False frame=None #img = [None, None, None, None] #index = 0 #detected = 0 counted_id=[] upper_id=[] count=0 #detection loop #while not self.stopEvent.is_set(): try: while not self.stopEvent.is_set(): if np.array_equal(frame,self.get_frame()): continue else : frame = self.get_frame() if not isinstance(frame, np.ndarray): continue if not isdimset: width = int(frame.shape[1] * 75 / 100) height = int(frame.shape[0] * 75 / 100) dim = (width, height) isdimset=True #Resize the image to 320x240 #frame = cv2.resize(frame, (1260, 720)) frame_copy = copy.deepcopy(frame) span = 20 vertical_left = int(frame.shape[1]//2)-span vertical_right = int(frame.shape[1]//2)+span #Result image is the image we will show the user, which is a #combination of the original image from the webcam and the #overlayed rectangle for the largest face #STEPS: # * Update all trackers and remove the ones that are not # relevant anymore # * Every 10 frames: # + Use face detection on the current frame and look # for faces. # + For each found face, check if centerpoint is within # existing tracked box. If so, nothing to do # + If centerpoint is NOT in existing tracked box, then # we add a new tracker with a new face-id #Increase the framecounter frameCounter += 1 if ((frameCounter % 10) == 0) or frameCounter==1: scores, boxes = im_detect(net, frame) CONF_THRESH = 0.2 NMS_THRESH = 0.1 #Update all the trackers and remove the ones for which the update #indicated the quality was not good enough fidsToDelete = [] for fid in faceTrackers.keys(): trackingQuality = faceTrackers[ fid ].update( frame ) #If the tracking quality is good enough, we must delete #this tracker if trackingQuality < 7: fidsToDelete.append( fid ) for fid in fidsToDelete: print("Removing fid " + str(fid) + " from list of trackers") faceTrackers.pop( fid , None ) #Every 10 frames, we will have to determine which faces #are present in the frame if (frameCounter % 10) == 0: for cls_ind, cls in enumerate(self.CLASSES[1:]): cls_ind += 1 # because we skipped background cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)] cls_scores = scores[:, cls_ind] dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, NMS_THRESH) dets = dets[keep, :] inds = np.where(dets[:, -1] >= CONF_THRESH)[0] if len(inds) == 0: continue for i in inds: bbox = dets[i, :4] (x, y, w, h) = bbox.astype("int") score = dets[i, -1] #Loop over all faces and check if the area for this #face is the largest so far #We need to convert it to int here because of the #requirement of the dlib tracker. 
If we omit the cast to #int here, you will get cast errors since the detector #returns numpy.int32 and the tracker requires an int x_bar = int((x + w) / 2.0) y_bar = int((y + h) / 2.0) #Variable holding information which faceid we #matched with matchedFid = None #Now loop over all the trackers and check if the #centerpoint of the face is within the box of a #tracker for fid in faceTrackers.keys(): tracked_position = faceTrackers[fid].get_position() t_x = int(tracked_position.left()) t_y = int(tracked_position.top()) t_w = int(tracked_position.right()) t_h = int(tracked_position.bottom()) t_x_bar = int((t_x + t_w) / 2.0) t_y_bar = int((t_y + t_h) / 2.0) #check if the centerpoint of the face is within the #rectangleof a tracker region. Also, the centerpoint #of the tracker region must be within the region #detected as a face. If both of these conditions hold #we have a match if ( ( t_x <= x_bar <= t_w) and ( t_y <= y_bar <= t_h) and ( x <= t_x_bar <= w ) and ( y <= t_y_bar <= h )): matchedFid = fid #labels.append(cls) #If no matched fid, then we have to create a new tracker if matchedFid is None: print("Creating new tracker " + str(currentFaceID)) #Create and store the tracker tracker = dlib.correlation_tracker() tracker.start_track(frame, dlib.rectangle(x, y, w, h)) faceTrackers[ currentFaceID ] = tracker #Start a new thread that is used to simulate #face recognition. This is not yet implemented in this #version :) t = threading.Thread( target = self.doRecognizePerson , args=(faceNames,currentFaceID,labelNames, cls,score)) t.start() #Increase the currentFaceID counter currentFaceID += 1 #labels.append(cls) #score1.append(score) #Now loop over all the trackers we have and draw the rectangle #around the detected faces. If we 'know' the name for this person #(i.e. the recognition thread is finished), we print the name #of the person, otherwise the message indicating we are detecting #the name of the person for fid in faceTrackers.keys(): tracked_position = faceTrackers[fid].get_position() t_x = int(tracked_position.left()) t_y = int(tracked_position.top()) t_w = int(tracked_position.right()) t_h = int(tracked_position.bottom()) x_id = int((t_x + t_w) / 2.0) y_id = int((t_y + t_h) / 2.0) cv2.rectangle(frame, (t_x, t_y), (t_w, t_h), rectangleColor ,2) if fid in faceNames.keys(): cv2.putText(frame, faceNames[fid], (x_id-5, y_id-5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2) cv2.circle(frame, (x_id, y_id), 3, (0, 0, 0), -1) textLabel = '{:s} {:.3f}'.format(labelNames[fid]["label"], labelNames[fid]["score"]) labelonly = str(labelNames[fid]["label"]) textOrg = (t_x, t_y-2) cv2.putText(frame, textLabel, textOrg, cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2) if x_id > vertical_left and x_id < vertical_right: count = count + 1 roi = frame_copy[t_y:t_h, t_x:t_w] #resized_image = cv2.resize(roi, (400, 360)) #cv2.imwrite("/media/ibrahim/Data/faster-rcnn/tools/img/{}_{}.jpg".format(count, labelNames[fid]["label"]),roi) #object_image = frame self.set_passingobject(faceNames[fid], roi, labelonly) #print "object %s inside the box %s times"%(faceNames[fid],str(count)) else: cv2.putText(frame, "Detecting..." 
, (x_id-5, y_id-5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2) cv2.circle(frame, (x_id, y_id), 3, (0, 0, 0), -1) #Since we want to show something larger on the screen than the #original 320x240, we resize the image again # #Note that it would also be possible to keep the large version #of the baseimage and make the result image a copy of this large #base image and use the scaling factor to draw the rectangle #at the right coordinates. cv2.line(frame, ( vertical_left, 0), (vertical_left, frame.shape[0]), (0, 255, 255), 2) cv2.line(frame, ( vertical_right, 0), (vertical_right, frame.shape[0]), (0, 255, 255), 2) image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) self.send_result(image) #To ensure we can also deal with the user pressing Ctrl-C in the console #we have to check for the KeyboardInterrupt exception and break out of #the main loop except KeyboardInterrupt as e: pass
raise IOError(('{:s} not found.\nDid you run ./data/script/' 'fetch_faster_rcnn_models.sh?').format(caffemodel)) caffe.set_mode_cpu() net = caffe.Net(prototxt, caffemodel, caffe.TEST) print '\n\nLoaded network {:s}'.format(caffemodel) print "starting capture" vs = WebcamVideoStream().start() while True: frame = vs.read() # do detection and classification timer = Timer() timer.tic() scores, boxes = im_detect(net, frame) timer.toc() # print stats print('Detection took {:.3f}s for ' '{:d} object proposals').format(timer.total_time, boxes.shape[0]) CONF_THRESH = 0.8 NMS_THRESH = 0.3 for cls_ind, cls in enumerate(CLASSES[1:]): cls_ind += 1 # because we skipped background cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)] cls_scores = scores[:, cls_ind] dets = np.hstack( (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, NMS_THRESH)
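# The webcam loop above is cut off after the NMS call; in the other demos in
# this file it continues by filtering on CONF_THRESH, drawing the surviving
# boxes and showing the frame. A compact sketch of that continuation as a
# helper (colours and the label layout are assumptions).
import cv2
import numpy as np

def draw_detections(frame, dets, cls, conf_thresh=0.8):
    """Draw boxes whose score >= conf_thresh on frame, in place."""
    for i in np.where(dets[:, -1] >= conf_thresh)[0]:
        x1, y1, x2, y2 = [int(v) for v in dets[i, :4]]
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(frame, '{:s} {:.2f}'.format(cls, dets[i, -1]),
                    (x1, max(y1 - 5, 0)), cv2.FONT_HERSHEY_SIMPLEX,
                    0.6, (0, 255, 0), 2)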
#for file in sorted(os.listdir(caffemodel_path)): #if file.endswith('.caffemodel'): # caffemodel = os.path.join(caffemodel_path,file) if args.cpu_mode: caffe.set_mode_cpu() else: caffe.set_mode_gpu() caffe.set_device(args.gpu_id) cfg.GPU_ID = args.gpu_id net = caffe.Net(prototxt, caffemodel, caffe.TEST) print '\n\nLoaded network {:s}'.format(caffemodel) # Warmup on a dummy image im = 128 * np.ones((1000, 512, 3), dtype=np.uint8) for i in xrange(2): _, __ = im_detect(net, im) print '\nDetect begin: \n' timer = Timer() timer.tic() for wav_name in sorted(os.listdir(test_folder)): if wav_name.endswith('.wav'): full_wav_name = os.path.join(test_folder, wav_name) x, rate = load_audio(full_wav_name, common_fs) image = stfft(x, nfft, noverlap) IMG = Image.fromarray(image.astype(np.uint8)) IMG.save('temp.jpg') im = cv2.imread('temp.jpg') os.remove('temp.jpg') ''' im=np.zeros((image.shape[0],image.shape[1],3))
def demo(image_name, image_no, image_index, net): conf_thresh = 0.3 min_boxes = 15 max_boxes = 15 indexes = [] cfg.TEST.NMS = 0.6 im = cv2.imread( os.path.join( "/media/sadaf/e4da0f25-29be-4c9e-a432-3193ff5f5baf/Code/AWA_data/Animals_with_Attributes2/clean_images", image_name)) scores, boxes, attr_scores, rel_scores = im_detect(net, im) # Keep the original boxes, don't worry about the regression bbox outputs rois = net.blobs['rois'].data.copy() # unscale back to raw image space blobs, im_scales = _get_blobs(im, None) cls_boxes = rois[:, 1:5] / im_scales[0] print(len(cls_boxes)) cls_prob = net.blobs['cls_prob'].data attr_prob = net.blobs['attr_prob'].data pool5 = net.blobs['pool5_flat'].data # Keep only the best detections max_conf = np.zeros((rois.shape[0])) for cls_ind in range(1, cls_prob.shape[1]): cls_scores = scores[:, cls_ind] dets = np.hstack( (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = np.array(nms(dets, cfg.TEST.NMS)) max_conf[keep] = np.where(cls_scores[keep] > max_conf[keep], cls_scores[keep], max_conf[keep]) keep_boxes = np.where(max_conf >= conf_thresh)[0] if len(keep_boxes) < min_boxes: keep_boxes = np.argsort(max_conf)[::-1][:min_boxes] elif len(keep_boxes) > max_boxes: keep_boxes = np.argsort(max_conf)[::-1][:max_boxes] ############################ att_unique = np.unique(att_names[image_index * scale:(image_index * scale + scale)]) print(att_unique) att_unique_adv = np.unique( att_names_adv[image_index * scale:(image_index * scale + scale)]) cls_unique = np.unique(att_cls[image_index * scale:(image_index * scale + scale)]) print(cls_unique) cls_unique_adv = np.unique( att_cls_adv[image_index * scale:(image_index * scale + scale)]) im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) sizes = np.shape(im) height = float(sizes[0]) width = float(sizes[1]) fig = plt.figure() fig.set_size_inches(width / height, 1, forward=False) ax = plt.Axes(fig, [0., 0., 1., 1.]) ax.set_axis_off() fig.add_axes(ax) plt.imshow(im) boxes = cls_boxes[keep_boxes] #print (boxes) #print (keep_boxes) objects = np.argmax(cls_prob[keep_boxes][:, 1:], axis=1) attr_thresh = 0.1 attr = np.argmax(attr_prob[keep_boxes][:, 1:], axis=1) attr_conf = np.max(attr_prob[keep_boxes][:, 1:], axis=1) count_box = 0 colors = [ "blue", "green", "red", "cyan", "magenta", "yellow", "black", "white", "darkblue", "orchid", "springgreen", "lime", "deepskyblue", "mediumvioletred", "maroon", "orangered" ] for i in range(len(keep_boxes)): bbox = boxes[i] if bbox[0] == 0: bbox[0] = 1 if bbox[1] == 0: bbox[1] = 1 #cls = classes[objects[i]+1] if attr_conf[i] > attr_thresh: #for k in range (len(att_unique)): # for l in range (len(cls_unique)): #if attributes[attr[i]+1]==att_unique[k]: # if classes[objects[i]+1] == cls_unique[l]: #if attributes[attr[i]+1] not in att_unique_adv: #if classes[objects[i]+1] not in cls_unique_adv: if attributes[attr[i] + 1] in att_unique: #cls = attributes[attr[i]+1] + " " + classes[objects[i]+1] cls = attributes[attr[i] + 1] count_box = count_box + 1 plt.gca().add_patch( plt.Rectangle((bbox[0], bbox[1]), bbox[2] - bbox[0], bbox[3] - bbox[1], fill=False, edgecolor=colors[i], linewidth=0.3, alpha=0.5)) plt.gca().text(bbox[0], bbox[1] + 30, '%s' % (cls), bbox=dict(facecolor='blue', alpha=0, linewidth=0.2), fontsize=2, color=colors[i]) # if classes[objects[i]+1] in att_unique: # # cls1 =classes[objects[i]+1] # # plt.gca().add_patch(plt.Rectangle((bbox[0], bbox[1]),bbox[2] - bbox[0],bbox[3] - bbox[1], fill=False,edgecolor='red', linewidth=0.3, alpha=0.5)) # plt.gca().text(bbox[2]-30, bbox[3],'%s' % 
(cls1),bbox=dict(facecolor='blue', alpha=0,linewidth=0.2),fontsize=1.5, color='red') plt.savefig( '/media/sadaf/e4da0f25-29be-4c9e-a432-3193ff5f5baf/Code/AWA_data/Animals_with_Attributes2/clean_images_1/clean_bb{}.jpg' .format(image_no), dpi=1500) #plt.savefig('/media/sadaf/e4da0f25-29be-4c9e-a432-3193ff5f5baf/Code/Pytorch_Code/transfer_learn/pytorch-adversarial_box/plots_AT_NoAT/adv_bb_AT/adv_bb_AT{}_25.jpg'.format(image_no), dpi = 1500) plt.close()
def run_rcnnFaster(self, images): caffe.set_mode_gpu() caffe.set_device(self.deviceID) image = images[0] cfg.TEST.HAS_RPN = True scores, boxes = im_detect(self.net, image) # THRESHOLDS CONF_THRESH = 0.7 NMS_THRESH = 0.3 classes = list(self.labels_df.iloc[:, 1]) classes.remove('background') objectOutput = [] #image_out = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) for cls_ind, cls in enumerate(classes): cls_ind += 1 # because we skipped background cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)] cls_scores = scores[:, cls_ind] dets = np.hstack( (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, NMS_THRESH) dets = dets[keep, :] inds = np.where(dets[:, -1] >= CONF_THRESH)[0] locations = [] for i in inds: bbox = dets[i, :4] score = dets[i, -1] # follow mantii convention ?? w = abs(int(bbox[0]) - int(bbox[2])) h = abs(int(bbox[1]) - int(bbox[3])) location_dict = {'Y': int(bbox[1]), 'X': int(bbox[0]), 'W': w, \ 'H': h, 'Confidence': int(score*100)} locations.append(location_dict) #print "class : ", cls, " roi : ", locations, " confidence: ", score #print "class : ", cls, " roi : ", list(map(int, bbox)), " confidence: ", score #print "image size: ", image.shape #print "x: ", int(bbox[0])," y: ", int(bbox[1]), " w: ", w, " h: ", h #print "xmin: ", int(bbox[0])," ymin: ", int(bbox[1]), " xmax: ", int(bbox[2]), " ymax: ", int(bbox[3]) #cv2.rectangle(image_out, (int(bbox[0]), int(bbox[1])), (int(bbox[0])+w,int(bbox[1])+h), (40,cls_ind*4,cls_ind*10), 3) #cv2.rectangle(image_out, (int(bbox[1]), int(bbox[0])), (int(bbox[1]),int(bbox[0])), (40,cls_ind*4,cls_ind*10), 10) #cv2.imwrite('output/debug.png', image_out) if len(locations) == 0: continue objectOutput.append({ "ObjectID": cls_ind, "Object": cls, "Locs": locations }) return scores, objectOutput
def demo(sess, net, im_file, vis_file, fits_fn, conf_thresh=0.8, eval_class=True, extra_vis_png=False, plot=False): """ Detect object classes in an image using pre-computed object proposals. im_file: The "fused" image file path vis_file: The background image file on which detections are laid. Normallly, this is just the IR image file path fits_fn: The FITS file path eval_class: True - use traditional per class-based evaluation style False - use per RoI-based evaluation """ show_img_size = cfg.TEST.SCALES[0] if (not os.path.exists(im_file)): print('%s cannot be found' % (im_file)) return -1 # Add ground truth values to dictionary im = cv2.imread(im_file) im_file_name = im_file[5:26] add_to_dict(im_file_name) # Detect all object classes and regress object bounds timer = Timer() timer.tic() image_name = osp.basename(im_file) scores, boxes = im_detect(sess, net, im, save_vis_dir=None, img_name=os.path.splitext(image_name)[0]) boxes *= float(show_img_size) / float(im.shape[0]) timer.toc() sys.stdout.write('Done in {:.3f} secs'.format(timer.total_time)) sys.stdout.flush() im = cv2.imread(vis_file) my_dpi = 100 fig = plt.figure() if plot: fig.set_size_inches(show_img_size / my_dpi * 2, show_img_size / my_dpi) fig.suptitle(im_file_name, fontsize=18) ax = fig.add_subplot(1, 2, 1) else: fig.set_size_inches(show_img_size / my_dpi, show_img_size / my_dpi) ax = plt.Axes(fig, [0., 0., 1., 1.]) im = cv2.resize(im, (show_img_size, show_img_size)) im = im[:, :, (2, 1, 0)] ax.axis('off') ax.set_axis_off() fig.add_axes(ax) ax.set_xlim([0, show_img_size]) ax.set_ylim([show_img_size, 0]) ax.imshow(im, aspect='equal') if ((fits_fn is not None) and (not extra_vis_png)): patch_contour = fuse(fits_fn, im, None, sigma_level=4, mask_ir=False, get_path_patch_only=True) ax.add_patch(patch_contour) NMS_THRESH = cfg.TEST.NMS #cfg.TEST.RPN_NMS_THRESH # 0.3 tt_vis = 0 bbox_img = [] bscore_img = [] num_sources = 0 #if (eval_class): for cls_ind, cls in enumerate(CLASSES[1:]): cls_ind += 1 # because we skipped background cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)] cls_scores = scores[:, cls_ind] dets = np.hstack( (cls_boxes, cls_scores[:, np.newaxis])) #.astype(np.float32) #get copy of dets for plotting purposes bboxs = np.empty(np.shape(dets)) np.copyto(bboxs, dets) keep = nms(dets, NMS_THRESH) dets = dets[keep, :] if plot: num_sources += plt_detections(im, cls, dets, ax, bboxs, thresh=conf_thresh) fig.subplots_adjust(top=0.85) else: num_sources += vis_detections(im, cls, dets, ax, thresh=conf_thresh) #add_to_csv(scores) #dets = np.hstack((dets, np.ones([dets.shape[0], 1]) * cls_ind)) # if (dets.shape[0] > 0): # bbox_img.append(dets) # bscore_img.append(np.reshape(dets[:, -2], [-1, 1])) # else: # for eoi_ind, eoi in enumerate(boxes): # eoi_scores = scores[eoi_ind, 1:] # skip background # cls_ind = np.argmax(eoi_scores) + 1 # add the background index back # cls_boxes = boxes[eoi_ind, 4 * cls_ind : 4 * (cls_ind + 1)] # cls_scores = scores[eoi_ind, cls_ind] # dets = np.hstack((np.reshape(cls_boxes, [1, -1]), # np.reshape(cls_scores, [-1, 1])))#.astype(np.float32) # dets = np.hstack((dets, np.ones([dets.shape[0], 1]) * cls_ind)) # bbox_img.append(dets) # bscore_img.append(np.reshape(dets[:, -2], [-1, 1])) # # boxes_im = np.vstack(bbox_img) # scores_im = np.vstack(bscore_img) # # #if (not eval_class): # # a numpy float is a C double, so need to use float32 # keep = nms(boxes_im[:, :-1].astype(np.float32), NMS_THRESH) # boxes_im = boxes_im[keep, :] # scores_im = scores_im[keep, :] # # keep_indices = 
range(boxes_im.shape[0]) #num_sources = vis_detections(im, None, boxes_im[keep_indices, :], ax, thresh=conf_thresh) print(', found %d sources' % num_sources) #If no sources detected plot average position of detection boxes if num_sources == 0: boxes = np.reshape(boxes, (cfg.TEST.RPN_POST_NMS_TOP_N, 7, 4)) bboxs = np.average(boxes, axis=1) #Draw ground truth boxes for i in range(len(ground_truth['act_xmin'])): ax.add_patch( plt.Rectangle( (ground_truth['act_xmin'][i], ground_truth['act_ymin'][i]), ground_truth['act_xmax'][i] - ground_truth['act_xmin'][i], ground_truth['act_ymax'][i] - ground_truth['act_ymin'][i], fill=False, edgecolor='gray', linewidth=2.0, linestyle='--')) #Draw proposed boxes for a_box in range(np.shape(bboxs)[0]): bbox = bboxs[a_box, :4] ax.add_patch( plt.Rectangle((bbox[0], bbox[1]), bbox[2] - bbox[0], bbox[3] - bbox[1], fill=False, edgecolor=colors_2[a_box], linewidth=1.0)) ##Plot scores for each box as a grouped histogram if plot: axa = fig.add_subplot(1, 2, 2) barwidth = (1 - 0.1) / cfg.TEST.RPN_POST_NMS_TOP_N axa.set_ylim([0, 1]) r1 = np.arange(len(scores[0, :])) for i in range(0, int(np.shape(scores)[0])): axa.bar(r1, scores[i, :], color=colors_2[i], width=barwidth, label='det_' + str(i + 1)) r1 = [x + barwidth for x in r1] axa.set_xticks([r + 0.45 for r in range(len(scores[0, :]))]) axa.set_xticklabels(CLASSES[:], fontsize=8) #Draw horizontal line at threshold, add legend and title axa.axhline(conf_thresh, linestyle='--', color='black', linewidth=1.0) plt.legend(loc='upper right', fontsize='x-small') axa.set_title('Class Scores for each Detection') # Generate text string for ClaRAN's prediction claran_text = '' for p in range(len(predict['pred_class'])): claran_text += str(predict['pred_class'][p].replace('_', 'C_') + 'P ' + "{:.2f}".format(predict['score'][p])) + " " ax.text(5, 20, '{:s}'.format('ClaRAN: ' + claran_text), bbox=dict(facecolor='None', alpha=0.4, edgecolor='None'), fontsize=8, color='black') # Generate text string for ground truth gt_text = '' for q in ground_truth['act_class']: gt_text += q.replace('_', 'C_') + 'P ' ax.text(5, 40, '{:s}'.format('Ground Truth: ' + gt_text), bbox=dict(facecolor='None', alpha=0.4, edgecolor='None'), fontsize=8, color='black') plt.tight_layout() fig.subplots_adjust(top=0.85) plt.draw() # save results to CSV if ClaRAN has found a single source #if num_sources == 1: #dict_to_csv( dict(ground_truth.items() + predict.items()) ) return 0