def test_net(sess, net, imdb, weights_filename):
    """Test a Fast R-CNN network on an image database."""
    np.random.seed(cfg.RNG_SEED)
    num_images = len(imdb.image_index)
    output_dir = get_output_dir(imdb, weights_filename)
    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}
    # all_boxes[image][class] is a list of (x1, y1, x2, y2, score) rows
    all_boxes = [[[] for _ in range(imdb.num_classes)]
                 for _ in range(num_images)]
    # print(all_boxes)
    for i in range(num_images):
        print('***********', imdb.image_path_at(i))
        img = cv2.imread(imdb.image_path_at(i))
        img, scale = resize_im(img, scale=TextLineCfg.SCALE,
                               max_scale=TextLineCfg.MAX_SCALE)
        # time each image; the original called toc() only once at the very
        # end, so the per-image print always reported 0.000s
        _t['im_detect'].tic()
        scores, boxes = test_ctpn(sess, net, img)
        textdetector = TextDetector()
        boxes = textdetector.detect(boxes, scores[:, np.newaxis], img.shape[:2])
        _t['im_detect'].toc()
        print('Detection took {:.3f}s for {:d} object proposals'.format(
            _t['im_detect'].average_time, boxes.shape[0]))
        boxes = check_unreasonable_box(boxes, scale)
        all_boxes[i][1] += boxes
    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)
    imdb.evaluate_detections(all_boxes, output_dir)
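# The Timer used throughout these snippets is not defined here. Below is a
# minimal sketch of the interface these functions rely on (tic/toc plus the
# total_time and average_time fields), modeled on the py-faster-rcnn utility;
# the actual class in each repo may differ (some variants further down expose
# total_time() and average_time() as methods instead of attributes).
import time

class Timer(object):
    """Simple timer that accumulates wall-clock time over tic/toc calls."""

    def __init__(self):
        self.total_time = 0.
        self.calls = 0
        self.start_time = 0.
        self.diff = 0.
        self.average_time = 0.

    def tic(self):
        # time.time() is adequate for the coarse benchmarks in these snippets
        self.start_time = time.time()

    def toc(self, average=True):
        self.diff = time.time() - self.start_time
        self.total_time += self.diff
        self.calls += 1
        self.average_time = self.total_time / self.calls
        return self.average_time if average else self.diff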
def ctpn(sess, net, image_name, save_path1, save_path2):
    timer = Timer()
    timer.tic()
    # read the image
    img = cv2.imread(image_name)
    img, scale = resize_im(img, scale=TextLineCfg.SCALE,
                           max_scale=TextLineCfg.MAX_SCALE)
    # grayscale preprocessing (disabled)
    # img2 = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    # img2 = cv2.cvtColor(img2, cv2.COLOR_GRAY2RGB)
    # base_name = im_name.split('\\')[-1]
    # cv2.imwrite(os.path.join("data/results2", base_name), img2)
    scores, boxes = test_ctpn(sess, net, img)
    # post-processing: detect() both filters and merges text proposals
    textdetector = TextDetector()
    boxes = textdetector.detect(boxes, scores[:, np.newaxis], img.shape[:2])
    draw_boxes2(img, boxes, image_name, save_path2, scale)
    draw_boxes(img, boxes, image_name, save_path1, scale)
    # post-processing: detect2() only filters out small text boxes
    # textdetector = TextDetector()
    # boxes = textdetector.detect2(boxes, scores[:, np.newaxis], img.shape[:2])
    # draw_boxes3(img, boxes, image_name, scale)
    timer.toc()
    print('Detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time, boxes.shape[0]))
def test():
    with torch.cuda.device(0):
        with torch.no_grad():
            args = parse_args()
            if args.config_file is not None:
                cfg_from_file(args.config_file)
            # test_model()
            s = Solver(args)
            model = s.model
            _t = Timer()
            batch_size = 16
            timing_array = []
            # set up flops counting once; the original re-wrapped the model
            # and restarted the counter on every loop iteration, which also
            # polluted the timing measurements
            model = add_flops_counting_methods(model)
            model.eval().start_flops_count()
            for i in range(1000):
                _t.tic()
                batch = torch.FloatTensor(batch_size, 3,
                                          cfg.DATASET.IMAGE_SIZE[0],
                                          cfg.DATASET.IMAGE_SIZE[1]).cuda(0)
                out = model(batch)
                inf_time = _t.toc()
                timing_array.append(inf_time)
            print("Inference Time Mean: {:0.6f} Std Dev: {:0.6f}".format(
                np.mean(timing_array) * 1000 / batch_size,
                np.std(timing_array) * 1000 / batch_size))
            # print(model)
            # print('Output shape: {}'.format(list(out.shape)))
            print('Flops: {}'.format(flops_to_string(model.compute_average_flops_cost())))
            print('Params: ' + get_model_parameters_number(model))
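# CUDA kernels launch asynchronously, so wall-clock tic/toc around
# model(batch) can under-measure actual inference time. A sketch of a
# synchronized timing loop under the same assumptions as above (a CUDA
# model and a pre-built input batch):
import time
import torch

def time_inference(model, batch, iters=100):
    torch.cuda.synchronize()  # drain pending kernels before starting the clock
    start = time.time()
    with torch.no_grad():
        for _ in range(iters):
            model(batch)
    torch.cuda.synchronize()  # wait for all launched kernels to finish
    return (time.time() - start) / iters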
def ctpn(sess, net, image_name, boxlabel):
    timer = Timer()
    timer.tic()
    img = cv2.imread(image_name)
    img, scale = resize_im(img, scale=TextLineCfg.SCALE,
                           max_scale=TextLineCfg.MAX_SCALE)
    scores, boxes = test_ctpn(sess, net, img)
    textdetector = TextDetector()
    boxes = textdetector.detect(boxes, scores[:, np.newaxis], img.shape[:2])
    img = draw_boxes(img, image_name, boxes, scale, None)
    # expand the (x1, y1, x2, y2) ground-truth labels into 8-point quads
    # plus a constant score of 1
    boxlabel2 = np.transpose(
        np.array([
            boxlabel[:, 0], boxlabel[:, 1],
            boxlabel[:, 2], boxlabel[:, 1],
            boxlabel[:, 0], boxlabel[:, 3],
            boxlabel[:, 2], boxlabel[:, 3],
            np.ones(len(boxlabel))
        ]))
    draw_boxes(img, image_name, boxlabel2, 1, (0, 0, 0))
    timer.toc()
    print('Detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time, boxes.shape[0]))
    boxes = boxes / scale
    return boxes
def ctpn(sess, net, image_name, dst, draw_img=False, show_area=False,
         area_min=-0.1, area_max=1.1):
    timer = Timer()
    timer.tic()
    img = cv2.imread(image_name)
    img, scale = resize_im(img, scale=TextLineCfg.SCALE,
                           max_scale=TextLineCfg.MAX_SCALE)
    scores, boxes = test_ctpn(sess, net, img)
    textdetector = TextDetector()
    boxes = textdetector.detect(boxes, scores[:, np.newaxis], img.shape[:2])
    ret = draw_boxes(img, image_name, boxes, scale, dst, draw_img=draw_img,
                     show_area=show_area, area_min=area_min, area_max=area_max)
    timer.toc()
    print('Detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time, boxes.shape[0]))
    return ret
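# resize_im, used by the CTPN snippets above, is assumed to follow the
# text-detection-ctpn convention: scale the shorter side up to `scale`
# unless that would push the longer side past `max_scale`. A minimal
# sketch under that assumption:
import cv2

def resize_im(im, scale, max_scale=None):
    # factor that brings the shorter side to `scale`
    f = float(scale) / min(im.shape[0], im.shape[1])
    # cap the longer side at `max_scale` if requested
    if max_scale is not None and f * max(im.shape[0], im.shape[1]) > max_scale:
        f = float(max_scale) / max(im.shape[0], im.shape[1])
    return cv2.resize(im, None, None, fx=f, fy=f,
                      interpolation=cv2.INTER_LINEAR), f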
def demo(sess, net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""
    # Load the demo image
    im_file = os.path.join(cfg.FLAGS2["data_dir"], 'demo', image_name)
    im = cv2.imread(im_file)
    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(sess, net, im)
    timer.toc()
    print('Detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time, boxes.shape[0]))
    # Visualize detections for each class
    CONF_THRESH = 0.1
    NMS_THRESH = 0.1
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        vis_detections(im, cls, dets, thresh=CONF_THRESH)
def demo(sess, net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""
    # Load the demo image
    im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name)
    im = cv2.imread(im_file)
    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    # Run the trained network on the image to obtain all predicted boxes
    scores, boxes = im_detect(sess, net, im)
    timer.toc()
    print('Detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time, boxes.shape[0]))
    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls_ind, cls in enumerate(CLASSES[1:]):
        # For each class, take its predicted box scores and coordinates,
        # first run NMS to prune near-duplicate boxes, then draw the kept
        # boxes whose score exceeds CONF_THRESH via vis_detections.
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        vis_detections(im, cls, dets, thresh=CONF_THRESH)
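# The nms(dets, thresh) used throughout these snippets is assumed to follow
# the classic py_cpu_nms contract: dets is (N, 5) as (x1, y1, x2, y2, score)
# and the return value is the list of kept row indices. A reference NumPy
# sketch of that algorithm:
import numpy as np

def py_cpu_nms(dets, thresh):
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # indices sorted by descending score
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # intersection of the current top box with all remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)
        # keep only boxes whose IoU with the kept box is at most thresh
        order = order[np.where(ovr <= thresh)[0] + 1]
    return keep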
def train_model(self, max_iters, snapshot_iters):
    """
    Train the model with max_iters.
    :return saved model paths
    """
    last_snapshot_iter = -1
    timer = Timer()
    model_paths = []
    print("Begin training the model.")
    while self._solver.iter < max_iters:
        timer.tic()
        self._solver.step(1)
        timer.toc()
        # print the speed
        if self._solver.iter % 1000 == 0:
            print('speed: {:.3f}s / iter.'.format(timer.average_time))
        # snapshot the weights
        if self._solver.iter % snapshot_iters == 0:
            last_snapshot_iter = self._solver.iter
            model_paths.append(self.snapshot())
    if last_snapshot_iter != self._solver.iter:
        model_paths.append(self.snapshot())
    return model_paths
def demo(sess, net, image_name):
    # Read the image with OpenCV from the given path
    im_file = os.path.join(cfg.FLAGS2["data_dir"], 'demo', image_name)
    im = cv2.imread(im_file)
    # Run object detection
    timer = Timer()
    timer.tic()
    # Predict scores and locations for the 300 proposal boxes
    scores, boxes = im_detect(sess, net, im)
    timer.toc()
    print('Detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time, boxes.shape[0]))
    # Score threshold for drawing each class's top detections
    CONF_THRESH = 0.1
    # Per-class NMS threshold
    NMS_THRESH = 0.1
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # +1 to skip the background class
        # Box coordinates of every proposal for this class
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        # Scores of every proposal for this class
        cls_scores = scores[:, cls_ind]
        # Stack locations and scores as (x1, y1, x2, y2, score)
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        # Non-maximum suppression with an IoU threshold of 0.1
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        # Draw the detections
        vis_detections(im, cls, dets, thresh=CONF_THRESH)
def demo(sess, net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""
    # Load the demo image
    im = cv2.imread(image_name)
    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(sess, net, im)
    timer.toc()
    # the original applied .format() outside print()'s parentheses, which
    # raises AttributeError on Python 3 (print returns None)
    print('Detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time, boxes.shape[0]))
    # Visualize detections for each class
    im = im[:, :, (2, 1, 0)]
    fig, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(im, aspect='equal')
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        vis_detections(im, cls, dets, ax, thresh=CONF_THRESH)
def boxdetect(sess, net, im_file, output_path):
    """Detect object classes in an image using pre-computed object proposals."""
    # Load the image
    im_file = im_file.replace('\\', '/')
    im = cv2.imread(im_file)
    image_name = im_file.split(r'/')[-1]
    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(sess, net, im)
    timer.toc()
    print('Detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time, boxes.shape[0]))
    # Visualize detections for each class
    CONF_THRESH = 0.1
    NMS_THRESH = 0.1
    geetcode_bbox = []
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        bbox = vis_detections(im, cls, dets, image_name, output_path,
                              thresh=CONF_THRESH)
        geetcode_bbox.append(bbox)
    return geetcode_bbox
def ctpn(sess, net, image_name):
    timer = Timer()
    timer.tic()
    img = cv2.imread(image_name)
    height, width = img.shape[:2]
    # keep only the bottom third of the image
    img = img[int(2 * height / 3.0):height, :]
    img, scale = resize_im(img, scale=TextLineCfg.SCALE,
                           max_scale=TextLineCfg.MAX_SCALE)
    scores, boxes = test_ctpn(sess, net, img)
    # for box in boxes:
    #     color = (0, 255, 0)
    #     cv2.line(img, (int(box[0]), int(box[1])), (int(box[2]), int(box[1])), color, 2)
    #     cv2.line(img, (int(box[0]), int(box[1])), (int(box[0]), int(box[3])), color, 2)
    #     cv2.line(img, (int(box[2]), int(box[1])), (int(box[2]), int(box[3])), color, 2)
    #     cv2.line(img, (int(box[0]), int(box[3])), (int(box[2]), int(box[3])), color, 2)
    # base_name = image_name.split('/')[-1]
    # cv2.imwrite("data/results/test_" + base_name, img)
    # draw_boxes(img, image_name, boxes, scale)
    # print(boxes)
    # assert 0
    textdetector = TextDetector()
    boxes = textdetector.detect(boxes, scores[:, np.newaxis], img.shape[:2])
    draw_boxes(img, image_name, boxes, scale)
    timer.toc()
    print('Detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time, boxes.shape[0]))
def detect(self, image):
    """Detect object classes in an image using pre-computed object proposals."""
    # Detect all object classes and regress object bounds
    image = image_transform_1_3(image)
    timer = Timer()
    timer.tic()
    scores, boxes = self.im_detect(image)
    timer.toc()
    # print('rois--------------', scores)
    print('Detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time, boxes.shape[0]))
    CONF_THRESH = 0.7
    NMS_THRESH = 0.1
    for cls_ind, cls in enumerate(self.classes_detect[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        inds = np.where(dets[:, -1] >= CONF_THRESH)[0]
        dets = dets[inds, :]
    return dets
def demo(sess, net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""
    # Load the demo image
    # im_file = os.path.join(cfg.FLAGS2["data_dir"], 'demo', image_name)
    im_file = os.path.join(path1, image_name)
    im = cv2.imread(im_file)
    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(sess, net, im)
    timer.toc()
    print('Detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time, boxes.shape[0]))
    # Visualize detections for each class
    CONF_THRESH = 0.5
    NMS_THRESH = 0.1
    thresh = CONF_THRESH
    im = im[:, :, (2, 1, 0)]
    fig, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(im, aspect='equal')
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        # vis_detections(im, cls, dets, thresh=CONF_THRESH)
        inds = np.where(dets[:, -1] >= thresh)[0]
        if len(inds) == 0:
            continue
        for i in inds:
            bbox = dets[i, :4]
            score = dets[i, -1]
            ax.add_patch(
                plt.Rectangle((bbox[0], bbox[1]),
                              bbox[2] - bbox[0],
                              bbox[3] - bbox[1],
                              fill=False, edgecolor='red', linewidth=3.5))
            ax.text(bbox[0], bbox[1] - 2,
                    '{:s} {:.3f}'.format(cls, score),
                    bbox=dict(facecolor='blue', alpha=0.5),
                    fontsize=14, color='white')
    plt.axis('off')
    plt.tight_layout()
    plt.draw()
    os.chdir(path2)
    plt.savefig(image_name)  # the original referenced `im_name`, undefined in this scope
def demo(sess, net, image_name, thresh=0.05):
    """Detect object classes in an image using pre-computed object proposals."""
    # Load the demo image
    image = PIL.Image.open(image_name)
    im = cv2.imread(image_name)
    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    im_num = os.path.split(image_name)[1].split('.')[0]
    scores, boxes = im_detect(sess, net, im, save_feature=True,
                              feature_path='./data/conv.npy')
    timer.toc()
    print('Detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time, boxes.shape[0]))
    # Visualize detections for each class
    im = im[:, :, (2, 1, 0)]
    # fig, ax = plt.subplots(figsize=(12, 12))
    # ax.imshow(im, aspect='equal')
    CONF_THRESH = 0.7
    NMS_THRESH = 0.3
    results = []
    name = image_name.split('/')[-1]
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        cls_labels = np.full_like(cls_scores, cls_ind)
        # dets rows are (x1, y1, x2, y2, score, label)
        dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis],
                          cls_labels[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        inds = np.where(dets[:, -2] > thresh)[0]
        dets = dets[inds]
        for i in range(dets.shape[0]):
            category = int(dets[i, -1])
            bbox = list(map(float, dets[i, :4]))
            bbox = [round(b, 2) for b in bbox]
            score = float(dets[i, -2])
            dic = collections.OrderedDict()
            dic['name'] = str(name)
            dic['category'] = int(category)
            dic['bbox'] = bbox
            dic['score'] = float(score)
            results.append(dic)
        im = vis_detections(image, cls, dets, ax=None, thresh=CONF_THRESH)
    out_path = './data/detection_result'
    if not os.path.exists(out_path):
        os.makedirs(out_path)
    out_path = os.path.join(out_path, os.path.split(image_name)[-1])
    image.save(out_path)
def detect(self, image):
    """Detect object classes in an image using pre-computed object proposals."""
    image = image_transform_1_3(image)
    timer = Timer()
    timer.tic()
    scores, boxes = self.im_detect(image)
    timer.toc()
    print('kkk', np.argmax(scores, axis=1))
    print('lll', scores[np.argmax(scores, axis=1) == 4, 4])
    print('Detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time, boxes.shape[0]))
    CONF_THRESH = 0.3
    # print(scores)
    NMS_THRESH = 0.5
    dets_list = []
    for cls_ind, cls in enumerate(self.classes_detect[1:]):
        cls_ind += 1  # because we skipped background
        # the original computed inds before incrementing cls_ind (scoring the
        # wrong column) and then applied the NMS keep indices to the unfiltered
        # dets array; both are fixed here
        inds = np.where(scores[:, cls_ind] > CONF_THRESH)[0]
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        dets = dets[inds, :]          # pre-filter by score before NMS
        keep = nms(dets, NMS_THRESH)  # keep indexes into the filtered set
        dets = dets[keep, :]
        inds = np.where(dets[:, -1] >= CONF_THRESH)[0]
        cls_ind_list = np.empty((len(inds), 1), np.int32)
        cls_ind_list.fill(cls_ind)
        dets = np.hstack((dets[inds, :-1], cls_ind_list))
        dets_list.append(dets)
    dets = np.vstack(dets_list)
    print('jjj', dets)
    return dets
def demo(sess, net, image_name, memory_storex, memory_storey,
         kitti_memory_0323, AN, sess2):
    """Detect object classes in an image using pre-computed object proposals."""
    # Load the demo image
    im = cv2.imread(image_name)
    im = cv2.resize(im, (1242, 375))
    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, bbox_pred, _, rois, fc = im_detect(sess, net, im,
                                               memory_storex, memory_storey)
    timer.toc()
    print('Detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time, bbox_pred.shape[0]))
    # Visualize detections for each class
    CONF_THRESH = 0.1
    NMS_THRESH = 0.1
    im_shape = im.shape[:2]
    box_deltas = bbox_pred
    pred_boxes = bbox_transform_inv(rois, box_deltas)
    boxes = clip_boxes(pred_boxes, im_shape)
    # show.vis_detections(image_name, scores, boxes, dis_pre, fc, NMS_THRESH, CONF_THRESH)
    show.vis_detections(image_name, scores, boxes, fc, kitti_memory_0323,
                        AN, sess2, NMS_THRESH, CONF_THRESH)
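# bbox_transform_inv and clip_boxes above come from the Faster R-CNN box
# utilities. A NumPy sketch of the standard 2D versions (the 3D pipelines
# further down use a 6-coordinate analogue of the same idea):
import numpy as np

def bbox_transform_inv(boxes, deltas):
    """Apply predicted (dx, dy, dw, dh) deltas to anchor/ROI boxes."""
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights
    dx, dy = deltas[:, 0::4], deltas[:, 1::4]
    dw, dh = deltas[:, 2::4], deltas[:, 3::4]
    pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
    pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
    pred_w = np.exp(dw) * widths[:, np.newaxis]
    pred_h = np.exp(dh) * heights[:, np.newaxis]
    pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
    pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w  # x1
    pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h  # y1
    pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w  # x2
    pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h  # y2
    return pred_boxes

def clip_boxes(boxes, im_shape):
    """Clip boxes to image boundaries; im_shape is (height, width)."""
    boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)
    boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)
    boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)
    boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)
    return boxes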
def detect(self, image):
    """Detect object classes in an image using pre-computed object proposals."""
    image = image_transform_1_3(image)
    timer = Timer()
    timer.tic()
    scores, boxes = self.im_detect(image)
    timer.toc()
    print('rois--------------', scores)
    print('Detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time, len(boxes)))
    CONF_THRESH = 0.3
    # print(scores)
    NMS_THRESH = 0.5
    dets = []
    for i in range(len(boxes)):
        # print('lll')
        cls_boxes = boxes[i]
        cls_scores = scores[i]
        dets_i_ = np.hstack([cls_boxes[:, 0:4], cls_scores])
        keep = nms(dets_i_, NMS_THRESH)
        dets_i = np.hstack([cls_boxes, cls_scores])
        dets_i = dets_i[keep, :]
        inds = np.where(dets_i[:, -1] >= CONF_THRESH)[0]
        dets_i = dets_i[inds, :]
        dets_i = dets_i[:, 0:5]
        dets.append(dets_i)
    return dets
def demo_video(sess, net, frame, camera_url):
    """Detect object classes in an image using pre-computed object proposals."""
    im = frame
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(sess, net, im)
    timer.toc()
    # Visualize detections for each class
    CONF_THRESH = 0.6  # threshold
    NMS_THRESH = 0.1
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        inds = np.where(dets[:, -1] >= CONF_THRESH)[0]
        # grouped the bird-class test: in the original, `and len(inds) != 0`
        # bound only to the last `cls == 'sparrow'` comparison
        if cls in ('crow', 'magpie', 'pigeon', 'swallow', 'sparrow') \
                and len(inds) != 0:
            if time.time() - timer_trigger.start_time > residence_time:
                images = vis_detections_video(im, cls, dets, timer.start_time,
                                              timer.total_time, inds, CONF_THRESH)
                socket_client_target_detection(cls, len(inds), images,
                                               time.ctime(), camera_url, True)
                timer_trigger.tic()  # reset the trigger start time
            else:
                images = vis_detections_video(im, cls, dets, timer.start_time,
                                              timer.total_time, inds, CONF_THRESH)
                socket_client_target_detection(cls, len(inds), images,
                                               time.ctime(), camera_url, False)
        elif cls == 'airplane' and len(inds) != 0:
            pass
        elif cls == 'person' and len(inds) != 0:
            pass
        else:
            pass
def process_frame(self, video_name, im_name, CLASSES, CONF_THRESH):
    # Output frame path
    im_path_ = os.path.join(api_config.upload_folder,
                            video_name.split(".")[0],
                            "annotated-frames", os.path.basename(im_name))
    im = np.array(Image.open(im_name))
    im = im[:, :, ::-1]
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(self.sess, self.net, im)
    timer.toc()
    print('Detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time, boxes.shape[0]))
    NMS_THRESH = 0.3
    im = im[:, :, (2, 1, 0)]
    fig, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(im, aspect='equal')
    self.annotation = xml_setup(im_name, im.shape)
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        self.draw(im_path_, cls, dets, ax, thresh=CONF_THRESH)
    xml_write(video_name, os.path.basename(im_name), self.annotation)
    plt.savefig(im_path_, bbox_inches='tight')
    plt.close()
def demo(sess, net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""
    # Load the demo image
    im = cv2.imread(image_name)
    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(sess, net, im)
    timer.toc()
    print('Detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time, boxes.shape[0]))
    # Visualize detections for each class
    im = im[:, :, (2, 1, 0)]
    fig, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(im, aspect='equal')
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        vis_detections(im, cls, dets, ax, thresh=CONF_THRESH)
def video_demo(sess, net, image):
    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes, _ = im_detect_bbox_kpoints(sess, net, image)
    # scores, boxes, points = im_detect(sess, net, image)
    # print("scores:", scores.shape)  --> (n, 1)
    timer.toc()
    print('Detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time, boxes.shape[0]))
    # Visualize detections for each class
    CONF_THRESH = 0.6
    NMS_THRESH = 0.3
    inds = np.where(scores[:, 0] > CONF_THRESH)[0]
    scores = scores[inds, 0]
    boxes = boxes[inds, :]
    # points = points[inds, :]
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    # dets = np.hstack((boxes, scores[:, np.newaxis], points)).astype(np.float32, copy=False)
    keep = nms(dets, NMS_THRESH)
    dets = dets[keep, :]
    return dets
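# Hypothetical caller for video_demo in a capture loop; the window name and
# drawing style are illustrative, not from the original code. Each returned
# row is (x1, y1, x2, y2, score) after the confidence cut and NMS.
import cv2

def run_camera(sess, net, source=0):
    cap = cv2.VideoCapture(source)
    while cap.isOpened():
        ok, frame = cap.read()
        if not ok:
            break
        dets = video_demo(sess, net, frame)
        for x1, y1, x2, y2, score in dets:
            cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)),
                          (0, 255, 0), 2)
        cv2.imshow('detections', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()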
def SignalImage_Test(sess, net, image_path):
    im = cv2.imread(image_path)
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(sess, net, im)
    timer.toc()
    print('Detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time, boxes.shape[0]))
    # Visualize detections for each class
    CONF_THRESH = 0.4
    NMS_THRESH = 0.35
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        # print('\nboxes:', boxes)
        # print('\ncls_boxes:', cls_boxes)
        # print('\n ', boxes.shape)
        # print(len(cls_boxes), len(boxes))
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        vis_detections(im, cls, dets, thresh=CONF_THRESH)
def demo(sess, net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""
    # Load the demo image
    im = readimage(image_name)
    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(sess, net, im)
    timer.toc()
    # print('rois--------------', scores)
    print('Detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time, boxes.shape[0]))
    CONF_THRESH = 0.7
    NMS_THRESH = 0.3
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        vis(im, image_name, cls, dets, thresh=CONF_THRESH)
def demo(sess, net, image_name):
    # Load the target image
    im_file = os.path.join('test_images', image_name)
    im = cv2.imread(im_file)
    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(sess, net, im)
    timer.toc()
    print('Detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time, boxes.shape[0]))
    # Visualize the detections for each class
    CONF_THRESH = 0.1
    NMS_THRESH = 0.1
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped the background class
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        vis_detections(im, cls, dets, thresh=CONF_THRESH)  # was misspelled `vis_dections`
def ctpn(sess, net, image_name):
    timer = Timer()
    timer.tic()
    img = cv2.imread(image_name)
    img, scale = resize_im(img, scale=TextLineCfg.SCALE,
                           max_scale=TextLineCfg.MAX_SCALE)
    scores, boxes = test_ctpn(sess, net, img)
    new_scores = scores[:, np.newaxis]
    keep_inds = np.where(new_scores > TextLineCfg.TEXT_PROPOSALS_MIN_SCORE)[0]
    boxes, new_scores = boxes[keep_inds], new_scores[keep_inds]
    sorted_indices = np.argsort(new_scores.ravel())[::-1]
    boxes, new_scores = boxes[sorted_indices], new_scores[sorted_indices]
    keep_inds = nms(np.hstack((boxes, new_scores)),
                    TextLineCfg.TEXT_PROPOSALS_NMS_THRESH)
    boxes, new_scores = boxes[keep_inds], new_scores[keep_inds]
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.figure(figsize=(10, 14))
    for key, box in enumerate(boxes):
        img_inside = img.copy()
        img_inside = cv2.rectangle(img_inside,
                                   (int(box[0]), int(box[1])),
                                   (int(box[2]), int(box[3])),
                                   color=(255, 0, 0), thickness=2)
        plt.imshow(img_inside)
        # index the filtered and re-sorted scores; the original indexed the
        # raw `scores` array, which no longer lines up with `boxes`
        plt.title('Scores: {0}'.format(new_scores[key]))
        plt.savefig('./data/fig/fig_{0}.jpg'.format(key))
def test_net_on_dataset(args, dataset_name, proposal_file, output_dir,
                        multi_gpu=False, gpu_id=0):
    """Run inference on a dataset."""
    dataset = JsonDataset(dataset_name)
    test_timer = Timer()
    test_timer.tic()
    if multi_gpu:
        num_images = len(dataset.get_roidb())
        all_boxes, all_segms, all_keyps = multi_gpu_test_net_on_dataset(
            args, dataset_name, proposal_file, num_images, output_dir)
    else:
        all_boxes, all_segms, all_keyps = test_net(
            args, dataset_name, proposal_file, output_dir, gpu_id=gpu_id)
    test_timer.toc()
    logger.info('Total inference time: {:.3f}s'.format(
        test_timer.average_time))
    results = task_evaluation.evaluate_all(dataset, all_boxes, all_segms,
                                           all_keyps, output_dir)
    return results
def ctpn(sess, net, image_name):
    timer = Timer()
    timer.tic()
    img = cv2.imread(image_name)
    img, scale = resize_im(img, scale=TextLineCfg.SCALE,
                           max_scale=TextLineCfg.MAX_SCALE)
    # convert the OpenCV image to a PIL image
    pil_img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    # estimate image sharpness via the variance of the Laplacian
    imageVar = cv2.Laplacian(img, cv2.CV_64F).var()
    if imageVar <= 5000:
        pil_img = ImageEnhance.Sharpness(pil_img).enhance(3.0)
    # convert the PIL image back to an OpenCV image
    img = cv2.cvtColor(np.asarray(pil_img), cv2.COLOR_RGB2BGR)
    scores, boxes = test_ctpn(sess, net, img)
    textdetector = TextDetector()
    boxes = textdetector.detect(boxes, scores[:, np.newaxis], img.shape[:2])
    draw_boxes(img, image_name, boxes, scale)
    timer.toc()
    print('Detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time, boxes.shape[0]))
def demo(net, matlab, image_filepath, classes, method, par1, par2):
    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    # Load pre-computed Selective Search object proposals
    obj_proposals = ROI_boxes(matlab, image_filepath, method, par1, par2)
    global OP_num
    OP_num = len(obj_proposals)
    if len(obj_proposals) == 0:
        dets = []
        timer.toc()
        return dets, timer.total_time
    # Load the demo image
    im = cv2.imread(image_filepath)
    scores, boxes = im_detect(net, im, obj_proposals)
    timer.toc()
    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls in classes:
        cls_ind = CLASSES.index(cls)
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        keep = np.where(cls_scores >= CONF_THRESH)[0]
        cls_boxes = cls_boxes[keep, :]
        cls_scores = cls_scores[keep]
        dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
    return dets, timer.total_time
def test_epoch(self, model, data_loader, detector, output_dir, use_gpu):
    model.eval()
    dataset = data_loader.dataset
    num_images = len(dataset)
    num_classes = detector.num_classes
    all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))
    _t = Timer()
    for i in range(num_images):
        img = dataset.pull_image(i)
        scale = [img.shape[1], img.shape[0], img.shape[1], img.shape[0]]
        with torch.no_grad():
            images = torch.Tensor(
                dataset.preproc(img)[0].unsqueeze(0).to(self.device)).to(
                    self.device)
        _t.tic()
        # forward
        out = model(images, phase='eval')
        # detect
        detections = detector.forward(out)
        time = _t.toc()
        # TODO: make it smart:
        for j in range(1, num_classes):
            cls_dets = list()
            for det in detections[0][j]:
                if det[0] > 0:
                    d = det.cpu().numpy()
                    score, box = d[0], d[1:]
                    box *= scale
                    box = np.append(box, score)
                    cls_dets.append(box)
            if len(cls_dets) == 0:
                cls_dets = empty_array
            all_boxes[j][i] = np.array(cls_dets)
        # log per iter
        log = '{iters:d}/{epoch_size:d} in {time:.3f}s [{progress}]\r'.format(
            progress='#' * int(round(10 * i / num_images)) +
                     '-' * int(round(10 * (1 - i / num_images))),
            iters=i, epoch_size=num_images, time=time)
        sys.stdout.write(log)
        sys.stdout.flush()
    # write result to pkl
    with open(os.path.join(output_dir, 'detections.pkl'), 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)
    # currently the COCO dataset does not return the mean AP or AP 0.5:0.95 values
    print('Evaluating detections')
    data_loader.dataset.evaluate_detections(all_boxes, output_dir)
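# The all_boxes structure built above follows the py-faster-rcnn convention:
# all_boxes[class_index][image_index] is an (n, 5) array of
# (x1, y1, x2, y2, score) rows, with class 0 (background) left empty.
# A sketch of iterating it (names are illustrative):
# for j in range(1, num_classes):
#     for i in range(num_images):
#         dets = all_boxes[j][i]  # may be the empty (0, 5) array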
def demo(sess, net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""
    # Load the demo image (raw string so the backslashes are not treated as escapes)
    im_file = os.path.join(
        r'G:\DeepLearning\Project\LJProject\Faster-RCNN\Faster-RCNN-TensorFlow-Python3-master-NEU\data\demo',
        image_name)
    im = cv2.imread(im_file)
    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    # `boxes` holds the bbox_pred-refined coordinates; every predicted class
    # gets its own regressed box
    scores, boxes = im_detect(sess, net, im)
    timer.toc()
    print('Detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time, boxes.shape[0]))
    # Visualize detections for each class
    CONF_THRESH = 0.1
    NMS_THRESH = 0.1
    # draw once per class
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        # NMS removes, from the 300 proposals, any proposal whose IoU with a
        # higher-scoring proposal exceeds 0.1
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        vis_detections(im, cls, dets, thresh=CONF_THRESH)
def time_analyse(matlab, cmd, image_filepath, par1, par2):
    timer = Timer()
    timer.tic()
    obj_proposals = ROI_boxes(matlab, image_filepath, cmd, par1, par2)
    timer.toc()
    time = timer.total_time
    box_number = len(obj_proposals)
    return time, box_number, obj_proposals
def ctpn(sess, net, image_name):
    timer = Timer()
    timer.tic()
    img = cv2.imread(image_name)
    img, scale = resize_im(img, scale=TextLineCfg.SCALE,
                           max_scale=TextLineCfg.MAX_SCALE)
    scores, boxes = test_ctpn(sess, net, img)
    textdetector = TextDetector()
    boxes = textdetector.detect(boxes, scores[:, np.newaxis], img.shape[:2])
    draw_boxes(img, image_name, boxes, scale)
    timer.toc()
    print('Detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time, boxes.shape[0]))
def demo(net, matlab, image_filepath, classes, args):
    """Detect object classes in an image using pre-computed object proposals."""
    timer = Timer()
    timer.tic()
    # Load pre-computed Selective Search object proposals
    obj_proposals = ROI_boxes(matlab, image_filepath, args.OP_method)
    if len(obj_proposals) == 0:
        return
    # Load the demo image
    im = cv2.imread(image_filepath)
    # Detect all object classes and regress object bounds
    scores, boxes = im_detect(net, im, obj_proposals)
    timer.toc()
    print('Detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time, boxes.shape[0]))
    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls in classes:
        cls_ind = CLASSES.index(cls)
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        keep = np.where(cls_scores >= CONF_THRESH)[0]
        cls_boxes = cls_boxes[keep, :]
        cls_scores = cls_scores[keep]
        dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        if len(dets) == 0:
            global count
            count += 1
            print('{} No Ear detected'.format(count))
        # print('All {} detections with p({} | box) >= {:.1f}'.format(cls, cls, CONF_THRESH))
        if args.video_mode:
            visualise(im, cls, dets, thresh=CONF_THRESH)
        elif args.image_path is not None:
            vis_detections(im, cls, dets, thresh=CONF_THRESH)
def demo(net, image_name, classes):
    """Detect object classes in an image using pre-computed object proposals."""
    # Load pre-computed Selective Search object proposals
    box_file = os.path.join(cfg.ROOT_DIR, 'data', 'demo',
                            image_name + '_boxes.mat')
    obj_proposals = sio.loadmat(box_file)['boxes']
    # Load the demo image
    im_file = os.path.join(cfg.ROOT_DIR, 'data', 'demo', image_name + '.jpg')
    im = cv2.imread(im_file)
    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im, obj_proposals)
    timer.toc()
    print('Detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time, boxes.shape[0]))
    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls in classes:
        cls_ind = CLASSES.index(cls)
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        keep = np.where(cls_scores >= CONF_THRESH)[0]
        cls_boxes = cls_boxes[keep, :]
        cls_scores = cls_scores[keep]
        dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        print('All {} detections with p({} | box) >= {:.1f}'.format(
            cls, cls, CONF_THRESH))
        vis_detections(im, cls, dets, thresh=CONF_THRESH)
def train_model(self, epochs):
    # 1. construct the computation graph
    self.net.init_modules()
    # save net structure to data folder
    net_f = open(os.path.join(self.output_dir, 'nn.txt'), 'w')
    net_f.write(str(self.net))
    net_f.close()
    # find previous snapshot
    lsf, nfiles, sfiles = self.find_previous()
    # 2. restore weights
    if lsf == 0:
        lr, last_iter, stepsizes, self.np_paths, self.ss_paths = self.initialize()
    else:
        lr, last_iter, stepsizes, self.np_paths, self.ss_paths = self.restore(
            str(sfiles[-1]), str(nfiles[-1]))
    # 3. fix weights and eval mode
    self.fix_eval_parts()
    # construct optimizer
    self.construct_optimizer(lr)
    if len(stepsizes) != 0:
        next_stepsize = stepsizes.pop(0)
    else:
        next_stepsize = -1
    train_timer = Timer()
    current_snapshot_epoch = int(last_iter / len(self.dataloader_train))
    for epoch in range(current_snapshot_epoch, epochs):
        print("start epoch {}".format(epoch))
        with output(initial_len=9, interval=0) as content:
            for iter, blobs in enumerate(tqdm(self.dataloader_train)):
                last_iter += 1
                # adjust learning rate
                if last_iter == next_stepsize:
                    lr *= cfg.GAMMA
                    self.scale_lr(self.optimizer, lr)
                    if len(stepsizes) != 0:
                        next_stepsize = stepsizes.pop(0)
                batch_size = blobs['data'].shape[0]
                if len(blobs['gt_box']) < batch_size:
                    # invalid sample
                    continue
                train_timer.tic()
                # IMAGE PART
                if cfg.USE_IMAGES:
                    grid_shape = blobs['data'].shape[-3:]
                    projection_helper = ProjectionHelper(
                        cfg.INTRINSIC, cfg.PROJ_DEPTH_MIN, cfg.PROJ_DEPTH_MAX,
                        cfg.DEPTH_SHAPE, grid_shape, cfg.VOXEL_SIZE)
                    proj_mapping = [[projection_helper.compute_projection(d.cuda(), c.cuda(), t.cuda())
                                     for d, c, t in zip(blobs['nearest_images']['depths'][i],
                                                        blobs['nearest_images']['poses'][i],
                                                        blobs['nearest_images']['world2grid'][i])]
                                    for i in range(batch_size)]
                    jump_flag = False
                    for i in range(batch_size):
                        if None in proj_mapping[i]:
                            # invalid sample
                            jump_flag = True
                            break
                    if jump_flag:
                        continue
                    blobs['proj_ind_3d'] = []
                    blobs['proj_ind_2d'] = []
                    for i in range(batch_size):
                        proj_mapping0, proj_mapping1 = zip(*proj_mapping[i])
                        blobs['proj_ind_3d'].append(torch.stack(proj_mapping0))
                        blobs['proj_ind_2d'].append(torch.stack(proj_mapping1))
                self.net.forward(blobs)
                self.optimizer.zero_grad()
                self.net._losses["total_loss"].backward()
                self.optimizer.step()
                train_timer.toc()
                # Display training information
                if iter % cfg.DISPLAY == 0:
                    self.log_print(epoch * len(self.dataloader_train) + iter,
                                   lr, content, train_timer.average_time())
                self.net.delete_intermediate_states()
                # validate if satisfying the time criterion
                if train_timer.total_time() / 3600 >= cfg.VAL_TIME:
                    print('------------------------VALIDATION------------------------------')
                    self.validation(last_iter, 'val')
                    print('------------------------TRAINVAL--------------------------------')
                    self.validation(last_iter, 'trainval')
                    # snapshot
                    if cfg.VAL_TIME > 0.0:
                        ss_path, np_path = self.snapshot(last_iter)
                        self.np_paths.append(np_path)
                        self.ss_paths.append(ss_path)
                        # remove old snapshots if too many
                        if len(self.np_paths) > cfg.SNAPSHOT_KEPT and cfg.SNAPSHOT_KEPT:
                            self.remove_snapshot()
                    train_timer.clean_total_time()
def test(net, data_loader, data_logger):
    #####################################
    # Preparation
    #####################################
    os.makedirs(cfg.TEST_SAVE_DIR, exist_ok=True)
    mAP_CLASSIFICATION = Evaluate_metric(cfg.NUM_CLASSES, ignore_class=[0],
                                         overlap_threshold=cfg.MAP_THRESH)
    mAP_MASK = Evaluate_metric(cfg.NUM_CLASSES, ignore_class=[0],
                               overlap_threshold=cfg.MAP_THRESH)
    ####################################
    # Accumulate data
    ####################################
    pred_all = {}
    gt_all = {}
    timer = Timer()
    timer.tic()
    print('starting test on whole scan....')
    for iter, blobs in enumerate(tqdm(data_loader)):
        try:
            gt_box = blobs['gt_box'][0].numpy()[:, 0:6]
            gt_class = blobs['gt_box'][0][:, 6].numpy()
        except:
            continue
        # color projection
        killing_inds = None
        if cfg.USE_IMAGES:
            grid_shape = blobs['data'].shape[-3:]
            projection_helper = ProjectionHelper(
                cfg.INTRINSIC, cfg.PROJ_DEPTH_MIN, cfg.PROJ_DEPTH_MAX,
                cfg.DEPTH_SHAPE, grid_shape, cfg.VOXEL_SIZE)
            if grid_shape[0] * grid_shape[1] * grid_shape[2] > cfg.MAX_VOLUME or \
                    blobs['nearest_images']['depths'][0].shape[0] > cfg.MAX_IMAGE:
                proj_mapping = [projection_helper.compute_projection(d, c, t)
                                for d, c, t in zip(blobs['nearest_images']['depths'][0],
                                                   blobs['nearest_images']['poses'][0],
                                                   blobs['nearest_images']['world2grid'][0])]
            else:
                proj_mapping = [projection_helper.compute_projection(d.cuda(), c.cuda(), t.cuda())
                                for d, c, t in zip(blobs['nearest_images']['depths'][0],
                                                   blobs['nearest_images']['poses'][0],
                                                   blobs['nearest_images']['world2grid'][0])]
            killing_inds = []
            real_proj_mapping = []
            if None in proj_mapping:
                # invalid sample: drop frames with no valid projection
                for killing_ind, killing_item in enumerate(proj_mapping):
                    if killing_item is None:
                        killing_inds.append(killing_ind)
                    else:
                        real_proj_mapping.append(killing_item)
                print('{}: (invalid sample: no valid projection)'.format(blobs['id']))
            else:
                real_proj_mapping = proj_mapping
            blobs['proj_ind_3d'] = []
            blobs['proj_ind_2d'] = []
            proj_mapping0, proj_mapping1 = zip(*real_proj_mapping)
            blobs['proj_ind_3d'].append(torch.stack(proj_mapping0))
            blobs['proj_ind_2d'].append(torch.stack(proj_mapping1))
        net.forward(blobs, 'TEST', killing_inds)
        # test with detection pipeline
        pred_class = net._predictions['cls_pred'].data.cpu().numpy()
        rois = net._predictions['rois'][0].cpu()
        box_reg_pre = net._predictions["bbox_pred"].data.cpu().numpy()
        box_reg = np.zeros((box_reg_pre.shape[0], 6))
        pred_conf_pre = net._predictions['cls_prob'].data.cpu().numpy()
        pred_conf = np.zeros((pred_conf_pre.shape[0]))
        for pred_ind in range(pred_class.shape[0]):
            box_reg[pred_ind, :] = box_reg_pre[
                pred_ind, pred_class[pred_ind] * 6:(pred_class[pred_ind] + 1) * 6]
            pred_conf[pred_ind] = pred_conf_pre[pred_ind, pred_class[pred_ind]]
        pred_box = bbox_transform_inv(rois, torch.from_numpy(box_reg).float())
        pred_box = clip_boxes(pred_box, net._scene_info[:3]).numpy()
        scene_id = blobs['id'][0].split('/')[-1][:12]
        os.makedirs('{}/{}'.format(cfg.TEST_SAVE_DIR, scene_id), exist_ok=True)
        np.save('{}/{}/pred_class'.format(cfg.TEST_SAVE_DIR, scene_id), pred_class)
        np.save('{}/{}/pred_conf'.format(cfg.TEST_SAVE_DIR, scene_id), pred_conf)
        np.save('{}/{}/pred_box'.format(cfg.TEST_SAVE_DIR, scene_id), pred_box)
        np.save('{}/{}/scene'.format(cfg.TEST_SAVE_DIR, scene_id),
                np.where(blobs['data'][0, 0].numpy() <= 1, 1, 0))
        np.save('{}/{}/gt_class'.format(cfg.TEST_SAVE_DIR, scene_id), gt_class)
        np.save('{}/{}/gt_box'.format(cfg.TEST_SAVE_DIR, scene_id), gt_box)
        # pick up boxes above the class-confidence threshold
        sort_index = []
        for conf_index in range(pred_conf.shape[0]):
            if pred_conf[conf_index] > cfg.CLASS_THRESH:
                sort_index.append(True)
            else:
                sort_index.append(False)
        # eliminate degenerate boxes
        for idx, box in enumerate(pred_box):
            if round(box[0]) >= round(box[3]) or round(box[1]) >= round(box[4]) \
                    or round(box[2]) >= round(box[5]):
                sort_index[idx] = False
        mAP_CLASSIFICATION.evaluate(
            pred_box[sort_index], pred_class[sort_index],
            pred_conf[sort_index], gt_box, gt_class)
        if cfg.USE_MASK:
            gt_mask = blobs['gt_mask'][0]
            # pick up boxes above the class-confidence threshold (as above)
            sort_index = []
            for conf_index in range(pred_conf.shape[0]):
                if pred_conf[conf_index] > cfg.CLASS_THRESH:
                    sort_index.append(True)
                else:
                    sort_index.append(False)
            # eliminate degenerate boxes
            for idx, box in enumerate(pred_box):
                if round(box[0]) >= round(box[3]) or round(box[1]) >= round(box[4]) \
                        or round(box[2]) >= round(box[5]):
                    sort_index[idx] = False
            # test with mask pipeline
            net.mask_backbone.eval()
            net.mask_backbone.cuda()
            mask_pred_batch = []
            for net_i in range(1):
                mask_pred = []
                for pred_box_ind, pred_box_item in enumerate(pred_box):
                    if sort_index[pred_box_ind]:
                        mask_pred.append(net.mask_backbone(
                            Variable(blobs['data'].cuda())[
                                net_i:net_i + 1, :,
                                int(round(pred_box_item[0])):int(round(pred_box_item[3])),
                                int(round(pred_box_item[1])):int(round(pred_box_item[4])),
                                int(round(pred_box_item[2])):int(round(pred_box_item[5]))],
                            [] if cfg.USE_IMAGES else None))
                mask_pred_batch.append(mask_pred)
            net._predictions['mask_pred'] = mask_pred_batch
            # save test result
            pred_mask = []
            mask_ind = 0
            for ind, cls in enumerate(pred_class):
                if sort_index[ind]:
                    mask = net._predictions['mask_pred'][0][mask_ind][0][cls].data.cpu().numpy()
                    mask = np.where(mask >= cfg.MASK_THRESH, 1, 0).astype(np.float32)
                    pred_mask.append(mask)
                    mask_ind += 1
            pickle.dump(pred_mask,
                        open('{}/{}/pred_mask'.format(cfg.TEST_SAVE_DIR, scene_id), 'wb'))
            pickle.dump(sort_index,
                        open('{}/{}/pred_mask_index'.format(cfg.TEST_SAVE_DIR, scene_id), 'wb'))
            pickle.dump(gt_mask,
                        open('{}/{}/gt_mask'.format(cfg.TEST_SAVE_DIR, scene_id), 'wb'))
            mAP_MASK.evaluate_mask(
                pred_box[sort_index], pred_class[sort_index],
                pred_conf[sort_index], pred_mask,
                gt_box, gt_class, gt_mask, net._scene_info)
    timer.toc()
    print('It took {:.3f}s for test on whole scenes'.format(timer.total_time()))
    ###################################
    # Summary
    ###################################
    if cfg.USE_CLASS:
        mAP_CLASSIFICATION.finalize()
        print('mAP of CLASSIFICATION: {}'.format(mAP_CLASSIFICATION.mAP()))
        for class_ind in range(cfg.NUM_CLASSES):
            if class_ind not in mAP_CLASSIFICATION.ignore_class:
                print('class {}: {}'.format(class_ind, mAP_CLASSIFICATION.AP(class_ind)))
    if cfg.USE_MASK:
        mAP_MASK.finalize()
        print('mAP of mask: {}'.format(mAP_MASK.mAP()))
        for class_ind in range(cfg.NUM_CLASSES):
            if class_ind not in mAP_MASK.ignore_class:
                print('class {}: {}'.format(class_ind, mAP_MASK.AP(class_ind)))
def validation(self, index, mode):
    #####################################
    # Preparation
    #####################################
    # -------------------------------
    # metric
    # -------------------------------
    mAP_RPN = Evaluate_metric(1, overlap_threshold=cfg.MAP_THRESH)
    mAP_CLASSIFICATION = Evaluate_metric(cfg.NUM_CLASSES, ignore_class=[0],
                                         overlap_threshold=cfg.MAP_THRESH)
    mAP_MASK = Evaluate_metric(cfg.NUM_CLASSES, ignore_class=[0],
                               overlap_threshold=cfg.MAP_THRESH)
    if mode == 'val':
        data_loader = self.dataloader_val
        data_logger = self.logger_val
    elif mode == 'trainval':
        data_loader = self.dataloader_trainval
        data_logger = self.logger_trainval
    ####################################
    # Accumulate data
    ####################################
    timer = Timer()
    timer.tic()
    print('starting validation....')
    for iter, blobs in enumerate(tqdm(data_loader)):
        # if no box: skip
        if len(blobs['gt_box']) == 0:
            continue
        if cfg.USE_IMAGES:
            grid_shape = blobs['data'].shape[-3:]
            projection_helper = ProjectionHelper(
                cfg.INTRINSIC, cfg.PROJ_DEPTH_MIN, cfg.PROJ_DEPTH_MAX,
                cfg.DEPTH_SHAPE, grid_shape, cfg.VOXEL_SIZE)
            proj_mapping = [projection_helper.compute_projection(d.cuda(), c.cuda(), t.cuda())
                            for d, c, t in zip(blobs['nearest_images']['depths'][0],
                                               blobs['nearest_images']['poses'][0],
                                               blobs['nearest_images']['world2grid'][0])]
            if None in proj_mapping:
                # invalid sample
                continue
            blobs['proj_ind_3d'] = []
            blobs['proj_ind_2d'] = []
            proj_mapping0, proj_mapping1 = zip(*proj_mapping)
            blobs['proj_ind_3d'].append(torch.stack(proj_mapping0))
            blobs['proj_ind_2d'].append(torch.stack(proj_mapping1))
        self.net.forward(blobs, 'TEST', [])
        # --------------------------------------
        # RPN: loss, metric
        # --------------------------------------
        if cfg.USE_RPN:
            # (n, 6)
            gt_box = blobs['gt_box'][0].numpy()[:, 0:6]
            gt_box_label = np.zeros(gt_box.shape[0])
            try:
                pred_box_num = (self.net._predictions['roi_scores'][0][:, 0] >
                                cfg.ROI_THRESH).nonzero().size(0)
                pred_box = self.net._predictions['rois'][0].cpu().numpy()[:pred_box_num]
                pred_box_label = np.zeros(pred_box_num)
                pred_box_score = self.net._predictions['roi_scores'][0].cpu().numpy()[:pred_box_num, 0]
            except:
                pred_box = self.net._predictions['rois'][0].cpu().numpy()[:1]
                pred_box_label = np.zeros(1)
                pred_box_score = self.net._predictions['roi_scores'][0].cpu().numpy()[:1, 0]
            # evaluation metric
            mAP_RPN.evaluate(pred_box, pred_box_label, pred_box_score,
                             gt_box, gt_box_label)
        # --------------------------------------
        # Classification: loss, metric
        # --------------------------------------
        if cfg.USE_CLASS:
            # groundtruth
            gt_box = blobs['gt_box'][0].numpy()[:, 0:6]
            gt_class = blobs['gt_box'][0][:, 6].numpy()
            # predictions
            pred_class = self.net._predictions['cls_pred'].data.cpu().numpy()
            # only predictions['rois'] is a list of Tensors; the others are Variables
            rois = self.net._predictions['rois'][0].cpu()
            box_reg_pre = self.net._predictions["bbox_pred"].data.cpu().numpy()
            box_reg = np.zeros((box_reg_pre.shape[0], 6))
            pred_conf_pre = self.net._predictions['cls_prob'].data.cpu().numpy()
            pred_conf = np.zeros((pred_conf_pre.shape[0]))
            for pred_ind in range(pred_class.shape[0]):
                box_reg[pred_ind, :] = box_reg_pre[
                    pred_ind, pred_class[pred_ind] * 6:(pred_class[pred_ind] + 1) * 6]
                pred_conf[pred_ind] = pred_conf_pre[pred_ind, pred_class[pred_ind]]
            pred_box = bbox_transform_inv(rois, torch.from_numpy(box_reg).float())
            pred_box = clip_boxes(pred_box, self.net._scene_info[:3]).numpy()
            # pick up boxes above the class-confidence threshold
            sort_index = []
            for conf_index in range(pred_conf.shape[0]):
                if pred_conf[conf_index] > cfg.CLASS_THRESH:
                    sort_index.append(True)
                else:
                    sort_index.append(False)
            # eliminate degenerate boxes
            for idx, box in enumerate(pred_box):
                if round(box[0]) >= round(box[3]) or round(box[1]) >= round(box[4]) \
                        or round(box[2]) >= round(box[5]):
                    sort_index[idx] = False
            if len(pred_box[sort_index]) == 0:
                print('no pred box')
            if iter < cfg.VAL_NUM:
                scene_id = blobs['id'][0].split('/')[-1][:12]
                os.makedirs('{}/{}'.format(cfg.VAL_SAVE_DIR, scene_id), exist_ok=True)
                np.save('{}/{}/pred_class'.format(cfg.VAL_SAVE_DIR, scene_id), pred_class)
                np.save('{}/{}/pred_conf'.format(cfg.VAL_SAVE_DIR, scene_id), pred_conf)
                np.save('{}/{}/pred_box'.format(cfg.VAL_SAVE_DIR, scene_id), pred_box)
                np.save('{}/{}/scene'.format(cfg.VAL_SAVE_DIR, scene_id),
                        np.where(blobs['data'][0, 0].numpy() <= 1, 1, 0))
                np.save('{}/{}/gt_class'.format(cfg.VAL_SAVE_DIR, scene_id), gt_class)
                np.save('{}/{}/gt_box'.format(cfg.VAL_SAVE_DIR, scene_id), gt_box)
            mAP_CLASSIFICATION.evaluate(
                pred_box[sort_index], pred_class[sort_index],
                pred_conf[sort_index], gt_box, gt_class)
        # --------------------------------------
        # MASK: loss, metric
        # --------------------------------------
        if cfg.USE_MASK:
            # gt data
            gt_box = blobs['gt_box'][0].numpy()[:, 0:6]
            gt_class = blobs['gt_box'][0][:, 6].numpy()
            gt_mask = blobs['gt_mask'][0]
            pred_class = self.net._predictions['cls_pred'].data.cpu().numpy()
            pred_conf = np.zeros((pred_class.shape[0]))
            for pred_ind in range(pred_class.shape[0]):
                pred_conf[pred_ind] = self.net._predictions['cls_prob'].data.cpu().numpy()[
                    pred_ind, pred_class.data[pred_ind]]
            # pick up boxes above the class-confidence threshold
            sort_index = pred_conf > cfg.CLASS_THRESH
            # eliminate degenerate boxes
            for idx, box in enumerate(pred_box):
                if round(box[0]) >= round(box[3]) or round(box[1]) >= round(box[4]) \
                        or round(box[2]) >= round(box[5]):
                    sort_index[idx] = False
            pred_mask = []
            mask_ind = 0
            for ind, cls in enumerate(pred_class):
                if sort_index[ind]:
                    mask = self.net._predictions['mask_pred'][0][mask_ind][0][cls].data.cpu().numpy()
                    mask = np.where(mask >= cfg.MASK_THRESH, 1, 0).astype(np.float32)
                    pred_mask.append(mask)
                    mask_ind += 1
            if iter < cfg.VAL_NUM:
                scene_id = blobs['id'][0].split('/')[-1][:12]
                pickle.dump(pred_mask,
                            open('{}/{}/pred_mask'.format(cfg.VAL_SAVE_DIR, scene_id), 'wb'))
                pickle.dump(sort_index,
                            open('{}/{}/pred_mask_index'.format(cfg.VAL_SAVE_DIR, scene_id), 'wb'))
                pickle.dump(gt_mask,
                            open('{}/{}/gt_mask'.format(cfg.VAL_SAVE_DIR, scene_id), 'wb'))
            mAP_MASK.evaluate_mask(
                pred_box[sort_index], pred_class[sort_index],
                pred_conf[sort_index], pred_mask,
                gt_box, gt_class, gt_mask, self.net._scene_info)
        self.net.delete_intermediate_states()
    timer.toc()
    print('It took {:.3f}s for Validation on chunks'.format(timer.total_time()))
    ###################################
    # Summary
    ###################################
    if cfg.USE_RPN:
        mAP_RPN.finalize()
        print('AP of RPN: {}'.format(mAP_RPN.mAP()))
        data_logger.scalar_summary('AP_ROI', mAP_RPN.mAP(), index)
    if cfg.USE_CLASS:
        mAP_CLASSIFICATION.finalize()
        print('mAP of CLASSIFICATION: {}'.format(mAP_CLASSIFICATION.mAP()))
        for class_ind in range(cfg.NUM_CLASSES):
            if class_ind not in mAP_CLASSIFICATION.ignore_class:
                print('class {}: {}'.format(class_ind, mAP_CLASSIFICATION.AP(class_ind)))
        data_logger.scalar_summary('mAP_CLASSIFICATION', mAP_CLASSIFICATION.mAP(), index)
    if cfg.USE_MASK:
        mAP_MASK.finalize()
        print('mAP of mask: {}'.format(mAP_MASK.mAP()))
        for class_ind in range(cfg.NUM_CLASSES):
            if class_ind not in mAP_MASK.ignore_class:
                print('class {}: {}'.format(class_ind, mAP_MASK.AP(class_ind)))
        data_logger.scalar_summary('mAP_MASK', mAP_MASK.mAP(), index)
def train_model(self, sess, max_iters, restore=False):
    """Network training loop."""
    data_layer = get_data_layer(self.roidb, self.imdb.num_classes)
    total_loss, model_loss, rpn_cross_entropy, rpn_loss_box = \
        self.net.build_loss(ohem=cfg.TRAIN.OHEM)
    # scalar summaries
    tf.summary.scalar('rpn_reg_loss', rpn_loss_box)
    tf.summary.scalar('rpn_cls_loss', rpn_cross_entropy)
    tf.summary.scalar('model_loss', model_loss)
    tf.summary.scalar('total_loss', total_loss)
    summary_op = tf.summary.merge_all()
    log_image, log_image_data, log_image_name = self.build_image_summary()
    # optimizer
    lr = tf.Variable(cfg.TRAIN.LEARNING_RATE, trainable=False)
    if cfg.TRAIN.SOLVER == 'Adam':
        opt = tf.train.AdamOptimizer(cfg.TRAIN.LEARNING_RATE)
    elif cfg.TRAIN.SOLVER == 'RMS':
        opt = tf.train.RMSPropOptimizer(cfg.TRAIN.LEARNING_RATE)
    else:
        # lr = tf.Variable(0.0, trainable=False)
        momentum = cfg.TRAIN.MOMENTUM
        opt = tf.train.MomentumOptimizer(lr, momentum)
    global_step = tf.Variable(0, trainable=False)
    with_clip = True
    if with_clip:
        tvars = tf.trainable_variables()
        grads, norm = tf.clip_by_global_norm(tf.gradients(total_loss, tvars), 10.0)
        train_op = opt.apply_gradients(list(zip(grads, tvars)), global_step=global_step)
    else:
        train_op = opt.minimize(total_loss, global_step=global_step)
    # initialize variables
    sess.run(tf.global_variables_initializer())
    restore_iter = 0
    # load vgg16
    if self.pretrained_model is not None and not restore:
        try:
            print(('Loading pretrained model '
                   'weights from {:s}').format(self.pretrained_model))
            self.net.load(self.pretrained_model, sess, True)
        except:
            raise Exception('Check your pretrained model {:s}'.format(self.pretrained_model))
    # resuming a trainer
    if restore:
        try:
            ckpt = tf.train.get_checkpoint_state(self.output_dir)
            print('Restoring from {}...'.format(ckpt.model_checkpoint_path), end=' ')
            self.saver.restore(sess, ckpt.model_checkpoint_path)
            stem = os.path.splitext(os.path.basename(ckpt.model_checkpoint_path))[0]
            restore_iter = int(stem.split('_')[-1])
            sess.run(global_step.assign(restore_iter))
            print('done')
        except:
            # raising a bare string is invalid Python 3; raise a real exception
            raise Exception('Check your pretrained {:s}'.format(ckpt.model_checkpoint_path))
    last_snapshot_iter = -1
    timer = Timer()
    for iter in range(restore_iter, max_iters):
        timer.tic()
        # learning rate step decay
        if iter != 0 and iter % cfg.TRAIN.STEPSIZE == 0:
            sess.run(tf.assign(lr, lr.eval() * cfg.TRAIN.GAMMA))
            print(lr)
        # get one batch
        blobs = data_layer.forward()
        feed_dict = {
            self.net.data: blobs['data'],
            self.net.im_info: blobs['im_info'],
            self.net.keep_prob: 0.5,
            self.net.gt_boxes: blobs['gt_boxes'],
            self.net.gt_ishard: blobs['gt_ishard'],
            self.net.dontcare_areas: blobs['dontcare_areas']
        }
        res_fetches = []
        fetch_list = [total_loss, model_loss, rpn_cross_entropy, rpn_loss_box,
                      summary_op, train_op] + res_fetches
        total_loss_val, model_loss_val, rpn_loss_cls_val, rpn_loss_box_val, \
            summary_str, _ = sess.run(fetches=fetch_list, feed_dict=feed_dict)
        self.writer.add_summary(summary=summary_str, global_step=global_step.eval())
        _diff_time = timer.toc(average=False)
        if iter % cfg.TRAIN.DISPLAY == 0:
            print('iter: %d / %d, total loss: %.4f, model loss: %.4f, '
                  'rpn_loss_cls: %.4f, rpn_loss_box: %.4f, lr: %f' %
                  (iter, max_iters, total_loss_val, model_loss_val,
                   rpn_loss_cls_val, rpn_loss_box_val, lr.eval()))
            print('speed: {:.3f}s / iter'.format(_diff_time))
        if (iter + 1) % cfg.TRAIN.SNAPSHOT_ITERS == 0:
            last_snapshot_iter = iter
            self.snapshot(sess, iter)
    if last_snapshot_iter != iter:
        self.snapshot(sess, iter)
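# The loop above implements a step-decay schedule: the rate is multiplied by
# GAMMA whenever iter is a nonzero multiple of STEPSIZE. Equivalently, the
# effective learning rate at iteration i is (a sketch, assuming the same
# cfg.TRAIN values used above):
def lr_at(i, base_lr, gamma, stepsize):
    return base_lr * (gamma ** (i // stepsize))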