def main():
    """Run the trained YOLO detector over every image in a directory and
    display the detections frame by frame.

    Loads the model named in ``cfg.trained_model``, walks the hard-coded
    ``image_dir`` in sorted order, runs detection on each frame, shows it
    in an OpenCV window and prints per-frame timing.  Press 'q' to quit.

    NOTE(review): relies on module-level Darknet19, net_utils, yolo_utils,
    cfg, Timer and preprocess — confirm they are imported at file scope.
    """
    trained_model = cfg.trained_model
    thresh = 0.5
    image_dir = '/home/cory/cedl/vid/videos/vid04'

    net = Darknet19()
    net_utils.load_net(trained_model, net)
    net.eval()
    net.cuda()
    print('load model successfully')
    print(net)

    image_extensions = ['.jpg', '.JPG', '.png', '.PNG']
    image_abs_paths = sorted([
        os.path.join(image_dir, name)
        for name in os.listdir(image_dir)
        if name[-4:] in image_extensions
    ])

    t_det = Timer()
    t_total = Timer()
    for i, image_path in enumerate(image_abs_paths):
        t_total.tic()
        image, im_data = preprocess(image_path)
        # HWC image batch -> NCHW CUDA variable expected by the network.
        im_data = net_utils.np_to_variable(
            im_data, is_cuda=True, volatile=True).permute(0, 3, 1, 2)

        t_det.tic()
        # FIX: call the module (net(...)) instead of net.forward(...) so
        # registered hooks run; matches every other call site in this file.
        bbox_pred, iou_pred, prob_pred = net(im_data)
        det_time = t_det.toc()

        # to numpy
        bbox_pred = bbox_pred.data.cpu().numpy()
        iou_pred = iou_pred.data.cpu().numpy()
        prob_pred = prob_pred.data.cpu().numpy()

        bboxes, scores, cls_inds = yolo_utils.postprocess(
            bbox_pred, iou_pred, prob_pred, image.shape, cfg, thresh)
        im2show = yolo_utils.draw_detection(image, bboxes, scores,
                                            cls_inds, cfg)

        if im2show.shape[0] > 1100:
            # Shrink very tall frames to 1000 px height, keeping aspect.
            im2show = cv2.resize(
                im2show,
                (int(1000. * float(im2show.shape[1]) / im2show.shape[0]),
                 1000))
        cv2.imshow('test', im2show)

        total_time = t_total.toc()
        format_str = 'frame: %d, (detection: %.1f fps, %.1f ms) (total: %.1f fps, %.1f ms)'
        print(format_str % (i, 1. / det_time, det_time * 1000,
                            1. / total_time, total_time * 1000))

        t_det.clear()
        t_total.clear()

        key = cv2.waitKey(1)
        if key == ord('q'):
            break
im_data = net_utils.np_to_variable( im_data, is_cuda=True, volatile=True).permute(0, 3, 1, 2) t_det.tic() bbox_pred, iou_pred, prob_pred = net(im_data) det_time = t_det.toc() # to numpy bbox_pred = bbox_pred.data.cpu().numpy() iou_pred = iou_pred.data.cpu().numpy() prob_pred = prob_pred.data.cpu().numpy() # print bbox_pred.shape, iou_pred.shape, prob_pred.shape bboxes, scores, cls_inds = yolo_utils.postprocess( bbox_pred, iou_pred, prob_pred, image.shape, cfg, thresh) im2show = yolo_utils.draw_detection(image, bboxes, scores, cls_inds, cfg) if im2show.shape[0] > 1100: im2show = cv2.resize(im2show, (int(1000. * float(im2show.shape[1]) / im2show.shape[0]), 1000)) cv2.imshow('test', im2show) total_time = t_total.toc() # wait_time = max(int(60 - total_time * 1000), 1) cv2.waitKey(0) if i % 1 == 0: format_str = 'frame: %d, ' \ '(detection: %.1f Hz, %.1f ms) ' \
def test_net(net, imdb, max_per_image=300, thresh=0.5, vis=False):
    """Evaluate `net` on every image of `imdb`.

    Detections are accumulated per class and per image in
    ``all_boxes[cls][image]`` as an N x 5 float32 array of
    (x1, y1, x2, y2, score) rows, pickled to
    ``<output_dir>/detections.pkl`` and passed to
    ``imdb.evaluate_detections``.

    NOTE(review): depends on module-level `output_dir`, `cfg`, Timer,
    net_utils and yolo_utils — confirm they are in scope at call time.
    """
    # =============================================================================
    #     chang here for ryan
    # =============================================================================
    num_images = imdb.num_images
    print('num-images',num_images)
    # num_images = 3

    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    det_file = os.path.join(output_dir, 'detections.pkl')

    # =============================================================================
    #     change size_index = 0
    # =============================================================================
    # size_index = args.image_size_index
    size_index = 0  # fixed input-scale index (CLI value deliberately ignored)

    for i in range(num_images):
        batch = imdb.next_batch(size_index=size_index)
        # print('next_batch')
        ori_im = batch['origin_im'][0]
        # HWC image batch -> NCHW CUDA variable for the network.
        im_data = net_utils.np_to_variable(batch['images'],
                                           is_cuda=True,
                                           volatile=True).permute(0, 3, 1, 2)
        # print('im_data')
        _t['im_detect'].tic()
        bbox_pred, iou_pred, prob_pred = net(im_data)

        # to numpy
        bbox_pred = bbox_pred.data.cpu().numpy()
        iou_pred = iou_pred.data.cpu().numpy()
        prob_pred = prob_pred.data.cpu().numpy()

        bboxes, scores, cls_inds = yolo_utils.postprocess(bbox_pred,
                                                          iou_pred,
                                                          prob_pred,
                                                          ori_im.shape,
                                                          cfg,
                                                          thresh,
                                                          size_index)
        detect_time = _t['im_detect'].toc()

        _t['misc'].tic()

        # Bucket this image's detections by predicted class.
        for j in range(imdb.num_classes):
            inds = np.where(cls_inds == j)[0]
            if len(inds) == 0:
                all_boxes[j][i] = np.empty([0, 5], dtype=np.float32)
                continue
            c_bboxes = bboxes[inds]
            c_scores = scores[inds]
            c_dets = np.hstack((c_bboxes,
                                c_scores[:, np.newaxis])).astype(np.float32,
                                                                 copy=False)
            all_boxes[j][i] = c_dets

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack([all_boxes[j][i][:, -1]
                                      for j in range(imdb.num_classes)])
            if len(image_scores) > max_per_image:
                # Keep only the max_per_image highest-scoring boxes.
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        nms_time = _t['misc'].toc()

        if i % 20 == 0:
            print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s'.format(
                i + 1, num_images, detect_time, nms_time))  # noqa
            _t['im_detect'].clear()
            _t['misc'].clear()

        if vis:
            im2show = yolo_utils.draw_detection(ori_im, bboxes, scores,
                                                cls_inds, cfg, thr=0.1)
            if im2show.shape[0] > 1100:
                im2show = cv2.resize(im2show, (int(1000. * float(im2show.shape[1]) / im2show.shape[0]), 1000))  # noqa
            cv2.imshow('test', im2show)
            cv2.waitKey(0)

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    imdb.evaluate_detections(all_boxes, output_dir)
# Visual sanity-check of the VOC training pipeline: draw each batch's
# ground-truth boxes on the first image and show them in a window.
# FIX: the Python 2 print statements ("print 'start'", "print gt_boxes")
# are SyntaxErrors under Python 3 — converted to print() calls.
imdb = VOCDataset(cfg.imdb_train, cfg.DATA_DIR, cfg.batch_size,
                  yolo_utils.preprocess_train, processes=2, shuffle=True,
                  dst_size=cfg.inp_size)
print('start')
try:
    for step in range(cfg.max_step):
        batch = imdb.next_batch()
        im = batch['images'][0]
        gt_boxes = batch['gt_boxes'][0]
        cls_inds = batch['gt_classes'][0]
        print(gt_boxes)
        # Draw the ground truth with dummy score 1.0 for every box.
        im2show = yolo_utils.draw_detection(im, gt_boxes,
                                            np.ones(len(cls_inds)),
                                            cls_inds, cfg)
        cv2.imshow('train', im2show)
        cv2.waitKey(1)
        # print batch['gt_boxes']
        # print batch['gt_classes']
        # print batch['images'][0].shape
        # print step
        # cv2.imshow('test', batch['images'][0])
        # cv2.waitKey(20)
finally:
    # Always release the dataset's worker processes, even on error.
    imdb.close()
def test_net(net, imdb, max_per_image=300, thresh=0.5, vis=False):
    """Run the detector over every image of `imdb` (gradients disabled).

    ``all_boxes[cls][image]`` collects an N x 5 float32 array of
    (x1, y1, x2, y2, score) detections per class and image; results are
    pickled to ``<output_dir>/detections.pkl``.

    NOTE(review): depends on module-level `output_dir`, `args`, `cfg`,
    Timer, net_utils and yolo_utils — confirm they are in scope.
    """
    num_images = imdb.num_images

    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    det_file = os.path.join(output_dir, 'detections.pkl')

    size_index = args.image_size_index
    # helper: 0:320, 1:352, 2:384, 3:416, 4:448, 5:480, 6:512, 7:544, 8:576
    # here val_img sometimes is 5123
    for i in range(num_images):
        batch = imdb.next_batch(size_index=size_index)
        ori_im = batch['origin_im'][0]
        # HWC image batch -> NCHW CUDA variable for the network.
        im_data = net_utils.np_to_variable(batch['images'], is_cuda=True,
                                           volatile=True).permute(0, 3, 1, 2)
        _t['im_detect'].tic()
        with torch.set_grad_enabled(False):
            bbox_pred, iou_pred, prob_pred = net(im_data)
            '''
            bbox_pred -> (batch, h*w, priors, 4)
            iou_pred  -> (batch, h*w, priors, 1)
            prob_pred -> (batch, h*w, priors, 20)
            '''
        # to numpy
        bbox_pred = bbox_pred.data.cpu().numpy()
        iou_pred = iou_pred.data.cpu().numpy()
        prob_pred = prob_pred.data.cpu().numpy()
        '''
        Post-processing returns:
            bbox_pred, scores, cls_inds
        '''
        bboxes, scores, cls_inds = yolo_utils.postprocess(
            bbox_pred, iou_pred, prob_pred, ori_im.shape, cfg, thresh,
            size_index)
        detect_time = _t['im_detect'].toc()

        _t['misc'].tic()
        '''
        The steps below organise the raw predictions: for each class we
        keep its boxes paired with their final scores, then threshold on
        those scores to cap the number of detections per image.
        '''
        for j in range(imdb.num_classes):
            inds = np.where(cls_inds == j)[0]
            if len(inds) == 0:
                all_boxes[j][i] = np.empty([0, 5], dtype=np.float32)
                continue
            c_bboxes = bboxes[inds]
            c_scores = scores[inds]
            c_dets = np.hstack(
                (c_bboxes,
                 c_scores[:, np.newaxis])).astype(np.float32, copy=False)
            all_boxes[j][i] = c_dets

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in range(imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        nms_time = _t['misc'].toc()

        if i % 20 == 0:
            print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s'.format(
                i + 1, num_images, detect_time, nms_time))  # noqa
            _t['im_detect'].clear()
            _t['misc'].clear()

        if vis:
            im2show = yolo_utils.draw_detection(ori_im, bboxes, scores,
                                                cls_inds, cfg, thr=0.1)
            if im2show.shape[0] > 1100:
                im2show = cv2.resize(im2show, (int(1000. * float(im2show.shape[1]) / im2show.shape[0]), 1000))  # noqa
            cv2.imshow('test', im2show)
            cv2.waitKey(0)

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    # NOTE(review): no imdb.evaluate_detections(...) call follows here —
    # presumably evaluation happens in the caller; confirm.
def test_net_img_only(net, img_list, max_per_image=300, thresh=0.5,
                      vis=False):
    """Run detection on a plain list of image paths (no imdb / ground truth).

    For each image: preprocess, forward the net with gradients disabled,
    post-process into per-class N x 5 (x1, y1, x2, y2, score) arrays in
    ``all_boxes``, write the detections to an XML file under ./result,
    and optionally visualise.

    NOTE(review): depends on module-level `args`, `cfg`, Timer,
    dataTransform, test_only_transform, net_utils and yolo_utils —
    confirm they are in scope at call time.
    """
    num_images = len(img_list)

    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    # BUG FIX: this function has no `imdb` parameter — the original body
    # mixed `imdb.num_classes` (a NameError or stale global) with
    # cfg.num_classes.  cfg.num_classes is used consistently below.
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(cfg.num_classes)]

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    size_index = args.image_size_index
    inp_size = cfg.multi_scale_inp_size
    if not os.path.exists("result"):
        os.mkdir("result")
    dt = dataTransform.dataTransform()

    for i in range(num_images):
        img_name = img_list[i]
        im, _, __, ___, ori_im = test_only_transform(img_name, inp_size,
                                                     size_index)
        # Add the batch dimension, then HWC -> NCHW CUDA variable.
        im = np.reshape(im, newshape=(-1, im.shape[0], im.shape[1],
                                      im.shape[2]))
        im_data = net_utils.np_to_variable(im, is_cuda=True,
                                           volatile=True).permute(0, 3, 1, 2)

        # BUG FIX: tic() was missing, so the toc() below measured an
        # interval that was never started.
        _t['im_detect'].tic()
        with torch.set_grad_enabled(False):
            # bbox_pred: (batch, h*w, priors, 4)
            # iou_pred:  (batch, h*w, priors, 1)
            # prob_pred: (batch, h*w, priors, num_classes)
            bbox_pred, iou_pred, prob_pred = net(im_data)

        # to numpy
        bbox_pred = bbox_pred.data.cpu().numpy()
        iou_pred = iou_pred.data.cpu().numpy()
        prob_pred = prob_pred.data.cpu().numpy()

        # Post-processing returns flat bboxes, scores and class indices.
        bboxes, scores, cls_inds = yolo_utils.postprocess(
            bbox_pred, iou_pred, prob_pred, ori_im.shape, cfg, thresh,
            size_index)
        detect_time = _t['im_detect'].toc()

        _t['misc'].tic()
        # Bucket detections by class, pairing boxes with their scores.
        for j in range(cfg.num_classes):
            inds = np.where(cls_inds == j)[0]
            if len(inds) == 0:
                all_boxes[j][i] = np.empty([0, 5], dtype=np.float32)
                continue
            c_bboxes = bboxes[inds]
            c_scores = scores[inds]
            c_dets = np.hstack(
                (c_bboxes,
                 c_scores[:, np.newaxis])).astype(np.float32, copy=False)
            all_boxes[j][i] = c_dets

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in range(cfg.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, cfg.num_classes):
                    keep = np.where(
                        all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        # save detect_result to xml
        dt.writeXml(img_name, "./result", ori_im, cfg.label_names,
                    cls_inds.tolist(), bboxes.tolist())

        if vis:
            im2show = yolo_utils.draw_detection(ori_im, bboxes, scores,
                                                cls_inds, cfg, thr=0.5)
            if im2show.shape[0] > 1100:
                im2show = cv2.resize(im2show, (int(1000. * float(im2show.shape[1]) / im2show.shape[0]), 1000))  # noqa
            cv2.imshow('test', im2show)
            cv2.waitKey(0)
summary_writer.add_scalar('loss_bbox', bbox_loss, step) summary_writer.add_scalar('loss_iou', iou_loss, step) summary_writer.add_scalar('loss_cls', cls_loss, step) summary_writer.add_scalar('learning_rate', lr, step) # plot results bbox_pred = bbox_pred.data[0:1].cpu().numpy() iou_pred = iou_pred.data[0:1].cpu().numpy() prob_pred = prob_pred.data[0:1].cpu().numpy() # image = im_data.cpu().data.numpy()[0] # image = np.rollaxis(image, 0, 3) image = ori_imgs[0] # bboxes, scores, cls_inds = yolo_utils.postprocess(bbox_pred, iou_pred, prob_pred, (image.shape[1], image.shape[0]), cfg, thresh=0.3, size_index=size_index) bboxes, scores, cls_inds = yolo_utils.postprocess(bbox_pred, iou_pred, prob_pred, image.shape, cfg, thresh=0.3, size_index=size_index) im2show = yolo_utils.draw_detection(image, bboxes, scores, cls_inds, cfg) summary_writer.add_image('predict', im2show, step) im2show = yolo_utils.draw_detection(image, np.round(gt_boxes[0]).astype(int), np.array([1] * gt_boxes[0].shape[0]), gt_classes[0], cfg, thr=0.1) summary_writer.add_image('GT', im2show, step) train_loss = 0 bbox_loss, iou_loss, cls_loss = 0., 0., 0. cnt = 0 t.clear() size_index = randint(0, len(cfg.multi_scale_inp_size) - 1) print("image_size {}".format(cfg.multi_scale_inp_size[size_index])) if step > 0 and (step % batch_per_epoch == 0): if epoch in cfg.lr_decay_epochs:
def home():
    """HTTP handler: run detection over every .jpg in the posted directory.

    Expects a JSON body ``{"dir_path": ...}``.  For each image it runs the
    detector, writes the annotated frame to ``<dir>/out``, records which
    tracked classes appeared in the ``images_det`` table, and finally
    writes aggregate per-directory counts to ``minute_det``.

    NOTE(review): depends on module-level `request`, `net`, `pool`,
    `conn`, `det_class`, `t_det`, `t_total`, `thresh`, `cfg` and helper
    modules — confirm they are in scope at call time.
    """
    data = request.body.read()
    body = json.loads(data)
    im_path = body['dir_path']
    # im_path = 'demo'
    im_fnames = sorted((fname for fname in os.listdir(im_path)
                        if os.path.splitext(fname)[-1] == '.jpg'))
    im_fnames = (os.path.join(im_path, fname) for fname in im_fnames)
    min_record_tmp_list = [0] * len(det_class)

    for i, (image, im_data, fname) in enumerate(
            pool.imap(preprocess, im_fnames, chunksize=1)):
        print(fname)
        t_total.tic()
        im_data = net_utils.np_to_variable(
            im_data, is_cuda=True, volatile=True).permute(0, 3, 1, 2)

        t_det.tic()
        bbox_pred, iou_pred, prob_pred = net(im_data)
        det_time = t_det.toc()

        # to numpy
        bbox_pred = bbox_pred.data.cpu().numpy()
        iou_pred = iou_pred.data.cpu().numpy()
        prob_pred = prob_pred.data.cpu().numpy()

        bboxes, scores, cls_inds = yolo_utils.postprocess(
            bbox_pred, iou_pred, prob_pred, image.shape, cfg, thresh)
        im2show = yolo_utils.draw_detection(image, bboxes, scores,
                                            cls_inds, cfg)

        # create list that used to write to database
        path_list = fname.split("/")
        filename = path_list.pop()
        time_folder = im_path

        # write im2show to out dir
        im_out_path = os.path.join(time_folder, "out")
        check_path_create(im_out_path)
        cv2.imwrite(os.path.join(im_out_path, filename), im2show)

        tmp_list = ['0'] * len(det_class)
        # BUG FIX: the original inner loop reused `i`, clobbering the
        # frame index that the timing print below relies on — use a
        # distinct name for the class index.
        for cls_ind in cls_inds:
            try:
                tmp_list[det_class.index(cfg.label_names[cls_ind])] = '1'
                min_record_tmp_list[
                    det_class.index(cfg.label_names[cls_ind])] += 1
            except (ValueError, IndexError):
                # Label is not one of the tracked det_class entries —
                # deliberately best-effort, skip it.  (Was a bare except.)
                pass
        tmp_list.insert(0, time_folder)
        tmp_list.insert(0, filename)
        # Column names come from the trusted det_class list; all values
        # are bound through '?' placeholders.
        conn.execute(
            """insert into images_det (name, time_folder, %s)\
                    values (%s)""" % (",".join(det_class),
                                      ",".join(['?'] * len(tmp_list))),
            tmp_list)
        conn.commit()

        total_time = t_total.toc()
        if i % 1 == 0:
            format_str = 'frame: %d, (detection: %.1f Hz, %.1f ms) (total: %.1f Hz, %.1f ms)'
            print(format_str % (i, 1. / det_time, det_time * 1000,
                                1. / total_time, total_time * 1000))
            t_total.clear()
            t_det.clear()

    # Aggregate counts for the whole directory.
    tmp_list = [im_path]
    min_record_tmp_list = [str(n) for n in min_record_tmp_list]
    tmp_list.extend(min_record_tmp_list)
    conn.execute(
        """insert into minute_det (time_folder, %s) values (%s)""" % (
            ",".join(det_class), ",".join(['?'] * len(tmp_list))),
        tmp_list)
    conn.commit()
def main():
    """Detect objects on the KITTI validation images with a trained
    Darknet19 model and save the results in KITTI submission format.

    Rebuilds the output/ and kitti_det_output/ directories, loads the
    model, runs detection on every image listed in
    kitti_val_images.txt and appends each frame's detections to
    yolo_flow_kitti_det.txt via save_as_kitti_format.

    NOTE(review): depends on module-level Darknet19, net_utils,
    yolo_utils, cfg, Timer, preprocess, detection_objects and
    save_as_kitti_format — confirm they are imported at file scope.
    """
    shutil.rmtree('output', ignore_errors=True)
    shutil.copytree('output_template', 'output')
    shutil.rmtree('kitti_det_output', ignore_errors=True)
    os.makedirs('kitti_det_output')

    trained_model = '/home/cory/yolo2-pytorch/models/training/kitti_new_2/kitti_new_2_100.h5'
    thresh = 0.5

    net = Darknet19()
    net_utils.load_net(trained_model, net)
    net.eval()
    net.cuda()
    print('load model successfully')
    # print(net)

    # FIX: the file handle was never closed (resource leak) — use `with`.
    # (Dead locals from an older directory-scanning variant — use_kitti,
    # str_index, image_extensions, key_frame_path, detection_period,
    # use_flow, layer_of_flow — removed; none were used by live code.)
    with open('/home/cory/yolo2-pytorch/train_data/kitti/kitti_val_images.txt') as img_files:
        image_abs_paths = [f.strip() for f in img_files.readlines()]

    kitti_filename = 'yolo_flow_kitti_det.txt'
    try:
        os.remove(kitti_filename)
    except OSError:
        pass

    t_det = Timer()
    t_total = Timer()
    for i, image_path in enumerate(image_abs_paths):
        t_total.tic()
        image, im_data = preprocess(image_path)
        # HWC image batch -> NCHW CUDA variable for the network.
        im_data = net_utils.np_to_variable(
            im_data, is_cuda=True, volatile=True).permute(0, 3, 1, 2)

        t_det.tic()
        # FIX: call the module rather than net.forward() so hooks run.
        bbox_pred, iou_pred, prob_pred = net(im_data)
        det_time = t_det.toc()

        # to numpy
        bbox_pred = bbox_pred.data.cpu().numpy()
        iou_pred = iou_pred.data.cpu().numpy()
        prob_pred = prob_pred.data.cpu().numpy()

        bboxes, scores, cls_inds = yolo_utils.postprocess(
            bbox_pred, iou_pred, prob_pred, image.shape, cfg, thresh)

        det_obj = detection_objects(bboxes, scores, cls_inds)
        save_as_kitti_format(i, det_obj, kitti_filename, src_label='kitti')

        # Visualisation is disabled by default for headless runs.
        vis_enable = False
        if vis_enable:
            im2show = yolo_utils.draw_detection(image, bboxes, scores,
                                                cls_inds, cfg)
            cv2.imshow('detection', im2show)
            cv2.imwrite('output/detection/{:04d}.jpg'.format(i), im2show)

        total_time = t_total.toc()
        format_str = 'frame: %d, (detection: %.1f fps, %.1f ms) (total: %.1f fps, %.1f ms)'
        print(format_str % (i, 1. / det_time, det_time * 1000,
                            1. / total_time, total_time * 1000))
        t_det.clear()
        t_total.clear()

        if vis_enable:
            key = cv2.waitKey(0)
            if key == ord('q'):
                break
def test_net(net, imdb, max_per_image=300, thresh=0.5, vis=False):
    """Run the detector over the whole dataset and evaluate it.

    Detections accumulate in ``all_boxes[cls][image]`` as N x 5 float32
    arrays of (x1, y1, x2, y2, score); they are pickled to
    ``<output_dir>/detections.pkl`` and handed to
    ``imdb.evaluate_detections``.
    """
    num_images = imdb.num_images
    num_classes = imdb.num_classes

    # all_boxes[cls][image] -> N x 5 detections (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]

    det_timer = Timer()
    misc_timer = Timer()
    det_file = os.path.join(output_dir, 'detections.pkl')
    size_index = args.image_size_index

    for idx in range(num_images):
        batch = imdb.next_batch(size_index=size_index)
        original_image = batch['origin_im'][0]
        net_input = net_utils.np_to_variable(
            batch['images'], is_cuda=True, volatile=True).permute(0, 3, 1, 2)

        det_timer.tic()
        bbox_pred, iou_pred, prob_pred = net(net_input)

        # bring the predictions back to host memory
        bbox_pred = bbox_pred.data.cpu().numpy()
        iou_pred = iou_pred.data.cpu().numpy()
        prob_pred = prob_pred.data.cpu().numpy()

        bboxes, scores, cls_inds = yolo_utils.postprocess(
            bbox_pred, iou_pred, prob_pred, original_image.shape, cfg,
            thresh, size_index)
        detect_time = det_timer.toc()

        misc_timer.tic()
        # group this image's detections by predicted class
        for cls in range(num_classes):
            sel = np.where(cls_inds == cls)[0]
            if len(sel) == 0:
                all_boxes[cls][idx] = np.empty([0, 5], dtype=np.float32)
            else:
                all_boxes[cls][idx] = np.hstack(
                    (bboxes[sel], scores[sel][:, np.newaxis])
                ).astype(np.float32, copy=False)

        # cap the number of detections kept per image, across classes
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[cls][idx][:, -1] for cls in range(num_classes)])
            if len(image_scores) > max_per_image:
                cutoff = np.sort(image_scores)[-max_per_image]
                for cls in range(1, num_classes):
                    keep = np.where(all_boxes[cls][idx][:, -1] >= cutoff)[0]
                    all_boxes[cls][idx] = all_boxes[cls][idx][keep, :]
        nms_time = misc_timer.toc()

        if idx % 20 == 0:
            print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s'.format(
                idx + 1, num_images, detect_time, nms_time))
            det_timer.clear()
            misc_timer.clear()

        if vis:
            im2show = yolo_utils.draw_detection(
                original_image, bboxes, scores, cls_inds, cfg, thr=0.1)
            if im2show.shape[0] > 1100:
                new_width = int(
                    1000. * float(im2show.shape[1]) / im2show.shape[0])
                im2show = cv2.resize(im2show, (new_width, 1000))
            cv2.imshow('test', im2show)
            cv2.waitKey(0)

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    imdb.evaluate_detections(all_boxes, output_dir)
def test_net(net, imdb, max_per_image=300, thresh=0.5, vis=False,
             num_classes=8):
    """Evaluate `net` over the KITTI DataLoader and dump detections.

    Iterates ``dt_loader.get_batch()`` with batch size 1, collects
    per-class N x 5 (x1, y1, x2, y2, score) arrays into ``all_boxes``,
    pickles them to ``<output_dir>/detections.pkl`` and calls
    ``kitti.evaluate_detections``.

    NOTE(review): depends on module-level `kitti`, `args`, `output_dir`,
    `cfg`, DataLoader, Timer, net_utils and yolo_utils — confirm they are
    in scope at call time.
    """
    num_images = imdb.size

    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(num_classes)]

    dt_loader = DataLoader(kitti, batch_size=1, shuffle=False)

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    det_file = os.path.join(output_dir, 'detections.pkl')
    size_index = args.image_size_index

    i = 0
    for im_data, gt_boxes, gt_classes, dontcare, ori_imgs in \
            dt_loader.get_batch():
        # ori_im = (np.rollaxis(im_data[0].data.numpy(), 0, 3) * 255).astype(np.uint8)
        ori_im = ori_imgs[0]
        _t['im_detect'].tic()
        bbox_pred, iou_pred, prob_pred = net(im_data.cuda())

        # to numpy
        bbox_pred = bbox_pred.data.cpu().numpy()
        iou_pred = iou_pred.data.cpu().numpy()
        prob_pred = prob_pred.data.cpu().numpy()

        bboxes, scores, cls_inds = yolo_utils.postprocess(
            bbox_pred, iou_pred, prob_pred, ori_im.shape, cfg, thresh,
            size_index)
        detect_time = _t['im_detect'].toc()

        _t['misc'].tic()
        # Bucket this image's detections by predicted class.
        for j in range(num_classes):
            inds = np.where(cls_inds == j)[0]
            if len(inds) == 0:
                all_boxes[j][i] = np.empty([0, 5], dtype=np.float32)
                continue
            c_bboxes = bboxes[inds]
            c_scores = scores[inds]
            c_dets = np.hstack(
                (c_bboxes,
                 c_scores[:, np.newaxis])).astype(np.float32, copy=False)
            all_boxes[j][i] = c_dets

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack([all_boxes[j][i][:, -1]
                                      for j in range(num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        nms_time = _t['misc'].toc()

        if i % 20 == 0:
            print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s'.format(
                i + 1, num_images, detect_time, nms_time))  # noqa
            _t['im_detect'].clear()
            _t['misc'].clear()

        if vis:
            im2show = yolo_utils.draw_detection(ori_im, bboxes, scores,
                                                cls_inds, cfg, thr=0.1)
            if im2show.shape[0] > 1100:
                im2show = cv2.resize(im2show, (int(1000. * float(im2show.shape[1]) / im2show.shape[0]), 1000))  # noqa
            # BUG FIX: "print im2show.shape" was a Python 2 print
            # statement — a SyntaxError under Python 3, which the rest of
            # this function targets.
            print(im2show.shape)
            cv2.imshow('test', im2show)
            cv2.waitKey(0)
        i += 1

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    kitti.evaluate_detections(all_boxes, output_dir)
def main():
    """Run YOLO detection over a KITTI tracking sequence, optionally
    propagating features between key frames via optical flow.

    With ``use_flow`` False (the default here) every frame is detected
    directly; with it True, a full detection runs only every
    ``detection_period`` frames and intermediate frames reuse the key
    frame's feature map shifted by flow (detect_by_flow).  Detections
    are saved in KITTI format and annotated frames written to output/.

    NOTE(review): depends on module-level Darknet19, net_utils,
    yolo_utils, cfg, Timer, preprocess, plot_feature_map, detect_by_flow,
    detection_objects, save_as_kitti_format and Variable — confirm they
    are imported at file scope.
    """
    shutil.rmtree('output', ignore_errors=True)
    shutil.copytree('output_template', 'output')

    # trained_model = cfg.trained_model
    # trained_model = '/home/cory/yolo2-pytorch/models/training/kitti_new_2/kitti_new_2_60.h5'
    trained_model = '/home/cory/yolo2-pytorch/models/training/voc0712_obj_scale/voc0712_obj_scale_1.h5'
    # trained_model = '/home/cory/yolo2-pytorch/models/training/kitti_det_new_2/kitti_det_new_2_40.h5'
    # trained_model = '/home/cory/yolo2-pytorch/models/training/kitti_det_new_2/kitti_det_new_2_10.h5'
    thresh = 0.5
    use_kitti = True
    image_dir = '/home/cory/KITTI_Dataset/data_tracking_image_2/training/image_02/0013'

    # car = 1 5
    # pedestrian = 13 17

    net = Darknet19()
    net_utils.load_net(trained_model, net)
    net.eval()
    net.cuda()
    print('load model successfully')
    # print(net)

    def str_index(filename):
        # Sort key: KITTI frame names are zero-padded, so the raw name
        # sorts correctly; otherwise sort numerically by the index found
        # between the last '_' and the extension.
        if use_kitti:
            return filename
        begin_pos = filename.rfind('_') + 1
        end_pos = filename.rfind('.')
        str_v = filename[begin_pos: end_pos]
        return int(str_v)

    image_extensions = ['.jpg', '.JPG', '.png', '.PNG']
    image_abs_paths = sorted([os.path.join(image_dir, name)
                              for name in os.listdir(image_dir)
                              if name[-4:] in image_extensions],
                             key=str_index)

    key_frame_path = ''
    detection_period = 5  # run a full detection every N frames (flow mode)
    use_flow = False

    kitti_filename = 'yolo_flow_kitti_det.txt'
    try:
        # Start with a fresh results file; ignore "file not found".
        os.remove(kitti_filename)
    except OSError:
        pass

    t_det = Timer()
    t_total = Timer()
    for i, image_path in enumerate(image_abs_paths):
        t_total.tic()
        image, im_data = preprocess(image_path)
        # HWC image batch -> NCHW CUDA variable for the network.
        im_data = net_utils.np_to_variable(im_data,
                                           is_cuda=True,
                                           volatile=True).permute(0, 3, 1, 2)
        layer_of_flow = 'conv4'

        # key frame
        if i % detection_period == 0 and use_flow:
            key_frame_path = image_path
            # conv5 feature map
            # NOTE(review): `feature` is only (re)bound on key frames and
            # read again in the flow branch below — flow mode depends on
            # hitting a key frame first (i == 0 guarantees this).
            feature = net.get_feature_map(im_data=im_data,
                                          layer=layer_of_flow)
            feature = feature.data.cpu().numpy()
            feature_map_all = plot_feature_map(feature, resize_ratio=1)
            # cv2.imshow('feature_map', feature_map_all)
            cv2.imwrite('output/feature_map/{:04d}.jpg'.format(i),
                        feature_map_all * 255)

        t_det.tic()
        if use_flow:
            t1 = time.time()
            conv5_shifted_gpu = detect_by_flow(i, feature, image,
                                               image_path, key_frame_path)
            t2 = time.time()
            print('detect_by_flow', t2 - t1)

            bbox_pred, iou_pred, prob_pred = net.feed_feature(
                Variable(conv5_shifted_gpu), layer=layer_of_flow)
        else:
            bbox_pred, iou_pred, prob_pred = net.forward(im_data)
        det_time = t_det.toc()

        # to numpy
        bbox_pred = bbox_pred.data.cpu().numpy()
        iou_pred = iou_pred.data.cpu().numpy()
        prob_pred = prob_pred.data.cpu().numpy()

        bboxes, scores, cls_inds = yolo_utils.postprocess(bbox_pred,
                                                          iou_pred,
                                                          prob_pred,
                                                          image.shape,
                                                          cfg,
                                                          thresh)

        det_obj = detection_objects(bboxes, scores, cls_inds)
        save_as_kitti_format(i, det_obj, kitti_filename, src_label='kitti')

        im2show = yolo_utils.draw_detection(image, bboxes, scores,
                                            cls_inds, cfg)
        cv2.imshow('detection', im2show)
        cv2.imwrite('output/detection/{:04d}.jpg'.format(i), im2show)

        total_time = t_total.toc()
        format_str = 'frame: %d, (detection: %.1f fps, %.1f ms) (total: %.1f fps, %.1f ms)'
        print(format_str % (
            i, 1. / det_time, det_time * 1000, 1. / total_time,
            total_time * 1000))

        t_det.clear()
        t_total.clear()

        key = cv2.waitKey(1)
        if key == ord('q'):
            break