def main():
    """Run the trained Darknet19 detector over a folder of frames and preview the results.

    Every entry of the hard-coded image directory whose last four characters
    are one of ``.jpg``/``.JPG``/``.png``/``.PNG`` is processed in sorted path
    order.  Each frame is shown with its detections drawn in an OpenCV window
    named ``test`` and one timing line is printed per frame.  Pressing ``q``
    in the window stops the loop.
    """
    weights_path = cfg.trained_model
    score_thresh = 0.5
    frames_dir = '/home/cory/cedl/vid/videos/vid04'

    net = Darknet19()
    net_utils.load_net(weights_path, net)
    net.eval()
    net.cuda()
    print('load model successfully')
    print(net)

    # Collect the frames: keep only names ending in a known image suffix.
    allowed_suffixes = ['.jpg', '.JPG', '.png', '.PNG']
    frame_paths = []
    for entry in os.listdir(frames_dir):
        if entry[-4:] in allowed_suffixes:
            frame_paths.append(os.path.join(frames_dir, entry))
    frame_paths.sort()

    report = 'frame: %d, (detection: %.1f fps, %.1f ms) (total: %.1f fps, %.1f ms)'
    det_timer = Timer()
    frame_timer = Timer()

    for frame_idx, frame_path in enumerate(frame_paths):
        frame_timer.tic()
        frame, net_input = preprocess(frame_path)
        net_input = net_utils.np_to_variable(net_input, is_cuda=True, volatile=True)
        net_input = net_input.permute(0, 3, 1, 2)

        # Only the forward pass is covered by the detection timer.
        det_timer.tic()
        bbox_pred, iou_pred, prob_pred = net.forward(net_input)
        det_seconds = det_timer.toc()

        # Bring the three network outputs back to the CPU as numpy arrays.
        bbox_np, iou_np, prob_np = [
            out.data.cpu().numpy() for out in (bbox_pred, iou_pred, prob_pred)
        ]

        bboxes, scores, cls_inds = yolo_utils.postprocess(
            bbox_np, iou_np, prob_np, frame.shape, cfg, score_thresh)
        canvas = yolo_utils.draw_detection(frame, bboxes, scores, cls_inds, cfg)

        # Frames taller than 1100 px are scaled to a height of 1000 px,
        # keeping the aspect ratio.
        height, width = canvas.shape[0], canvas.shape[1]
        if height > 1100:
            canvas = cv2.resize(canvas, (int(1000. * float(width) / height), 1000))
        cv2.imshow('test', canvas)

        total_seconds = frame_timer.toc()
        print(report % (frame_idx,
                        1. / det_seconds, det_seconds * 1000,
                        1. / total_seconds, total_seconds * 1000))
        det_timer.clear()
        frame_timer.clear()

        if cv2.waitKey(1) == ord('q'):
            break
# print bbox_pred.shape, iou_pred.shape, prob_pred.shape bboxes, scores, cls_inds = yolo_utils.postprocess( bbox_pred, iou_pred, prob_pred, image.shape, cfg, thresh) im2show = yolo_utils.draw_detection(image, bboxes, scores, cls_inds, cfg) if im2show.shape[0] > 1100: im2show = cv2.resize(im2show, (int(1000. * float(im2show.shape[1]) / im2show.shape[0]), 1000)) cv2.imshow('test', im2show) total_time = t_total.toc() # wait_time = max(int(60 - total_time * 1000), 1) cv2.waitKey(0) if i % 1 == 0: format_str = 'frame: %d, ' \ '(detection: %.1f Hz, %.1f ms) ' \ '(total: %.1f Hz, %.1f ms)' print((format_str % ( i, 1. / det_time, det_time * 1000, 1. / total_time, total_time * 1000))) t_total.clear() t_det.clear()
bboxes, scores, cls_inds = yolo_utils.postprocess( bbox_pred, iou_pred, prob_pred, image.shape, cfg, thresh=0.3, size_index=size_index) im2show = yolo_utils.draw_detection(image, bboxes, scores, cls_inds, cfg) summary_writer.add_image('predict', im2show, step) train_loss = 0 bbox_loss, iou_loss, cls_loss = 0., 0., 0. cnt = 0 t.clear() size_index = randint(0, len(cfg.multi_scale_inp_size) - 1) print("image_size {}".format(cfg.multi_scale_inp_size[size_index])) if step > 0 and (step % imdb.batch_per_epoch == 0): if imdb.epoch in cfg.lr_decay_epochs: lr *= cfg.lr_decay optimizer = torch.optim.SGD(net.parameters(), lr=lr, momentum=cfg.momentum, weight_decay=cfg.weight_decay) save_name = os.path.join(cfg.train_output_dir, '{}_{}.h5'.format(cfg.exp_name, imdb.epoch)) net_utils.save_net(save_name, net) print(('save model: {}'.format(save_name)))
def run_detection(im_root, result_root, conf_threshold):
    """Run the R-FCN detector on every image of a directory and save the detections.

    The network symbol and weights are chosen from the module-level ``args``
    (``args.rfcn_only`` selects the plain R-FCN instead of the deformable
    variant) and ``config``.  For each file of ``im_root`` (sorted by name)
    the detections of the first foreground class (``'person'`` in the class
    list below) are written to ``<result_root>/<index:04d>.txt``: the first
    line is the number of detections, followed by one
    ``0 score width height x1 y1`` line per detection.  All classes are drawn
    into an OpenCV window named ``det``.

    Args:
        im_root: Directory containing the input images.
        result_root: Directory receiving the per-image result files; it is
            not created here.
        conf_threshold: Minimum score a detection needs after NMS to be
            kept.  The previous implementation ignored this argument and
            always used ``0.7``.
    """
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn_dcn' if not args.rfcn_only else 'resnet_v1_101_rfcn'
    # eval() only ever sees one of the two constant names assigned above; it
    # resolves the module of that name and the class of the same name in it.
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    classes = [
        'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
        'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
        'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
        'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella',
        'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
        'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard',
        'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork',
        'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
        'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
        'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
        'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
        'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
        'scissors', 'teddy bear', 'hair drier', 'toothbrush',
    ]

    print('detection in {}'.format(im_root))
    im_names = sorted(os.listdir(im_root))

    data_names = ['data', 'im_info']
    label_names = []
    read_flags = cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
    target_size = config.SCALES[0][0]
    max_size = config.SCALES[0][1]

    # get predictor: the first two images only provide the input shapes that
    # are handed to the Predictor.
    data = []
    for im_name in im_names[:2]:
        im = cv2.imread(os.path.join(im_root, im_name), read_flags)
        if im is None:
            # cv2.imread signals an unreadable / non-image file with None.
            print('skip unreadable image {}'.format(im_name))
            continue
        im, im_scale = resize(im, target_size, max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    data = [[mx.nd.array(sample[name]) for name in data_names] for sample in data]
    max_data_shape = [[('data', (1, 3,
                                 max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, sample)]
                    for sample in data]
    provide_label = [None] * len(data)
    arg_params, aux_params = load_param(
        cur_path + '/../model/' +
        ('rfcn_dcn_coco' if not args.rfcn_only else 'rfcn_coco'),
        config.TEST.test_epoch, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    nms = gpu_soft_nms_wrapper(config.TEST.NMS, method=2, device_id=0)

    nms_t = Timer()
    for idx, im_name in enumerate(im_names):
        im = cv2.imread(os.path.join(im_root, im_name), read_flags)
        if im is None:
            # Keep going instead of crashing on stray non-image files; idx is
            # still taken from the directory position, so the numbering of
            # the remaining result files is unaffected.
            print('skip unreadable image {}'.format(im_name))
            continue
        origin_im = im.copy()
        im, im_scale = resize(im, target_size, max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)

        # input
        data = [mx.nd.array(im_tensor), mx.nd.array(im_info)]
        data_batch = mx.io.DataBatch(
            data=[data], label=[], pad=0, index=idx,
            provide_data=[[(k, v.shape) for k, v in zip(data_names, data)]],
            provide_label=[None])
        # The image scale is the third entry of each im_info row.
        scales = [item[1].asnumpy()[0, 2] for item in data_batch.data]

        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch,
                                             data_names, scales, config)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        # Score column 0 is skipped, so dets_nms[k] belongs to column k + 1.
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            nms_t.tic()
            keep = nms(cls_dets)
            nms_t.toc()
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > conf_threshold, :]
            dets_nms.append(cls_dets)
        print('testing {} {:.2f}ms'.format(im_name, toc() * 1000))
        print('nms: {:.2f}ms'.format(nms_t.total_time * 1000))
        nms_t.clear()

        # save results
        person_dets = dets_nms[0]
        with open(os.path.join(result_root, '{:04d}.txt'.format(idx)), 'w') as f:
            f.write('{}\n'.format(len(person_dets)))
            for det in person_dets:
                x1, y1, x2, y2, s = det
                w = x2 - x1
                h = y2 - y1
                f.write('0 {} {} {} {} {}\n'.format(s, w, h, x1, y1))

        # visualize
        im = show_boxes_cv2(origin_im, dets_nms, classes, 1)
        cv2.imshow('det', im)
        cv2.waitKey(1)
net.loss.backward() optimizer.step() duration = timer.toc() if step % cfg.disp_interval == 0: train_loss /= cnt bbox_loss /= cnt iou_loss /= cnt cls_loss /= cnt progress_in_epoch = (step % imdb.batch_per_epoch) / imdb.batch_per_epoch print('epoch: %d, step: %d (%.2f %%),' 'loss: %.3f, bbox_loss: %.3f, iou_loss: %.3f, cls_loss: %.3f (%.2f s/batch)' % ( imdb.epoch, step, progress_in_epoch * 100, train_loss, bbox_loss, iou_loss, cls_loss, duration)) with open(cfg.log_file, 'a+') as log: log.write('%d, %d, %.3f, %.3f, %.3f, %.3f, %.2f\n' % ( imdb.epoch, step, train_loss, bbox_loss, iou_loss, cls_loss, duration)) if use_tensorboard and step % cfg.log_interval == 0: exp.add_scalar_value('loss_train', train_loss, step=step) exp.add_scalar_value('loss_bbox', bbox_loss, step=step) exp.add_scalar_value('loss_iou', iou_loss, step=step) exp.add_scalar_value('loss_cls', cls_loss, step=step) exp.add_scalar_value('learning_rate', get_optimizer_lr(optimizer), step=step) train_loss = 0 bbox_loss, iou_loss, cls_loss = 0., 0., 0. cnt = 0 timer.clear() imdb.close()
accuracy_epoch, twod_dists, threed_dists, test_loss_epoch = test_net( net2, imdb_cullnettest, dataloader_cullnettest, args, test_output_dir, args.image_size_index, args.batch_size, objpoints3D, corners_3d, vertices, thresh=args.thresh, vis=args.vis) _t.clear() if args.save_results_bool: resultsdist_dir = cfg.TEST_DIR + '/' + cfg.args.datadirname + '_topk' + str( args.topk) + '_nearby' + str(args.nearby_test) + '_cullnetyolov3' if not os.path.exists(resultsdist_dir): os.makedirs(resultsdist_dir) save_resultsdist(twod_dists, threed_dists, resultsdist_dir, args.class_name) save_results(cfg.args.datadirname + '_topk' + str(args.topk) + '_nearby' + str(args.nearby_test), args.class_name, accuracy_epoch[0], accuracy_epoch[1],
def main():
    """Run the trained Darknet19 detector over a list of images and save the detections.

    The image paths are read, one per line, from a hard-coded list file.
    ``../output`` is recreated from ``../output_template`` and
    ``../kitti_det_output`` is recreated empty; the detections of every
    frame are handed to ``save_as_kitti_format`` together with that
    directory.  When ``vis_enable`` is switched on, each frame is also shown,
    written to ``../output/detection`` and the loop waits for a key press
    (``q`` quits).
    """
    output_dir = '../output'
    output_template_dir = '../output_template'
    kitti_output_dir = '../kitti_det_output'
    input_file_list = '/home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt'
    # input_file_list = '/home/cory/project/yolo2-pytorch/flow/w01_imgs.txt'
    vis_enable = False
    thresh = 0.5

    trained_model = (
        '/home/cory/project/yolo2-pytorch/models/training/kitti_new_2_flow_center_ft_flownet2_joint/'
        'kitti_new_2_flow_center_ft_flownet2_joint_30.h5')

    # Start from clean output directories.
    shutil.rmtree(output_dir, ignore_errors=True)
    shutil.rmtree(kitti_output_dir, ignore_errors=True)
    shutil.copytree(output_template_dir, output_dir)
    os.makedirs(kitti_output_dir)

    net = Darknet19(cfg)
    net_utils.load_net(trained_model, net)
    net.eval()
    net.cuda()
    print(trained_model)
    print('load model successfully')

    # The context manager closes the list file; the previous version left
    # the handle open for the whole run.
    with open(input_file_list) as img_files:
        image_abs_paths = [f.strip() for f in img_files]

    format_str = 'frame: %d, (detection: %.1f fps, %.1f ms) (total: %.1f fps, %.1f ms) %s'
    t_det = Timer()
    t_total = Timer()

    for i, image_path in enumerate(image_abs_paths):
        t_total.tic()
        image, im_data = preprocess(image_path)
        im_data = net_utils.np_to_variable(im_data, is_cuda=True,
                                           volatile=True).permute(0, 3, 1, 2)

        # Only the forward pass is covered by the detection timer.
        t_det.tic()
        bbox_pred, iou_pred, prob_pred = net.forward(im_data)
        det_time = t_det.toc()

        # to numpy
        bbox_pred = bbox_pred.data.cpu().numpy()
        iou_pred = iou_pred.data.cpu().numpy()
        prob_pred = prob_pred.data.cpu().numpy()

        bboxes, scores, cls_inds = yolo_utils.postprocess(
            bbox_pred, iou_pred, prob_pred, image.shape, cfg, thresh)
        det_obj = detection_objects(bboxes, scores, cls_inds)
        save_as_kitti_format(i, det_obj, kitti_output_dir, src_label='kitti')

        total_time = t_total.toc()
        print(format_str % (i, 1. / det_time, det_time * 1000,
                            1. / total_time, total_time * 1000, image_path))
        t_det.clear()
        t_total.clear()

        if vis_enable:
            im2show = yolo_utils.draw_detection(image, bboxes, scores, cls_inds, cfg)
            cv2.imshow('detection', im2show)
            cv2.imwrite(output_dir + '/detection/{:04d}.jpg'.format(i), im2show)
            # waitKey(0) blocks until a key is pressed.
            key = cv2.waitKey(0)
            if key == ord('q'):
                break
def train_main():
    """Train Darknet19 with the dataset/experiment selected by the hard-coded ``choice``.

    The configuration is merged from a dataset YAML and an experiment YAML.
    Training resumes from the epoch and weights returned by ``read_ckp``.
    Every ``cfg['disp_interval']`` steps the current losses are printed and
    appended to ``<train_output_dir>/train.log``.  After each epoch the
    weights are saved as ``<exp_name>_<epoch>.h5`` and the epoch number is
    written to ``check_point.txt`` in the same directory.  Ctrl-C exits with
    status 1.
    """
    choice = 1
    if choice == 0:
        dataset_yaml = '/home/cory/project/yolo2-pytorch/cfgs/config_voc.yaml'
        exp_yaml = '/home/cory/project/yolo2-pytorch/cfgs/exps/voc0712/voc0712_baseline_v3_rand.yaml'
        gpu_id = 0
    else:
        dataset_yaml = '/home/cory/project/yolo2-pytorch/cfgs/config_kitti.yaml'
        exp_yaml = '/home/cory/project/yolo2-pytorch/cfgs/exps/kitti/kitti_new_2_flow_center_ft_flownet2_joint.yaml'
        gpu_id = 1

    cfg = load_cfg_yamls([dataset_yaml, exp_yaml])

    # runtime setting
    os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_id)
    os.makedirs(cfg['train_output_dir'], exist_ok=True)

    # data loader
    batch_size = cfg['train_batch_size']
    dataset = DetectionDataset(cfg)
    print('load dataset succeeded')

    net = Darknet19(cfg)
    start_epoch, use_model = read_ckp(cfg)
    net_utils.load_net(use_model, net)
    net.cuda()
    net.train()
    print('load net succeeded')

    # show training parameters
    print('-------------------------------')
    print('pid', os.getpid())
    print('gpu_id', os.environ.get('CUDA_VISIBLE_DEVICES'))
    print('use_model', use_model)
    print('exp_name', cfg['exp_name'])
    print('dataset', cfg['dataset_name'])
    print('optimizer', cfg['optimizer'])
    print('opt_param', cfg['opt_param'])
    print('train_batch_size', cfg['train_batch_size'])
    print('start_epoch', start_epoch)
    print('lr', lookup_lr(cfg, start_epoch))
    print('inp_size', cfg['inp_size'])
    print('inp_size_candidates', cfg['inp_size_candidates'])
    print('-------------------------------')

    log_path = os.path.join(cfg['train_output_dir'], 'train.log')
    ckp_path = os.path.join(cfg['train_output_dir'], 'check_point.txt')

    timer = Timer()
    try:
        for epoch in range(start_epoch, cfg['max_epoch']):
            time_epoch_begin = time.time()
            # The optimizer is rebuilt per epoch from the config and the epoch.
            optimizer = get_optimizer(cfg, net, epoch)
            dataloader = DataLoaderX(dataset, batch_size=batch_size,
                                     shuffle=True, num_workers=4,
                                     pin_memory=False)

            for step, data in enumerate(dataloader):
                timer.tic()
                # NOTE(review): Barrier looks like a stage-timing debug
                # helper (see the disabled barrier.print()); confirm.
                barrier = Barrier()

                images, labels = data
                # debug_and_vis(data)
                im_data = Variable(images.cuda())
                barrier.add(1)

                bbox_pred, iou_pred, class_pred = net.forward(im_data)
                barrier.add(2)

                # build training target
                network_h = int(im_data.data.size()[2])
                network_w = int(im_data.data.size()[3])
                network_size_wh = np.array([network_w, network_h])  # (w, h)
                net_bbox_loss, net_iou_loss, net_class_loss = training_target(
                    cfg, bbox_pred, class_pred, labels, network_size_wh, iou_pred)
                barrier.add(3)

                # backward
                optimizer.zero_grad()
                net_loss = net_bbox_loss + net_iou_loss + net_class_loss
                net_loss.backward()
                optimizer.step()
                barrier.add(4)

                duration = timer.toc()
                if step % cfg['disp_interval'] == 0:
                    # loss for this step
                    bbox_loss = net_bbox_loss.data.cpu().numpy()[0]
                    iou_loss = net_iou_loss.data.cpu().numpy()[0]
                    cls_loss = net_class_loss.data.cpu().numpy()[0]
                    train_loss = net_loss.data.cpu().numpy()[0]
                    barrier.add(5)

                    progress_in_epoch = (step + 1) * batch_size / len(dataset)
                    print('epoch %d, step %d (%.2f %%) '
                          'loss: %.3f, bbox_loss: %.3f, iou_loss: %.3f, cls_loss: %.3f (%.2f s/batch)'
                          % (epoch, step, progress_in_epoch * 100,
                             train_loss, bbox_loss, iou_loss, cls_loss, duration))
                    with open(log_path, 'a+') as log:
                        log.write('%d, %d, %.3f, %.3f, %.3f, %.3f, %.2f\n'
                                  % (epoch, step, train_loss, bbox_loss,
                                     iou_loss, cls_loss, duration))

                    timer.clear()
                    barrier.add(6)
                    # barrier.print()

            # epoch_done
            time_epoch_end = time.time()
            print('{:.2f} seconds for this epoch'.format(
                time_epoch_end - time_epoch_begin))

            # save trained weights
            ckp_epoch = epoch + 1
            save_name = os.path.join(
                cfg['train_output_dir'],
                '{}_{}.h5'.format(cfg['exp_name'], ckp_epoch))
            net_utils.save_net(save_name, net)
            print('save model: {}'.format(save_name))

            # update check_point file; `with` closes it even if the write fails
            with open(ckp_path, 'w') as ckp:
                ckp.write(str(ckp_epoch))

    except KeyboardInterrupt:
        exit(1)
def main():
    """Run the R-FCN demo on the three bundled demo images and display the detections.

    The network symbol and weights are chosen from the module-level ``args``
    (``args.rfcn_only`` selects the plain R-FCN instead of the deformable
    variant) and ``config``.  The images are loaded from ``<cur_path>/../demo``,
    two warm-up forward passes are run on the first image, and then every
    image is detected, filtered by NMS and a fixed score threshold of 0.7,
    timed, and shown with ``show_boxes``.

    Raises:
        AssertionError: If one of the demo images does not exist.
    """
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn_dcn' if not args.rfcn_only else 'resnet_v1_101_rfcn'
    # eval() only ever sees one of the two constant names assigned above; it
    # resolves the module of that name and the class of the same name in it.
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    classes = [
        'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
        'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
        'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
        'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella',
        'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
        'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard',
        'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork',
        'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
        'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
        'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
        'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
        'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
        'scissors', 'teddy bear', 'hair drier', 'toothbrush',
    ]

    # load demo data
    image_names = [
        'lindau_000024_000019_leftImg8bit.png',
        'COCO_test2015_000000000891.jpg',
        'COCO_test2015_000000001669.jpg',
    ]
    demo_dir = cur_path + '/../demo/'
    target_size = config.SCALES[0][0]
    max_size = config.SCALES[0][1]
    data = []
    for im_name in image_names:
        # The message used to be '%s does not exist'.format(...), which never
        # interpolated the path because '%s' is not a str.format placeholder.
        assert os.path.exists(demo_dir + im_name), (
            '{} does not exist'.format('../demo/' + im_name))
        im = cv2.imread(demo_dir + im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        im, im_scale = resize(im, target_size, max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(sample[name]) for name in data_names] for sample in data]
    max_data_shape = [[('data', (1, 3,
                                 max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, sample)]
                    for sample in data]
    provide_label = [None] * len(data)
    arg_params, aux_params = load_param(
        cur_path + '/../model/' +
        ('rfcn_dcn_coco' if not args.rfcn_only else 'rfcn_coco'),
        config.TEST.test_epoch, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    nms = gpu_soft_nms_wrapper(config.TEST.NMS, method=2, device_id=0)

    # warm up: two forward passes on the first image, results discarded
    for _ in range(2):
        data_batch = mx.io.DataBatch(
            data=[data[0]], label=[], pad=0, index=0,
            provide_data=[[(k, v.shape) for k, v in zip(data_names, data[0])]],
            provide_label=[None])
        # The image scale is the third entry of each im_info row.
        scales = [item[1].asnumpy()[0, 2] for item in data_batch.data]
        scores, boxes, data_dict = im_detect(predictor, data_batch,
                                             data_names, scales, config)

    # test
    nms_t = Timer()
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]], label=[], pad=0, index=idx,
            provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [item[1].asnumpy()[0, 2] for item in data_batch.data]

        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch,
                                             data_names, scales, config)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        # Score column 0 is skipped, so dets_nms[k] belongs to column k + 1.
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            nms_t.tic()
            keep = nms(cls_dets)
            nms_t.toc()
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)
        print('testing {} {:.2f}ms'.format(im_name, toc() * 1000))
        print('nms: {:.2f}ms'.format(nms_t.average_time * 1000))
        nms_t.clear()

        # visualize: the image is re-read and converted from BGR to RGB
        im = cv2.imread(demo_dir + im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        show_boxes(im, dets_nms, classes, 1)

    print('done')
def main():
    """Run the trained Darknet19 detector over the KITTI validation list and save the detections.

    ``output`` is recreated from ``output_template`` and ``kitti_det_output``
    is recreated empty.  The image paths are read, one per line, from a
    hard-coded list file; the detections of every frame are handed to
    ``save_as_kitti_format`` together with ``yolo_flow_kitti_det.txt``, which
    is deleted first if it exists.  When ``vis_enable`` is switched on, each
    frame is also shown, written to ``output/detection`` and the loop waits
    for a key press (``q`` quits).
    """
    shutil.rmtree('output', ignore_errors=True)
    shutil.copytree('output_template', 'output')
    shutil.rmtree('kitti_det_output', ignore_errors=True)
    os.makedirs('kitti_det_output')

    trained_model = '/home/cory/yolo2-pytorch/models/training/kitti_new_2/kitti_new_2_100.h5'
    image_list_file = '/home/cory/yolo2-pytorch/train_data/kitti/kitti_val_images.txt'
    kitti_filename = 'yolo_flow_kitti_det.txt'
    thresh = 0.5
    vis_enable = False

    net = Darknet19()
    net_utils.load_net(trained_model, net)
    net.eval()
    net.cuda()
    print('load model successfully')
    # print(net)

    # The context manager closes the list file; the previous version left
    # the handle open for the whole run.
    with open(image_list_file) as img_files:
        image_abs_paths = [f.strip() for f in img_files]

    # Remove the result file of a previous run, if any.
    try:
        os.remove(kitti_filename)
    except OSError:
        pass

    format_str = 'frame: %d, (detection: %.1f fps, %.1f ms) (total: %.1f fps, %.1f ms)'
    t_det = Timer()
    t_total = Timer()

    for i, image_path in enumerate(image_abs_paths):
        t_total.tic()
        image, im_data = preprocess(image_path)
        im_data = net_utils.np_to_variable(im_data, is_cuda=True,
                                           volatile=True).permute(0, 3, 1, 2)

        # Only the forward pass is covered by the detection timer.
        t_det.tic()
        bbox_pred, iou_pred, prob_pred = net.forward(im_data)
        det_time = t_det.toc()

        # to numpy
        bbox_pred = bbox_pred.data.cpu().numpy()
        iou_pred = iou_pred.data.cpu().numpy()
        prob_pred = prob_pred.data.cpu().numpy()

        bboxes, scores, cls_inds = yolo_utils.postprocess(
            bbox_pred, iou_pred, prob_pred, image.shape, cfg, thresh)
        det_obj = detection_objects(bboxes, scores, cls_inds)
        save_as_kitti_format(i, det_obj, kitti_filename, src_label='kitti')

        if vis_enable:
            im2show = yolo_utils.draw_detection(image, bboxes, scores, cls_inds, cfg)
            cv2.imshow('detection', im2show)
            cv2.imwrite('output/detection/{:04d}.jpg'.format(i), im2show)

        total_time = t_total.toc()
        print(format_str % (i, 1. / det_time, det_time * 1000,
                            1. / total_time, total_time * 1000))
        t_det.clear()
        t_total.clear()

        if vis_enable:
            # waitKey(0) blocks until a key is pressed.
            key = cv2.waitKey(0)
            if key == ord('q'):
                break
def main():
    """Run the Darknet19 detector (optionally via flow-warped features) over a KITTI image list.

    ``<root_dir>/output`` is recreated from ``<root_dir>/output_template``.
    The image paths are read, one per line, from a hard-coded list file and
    the first 500 entries are dropped.  Every frame is detected, its
    detections are handed to ``save_as_kitti_format`` together with
    ``<root_dir>/yolo_flow_kitti_det.txt`` (deleted first if it exists), and
    the drawn frame is shown and written to ``output/detection``.  Stage
    timings ``t1`` .. ``t5`` and a per-frame summary are printed.  Pressing
    ``q`` in the window stops the loop.

    With ``use_flow`` enabled, a feature map is extracted on every
    ``detection_period``-th frame (the key frame) and ``detect_by_flow``
    supplies the features that are fed into the rest of the network.
    """
    root_dir = '/home/cory/project/yolo2-pytorch'
    output_dir = root_dir + '/output'
    output_template_dir = root_dir + '/output_template'
    kitti_filename = root_dir + '/yolo_flow_kitti_det.txt'
    shutil.rmtree(output_dir, ignore_errors=True)
    shutil.copytree(output_template_dir, output_dir)

    trained_model = '/home/cory/project/yolo2-pytorch/models/training/kitti_new_2_flow_center_ft/kitti_new_2_flow_center_ft_50.h5'
    image_list_file = '/home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt'

    thresh = 0.5

    net = Darknet19()
    net_utils.load_net(trained_model, net)
    net.eval()
    net.cuda()
    print('load model successfully')
    # print(net)

    # The context manager closes the list file; the previous version left
    # the handle open for the whole run.
    with open(image_list_file) as img_files:
        image_abs_paths = [f.strip() for f in img_files]
    image_abs_paths = image_abs_paths[500:]

    key_frame_path = ''
    detection_period = 1
    use_flow = False
    layer_of_flow = 'conv4'

    # Remove the result file of a previous run, if any.
    try:
        os.remove(kitti_filename)
    except OSError:
        pass

    format_str = 'frame: %d, (detection: %.1f fps, %.1f ms) (total: %.1f fps, %.1f ms)'
    t_det = Timer()
    t_total = Timer()

    for i, image_path in enumerate(image_abs_paths):
        t_total.tic()
        t0 = time.time()
        image, im_data = preprocess(image_path)
        im_data = net_utils.np_to_variable(im_data, is_cuda=True,
                                           volatile=True).permute(0, 3, 1, 2)
        t1 = time.time()
        print('t1', t1 - t0)

        # key frame
        if use_flow and i % detection_period == 0:
            key_frame_path = image_path
            # feature map of the layer named by layer_of_flow
            feature = net.get_feature_map(im_data=im_data, layer=layer_of_flow)
            feature = feature.data.cpu().numpy()
            feature_map_all = plot_feature_map(feature, resize_ratio=1)
            # cv2.imshow('feature_map', feature_map_all)
            cv2.imwrite(output_dir + '/feature_map/{:04d}.jpg'.format(i),
                        feature_map_all * 255)

        t_det.tic()
        if use_flow:
            # `feature` is the one stored at the most recent key frame.
            conv5_shifted_gpu = detect_by_flow(i, feature, image, image_path,
                                               key_frame_path, output_dir)
            bbox_pred, iou_pred, prob_pred = net.feed_feature(
                Variable(conv5_shifted_gpu), layer=layer_of_flow)
        else:
            bbox_pred, iou_pred, prob_pred = net.forward(im_data)
        det_time = t_det.toc()
        t2 = time.time()
        print('t2', t2 - t1)

        # to numpy
        bbox_pred = bbox_pred.data.cpu().numpy()
        iou_pred = iou_pred.data.cpu().numpy()
        prob_pred = prob_pred.data.cpu().numpy()
        t3 = time.time()
        print('t3', t3 - t2)

        bboxes, scores, cls_inds = yolo_utils.postprocess(
            bbox_pred, iou_pred, prob_pred, image.shape, cfg, thresh)
        t4 = time.time()
        print('t4', t4 - t3)

        det_obj = detection_objects(bboxes, scores, cls_inds)
        save_as_kitti_format(i, det_obj, kitti_filename, src_label='kitti')

        im2show = yolo_utils.draw_detection(image, bboxes, scores, cls_inds, cfg)
        cv2.imshow('detection', im2show)
        cv2.imwrite(output_dir + '/detection/{:04d}.jpg'.format(i), im2show)

        total_time = t_total.toc()
        print(format_str % (i, 1. / det_time, det_time * 1000,
                            1. / total_time, total_time * 1000))
        t5 = time.time()
        print('t5', t5 - t4)

        t_det.clear()
        t_total.clear()

        key = cv2.waitKey(1)
        if key == ord('q'):
            break
def test_ontime(self,
                path_in='demo',
                path_out='./interface/tmp/wx_d',
                dec_flag=0,
                cv_im=None,
                da_flag=1,
                retrival_params=None):
    """Detect on, or render the stored detections for, one caller-supplied frame.

    With a truthy ``dec_flag`` the frame is run through ``self.net``, every
    detected box is cropped and classified with ``cloth_num``, and the
    results are stored on the instance (``bboxes``, ``scores``, ``cls_inds``,
    ``cls_res``, ``cls_score``, ``cls_res_top5``, ``cls_score_top5``,
    ``people_sex``, ``people_age``).  With a falsy ``dec_flag`` the results
    already stored on the instance are drawn with ``self.draw_detection_rec``
    and the image is written to ``path_out``.

    Args:
        path_in: Directory scanned for ``.jpg`` files; the first name in
            sorted order is used as the image name (and for the face lookup).
        path_out: Directory the rendered image is written to.
        dec_flag: Truthy for the detection pass, falsy for the display pass.
        cv_im: The frame to process (must provide ``.shape``).  When it is
            missing or empty the process exits, see the note in the body.
        da_flag: Not used by this method.
        retrival_params: Retrieval settings forwarded to
            ``draw_detection_rec``; defaults to
            ``['自动', None, None, '全选', '全选', '全选', '全选', '全选', '全选']``.

    NOTE(review): the nesting below was reconstructed from whitespace-mangled
    source; in particular confirm that the ``else`` of the overlap handling
    pairs with the score comparison and that ``break`` follows both branches.
    """
    # A fresh list per call instead of a shared mutable default argument.
    if retrival_params is None:
        retrival_params = ['自动', None, None,
                           '全选', '全选', '全选', '全选', '全选', '全选']

    t_det = Timer()
    t_total = Timer()

    im_fnames = sorted(
        fname for fname in os.listdir(path_in)
        if os.path.splitext(fname)[-1] == '.jpg')
    im_fname = os.path.join(path_in, im_fnames[0])

    # `cv_im == []` is an element-wise comparison for array inputs, so the
    # emptiness test is done on None / length instead.
    if cv_im is None or len(cv_im) == 0:
        (image, im_data, im_name) = preprocess(im_fname)
        # NOTE(review): exit() ends the whole process whenever no frame is
        # supplied; this looks like a debugging leftover. Behaviour kept,
        # confirm before removing.
        exit()
    else:
        image = cv_im
        im_name = im_fname
        im_data = get_ImData(cv_im)

    t_total.tic()
    im_data = net_utils.np_to_variable(im_data, is_cuda=True,
                                       volatile=True).permute(0, 3, 1, 2)
    im_name = os.path.split(im_name)[-1]
    t_det.tic()

    if dec_flag:
        self.people_sex = None
        self.people_age = 20
        if '自动' in retrival_params[0] or '自动' in retrival_params[-1]:
            try:
                self.result = self.face.run(im_fname)
                # The gender is only taken over when exactly one face is found.
                if len(self.result) == 1:
                    if self.result[0][u'faceAttributes'][u'gender'] == u'female':
                        self.people_sex = '2'
                    else:
                        self.people_sex = '1'
            except Exception:
                # Best effort: the face lookup is optional. Catching Exception
                # instead of a bare except lets Ctrl-C / SystemExit through.
                print("can't contact face identify")

        bbox_pred, iou_pred, prob_pred = self.net(im_data)
        # Previously toc() was called twice in a row; once is enough.
        t_det.toc()

        # to numpy
        bbox_pred = bbox_pred.data.cpu().numpy()
        iou_pred = iou_pred.data.cpu().numpy()
        prob_pred = prob_pred.data.cpu().numpy()

        self.bboxes, self.scores, self.cls_inds = yolo_utils.postprocess(
            bbox_pred, iou_pred, prob_pred, image.shape, cfg, self.thresh)

        cls_res = []
        cls_score = []
        cls_res_top5 = []
        cls_score_top5 = []
        for i, bbox in enumerate(self.bboxes):
            cut_image = image[bbox[1]:bbox[3], bbox[0]:bbox[2]]
            res_num, res_score = cloth_num(cut_image, self.model_res)
            cls_res.append(self.num_classes[res_num[0]])
            # Names of the ten best classes (the list is called "top5" only
            # for historical reasons).
            cls_res_top5.append(
                [self.num_classes[res_num[n]] for n in range(10)])
            cls_score_top5.append(res_score)

            # If the two best classes are nearly tied and the best score is
            # low, the category is treated as undetermined (score 0).
            if res_score[0] < 12 and res_score[1] * 1.0 / res_score[0] > 0.9:
                cls_score.append(0)
            else:
                # When this box overlaps an earlier box by at least half of
                # the smaller box's area, the lower-scoring of the two is
                # demoted to score 1.
                for j, obox in enumerate(self.bboxes[:i]):
                    if (obox[2] > bbox[0] and obox[3] > bbox[1]
                            and obox[0] < bbox[2] and obox[1] < bbox[3]):
                        print("------------------------xiangjiao------------------------------")
                        box_inter = (max(obox[0], bbox[0]),
                                     max(obox[1], bbox[1]),
                                     min(obox[2], bbox[2]),
                                     min(obox[3], bbox[3]))
                        area1 = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
                        area2 = (obox[2] - obox[0]) * (obox[3] - obox[1])
                        area_min = min((area1, area2))
                        area_inter = ((box_inter[2] - box_inter[0])
                                      * (box_inter[3] - box_inter[1]))
                        if area_inter * 1.0 / area_min >= 0.5:
                            print("====================YEEEEEES++++++++++++++++")
                            if res_score[0] > cls_score[j]:
                                if cls_score[j] != 0:
                                    cls_score[j] = 1
                            else:
                                res_score[0] = 1
                            break
                cls_score.append(res_score[0])

        self.cls_res = cls_res
        self.cls_score = cls_score
        self.cls_res_top5 = cls_res_top5
        self.cls_score_top5 = cls_score_top5
    else:
        # Display pass: render the results stored by an earlier detection
        # pass. `with` closes all five files even if drawing fails.
        with open(InfoDir, 'w') as f_info, \
                open(InfoPicPath, 'w') as f_pic, \
                open(InfoPicLikePath, 'w') as f_pic_like, \
                open(InfoPicPathRec, 'w') as f_pic_rec, \
                open(InfoPicLikePathRec, 'w') as f_pic_rec_like:
            im2show = self.draw_detection_rec(
                image, self.bboxes, self.cls_inds, self.scores,
                self.cls_res_top5, self.cls_score_top5,
                f_info, f_pic, f_pic_like, f_pic_rec, f_pic_rec_like,
                flag=1,
                sex_dec=self.people_sex,
                age_dec=self.people_age,
                retrival_params=retrival_params)
            # Images taller than 1100 px are scaled to a height of 1000 px,
            # keeping the aspect ratio.
            if im2show.shape[0] > 1100:
                im2show = cv2.resize(
                    im2show,
                    (int(1000. * float(im2show.shape[1]) / im2show.shape[0]),
                     1000))
            cv2.imwrite(os.path.join(path_out, im_name), im2show)

    t_total.clear()
    t_det.clear()
def test(
        self,
        path_in='demo',
        path_out='./interface/tmp/wx_d',
        cut_path='./result/cut/',
        retrival_params=['自动', None, None, '全选', '全选', '全选', '全选', '全选', '全选']):
    """Detect, classify and annotate every ``.jpg`` image found in ``path_in``.

    For each image the method:

    1. optionally asks ``self.face`` for gender/age attributes,
    2. runs the detector ``self.net`` and post-processes its output into
       ``self.bboxes``, ``self.scores`` and ``self.cls_inds``,
    3. classifies the crop of every detected box with ``cloth_num``,
    4. calls ``self.draw_detection_rec`` with five freshly truncated
       info files, and
    5. writes the annotated image to ``path_out`` under the same file name.

    Args:
        path_in: Directory scanned (non-recursively) for ``.jpg`` files.
        path_out: Directory the annotated images are written to.
        cut_path: Unused by this method; kept for interface compatibility.
        retrival_params: Retrieval options forwarded to
            ``draw_detection_rec``. According to the original author's
            note the order is: sex, prince1, prince2, dress, top, bottom,
            version, style, age_value. Face detection is only attempted
            when the first or the last entry contains the "automatic"
            marker string.
            NOTE(review): the default is a shared list object; it is not
            mutated in this method, but confirm that
            ``draw_detection_rec`` does not mutate it either.

    Side effects:
        Updates ``self.people_sex``, ``self.people_age``, ``self.result``,
        ``self.bboxes``, ``self.scores``, ``self.cls_inds``,
        ``self.cls_res``, ``self.cls_score``, ``self.cls_res_top5`` and
        ``self.cls_score_top5`` for every processed image, and prints
        progress information to stdout.
    """
    t_det = Timer()
    t_total = Timer()

    im_fnames = sorted(
        fname for fname in os.listdir(path_in)
        if os.path.splitext(fname)[-1] == '.jpg')

    for fname in im_fnames:
        im_fname = os.path.join(path_in, fname)

        # Defaults used when face detection is skipped or fails.
        self.people_sex = None
        self.people_age = 20
        if '自动' in retrival_params[0] or '自动' in retrival_params[-1]:
            self._detect_face_attributes(im_fname)
        print('sex:')
        print(self.people_sex)

        image, im_data, im_name = preprocess(im_fname)
        t_total.tic()
        im_data = net_utils.np_to_variable(
            im_data, is_cuda=True, volatile=True).permute(0, 3, 1, 2)
        im_name = os.path.split(im_name)[-1]

        t_det.tic()
        bbox_pred, iou_pred, prob_pred = self.net(im_data)
        t_det.toc()

        # Move the raw predictions to numpy for post-processing.
        bbox_pred = bbox_pred.data.cpu().numpy()
        iou_pred = iou_pred.data.cpu().numpy()
        prob_pred = prob_pred.data.cpu().numpy()

        self.bboxes, self.scores, self.cls_inds = yolo_utils.postprocess(
            bbox_pred, iou_pred, prob_pred, image.shape, cfg, self.thresh)

        (self.cls_res, self.cls_score,
         self.cls_res_top5, self.cls_score_top5) = self._classify_boxes(image)

        # Author's notes on the draw_detection_rec arguments (translated):
        #   image: original image; bboxes: detection box coordinates;
        #   scores: detection confidences; cls_res_top5 / cls_score_top5:
        #   top ten classes and their confidences; f_info: recognition text
        #   info; f_pic: recognized category info; f_pic_like: similar
        #   category info; f_pic_rec: recommendations by category;
        #   f_pic_rec_like: recommendations by similar category;
        #   cls_inds: detected coarse category.
        # The context manager guarantees the files are closed even when
        # drawing raises.
        with open(InfoDir, 'w') as f_info, \
                open(InfoPicPath, 'w') as f_pic, \
                open(InfoPicLikePath, 'w') as f_pic_like, \
                open(InfoPicPathRec, 'w') as f_pic_rec, \
                open(InfoPicLikePathRec, 'w') as f_pic_rec_like:
            im2show = self.draw_detection_rec(
                image, self.bboxes, self.cls_inds, self.scores,
                self.cls_res_top5, self.cls_score_top5,
                f_info, f_pic, f_pic_like, f_pic_rec, f_pic_rec_like,
                sex_dec=self.people_sex,
                age_dec=self.people_age,
                retrival_params=retrival_params)

        # Shrink very tall results to a height of 1000 px, keeping the
        # aspect ratio.
        if im2show.shape[0] > 1100:
            im2show = cv2.resize(
                im2show,
                (int(1000. * float(im2show.shape[1]) / im2show.shape[0]),
                 1000))
        cv2.imwrite(os.path.join(path_out, im_name), im2show)

        t_total.toc()
        t_total.clear()
        t_det.clear()


def _detect_face_attributes(self, im_fname):
    """Set ``self.people_sex`` / ``self.people_age`` from face detection.

    The attributes are only updated when ``self.face.run`` reports exactly
    one face; ``'2'`` is stored for a female face and ``'1'`` otherwise.
    Any ordinary failure is reported on stdout and leaves the values that
    were set so far untouched (best effort). ``KeyboardInterrupt`` and
    ``SystemExit`` are deliberately not swallowed.
    """
    try:
        self.result = self.face.run(im_fname)
        if len(self.result) == 1:
            attributes = self.result[0][u'faceAttributes']
            if attributes[u'gender'] == u'female':
                self.people_sex = '2'
            else:
                self.people_sex = '1'
            self.people_age = attributes[u'age']
    except Exception:
        print("can't found face identify")


def _classify_boxes(self, image):
    """Classify the crop of every box in ``self.bboxes``.

    Returns:
        A 4-tuple ``(cls_res, cls_score, cls_res_top5, cls_score_top5)``
        of lists with one entry per box: the best class name, the score
        kept for it, the ten best class names, and the score sequence
        returned by ``cloth_num`` (possibly modified in place, see below).
    """
    cls_res = []
    cls_score = []
    cls_res_top5 = []
    cls_score_top5 = []

    for i, bbox in enumerate(self.bboxes):
        print("start===================================================%d" % i)
        cut_image = image[bbox[1]:bbox[3], bbox[0]:bbox[2]]
        res_num, res_score = cloth_num(cut_image, self.model_res)
        cls_res.append(self.num_classes[res_num[0]])
        cls_re = [self.num_classes[res_num[n]] for n in range(10)]

        # A weak best score that is nearly tied with the runner-up is
        # treated as "category cannot be determined" (score 0).
        if res_score[0] < 12 and res_score[1] * 1.0 / res_score[0] > 0.9:
            cls_score.append(0)
        else:
            # Compare against earlier boxes: at the first one whose
            # intersection covers at least half of the smaller box, demote
            # whichever of the two has the lower score by overwriting it
            # with 1, then stop looking.
            for j, obox in enumerate(self.bboxes[:i]):
                if (obox[2] > bbox[0] and obox[3] > bbox[1]
                        and obox[0] < bbox[2] and obox[1] < bbox[3]):
                    print("------------------------xiangjiao------------------------------")
                    box_inter = (max(obox[0], bbox[0]),
                                 max(obox[1], bbox[1]),
                                 min(obox[2], bbox[2]),
                                 min(obox[3], bbox[3]))
                    area1 = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
                    area2 = (obox[2] - obox[0]) * (obox[3] - obox[1])
                    area_min = min((area1, area2))
                    area_inter = ((box_inter[2] - box_inter[0])
                                  * (box_inter[3] - box_inter[1]))
                    if area_inter * 1.0 / area_min >= 0.5:
                        print("====================YEEEEEES++++++++++++++++")
                        if res_score[0] > cls_score[j]:
                            # An earlier box already scored 0 stays at 0.
                            if cls_score[j] != 0:
                                cls_score[j] = 1
                                cls_score_top5[j][0] = 1
                        else:
                            res_score[0] = 1
                        break
            cls_score.append(res_score[0])

        cls_res_top5.append(cls_re)
        cls_score_top5.append(res_score)

    return cls_res, cls_score, cls_res_top5, cls_score_top5
def main():
    """Run the detector over one KITTI tracking sequence and save results.

    The ``output`` directory is recreated from ``output_template``, the
    hard-coded checkpoint is loaded into a ``Darknet19`` network on the
    GPU, and every image of the hard-coded sequence directory is processed
    in sorted order. For each frame the detections are appended to a
    KITTI-format text file, drawn, shown in a window and written to
    ``output/detection``. Pressing ``q`` in the window stops the loop.
    """
    # Start from a clean copy of the output directory layout.
    shutil.rmtree('output', ignore_errors=True)
    shutil.copytree('output_template', 'output')

    # Other checkpoints the author experimented with:
    #   cfg.trained_model
    #   /home/cory/yolo2-pytorch/models/training/kitti_new_2/kitti_new_2_60.h5
    #   /home/cory/yolo2-pytorch/models/training/kitti_det_new_2/kitti_det_new_2_40.h5
    #   /home/cory/yolo2-pytorch/models/training/kitti_det_new_2/kitti_det_new_2_10.h5
    trained_model = '/home/cory/yolo2-pytorch/models/training/voc0712_obj_scale/voc0712_obj_scale_1.h5'
    thresh = 0.5
    use_kitti = True
    image_dir = '/home/cory/KITTI_Dataset/data_tracking_image_2/training/image_02/0013'
    # Author's note (meaning not evident from this function):
    #   car = 1 5
    #   pedestrian = 13 17

    net = Darknet19()
    net_utils.load_net(trained_model, net)
    net.eval()
    net.cuda()
    print('load model successfully')

    def str_index(filename):
        """Sort key: the path itself for KITTI, else the trailing number."""
        if not use_kitti:
            number_start = filename.rfind('_') + 1
            number_end = filename.rfind('.')
            return int(filename[number_start:number_end])
        return filename

    image_extensions = ('.jpg', '.JPG', '.png', '.PNG')
    image_abs_paths = []
    for name in os.listdir(image_dir):
        if name.endswith(image_extensions):
            image_abs_paths.append(os.path.join(image_dir, name))
    image_abs_paths.sort(key=str_index)

    key_frame_path = ''
    detection_period = 5
    use_flow = False
    layer_of_flow = 'conv4'

    # Results are appended per frame, so remove any file from a previous run.
    kitti_filename = 'yolo_flow_kitti_det.txt'
    try:
        os.remove(kitti_filename)
    except OSError:
        pass

    t_det = Timer()
    t_total = Timer()

    for frame_idx, image_path in enumerate(image_abs_paths):
        t_total.tic()
        image, im_data = preprocess(image_path)
        im_data = net_utils.np_to_variable(
            im_data, is_cuda=True, volatile=True)
        im_data = im_data.permute(0, 3, 1, 2)

        # Key frame: refresh the feature map that the flow branch reuses
        # for the following frames.
        if use_flow and frame_idx % detection_period == 0:
            key_frame_path = image_path
            feature = net.get_feature_map(im_data=im_data,
                                          layer=layer_of_flow)
            feature = feature.data.cpu().numpy()
            feature_map_all = plot_feature_map(feature, resize_ratio=1)
            cv2.imwrite('output/feature_map/{:04d}.jpg'.format(frame_idx),
                        feature_map_all * 255)

        t_det.tic()
        if not use_flow:
            bbox_pred, iou_pred, prob_pred = net.forward(im_data)
        else:
            t1 = time.time()
            conv5_shifted_gpu = detect_by_flow(
                frame_idx, feature, image, image_path, key_frame_path)
            t2 = time.time()
            print('detect_by_flow', t2 - t1)
            bbox_pred, iou_pred, prob_pred = net.feed_feature(
                Variable(conv5_shifted_gpu), layer=layer_of_flow)
        det_time = t_det.toc()

        # Bring the raw predictions back to numpy for post-processing.
        bbox_pred = bbox_pred.data.cpu().numpy()
        iou_pred = iou_pred.data.cpu().numpy()
        prob_pred = prob_pred.data.cpu().numpy()

        bboxes, scores, cls_inds = yolo_utils.postprocess(
            bbox_pred, iou_pred, prob_pred, image.shape, cfg, thresh)
        det_obj = detection_objects(bboxes, scores, cls_inds)
        save_as_kitti_format(frame_idx, det_obj, kitti_filename,
                             src_label='kitti')

        im2show = yolo_utils.draw_detection(
            image, bboxes, scores, cls_inds, cfg)
        cv2.imshow('detection', im2show)
        cv2.imwrite('output/detection/{:04d}.jpg'.format(frame_idx), im2show)

        total_time = t_total.toc()
        format_str = 'frame: %d, (detection: %.1f fps, %.1f ms) (total: %.1f fps, %.1f ms)'
        timing = (frame_idx,
                  1. / det_time, det_time * 1000,
                  1. / total_time, total_time * 1000)
        print(format_str % timing)

        t_det.clear()
        t_total.clear()

        if cv2.waitKey(1) == ord('q'):
            break