def main(input_path, output_path):
    """Trace the SSD detection model and save it as a TorchScript module.

    Args:
        input_path: Path of the checkpoint whose state dict is loaded into
            the network via ``torch.load``.
        output_path: Destination file for the traced script module.
    """
    # Initialize detection model
    cfg = widerface_640
    net = build_ssd('test', cfg['min_dim'], cfg['num_classes'])  # initialize SSD
    net.load_state_dict(torch.load(input_path))
    net.eval()
    print('Finished loading detection model!')

    # Generate a torch.jit.ScriptModule via tracing. The random tensor only
    # serves as the example input that drives the trace.
    print('=> tracing module...')
    example_input = torch.rand(1, 3, 720, 1280)
    traced_script_module = torch.jit.trace(net, example_input)

    # Serialize script module
    print("=> saving script module to '{}'".format(output_path))
    traced_script_module.save(output_path)
def test_oneimage(args):
    """Run multi-scale face detection on one image and visualize the result.

    The detector is built from ``widerface_640`` and loaded from
    ``args.trained_model``. The image at ``args.img_root`` is passed through
    ``infer`` / ``infer_flip`` at several scales, the detections are merged
    with ``bbox_vote`` and handed to ``vis_detections``.

    Args:
        args: Namespace providing ``trained_model``, ``cuda``, ``img_root``,
            ``visual_threshold`` and ``save_folder``.

    Raises:
        RuntimeError: If the image at ``args.img_root`` cannot be read.
    """
    # Load net.
    cfg = widerface_640
    num_classes = len(WIDERFace_CLASSES) + 1  # +1 background
    net = build_ssd('test', cfg['min_dim'], num_classes)  # initialize SSD
    net.load_state_dict(torch.load(args.trained_model))
    if torch.cuda.device_count() > 1:
        net = nn.DataParallel(net)  # enable data parallelism
    # NOTE(review): the model is moved to the GPU regardless of args.cuda,
    # which is only forwarded to infer/infer_flip -- confirm this is intended.
    net.cuda()
    net.eval()
    print('Finished loading model!')

    # Evaluation settings.
    cuda = args.cuda
    transform = TestBaseTransform((104, 117, 123))
    thresh = cfg['conf_thresh']

    # Load data.
    path = args.img_root
    img_id = 'face'
    img = cv2.imread(path, cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise RuntimeError('Failed to read image: {}'.format(path))

    def detect(scale):
        """Run the detector on the image resized by ``scale``."""
        return infer(net, img, transform, thresh, cuda, scale)

    def longer_side(boxes):
        """Per-row maximum of the two box extents (columns 2-0 and 3-1)."""
        return np.maximum(boxes[:, 2] - boxes[:, 0] + 1,
                          boxes[:, 3] - boxes[:, 1] + 1)

    def shorter_side(boxes):
        """Per-row minimum of the two box extents (columns 2-0 and 3-1)."""
        return np.minimum(boxes[:, 2] - boxes[:, 0] + 1,
                          boxes[:, 3] - boxes[:, 1] + 1)

    # Scale factor at which the image area equals 2000 * 2000 pixels.
    max_im_shrink = ((2000.0 * 2000.0) / (img.shape[0] * img.shape[1])) ** 0.5
    shrink = max_im_shrink if max_im_shrink < 1 else 1

    det0 = detect(shrink)
    det1 = infer_flip(net, img, transform, thresh, cuda, shrink)

    # Shrunken pass: keep only big faces.
    st = 0.5 if max_im_shrink >= 0.75 else 0.5 * max_im_shrink
    det_s = detect(st)
    det_s = det_s[longer_side(det_s) > 30, :]

    # Enlarge once.
    factor = 2
    bt = min(factor, max_im_shrink) if max_im_shrink > 1 else (st + max_im_shrink) / 2
    det_b = detect(bt)

    # Enlarge a small image repeatedly to find small faces.
    if max_im_shrink > factor:
        bt *= factor
        while bt < max_im_shrink:
            det_b = np.vstack((det_b, detect(bt)))
            bt *= factor
        det_b = np.vstack((det_b, detect(max_im_shrink)))

    # Enlarged pass: keep only small faces when the image was really enlarged.
    if bt > 1:
        det_b = det_b[shorter_side(det_b) < 100, :]
    else:
        det_b = det_b[longer_side(det_b) > 30, :]

    det = np.vstack((det0, det1, det_s, det_b))
    det = bbox_vote(det)
    vis_detections(img, det, img_id, args.visual_threshold, args.save_folder)
def test_oneimage():
    """Run multi-scale face detection on one image and visualize the result.

    Configuration is read from the ``args`` object of the enclosing scope
    (``trained_model``, ``cuda``, ``img_root``, ``visual_threshold``). The
    image is passed through ``infer`` / ``infer_flip`` at several scales, the
    detections are merged with ``bbox_vote`` and handed to ``vis_detections``.

    Raises:
        RuntimeError: If the image at ``args.img_root`` cannot be read.
    """
    # Load net.
    cfg = widerface_640
    num_classes = len(WIDERFace_CLASSES) + 1  # +1 background
    net = build_ssd('test', cfg['min_dim'], num_classes)  # initialize SSD
    net.load_state_dict(torch.load(args.trained_model))
    net.cuda()
    net.eval()
    print('Finished loading model!')

    # Evaluation settings.
    cuda = args.cuda
    transform = TestBaseTransform((104, 117, 123))
    thresh = cfg['conf_thresh']

    # Load data.
    path = args.img_root
    img_id = 'face'
    img = cv2.imread(path, cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise RuntimeError('Failed to read image: {}'.format(path))

    def detect(scale):
        """Run the detector on the image resized by ``scale``."""
        return infer(net, img, transform, thresh, cuda, scale)

    def box_extents(boxes):
        """Return the two per-row box extents (columns 2-0 and 3-1, +1)."""
        return (boxes[:, 2] - boxes[:, 0] + 1,
                boxes[:, 3] - boxes[:, 1] + 1)

    # Scale factor at which the image area equals 2000 * 2000 pixels.
    max_im_shrink = ((2000.0 * 2000.0) / (img.shape[0] * img.shape[1])) ** 0.5
    shrink = max_im_shrink if max_im_shrink < 1 else 1

    det0 = detect(shrink)
    det1 = infer_flip(net, img, transform, thresh, cuda, shrink)

    # Shrunken pass: keep only big faces.
    st = 0.5 if max_im_shrink >= 0.75 else 0.5 * max_im_shrink
    det_s = detect(st)
    det_s = det_s[np.maximum(*box_extents(det_s)) > 30, :]

    # Enlarge once.
    factor = 2
    bt = min(factor, max_im_shrink) if max_im_shrink > 1 else (st + max_im_shrink) / 2
    det_b = detect(bt)

    # Enlarge a small image repeatedly to find small faces.
    if max_im_shrink > factor:
        bt *= factor
        while bt < max_im_shrink:
            det_b = np.vstack((det_b, detect(bt)))
            bt *= factor
        det_b = np.vstack((det_b, detect(max_im_shrink)))

    # Enlarged pass: keep only small faces when the image was really enlarged.
    if bt > 1:
        det_b = det_b[np.minimum(*box_extents(det_b)) < 100, :]
    else:
        det_b = det_b[np.maximum(*box_extents(det_b)) > 30, :]

    det = np.vstack((det0, det1, det_s, det_b))
    det = bbox_vote(det)
    vis_detections(img, det, img_id, args.visual_threshold)
cap.get(cv2.CAP_PROP_FRAME_WIDTH) // args.reduce_scale), int( cap.get(cv2.CAP_PROP_FRAME_HEIGHT) // args.reduce_scale)) out = cv2.VideoWriter(args.output, fourcc, cap.get(cv2.CAP_PROP_FPS), out_size) total_frame = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) f_count = 0 e_time = None s_time = None cfg = widerface_640 # load net num_classes = len(WIDERFace_CLASSES) + 1 # +1 background net = build_ssd('test', cfg['min_dim'], num_classes) # initialize SSD net.load_state_dict(torch.load(args.trained_model)) net.cuda() net.eval() print('Finished loading model!') shrink = 1 while cap.isOpened(): if args.verbose > 0 and e_time is not None: ittime = (e_time - s_time) * (total_frame - f_count) hour = int(ittime / 60.0 / 60.0) minute = int((ittime / 60.0) - (hour * 60)) second = int(ittime % 60.0) print("Progress %d/%d(%.2f%%), Estimated time : %02d:%02d:%02d" %
xmin = det[i][0] ymin = det[i][1] xmax = det[i][2] ymax = det[i][3] score = det[i][4] #f.write('{:.1f} {:.1f} {:.1f} {:.1f} {:.3f}\n'. # format(xmin, ymin, (xmax - xmin + 1), (ymax - ymin + 1), score)) f.write('{:.1f} {:.1f} {:.1f} {:.1f} {:.3f}\n'. format(np.floor(xmin), np.floor(ymin), np.ceil(xmax - xmin + 1), np.ceil(ymax - ymin + 1), score)) # load net cfg = widerface_640 num_classes = len(WIDERFace_CLASSES) + 1 # +1 background net = build_ssd('test', cfg['min_dim'], num_classes) # initialize SSD net.load_state_dict(torch.load(args.trained_model)) net.cuda() net.eval() print('Finished loading model!') # load data testset = WIDERFaceDetection(args.widerface_root, 'val' , None, WIDERFaceAnnotationTransform()) #testset = WIDERFaceDetection(args.widerface_root, 'test' , None, WIDERFaceAnnotationTransform()) def vis_detections(imgid, im, dets, thresh=0.5): """Draw detected bounding boxes.""" class_name = 'face' inds = np.where(dets[:, -1] >= thresh)[0]
def main(input_path, output_path, detection_model_path):
    """Detect faces in every frame of a video, draw them and optionally save.

    Each frame is run through ``infer`` / ``infer_flip`` plus one shrunken
    pass, the detections are merged with ``bbox_vote``, and the surviving
    boxes are drawn on the frame, shown in a window and written to the output
    video when one is requested. Pressing ``q`` stops processing.

    Args:
        input_path: Path of the video to process.
        output_path: Output video file, an existing directory (the file is
            then named after the input with an ``.mp4`` extension), or
            ``None`` to skip writing.
        detection_model_path: Path of the detector weights given to
            ``torch.load``.

    Raises:
        RuntimeError: If the input video cannot be opened.
    """
    cuda = True
    torch.set_grad_enabled(False)
    if cuda and torch.cuda.is_available():
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    else:
        torch.set_default_tensor_type('torch.FloatTensor')

    # Initialize detection model
    cfg = widerface_640
    thresh = cfg['conf_thresh']
    net = build_ssd('test', cfg['min_dim'], cfg['num_classes'])  # initialize SSD
    net.load_state_dict(torch.load(detection_model_path))
    net = net.cuda()
    net.eval()
    print('Finished loading detection model!')
    transform = TestBaseTransform((104, 117, 123))

    # Open target video file
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        raise RuntimeError('Failed to read video: ' + input_path)

    out_vid = None
    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        target_vid_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        target_vid_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Initialize output video file
        if output_path is not None:
            if os.path.isdir(output_path):
                output_filename = os.path.splitext(os.path.basename(input_path))[0] + '.mp4'
                output_path = os.path.join(output_path, output_filename)
            fourcc = cv2.VideoWriter_fourcc(*'x264')
            out_vid = cv2.VideoWriter(output_path, fourcc, fps,
                                      (target_vid_width, target_vid_height))

        # Scale factor at which the frame area equals 2000 * 2000 pixels.
        max_im_shrink = ((2000.0 * 2000.0) / (target_vid_height * target_vid_width)) ** 0.5
        shrink = max_im_shrink if max_im_shrink < 1 else 1
        # Scale of the shrunken pass; depends only on the video size, so it
        # is computed once instead of per frame.
        st = 0.5 if max_im_shrink >= 0.75 else 0.5 * max_im_shrink

        # For each frame in the video
        for _ in tqdm(range(total_frames)):
            _, frame = cap.read()
            if frame is None:
                continue

            # Process
            det0 = infer(net, frame, transform, thresh, cuda, shrink)
            det1 = infer_flip(net, frame, transform, thresh, cuda, shrink)

            # Shrunken pass: keep only big faces.
            det_s = infer(net, frame, transform, thresh, cuda, st)
            longer_side = np.maximum(det_s[:, 2] - det_s[:, 0] + 1,
                                     det_s[:, 3] - det_s[:, 1] + 1)
            det_s = det_s[longer_side > 30, :]

            det = np.vstack((det0, det1, det_s))
            det = bbox_vote(det.astype(float))
            # Keep rows whose column 4 (presumably the confidence score --
            # TODO confirm) exceeds 0.5 and round the first four columns.
            det = np.round(det[det[:, 4] > 0.5, :4]).astype(int)

            # Render
            render_img = frame
            for rect in det:
                # Plain Python ints keep the point arguments independent of
                # NumPy scalar types.
                x0, y0, x1, y1 = (int(v) for v in rect)
                cv2.rectangle(render_img, (x0, y0), (x1, y1), (0, 0, 255), 1)
            if out_vid is not None:
                out_vid.write(render_img)
            cv2.imshow('render_img', render_img)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the capture and flush/close the writer on every exit path,
        # including the early 'q' break and exceptions.
        cap.release()
        if out_vid is not None:
            out_vid.release()