def __init__(self, adaptive=False, isFast=True):
    """Set up fine/coarse optical-flow estimators and per-stream state.

    Args:
        adaptive: enable adaptive tracking behaviour (stored only).
        isFast: True -> OpenCV DIS flow; False -> DeepFlow (slower).
    """
    if isFast:
        # Fast path: DIS optical flow — medium preset for the fine pass,
        # ultrafast preset for the coarse pass.
        self.__tracking_inst = cv2.DISOpticalFlow_create(
            cv2.DISOPTICAL_FLOW_PRESET_MEDIUM)
        self.__coarse_tracking_inst = cv2.DISOpticalFlow_create(
            cv2.DISOpticalFlow_PRESET_ULTRAFAST)
    else:
        # DeepFlow: one shared instance serves both the fine and coarse passes.
        deep_flow = cv2.optflow.createOptFlow_DeepFlow()
        self.__tracking_inst = deep_flow
        self.__coarse_tracking_inst = deep_flow
    # History buffers for base frames and flows, plus the last computed flows.
    self.__fine_base_frame_queue = []
    self.__coarse_base_frame_queue = []
    self.__fine_flow_queue = []
    self.__coarse_flow_queue = []
    self.__prev_fine_flow = None
    self.__prev_coarse_flow = None
    self.__adaptive = adaptive
def optflow_handle(cur_gray, scoremap, is_init):
    """Optical-flow refinement of a segmentation score map.

    Args:
        cur_gray : grayscale image of the current frame.
        scoremap : segmentation result of the current frame.
        is_init  : whether this is the first frame.

    Returns:
        dst : fusion of the optical-flow-tracked map and the prediction,
              as np.float32.
    """
    # NOTE(review): shape[0] is rows (height) in numpy convention, but it is
    # named `width` here — the names are swapped relative to `postprocess`
    # elsewhere in this file.
    width, height = scoremap.shape[0], scoremap.shape[1]
    # NOTE(review): a fresh DIS instance and all-zero "previous" frame/cfd are
    # created on every call, so the non-init branch below tracks against a
    # black frame; persistent prev_gray/prev_cfd probably need to be passed in
    # by the caller (cf. `postprocess` in this file) — confirm intent.
    disflow = cv2.DISOpticalFlow_create(cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST)
    prev_gray = np.zeros((height, width), np.uint8)
    prev_cfd = np.zeros((height, width), np.float32)
    cur_cfd = scoremap.copy()
    if is_init:
        is_init = False
        # Tune the coarsest DIS scale to the map resolution.
        if height <= 64 or width <= 64:
            disflow.setFinestScale(1)
        elif height <= 160 or width <= 160:
            disflow.setFinestScale(2)
        else:
            disflow.setFinestScale(3)
        fusion_cfd = cur_cfd
    else:
        # 0.3 is the base tracking weight used by the fuse step.
        weights = np.ones((width, height), np.float32) * 0.3
        track_cfd, is_track, weights = humanseg_tracking(
            prev_gray, cur_gray, prev_cfd, weights, disflow)
        fusion_cfd = humanseg_track_fuse(track_cfd, cur_cfd, weights, is_track)
    # Light blur to smooth fusion seams.
    fusion_cfd = cv2.GaussianBlur(fusion_cfd, (3, 3), 0)
    return fusion_cfd
def __init__(self, lamda, dt):
    """Build the optical-flow client (with smoothing weight) and a DIS estimator."""
    self.of = OpticalFlowClient(dt=dt)
    self.of.lamda = lamda
    # Medium preset trades speed for accuracy.
    self.dis = cv2.DISOpticalFlow_create(cv2.DISOpticalFlow_PRESET_MEDIUM)
    # Internal sanity checks are on by default.
    self.asserting = True
def calc_flow(previous, current, current_flow=None, minthresh=0):
    """Dense optical flow between two grayscale frames via DIS (medium preset).

    Args:
        previous, current: grayscale frames (uint8).
        current_flow: optional initial flow estimate passed to DIS.
        minthresh: flow vectors with magnitude below this are zeroed.

    Returns:
        HxWx2 float32 flow field (x, y components).
    """
    dis = cv2.DISOpticalFlow_create(cv2.DISOpticalFlow_PRESET_MEDIUM)
    flow = dis.calc(previous, current, current_flow)
    # Bug fix: cartToPolar was previously called through a `cv` alias while the
    # rest of the function uses `cv2`; using one alias removes the dependency
    # on both names being imported.
    mag, _ = cv2.cartToPolar(flow[..., 0], flow[..., 1])
    # Suppress small (likely noise) flow vectors below the threshold.
    small = mag < minthresh
    flow[..., 0][small] = 0
    flow[..., 1][small] = 0
    return flow
def get_optFlow(self, input):
    """Compute DIS optical flow between consecutive frames of a batch.

    Args:
        input: tensor, values assumed in [0, 1] — shape (B, T, H, W);
               TODO confirm shape against callers.

    Returns:
        CUDA float tensor of shape (B, 2*IN_LEN-2, h, w) with x/y flow
        planes interleaved per consecutive frame pair, wrapped so that no
        gradient is tracked.
    """
    input = input.detach().cpu().numpy()
    opt = np.zeros((input.shape[0], 2 * self.config['IN_LEN'] - 2, self.h, self.w),
                   dtype=np.float32)
    # Perf fix: create the DIS estimator once instead of once per frame pair;
    # the original allocated a new instance inside the inner loop.
    dis = cv2.DISOpticalFlow_create(cv2.DISOPTICAL_FLOW_PRESET_MEDIUM)
    for b in range(input.shape[0]):
        for i in range(input.shape[1] - 1):
            delta = dis.calc((input[b, i] * 255).astype(np.uint8),
                             (input[b, i + 1] * 255).astype(np.uint8), None)
            opt[b, i * 2] = delta[..., 0]
            opt[b, i * 2 + 1] = delta[..., 1]
    return torch.autograd.Variable(data=torch.from_numpy(opt).float(),
                                   requires_grad=False).cuda()
def video_stream_segment(self, frame_org, frame_id, prev_gray, prev_cfd, use_gpu=False):
    """
    API for human video segmentation.

    Args:
        frame_org (numpy.ndarray): frame data, shape [H, W, C], BGR color space.
        frame_id (int): index of the frame to be decoded.
        prev_gray (numpy.ndarray): gray scale image of last frame, shape [H, W].
        prev_cfd (numpy.ndarray): fusion of the optical-flow image and the
            segment result of the last frame, shape [H, W].
        use_gpu (bool): whether to use gpu.

    Returns:
        img_matting (numpy.ndarray): segmentation mask at the original size.
        cur_gray (numpy.ndarray): gray scale image of current frame, [H, W].
        optflow_map (numpy.ndarray): optical-flow fusion map of current frame.
    """
    resize_h = 192
    resize_w = 192
    # Bug fix: the original hard-coded is_init=True for every frame, so the
    # temporal fusion in postprocess_v never ran; only the first frame should
    # be treated as initialization.
    is_init = frame_id == 1
    # shape[0] is rows, shape[1] is cols (the original names were swapped).
    height = int(frame_org.shape[0])
    width = int(frame_org.shape[1])
    disflow = cv2.DISOpticalFlow_create(
        cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST)
    frame = preprocess_v(frame_org, resize_w, resize_h)
    image = PaddleTensor(np.array([frame.copy()]))
    output = self.gpu_predictor.run(
        [image]) if use_gpu else self.cpu_predictor.run([image])
    score_map = output[1].as_ndarray()
    frame = np.transpose(frame, axes=[1, 2, 0])
    score_map = np.transpose(np.squeeze(score_map, 0), axes=[1, 2, 0])
    cur_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    cur_gray = cv2.resize(cur_gray, (resize_w, resize_h))
    # Channel 1 holds the foreground probability.
    score_map = 255 * score_map[:, :, 1]
    if is_init:
        # No history yet — start tracking from blank state.
        prev_gray = np.zeros((resize_h, resize_w), np.uint8)
        prev_cfd = np.zeros((resize_h, resize_w), np.float32)
    optflow_map = postprocess_v(cur_gray, score_map, prev_gray, prev_cfd,
                                disflow, is_init)
    optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0)
    optflow_map = threshold_mask(optflow_map, thresh_bg=0.2, thresh_fg=0.8)
    # Bug fix: cv2.resize's third positional argument is `dst`, not the
    # interpolation mode — pass the interpolation by keyword.
    img_matting = cv2.resize(optflow_map, (width, height),
                             interpolation=cv2.INTER_LINEAR)
    return [img_matting, cur_gray, optflow_map]
def get_optFlow(self, prev_input, next_input):
    """Compute DIS optical flow between two aligned batches of frames.

    Args:
        prev_input: tensor of earlier frames — assumed (B, H, W) after the
            batch index; TODO confirm shape against callers.
        next_input: tensor of later frames, same shape as prev_input.

    Returns:
        Float tensor of shape (B, 1, h, w) holding one flow channel per pair,
        with no gradient tracking.
    """
    prev_input = prev_input.detach().cpu().numpy()
    next_input = next_input.detach().cpu().numpy()
    opt = np.zeros((prev_input.shape[0], 1, self.h, self.w), dtype=np.float32)
    dis = cv2.DISOpticalFlow_create(cv2.DISOPTICAL_FLOW_PRESET_MEDIUM)
    for b in range(prev_input.shape[0]):
        # Bug fix: the original looped over the undefined name `input` and
        # indexed with an undefined `i`, which raised at runtime; the flow is
        # computed from prev_input[b] to next_input[b].
        delta = dis.calc((prev_input[b] * 255).astype(np.uint8),
                         (next_input[b] * 255).astype(np.uint8), None)
        # Bug fix: the original wrote delta[..., 0] into opt[b, 0] and then
        # immediately overwrote it with delta[..., 1]; the dead first write is
        # removed, keeping the last-write (y-component) semantics.
        # NOTE(review): the output probably should carry both flow channels —
        # that would change the return shape, so it is left as-is.
        opt[b, 0] = delta[..., 1]
    return torch.autograd.Variable(data=torch.from_numpy(opt).float(),
                                   requires_grad=False)
def dual_optFlow(im0, im1):
    """Combine a global (gb) and a local (DIS) flow estimate for a frame pair.

    Returns:
        (u, v): summed horizontal and vertical flow components.
    """
    # Global flow estimate.
    u_global, v_global = gb(im0, im1)
    # Local dense flow via DIS, medium preset.
    dis = cv2.DISOpticalFlow_create(cv2.DISOPTICAL_FLOW_PRESET_MEDIUM)
    local_flow = dis.calc(im0, im1, None)
    u_local, v_local = local_flow[..., 0], local_flow[..., 1]
    # Per-component blend weights (both unity for now).
    glob_speedup = 1
    loc_speedup = 1
    u = u_local * loc_speedup + u_global * glob_speedup
    v = v_local * loc_speedup + v_global * glob_speedup
    return u, v
def cal_2frames_optical_flow(ind1, ind2):
    """Render the optical flow between frames <ind1>.png and <ind2>.png.

    Frames are read from the module-level `frames_path`; the visualisation is
    written to the working directory as "<ind1>_<ind2>.png".
    """
    first_frame = cv2.imread(os.path.join(frames_path, f'{ind1}.png'))
    second_frame = cv2.imread(os.path.join(frames_path, f'{ind2}.png'))
    estimator = cv2.DISOpticalFlow_create()
    flow_image = optical_flow(estimator, first_frame, second_frame)
    cv2.imwrite(f'{ind1}_{ind2}.png', flow_image)
def calc_sharp_optical_flow(img,img_before, position, dt, frm = -1, area_channel = None, img_before_before = None, of=None):
    '''calculate radial inward/outward flow.
    Example Usage:
    flow_radial = calc_sharp_optical_flow(img,img_before, position, dt, frm = -1, area_channel = None, img_before_before = None, of=None):
    '''
    # Lazily build a flow client when the caller did not supply one.
    if of is None:
        of = OpticalFlowClient(dt=dt)
    #NB: requires edges and df as arguments
    #TODO: make sure that flow_list is appended to only here in the module
    # start = time.time()
    # get radial coordinates
    #TODO: fix get_r_hat_mat so there's no arbitrary max radius of support :(
    rhat,rmat = of.get_r_hat_mat(position)
    # if img_before_before is given, compute the flow from the next previous frame
    # (used below as a warm start for the main flow computation)
    if img_before_before is not None:
        dis = cv2.DISOpticalFlow_create(cv2.DISOpticalFlow_PRESET_MEDIUM) # the most precise builtin setting as of now...
        current_flow = dis.calc(img_before_before,img_before, flow=None)
    else:
        current_flow = None
    # get flow field in radial coordinates
    flow = of.calc_flow(img,img_before, current_flow=current_flow)
    # Project the flow onto the outward radial unit vectors.
    flow_out, flow_in = mydot(flow, rhat)
    # #blurred binary cell area mask
    # fltr = img/2+img_before/2
    # fltr = gaussian(fltr, sigma=sigma)
    # fltr[fltr< thresh] = 0
    # fltr[fltr>=thresh] = 1
    # area_channel = fltr#pims.frame.Frame(fltr.astype('uint16'))
    # if area_channel is None:
    #     area_channel = img/np.max(img)
    # NOTE(review): the fallbacks above are commented out, so calling with the
    # default area_channel=None raises a TypeError in np.stack below — confirm
    # callers always pass area_channel.
    #filter off-cell flow and return
    output_texture = np.stack([(area_channel*flow_in).astype('float32'), (area_channel*flow_out).astype('float32'), rmat.astype('uint32'), area_channel.astype('uint16')], axis=2)
    #include original frame_no metadata
    flow_radial = pims.frame.Frame(output_texture, frame_no = frm)
    return flow_radial
def compute_sharp_optical_flow(current, previous, previous_previous, dt, of=None, **kwargs):
    '''Compute DIS optical flow from `previous` to `current`, optionally
    warm-started with the flow between `previous_previous` and `previous`.

    Example Usage
    flow = compute_sharp_optical_flow(current, previous, previous_previous, dt, of=None, **kwargs)

    Note: `dt`, `of`, and **kwargs are accepted for interface compatibility
    but are not used here.
    '''
    dis = cv2.DISOpticalFlow_create(cv2.DISOpticalFlow_PRESET_MEDIUM)
    # When the frame before `previous` is available, use its flow toward
    # `previous` as the initial estimate of the main computation.
    if previous_previous is not None:
        current_flow = dis.calc(previous_previous, previous, flow=None)
    else:
        current_flow = None
    # Bug fix: the original first computed dis.calc(previous, current,
    # flow=None) and immediately discarded the result, doubling the cost of
    # every call; only the warm-started computation is kept.
    flow = dis.calc(previous, current, flow=current_flow)
    #I tried the average of current and previous flow. no clear improvement.
    # flow = flow/2 + current_flow/2 #no apparrent difference.
    return flow
def __init__(self, args):
    """Load the deploy config, build the predictor, and reset flow state."""
    self.args = args
    self.cfg = DeployConfig(args.cfg)
    self.compose = T.Compose(self.cfg.transforms)
    # Optical-flow tracking state: ultrafast DIS plus per-frame history.
    self.disflow = cv2.DISOpticalFlow_create(
        cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST)
    h, w = args.input_shape
    self.prev_gray = np.zeros((h, w), np.uint8)
    self.prev_cfd = np.zeros((h, w), np.float32)
    self.is_init = True
    # Paddle inference predictor.
    predict_config = PredictConfig(self.cfg.model, self.cfg.params)
    predict_config.disable_glog_info()
    if self.args.use_gpu:
        predict_config.enable_use_gpu(100, 0)
    self.predictor = create_predictor(predict_config)
    # Optional wall-clock averaging for benchmarking runs.
    if self.args.test_speed:
        self.cost_averager = TimeAverager()
def dense_optical_flow_loss(gen_images, gt_images, img_channel):
    """Compute normalized DIS optical-flow maps for generated and GT images.

    Args:
        gen_images: generated image batch (GPU tensors).
        gt_images: ground-truth image batch (GPU tensors).
        img_channel: 3 for RGB inputs (converted to grayscale first).

    Returns:
        (gen_diff, gt_diff): lists of normalized (2, H, W) flow arrays.
    """
    optical = cv2.DISOpticalFlow_create(cv2.DISOPTICAL_FLOW_PRESET_FAST)
    gen_images_cpu = gpu_to_cpu(gen_images)
    gt_images_cpu = gpu_to_cpu(gt_images)
    if img_channel == 3:
        gen_images_cpu = to_grayscale(gen_images_cpu)
        gt_images_cpu = to_grayscale(gt_images_cpu)
    gen_diff = []
    gt_diff = []
    for gen_seq, gt_seq in zip(gen_images_cpu, gt_images_cpu):
        # for i in range(len(gen_seq) - 1):
        # (translated) doesn't work because these are tensors, not numpy —
        # and no error is raised either
        # gen_img1, gen_img2 = gen_images[i].clone().detach().numpy(), gen_images[i + 1].clone().detach().numpy()
        # gt_img1, gt_img2 = gt_images[i].clone().detach().numpy(), gt_images[i + 1].clone().detach().numpy()
        # tmp1 = float_to_cv8u(gen_seq[i])
        # tmp2 = float_to_cv8u(gen_seq[i + 1])
        # NOTE(review): calc() is called with the SAME image for both frames,
        # so the computed flow is identically zero; the commented-out loop
        # above suggests consecutive frames (i, i+1) were intended — confirm.
        gen_flow = optical.calc(float_to_cv8u(gen_seq), float_to_cv8u(gen_seq), None)
        gt_flow = optical.calc(float_to_cv8u(gt_seq), float_to_cv8u(gt_seq), None)
        # (translated) two channels come out — magnitude and direction —
        # followed by the image size
        gen_flow = np.transpose(gen_flow, (2, 0, 1))
        gt_flow = np.transpose(gt_flow, (2, 0, 1))
        gen_flow = normalize(gen_flow)
        gt_flow = normalize(gt_flow)
        # gen_flow = optical.calc(gen_img1, gen_img2, None)
        # gt_flow = optical.calc(gt_img1, gt_img2, None)
        gen_diff.append(gen_flow.copy())
        gt_diff.append(gt_flow.copy())
    return gen_diff, gt_diff
def postprocess(cur_gray, scoremap, prev_gray, pre_cfd, disflow, is_init):
    """Optical-flow based temporal refinement of a segmentation score map.

    Args:
        cur_gray : grayscale image of the current frame.
        scoremap : segmentation result of the current frame.
        prev_gray: grayscale image of the previous frame.
        pre_cfd  : fused confidence map of the previous frame.
        disflow  : DIS optical-flow estimator shared across frames.
        is_init  : whether this is the first frame.

    Returns:
        fusion_cfd : fusion of the tracked map and the current prediction.
    """
    # Bug fix: the original re-created `disflow` locally on every call, which
    # discarded the caller's persistent instance and threw away the
    # finest-scale tuning applied on the first frame.
    h, w = scoremap.shape[0], scoremap.shape[1]
    cur_cfd = scoremap.copy()
    if is_init:
        # Tune the coarsest DIS scale to the map resolution once, up front;
        # the setting persists on the caller-owned estimator.
        if h <= 64 or w <= 64:
            disflow.setFinestScale(1)
        elif h <= 160 or w <= 160:
            disflow.setFinestScale(2)
        else:
            disflow.setFinestScale(3)
        fusion_cfd = cur_cfd
    else:
        # NOTE(review): (w, h) looks transposed for non-square maps — confirm
        # against human_seg_tracking's expectations.
        weights = np.ones((w, h), np.float32) * 0.3
        track_cfd, is_track, weights = human_seg_tracking(
            prev_gray, cur_gray, pre_cfd, weights, disflow)
        fusion_cfd = human_seg_track_fuse(track_cfd, cur_cfd, weights, is_track)
    # Light blur to smooth fusion seams.
    fusion_cfd = cv2.GaussianBlur(fusion_cfd, (3, 3), 0)
    return fusion_cfd
import cv2  # bug fix: used throughout the script below but was never imported
import numpy as np
import os

if __name__ == '__main__':
    # Visualise DIS optical flow between two consecutive face frames.
    img_dir = '/Users/atom/data/FlowFaceSegmentation/data/sample'
    img_fn_list = os.listdir(img_dir)
    print(img_fn_list)
    img_1 = cv2.imread(os.path.join(img_dir, '0a5a3db4535c8b7c21610437b1ca37d7_04.jpg'))
    img_2 = cv2.imread(os.path.join(img_dir, '0a5a3db4535c8b7c21610437b1ca37d7_05.jpg'))
    # Downscale for speed.
    img_1 = cv2.resize(img_1, dsize=(0, 0), fx=0.3, fy=0.3)
    img_2 = cv2.resize(img_2, dsize=(0, 0), fx=0.3, fy=0.3)
    prvs = cv2.cvtColor(img_1, cv2.COLOR_BGR2GRAY)
    hsv = np.zeros_like(img_1)
    # Preset 2 corresponds to DISOPTICAL_FLOW_PRESET_MEDIUM.
    dis = cv2.DISOpticalFlow_create(2)
    next = cv2.cvtColor(img_2, cv2.COLOR_BGR2GRAY)
    flow = dis.calc(prvs, next, None, )
    # flow = cv.calcOpticalFlowFarneback(prvs,next, None, 0.5, 3, 15, 3, 5, 1.2, 0)
    mag, ang = cv2.cartToPolar(flow[..., 0], flow[..., 1])
    # HSV visualisation: hue = flow direction, saturation = magnitude.
    hsv[..., 0] = ang * 180 / np.pi / 2
    hsv[..., 1] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX)
    hsv[..., 2] = 255
    bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
    result = cv2.hconcat([img_1, img_2, bgr])
    cv2.imshow('result', result)
    cv2.waitKey(0)
def video_segment(self, video_path=None, use_gpu=False, save_dir='humanseg_server_video'):
    """Segment humans in a video file (saved to AVI) or the webcam (shown live).

    Args:
        video_path: path to the input video; None opens camera 0.
        use_gpu: run inference on the GPU predictor.
        save_dir: output directory for the file-based branch.
    """
    resize_h = 512
    resize_w = 512
    if not video_path:
        cap_video = cv2.VideoCapture(0)
    else:
        cap_video = cv2.VideoCapture(video_path)
    if not cap_video.isOpened():
        raise IOError("Error opening video stream or file, "
                      "--video_path whether existing: {}"
                      " or camera whether working".format(video_path))
    width = int(cap_video.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap_video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    disflow = cv2.DISOpticalFlow_create(
        cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST)
    prev_gray = np.zeros((resize_h, resize_w), np.uint8)
    prev_cfd = np.zeros((resize_h, resize_w), np.float32)
    # NOTE(review): is_init is never set to False in either loop, so
    # postprocess_v treats every frame as the first and the temporal fusion
    # never engages (cf. video_infer in this file, which clears it) — confirm.
    is_init = True
    fps = cap_video.get(cv2.CAP_PROP_FPS)
    if video_path is not None:
        # File branch: write the matted result to <save_dir>/result.avi.
        print('Please wait. It is computing......')
        if not osp.exists(save_dir):
            os.makedirs(save_dir)
        save_path = osp.join(save_dir, 'result' + '.avi')
        cap_out = cv2.VideoWriter(
            save_path, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps,
            (width, height))
        while cap_video.isOpened():
            ret, frame_org = cap_video.read()
            if ret:
                frame = preprocess_v(frame_org, resize_w, resize_h)
                image = PaddleTensor(np.array([frame.copy()]))
                output = self.gpu_predictor.run([
                    image
                ]) if use_gpu else self.cpu_predictor.run([image])
                score_map = output[1].as_ndarray()
                frame = np.transpose(frame, axes=[1, 2, 0])
                score_map = np.transpose(np.squeeze(score_map, 0), axes=[1, 2, 0])
                cur_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                cur_gray = cv2.resize(cur_gray, (resize_w, resize_h))
                # Channel 1 is the foreground probability.
                score_map = 255 * score_map[:, :, 1]
                optflow_map = postprocess_v(cur_gray, score_map, prev_gray,
                                            prev_cfd, disflow, is_init)
                prev_gray = cur_gray.copy()
                prev_cfd = optflow_map.copy()
                optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0)
                optflow_map = threshold_mask(optflow_map, thresh_bg=0.2, thresh_fg=0.8)
                # NOTE(review): the third positional argument of cv2.resize is
                # `dst`, not the interpolation — interpolation=cv2.INTER_LINEAR
                # was probably intended.
                img_matting = cv2.resize(optflow_map, (width, height),
                                         cv2.INTER_LINEAR)
                img_matting = np.repeat(img_matting[:, :, np.newaxis], 3, axis=2)
                # Composite onto a white background.
                bg_im = np.ones_like(img_matting) * 255
                comb = (img_matting * frame_org + (1 - img_matting) * bg_im).astype(np.uint8)
                cap_out.write(comb)
            else:
                break
        cap_video.release()
        cap_out.release()
    else:
        # Camera branch: show the matted stream live; 'q' quits.
        while cap_video.isOpened():
            ret, frame_org = cap_video.read()
            if ret:
                frame = preprocess_v(frame_org, resize_w, resize_h)
                image = PaddleTensor(np.array([frame.copy()]))
                output = self.gpu_predictor.run([
                    image
                ]) if use_gpu else self.cpu_predictor.run([image])
                score_map = output[1].as_ndarray()
                frame = np.transpose(frame, axes=[1, 2, 0])
                score_map = np.transpose(np.squeeze(score_map, 0), axes=[1, 2, 0])
                cur_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                cur_gray = cv2.resize(cur_gray, (resize_w, resize_h))
                score_map = 255 * score_map[:, :, 1]
                optflow_map = postprocess_v(cur_gray, score_map, prev_gray,
                                            prev_cfd, disflow, is_init)
                prev_gray = cur_gray.copy()
                prev_cfd = optflow_map.copy()
                optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0)
                optflow_map = threshold_mask(optflow_map, thresh_bg=0.2, thresh_fg=0.8)
                img_matting = cv2.resize(optflow_map, (width, height),
                                         cv2.INTER_LINEAR)
                img_matting = np.repeat(img_matting[:, :, np.newaxis], 3, axis=2)
                bg_im = np.ones_like(img_matting) * 255
                comb = (img_matting * frame_org + (1 - img_matting) * bg_im).astype(np.uint8)
                cv2.imshow('HumanSegmentation', comb)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
            else:
                break
        cap_video.release()
def video_infer(args):
    """Human segmentation on a video file (saved to AVI) or the webcam (live).

    Segmentation predictions are temporally smoothed with DIS optical flow
    before being composited onto a white background.
    """
    resize_h = args.image_shape[1]
    resize_w = args.image_shape[0]
    test_transforms = transforms.Compose(
        [transforms.Resize((resize_w, resize_h)),
         transforms.Normalize()])
    model = models.load_model(args.model_dir)
    if not args.video_path:
        cap = cv2.VideoCapture(0)
    else:
        cap = cv2.VideoCapture(args.video_path)
    if not cap.isOpened():
        raise IOError("Error opening video stream or file, "
                      "--video_path whether existing: {}"
                      " or camera whether working".format(args.video_path))
        # NOTE(review): this `return` is unreachable — it follows the raise.
        return
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    disflow = cv2.DISOpticalFlow_create(cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST)
    # Tracking state: previous gray frame and fused confidence map.
    prev_gray = np.zeros((resize_h, resize_w), np.uint8)
    prev_cfd = np.zeros((resize_h, resize_w), np.float32)
    is_init = True
    fps = cap.get(cv2.CAP_PROP_FPS)
    if args.video_path:
        # (translated) used to save the predicted result video
        if not osp.exists(args.save_dir):
            os.makedirs(args.save_dir)
        out = cv2.VideoWriter(osp.join(args.save_dir, 'result.avi'),
                              cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
                              fps, (width, height))
        # (translated) start fetching video frames
        while cap.isOpened():
            ret, frame = cap.read()
            if ret:
                score_map, im_info = predict(frame, model, test_transforms)
                cur_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                cur_gray = cv2.resize(cur_gray, (resize_w, resize_h))
                # Channel 1 holds the foreground probability.
                scoremap = 255 * score_map[:, :, 1]
                optflow_map = postprocess(cur_gray, scoremap, prev_gray, prev_cfd, \
                                          disflow, is_init)
                prev_gray = cur_gray.copy()
                prev_cfd = optflow_map.copy()
                is_init = False
                optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0)
                optflow_map = threshold_mask(optflow_map, thresh_bg=0.2, thresh_fg=0.8)
                img_mat = np.repeat(optflow_map[:, :, np.newaxis], 3, axis=2)
                img_mat = recover(img_mat, im_info)
                # Composite onto a white background.
                bg_im = np.ones_like(img_mat) * 255
                comb = (img_mat * frame + (1 - img_mat) * bg_im).astype(
                    np.uint8)
                out.write(comb)
            else:
                break
        cap.release()
        out.release()
    else:
        # Camera branch: show the matted stream live; 'q' quits.
        while cap.isOpened():
            ret, frame = cap.read()
            if ret:
                score_map, im_info = predict(frame, model, test_transforms)
                cur_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                cur_gray = cv2.resize(cur_gray, (resize_w, resize_h))
                scoremap = 255 * score_map[:, :, 1]
                optflow_map = postprocess(cur_gray, scoremap, prev_gray, prev_cfd, \
                                          disflow, is_init)
                prev_gray = cur_gray.copy()
                prev_cfd = optflow_map.copy()
                is_init = False
                # optflow_map = optflow_map/255.0
                optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0)
                optflow_map = threshold_mask(optflow_map, thresh_bg=0.2, thresh_fg=0.8)
                img_mat = np.repeat(optflow_map[:, :, np.newaxis], 3, axis=2)
                img_mat = recover(img_mat, im_info)
                bg_im = np.ones_like(img_mat) * 255
                comb = (img_mat * frame + (1 - img_mat) * bg_im).astype(
                    np.uint8)
                cv2.imshow('HumanSegmentation', comb)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
            else:
                break
        cap.release()
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# DIS optical-flow demo: visualise the dense flow of a video as an HSV image
# (hue = direction, value = magnitude) converted to BGR for display.
import cv2 as cv
import numpy as np

cap = cv.VideoCapture(r'D:\data\Reid\cam_dong_420.mp4')
# https://www.bzarg.com/p/how-a-kalman-filter-works-in-pictures/
ret, frame1 = cap.read()
prvs = cv.cvtColor(frame1, cv.COLOR_BGR2GRAY)
hsv = np.zeros_like(frame1)
hsv[..., 1] = 255  # full saturation; hue and value carry the flow
dis = cv.DISOpticalFlow_create()
while (1):
    ret, frame2 = cap.read()
    # Bug fix: the original never checked `ret`, so reaching the end of the
    # video crashed in cvtColor on a None frame.
    if not ret:
        break
    next = cv.cvtColor(frame2, cv.COLOR_BGR2GRAY)
    flow = dis.calc(prvs, next, None, )
    # flow = cv.calcOpticalFlowFarneback(prvs,next, None, 0.5, 3, 15, 3, 5, 1.2, 0)
    mag, ang = cv.cartToPolar(flow[..., 0], flow[..., 1])
    hsv[..., 0] = ang * 180 / np.pi / 2  # radians -> degrees, halved for the 0-180 hue range
    hsv[..., 2] = cv.normalize(mag, None, 0, 255, cv.NORM_MINMAX)  # magnitude as brightness
    bgr = cv.cvtColor(hsv, cv.COLOR_HSV2BGR)
    cv.imshow('result', bgr)
    cv.imshow('input', frame2)
    k = cv.waitKey(30) & 0xff
    if k == 27:  # ESC quits
        break
    elif k == ord('s'):  # 's' saves the current frame and flow image
        cv.imwrite('opticalfb.png', frame2)
        cv.imwrite('opticalhsv.png', bgr)
    prvs = next
# calibration_folder_name = "/home/yipai/image_data/translation_calibration/time1/*.jpg" print("calibration folder: " + calibration_folder_name) calibration_file_paths = sorted(glob.glob(calibration_folder_name)) calibration_file_paths = calibration_file_paths[1:] calibration_step = 0.05 # test_folder_name = "/home/yipai/image_data/translation_calibration/time1/*.jpg" test_folder_name = "/home/yipai/data_11_13/dis_1_time/*.jpg" # test_folder_name = "/home/yipai/data_11_27/3x3/times/*.jpg" print("test folder: " + test_folder_name) test_file_paths = sorted(glob.glob(test_folder_name)) test_file_paths = test_file_paths[1:] test_step = 0.1 # DIS method dis_inst = cv2.DISOpticalFlow_create(cv2.DISOPTICAL_FLOW_PRESET_MEDIUM) # dis_inst.setFinestScale(2) # 2 as default # dis_inst.setPatchStride(3) # 4 as default dis_inst.setGradientDescentIterations(25) # 12 as default dis_inst.setVariationalRefinementIterations(10) # 0 as default # dis_inst.setVariationalRefinementAlpha(0.0) # dis_inst.setPatchSize(15) # 8 as default # # decomp object # dx = float(1.0) # dy = float(1.0) # grid = (360, 480) # # grid = (480, 640) # decomposer = nHHD(grid=grid, spacings=(dy, dx)) flow = None
def video_infer(args):
    """Video human matting with a torch segmentation model plus DIS optical-flow
    temporal smoothing; writes the composited result and prints per-stage times.
    """
    cap = cv2.VideoCapture(args.video)
    _, frame = cap.read()
    H, W = frame.shape[:2]
    fps = cap.get(cv2.CAP_PROP_FPS)
    out = cv2.VideoWriter(args.output,
                          cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps,
                          (W, H))
    # Background
    if args.bg is not None:
        BACKGROUND = cv2.imread(args.bg)[..., ::-1]  # BGR -> RGB
        BACKGROUND = cv2.resize(BACKGROUND, (W, H), interpolation=cv2.INTER_LINEAR)
        KERNEL_SZ = 25
        SIGMA = 0
    # Alpha transperency
    else:
        COLOR1 = [255, 0, 0]
        COLOR2 = [0, 0, 255]
    # Build and load the segmentation model (CPU checkpoint, optional CUDA).
    if args.model == 'unet':
        model = UNet(backbone=args.net, num_classes=2, pretrained_backbone=None)
    elif args.model == 'deeplabv3_plus':
        model = DeepLabV3Plus(backbone=args.net, num_classes=2, pretrained_backbone=None)
    if args.use_cuda:
        model = model.cuda()
    trained_dict = torch.load(args.checkpoint, map_location="cpu")['state_dict']
    model.load_state_dict(trained_dict, strict=False)
    model.eval()
    # Keep the aspect ratio while fitting the longer side to input_sz.
    if W > H:
        w_new = int(args.input_sz)
        h_new = int(H * w_new / W)
    else:
        h_new = int(args.input_sz)
        w_new = int(W * h_new / H)
    disflow = cv2.DISOpticalFlow_create(cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST)
    prev_gray = np.zeros((h_new, w_new), np.uint8)
    prev_cfd = np.zeros((h_new, w_new), np.float32)
    is_init = True
    while (cap.isOpened()):
        start_time = time()
        ret, frame = cap.read()
        if ret:
            image = frame[..., ::-1]  # BGR -> RGB
            h, w = image.shape[:2]
            read_cam_time = time()
            # Predict mask
            X, pad_up, pad_left, h_new, w_new = utils.preprocessing(
                image, expected_size=args.input_sz, pad_value=0)
            preproc_time = time()
            with torch.no_grad():
                if args.use_cuda:
                    mask = model(X.cuda())
                    mask = mask[..., pad_up:pad_up + h_new,
                                pad_left:pad_left + w_new]
                    #mask = F.interpolate(mask, size=(h,w), mode='bilinear', align_corners=True)
                    mask = F.softmax(mask, dim=1)
                    mask = mask[0, 1, ...].cpu().numpy()  #(213, 320)
                else:
                    mask = model(X)
                    mask = mask[..., pad_up:pad_up + h_new,
                                pad_left:pad_left + w_new]
                    #mask = F.interpolate(mask, size=(h,w), mode='bilinear', align_corners=True)
                    mask = F.softmax(mask, dim=1)
                    mask = mask[0, 1, ...].numpy()
            predict_time = time()
            # optical tracking
            cur_gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
            cur_gray = cv2.resize(cur_gray, (w_new, h_new))
            scoremap = 255 * mask
            optflow_map = postprocess(cur_gray, scoremap, prev_gray, prev_cfd,
                                      disflow, is_init)
            optical_flow_track_time = time()
            prev_gray = cur_gray.copy()
            prev_cfd = optflow_map.copy()
            is_init = False
            optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0)
            optflow_map = threshold_mask(optflow_map, thresh_bg=0.2, thresh_fg=0.8)
            img_matting = np.repeat(optflow_map[:, :, np.newaxis], 3, axis=2)
            # Composite onto a white background, then restore the original size.
            bg_im = np.ones_like(img_matting) * 255
            re_image = cv2.resize(image, (w_new, h_new))
            comb = (img_matting * re_image + (1 - img_matting) * bg_im).astype(
                np.uint8)
            comb = cv2.resize(comb, (W, H))
            comb = comb[..., ::-1]  # RGB -> BGR for the writer
            # Print runtime
            read = read_cam_time - start_time
            preproc = preproc_time - read_cam_time
            pred = predict_time - preproc_time
            optical = optical_flow_track_time - predict_time
            total = read + preproc + pred + optical
            # NOTE(review): the reported "fps" is 1/pred — prediction-only
            # throughput, not end-to-end fps.
            print(
                "read: %.3f [s]; preproc: %.3f [s]; pred: %.3f [s]; optical: %.3f [s]; total: %.3f [s]; fps: %.2f [Hz]"
                % (read, preproc, pred, optical, total, 1 / pred))
            out.write(comb)
            if args.watch:
                cv2.imshow('webcam', comb[..., ::-1])
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        else:
            break
    cap.release()
    out.release()
def infer(args):
    """Background replacement for an image, a video file, or the webcam.

    Image mode uses the raw prediction; video/camera modes smooth predictions
    across frames with DIS optical flow before replacing the background with
    either a background video (looped) or a background image.
    """
    resize_h = args.image_shape[1]
    resize_w = args.image_shape[0]
    test_transforms = transforms.Compose(
        [transforms.Resize((resize_w, resize_h)),
         transforms.Normalize()])
    model = models.load_model(args.model_dir)
    if not osp.exists(args.save_dir):
        os.makedirs(args.save_dir)
    # (translated) image background replacement
    if args.image_path is not None:
        # NOTE(review): `raise (<str>)` raises a TypeError because strings are
        # not exceptions — an exception class (e.g. ValueError) was probably
        # intended here and in the similar raises below.
        if not osp.exists(args.image_path):
            raise ('The --image_path is not existed: {}'.format(
                args.image_path))
        if args.background_image_path is None:
            raise ('The --background_image_path is not set. Please set it')
        else:
            if not osp.exists(args.background_image_path):
                raise ('The --background_image_path is not existed: {}'.format(
                    args.background_image_path))
        img = cv2.imread(args.image_path)
        score_map, im_info = predict(img, model, test_transforms)
        score_map = score_map[:, :, 1]
        score_map = recover(score_map, im_info)
        bg = cv2.imread(args.background_image_path)
        save_name = osp.basename(args.image_path)
        save_path = osp.join(args.save_dir, save_name)
        result = bg_replace(score_map, img, bg)
        cv2.imwrite(save_path, result)
    # (translated) video background replacement: use the background video when
    # given, otherwise the supplied background image
    else:
        is_video_bg = False
        if args.background_video_path is not None:
            if not osp.exists(args.background_video_path):
                raise ('The --background_video_path is not existed: {}'.format(
                    args.background_video_path))
            is_video_bg = True
        elif args.background_image_path is not None:
            if not osp.exists(args.background_image_path):
                raise ('The --background_image_path is not existed: {}'.format(
                    args.background_image_path))
        else:
            raise (
                'Please offer backgound image or video. You should set --backbground_iamge_paht or --background_video_path'
            )
        disflow = cv2.DISOpticalFlow_create(
            cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST)
        # Tracking state: previous gray frame and fused confidence map.
        prev_gray = np.zeros((resize_h, resize_w), np.uint8)
        prev_cfd = np.zeros((resize_h, resize_w), np.float32)
        is_init = True
        if args.video_path is not None:
            print('Please waite. It is computing......')
            if not osp.exists(args.video_path):
                raise ('The --video_path is not existed: {}'.format(
                    args.video_path))
            cap_video = cv2.VideoCapture(args.video_path)
            fps = cap_video.get(cv2.CAP_PROP_FPS)
            width = int(cap_video.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap_video.get(cv2.CAP_PROP_FRAME_HEIGHT))
            save_name = osp.basename(args.video_path)
            save_name = save_name.split('.')[0]
            save_path = osp.join(args.save_dir, save_name + '.avi')
            cap_out = cv2.VideoWriter(
                save_path, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps,
                (width, height))
            if is_video_bg:
                cap_bg = cv2.VideoCapture(args.background_video_path)
                frames_bg = cap_bg.get(cv2.CAP_PROP_FRAME_COUNT)
                current_frame_bg = 1
            else:
                img_bg = cv2.imread(args.background_image_path)
            while cap_video.isOpened():
                ret, frame = cap_video.read()
                if ret:
                    score_map, im_info = predict(frame, model, test_transforms)
                    cur_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                    cur_gray = cv2.resize(cur_gray, (resize_w, resize_h))
                    # Channel 1 holds the foreground probability.
                    score_map = 255 * score_map[:, :, 1]
                    optflow_map = postprocess(cur_gray, score_map, prev_gray, prev_cfd, \
                                              disflow, is_init)
                    prev_gray = cur_gray.copy()
                    prev_cfd = optflow_map.copy()
                    is_init = False
                    optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0)
                    optflow_map = threshold_mask(
                        optflow_map, thresh_bg=0.2, thresh_fg=0.8)
                    score_map = recover(optflow_map, im_info)
                    # (translated) loop over the background frames
                    if is_video_bg:
                        ret_bg, frame_bg = cap_bg.read()
                        if ret_bg:
                            # Rewind the background video when exhausted.
                            if current_frame_bg == frames_bg:
                                current_frame_bg = 1
                                cap_bg.set(cv2.CAP_PROP_POS_FRAMES, 0)
                        else:
                            break
                        current_frame_bg += 1
                        comb = bg_replace(score_map, frame, frame_bg)
                    else:
                        comb = bg_replace(score_map, frame, img_bg)
                    cap_out.write(comb)
                else:
                    break
            if is_video_bg:
                cap_bg.release()
            cap_video.release()
            cap_out.release()
        # (translated) when neither an image nor a video is given, open the camera
        else:
            cap_video = cv2.VideoCapture(0)
            if not cap_video.isOpened():
                raise IOError("Error opening video stream or file, "
                              "--video_path whether existing: {}"
                              " or camera whether working".format(
                                  args.video_path))
                # NOTE(review): unreachable — follows the raise.
                return
            if is_video_bg:
                cap_bg = cv2.VideoCapture(args.background_video_path)
                frames_bg = cap_bg.get(cv2.CAP_PROP_FRAME_COUNT)
                current_frame_bg = 1
            else:
                img_bg = cv2.imread(args.background_image_path)
            while cap_video.isOpened():
                ret, frame = cap_video.read()
                if ret:
                    score_map, im_info = predict(frame, model, test_transforms)
                    cur_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                    cur_gray = cv2.resize(cur_gray, (resize_w, resize_h))
                    score_map = 255 * score_map[:, :, 1]
                    optflow_map = postprocess(cur_gray, score_map, prev_gray, prev_cfd, \
                                              disflow, is_init)
                    prev_gray = cur_gray.copy()
                    prev_cfd = optflow_map.copy()
                    is_init = False
                    optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0)
                    optflow_map = threshold_mask(
                        optflow_map, thresh_bg=0.2, thresh_fg=0.8)
                    score_map = recover(optflow_map, im_info)
                    # (translated) loop over the background frames
                    if is_video_bg:
                        ret_bg, frame_bg = cap_bg.read()
                        if ret_bg:
                            if current_frame_bg == frames_bg:
                                current_frame_bg = 1
                                cap_bg.set(cv2.CAP_PROP_POS_FRAMES, 0)
                        else:
                            break
                        current_frame_bg += 1
                        comb = bg_replace(score_map, frame, frame_bg)
                    else:
                        comb = bg_replace(score_map, frame, img_bg)
                    cv2.imshow('HumanSegmentation', comb)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
                else:
                    break
            if is_video_bg:
                cap_bg.release()
            cap_video.release()
# if pose: # loss = np.sqrt(np.mean(w * np.sum(diff * diff, axis=1, keepdims=True))) # # else: # loss = np.sqrt(np.mean(diff * diff)) return loss_0, loss_1, motion if __name__ == '__main__': import os, glob root_path = '/Users/momo/Desktop/test_frames/test_video_frames' pic_names = sorted(glob.glob(os.path.join(root_path, '*.jpeg'))) inst = cv2.DISOpticalFlow_create( cv2.DISOPTICAL_FLOW_PRESET_FAST) # online version: should be fast. inst.setUseSpatialPropagation(True) prev = cv2.imread(pic_names[0]) cur = cv2.imread(pic_names[1]) prev_gray = cv2.cvtColor(prev, cv2.COLOR_BGR2GRAY) cur_gray = cv2.cvtColor(cur, cv2.COLOR_BGR2GRAY) flow = inst.calc(prev_gray, cur_gray, None) height, width = prev_gray.shape test_x = np.random.randn(300) * height test_y = np.random.randn(300) * width x = np.arange(height) y = np.arange(width)
def warp_images(img1, img2, savedir: str = None, look_at_angle: float = 0):
    """Warp two equirectangular images toward each other via DIS optical flow.

    Both images are padded, flow is computed at quarter resolution, upscaled
    (and doubled) to half resolution, and used to warp each image toward the
    other. Intermediate visualisations are saved when `savedir` is given.

    Returns:
        (unpadded_next, unpadded_prvs): img1 warped forward and img2 warped
        backward, with the horizontal padding removed.
    """
    # pad image on which flow will be calculated
    padding = 180
    img1 = slice_eqimage(img1, look_at_angle, padding=padding)
    img2 = slice_eqimage(img2, look_at_angle, padding=padding)
    # Half-resolution outputs; quarter-resolution inputs for flow estimation.
    output_im1 = resize(img1, 50)
    output_im2 = resize(img2, 50)
    resized1 = resize(img1, 25)
    resized2 = resize(img2, 25)
    # cast image to greyscale
    prvs = cv2.cvtColor(resized1, cv2.COLOR_BGR2GRAY)
    next = cv2.cvtColor(resized2, cv2.COLOR_BGR2GRAY)
    dis = cv2.DISOpticalFlow_create()
    # calculate forward flow; the *2 compensates for computing flow at half
    # the output resolution (25% vs 50%).
    flow_forward = dis.calc(prvs, next, None)
    flow_forward = cv2.resize(flow_forward,
                              (output_im1.shape[1], output_im1.shape[0]),
                              interpolation=cv2.INTER_LINEAR) * 2
    # calculate backward flow
    flow_backward = dis.calc(next, prvs, None)
    flow_backward = cv2.resize(flow_backward,
                               (output_im1.shape[1], output_im1.shape[0]),
                               interpolation=cv2.INTER_LINEAR) * 2
    next_img = warp_flow(output_im1, flow_forward)
    prvs_img = warp_flow(output_im2, flow_backward)
    # unpad the images
    unpadded_next = next_img[:, padding // 2:next_img.shape[1] - padding // 2]
    unpadded_prvs = prvs_img[:, padding // 2:prvs_img.shape[1] - padding // 2]
    if savedir is not None:
        # np.flip(..., axis=2) swaps BGR -> RGB for matplotlib saving.
        plt.imsave(
            os.path.join(savedir, "forward_flow.jpg"),
            computeColor.computeImg(flow_forward)[:, padding:next_img.shape[1] - (padding)])
        plt.imsave(
            os.path.join(savedir, "backward_flow.jpg"),
            computeColor.computeImg(flow_backward)[:, padding:next_img.shape[1] - (padding)])
        plt.imsave(os.path.join(savedir, "2_output2.jpg"),
                   np.flip(unpadded_prvs, axis=2))
        plt.imsave(os.path.join(savedir, "4_output1.jpg"),
                   np.flip(unpadded_next, axis=2))
        plt.imsave(
            os.path.join(savedir, "1_input_img1.jpg"),
            np.flip(output_im1[:, padding // 2:next_img.shape[1] - padding // 2], axis=2))
        plt.imsave(
            os.path.join(savedir, "3_input_img2.jpg"),
            np.flip(output_im2[:, padding // 2:prvs_img.shape[1] - padding // 2], axis=2))
    return unpadded_next, unpadded_prvs
# Draw all the nonzero values nz = np.nonzero(norm) for i in range(len(nz[0])): y, x = nz[0][i], nz[1][i] cv2.arrowedLine(image, pt1=tuple(flow_start[y, x]), pt2=tuple(flow_end[y, x]), color=(63, 208, 244), thickness=2, tipLength=.2) return image ## global variables dis_inst = cv2.DISOpticalFlow_create(cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST) dis_inst.setFinestScale(3) # 2 as default dis_inst.setGradientDescentIterations(12) # 12 as default dis_inst.setVariationalRefinementIterations(5) # 0 as default # dis_inst.setPatchSize(12) # 8 as default base_frame = None prev_time = time.time() flow = None velocity = None prev_flow = None frame_counter = 0 # decomp object dx = float(1.0) dy = float(1.0) grid = (480, 640)
def main0():
    """Run human segmentation over a folder of frames, smoothing masks with
    DIS optical flow, and show the white-background composite live.
    """
    pthpredict = segmentationPredict()
    resize_h = 720
    resize_w = 1280
    # cap = cv2.VideoCapture(0)
    # # cap = cv2.VideoCapture(args.video_path)
    # if not cap.isOpened():
    #     raise IOError("Error opening video stream or file, "
    #                   "--video_path whether existing: {}"
    #                   " or camera whether working".format(args.video_path))
    #     return
    # width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    # height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    #width = 1280 #int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    #height = 720 #int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    disflow = cv2.DISOpticalFlow_create(cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST)
    # Tracking state: previous gray frame and fused confidence map.
    prev_gray = np.zeros((resize_h, resize_w), np.uint8)
    prev_cfd = np.zeros((resize_h, resize_w), np.float32)
    is_init = True
    #fps = cap.get(cv2.CAP_PROP_FPS)
    # NOTE(review): the `if 0:` branch below is dead code and references
    # undefined names (args, cap, model, test_transforms, width, height,
    # recover) — it only survives from an earlier video-based version.
    if 0:
        print('Please waite. It is computing......')
        # (translated) used to save the predicted result video
        if not osp.exists(args.save_dir):
            os.makedirs(args.save_dir)
        out = cv2.VideoWriter(osp.join(args.save_dir, 'result.avi'),
                              cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps,
                              (width, height))
        # (translated) start fetching video frames
        while cap.isOpened():
            ret, frame = cap.read()
            if ret:
                score_map, im_info = predict(frame, model, test_transforms)
                cur_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                cur_gray = cv2.resize(cur_gray, (resize_w, resize_h))
                score_map = 255 * score_map[:, :, 1]
                optflow_map = smp.utils.postprocess.postprocess(cur_gray, score_map, prev_gray, prev_cfd, \
                                                                disflow, is_init)
                prev_gray = cur_gray.copy()
                prev_cfd = optflow_map.copy()
                is_init = False
                optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0)
                optflow_map = smp.utils.postprocess.threshold_mask(
                    optflow_map, thresh_bg=0.2, thresh_fg=0.8)
                img_matting = np.repeat(optflow_map[:, :, np.newaxis], 3, axis=2)
                img_matting = recover(img_matting, im_info)
                bg_im = np.ones_like(img_matting) * 255
                comb = (img_matting * frame +
                        (1 - img_matting) * bg_im).astype(np.uint8)
                out.write(comb)
            else:
                break
        cap.release()
        out.release()
    else:
        # Live branch: iterate over still frames in a folder.
        index = 0
        file_path = "D:/pengt/data/webvideo/zhoujielu/joinerpic"
        img_folds_list = os.listdir(file_path)
        for sub0 in img_folds_list:
            img_path = os.path.join(file_path, sub0)
            frame = cv2.imread(img_path)
            ret = True
            # while cap.isOpened():
            #     ret, frame = cap.read()
            index += 1
            if ret:
                ###
                img_ori = frame
                alphargb, pred, img_new_next = pthpredict.run(img_ori, 0, index=index)
                score_map = alphargb
                # score_map, im_info = predict(frame, model, test_transforms)
                cur_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                cur_gray = cv2.resize(cur_gray, (resize_w, resize_h))
                score_map = 255 * score_map
                optflow_map = smp.utils.postprocess.postprocess(cur_gray, score_map, prev_gray, prev_cfd, \
                                                                disflow, is_init)
                prev_gray = cur_gray.copy()
                prev_cfd = optflow_map.copy()
                is_init = False
                optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0)
                optflow_map = smp.utils.postprocess.threshold_mask(
                    optflow_map, thresh_bg=0.2, thresh_fg=0.8)
                img_matting = np.repeat(optflow_map[:, :, np.newaxis], 3, axis=2)
                # img_matting = recover(img_matting, im_info)
                bg_im = np.ones_like(img_matting) * 255
                comb = (img_matting * frame +
                        (1 - img_matting) * bg_im).astype(np.uint8)
                cv2.imshow('HumanSegmentation', comb)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
            else:
                break
        # NOTE(review): `cap` is never defined in this branch — this release
        # call would raise NameError if reached; it belongs to the dead branch.
        cap.release()