def show(self) -> None:
    frames = []
    capture = cv2.VideoCapture(self.filename)
    capture.set(cv2.CAP_PROP_POS_FRAMES, self.start_frame)
    for _ in range(self.end_frame - self.start_frame):
        status, frame = capture.read()
        if not status:
            break
        frames.append(frame)
    capture.release()

    combined = self.combine_frames(frames)
    if combined is None:
        print("Empty frame")
        return
    frame = combined
    # Mark each detected position of the event on the combined frame
    for point in self.event.positions:
        frame = cv2.circle(frame, point.center(), 1, (0, 255, 0), 1)
    frame = resize_frame(frame)
    left_top, right_bottom = self.bounding_box
    # OpenCV frames are BGR; convert for matplotlib's RGB display
    plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    plt.show()
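# `combine_frames` is not shown here; a minimal sketch of one plausible
# implementation (an assumption, not the original method) takes the per-pixel
# maximum over the clip, which keeps the moving object's trail visible in a
# single composite image:
def combine_frames(self, frames):
    frames = [f for f in frames if f is not None]
    if not frames:
        return None
    return np.max(np.stack(frames), axis=0)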
def track_motion(video_src, init_flag=False, remove_bg=p.remove_bg,
                 reinitialize_roi=p.reinitialize_roi,
                 reinitialize_hsv=p.reinitialize_hsv,
                 reinitialize_bg=p.reinitialize_bg):
    """Track an object in a video.

    Implements background subtraction with the GrabCut and MOG2 algorithms.
    Tracking is based on either the CamShift or the meanShift algorithm.

    Parameters
    ----------
    video_src : str
        relative path to the video to be processed (in a suitable format,
        e.g. .avi or .mkv; depends on the PC and OS the algorithm runs on)
    init_flag : bool
        is this an initializing run? [True for init, else False]
    remove_bg : bool
        [default = True]
    reinitialize_roi : bool
        [True for init, else False]
    reinitialize_hsv : bool
        [True for init, else False]
    reinitialize_bg : bool
        [True]

    Additional Parameters
    ---------------------
    (see available cmd line arguments and the files params.py and params_init.py)

    frame_range : list
        range of frames to process, read from csv file
    double_substract_bg : bool
        also use MOG2? [default = True]
    params : str
        which parameter set to use
    height_resize : int
        height of image to plot, will be resized if different from current
    plot_mask : bool
        flag indicating if the binary fg mask should be plotted as well
        [default = True]
    annotate_mask : bool
        add additional text information to the plot_mask image? [default = True]
    show_frames : bool
        plot frames from the video and visualize tracking [default = True]
    save_frames : bool
        flag indicating whether to save frames shown in the `Tracking` window
        (cf. show_frames), see utils.define_video_output

    Returns
    -------
    None

    References
    ----------
    ..[1] https://docs.opencv.org/3.0-beta/doc/py_tutorials/py_video/py_bg_subtraction/py_bg_subtraction.html

    See also
    --------
    process.py, params.py, params_init.py, utils.define_video_output
    """
    double_substract_bg = p.double_substract_bg
    save_init = any([reinitialize_roi, reinitialize_hsv, reinitialize_bg])
    params = dict(kSize_gauss=p.kSize_gauss, sigmaX=p.sigmaX,
                  kSize_canny=p.kSize_canny, padding=p.padding)

    if double_substract_bg:
        fgbg = cv2.createBackgroundSubtractorMOG2(history=200, varThreshold=12,
                                                  detectShadows=False)
        fgbg.setComplexityReductionThreshold(0.05 * 4)
        fgbg.setBackgroundRatio(1)

    (pnames, pnames_init) = utils.get_parameter_names(
        remove_bg, reinitialize_hsv, reinitialize_roi, reinitialize_bg)
    fnames = utils.get_in_out_names(video_src, init_flag, save_init)
    try:
        if pnames:
            p_vars_curr = utils.load_tracking_params(fnames[1], p.ext, pnames)
        else:
            p_vars_curr = {}
        if pnames_init and not init_flag:
            p_vars_init = utils.load_tracking_params(fnames[3], p.ext, pnames_init)
        else:
            p_vars_init = {}
        p_vars = {**p_vars_curr, **p_vars_init}
    except Exception:
        print("Some parameters couldn't be loaded")

    # frame_range was undefined in the original body; reading it from the
    # params module (as for double_substract_bg above) is an assumption that
    # matches the docstring, which lists it among the additional parameters.
    frame_range = p.frame_range

    if reinitialize_roi and not frame_range:
        pts, _, vid, frame_pos, frame = getcoords.select_roi_video(video_src)
        pts = utils.swap_coords_2d(pts)
    elif reinitialize_roi and frame_range:
        pts, _, vid, frame_pos, frame = getcoords.select_roi_video(
            video_src, frame_pos=frame_range[0])
        pts = utils.swap_coords_2d(pts)
    elif not reinitialize_roi and frame_range:
        frame_pos = frame_range[0]
        (vid, frame) = getcoords.go_to_frame([], frame_pos, video_src,
                                             return_frame=True)
        pts = p_vars["pts"]
    elif not frame_range:
        # pts, roi, vid, frame_pos, frame = from_preset(video_src)
        pts = p_vars["pts"]
        frame_pos = p_vars["frame_pos"]
        (vid, frame) = getcoords.go_to_frame([], frame_pos, video_src,
                                             return_frame=True)

    bbox = cv2.boundingRect(pts)
    (c, r, w, h) = bbox
    bbox_min, bbox = update_bbox_location(frame, bbox, **params)
    (c, r, w, h) = bbox

    if remove_bg and reinitialize_bg:
        background, mask_fgd = segment_background(frame, bbox_min, **params)
    elif remove_bg and not reinitialize_bg:
        background = p_vars["background"]

    if remove_bg:
        frame_bg_removed = subtract_background(frame, background)
        roi = frame_bg_removed[r:r + h, c:c + w, :]
    else:
        background = np.empty(0)
        roi = frame[r:r + h, c:c + w, :]

    if reinitialize_hsv:
        roi_hist, h_ranges, chs = get_roi_hist(roi, vid, background, frame_pos,
                                               reinitialize_hsv)
    else:
        roi_hist = p_vars["roi_hist"]
        h_ranges = p_vars["h_ranges"]
        chs = p_vars["chs"]

    if save_init:
        names, names_init = utils.get_parameter_names(
            remove_bg, not reinitialize_hsv, not reinitialize_roi,
            not reinitialize_bg)
        local_variables = locals()
        if names:
            save_dict = dict((n, local_variables[n]) for n in names)
            _ = utils.save_tracking_params(fnames[0], save_dict, p.ext)
        if names_init and init_flag:
            save_dict_init = dict((n, local_variables[n]) for n in names_init)
            _ = utils.save_tracking_params(fnames[2], save_dict_init, p.ext)

    # Set up the termination criteria: either p.its iterations or a move by
    # at least p.eps points
    term_crit = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, p.its, p.eps)
    params["term_crit"] = term_crit

    fps = vid.get(cv2.CAP_PROP_FPS)
    frame_count = 0
    # Lists to temporarily store output data
    times = []
    centroids = []
    num_nonzeros = []
    stop_frame = frame_range[1] - frame_range[0]

    if p.save_frames:
        vid_out = utils.define_video_output(video_src, vid, fps, p.step,
                                            p.height_resize)

    while vid.isOpened() and frame_count <= stop_frame:
        frame_avg = np.zeros_like(frame, dtype=np.float32)
        frame_avg, frame_count, frame = average_frames(vid, frame_avg, p.step,
                                                       p.alpha, frame_count)
        # Break out of this loop if the frame stack was emptied
        if frame is None:
            print("End of stream")
            break

        if remove_bg:
            frame_avg = subtract_background(frame_avg, background)
        if double_substract_bg:
            frame_avg, frame_binary, cnts = subtract_stationary_background(
                fgbg, frame_avg, frame_count, **params)
        else:
            cnts = []
            frame_binary = np.empty(0)

        # cv2.normalize(src=frame_avg, dst=frame_avg,
        #               alpha=0, beta=255, norm_type=cv2.NORM_MINMAX)
        prob_mask = prob_mask_hsv(frame_avg, roi_hist, h_ranges, chs, **params)
        res = tracker(prob_mask, bbox, "meanShift", **params)
        if (res is None) or (prob_mask is None):
            print("No components detected, end of tracking")
            vid.release()
            break
        bbox = res[0]
        pts = res[1]
        if len(res) > 2:
            cent = res[2]
        else:
            cent = utils.rectangle_to_centroid(pts)
        centroids.append(cent)
        time = float(frame_count) / fps
        times.append(time)  # save time
        if frame_binary.size:
            num_nonzero = np.count_nonzero(frame_binary)
            num_nonzeros.append(num_nonzero)

        # A pause is required for rendering; the factor may be machine dependent
        if frame_count < 10:
            cv2.waitKey(int(1 / fps * p.step * 1000 * 20))
        else:
            cv2.waitKey(int(1 / fps * p.step * 1000 * 0.1))

        # Visualize
        if p.show_frames:
            # frame_vis = prob_mask.copy()  # frame_avg.copy()
            frame_vis = cv2.cvtColor(frame_avg,
                                     cv2.COLOR_BGR2GRAY).astype(np.uint8)
            # Note: shadows the bbox's r and w, which are not reused below
            (r, b, w) = ((0, 0, 255), (0, 0, 0), (255, 255, 255))
            # Put a timestamp on the averaged image
            time_str = "{:.2f}".format(time)
            dimy, dimx = frame_vis.shape[:2]
            time_loc = (int(dimx - 250), dimy)
            cv2.putText(frame_vis, time_str, time_loc,
                        cv2.FONT_HERSHEY_PLAIN, 5, w)
            prob_str = "Max prob: {:.2f}%".format(np.max(prob_mask) / 2.55)
            prob_loc = (50, 50)
            cv2.putText(frame_vis, prob_str, prob_loc,
                        cv2.FONT_HERSHEY_PLAIN, 3, w)
            cv2.polylines(frame_vis, pts=[pts], isClosed=True, color=w,
                          thickness=2)
            # Draw the location of the center of mass on the averaged image.
            # The image should be uint8 to be drawable on a 0..255 scale, see
            # https://stackoverflow.com/questions/9588719/opencv-double-mat-shows-up-as-all-white
            cv2.circle(frame_vis, tuple(cent)[:2], radius=4, color=b,
                       thickness=4)
            (frame_vis, _) = utils.resize_frame(frame_vis,
                                                height=p.height_resize)
            cv2.imshow("Tracking", frame_vis)

            if frame_count > 0 and p.save_frames:
                new_shape = frame_vis.shape + (1, )
                frame_vis = np.reshape(frame_vis, new_shape)
                frame_vis = np.repeat(frame_vis, 3, axis=2)
                vid_out.write(frame_vis)

        if p.annotate_mask and p.plot_mask and cnts:
            w = (255, 255, 255)
            cnt_metric = cv2.arcLength(cnts[0], True)
            ann_str = "Max perim: {:.2f}, #Contours: {}, #Nonzero: {}"\
                .format(cnt_metric, len(cnts), num_nonzero)
            ann_loc = (50, 50)
            cv2.putText(frame_binary, ann_str, ann_loc,
                        cv2.FONT_HERSHEY_PLAIN, 3, w)
        if p.plot_mask and frame_binary.size:
            (frame_binary, _) = utils.resize_frame(frame_binary,
                                                   height=p.height_resize)
            cv2.imshow("Mask", frame_binary)

        # Interrupt on ESC
        ch = 0xFF & cv2.waitKey(1)
        if ch == 27:
            break
        elif ch == ord('d'):
            import pdb
            pdb.set_trace()
    # end while

    output_data(centroids, times, num_nonzeros, video_src)
    if p.save_frames:
        vid_out.release()
    cv2.destroyAllWindows()
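# A minimal usage sketch (the file path is hypothetical; all other defaults
# come from params.py): first an initializing run that interactively selects
# the ROI, HSV range and background, then a plain run that reuses the saved
# parameters.
if __name__ == "__main__":
    track_motion("data/session01.avi", init_flag=True, reinitialize_roi=True,
                 reinitialize_hsv=True, reinitialize_bg=True)
    track_motion("data/session01.avi")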
def select_hsv_range(vid, video_source, background=np.empty(0), frame_pos=[],
                     reinitialize=False):
    """Select object HSV range

    Interactively selects HSV ranges that threshold the tracked object
    against the background.

    Parameters
    ----------
    vid : cv2.VideoCapture object
        already opened video object. If empty, video from video_source is read.
    video_source : str
        path to video file
    background : array_like
        background image to subtract before thresholding; skipped if empty
        [default = np.empty(0)]
    frame_pos : float
        number of frame corresponding to current position in video [default = []]
    reinitialize : bool
        whether to look for new HSV limits or take preset [default = False]

    Returns
    -------
    hsv_lowerb : array_like
        vector of lower HSV limits [hlow, slow, vlow]
    hsv_upperb : array_like
        vector of upper HSV limits [hhigh, shigh, vhigh]

    References
    ----------
    ..[1] https://botforge.wordpress.com/2016/07/02/basic-color-tracker-using-opencv-python/
    ..[2] http://docs.opencv.org/3.0-beta/doc/py_tutorials/py_gui/py_trackbar/py_trackbar.html
    """
    if reinitialize:
        (_, frame) = getcoords.go_to_frame(vid, frame_pos, video_source,
                                           return_frame=True)
        frame_avg = np.zeros_like(frame, dtype=np.float32)
        frame, _, _ = average_frames(vid, frame_avg)
        if background.size:  # if a background image exists
            frame = subtract_background(frame, background)
        frame, _ = utils.resize_frame(frame, height=p.height_resize)

        hh = 'Hue High'
        hl = 'Hue Low'
        sh = 'Saturation High'
        sl = 'Saturation Low'
        vh = 'Value High'
        vl = 'Value Low'
        cv2.namedWindow("Select HSV Range")
        cv2.resizeWindow('Select HSV Range', frame.shape[1], frame.shape[0])
        print("Change ranges on sliders and press ENTER to update")

        def nothing(x):
            pass

        cv2.createTrackbar(hl, 'Select HSV Range', 0, 179, nothing)  # ~180 deg
        cv2.createTrackbar(hh, 'Select HSV Range', 0, 179, nothing)
        cv2.createTrackbar(sl, 'Select HSV Range', 0, 255, nothing)
        cv2.createTrackbar(sh, 'Select HSV Range', 0, 255, nothing)
        cv2.createTrackbar(vl, 'Select HSV Range', 0, 255, nothing)
        cv2.createTrackbar(vh, 'Select HSV Range', 0, 255, nothing)

        frame_hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
        try:
            while True:
                # Read trackbar positions for all six limits
                HL = cv2.getTrackbarPos(hl, 'Select HSV Range')
                HH = cv2.getTrackbarPos(hh, 'Select HSV Range')
                SL = cv2.getTrackbarPos(sl, 'Select HSV Range')
                SH = cv2.getTrackbarPos(sh, 'Select HSV Range')
                VL = cv2.getTrackbarPos(vl, 'Select HSV Range')
                VH = cv2.getTrackbarPos(vh, 'Select HSV Range')
                # Make arrays holding the final values
                hsv_lowerb = np.array([HL, SL, VL])
                hsv_upperb = np.array([HH, SH, VH])
                # Apply the range as a mask
                mask = cv2.inRange(frame_hsv, hsv_lowerb, hsv_upperb)
                res = cv2.bitwise_and(frame_hsv, frame_hsv, mask=mask)
                cv2.imshow('Select HSV Range', res)
                k = cv2.waitKey(0) & 0xFF
                if k == 27:  # ESC
                    break
        finally:
            cv2.destroyAllWindows()
    else:  # apply a preset
        if background.size:  # if a background image exists
            hsv_lowerb = np.array([0, 0, 100])
            hsv_upperb = np.array([179, 200, 255])
        else:
            hsv_lowerb = np.array([0, 0, 150])  # with bg subtraction
            hsv_upperb = np.array([179, 50, 255])  # with bg subtraction
    return (hsv_lowerb, hsv_upperb)
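# A hypothetical usage sketch (the file name is an assumption): pick the HSV
# limits interactively once, then threshold a frame with cv2.inRange.
vid = cv2.VideoCapture("data/session01.avi")
hsv_lo, hsv_hi = select_hsv_range(vid, "data/session01.avi", frame_pos=0,
                                  reinitialize=True)
_, frame = vid.read()
mask = cv2.inRange(cv2.cvtColor(frame, cv2.COLOR_BGR2HSV), hsv_lo, hsv_hi)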
def test_resize_frames(self, frame):
    nrows, ncols = frame.shape[:2]
    # Resizing down and then back to the original height should invert the
    # scaling ratio and restore the original shape
    frame_out1, ratio1 = utils.resize_frame(frame)
    frame_out2, ratio2 = utils.resize_frame(frame_out1, nrows)
    assert np.round(ratio1 * ratio2, 3) == 1.0
    assert frame_out2.shape == frame.shape
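# The utils.resize_frame under test is not shown; a minimal sketch consistent
# with the round-trip assertions above (the default height is an assumption)
# resizes to a target height while preserving the aspect ratio and returns
# the scaling ratio alongside the frame:
def resize_frame(frame, height=360):
    ratio = height / frame.shape[0]
    width = int(round(frame.shape[1] * ratio))
    resized = cv2.resize(frame, (width, height), interpolation=cv2.INTER_AREA)
    return resized, ratio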
def capture_action(pred_path, cb, debug=False, log=False):
    """Facial detection and real time processing

    Credit goes to:
    https://www.pyimagesearch.com/2017/04/17/real-time-facial-landmark-detection-opencv-python-dlib/
    """
    action_handler = ActionHandler()
    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor(pred_path)
    camera = cv2.VideoCapture(0)

    while True:
        _, frame = camera.read()
        if frame is None:
            continue
        _, w_original, _ = frame.shape
        frame = resize_frame(frame)
        h, w, _ = frame.shape
        display_bounds(frame)

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        rects = detector(gray, 0)
        for rect in rects:
            # Extract the facial landmarks as a NumPy array
            shape = predictor(gray, rect)
            shape = face_utils.shape_to_np(shape)

            cur_head = Head(shape, w)
            cur_eyes = Eyes(shape, frame)
            eye_action = detect_eyes(shape, cur_eyes)
            head_action = detect_head(shape, cur_head)
            COUNTER_LOG[eye_action] += 1
            COUNTER_LOG[head_action] += 1

            perform, action = action_handler.get_next(eye_action, head_action)
            if log:
                display_decisions(frame, head_action, eye_action)
                display_counters(frame, COUNTER_LOG)
            if perform:
                COUNTER_LOG[action] += 1
                cb(action)
            if debug:
                cur_head.debug(frame)
                cur_eyes.debug(frame)

        cv2.imshow("Frame", frame)
        key = cv2.waitKey(1) & 0xFF
        if key == ord("q"):
            break

    camera.release()
    cv2.destroyAllWindows()
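# A hypothetical driver that prints each recognized action. The model path
# refers to dlib's standard 68-point landmark predictor, which is downloaded
# separately; the callback here is just `print`.
if __name__ == "__main__":
    capture_action("shape_predictor_68_face_landmarks.dat", cb=print,
                   debug=True, log=True)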
    if count % frame_sample_rate == 0:
        frame_list.append(frame)
        frame_count += 1
    count += 1

frame_list = np.array(frame_list)
if frame_count > num_frames:
    # Sample num_frames frames evenly spaced over the video
    frame_indices = np.linspace(0, frame_count, num=num_frames,
                                endpoint=False).astype(int)
    frame_list = frame_list[frame_indices]
    # Direct truncation (a no-op after the even sampling above)
    frame_list = frame_list[:num_frames]
    frame_count = num_frames

# Preprocess the images, then convert to (batch, channel, height, width)
cropped_frame_list = np.array([resize_frame(x)
                               for x in frame_list]).transpose((0, 3, 1, 2))
# `volatile=True` was removed from PyTorch; torch.no_grad() is the current
# way to disable gradient tracking during inference
with torch.no_grad():
    cropped_frame_list = torch.from_numpy(cropped_frame_list).cuda()

    # The video feature has shape num_frames x 4096.
    # If the number of frames is smaller than num_frames, pad the rest with zeros.
    feats = np.zeros((num_frames, frame_size), dtype='float32')
    feats[:frame_count, :] = encoder(cropped_frame_list).data.cpu().numpy()
    videos = torch.from_numpy(feats).cuda().unsqueeze(0)

    # Decode the video content into a natural-language description
    tokens = decoder.sample(videos).data[0].squeeze()
    print(decode_tokens(tokens, vocab))

# Play the video
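# `decode_tokens` is not shown; a minimal sketch (assuming the vocabulary
# exposes an idx2word mapping and an '<end>' marker, both of which are
# assumptions) joins the sampled token ids back into a sentence:
def decode_tokens(tokens, vocab):
    words = []
    for token in tokens:
        word = vocab.idx2word[int(token)]
        if word == '<end>':
            break
        words.append(word)
    return ' '.join(words)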