Example #1
    def show(self) -> None:
        frames = []
        capture = cv2.VideoCapture(self.filename)
        capture.set(cv2.CAP_PROP_POS_FRAMES, self.start_frame)
        for _ in range(self.end_frame - self.start_frame):
            status, frame = capture.read()
            if not status:  # stop on read error / end of stream
                break
            frames.append(frame)
        capture.release()

        combined = self.combine_frames(frames)

        if combined is None:
            print("Empty frame")
            return

        frame = combined

        for point in self.event.positions:
            frame = cv2.circle(frame, point.center(), 1, (0, 255, 0), 1)

        frame = resize_frame(frame)
        left_top, right_bottom = self.bounding_box

        plt.imshow(frame)
        plt.show()
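
The frame-range reading pattern above is reusable on its own. A minimal standalone sketch, assuming only OpenCV (the function name is illustrative, not part of the original code):

import cv2

def read_frame_range(filename, start_frame, end_frame):
    """Read frames [start_frame, end_frame) from a video file."""
    capture = cv2.VideoCapture(filename)
    capture.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
    frames = []
    for _ in range(end_frame - start_frame):
        status, frame = capture.read()
        if not status:  # end of stream or read error
            break
        frames.append(frame)
    capture.release()
    return frames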
Example #2
def track_motion(video_src,
                 init_flag=False,
                 remove_bg=p.remove_bg,
                 reinitialize_roi=p.reinitialize_roi,
                 reinitialize_hsv=p.reinitialize_hsv,
                 reinitialize_bg=p.reinitialize_bg):
    """Track object in a video
    
    Implements background subtraction with GrabCut and MOG2 algorithms. Tracking is 
    based on either CamShift or meanShift algorithms.
    
    Parameters
    ---------------
    video_src : str
        relative path to video to be processed (in suitable format, e.g. .avi or .mkv,
        depends on PC and OS the algorith runs on)
    reinitialize_roi : bool
        [True for init, else False]
    reinitialize_hsv : bool
        [True for init, else False]
    reinitialize_bg : bool
        [True]
    init_flag : bool
        is this an initializing run? [True for init, else False]
    remove_bg : bool
        [default = True]
          
    Additional Parameters 
    (see available cmd line arguments and files params.py and params_init.py)
    -------------
    frame_range : list
        range of rames to process, read from csv file
    double_subtract_bg : bool
        use also MOG2? [default = True]
    params : str
        which parameter set to use
    height_resize : int
        height of image to plot, will be resized if different from current
    plot_mask : bool
        Flag indicating if binary fg mask should be plotted as well [default = True]
    annotate_mask : bool
        Add additional text information to plot_mask image? [default = True]
    show_frames : bool
        Plot frames from video and visualize tracking [default = True]
    save_frames : bool
        Flag indicating whether to save frames shown in `Tracking` window (cf. 
        show_frames), see utils.define_video_output
        
    Returns
    -----------
    None
    
    References
    ----------------
    ..[1] https://docs.opencv.org/3.0-beta/doc/py_tutorials/py_video/py_bg_subtraction/py_bg_subtraction.html
    
    See also
    -----------
    process.py, params.py, params_init.py, utils.define_video_output
    """

    double_substract_bg = p.double_substract_bg
    # frame_range comes from the params module like the other settings;
    # without it the branching below would raise a NameError
    frame_range = p.frame_range
    save_init = any([reinitialize_roi, reinitialize_hsv, reinitialize_bg])
    params = dict(kSize_gauss=p.kSize_gauss,
                  sigmaX=p.sigmaX,
                  kSize_canny=p.kSize_canny,
                  padding=p.padding)

    if double_substract_bg:
        fgbg = cv2.createBackgroundSubtractorMOG2(history=200,
                                                  varThreshold=12,
                                                  detectShadows=False)
        fgbg.setComplexityReductionThreshold(0.05 * 4)
        fgbg.setBackgroundRatio(1)
    (pnames, pnames_init) = utils.get_parameter_names(remove_bg,
                                                      reinitialize_hsv,
                                                      reinitialize_roi,
                                                      reinitialize_bg)
    fnames = utils.get_in_out_names(video_src, init_flag, save_init)

    try:
        if pnames:
            p_vars_curr = utils.load_tracking_params(fnames[1], p.ext, pnames)
        else:
            p_vars_curr = {}
        if pnames_init and not init_flag:
            p_vars_init = utils.load_tracking_params(fnames[3], p.ext,
                                                     pnames_init)
        else:
            p_vars_init = {}

        p_vars = {**p_vars_curr, **p_vars_init}

    except Exception:
        print("Some parameters couldn't be loaded")
        p_vars = {}

    if reinitialize_roi and not frame_range:
        pts, _, vid, frame_pos, frame = getcoords.select_roi_video(video_src)
        pts = utils.swap_coords_2d(pts)

    elif reinitialize_roi and frame_range:
        pts, _, vid, frame_pos, frame = getcoords.select_roi_video(
            video_src, frame_pos=frame_range[0])
        pts = utils.swap_coords_2d(pts)
    elif not reinitialize_roi and frame_range:
        frame_pos = frame_range[0]
        (vid, frame) = getcoords.go_to_frame([],
                                             frame_pos,
                                             video_src,
                                             return_frame=True)
        pts = p_vars["pts"]
    elif not frame_range:
        # pts, roi, vid, frame_pos, frame = from_preset(video_src)
        pts = p_vars["pts"]
        frame_pos = p_vars["frame_pos"]
        (vid, frame) = getcoords.go_to_frame([],
                                             frame_pos,
                                             video_src,
                                             return_frame=True)

    bbox = cv2.boundingRect(pts)
    bbox_min, bbox = update_bbox_location(frame, bbox, **params)
    (c, r, w, h) = bbox

    if remove_bg and reinitialize_bg:
        background, mask_fgd = segment_background(frame, bbox_min, **params)
    elif remove_bg and not reinitialize_bg:
        background = p_vars["background"]

    if remove_bg:
        frame_bg_removed = subtract_background(frame, background)
        roi = frame_bg_removed[r:r + h, c:c + w, :]
    else:
        background = np.empty(0)
        roi = frame[r:r + h, c:c + w, :]

    if reinitialize_hsv:
        roi_hist, h_ranges, chs = get_roi_hist(roi, vid, background, frame_pos,
                                               reinitialize_hsv)
    else:
        roi_hist = p_vars["roi_hist"]
        h_ranges = p_vars["h_ranges"]
        chs = p_vars["chs"]

    if save_init:
        names, names_init = utils.get_parameter_names(remove_bg,
                                                      not reinitialize_hsv,
                                                      not reinitialize_roi,
                                                      not reinitialize_bg)
        local_variables = locals()
        if names:
            save_dict = dict((n, local_variables[n]) for n in names)
            _ = utils.save_tracking_params(fnames[0], save_dict, p.ext)
        if names_init and init_flag:
            save_dict_init = dict((n, local_variables[n]) for n in names_init)
            _ = utils.save_tracking_params(fnames[2], save_dict_init, p.ext)

    # Set up the termination criteria: stop after p.its iterations
    # or when the search window moves by less than p.eps
    term_crit = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, p.its, p.eps)
    params["term_crit"] = term_crit

    fps = vid.get(cv2.CAP_PROP_FPS)
    frame_count = 0
    # Lists to temporarily store output data
    times = []
    centroids = []
    num_nonzeros = []
    stop_frame = frame_range[1] - frame_range[0]

    if p.save_frames:
        vid_out = utils.define_video_output(video_src, vid, fps, p.step,
                                            p.height_resize)

    while vid.isOpened() and frame_count <= stop_frame:

        frame_avg = np.zeros_like(frame, dtype=np.float32)
        frame_avg, frame_count, frame = average_frames(vid, frame_avg, p.step,
                                                       p.alpha, frame_count)
        # Break out of the loop when the stream is exhausted
        if frame is None:
            print("End of stream")
            break

        if remove_bg:
            frame_avg = subtract_background(frame_avg, background)

        if double_substract_bg:
            frame_avg, frame_binary, cnts = subtract_stationary_background(
                fgbg, frame_avg, frame_count, **params)
        else:
            cnts = []
            frame_binary = np.empty(0)
            # cv2.normalize(  src = frame_avg, dst = frame_avg,
            # alpha = 0, beta = 255, norm_type = cv2.NORM_MINMAX)

        prob_mask = prob_mask_hsv(frame_avg, roi_hist, h_ranges, chs, **params)

        res = tracker(prob_mask, bbox, "meanShift", **params)

        if res is None or prob_mask is None:
            print("No components detected, end of tracking")
            vid.release()
            break

        bbox = res[0]
        pts = res[1]
        if len(res) > 2:
            cent = res[2]
        else:
            cent = utils.rectangle_to_centroid(pts)

        centroids.append(cent)
        time = float(frame_count) / fps
        times.append(time)  # save the timestamp
        if frame_binary.size:
            num_nonzero = np.count_nonzero(frame_binary)
            num_nonzeros.append(num_nonzero)

        # Rendering requires a pause; timing may be machine dependent
        if frame_count < 10:
            cv2.waitKey(int(1 / fps * p.step * 1000 * 20))
        else:
            cv2.waitKey(int(1 / fps * p.step * 1000 * 0.1))
        # Visualize
        if p.show_frames:
            # frame_vis = prob_mask.copy() # frame_avg.copy()
            frame_vis = cv2.cvtColor(frame_avg,
                                     cv2.COLOR_BGR2GRAY).astype(np.uint8)

            # Named colors (avoids shadowing the bbox variables r and w)
            (black, white) = ((0, 0, 0), (255, 255, 255))
            # Put a timestamp on the averaged image
            time_str = "{:.2f}".format(time)
            dimy, dimx = frame_vis.shape[:2]
            time_loc = (int(dimx - 250), dimy)
            cv2.putText(frame_vis, time_str, time_loc, cv2.FONT_HERSHEY_PLAIN,
                        5, white)
            prob_str = "Max prob: {:.2f}%".format(np.max(prob_mask) / 2.55)
            prob_loc = (50, 50)
            cv2.putText(frame_vis, prob_str, prob_loc, cv2.FONT_HERSHEY_PLAIN,
                        3, white)
            cv2.polylines(frame_vis,
                          pts=[pts],
                          isClosed=True,
                          color=white,
                          thickness=2)
            # Draw the center of mass on the averaged image
            cv2.circle(frame_vis,
                       tuple(cent)[:2],
                       radius=4,
                       color=black,
                       thickness=4)

            # Image should be uint8 to be drawable in 0, 255 scale
            # https://stackoverflow.com/questions/9588719/opencv-double-mat-shows-up-as-all-white
            (frame_vis, _) = utils.resize_frame(frame_vis,
                                                height=p.height_resize)

            cv2.imshow("Tracking", frame_vis)

            if frame_count > 0 and p.save_frames:
                new_shape = frame_vis.shape + (1, )
                frame_vis = np.reshape(frame_vis, new_shape)
                frame_vis = np.repeat(frame_vis, 3, axis=2)
                vid_out.write(frame_vis)

        if p.annotate_mask and p.plot_mask and cnts:

            white = (255, 255, 255)
            cnt_metric = cv2.arcLength(cnts[0], True)
            ann_str = "Max perim: {:.2f}, #Contours: {}, #Nonzero: {}"\
                        .format(cnt_metric, len(cnts), num_nonzero)
            ann_loc = (50, 50)
            cv2.putText(frame_binary, ann_str, ann_loc, cv2.FONT_HERSHEY_PLAIN,
                        3, white)

        if p.plot_mask and frame_binary.size:
            (frame_binary, _) = utils.resize_frame(frame_binary,
                                                   height=p.height_resize)
            cv2.imshow("Mask", frame_binary)

            # Interrupt on ESC
            ch = 0xFF & cv2.waitKey(1)
            if ch == 27:
                break
            elif ch == ord('d'):  # drop into the debugger
                import pdb
                pdb.set_trace()
    # end of main tracking loop
    output_data(centroids, times, num_nonzeros, video_src)
    if p.save_frames:
        vid_out.release()
    cv2.destroyAllWindows()
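
A hedged invocation sketch for track_motion, assuming a params module p configured as in params.py; the video path and flag values are placeholders, not from the original code:

# First run: interactively select ROI, HSV range, and background,
# saving them for later runs.
track_motion("data/session01.avi",
             init_flag=True,
             reinitialize_roi=True,
             reinitialize_hsv=True,
             reinitialize_bg=True)

# Subsequent runs can reuse the saved initialization.
track_motion("data/session01.avi", init_flag=False)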
Example #3
def select_hsv_range(vid,
                     video_source,
                     background=np.empty(0),
                     frame_pos=[],
                     reinitialize=False):
    """Select object hsv range
    
    Interactively selects HSV ranges that threshold tracked object against background
    
    Parameters
    ----------
    vid : cv2.VideoCapture object
        Already opened video object. If empty, the video from video_source is read.
    video_source : str
        path to the video file
    background : ndarray
        background image to subtract before thresholding; skipped if empty
        [default = np.empty(0)]
    frame_pos : float
        number of the frame corresponding to the current position in the video
        [default = []]
    reinitialize : bool
        whether to look for new HSV limits or take a preset [default = False]
    
    Returns
    ----------
    hsv_lowerb : array_like
        vector of lower HSV limits [hlow, slow, vlow]
    hsv_upperb : array_like
        vector of upper HSV limits [hhigh, shigh, vhigh]
    
    References
    ------------
    ..[1] https://botforge.wordpress.com/2016/07/02/basic-color-tracker-using-opencv-python/
    ..[2] http://docs.opencv.org/3.0-beta/doc/py_tutorials/py_gui/py_trackbar/py_trackbar.html
    """
    if reinitialize:
        (_, frame) = getcoords.go_to_frame(vid,
                                           frame_pos,
                                           video_source,
                                           return_frame=True)
        frame_avg = np.zeros_like(frame, dtype=np.float32)

        frame, _, _ = average_frames(vid, frame_avg)
        if background.size:  # if a background image exists
            frame = subtract_background(frame, background)

        frame, _ = utils.resize_frame(frame, height=p.height_resize)

        hh = 'Hue High'
        hl = 'Hue Low'
        sh = 'Saturation High'
        sl = 'Saturation Low'
        vh = 'Value High'
        vl = 'Value Low'
        cv2.namedWindow("Select HSV Range")
        cv2.resizeWindow('Select HSV Range', frame.shape[1], frame.shape[0])
        print("Change ranges on sliders and press ENTER to update")

        def nothing(x):
            pass

        cv2.createTrackbar(hl, 'Select HSV Range', 0, 179, nothing)  # ~180 deg
        cv2.createTrackbar(hh, 'Select HSV Range', 0, 179, nothing)
        cv2.createTrackbar(sl, 'Select HSV Range', 0, 255, nothing)
        cv2.createTrackbar(sh, 'Select HSV Range', 0, 255, nothing)
        cv2.createTrackbar(vl, 'Select HSV Range', 0, 255, nothing)
        cv2.createTrackbar(vh, 'Select HSV Range', 0, 255, nothing)

        frame_hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
        try:
            while True:
                # read current trackbar positions
                HL = cv2.getTrackbarPos(hl, 'Select HSV Range')
                HH = cv2.getTrackbarPos(hh, 'Select HSV Range')
                SL = cv2.getTrackbarPos(sl, 'Select HSV Range')
                SH = cv2.getTrackbarPos(sh, 'Select HSV Range')
                VL = cv2.getTrackbarPos(vl, 'Select HSV Range')
                VH = cv2.getTrackbarPos(vh, 'Select HSV Range')
                # collect the final values into arrays
                hsv_lowerb = np.array([HL, SL, VL])
                hsv_upperb = np.array([HH, SH, VH])

                # apply the range as a mask
                mask = cv2.inRange(frame_hsv, hsv_lowerb, hsv_upperb)
                res = cv2.bitwise_and(frame_hsv, frame_hsv, mask=mask)

                cv2.imshow('Select HSV Range', res)
                k = cv2.waitKey(0) & 0xFF
                if k == 27:
                    break
        finally:
            cv2.destroyAllWindows()
    else:  # apply a preset
        if background.size:  # preset for use with background subtraction
            hsv_lowerb = np.array([0, 0, 100])
            hsv_upperb = np.array([179, 200, 255])
        else:  # preset for use without background subtraction
            hsv_lowerb = np.array([0, 0, 150])
            hsv_upperb = np.array([179, 50, 255])

    return (hsv_lowerb, hsv_upperb)
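
A hedged usage sketch: the path is a placeholder, `frame` stands for any BGR frame from the same video, and passing [] for vid relies on the fallback to video_source described in the docstring:

hsv_lo, hsv_hi = select_hsv_range([], "data/session01.avi", reinitialize=True)
frame_hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
mask = cv2.inRange(frame_hsv, hsv_lo, hsv_hi)  # binary mask of the tracked object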
Example #4
    def test_resize_frames(self, frame):
        nrows, ncols = frame.shape[:2]
        frame_out1, ratio1 = utils.resize_frame(frame)
        frame_out2, ratio2 = utils.resize_frame(frame_out1, nrows)
        assert np.round(ratio1 * ratio2, 3) == 1.0
        assert frame_out2.shape == frame.shape
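
For context, a minimal sketch of what utils.resize_frame might look like, inferred from this test and from the height keyword used in Example #2; the default height is an assumption:

def resize_frame(frame, height=480):
    # Assumed behavior: scale to the given height, preserve the aspect
    # ratio, and return both the resized frame and the applied ratio.
    ratio = height / frame.shape[0]
    width = int(round(frame.shape[1] * ratio))
    resized = cv2.resize(frame, (width, height), interpolation=cv2.INTER_AREA)
    return resized, ratio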
Example #5
def capture_action(pred_path, cb, debug=False, log=False):
    """Facial detection and real time processing credit goes to:

    https://www.pyimagesearch.com/2017/04/17/real-time-facial-landmark-detection-opencv-python-dlib/
    """
    action_handler = ActionHandler()

    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor(pred_path)

    camera = cv2.VideoCapture(0)

    while True:
        _, frame = camera.read()
        if frame is None:
            continue

        _, w_original, _ = frame.shape
        frame = resize_frame(frame)

        h, w, _ = frame.shape

        display_bounds(frame)

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        rects = detector(gray, 0)

        for rect in rects:
            shape = predictor(gray, rect)
            shape = face_utils.shape_to_np(shape)

            cur_head = Head(shape, w)
            cur_eyes = Eyes(shape, frame)

            eye_action = detect_eyes(shape, cur_eyes)
            head_action = detect_head(shape, cur_head)

            COUNTER_LOG[eye_action] += 1
            COUNTER_LOG[head_action] += 1

            perform, action = action_handler.get_next(eye_action, head_action)

            if log:
                display_decisions(frame, head_action, eye_action)
                display_counters(frame, COUNTER_LOG)

            if perform:
                COUNTER_LOG[action] += 1
                cb(action)

            if debug:
                cur_head.debug(frame)
                cur_eyes.debug(frame)

                cv2.imshow("Frame", frame)

        key = cv2.waitKey(1) & 0xFF

        if key == ord("q"):
            break

    camera.release()
    cv2.destroyAllWindows()
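
A hedged usage sketch; the model path is dlib's standard 68-landmark predictor file and the callback is a placeholder:

capture_action("shape_predictor_68_face_landmarks.dat",
               cb=lambda action: print("action:", action),
               debug=True,
               log=True)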
Example #6
    if count % frame_sample_rate == 0:
        frame_list.append(frame)
        frame_count += 1
    count += 1

frame_list = np.array(frame_list)
if frame_count > num_frames:
    frame_indices = np.linspace(0, frame_count, num=num_frames,
                                endpoint=False).astype(int)
    frame_list = frame_list[frame_indices]
    # Truncate directly (a no-op after the sampling above; kept as a safeguard)
    frame_list = frame_list[:num_frames]
    frame_count = num_frames

# Preprocess the frames, then convert to (batch, channel, height, width) format
cropped_frame_list = np.array([resize_frame(x) for x in frame_list]).transpose(
    (0, 3, 1, 2))
# volatile=True is the pre-0.4 PyTorch way of disabling autograd
# (torch.no_grad() replaces it in modern versions)
cropped_frame_list = Variable(torch.from_numpy(cropped_frame_list),
                              volatile=True).cuda()

# The video feature matrix has shape num_frames x 4096;
# if there are fewer than num_frames frames, pad the remainder with zeros
feats = np.zeros((num_frames, frame_size), dtype='float32')
feats[:frame_count, :] = encoder(cropped_frame_list).data.cpu().numpy()
videos = Variable(torch.from_numpy(feats)).cuda().unsqueeze(0)

# Decode the video content into a natural-language description
tokens = decoder.sample(videos).data[0].squeeze()
print(decode_tokens(tokens, vocab))

# Play the video
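
The excerpt begins mid-loop; a plausible reconstruction of the enclosing frame-reading loop, assuming a cv2.VideoCapture source (variable names match the excerpt, everything else is an assumption):

cap = cv2.VideoCapture(video_path)  # video_path is a placeholder
frame_list, count, frame_count = [], 0, 0
while True:
    ret, frame = cap.read()
    if not ret:
        break
    if count % frame_sample_rate == 0:
        frame_list.append(frame)
        frame_count += 1
    count += 1
cap.release()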