def dense(self, filename='', pyr_scale=0.5, levels=3, winsize=15, iterations=3, poly_n=5, poly_sigma=1.2, flags=0, skip_empty=False):
    """
    Renders a dense optical flow video of the input video file using `cv2.calcOpticalFlowFarneback()`.
    For more details about the parameters consult the cv2 documentation.

    Parameters
    ----------
    - filename : str, optional

        Path to the input video file. If not specified the video file pointed to by the MgObject is used.
    - pyr_scale : float, optional

        Default is 0.5.
    - levels : int, optional

        Default is 3.
    - winsize : int, optional

        Default is 15.
    - iterations : int, optional

        Default is 3.
    - poly_n : int, optional

        Default is 5.
    - poly_sigma : float, optional

        Default is 1.2.
    - flags : int, optional

        Default is 0.
    - skip_empty : bool, optional

        Default is `False`. If `True`, repeats the previously written frame in the output
        when it encounters an empty (all-zero) flow frame. The very first flow frame is
        always written as-is, because there is nothing to repeat yet.

    Outputs
    -------
    - `filename`_flow_dense.avi

    Returns
    -------
    - MgObject

        A new MgObject pointing to the output '_flow_dense' video file.
    """
    if filename == '':
        filename = self.filename

    of, fex = os.path.splitext(filename)

    # Convert to avi if the input is not avi - necessary for cv2 compatibility on all platforms
    if fex != '.avi':
        convert_to_avi(of + fex)
        fex = '.avi'
        filename = of + fex

    destination_video = of + '_flow_dense' + fex

    vidcap = cv2.VideoCapture(filename)
    # One frame is read and dropped before the reference frame is grabbed below.
    # This mirrors the previous implementation so the output frame count is unchanged.
    vidcap.read()

    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    fps = int(vidcap.get(cv2.CAP_PROP_FPS))
    width = int(vidcap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vidcap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    length = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))

    pb = MgProgressbar(total=length, prefix='Rendering dense optical flow video:')

    out = cv2.VideoWriter(destination_video, fourcc, fps, (width, height))

    try:
        _, frame1 = vidcap.read()
        prev_frame = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY)

        # Last frame that was actually written. Starting at None (instead of leaving
        # the name unbound) is what makes `skip_empty` safe when the first flow
        # frames are empty.
        prev_rgb = None

        # HSV canvas: hue encodes flow direction, value encodes magnitude,
        # saturation is fixed at its maximum.
        hsv = np.zeros_like(frame1)
        hsv[..., 1] = 255

        ii = 0
        while vidcap.isOpened():
            ret, frame2 = vidcap.read()
            if not ret:
                pb.progress(length)
                break

            next_frame = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)
            flow = cv2.calcOpticalFlowFarneback(
                prev_frame, next_frame, None, pyr_scale, levels, winsize,
                iterations, poly_n, poly_sigma, flags)

            mag, ang = cv2.cartToPolar(flow[..., 0], flow[..., 1])
            hsv[..., 0] = ang * 180 / np.pi / 2
            hsv[..., 2] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX)
            rgb = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)

            if skip_empty and prev_rgb is not None and not np.any(rgb):
                # Empty flow frame: repeat the last written frame and keep it
                # as the fallback for the following frames too.
                out.write(prev_rgb.astype(np.uint8))
            else:
                out.write(rgb.astype(np.uint8))
                prev_rgb = rgb

            prev_frame = next_frame

            pb.progress(ii)
            ii += 1
    finally:
        # Release both handles even if a cv2 call raised mid-way.
        vidcap.release()
        out.release()

    if self.has_audio:
        source_audio = extract_wav(of + fex)
        embed_audio_in_video(source_audio, destination_video)
        os.remove(source_audio)

    return musicalgestures.MgObject(destination_video, color=self.color, returned_by_process=True)
def history_cv2(self, filename='', history_length=10, weights=1):
    """
    This function creates a video where each frame is the average of the n previous frames, where n is determined
    by `history_length`. The history frames are summed up and normalized, and added to the current frame to show
    the history. Based on cv2.

    Parameters
    ----------
    - filename : str, optional

        Path to the input video file. If not specified the video file pointed to by the MgObject is used.
    - history_length : int, optional

        Default is 10. Number of frames to be saved in the history tail. Must not be negative.
    - weights : int, float, list or tuple, optional

        Default is 1. A single number is used as the weight of the current frame while every
        history frame keeps the weight 1. A sequence assigns the weights by position: element 0
        belongs to the current frame and element k (k >= 1) to the k-th frame in the history
        buffer, oldest first. Positions not covered by the sequence keep the weight 1, surplus
        elements are ignored. Every frame is divided by the sum of all `history_length + 1`
        weights.

    Outputs
    -------
    - `filename`_history.avi

    Returns
    -------
    - MgObject

        A new MgObject pointing to the output '_history' video file.

    Raises
    ------
    - ValueError

        If `history_length` is negative.
    - TypeError

        If `weights` is neither a number nor a list/tuple.
    """
    if filename == '':
        filename = self.filename

    if history_length < 0:
        raise ValueError('history_length must not be negative, got {}'.format(history_length))

    of, fex = os.path.splitext(filename)
    # Single source of truth for the output path: the writer, the audio embedding
    # and the returned MgObject all have to refer to the same file.
    destination_video = of + '_history' + fex

    # weights_map[0] is the weight of the current frame, weights_map[k] the weight
    # of history[k - 1].
    weights_map = [1] * (history_length + 1)
    if isinstance(weights, (int, float)):
        weights_map[0] = weights
    elif isinstance(weights, (list, tuple)):
        for ind, weight in enumerate(weights[:history_length + 1]):
            weights_map[ind] = weight
    else:
        raise TypeError(
            'weights must be a number or a list/tuple of numbers, got {}'.format(type(weights).__name__))

    # Normalize by the weights that are really applied (surplus list entries
    # must not inflate the denominator).
    denominator = sum(weights_map)

    video = cv2.VideoCapture(filename)
    # One frame is read and dropped before the loop, as in the previous
    # implementation, so the output frame count is unchanged.
    video.read()

    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    fps = int(video.get(cv2.CAP_PROP_FPS))
    width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    length = int(video.get(cv2.CAP_PROP_FRAME_COUNT))

    pb = MgProgressbar(total=length, prefix='Rendering history video:')

    out = cv2.VideoWriter(destination_video, fourcc, fps, (width, height))

    history = []
    ii = 0

    try:
        while video.isOpened():
            ret, frame = video.read()
            if not ret:
                pb.progress(length)
                break

            if not self.color:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            frame = np.array(frame).astype(np.float64)

            if history:
                history_total = frame * weights_map[0] / denominator
                for ind, past_frame in enumerate(history):
                    history_total += past_frame * weights_map[ind + 1] / denominator
            else:
                # Nothing to blend with yet: pass the frame through unchanged.
                history_total = frame

            # Append first, trim afterwards: keeps at most `history_length` frames
            # and also works for history_length == 0 (buffer simply stays empty).
            history.append(frame)
            if len(history) > history_length:
                history.pop(0)

            total = history_total.astype(np.uint64)

            if not self.color:
                out.write(cv2.cvtColor(total.astype(np.uint8), cv2.COLOR_GRAY2BGR))
            else:
                out.write(total.astype(np.uint8))

            pb.progress(ii)
            ii += 1
    finally:
        # Release both handles even if a cv2 call raised mid-way.
        video.release()
        out.release()

    if self.has_audio:
        # Take the audio from the file that was actually rendered, which is
        # `filename` and not necessarily the file behind self.of/self.fex.
        source_audio = extract_wav(of + fex)
        embed_audio_in_video(source_audio, destination_video)
        os.remove(source_audio)

    return musicalgestures.MgObject(destination_video, color=self.color, returned_by_process=True)
def sparse(self, filename='', corner_max_corners=100, corner_quality_level=0.3, corner_min_distance=7, corner_block_size=7, of_win_size=(15, 15), of_max_level=2, of_criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03)):
    """
    Renders a sparse optical flow video of the input video file using `cv2.calcOpticalFlowPyrLK()`.
    `cv2.goodFeaturesToTrack()` is used for the corner estimation.
    For more details about the parameters consult the cv2 documentation.

    Parameters
    ----------
    - filename : str, optional

        Path to the input video file. If not specified the video file pointed to by the MgObject is used.
    - corner_max_corners : int, optional

        Default is 100.
    - corner_quality_level : float, optional

        Default is 0.3.
    - corner_min_distance : int, optional

        Default is 7.
    - corner_block_size : int, optional

        Default is 7.
    - of_win_size : tuple (int, int), optional

        Default is (15, 15).
    - of_max_level : int, optional

        Default is 2.
    - of_criteria : optional

        Default is `(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03)`.

    Outputs
    -------
    - `filename`_flow_sparse.avi

    Returns
    -------
    - MgObject

        A new MgObject pointing to the output '_flow_sparse' video file.
    """
    if filename == '':
        filename = self.filename

    of, fex = os.path.splitext(filename)

    # Convert to avi if the input is not avi - necessary for cv2 compatibility on all platforms
    if fex != '.avi':
        convert_to_avi(of + fex)
        fex = '.avi'
        filename = of + fex

    destination_video = of + '_flow_sparse' + fex

    vidcap = cv2.VideoCapture(filename)
    # One frame is read and dropped before the reference frame is grabbed below.
    # This mirrors the previous implementation so the output frame count is unchanged.
    vidcap.read()

    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    fps = int(vidcap.get(cv2.CAP_PROP_FPS))
    width = int(vidcap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vidcap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    length = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))

    pb = MgProgressbar(total=length, prefix='Rendering sparse optical flow video:')

    out = cv2.VideoWriter(destination_video, fourcc, fps, (width, height))

    # params for ShiTomasi corner detection
    feature_params = dict(maxCorners=corner_max_corners,
                          qualityLevel=corner_quality_level,
                          minDistance=corner_min_distance,
                          blockSize=corner_block_size)

    # Parameters for lucas kanade optical flow
    lk_params = dict(winSize=of_win_size,
                     maxLevel=of_max_level,
                     criteria=of_criteria)

    # Random palette for the tracks. It is indexed modulo its length further down,
    # so tracking more than 100 points cannot run past the end of the table.
    color = np.random.randint(0, 255, (100, 3))

    try:
        # Take first frame and find corners in it
        _, old_frame = vidcap.read()
        old_gray = cv2.cvtColor(old_frame, cv2.COLOR_BGR2GRAY)
        p0 = cv2.goodFeaturesToTrack(old_gray, mask=None, **feature_params)

        # Create a mask image for drawing purposes (accumulates all track lines)
        mask = np.zeros_like(old_frame)

        ii = 0
        while vidcap.isOpened():
            ret, frame = vidcap.read()
            if not ret:
                pb.progress(length)
                break

            frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # Swap in the grayscale canvas once per frame, before anything is drawn.
            # Doing it inside the per-point loop would erase the circles of all
            # previously drawn points.
            if not self.color:
                frame = cv2.cvtColor(frame_gray, cv2.COLOR_GRAY2BGR)

            # Only track when there is something to track: p0 is None when no
            # corner was found and becomes empty once every point has been lost.
            if p0 is not None and len(p0) > 0:
                # calculate optical flow
                p1, st, err = cv2.calcOpticalFlowPyrLK(old_gray, frame_gray, p0, None, **lk_params)

                # Select good points
                good_new = p1[st == 1]
                good_old = p0[st == 1]

                # draw the tracks
                for i, (new, old) in enumerate(zip(good_new, good_old)):
                    # The tracker returns float coordinates; cast to int because
                    # recent OpenCV builds reject non-integer points in the
                    # drawing functions.
                    a, b = (int(v) for v in new.ravel())
                    c, d = (int(v) for v in old.ravel())
                    track_color = color[i % len(color)].tolist()
                    mask = cv2.line(mask, (a, b), (c, d), track_color, 2)
                    frame = cv2.circle(frame, (a, b), 5, track_color, -1)

                # Carry the surviving points over to the next iteration
                p0 = good_new.reshape(-1, 1, 2)

            img = cv2.add(frame, mask)
            out.write(img.astype(np.uint8))

            # Now update the previous frame
            old_gray = frame_gray.copy()

            pb.progress(ii)
            ii += 1
    finally:
        # Release both handles even if a cv2 call raised mid-way.
        vidcap.release()
        out.release()

    if self.has_audio:
        source_audio = extract_wav(of + fex)
        embed_audio_in_video(source_audio, destination_video)
        os.remove(source_audio)

    return musicalgestures.MgObject(destination_video, color=self.color, returned_by_process=True)
def dense(
        self,
        filename='',
        pyr_scale=0.5,
        levels=3,
        winsize=15,
        iterations=3,
        poly_n=5,
        poly_sigma=1.2,
        flags=0,
        skip_empty=False):
    """
    Renders a dense optical flow video of the input video file using `cv2.calcOpticalFlowFarneback()`. The description of the matching parameters are taken from the cv2 documentation.

    Args:
        filename (str, optional): Path to the input video file. If not specified the video file pointed to by the MgObject is used. Defaults to ''.
        pyr_scale (float, optional): Specifies the image scale (<1) to build pyramids for each image. `pyr_scale=0.5` means a classical pyramid, where each next layer is twice smaller than the previous one. Defaults to 0.5.
        levels (int, optional): The number of pyramid layers including the initial image. `levels=1` means that no extra layers are created and only the original images are used. Defaults to 3.
        winsize (int, optional): The averaging window size. Larger values increase the algorithm robustness to image noise and give more chances for fast motion detection, but yield more blurred motion field. Defaults to 15.
        iterations (int, optional): The number of iterations the algorithm does at each pyramid level. Defaults to 3.
        poly_n (int, optional): The size of the pixel neighborhood used to find polynomial expansion in each pixel. Larger values mean that the image will be approximated with smoother surfaces, yielding more robust algorithm and more blurred motion field, typically poly_n = 5 or 7. Defaults to 5.
        poly_sigma (float, optional): The standard deviation of the Gaussian that is used to smooth derivatives used as a basis for the polynomial expansion. For `poly_n=5`, you can set `poly_sigma=1.1`, for `poly_n=7`, a good value would be `poly_sigma=1.5`. Defaults to 1.2.
        flags (int, optional): Operation flags that can be a combination of the following:
            - **OPTFLOW_USE_INITIAL_FLOW** uses the input flow as an initial flow approximation.
            - **OPTFLOW_FARNEBACK_GAUSSIAN** uses the Gaussian `winsize` x `winsize` filter instead of a box filter of the same size for optical flow estimation. Usually, this option gives a more accurate flow than with a box filter, at the cost of lower speed. Normally, `winsize` for a Gaussian window should be set to a larger value to achieve the same level of robustness.
            Defaults to 0.
        skip_empty (bool, optional): If True, repeats the previously written frame in the output when it encounters an empty (all-zero) flow frame. The first flow frame is always written as-is. Defaults to False.

    Outputs:
        `filename`_flow_dense.avi

    Returns:
        MgObject: A new MgObject pointing to the output '_flow_dense' video file.
    """
    if filename == '':
        filename = self.filename

    of, fex = os.path.splitext(filename)

    # Convert to avi if the input is not avi - necessary for cv2 compatibility on all platforms
    if fex != '.avi':
        convert_to_avi(of + fex)
        fex = '.avi'
        filename = of + fex

    destination_video = of + '_flow_dense' + fex

    vidcap = cv2.VideoCapture(filename)
    # One frame is read and dropped before the reference frame is grabbed below.
    # This mirrors the previous implementation so the output frame count is unchanged.
    vidcap.read()

    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    fps = int(vidcap.get(cv2.CAP_PROP_FPS))
    width = int(vidcap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vidcap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    length = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))

    pb = MgProgressbar(
        total=length, prefix='Rendering dense optical flow video:')

    out = cv2.VideoWriter(destination_video, fourcc, fps, (width, height))

    try:
        _, frame1 = vidcap.read()
        prev_frame = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY)

        # Last frame that was actually written; None until the first write.
        prev_rgb = None

        # HSV canvas: hue encodes flow direction, value encodes magnitude,
        # saturation is fixed at its maximum.
        hsv = np.zeros_like(frame1)
        hsv[..., 1] = 255

        ii = 0
        while vidcap.isOpened():
            ret, frame2 = vidcap.read()
            if not ret:
                pb.progress(length)
                break

            next_frame = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)
            flow = cv2.calcOpticalFlowFarneback(
                prev_frame, next_frame, None, pyr_scale, levels, winsize,
                iterations, poly_n, poly_sigma, flags)

            mag, ang = cv2.cartToPolar(flow[..., 0], flow[..., 1])
            hsv[..., 0] = ang * 180 / np.pi / 2
            hsv[..., 2] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX)
            rgb = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)

            # An empty frame is only replaced when skipping is requested and
            # there already is a written frame to fall back on.
            repeat_previous = (
                skip_empty and prev_rgb is not None and not np.any(rgb))

            if repeat_previous:
                out.write(prev_rgb.astype(np.uint8))
            else:
                out.write(rgb.astype(np.uint8))
                prev_rgb = rgb

            prev_frame = next_frame

            pb.progress(ii)
            ii += 1
    finally:
        # The capture used to be left open; release it (and the writer) even
        # when a cv2 call raises mid-way.
        vidcap.release()
        out.release()

    if self.has_audio:
        source_audio = extract_wav(of + fex)
        embed_audio_in_video(source_audio, destination_video)
        os.remove(source_audio)

    return musicalgestures.MgObject(destination_video, color=self.color, returned_by_process=True)
def sparse(
        self,
        filename='',
        corner_max_corners=100,
        corner_quality_level=0.3,
        corner_min_distance=7,
        corner_block_size=7,
        of_win_size=(15, 15),
        of_max_level=2,
        of_criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03)):
    """
    Renders a sparse optical flow video of the input video file using `cv2.calcOpticalFlowPyrLK()`. `cv2.goodFeaturesToTrack()` is used for the corner estimation. The description of the matching parameters are taken from the cv2 documentation.

    Args:
        filename (str, optional): Path to the input video file. If not specified the video file pointed to by the MgObject is used. Defaults to ''.
        corner_max_corners (int, optional): Maximum number of corners to return. If there are more corners than are found, the strongest of them is returned. `maxCorners <= 0` implies that no limit on the maximum is set and all detected corners are returned. Defaults to 100.
        corner_quality_level (float, optional): Parameter characterizing the minimal accepted quality of image corners. The parameter value is multiplied by the best corner quality measure, which is the minimal eigenvalue (see cornerMinEigenVal in cv2 docs) or the Harris function response (see cornerHarris in cv2 docs). The corners with the quality measure less than the product are rejected. For example, if the best corner has the quality measure = 1500, and the qualityLevel=0.01, then all the corners with the quality measure less than 15 are rejected. Defaults to 0.3.
        corner_min_distance (int, optional): Minimum possible Euclidean distance between the returned corners. Defaults to 7.
        corner_block_size (int, optional): Size of an average block for computing a derivative covariation matrix over each pixel neighborhood. See cornerEigenValsAndVecs in cv2 docs. Defaults to 7.
        of_win_size (tuple, optional): Size of the search window at each pyramid level. Defaults to (15, 15).
        of_max_level (int, optional): 0-based maximal pyramid level number. If set to 0, pyramids are not used (single level), if set to 1, two levels are used, and so on. If pyramids are passed to input then the algorithm will use as many levels as pyramids have but no more than `maxLevel`. Defaults to 2.
        of_criteria (tuple, optional): Specifies the termination criteria of the iterative search algorithm (after the specified maximum number of iterations criteria.maxCount or when the search window moves by less than criteria.epsilon). Defaults to (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03).

    Outputs:
        `filename`_flow_sparse.avi

    Returns:
        MgObject: A new MgObject pointing to the output '_flow_sparse' video file.
    """
    if filename == '':
        filename = self.filename

    of, fex = os.path.splitext(filename)

    # Convert to avi if the input is not avi - necessary for cv2 compatibility on all platforms
    if fex != '.avi':
        convert_to_avi(of + fex)
        fex = '.avi'
        filename = of + fex

    destination_video = of + '_flow_sparse' + fex

    vidcap = cv2.VideoCapture(filename)
    # One frame is read and dropped before the reference frame is grabbed below.
    # This mirrors the previous implementation so the output frame count is unchanged.
    vidcap.read()

    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    fps = int(vidcap.get(cv2.CAP_PROP_FPS))
    width = int(vidcap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vidcap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    length = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))

    pb = MgProgressbar(
        total=length, prefix='Rendering sparse optical flow video:')

    out = cv2.VideoWriter(destination_video, fourcc, fps, (width, height))

    # params for ShiTomasi corner detection
    feature_params = dict(maxCorners=corner_max_corners,
                          qualityLevel=corner_quality_level,
                          minDistance=corner_min_distance,
                          blockSize=corner_block_size)

    # Parameters for lucas kanade optical flow
    lk_params = dict(winSize=of_win_size,
                     maxLevel=of_max_level,
                     criteria=of_criteria)

    # Random palette for the tracks; looked up modulo its length below so that
    # more than 100 tracked points do not cause an IndexError.
    color = np.random.randint(0, 255, (100, 3))
    palette_size = len(color)

    try:
        # Take first frame and find corners in it
        _, old_frame = vidcap.read()
        old_gray = cv2.cvtColor(old_frame, cv2.COLOR_BGR2GRAY)
        p0 = cv2.goodFeaturesToTrack(old_gray, mask=None, **feature_params)

        # Create a mask image for drawing purposes (accumulates all track lines)
        mask = np.zeros_like(old_frame)

        ii = 0
        while vidcap.isOpened():
            ret, frame = vidcap.read()
            if not ret:
                pb.progress(length)
                break

            frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # For grayscale output the drawing canvas is prepared once per
            # frame. Preparing it per tracked point would overwrite the
            # circles drawn for the preceding points.
            if not self.color:
                frame = cv2.cvtColor(frame_gray, cv2.COLOR_GRAY2BGR)

            # p0 is None when no corner was detected and empty once all points
            # were lost; in both cases only the accumulated mask is rendered.
            has_points = p0 is not None and len(p0) > 0

            if has_points:
                # calculate optical flow
                p1, st, err = cv2.calcOpticalFlowPyrLK(
                    old_gray, frame_gray, p0, None, **lk_params)

                # Select good points
                good_new = p1[st == 1]
                good_old = p0[st == 1]

                # draw the tracks
                for i, (new, old) in enumerate(zip(good_new, good_old)):
                    # Coordinates come back as floats; recent OpenCV builds
                    # only accept integer points in line()/circle().
                    new_pt = tuple(int(v) for v in new.ravel())
                    old_pt = tuple(int(v) for v in old.ravel())
                    track_color = color[i % palette_size].tolist()
                    mask = cv2.line(mask, new_pt, old_pt, track_color, 2)
                    frame = cv2.circle(frame, new_pt, 5, track_color, -1)

                # Carry the surviving points over to the next iteration
                p0 = good_new.reshape(-1, 1, 2)

            img = cv2.add(frame, mask)
            out.write(img.astype(np.uint8))

            # Now update the previous frame
            old_gray = frame_gray.copy()

            pb.progress(ii)
            ii += 1
    finally:
        # Release both handles even if a cv2 call raised mid-way.
        vidcap.release()
        out.release()

    if self.has_audio:
        source_audio = extract_wav(of + fex)
        embed_audio_in_video(source_audio, destination_video)
        os.remove(source_audio)

    return musicalgestures.MgObject(destination_video, color=self.color, returned_by_process=True)
def mg_motion(self, filtertype='Regular', thresh=0.05, blur='None', kernel_size=5, inverted_motionvideo=False, inverted_motiongram=False, unit='seconds', equalize_motiongram=True, save_plot=True, save_data=True, data_format="csv", save_motiongrams=True, save_video=True):
    """
    Finds the difference in pixel value from one frame to the next in an input video, and saves the frames into a new video.
    Describes the motion in the recording.

    Parameters
    ----------
    - filtertype : {'Regular', 'Binary', 'Blob'}, optional

        `Regular` turns all values below `thresh` to 0.
        `Binary` turns all values below `thresh` to 0, above `thresh` to 1.
        `Blob` removes individual pixels with erosion method.
    - thresh : float, optional

        A number in the range of 0 to 1. Default is 0.05.
        Eliminates pixel values less than given threshold.
    - blur : {'None', 'Average'}, optional

        `Average` to apply a 10px * 10px blurring filter, `None` otherwise. Case-insensitive.
    - kernel_size : int, optional

        Default is 5. Size of structuring element.
    - inverted_motionvideo : bool, optional

        Default is `False`. If `True`, inverts colors of the motion video.
    - inverted_motiongram : bool, optional

        Default is `False`. If `True`, inverts colors of the motiongrams.
    - unit : {'seconds', 'samples'}, optional

        Unit in QoM plot.
    - equalize_motiongram : bool, optional

        Default is `True`. If `True`, converts the motiongrams to hsv-color space and flattens the value channel (v).
    - save_plot : bool, optional

        Default is `True`. If `True`, outputs motion-plot.
    - save_data : bool, optional

        Default is `True`. If `True`, outputs motion-data.
    - data_format : {'csv', 'tsv', 'txt'}, optional

        Specifies format of motion-data.
    - save_motiongrams : bool, optional

        Default is `True`. If `True`, outputs motiongrams.
    - save_video : bool, optional

        Default is `True`. If `True`, outputs the motion video.

    Outputs
    -------
    - `filename`_motion.avi

        A video of the absolute difference between consecutive frames in the source video.
    - `filename`_motion_com_qom.png

        A plot describing the centroid of motion and the quantity of motion in the source video.
    - `filename`_mgx.png

        A horizontal motiongram of the source video.
    - `filename`_mgy.png

        A vertical motiongram of the source video.
    - `filename`_motion.csv

        A text file containing the quantity of motion and the centroid of motion for each frame in the source video with timecodes in milliseconds.
        Available formats: csv, tsv, txt.

    Returns
    -------
    - MgObject

        A new MgObject pointing to the output '_motion' video file. If `save_video=False` (including the case
        where nothing is rendered at all), it returns an MgObject pointing to the input video file.

    Raises
    ------
    - ValueError

        If `blur` is neither 'None' nor 'Average'.
    """

    def _scale_peak_to_255(gram):
        # Scales so that the largest value becomes 255. A gram whose maximum is 0
        # is returned untouched to avoid a division by zero.
        peak = gram.max()
        if peak == 0:
            return gram
        return gram / peak * 255

    def _stretch_to_full_range(gram):
        # Min-max stretch to 0..255. A constant gram has no range to stretch
        # and maps to zeros instead of NaN.
        low, high = gram.min(), gram.max()
        if high == low:
            return np.zeros(gram.shape, dtype=np.float64)
        return (gram - low) / (high - low) * 255.0

    # Bound before the early exit below, which needs them to build its return value.
    of, fex = self.of, self.fex

    if not (save_plot or save_data or save_motiongrams or save_video):
        print("Nothing to render. Exiting...")
        return musicalgestures.MgObject(of + fex, color=self.color, returned_by_process=True)

    blur_mode = blur.lower()
    if blur_mode not in ('average', 'none'):
        raise ValueError("blur must be 'None' or 'Average', got '{}'".format(blur))
    use_blur = blur_mode == 'average'

    is_color = bool(self.color)

    # Convert to avi if the input is not avi - necessary for cv2 compatibility on all platforms
    if fex != '.avi':
        convert_to_avi(of + fex)
        fex = '.avi'

    vidcap = cv2.VideoCapture(of + fex)
    ret, frame = vidcap.read()

    if save_video:
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter(of + '_motion' + fex, fourcc, self.fps, (self.width, self.height))

    if save_motiongrams:
        # Per-frame slices are collected in lists and joined once after the loop;
        # both grams start with one zero-filled slice.
        if is_color:
            gramx_rows = [np.zeros([1, self.width, 3])]
            gramy_cols = [np.zeros([self.height, 1, 3])]
        else:
            gramx_rows = [np.zeros([1, self.width])]
            gramy_cols = [np.zeros([self.height, 1])]

    if save_data or save_plot:
        time = np.array([])  # time in ms
        qom = np.array([])  # quantity of motion
        com = np.array([])  # centroid of motion

    ii = 0

    labels = ("-video", "-grams", "-plots", "-data")
    switches = (save_video, save_motiongrams, save_plot, save_data)
    pgbar_text = 'Rendering motion' + ", ".join(
        label for label, enabled in zip(labels, switches) if enabled) + ":"

    pb = MgProgressbar(total=self.length, prefix=pgbar_text)

    if not is_color:
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Every frame is blurred exactly once, right after it is read. The first
    # frame is handled here, all later ones inside the loop. Re-blurring the
    # previous frame on each iteration would compare a twice-blurred frame
    # against a once-blurred one.
    if use_blur:
        frame = cv2.blur(frame, (10, 10))

    try:
        while vidcap.isOpened():
            prev_frame = frame

            ret, frame = vidcap.read()
            if not ret:
                pb.progress(self.length)
                break

            if use_blur:
                # The higher these numbers the more blur you get
                frame = cv2.blur(frame, (10, 10))

            if not is_color:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # Signed integers so the subtraction below cannot wrap around
            frame = np.array(frame).astype(np.int32)

            if is_color:
                motion_frame_rgb = np.zeros([self.height, self.width, 3])

                for i in range(frame.shape[2]):
                    motion_frame = (np.abs(frame[:, :, i] - prev_frame[:, :, i])).astype(np.uint8)
                    motion_frame = filter_frame(motion_frame, filtertype, thresh, kernel_size)
                    motion_frame_rgb[:, :, i] = motion_frame

                if save_motiongrams:
                    movement_y = np.mean(motion_frame_rgb, axis=1).reshape(self.height, 1, 3)
                    movement_x = np.mean(motion_frame_rgb, axis=0).reshape(1, self.width, 3)
                    gramy_cols.append(movement_y)
                    gramx_rows.append(movement_x)
            else:
                motion_frame = (np.abs(frame - prev_frame)).astype(np.uint8)
                motion_frame = filter_frame(motion_frame, filtertype, thresh, kernel_size)

                if save_motiongrams:
                    movement_y = np.mean(motion_frame, axis=1).reshape(self.height, 1)
                    movement_x = np.mean(motion_frame, axis=0).reshape(1, self.width)
                    gramy_cols.append(movement_y)
                    gramx_rows.append(movement_x)

                # The writer and the centroid computation get a 3-channel frame
                motion_frame_rgb = cv2.cvtColor(motion_frame, cv2.COLOR_GRAY2BGR)

            if save_video:
                if inverted_motionvideo:
                    out.write(cv2.bitwise_not(motion_frame_rgb.astype(np.uint8)))
                else:
                    out.write(motion_frame_rgb.astype(np.uint8))

            if save_plot or save_data:
                combite, qombite = centroid(motion_frame_rgb.astype(np.uint8), self.width, self.height)
                if ii == 0:
                    time = frame2ms(ii, self.fps)
                    com = combite.reshape(1, 2)
                    qom = qombite
                else:
                    time = np.append(time, frame2ms(ii, self.fps))
                    com = np.append(com, combite.reshape(1, 2), axis=0)
                    qom = np.append(qom, qombite)

            pb.progress(ii)
            ii += 1
    finally:
        # Release the handles even if a cv2 call raised mid-way.
        vidcap.release()
        if save_video:
            out.release()

    if save_motiongrams:
        gramx = np.concatenate(gramx_rows, axis=0)
        gramy = np.concatenate(gramy_cols, axis=1)

        if not is_color:
            # Normalize before converting to uint8 to keep precision
            gramx = _scale_peak_to_255(gramx)
            gramy = _scale_peak_to_255(gramy)
            gramx = cv2.cvtColor(gramx.astype(np.uint8), cv2.COLOR_GRAY2BGR)
            gramy = cv2.cvtColor(gramy.astype(np.uint8), cv2.COLOR_GRAY2BGR)

        gramx = _stretch_to_full_range(gramx)
        gramy = _stretch_to_full_range(gramy)

        if equalize_motiongram:
            gramx = gramx.astype(np.uint8)
            gramx_hsv = cv2.cvtColor(gramx, cv2.COLOR_BGR2HSV)
            gramx_hsv[:, :, 2] = cv2.equalizeHist(gramx_hsv[:, :, 2])
            gramx = cv2.cvtColor(gramx_hsv, cv2.COLOR_HSV2BGR)

            gramy = gramy.astype(np.uint8)
            gramy_hsv = cv2.cvtColor(gramy, cv2.COLOR_BGR2HSV)
            gramy_hsv[:, :, 2] = cv2.equalizeHist(gramy_hsv[:, :, 2])
            gramy = cv2.cvtColor(gramy_hsv, cv2.COLOR_HSV2BGR)

        if inverted_motiongram:
            cv2.imwrite(of + '_mgx.png', cv2.bitwise_not(gramx.astype(np.uint8)))
            cv2.imwrite(of + '_mgy.png', cv2.bitwise_not(gramy.astype(np.uint8)))
        else:
            cv2.imwrite(of + '_mgx.png', gramx.astype(np.uint8))
            cv2.imwrite(of + '_mgy.png', gramy.astype(np.uint8))

    if save_data:
        save_txt(of, time, com, qom, self.width, self.height, data_format)

    if save_plot:
        plot_motion_metrics(of, self.fps, com, qom, self.width, self.height, unit)

    if save_video:
        destination_video = of + '_motion' + fex
        if self.has_audio:
            source_audio = extract_wav(of + fex)
            embed_audio_in_video(source_audio, destination_video)
            os.remove(source_audio)
        return musicalgestures.MgObject(destination_video, color=self.color, returned_by_process=True)

    return musicalgestures.MgObject(of + fex, color=self.color, returned_by_process=True)
def mg_motionhistory(self, history_length=10, kernel_size=5, filtertype='Regular', thresh=0.05, blur='None', inverted_motionhistory=False):
    """
    Finds the difference in pixel value from one frame to the next in an input video, and saves the difference frame to a history tail.
    The history frames are summed up and normalized, and added to the current difference frame to show the history of motion.

    Parameters
    ----------
    - history_length : int, optional

        Default is 10. Number of frames to be saved in the history tail. Must not be negative.
    - kernel_size : int, optional

        Default is 5. Size of structuring element.
    - filtertype : {'Regular', 'Binary', 'Blob'}, optional

        `Regular` turns all values below `thresh` to 0.
        `Binary` turns all values below `thresh` to 0, above `thresh` to 1.
        `Blob` removes individual pixels with erosion method.
    - thresh : float, optional

        A number in the range of 0 to 1. Default is 0.05.
        Eliminates pixel values less than given threshold.
    - blur : {'None', 'Average'}, optional

        `Average` to apply a 10px * 10px blurring filter, `None` otherwise. Case-insensitive.
    - inverted_motionhistory : bool, optional

        Default is `False`. If `True`, inverts colors of the motionhistory video.

    Outputs
    -------
    - `filename`_motionhistory.avi

    Returns
    -------
    - MgObject

        A new MgObject pointing to the output '_motionhistory' video file.

    Raises
    ------
    - ValueError

        If `blur` is neither 'None' nor 'Average', or if `history_length` is negative.

    Notes
    -----
    As a side effect `filtertype`, `thresh` and `blur` are stored on the object
    (`self.filtertype`, `self.thresh`, `self.blur`).
    """
    blur_mode = blur.lower()
    if blur_mode not in ('average', 'none'):
        raise ValueError("blur must be 'None' or 'Average', got '{}'".format(blur))
    if history_length < 0:
        raise ValueError('history_length must not be negative, got {}'.format(history_length))

    use_blur = blur_mode == 'average'
    is_color = bool(self.color)

    # This can be adjusted to higher number to make motion more visible.
    # Use with caution to not make it overflow.
    enhancement = 1

    self.filtertype = filtertype
    self.thresh = thresh
    self.blur = blur

    # One path for the writer, the audio embedding and the returned object, so
    # they cannot drift apart.
    destination_video = self.of + '_motionhistory' + self.fex

    vidcap = cv2.VideoCapture(self.of + self.fex)
    ret, frame = vidcap.read()

    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out = cv2.VideoWriter(destination_video, fourcc, self.fps, (self.width, self.height))

    ii = 0
    history = []

    if not is_color:
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Every frame is blurred exactly once, right after it is read. The first
    # frame is handled here, all later ones inside the loop. Re-blurring the
    # previous frame on each iteration would compare a twice-blurred frame
    # against a once-blurred one.
    if use_blur:
        frame = cv2.blur(frame, (10, 10))

    try:
        while vidcap.isOpened():
            prev_frame = frame

            ret, frame = vidcap.read()
            if not ret:
                mg_progressbar(self.length, self.length, 'Rendering motion history video:', 'Complete')
                break

            if use_blur:
                # The higher these numbers the more blur you get
                frame = cv2.blur(frame, (10, 10))

            if not is_color:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            frame = np.array(frame).astype(np.float64)

            # Filtered absolute difference to the previous frame
            if is_color:
                motion_frame = np.zeros([self.height, self.width, 3])
                for i in range(frame.shape[2]):
                    channel = (np.abs(frame[:, :, i] - prev_frame[:, :, i])).astype(np.float64)
                    motion_frame[:, :, i] = filter_frame(channel, self.filtertype, self.thresh, kernel_size)
            else:
                motion_frame = (np.abs(frame - prev_frame)).astype(np.float64)
                motion_frame = filter_frame(motion_frame, self.filtertype, self.thresh, kernel_size)

            # Average of the current difference frame and the buffered ones
            if history:
                share = len(history) + 1
                motion_history = motion_frame / share
                for past_frame in history:
                    motion_history += past_frame / share
            else:
                motion_history = motion_frame

            # Append first, trim afterwards: keeps at most `history_length`
            # frames and also works for history_length == 0.
            history.append(motion_frame)
            if len(history) > history_length:
                history.pop(0)

            # Same cast chain as before (integer truncation, then 8 bit)
            motion_history = motion_history.astype(np.uint64)

            if not is_color:
                motion_history_rgb = cv2.cvtColor(motion_history.astype(np.uint8), cv2.COLOR_GRAY2BGR)
            else:
                motion_history_rgb = motion_history

            rendered = enhancement * motion_history_rgb.astype(np.uint8)
            if inverted_motionhistory:
                rendered = cv2.bitwise_not(rendered)
            out.write(rendered)

            ii += 1
            mg_progressbar(ii, self.length, 'Rendering motion history video:', 'Complete')
    finally:
        # Release both handles even if a cv2 call raised mid-way.
        vidcap.release()
        out.release()

    # Skip the audio step for videos flagged as silent. Objects that do not
    # carry a `has_audio` attribute keep the old behaviour (always embed).
    if getattr(self, 'has_audio', True):
        source_audio = extract_wav(self.of + self.fex)
        embed_audio_in_video(source_audio, destination_video)
        os.remove(source_audio)

    return musicalgestures.MgObject(destination_video, color=self.color, returned_by_process=True)
def pose(self, model='mpi', device='cpu', threshold=0.1, downsampling_factor=4, save_data=True, data_format='csv', save_video=True):
    """
    Renders a video with the pose estimation (aka. "keypoint detection" or "skeleton tracking") overlaid on it.
    Outputs the predictions in a text file (default format is csv). Uses models from the
    [openpose](https://github.com/CMU-Perceptual-Computing-Lab/openpose) project.

    If the weights file of the chosen model is not found, the user is asked on the console whether it
    should be downloaded; answering anything other than 'y' aborts the process.

    Args:
        model (str, optional): 'mpi' loads the model trained on the Multi-Person Dataset (MPII), 'coco' loads one trained on the COCO dataset. The MPII model outputs 15 points, while the COCO model produces 18 points. Any other value falls back to 'mpi'. Defaults to 'mpi'.
        device (str, optional): Sets the backend to use for the neural network ('cpu' or 'gpu'). Any other value falls back to 'cpu'. Defaults to 'cpu'.
        threshold (float, optional): The normalized confidence threshold that decides whether we keep or discard a predicted point. Discarded points get substituted with (0, 0) in the output data. Defaults to 0.1.
        downsampling_factor (int, optional): Decides how much we downsample the video before we pass it to the neural network. For example `downsampling_factor=4` means that the input to the network is one-fourth the resolution of the source video. Heavier downsampling reduces rendering time but produces lower quality pose estimation. Defaults to 4.
        save_data (bool, optional): Whether we save the predicted pose data to a file. Defaults to True.
        data_format (str or list, optional): Specifies format of pose-data. Accepted values are 'csv', 'tsv' and 'txt'. For multiple output formats, use list, eg. ['csv', 'txt']. Unsupported values fall back to 'csv'. Defaults to 'csv'.
        save_video (bool, optional): Whether we save the video with the estimated pose overlaid on it. Defaults to True.

    Outputs:
        `filename`_pose.avi: The source video with pose overlay (only if `save_video` is True).
        `filename`_pose.`data_format`: A text file containing the normalized x and y coordinates of each keypoints (such as head, left shoulder, right shoulder, etc) for each frame in the source video with timecodes in milliseconds. Available formats: csv, tsv, txt (only if `save_data` is True).

    Returns:
        MgObject: An MgObject pointing to the output '_pose' video. If `save_video` is False it points to the
        video that was analysed instead, and if the process is aborted at the download prompt it points to
        the source video.
    """

    module_path = os.path.abspath(os.path.dirname(musicalgestures.__file__))

    # Normalize the model name first, so that the fallback goes through exactly the same
    # setup as an explicit 'mpi' request (including nPoints and POSE_PAIRS).
    model_key = model.lower()
    if model_key not in ('mpi', 'coco'):
        print(f'Unrecognized model "{model}", switching to default (mpi).')
        model_key = 'mpi'
    model = model_key

    if model == 'coco':
        protoFile = module_path + '/pose/coco/pose_deploy_linevec.prototxt'
        weightsFile = module_path + '/pose/coco/pose_iter_440000.caffemodel'
        nPoints = 18
        POSE_PAIRS = [[1, 0], [1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7],
                      [1, 8], [8, 9], [9, 10], [1, 11], [11, 12], [12, 13],
                      [0, 14], [0, 15], [14, 16], [15, 17]]
    else:
        protoFile = module_path + '/pose/mpi/pose_deploy_linevec_faster_4_stages.prototxt'
        weightsFile = module_path + '/pose/mpi/pose_iter_160000.caffemodel'
        nPoints = 15
        POSE_PAIRS = [[0, 1], [1, 2], [2, 3], [3, 4], [1, 5], [5, 6], [6, 7],
                      [1, 14], [14, 8], [8, 9], [9, 10], [14, 11], [11, 12], [12, 13]]

    # Check if .caffemodel file exists, download if necessary
    if not os.path.exists(weightsFile):
        print('Could not find weights file. Do you want to download it (~200MB)? (y/n)')
        answer = input()
        if answer.lower() == 'n':
            print('Ok. Exiting...')
            return musicalgestures.MgObject(self.filename, color=self.color, returned_by_process=True)
        elif answer.lower() == 'y':
            download_model(model)
        else:
            print(f'Unrecognized answer "{answer}". Exiting...')
            return musicalgestures.MgObject(self.filename, color=self.color, returned_by_process=True)

    # Read the network into Memory
    net = cv2.dnn.readNetFromCaffe(protoFile, weightsFile)

    if device == "gpu":
        net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
        net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
    else:
        if device != "cpu":
            print(f'Unrecognized device "{device}", switching to default (cpu).')
        # Backend and target are separate settings: keep the default backend and
        # explicitly select the CPU as the computation target.
        net.setPreferableBackend(cv2.dnn.DNN_BACKEND_DEFAULT)
        net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

    of, fex = os.path.splitext(self.filename)

    # Convert to avi if the input is not avi
    if fex != '.avi':
        convert_to_avi(of + fex)
        fex = '.avi'
        filename = of + fex
    else:
        filename = self.filename

    # Note: no frame is read here. Every frame, including the first one, is consumed
    # by the processing loop below so that frame indices and timecodes line up.
    vidcap = cv2.VideoCapture(filename)

    fps = int(vidcap.get(cv2.CAP_PROP_FPS))
    width = int(vidcap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vidcap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    length = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))

    # Resolution of the (downsampled) input of the network
    inWidth = int(roundup(width / downsampling_factor, 2))
    inHeight = int(roundup(height / downsampling_factor, 2))

    pb = MgProgressbar(total=length, prefix='Rendering pose estimation video:')

    out = None
    if save_video:
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter(of + '_pose' + fex, fourcc, fps, (width, height))

    ii = 0
    data = []

    while vidcap.isOpened():
        ret, frame = vidcap.read()
        if not ret:
            pb.progress(length)
            break

        inpBlob = cv2.dnn.blobFromImage(frame, 1.0 / 255, (inWidth, inHeight), (0, 0, 0), swapRB=False, crop=False)
        net.setInput(inpBlob)
        output = net.forward()

        H = output.shape[2]
        W = output.shape[3]

        points = []
        for i in range(nPoints):
            # confidence map of corresponding body's part.
            probMap = output[0, i, :, :]

            # Find global maxima of the probMap.
            _, prob, _, point = cv2.minMaxLoc(probMap)

            if prob > threshold:
                # Scale the point to fit on the original image
                x = (width * point[0]) / W
                y = (height * point[1]) / H
                points.append((int(x), int(y)))
            else:
                points.append(None)

        if save_data:
            # One row per frame: timecode followed by normalized x, y of every keypoint.
            # Keypoints below the threshold are stored as 0, 0.
            datapoint = [frame2ms(ii, fps)]
            for point in points:
                if point is None:
                    datapoint += [0, 0]
                else:
                    datapoint += [point[0] / width, point[1] / height]
            data.append(datapoint)

        if save_video:
            # The overlay is only needed when the frame is actually written out.
            for partA, partB in POSE_PAIRS:
                if points[partA] and points[partB]:
                    cv2.line(frame, points[partA], points[partB], (0, 255, 255), 2, lineType=cv2.LINE_AA)
                    cv2.circle(frame, points[partA], 4, (0, 0, 255), thickness=-1, lineType=cv2.FILLED)
                    cv2.circle(frame, points[partB], 4, (0, 0, 255), thickness=-1, lineType=cv2.FILLED)
            out.write(frame.astype(np.uint8))

        pb.progress(ii)
        ii += 1

    vidcap.release()

    if save_video:
        out.release()
        destination_video = of + '_pose' + fex

        if self.has_audio:
            source_audio = extract_wav(of + fex)
            embed_audio_in_video(source_audio, destination_video)
            os.remove(source_audio)

    def save_single_file(of, model, data, data_format):
        """
        Helper function to export pose estimation data as a textfile using pandas.
        Unsupported formats fall back to csv.
        """
        coco_table = [
            'Nose', 'Neck', 'Right Shoulder', 'Right Elbow', 'Right Wrist',
            'Left Shoulder', 'Left Elbow', 'Left Wrist', 'Right Hip',
            'Right Knee', 'Right Ankle', 'Left Hip', 'Left Knee', 'Left Ankle',
            'Right Eye', 'Left Eye', 'Right Ear', 'Left Ear'
        ]
        mpi_table = [
            'Head', 'Neck', 'Right Shoulder', 'Right Elbow', 'Right Wrist',
            'Left Shoulder', 'Left Elbow', 'Left Wrist', 'Right Hip',
            'Right Knee', 'Right Ankle', 'Left Hip', 'Left Knee', 'Left Ankle',
            'Chest'
        ]
        table_to_use = mpi_table if model.lower() == 'mpi' else coco_table

        headers = ['Time']
        for keypoint in table_to_use:
            headers.append(keypoint + ' X')
            headers.append(keypoint + ' Y')

        data_format = data_format.lower()
        if data_format not in ('csv', 'tsv', 'txt'):
            print(f"Invalid data format: '{data_format}'.\nFalling back to '.csv'.")
            data_format = 'csv'

        df = pd.DataFrame(data=data, columns=headers)

        if data_format == 'csv':
            df.to_csv(of + '_pose.csv', index=None)
            return

        # tsv and txt only differ in the delimiter (and the file extension)
        delimiter = '\t' if data_format == 'tsv' else ' '
        fmt_list = ['%d'] + ['%.15f'] * (len(table_to_use) * 2)
        with open(of + '_pose.' + data_format, 'wb') as f:
            # Every header (also the last one) is followed by the delimiter
            head_str = ''.join(head + delimiter for head in headers) + '\n'
            f.write(head_str.encode())
            np.savetxt(f, df.values, delimiter=delimiter, fmt=fmt_list)

    def save_txt(of, model, data, data_format):
        """
        Helper function to export pose estimation data as textfile(s).
        """
        supported = ('csv', 'tsv', 'txt')

        if isinstance(data_format, str):
            save_single_file(of, model, data, data_format)
        elif isinstance(data_format, (list, tuple)) and all(
                isinstance(item, str) and item.lower() in supported for item in data_format):
            # Lowercase before de-duplicating, so eg. 'CSV' and 'csv' are written only once
            for item in dict.fromkeys(item.lower() for item in data_format):
                save_single_file(of, model, data, item)
        else:
            print(f"Unsupported formats in {data_format}.\nFalling back to '.csv'.")
            save_single_file(of, model, data, "csv")

    if save_data:
        save_txt(of, model, data, data_format)

    if save_video:
        return musicalgestures.MgObject(destination_video, color=self.color, returned_by_process=True)

    # No '_pose' video was rendered: point to the video that was analysed instead.
    return musicalgestures.MgObject(filename, color=self.color, returned_by_process=True)