def speakerTracker(self): # Clean up cv2.destroyAllWindows() if self.inputPath is not None: # If a path to a file was given, assume it is a single video file if os.path.isfile(self.inputPath): cap = cv2.VideoCapture(self.inputPath) clip = VideoFileClip(self.inputPath,audio=False) fps = cap.get(cv2.cv.CV_CAP_PROP_FPS) self.numFrames = cap.get(cv2.cv.CV_CAP_PROP_FRAME_COUNT) print "[speakerTrackering] Number of frames" , self.numFrames pathBase = os.path.basename(self.inputPath) pathDirectory = os.path.dirname(self.inputPath) baseName = pathDirectory + '/' + os.path.splitext(pathBase)[0] + '_' + 'speakerCoordinates.txt' #Skip first frames if required if self.skip is not None: cap.set(cv2.cv.CV_CAP_PROP_POS_FRAMES, self.skip) # Otherwise assume it is a format string for reading images else: cap = cmtutil.FileVideoCapture(self.inputPath) #Skip first frames if required if self.skip is not None: cap.frame = 1 + self.skip else: # If no input path was specified, open camera device sys.exit("[speakerTrackering] Error: no input path was specified") # Read first frame status, im0 = cap.read() imGray0 = cv2.cvtColor(im0, cv2.COLOR_BGR2GRAY) imDraw = np.copy(im0) (tl, br) = cmtutil.get_rect(imDraw) print '[speakerTrackering] Using', tl, br, 'as initial bounding box for the speaker' # Get the points to crop the video with 4:3 aspect ratio # If the selected points are near to the border of the image, It will automaticaly croped till borders. x1 = np.floor(tl[0]) y1 = np.floor(tl[1]) x2 = np.floor(br[0]) y2 = np.floor(tl[1] + np.abs((br[0] - tl[0])*(3.0/4.0))) print x1, x2, y1, y2 croppedClip = moviepycrop(clip, x1, y1, x2, y2) return croppedClip
def speakerTracker(self): # Clean up cv2.destroyAllWindows() if self.inputPath is not None: # If a path to a file was given, assume it is a single video file if os.path.isfile(self.inputPath): cap = cv2.VideoCapture(self.inputPath) clip = VideoFileClip(self.inputPath,audio=False) fps = cap.get(cv2.cv.CV_CAP_PROP_FPS) self.numFrames = cap.get(cv2.cv.CV_CAP_PROP_FRAME_COUNT) print "[speakerTracker] Number of frames" , self.numFrames pathBase = os.path.basename(self.inputPath) pathDirectory = os.path.dirname(self.inputPath) baseName = pathDirectory + '/' + os.path.splitext(pathBase)[0] + '_' #Skip first frames if required if self.skip is not None: cap.set(cv2.cv.CV_CAP_PROP_POS_FRAMES, self.skip) # Otherwise assume it is a format string for reading images else: cap = cmtutil.FileVideoCapture(self.inputPath) #Skip first frames if required if self.skip is not None: cap.frame = 1 + self.skip else: # If no input path was specified, open camera device sys.exit("[speakerTracker] Error: no input path was specified") # The number of pixels from the center of object till the border of cropped image marginPixels = 300 # Read first frame status, im0 = cap.read() imGray0 = cv2.cvtColor(im0, cv2.COLOR_BGR2GRAY) imDraw = np.copy(im0) (tl0, br0) = cmtutil.get_rect(imDraw) print '[speakerTracker] Using', tl0, br0, 'as initial bounding box for the speaker' # First initialization to get the center self.CMT.initialise(imGray0, tl0, br0) # The first x and y coordinates of the object of interest self.inity = tl0[1] - self.CMT.center_to_tl[1] self.initx = tl0[0] - self.CMT.center_to_tl[0] # Crop the first frame imGray0_initial = imGray0[self.inity - marginPixels : self.inity + marginPixels, self.initx - marginPixels : self.initx + marginPixels] # Calculate the translation vector from main image to the cropped frame self.originFromMainImageY = self.inity - marginPixels self.originFromMainImageX = self.initx - marginPixels # Calculate the position of the selected rectangle in the cropped frame tl = (tl0[0] - self.originFromMainImageX , tl0[1] - self.originFromMainImageY) br = (br0[0] - self.originFromMainImageX , br0[1] - self.originFromMainImageY) #print '[speakerTracker] Using', tl, br, 'as initial bounding box for the speaker' # initialization and keypoint calculation self.CMT.initialise(imGray0_initial, tl, br) # Center of object in cropped frame self.currentY = tl[1] - self.CMT.center_to_tl[1] self.currentX = tl[0] - self.CMT.center_to_tl[0] # Center of object in main frame self.currentYMainImage = self.currentY + self.originFromMainImageY self.currentXMainImage = self.currentX + self.originFromMainImageX measuredTrack=np.zeros((self.numFrames+10,2))-1 count =0 # loop to read all frames, # crop them with the center of last frame, # calculate keypoints and center of the object for frame in clip.iter_frames(): # Read the frame and convert it to gray scale im_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) # Corner correction (Height) if (self.currentYMainImage + marginPixels >= im_gray.shape[0]): self.currentYMainImage = im_gray.shape[0] - marginPixels -1 else: self.currentYMainImage = self.currentYMainImage if (self.currentXMainImage + marginPixels >= im_gray.shape[1]): self.currentXMainImage = im_gray.shape[1] - marginPixels -1 else: self.currentXMainImage = self.currentXMainImage if (self.currentYMainImage - marginPixels <= 0): self.currentYMainImage = 0 + marginPixels +1 else: self.currentYMainImage = self.currentYMainImage if (self.currentXMainImage - marginPixels <= 0): self.currentXMainImage = 0 + marginPixels +1 else: self.currentXMainImage = self.currentXMainImage # Crop it by previous coordinates im_gray_crop = im_gray[self.currentYMainImage - marginPixels : self.currentYMainImage + marginPixels, self.currentXMainImage - marginPixels : self.currentXMainImage + marginPixels] #plt.imshow(im_gray_crop, cmap = cm.Greys_r) #plt.show() #print "self.currentYMainImage:", self.currentYMainImage #print "self.currentXMainImage:", self.currentXMainImage #print im_gray_crop.shape # Compute all keypoints in the cropped frame self.CMT.process_frame(im_gray_crop) #print 'frame: {2:4d}, Center: {0:.2f},{1:.2f}'.format(self.CMT.center[0], self.CMT.center[1] , count) if not (math.isnan(self.CMT.center[0]) or math.isnan(self.CMT.center[1]) or (self.CMT.center[0] <= 0) or (self.CMT.center[1] <= 0)): # Compute the center of the object with respect to the main image self.diffY = self.CMT.center[0] - self.currentY self.diffX = self.CMT.center[1] - self.currentX self.currentYMainImage = self.diffY + self.currentYMainImage self.currentXMainImage = self.diffX + self.currentXMainImage self.currentY = self.CMT.center[0] self.currentX = self.CMT.center[1] # Save the center of frames in an array for further process measuredTrack[count,0] = self.currentYMainImage measuredTrack[count,1] = self.currentXMainImage else: self.currentYMainImage = self.currentYMainImage self.currentXMainImage = self.currentXMainImage print 'frame: {2:4d}, Center: {0:.2f},{1:.2f}'.format(self.currentYMainImage, self.currentXMainImage , count) count += 1 numMeas=measuredTrack.shape[0] markedMeasure=np.ma.masked_less(measuredTrack,0) # Kalman Filter Parameters deltaT = 1.0/clip.fps transitionMatrix=[[1,0,deltaT,0],[0,1,0,deltaT],[0,0,1,0],[0,0,0,1]] #A observationMatrix=[[1,0,0,0],[0,1,0,0]] #C xinit = markedMeasure[0,0] yinit = markedMeasure[0,1] vxinit = markedMeasure[1,0]-markedMeasure[0,0] vyinit = markedMeasure[1,1]-markedMeasure[0,1] initState = [xinit,yinit,vxinit,vyinit] #mu0 initCovariance = 1.0e-3*np.eye(4) #sigma0 transistionCov = 1.0e-4*np.eye(4) #Q observationCov = 1.0e-1*np.eye(2) #R # Kalman Filter bias kf = KalmanFilter(transition_matrices = transitionMatrix, observation_matrices = observationMatrix, initial_state_mean = initState, initial_state_covariance = initCovariance, transition_covariance = transistionCov, observation_covariance = observationCov) self.measuredTrack = measuredTrack # Kalman Filter (self.filteredStateMeans, self.filteredStateCovariances) = kf.filter(markedMeasure) # Kalman Smoother (self.filterStateMeanSmooth, self.filterStateCovariancesSmooth) = kf.smooth(markedMeasure) newClip = clip.fl_image( self.crop ) return newClip
def speakerTracker(self): # Clean up cv2.destroyAllWindows() if self.inputPath is not None: # If a path to a file was given, assume it is a single video file if os.path.isfile(self.inputPath): cap = cv2.VideoCapture(self.inputPath) clip = VideoFileClip(self.inputPath,audio=False) fps = cap.get(cv2.cv.CV_CAP_PROP_FPS) self.numFrames = cap.get(cv2.cv.CV_CAP_PROP_FRAME_COUNT) print "[speakerTrackering] Number of frames" , self.numFrames pathBase = os.path.basename(self.inputPath) pathDirectory = os.path.dirname(self.inputPath) baseName = pathDirectory + '/' + os.path.splitext(pathBase)[0] + '_' + 'speakerCoordinates.txt' #Skip first frames if required if self.skip is not None: cap.set(cv2.cv.CV_CAP_PROP_POS_FRAMES, self.skip) # Otherwise assume it is a format string for reading images else: cap = cmtutil.FileVideoCapture(self.inputPath) #Skip first frames if required if self.skip is not None: cap.frame = 1 + self.skip else: # If no input path was specified, open camera device sys.exit("[speakerTrackering] Error: no input path was specified") # Read first frame status, im0 = cap.read() imGray0 = cv2.cvtColor(im0, cv2.COLOR_BGR2GRAY) imDraw = np.copy(im0) if self.bBox is not None: # Try to disassemble user specified bounding box values = self.bBox.split(',') try: values = [int(v) for v in values] except: raise Exception('[speakerTrackering] Unable to parse bounding box') if len(values) != 4: raise Exception('[speakerTrackering] Bounding box must have exactly 4 elements') bbox = np.array(values) # Convert to point representation, adding singleton dimension bbox = cmtutil.bb2pts(bbox[None, :]) # Squeeze bbox = bbox[0, :] tl = bbox[:2] br = bbox[2:4] else: # Get rectangle input from user (tl, br) = cmtutil.get_rect(imDraw) print '[speakerTrackering] Using', tl, br, 'as initial bounding box for the speaker' self.CMT.initialise(imGray0, tl, br) newClip = clip.fl_image( self.crop ) return newClip
def speakerTracker(self): # Clean up cv2.destroyAllWindows() if self.inputPath is not None: # If a path to a file was given, assume it is a single video file if os.path.isfile(self.inputPath): cap = cv2.VideoCapture(self.inputPath) clip = VideoFileClip(self.inputPath,audio=False) fps = cap.get(cv2.cv.CV_CAP_PROP_FPS) self.numFrames = cap.get(cv2.cv.CV_CAP_PROP_FRAME_COUNT) print "[speakerTracker] Number of frames" , self.numFrames pathBase = os.path.basename(self.inputPath) pathDirectory = os.path.dirname(self.inputPath) baseName = pathDirectory + '/' + os.path.splitext(pathBase)[0] + '_' # Otherwise assume it is a format string for reading images else: cap = cmtutil.FileVideoCapture(self.inputPath) else: # If no input path was specified, open camera device sys.exit("[speakerTracker] Error: no input path was specified") # Read first frame status, im0 = cap.read() imGray0 = cv2.cvtColor(im0, cv2.COLOR_BGR2GRAY) imDraw = np.copy(im0) (tl, br) = cmtutil.get_rect(imDraw) print '[speakerTrackering] Using', tl, br, 'as initial bounding box for the speaker' self.CMT.initialise(imGray0, tl, br) #self.inity = tl[1] - self.CMT.center_to_tl[1] measuredTrack=np.zeros((self.numFrames+10,2))-1 count =0 for frame in clip.iter_frames(): im_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) self.CMT.process_frame(im_gray) print 'frame: {2:4d}, Center: {0:.2f},{1:.2f}'.format(self.CMT.center[0], self.CMT.center[1] , count) #print self.inity if not (math.isnan(self.CMT.center[0]) or (self.CMT.center[0] <= 0)): measuredTrack[count,0] = self.CMT.center[0] measuredTrack[count,1] = self.CMT.center[1] count += 1 numMeas=measuredTrack.shape[0] markedMeasure=np.ma.masked_less(measuredTrack,0) # Kalman Filter Parameters deltaT = 1.0/clip.fps transitionMatrix=[[1,0,deltaT,0],[0,1,0,deltaT],[0,0,1,0],[0,0,0,1]] #A observationMatrix=[[1,0,0,0],[0,1,0,0]] #C xinit = markedMeasure[0,0] yinit = markedMeasure[0,1] vxinit = markedMeasure[1,0]-markedMeasure[0,0] vyinit = markedMeasure[1,1]-markedMeasure[0,1] initState = [xinit,yinit,vxinit,vyinit] #mu0 initCovariance = 1.0e-3*np.eye(4) #sigma0 transistionCov = 1.0e-4*np.eye(4) #Q observationCov = 1.0e-1*np.eye(2) #R kf = KalmanFilter(transition_matrices = transitionMatrix, observation_matrices = observationMatrix, initial_state_mean = initState, initial_state_covariance = initCovariance, transition_covariance = transistionCov, observation_covariance = observationCov) self.measuredTrack = measuredTrack (self.filteredStateMeans, self.filteredStateCovariances) = kf.filter(markedMeasure) (self.filterStateMeanSmooth, self.filterStateCovariancesSmooth) = kf.smooth(markedMeasure) self.inity = np.mean(self.filterStateMeanSmooth[:][1], axis=0) newClip = clip.fl_image( self.crop ) return newClip