def speakerTracker(self):
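        """Crop a static 4:3 region around the speaker.

        A minimal variant: the user selects a bounding box on the first
        frame and the whole clip is cropped to a fixed 4:3 window derived
        from that box; no frame-by-frame tracking is performed here.
        """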


        # Clean up
        cv2.destroyAllWindows()

        if self.inputPath is not None:

            # If a path to a file was given, assume it is a single video file
            if os.path.isfile(self.inputPath):
                cap = cv2.VideoCapture(self.inputPath)
                clip = VideoFileClip(self.inputPath, audio=False)
                fps = cap.get(cv2.cv.CV_CAP_PROP_FPS)
                self.numFrames = cap.get(cv2.cv.CV_CAP_PROP_FRAME_COUNT)
                print "[speakerTracker] Number of frames:", self.numFrames
                pathBase = os.path.basename(self.inputPath)
                pathDirectory = os.path.dirname(self.inputPath)
                baseName = pathDirectory + '/' + os.path.splitext(pathBase)[0] + '_' + 'speakerCoordinates.txt'


                #Skip first frames if required
                if self.skip is not None:
                    cap.set(cv2.cv.CV_CAP_PROP_POS_FRAMES, self.skip)

            # Otherwise assume it is a format string for reading images
            else:
                cap = cmtutil.FileVideoCapture(self.inputPath)

                #Skip first frames if required
                if self.skip is not None:
                    cap.frame = 1 + self.skip

        else:
            # If no input path was specified, abort
            sys.exit("[speakerTracker] Error: no input path was specified")

        # Read first frame
        status, im0 = cap.read()
        if not status:
            sys.exit("[speakerTracker] Error: could not read the first frame")
        imGray0 = cv2.cvtColor(im0, cv2.COLOR_BGR2GRAY)
        imDraw = np.copy(im0)

        (tl, br) = cmtutil.get_rect(imDraw)

        print '[speakerTracker] Using', tl, br, 'as initial bounding box for the speaker'
        
        # Get the points to crop the video to a 4:3 aspect ratio.
        # If the selected points fall near the image border, the crop is
        # automatically clamped to the border.
        x1 = np.floor(tl[0])
        y1 = np.floor(tl[1])
        x2 = np.floor(br[0])
        y2 = np.floor(tl[1] + np.abs((br[0] - tl[0])*(3.0/4.0))) 
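        # e.g. tl = (100, 50), br = (500, 200): the box is 400 px wide, so
        # y2 = 50 + 400 * 3/4 = 350 and the crop is 400 x 300, i.e. 4:3
        # (the coordinates here are illustrative only)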

        print '[speakerTracker] Crop region (x1, y1, x2, y2):', x1, y1, x2, y2
        croppedClip = moviepycrop(clip, x1, y1, x2, y2)

        return croppedClip
    def speakerTracker(self):
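        """Track the speaker with CMT on a moving search window.

        CMT is initialised on a user-selected box and then re-run on a
        square crop (2 * marginPixels per side) centred on the last known
        position. The measured centres are smoothed with a Kalman filter
        and the clip is re-cropped around the track via self.crop.
        """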
        

        # Clean up
        cv2.destroyAllWindows()

        if self.inputPath is not None:

            # If a path to a file was given, assume it is a single video file
            if os.path.isfile(self.inputPath):
                cap = cv2.VideoCapture(self.inputPath)
                clip = VideoFileClip(self.inputPath, audio=False)
                fps = cap.get(cv2.cv.CV_CAP_PROP_FPS)
                self.numFrames = cap.get(cv2.cv.CV_CAP_PROP_FRAME_COUNT)
                print "[speakerTracker] Number of frames:", self.numFrames
                pathBase = os.path.basename(self.inputPath)
                pathDirectory = os.path.dirname(self.inputPath)
                baseName = pathDirectory + '/' + os.path.splitext(pathBase)[0] + '_'

                #Skip first frames if required
                if self.skip is not None:
                    cap.set(cv2.cv.CV_CAP_PROP_POS_FRAMES, self.skip)

            # Otherwise assume it is a format string for reading images
            else:
                cap = cmtutil.FileVideoCapture(self.inputPath)

                #Skip first frames if required
                if self.skip is not None:
                    cap.frame = 1 + self.skip

        else:
            # If no input path was specified, abort
            sys.exit("[speakerTracker] Error: no input path was specified")
        
        # Number of pixels from the object centre to each border of the cropped image
        marginPixels = 300
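        # i.e. the search window handed to CMT is a square of
        # 2 * marginPixels = 600 pixels per side, centred on the last
        # known position of the speaker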
        
        # Read first frame
        status, im0 = cap.read()
        if not status:
            sys.exit("[speakerTracker] Error: could not read the first frame")
        imGray0 = cv2.cvtColor(im0, cv2.COLOR_BGR2GRAY)
        imDraw = np.copy(im0)

        (tl0, br0) = cmtutil.get_rect(imDraw)

        print '[speakerTracker] Using', tl0, br0, 'as initial bounding box for the speaker'
          
        # First initialization to get the center
        self.CMT.initialise(imGray0, tl0, br0)
        
        # Initial centre coordinates (x, y) of the object of interest
        self.inity = tl0[1] - self.CMT.center_to_tl[1]
        self.initx = tl0[0] - self.CMT.center_to_tl[0]
        
        # Crop the first frame
        imGray0_initial = imGray0[self.inity - marginPixels : self.inity + marginPixels,
                                  self.initx - marginPixels : self.initx + marginPixels]
        
        
        # Calculate the translation vector from main image to the cropped frame
        self.originFromMainImageY = self.inity - marginPixels
        self.originFromMainImageX = self.initx - marginPixels
        
        # Calculate the position of the selected rectangle in the cropped frame        
        tl =  (tl0[0] - self.originFromMainImageX , tl0[1] - self.originFromMainImageY)
        br =  (br0[0] - self.originFromMainImageX , br0[1] - self.originFromMainImageY)
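        # i.e. a point (px, py) in the main image maps to
        # (px - originFromMainImageX, py - originFromMainImageY) in the crop,
        # which is exactly the translation applied to tl0 and br0 above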
        #print '[speakerTracker] Using', tl, br, 'as initial bounding box for the speaker'
        
        # initialization and keypoint calculation
        self.CMT.initialise(imGray0_initial, tl, br)
        
        # Center of object in cropped frame
        self.currentY = tl[1] - self.CMT.center_to_tl[1]
        self.currentX = tl[0] - self.CMT.center_to_tl[0]
        
        # Center of object in main frame
        self.currentYMainImage = self.currentY + self.originFromMainImageY
        self.currentXMainImage = self.currentX + self.originFromMainImageX
        

        # Measured centres per frame; -1 marks frames without a valid measurement
        measuredTrack = np.zeros((int(self.numFrames) + 10, 2)) - 1
        count = 0

        # Loop over all frames: crop each one around the last known centre,
        # then recompute the keypoints and the object centre
        for frame in clip.iter_frames():
            
            # MoviePy yields RGB frames, so convert RGB (not BGR) to grayscale
            im_gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
            
            # Clamp the window centre so the crop stays inside the image
            self.currentYMainImage = min(self.currentYMainImage,
                                         im_gray.shape[0] - marginPixels - 1)
            self.currentXMainImage = min(self.currentXMainImage,
                                         im_gray.shape[1] - marginPixels - 1)
            self.currentYMainImage = max(self.currentYMainImage, marginPixels + 1)
            self.currentXMainImage = max(self.currentXMainImage, marginPixels + 1)
            
            
            # Crop the frame around the previous centre
            im_gray_crop = im_gray[self.currentYMainImage - marginPixels : self.currentYMainImage + marginPixels,
                                   self.currentXMainImage - marginPixels : self.currentXMainImage + marginPixels]
            
            #plt.imshow(im_gray_crop, cmap = cm.Greys_r)
            #plt.show() 
            
            #print "self.currentYMainImage:", self.currentYMainImage
            #print "self.currentXMainImage:", self.currentXMainImage
            #print im_gray_crop.shape
            
            # Compute all keypoints in the cropped frame
            self.CMT.process_frame(im_gray_crop) 
            #print 'frame: {2:4d}, Center: {0:.2f},{1:.2f}'.format(self.CMT.center[0], self.CMT.center[1] , count)
            
            
            if not (math.isnan(self.CMT.center[0]) or math.isnan(self.CMT.center[1])
                or (self.CMT.center[0] <= 0) or (self.CMT.center[1] <= 0)):
                
                # Compute the center of the object with respect to the main image
                self.diffY = self.CMT.center[0] - self.currentY
                self.diffX = self.CMT.center[1] - self.currentX

                self.currentYMainImage = self.diffY + self.currentYMainImage
                self.currentXMainImage = self.diffX + self.currentXMainImage
                
                self.currentY = self.CMT.center[0]
                self.currentX = self.CMT.center[1]
                # Save the centre of each frame for later smoothing
                measuredTrack[count,0] = self.currentYMainImage
                measuredTrack[count,1] = self.currentXMainImage
                
            # Otherwise keep the previous centre (CMT lost the target this frame)
            

            
            print 'frame: {2:4d}, Center: {0:.2f},{1:.2f}'.format(self.currentYMainImage, self.currentXMainImage , count)
            count += 1 
 
        numMeas = measuredTrack.shape[0]
        markedMeasure = np.ma.masked_less(measuredTrack, 0)
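        # pykalman treats masked entries as missing observations, so frames
        # where CMT lost the target are interpolated by the filter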
        
        # Kalman filter parameters
        deltaT = 1.0 / clip.fps
        transitionMatrix = [[1, 0, deltaT, 0], [0, 1, 0, deltaT], [0, 0, 1, 0], [0, 0, 0, 1]]   #A
        observationMatrix = [[1, 0, 0, 0], [0, 1, 0, 0]]   #C

        xinit = markedMeasure[0, 0]
        yinit = markedMeasure[0, 1]
        vxinit = markedMeasure[1, 0] - markedMeasure[0, 0]
        vyinit = markedMeasure[1, 1] - markedMeasure[0, 1]
        initState = [xinit, yinit, vxinit, vyinit]    #mu0
        initCovariance = 1.0e-3 * np.eye(4)           #sigma0
        transitionCov = 1.0e-4 * np.eye(4)            #Q
        observationCov = 1.0e-1 * np.eye(2)           #R
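        # These matrices describe a constant-velocity model (a sketch of the
        # assumed dynamics, with s_k the state at frame k):
        #   s_k = [p1, p2, v1, v2]^T            (measured coordinates + velocities)
        #   s_{k+1} = A s_k + w_k,  w_k ~ N(0, Q)
        #   z_k     = C s_k + v_k,  v_k ~ N(0, R)
        # where A = transitionMatrix and C = observationMatrix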
        
        # Kalman filter construction
        kf = KalmanFilter(transition_matrices = transitionMatrix,
            observation_matrices = observationMatrix,
            initial_state_mean = initState,
            initial_state_covariance = initCovariance,
            transition_covariance = transitionCov,
            observation_covariance = observationCov)
        
        self.measuredTrack = measuredTrack
        # Kalman Filter
        (self.filteredStateMeans, self.filteredStateCovariances) = kf.filter(markedMeasure)
        # Kalman Smoother
        (self.filterStateMeanSmooth, self.filterStateCovariancesSmooth) = kf.smooth(markedMeasure)
        newClip = clip.fl_image(self.crop)
        return newClip
    def speakerTracker(self):
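        """Initialise CMT on the full first frame and delegate cropping.

        The initial bounding box comes either from a user-supplied
        self.bBox string or interactively via cmtutil.get_rect; the
        per-frame tracking and cropping work is delegated to self.crop.
        """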
        

        # Clean up
        cv2.destroyAllWindows()

        if self.inputPath is not None:

            # If a path to a file was given, assume it is a single video file
            if os.path.isfile(self.inputPath):
                cap = cv2.VideoCapture(self.inputPath)
                clip = VideoFileClip(self.inputPath, audio=False)
                fps = cap.get(cv2.cv.CV_CAP_PROP_FPS)
                self.numFrames = cap.get(cv2.cv.CV_CAP_PROP_FRAME_COUNT)
                print "[speakerTracker] Number of frames:", self.numFrames
                pathBase = os.path.basename(self.inputPath)
                pathDirectory = os.path.dirname(self.inputPath)
                baseName = pathDirectory + '/' + os.path.splitext(pathBase)[0] + '_' + 'speakerCoordinates.txt'
                

                #Skip first frames if required
                if self.skip is not None:
                    cap.set(cv2.cv.CV_CAP_PROP_POS_FRAMES, self.skip)

            # Otherwise assume it is a format string for reading images
            else:
                cap = cmtutil.FileVideoCapture(self.inputPath)

                #Skip first frames if required
                if self.skip is not None:
                    cap.frame = 1 + self.skip

        else:
            # If no input path was specified, abort
            sys.exit("[speakerTracker] Error: no input path was specified")

        # Read first frame
        status, im0 = cap.read()
        if not status:
            sys.exit("[speakerTracker] Error: could not read the first frame")
        imGray0 = cv2.cvtColor(im0, cv2.COLOR_BGR2GRAY)
        imDraw = np.copy(im0)

        if self.bBox is not None:
            # Try to disassemble user specified bounding box
            values = self.bBox.split(',')
            try:
                values = [int(v) for v in values]
            except ValueError:
                raise Exception('[speakerTracker] Unable to parse bounding box')
            if len(values) != 4:
                raise Exception('[speakerTracker] Bounding box must have exactly 4 elements')
            bbox = np.array(values)

            # Convert to point representation, adding singleton dimension
            bbox = cmtutil.bb2pts(bbox[None, :])

            # Squeeze
            bbox = bbox[0, :]

            tl = bbox[:2]
            br = bbox[2:4]
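            # Note: cmtutil.bb2pts converts (x, y, w, h) boxes to corner
            # points, so self.bBox is assumed to be a comma-separated string
            # such as '80,60,240,180' (the example values are illustrative)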
        else:
            # Get rectangle input from user
            (tl, br) = cmtutil.get_rect(imDraw)

        print '[speakerTracker] Using', tl, br, 'as initial bounding box for the speaker'

        self.CMT.initialise(imGray0, tl, br)
        
        newClip = clip.fl_image(self.crop)

        return newClip
    def speakerTracker(self):
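        """Track the speaker with CMT on full frames, then smooth.

        CMT processes every full frame; the measured centres are smoothed
        with a Kalman filter, the mean smoothed y-coordinate is stored in
        self.inity, and the clip is re-cropped via self.crop.
        """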
        

        # Clean up
        cv2.destroyAllWindows()

        if self.inputPath is not None:

            # If a path to a file was given, assume it is a single video file
            if os.path.isfile(self.inputPath):
                cap = cv2.VideoCapture(self.inputPath)
                clip = VideoFileClip(self.inputPath, audio=False)
                fps = cap.get(cv2.cv.CV_CAP_PROP_FPS)
                self.numFrames = cap.get(cv2.cv.CV_CAP_PROP_FRAME_COUNT)
                print "[speakerTracker] Number of frames:", self.numFrames
                pathBase = os.path.basename(self.inputPath)
                pathDirectory = os.path.dirname(self.inputPath)
                baseName = pathDirectory + '/' + os.path.splitext(pathBase)[0] + '_' 

            # Otherwise assume it is a format string for reading images
            else:
                cap = cmtutil.FileVideoCapture(self.inputPath)

        else:
            # If no input path was specified, abort
            sys.exit("[speakerTracker] Error: no input path was specified")

        # Read first frame
        status, im0 = cap.read()
        if not status:
            sys.exit("[speakerTracker] Error: could not read the first frame")
        imGray0 = cv2.cvtColor(im0, cv2.COLOR_BGR2GRAY)
        imDraw = np.copy(im0)

        (tl, br) = cmtutil.get_rect(imDraw)

        print '[speakerTracker] Using', tl, br, 'as initial bounding box for the speaker'

        self.CMT.initialise(imGray0, tl, br)
        #self.inity = tl[1] - self.CMT.center_to_tl[1]
        # Measured centres per frame; -1 marks frames without a valid measurement
        measuredTrack = np.zeros((int(self.numFrames) + 10, 2)) - 1

        count = 0
        for frame in clip.iter_frames():
            # MoviePy yields RGB frames, so convert RGB (not BGR) to grayscale
            im_gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
            
            self.CMT.process_frame(im_gray)

            print 'frame: {2:4d}, Center: {0:.2f},{1:.2f}'.format(self.CMT.center[0], self.CMT.center[1] , count)
            #print self.inity
            if not (math.isnan(self.CMT.center[0]) or math.isnan(self.CMT.center[1])
                    or (self.CMT.center[0] <= 0) or (self.CMT.center[1] <= 0)):
                measuredTrack[count,0] = self.CMT.center[0]
                measuredTrack[count,1] = self.CMT.center[1]               
            count += 1 

    
        numMeas = measuredTrack.shape[0]
        markedMeasure = np.ma.masked_less(measuredTrack, 0)
        
        # Kalman filter parameters (same constant-velocity model as above)
        deltaT = 1.0 / clip.fps
        transitionMatrix = [[1, 0, deltaT, 0], [0, 1, 0, deltaT], [0, 0, 1, 0], [0, 0, 0, 1]]   #A
        observationMatrix = [[1, 0, 0, 0], [0, 1, 0, 0]]   #C

        xinit = markedMeasure[0, 0]
        yinit = markedMeasure[0, 1]
        # Initial velocity approximated by the difference of the first two measurements
        vxinit = markedMeasure[1, 0] - markedMeasure[0, 0]
        vyinit = markedMeasure[1, 1] - markedMeasure[0, 1]
        initState = [xinit, yinit, vxinit, vyinit]    #mu0
        initCovariance = 1.0e-3 * np.eye(4)           #sigma0
        transitionCov = 1.0e-4 * np.eye(4)            #Q
        observationCov = 1.0e-1 * np.eye(2)           #R
        kf = KalmanFilter(transition_matrices = transitionMatrix,
            observation_matrices = observationMatrix,
            initial_state_mean = initState,
            initial_state_covariance = initCovariance,
            transition_covariance = transitionCov,
            observation_covariance = observationCov)
        
        self.measuredTrack = measuredTrack
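        # filter() is causal (uses only past measurements); smooth() also uses
        # future ones, giving a steadier track for offline processing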
        (self.filteredStateMeans, self.filteredStateCovariances) = kf.filter(markedMeasure)
        (self.filterStateMeanSmooth, self.filterStateCovariancesSmooth) = kf.smooth(markedMeasure)
        # Mean smoothed y-position (state column 1) over all frames
        self.inity = np.mean(self.filterStateMeanSmooth[:, 1], axis=0)
        newClip = clip.fl_image(self.crop)
        return newClip
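
# Usage sketch (assumed driver code; the class name, constructor arguments
# and file names below are illustrative, not taken from this module):
#
#   tracker = SpeakerTracker(inputPath='talk.mp4', skip=None, bBox=None)
#   newClip = tracker.speakerTracker()
#   newClip.write_videofile('talk_speaker.mp4')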