def __init__(self):
    """Build the segmentation-explorer window and wire up its event handlers."""
    # Non-GUI state
    self.scriptPath = os.path.dirname(__file__)
    self.image = None
    self.maskArray = None
    self.filename = None
    self.residualSaliencyFilter = ResidualSaliencyFilter()

    # Load the window layout from the Glade description file
    uiBuilder = gtk.Builder()
    uiBuilder.add_from_file(self.scriptPath + "/GUI/SegmentationExplorer.glade")

    self.window = uiBuilder.get_object("winMain")
    self.adjBrushSize = uiBuilder.get_object("adjBrushSize")
    self.comboBrushType = uiBuilder.get_object("comboBrushType")
    self.comboPixelClass = uiBuilder.get_object("comboPixelClass")

    # Wrap each drawing area widget in a Display helper
    imageArea = uiBuilder.get_object("dwgImage")
    segmentationArea = uiBuilder.get_object("dwgSegmentation")
    saliencyArea = uiBuilder.get_object("dwgSaliency")
    self.dwgImageDisplay = Display(imageArea)
    self.dwgSegmentationDisplay = Display(segmentationArea)
    self.dwgSaliencyDisplay = Display(saliencyArea)

    # Default brush configuration
    self.adjBrushSize.set_value(1)
    self.makeBrush()

    uiBuilder.connect_signals(self)

    # Drive the update() generator from the GTK idle loop (Python 2 generator API)
    updateGenerator = self.update()
    gobject.idle_add(updateGenerator.next)

    self.window.show()
def processBag( self, bag ):
    """Process a ROS bag file frame by frame to detect and segment a moving object.

    For each sensor_msgs/Image message this routine converts the frame to
    grayscale, accumulates optical-flow and frame-difference motion masks,
    tracks the left-most image column containing motion (used to spot the
    "impact" frame), and optionally runs GrabCut to segment the moving
    object.  After the whole bag has been read it saves motion/impact
    images to disk and produces a final segmentation.

    :param bag: opened bag object supporting read_messages() -- presumably
                rosbag.Bag; verify against caller
    """
    FLIP_IMAGE = bool( self.options.frameFlip == "True" )
    USING_OPTICAL_FLOW_FOR_MOTION = False
    print "frameFlip = ", FLIP_IMAGE

    bagFrameIdx = 0     # index over image messages actually present in the bag
    frameIdx = 0        # index over the frames we keep (after subsampling)
    impactFrameIdx = None

    # Setup filters
    opticalFlowFilter = OpticalFlowFilter(
        self.OPTICAL_FLOW_BLOCK_WIDTH, self.OPTICAL_FLOW_BLOCK_HEIGHT,
        self.OPTICAL_FLOW_RANGE_WIDTH, self.OPTICAL_FLOW_RANGE_HEIGHT )
    motionDetectionFilter = MotionDetectionFilter()
    imageFlowFilter = ImageFlowFilter()
    residualSaliencyFilter = ResidualSaliencyFilter()

    # Process bag file
    for topic, msg, t in bag.read_messages():

        if self.workCancelled:
            # We've been given the signal to quit
            break

        if msg._type == "sensor_msgs/Image":

            bagFrameIdx += 1
            # Subsample: only keep every PROCESSED_FRAME_DIFF-th image
            if (bagFrameIdx-1)%self.PROCESSED_FRAME_DIFF != 0:
                continue

            print "Processing image", frameIdx

            # Get input image (wraps the message buffer without copying;
            # assumes msg.data is 8-bit 3-channel -- TODO confirm encoding)
            image = cv.CreateMatHeader( msg.height, msg.width, cv.CV_8UC3 )
            cv.SetData( image, msg.data, msg.step )

            if FLIP_IMAGE:
                cv.Flip( image, None, 1 )

            # Convert to grayscale
            grayImage = cv.CreateMat( msg.height, msg.width, cv.CV_8UC1 )
            cv.CvtColor( image, grayImage, cv.CV_BGR2GRAY )
            grayImageNumpPy = np.array( grayImage )

            # Calculate optical flow
            opticalFlowArrayX, opticalFlowArrayY = \
                opticalFlowFilter.calcOpticalFlow( grayImage )

            # Detect motion
            if USING_OPTICAL_FLOW_FOR_MOTION:
                if frameIdx == 0:
                    # No previous frame yet -- difference the frame with itself
                    motionImage = PyVarFlowLib.createMotionMask(
                        grayImageNumpPy, grayImageNumpPy )
                else:
                    motionImage = PyVarFlowLib.createMotionMask(
                        np.array( self.grayScaleImageList[ frameIdx - 1 ] ),
                        grayImageNumpPy )
            else:
                motionImage = motionDetectionFilter.calcMotion( grayImage )

            # Work out the left most point in the image where motion appears
            # (erode first to suppress single-pixel noise)
            motionTest = np.copy( motionImage )
            cv.Erode( motionTest, motionTest )
            if frameIdx == 0:
                leftMostMotion = motionImage.shape[ 1 ]
            else:
                leftMostMotion = self.leftMostMotionList[ frameIdx - 1 ]

            leftMostMotionDiff = 0
            for i in range( leftMostMotion ):
                if motionTest[ :, i ].max() > 0:
                    leftMostMotionDiff = abs( leftMostMotion - i )
                    leftMostMotion = i
                    break

            segmentationMask = np.zeros( ( msg.height, msg.width ), dtype=np.uint8 )

            FRAMES_BACK = 3

            if impactFrameIdx == None:
                # A large jump of the motion front towards the left edge marks impact
                if leftMostMotionDiff > 18 and leftMostMotion < 0.75*msg.width:
                    # Found impact frame
                    impactFrameIdx = frameIdx
            else:
                # NOTE(review): PROCESS_IMPACT is hard-coded False, so the
                # per-frame impact segmentation below is currently disabled.
                PROCESS_IMPACT = False
                if PROCESS_IMPACT and frameIdx - impactFrameIdx == FRAMES_BACK:
                    # Should now have enough info to segment object
                    impactMotionImage = self.motionImageList[ impactFrameIdx ]

                    # Align post-impact motion images onto the impact frame;
                    # element [3] of calcImageFlow's result is the aligned image
                    # -- presumably; verify against ImageFlowFilter
                    print "Aligning"
                    postImpactRealFarFlow = imageFlowFilter.calcImageFlow( impactMotionImage, motionImage )
                    print "Aligning"
                    postImpactFarFlow = imageFlowFilter.calcImageFlow( impactMotionImage, self.motionImageList[ impactFrameIdx + 2 ] )
                    print "Aligning"
                    postImpactNearFlow = imageFlowFilter.calcImageFlow( impactMotionImage, self.motionImageList[ impactFrameIdx + 1 ] )

                    # Union of the impact-frame motion and the aligned post-impact motion
                    segmentationMask = np.maximum( np.maximum( np.maximum(
                        impactMotionImage, postImpactNearFlow[ 3 ] ),
                        postImpactFarFlow[ 3 ] ), postImpactRealFarFlow[ 3 ] )
                    cv.Dilate( segmentationMask, segmentationMask )

                    # Align pre-impact motion images; their union approximates the
                    # moving projectile before impact and is subtracted off below
                    print "Aligning"
                    preImpactRealFarFlow = imageFlowFilter.calcImageFlow( impactMotionImage, self.motionImageList[ impactFrameIdx - 8 ] )
                    print "Aligning"
                    preImpactFarFlow = imageFlowFilter.calcImageFlow( impactMotionImage, self.motionImageList[ impactFrameIdx - 6 ] )
                    print "Aligning"
                    preImpactNearFlow = imageFlowFilter.calcImageFlow( impactMotionImage, self.motionImageList[ impactFrameIdx - 4 ] )

                    subMask = np.maximum( np.maximum(
                        preImpactRealFarFlow[ 3 ], preImpactFarFlow[ 3 ] ),
                        preImpactNearFlow[ 3 ] )
                    cv.Erode( subMask, subMask )
                    cv.Dilate( subMask, subMask )
                    cv.Dilate( subMask, subMask )
                    cv.Dilate( subMask, subMask )
                    subMask[ subMask > 0 ] = 255

                    # Subtract pre-impact motion from post-impact motion
                    # (int32 intermediate avoids uint8 underflow)
                    diffImage = segmentationMask.astype( np.int32 ) - subMask.astype( np.int32 )
                    diffImage[ diffImage < 0 ] = 0
                    diffImage = diffImage.astype( np.uint8 )
                    cv.Erode( diffImage, diffImage )
                    #diffImage[ diffImage > 0 ] = 255

                    #segmentationMask = subMask
                    segmentationMask = diffImage
                    #segmentationMask = np.where( diffImage > 128, 255, 0 ).astype( np.uint8 )

            # Calculate image flow
            #imageFlow = imageFlowFilter.calcImageFlow( motionImage )

            ## Calculate saliency map
            #saliencyMap, largeSaliencyMap = residualSaliencyFilter.calcSaliencyMap( grayImageNumpPy )
            #blobMap = np.where( largeSaliencyMap > 128, 255, 0 ).astype( np.uint8 )
            #blobMap, numBlobs = PyBlobLib.labelBlobs( blobMap )
            #print "found", numBlobs, "blobs"
            #largeSaliencyMap = np.where( largeSaliencyMap > 128, 255, 0 ).astype( np.uint8 )

            # Threshold the saliency map
            #largeSaliencyMap = (largeSaliencyMap > 128).astype(np.uint8) * 255
            #cv.AdaptiveThreshold( largeSaliencyMap, largeSaliencyMap, 255 )

            # Detect clusters within the saliency map
            #NUM_CLUSTERS = 5
            #numSamples = np.sum( saliencyMap )
            #sampleList = np.ndarray( ( numSamples, 2 ), dtype=np.float32 )
            #sampleListIdx = 0
            #for y in range( saliencyMap.shape[ 0 ] ):
                #for x in range( saliencyMap.shape[ 1 ] ):
                    #numNewSamples = saliencyMap[ y, x ]
                    #if numNewSamples > 0:
                        #sampleList[ sampleListIdx:sampleListIdx+numNewSamples, 0 ] = x
                        #sampleList[ sampleListIdx:sampleListIdx+numNewSamples, 1 ] = y
                        #sampleListIdx += numNewSamples

            #sampleList[ 0:numSamples/2 ] = ( 20, 20 )
            #sampleList[ numSamples/2: ] = ( 200, 200 )

            #labelList = np.ndarray( ( numSamples, 1 ), dtype=np.int32 )
            #cv.KMeans2( sampleList, NUM_CLUSTERS, labelList,
                #(cv.CV_TERMCRIT_ITER | cv.CV_TERMCRIT_EPS, 10, 0.01) )

            #saliencyScaleX = float( largeSaliencyMap.shape[ 1 ] ) / saliencyMap.shape[ 1 ]
            #saliencyScaleY = float( largeSaliencyMap.shape[ 0 ] ) / saliencyMap.shape[ 0 ]
            clusterList = []
            #for clusterIdx in range( NUM_CLUSTERS ):
                #clusterSamples = sampleList[
                    #np.where( labelList == clusterIdx )[ 0 ], : ]
                #if clusterSamples.size <= 0:
                    #mean = ( 0.0, 0.0 )
                    #stdDev = 0.0
                #else:
                    #mean = clusterSamples.mean( axis=0 )
                    #mean = ( mean[ 0 ]*saliencyScaleX, mean[ 1 ]*saliencyScaleY )
                    #stdDev = clusterSamples.std()*saliencyScaleX
                #clusterList.append( ( mean, stdDev ) )

            # Work out the maximum amount of motion we've seen in a single frame so far
            #motionCount = motionImage[ motionImage > 0 ].size
            #if frameIdx == 0:
                #lastMotionCount = 0
            #else:
                #lastMotionCount = self.maxMotionCounts[ frameIdx - 1 ]

            #if motionCount < lastMotionCount:
                #motionCount = lastMotionCount

            ## Work out diffImage
            #diffImage = np.array( motionImage, dtype=np.int32 ) \
                #- np.array( imageFlow[ 3 ], dtype=np.int32 )
            #diffImage = np.array( np.maximum( diffImage, 0 ), dtype=np.uint8 )

            # Segment the image
            #workingMask = np.copy( motionImage )
            #workingMask = np.copy( diffImage )
            workingMask = np.copy( segmentationMask )
            # Cross-shaped 3x3 kernel: erode noise, then dilate to restore size
            kernel = cv.CreateStructuringElementEx(
                cols=3, rows=3, anchorX=1, anchorY=1, shape=cv.CV_SHAPE_CROSS )
            cv.Erode( workingMask, workingMask, kernel )
            cv.Dilate( workingMask, workingMask )

            # Grow two rings around the mask: extraExtraMask = probable
            # foreground band, allMask = probable background band (see below)
            extraExtraMask = np.copy( workingMask )
            cv.Dilate( extraExtraMask, extraExtraMask )
            cv.Dilate( extraExtraMask, extraExtraMask )
            cv.Dilate( extraExtraMask, extraExtraMask )
            cv.Dilate( extraExtraMask, extraExtraMask )
            cv.Dilate( extraExtraMask, extraExtraMask )
            cv.Dilate( extraExtraMask, extraExtraMask )

            allMask = np.copy( extraExtraMask )
            cv.Dilate( allMask, allMask )
            cv.Dilate( allMask, allMask )
            cv.Dilate( allMask, allMask )
            cv.Dilate( allMask, allMask )
            cv.Dilate( allMask, allMask )
            cv.Dilate( allMask, allMask )

            possibleForeground = workingMask > 0

            # Only run GrabCut when there is enough foreground evidence;
            # the frameIdx >= 16 gate looks like a debugging constraint -- TODO confirm
            if workingMask[ possibleForeground ].size >= 100 \
                and frameIdx >= 16:

                print "Msk size", workingMask[ possibleForeground ].size
                print workingMask[ 0, 0:10 ]

                # GrabCut GMM models: 5 components x 13 doubles each
                fgModel = cv.CreateMat( 1, 5*13, cv.CV_64FC1 )
                bgModel = cv.CreateMat( 1, 5*13, cv.CV_64FC1 )

                #workingMask[ possibleForeground ] = self.GC_FGD
                #workingMask[ possibleForeground == False ] = self.GC_PR_BGD
                #workingMask[ : ] = self.GC_PR_BGD
                #workingMask[ possibleForeground ] = self.GC_FGD

                # Build the GrabCut trimap from the nested masks, innermost last
                workingMask[ : ] = self.GC_BGD
                workingMask[ allMask > 0 ] = self.GC_PR_BGD
                workingMask[ extraExtraMask > 0 ] = self.GC_PR_FGD
                workingMask[ possibleForeground ] = self.GC_FGD

                if frameIdx == 16:
                    # Save mask (debug output: trimap rendered as gray levels)
                    maskCopy = np.copy( workingMask )
                    maskCopy[ maskCopy == self.GC_BGD ] = 0
                    maskCopy[ maskCopy == self.GC_PR_BGD ] = 64
                    maskCopy[ maskCopy == self.GC_PR_FGD ] = 128
                    maskCopy[ maskCopy == self.GC_FGD ] = 255

                    print "Unused pixels", \
                        maskCopy[ (maskCopy != 255) & (maskCopy != 0) ].size

                    outputImage = cv.CreateMat( msg.height, msg.width, cv.CV_8UC3 )
                    cv.CvtColor( maskCopy, outputImage, cv.CV_GRAY2BGR )

                    cv.SaveImage( "output.png", image );
                    cv.SaveImage( "outputMask.png", outputImage );

                    print "Saved images"
                    #return

                #print "Set Msk size", workingMask[ workingMask == self.GC_PR_FGD ].size
                imageToSegment = image #self.inputImageList[ frameIdx - FRAMES_BACK ]

                imageCopy = np.copy( imageToSegment )
                cv.CvtColor( imageCopy, imageCopy, cv.CV_BGR2RGB )

                print "Start seg"
                cv.GrabCut( imageCopy, workingMask, (0,0,0,0),
                    fgModel, bgModel, 12, self.GC_INIT_WITH_MASK )
                print "Finish seg"

                # Keep only pixels GrabCut classified as (probable) foreground
                segmentation = np.copy( imageToSegment )
                segmentation[ (workingMask != self.GC_PR_FGD) & (workingMask != self.GC_FGD) ] = 0

                black = (workingMask != self.GC_PR_FGD) & (workingMask != self.GC_FGD)
                #motionImage = np.where( black, 0, 255 ).astype( np.uint8 )

                # Refine the segmentation (second GrabCut pass; currently disabled)
                REFINE_SEG = False
                if REFINE_SEG:
                    motionImageCopy = np.copy( motionImage )
                    cv.Erode( motionImageCopy, motionImageCopy )
                    #cv.Erode( motionImageCopy, motionImageCopy )
                    #cv.Erode( motionImageCopy, motionImageCopy )

                    workingMask[ motionImageCopy > 0 ] = self.GC_PR_FGD
                    workingMask[ motionImageCopy == 0 ] = self.GC_PR_BGD
                    cv.Dilate( motionImageCopy, motionImageCopy )
                    cv.Dilate( motionImageCopy, motionImageCopy )
                    cv.Dilate( motionImageCopy, motionImageCopy )
                    cv.Dilate( motionImageCopy, motionImageCopy )
                    workingMask[ motionImageCopy == 0 ] = self.GC_BGD

                    print "Other seg"
                    cv.GrabCut( imageCopy, workingMask, (0,0,0,0),
                        fgModel, bgModel, 12, self.GC_INIT_WITH_MASK )
                    print "Other seg done"

                    segmentation = np.copy( imageToSegment )
                    segmentation[ (workingMask != self.GC_PR_FGD) & (workingMask != self.GC_FGD) ] = 0

                    black = (workingMask != self.GC_PR_FGD) & (workingMask != self.GC_FGD)
                    motionImage = np.where( black, 0, 255 ).astype( np.uint8 )

            else:
                # NOTE(review): this branch stores a 2-D zero array while the
                # GrabCut branch stores a 3-channel image copy -- confirm
                # downstream consumers handle both shapes
                segmentation = np.zeros( ( image.height, image.width ), dtype=np.uint8 )

            # Save output data
            self.inputImageList[ frameIdx ] = image
            self.grayScaleImageList[ frameIdx ] = grayImage
            self.opticalFlowListX[ frameIdx ] = opticalFlowArrayX
            self.opticalFlowListY[ frameIdx ] = opticalFlowArrayY
            self.motionImageList[ frameIdx ] = motionImage
            self.segmentationList[ frameIdx ] = segmentation
            self.segmentationMaskList[ frameIdx ] = segmentationMask
            #self.maxMotionCounts[ frameIdx ] = motionCount
            #self.imageFlowList[ frameIdx ] = imageFlow
            #self.saliencyMapList[ frameIdx ] = largeSaliencyMap
            #self.saliencyClusterList[ frameIdx ] = clusterList
            self.leftMostMotionList[ frameIdx ] = leftMostMotion

            frameIdx += 1
            self.numFramesProcessed += 1

    if not self.workCancelled:

        # Write every motion image out for offline inspection
        SAVE_MOTION_IMAGES = True
        BASE_MOTION_IMAGE_NAME = self.scriptPath + "/../../test_data/motion_images/motion_{0:03}.png"

        if SAVE_MOTION_IMAGES and len( self.motionImageList ) > 0:

            width = self.motionImageList[ 0 ].shape[ 1 ]
            height = self.motionImageList[ 0 ].shape[ 0 ]
            colourImage = np.zeros( ( height, width, 3 ), dtype=np.uint8 )

            for frameIdx, motionImage in enumerate( self.motionImageList ):

                # Replicate the grayscale motion mask into all 3 channels
                colourImage[ :, :, 0 ] = motionImage
                colourImage[ :, :, 1 ] = motionImage
                colourImage[ :, :, 2 ] = motionImage

                outputName = BASE_MOTION_IMAGE_NAME.format( frameIdx + 1 )
                cv.SaveImage( outputName, colourImage )

        # Recalculate impactFrameIdx over the whole sequence: largest jump of
        # the motion front after at least half the image width has been covered
        width = self.motionImageList[ 0 ].shape[ 1 ]
        totalMotionDiff = 0
        maxMotionDiff = 0
        impactFrameIdx = None
        for motionIdx in range( 1, len( self.leftMostMotionList ) ):

            motionDiff = abs( self.leftMostMotionList[ motionIdx ] \
                - self.leftMostMotionList[ motionIdx - 1 ] )
            totalMotionDiff += motionDiff

            if motionDiff > maxMotionDiff and totalMotionDiff > 0.5*width:
                maxMotionDiff = motionDiff
                impactFrameIdx = motionIdx

        if maxMotionDiff <= 18:
            impactFrameIdx = None

        if impactFrameIdx != None:

            preMotionImages = []
            postMotionImages = []
            impactMotionImage = None

            NUM_FRAMES_BEFORE = 3

            # Output filenames share an optional user-supplied prefix
            prefix = self.options.outputPrefix
            if prefix != "":
                prefix += "_"

            BASE_MOTION_IMAGE_NAME = self.scriptPath + "/../../test_data/impact_images/" + prefix + "motion_{0:03}.png"
            START_MOTION_IMAGE_NAME = self.scriptPath + "/../../test_data/impact_images/" + prefix + "start_motion.png"
            START_IMAGE_NAME = self.scriptPath + "/../../test_data/impact_images/" + prefix + "start.png"
            IMPACT_IMAGE_NAME = self.scriptPath + "/../../test_data/impact_images/" + prefix + "impact.png"
            SEGMENTATION_IMAGE_NAME = self.scriptPath + "/../../test_data/impact_images/" + prefix + "segmentation.png"
            NUM_FRAMES_AFTER = 3

            width = self.motionImageList[ 0 ].shape[ 1 ]
            height = self.motionImageList[ 0 ].shape[ 0 ]
            colourImage = np.zeros( ( height, width, 3 ), dtype=np.uint8 )

            # Collect the motion images around the impact frame and save them
            for frameIdx in range( impactFrameIdx - NUM_FRAMES_BEFORE,
                impactFrameIdx + NUM_FRAMES_AFTER + 1 ):

                motionImage = self.motionImageList[ frameIdx ]

                if frameIdx < impactFrameIdx:
                    preMotionImages.append( motionImage )
                elif frameIdx == impactFrameIdx:
                    impactMotionImage = motionImage
                else: # frameIdx > impactFrameIdx
                    postMotionImages.append( motionImage )

                colourImage[ :, :, 0 ] = motionImage
                colourImage[ :, :, 1 ] = motionImage
                colourImage[ :, :, 2 ] = motionImage

                outputName = BASE_MOTION_IMAGE_NAME.format( frameIdx - impactFrameIdx )
                cv.SaveImage( outputName, colourImage )

            # Prime the motion filter with frame 0, then diff against the
            # impact frame to get motion relative to the start of the sequence
            motionDetectionFilter.calcMotion( self.grayScaleImageList[ 0 ] )
            startMotionImage = motionDetectionFilter.calcMotion(
                self.grayScaleImageList[ impactFrameIdx ] )

            colourImage[ :, :, 0 ] = startMotionImage
            colourImage[ :, :, 1 ] = startMotionImage
            colourImage[ :, :, 2 ] = startMotionImage
            cv.SaveImage( START_MOTION_IMAGE_NAME, colourImage )

            cv.CvtColor( self.inputImageList[ 0 ], colourImage, cv.CV_RGB2BGR )
            cv.SaveImage( START_IMAGE_NAME, colourImage )
            cv.CvtColor( self.inputImageList[ impactFrameIdx ], colourImage, cv.CV_RGB2BGR )
            cv.SaveImage( IMPACT_IMAGE_NAME, colourImage )

            print "Segmenting..."
            segmentation = self.produceSegmentation( self.inputImageList[ 0 ],
                impactMotionImage, preMotionImages, postMotionImages )
            cv.CvtColor( segmentation, colourImage, cv.CV_RGB2BGR )
            cv.SaveImage( SEGMENTATION_IMAGE_NAME, colourImage )

    self.refreshGraphDisplay()

    print "Finished processing bag file"
    if bool( self.options.quitAfterFirstSegmentation == "True" ):
        print "Trying to quit"
        self.onWinMainDestroy( None )
    else:
        print "Not trying to quit so neeah"
class MainWindow:
    """GTK window for interactively exploring GrabCut segmentation.

    Lets the user load an image, paint a foreground/background trimap with a
    brush (or seed it from a residual-saliency map), and run GrabCut to
    produce a segmentation.
    """

    # Overlay colours used when drawing the mask on top of the image
    FOREGROUND_BRUSH_COLOUR = np.array([255, 255, 0], dtype=np.uint8)
    PROBABLY_FOREGROUND_BRUSH_COLOUR = np.array([0, 255, 0], dtype=np.uint8)
    BACKGROUND_BRUSH_COLOUR = np.array([0, 0, 255], dtype=np.uint8)

    # Classes of pixel in GrabCut algorithm
    GC_BGD = 0      # background
    GC_FGD = 1      # foreground
    GC_PR_BGD = 2   # most probably background
    GC_PR_FGD = 3   # most probably foreground

    # GrabCut algorithm flags
    GC_INIT_WITH_RECT = 0
    GC_INIT_WITH_MASK = 1
    GC_EVAL = 2

    # ---------------------------------------------------------------------------
    def __init__(self):
        """Build the window from its Glade description and wire up signals."""
        self.scriptPath = os.path.dirname(__file__)
        self.image = None       # currently loaded image (OpenCV matrix)
        self.maskArray = None   # GrabCut trimap, same height/width as image
        self.filename = None
        self.residualSaliencyFilter = ResidualSaliencyFilter()

        # Setup the GUI
        builder = gtk.Builder()
        builder.add_from_file(self.scriptPath + "/GUI/SegmentationExplorer.glade")

        self.window = builder.get_object("winMain")
        dwgImage = builder.get_object("dwgImage")
        dwgSegmentation = builder.get_object("dwgSegmentation")
        dwgSaliency = builder.get_object("dwgSaliency")
        self.adjBrushSize = builder.get_object("adjBrushSize")
        self.comboBrushType = builder.get_object("comboBrushType")
        self.comboPixelClass = builder.get_object("comboPixelClass")
        self.dwgImageDisplay = Display(dwgImage)
        self.dwgSegmentationDisplay = Display(dwgSegmentation)
        self.dwgSaliencyDisplay = Display(dwgSaliency)

        # Set default values
        self.adjBrushSize.set_value(1)
        self.makeBrush()

        builder.connect_signals(self)

        # Drive the update() generator from the GTK idle loop
        updateLoop = self.update()
        gobject.idle_add(updateLoop.next)

        self.window.show()

    # ---------------------------------------------------------------------------
    def onWinMainDestroy(self, widget, data=None):
        """Quit the GTK main loop when the window is destroyed."""
        gtk.main_quit()

    # ---------------------------------------------------------------------------
    def main(self):
        # All PyGTK applications must have a gtk.main(). Control ends here
        # and waits for an event to occur (like a key press or mouse event).
        gtk.main()

    # ---------------------------------------------------------------------------
    def makeBrush(self):
        """Rebuild the paint brush from the current size/type widget values."""
        brushSize = self.adjBrushSize.get_value()
        brushShape = (brushSize, brushSize)
        # NOTE(review): np.zeros(brushSize) builds a 1-D array although the
        # square brushShape is computed just above -- looks like it should be
        # np.zeros(brushShape, ...); self.brush appears unused beyond this fill,
        # painting is driven by self.brushIndices. Confirm before changing.
        self.brush = np.zeros(brushSize, dtype=np.uint8)
        self.brush.fill(self.GC_FGD)

        # Offsets of every brush pixel relative to the brush centre
        brushRadius = int(brushSize / 2)
        self.brushIndices = np.indices(brushShape) - brushRadius

        brushType = self.comboBrushType.get_active_text()
        if brushType == "Circle":
            # Keep only the offsets that fall inside the brush radius
            i = self.brushIndices
            circleIndices = np.where(i[0] * i[0] + i[1] * i[1] <= brushRadius * brushRadius)
            self.brushIndices = self.brushIndices[:, circleIndices[0], circleIndices[1]]

    # ---------------------------------------------------------------------------
    def alterMask(self, x, y, pixelClass):
        """Paint pixelClass into the mask with the brush centred at (x, y)."""
        if self.maskArray != None:
            # Translate the brush offsets to the clicked position
            indices = np.copy(self.brushIndices)
            indices[0] += y
            indices[1] += x

            # Discard brush pixels that fall outside the mask bounds
            validIndices = indices[
                :,
                np.where(
                    (indices[0] >= 0)
                    & (indices[1] >= 0)
                    & (indices[0] < self.maskArray.shape[0])
                    & (indices[1] < self.maskArray.shape[1])
                ),
            ]
            self.maskArray[validIndices[0], validIndices[1]] = pixelClass
            self.redrawImageWithMask()

    # ---------------------------------------------------------------------------
    def redrawImageWithMask(self):
        """Redraw the image with the mask classes overlaid in brush colours."""
        # Note: GC_PR_BGD pixels are deliberately left showing the image
        self.composedImage = np.copy(self.image)
        self.composedImage[self.maskArray == self.GC_FGD] = self.FOREGROUND_BRUSH_COLOUR
        self.composedImage[self.maskArray == self.GC_PR_FGD] = self.PROBABLY_FOREGROUND_BRUSH_COLOUR
        self.composedImage[self.maskArray == self.GC_BGD] = self.BACKGROUND_BRUSH_COLOUR

        self.dwgImageDisplay.setImageFromNumpyArray(self.composedImage)

    # ---------------------------------------------------------------------------
    def getCurPixelClass(self):
        """Return the GrabCut pixel class selected in the combo box."""
        pixelClassText = self.comboPixelClass.get_active_text()
        if pixelClassText == "Background":
            return self.GC_BGD
        elif pixelClassText == "Probably Foreground":
            return self.GC_PR_FGD
        else:
            return self.GC_FGD

    # ---------------------------------------------------------------------------
    def chooseImageFile(self):
        """Show a file chooser; return the selected filename or None."""
        result = None

        dialog = gtk.FileChooserDialog(
            title="Choose Image File",
            action=gtk.FILE_CHOOSER_ACTION_SAVE,
            buttons=(gtk.STOCK_CANCEL, gtk.RESPONSE_REJECT, gtk.STOCK_OK, gtk.RESPONSE_ACCEPT),
        )

        dialog.set_current_folder(self.scriptPath + "/../../test_data/saliency")

        # NOTE(review): 'filter' shadows the builtin of the same name
        filter = gtk.FileFilter()
        filter.add_pattern("*.png")
        filter.add_pattern("*.bmp")
        filter.add_pattern("*.jpg")
        filter.set_name("Image Files")
        dialog.add_filter(filter)
        dialog.set_filter(filter)

        result = dialog.run()

        if result == gtk.RESPONSE_ACCEPT:
            result = dialog.get_filename()

        dialog.destroy()
        return result

    # ---------------------------------------------------------------------------
    def onMenuItemOpenImageActivate(self, widget):
        """Load an image chosen by the user and reset the mask/segmentation."""
        filename = self.chooseImageFile()

        if filename != None:
            # Load the image (converted in place to RGB for display)
            self.image = cv.LoadImageM(filename)
            cv.CvtColor(self.image, self.image, cv.CV_BGR2RGB)

            # Create a mask and blank segmentation that matches the size of the image
            self.maskArray = np.ones((self.image.height, self.image.width), np.uint8) * self.GC_PR_BGD
            self.segmentation = np.zeros((self.image.height, self.image.width, 3), np.uint8)

            self.dwgImageDisplay.setImageFromOpenCVMatrix(self.image)
            self.dwgSegmentationDisplay.setImageFromNumpyArray(self.segmentation)

    # ---------------------------------------------------------------------------
    def onMenuItemOpenMaskActivate(self, widget):
        """Load a grayscale trimap image and apply it as the current mask."""
        # Map for translating mask values (gray level -> GrabCut class)
        PIXEL_MAP = {0: self.GC_BGD, 64: self.GC_PR_BGD, 128: self.GC_PR_FGD, 255: self.GC_FGD}

        if self.image == None:
            return  # No image to apply mask to yet

        filename = self.chooseImageFile()

        if filename != None:
            maskImage = cv.LoadImageM(filename)
            if maskImage.width != self.image.width or maskImage.height != self.image.height:
                print "Error: Mask doesn't match the size of the current image"
                return

            # maskImageGray = cv.CreateMat( self.image.height, self.image.width, cv.CV_8UC1 )
            self.maskArray = np.ndarray((self.image.height, self.image.width), np.uint8)
            cv.CvtColor(maskImage, self.maskArray, cv.CV_BGR2GRAY)

            # Convert the pixel values over to the correct values for the pixel type
            # (boolean masks are captured first so translations don't chain)
            translationArray = [
                (self.maskArray == sourcePixelValue, PIXEL_MAP[sourcePixelValue])
                for sourcePixelValue in PIXEL_MAP
            ]
            for translation in translationArray:
                self.maskArray[translation[0]] = translation[1]

            # Redraw the main image to show the mask
            self.redrawImageWithMask()

    # ---------------------------------------------------------------------------
    def onMenuItemQuitActivate(self, widget):
        """Menu Quit entry -- same as destroying the window."""
        self.onWinMainDestroy(widget)

    # ---------------------------------------------------------------------------
    def onBtnClearMaskClicked(self, widget):
        """Reset the mask to all 'probably background' and redraw."""
        if self.image != None:
            self.maskArray = np.ones((self.image.height, self.image.width), np.uint8) * self.GC_PR_BGD
            self.dwgImageDisplay.setImageFromOpenCVMatrix(self.image)

    # ---------------------------------------------------------------------------
    def onBtnSegmentClicked(self, widget):
        """Run GrabCut with the painted mask and display the segmentation."""
        if self.maskArray != None:
            workingMask = np.copy(self.maskArray)

            # GrabCut GMM models: 5 components x 13 doubles each
            fgModel = cv.CreateMat(1, 5 * 13, cv.CV_64FC1)
            cv.Set(fgModel, 0)
            bgModel = cv.CreateMat(1, 5 * 13, cv.CV_64FC1)
            cv.Set(bgModel, 0)

            workingImage = np.copy(self.image)
            cv.GrabCut(workingImage, workingMask, (0, 0, 0, 0), fgModel, bgModel, 12, self.GC_INIT_WITH_MASK)

            # Black out everything GrabCut did not keep as (probable) foreground
            self.segmentation = np.copy(self.image)
            self.segmentation[(workingMask != self.GC_PR_FGD) & (workingMask != self.GC_FGD)] = 0
            self.dwgSegmentationDisplay.setImageFromNumpyArray(self.segmentation)

    # ---------------------------------------------------------------------------
    def onBtnCreateSaliencyMaskClicked(self, widget):
        """Seed the mask from the biggest salient blob inside a fixed ROI."""
        # Hard-coded region of interest, in pixels -- presumably tuned for the
        # test data; verify before reuse
        ROI_X = 0
        ROI_Y = 76
        ROI_WIDTH = 230
        ROI_HEIGHT = 100
        # ROI_WIDTH = 10

        if self.image != None:
            # Create a saliency map from the current image
            grayImage = np.ndarray((self.image.height, self.image.width), dtype=np.uint8)
            cv.CvtColor(self.image, grayImage, cv.CV_RGB2GRAY)

            saliencyMap, largeSaliencyMap = self.residualSaliencyFilter.calcSaliencyMap(grayImage)
            self.dwgSaliencyDisplay.setImageFromNumpyArray(largeSaliencyMap)

            # Threshold to get blobs
            blobPixels = largeSaliencyMap >= 160
            largeSaliencyMap[blobPixels] = 255
            largeSaliencyMap[blobPixels == False] = 0

            # Label blobs
            largeSaliencyMap, numBlobs = PyBlobLib.labelBlobs(largeSaliencyMap)

            # Find blobs in the region of interest
            testMap = np.copy(largeSaliencyMap)
            testMap[:ROI_Y, :] = 0  # Mask out area above the ROI
            testMap[:, :ROI_X] = 0  # Mask out area to the left of the ROI
            testMap[ROI_Y + ROI_HEIGHT :] = 0  # Mask out area below the ROI
            testMap[:, ROI_X + ROI_WIDTH :] = 0  # Mask out area to the right of the ROI

            biggestBlobIdx = None
            biggestBlobSize = 0

            # A blob only needs to touch the ROI; its size is measured over the
            # whole image
            for blobIdx in range(1, numBlobs + 1):
                if testMap[testMap == blobIdx].size > 0:
                    blobSize = largeSaliencyMap[largeSaliencyMap == blobIdx].size
                    if blobSize > biggestBlobSize:
                        biggestBlobSize = blobSize
                        biggestBlobIdx = blobIdx

            # Isolate the largest blob
            if biggestBlobIdx != None:
                biggestBlobPixels = largeSaliencyMap == biggestBlobIdx
                print biggestBlobPixels.size
                largeSaliencyMap[biggestBlobPixels] = self.GC_PR_FGD
                largeSaliencyMap[biggestBlobPixels == False] = self.GC_PR_BGD

                # Use the largest blob as the segmentation mask
                self.maskArray = largeSaliencyMap
                self.redrawImageWithMask()

    # ---------------------------------------------------------------------------
    def onComboBrushTypeChanged(self, widget):
        """Rebuild the brush when its type changes."""
        self.makeBrush()

    # ---------------------------------------------------------------------------
    def onAdjBrushSizeValueChanged(self, widget):
        """Rebuild the brush when its size changes."""
        self.makeBrush()

    # ---------------------------------------------------------------------------
    def onDwgImageButtonPressEvent(self, widget, data):
        """Left button paints the selected class; any other button erases
        back to 'probably background'."""
        if data.button == 1:
            self.onImageMaskChanged(widget, data, self.getCurPixelClass())
        else:
            self.onImageMaskChanged(widget, data, self.GC_PR_BGD)

    # ---------------------------------------------------------------------------
    def onDwgImageMotionNotifyEvent(self, widget, data):
        """Dragging over the image keeps painting the selected class."""
        self.onImageMaskChanged(widget, data, self.getCurPixelClass())

    # ---------------------------------------------------------------------------
    def onImageMaskChanged(self, widget, data, pixelClass):
        """Translate a widget event position to image coords and paint."""
        imgRect = self.dwgImageDisplay.getImageRectangleInWidget(widget)
        if imgRect != None:
            x = data.x - imgRect.x
            y = data.y - imgRect.y
            self.alterMask(x, y, pixelClass)

    # ---------------------------------------------------------------------------
    def onDwgImageExposeEvent(self, widget, data):
        """Repaint the exposed part of the image drawing area."""
        imgRect = self.dwgImageDisplay.drawPixBufToDrawingArea(data.area)
        if imgRect != None:
            imgRect = imgRect.intersect(data.area)

            # Add in the mask
            # if self.maskArray != None:
            #     self.maskArrayRGB.fill( 0 )
            #     self.maskArrayRGB[ self.maskArray == self.GC_FGD ] = self.FOREGROUND_BRUSH_COLOUR
            #     self.drawingArea.window.draw_pixbuf(
            #         self.drawingArea.get_style().fg_gc[ gtk.STATE_NORMAL ],
            #         self.pixBuf, srcX, srcY,
            #         redrawRect.x, redrawRect.y, redrawRect.width, redrawRect.height )

            # Draw an overlay to show the selected segmentation
            # if self.markerBuffer != None:
            #     graphicsContext = widget.window.new_gc()
            #     graphicsContext.set_rgb_fg_color( gtk.gdk.Color( 65535, 65535, 0 ) )
            #     blockY = imgRect.y
            #     for y in range( self.markerBuffer.shape[ 0 ] ):
            #         blockX = imgRect.x
            #         for x in range( self.markerBuffer.shape[ 1 ] ):
            #             if self.markerBuffer[ y, x ]:
            #                 points = [ (blockX+int((i*2)%self.opticalFlowBlockWidth), blockY+2*int((i*2)/self.opticalFlowBlockWidth)) \
            #                     for i in range( int(self.opticalFlowBlockWidth*self.opticalFlowBlockHeight/4) ) ]
            #                 widget.window.draw_points( graphicsContext, points )
            #             blockX += self.opticalFlowBlockWidth
            #         blockY += self.opticalFlowBlockHeight

    # ---------------------------------------------------------------------------
    def onDwgSegmentationExposeEvent(self, widget, data=None):
        """Repaint the exposed part of the segmentation drawing area."""
        self.dwgSegmentationDisplay.drawPixBufToDrawingArea(data.area)

    # ---------------------------------------------------------------------------
    def onDwgSaliencyExposeEvent(self, widget, data=None):
        """Repaint the exposed part of the saliency drawing area."""
        self.dwgSaliencyDisplay.drawPixBufToDrawingArea(data.area)

    # ---------------------------------------------------------------------------
    def update(self):
        """Idle-loop generator: yields True to keep being scheduled.

        NOTE(review): time.clock() is deprecated (removed in Python 3.8);
        curTime is computed but unused -- this loop currently just keeps the
        idle callback alive.
        """
        lastTime = time.clock()

        while 1:
            curTime = time.clock()
            yield True

        yield False
class MainWindow: FOREGROUND_BRUSH_COLOUR = np.array([255, 255, 0], dtype=np.uint8) PROBABLY_FOREGROUND_BRUSH_COLOUR = np.array([0, 255, 0], dtype=np.uint8) BACKGROUND_BRUSH_COLOUR = np.array([0, 0, 255], dtype=np.uint8) # Classes of pixel in GrabCut algorithm GC_BGD = 0 # background GC_FGD = 1 # foreground GC_PR_BGD = 2 # most probably background GC_PR_FGD = 3 # most probably foreground # GrabCut algorithm flags GC_INIT_WITH_RECT = 0 GC_INIT_WITH_MASK = 1 GC_EVAL = 2 #--------------------------------------------------------------------------- def __init__(self): self.scriptPath = os.path.dirname(__file__) self.image = None self.maskArray = None self.filename = None self.residualSaliencyFilter = ResidualSaliencyFilter() # Setup the GUI builder = gtk.Builder() builder.add_from_file(self.scriptPath + "/GUI/SegmentationExplorer.glade") self.window = builder.get_object("winMain") dwgImage = builder.get_object("dwgImage") dwgSegmentation = builder.get_object("dwgSegmentation") dwgSaliency = builder.get_object("dwgSaliency") self.adjBrushSize = builder.get_object("adjBrushSize") self.comboBrushType = builder.get_object("comboBrushType") self.comboPixelClass = builder.get_object("comboPixelClass") self.dwgImageDisplay = Display(dwgImage) self.dwgSegmentationDisplay = Display(dwgSegmentation) self.dwgSaliencyDisplay = Display(dwgSaliency) # Set default values self.adjBrushSize.set_value(1) self.makeBrush() builder.connect_signals(self) updateLoop = self.update() gobject.idle_add(updateLoop.next) self.window.show() #--------------------------------------------------------------------------- def onWinMainDestroy(self, widget, data=None): gtk.main_quit() #--------------------------------------------------------------------------- def main(self): # All PyGTK applications must have a gtk.main(). Control ends here # and waits for an event to occur (like a key press or mouse event). 
gtk.main() #--------------------------------------------------------------------------- def makeBrush(self): brushSize = self.adjBrushSize.get_value() brushShape = (brushSize, brushSize) self.brush = np.zeros(brushSize, dtype=np.uint8) self.brush.fill(self.GC_FGD) brushRadius = int(brushSize / 2) self.brushIndices = np.indices(brushShape) - brushRadius brushType = self.comboBrushType.get_active_text() if brushType == "Circle": i = self.brushIndices circleIndices = np.where( i[0] * i[0] + i[1] * i[1] <= brushRadius * brushRadius) self.brushIndices = self.brushIndices[:, circleIndices[0], circleIndices[1]] #--------------------------------------------------------------------------- def alterMask(self, x, y, pixelClass): if self.maskArray != None: indices = np.copy(self.brushIndices) indices[0] += y indices[1] += x validIndices = indices[ :, np.where( (indices[ 0 ]>=0) & (indices[ 1 ]>=0) \ & (indices[ 0 ] < self.maskArray.shape[ 0 ]) \ & (indices[ 1 ] < self.maskArray.shape[ 1 ]) ) ] self.maskArray[validIndices[0], validIndices[1]] = pixelClass self.redrawImageWithMask() #--------------------------------------------------------------------------- def redrawImageWithMask(self): self.composedImage = np.copy(self.image) self.composedImage[self.maskArray == self.GC_FGD] = self.FOREGROUND_BRUSH_COLOUR self.composedImage[self.maskArray == self. 
GC_PR_FGD] = self.PROBABLY_FOREGROUND_BRUSH_COLOUR self.composedImage[self.maskArray == self.GC_BGD] = self.BACKGROUND_BRUSH_COLOUR self.dwgImageDisplay.setImageFromNumpyArray(self.composedImage) #--------------------------------------------------------------------------- def getCurPixelClass(self): pixelClassText = self.comboPixelClass.get_active_text() if pixelClassText == "Background": return self.GC_BGD elif pixelClassText == "Probably Foreground": return self.GC_PR_FGD else: return self.GC_FGD #--------------------------------------------------------------------------- def chooseImageFile(self): result = None dialog = gtk.FileChooserDialog( title="Choose Image File", action=gtk.FILE_CHOOSER_ACTION_SAVE, buttons=(gtk.STOCK_CANCEL, gtk.RESPONSE_REJECT, gtk.STOCK_OK, gtk.RESPONSE_ACCEPT)) dialog.set_current_folder(self.scriptPath + "/../../test_data/saliency") filter = gtk.FileFilter() filter.add_pattern("*.png") filter.add_pattern("*.bmp") filter.add_pattern("*.jpg") filter.set_name("Image Files") dialog.add_filter(filter) dialog.set_filter(filter) result = dialog.run() if result == gtk.RESPONSE_ACCEPT: result = dialog.get_filename() dialog.destroy() return result #--------------------------------------------------------------------------- def onMenuItemOpenImageActivate(self, widget): filename = self.chooseImageFile() if filename != None: self.image = cv.LoadImageM(filename) cv.CvtColor(self.image, self.image, cv.CV_BGR2RGB) # Create a mask and blank segmentation that matches the size of the image self.maskArray = np.ones((self.image.height, self.image.width), np.uint8) * self.GC_PR_BGD self.segmentation = np.zeros( (self.image.height, self.image.width, 3), np.uint8) self.dwgImageDisplay.setImageFromOpenCVMatrix(self.image) self.dwgSegmentationDisplay.setImageFromNumpyArray( self.segmentation) #--------------------------------------------------------------------------- def onMenuItemOpenMaskActivate(self, widget): # Map for translating mask values PIXEL_MAP = { 
0: self.GC_BGD, 64: self.GC_PR_BGD, 128: self.GC_PR_FGD, 255: self.GC_FGD } if self.image == None: return # No image to apply mask to yet filename = self.chooseImageFile() if filename != None: maskImage = cv.LoadImageM(filename) if maskImage.width != self.image.width \ or maskImage.height != self.image.height: print "Error: Mask doesn't match the size of the current image" return #maskImageGray = cv.CreateMat( self.image.height, self.image.width, cv.CV_8UC1 ) self.maskArray = np.ndarray((self.image.height, self.image.width), np.uint8) cv.CvtColor(maskImage, self.maskArray, cv.CV_BGR2GRAY) # Convert the pixel values over to the correct values for the pixel type translationArray = \ [ ( self.maskArray == sourcePixelValue, PIXEL_MAP[ sourcePixelValue ] ) \ for sourcePixelValue in PIXEL_MAP ] for translation in translationArray: self.maskArray[translation[0]] = translation[1] # Redraw the main image to show the mask self.redrawImageWithMask() #--------------------------------------------------------------------------- def onMenuItemQuitActivate(self, widget): self.onWinMainDestroy(widget) #--------------------------------------------------------------------------- def onBtnClearMaskClicked(self, widget): if self.image != None: self.maskArray = np.ones((self.image.height, self.image.width), np.uint8) * self.GC_PR_BGD self.dwgImageDisplay.setImageFromOpenCVMatrix(self.image) #--------------------------------------------------------------------------- def onBtnSegmentClicked(self, widget): if self.maskArray != None: workingMask = np.copy(self.maskArray) fgModel = cv.CreateMat(1, 5 * 13, cv.CV_64FC1) cv.Set(fgModel, 0) bgModel = cv.CreateMat(1, 5 * 13, cv.CV_64FC1) cv.Set(bgModel, 0) workingImage = np.copy(self.image) cv.GrabCut(workingImage, workingMask, (0, 0, 0, 0), fgModel, bgModel, 12, self.GC_INIT_WITH_MASK) self.segmentation = np.copy(self.image) self.segmentation[(workingMask != self.GC_PR_FGD) & (workingMask != self.GC_FGD)] = 0 
self.dwgSegmentationDisplay.setImageFromNumpyArray( self.segmentation) #--------------------------------------------------------------------------- def onBtnCreateSaliencyMaskClicked(self, widget): ROI_X = 0 ROI_Y = 76 ROI_WIDTH = 230 ROI_HEIGHT = 100 #ROI_WIDTH = 10 if self.image != None: # Create a saliency map from the current image grayImage = np.ndarray((self.image.height, self.image.width), dtype=np.uint8) cv.CvtColor(self.image, grayImage, cv.CV_RGB2GRAY) saliencyMap, largeSaliencyMap = self.residualSaliencyFilter.calcSaliencyMap( grayImage) self.dwgSaliencyDisplay.setImageFromNumpyArray(largeSaliencyMap) # Threshold to get blobs blobPixels = largeSaliencyMap >= 160 largeSaliencyMap[blobPixels] = 255 largeSaliencyMap[blobPixels == False] = 0 # Label blobs largeSaliencyMap, numBlobs = PyBlobLib.labelBlobs(largeSaliencyMap) # Find blobs in the region of interest testMap = np.copy(largeSaliencyMap) testMap[:ROI_Y, :] = 0 # Mask out area above the ROI testMap[:, :ROI_X] = 0 # Mask out area to the left of the ROI testMap[ROI_Y + ROI_HEIGHT:] = 0 # Mask out area below the ROI testMap[:, ROI_X + ROI_WIDTH:] = 0 # Mask out area to the right of the ROI biggestBlobIdx = None biggestBlobSize = 0 for blobIdx in range(1, numBlobs + 1): if testMap[testMap == blobIdx].size > 0: blobSize = largeSaliencyMap[largeSaliencyMap == blobIdx].size if blobSize > biggestBlobSize: biggestBlobSize = blobSize biggestBlobIdx = blobIdx # Isolate the largest blob if biggestBlobIdx != None: biggestBlobPixels = (largeSaliencyMap == biggestBlobIdx) print biggestBlobPixels.size largeSaliencyMap[biggestBlobPixels] = self.GC_PR_FGD largeSaliencyMap[biggestBlobPixels == False] = self.GC_PR_BGD # Use the largest blob as the segmentation mask self.maskArray = largeSaliencyMap self.redrawImageWithMask() #--------------------------------------------------------------------------- def onComboBrushTypeChanged(self, widget): self.makeBrush() 
#--------------------------------------------------------------------------- def onAdjBrushSizeValueChanged(self, widget): self.makeBrush() #--------------------------------------------------------------------------- def onDwgImageButtonPressEvent(self, widget, data): if data.button == 1: self.onImageMaskChanged(widget, data, self.getCurPixelClass()) else: self.onImageMaskChanged(widget, data, self.GC_PR_BGD) #--------------------------------------------------------------------------- def onDwgImageMotionNotifyEvent(self, widget, data): self.onImageMaskChanged(widget, data, self.getCurPixelClass()) #--------------------------------------------------------------------------- def onImageMaskChanged(self, widget, data, pixelClass): imgRect = self.dwgImageDisplay.getImageRectangleInWidget(widget) if imgRect != None: x = data.x - imgRect.x y = data.y - imgRect.y self.alterMask(x, y, pixelClass) #--------------------------------------------------------------------------- def onDwgImageExposeEvent(self, widget, data): imgRect = self.dwgImageDisplay.drawPixBufToDrawingArea(data.area) if imgRect != None: imgRect = imgRect.intersect(data.area) # Add in the mask #if self.maskArray != None: #self.maskArrayRGB.fill( 0 ) #self.maskArrayRGB[ self.maskArray == self.GC_FGD ] = self.FOREGROUND_BRUSH_COLOUR #self.drawingArea.window.draw_pixbuf( #self.drawingArea.get_style().fg_gc[ gtk.STATE_NORMAL ], #self.pixBuf, srcX, srcY, #redrawRect.x, redrawRect.y, redrawRect.width, redrawRect.height ) # Draw an overlay to show the selected segmentation #if self.markerBuffer != None: #graphicsContext = widget.window.new_gc() #graphicsContext.set_rgb_fg_color( gtk.gdk.Color( 65535, 65535, 0 ) ) #blockY = imgRect.y #for y in range( self.markerBuffer.shape[ 0 ] ): #blockX = imgRect.x #for x in range( self.markerBuffer.shape[ 1 ] ): #if self.markerBuffer[ y, x ]: #points = [ (blockX+int((i*2)%self.opticalFlowBlockWidth), blockY+2*int((i*2)/self.opticalFlowBlockWidth)) \ #for i in range( 
int(self.opticalFlowBlockWidth*self.opticalFlowBlockHeight/4) ) ] #widget.window.draw_points( graphicsContext, points ) #blockX += self.opticalFlowBlockWidth #blockY += self.opticalFlowBlockHeight #--------------------------------------------------------------------------- def onDwgSegmentationExposeEvent(self, widget, data=None): self.dwgSegmentationDisplay.drawPixBufToDrawingArea(data.area) #--------------------------------------------------------------------------- def onDwgSaliencyExposeEvent(self, widget, data=None): self.dwgSaliencyDisplay.drawPixBufToDrawingArea(data.area) #--------------------------------------------------------------------------- def update(self): lastTime = time.clock() while 1: curTime = time.clock() yield True yield False