def detect_and_draw(img, cascade):
    # allocate temporary images
    gray = cv.CreateImage((img.width, img.height), 8, 1)
    small_img = cv.CreateImage((cv.Round(img.width / image_scale),
                                cv.Round(img.height / image_scale)), 8, 1)

    # convert color input image to grayscale
    cv.CvtColor(img, gray, cv.CV_BGR2GRAY)

    # scale input image for faster processing
    cv.Resize(gray, small_img, cv.CV_INTER_LINEAR)
    cv.EqualizeHist(small_img, small_img)

    # If the resolution changes, the crosshair coordinates below must change with it.
    '''
    cv.Line(img, (210,0),(210,480), (0,255,255),1)
    cv.Line(img, (420,0),(420,480), (0,255,255),1)
    cv.Line(img, (0,160),(640,160), (0,255,255),1)
    cv.Line(img, (0,320),(640,320), (0,255,255),1)
    '''
    # draw a crosshair through the center of the frame
    cv.Line(img, (width / 2, 0), (width / 2, height), (0, 10, 255), 3)
    cv.Line(img, (width / 2 - 20, height / 2 - 10), (width / 2 - 20, height / 2 + 10), (0, 10, 255), 2)
    cv.Line(img, (width / 2 + 20, height / 2 - 10), (width / 2 + 20, height / 2 + 10), (0, 10, 255), 2)
    cv.Line(img, (0, height / 2), (width, height / 2), (0, 10, 255), 3)
    cv.Line(img, (width / 2 - 10, height / 2 - 20), (width / 2 + 10, height / 2 - 20), (0, 10, 255), 2)
    cv.Line(img, (width / 2 - 10, height / 2 + 20), (width / 2 + 10, height / 2 + 20), (0, 10, 255), 2)

    if cascade:
        t = cv.GetTickCount()
        faces = cv.HaarDetectObjects(small_img, cascade, cv.CreateMemStorage(0),
                                     haar_scale, min_neighbors, haar_flags, min_size)
        t = cv.GetTickCount() - t
        print "detection time = %gms" % (t / (cv.GetTickFrequency() * 1000.))
        if faces:
            for ((x, y, w, h), n) in faces:
                # the input to cv.HaarDetectObjects was resized, so scale the
                # bounding box of each face and convert it to two CvPoints
                pt1 = (int(x * image_scale), int(y * image_scale))
                pt2 = (int((x + w) * image_scale), int((y + h) * image_scale))
                cv.Rectangle(img, pt1, pt2, cv.RGB(255, 0, 0), 3, 8, 0)

                # center of the detected face
                cx = (pt1[0] + pt2[0]) / 2
                cy = (pt1[1] + pt2[1]) / 2
                print cx, cy

                # steer toward the face: the further the face center is from
                # the middle of the frame, the more command characters are sent
                if cx < img.width * 3 / 7:
                    arduino.write('4')
                    print '4'
                if cx < img.width * 2 / 7:
                    arduino.write('44')
                    print '44'
                if cx < img.width / 7:
                    arduino.write('4444')
                    print '4444'
                if cx > img.width * 4 / 7:
                    arduino.write('6')
                    print '6'
                if cx > img.width * 5 / 7:
                    arduino.write('66')
                    print '66'
                if cx > img.width * 6 / 7:
                    arduino.write('6666')
                    print '6666'
                if cy < img.height * 3 / 7:
                    arduino.write('2')
                    print '2'
                if cy < img.height * 2 / 7:
                    arduino.write('22')
                    print '22'
                if cy < img.height / 7:
                    arduino.write('2222')
                    print '2222'
                if cy > img.height * 4 / 7:
                    arduino.write('8')
                    print '8'
                if cy > img.height * 5 / 7:
                    arduino.write('88')
                    print '88'
                if cy > img.height * 6 / 7:
                    arduino.write('8888')
                    print '8888'
                break

    cv.ShowImage("result", img)
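# A minimal refactoring sketch (not part of the original script): the twelve
# cascaded threshold checks above can be collapsed into one helper per axis.
# It assumes the same numeric-keypad protocol ('4'/'6' = left/right,
# '2'/'8' = up/down) and an open serial port in `arduino`; unlike the
# original, it sends a single command per axis instead of cumulative ones.
def steer_command(pos, extent, low_char, high_char):
    # Farther from the center -> more repeated characters.
    for frac, reps in ((1, 4), (2, 2), (3, 1)):   # bands 1/7, 2/7, 3/7
        if pos < extent * frac / 7:
            return low_char * reps
    for frac, reps in ((6, 4), (5, 2), (4, 1)):   # bands 6/7, 5/7, 4/7
        if pos > extent * frac / 7:
            return high_char * reps
    return ''

# usage inside the detection loop:
#   arduino.write(steer_command(cx, img.width, '4', '6'))
#   arduino.write(steer_command(cy, img.height, '2', '8'))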
def detect_and_draw(img, cascade):
    # allocate temporary images
    gray = cv.CreateImage((img.width, img.height), 8, 1)
    small_img = cv.CreateImage((cv.Round(img.width / image_scale),
                                cv.Round(img.height / image_scale)), 8, 1)

    # convert color input image to grayscale
    cv.CvtColor(img, gray, cv.CV_BGR2GRAY)

    # scale input image for faster processing
    cv.Resize(gray, small_img, cv.CV_INTER_LINEAR)
    cv.EqualizeHist(small_img, small_img)

    if cascade:
        t = cv.GetTickCount()
        faces = cv.HaarDetectObjects(small_img, cascade, cv.CreateMemStorage(0),
                                     haar_scale, min_neighbors, haar_flags, min_size)
        t = cv.GetTickCount() - t
        print "time taken for detection = %gms" % (t / (cv.GetTickFrequency() * 1000.))
        if faces:
            for ((x, y, w, h), n) in faces:
                # the input to cv.HaarDetectObjects was resized, so scale the
                # bounding box of each face and convert it to two CvPoints
                pt1 = (int(x * image_scale), int(y * image_scale))
                pt2 = (int((x + w) * image_scale), int((y + h) * image_scale))
                cv.Rectangle(img, pt1, pt2, cv.RGB(255, 0, 0), 3, 8, 0)

    cv.ShowImage("video", img)


if __name__ == '__main__':
    parser = OptionParser(usage="usage: %prog [options] [filename|camera_index]")
    parser.add_option("-c", "--cascade", action="store", dest="cascade", type="str",
                      help="Haar cascade file, default %default",
                      default="../data/haarcascades/haarcascade_frontalface_alt.xml")
    (options, args) = parser.parse_args()

    cascade = cv.Load(options.cascade)

    if len(args) != 1:
        parser.print_help()
        sys.exit(1)

    input_name = args[0]
    if input_name.isdigit():
        capture = cv.CreateCameraCapture(int(input_name))
    else:
        capture = None

    cv.NamedWindow("video", 1)

    # size of the video
    width = 160
    height = 120

    if capture:
        if width is None:
            width = int(cv.GetCaptureProperty(capture, cv.CV_CAP_PROP_FRAME_WIDTH))
        else:
            cv.SetCaptureProperty(capture, cv.CV_CAP_PROP_FRAME_WIDTH, width)
        if height is None:
            height = int(cv.GetCaptureProperty(capture, cv.CV_CAP_PROP_FRAME_HEIGHT))
        else:
            cv.SetCaptureProperty(capture, cv.CV_CAP_PROP_FRAME_HEIGHT, height)

        frame_copy = None
        while True:
            frame = cv.QueryFrame(capture)
            if not frame:
                cv.WaitKey(0)
                break
            if not frame_copy:
                frame_copy = cv.CreateImage((frame.width, frame.height),
                                            cv.IPL_DEPTH_8U, frame.nChannels)
            if frame.origin == cv.IPL_ORIGIN_TL:
                cv.Copy(frame, frame_copy)
            else:
                cv.Flip(frame, frame_copy, 0)

            detect_and_draw(frame_copy, cascade)
            if cv.WaitKey(10) >= 0:
                break
    else:
        image = cv.LoadImage(input_name, 1)
        detect_and_draw(image, cascade)
        cv.WaitKey(0)

    cv.DestroyWindow("video")
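# Example invocation (the cascade path mirrors the script's own default; the
# trailing camera index is an assumption):
#   python facedetect.py --cascade ../data/haarcascades/haarcascade_frontalface_alt.xml 0
# The single positional argument is either a camera index or an image filename.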
def runtracking():
    global rgb_image, hsv_image, hsvmouse, pausecam, hsvgreen, hsvyellow, hsvblue, hsvred, homographycomputed
    global hsvyellowtab, hsvrange
    global homography, pose_flag
    global hsvyellowmin, hsvyellowmax, hsvgreenmin, hsvgreenmax, hsvbluemin, hsvbluemax, hsvredmin, hsvredmax
    global cycloppoint, righteyepoint, lefteyepoint
    global capture, pausecam, size_image
    global yellowmask_image, greenmask_image, redmask_image, bluemask_image
    global nb_pts, modelepoints, blob_centers
    global rx, ry, rz
    global background

    size_thumb = [size_image[0] / 2, size_image[1] / 2]
    thumbgreen = cv.CreateImage(size_thumb, cv.IPL_DEPTH_8U, 1)
    thumbred = cv.CreateImage(size_thumb, cv.IPL_DEPTH_8U, 1)
    thumbblue = cv.CreateImage(size_thumb, cv.IPL_DEPTH_8U, 1)
    thumbyellow = cv.CreateImage(size_thumb, cv.IPL_DEPTH_8U, 1)

    cv.NamedWindow("GreenBlobDetection", cv.CV_WINDOW_AUTOSIZE)
    cv.ShowImage("GreenBlobDetection", thumbgreen)
    cv.NamedWindow("YellowBlobDetection", cv.CV_WINDOW_AUTOSIZE)
    cv.ShowImage("YellowBlobDetection", thumbyellow)
    cv.NamedWindow("BlueBlobDetection", cv.CV_WINDOW_AUTOSIZE)
    cv.ShowImage("BlueBlobDetection", thumbblue)
    cv.NamedWindow("RedBlobDetection", cv.CV_WINDOW_AUTOSIZE)
    cv.ShowImage("RedBlobDetection", thumbred)

    rgb_image = cv.QueryFrame(capture)
    cv.NamedWindow("Source", cv.CV_WINDOW_AUTOSIZE)
    cv.ShowImage("Source", rgb_image)
    cv.SetMouseCallback("Source", getObjectHSV)

    print "Hit ESC key to quit..."

    # infinite processing loop
    while True:
        time.sleep(0.02)

        blobcentergreen = findBlob(rgb_image, hsv_image, greenmask_image,
                                   greenblob_image, hsvrange, hsvgreenmin, hsvgreenmax)
        blobcenteryellow = findBlob(rgb_image, hsv_image, yellowmask_image,
                                    yellowblob_image, hsvrange, hsvyellowmin, hsvyellowmax)
        blobcenterblue = findBlob(rgb_image, hsv_image, bluemask_image,
                                  blueblob_image, hsvrange, hsvbluemin, hsvbluemax)
        blobcenterred = findBlob(rgb_image, hsv_image, redmask_image,
                                 redblob_image, hsvrange, hsvredmin, hsvredmax)

        if not pausecam:
            if blobcentergreen is not None:
                cv.Resize(greenblob_image, thumbgreen)
                #cv.ShowImage("GreenBlobDetection", greenblob_image)
                cv.ShowImage("GreenBlobDetection", thumbgreen)
                #print "green center: %d %d %d" % blobcentergreen
            if blobcenteryellow is not None:
                cv.Resize(yellowblob_image, thumbyellow)
                cv.ShowImage("YellowBlobDetection", thumbyellow)
                #print "yellow center: %d %d %d" % blobcenteryellow
            if blobcenterblue is not None:
                cv.Resize(blueblob_image, thumbblue)
                cv.ShowImage("BlueBlobDetection", thumbblue)
                #print "blue center: %d %d %d" % blobcenterblue
            if blobcenterred is not None:
                cv.Resize(redblob_image, thumbred)
                cv.ShowImage("RedBlobDetection", thumbred)
                #print "red center: %d %d %d" % blobcenterred
            cv.ShowImage("Source", rgb_image)

        c = cv.WaitKey(7) % 0x100
        if c == 27:  # ESC quits
            break
        if c == ord('p') or c == ord('P'):
            pausecam = not pausecam

        # a lowercase key adds the HSV value under the mouse to that color's
        # calibration set; the uppercase key removes the last added sample
        if c == ord('y'):
            hsvyellowtab.append(hsvmouse)
            hsvyellowmin = mintab(hsvyellowtab)
            hsvyellowmax = maxtab(hsvyellowtab)
            print "minyellow"
            print hsvyellowmin
            print "maxyellow"
            print hsvyellowmax
        if c == ord('Y'):
            if len(hsvyellowtab) > 0:
                hsvyellowtab.pop(len(hsvyellowtab) - 1)
            if len(hsvyellowtab) != 0:
                hsvyellowmin = mintab(hsvyellowtab)
                hsvyellowmax = maxtab(hsvyellowtab)
            else:
                hsvyellowmin = [255, 255, 255]
                hsvyellowmax = [0, 0, 0]
        if c == ord('g'):
            hsvgreentab.append(hsvmouse)
            hsvgreenmin = mintab(hsvgreentab)
            hsvgreenmax = maxtab(hsvgreentab)
            print "mingreen"
            print hsvgreenmin
            print "maxgreen"
            print hsvgreenmax
        if c == ord('G'):
            if len(hsvgreentab) > 0:
                hsvgreentab.pop(len(hsvgreentab) - 1)
            if len(hsvgreentab) != 0:
                hsvgreenmin = mintab(hsvgreentab)
                hsvgreenmax = maxtab(hsvgreentab)
            else:
                hsvgreenmin = [255, 255, 255]
                hsvgreenmax = [0, 0, 0]
        if c == ord('r'):
            hsvredtab.append(hsvmouse)
            hsvredmin = mintab(hsvredtab)
            hsvredmax = maxtab(hsvredtab)
            print "minred"
            print hsvredmin
            print "maxred"
            print hsvredmax
        if c == ord('R'):
            if len(hsvredtab) > 0:
                hsvredtab.pop(len(hsvredtab) - 1)
            if len(hsvredtab) != 0:
                hsvredmin = mintab(hsvredtab)
                hsvredmax = maxtab(hsvredtab)
            else:
                hsvredmin = [255, 255, 255]
                hsvredmax = [0, 0, 0]
        if c == ord('b'):
            hsvbluetab.append(hsvmouse)
            hsvbluemin = mintab(hsvbluetab)
            hsvbluemax = maxtab(hsvbluetab)
            print "minblue"
            print hsvbluemin
            print "maxblue"
            print hsvbluemax
        if c == ord('B'):
            if len(hsvbluetab) > 0:
                hsvbluetab.pop(len(hsvbluetab) - 1)
            if len(hsvbluetab) != 0:
                hsvbluemin = mintab(hsvbluetab)
                hsvbluemax = maxtab(hsvbluetab)
            else:
                hsvbluemin = [255, 255, 255]
                hsvbluemax = [0, 0, 0]

        if not pausecam:
            rgb_image = cv.QueryFrame(capture)

        # after blob center detection we can run pose estimation
        if (blobcentergreen is not None and blobcenteryellow is not None and
                blobcenterblue is not None and blobcenterred is not None):
            # order is yellow, blue, red, green
            pose_flag = 1
            blob_centers = []
            blob_centers.append((blobcenteryellow[0] - size_image[0] / 2,
                                 blobcenteryellow[1] - size_image[1] / 2))
            blob_centers.append((blobcenterblue[0] - size_image[0] / 2,
                                 blobcenterblue[1] - size_image[1] / 2))
            blob_centers.append((blobcenterred[0] - size_image[0] / 2,
                                 blobcenterred[1] - size_image[1] / 2))
            blob_centers.append((blobcentergreen[0] - size_image[0] / 2,
                                 blobcentergreen[1] - size_image[1] / 2))

            # get the tracking matrix (orientation and position) with the POSIT
            # method, expressed in the tracker (camera) referential
            matrix = find_pose(nb_pts, blob_centers, modelepoints)

            # We want the tracking result in the world referential, i.e. 60 cm from
            # the middle of the screen, with Y up and Z pointing behind you.
            # The tracker referential sits in the camera referential with the X axis
            # pointing left, the Y axis pointing down, the Z axis pointing behind
            # you, and the camera as origin. We therefore pre-multiply so the
            # tracking result is expressed in the world referential rather than in
            # the tracker (camera) referential.
            pre_tranform_matrix = WordToTrackerTransform(matrix)
            # We do not want to track the origin of the body referential (the top
            # right point of the glasses) but the middle of the two eyes in
            # monoscopic mode (cyclops eye), or the left and right eyes in
            # stereoscopic mode. We therefore post-multiply the world tracking
            # result by the transform of each eye in the body (glasses) referential.
            pre_tranform_matrix_post_cylcope_eye = BodyToCyclopsEyeTransform(pre_tranform_matrix)
            poscyclope = [pre_tranform_matrix_post_cylcope_eye[3][0],
                          pre_tranform_matrix_post_cylcope_eye[3][1],
                          pre_tranform_matrix_post_cylcope_eye[3][2]]
            print "poscyclope", poscyclope

            pre_tranform_matrix_post_left_eye = BodyToLeftEyeTransform(pre_tranform_matrix)
            posleft = [pre_tranform_matrix_post_left_eye[3][0],
                       pre_tranform_matrix_post_left_eye[3][1],
                       pre_tranform_matrix_post_left_eye[3][2]]
            #print "posleft", posleft

            pre_tranform_matrix_post_right_eye = BodyToRightEyeTransform(pre_tranform_matrix)
            posright = [pre_tranform_matrix_post_right_eye[3][0],
                        pre_tranform_matrix_post_right_eye[3][1],
                        pre_tranform_matrix_post_right_eye[3][2]]
            #print "posright", posright

            sendPosition("/tracker/head/pos_xyz/cyclope_eye", poscyclope)
            sendPosition("/tracker/head/pos_xyz/left_eye", posleft)
            sendPosition("/tracker/head/pos_xyz/right_eye", posright)
        else:
            print "Tracking failed"
def imgResizer1(crop_img_string, width, height):
    image1 = cv.LoadImage(crop_img_string, cv.CV_LOAD_IMAGE_GRAYSCALE)
    dst1 = cv.CreateImage((width, height), 8, 1)
    cv.Resize(image1, dst1, interpolation=cv.CV_INTER_LINEAR)
    cv.SaveImage('Z_Resized_image.png', dst1)
    return dst1
def DetectEyes(image, faceCascade, eyeCascade):
    min_size = (20, 20)
    image_scale = 2
    haar_scale = 1.2
    min_neighbors = 3
    haar_flags = cv.CV_HAAR_DO_CANNY_PRUNING

    # Allocate the temporary images
    gray = cv.CreateImage((image.width, image.height), 8, 1)
    smallImage = cv.CreateImage((cv.Round(image.width / image_scale),
                                 cv.Round(image.height / image_scale)), 8, 1)

    # Convert color input image to grayscale
    cv.CvtColor(image, gray, cv.CV_BGR2GRAY)
    # Scale input image for faster processing
    cv.Resize(gray, smallImage, cv.CV_INTER_LINEAR)
    # Equalize the histogram
    cv.EqualizeHist(smallImage, smallImage)

    # Detect the faces
    faces = cv.HaarDetectObjects(smallImage, faceCascade, cv.CreateMemStorage(0),
                                 haar_scale, min_neighbors, haar_flags, min_size)

    # If faces are found
    if faces:
        for ((x, y, w, h), n) in faces:
            # the input to cv.HaarDetectObjects was resized, so scale the
            # bounding box of each face and convert it to two CvPoints
            pt1 = (int(x * image_scale), int(y * image_scale))
            pt2 = (int((x + w) * image_scale), int((y + h) * image_scale))
            cv.Rectangle(image, pt1, pt2, cv.RGB(255, 0, 0), 3, 8, 0)
            face_region = cv.GetSubRect(image, (x, int(y + (h / 4)), w, int(h / 2)))
            # restrict further processing to the upper 70% of the face
            cv.SetImageROI(image, (pt1[0], pt1[1], pt2[0] - pt1[0],
                                   int((pt2[1] - pt1[1]) * 0.7)))
    # If there are no faces found there's no reason to continue
    else:
        sys.exit("No faces were found")

    # NOTE: This returns the eye regions we're interested in
    eyes = cv.HaarDetectObjects(image, eyeCascade, cv.CreateMemStorage(0),
                                1.3, min_neighbors, haar_flags, (15, 15))

    ## Draw rectangles around the eyes found ##
    if eyes:
        # For each eye found
        for eye in eyes:
            # Draw a rectangle around the eye
            cv.Rectangle(image,
                         (eye[0][0], eye[0][1]),
                         (eye[0][0] + eye[0][2], eye[0][1] + eye[0][3]),
                         cv.RGB(255, 0, 0), 1, 8, 0)

    # The following is commented out because while debugging we don't need to
    # see the original image, just the regions of interest we have.
    #cv.ResetImageROI(image)
    return image
    # equalize
    cv.EqualizeHist(grayscale, grayscale)
    # detections
    features = cv.GoodFeaturesToTrack(grayscale, temp_eigen, temp_image,
                                      10, 0.04, 1.0, useHarris=True)
    if features:
        for (x, y) in features:
            # the corner coordinates come back as floats; Rectangle needs ints
            cv.Rectangle(image, (int(x), int(y)), (int(x) + 4, int(y) + 4),
                         cv.RGB(0, 255, 0), 3, 8, 0)


while True:
    frame = get_video()
    # scale down the 640x480 kinect to half size for quicker processing
    shrunk = cv.CreateMat(SCALE_Y, SCALE_X, cv.CV_8UC3)
    cv.Resize(frame, shrunk)
    # cv.Flip(frame, None, 1)
    features(shrunk)
    cv.ShowImage("Bob", shrunk)
    k = cv.WaitKey(10)
        break
    if not frame_copy:
        frame_copy = cv.CreateImage((frame.width, frame.height),
                                    cv.IPL_DEPTH_8U, frame.nChannels)

    # Allocate the grayscale image and the downscaled image used for detection.
    gray = cv.CreateImage((frame.width, frame.height), 8, 1)
    small_img = cv.CreateImage((cv.Round(frame.width / image_scale),
                                cv.Round(frame.height / image_scale)), 8, 1)

    # Convert to grayscale with BGR2GRAY to simplify detection; color is of no use here.
    cv.CvtColor(frame, gray, cv.CV_BGR2GRAY)

    # Downscale the image to improve performance.
    cv.Resize(gray, small_img, cv.CV_INTER_LINEAR)

    # Equalize the histogram of the grayscale image to use the full 0-255 range.
    cv.EqualizeHist(small_img, small_img)

    midFace = None

    if cascade:
        t = cv.GetTickCount()
        # Run HaarDetectObjects with the parameters defined above: the frontal
        # face cascade, the downscaled grayscale image, the growing search
        # window factor, and the minimum number of adjacent rectangles.
        faces = cv.HaarDetectObjects(small_img, cascade, cv.CreateMemStorage(0),
                                     factor_haar, minAdyacentes, haar_flag, tamVentana)
        t = cv.GetTickCount() - t
        if faces:
def __init__(self):
    rospy.init_node('avi2ros', anonymous=True)

    self.input = rospy.get_param("~input", "")
    self.output = rospy.get_param("~output", "video_output")
    self.fps = rospy.get_param("~fps", 25)
    self.loop = rospy.get_param("~loop", False)
    self.width = rospy.get_param("~width", "")
    self.height = rospy.get_param("~height", "")
    self.start_paused = rospy.get_param("~start_paused", False)
    self.show_viz = not rospy.get_param("~headless", False)
    self.show_text = True

    image_pub = rospy.Publisher(self.output, Image, queue_size=10)
    rospy.on_shutdown(self.cleanup)

    video = cv.CaptureFromFile(self.input)
    fps = int(cv.GetCaptureProperty(video, cv.CV_CAP_PROP_FPS))

    """ Bring the fps up to the specified rate """
    try:
        fps = int(fps * self.fps / fps)
    except:
        fps = self.fps

    if self.show_viz:
        cv.NamedWindow("AVI Video", True)  # autosize the display
        cv.MoveWindow("AVI Video", 650, 100)

    bridge = CvBridge()

    self.paused = self.start_paused
    self.keystroke = None
    self.restart = False

    # Get the first frame to display if we are starting in the paused state.
    frame = cv.QueryFrame(video)
    image_size = cv.GetSize(frame)

    if self.width and self.height and (self.width != image_size[0] or
                                       self.height != image_size[1]):
        rospy.loginfo("Resizing! " + str(self.width) + " x " + str(self.height))
        resized_frame = cv.CreateImage((self.width, self.height),
                                       frame.depth, frame.channels)
        cv.Resize(frame, resized_frame)
        frame = cv.CloneImage(resized_frame)

    text_frame = cv.CloneImage(frame)
    cv.Zero(text_frame)

    while not rospy.is_shutdown():
        """ Handle keyboard events """
        self.keystroke = cv.WaitKey(1000 / fps)

        """ Process any keyboard commands """
        if 32 <= self.keystroke and self.keystroke < 128:
            cc = chr(self.keystroke).lower()
            if cc == 'q':
                """ The user has pressed the q key, so exit """
                rospy.signal_shutdown("User hit q key to quit.")
            elif cc == ' ':
                """ Pause or continue the video """
                self.paused = not self.paused
            elif cc == 'r':
                """ Restart the video from the beginning """
                self.restart = True
            elif cc == 't':
                """ Toggle display of the text help message """
                self.show_text = not self.show_text

        if self.restart:
            #video = cv.CaptureFromFile(self.input)
            print "restarting video from beginning"
            cv.SetCaptureProperty(video, cv.CV_CAP_PROP_POS_AVI_RATIO, 0)
            self.restart = None

        if not self.paused:
            frame = cv.QueryFrame(video)
            if frame and self.width and self.height:
                if self.width != image_size[0] or self.height != image_size[1]:
                    cv.Resize(frame, resized_frame)
                    frame = cv.CloneImage(resized_frame)

        if frame is None:
            if self.loop:
                self.restart = True
        else:
            if self.show_text:
                frame_size = cv.GetSize(frame)
                text_font = cv.InitFont(cv.CV_FONT_HERSHEY_SIMPLEX, 0.2, 1, 0, 1, 8)
                cv.PutText(text_frame, "Keyboard commands:",
                           (20, int(frame_size[1] * 0.6)), text_font, cv.RGB(255, 255, 0))
                cv.PutText(text_frame, " ",
                           (20, int(frame_size[1] * 0.65)), text_font, cv.RGB(255, 255, 0))
                cv.PutText(text_frame, "space - toggle pause/play",
                           (20, int(frame_size[1] * 0.72)), text_font, cv.RGB(255, 255, 0))
                cv.PutText(text_frame, " r - restart video from beginning",
                           (20, int(frame_size[1] * 0.79)), text_font, cv.RGB(255, 255, 0))
                cv.PutText(text_frame, " t - hide/show this text",
                           (20, int(frame_size[1] * 0.86)), text_font, cv.RGB(255, 255, 0))
                cv.PutText(text_frame, " q - quit the program",
                           (20, int(frame_size[1] * 0.93)), text_font, cv.RGB(255, 255, 0))

            cv.Add(frame, text_frame, text_frame)
            if self.show_viz:
                cv.ShowImage("AVI Video", text_frame)
            cv.Zero(text_frame)

        try:
            test = np.asarray(frame[:, :])
            publishing_image = bridge.cv2_to_imgmsg(test, "bgr8")
            image_pub.publish(publishing_image)
        except CvBridgeError, e:
            print e
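# A hypothetical launch line (package and script names are assumptions; the
# private parameters map one-to-one onto the rospy.get_param() calls above):
#   rosrun my_vision_pkg avi2ros.py _input:=/path/to/clip.avi _fps:=25 _loop:=true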
def dilateImage(im, nbiter=0):
    for i in range(nbiter):
        cv.Dilate(im, im)


def erodeImage(im, nbiter=0):
    for i in range(nbiter):
        cv.Erode(im, im)


def thresholdImage(im, value, filter=cv.CV_THRESH_BINARY_INV):
    cv.Threshold(im, im, value, 255, filter)


def resizeImage(im, (width, height)):
    # Resizing an image can make a significant difference to the OCR engine's
    # ability to detect characters.
    res = cv.CreateImage((width, height), im.depth, im.channels)
    cv.Resize(im, res)
    return res


def getContours(im, approx_value=1):
    # Return approximated contours.
    storage = cv.CreateMemStorage(0)
    contours = cv.FindContours(cv.CloneImage(im), storage, cv.CV_RETR_CCOMP,
                               cv.CV_CHAIN_APPROX_SIMPLE)
    contourLow = cv.ApproxPoly(contours, storage, cv.CV_POLY_APPROX_DP,
                               approx_value, approx_value)
    return contourLow


def getIndividualContoursRectangles(contours):
    # Return the bounding rect for every contour.
    contourscopy = contours
    rectangleList = []
    while contourscopy:
        x, y, w, h = cv.BoundingRect(contourscopy)
        rectangleList.append((x, y, w, h))
        contourscopy = contourscopy.h_next()
    return rectangleList
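# A minimal usage sketch (assumptions: 'plate.png' is a hypothetical input and
# the threshold value 120 is arbitrary). It chains the helpers above the way an
# OCR preprocessing pass typically would: upscale, binarize, clean up with
# erode/dilate, then collect the bounding boxes of the remaining blobs.
im = cv.LoadImage('plate.png', cv.CV_LOAD_IMAGE_GRAYSCALE)
im = resizeImage(im, (im.width * 2, im.height * 2))  # upscale for the OCR engine
thresholdImage(im, 120)                              # inverted binary threshold
erodeImage(im, 1)                                    # drop speckle noise
dilateImage(im, 1)                                   # restore stroke width
boxes = getIndividualContoursRectangles(getContours(im))
print boxes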
#coding=utf-8
import cv2.cv as cv

image = cv.LoadImage('meinv.jpg', cv.CV_LOAD_IMAGE_COLOR)
font = cv.InitFont(cv.CV_FONT_HERSHEY_SIMPLEX, 1, 1, 0, 3, 8)

y = image.height / 4
x = image.width / 2
cv.PutText(image, "Hello Meinv!", (x, y), font, cv.RGB(0, 0, 0))

# half-size thumbnail; CreateImage takes a bit depth (8), not a CV_8UC2 type
thumb = cv.CreateImage((image.width / 2, image.height / 2), 8, 3)
cv.Resize(image, thumb)
#cvt = cv.CreateImage(cv.GetSize(thumb), 8, 3)
#cv.CvtColor(thumb, cvt, cv.CV_RGB2BGR)
#cv.NamedWindow('Image', cv.CV_WINDOW_AUTOSIZE)

b = cv.CreateImage(cv.GetSize(thumb), thumb.depth, 1)
g = cv.CloneImage(b)
r = cv.CloneImage(b)
cv.Split(thumb, b, g, r, None)

merged = cv.CreateImage(cv.GetSize(thumb), 8, 3)
# note: passing (g, b, r) swaps the blue and green planes in the merged image
cv.Merge(g, b, r, None, merged)

cv.ShowImage('Image', thumb)
cv.ShowImage('Blue', b)
cv.ShowImage('Green', g)
cv.ShowImage('Red', r)
cv.WaitKey(0)  # keep the windows open until a key is pressed
def detectFaces():
    global frame_copy, min_size, image_scale, haar_scale, min_neighbors, haar_flags, cap, cam_pan, cam_tilt
    t0 = cv.GetTickCount()

    frame = cv.QueryFrame(cap)
    if not frame:
        cv.WaitKey(0)
        return False
    if not frame_copy:
        frame_copy = cv.CreateImage((frame.width, frame.height),
                                    cv.IPL_DEPTH_8U, frame.nChannels)
    if frame.origin == cv.IPL_ORIGIN_TL:
        cv.Flip(frame, frame, -1)

    # Our operations on the frame come here
    gray = cv.CreateImage((frame.width, frame.height), 8, 1)
    small_img = cv.CreateImage((cv.Round(frame.width / image_scale),
                                cv.Round(frame.height / image_scale)), 8, 1)
    small_img2 = cv.CreateImage((cv.Round(frame.width / image_scale),
                                 cv.Round(frame.height / image_scale)), 8, 1)

    # convert color input image to grayscale
    cv.CvtColor(frame, gray, cv.CV_BGR2GRAY)
    # scale input image for faster processing
    cv.Resize(gray, small_img, cv.CV_INTER_LINEAR)
    cv.EqualizeHist(small_img, small_img)
    # flip the image for more convenient camera mounting
    cv.Flip(small_img, small_img2, -1)

    midFace = None

    t1 = cv.GetTickCount()
    if cascade:
        t = cv.GetTickCount()
        # HaarDetectObjects takes about 0.02 s
        faces = cv.HaarDetectObjects(small_img2, cascade, cv.CreateMemStorage(0),
                                     haar_scale, min_neighbors, haar_flags, min_size)
        t = cv.GetTickCount() - t
        if faces:
            #lights(50 if len(faces) == 0 else 0, 50 if len(faces) > 0 else 0, 0, 50)
            for ((x, y, w, h), n) in faces:
                # the input to cv.HaarDetectObjects was resized, so scale the
                # bounding box of each face and convert it to two CvPoints
                pt1 = (int(x * image_scale), int(y * image_scale))
                pt2 = (int((x + w) * image_scale), int((y + h) * image_scale))
                # cv.Rectangle(frame, pt1, pt2, cv.RGB(100, 220, 255), 1, 8, 0)

                # get the xy corner co-ords, calc the midFace location
                x1 = pt1[0]
                x2 = pt2[0]
                y1 = pt1[1]
                y2 = pt2[1]
                midFaceX = x1 + ((x2 - x1) / 2)
                midFaceY = y1 + ((y2 - y1) / 2)
                midFace = (midFaceX, midFaceY)

                # offsets land in [-1, 1], with (0, 0) at the frame center
                offsetX = midFaceX / float(frame.width / 2)
                offsetY = midFaceY / float(frame.height / 2)
                offsetX -= 1
                offsetY -= 1

                cam_pan -= (offsetX * 5)
                cam_tilt += (offsetY * 5)
                cam_pan = max(0, min(180, cam_pan))
                cam_tilt = max(0, min(180, cam_tilt))

                print(offsetX, offsetY, midFace, cam_pan, cam_tilt,
                      frame.width, frame.height)
                sys.stdout.flush()

                # pan(int(cam_pan-90))
                # tilt(int(cam_tilt-90))
                #break

    # print "e"+str((t1-t0)/1000000)+"-"+str((cv.GetTickCount()-t1)/1000000)
    # cv.ShowImage('Tracker', frame)

    if cv.WaitKey(1) & 0xFF == ord('q'):
        return False
    return True
def convertToPngs(movieName, frameOutName, wdir='',
                  startFrame=0, endFrame=499, maxDim=128):
    """
    Converts a saved movie into a collection of png frames

    movieName: name of movie file
    frameOutName: prefix of each frame to be written out;
        should not have the image type at the end
    wdir: working directory (i.e. where the movie is and where the frames
        will be written). In general this should be its own directory for
        each movie, since there are many frames in a given movie.
    startFrame: first frame # to be written out
    endFrame: last frame # to be written out
    maxDim: the maximum number of elements in any one dimension of the
        output image. This should be an integer, but if maxDim = False,
        then the frames are saved in their original size.
    """
    # change to working directory
    os.chdir(wdir)

    # strip the frame prefix of unnecessary suffixes
    frameOutName = frameOutName.replace(".png", '')
    frameOutName = frameOutName.replace(".jpeg", '')

    # initiate the movie stream from the file named by the caller
    capture = cv.CaptureFromFile(movieName)

    # extract frame size
    nCols = int(cv.GetCaptureProperty(capture, cv.CV_CAP_PROP_FRAME_WIDTH))
    nRows = int(cv.GetCaptureProperty(capture, cv.CV_CAP_PROP_FRAME_HEIGHT))
    size = (nCols, nRows)  # (width, height) order, as CreateImage expects
    maxFrameDim = max(size)

    # compute rescaling required based upon input
    #scale = float(maxFrameDim) / float(maxDim)
    newSize = size  #(int(floor(size[0]/scale + .5)), int(floor(size[1]/scale + .5)))

    # extract number of frames in the video
    NframesTot = int(cv.GetCaptureProperty(capture, cv.CV_CAP_PROP_FRAME_COUNT))

    # loop over frames, writing out those in the desired range;
    # I assume that there is no way to start at a particular frame
    # and that we have to loop over all of them sequentially.
    for k in xrange(NframesTot):
        frame = cv.QueryFrame(capture)
        if k >= startFrame:
            # TODO: we could put this in a try/except block, but I'm happy to
            # just let it fail naturally if there is a problem; since it
            # writes out the frames as it progresses, we won't lose anything.
            if maxDim:
                smallFrame = cv.CreateImage(newSize, frame.depth, frame.nChannels)
                cv.Resize(frame, smallFrame)
                frame = smallFrame
            cv.SaveImage(frameOutName + "{0:04d}.png".format(k), frame)
        if k >= endFrame:
            break

    print '\n\nConverted {0} frames'.format(k + 1)
    return 0
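# A hypothetical call (the file and directory names are assumptions): note
# that convertToPngs() chdirs into wdir, so the movie file must live inside it.
convertToPngs('doom.mp4', 'frame', wdir='doom_frames', startFrame=0, endFrame=499)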
def detect_and_draw(img, cascade):
    # allocate temporary images
    gray = cv.CreateImage((img.width, img.height), 8, 1)
    # create an image with a smaller size
    small_img = cv.CreateImage((cv.Round(img.width / image_scale),
                                cv.Round(img.height / image_scale)), 8, 1)

    # convert color input image to grayscale
    cv.CvtColor(img, gray, cv.CV_BGR2GRAY)

    # scale input image for faster processing
    cv.Resize(gray, small_img, cv.CV_INTER_LINEAR)

    # if the cascade is present
    if cascade:
        # get the current time
        t = cv.GetTickCount()
        # allocate memory for the calculation (CreateMemStorage)
        faces = cv.HaarDetectObjects(small_img, cascade, cv.CreateMemStorage(0),
                                     haar_scale, min_neighbors, haar_flags, min_size)
        # elapsed time = current time minus start time
        t = cv.GetTickCount() - t
        print "detection time = %gms" % (t / (cv.GetTickFrequency() * 1000.))

        i = 0
        # if at least one face was detected
        if faces:
            # iterate over the coordinates of every face
            for ((x, y, w, h), n) in faces:
                i = 1
                # the input to cv.HaarDetectObjects was resized, so scale the
                # bounding box of each face and convert it to two CvPoints
                pt1 = (int(x * image_scale), int(y * image_scale))
                pt2 = (int((x + w) * image_scale), int((y + h) * image_scale))
                # draw rectangle (image, top left, bottom right, color, size)
                cv.Rectangle(img, pt1, pt2, (0, 230, 0), 1)
                # crop the face in full-image coordinates and save it
                var1 = img[pt1[1]: pt2[1], pt1[0]: pt2[0]]
                cv.SaveImage("face/database/image.png", var1)
                name = "face/database/image.png"
                img = Image.open(name).convert('LA')  # grayscale via PIL
                img.save(name)
                break

        cv.DestroyAllWindows()
        if i == 1:
            os.system("python resize.py")
        if i == 0:
            os.remove("face/database/image.png")
def OnPaint(self, evt):
    if not self.timer.IsRunning():
        dc = wx.BufferedDC(wx.ClientDC(self), wx.NullBitmap, wx.BUFFER_VIRTUAL_AREA)
        dc.SetBackground(wx.Brush(wx.Colour(0, 0, 0)))
        return

    # Grab a frame from the camera
    frame = cv.QueryFrame(CAMERA)
    cv.CvtColor(frame, frame, cv.CV_BGR2RGB)
    Img = wx.EmptyImage(frame.width, frame.height)
    Img.SetData(frame.tostring())
    self.bmp = wx.BitmapFromImage(Img)
    width, height = frame.width, frame.height

    # Face detection
    min_size = (20, 20)
    image_scale = 2
    haar_scale = 1.2
    min_neighbors = 2
    haar_flags = 0

    gray = cv.CreateImage((frame.width, frame.height), 8, 1)
    small_img = cv.CreateImage((cv.Round(frame.width / image_scale),
                                cv.Round(frame.height / image_scale)), 8, 1)
    cv.CvtColor(frame, gray, cv.CV_BGR2GRAY)
    cv.Resize(gray, small_img, cv.CV_INTER_LINEAR)
    cv.EqualizeHist(small_img, small_img)
    listeVisages = cv.HaarDetectObjects(small_img, CASCADE, cv.CreateMemStorage(0),
                                        haar_scale, min_neighbors, haar_flags, min_size)

    # Draw the image
    x, y = (0, 0)
    try:
        dc = wx.BufferedDC(wx.ClientDC(self), wx.NullBitmap, wx.BUFFER_VIRTUAL_AREA)
        try:
            dc.SetBackground(wx.Brush(wx.Colour(0, 0, 0)))
        except:
            pass
        dc.Clear()
        dc.DrawBitmap(self.bmp, x, y)

        # Draw a rectangle around each detected face
        if listeVisages:
            for ((x, y, w, h), n) in listeVisages:
                dc.SetBrush(wx.TRANSPARENT_BRUSH)
                dc.SetPen(wx.Pen(wx.Colour(255, 0, 0), 2))
                dc.DrawRectangle(x * image_scale, y * image_scale,
                                 w * image_scale, h * image_scale)
        self.listeVisages = listeVisages
        del dc
        del Img
    except TypeError:
        pass
    except wx.PyDeadObjectError:
        pass
def detect_and_draw(img, cascade, detected):
    global pan
    # allocate temporary images
    gray = cv.CreateImage((img.width, img.height), 8, 1)
    image_scale = img.width / smallwidth
    small_img = cv.CreateImage((cv.Round(img.width / image_scale),
                                cv.Round(img.height / image_scale)), 8, 1)

    # convert color input image to grayscale
    cv.CvtColor(img, gray, cv.CV_BGR2GRAY)
    # scale input image for faster processing
    cv.Resize(gray, small_img, cv.CV_INTER_LINEAR)
    cv.EqualizeHist(small_img, small_img)

    if cascade:
        faces = cv.HaarDetectObjects(small_img, cascade, cv.CreateMemStorage(0),
                                     haar_scale, min_neighbors, haar_flags, min_size)
        if faces:
            if detected == 0:
                # os.system('festival --tts hi &')
                detected = 1
            for ((x, y, w, h), n) in faces:
                # the input to cv.HaarDetectObjects was resized, so scale the
                # bounding box of each face and convert it to two CvPoints
                pt1 = (int(x * image_scale), int(y * image_scale))
                pt2 = (int((x + w) * image_scale), int((y + h) * image_scale))
                cv.Rectangle(img, pt1, pt2, cv.RGB(255, 0, 0), 3, 8, 0)
                print "Face at: ", pt1[0], ",", pt2[0], "\t", pt1[1], ",", pt2[1]

                # find the amount needed to pan: compare the face's left edge
                # with the frame midline (both in full-image coordinates)
                span = pt1[0]
                mid = img.width / 2
                if span < mid:
                    print "left", mid - span
                    pandir = -1
                else:
                    print "right", span - mid
                    pandir = 1
                pan = pan + pandir
                if pan > 180:
                    pan = 180
                if pan < 0:
                    pan = 0
                os.system('echo "0="' + str(pan) + ' > /dev/servoblaster')
        else:
            if detected == 1:
                #print "Last seen at: ", pt1[0], ",", pt2[0], "\t", pt1[1], ",", pt2[1]
                #os.system('festival --tts bye &')
                status = "just disappeared"
                detected = 0

    # fps = int((t / (cv.GetTickFrequency()) / 1000))
    # font = cv.InitFont(cv.CV_FONT_HERSHEY_SIMPLEX, 1, 1, 0, 3, 8)
    # cv.PutText(img, "% fps" % fps, (img.width/2, img.height-10), font, 255)
    cv.ShowImage("result", img)
    return detected
def run(self):
    # Initialize
    #log_file_name = "tracker_output.log"
    #log_file = file(log_file_name, 'a')

    frame = cv.QueryFrame(self.capture)
    frame_size = cv.GetSize(frame)

    # Capture the first frame from webcam for image properties
    display_image = cv.QueryFrame(self.capture)

    # Greyscale image, thresholded to create the motion mask:
    grey_image = cv.CreateImage(cv.GetSize(frame), cv.IPL_DEPTH_8U, 1)

    # The RunningAvg() function requires a 32-bit or 64-bit image...
    running_average_image = cv.CreateImage(cv.GetSize(frame), cv.IPL_DEPTH_32F, 3)
    # ...but the AbsDiff() function requires matching image depths:
    running_average_in_display_color_depth = cv.CloneImage(display_image)

    # RAM used by FindContours():
    mem_storage = cv.CreateMemStorage(0)

    # The difference between the running average and the current frame:
    difference = cv.CloneImage(display_image)

    target_count = 1
    last_target_count = 1
    last_target_change_t = 0.0
    k_or_guess = 1
    codebook = []
    frame_count = 0
    last_frame_entity_list = []

    t0 = time.time()

    # For toggling display:
    image_list = ["camera", "difference", "threshold", "display", "faces"]
    image_index = 0  # Index into image_list

    # Prep for text drawing:
    text_font = cv.InitFont(cv.CV_FONT_HERSHEY_COMPLEX, .5, .5, 0.0, 1, cv.CV_AA)
    text_coord = (5, 15)
    text_color = cv.CV_RGB(255, 255, 255)

    ###############################
    ### Face detection stuff
    #haar_cascade = cv.Load('haarcascades/haarcascade_frontalface_default.xml')
    #haar_cascade = cv.Load('haarcascades/haarcascade_frontalface_alt.xml')
    #haar_cascade = cv.Load('haarcascades/haarcascade_frontalface_alt2.xml')
    #haar_cascade = cv.Load('haarcascades/haarcascade_mcs_mouth.xml')
    haar_cascade = cv.Load('haarcascades/haarcascade_eye.xml')
    #haar_cascade = cv.Load('haarcascades/haarcascade_frontalface_alt_tree.xml')
    #haar_cascade = cv.Load('haarcascades/haarcascade_upperbody.xml')
    #haar_cascade = cv.Load('haarcascades/haarcascade_profileface.xml')

    # Set this to the max number of targets to look for (passed to k-means):
    max_targets = 3

    while True:
        # Capture frame from webcam
        camera_image = cv.QueryFrame(self.capture)

        frame_count += 1
        frame_t0 = time.time()

        # Create an image with interactive feedback:
        display_image = cv.CloneImage(camera_image)

        # Create a working "color image" to modify / blur
        color_image = cv.CloneImage(display_image)

        # Smooth to get rid of false positives
        cv.Smooth(color_image, color_image, cv.CV_GAUSSIAN, 19, 0)

        # Use the Running Average as the static background
        # a = 0.020 leaves artifacts lingering way too long.
        # a = 0.320 works well at 320x240, 15fps. (1/a is roughly num frames.)
        cv.RunningAvg(color_image, running_average_image, 0.320, None)

        # Convert the scale of the moving average.
        cv.ConvertScale(running_average_image,
                        running_average_in_display_color_depth, 1.0, 0.0)

        # Subtract the current frame from the moving average.
        cv.AbsDiff(color_image, running_average_in_display_color_depth, difference)

        # Convert the image to greyscale.
        cv.CvtColor(difference, grey_image, cv.CV_RGB2GRAY)

        # Threshold the image to a black and white motion mask:
        cv.Threshold(grey_image, grey_image, 2, 255, cv.CV_THRESH_BINARY)
        # Smooth and threshold again to eliminate "sparkles"
        cv.Smooth(grey_image, grey_image, cv.CV_GAUSSIAN, 19, 0)
        cv.Threshold(grey_image, grey_image, 240, 255, cv.CV_THRESH_BINARY)

        grey_image_as_array = np.asarray(cv.GetMat(grey_image))
        non_black_coords_array = np.where(grey_image_as_array > 3)
        # Convert from np.where()'s two separate lists to one list of (x, y) tuples:
        non_black_coords_array = zip(non_black_coords_array[1],
                                     non_black_coords_array[0])

        points = []  # Was using this to hold either pixel coords or polygon coords.
        bounding_box_list = []

        # Now calculate movements using the white pixels as "motion" data
        contour = cv.FindContours(grey_image, mem_storage, cv.CV_RETR_CCOMP,
                                  cv.CV_CHAIN_APPROX_SIMPLE)

        while contour:
            bounding_rect = cv.BoundingRect(list(contour))
            point1 = (bounding_rect[0], bounding_rect[1])
            point2 = (bounding_rect[0] + bounding_rect[2],
                      bounding_rect[1] + bounding_rect[3])
            bounding_box_list.append((point1, point2))
            polygon_points = cv.ApproxPoly(list(contour), mem_storage,
                                           cv.CV_POLY_APPROX_DP)

            # To track polygon points only (instead of every pixel):
            #points += list(polygon_points)

            # Draw the contours:
            ###cv.DrawContours(color_image, contour, cv.CV_RGB(255,0,0), cv.CV_RGB(0,255,0), levels, 3, 0, (0,0))
            cv.FillPoly(grey_image, [list(polygon_points), ],
                        cv.CV_RGB(255, 255, 255), 0, 0)
            cv.PolyLine(display_image, [polygon_points, ], 0,
                        cv.CV_RGB(255, 255, 255), 1, 0, 0)
            #cv.Rectangle(display_image, point1, point2, cv.CV_RGB(120,120,120), 1)

            contour = contour.h_next()

        # Find the average size of the bbox (targets), then remove any tiny
        # bboxes (which are probably just noise). "Tiny" is defined as any box
        # with 1/10th the area of the average box.
        # This reduces false positives on tiny "sparkles" noise.
        box_areas = []
        for box in bounding_box_list:
            box_width = box[right][0] - box[left][0]
            box_height = box[bottom][1] - box[top][1]  # height uses y coords
            box_areas.append(box_width * box_height)
            #cv.Rectangle(display_image, box[0], box[1], cv.CV_RGB(255,0,0), 1)

        average_box_area = 0.0
        if len(box_areas):
            average_box_area = float(sum(box_areas)) / len(box_areas)

        trimmed_box_list = []
        for box in bounding_box_list:
            box_width = box[right][0] - box[left][0]
            box_height = box[bottom][1] - box[top][1]
            # Only keep the box if it's not a tiny noise box:
            if (box_width * box_height) > average_box_area * 0.1:
                trimmed_box_list.append(box)

        # Draw the trimmed box list:
        #for box in trimmed_box_list:
        #    cv.Rectangle(display_image, box[0], box[1], cv.CV_RGB(0,255,0), 2)

        bounding_box_list = merge_collided_bboxes(trimmed_box_list)

        # Draw the merged box list:
        for box in bounding_box_list:
            cv.Rectangle(display_image, box[0], box[1], cv.CV_RGB(0, 255, 0), 1)

        # Here are our estimate points to track, based on merged & trimmed boxes:
        estimated_target_count = len(bounding_box_list)

        # Don't allow target "jumps" from few to many or many to few.
        # Only change the number of targets up to one target per n seconds.
        # This fixes the "exploding number of targets" when something stops moving
        # and the motion erodes to disparate little puddles all over the place.
        if frame_t0 - last_target_change_t < .350:  # 1 change per 0.35 secs
            estimated_target_count = last_target_count
        else:
            if last_target_count - estimated_target_count > 1:
                estimated_target_count = last_target_count - 1
            if estimated_target_count - last_target_count > 1:
                estimated_target_count = last_target_count + 1
            last_target_change_t = frame_t0

        # Clip to the user-supplied maximum:
        estimated_target_count = min(estimated_target_count, max_targets)

        # The estimated_target_count at this point is the maximum number of targets
        # we want to look for. If kmeans decides that one of our candidate
        # bboxes is not actually a target, we remove it from the target list below.

        # Using the numpy values directly (treating all pixels as points):
        points = np.array(non_black_coords_array, dtype='f')
        center_points = []

        if len(points):
            # If we have all the "target_count" targets from last frame,
            # use the previously known targets (for greater accuracy).
            k_or_guess = max(estimated_target_count, 1)  # Need at least one target to look for.
            if len(codebook) == estimated_target_count:
                k_or_guess = codebook

            #points = vq.whiten(array(points))  # Don't do this! Ruins everything.
            codebook, distortion = vq.kmeans(array(points), k_or_guess)

            # Convert to tuples (and draw it to screen)
            for center_point in codebook:
                center_point = (int(center_point[0]), int(center_point[1]))
                center_points.append(center_point)
                #cv.Circle(display_image, center_point, 10, cv.CV_RGB(255, 0, 0), 2)
                #cv.Circle(display_image, center_point, 5, cv.CV_RGB(255, 0, 0), 3)

        # Now we have targets that are NOT computed from bboxes -- just
        # movement weights (according to kmeans). If any two targets are
        # within the same "bbox count", average them into a single target.
        #
        # (Any kmeans targets not within a bbox are also kept.)
        trimmed_center_points = []
        removed_center_points = []

        for box in bounding_box_list:
            # Find the centers within this box:
            center_points_in_box = []

            for center_point in center_points:
                if center_point[0] < box[right][0] and center_point[0] > box[left][0] and \
                   center_point[1] < box[bottom][1] and center_point[1] > box[top][1]:
                    # This point is within the box.
                    center_points_in_box.append(center_point)

            # Now see if there are more than one. If so, merge them.
            if len(center_points_in_box) > 1:
                # Merge them (the x and y lists must be separate objects):
                x_list = []
                y_list = []
                for point in center_points_in_box:
                    x_list.append(point[0])
                    y_list.append(point[1])

                average_x = int(float(sum(x_list)) / len(x_list))
                average_y = int(float(sum(y_list)) / len(y_list))

                trimmed_center_points.append((average_x, average_y))

                # Record that they were removed:
                removed_center_points += center_points_in_box

            if len(center_points_in_box) == 1:
                trimmed_center_points.append(center_points_in_box[0])  # Just use it.

        # If there are any center_points not within a bbox, just use them.
        # (It's probably a cluster comprised of a bunch of small bboxes.)
        for center_point in center_points:
            if (not center_point in trimmed_center_points) and \
               (not center_point in removed_center_points):
                trimmed_center_points.append(center_point)

        # Draw what we found:
        #for center_point in trimmed_center_points:
        #    center_point = (int(center_point[0]), int(center_point[1]))
        #    cv.Circle(display_image, center_point, 20, cv.CV_RGB(255, 255, 255), 1)
        #    cv.Circle(display_image, center_point, 15, cv.CV_RGB(100, 255, 255), 1)
        #    cv.Circle(display_image, center_point, 10, cv.CV_RGB(255, 255, 255), 2)
        #    cv.Circle(display_image, center_point, 5, cv.CV_RGB(100, 255, 255), 3)

        # Determine if there are any new (or lost) targets:
        actual_target_count = len(trimmed_center_points)
        last_target_count = actual_target_count

        # Now build the list of physical entities (objects)
        this_frame_entity_list = []

        # An entity is a list: [name, color, last_time_seen, last_known_coords]
        for target in trimmed_center_points:
            # Is this a target near a prior entity (same physical entity)?
            entity_found = False
            entity_distance_dict = {}

            for entity in last_frame_entity_list:
                entity_coords = entity[3]
                delta_x = entity_coords[0] - target[0]
                delta_y = entity_coords[1] - target[1]
                distance = sqrt(pow(delta_x, 2) + pow(delta_y, 2))
                entity_distance_dict[distance] = entity

            # Did we find any non-claimed entities (nearest to furthest):
            distance_list = entity_distance_dict.keys()
            distance_list.sort()

            for distance in distance_list:
                # Yes; see if we can claim the nearest one:
                nearest_possible_entity = entity_distance_dict[distance]

                # Don't consider entities that are already claimed:
                if nearest_possible_entity in this_frame_entity_list:
                    #print "Target %s: Skipping the one with distance: %d at %s, C:%s" % (target, distance, nearest_possible_entity[3], nearest_possible_entity[1])
                    continue

                #print "Target %s: USING the one with distance: %d at %s, C:%s" % (target, distance, nearest_possible_entity[3], nearest_possible_entity[1])
                # Found the nearest entity to claim:
                entity_found = True
                nearest_possible_entity[2] = frame_t0  # Update last_time_seen
                nearest_possible_entity[3] = target    # Update the new location
                this_frame_entity_list.append(nearest_possible_entity)
                #log_file.write("%.3f MOVED %s %d %d\n" % (frame_t0, nearest_possible_entity[0], nearest_possible_entity[3][0], nearest_possible_entity[3][1]))
                break

            if entity_found == False:
                # It's a new entity.
                color = (random.randint(0, 255),
                         random.randint(0, 255),
                         random.randint(0, 255))
                name = hashlib.md5(str(frame_t0) + str(color)).hexdigest()[:6]
                last_time_seen = frame_t0
                new_entity = [name, color, last_time_seen, target]
                this_frame_entity_list.append(new_entity)
                #log_file.write("%.3f FOUND %s %d %d\n" % (frame_t0, new_entity[0], new_entity[3][0], new_entity[3][1]))

        # Now "delete" any not-found entities which have expired:
        entity_ttl = 1.0  # 1 sec.

        for entity in last_frame_entity_list:
            last_time_seen = entity[2]
            if frame_t0 - last_time_seen > entity_ttl:
                # It's gone.
                #log_file.write("%.3f STOPD %s %d %d\n" % (frame_t0, entity[0], entity[3][0], entity[3][1]))
                pass
            else:
                # Save it for next time; it's not expired yet:
                this_frame_entity_list.append(entity)

        # For next frame:
        last_frame_entity_list = this_frame_entity_list

        # Draw the found entities to screen:
        for entity in this_frame_entity_list:
            center_point = entity[3]
            c = entity[1]  # RGB color tuple
            cv.Circle(display_image, center_point, 20, cv.CV_RGB(c[0], c[1], c[2]), 1)
            cv.Circle(display_image, center_point, 15, cv.CV_RGB(c[0], c[1], c[2]), 1)
            cv.Circle(display_image, center_point, 10, cv.CV_RGB(c[0], c[1], c[2]), 2)
            cv.Circle(display_image, center_point, 5, cv.CV_RGB(c[0], c[1], c[2]), 3)

        #print "min_size is: " + str(min_size)
        # Listen for ESC or ENTER key
        c = cv.WaitKey(7) % 0x100
        if c == 27 or c == 10:
            break

        # Toggle which image to show
        if chr(c) == 'd':
            image_index = (image_index + 1) % len(image_list)
        image_name = image_list[image_index]

        # Display frame to user
        if image_name == "camera":
            image = camera_image
            cv.PutText(image, "Camera (Normal)", text_coord, text_font, text_color)
        elif image_name == "difference":
            image = difference
            cv.PutText(image, "Difference Image", text_coord, text_font, text_color)
        elif image_name == "display":
            image = display_image
            cv.PutText(image, "Targets (w/AABBs and contours)", text_coord,
                       text_font, text_color)
        elif image_name == "threshold":
            # Convert the image to color.
            cv.CvtColor(grey_image, display_image, cv.CV_GRAY2RGB)
            image = display_image  # Re-use display image here
            cv.PutText(image, "Motion Mask", text_coord, text_font, text_color)
        elif image_name == "faces":
            # Do face detection
            detect_faces(camera_image, haar_cascade, mem_storage)
            image = camera_image  # Re-use camera image here
            cv.PutText(image, "Face Detection", text_coord, text_font, text_color)

        size = cv.GetSize(image)
        large = cv.CreateImage((int(size[0] * display_ratio),
                                int(size[1] * display_ratio)),
                               image.depth, image.nChannels)
        cv.Resize(image, large, interpolation=cv.CV_INTER_CUBIC)
        cv.ShowImage("Target", large)

        if self.writer:
            cv.WriteFrame(self.writer, image)

        #log_file.flush()

        # If only using a camera, then there is no time.sleep() needed,
        # because the camera clips us to 15 fps. But if reading from a file,
        # we need this to keep the time-based target clipping correct:
        frame_t1 = time.time()

        # If reading from a file, put in a forced delay:
        if not self.writer:
            delta_t = frame_t1 - frame_t0
            if delta_t < (1.0 / 15.0):
                time.sleep((1.0 / 15.0) - delta_t)

    t1 = time.time()
    time_delta = t1 - t0
    processed_fps = float(frame_count) / time_delta
    print "Got %d frames. %.1f s. %f fps." % (frame_count, time_delta, processed_fps)
def DetectRedEyes(image, faceCascade, smileCascade):
    min_size = (20, 20)
    image_scale = 2
    haar_scale = 1.2
    min_neighbors = 2
    haar_flags = 0

    # Allocate the temporary images
    gray = cv.CreateImage((image.width, image.height), 8, 1)
    smallImage = cv.CreateImage((cv.Round(image.width / image_scale),
                                 cv.Round(image.height / image_scale)), 8, 1)

    # Convert color input image to grayscale
    cv.CvtColor(image, gray, cv.CV_BGR2GRAY)
    # Scale input image for faster processing
    cv.Resize(gray, smallImage, cv.CV_INTER_LINEAR)
    # Equalize the histogram
    cv.EqualizeHist(smallImage, smallImage)

    # Detect the faces
    faces = cv.HaarDetectObjects(smallImage, faceCascade, cv.CreateMemStorage(0),
                                 haar_scale, min_neighbors, haar_flags, min_size)

    # If faces are found
    if faces:
        #print faces
        for ((x, y, w, h), n) in faces:
            # the input to cv.HaarDetectObjects was resized, so scale the
            # bounding box of each face and convert it to two CvPoints
            #print "face"
            pt1 = (int(x * image_scale), int(y * image_scale))
            pt2 = (int((x + w) * image_scale), int((y + h) * image_scale))
            #cv.Rectangle(image, pt1, pt2, cv.RGB(255, 0, 0), 1, 8, 0)
            #cv.PutText(image, "face", pt1, font, cv.RGB(255, 0, 0))
            face_region = cv.GetSubRect(image, (x, int(y + (h / 4)), w, int(h / 2)))

            # split the face and search the lower half for a smile
            #cv.Rectangle(image, (pt1[0], (pt1[1] + (abs(pt1[1] - pt2[1]) / 2))), pt2, cv.RGB(0, 255, 0), 1, 8, 0)
            #cv.PutText(image, "lower", (pt1[0], (pt1[1] + (abs(pt1[1] - pt2[1]) / 2))), font, cv.RGB(0, 255, 0))
            cv.SetImageROI(image, (pt1[0],
                                   (pt1[1] + (abs(pt1[1] - pt2[1]) / 2)),
                                   pt2[0] - pt1[0],
                                   int((pt2[1] - (pt1[1] + (abs(pt1[1] - pt2[1]) / 2))))))

            smiles = cv.HaarDetectObjects(image, smileCascade, cv.CreateMemStorage(0),
                                          1.1, 5, 0, (15, 15))

            if smiles:
                #print smiles
                for smile in smiles:
                    cv.Rectangle(image,
                                 (smile[0][0], smile[0][1]),
                                 (smile[0][0] + smile[0][2], smile[0][1] + smile[0][3]),
                                 cv.RGB(0, 0, 255), 1, 8, 0)
                    cv.PutText(image, "smile", (smile[0][0], smile[0][1]),
                               font, cv.RGB(0, 0, 255))
                    cv.PutText(image, str(smile[1]),
                               (smile[0][0], smile[0][1] + smile[0][3]),
                               font, cv.RGB(0, 0, 255))
                    #print ((abs(smile[0][1] - smile[0][2]) / abs(pt1[0] - pt2[0])) * 100)
                    global smileness
                    smileness = smile[1]

            cv.ResetImageROI(image)
            #if smile[1] > 90:
            #    mqttc.publish("smiles", "got smile", 1)
            #    time.sleep(5)

            # (eye detection, currently disabled)
            #eyes = cv.HaarDetectObjects(image, eyeCascade, cv.CreateMemStorage(0),
            #                            haar_scale, min_neighbors, haar_flags, (15, 15))
            #if eyes:
            #    # For each eye found, draw a rectangle around it
            #    #print eyes
            #    for eye in eyes:
            #        cv.Rectangle(image,
            #                     (eye[0][0], eye[0][1]),
            #                     (eye[0][0] + eye[0][2], eye[0][1] + eye[0][3]),
            #                     cv.RGB(255, 0, 0), 1, 8, 0)

    cv.ResetImageROI(image)
    return image
def OnPaint(self, evt):
    if not self.timer.IsRunning():
        dc = wx.BufferedDC(wx.ClientDC(self), wx.NullBitmap, wx.BUFFER_VIRTUAL_AREA)
        dc.SetBackground(wx.Brush(wx.Colour(0, 0, 0)))
        return

    # Grab a frame from the camera
    if hasattr(cv, "QueryFrame"):
        # Old OpenCV API
        image_scale = 2
        min_size = (20, 20)
        haar_scale = 1.2
        min_neighbors = 2
        haar_flags = 0

        frame = cv.QueryFrame(CAMERA)
        cv.CvtColor(frame, frame, cv.CV_BGR2RGB)
        Img = wx.EmptyImage(frame.width, frame.height)
        Img.SetData(frame.tostring())
        self.bmp = wx.BitmapFromImage(Img)
        del Img
        largeur, hauteur = frame.width, frame.height

        gray = cv.CreateImage((largeur, hauteur), 8, 1)
        small_img = cv.CreateImage((cv.Round(frame.width / image_scale),
                                    cv.Round(hauteur / image_scale)), 8, 1)
        cv.CvtColor(frame, gray, cv.CV_BGR2GRAY)
        cv.Resize(gray, small_img, cv.CV_INTER_LINEAR)
        cv.EqualizeHist(small_img, small_img)
        listeVisages = cv.HaarDetectObjects(small_img, CASCADE, cv.CreateMemStorage(0),
                                            haar_scale, min_neighbors, haar_flags,
                                            min_size)
        # HaarDetectObjects returns ((x, y, w, h), n) pairs; keep only the
        # rects so both branches yield the same shape for the drawing loop
        listeVisages = [face for (face, n) in listeVisages]
    else:
        # New OpenCV API
        image_scale = 1
        ret, frame = CAMERA.read()
        hauteur, largeur = frame.shape[:2]
        frame = cv.resize(frame, (largeur, hauteur), cv.INTER_LINEAR)
        frame = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
        self.bmp = wx.Bitmap.FromBuffer(largeur, hauteur, frame)
        gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
        listeVisages = CASCADE.detectMultiScale(gray, 1.3, 5)

    # Draw the image
    x, y = (0, 0)
    try:
        dc = wx.BufferedDC(wx.ClientDC(self), wx.NullBitmap, wx.BUFFER_VIRTUAL_AREA)
        try:
            dc.SetBackground(wx.Brush(wx.Colour(0, 0, 0)))
        except:
            pass
        dc.Clear()
        dc.DrawBitmap(self.bmp, x, y)

        # Draw a rectangle around each detected face
        for (x, y, w, h) in listeVisages:
            dc.SetBrush(wx.TRANSPARENT_BRUSH)
            dc.SetPen(wx.Pen(wx.Colour(255, 0, 0), 2))
            dc.DrawRectangle(x * image_scale, y * image_scale,
                             w * image_scale, h * image_scale)
        self.listeVisages = listeVisages
        del dc
    except:
        pass
import cv2.cv as cv

orig = cv.LoadImage("../img/road.png")
im = cv.CreateMat(orig.height / 5, orig.width / 5, cv.CV_8UC3)
cv.Resize(orig, im)  # resize the original image
src = cv.GetSubRect(im, (10, 10, 30, 30))

minSat = 65

hsv = cv.CreateImage(cv.GetSize(src), 8, 3)
cv.CvtColor(src, hsv, cv.CV_BGR2HSV)

# Extract the H and S planes
h_plane = cv.CreateMat(src.rows, src.cols, cv.CV_8UC1)
s_plane = cv.CreateMat(src.rows, src.cols, cv.CV_8UC1)
cv.Split(hsv, h_plane, s_plane, None, None)
planes = [h_plane, s_plane]
#s_plane = cv.Threshold(s_plane, s_plane, minSat, 255, cv.CV_THRESH_BINARY)

h_bins = 30
s_bins = 32
hist_size = [h_bins, s_bins]
# hue varies from 0 (~0 deg red) to 180 (~360 deg red again)
h_ranges = [0, 180]
# saturation varies from 0 (black-gray-white) to 255 (pure spectrum color)
s_ranges = [0, 255]
ranges = [h_ranges, s_ranges]
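# A follow-on sketch in the same legacy cv API: build and fill the 2D
# hue-saturation histogram from `planes` and render it as a grid of gray
# cells (this mirrors OpenCV's own histogram sample; the cell size `scale`
# is an assumed value).
hist = cv.CreateHist([h_bins, s_bins], cv.CV_HIST_ARRAY, ranges, 1)
cv.CalcHist([cv.GetImage(p) for p in planes], hist)
(_, max_value, _, _) = cv.GetMinMaxHistValue(hist)

scale = 10
hist_img = cv.CreateImage((h_bins * scale, s_bins * scale), 8, 3)
for h in range(h_bins):
    for s in range(s_bins):
        bin_val = cv.QueryHistValue_2D(hist, h, s)
        # brighter cell = more pixels in that (hue, saturation) bin
        intensity = cv.Round(bin_val * 255 / max_value) if max_value else 0
        cv.Rectangle(hist_img,
                     (h * scale, s * scale),
                     ((h + 1) * scale - 1, (s + 1) * scale - 1),
                     cv.RGB(intensity, intensity, intensity), cv.CV_FILLED)
cv.ShowImage("H-S Histogram", hist_img)
cv.WaitKey(0)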
def detect_and_draw(self, originalImage):
    # allocate temporary images
    print type(originalImage)
    grayScaleFullImage = cv.CreateImage((originalImage.width, originalImage.height), 8, 1)
    smallScaleFullImage = cv.CreateImage((cv.Round(originalImage.width / image_scale),
                                          cv.Round(originalImage.height / image_scale)), 8, 1)

    # convert color input image to grayscale
    cv.CvtColor(originalImage, grayScaleFullImage, cv.CV_BGR2GRAY)

    # scale input image for faster processing
    cv.Resize(grayScaleFullImage, smallScaleFullImage, cv.CV_INTER_LINEAR)
    cv.EqualizeHist(smallScaleFullImage, smallScaleFullImage)

    if self.cascade:
        t = cv.GetTickCount()
        # detect faces
        faces = cv.HaarDetectObjects(smallScaleFullImage, self.cascade,
                                     cv.CreateMemStorage(0), haar_scale,
                                     min_neighbors, haar_flags, min_size)
        t = cv.GetTickCount() - t
        print "detection time = %gms" % (t / (cv.GetTickFrequency() * 1000.))

        if faces:
            print "detected face"
            for ((x, y, w, h), n) in faces:
                # the input to cv.HaarDetectObjects was resized, so scale the
                # bounding box of each face and convert it to two CvPoints
                pt1 = (int(x * image_scale), int(y * image_scale))
                pt11 = (int(x * image_scale) + 10, int(y * image_scale) + 10)
                pt2 = (int((x + w) * image_scale), int((y + h) * image_scale))

                # face
                cv.Rectangle(originalImage, pt1, pt2, cv.RGB(255, 0, 0), 3, 8, 0)

                if isOpticalFlow:
                    originalArray2 = cv.CloneImage(originalImage)
                    faceArea = cv.GetSubRect(originalArray2,
                                             (pt1[0], pt1[1],
                                              pt2[0] - pt1[0], pt2[1] - pt1[1]))
                    faceArea2 = cv.CloneMat(faceArea)
                    cv.ShowImage("face area", faceArea2)
                    self.MotionDetector.iterativeMotionDetector(faceArea2)

                # get the center of the rectangle
                centerX = (pt1[0] + pt2[0]) / 2
                centerY = (pt1[1] + pt2[1]) / 2 + int(0.1 * w * image_scale)

                # around nose region
                cv.Rectangle(originalImage, (centerX, centerY),
                             (centerX + 10, centerY + 10), cv.RGB(255, 0, 255))

                # detect left eye
                self.detectLeftEye(originalImage, self.cascade2, pt1, centerX, centerY)

                # detect right eye
                rightEyeArea = cv.GetSubRect(originalImage,
                                             (centerX, pt1[1],
                                              pt2[0] - centerX, centerY - pt1[1]))
                self.detectRightEye(originalImage, rightEyeArea, centerX, centerY,
                                    pt1, self.cascade2)
                # self.detectNose(originalImage, cascade4, centerX, centerY)
                # detect mouth
                mouthArea = cv.GetSubRect(originalImage,
                                          (pt1[0], centerY,
                                           pt2[0] - pt1[0], pt2[1] - centerY))
                self.detectMouth(originalImage, mouthArea, pt1, centerY, self.cascade3)

                # start tracking the face
                if not isOpticalFlow:
                    originalArray2 = cv.CloneImage(originalImage)
                    faceArea = cv.GetSubRect(originalArray2,
                                             (pt1[0], pt1[1],
                                              pt2[0] - pt1[0], pt2[1] - pt1[1]))
                    faceArea2 = cv.CloneMat(faceArea)
                    return (True, faceArea2, originalImage, pt1, pt2)

    cv.ShowImage("result", originalImage)
    return (False, originalImage, None, None, None)
def parse_detail_page(self, page, link):
    # e.g. http://openlaw.cn/judgement/ea86414b0cac4075a3b88fcd9f8d4139
    doc_id = link.split('/')[-1]
    d = pq(page)

    # check whether a CAPTCHA page appeared
    try:
        if u'请输入验证码' in d.text():
            print 'waiting for CAPTCHA entry > '
            imgfoler = '/img/'
            basic = 'img'
            webimage = imgfoler + utility.get_unique_name()
            uniquename = basic + webimage
            # full-page screenshot saved as *_s.png
            self.driver.save_screenshot(uniquename + '_s.png')
            captcha_image = self.driver.find_element_by_xpath(
                '//img[@id="kaptcha"]')
            loc = captcha_image.location
            loc['x'] = int(loc['x'])
            loc['y'] = int(loc['y'])
            # crop the CAPTCHA region out of the screenshot
            image = cv.LoadImage(uniquename + '_s.png', True)
            out = cv.CreateImage((200, 50), image.depth, 3)
            cv.SetImageROI(image, (loc['x'], loc['y'], 200, 50))
            cv.Resize(image, out)
            imgname = uniquename + '.jpg'
            cv.SaveImage(imgname, out)
            # decode the CAPTCHA with an external service
            result = captchaservice.getCaptcha(imgname)
            dictresult = json.loads(result)
            if dictresult.has_key('Error'):
                raise Exception('service does not work well !')
            code = dictresult['Result']
            inputkey = self.driver.find_element_by_xpath(
                '//input[@class="search-field"]')
            inputkey.clear()
            inputkey.send_keys(code)
            time.sleep(2)
            searchbtn = self.driver.find_element_by_xpath(
                '//input[@type="submit"]')
            searchbtn.click()
            time.sleep(10)
    except:
        pass

    data = {}
    title = d('h2.entry-title').text()
    if '404' in d('title').text():
        print ' [!] ERROR page, 404 not found, %s' % link
        return
    if not title:
        # if the page is empty, requeue the link
        print ' [!] Empty page, resend %s' % link
        self.channel.basic_publish(
            exchange='',
            routing_key='doc_queue',
            body=link,
            properties=pika.BasicProperties(
                delivery_mode=2,  # make the message persistent
            ))
        time.sleep(.5)
        return

    # extract structured fields from the sidebar
    reason = trim_colon(
        d('aside#sidebar section').eq(0).find('li').filter(
            lambda i: u'案由' in pq(this).text()).text())
    court = trim_colon(
        d('aside#sidebar section').eq(0).find('li').filter(
            lambda i: u'法院' in pq(this).text()).text())
    doc_type = trim_colon(
        d('aside#sidebar section').eq(0).find('li').filter(
            lambda i: u'类型' in pq(this).text()).text())
    status = trim_colon(
        d('aside#sidebar section').eq(0).find('li').filter(
            lambda i: u'程序' in pq(this).text()).text())
    date = trim_colon(d('li.ht-kb-em-date').text()).strip()  # strip() removes surrounding whitespace
    regx = re.match(r'\d{4}-\d{2}-\d{2}', date)
    if not regx:
        date = '1970-01-01'
    case_id = trim_colon(d('li.ht-kb-em-category').text())
    content = d('div#entry-cont').text().strip(
        u' 允许所有人 查看 该批注 允许所有人 编辑 该批注 取消 保存 Annotate')

    # parties sidebar
    persons = d('aside#sidebar section').eq(1)
    # plaintiff / petitioner / appellant
    accuser = filter_person(persons, [u'原告', u'审请人', u'上诉人', u'再审申请人'])
    # defendant / respondent
    accused = filter_person(persons, [u'被告', u'被审请人', u'被上诉人'])
    # presiding judge
    chief_judge = filter_person(persons, [u'审判长'])
    # lawyers
    lawyers = filter_lawyers(persons)

    data['title'] = title
    data['title_search'] = title
    data['reason'] = reason
    data['court'] = court
    data['date'] = date
    data['doc_type'] = doc_type
    data['status'] = status
    data['content'] = content
    data['case_id'] = case_id
    data['lawyers'] = lawyers
    data['accuser'] = accuser
    data['accused'] = accused
    data['chief_judge'] = chief_judge
    data['url'] = link

    # index into Elasticsearch
    # self.es.index(index=ES_INDEX, doc_type=ES_TYPE, id=doc_id, body=data)

    extraction = {}
    extraction['realpath'] = link
    extraction['data'] = data
    data1 = {}
    data1['extraction'] = extraction
    convertedXml = xmltodict.unparse(data1)
    try:
        folder = './result/'
        filename = folder + data['case_id'] + '.xml'
        f = open(filename, 'w')
        f.write(convertedXml)
        f.close()
    except:
        print 'error...'
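The parser above leans on helpers defined elsewhere (trim_colon, filter_person, filter_lawyers). A rough sketch of what trim_colon and filter_person might look like, assuming sidebar entries of the form "标签:值" (label: value); the exact originals are not shown in this file:

def trim_colon(text):
    # keep only what follows the first full-width (or ASCII) colon
    for sep in (u':', u':'):
        if sep in text:
            return text.split(sep, 1)[1].strip()
    return text.strip()


def filter_person(persons, labels):
    # collect the sidebar entries whose label matches any of the given roles
    matched = persons.find('li').filter(
        lambda i: any(label in pq(this).text() for label in labels))
    return trim_colon(matched.text())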
def detect_and_draw(img, cascade):
    # allocate temporary images
    gray = cv.CreateImage((img.width, img.height), 8, 1)
    small_img = cv.CreateImage((cv.Round(img.width / image_scale),
                                cv.Round(img.height / image_scale)), 8, 1)

    # convert color input image to grayscale
    cv.CvtColor(img, gray, cv.CV_BGR2GRAY)

    # scale input image for faster processing
    cv.Resize(gray, small_img, cv.CV_INTER_LINEAR)
    cv.EqualizeHist(small_img, small_img)

    # if the resolution changes, the crosshair coordinates below
    # must change with it
    cv.Line(img, (width / middle_w, 0), (width / middle_w, height),
            (0, 10, 255), 3)
    cv.Line(img, ((width / middle_w - 20), (height / middle_h - 10)),
            ((width / middle_w - 20), (height / middle_h + 10)), (0, 10, 255), 2)
    cv.Line(img, ((width / middle_w + 20), (height / middle_h - 10)),
            ((width / middle_w + 20), (height / middle_h + 10)), (0, 10, 255), 2)
    cv.Line(img, (0, height / middle_h), (width, height / middle_h),
            (0, 10, 255), 3)
    cv.Line(img, ((width / middle_w - 10), (height / middle_h - 20)),
            ((width / middle_w + 10), (height / middle_h - 20)), (0, 10, 255), 2)
    cv.Line(img, ((width / middle_w - 10), (height / middle_h + 20)),
            ((width / middle_w + 10), (height / middle_h + 20)), (0, 10, 255), 2)
    # cv.ShowImage("camera", img)

    if cascade:
        t = cv.GetTickCount()
        faces = cv.HaarDetectObjects(small_img, cascade,
                                     cv.CreateMemStorage(0), haar_scale,
                                     min_neighbors, haar_flags, min_size)
        t = cv.GetTickCount() - t
        print "detection time = %gms" % (t / (cv.GetTickFrequency() * 1000.))
        if faces:
            for ((x, y, w, h), n) in faces:
                # the input to cv.HaarDetectObjects was resized, so scale the
                # bounding box of each face and convert it to two CvPoints
                pt1 = (int(x * image_scale), int(y * image_scale))
                pt2 = (int((x + w) * image_scale), int((y + h) * image_scale))
                cv.Rectangle(img, pt1, pt2, cv.RGB(255, 0, 0), 3, 8, 0)

                # center of the face rectangle
                cx = (int(x * image_scale) + int((x + w) * image_scale)) / 2
                cy = (int(y * image_scale) + int((y + h) * image_scale)) / 2
                print cx, cy

                # draw a status label; (5, 30) is the top-left corner of the
                # text in the window and the last argument is the text color
                if (width * 3 / 7) <= cx <= (width * 4 / 7) and \
                   (height * 3 / 7) <= cy <= (height * 4 / 7):
                    cv.PutText(img, "Locking", (5, 30), font, (0, 0, 255))
                else:
                    cv.PutText(img, "searching....", (160, 30), font, (0, 255, 0))

                # steer toward the face: '4'/'6' pan left/right, '2'/'8' tilt;
                # repeated digits request a larger correction
                if cx < img.width * 3 / 7:
                    arduino.write('4')
                    print '4'
                if cx < img.width * 2 / 7:
                    arduino.write('44')
                    print '44'
                if cx < img.width / 7:
                    arduino.write('4444')
                    print '4444'
                if cx > img.width * 4 / 7:
                    arduino.write('6')
                    print '6'
                if cx > img.width * 5 / 7:
                    arduino.write('66')
                    print '66'
                if cx > img.width * 6 / 7:
                    arduino.write('6666')
                    print '6666'
                if cy < img.height * 3 / 7:
                    arduino.write('2')
                    print '2'
                if cy < img.height * 2 / 7:
                    arduino.write('22')
                    print '22'
                if cy < img.height / 7:
                    arduino.write('2222')
                    print '2222'
                if cy > img.height * 4 / 7:
                    arduino.write('8')
                    print '8'
                if cy > img.height * 5 / 7:
                    arduino.write('88')
                    print '88'
                if cy > img.height * 6 / 7:
                    arduino.write('8888')
                    print '8888'
                break

    cv.ShowImage("result", img)
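The pan/tilt branch above assumes a module-level `arduino` serial handle. A plausible setup with pyserial; the port name and baud rate are assumptions and must match the actual Arduino sketch:

import serial

# hypothetical port and baud rate; adjust to the real connection
arduino = serial.Serial('/dev/ttyUSB0', 9600, timeout=1)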
def view_thread():
    '''image viewing thread - this runs on the ground station'''
    from cuav.lib import cuav_mosaic
    state = mpstate.camera_state

    bsend = block_xmit.BlockSender(state.settings.gcs_view_port,
                                   bandwidth=state.settings.bandwidth)
    state.bsocket = MavSocket(mpstate.mav_master[0])
    state.bsend2 = block_xmit.BlockSender(mss=96,
                                          sock=state.bsocket,
                                          dest_ip='mavlink',
                                          dest_port=0,
                                          backlog=5,
                                          debug=False)
    state.bsend2.set_bandwidth(state.settings.bandwidth2)

    view_window = False
    image_count = 0
    thumb_count = 0
    image_total_bytes = 0
    jpeg_total_bytes = 0
    thumb_total_bytes = 0
    region_count = 0
    mosaic = None
    thumbs_received = set()

    view_dir = os.path.join(state.camera_dir, "view")
    thumb_dir = os.path.join(state.camera_dir, "thumb")
    cuav_util.mkdir_p(view_dir)
    cuav_util.mkdir_p(thumb_dir)

    img_window = mp_image.MPImage(title='Camera')

    mpstate.console.set_status('Images', 'Images %u' % image_count, row=6)
    mpstate.console.set_status('Lost', 'Lost %u' % 0, row=6)
    mpstate.console.set_status('Regions', 'Regions %u' % region_count, row=6)
    mpstate.console.set_status('JPGSize', 'JPGSize %.0f' % 0.0, row=6)
    mpstate.console.set_status('XMITQ', 'XMITQ %.0f' % 0.0, row=6)
    mpstate.console.set_status('Thumbs', 'Thumbs %u' % thumb_count, row=7)
    mpstate.console.set_status('ThumbSize', 'ThumbSize %.0f' % 0.0, row=7)
    mpstate.console.set_status('ImageSize', 'ImageSize %.0f' % 0.0, row=7)

    ack_time = time.time()
    while not state.unload.wait(0.02):
        if state.viewing:
            tnow = time.time()
            if tnow - ack_time > 0.1:
                bsend.tick(packet_count=1000,
                           max_queue=state.settings.maxqueue1)
                state.bsend2.tick(packet_count=1000,
                                  max_queue=state.settings.maxqueue2)
                if state.bsend_slave is not None:
                    state.bsend_slave.tick(packet_count=1000)
                ack_time = tnow
            if not view_window:
                view_window = True
                mosaic = cuav_mosaic.Mosaic(slipmap=mpstate.map,
                                            C=state.c_params)
                if state.boundary_polygon is not None:
                    mosaic.set_boundary(state.boundary_polygon)
                if mpstate.continue_mode:
                    reload_mosaic(mosaic)

            # check for keyboard events
            mosaic.check_events()

            buf = bsend.recv(0)
            if buf is None:
                buf = state.bsend2.recv(0)
            if buf is None:
                continue
            try:
                obj = cPickle.loads(str(buf))
                if obj is None:
                    continue
            except Exception:
                continue

            if state.settings.gcs_slave is not None:
                if state.bsend_slave is None:
                    state.bsend_slave = block_xmit.BlockSender(
                        0, bandwidth=state.settings.bandwidth * 10,
                        debug=False)
                state.bsend_slave.send(buf,
                                       dest=(state.settings.gcs_slave,
                                             state.settings.gcs_view_port),
                                       priority=1)

            if isinstance(obj, ThumbPacket):
                # we've received a set of thumbnails from the plane
                # for a positive hit
                if obj.frame_time in thumbs_received:
                    continue
                thumbs_received.add(obj.frame_time)

                thumb_total_bytes += len(buf)

                # save the thumbnails
                thumb_filename = '%s/v%s.jpg' % (
                    thumb_dir, cuav_util.frame_time(obj.frame_time))
                chameleon.save_file(thumb_filename, obj.thumb)
                composite = cv.LoadImage(thumb_filename)
                thumbs = cuav_mosaic.ExtractThumbs(composite, len(obj.regions))

                # log the joe positions
                filename = '%s/v%s.jpg' % (
                    view_dir, cuav_util.frame_time(obj.frame_time))
                pos = obj.pos
                log_joe_position(pos, obj.frame_time, obj.regions,
                                 filename, thumb_filename)

                # update the mosaic and map
                mosaic.set_brightness(state.settings.brightness)
                mosaic.add_regions(obj.regions, thumbs, filename, pos=pos)

                # update console display
                region_count += len(obj.regions)
                state.frame_loss = obj.frame_loss
                state.xmit_queue = obj.xmit_queue
                thumb_count += 1

                mpstate.console.set_status('Lost', 'Lost %u' % state.frame_loss)
                mpstate.console.set_status('Regions', 'Regions %u' % region_count)
                mpstate.console.set_status('XMITQ', 'XMITQ %.0f' % state.xmit_queue)
                mpstate.console.set_status('Thumbs', 'Thumbs %u' % thumb_count)
                mpstate.console.set_status(
                    'ThumbSize',
                    'ThumbSize %.0f' % (thumb_total_bytes / thumb_count))

            if isinstance(obj, ImagePacket):
                # we have an image from the plane
                image_total_bytes += len(buf)

                state.xmit_queue = obj.xmit_queue
                mpstate.console.set_status('XMITQ', 'XMITQ %.0f' % state.xmit_queue)

                # save it to disk
                filename = '%s/v%s.jpg' % (
                    view_dir, cuav_util.frame_time(obj.frame_time))
                chameleon.save_file(filename, obj.jpeg)
                img = cv.LoadImage(filename)
                if img.width == 1280:
                    display_img = cv.CreateImage((640, 480), 8, 3)
                    cv.Resize(img, display_img)
                else:
                    display_img = img

                mosaic.add_image(obj.frame_time, filename, obj.pos)

                cv.ConvertScale(display_img, display_img,
                                scale=state.settings.brightness)
                img_window.set_image(display_img, bgr=True)

                # update console
                image_count += 1
                jpeg_total_bytes += len(obj.jpeg)
                state.jpeg_size = 0.95 * state.jpeg_size + 0.05 * len(obj.jpeg)
                mpstate.console.set_status('Images', 'Images %u' % image_count)
                mpstate.console.set_status(
                    'JPGSize', 'JPGSize %.0f' % (jpeg_total_bytes / image_count))
                mpstate.console.set_status(
                    'ImageSize',
                    'ImageSize %.0f' % (image_total_bytes / image_count))

            if isinstance(obj, CommandResponse):
                print('REMOTE: %s' % obj.response)
        else:
            if view_window:
                view_window = False
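The dispatch above relies on packet classes pickled on the aircraft side and defined elsewhere in cuav. A rough sketch of just the fields this loop reads; the constructor shape is an assumption:

class ThumbPacket(object):
    '''thumbnails plus metadata for a positive hit'''
    def __init__(self, frame_time, regions, thumb, frame_loss, xmit_queue, pos):
        self.frame_time = frame_time
        self.regions = regions
        self.thumb = thumb
        self.frame_loss = frame_loss
        self.xmit_queue = xmit_queue
        self.pos = pos


class ImagePacket(object):
    '''a full downlinked JPEG frame'''
    def __init__(self, frame_time, jpeg, xmit_queue, pos):
        self.frame_time = frame_time
        self.jpeg = jpeg
        self.xmit_queue = xmit_queue
        self.pos = pos


class CommandResponse(object):
    '''text response to a remote command'''
    def __init__(self, response):
        self.response = response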
import cv2.cv as cv

im = cv.LoadImage("../pic/QQ20171118-0.jpg")  # load the image

# create an image half the size of the original to hold the thumbnail
thumb = cv.CreateImage((im.width / 2, im.height / 2), 8, 3)
cv.Resize(im, thumb)  # resize the original image into thumb
# cv.PyrDown(im, thumb)

cv.ShowImage('Hello World', im)    # show the original image
cv.ShowImage("thumb.png", thumb)   # show the thumbnail
cv.SaveImage("thumb.png", thumb)   # save the thumbnail
cv.WaitKey(0)
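The commented-out cv.PyrDown would also work here, but only because the thumbnail is exactly half the source size in each dimension: PyrDown requires a half-size destination, while cv.Resize accepts any target size. For comparison, with the same im as above:

# PyrDown: destination must be exactly half the source in each dimension
pyr_thumb = cv.CreateImage((im.width / 2, im.height / 2), 8, 3)
cv.PyrDown(im, pyr_thumb)

# Resize: any destination size works, e.g. a quarter-size thumbnail
quarter = cv.CreateImage((im.width / 4, im.height / 4), 8, 3)
cv.Resize(im, quarter)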
def iterativeMotionDetector(self, display_image):
    size = cv.GetSize(display_image)
    copy_image = cv.CloneMat(cv.GetMat(display_image))

    self.frame_count += 1
    self.frame_t0 = time.time()

    cv.Resize(copy_image, self.thumbnail)
    cv.ShowImage("input image 2", self.thumbnail)
    color_image = self.thumbnail
    display_image = color_image

    # smooth to get rid of false positives
    cv.Smooth(color_image, color_image, cv.CV_GAUSSIAN, 19, 0)

    # Use the running average as the static background.
    # a = 0.020 leaves artifacts lingering way too long.
    # a = 0.320 works well at 320x240, 15fps. (1/a is roughly num frames.)
    cv.RunningAvg(color_image, self.running_average_image, 0.320, None)

    # convert the scale of the moving average
    cv.ConvertScale(self.running_average_image,
                    self.running_average_in_display_color_depth, 1.0, 0.0)

    cv.CvtColor(color_image, self.grey_original_image, cv.CV_RGB2GRAY)
    grey_image_array = np.asarray(cv.GetMat(self.grey_original_image),
                                  np.uint8, 1)
    cv.CvtColor(self.running_average_in_display_color_depth,
                self.grey_average_image, cv.CV_RGB2GRAY)
    running_image_array = np.asarray(cv.GetMat(self.grey_average_image),
                                     np.uint8, 1)

    # subtract the current frame from the moving average
    cv.AbsDiff(color_image, self.running_average_in_display_color_depth,
               self.difference)
    cv.ShowImage("difference ", self.difference)

    # convert the image to greyscale
    cv.CvtColor(self.difference, self.grey_image, cv.CV_RGB2GRAY)

    # threshold the image to a black and white motion mask
    cv.Threshold(self.grey_image, self.grey_image, 2, 255, cv.CV_THRESH_BINARY)
    # smooth and threshold again to eliminate "sparkles"
    cv.Smooth(self.grey_image, self.grey_image, cv.CV_GAUSSIAN, 19, 0)
    cv.Threshold(self.grey_image, self.grey_image, 240, 255,
                 cv.CV_THRESH_BINARY)
    cv.ShowImage("binary mask", self.grey_image)

    self.opticalFlow.makeOpticalFlow(np.asarray(copy_image, np.uint8, 3),
                                     self.grey_image)

    grey_image_as_array = np.asarray(cv.GetMat(self.grey_image))
    non_black_coords_array = np.where(grey_image_as_array > 3)
    # convert from np.where()'s two separate lists to one list of (x, y) tuples
    non_black_coords_array = zip(non_black_coords_array[1],
                                 non_black_coords_array[0])

    points = []  # holds either pixel coords or polygon coords
    bounding_box_list = []

    # now calculate movements using the white pixels as "motion" data
    contour = cv.FindContours(self.grey_image, self.mem_storage,
                              cv.CV_RETR_CCOMP, cv.CV_CHAIN_APPROX_SIMPLE)
    levels = 10
    while contour:
        bounding_rect = cv.BoundingRect(list(contour))
        point1 = (bounding_rect[0], bounding_rect[1])
        point2 = (bounding_rect[0] + bounding_rect[2],
                  bounding_rect[1] + bounding_rect[3])
        bounding_box_list.append((point1, point2))
        polygon_points = cv.ApproxPoly(list(contour), self.mem_storage,
                                       cv.CV_POLY_APPROX_DP)

        # to track polygon points only (instead of every pixel):
        # points += list(polygon_points)

        # draw the contours
        cv.DrawContours(color_image, contour, cv.CV_RGB(255, 0, 0),
                        cv.CV_RGB(0, 255, 0), levels, 3, 0, (0, 0))
        cv.FillPoly(self.grey_image, [list(polygon_points)],
                    cv.CV_RGB(255, 255, 255), 0, 0)
        cv.PolyLine(display_image, [polygon_points], 0,
                    cv.CV_RGB(255, 255, 255), 1, 0, 0)
        # cv.Rectangle(display_image, point1, point2, cv.CV_RGB(120,120,120), 1)

        contour = contour.h_next()

    # Find the average size of the bboxes (targets), then remove any tiny
    # bboxes (which are probably just noise). "Tiny" is defined as any box
    # with 1/10th the area of the average box. This reduces false positives
    # on tiny "sparkles" noise.
    box_areas = []
    for box in bounding_box_list:
        # each box is ((x1, y1), (x2, y2))
        box_width = box[1][0] - box[0][0]
        box_height = box[1][1] - box[0][1]
        box_areas.append(box_width * box_height)

    average_box_area = 0.0
    if len(box_areas):
        average_box_area = float(sum(box_areas)) / len(box_areas)

    trimmed_box_list = []
    for box in bounding_box_list:
        box_width = box[1][0] - box[0][0]
        box_height = box[1][1] - box[0][1]
        # only keep the box if it's not a tiny noise box
        if (box_width * box_height) > average_box_area * 0.1:
            trimmed_box_list.append(box)

    bounding_box_list = merge_collided_bboxes(trimmed_box_list)

    # draw the merged box list
    for box in bounding_box_list:
        cv.Rectangle(display_image, box[0], box[1], cv.CV_RGB(0, 255, 0), 1)

    # here are our estimate points to track, based on merged & trimmed boxes
    estimated_target_count = len(bounding_box_list)

    # Don't allow target "jumps" from few to many or many to few.
    # Only change the number of targets up to one target per n seconds.
    # This fixes the "exploding number of targets" when something stops
    # moving and the motion erodes to disparate little puddles.
    if self.frame_t0 - self.last_target_change_t < .350:  # 1 change per 0.35 secs
        estimated_target_count = self.last_target_count
    else:
        if self.last_target_count - estimated_target_count > 1:
            estimated_target_count = self.last_target_count - 1
        if estimated_target_count - self.last_target_count > 1:
            estimated_target_count = self.last_target_count + 1
        self.last_target_change_t = self.frame_t0

    # clip to the user-supplied maximum
    estimated_target_count = min(estimated_target_count, self.max_targets)

    # The estimated_target_count at this point is the maximum number of
    # targets we want to look for. If kmeans decides that one of our
    # candidate bboxes is not actually a target, we remove it below.

    # use the numpy values directly (treating all pixels as points)
    points = non_black_coords_array
    center_points = []

    if len(points):
        # If we have all the "target_count" targets from last frame,
        # use the previously known targets (for greater accuracy).
        k_or_guess = max(estimated_target_count, 1)  # need at least one target
        if len(self.codebook) == estimated_target_count:
            k_or_guess = self.codebook

        # points = vq.whiten(array(points))  # Don't do this! Ruins everything.
        self.codebook, distortion = vq.kmeans(array(points), k_or_guess)

        # convert to tuples
        for center_point in self.codebook:
            center_point = (int(center_point[0]), int(center_point[1]))
            center_points.append(center_point)

    # Now we have targets that are NOT computed from bboxes -- just movement
    # weights (according to kmeans). If any two targets fall within the same
    # bbox, average them into a single target.
    # (Any kmeans targets not within a bbox are also kept.)
    trimmed_center_points = []
    removed_center_points = []
    for box in bounding_box_list:
        # find the centers within this box
        center_points_in_box = []
        for center_point in center_points:
            if box[0][0] < center_point[0] < box[1][0] and \
               box[0][1] < center_point[1] < box[1][1]:
                # this point is within the box
                center_points_in_box.append(center_point)

        # now see if there is more than one; if so, merge them
        if len(center_points_in_box) > 1:
            # two separate lists (x_list = y_list = [] would alias them)
            x_list, y_list = [], []
            for point in center_points_in_box:
                x_list.append(point[0])
                y_list.append(point[1])
            average_x = int(float(sum(x_list)) / len(x_list))
            average_y = int(float(sum(y_list)) / len(y_list))
            trimmed_center_points.append((average_x, average_y))
            # record that they were removed
            removed_center_points += center_points_in_box
        if len(center_points_in_box) == 1:
            trimmed_center_points.append(center_points_in_box[0])  # just use it

    # If there are any center_points not within a bbox, just use them.
    # (It's probably a cluster comprised of a bunch of small bboxes.)
    for center_point in center_points:
        if (center_point not in trimmed_center_points) and \
           (center_point not in removed_center_points):
            trimmed_center_points.append(center_point)

    # determine if there are any new (or lost) targets
    actual_target_count = len(trimmed_center_points)
    self.last_target_count = actual_target_count

    # Now build the list of physical entities (objects).
    # An entity is a list: [name, color, last_time_seen, last_known_coords]
    this_frame_entity_list = []
    for target in trimmed_center_points:
        # is this a target near a prior entity (same physical entity)?
        entity_found = False
        entity_distance_dict = {}

        for entity in self.last_frame_entity_list:
            entity_coords = entity[3]
            delta_x = entity_coords[0] - target[0]
            delta_y = entity_coords[1] - target[1]
            distance = sqrt(pow(delta_x, 2) + pow(delta_y, 2))
            entity_distance_dict[distance] = entity

        # did we find any non-claimed entities (nearest to furthest)?
        distance_list = entity_distance_dict.keys()
        distance_list.sort()
        for distance in distance_list:
            # yes; see if we can claim the nearest one
            nearest_possible_entity = entity_distance_dict[distance]
            # don't consider entities that are already claimed
            if nearest_possible_entity in this_frame_entity_list:
                continue
            # found the nearest entity to claim
            entity_found = True
            nearest_possible_entity[2] = self.frame_t0  # update last_time_seen
            nearest_possible_entity[3] = target  # update the new location
            this_frame_entity_list.append(nearest_possible_entity)
            break

        if entity_found == False:
            # it's a new entity
            color = (random.randint(0, 255),
                     random.randint(0, 255),
                     random.randint(0, 255))
            name = hashlib.md5(str(self.frame_t0) + str(color)).hexdigest()[:6]
            last_time_seen = self.frame_t0
            new_entity = [name, color, last_time_seen, target]
            this_frame_entity_list.append(new_entity)

    # now "delete" any not-found entities which have expired
    entity_ttl = 1.0  # 1 sec
    for entity in self.last_frame_entity_list:
        last_time_seen = entity[2]
        if self.frame_t0 - last_time_seen > entity_ttl:
            # it's gone
            pass
        else:
            # save it for next time... not expired yet
            this_frame_entity_list.append(entity)

    # for the next frame
    self.last_frame_entity_list = this_frame_entity_list

    # draw the found entities to screen
    for entity in this_frame_entity_list:
        center_point = entity[3]
        c = entity[1]  # RGB color tuple
        cv.Circle(display_image, center_point, 20, cv.CV_RGB(c[0], c[1], c[2]), 1)
        cv.Circle(display_image, center_point, 15, cv.CV_RGB(c[0], c[1], c[2]), 1)
        cv.Circle(display_image, center_point, 10, cv.CV_RGB(c[0], c[1], c[2]), 2)
        cv.Circle(display_image, center_point, 5, cv.CV_RGB(c[0], c[1], c[2]), 3)

    # (the original keyboard toggle between camera / difference / display /
    # threshold / faces views is left commented out)
    image1 = display_image
    cv.ShowImage("Target 1", image1)

    # if self.writer:
    #     cv.WriteFrame(self.writer, image1)

    # If only using a camera, no time.sleep() is needed because the camera
    # clips us to 15 fps. But if reading from a file, we need this to keep
    # the time-based target clipping correct:
    frame_t1 = time.time()
    if not self.writer:
        delta_t = frame_t1 - self.frame_t0
        if delta_t < (1.0 / 15.0):
            time.sleep((1.0 / 15.0) - delta_t)
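merge_collided_bboxes() is called above but defined elsewhere. A minimal sketch of the merging it is expected to perform: repeatedly union any two overlapping boxes, with boxes given as ((x1, y1), (x2, y2)) pairs, until no collisions remain:

def merge_collided_bboxes(box_list):
    # repeatedly merge any two overlapping boxes until none collide
    boxes = list(box_list)
    merged = True
    while merged:
        merged = False
        for i in range(len(boxes)):
            for j in range(i + 1, len(boxes)):
                (a1, a2), (b1, b2) = boxes[i], boxes[j]
                # axis-aligned overlap test on both axes
                if a1[0] <= b2[0] and b1[0] <= a2[0] and \
                   a1[1] <= b2[1] and b1[1] <= a2[1]:
                    # replace box i with the union, drop box j
                    boxes[i] = ((min(a1[0], b1[0]), min(a1[1], b1[1])),
                                (max(a2[0], b2[0]), max(a2[1], b2[1])))
                    del boxes[j]
                    merged = True
                    break
            if merged:
                break
    return boxes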
def _wipeMix(self, wipeMode, wipeConfig, level, image1, image2, mixMat):
    if wipeMode == WipeMode.Push:
        wipeDirection = wipeConfig
        # first pass: copy the incoming image2 region
        if wipeDirection < 0.25:
            wipePosX = int(self._internalResolutionX * level)
            sourceLeft = self._internalResolutionX - wipePosX
            sourceTop = 0
            sourceWidth = wipePosX
            sourceHeight = self._internalResolutionY
            destLeft = 0
            destTop = 0
        elif wipeDirection < 0.5:
            wipePosX = self._internalResolutionX - int(self._internalResolutionX * level)
            sourceLeft = 0
            sourceTop = 0
            sourceWidth = self._internalResolutionX - wipePosX
            sourceHeight = self._internalResolutionY
            destLeft = self._internalResolutionX - (self._internalResolutionX - wipePosX)
            destTop = 0
        elif wipeDirection < 0.75:
            wipePosY = int(self._internalResolutionY * level)
            sourceLeft = 0
            sourceTop = self._internalResolutionY - wipePosY
            sourceWidth = self._internalResolutionX
            sourceHeight = wipePosY
            destLeft = 0
            destTop = 0
        else:
            wipePosY = self._internalResolutionY - int(self._internalResolutionY * level)
            sourceLeft = 0
            sourceTop = 0
            sourceWidth = self._internalResolutionX
            sourceHeight = self._internalResolutionY - wipePosY
            destLeft = 0
            destTop = self._internalResolutionY - (self._internalResolutionY - wipePosY)
        destWidth = sourceWidth
        destHeight = sourceHeight
        src_region = cv.GetSubRect(
            image2, (sourceLeft, sourceTop, sourceWidth, sourceHeight))
        dst_region = cv.GetSubRect(
            mixMat, (destLeft, destTop, destWidth, destHeight))
        if image1 is None:
            cv.SetZero(mixMat)
            # fill the destination before returning over a black background
            cv.Copy(src_region, dst_region)
            return mixMat
        cv.Copy(src_region, dst_region)

        # second pass: copy the remaining image1 region
        if wipeDirection < 0.25:
            wipePosX = int(self._internalResolutionX * level)
            sourceLeft = wipePosX
            sourceTop = 0
            sourceWidth = self._internalResolutionX - wipePosX
            sourceHeight = self._internalResolutionY
            destLeft = wipePosX
            destTop = 0
        elif wipeDirection < 0.5:
            wipePosX = self._internalResolutionX - int(self._internalResolutionX * level)
            sourceLeft = 0
            sourceTop = 0
            sourceWidth = wipePosX
            sourceHeight = self._internalResolutionY
            destLeft = 0
            destTop = 0
        elif wipeDirection < 0.75:
            wipePosY = int(self._internalResolutionY * level)
            sourceLeft = 0
            sourceTop = wipePosY
            sourceWidth = self._internalResolutionX
            sourceHeight = self._internalResolutionY - wipePosY
            destLeft = 0
            destTop = wipePosY
        else:
            wipePosY = self._internalResolutionY - int(self._internalResolutionY * level)
            sourceLeft = 0
            sourceTop = 0
            sourceWidth = self._internalResolutionX
            sourceHeight = wipePosY
            destLeft = 0
            destTop = 0
        destWidth = sourceWidth
        destHeight = sourceHeight
        src_region = cv.GetSubRect(
            image1, (sourceLeft, sourceTop, sourceWidth, sourceHeight))
        dst_region = cv.GetSubRect(
            mixMat, (destLeft, destTop, destWidth, destHeight))
        cv.Copy(src_region, dst_region)
        return mixMat

    if wipeMode == WipeMode.Noize:
        scaleArg = wipeConfig
        noizeMask = getNoizeMask(level, self._internalResolutionX,
                                 self._internalResolutionY,
                                 1.0 + (19.0 * scaleArg))
        if image1 is None:
            cv.SetZero(mixMat)
            cv.Copy(image2, mixMat, noizeMask)
            return mixMat
        cv.Copy(image2, image1, noizeMask)
        return image1

    if wipeMode == WipeMode.Zoom:
        xMove, yMove = wipeConfig
        xSize = int(self._internalResolutionX * level)
        ySize = int(self._internalResolutionY * level)
        xPos = int((self._internalResolutionX - xSize) * xMove)
        yPos = int((self._internalResolutionY - ySize) * (1.0 - yMove))
        cv.SetZero(mixMat)
        dst_region = cv.GetSubRect(mixMat, (xPos, yPos, xSize, ySize))
        cv.Resize(image2, dst_region, cv.CV_INTER_CUBIC)
        if image1 is None:
            return mixMat
        cv.SetZero(self._mixMixMask1)
        dst_region = cv.GetSubRect(self._mixMixMask1, (xPos, yPos, xSize, ySize))
        cv.Set(dst_region, 255)  # max value for an 8-bit mask
        cv.Copy(mixMat, image1, self._mixMixMask1)
        return image1

    if wipeMode == WipeMode.Flip:
        flipRotation = wipeConfig
        rotation = 1.0 - level
        srcPoints = ((0.0, 0.0),
                     (0.0, self._internalResolutionY),
                     (self._internalResolutionX, 0.0))
        destPoint1 = (0.0, 0.0)
        destPoint2 = (0.0, self._internalResolutionY)
        destPoint3 = (self._internalResolutionX, 0.0)
        if image1 is None:
            rotation = rotation / 2
        if rotation < 0.5:
            flipAngle = rotation / 2
        else:
            flipAngle = level / 2
        destPoint1 = rotatePoint(flipRotation, destPoint1[0], destPoint1[1],
                                 self._halfResolutionX, self._halfResolutionY,
                                 flipAngle)
        destPoint2 = rotatePoint(flipRotation, destPoint2[0], destPoint2[1],
                                 self._halfResolutionX, self._halfResolutionY,
                                 flipAngle)
        destPoint3 = rotatePoint(flipRotation, destPoint3[0], destPoint3[1],
                                 self._halfResolutionX, self._halfResolutionY,
                                 flipAngle)
        dstPoints = ((destPoint1[0], destPoint1[1]),
                     (destPoint2[0], destPoint2[1]),
                     (destPoint3[0], destPoint3[1]))
        zoomMatrix = cv.CreateMat(2, 3, cv.CV_32F)
        cv.GetAffineTransform(srcPoints, dstPoints, zoomMatrix)
        if rotation < 0.5:
            cv.WarpAffine(image2, mixMat, zoomMatrix)
        else:
            cv.WarpAffine(image1, mixMat, zoomMatrix)
        cv.Set(self._mixMixMask2, (255, 255, 255))
        cv.WarpAffine(self._mixMixMask2, self._mixMixMask1, zoomMatrix)
        return mixMat

    return image2
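The Flip wipe relies on rotatePoint() and getNoizeMask(), both defined elsewhere. The exact projection the original uses is not shown; one plausible reading of rotatePoint, sketched under that assumption, is a flat card flip: keep the component of the point along an axis through the center (direction given by flipRotation, in turns) and foreshorten the perpendicular component as the plane tilts by `angle` (also in turns):

from math import cos, sin, pi

def rotatePoint(flipRotation, x, y, centerX, centerY, angle):
    # hypothetical implementation; semantics inferred from the call sites
    axis = flipRotation * 2.0 * pi
    ax, ay = cos(axis), sin(axis)      # unit vector along the flip axis
    dx, dy = x - centerX, y - centerY
    along = dx * ax + dy * ay          # component along the axis (kept)
    across = -dx * ay + dy * ax        # perpendicular component
    across *= cos(angle * 2.0 * pi)    # foreshorten as the card tilts edge-on
    return (centerX + along * ax - across * ay,
            centerY + along * ay + across * ax)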
def extract_features(filename, is_url=False):
    '''Extracts features to be used in a text image classifier.

    :param filename: input image
    :param is_url: whether the input image is a url or a file path on disk
    :return: tuple of features:
        (average_slope, median_slope, average_tilt, median_tilt,
         median_differences, average_differences, nr_straight_lines)

    The most relevant ones are average_slope, average_differences
    and nr_straight_lines.
    '''
    if is_url:
        filedata = urllib2.urlopen(filename).read()
        imagefiledata = cv.CreateMatHeader(1, len(filedata), cv.CV_8UC1)
        cv.SetData(imagefiledata, filedata, len(filedata))
        src = cv.DecodeImageM(imagefiledata, cv.CV_LOAD_IMAGE_GRAYSCALE)
    else:
        src = cv.LoadImage(filename, cv.CV_LOAD_IMAGE_GRAYSCALE)

    # normalize size: the smaller dimension becomes 400,
    # the longer dimension stays proportional
    normalized_size = 400
    orig_size = cv.GetSize(src)
    max_dim_idx = max(enumerate(orig_size), key=lambda l: l[1])[0]
    min_dim_idx = [idx for idx in [0, 1] if idx != max_dim_idx][0]
    new_size = [0, 0]
    new_size[min_dim_idx] = normalized_size
    new_size[max_dim_idx] = int(
        float(orig_size[max_dim_idx]) / orig_size[min_dim_idx] * normalized_size)
    dst = cv.CreateImage(new_size, 8, 1)
    cv.Resize(src, dst)
    # cv.SaveImage("/tmp/resized.jpg", dst)
    src = dst

    dst = cv.CreateImage(cv.GetSize(src), 8, 1)
    color_dst = cv.CreateImage(cv.GetSize(src), 8, 3)
    storage = cv.CreateMemStorage(0)
    cv.Canny(src, dst, 50, 200, 3)
    cv.CvtColor(dst, color_dst, cv.CV_GRAY2BGR)

    slopes = []
    tilts = []        # difference between xs or ys - variant of slope
    horizontals = []  # x coordinates of near-vertical lines
    verticals = []    # y coordinates of near-horizontal lines

    if USE_STANDARD:
        coords = cv.HoughLines2(dst, storage, cv.CV_HOUGH_STANDARD, 1,
                                pi / 180, 50, 50, 10)
        lines = []
        for coord in coords:
            (rho, theta) = coord
            a = cos(theta)
            b = sin(theta)
            x0 = a * rho
            y0 = b * rho
            pt1 = (cv.Round(x0 + 1000 * (-b)), cv.Round(y0 + 1000 * (a)))
            pt2 = (cv.Round(x0 - 1000 * (-b)), cv.Round(y0 - 1000 * (a)))
            lines += [(pt1, pt2)]
    else:
        lines = cv.HoughLines2(dst, storage, cv.CV_HOUGH_PROBABILISTIC, 1,
                               pi / 180, 50, 50, 10)

    # TODO: eliminate duplicates - there are many, especially with the
    # standard version. First round the coordinates to integers divisible
    # by 5 (to merge different but really close lines), e.g.:
    # lines = list(set(map(lambda l: tuple([int(p) - int(p) % 5 for p in l]), lines)))

    nr_straight_lines = 0
    for line in lines:
        (pt1, pt2) = line
        # Compute the slope, rotating the line so that the slope is smallest
        # (slope is either delta x / delta y or the reverse); add a smoothing
        # term in the denominator in case of 0.
        slope = min(abs(pt1[1] - pt2[1]), abs(pt1[0] - pt2[0])) / \
            (max(abs(pt1[1] - pt2[1]), abs(pt1[0] - pt2[0])) + 0.01)
        if slope < 0.05:
            if abs(pt1[0] - pt2[0]) < abs(pt1[1] - pt2[1]):
                # near-vertical line: record its x coordinate
                horizontals.append(pt1[0])
            else:
                # near-horizontal line: record its y coordinate
                verticals.append(pt1[1])
            nr_straight_lines += 1
        slopes.append(slope)
        tilts.append(min(abs(pt1[1] - pt2[1]), abs(pt1[0] - pt2[0])))

    average_slope = sum(slopes) / float(len(slopes))
    median_slope = npmedian(nparray(slopes))
    average_tilt = sum(tilts) / float(len(tilts))
    median_tilt = npmedian(nparray(tilts))

    differences = []
    horizontals = sorted(horizontals)
    verticals = sorted(verticals)
    print "x_differences:"
    for (i, x) in enumerate(horizontals):
        if i > 0:
            differences.append(abs(horizontals[i] - horizontals[i - 1]))
    print "y_differences:"
    for (i, y) in enumerate(verticals):
        if i > 0:
            differences.append(abs(verticals[i] - verticals[i - 1]))

    print filename
    print "average_slope:", average_slope
    print "median_slope:", median_slope
    print "average_tilt:", average_tilt
    print "median_tilt:", median_tilt

    median_differences = npmedian(nparray(differences))
    print "median_differences:", median_differences
    if not differences:
        # big arbitrary number for the average difference
        average_differences = 50
    else:
        average_differences = sum(differences) / float(len(differences))
    print "average_differences:", average_differences
    print "nr_lines:", nr_straight_lines

    return (average_slope, median_slope, average_tilt, median_tilt,
            median_differences, average_differences, nr_straight_lines)
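A minimal usage sketch for the feature extractor; the file path here is hypothetical. Text-heavy or tabular scans tend to yield a low average_slope, regular average_differences and a high straight-line count:

if __name__ == '__main__':
    features = extract_features('../pic/sample_page.jpg')
    (average_slope, median_slope, average_tilt, median_tilt,
     median_differences, average_differences, nr_straight_lines) = features
    print "feature vector:", features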