def getStandardizedRects(self):
    '''
    @return: the boxes centered on the target center of mass +- n_sigma*std
    @note: You must call detect() before getStandardizedRects() to see updated results.
    '''
    # Walk the list of top-level contours found in the contours (cv.Seq) structure.
    rects = []
    if len(self._contours) < 1:
        return rects
    seq = self._contours
    while seq is not None:
        (x, y, w, h) = cv.BoundingRect(seq)
        if cv.ContourArea(seq) > self._minArea:  # and self._filter(rect)
            moments = cv.Moments(seq)
            m_0_0 = cv.GetSpatialMoment(moments, 0, 0)
            m_0_1 = cv.GetSpatialMoment(moments, 0, 1)
            m_1_0 = cv.GetSpatialMoment(moments, 1, 0)
            mu_2_0 = cv.GetCentralMoment(moments, 2, 0)
            mu_0_2 = cv.GetCentralMoment(moments, 0, 2)
            # Center of mass and standardized width/height from the moments.
            cx = m_1_0 / m_0_0
            cy = m_0_1 / m_0_0
            w = 2.0 * self._rect_sigma * np.sqrt(mu_2_0 / m_0_0)
            h = 2.0 * self._rect_sigma * np.sqrt(mu_0_2 / m_0_0)
            r = pv.CenteredRect(cx, cy, w, h)
            rects.append(r)
        seq = seq.h_next()
    if self._filter is not None:
        rects = self._filter(rects)
    return rects
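# A minimal usage sketch for the method above, assuming it belongs to a
# pyvision-style motion detector (pv.MotionDetector and the video filename
# are assumptions, not taken from the code above).
import pyvision as pv

video = pv.Video("motion_clip.avi")  # hypothetical input file
md = pv.MotionDetector()
for frame in video:
    md.detect(frame)  # detect() must run before getStandardizedRects()
    for rect in md.getStandardizedRects():
        frame.annotateRect(rect, color='yellow')
    frame.show(delay=30)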
def extract(self, img, face_records):
    '''Extract a template that allows the face to be matched.'''
    # Compute the 512D vector that describes the face in img identified by
    # shape.
    for face_record in face_records.face_records:
        rect = pt.rect_proto2pv(face_record.detection.location)
        x, y, w, h = rect.asTuple()

        # Extract view
        cx, cy = x + 0.5 * w, y + 0.5 * h
        tmp = 1.5 * max(w, h)
        cw, ch = tmp, tmp
        crop = pv.AffineFromRect(pv.CenteredRect(cx, cy, cw, ch), (256, 256))
        pvim = pv.Image(img[:, :, ::-1])  # convert rgb to bgr
        pvim = crop(pvim)
        view = pt.image_pv2proto(pvim)
        face_record.view.CopyFrom(view)

        # Downsample to the 112x112 network input.
        tile = pvim.resize((224, 224))
        tile = tile.resize((112, 112))
        face_im = tile.asOpenCV2()
        face_im = face_im[:, :, ::-1]  # Convert BGR to RGB

        features = self.fr_model.get_embedding(face_im)
        face_descriptor = pv.meanUnit(features.flatten())
        face_record.template.data.CopyFrom(pt.vector_np2proto(face_descriptor))
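# The centered square crop above (1.5x the larger box side, warped to a
# fixed size) is the same view-extraction pattern used by the other
# extract() variants below. A standalone sketch of that pattern; the
# helper name crop_view is ours, not part of the library.
def crop_view(pvim, rect, scale=1.5, size=(256, 256)):
    x, y, w, h = rect.asTuple()
    cx, cy = x + 0.5 * w, y + 0.5 * h  # detection center
    s = scale * max(w, h)              # square side, padded by `scale`
    affine = pv.AffineFromRect(pv.CenteredRect(cx, cy, s, s), size)
    return affine(pvim)                # warped, fixed-size face view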
def detect(self, im, annotate=True):
    '''
    This performs face detection and returns an ordered list of faces
    sorted by confidence scores.
    '''
    # Compute the raw detections.
    detections = self.raw_detections(im)

    # Produce a list of faces.
    faces = []
    for each in detections:
        rect = each[0]

        # Assign a quality score to each detection.
        score = self.quality.predict(each[2:])
        rect.detector = each[1]
        rect.score = score[0]
        faces.append(rect)

    # Add a default detection covering the center of the image.
    if self.default:
        w, h = im.size
        s = 0.75 * min(w, h)
        default = pv.CenteredRect(0.5 * w, 0.5 * h, s, s)
        default.score = 0.0
        default.detector = "DEFAULT"
        faces.append(default)

    # Order the list from highest to lowest score.
    faces.sort(key=lambda face: -face.score)

    return faces
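# A usage sketch for detect() above, assuming `detector` is an instance of
# the surrounding class; the filename is hypothetical.
im = pv.Image("group_photo.jpg")
faces = detector.detect(im)
best = faces[0]  # detections arrive sorted, highest quality score first
print(best.detector, best.score)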
def setSize(self, size):
    '''
    Used to adjust the size of the tracking rect.

    @param size: (width,height)
    @type size: (float,float)
    '''
    x = self.rect.center().X()
    y = self.rect.center().Y()
    w, h = size
    self.rect = pv.CenteredRect(x, y, w, h)
def unitRectCallback(population):
    # Randomly skip about 1 in 20 callbacks.
    if random.random() < 0.05:
        return
    im = pv.Image(np.zeros((1000, 1000), dtype=np.float32))
    for each in population:
        rect = each[1][0].generate()
        im.annotateRect(1000 * rect, color='gray')
    # Highlight the current best individual in green.
    rect = population[0][1][0].generate()
    im.annotatePolygon((1000 * rect).asPolygon(), color='green', width=3)
    # Draw the optimization target in white.
    target_rect = pv.CenteredRect(.33385, .69348, .3482, .55283)
    im.annotatePolygon((1000 * target_rect).asPolygon(), color='white', width=3)
def setCenter(self, point):
    '''
    Used to adjust the center point of the track.

    @param point: The new center for the track.
    @type point: pv.Point
    '''
    x = point.X()
    y = point.Y()
    w = self.rect.w
    h = self.rect.h
    self.rect = pv.CenteredRect(x, y, w, h)
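# A small usage sketch for setCenter() and setSize() above, assuming
# `track` is an instance of the surrounding tracking class.
track.setCenter(pv.Point(320, 240))  # recenter the track
track.setSize((64, 48))              # resize; the center is preserved
assert abs(track.rect.center().X() - 320.0) < 1e-6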
def extract(self, img, face_records):
    '''Extract a template that allows the face to be matched.'''
    # Compute the 128D vector that describes the face in img identified by
    # shape. In general, if two face descriptor vectors have a Euclidean
    # distance between them less than 0.6 then they are from the same
    # person, otherwise they are from different people.

    # TODO: Make this an option
    JITTER_COUNT = 5

    for face_record in face_records.face_records:
        rect = pt.rect_proto2pv(face_record.detection.location)
        x, y, w, h = rect.asTuple()

        # Extract view
        cx, cy = x + 0.5 * w, y + 0.5 * h
        tmp = 1.5 * max(w, h)
        cw, ch = tmp, tmp
        crop = pv.AffineFromRect(pv.CenteredRect(cx, cy, cw, ch), (256, 256))
        pvim = pv.Image(img[:, :, ::-1])  # convert rgb to bgr
        pvim = crop(pvim)
        view = pt.image_pv2proto(pvim)
        face_record.view.CopyFrom(view)

        # Extract landmarks
        l, t, r, b = [int(tmp) for tmp in [x, y, x + w, y + h]]
        d = dlib.rectangle(l, t, r, b)
        shape = self.shape_pred(img, d)
        for i in range(len(shape.parts())):
            loc = shape.parts()[i]
            landmark = face_record.landmarks.add()
            landmark.landmark_id = "point_%02d" % i
            landmark.location.x = loc.x
            landmark.location.y = loc.y

        # Compute the jittered face descriptor and store it as the template.
        face_descriptor = self.face_rec.compute_face_descriptor(img, shape, JITTER_COUNT)
        face_descriptor = np.array(face_descriptor)
        vec = face_descriptor.flatten()
        face_record.template.data.CopyFrom(pt.vector_np2proto(vec))
def test_affine_Matrix3D(self):
    im = pv.Image(pv.BABOON)
    test_im = pv.Image(im.asMatrix3D())
    affine = pv.AffineFromRect(pv.CenteredRect(256, 256, 128, 128), (64, 64))

    # Transform the images
    im = affine(im)
    test_im = affine(test_im)

    # Correlate the resulting images
    vec1 = pv.unit(im.asMatrix3D().flatten())
    vec2 = pv.unit(test_im.asMatrix3D().flatten())
    score = np.dot(vec1, vec2)

    self.assertGreater(score, 0.998)
def extract(self, img, face_records):
    '''Extract a template that allows the face to be matched.'''
    # Compute the 512D vector that describes the face in img identified by
    # shape.
    img = img[:, :, ::-1]  # Convert from RGB to BGR; get_embedding converts BGR to RGB internally.

    for face_record in face_records.face_records:
        if face_record.detection.score != -1:
            # Collect the five landmark points used for alignment.
            landmarks = np.zeros((5, 2), dtype=float)
            for i in range(0, len(face_record.landmarks)):
                vals = face_record.landmarks[i]
                landmarks[i, 0] = vals.location.x
                landmarks[i, 1] = vals.location.y

            _img = self.preprocess.norm_crop(img, landmark=landmarks)
            embedding = self.fr_model.get_embedding(_img).flatten()
            embedding_norm = np.linalg.norm(embedding)
            normed_embedding = embedding / embedding_norm

            # Extract view
            x, y, w, h = pt.rect_proto2pv(face_record.detection.location).asTuple()
            cx, cy = x + 0.5 * w, y + 0.5 * h
            tmp = 1.5 * max(w, h)
            cw, ch = tmp, tmp
            crop = pv.AffineFromRect(pv.CenteredRect(cx, cy, cw, ch), (256, 256))
            pvim = pv.Image(img[:, :, ::-1])  # img is BGR here; flip back before building the view
            pvim = crop(pvim)
            view = pt.image_pv2proto(pvim)
            face_record.view.CopyFrom(view)
        else:
            # No detection: store a zero template so the record stays valid.
            normed_embedding = np.zeros(512, dtype=float)

        face_record.template.data.CopyFrom(pt.vector_np2proto(normed_embedding))
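# The unit-normalized embeddings stored above support cosine similarity via
# a plain dot product. A sketch, assuming pt.vector_proto2np is the inverse
# of pt.vector_np2proto and rec_a/rec_b are two populated face records.
a = pt.vector_proto2np(rec_a.template.data)
b = pt.vector_proto2np(rec_b.template.data)
score = np.dot(a, b)  # near 1.0 for the same person, lower otherwise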
def generate(self):
    '''Generate the actual value that will be populated in the arguments.'''
    return pv.CenteredRect(self.cx, self.cy, self.width, self.height)
import pyvision as pv
import numpy as np

if __name__ == '__main__':
    im = pv.Image(np.zeros((128, 128)))
    pts = [pv.Point(48, 55), pv.Point(80, 55)]
    im.annotatePoints(pts)
    ellipse = pv.CenteredRect(64, 64, 96, 96)
    im.annotateEllipse(ellipse)
    im.annotateLabel(pv.Point(40, 36), "MMM")
    im.annotateLabel(pv.Point(72, 36), "MMM")
    im.annotateLabel(pv.Point(58, 64), "db")
    im.annotatePolygon([pv.Point(48, 90), pv.Point(80, 90), pv.Point(64, 100)])
    im.show(delay=0)
def raw_detections(self, im):
    '''
    Run the face detectors with additional quality parameters.
    '''
    W, H = im.size
    scale = 1.0 / self.prescale
    im = im.scale(self.prescale)

    faces = self.fd(im)
    faces = [[scale * rect, 'FACE'] for rect in faces]

    heads = self.hd(im)

    # Approximate face locations from head detections.
    hfaces = []
    for each in heads:
        # Get the center of the head location.
        x, y, w, _ = each.asCenteredTuple()
        y = y - 0.10 * w
        w = 0.33 * w
        hfaces.append([scale * pv.CenteredRect(x, y, w, w), 'HEAD'])

    # Flag face detections that overlap a head detection.
    for face in faces:
        best_overlap = 0.0
        for head in hfaces:
            best_overlap = max(best_overlap, face[0].similarity(head[0]))
        if best_overlap > 0.7:
            face.append(1.0)
        else:
            face.append(0.0)

    # Flag head detections that overlap a face detection.
    for head in hfaces:
        best_overlap = 0.0
        for face in faces:
            best_overlap = max(best_overlap, head[0].similarity(face[0]))
        if best_overlap > 0.7:
            head.append(1.0)
        else:
            head.append(0.0)

    detections = faces + hfaces

    # Compute some simple statistics for each detection.
    for each in detections:
        tile = pv.AffineFromRect(self.prescale * each[0], (128, 128))(im)

        # face vs head detection
        each.append(1.0 * (each[1] == 'FACE'))

        # size relative to image (linear and quadratic terms)
        each.append(np.sqrt(each[0].area()) / np.sqrt(W * H))
        each.append(np.sqrt(each[0].area()) / np.sqrt(W * H)**2)

        # scaled contrast
        each.append(tile.asMatrix2D().std() / 255.0)
        each.append((tile.asMatrix2D().std() / 255.0)**2)

        # scaled brightness
        each.append(tile.asMatrix2D().mean() / 255.0)
        each.append((tile.asMatrix2D().mean() / 255.0)**2)

        # relative rgb intensity
        rgb = tile.asMatrix3D()
        t = rgb.mean() + 0.001  # grand mean, regularized

        # rgb relative to grand mean
        r = -1 + rgb[0, :, :].mean() / t
        g = -1 + rgb[1, :, :].mean() / t
        b = -1 + rgb[2, :, :].mean() / t

        # create a quadratic model with interactions for rgb
        each += [r, g, b, r * r, r * g, r * b, g * g, g * b, b * b]

    return detections
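# Each row built above has the form [rect, detector_name, feature_1, ...];
# detect() feeds each[2:] to the quality model. A sketch of that contract,
# assuming `detector` is an instance of the surrounding class:
detections = detector.raw_detections(im)
for each in detections:
    rect, name, features = each[0], each[1], each[2:]
    p = detector.quality.predict(features)[0]  # quality score for the box
    print(name, rect, p)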
def train(self, image_dir, eye_data):
    '''
    This function trains the logistic regression model to score the
    meta-detections.  Images must be oriented so that the face is upright.

    @param image_dir: A pathname containing images.
    @param eye_data: a list of tuples (from csv) filename,eye1x,eye1y,eye2x,eye2y
    '''
    print "Training"
    data_set = []

    progress = pv.ProgressBar(maxValue=len(eye_data))
    for row in eye_data:
        filename = row[0]
        print "Processing", row
        points = [float(val) for val in row[1:]]
        eye1 = pv.Point(points[0], points[1])
        eye2 = pv.Point(points[2], points[3])

        # Compute the truth rectangle from the eye coordinates.
        ave_dist = np.abs(cd.AVE_LEFT_EYE.X() - cd.AVE_RIGHT_EYE.X())
        y_height = 0.5 * (cd.AVE_LEFT_EYE.Y() + cd.AVE_RIGHT_EYE.Y())
        x_center = 0.5 * (eye1.X() + eye2.X())
        x_dist = np.abs(eye1.X() - eye2.X())
        width = x_dist / ave_dist
        y_center = 0.5 * (eye1.Y() + eye2.Y()) + (0.5 - y_height) * width
        truth = pv.CenteredRect(x_center, y_center, width, width)

        # Read the image.
        im = pv.Image(os.path.join(image_dir, filename))

        # Compute the detections.
        detections = self.raw_detections(im)

        # Score the detections.  A similarity above 0.7 counts as correct
        # and gets a value of 1.0 in the logistic regression; incorrect
        # detections get a value of 0.0.
        scores = [truth.similarity(each[0]) for each in detections]

        for i in range(len(scores)):
            score = scores[i]
            detection = detections[i]
            success = 0.0
            if score > 0.7:
                success = 1.0
            row = detection[1], success, detection[2:]
            print row
            data_set.append(row)

        # Display the results.
        im = im.scale(self.prescale)
        colors = {'FACE': 'yellow', 'HEAD': 'blue'}
        for detection in detections:
            rect = self.prescale * detection[0]
            im.annotateRect(rect, color=colors[detection[1]])
        im.annotateRect(self.prescale * truth, color='red')

        progress.updateAmount()
        progress.show()
        print
    progress.finish()

    obs = [each[1] for each in data_set]
    data = [each[2] for each in data_set]
    print obs
    print data

    self.quality.train(obs, data)

    return

    # NOTE: The per-detector training below is unreachable legacy code.
    for each in data_set:
        self.quality[each[0]][1].append(each[1])
        self.quality[each[0]][2].append(each[2])

    for key, value in self.quality.iteritems():
        print "Training:", key
        obs = value[1]
        data = value[2]
        assert len(obs) == len(data)
        value[0].train(obs, data)
        print value[0].params

    print "Done Training"
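# A standalone sketch of the truth-rectangle geometry used in train().
# The real code reads the average eye positions from the cascade-detector
# module `cd`; the constants below are illustrative assumptions.
import pyvision as pv

AVE_LEFT_EYE_X, AVE_RIGHT_EYE_X = 0.32, 0.68  # assumed unit-box eye X positions
AVE_EYE_Y = 0.40                              # assumed unit-box eye height

def truth_rect(eye1, eye2):
    ave_dist = abs(AVE_LEFT_EYE_X - AVE_RIGHT_EYE_X)
    width = abs(eye1.X() - eye2.X()) / ave_dist  # face box width in pixels
    x_center = 0.5 * (eye1.X() + eye2.X())
    # Shift the center down so the eyes land at the standard height.
    y_center = 0.5 * (eye1.Y() + eye2.Y()) + (0.5 - AVE_EYE_Y) * width
    return pv.CenteredRect(x_center, y_center, width, width)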
def fitnessUnitRect(rect, **kwargs):
    # Negative overlap with a fixed target rect; the optimizer minimizes this.
    target_rect = pv.CenteredRect(.33385, .69348, .3482, .55283)
    return -target_rect.overlap(rect)
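# A quick sanity check for the fitness function above, assuming
# pv.Rect.overlap() returns a ratio in [0, 1] that is maximal when the two
# rects coincide, so the minimizer drives a guess onto the target.
target = pv.CenteredRect(.33385, .69348, .3482, .55283)
off = pv.CenteredRect(.30, .70, .35, .50)
assert fitnessUnitRect(target) <= fitnessUnitRect(off)  # more negative is better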
def update(self, frame):
    '''
    This is the main work function for the tracker.  After initialization,
    this function should be called on each new frame.  This function:

    1. Extracts the tracking window from the new frame.
    2. Applies the filter to locate the new center of the target.
    3. Updates the filter (if PSR exceeds the threshold).
    4. Updates the internal state of the tracker to reflect the new
       location and status.

    @param frame: a new frame from the video.
    @type frame: pv.Image
    '''
    start = time.time()
    self.frame += 1
    self.psr_cache = None

    # Extract the tracking window from the new frame.
    tile, affine = frame.crop(self.rect, size=self.tile_size, return_affine=True)
    self.prevRect = copy.deepcopy(self.rect)
    self.input = tile

    # Correlate the filter with the tile and find the peak.
    corr = self.filter.correlate(tile)
    self.corr = corr
    _, cols = self.corr.shape
    i = self.corr.argmax()
    x, y = i // cols, i % cols
    target = pv.Point(x, y)
    self.best_estimate = affine.invert(target)

    if self.subpixel:
        dx, dy = common.subpixel(self.corr[x - 3:x + 4, y - 3:y + 4])
        target = pv.Point(x + dx, y + dy)

    # Check whether the new target rect stays within the frame.
    target_rect = pv.CenteredRect(target.X(), target.Y(), self.rect.w, self.rect.h)
    frame_rect = pv.Rect(0, 0, frame.size[0], frame.size[1])
    self.in_bounds = frame_rect.containsRect(target_rect)
    status = self.getStatus()

    # Status == Good: update the filter.
    psr = self.psr()
    if self.frame <= self.init_time or (status == STATUS_GOOD and (self.max_psr is None or psr < self.max_psr)):
        self.filter.addTraining(tile, target, rate=self.update_rate)

    # Recenter the rect on the new target location.
    target = affine.invertPoint(target)
    dx = target.X() - self.rect.center().X()
    dy = target.Y() - self.rect.center().Y()

    if self.update_location:
        self.rect.x = self.rect.x + dx
        self.rect.y = self.rect.y + dy

    stop = time.time()
    self.update_time = stop - start
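# A usage sketch for the update loop above, assuming a MOSSE-style tracker
# class constructed from a first frame and a starting rect; the class name
# MOSSETrack and the constructor signature are assumptions.
video = pv.Video("target_clip.avi")  # hypothetical input
frames = iter(video)
first = next(frames)
track = MOSSETrack(first, pv.CenteredRect(160, 120, 48, 48))
for frame in frames:
    track.update(frame)  # one call per new frame
    frame.annotateRect(track.rect, color='green')
    frame.show(delay=30)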
def extract(self, img, face_records):
    '''Extract a template that allows the face to be matched.'''
    # Compute a face descriptor for each detected face using the VGGFace
    # network.
    for face_record in face_records.face_records:
        rect = pt.rect_proto2pv(face_record.detection.location)
        x, y, w, h = rect.asTuple()

        # Extract view
        cx, cy = x + 0.5 * w, y + 0.5 * h
        tmp = 1.5 * max(w, h)
        cw, ch = tmp, tmp
        crop = pv.AffineFromRect(pv.CenteredRect(cx, cy, cw, ch), (256, 256))
        pvim = pv.Image(img[:, :, ::-1])  # convert rgb to bgr
        pvim = crop(pvim)
        view = pt.image_pv2proto(pvim)
        face_record.view.CopyFrom(view)

        # Extract landmarks
        l, t, r, b = [int(tmp) for tmp in [x, y, x + w, y + h]]
        d = dlib.rectangle(l, t, r, b)
        shape = self.shape_pred(img, d)
        for i in range(len(shape.parts())):
            loc = shape.parts()[i]
            landmark = face_record.landmarks.add()
            landmark.landmark_id = "point_%02d" % i
            landmark.location.x = loc.x
            landmark.location.y = loc.y

        # Resize the view to the network input size and convert BGR to RGB.
        tile = pvim.resize((224, 224))
        face_im = tile.asOpenCV2()
        face_im = face_im[:, :, ::-1]

        # Preprocess for the VGGFace network.
        from keras_vggface import utils
        from keras.preprocessing import image
        face_im = image.img_to_array(face_im)
        face_im = np.expand_dims(face_im, axis=0)
        face_im = utils.preprocess_input(face_im, version=2)

        # Needed in multithreaded applications.
        with self.graph.as_default():
            tmp = self.recognizer.predict(face_im)

        face_descriptor = pv.meanUnit(tmp.flatten())
        face_record.template.data.CopyFrom(pt.vector_np2proto(face_descriptor))
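# pv.meanUnit above normalizes the descriptor so templates can be compared
# with a simple dot product. A sketch of the equivalent numpy operation,
# assuming meanUnit mean-centers and unit-normalizes as its name suggests:
import numpy as np

def mean_unit(v):
    v = v - v.mean()               # remove the mean
    return v / np.linalg.norm(v)   # scale to unit length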