def recog_cap(self, image):
    """Classify whether the face in `image` wears a cap; returns (class id, probability)."""
    image_size = self.args.image_size
    images = facenet_ext.load_data_im(image, False, False, image_size)
    if len(images.shape) == 3:
        images = np.expand_dims(images, axis=0)
    feed_dict = {
        self.phase_train_placeholder: False,
        self.phase_train_placeholder_expression: False,
        self.images_placeholder: images,
        self.keep_probability_placeholder: 1.0
    }
    logits_array = self.sess.run([self.logits], feed_dict=feed_dict)
    logits0 = logits_array[0]
    # Softmax over the logits of the single input image
    cap_probs = np.exp(logits0) / np.sum(np.exp(logits0))
    iscap = np.argmax(logits0)
    cap_prob = cap_probs[0][iscap]
    return iscap, cap_prob
def recog_hand(self, imgs):
    """Classify hand gestures for a batch of images; returns (class ids, probabilities)."""
    images = facenet_ext.load_data_im(imgs, False, False, 160)
    if len(images.shape) == 3:
        images = np.expand_dims(images, axis=0)
    feed_dict = {
        self.phase_train_placeholder: False,
        self.phase_train_placeholder_expression: False,
        self.images_placeholder: images,
        self.keep_probability_placeholder: 1.0
    }
    logits_ = self.sess.run([self.logits], feed_dict=feed_dict)
    logits_array = np.squeeze(np.array(logits_), 0)
    # Row-wise softmax over the batch; keepdims makes the division broadcast
    # correctly across the class axis
    exp_logit = np.exp(logits_array)
    imgs_sf = exp_logit / np.sum(exp_logit, 1, keepdims=True)
    IDs = np.argmax(imgs_sf, 1)
    probs = np.max(imgs_sf, 1)
    return IDs, probs
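# The two softmax computations above exponentiate raw logits directly, which can
# overflow for large logit values. A numerically stable variant (a sketch, not
# part of the original code; `_stable_softmax` is a hypothetical helper name)
# subtracts the row-wise maximum before exponentiating:
def _stable_softmax(logits):
    """Row-wise softmax of a 2-D logit array, shifted by the row max for stability."""
    shifted = logits - np.max(logits, axis=1, keepdims=True)
    exp = np.exp(shifted)
    return exp / np.sum(exp, axis=1, keepdims=True)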
def face_embeddings(img_refs_, args, sess, args_model, Expr_dataset):
    # Load and preprocess the reference images
    image_size = args.image_size
    images = facenet_ext.load_data_im(img_refs_, False, False, image_size)
    if len(images.shape) == 3:
        images = np.expand_dims(images, axis=0)
    if Expr_dataset in ('CK+', 'FER2013'):
        feed_dict = {
            args_model.phase_train_placeholder: False,
            args_model.images_placeholder: images,
            args_model.keep_probability_placeholder: 1.0
        }
    t2 = time.time()
    emb_array = sess.run([args_model.embeddings], feed_dict=feed_dict)
    t3 = time.time()
    print('Embedding calculation FPS:%d' % int(1 / (t3 - t2)))
    return emb_array
def face_expression_multiref_forward(face_img_, emb_ref, args, sess, args_model, Expr_dataset):
    nrof_imgs = 1
    imgs = np.zeros((nrof_imgs, args.image_size, args.image_size, 3))
    imgs[0, :, :, :] = face_img_
    # Load and preprocess the probe image
    image_size = args.image_size
    images = facenet_ext.load_data_im(imgs, False, False, image_size)
    if len(images.shape) == 3:
        images = np.expand_dims(images, axis=0)
    if Expr_dataset == 'CK+':
        feed_dict = {
            args_model.phase_train_placeholder: False,
            args_model.images_placeholder: images,
            args_model.keep_probability_placeholder: 1.0
        }
    if Expr_dataset == 'FER2013':
        feed_dict = {
            args_model.phase_train_placeholder: False,
            args_model.phase_train_placeholder_expression: False,
            args_model.images_placeholder: images,
            args_model.keep_probability_placeholder: 1.0
        }
    t2 = time.time()
    emb_array, logits_array = sess.run([args_model.embeddings, args_model.logits], feed_dict=feed_dict)
    t3 = time.time()
    print('Embedding calculation FPS:%d' % int(1 / (t3 - t2)))
    embeddings1 = emb_array[0]
    embeddings2 = emb_ref[0]
    # Calculate the squared L2 distance between the probe embedding and each
    # reference embedding, then verify the identity against the threshold
    assert embeddings1.shape[0] == embeddings2[0].shape[0]
    diff = np.subtract(embeddings1, embeddings2)
    if len(diff.shape) == 2:
        dist = np.sum(np.square(diff), 1)
    elif len(diff.shape) == 1:
        dist = np.sum(np.square(diff), 0)
    else:
        raise ValueError("Dimension of the embeddings2 is not correct!")
    predict_issame = np.less(dist, args.threshold)
    logits0 = logits_array[0]
    express_probs = np.exp(logits0) / np.sum(np.exp(logits0))
    return predict_issame, dist, express_probs
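# A minimal usage sketch (kept as comments; it assumes a loaded session, the
# model handles returned by load_models_forward_v2, and an `args` namespace
# with `image_size` and `threshold`): embed the reference faces once, then
# verify each probe face against all references at once.
#
#     emb_ref = face_embeddings(face_img_refs_, args, sess, args_model, Expr_dataset)
#     issame, dists, express_probs = face_expression_multiref_forward(
#         face_img_, emb_ref, args, sess, args_model, Expr_dataset)
#     best = np.argmin(dists)   # index of the closest reference
#     if issame[best]:          # squared L2 distance below args.threshold
#         print('match: ref %d (dist %.2f)' % (best, dists[best]))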
def verification_test(args):
    rect_len = 120
    offset_x = 50
    Expr_str = ['Neutre', 'Colere', 'Degoute', 'Peur', 'Content', 'Triste', 'Surprise']  # FER2013+ EXPRSSIONS_TYPE_fusion
    Expr_dataset = 'FER2013'
    c_red = (0, 0, 255)
    c_green = (0, 255, 0)
    font = cv2.FONT_HERSHEY_SIMPLEX
    scale_size = 3  # downscale factor applied to the input image before face alignment

    # Load the models for face detection and verification
    pnet, rnet, onet, sess, args_model = face_verification_verif.load_models_forward_v2(args, Expr_dataset)

    # Detect, crop and embed the reference faces
    face_img_refs_ = []
    img_ref_paths = []
    for img_ref_path in os.listdir(args.img_ref):
        img_ref_paths.append(img_ref_path)
        img_ref = misc.imread(os.path.join(args.img_ref, img_ref_path))
        img_size = img_ref.shape[0:2]
        bb, probs = align.face_align_mtcnn.align_mtcnn_realplay(img_ref, pnet, rnet, onet)
        if len(bb) == 0:
            continue
        bb_face = []
        probs_face = []
        for i, prob in enumerate(probs):
            if prob > args.face_detect_threshold:
                bb_face.append(bb[i])
                probs_face.append(prob)
        bb = np.asarray(bb_face)
        det = bb
        if det.shape[0] > 1:
            # Keep the largest, most central detection
            bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
            img_center = np.array(img_size) / 2
            offsets = np.vstack([(det[:, 0] + det[:, 2]) / 2 - img_center[1],
                                 (det[:, 1] + det[:, 3]) / 2 - img_center[0]])
            offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
            index = np.argmax(bounding_box_size - offset_dist_squared * 2.0)  # some extra weight on the centering
            det = det[index, :]
        det = np.squeeze(det)
        x0 = det[0]
        y0 = det[1]
        bb_tmp = np.zeros(4, dtype=np.int32)
        bb_tmp[0] = np.maximum(det[0] - args.margin / 2, 0)
        bb_tmp[1] = np.maximum(det[1] - args.margin / 2, 0)
        bb_tmp[2] = np.minimum(det[2] + args.margin / 2, img_size[1])
        bb_tmp[3] = np.minimum(det[3] + args.margin / 2, img_size[0])
        face_img_ref = img_ref[bb_tmp[1]:bb_tmp[3], bb_tmp[0]:bb_tmp[2], :]
        face_img_ref = misc.imresize(face_img_ref, (args.image_size, args.image_size), interp='bilinear')
        face_img_ref_ = facenet_ext.load_data_im(face_img_ref, False, False, args.image_size)
        face_img_refs_.append(face_img_ref_)
        # Show the detected reference face in its own window
        img_ref_cv = cv2.cvtColor(img_ref, cv2.COLOR_BGR2RGB)
        cv2.rectangle(img_ref_cv, (int(det[0]), int(det[1])), (int(det[2]), int(det[3])), c_red, 2, 8, 0)
        img_ref_name = img_ref_path.split('.')[0]
        cv2.putText(img_ref_cv, "%s" % img_ref_name, (int(x0), int(y0 - 10)), font, 1, c_red, 2)
        cv2.imshow('%s' % img_ref_path, img_ref_cv)
        cv2.waitKey(20)
    face_img_refs_ = np.array(face_img_refs_)
    emb_ref = face_verification_verif.face_embeddings(face_img_refs_, args, sess, args_model, Expr_dataset)

    # Capture the camera (or a video file) for real-time playback
    if args.video == '0':
        video = 0
    else:
        video = args.video
    cap = cv2.VideoCapture(video)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 800)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 600)
    realplay_window = "Realplay"
    cv2.namedWindow(realplay_window, cv2.WINDOW_NORMAL)
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter('output.avi', fourcc, 20.0, (800, 600))
    while cap.isOpened():
        # Stop when the user closes the playback window
        if cv2.getWindowProperty(realplay_window, cv2.WND_PROP_VISIBLE) < 1:
            return
        # Capture frame-by-frame
        ret, frame = cap.read()
        if not ret:
            break
        cv2_im = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        im_np = cv2_im
        img_size = im_np.shape[0:2]
        im_np_scale = cv2.resize(im_np,
                                 (int(img_size[1] / scale_size), int(img_size[0] / scale_size)),
                                 interpolation=cv2.INTER_LINEAR)
        bb, probs = align.face_align_mtcnn.align_mtcnn_realplay(im_np_scale, pnet, rnet, onet)
        bb_face = []
        probs_face = []
        for i, prob in enumerate(probs):
            if prob > args.face_detect_threshold:
                bb_face.append(bb[i])
                probs_face.append(prob)
        bb = np.asarray(bb_face)
        probs = np.asarray(probs_face)
        bb = bb * scale_size  # rescale boxes from the downscaled image back to the original resolution
        if len(bb) > 0:
            for i in range(bb.shape[0]):
                prob = probs[i]
                det = bb[i]
                bb_tmp = np.zeros(4, dtype=np.int32)
                bb_tmp[0] = np.maximum(det[0] - args.margin / 2, 0)
                bb_tmp[1] = np.maximum(det[1] - args.margin / 2, 0)
                bb_tmp[2] = np.minimum(det[2] + args.margin / 2, img_size[1])
                bb_tmp[3] = np.minimum(det[3] + args.margin / 2, img_size[0])
                face_img = im_np[bb_tmp[1]:bb_tmp[3], bb_tmp[0]:bb_tmp[2], :]
                face_img_ = misc.imresize(face_img, (args.image_size, args.image_size), interp='bilinear')
                face_img_ = facenet_ext.load_data_im(face_img_, False, False, args.image_size)
                x0 = bb[i][0]
                y0 = bb[i][1]
                x1 = bb[i][2]
                y1 = bb[i][3]
                offset_y = int((y1 - y0) / 7)
                # Face verification against the reference embeddings
                # (expressions: 0=neutral, 1=anger, 2=contempt, 3=disgust, 4=fear, 5=happy, 6=sadness, 7=surprise)
                t2 = time.time()
                predict_issames, dists, express_probs = face_verification_verif.face_expression_multiref_forward(
                    face_img_, emb_ref, args, sess, args_model, Expr_dataset)
                t3 = time.time()
                print('face verif FPS:%d' % int(1 / (t3 - t2)))
                predict_issame_idxes = [j for j, predict_issame in enumerate(predict_issames) if predict_issame]
                if len(predict_issame_idxes) > 1:
                    predict_issame_idx = np.argmin(dists)
                elif len(predict_issame_idxes) == 1:
                    predict_issame_idx = predict_issame_idxes[0]
                if len(predict_issame_idxes):
                    # Match found: annotate in green with the reference name
                    ref_idx = predict_issame_idx
                    dist = dists[ref_idx]
                    img_ref_name = img_ref_paths[ref_idx].split('.')[0]
                    cv2.rectangle(frame, (int(x0), int(y0)), (int(x1), int(y1)), c_green, 2, 8, 0)
                    cv2.putText(frame, "%.4f" % prob, (int(x0), int(y0)), font, 0.5, c_green, 1)
                    cv2.putText(frame, "%.2f" % dist, (int(x1), int(y1)), font, 0.5, c_green, 1)
                    cv2.putText(frame, "%s" % img_ref_name, (int((x1 + x0) / 2), int(y0 - 10)), font, 1, c_green, 2)
                else:
                    # No match: annotate in red with the closest distance
                    dist = min(dists)
                    cv2.rectangle(frame, (int(x0), int(y0)), (int(x1), int(y1)), c_red, 2, 8, 0)
                    cv2.putText(frame, "%.4f" % prob, (int(x0), int(y0)), font, 0.5, c_red, 1)
                    cv2.putText(frame, "%.2f" % dist, (int(x1), int(y1)), font, 0.5, c_red, 1)
        # Visualisation: record and display the annotated frame
        out.write(frame)
        cv2.imshow(realplay_window, frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    # When everything is done, release the capture and the writer
    cap.release()
    out.release()
    cv2.destroyAllWindows()
    return
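# A minimal entry-point sketch (an assumption, not part of the original file):
# the argument names mirror the `args` fields used above; the defaults are
# illustrative guesses, not values confirmed by the source.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--img_ref', type=str, help='Directory of reference face images.')
    parser.add_argument('--video', type=str, default='0', help="Video file path, or '0' for the webcam.")
    parser.add_argument('--image_size', type=int, default=160, help='Face crop size in pixels (guessed default).')
    parser.add_argument('--margin', type=int, default=44, help='Margin around the detected face box (guessed default).')
    parser.add_argument('--threshold', type=float, default=1.0, help='Squared-distance verification threshold (guessed default).')
    parser.add_argument('--face_detect_threshold', type=float, default=0.8, help='Minimum MTCNN face probability (guessed default).')
    verification_test(parser.parse_args())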