class FaceCam():
    # Video class based on OpenCV
    def __init__(self):
        self.device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
        self.mtcnn = MTCNN(device=self.device)
        self.open = True
        self.gender_model = def_model('gender', self.device)
        self.gaze_model = def_model('gaze', self.device)
        self.emotion_model = def_model('emotion', self.device)
        self.multimodal_model = def_model('multimodal', self.device)

    def rec(self):
        global label
        cap = cv2.VideoCapture(0)
        while self.open:
            timer_start = time.time()
            print('start camera!')
            ret, frame = cap.read()
            try:
                # detect face boxes and probabilities
                boxes, probs = self.mtcnn.detect(frame, landmarks=False)
                # draw boxes on the frame
                frame = draw_bbox(frame, boxes, probs)
                # perform only when a face is detected
                if boxes is not None and len(boxes) > 0:
                    # extract the face ROIs
                    rois = detect_rois(boxes)
                    for roi in rois:
                        (start_Y, end_Y, start_X, end_X) = roi
                        face = frame[start_Y:end_Y, start_X:end_X]
                        print('detect time: ', time.time() - timer_start)
                        predict_start = time.time()
                        gender_i = predict(self.gender_model, face, self.device)
                        gaze_i = predict(self.gaze_model, face, self.device)
                        emotion_i = predict(self.emotion_model, face, self.device)
                        multimodal_i = predict(self.multimodal_model, face, self.device)
                        cv2.putText(frame, label['gender'][gender_i], (end_X - 50, start_Y - 55),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2, cv2.LINE_AA)
                        cv2.putText(frame, label['gaze'][gaze_i], (end_X - 50, start_Y - 40),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2, cv2.LINE_AA)
                        cv2.putText(frame, label['emotion'][emotion_i], (end_X - 50, start_Y - 25),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2, cv2.LINE_AA)
                        # fixed: the coordinate tuple below had mismatched parentheses
                        cv2.putText(frame, label['multimodal'][multimodal_i], (end_X - 50, start_Y - 10),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2, cv2.LINE_AA)
                        print('predict time: ', time.time() - predict_start)
            except Exception as e:
                print(e)
            # show the frame
            cv2.imshow('Demo', frame)
            # q to quit
            if cv2.waitKey(1) & 0xFF == ord('q'):
                print('Interrupted by user!')
                break
        # clean up and close windows
        cap.release()
        cv2.destroyAllWindows()
        print('All done!')
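# --- Not part of the original snippet ---
# FaceCam assumes helpers def_model, predict, draw_bbox and detect_rois defined
# elsewhere. Below is a minimal sketch of the two box helpers, under the
# assumption that `boxes` is the (N, 4) [x0, y0, x1, y1] array returned by
# MTCNN.detect().
def draw_bbox(frame, boxes, probs):
    # Draw each detected box and its confidence score on the frame.
    if boxes is None:
        return frame
    for box, prob in zip(boxes, probs):
        x0, y0, x1, y1 = [int(v) for v in box]
        cv2.rectangle(frame, (x0, y0), (x1, y1), (0, 0, 255), 2)
        cv2.putText(frame, f'{prob:.2f}', (x0, y0 - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
    return frame


def detect_rois(boxes):
    # Convert [x0, y0, x1, y1] boxes into (start_Y, end_Y, start_X, end_X) ROIs.
    return [(int(y0), int(y1), int(x0), int(x1)) for x0, y0, x1, y1 in boxes]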
# Smoke test for input_face_embeddings() (the function appears in full further
# below; the duplicated tail of its body has been dropped here). The coord
# argument is a normalized (x, y) point used to pick the matching face;
# [0.5, 0.5] below is a placeholder for the frame centre, and the call has been
# aligned with the function's signature.
if __name__ == "__main__":
    mtcnn = MTCNN(keep_all=True).eval()
    resnet = InceptionResnetV1(pretrained="vggface2").eval()
    res = input_face_embeddings(["a.jpg", "b.jpg"], is_path=True, mtcnn=mtcnn,
                                resnet=resnet, face_embed_cuda=False,
                                use_half=False, coord=[0.5, 0.5])
    print(res.shape)  # 512D
    print("Passed")
import os
from glob import glob

import numpy as np
import matplotlib.pyplot as plt
from facenet_pytorch import MTCNN
from tqdm.auto import tqdm

raw_data_dir = r'/media/rrtammyfs/labDatabase/celeb_a/faces'
processed_data_dir = r'/media/rrtammyfs/labDatabase/celeb_a/faces/processed/'

os.environ["CUDA_VISIBLE_DEVICES"] = '-1'  # force CPU

list_imgs = glob(os.path.join(raw_data_dir, "*/*.jpg"))
mtcnn = MTCNN(margin=10, select_largest=True, post_process=False)  # , device='cuda:0')

for img_path in tqdm(list_imgs):
    img = plt.imread(img_path)
    face = mtcnn(img)
    if face is not None:
        os.makedirs(os.path.join(processed_data_dir, img_path.split('/')[-2]), exist_ok=True)
        face = face.permute(1, 2, 0).int().numpy()
        plt.imsave(
            os.path.join(processed_data_dir, img_path.split('/')[-2], img_path.split('/')[-1]),
            face.astype(np.uint8))
class Demo():
    def __init__(self, args):
        ## configs
        self.device = 'cuda:0' if args.gpu else 'cpu'
        self.checkpoint_path = args.checkpoint
        self.detect_human_face = args.detect_human_face
        self.render_video = args.render_video
        self.output_size = args.output_size
        self.image_size = 64
        self.min_depth = 0.9
        self.max_depth = 1.1
        self.border_depth = 1.05
        self.xyz_rotation_range = 60
        self.xy_translation_range = 0.1
        self.z_translation_range = 0
        self.fov = 10  # in degrees

        # (-1,1) => (min_depth, max_depth)
        self.depth_rescaler = lambda d: (1 + d) / 2 * self.max_depth + (1 - d) / 2 * self.min_depth
        # (min_depth, max_depth) => (0,1)
        self.depth_inv_rescaler = lambda d: (d - self.min_depth) / (self.max_depth - self.min_depth)

        fx = (self.image_size - 1) / 2 / (np.tan(self.fov / 2 * np.pi / 180))
        fy = (self.image_size - 1) / 2 / (np.tan(self.fov / 2 * np.pi / 180))
        cx = (self.image_size - 1) / 2
        cy = (self.image_size - 1) / 2
        K = [[fx, 0., cx],
             [0., fy, cy],
             [0., 0., 1.]]
        K = torch.FloatTensor(K).to(self.device)
        self.inv_K = torch.inverse(K).unsqueeze(0)
        self.K = K.unsqueeze(0)

        ## NN models
        self.netD = EDDeconv(cin=3, cout=1, nf=64, zdim=256, activation=None)
        self.netA = EDDeconv(cin=3, cout=3, nf=64, zdim=256)
        self.netL = Encoder(cin=3, cout=4, nf=32)
        self.netV = Encoder(cin=3, cout=6, nf=32)
        self.netD = self.netD.to(self.device)
        self.netA = self.netA.to(self.device)
        self.netL = self.netL.to(self.device)
        self.netV = self.netV.to(self.device)
        self.load_checkpoint()
        self.netD.eval()
        self.netA.eval()
        self.netL.eval()
        self.netV.eval()

        ## face detector
        if self.detect_human_face:
            from facenet_pytorch import MTCNN
            self.face_detector = MTCNN(select_largest=True, device=self.device)

        ## renderer
        if self.render_video:
            from unsup3d.renderer import Renderer
            assert 'cuda' in self.device, \
                'A GPU device is required for rendering because the neural_renderer only has a GPU implementation.'
            cfgs = {
                'device': self.device,
                'image_size': self.output_size,
                'min_depth': self.min_depth,
                'max_depth': self.max_depth,
                'fov': self.fov,
            }
            self.renderer = Renderer(cfgs)

    def load_checkpoint(self):
        print(f"Loading checkpoint from {self.checkpoint_path}")
        cp = torch.load(self.checkpoint_path, map_location=self.device)
        self.netD.load_state_dict(cp['netD'])
        self.netA.load_state_dict(cp['netA'])
        self.netL.load_state_dict(cp['netL'])
        self.netV.load_state_dict(cp['netV'])

    def depth_to_3d_grid(self, depth, inv_K=None):
        if inv_K is None:
            inv_K = self.inv_K
        b, h, w = depth.shape
        grid_2d = get_grid(b, h, w, normalize=False).to(depth.device)  # Nxhxwx2
        depth = depth.unsqueeze(-1)
        grid_3d = torch.cat((grid_2d, torch.ones_like(depth)), dim=3)
        grid_3d = grid_3d.matmul(inv_K.transpose(2, 1)) * depth
        return grid_3d

    def get_normal_from_depth(self, depth):
        b, h, w = depth.shape
        grid_3d = self.depth_to_3d_grid(depth)
        tu = grid_3d[:, 1:-1, 2:] - grid_3d[:, 1:-1, :-2]
        tv = grid_3d[:, 2:, 1:-1] - grid_3d[:, :-2, 1:-1]
        normal = tu.cross(tv, dim=3)
        zero = normal.new_tensor([0, 0, 1])
        normal = torch.cat([zero.repeat(b, h - 2, 1, 1), normal, zero.repeat(b, h - 2, 1, 1)], 2)
        normal = torch.cat([zero.repeat(b, 1, w, 1), normal, zero.repeat(b, 1, w, 1)], 1)
        normal = normal / (((normal ** 2).sum(3, keepdim=True)) ** 0.5 + EPS)
        return normal

    def detect_face(self, im):
        print("Detecting face using MTCNN face detector")
        try:
            bboxes, prob = self.face_detector.detect(im)
            w0, h0, w1, h1 = bboxes[0]
        except Exception:
            print("Could not detect faces in the image")
            return None

        hc, wc = (h0 + h1) / 2, (w0 + w1) / 2
        crop = int(((h1 - h0) + (w1 - w0)) / 2 / 2 * 1.1)
        # allow cropping outside by replicating borders
        im = np.pad(im, ((crop, crop), (crop, crop), (0, 0)), mode='edge')
        h0 = int(hc - crop + crop + crop * 0.15)
        w0 = int(wc - crop + crop)
        return im[h0:h0 + crop * 2, w0:w0 + crop * 2]

    def run(self, pil_im):
        im = np.uint8(pil_im)

        ## face detection
        if self.detect_human_face:
            im = self.detect_face(im)
            if im is None:
                return -1

        h, w, _ = im.shape
        im = torch.FloatTensor(im / 255.).permute(2, 0, 1).unsqueeze(0)
        # resize to 128 first if too large, to avoid bilinear downsampling artifacts
        if h > self.image_size * 4 and w > self.image_size * 4:
            im = nn.functional.interpolate(im, (self.image_size * 2, self.image_size * 2),
                                           mode='bilinear', align_corners=False)
        im = nn.functional.interpolate(im, (self.image_size, self.image_size),
                                       mode='bilinear', align_corners=False)

        with torch.no_grad():
            self.input_im = im.to(self.device) * 2. - 1.
            b, c, h, w = self.input_im.shape

            ## predict canonical depth
            self.canon_depth_raw = self.netD(self.input_im).squeeze(1)  # BxHxW
            self.canon_depth = self.canon_depth_raw - self.canon_depth_raw.view(b, -1).mean(1).view(b, 1, 1)
            self.canon_depth = self.canon_depth.tanh()
            self.canon_depth = self.depth_rescaler(self.canon_depth)

            ## clamp border depth
            depth_border = torch.zeros(1, h, w - 4).to(self.input_im.device)
            depth_border = nn.functional.pad(depth_border, (2, 2), mode='constant', value=1)
            self.canon_depth = self.canon_depth * (1 - depth_border) + depth_border * self.border_depth

            ## predict canonical albedo
            self.canon_albedo = self.netA(self.input_im)  # Bx3xHxW

            ## predict lighting
            canon_light = self.netL(self.input_im)  # Bx4
            self.canon_light_a = canon_light[:, :1] / 2 + 0.5  # ambience term
            self.canon_light_b = canon_light[:, 1:2] / 2 + 0.5  # diffuse term
            canon_light_dxy = canon_light[:, 2:]
            self.canon_light_d = torch.cat([canon_light_dxy, torch.ones(b, 1).to(self.input_im.device)], 1)
            self.canon_light_d = self.canon_light_d / ((self.canon_light_d ** 2).sum(1, keepdim=True)) ** 0.5  # diffuse light direction

            ## shading
            self.canon_normal = self.get_normal_from_depth(self.canon_depth)
            self.canon_diffuse_shading = (self.canon_normal * self.canon_light_d.view(-1, 1, 1, 3)).sum(3).clamp(min=0).unsqueeze(1)
            canon_shading = self.canon_light_a.view(-1, 1, 1, 1) + \
                self.canon_light_b.view(-1, 1, 1, 1) * self.canon_diffuse_shading
            self.canon_im = (self.canon_albedo / 2 + 0.5) * canon_shading * 2 - 1

            ## predict viewpoint transformation
            self.view = self.netV(self.input_im)
            self.view = torch.cat([
                self.view[:, :3] * np.pi / 180 * self.xyz_rotation_range,
                self.view[:, 3:5] * self.xy_translation_range,
                self.view[:, 5:] * self.z_translation_range
            ], 1)

            ## export to obj strings
            vertices = self.depth_to_3d_grid(self.canon_depth)  # BxHxWx3
            self.objs, self.mtls = export_to_obj_string(vertices, self.canon_normal)

            ## resize to output size
            self.canon_depth = nn.functional.interpolate(
                self.canon_depth.unsqueeze(1), (self.output_size, self.output_size),
                mode='bilinear', align_corners=False).squeeze(1)
            self.canon_normal = nn.functional.interpolate(
                self.canon_normal.permute(0, 3, 1, 2), (self.output_size, self.output_size),
                mode='bilinear', align_corners=False).permute(0, 2, 3, 1)
            self.canon_normal = self.canon_normal / (self.canon_normal ** 2).sum(3, keepdim=True) ** 0.5
            self.canon_diffuse_shading = nn.functional.interpolate(
                self.canon_diffuse_shading, (self.output_size, self.output_size),
                mode='bilinear', align_corners=False)
            self.canon_albedo = nn.functional.interpolate(
                self.canon_albedo, (self.output_size, self.output_size),
                mode='bilinear', align_corners=False)
            self.canon_im = nn.functional.interpolate(
                self.canon_im, (self.output_size, self.output_size),
                mode='bilinear', align_corners=False)

            if self.render_video:
                self.render_animation()

    def render_animation(self):
        print("Rendering video animations")
        b, h, w = self.canon_depth.shape

        ## morph from target view to canonical
        morph_frames = 15
        view_zero = torch.FloatTensor([0.15 * np.pi / 180 * 60, 0, 0, 0, 0, 0]).to(self.canon_depth.device)
        morph_s = torch.linspace(0, 1, morph_frames).to(self.canon_depth.device)
        view_morph = morph_s.view(-1, 1, 1) * view_zero.view(1, 1, -1) + \
            (1 - morph_s.view(-1, 1, 1)) * self.view.unsqueeze(0)  # TxBx6

        ## yaw from canonical to both sides
        yaw_frames = 80
        yaw_rotations = np.linspace(-np.pi / 2, np.pi / 2, yaw_frames)
        # yaw_rotations = np.concatenate([yaw_rotations[40:], yaw_rotations[::-1], yaw_rotations[:40]], 0)

        ## whole rotation sequence
        view_after = torch.cat([view_morph, view_zero.repeat(yaw_frames, b, 1)], 0)
        yaw_rotations = np.concatenate([np.zeros(morph_frames), yaw_rotations], 0)

        def rearrange_frames(frames):
            morph_seq = frames[:, :morph_frames]
            yaw_seq = frames[:, morph_frames:]
            out_seq = torch.cat([
                morph_seq[:, :1].repeat(1, 5, 1, 1, 1),
                morph_seq,
                morph_seq[:, -1:].repeat(1, 5, 1, 1, 1),
                yaw_seq[:, yaw_frames // 2:],
                yaw_seq.flip(1),
                yaw_seq[:, :yaw_frames // 2],
                morph_seq[:, -1:].repeat(1, 5, 1, 1, 1),
                morph_seq.flip(1),
                morph_seq[:, :1].repeat(1, 5, 1, 1, 1),
            ], 1)
            return out_seq

        ## textureless shape
        front_light = torch.FloatTensor([0, 0, 1]).to(self.canon_depth.device)
        canon_shape_im = (self.canon_normal * front_light.view(1, 1, 1, 3)).sum(3).clamp(min=0).unsqueeze(1)
        canon_shape_im = canon_shape_im.repeat(1, 3, 1, 1) * 0.7
        shape_animation = self.renderer.render_yaw(canon_shape_im, self.canon_depth,
                                                   v_after=view_after,
                                                   rotations=yaw_rotations)  # BxTxCxHxW
        self.shape_animation = rearrange_frames(shape_animation)

        ## normal map
        canon_normal_im = self.canon_normal.permute(0, 3, 1, 2) / 2 + 0.5
        normal_animation = self.renderer.render_yaw(canon_normal_im, self.canon_depth,
                                                    v_after=view_after,
                                                    rotations=yaw_rotations)  # BxTxCxHxW
        self.normal_animation = rearrange_frames(normal_animation)

        ## textured
        texture_animation = self.renderer.render_yaw(self.canon_im / 2 + 0.5, self.canon_depth,
                                                     v_after=view_after,
                                                     rotations=yaw_rotations)  # BxTxCxHxW
        self.texture_animation = rearrange_frames(texture_animation)

    def save_results(self, save_dir):
        print(f"Saving results to {save_dir}")
        save_image(save_dir, self.input_im[0] / 2 + 0.5, 'input_image')
        save_image(save_dir, self.depth_inv_rescaler(self.canon_depth)[0].repeat(3, 1, 1), 'canonical_depth')
        save_image(save_dir, self.canon_normal[0].permute(2, 0, 1) / 2 + 0.5, 'canonical_normal')
        save_image(save_dir, self.canon_diffuse_shading[0].repeat(3, 1, 1), 'canonical_diffuse_shading')
        save_image(save_dir, self.canon_albedo[0] / 2 + 0.5, 'canonical_albedo')
        save_image(save_dir, self.canon_im[0].clamp(-1, 1) / 2 + 0.5, 'canonical_image')
        with open(os.path.join(save_dir, 'result.mtl'), "w") as f:
            f.write(self.mtls[0].replace('$TXTFILE', './canonical_image.png'))
        with open(os.path.join(save_dir, 'result.obj'), "w") as f:
            f.write(self.objs[0].replace('$MTLFILE', './result.mtl'))
        if self.render_video:
            save_video(save_dir, self.shape_animation[0], 'shape_animation')
            save_video(save_dir, self.normal_animation[0], 'normal_animation')
            save_video(save_dir, self.texture_animation[0], 'texture_animation')
class FaceDetect:
    def __init__(self, thresholds=[0.9, 0.9, 0.9], min_face_size=100):
        self.mtcnn = MTCNN(thresholds=thresholds, select_largest=True,
                           post_process=False, device='cuda:0',
                           min_face_size=min_face_size)

    def detect(self, img_ls, crop_size=None, mode='Extract_largest',
               save_faces=False, save_annotate=False, save_path='face_result'):
        """Face detection.

        Args:
            img_ls (list): list of same-sized PIL images.
            crop_size (tuple, optional): crop images with (left, top, right, bottom). Defaults to None.
            mode (str, optional): one of 'Detect_bool', 'Detect', 'Extract_largest' or 'Extract_all'.
                Use 'Detect_bool' if you only want to know whether any faces are present,
                'Detect' to get boxes and probs of faces, and the 'Extract' modes to get
                all information about the faces. Defaults to 'Extract_largest'.
            save_faces (bool, optional): for 'Extract' modes; save cropped faces. Defaults to False.
            save_annotate (bool, optional): for 'Extract' modes; save images with annotations. Defaults to False.
            save_path (str, optional): output directory. Defaults to 'face_result'.

        Returns:
            tuple: depends on the mode.
        """
        if crop_size:
            for i, img in enumerate(img_ls):
                img_ls[i] = img.crop(crop_size)
        try:
            boxes, probs = self.mtcnn.detect(img_ls)
        except Exception as e:
            print(f'{e} \n...add crop_size=(left, top, right, bottom) to make images the same')

        if mode == 'Detect_bool':
            return isinstance(boxes, np.ndarray)
        elif mode == 'Detect':
            return boxes, probs
        elif 'Extract' in mode:
            faces = []
            annotates = []
            boxes = boxes.tolist()
            probs = probs.tolist()
            for id_, img in enumerate(img_ls):
                face_batch = []
                img_annotate = img.copy()
                draw = ImageDraw.Draw(img_annotate)
                box_all = boxes[id_]
                if mode == 'Extract_largest':
                    for i, box in enumerate(box_all):
                        left = max(0, box[0])
                        top = max(0, box[1])
                        right = min(np.array(img_ls[id_]).shape[1], box[2])
                        down = min(np.array(img_ls[id_]).shape[0], box[3])
                        box_all[i] = [left, top, right, down]
                    area = list(map(self._cal_area, box_all))
                    max_id = area.index(max(area))
                    box = box_all[max_id]
                    box_head = [box[0] - box[0] / 8, box[1] - box[1] / 5,
                                box[2] + box[2] / 8, box[3] + box[3] / 10]
                    boxes[id_] = [box_head]
                    probs[id_] = [probs[id_][max_id]]
                    draw.rectangle(box_head, width=5)
                    if save_faces:
                        if not os.path.exists(save_path):
                            os.mkdir(save_path)
                        if not os.path.exists(os.path.join(save_path, 'faces')):
                            os.mkdir(os.path.join(save_path, 'faces'))
                        face_batch.append(
                            extract_face(img, box_head,
                                         save_path=os.path.join(save_path,
                                                                f'detected_face_{id_}-{0}.png')))
                    else:
                        face_batch.append(extract_face(img, box_head))
                elif mode == 'Extract_all':
                    for i, box in enumerate(box_all):
                        box_head = [box[0] - box[0] / 3, box[1] - box[1] / 3,
                                    box[2] + box[2] / 83, box[3] + box[3] / 10]
                        box_all[i] = box_head
                        draw.rectangle(box_head, width=5)  # box.tolist()
                        if save_faces:
                            if not os.path.exists(save_path):
                                os.mkdir(save_path)
                            if not os.path.exists(os.path.join(save_path, 'faces')):
                                os.mkdir(os.path.join(save_path, 'faces'))
                            face_batch.append(
                                extract_face(img, box_head,
                                             save_path=os.path.join(save_path,
                                                                    f'detected_face_{id_}-{i}.png')))
                        else:
                            face_batch.append(extract_face(img, box_head))
                else:
                    print(f"Error: there's no mode called {mode}")
                faces.append(face_batch)
                annotates.append(np.asarray(img_annotate))
                if save_annotate:
                    if not os.path.exists(save_path):
                        os.mkdir(save_path)
                    if not os.path.exists(os.path.join(save_path, 'annotations')):
                        os.mkdir(os.path.join(save_path, 'annotations'))
                    img_annotate.save(os.path.join(save_path, f'annotated_faces_{id_}.png'))
            return np.asarray(boxes), probs, annotates, faces
        else:
            print(f"Error: there's no mode called {mode}")

    def _cal_area(self, ls):
        return (ls[2] - ls[0]) * (ls[3] - ls[1])
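# --- Not part of the original snippet ---
# A minimal usage sketch for FaceDetect above. The image file names are
# hypothetical; detect() expects a list of same-sized PIL images.
from PIL import Image

detector = FaceDetect(min_face_size=80)
img_ls = [Image.open('frame_0.jpg'), Image.open('frame_1.jpg')]
boxes, probs, annotates, faces = detector.detect(img_ls, mode='Extract_largest',
                                                 save_faces=True, save_annotate=True)
print(boxes.shape, probs)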
from facenet_pytorch import MTCNN, InceptionResnetV1
from keras.models import load_model
import torch
from torchvision import datasets
from torch.utils.data import DataLoader
from PIL import Image
import mtcnn
import cv2
import time
import os

mtcnn = MTCNN(image_size=240, margin=0, keep_all=True, min_face_size=40)  # keep_all=True
resnet = InceptionResnetV1(pretrained='vggface2').eval()


def cropped_image(img_path):
    img = cv2.imread(img_path)
    img0 = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img_cropped_list, prob_list = mtcnn(img0, return_prob=True)
    new_img1 = cv2.imread('white.png')
    new_img2 = cv2.imread('white.png')
    new_img3 = cv2.imread('white.png')
    new_img4 = cv2.imread('white.png')
    if img_cropped_list is not None:
        boxes, _, faces = mtcnn.detect(img0, landmarks=True)
        for i, prob in enumerate(prob_list):
            box = boxes[i]
            cropped = img[int(box[1])-50: int(box[3])+50,
class FaceAndHandDetector(QThread):
    frame_update_signal = pyqtSignal(QPixmap)

    def __init__(self):
        QThread.__init__(self)
        self.frame = 0
        self.mtcnn = MTCNN()
        self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        # print(self.device)
        self.frame_counter = 0
        self.prev_frame_counter = 0
        self.timer = QTimer(self)
        self.timer.timeout.connect(self.fps_count)
        self.timer.start(1000)
        self.model = HPSearchNET(cnn_num=3, fc_num=2, kern_size=3, func_act='elu',
                                 nn_prn=True, in_shape=160).to(self.device)
        self.model.load_state_dict(torch.load("hnd_net_elu_cnn3_fc2_kr3.pth",
                                              map_location=self.device))
        self.model.eval()

    # Draw the face bounding box
    def draw_face(self, frame, boxes, probs):  # , landmarks
        try:
            cnt = 0
            for box, prob in zip(boxes, probs):  # , ld , landmarks
                cnt += 1
                print(f"Face {cnt} box: {box} prob: {prob:.4f}")
                # Draw the face bounding rectangle on the frame
                cv2.rectangle(frame, (box[0], box[1]), (box[2], box[3]),
                              (0, 0, 255), thickness=2)
        except Exception as e:
            print('Error in _draw')
            print(f'error : {e}')
        return frame

    # Draw the hand bounding boxes
    def draw_hand(self, frame, hand_landmarks):
        # mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
        max_x = max_y = 0
        min_x = min_y = 65535
        for mark in hand_landmarks.landmark:
            if mark.x > max_x:
                max_x = mark.x
            if mark.x < min_x:
                min_x = mark.x
            if mark.y > max_y:
                max_y = mark.y
            if mark.y < min_y:
                min_y = mark.y
        max_x = round(max_x * IMAGE_WIDTH) + 30
        min_x = round(min_x * IMAGE_WIDTH) - 30
        max_y = round(max_y * IMAGE_HEIGHT) + 30
        min_y = round(min_y * IMAGE_HEIGHT) - 30
        if min_x < 0:
            min_x = 0
        if min_y < 0:
            min_y = 0
        if max_x > IMAGE_WIDTH:
            max_x = IMAGE_WIDTH
        if max_y > IMAGE_HEIGHT:
            max_y = IMAGE_HEIGHT
        print(f"\tmax_x: {max_x} min_x: {min_x} max_y: {max_y} min_y: {min_y}")
        # Draw the hand bounding rectangle on the frame
        cv2.rectangle(frame, (min_x, min_y), (max_x, max_y), (0, 255, 0), thickness=2)
        return frame, [min_x, min_y, max_x, max_y]

    def fps_count(self):
        self.prev_frame_counter, self.frame_counter = self.frame_counter, 0
        # self.frame_counter = 0

    # Detect whether hands are present in the frame
    def hand_detection_mp(self, frame):
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # cv2.flip(frame, 1)
        frame.flags.writeable = False
        results = hands.process(frame)
        frame.flags.writeable = True
        if results.multi_hand_landmarks:
            count = 0
            for hand_landmarks in results.multi_hand_landmarks:
                count += 1
                print(f"Hand {count}")
                print(
                    f'\tIndex finger tip coordinates: ('
                    f'x: {round(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].x * IMAGE_WIDTH)}, '
                    f'y: {round(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y * IMAGE_HEIGHT)})'
                )
                for num, mark in enumerate(hand_landmarks.landmark):
                    print(f"\tLandmark {arm_marks[num]}"
                          f"- x: {round(mark.x * IMAGE_WIDTH)}, y: {round(mark.y * IMAGE_HEIGHT)}")
        return results

    # Main loop: read and process each frame
    def run(self):
        # Loop forever
        while True:
            if cam_index_list:
                # Read each new frame; ret is a boolean flag telling
                # whether a frame was actually read from the stream
                hands = []
                ret, self.frame = cam.read()
                self.frame = cv2.flip(self.frame, 1)
                try:
                    # Detect face locations in the frame and the probability that each is a face
                    boxes, probs = self.mtcnn.detect(self.frame, landmarks=False)  # , landmarks
                    if boxes is not None:
                        # Draw on the frame
                        self.frame = self.draw_face(self.frame, boxes, probs)  # , landmarks
                    # Look for hands
                    hand_detect_rez = self.hand_detection_mp(self.frame)
                    if hand_detect_rez.multi_hand_landmarks:
                        for hand_landmarks in hand_detect_rez.multi_hand_landmarks:
                            self.frame, hand_box = self.draw_hand(self.frame, hand_landmarks)
                            hands.append(self.filter_hand(self.frame, hand_box))  # 160x160
                            # Normalize the image to [0, 1]
                            img = torch.from_numpy(hands[-1]) / 255
                            img = img.unsqueeze(0).unsqueeze(0)
                            with torch.no_grad():
                                outputs = self.model(img.to(self.device))
                                _, predicted = torch.max(outputs.data, 1)
                            print(f"predicted: {labels_texts[int(predicted)]}")
                            # Write the recognized gesture on the frame
                            cv2.putText(self.frame, labels_texts[int(predicted)],
                                        (hand_box[2], hand_box[3]),
                                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
                except Exception as e:
                    print(f'Error {e} in run')
                # Write the FPS value on the frame
                cv2.putText(self.frame, f"FPS: {self.prev_frame_counter}", (20, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 1, cv2.LINE_AA)
                self.frame_counter += 1
                self.frame_update_signal.emit(self.frame_to_qpixmap(self.frame))
                # cv2.imshow(self.label, self.frame)
                # if hands:
                #     self.frame_update_signal.emit(self.frame_to_qpixmap(hands[0]))

    # Convert a frame to QPixmap
    def frame_to_qpixmap(self, frame):
        rgb_image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        convert_to_qt_format = QImage(rgb_image.data, rgb_image.shape[1],
                                      rgb_image.shape[0], QImage.Format_RGB888)
        convert_to_qt_format = QPixmap.fromImage(convert_to_qt_format)
        pixmap = QPixmap(convert_to_qt_format)
        return pixmap

    def filter_hand(self, frame, hand_box):
        hand_img = frame[int(hand_box[1]):int(hand_box[3]), int(hand_box[0]):int(hand_box[2])]
        # hand_img = cv2.resize(hand_img, (48, 48))
        hsv = cv2.cvtColor(hand_img, cv2.COLOR_BGR2HSV)
        # define range of skin color in HSV
        lower_skin = np.array([0, 20, 70], dtype=np.uint8)
        upper_skin = np.array([20, 255, 255], dtype=np.uint8)
        # extract the skin-colored region
        mask = cv2.inRange(hsv, lower_skin, upper_skin)
        # extrapolate the hand to fill dark spots within
        # kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (11, 11))
        kernel = np.ones((3, 3), np.uint8)  # uncommented: `kernel` was otherwise undefined
        mask = cv2.erode(mask, kernel, iterations=2)
        mask = cv2.dilate(mask, kernel, iterations=2)
        # mask = cv2.dilate(mask, kernel, iterations=4)
        # blur the image
        mask = cv2.GaussianBlur(mask, (3, 3), 0)  # 10
        # mask = cv2.resize(mask, (48, 48))
        # hand_img = cv2.resize(hand_img, (48, 48))
        res = cv2.bitwise_and(hand_img, hand_img, mask=mask)
        res = cv2.resize(res, (160, 160))
        # Convert to a single-channel grayscale image
        res = cv2.cvtColor(res, cv2.COLOR_BGR2GRAY)
        return res
def main(output_dir: str = 'output',
         features: Features = Features(),
         parameters: TrainParameters = TrainParameters(),
         datasets_dir: str = 'datasets',
         dataset: str = 'UTKFace',
         pretrained=None,
         pretrained_encoder='models/encoder/05_mobilenet_v3_small_003.pt',
         preload: bool = False,
         use_preprocessed: bool = False,
         device: torch.device = torch.device('cpu')):
    global log
    log = logger.Train(output_dir)

    model = models.getIntegrated(age=features.age, gender=features.gender,
                                 pretrained=pretrained,
                                 pretrained_encoder=pretrained_encoder).to(device)

    mtcnn = MTCNN(
        keep_all=True,
        min_face_size=100,
        image_size=160,
        margin=14,
        selection_method="center_weighted_size",
        post_process=True,
        device=device,
    )

    pre_transforms = transforms.Compose([
        transforms.Resize((160, 160)),
        np.float32,
        transforms.ToTensor(),
        fixed_image_standardization
    ])

    dataset_handler = dataLoaders.get(dataset=dataset, datasets_dir=datasets_dir,
                                      preload=preload,
                                      use_preprocessed=use_preprocessed,
                                      device=device)
    train_loader, validate_loader, test_loader = dataset_handler.get_loaders(
        transform=pre_transforms,
        train_size=parameters.train_size,
        validate_size=parameters.validate_size,
        test_size=parameters.test_size,
        batch_size=parameters.batch_size)

    criterion = nn.CrossEntropyLoss().to(device)

    if len(train_loader) > 0:
        for epoch in range(1, parameters.epochs + 1):
            log.epochBegin(epoch, parameters.epochs)
            train_all(model, features, train_loader, criterion, parameters, device)
            if len(validate_loader) > 0:
                validate_all(model, features, validate_loader, criterion, output_dir, device)

    if len(test_loader) > 0:
        test_all(model, features, test_loader, criterion, device)

    torch.save(model.state_dict(), os.path.join(output_dir, 'final_model.pt'))
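# --- Not part of the original snippet ---
# A hypothetical entry point showing how main() above could be invoked;
# the Features() and TrainParameters() defaults come from the signature.
if __name__ == '__main__':
    main(output_dir='output',
         dataset='UTKFace',
         device=torch.device('cuda:0' if torch.cuda.is_available() else 'cpu'))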
from facenet_pytorch import MTCNN
import cv2
from PIL import Image
from os import listdir, makedirs
import glob
from os.path import join, exists
from skimage.io import imsave

mtcnn = MTCNN(keep_all=True, margin=40, select_largest=False,
              post_process=False, device="cuda:0")

# Directory containing images respective to each video
source_frames_folders = ["./train_frames/0", "./train_frames/1"]
# Destination location where faces cropped out from images will be saved
dest_faces = "./train_face/"

for i in source_frames_folders:
    counter = 0
    for j in listdir(i):
        if i.find("0") != -1:
            dest_faces_folder = "{}0".format(dest_faces)
        else:
            dest_faces_folder = "{}1".format(dest_faces)
        imgs = glob.glob(join(i, j, "*.jpg"))
        if counter % 1000 == 0:
def detect_live(self):
    mtcnn = MTCNN()
    faces = {}
    frameCount = 0
    vid = cv2.VideoCapture(0)
    if self.record_for is not None:
        start_time = time.time()
    while vid.isOpened():
        if self.record_for is not None:
            curr_time = time.time() - start_time
            if curr_time > self.record_for:
                break
        _, frame = vid.read()
        frame = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
        frameCount = frameCount + 1
        boxes, probs = mtcnn.detect(frame)
        frame_draw = frame.copy()
        draw = ImageDraw.Draw(frame_draw)
        if boxes is not None:
            faces["frame_{}".format(frameCount)] = []
            for box, p in zip(boxes, probs):
                if p > 0.70:
                    draw.rectangle(box.tolist(), outline=(255, 0, 0), width=1)
                    if self.extract:
                        face = extract_face(frame, box.tolist())
                        faces["frame_{}".format(frameCount)].append(face)
                        if self.save:
                            img = self.tsfms(face)
                            if self.saveIn is None:
                                raise ValueError
                            else:
                                img.save(os.path.join(self.saveIn,
                                                      "frame_{}.jpg".format(len(faces))))
        cv2.imshow("Tracking window", cv2.cvtColor(np.array(frame_draw), cv2.COLOR_RGB2BGR))
        if self.save_video:
            self.frames_tracked.append(frame_draw)
        if cv2.waitKey(1) == ord("a"):
            break
    vid.release()
    if self.save_video:
        print(len(self.frames_tracked))
        self.saveVideo(self.saveIn, self.frames_tracked, "trackedVid")
    if self.save:
        return len(faces.keys()), faces
    else:
        return None, None
def detect(self):
    vid = cv2.VideoCapture(self.lookIn)
    frameCount = int(vid.get(cv2.CAP_PROP_FRAME_COUNT)) - 1
    mtcnn = MTCNN()
    bboxes_and_probs = []
    count = frameCount
    detected_frames = []  # added: this list was used below but never initialized
    while vid.isOpened():
        # if count < frameCount:
        #     break
        _, frame = vid.read()
        print("%d to go.." % (count))
        count -= 1
        frame = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        boxes, prob = mtcnn.detect(frame)
        frame_draw = frame.copy()
        draw = ImageDraw.Draw(frame_draw)
        if boxes is None:
            # print("Skipping Frame")
            if self.writeMode:
                detected_frames.append(frame_draw)
            cv2.imshow("Frame", cv2.cvtColor(np.asarray(frame_draw), cv2.COLOR_BGR2RGB))
            if cv2.waitKey(2) & 0xFF == ord('y'):
                break
            continue
        for box, p in zip(boxes, prob):
            if p > 0.80:
                # print("Not skipping!")
                draw.rectangle(box.tolist(), outline=(255, 0, 0), width=1)
                bboxes_and_probs.append({"bbox": box, "prob": p})
        if self.writeMode:
            detected_frames.append(frame_draw)
        cv2.imshow("Frame", cv2.cvtColor(np.asarray(frame_draw), cv2.COLOR_BGR2RGB))
        if cv2.waitKey(1) & 0xFF == ord('y'):
            break
    print("releasing capture")
    vid.release()
    if self.writeMode:
        dim = detected_frames[0].size
        print(dim, int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        video_tracked = cv2.VideoWriter(self.saveIn, fourcc, 25.0, dim)
        for frame in detected_frames:
            video_tracked.write(cv2.cvtColor(np.array(frame), cv2.COLOR_RGB2BGR))
        video_tracked.release()
    return bboxes_and_probs
from facenet_pytorch import MTCNN
import torch
import numpy as np
import cv2
from PIL import Image
from img_rotate import rotate
import os
import argparse

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print('Running on device: {}'.format(device))

mtcnn = MTCNN(keep_all=True, device=device, margin=50,
              select_largest=True, image_size=256)


def extract_face(frame, align=True, margin=5):
    if align:
        frame = rotate(np.array(frame))
        frame = Image.fromarray(frame)
    # mtcnn(frame, save_path=name)
    boxes, _ = mtcnn.detect(frame)
    for box in boxes:
        box_list = box.tolist()
        # bounding box coordinates
        x1 = int(box_list[0])
        y1 = int(box_list[1])
        x2 = int(box_list[2])
        y2 = int(box_list[3])
def __init__(self):
    self._config = ConfigProvider.config()
    self._device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print(f'Running on device: {self._device}')
    self._mtcnn = MTCNN(keep_all=True, device=self._device)
    self._bbox_tracker = BboxTracker()
from facenet_pytorch import MTCNN, extract_face
import torch
import numpy as np
import mmcv
import cv2
from PIL import Image, ImageDraw
from IPython import display
import glob
import os

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print('Running on device: {}'.format(device))
mtcnn = MTCNN(keep_all=True, device=device)

frames = []
# files = glob.glob("/home/jeff/datasets/TUM Gait/data_person1+2/image/p001/b01/*")
files = glob.glob("reid/b01/*")
for myFile in files:
    fileName = os.path.splitext(os.path.basename(myFile))[0]
    img = Image.open(myFile)
    boxes, probs, points = mtcnn.detect(img, landmarks=True)
    if boxes is not None:
        # Draw boxes and save faces
        img_draw = img.copy()
        draw = ImageDraw.Draw(img_draw)
        for i, (box, point) in enumerate(zip(boxes, points)):
            draw.rectangle(box.tolist(), width=5)
            for p in point:
    return im[int(h0):int(h1), int(w0):int(w1)]


#########################################################
UPLOAD_FOLDER = 'img_data/upload'
TARGET_FOLDER = 'img_data/target'
RESULT_FOLDER = 'img_data/result'

cudnn.benchmark = True
default_args = parse_args()
CelebA_HQ = create_model(copy.copy(default_args), 'CelebA-HQ')
AFHQ = create_model(copy.copy(default_args), 'AFHQ')
face_detector = MTCNN(select_largest=True, device=torch.device('cuda'))
requests_queue = Queue()
#########################################################

app = Flask(__name__, template_folder="./static/")
app.config['MAX_CONTENT_LENGTH'] = 1 * 1024 * 1024

BATCH_SIZE = 1
CHECK_INTERVAL = 0.1


# run model
def run(input_file, model_type):
    f_id = str(uuid.uuid4())
    fname = secure_filename(input_file.filename)
import datetime
import pickle
import argparse
import os

ap = argparse.ArgumentParser()
ap.add_argument("-v", "--videos", help="path to the video", default=0)
args = vars(ap.parse_args())

model = load_model('models/facenet_keras.h5')
# svm_model = joblib.load('models/svm_face_classification.pkl')
with open('models/svm_classification_1.pkl', 'rb') as file:
    svm_model = pickle.load(file)

mtcnn = MTCNN(keep_all=True, post_process=False)
names_array = [
    'Afsan', 'Amresh', 'Amritansh', 'Ayush', 'Harish', 'Keyur', 'Rahul'
]

video = cv2.VideoCapture(args['videos'])
cv2.namedWindow('face Recognition', cv2.WINDOW_NORMAL)
cv2.resizeWindow('face Recognition', 800, 800)

loop = True
while loop:
    ret, frame = video.read()
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    try:
        faces = mtcnn(frame_rgb)
        boxes, _ = mtcnn.detect(frame_rgb)
def main(source_path, dest_path):
    """
    Main function to iterate over the images in the raw data and generate
    data samples to train/test the FaceID model.
    """
    # img_dir = os.path.join(raw_data_path, 'aligned_images_DB')
    frame_dir = os.path.join(source_path, 'frame_images_DB')
    if not os.path.exists(dest_path):
        os.makedirs(dest_path)

    # set parameters
    num_imgs_per_face = 1
    target_im_shape = (160, 120)

    # set device
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print('Running on device: {}'.format(device))

    # create models
    mtcnn = MTCNN(image_size=80, margin=0, min_face_size=20,
                  thresholds=[0.6, 0.7, 0.7], factor=0.709,
                  post_process=True, device=device)
    resnet = InceptionResnetV1(pretrained='vggface2').eval().to(device)

    # run models on the images
    num_persons = 0
    num_faces = 0
    embedding_dict = {}
    subj_name_list = os.listdir(frame_dir)
    for f_n, face_file in enumerate(subj_name_list):
        if (f_n % 100) == 0:
            print('Subject %d of %d' % (f_n, len(subj_name_list)))
        f_path = os.path.join(frame_dir, face_file)
        if os.path.isfile(f_path):
            if face_file.endswith('txt'):
                with open(f_path, 'r') as file:
                    lines = file.readlines()
                num_persons += 1
                for line in lines:
                    num_faces += 1
                    img_name = line.split(',')[0]
                    subj_name, video_no, file_name = img_name.split('\\')
                    img_path = os.path.join(frame_dir, subj_name, video_no, file_name)
                    img = imread(img_path)
                    x_aligned, _, _ = mtcnn(img, return_prob=True)
                    if x_aligned is not None:
                        aligned = x_aligned[None, :, :, :].to(device)
                        embedding = resnet(aligned).detach().cpu().numpy()[0]
                        if subj_name not in embedding_dict:
                            embedding_dict[subj_name] = {}
                            subj_path = os.path.join(dest_path, subj_name)
                            if not os.path.exists(subj_path):
                                os.mkdir(subj_path)
                        if video_no not in embedding_dict[subj_name]:
                            embedding_dict[subj_name][video_no] = {}
                            video_path = os.path.join(dest_path, subj_name, video_no)
                            if not os.path.exists(video_path):
                                os.mkdir(video_path)
                        embedding_dict[subj_name][video_no][file_name] = embedding.tolist()

                        x_aligned_int = x_aligned.cpu().numpy()
                        x_aligned_int -= np.min(x_aligned_int)
                        x_aligned_int /= np.max(x_aligned_int)
                        x_aligned_int = (255.0 * x_aligned_int).astype(np.uint8)
                        np.save(os.path.join(dest_path, subj_name, video_no, file_name),
                                x_aligned_int)

                        rect = line.split(',')[2:6]
                        for i in range(4):
                            rect[i] = int(rect[i])
                        box = np.array([
                            int(rect[0]) - int(rect[2]) // 2,
                            int(rect[1]) - int(rect[3]) // 2,
                            int(rect[0]) + int(rect[2]) // 2,
                            int(rect[1]) + int(rect[3]) // 2
                        ])
                        img_arr, _, img, box = generate_image(img, box, num_imgs_per_face)
                        for img_idx in range(num_imgs_per_face):
                            new_file_name = '_'.join([
                                file_name,
                                str(target_im_shape[0]),
                                str(target_im_shape[1]),
                                str(img_idx)
                            ])
                            cropped_im_path = os.path.join(dest_path, subj_name,
                                                           video_no, new_file_name)
                            np.save(cropped_im_path, img_arr[img_idx])

    print('Number of People: %d' % num_persons)
    print('Number of Faces: %d' % num_faces)

    # save embeddings to a json file
    with open(os.path.join(dest_path, 'embeddings.json'), 'w') as out_file:
        json.dump(embedding_dict, out_file)
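# --- Not part of the original snippet ---
# A hypothetical CLI wrapper for main() above; the argument names are illustrative.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Crop faces and compute embeddings.')
    parser.add_argument('--source_path', required=True,
                        help='directory that contains frame_images_DB')
    parser.add_argument('--dest_path', required=True,
                        help='output directory for face crops and embeddings.json')
    cli_args = parser.parse_args()
    main(cli_args.source_path, cli_args.dest_path)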
from PIL import Image, ImageDraw
from facenet_pytorch import MTCNN, InceptionResnetV1
import torch
from torch.utils.data import DataLoader
from torchvision import datasets
import numpy as np
import time
import os

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
mtcnn = MTCNN(image_size=160, margin=0, min_face_size=20,
              thresholds=[0.6, 0.7, 0.7], factor=0.709,
              post_process=True, device=device)
resnet = InceptionResnetV1(pretrained='vggface2').eval().to(device)


def collate_fn(x):
    return x[0]


def process_image_database():
    dataset = datasets.ImageFolder('./test_images')
    dataset.idx_to_class = {i: c for c, i in dataset.class_to_idx.items()}
    loader = DataLoader(dataset, collate_fn=collate_fn, num_workers=1)
    aligned = []
    names = []
def find_face(self):
    """
    Find the face on the frames.

    Creates:
        self.faces
        self.frame_ids
    """

    def del_skipped_frames():
        idxs = [idx for idx, val in enumerate(self.centers) if val == 0]
        for index in sorted(idxs, reverse=True):
            del self.centers[index]
            del self.frames[index]
            del self.frame_ids[index]

    def medfilt_filter(step=7):
        y_ = medfilt([i[0] for i in self.centers], step)
        x_ = medfilt([i[1] for i in self.centers], step)
        return y_, x_

    self.centers, h_shift, w_shift, centers = ([], [], [], None)

    # fast MTCNN (PyTorch); used when CUDA is available
    if cuda.is_available():
        frames_cropped = []
        box_prev = None
        mtcnn = MTCNN(image_size=200, device=device)
        for frame in tqdm(self.frames):
            box, _ = mtcnn.detect(frame)
            if box is not None:
                box = np.array(box[0]).astype(int)
                x1, x2, y1, y2 = box[1], box[3], box[0], box[2]
                h_shift += [(y2 - y1) // 2]
                w_shift += [(x2 - x1) // 2]
                centers = [y1 + h_shift[-1], x1 + w_shift[-1]]
                # plt.imshow(frame[x1:x2, y1:y2])
                # plt.show()
                if centers is not None:
                    self.centers += [centers]
                else:
                    self.centers += [0]
            else:
                self.centers += [0]
        del mtcnn
        del_skipped_frames()
    # Haar cascade; used when CUDA is unavailable
    else:
        face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
        for frame in tqdm(self.frames):
            gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
            faces = face_cascade.detectMultiScale(gray)
            for (x, y, w, h) in faces:
                h_shift += [h // 2]
                w_shift += [w // 2]
                centers = [y + h // 2, x + w // 2]
            if centers is not None:
                self.centers += [centers]
            else:
                self.centers += [0]
        del face_cascade
        del_skipped_frames()

    self.box_shift = [np.mean(w_shift, dtype=int), np.mean(h_shift, dtype=int)]

    # drop outliers from the signal
    if len(self.centers) == 0:
        raise ValueError("Unable to detect a face")
    if cuda.is_available():
        y_, x_ = medfilt_filter(5)
    else:
        y_, x_ = medfilt_filter()
    self.centers = [[int(y), int(x)] for x, y in zip(x_, y_)]

    for frame, (y, x) in tqdm(zip(self.frames, self.centers)):
        face = frame[x - self.box_shift[0]:x + self.box_shift[0],
                     y - self.box_shift[1]:y + self.box_shift[1]]
        self.faces += [face]
        temp = 1
    else:
        cv2.putText(img, 'Unknown' + ': ' + '{0}'.format(diff),
                    (start[0], start[1] - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 1)
        temp = 0
    cv2.imshow('Detection', img)
    if temp == 1:
        return 1
    else:
        return 0


# Init MTCNN object
mtcnn = MTCNN(image_size=image_size, keep_all=True, device=device, post_process=True)
model = InceptionResnetV1(pretrained='vggface2', classify=False).eval()

# Real-time data from webcam
frames = []
boxes = []

# Load stored face data related to the respective card number
faces = []
face_names = []
face_file = None
try:
    for person in os.listdir(card_number):
        face_file = open(card_number + '/' + person, 'rb')
        if face_file is not None:
            face = pickle.load(face_file)
import os
from PIL import Image
from facenet_pytorch import MTCNN, InceptionResnetV1
import torch
import json
import pandas as pd
import datetime

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
mtcnn = MTCNN(keep_all=False, device=device)
resnet = InceptionResnetV1(pretrained='vggface2', device=device).eval()


def get_photo():
    names = []
    photos = []
    for _, dirs, files in os.walk('face'):
        names = dirs
        break
    for name in names:
        photo = Image.open(os.path.join("face", name, name + ".jpg"))
        photos.append(photo)
    return photos, names


def get_embedding():
    faces = []
    photos, names = get_photo()
    for photo in photos:
from PIL import Image, ImageDraw
import numpy as np
from facenet_pytorch import MTCNN
from matplotlib.pyplot import imshow
import cv2
import math
import os
from tqdm import tqdm
import pandas as pd

mtcnn = MTCNN(image_size=120, select_largest=False)


def rotate(origin, point, angle, row):
    """
    Rotate coordinates in the image coordinate system.

    :param origin: tuple of coordinates, the rotation center
    :param point: tuple of coordinates, the point to rotate
    :param angle: degrees of rotation
    :param row: row size of the image
    :return: rotated coordinates of the point
    """
    x1, y1 = point
    x2, y2 = origin
    y1 = row - y1
    y2 = row - y2
    angle = math.radians(angle)
    x = x2 + math.cos(angle) * (x1 - x2) - math.sin(angle) * (y1 - y2)
import torch
import numpy as np
import cv2
from matplotlib import pyplot as plt
import os
import copy
import torch.nn.functional as F
from facenet_pytorch import MTCNN

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
mtcnn = MTCNN(keep_all=True, device=device)


def corp_img(img, c):
    return img[c[1]:c[3], c[0]:c[2]]


def predict_draw(model, img):
    model.eval()
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    boxes, a = mtcnn.detect(img)
    if type(boxes) is not np.ndarray:
        return img
    for i in range(len(boxes)):
        bnd = boxes[i].astype(int)
        if bnd[3] - bnd[1] < 40:
            continue
        img2 = corp_img(img, bnd) / 255
        if len(img.shape) != 3:
            return img
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") mtcnn_params = dict( image_size=160, margin=0, min_face_size=30, thresholds=[0.6, 0.7, 0.7], factor=0.709, post_process=False, selection_method="probability", select_largest=True, keep_all=False, device=device, ) mtcnn = MTCNN(**mtcnn_params) class FrameExtractor(object): @staticmethod def get_video(fp: str) -> cv2.VideoCapture: """ Method that returns a video read from disk by cv2 Parameters ---------- fp : str Filepath to file Returns -------
def input_face_embeddings(
    frames: Union[List[str], np.ndarray],
    is_path: bool,
    mtcnn: MTCNN,
    resnet: InceptionResnetV1,
    face_embed_cuda: bool,
    use_half: bool,
    coord: List,
    name: str = None,
    save_frames: bool = False,
) -> torch.Tensor:
    """
    Get the face embedding.

    NOTE: If a face is not detected by the detector,
    instead of throwing an error it zeros the input for the embedder.

    NOTE: Memory-hungry function, hence the profiler.

    Args:
        frames: Frames from the video
        is_path: Whether to read from filesystem or memory
        mtcnn: face detector
        resnet: face embedder
        face_embed_cuda: use cuda for the models
        use_half: use half precision
        coord: normalized (x, y) coordinate used to select the matching face
        name: output name prefix when save_frames is set
        save_frames: save the cropped faces to disk

    Returns:
        emb: Embedding for all input frames
    """
    if face_embed_cuda and torch.cuda.is_available():
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")
    result_cropped_tensors = []
    no_face_indices = []
    for i, f in enumerate(frames):
        if is_path:
            frame = Image.open(f)
            # fixed: f is a path here, so read the dimensions from the PIL image
            width, height = frame.size
        else:
            frame = Image.fromarray(f.astype("uint8"))
            height, width, c = f.shape

        with torch.no_grad():
            cropped_tensors = None
            bounding_box, prob = mtcnn.detect(frame)
            if bounding_box is not None:
                for box in bounding_box:
                    x1, y1, x2, y2 = box
                    if x1 > x2:
                        x1, x2 = x2, x1
                    if y1 > y2:
                        y1, y2 = y2, y1
                    # for point in coord:
                    x, y = coord[0], coord[1]
                    x *= width
                    y *= height
                    if x >= x1 and y >= y1 and x <= x2 and y <= y2:
                        cropped_tensors = extract_face(frame, box)
                        # print("found", box, x, y, end='\r')
                        break

        if cropped_tensors is None:
            # Face not detected, for some reason
            cropped_tensors = torch.zeros((3, 160, 160))
            no_face_indices.append(i)

        if save_frames:
            name = name.replace(".mp4", "")
            saveimg = cropped_tensors.detach().cpu().numpy().astype("uint8")
            saveimg = np.squeeze(saveimg.transpose(1, 2, 0))
            Image.fromarray(saveimg).save(f"{name}_{i}.png")

        result_cropped_tensors.append(cropped_tensors.to(device))

    if len(no_face_indices) > 20:
        # a few videos start with silence; allow 0.5 seconds of silence, else remove
        return None
    del frames

    # Stack all frames
    result_cropped_tensors = torch.stack(result_cropped_tensors)
    # Embed all frames
    result_cropped_tensors = result_cropped_tensors.to(device)
    if use_half:
        result_cropped_tensors = result_cropped_tensors.half()
    with torch.no_grad():
        emb = resnet(result_cropped_tensors)
    if use_half:
        emb = emb.float()
    return emb.to(torch.device("cpu"))  # fixed: cpu_device was undefined in the original
from skimage.io import imsave
from facenet_pytorch import MTCNN
import cv2
from PIL import Image
import os
from pathlib import Path


def show_img(img):
    cv2.imshow(winname="Fa", mat=img)
    cv2.waitKey(delay=0)
    cv2.destroyAllWindows()


mtcnn = MTCNN(margin=50, select_largest=False, post_process=False, device="cuda:0")

# =============================================================================
# source_frames_folders = Path(r'C:\Users\jeremy\Desktop\2021DF\model\666\os\000_003')
#
# video = [x for x in source_frames_folders.iterdir()]
#
# problem = []  # some videos which failed
#
# dst = r'C:\Users\jeremy\Desktop\2021DF\model\666\os\000_003\tt\546.jpg'
# =============================================================================
# =============================================================================
# for i in video:
    fname = sorted(fname)
    dname = sorted(dname)
    return fname, dname


if __name__ == "__main__":
    # Parameters
    basedir = '/projects/jeff/TUMGAIDimage'

    # Check whether a GPU is available
    device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')
    print('Running on device: {}'.format(device))

    # Set up the MTCNN module
    mtcnn = MTCNN(keep_all=False, device=device, post_process=False)

    # Get the list of files and directories
    fname, dname = listar_imagens(basedir)

    # Detect faces and save them to the facecrops folder
    inicio = time.time()
    print('Processing started')
    facecrop = [it.replace(basedir, basedir + '_faces') for it in fname]
    for f, filename in enumerate(fname):
        try:
            img = Image.open(filename)
            box, prob = mtcnn.detect(img)
        except Exception:
            print('Failed to process file ' + filename)
            continue
def main():
    # Read options
    opt = TestOptions().parse(save=False)
    # If a demo directory to save generated frames is given
    if opt.demo_dir is not None and not os.path.exists(opt.demo_dir):
        os.makedirs(opt.demo_dir)
    # Hardcoded constant values
    opt.nThreads = 0
    opt.batchSize = 1
    opt.serial_batches = True
    # GPU id to be used for mxnet/reconstructor
    opt.gpu_id = opt.gpu_ids[-1]
    # Device to be used for the MTCNN face detector
    detector_device = 'cpu'
    # Face bounding box margin
    margin = 120
    # How many frames from the target's training video
    # to consider when gathering head pose and eye size statistics
    n_frames_target_used = 1000
    # How many of the first source frames to consider for eye size adaptation
    # between source and target.
    n_frames_init = 25
    # For cuda initialization errors.
    torch.multiprocessing.set_start_method('spawn', force=True)

    # Initialize video renderer.
    modelG = create_model(opt)
    # Initialize NMFC renderer.
    renderer = NMFCRenderer(opt)
    # Initialize face detector.
    detector = MTCNN(image_size=opt.loadSize, margin=margin,
                     post_process=False, device=detector_device)
    # Initialize landmark extractor.
    dlib_detector = dlib.get_frontal_face_detector()
    dlib_predictor = dlib.shape_predictor(
        'preprocessing/files/shape_predictor_68_face_landmarks.dat')

    # Read the identity parameters from the target person.
    id_params, _ = read_params('id', os.path.join(opt.dataroot, 'train', 'id_coeffs'),
                               opt.target_name)
    # Read camera parameters from the target.
    t_cam_params, _ = read_params('cam', os.path.join(opt.dataroot, 'train', 'misc'),
                                  opt.target_name)
    t_cam_params = t_cam_params[:n_frames_target_used]
    # Read eye landmarks from the target's video.
    eye_landmarks_target = read_eye_landmarks(
        os.path.join(opt.dataroot, 'train', 'landmarks70'), opt.target_name)
    eye_landmarks_target[0] = eye_landmarks_target[0][:n_frames_target_used]
    eye_landmarks_target[1] = eye_landmarks_target[1][:n_frames_target_used]

    # Set up camera capture.
    window_name = 'Head2Head Demo'
    video_capture = cv2.VideoCapture(0)
    video_capture.set(cv2.CAP_PROP_BUFFERSIZE, 2)  # set double buffer for capture
    fps = video_capture.get(cv2.CAP_PROP_FPS)
    print("Video capture at {} fps.".format(fps))

    proccesses = []

    # Face tracker / detector
    box_redecect_nframes = opt.box_redetect_nframes
    box = None  # Face bounding box, calculated on the first frame

    # Face reconstructor / NMFC renderer
    nmfc = None  # Current NMFC image
    s_cam_params = []  # camera parameters of the source video
    adapted_cam_params = []  # camera parameters of the source video, adapted to the target

    # Facial (eye) landmark detector
    prev_eye_centres = None  # Eye centres in the previous frame
    eye_landmarks = None  # Final eye landmarks, sent to the video renderer
    eye_landmarks_source = [[], []]  # Eye landmarks from the n_frames_init first frames of the source video
    eye_landmarks_source_queue = Queue()  # Queue to write extracted eye landmarks from the source video
    landmarks_success_queue = Queue()  # Queue to write whether eye landmark detection was successful
    frames_queue = Queue()  # Queue for writing video frames, read by the landmark detector process

    # Process for running 68 + 2 landmark detection in parallel with
    # face reconstruction / NMFC rendering
    proccess_eye_landmarks = Process(
        target=compute_eye_landmarks,
        args=(dlib_detector, dlib_predictor, eye_landmarks_source_queue,
              landmarks_success_queue, frames_queue))
    proccess_eye_landmarks.start()
    proccesses.append(proccess_eye_landmarks)
    print('Launched landmark extractor!')

    # Video renderer (GAN).
    input_queue = torchQueue()  # Queue of the GAN's input
    output_queue = torchQueue()  # Queue of the GAN's output
    # Process for running the video renderer without waiting for NMFC + eye landmark creation.
    proccess_video_renderer = torchProcess(target=compute_fake_video,
                                           args=(input_queue, output_queue, modelG, opt))
    proccess_video_renderer.start()
    proccesses.append(proccess_video_renderer)
    print('Launched video renderer!')

    camera = None
    if opt.realtime:
        try:
            import pyfakewebcam
            stream_id = opt.realtime_cam_id
            webcam_width = webcam_height = opt.loadSize
            camera = pyfakewebcam.FakeWebcam(f'/dev/video{stream_id}',
                                             webcam_width, webcam_height)
            camera.print_capabilities()
            print(f'Fake webcam created on /dev/video{stream_id}.')
        except Exception as ex:
            print('Fake webcam initialization failed:')
            print(str(ex))

    iter = 0
    # Start the main process (face reconstruction / NMFC rendering)
    while True:
        t0 = time.perf_counter()
        try:
            # Read generated frames from the video renderer's output queue (non-blocking).
            fake_frame, real_frame = output_queue.get_nowait()
            result = np.concatenate([real_frame, fake_frame[..., ::-1]], axis=1)
            # If an output directory is specified, save the frames there.
            if opt.demo_dir is not None:
                result_path = os.path.join(opt.demo_dir, "{:06d}".format(iter) + '.png')
                cv2.imwrite(result_path, result)
            elif camera is not None:
                camera.schedule_frame(fake_frame)
            else:
                cv2.imshow(window_name, result)
                cv2.waitKey(1)
        except queue.Empty:
            # If the queue is empty, continue.
            pass

        # Read the next frame.
        _, frame = video_capture.read()
        # Crop the larger dimension of the frame to make it square.
        frame = make_frame_square(frame)

        if box_redecect_nframes > 0 and iter % box_redecect_nframes == 0:
            box = None
        # If no bounding box has been detected yet, run MTCNN (once, on the first frame).
        if box is None:
            box = detect_box(detector, frame)
        # If no face is detected, exit.
        if box is None:
            break

        # Crop the frame at the point where the face was seen in the first frame.
        frame = extract_face(frame, box, opt.loadSize, margin)
        frame = tensor2npimage(frame)
        frame = np.transpose(frame, (1, 2, 0))
        # Send the ROI frame to the landmark detector while the main process
        # performs face reconstruction.
        frames_queue.put(frame)

        # Get expression and pose, adapt pose and identity to the target, and render the NMFC.
        success, s_cam_params, adapted_cam_params, new_nmfc = \
            compute_reconstruction(renderer, id_params, t_cam_params, s_cam_params,
                                   adapted_cam_params, frame)
        # Update the current NMFC if reconstruction was successful.
        if success:
            nmfc = new_nmfc
        # If not, use the previous NMFC. If it does not exist, exit.
        if not success and nmfc is None:
            break

        # Find eye centres using the NMFC image.
        eye_centres, prev_eye_centres = search_eye_centres([nmfc[:, :, ::-1]],
                                                           prev_eye_centres)

        # Read the queue to get eye landmarks, if detection was successful.
        if landmarks_success_queue.get():
            eye_landmarks = eye_landmarks_source_queue.get()
        # If not, use the previous eye landmarks. If they do not exist, exit.
        if eye_landmarks is None:
            break

        # During the first frames, determine the source-target eye size (height) ratio.
        if iter < n_frames_init:
            eye_landmarks_source[0].append(eye_landmarks[0])
            eye_landmarks_source[1].append(eye_landmarks[1])
            eye_ratios = compute_eye_landmarks_ratio(eye_landmarks_source,
                                                     eye_landmarks_target)

        # Adapt the eye landmarks to the target face, by placing them at the eye
        # centres and re-scaling their size to match the NMFC size and the target's
        # mean eye height (top-down distance).
        eye_lands = adapt_eye_landmarks([[eye_landmarks[0]], [eye_landmarks[1]]],
                                        eye_centres, eye_ratios,
                                        s_cam_params[-1:], adapted_cam_params[-1:])

        # Send the conditional input to the video renderer.
        input_queue.put((nmfc, eye_lands[0], frame))
        iter += 1

        # Show the frame rate.
        t1 = time.perf_counter()
        dt = t1 - t0
        print('fps: %0.2f' % (1 / dt))

    # Terminate processes and join.
    for process in proccesses:
        process.terminate()
        process.join()
    renderer.clear()
    print('Main process exiting')
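# --- Not part of the original snippet ---
# Standard entry-point guard for the demo's main() above.
if __name__ == '__main__':
    main()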