# NOTE: imports reconstructed from usage; MTCNN is assumed to come from
# facenet_pytorch, and utils, face2head, bb_intersection_over_union are
# project-local helpers not shown in this snippet.
import argparse
import os

import cv2
import face_alignment
import mmcv
import numpy as np
import torch
from facenet_pytorch import MTCNN
from PIL import Image, ImageDraw

import utils


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--video_input_path', type=str, required=True)
    parser.add_argument('--output_path', type=str, required=True)
    parser.add_argument('--detect_every_N_frame', type=int, default=8)
    parser.add_argument('--scalar_face_detection', type=float, default=1.5)
    parser.add_argument('--number_of_speakers', type=int, default=2)
    args = parser.parse_args()

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print('Running on device: {}'.format(device))

    utils.mkdirs(os.path.join(args.output_path, 'faces'))
    landmarks_dic = {}
    faces_dic = {}
    boxes_dic = {}
    for i in range(args.number_of_speakers):
        landmarks_dic[i] = []
        faces_dic[i] = []
        boxes_dic[i] = []

    mtcnn = MTCNN(keep_all=True, device=device)
    video = mmcv.VideoReader(args.video_input_path)
    print("Video statistics: ", video.width, video.height, video.resolution, video.fps)
    frames = [Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) for frame in video]
    print('Number of frames in video: ', len(frames))

    fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, flip_input=False)

    for i, frame in enumerate(frames):
        print('\rTracking frame: {}'.format(i + 1), end='')

        # Detect faces every N frames; otherwise reuse the last tracked boxes.
        if i % args.detect_every_N_frame == 0:
            boxes, _ = mtcnn.detect(frame)
            boxes = boxes[:args.number_of_speakers]
            boxes = face2head(boxes, args.scalar_face_detection)
        else:
            boxes = [boxes_dic[j][-1] for j in range(args.number_of_speakers)]

        # Fall back to the previous boxes if detection missed a speaker.
        if len(boxes) != args.number_of_speakers:
            boxes = [boxes_dic[j][-1] for j in range(args.number_of_speakers)]

        # Crop faces and save landmarks for each speaker.
        for j, box in enumerate(boxes):
            face = frame.crop((box[0], box[1], box[2], box[3])).resize((224, 224))
            preds = fa.get_landmarks(np.array(face))
            if i == 0:
                faces_dic[j].append(face)
                landmarks_dic[j].append(preds)
                boxes_dic[j].append(box)
            else:
                # Assign the detection to the speaker whose previous box
                # overlaps it the most (IoU matching).
                iou_scores = []
                for b_index in range(args.number_of_speakers):
                    last_box = boxes_dic[b_index][-1]
                    iou_score = bb_intersection_over_union(box, last_box)
                    iou_scores.append(iou_score)
                box_index = iou_scores.index(max(iou_scores))
                faces_dic[box_index].append(face)
                landmarks_dic[box_index].append(preds)
                boxes_dic[box_index].append(box)

    for s in range(args.number_of_speakers):
        frames_tracked = []
        for i, frame in enumerate(frames):
            # Draw faces
            frame_draw = frame.copy()
            draw = ImageDraw.Draw(frame_draw)
            draw.rectangle(boxes_dic[s][i], outline=(255, 0, 0), width=6)
            # Add to frame list
            frames_tracked.append(frame_draw)
        dim = frames_tracked[0].size
        fourcc = cv2.VideoWriter_fourcc(*'FMP4')
        video_tracked = cv2.VideoWriter(
            os.path.join(args.output_path, 'video_tracked' + str(s + 1) + '.mp4'),
            fourcc, 25.0, dim)
        for frame in frames_tracked:
            video_tracked.write(cv2.cvtColor(np.array(frame), cv2.COLOR_RGB2BGR))
        video_tracked.release()

    # Save landmarks
    for i in range(args.number_of_speakers):
        utils.save2npz(
            os.path.join(args.output_path, 'landmark', 'speaker' + str(i + 1) + '.npz'),
            data=landmarks_dic[i])
        dim = face.size
        fourcc = cv2.VideoWriter_fourcc(*'FMP4')
        speaker_video = cv2.VideoWriter(
            os.path.join(args.output_path, 'faces', 'speaker' + str(i + 1) + '.mp4'),
            fourcc, 25.0, dim)
        for frame in faces_dic[i]:
            speaker_video.write(cv2.cvtColor(np.array(frame), cv2.COLOR_RGB2BGR))
        speaker_video.release()

    # Output video path
    parts = args.video_input_path.split('/')
    video_name = parts[-1][:-4]
    if not os.path.exists(os.path.join(args.output_path, 'filename_input')):
        os.mkdir(os.path.join(args.output_path, 'filename_input'))
    csvfile = open(
        os.path.join(args.output_path, 'filename_input', str(video_name) + '.csv'), 'w')
    for i in range(args.number_of_speakers):
        csvfile.write('speaker' + str(i + 1) + ',0\n')
    csvfile.close()
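
# The tracking loop above calls two helpers that are not defined in this
# snippet. The sketch below is a plausible reconstruction, not the original
# code: face2head is assumed to enlarge each detected face box by a scalar
# around its center, and bb_intersection_over_union is the standard IoU.

def face2head(boxes, scale=1.5):
    # Enlarge each (x1, y1, x2, y2) face box by `scale` around its center
    # so the crop covers the whole head (assumed behaviour).
    out = []
    for (x1, y1, x2, y2) in boxes:
        cx, cy = (x1 + x2) / 2, (y1 + y2) / 2
        hw, hh = (x2 - x1) * scale / 2, (y2 - y1) * scale / 2
        out.append([cx - hw, cy - hh, cx + hw, cy + hh])
    return out


def bb_intersection_over_union(boxA, boxB):
    # Standard intersection-over-union of two (x1, y1, x2, y2) boxes.
    xA, yA = max(boxA[0], boxB[0]), max(boxA[1], boxB[1])
    xB, yB = min(boxA[2], boxB[2]), min(boxA[3], boxB[3])
    inter = max(0, xB - xA) * max(0, yB - yA)
    areaA = (boxA[2] - boxA[0]) * (boxA[3] - boxA[1])
    areaB = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1])
    return inter / float(areaA + areaB - inter)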
def __init__(self, model_path="grid", gpu=-1):
    if model_path == "grid":
        model_path = os.path.split(__file__)[0] + "/data/grid.dat"
    elif model_path == "timit":
        model_path = os.path.split(__file__)[0] + "/data/timit.dat"
    elif model_path == "crema":
        model_path = os.path.split(__file__)[0] + "/data/crema.dat"

    if gpu < 0:
        self.device = torch.device("cpu")
        model_dict = torch.load(model_path, map_location=lambda storage, loc: storage)
        self.fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D,
                                               device="cpu", flip_input=False)
    else:
        self.device = torch.device("cuda:" + str(gpu))
        model_dict = torch.load(model_path,
                                map_location=lambda storage, loc: storage.cuda(gpu))
        self.fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D,
                                               device="cuda:" + str(gpu), flip_input=False)

    self.stablePntsIDs = [33, 36, 39, 42, 45]
    self.mean_face = model_dict["mean_face"]
    self.img_size = model_dict["img_size"]
    self.audio_rate = model_dict["audio_rate"]
    self.video_rate = model_dict["video_rate"]
    self.audio_feat_len = model_dict['audio_feat_len']
    self.audio_feat_samples = model_dict['audio_feat_samples']
    self.id_enc_dim = model_dict['id_enc_dim']
    self.rnn_gen_dim = model_dict['rnn_gen_dim']
    self.aud_enc_dim = model_dict['aud_enc_dim']
    self.aux_latent = model_dict['aux_latent']
    self.sequential_noise = model_dict['sequential_noise']

    self.img_transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((self.img_size[0], self.img_size[1])),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
    self.audio_transform = torchaudio.transforms.Scale()

    self.encoder = RNN(self.audio_feat_len, self.aud_enc_dim, self.rnn_gen_dim,
                       self.audio_rate, init_kernel=0.005, init_stride=0.001)
    self.encoder.to(self.device)
    self.encoder.load_state_dict(model_dict['encoder'])

    self.encoder_id = Encoder(self.id_enc_dim, self.img_size)
    self.encoder_id.to(self.device)
    self.encoder_id.load_state_dict(model_dict['encoder_id'])

    skip_channels = list(self.encoder_id.channels)
    skip_channels.reverse()
    self.generator = Generator(self.img_size, self.rnn_gen_dim,
                               condition_size=self.id_enc_dim,
                               num_gen_channels=self.encoder_id.channels[-1],
                               skip_channels=skip_channels,
                               aux_size=self.aux_latent,
                               sequential_noise=self.sequential_noise)
    self.generator.to(self.device)
    self.generator.load_state_dict(model_dict['generator'])

    self.encoder.eval()
    self.encoder_id.eval()
    self.generator.eval()
    if f.endswith('.jpg') or f.endswith('.jpeg') or f.endswith('.png'):
        log(f'{i}: {f}')
        img = imageio.imread(f)
        if img.ndim == 2:
            img = np.tile(img[..., None], [1, 1, 3])
        img = resize(img, (256, 256))[..., :3]
        avatars.append(img)

log('load checkpoints..')
generator, kp_detector = load_checkpoints(config_path=opt.config,
                                          checkpoint_path=opt.checkpoint,
                                          device=device)
fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D,
                                  flip_input=True, device=device)

# cap = cv2.VideoCapture(opt.cam)
cap = VideoCaptureAsync(opt.cam)
if not cap.isOpened():
    log("Cannot open camera. Try to choose other CAMID in './scripts/settings.sh'")
    exit()
cap.start()

ret, frame = cap.read()
if not ret:
    log("Cannot read from camera")
    exit()
def __init__(self):
    import face_alignment
    self.model = face_alignment.FaceAlignment(
        face_alignment.LandmarksType._2D, flip_input=False)
import face_alignment
from matplotlib import pyplot as plt

parser = argparse.ArgumentParser()
parser.add_argument('--data-dir')
parser.add_argument('--output')
args = parser.parse_args()

path_to_mp4 = args.data_dir
K = 8
num_vid = 0
device = torch.device('cuda:0')
saves_dir = args.output

face_aligner = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D,
                                            flip_input=False, device='cuda:0')

if not os.path.isdir(saves_dir):
    os.mkdir(saves_dir)


def generate_landmarks(frames_list, face_aligner):
    frame_landmark_list = []
    fa = face_aligner

    for i in range(len(frames_list)):
        try:
            input = frames_list[i]
            preds = fa.get_landmarks(input)[0]

            dpi = 100
            fig = plt.figure(figsize=(input.shape[1] / dpi, input.shape[0] / dpi),
                             dpi=dpi)
def __init__(self):
    self.subjects = dict()
    self.bridge = CvBridge()
    self.__subject_bridge = SubjectListBridge()
    self.margin = rospy.get_param("~margin", 42)
    self.margin_eyes_height = rospy.get_param("~margin_eyes_height", 36)
    self.margin_eyes_width = rospy.get_param("~margin_eyes_width", 60)
    self.interpupillary_distance = rospy.get_param("~interpupillary_distance",
                                                   default=0.058)
    self.cropped_face_size = (rospy.get_param("~face_size_height", 224),
                              rospy.get_param("~face_size_width", 224))
    self.rgb_frame_id = rospy.get_param("~rgb_frame_id", "/kinect2_link")
    self.rgb_frame_id_ros = rospy.get_param("~rgb_frame_id_ros",
                                            "/kinect2_nonrotated_link")
    self.model_points = None
    self.eye_image_size = (rospy.get_param("~eye_image_height", 36),
                           rospy.get_param("~eye_image_width", 60))

    self.tf_broadcaster = TransformBroadcaster()
    self.tf_listener = TransformListener()
    self.tf_prefix = rospy.get_param("~tf_prefix", default="gaze")

    self.use_previous_headpose_estimate = True
    self.last_rvec = {}
    self.last_tvec = {}
    self.pose_stabilizers = {}  # Introduce scalar stabilizers for pose.

    try:
        tqdm.write("Wait for camera message")
        cam_info = rospy.wait_for_message("/camera_info", CameraInfo, timeout=None)
        self.img_proc = PinholeCameraModel()
        # noinspection PyTypeChecker
        self.img_proc.fromCameraInfo(cam_info)
        if np.array_equal(self.img_proc.intrinsicMatrix(),
                          np.matrix([[0., 0., 0.], [0., 0., 0.], [0., 0., 0.]])):
            raise Exception('Camera matrix is zero-matrix. Did you calibrate '
                            'the camera and link it to the yaml file in the launch file?')
        tqdm.write("Camera message received")
    except rospy.ROSException:
        raise Exception("Could not get camera info")

    # multiple person images publication
    self.subject_pub = rospy.Publisher("/subjects/images", MSG_SubjectImagesList,
                                       queue_size=1)
    # multiple person faces publication for visualisation
    self.subject_faces_pub = rospy.Publisher("/subjects/faces", Image, queue_size=1)

    self.model_points = self._get_full_model_points()

    self.sess_bb = None
    self.face_net = FaceDetector(device="cuda:0")

    self.color_sub = rospy.Subscriber("/image", Image, self.callback,
                                      buff_size=2**24, queue_size=1)

    self.facial_landmark_nn = face_alignment.FaceAlignment(
        landmarks_type=face_alignment.LandmarksType._2D, device="cuda:0",
        flip_input=False)
                          img_tensor.shape[2])
        input.data.copy_(img_tensor)
        output = mission2_face(input)
        output = output.cpu().view(output.shape[0])
        cur_prob = output.data[0]
        if max_prob < cur_prob:
            max_prob = cur_prob
    cur_result['prob'] = max_prob
    prediction_results.append(cur_result)

    # save result
    save_result_to_txt(prediction_results,
                       os.path.join(_res_dir, 'mission2_%s.txt' % kResultFilePostfix))


if __name__ == "__main__":
    fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D,
                                      enable_cuda=True, flip_input=True)
    do_mission_1(options.data_dir, options.result_dir, fa)
    do_mission_2(options.data_dir, options.result_dir, fa)

# ()()
# ('')  HAANJU & YEOLJERRY
# note: the original defined `_init_`, which Python never calls as a
# constructor; it must be the double-underscore `__init__`
def __init__(self, **config):
    self.fa = face_alignment.FaceAlignment(
        face_alignment.LandmarksType._2D, flip_input=False, device='cpu')
def parse_arg():
    parser = argparse.ArgumentParser(
        description='Script for face detection and landmark retrieving')
    parser.add_argument('--image-path', type=str, help='Path to image file')
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args = parse_arg()
    fa_model = face_alignment.FaceAlignment(
        landmarks_type=face_alignment.LandmarksType._2D,
    )
    image_path = args.image_path
    image = read_image(image_path=image_path)
    # image = np.transpose(image, axes=(2, 1, 0))
    image_batch = np.concatenate([image[np.newaxis, :]] * 10, axis=0)
    image_batch = torch.tensor(image_batch)
    print(image_batch.size())
    res_facial_info = fa_model.get_landmarks(image_or_path=image)
    # res_facial_info = [res_facial_info[0][:17]]
    # res_facial_info = [res_facial_info[0][17:22]]
    # res_facial_info = [res_facial_info[0][22:27]]
NOSE_H = int(32 / 128 * img_size)
NOSE_W = int(40 / 128 * img_size)
MOUTH_H = int(32 / 128 * img_size)
MOUTH_W = int(48 / 128 * img_size)


def wait():
    while True:
        key = cv2.waitKeyEx(10)
        if key == ord('q'):
            break


tpgan = TPGAN(generator_weights=GENERATOR_WEIGHTS_FILE)
fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2halfD,
                                  device='cpu', flip_input=False)

img = cv2.imread(IMG_PATH)
landmarks = fa.get_landmarks(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
assert landmarks is not None
points = landmarks[0]
reye = np.average(np.array((points[37], points[38], points[40], points[41])), axis=0)
leye = np.average(np.array((points[43], points[44], points[46], points[47])), axis=0)
mouth = np.average(np.array((points[51], points[57])), axis=0)
nose_tip = points[30]
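
# The snippet above computes per-part patch sizes and facial part centers but
# stops before the cropping step. A minimal sketch of how such patches are
# typically extracted is given below; crop_patch is a hypothetical helper, and
# pairing the sizes to the nose and mouth follows the constants above.

def crop_patch(image, center, h, w):
    # Crop an h x w patch centered on (x, y), clamped to the image bounds.
    x, y = int(round(center[0])), int(round(center[1]))
    y0, x0 = max(0, y - h // 2), max(0, x - w // 2)
    return image[y0:y0 + h, x0:x0 + w]

# e.g. nose and mouth patches from the landmark centers computed above
nose_patch = crop_patch(img, nose_tip, NOSE_H, NOSE_W)
mouth_patch = crop_patch(img, mouth, MOUTH_H, MOUTH_W)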
# From: https://github.com/1adrianb/face-alignment
import face_alignment
from skimage import io
import numpy as np
import os
import time
from joblib import Parallel, delayed
import shutil

baseDir = "/home/socialvv/socialvv"

fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D)


def delete_helper(cam, ID):
    delDir = os.path.join(baseDir, f'ID{ID}', f'cam{cam}-wav2lip', 'landmarks')
    outDir = os.path.join(baseDir, f'ID{ID}', f'cam{cam}-wav2lip', 'landmarksv2')
    if os.path.isdir(delDir):
        shutil.rmtree(delDir)
    if os.path.isdir(outDir):
        shutil.rmtree(outDir)


def parallel_generation(cam, ID):
    frameDir = os.path.join(baseDir, f'ID{ID}', f'cam{cam}-wav2lip', 'frames')
    boundingBoxFile = os.path.join(baseDir, f'ID{ID}', 'bounding-boxes',
import face_alignment
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
from skimage import io
import os

# Run the 3D face alignment on a test image, with CUDA.
fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._3D,
                                  device='cuda:0', flip_input=True)

path = '../test/assets/yates/'
imgs = os.listdir(path)
print(imgs)

for img in imgs:
    input = io.imread(path + img)
    preds = fa.get_landmarks(input)[-1]
    print(preds)

    # TODO: Make this nice
    fig = plt.figure(figsize=plt.figaspect(1), frameon=False)
    #fig = plt.figure(frameon=False)
    ax = fig.add_subplot(1, 1, 1)
    #ax = plt.axes([0,0,1,1], frameon=False)
    ax.imshow(input)
    ax.axis('off')
    lwS = 1
    mS = 3
    ax.plot(preds[0:17, 0], preds[0:17, 1],
import face_alignment
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from skimage import io
import collections

# Optionally set detector and some additional detector parameters
face_detector = 'sfd'
face_detector_kwargs = {"filter_threshold": 0.8}

# Run the 3D face alignment on a test image, without CUDA.
fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._3D, device='cpu',
                                  flip_input=True, face_detector=face_detector,
                                  face_detector_kwargs=face_detector_kwargs)

try:
    input_img = io.imread('../test/assets/aflw-test.jpg')
except FileNotFoundError:
    input_img = io.imread('test/assets/aflw-test.jpg')

preds = fa.get_landmarks(input_img)[-1]

# 2D-Plot
plot_style = dict(marker='o', markersize=4, linestyle='-', lw=2)

pred_type = collections.namedtuple('prediction_type', ['slice', 'color'])
pred_types = {
    'face': pred_type(slice(0, 17), (0.682, 0.780, 0.909, 0.5)),
    'eyebrow1': pred_type(slice(17, 22), (1.0, 0.498, 0.055, 0.4)),
def __init__(self, opt_parser, single_test=False):
    print('Run on device {}'.format(device))
    # for key in vars(opt_parser).keys():
    #     print(key, ':', vars(opt_parser)[key])
    self.opt_parser = opt_parser

    # model
    if (opt_parser.add_audio_in):
        self.G = ResUnetGenerator(input_nc=7, output_nc=3, num_downs=6,
                                  use_dropout=False)
    else:
        self.G = ResUnetGenerator(input_nc=6, output_nc=3, num_downs=6,
                                  use_dropout=False)

    if (opt_parser.load_G_name != ''):
        ckpt = torch.load(opt_parser.load_G_name)
        try:
            self.G.load_state_dict(ckpt['G'])
        except:
            tmp = nn.DataParallel(self.G)
            tmp.load_state_dict(ckpt['G'])
            self.G.load_state_dict(tmp.module.state_dict())
            del tmp

    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs in G mode!")
        self.G = nn.DataParallel(self.G)
    self.G.to(device)

    if (not single_test):
        # dataset
        if (opt_parser.use_vox_dataset == 'raw'):
            if (opt_parser.comb_fan_awing):
                from dataset.image_translation.image_translation_dataset import \
                    image_translation_raw74_dataset as image_translation_dataset
            elif (opt_parser.add_audio_in):
                from dataset.image_translation.image_translation_dataset import \
                    image_translation_raw98_with_audio_dataset as image_translation_dataset
            else:
                from dataset.image_translation.image_translation_dataset import \
                    image_translation_raw98_dataset as image_translation_dataset
        else:
            from dataset.image_translation.image_translation_dataset import \
                image_translation_preprocessed98_dataset as image_translation_dataset

        self.dataset = image_translation_dataset(num_frames=opt_parser.num_frames)
        self.dataloader = torch.utils.data.DataLoader(self.dataset,
                                                      batch_size=opt_parser.batch_size,
                                                      shuffle=True,
                                                      num_workers=opt_parser.num_workers)

        # criterion
        self.criterionL1 = nn.L1Loss()
        self.criterionVGG = VGGLoss()
        if torch.cuda.device_count() > 1:
            print("Let's use", torch.cuda.device_count(), "GPUs in VGG model!")
            self.criterionVGG = nn.DataParallel(self.criterionVGG)
        self.criterionVGG.to(device)

        # optimizer
        self.optimizer = torch.optim.Adam(self.G.parameters(), lr=opt_parser.lr,
                                          betas=(0.5, 0.999))

        # writer
        if (opt_parser.write):
            self.writer = SummaryWriter(log_dir=os.path.join(opt_parser.log_dir,
                                                             opt_parser.name))
            self.count = 0

    # ===========================================================
    # online landmark alignment : Awing
    # ===========================================================
    PRETRAINED_WEIGHTS = 'thirdparty/AdaptiveWingLoss/ckpt/WFLW_4HG.pth'
    GRAY_SCALE = False
    HG_BLOCKS = 4
    END_RELU = False
    NUM_LANDMARKS = 98

    self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model_ft = models.FAN(HG_BLOCKS, END_RELU, GRAY_SCALE, NUM_LANDMARKS)
    checkpoint = torch.load(PRETRAINED_WEIGHTS)
    if 'state_dict' not in checkpoint:
        model_ft.load_state_dict(checkpoint)
    else:
        pretrained_weights = checkpoint['state_dict']
        model_weights = model_ft.state_dict()
        pretrained_weights = {k: v for k, v in pretrained_weights.items()
                              if k in model_weights}
        model_weights.update(pretrained_weights)
        model_ft.load_state_dict(model_weights)
    print('Loaded AWing model successfully')

    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs for AWing!")
        self.fa_model = nn.DataParallel(model_ft).to(self.device).eval()
    else:
        self.fa_model = model_ft.to(self.device).eval()

    # ===========================================================
    # online landmark alignment : FAN
    # ===========================================================
    if (opt_parser.comb_fan_awing):
        if (opt_parser.fan_2or3D == '2D'):
            self.predictor = face_alignment.FaceAlignment(
                face_alignment.LandmarksType._2D,
                device='cuda' if torch.cuda.is_available() else "cpu",
                flip_input=True)
        else:
            self.predictor = face_alignment.FaceAlignment(
                face_alignment.LandmarksType._3D,
                device='cuda' if torch.cuda.is_available() else "cpu",
                flip_input=True)
        # return the aligned face
        return output, True, scale, angle, dist
    else:
        return image, False, 0, 0, 0


'''
Main part of the code
'''
#base_dir_ori = 'D:/Master/Master Data Science/2 Cuatrimestre/Master Thesis/Database/IEMOCAP_cropped/'
#base_dir_des = 'D:/Master/Master Data Science/2 Cuatrimestre/Master Thesis/Database/IEMOCAP_faces_2/'
base_dir_ori = r'/app/data/IEMOCAP_dat1/IEMOCAP_cropped_2/'
base_dir_des = r'/app/data/IEMOCAP_dat1/IEMOCAP_faces_final_2/'
base_dir_des_list = r'/app/data/IEMOCAP_dat1/IEMOCAP_error_final_frames/'

predictor = face_alignment.FaceAlignment(
    face_alignment.LandmarksType._2D, device='cuda', flip_input=False)  # or 'cpu' to run on CPU
fa = FaceAligner(predictor, desiredFaceWidth=224)

for i in range(0, 2):
    print('SESSION ' + str(i + 1) + ':\n')
    ses_dir_ori = base_dir_ori + r'Session' + str(i + 1) + r'/'
    ses_dir_des = base_dir_des + r'Session' + str(i + 1) + r'/'
    ses_dir_des_list = base_dir_des_list + r'Session' + str(i + 1) + r'/'
    videos = os.listdir(ses_dir_ori)
    for video in videos:
        print(video)
        video_folder = ses_dir_des + video[:-4] + '/'
parser.add_argument('--lr', type=float, default=1e-3, help='learning rate')
parser.add_argument('--reg_lr', type=float, default=1e-6, help='weight decay')
parser.add_argument('--write', default=False, action='store_true')
parser.add_argument('--segment_batch_size', type=int, default=1, help='batch size')
parser.add_argument('--emb_coef', default=3.0, type=float)
parser.add_argument('--lambda_laplacian_smooth_loss', default=1.0, type=float)
parser.add_argument('--use_11spk_only', default=False, action='store_true')

opt_parser = parser.parse_args()

'''
STEP 1: preprocess input single image
'''
img = cv2.imread(opt_parser.jpg)
predictor = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D,
                                         device='cuda', flip_input=True)
shapes = predictor.get_landmarks(img)
if (not shapes or len(shapes) != 1):
    print('Cannot detect face landmarks. Exit.')
    exit(-1)
shape_3d = shapes[0]
shape_3d = np.concatenate([shape_3d, np.ones(shape=(68, 1))], axis=1)

# # close mouth
# shape_3d = shape_3d.reshape((1, 68, 3))
# index1 = list(range(60-1, 55-1, -1))
# index2 = list(range(68-1, 65-1, -1))
# mean_out = 0.5 * (shape_3d[:, 49:54] + shape_3d[:, index1])
# mean_in = 0.5 * (shape_3d[:, 61:64] + shape_3d[:, index2])
# shape_3d[:, 50:53] -= (shape_3d[:, 61:64] - mean_in) * 0.7
def test_predict_points(self):
    fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._3D,
                                      enable_cuda=False)
    fa.get_landmarks('test/assets/aflw-test.jpg')
#!/usr/bin/env python3
import os
import time
import datetime
import cv2
import numpy as np
from PIL import Image
from PIL.ExifTags import TAGS
import face_alignment
import sys

fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, device='cpu')


def extract_face(img):
    faces = fa.get_landmarks_from_image(img)
    # get_landmarks_from_image returns None when no face is found,
    # so guard against that before checking the length
    if faces is None or len(faces) == 0:
        return None
    return faces[0]


def master_align(img, face):
    A = np.zeros((2 * face.shape[0], 4))
    A[0::2, 0] = face[:, 0]
    A[0::2, 1] = face[:, 1]
    A[0::2, 2] = 1
    A[1::2, 0] = face[:, 1]
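
# master_align above is cut off while building its least-squares system. One
# common completion for a 4-parameter similarity fit (scale/rotation a, b and
# translation tx, ty) is sketched below under that assumption; the original's
# sign conventions may differ, and `target` is a hypothetical reference
# landmark set with the same shape as `face`.
def fit_similarity(face, target):
    # Solve A @ [a, b, tx, ty] ~= b_vec in the least-squares sense, where
    # x' = a*x - b*y + tx and y' = b*x + a*y + ty.
    A = np.zeros((2 * face.shape[0], 4))
    A[0::2, 0] = face[:, 0]
    A[0::2, 1] = -face[:, 1]
    A[0::2, 2] = 1
    A[1::2, 0] = face[:, 1]
    A[1::2, 1] = face[:, 0]
    A[1::2, 3] = 1
    b_vec = target.reshape(-1)  # interleaved [x0, y0, x1, y1, ...]
    params, *_ = np.linalg.lstsq(A, b_vec, rcond=None)
    return params  # a, b, tx, ty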
def generate_landmarks(frames_list):
    frame_landmark_list = []
    fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D,
                                      flip_input=False, device='cuda:0')

    for i in range(len(frames_list)):
        try:
            input = frames_list[i]
            preds = fa.get_landmarks(input)[0]

            dpi = 100
            fig = plt.figure(figsize=(input.shape[1] / dpi, input.shape[0] / dpi),
                             dpi=dpi)
            ax = fig.add_subplot(1, 1, 1)
            ax.imshow(np.ones(input.shape))
            plt.subplots_adjust(left=0, right=1, top=1, bottom=0)

            # chin
            ax.plot(preds[0:17, 0], preds[0:17, 1], marker='', markersize=5,
                    linestyle='-', color='green', lw=2)
            # left and right eyebrow
            ax.plot(preds[17:22, 0], preds[17:22, 1], marker='', markersize=5,
                    linestyle='-', color='orange', lw=2)
            ax.plot(preds[22:27, 0], preds[22:27, 1], marker='', markersize=5,
                    linestyle='-', color='orange', lw=2)
            # nose
            ax.plot(preds[27:31, 0], preds[27:31, 1], marker='', markersize=5,
                    linestyle='-', color='blue', lw=2)
            ax.plot(preds[31:36, 0], preds[31:36, 1], marker='', markersize=5,
                    linestyle='-', color='blue', lw=2)
            # left and right eye
            ax.plot(preds[36:42, 0], preds[36:42, 1], marker='', markersize=5,
                    linestyle='-', color='red', lw=2)
            ax.plot(preds[42:48, 0], preds[42:48, 1], marker='', markersize=5,
                    linestyle='-', color='red', lw=2)
            # outer and inner lip
            ax.plot(preds[48:60, 0], preds[48:60, 1], marker='', markersize=5,
                    linestyle='-', color='purple', lw=2)
            ax.plot(preds[60:68, 0], preds[60:68, 1], marker='', markersize=5,
                    linestyle='-', color='pink', lw=2)
            ax.axis('off')

            fig.canvas.draw()
            data = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
            data = data.reshape(fig.canvas.get_width_height()[::-1] + (3,))
            frame_landmark_list.append((input, data))
            plt.close(fig)
        except:
            print('Error: Video corrupted or no landmarks visible')

    # fill frame_landmark_list in case of error
    for i in range(len(frames_list) - len(frame_landmark_list)):
        frame_landmark_list.append(frame_landmark_list[i])

    return frame_landmark_list
import face_alignment
from skimage import io

fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, flip_input=False)

input = io.imread(
    'examples/Pictures2020/50_org_ref_Europaerinnen/98-04358_CF-0-1dummy0098_AGE_30_GLASSES_false_ETHNICITY_caucasian_TEINT_1.jpg'
)
preds = fa.get_landmarks(input)
print(preds)
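
# For reference: get_landmarks returns None when no face is detected, and
# otherwise a list with one (68, 2) array per detected face. A small sketch
# of consuming the result (the print labels are illustrative):
if preds is None:
    print('no face detected')
else:
    for k, pts in enumerate(preds):
        print('face', k, 'landmarks shape:', pts.shape)  # (68, 2)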
def main(txtfile, out_dir, models_root, min_size=120, frame_sample_ratio=0.1,
         min_samples=5, sample_limit=100, min_res=720, id_limit=7, cudev=0):
    # vid_paths = glob(os.path.join(in_dir, '*.mp4'))  <-- in_dir replaced with vid_paths
    os.environ["CUDA_VISIBLE_DEVICES"] = str(cudev)  # <-- added in order to use multiple gpus to process IJB-C
    vid_paths = []
    with open(txtfile) as a:
        for line in a:
            vid_paths.append(line.rstrip())

    # INITIALIZE MODELS
    device, gpus = utils.set_device()

    # Initialize detection and landmarks extraction
    fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D,
                                      flip_input=False)

    # Initialize eulers angles model
    Gp = hopenet.Hopenet()
    Gp.to(device)
    path2w = "{}/hopenet_robust_alpha1.pkl".format(models_root)
    weights = torch.load(path2w)
    Gp.load_state_dict(weights)
    Gp.eval()

    # Initialize verification model
    verificator = verification.ImageToFeatures(models_root, device, fa)

    # For each video file
    for vid_path in sorted(vid_paths):
        vid_name = os.path.splitext(os.path.basename(vid_path))[0]
        curr_out_dir = os.path.join(out_dir, vid_name)
        if os.path.exists(curr_out_dir):
            print('Skipping "%s"' % vid_name)
            continue
        else:
            print('Processing "%s"...' % vid_name)
            # os.mkdir(curr_out_dir)

        # Process video
        try:
            video_landmark_keyframes.main(vid_path, curr_out_dir, fa, Gp, verificator,
                                          device, min_size, frame_sample_ratio,
                                          min_samples, sample_limit, min_res, id_limit)
        except Exception as e:
            logging.error(traceback.format_exc())


# Parse program arguments
import argparse

parser = argparse.ArgumentParser('video_landmarks_keyframes_batch')
parser.add_argument('input', metavar='DIR', help='input directory')
parser.add_argument('-m', '--models_root', metavar='DIR', help='model root directory')
parser.add_argument('-o', '--output', metavar='DIR', help='output directory')
parser.add_argument('-mb', '--min_bbox_size', default=200, type=int, metavar='N',
                    help='minimum bounding box size')
parser.add_argument('-fs', '--frame_samples', default=0.1, type=float, metavar='F',
                    help='the number of samples per video')
parser.add_argument('-ms', '--min_samples', default=5, type=int, metavar='N',
                    help='the minimum number of samples')
parser.add_argument('-sl', '--sample_limit', default=100, type=int, metavar='N',
                    help='the limit on the number of samples')
parser.add_argument('-mr', '--min_res', default=720, type=int, metavar='N',
                    help='minimum video resolution (height pixels)')
parser.add_argument('-il', '--id_limit', default=7, type=int, metavar='N',
                    help='the limit on the number of identities')
args = parser.parse_args()

main(args.input, args.output, args.models_root, args.min_bbox_size,
     args.frame_samples, args.min_samples, args.sample_limit, args.min_res,
     args.id_limit)
converter = pylon.ImageFormatConverter()

# converting to opencv bgr format
converter.OutputPixelFormat = pylon.PixelType_BGR8packed
converter.OutputBitAlignment = pylon.OutputBitAlignment_MsbAligned

# Print the model name of the camera.
print("Using device ", camera.GetDeviceInfo().GetModelName())

# Start the grabbing of c_countOfImagesToGrab images.
# The camera device is parameterized with a default configuration which
# sets up free-running continuous acquisition.
camera.StartGrabbingMax(10000, pylon.GrabStrategy_LatestImageOnly)

fa = fa.FaceAlignment(fa.LandmarksType._2D, face_detector='sfd')

while camera.IsGrabbing():
    # Wait for an image and then retrieve it. A timeout of 5000 ms is used.
    grabResult = camera.RetrieveResult(5000, pylon.TimeoutHandling_ThrowException)

    # Image grabbed successfully?
    if grabResult.GrabSucceeded():
        image = converter.Convert(grabResult)
        # note: the original read `image.GetArray()[]` (a syntax error) and
        # passed an undefined `frame` to get_roi; both fixed to use `img`
        img = image.GetArray()
        masked_face, rgb_vals = gr.get_roi(img, fa)
        imageWindow.SetImage(grabResult)
        imageWindow.Show()
    else:
        print("Error: ", grabResult.ErrorCode)
        # grabResult.ErrorDescription does not work properly in python;
        # it could throw UnicodeDecodeError
import face_alignment
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
from skimage import io

# Run the 3D face alignment on a test image, without CUDA.
fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._3D,
                                  enable_cuda=False, flip_input=False)

input = io.imread('../test/assets/aflw-test.jpg')
preds = fa.get_landmarks(input)[-1]

# TODO: Make this nice
fig = plt.figure(figsize=plt.figaspect(.5))
ax = fig.add_subplot(1, 2, 1)
ax.imshow(input)
ax.plot(preds[0:17, 0], preds[0:17, 1], marker='o', markersize=6, linestyle='-',
        color='w', lw=2)
ax.plot(preds[17:22, 0], preds[17:22, 1], marker='o', markersize=6, linestyle='-',
        color='w',
def main(args):
    # 1. load pre-trained model
    checkpoint_fp = 'models/phase1_wpdc_vdc.pth.tar'
    arch = 'mobilenet_1'
    checkpoint = torch.load(checkpoint_fp,
                            map_location=lambda storage, loc: storage)['state_dict']
    model = getattr(mobilenet_v1, arch)(num_classes=62)  # 62 = 12(pose) + 40(shape) + 10(expression)
    model_dict = model.state_dict()
    # because the model is trained by multiple gpus, prefix module should be removed
    for k in checkpoint.keys():
        model_dict[k.replace('module.', '')] = checkpoint[k]
    model.load_state_dict(model_dict)
    if args.mode == 'gpu':
        cudnn.benchmark = True
        model = model.cuda()
    model.eval()

    '''
    # 2. load dlib model for face detection and landmark used for face cropping
    if args.dlib_landmark:
        dlib_landmark_model = 'models/shape_predictor_68_face_landmarks.dat'
        face_regressor = dlib.shape_predictor(dlib_landmark_model)
    if args.dlib_bbox:
        face_detector = dlib.get_frontal_face_detector()
    '''
    face_regressor = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D,
                                                  flip_input=False)
    # face_detector = face_regressor.face_detector

    # 3. forward
    tri = sio.loadmat('visualize/tri.mat')['tri']
    transform = transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])
    for img_fp in args.files:
        img_ori = cv2.imread(img_fp)
        '''
        if args.dlib_bbox:
            rects = face_detector(img_ori, 1)
        else:
            rects = []
        if len(rects) == 0:
            rects = dlib.rectangles()
            rect_fp = img_fp + '.bbox'
            lines = open(rect_fp).read().strip().split('\n')[1:]
            for l in lines:
                l, r, t, b = [int(_) for _ in l.split(' ')[1:]]
                rect = dlib.rectangle(l, r, t, b)
                rects.append(rect)
        '''
        img_rgb = img_ori[:, :, ::-1]
        ptss = face_regressor.get_landmarks(img_rgb)

        pts_res = []
        Ps = []  # Camera matrix collection
        poses = []  # pose collection, [todo: validate it]
        vertices_lst = []  # store multiple face vertices
        ind = 0
        suffix = get_suffix(img_fp)
        for pts in ptss:
            # whether use dlib landmark to crop image; if not, use only face bbox to calc roi bbox for cropping
            if args.dlib_landmark:
                # - use landmark for cropping
                # pts = face_regressor(img_ori, rect).parts()
                # pts = np.array([[pt.x, pt.y] for pt in pts]).T
                roi_box = parse_roi_box_from_landmark(pts.T)
            else:
                # - use detected face bbox
                bbox = [rect.left(), rect.top(), rect.right(), rect.bottom()]
                roi_box = parse_roi_box_from_bbox(bbox)

            img = crop_img(img_ori, roi_box)

            # forward: one step
            img = cv2.resize(img, dsize=(STD_SIZE, STD_SIZE),
                             interpolation=cv2.INTER_LINEAR)
            input = transform(img).unsqueeze(0)
            with torch.no_grad():
                if args.mode == 'gpu':
                    input = input.cuda()
                param = model(input)
                param = param.squeeze().cpu().numpy().flatten().astype(np.float32)

            # 68 pts
            pts68 = predict_68pts(param, roi_box)

            # two-step for more accurate bbox to crop face
            if args.bbox_init == 'two':
                roi_box = parse_roi_box_from_landmark(pts68)
                img_step2 = crop_img(img_ori, roi_box)
                img_step2 = cv2.resize(img_step2, dsize=(STD_SIZE, STD_SIZE),
                                       interpolation=cv2.INTER_LINEAR)
                input = transform(img_step2).unsqueeze(0)
                with torch.no_grad():
                    if args.mode == 'gpu':
                        input = input.cuda()
                    param = model(input)
                    param = param.squeeze().cpu().numpy().flatten().astype(np.float32)
                pts68 = predict_68pts(param, roi_box)

            pts_res.append(pts68)
            P, pose = parse_pose(param)
            Ps.append(P)
            poses.append(pose)

            # dense face 3d vertices
            if args.dump_ply or args.dump_vertex or args.dump_depth or args.dump_pncc or args.dump_obj:
                vertices = predict_dense(param, roi_box)
                vertices_lst.append(vertices)
            if args.dump_ply:
                dump_to_ply(vertices, tri,
                            '{}_{}.ply'.format(img_fp.replace(suffix, ''), ind))
            if args.dump_vertex:
                dump_vertex(vertices,
                            '{}_{}.mat'.format(img_fp.replace(suffix, ''), ind))
            if args.dump_pts:
                wfp = '{}_{}.txt'.format(img_fp.replace(suffix, ''), ind)
                np.savetxt(wfp, pts68, fmt='%.3f')
                print('Save 68 3d landmarks to {}'.format(wfp))
            if args.dump_roi_box:
                wfp = '{}_{}.roibox'.format(img_fp.replace(suffix, ''), ind)
                np.savetxt(wfp, roi_box, fmt='%.3f')
                print('Save roi box to {}'.format(wfp))
            if args.dump_paf:
                wfp_paf = '{}_{}_paf.jpg'.format(img_fp.replace(suffix, ''), ind)
                wfp_crop = '{}_{}_crop.jpg'.format(img_fp.replace(suffix, ''), ind)
                paf_feature = gen_img_paf(img_crop=img, param=param,
                                          kernel_size=args.paf_size)
                cv2.imwrite(wfp_paf, paf_feature)
                cv2.imwrite(wfp_crop, img)
                print('Dump to {} and {}'.format(wfp_crop, wfp_paf))
            if args.dump_obj:
                wfp = '{}_{}.obj'.format(img_fp.replace(suffix, ''), ind)
                colors = get_colors(img_ori, vertices)
                write_obj_with_colors(wfp, vertices, tri, colors)
                print('Dump obj with sampled texture to {}'.format(wfp))
            ind += 1

        if args.dump_pose:
            # P, pose = parse_pose(param)  # Camera matrix (without scale), and pose (yaw, pitch, roll, to verify)
            img_pose = plot_pose_box(img_ori, Ps, pts_res)
            wfp = img_fp.replace(suffix, '_pose.jpg')
            cv2.imwrite(wfp, img_pose)
            print('Dump to {}'.format(wfp))
        if args.dump_depth:
            wfp = img_fp.replace(suffix, '_depth.png')
            # depths_img = get_depths_image(img_ori, vertices_lst, tri - 1)  # python version
            depths_img = cget_depths_image(img_ori, vertices_lst, tri - 1)  # cython version
            cv2.imwrite(wfp, depths_img)
            print('Dump to {}'.format(wfp))
        if args.dump_pncc:
            wfp = img_fp.replace(suffix, '_pncc.png')
            pncc_feature = cpncc(img_ori, vertices_lst, tri - 1)  # cython version
            cv2.imwrite(wfp, pncc_feature[:, :, ::-1])  # cv2.imwrite will swap RGB -> BGR
            print('Dump to {}'.format(wfp))
        if args.dump_res:
            draw_landmarks(img_ori, pts_res,
                           wfp=img_fp.replace(suffix, '_3DDFA.jpg'),
                           show_flg=args.show_flg)
import face_alignment
from skimage import io
import pandas
import numpy
import os
import sys

fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._3D, device='cpu',
                                  flip_input=True)
fa2 = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, device='cpu',
                                   flip_input=True)

# Convert to csv
# for filename in os.listdir("dataset"):
#     if filename.endswith(".jpg"):
#         print(filename)
#         database = io.imread("dataset/" + filename)
#         preds = fa2.get_landmarks(database)[-1]
#         prediction = pandas.DataFrame(preds)
#         cvs_hasil = prediction.to_csv("csvHasil2d/" + filename + ".csv", index=False)

for filename in os.listdir("Input"):
    ctrTrue = 0
    if filename.endswith(".jpg"):
        print("now testing " + filename)
        input_img = io.imread('Input/' + filename)
        inp = fa.get_landmarks(input_img)[-1]
        for csvname in os.listdir("csvHasil"):
            if csvname.endswith(".csv"):
def run(dir_faceA, dir_faceB):
    dir_bm_faceA_eyes = f"{dir_faceA}/binary_masks_eyes2"
    dir_bm_faceB_eyes = f"{dir_faceB}/binary_masks_eyes2"

    fns_faceA = glob(f"{dir_faceA}/raw_faces/*.*")
    fns_faceB = glob(f"{dir_faceB}/raw_faces/*.*")
    print('faces', dir_faceA, dir_faceB, dir_bm_faceA_eyes, dir_bm_faceB_eyes)

    fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D,
                                      flip_input=False)

    # !mkdir -p binary_masks/faceA_eyes
    Path(f"{dir_bm_faceA_eyes}").mkdir(parents=True, exist_ok=True)
    # !mkdir -p binary_masks/faceB_eyes
    Path(f"{dir_bm_faceB_eyes}").mkdir(parents=True, exist_ok=True)

    fns_face_not_detected = []

    for idx, fns in enumerate([fns_faceA, fns_faceB]):
        if idx == 0:
            save_path = dir_bm_faceA_eyes
        elif idx == 1:
            save_path = dir_bm_faceB_eyes

        # create binary mask for each training image
        for fn in fns:
            raw_fn = PurePath(fn).parts[-1]
            x = plt.imread(fn)
            x = cv2.resize(x, (256, 256))
            preds = fa.get_landmarks(x)

            if preds is not None:
                preds = preds[0]
                mask = np.zeros_like(x)

                # Draw right eye binary mask
                pnts_right = [(preds[i, 0], preds[i, 1]) for i in range(36, 42)]
                hull = cv2.convexHull(np.array(pnts_right)).astype(np.int32)
                mask = cv2.drawContours(mask, [hull], 0, (255, 255, 255), -1)

                # Draw left eye binary mask
                pnts_left = [(preds[i, 0], preds[i, 1]) for i in range(42, 48)]
                hull = cv2.convexHull(np.array(pnts_left)).astype(np.int32)
                mask = cv2.drawContours(mask, [hull], 0, (255, 255, 255), -1)

                # Draw mouth binary mask
                #pnts_mouth = [(preds[i,0],preds[i,1]) for i in range(48,60)]
                #hull = cv2.convexHull(np.array(pnts_mouth)).astype(np.int32)
                #mask = cv2.drawContours(mask,[hull],0,(255,255,255),-1)

                mask = cv2.dilate(mask, np.ones((13, 13), np.uint8), iterations=1)
                mask = cv2.GaussianBlur(mask, (7, 7), 0)
            else:
                mask = np.zeros_like(x)
                print(f"No faces were detected in image '{fn}'")
                fns_face_not_detected.append(fn)

            plt.imsave(fname=f"{save_path}/{raw_fn}", arr=mask, format="jpg")

    num_faceA = len(glob(f"{dir_faceA}/*.*"))
    num_faceB = len(glob(f"{dir_faceB}/*.*"))
    print("Number of processed images: " + str(num_faceA + num_faceB))
    print("Number of image(s) with no face detected: " + str(len(fns_face_not_detected)))
from torch import multiprocessing
import time


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--in_file', type=str, default='56')
    return parser.parse_args()


config = parse_args()

fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._3D,
                                  flip_input=False)  #, device='cpu'
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor('./basics/shape_predictor_68_face_landmarks.dat')


def unzip_video(path):
    tar_files = os.listdir(path)
    valid = []
    for f in tar_files:
        if 'mpg' in f:
            valid.append(f)
    for f in valid:
        f_path = os.path.join(path, f)
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print('Running on device: {}'.format(device))

mtcnn = MTCNN(image_size=160, margin=0, min_face_size=20,
              thresholds=[0.6, 0.7, 0.7], factor=0.709,
              post_process=True, device=device)

resnet = InceptionResnetV1(pretrained='vggface2').eval().to(device)

#################################################################################
######## Face alignment predictor
# note: the original called the constructor without assigning the result, so
# the predictor was constructed and immediately discarded; it is bound to a
# variable (`aligner`, a name introduced here) so it can actually be used
aligner = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D,
                                       face_detector='sfd', device='cpu')

classifier = None
model_path = "/home/brance/Mine/Prototypes/Face Ui/trained_faces_model.clf"

# Loading model
try:
    with open(model_path, 'rb') as f:
        classifier = pickle.load(f)
    print("Loaded classifier ...................")
except Exception as e:
    print("[ERROR]:>>> Could Not Load FaceSickNess Classifier ! \n", e)
def generate_landmarks(cap, device, pad):
    """Input: cap a cv2.VideoCapture object, device the torch.device,
    pad the distance in pixels from border to face.
    Output: x the camera output, g_y the corresponding landmark."""
    # Get video image
    frame_landmark_list = []
    fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D,
                                      flip_input=False, device='cuda:0')
    i = 0

    if (cap.isOpened()):
        # Capture frame-by-frame
        ret, frame = cap.read()
        if not ret:
            print("broke at if not ret", ret)
            return None, None  # Can't receive frame. Possibly due to stream end.
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frames_list = [rgb]

        # Create landmark for face
        frame_landmark_list = []
        try:
            input = frames_list[i]
            preds = fa.get_landmarks(input)[0]

            input = crop_and_reshape_img(input, preds, pad=pad)
            preds = crop_and_reshape_preds(preds, pad=pad)

            dpi = 100
            fig = plt.figure(figsize=(256 / dpi, 256 / dpi), dpi=dpi)
            ax = fig.add_subplot(1, 1, 1)
            ax.imshow(np.ones(input.shape))
            plt.subplots_adjust(left=0, right=1, top=1, bottom=0)

            # chin
            ax.plot(preds[0:17, 0], preds[0:17, 1], marker='', markersize=5,
                    linestyle='-', color='green', lw=2)
            # left and right eyebrow
            ax.plot(preds[17:22, 0], preds[17:22, 1], marker='', markersize=5,
                    linestyle='-', color='orange', lw=2)
            ax.plot(preds[22:27, 0], preds[22:27, 1], marker='', markersize=5,
                    linestyle='-', color='orange', lw=2)
            # nose
            ax.plot(preds[27:31, 0], preds[27:31, 1], marker='', markersize=5,
                    linestyle='-', color='blue', lw=2)
            ax.plot(preds[31:36, 0], preds[31:36, 1], marker='', markersize=5,
                    linestyle='-', color='blue', lw=2)
            # left and right eye
            ax.plot(preds[36:42, 0], preds[36:42, 1], marker='', markersize=5,
                    linestyle='-', color='red', lw=2)
            ax.plot(preds[42:48, 0], preds[42:48, 1], marker='', markersize=5,
                    linestyle='-', color='red', lw=2)
            # outer and inner lip
            ax.plot(preds[48:60, 0], preds[48:60, 1], marker='', markersize=5,
                    linestyle='-', color='purple', lw=2)
            ax.plot(preds[60:68, 0], preds[60:68, 1], marker='', markersize=5,
                    linestyle='-', color='pink', lw=2)
            ax.axis('off')

            fig.canvas.draw()
            data = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
            data = data.reshape(fig.canvas.get_width_height()[::-1] + (3,))
            frame_landmark_list.append((input, data))
            plt.close(fig)
            no_pic = False
        except:
            print('Error: Video corrupted or no landmarks visible')

    frame_mark = torch.from_numpy(np.array(frame_landmark_list)).type(dtype=torch.float)  # K,2,256,256,3
    if frame_mark.shape[0] == 0:
        print("broke at frame_mark.shape", frame_mark.shape, frame_mark)
        return None, None
    frame_mark = frame_mark.transpose(2, 4).to(device)  # K,2,3,256,256

    x = frame_mark[0, 0].to(device)
    g_y = frame_mark[0, 1].to(device)
    return x, g_y
def main():
    fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D,
                                      flip_input=False, device='cpu')
    ms = MS()

    def get_lms5_2d_from_2dFan(img):
        if any([x < 32 for x in img.shape[:2]]):
            return None
        preds = fa.get_landmarks(img)
        if preds is None:
            return None
        Lm2D = preds[0]
        # Reduce the 68-point prediction to 5 points: eyes, nose tip, mouth corners.
        lm_idx = np.array([31, 37, 40, 43, 46, 49, 55]) - 1
        l_eye = np.mean(Lm2D[lm_idx[[1, 2]]], 0)
        r_eye = np.mean(Lm2D[lm_idx[[3, 4]]], 0)
        nose = Lm2D[lm_idx[0]]
        l_mouth = Lm2D[lm_idx[5]]
        r_mouth = Lm2D[lm_idx[6]]
        lms5 = np.stack([l_eye, r_eye, nose, l_mouth, r_mouth], axis=0)
        lms5 = np.round(lms5).astype(int)
        return lms5

    def draw_vertices(mesh_vertices, image, vertice_color=[255, 255, 255],
                      vertice_r=1, extra_scale=3):
        mesh_vertices *= extra_scale
        h, w, _ = image.shape
        w *= extra_scale
        h *= extra_scale
        image = cv2.resize(image, (w, h))

        def draw_r_1():
            image[y, x] = vertice_color

        def draw_r():
            cv2.circle(image, (x, y), vertice_r, vertice_color, thickness=-1)

        if vertice_r == 1:
            draw_func = draw_r_1
        else:
            draw_func = draw_r

        for vertex in mesh_vertices:
            x, y = [np.round(v).astype(int) for v in vertex[:2]]
            if 0 <= x < w and 0 <= y < h:
                draw_func()
        return image

    mp = mappings.GetWorkingDir()
    file = os.path.join(mp(**{})[mp.PIPE_DEFAULT.KEY_OUT_working_dir],
                        'SVO-03-01-detect.json')
    assert os.path.isfile(file)
    data = json.load(open(file))

    frame_img_file = os.path.join(mp.PIPE_DEFAULT.KEY_OUT_working_dir, 'tmp_sb/0001.jpg')
    assert os.path.isfile(frame_img_file)
    frame_img = cv2.imread(frame_img_file)
    frame_img = cv2.cvtColor(frame_img, cv2.COLOR_RGB2BGR)

    vis_scale = 4
    h, w, _ = frame_img.shape
    w *= vis_scale
    h *= vis_scale
    frame_img_vis = cv2.resize(frame_img, (w, h))

    keypoints_ref = dict(
        nose=0,
        leye=15,
        reye=16,
        lear=17,
        rear=18,
        chest=1,
        lshoulder=2,
        rshoulder=5,
    )

    for frame in data['FrameSequences'][0]['Frames']:
        for detected_object in frame['DetectedObjects'][0:]:
            if 'Bones' in detected_object:
                bones = detected_object['Bones']
                points = []
                for x, y in zip(bones[::2], bones[1::2]):
                    if x == -1 and y == -1:
                        pass
                    else:
                        assert x >= 0 and y >= 0, "{}, {}".format(x, y)
                        points.append([x, y])
                points = np.array(points)
                points = points[list(keypoints_ref.values())]
                points = np.array(
                    list(filter(lambda p: (p[0] >= 0) and (p[1] >= 0), points)))
                if len(points) == 0:
                    continue

                # Estimate a head bounding box from the keypoint spread.
                stds = 3 * np.array([np.std(points[:, 0]), 1.5 * np.std(points[:, 1])])
                if not 0.1 < stds[0] / stds[1] < 10:
                    continue
                center = np.array(np.mean(points, axis=0))
                assert center.shape == stds.shape
                bbox = [center[0] - stds[0], center[1] - stds[1],
                        center[0] + stds[0], center[1] + stds[1]]
                bbox = np.round(bbox).astype(int)

                # try:
                #     assert all([x>=0 for x in nose])
                #     assert all([x>=0 for x in chest])
                # except Exception:
                #     continue
                # bbox_h = abs(nose[1]-chest[1])/2
                # frame_img = cv2.rectangle(frame_img, (int(round(nose[0]-bbox_h)), int(round(nose[1]-bbox_h))),
                #                           (int(round(nose[0]+bbox_h)), int(round(nose[1]+bbox_h))),
                #                           color=(0,255,255), thickness=2)
                # frame_img = cv2.rectangle(frame_img, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color=(0,255,255), thickness=2)

                crop_from_openpose = frame_img[bbox[1]:bbox[3], bbox[0]:bbox[2], :]
                # plt.imshow(frame_img)
                # plt.show()
                # plt.imshow(crop_from_openpose)
                # plt.title('crop_from_openpose')
                # plt.show()

                lms2d_n5 = get_lms5_2d_from_2dFan(deepcopy(crop_from_openpose))
                if lms2d_n5 is None:
                    continue

                vis_crop = deepcopy(crop_from_openpose)
                for p in lms2d_n5:
                    vis_crop = cv2.circle(vis_crop, tuple([int(x) for x in p]),
                                          radius=1, color=(0, 255, 0), thickness=-1)
                # plt.imshow(vis_crop)
                # plt.title('crop_from_openpose with lms2d_n5')
                # plt.show()

                img = np.expand_dims(deepcopy(crop_from_openpose), axis=0)
                lms2d_n5 = np.expand_dims(lms2d_n5, axis=0)
                out = ms.get_depthmap(img, lms2d_n5, False)
                if any([x == 0 for x in out['crop'].numpy().shape]):
                    continue
                plt.imshow(out['crop'].numpy().astype(int))
                plt.title('ms_crop')
                plt.show()

                mesh = out['mesh'].numpy()[0]
                # vis = draw_vertices(mesh, out['crop'].numpy())
                # vis = vis.astype(int)
                # for p in lms2d_n5:
                #     vis = cv2.circle(vis, tuple([int(x) for x in p]), radius=1, color=(0,255,0), thickness=-1)
                # plt.imshow(vis)
                # plt.title('ms_crop')
                # plt.show()
                # print()

                # Undo the crop normalisation: rescale and translate the mesh
                # back into the coordinates of the OpenPose crop.
                t = out['t'].numpy()
                t[0] *= crop_from_openpose.shape[1]
                t[1] *= crop_from_openpose.shape[0]
                # t = np.round(t).astype(int)
                mesh = out['mesh'].numpy()[0]
                mesh *= 1 / out['s'].numpy()
                mesh[:, 0] += t[0]
                mesh[:, 1] += t[1]

                # vis = draw_vertices(deepcopy(mesh), deepcopy(crop_from_openpose), extra_scale=6)
                # vis = vis.astype(int)
                # plt.imshow(vis)
                # plt.title('openpose_crop')
                # plt.show()
                # print()

                # ms_crop = np.round(out['crop'].numpy()).astype(int)
                # new_shape = np.round(1/out['s'].numpy()*np.array(ms_crop.shape[:2])).astype(int)
                # a = np.round(tf.image.resize(ms_crop, new_shape[:2],).numpy()).astype(int)
                # t = np.round(t).astype(int)
                # crop_from_openpose[t[1]:t[1]+new_shape[0], t[0]:t[0]+new_shape[1], :] = a
                # plt.imshow(crop_from_openpose)
                # plt.title('ms crop insertion')
                # plt.show()
                # print()

                # Shift into full-frame coordinates and draw on the visualisation image.
                mesh[:, 0] += bbox[0]
                mesh[:, 1] += bbox[1]
                mesh *= vis_scale
                frame_img_vis = draw_vertices(deepcopy(mesh), deepcopy(frame_img_vis),
                                              extra_scale=1)
                frame_img_vis = frame_img_vis.astype(int)
                plt.imshow(frame_img_vis)
                plt.title('main frame')
                plt.show()
                print()
                # break
            break
    print()