def load_angle_model(path):
    """Build a ``PFLDInference`` network and restore its weights from ``path``.

    The checkpoint is read with ``torch.load`` onto the module-level
    ``device``; the weights are taken from its ``'plfd_backbone'`` entry.

    Args:
        path: location of the checkpoint file handed to ``torch.load``.

    Returns:
        The ``PFLDInference`` instance, moved to ``device``, with the
        checkpoint weights loaded.
    """
    saved = torch.load(path, map_location=device)
    net = PFLDInference()
    net = net.to(device)
    weights = saved['plfd_backbone']
    net.load_state_dict(weights)
    return net
def main(args):
    """Run ``validate`` on the test dataset with a saved PFLD backbone.

    Args:
        args: namespace providing ``model_path`` (a checkpoint that is passed
            directly to ``load_state_dict``) and ``test_dataset`` (handed to
            ``WLFWDatasets``).
    """
    state_dict = torch.load(args.model_path)
    backbone = PFLDInference().cuda()
    backbone.load_state_dict(state_dict)

    to_tensor = transforms.Compose([transforms.ToTensor()])
    val_set = WLFWDatasets(args.test_dataset, to_tensor)
    # Fixed evaluation settings: batches of 8, original order, no workers.
    loader_options = {'batch_size': 8, 'shuffle': False, 'num_workers': 0}
    val_loader = DataLoader(val_set, **loader_options)

    validate(val_loader, backbone)
def main(args):
    """Detect faces in every ``*.jpg`` of a folder and draw the PFLD output.

    For each detected face a square region around the detector box is cut
    out of the image (zero-padded where the region leaves the image),
    resized to 112x112 and passed through the PFLD backbone loaded from
    ``args.model_path``. The predicted landmarks and a pose cube are drawn
    onto the image, which is then displayed until a key is pressed.

    Args:
        args: namespace providing ``model_path`` (checkpoint passed directly
            to ``load_state_dict``) and ``images_path`` (folder to scan).
    """
    det = hdface_detector(use_cuda=False)
    checkpoint = torch.load(args.model_path)
    plfd_backbone = PFLDInference().cuda()
    plfd_backbone.load_state_dict(checkpoint)
    plfd_backbone.eval()
    transform = transforms.Compose([transforms.ToTensor()])

    for img_path in glob.glob(os.path.join(args.images_path, "*.jpg")):
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals an unreadable/corrupt file by returning None.
            print("skip unreadable image:", img_path)
            continue
        height, width = img.shape[:2]
        img_det = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        for face in det.detect_face(img_det):
            pts = face['pts']
            x1, y1, x2, y2 = face['box']
            cv2.rectangle(img, (x1, y1), (x2, y2), (255, 255, 25))

            # Square region, 0.9x the longer box side, centred horizontally
            # and placed so that 40% of it lies above the box centre.
            w = x2 - x1 + 1
            h = y2 - y1 + 1
            size_w = int(max([w, h]) * 0.9)
            size_h = int(max([w, h]) * 0.9)
            cx = x1 + w // 2
            cy = y1 + h // 2
            x1 = cx - size_w // 2
            x2 = x1 + size_w
            y1 = cy - int(size_h * 0.4)
            y2 = y1 + size_h

            # Amount by which the region sticks out of the image on each
            # side; that much zero padding is added back after cropping.
            left = max(0, -x1)
            top = max(0, -y1)
            right = max(0, x2 - width)
            bottom = max(0, y2 - height)

            x1 = max(0, x1)
            y1 = max(0, y1)
            x2 = min(width, x2)
            y2 = min(height, y2)

            cropped = img[y1:y2, x1:x2]
            print(top, bottom, left, right)
            cropped = cv2.copyMakeBorder(cropped, top, bottom, left, right,
                                         cv2.BORDER_CONSTANT, 0)
            cropped = cv2.resize(cropped, (112, 112))

            net_input = cv2.cvtColor(cropped, cv2.COLOR_BGR2RGB)
            net_input = transform(net_input).unsqueeze(0).cuda()
            with torch.no_grad():  # inference only, no autograd graph needed
                pose, landmarks = plfd_backbone(net_input)

            poses = pose.cpu().detach().numpy()[0] * 180 / np.pi
            pre_landmark = landmarks[0]
            pre_landmark = pre_landmark.cpu().detach().numpy().reshape(
                -1, 2) * [size_w, size_h]

            cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0))
            for (x, y) in pre_landmark.astype(np.int32):
                # Landmarks are relative to the padded crop, whose origin is
                # (x1 - left, y1 - top) in image coordinates. The vertical
                # offset must use the top padding, not the bottom one.
                cv2.circle(img, (x1 - left + x, y1 - top + y), 1,
                           (255, 255, 0), 1)

            plot_pose_cube(img,
                           poses[0],
                           poses[1],
                           poses[2],
                           tdx=pts['nose'][0],
                           tdy=pts['nose'][1],
                           size=(x2 - x1) // 2)

        cv2.imshow('0', img)
        cv2.waitKey(0)
def main(args):
    """Train the PFLD backbone, checkpointing and validating every epoch.

    Args:
        args: namespace providing ``resume``, ``step`` (comma-separated
            epochs at which the learning rate is adjusted), ``loss``,
            ``base_lr``, ``weight_decay``, ``dataroot``, ``train_batchsize``,
            ``workers``, ``val_dataroot``, ``val_batchsize``,
            ``tensorboard``, ``start_epoch``, ``end_epoch`` and ``snapshot``.

    Raises:
        ValueError: if ``args.loss`` is not one of the supported loss names.
    """
    print_args(args)

    plfd_backbone = PFLDInference().cuda()
    if args.resume:
        # Load the checkpoint once (onto the CPU); load_state_dict copies the
        # values into the model's own parameters.
        state = torch.load(args.resume,
                           map_location=lambda storage, loc: storage)
        try:
            plfd_backbone.load_state_dict(state)
        except (KeyError, RuntimeError):
            # load_state_dict reports mismatched keys with RuntimeError, so
            # catching KeyError alone never reached this fallback. Retry with
            # the model wrapped in DataParallel.
            plfd_backbone = torch.nn.DataParallel(plfd_backbone)
            plfd_backbone.load_state_dict(state)
        logging.info("load %s successfully ! " % args.resume)

    step_epoch = [int(x) for x in args.step.split(',')]

    if args.loss == 'mse':
        criterion = MSELoss()
    elif args.loss in ('sommthl1', 'smoothl1'):
        # 'sommthl1' is the historical spelling and is kept for
        # compatibility; 'smoothl1' is accepted as well.
        criterion = SmoothL1()
    elif args.loss == 'wing':
        criterion = WingLoss()
    elif args.loss == 'pfld':
        criterion = PFLDLoss()
    else:
        # Fail fast instead of hitting an unbound `criterion` later on.
        raise ValueError("unsupported loss: %r" % (args.loss,))

    cur_lr = args.base_lr
    optimizer = torch.optim.Adam(plfd_backbone.parameters(),
                                 lr=args.base_lr,
                                 weight_decay=args.weight_decay)
    # SGD optimizer is very sensitive to the magnitude of loss,
    # which is likely to be non convergent or nan, I recommend Adam.

    train_transform = transforms.Compose([
        transforms.RandomGrayscale(p=0.2),
        transforms.ColorJitter(0.2, 0.2, 0.2, 0.2),
        transforms.ToTensor()
    ])
    wlfwdataset = WLFWDatasets(args.dataroot, train_transform)
    dataloader = DataLoader(wlfwdataset,
                            batch_size=args.train_batchsize,
                            shuffle=True,
                            num_workers=args.workers,
                            drop_last=False)

    val_transform = transforms.Compose([transforms.ToTensor()])
    wlfw_val_dataset = WLFWDatasets(args.val_dataroot, val_transform)
    wlfw_val_dataloader = DataLoader(wlfw_val_dataset,
                                     batch_size=args.val_batchsize,
                                     shuffle=False,
                                     num_workers=args.workers)

    step_index = 0
    writer = SummaryWriter(args.tensorboard)
    try:
        for epoch in range(args.start_epoch, args.end_epoch + 1):
            train_pose_loss, train_lds_loss = train(dataloader, plfd_backbone,
                                                    criterion, optimizer,
                                                    epoch)
            filename = os.path.join(
                str(args.snapshot),
                "checkpoint_epoch_" + str(epoch) + '.pth')
            save_checkpoint(plfd_backbone.state_dict(), filename)

            val_pose_loss, val_lds_loss = validate(wlfw_val_dataloader,
                                                   plfd_backbone, criterion,
                                                   epoch)

            if epoch in step_epoch:
                step_index += 1
                cur_lr = adjust_learning_rate(optimizer, args.base_lr,
                                              step_index)

            print(
                'Epoch: %d, train pose loss: %6.4f, train lds loss:%6.4f, val pose MAE:%6.4f, val lds MAE:%6.4f, lr:%8.6f'
                % (epoch, train_pose_loss, train_lds_loss, val_pose_loss,
                   val_lds_loss, cur_lr))
            writer.add_scalar('data/pose_loss', train_pose_loss, epoch)
            writer.add_scalars(
                'data/loss', {
                    'val pose loss': val_pose_loss,
                    'val lds loss': val_lds_loss,
                    'train loss': train_lds_loss
                }, epoch)
    finally:
        # Close the writer even when training is interrupted or fails.
        writer.close()
def main(args):
    """Estimate head yaw/pitch/roll on each frame of a video and save it.

    Frames are read from ``args.image_name``. For every frame a face box is
    obtained from ``AntiSpoofPredict.get_bbox``, a square region around it is
    cropped (zero-padded where it leaves the frame), resized to 112x112 and
    passed through the PFLD backbone. Yaw, pitch and roll are derived from
    the predicted landmarks, written onto the frame as text, and the frame is
    appended to ``./video/result.avi``.

    Args:
        args: namespace providing ``model_path`` (checkpoint with a
            ``'plfd_backbone'`` entry), ``image_name`` (video source) and
            ``device_id`` (passed to ``AntiSpoofPredict``).
    """
    checkpoint = torch.load(args.model_path, map_location=device)
    plfd_backbone = PFLDInference().to(device)
    plfd_backbone.load_state_dict(checkpoint['plfd_backbone'])
    plfd_backbone.eval()
    transform = transforms.Compose([transforms.ToTensor()])

    videoCapture = cv2.VideoCapture(args.image_name)
    fps = videoCapture.get(cv2.CAP_PROP_FPS)
    size = (int(videoCapture.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(videoCapture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    print("fps:", fps, "size:", size)
    videoWriter = cv2.VideoWriter("./video/result.avi",
                                  cv2.VideoWriter_fourcc('X', 'V', 'I', 'D'),
                                  fps, size)

    # Built once instead of once per frame.
    # NOTE(review): assumes get_bbox keeps no per-frame state - confirm.
    model_test = AntiSpoofPredict(args.device_id)

    try:
        success, img = videoCapture.read()
        if success:
            # Only dump the first frame when one was actually read.
            cv2.imwrite("1.jpg", img)

        while success:
            height, width = img.shape[:2]
            image_bbox = model_test.get_bbox(img)
            x1 = image_bbox[0]
            y1 = image_bbox[1]
            x2 = image_bbox[0] + image_bbox[2]
            y2 = image_bbox[1] + image_bbox[3]
            w = x2 - x1
            h = y2 - y1

            # Square region around the box centre, side = longer box side.
            side = int(max([w, h]))
            cx = x1 + w / 2
            cy = y1 + h / 2
            x1 = cx - side / 2
            x2 = x1 + side
            y1 = cy - side / 2
            y2 = y1 + side

            # Overhang of the region beyond each frame edge.
            dx = max(0, -x1)
            dy = max(0, -y1)
            x1 = max(0, x1)
            y1 = max(0, y1)

            edx = max(0, x2 - width)
            edy = max(0, y2 - height)
            x2 = min(width, x2)
            y2 = min(height, y2)

            cropped = img[int(y1):int(y2), int(x1):int(x2)]
            if (dx > 0 or dy > 0 or edx > 0 or edy > 0):
                # The coordinates above are floats (true division), but
                # copyMakeBorder only accepts integer border widths.
                cropped = cv2.copyMakeBorder(cropped, int(dy), int(edy),
                                             int(dx), int(edx),
                                             cv2.BORDER_CONSTANT, 0)
            cropped = cv2.resize(cropped, (112, 112))

            net_input = cv2.cvtColor(cropped, cv2.COLOR_BGR2RGB)
            net_input = transform(net_input).unsqueeze(0).to(device)
            with torch.no_grad():  # inference only, no autograd graph needed
                _, landmarks = plfd_backbone(net_input)
            pre_landmark = landmarks[0]
            pre_landmark = pre_landmark.cpu().detach().numpy().reshape(
                -1, 2) * [112, 112]

            # Landmarks keyed by their index as a string, as get_num expects.
            point_dict = {
                f'{i}': [x, y]
                for i, (x, y) in enumerate(pre_landmark.astype(np.float32))
            }

            # yaw
            point1 = [get_num(point_dict, 1, 0), get_num(point_dict, 1, 1)]
            point31 = [get_num(point_dict, 31, 0), get_num(point_dict, 31, 1)]
            point51 = [get_num(point_dict, 51, 0), get_num(point_dict, 51, 1)]
            crossover51 = point_line(
                point51, [point1[0], point1[1], point31[0], point31[1]])
            yaw_mean = point_point(point1, point31) / 2
            yaw_right = point_point(point1, crossover51)
            yaw = (yaw_mean - yaw_right) / yaw_mean
            yaw = int(yaw * 71.58 + 0.7037)

            # pitch
            pitch_dis = point_point(point51, crossover51)
            if point51[1] < crossover51[1]:
                pitch_dis = -pitch_dis
            pitch = int(1.497 * pitch_dis + 18.97)

            # roll
            x60 = get_num(point_dict, 60, 0)
            y60 = get_num(point_dict, 60, 1)
            x72 = get_num(point_dict, 72, 0)
            y72 = get_num(point_dict, 72, 1)
            # atan2 equals atan(|dy| / |dx|) for dx != 0 and stays defined
            # (90 degrees) when both points share the same x coordinate.
            roll = math.degrees(math.atan2(abs(y60 - y72), abs(x60 - x72)))
            if y60 > y72:
                roll = -roll
            roll = int(roll)

            cv2.putText(img, f"Head_Yaw(degree): {yaw}", (30, 50),
                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 255, 0), 2)
            cv2.putText(img, f"Head_Pitch(degree): {pitch}", (30, 100),
                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 255, 0), 2)
            cv2.putText(img, f"Head_Roll(degree): {roll}", (30, 150),
                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 255, 0), 2)
            videoWriter.write(img)

            success, img = videoCapture.read()
    finally:
        # Release both handles so the output file is finalised on any exit.
        videoCapture.release()
        videoWriter.release()
""" This code is used to convert the pytorch models into an onnx format models. """ import torch.onnx from pfld.pfld import PFLDInference input_img_size = 112 # define input size model_path = "models/pretrained/checkpoint_epoch_final.pth" checkpoint = torch.load(model_path) net = PFLDInference() net.load_state_dict(checkpoint) net.eval() net.to("cuda") model_name = model_path.split("/")[-1].split(".")[0] model_path = f"models/onnx/{model_name}.onnx" dummy_input = torch.randn(1, 3, 112, 112).to("cuda") torch.onnx.export(net, dummy_input, model_path, export_params=True, verbose=False, input_names=['input'], output_names=['pose', 'landms']) # torch_out = torch.onnx._export(net, inputs, output_onnx, export_params=True, verbose=False, # input_names=input_names, output_names=output_names)