def compute_error(axis, cont_labels, preds, idx_tensor):
    """Return the summed absolute angle error of one pose axis for a batch.

    Args:
        axis: One of ``"yaw"``, ``"pitch"`` or ``"roll"``; selects column
            0, 1 or 2 of ``cont_labels`` respectively.
        cont_labels: Tensor indexed as ``cont_labels[:, column]`` to obtain
            the continuous ground-truth value of every sample.
        preds: Network output for the chosen axis; its ``.data`` is passed to
            ``utils.softmax_temperature`` with temperature 1.
        idx_tensor: Tensor multiplied element-wise with the softmax output
            before summing over dimension 1 (presumably the bin indices —
            confirm against the caller).

    Returns:
        A tensor holding the sum (not the mean) of ``|prediction - label|``
        over the batch.

    Raises:
        IndexError: If ``axis`` is not one of the three supported names.
    """
    axis_names = ("yaw", "pitch", "roll")
    if axis not in axis_names:
        raise IndexError("{} is not in ['yaw', 'pitch', 'roll']".format(axis))
    column = axis_names.index(axis)

    targets = cont_labels[:, column].float()

    # Weighted sum of idx_tensor under the softmax output, then the affine
    # map used throughout this file (x * 3 - 99; presumably bin -> degrees).
    bin_weights = utils.softmax_temperature(preds.data, 1)
    weighted_bin = torch.sum(bin_weights * idx_tensor, 1)
    angles = weighted_bin.cpu() * 3 - 99

    return torch.sum(torch.abs(angles - targets))
# NOTE(review): free-standing fragment. Every name it reads (images, gpu,
# cont_labels, total, model, idx_tensor, yaw_error, pitch_error, roll_error)
# is defined outside the visible source; presumably this is the body of a
# per-batch evaluation loop — confirm before relying on it.

# Wrap the image batch and move it to the CUDA device selected by `gpu`.
images = Variable(images).cuda(gpu)
# Add this batch's sample count (first dimension of cont_labels).
total += cont_labels.size(0)

# Columns 0, 1 and 2 of cont_labels are used as the yaw, pitch and roll labels.
label_yaw = cont_labels[:, 0].float()
label_pitch = cont_labels[:, 1].float()
label_roll = cont_labels[:, 2].float()

# The model returns one output per pose axis.
yaw, pitch, roll = model(images)

# Binned predictions: index of the largest output along dimension 1.
_, yaw_bpred = torch.max(yaw.data, 1)
_, pitch_bpred = torch.max(pitch.data, 1)
_, roll_bpred = torch.max(roll.data, 1)

# Continuous predictions: softmax (temperature 1) over each output ...
yaw_predicted = utils.softmax_temperature(yaw.data, 1)
pitch_predicted = utils.softmax_temperature(pitch.data, 1)
roll_predicted = utils.softmax_temperature(roll.data, 1)

# ... then the idx_tensor-weighted sum over dimension 1, moved to the CPU and
# mapped with x * 3 - 99 (presumably bin index -> degrees — TODO confirm).
yaw_predicted = torch.sum(yaw_predicted * idx_tensor, 1).cpu() * 3 - 99
pitch_predicted = torch.sum(pitch_predicted * idx_tensor, 1).cpu() * 3 - 99
roll_predicted = torch.sum(roll_predicted * idx_tensor, 1).cpu() * 3 - 99

# Accumulate the summed absolute error per axis; dividing by `total` to get
# the mean absolute error is not done in this fragment.
yaw_error += torch.sum(torch.abs(yaw_predicted - label_yaw))
pitch_error += torch.sum(torch.abs(pitch_predicted - label_pitch))
roll_error += torch.sum(torch.abs(roll_predicted - label_roll))

# Save first image in batch with pose cube or axis.
# (The code this comment announces is not part of this fragment.)
def test_network(self): print 'Ready to test network.' total = 0 yaw_error = .0 pitch_error = .0 roll_error = .0 l1loss = torch.nn.L1Loss(size_average=False) for i, (images, labels, cont_labels, name) in enumerate(self.test_loader): images = Variable(images).cuda(gpu) total += cont_labels.size(0) label_yaw = cont_labels[:, 0].float() label_pitch = cont_labels[:, 1].float() label_roll = cont_labels[:, 2].float() yaw, pitch, roll = model(images) # Binned predictions _, yaw_bpred = torch.max(yaw.data, 1) _, pitch_bpred = torch.max(pitch.data, 1) _, roll_bpred = torch.max(roll.data, 1) # Continuous predictions yaw_predicted = utils.softmax_temperature(yaw.data, 1) pitch_predicted = utils.softmax_temperature(pitch.data, 1) roll_predicted = utils.softmax_temperature(roll.data, 1) yaw_predicted = torch.sum(yaw_predicted * idx_tensor, 1).cpu() * 3 - 99 pitch_predicted = torch.sum(pitch_predicted * idx_tensor, 1).cpu() * 3 - 99 roll_predicted = torch.sum(roll_predicted * idx_tensor, 1).cpu() * 3 - 99 # Mean absolute error yaw_error += torch.sum(torch.abs(yaw_predicted - label_yaw)) pitch_error += torch.sum(torch.abs(pitch_predicted - label_pitch)) roll_error += torch.sum(torch.abs(roll_predicted - label_roll)) # Save first image in batch with pose cube or axis. 
if args.save_viz: name = name[0] if args.dataset == 'BIWI': cv2_img = cv2.imread( os.path.join(args.data_dir, name + '_rgb.png')) else: cv2_img = cv2.imread( os.path.join(args.data_dir, name + '.jpg')) if args.batch_size == 1: error_string = 'y %.2f, p %.2f, r %.2f' % \ (torch.sum(torch.abs(yaw_predicted - label_yaw)), torch.sum(torch.abs(pitch_predicted - label_pitch)), torch.sum(torch.abs(roll_predicted - label_roll))) cv2.putText(cv2_img, error_string, (30, cv2_img.shape[0] - 30), fontFace=1, fontScale=1, color=(0, 0, 255), thickness=2) # utils.plot_pose_cube(cv2_img, yaw_predicted[0], pitch_predicted[0], roll_predicted[0], size=100) utils.draw_axis(cv2_img, yaw_predicted[0], pitch_predicted[0], roll_predicted[0], tdx=200, tdy=200, size=100) cv2.imwrite(os.path.join('output/images', name + '.jpg'), cv2_img) print( 'Test error in degrees of the model on the ' + str(total) + ' test images. Yaw: %.4f, Pitch: %.4f, Roll: %.4f' % (yaw_error / total, pitch_error / total, roll_error / total)) return yaw_error, pitch_error, roll_error
def _largest_face_box(rect):
    """Return ``(x1, y1, x2, y2)`` of the detection with the largest area.

    Each detection is read as ``data[1], data[2], data[3], data[4]``; index 0
    is not used here. On ties the earliest detection wins.
    """
    best = max(rect, key=lambda data: (data[3] - data[1]) * (data[4] - data[2]))
    return best[1], best[2], best[3], best[4]


def _expand_face_box(x1, y1, x2, y2, k=0.35, b=30):
    """Pad a face box to a square of side ``(1 + 2k) * max(w, h)``, moved up by ``b``.

    The shorter side is first grown to match the longer one, then both sides
    get a margin of ``k`` times the longer side; ``b`` is added to the top
    padding and removed from the bottom padding. Width and height must be
    non-zero.
    """
    w, h = x2 - x1, y2 - y1
    if h / w > 1:
        ratio = h / w - 1
        pad_x = ratio / 2 * w + k * h
        pad_y = k * h
    else:
        ratio = w / h - 1
        pad_x = k * w
        pad_y = ratio / 2 * h + k * w
    return x1 - pad_x, y1 - (pad_y + b), x2 + pad_x, y2 + (pad_y - b)


def detect_headpose_resnet_112():
    """Annotate a test video with the head pose of the largest face per frame.

    Loads a ``ResidualNet`` snapshot onto CUDA device 0 and reads
    ``test_data/20200522164730261_0.avi`` frame by frame. For each frame the
    largest detected face is padded, cropped and run through the network.
    The padded box, the pose axes and the pitch/yaw values are drawn on the
    frame, and every frame is written to
    ``result_data/result_with_mask_resnet_112.avi``. Frames without a usable
    face are written unchanged.

    Changes relative to the previous version:
      * the input path no longer starts with a space;
      * the capture and the writer are always released, even on errors;
      * the crop is clamped to the frame so negative coordinates do not wrap;
      * degenerate (zero-size) boxes and empty crops are skipped;
      * the BGR->RGB conversion no longer writes into the frame itself.
    """
    num_bins = 66

    facedet = FaceDetection()
    # model path
    snapshot_path = "output/no_mask_03_gray_biwi_300w_lp_cosin_112/gray_biwi_300W_LP_squire_epoch_30.pkl"

    model = ResidualNet("ImageNet", 50, num_bins, "CBAM")
    new_state_dict = OrderedDict()
    saved_state_dict = torch.load(snapshot_path)
    for key, value in saved_state_dict.items():
        # Drop the first 7 characters of every key (presumably the "module."
        # prefix written by a DataParallel wrapper — confirm).
        new_state_dict[key[7:]] = value
    model.load_state_dict(new_state_dict)
    model.cuda(0)
    model.eval()

    transformations = transforms.Compose([
        transforms.Scale(112),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.392, 0.392, 0.392],
                             std=[0.254, 0.254, 0.254])
    ])

    # Loop-invariant: bin indices used to decode the network outputs.
    idx_tensor = torch.FloatTensor(list(range(num_bins))).cuda(0)
    font = cv2.FONT_HERSHEY_SIMPLEX

    cap = cv2.VideoCapture('test_data/20200522164730261_0.avi')
    out = None
    try:
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('result_data/result_with_mask_resnet_112.avi',
                              fourcc, 20.0, (frame_width, frame_height))

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                print("finish")
                break

            rect = facedet.detection_image(frame)
            # len() rather than truthiness: the detector's return type is not
            # visible here and may be an array.
            if len(rect) == 0:
                out.write(frame)
                continue

            x1, y1, x2, y2 = _largest_face_box(rect)
            if x2 - x1 <= 0 or y2 - y1 <= 0:
                # Degenerate box: the aspect ratio would divide by zero.
                out.write(frame)
                continue
            x1, y1, x2, y2 = _expand_face_box(x1, y1, x2, y2, k=0.35, b=30)

            # Clamp to >= 0: negative slice bounds would be counted from the
            # opposite edge of the frame instead of being clipped.
            top, left = max(int(y1), 0), max(int(x1), 0)
            bottom, right = max(int(y2) + 1, 0), max(int(x2) + 1, 0)
            crop_img = frame[top:bottom, left:right]
            if crop_img.size == 0:
                out.write(frame)
                continue

            # change to rgb — into a fresh array. The slice is a view of
            # `frame`, so using it as the destination could swap the colour
            # channels of the face region in the saved video.
            detect_img = Image.fromarray(
                cv2.cvtColor(crop_img, cv2.COLOR_BGR2RGB))

            cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)),
                          (0, 255, 0), 1)

            # head pose
            with torch.no_grad():
                detect_img = transformations(detect_img)
                detect_img = detect_img.unsqueeze(dim=0)
                yaw, pitch, roll = model(detect_img.cuda(0))

                # Continuous predictions: softmax, index-weighted sum over
                # the bins, then the affine map x * 3 - 99.
                yaw_predicted = utils.softmax_temperature(yaw.data, 1)
                pitch_predicted = utils.softmax_temperature(pitch.data, 1)
                roll_predicted = utils.softmax_temperature(roll.data, 1)

                yaw_predicted = torch.sum(yaw_predicted * idx_tensor, 1).cpu() * 3 - 99
                pitch_predicted = torch.sum(pitch_predicted * idx_tensor, 1).cpu() * 3 - 99
                roll_predicted = torch.sum(roll_predicted * idx_tensor, 1).cpu() * 3 - 99

            # Axes are drawn at the centre of the padded (unclamped) box.
            utils.draw_axis(frame, yaw_predicted[0], pitch_predicted[0],
                            roll_predicted[0],
                            tdx=(x2 - x1) // 2 + x1,
                            tdy=(y2 - y1) // 2 + y1,
                            size=50)

            # NOTE(review): the "ptich" label spelling is kept as-is because
            # it is part of the rendered output.
            pitch_text = "ptich:{:.4f}".format(pitch_predicted[0])
            yaw_text = "yaw:{:.4f}".format(yaw_predicted[0])
            cv2.putText(frame, pitch_text, (int(x1), int(y1) - 30), font, 1,
                        (0, 255, 0), 1)
            cv2.putText(frame, yaw_text, (int(x1), int(y1) - 60), font, 1,
                        (0, 255, 0), 1)
            out.write(frame)
    finally:
        # Release on every exit path so the output file is finalised and the
        # capture handle is closed.
        cap.release()
        if out is not None:
            out.release()

    print("finish")