def draw_grid(image, cp_norm):
    """Draw a 3x3 control-point grid on ``image`` in place and return it.

    The normalized control points are converted to pixel coordinates with
    ``PointsToPixelCoords`` for a 240x240 image and then shifted by 50 pixels
    on both axes before drawing.

    Args:
        image: Image array handed to the OpenCV drawing calls; it is modified
            in place.
        cp_norm: Normalized control points. Assumed to be a torch tensor that
            squeezes to shape (2, 9) after conversion -- TODO confirm against
            the callers.

    Returns:
        The same ``image`` object that was passed in.
    """
    im_size = torch.Tensor([[240, 240]])
    # Follow the device of the control points rather than always moving the
    # size tensor to the GPU, so CPU-only inputs work as well.
    if cp_norm.is_cuda:
        im_size = im_size.cuda()
    cp = PointsToPixelCoords(P=cp_norm, im_size=im_size)
    cp = cp.squeeze().cpu().numpy() + 50

    def pixel(idx):
        # OpenCV point arguments must be integers; newer bindings reject
        # numpy floats instead of truncating them.
        return int(cp[0, idx]), int(cp[1, idx])

    color = (0, 0, 255)
    for idx in range(9):
        cv2.drawMarker(image, pixel(idx), color, cv2.MARKER_TILTED_CROSS,
                       12, 2, cv2.LINE_AA)
    for j in range(2):
        for k in range(3):
            # vertical grid
            cv2.line(image, pixel(j + k * 3), pixel(j + k * 3 + 1),
                     color, 2, cv2.LINE_AA)
            # horizontal grid
            cv2.line(image, pixel(j * 3 + k), pixel(j * 3 + k + 3),
                     color, 2, cv2.LINE_AA)
    return image
def pck_metric(batch, batch_start_idx, theta_det, theta_aff, theta_tps,
               theta_afftps, results, args):
    """Warp target keypoints into the source frame and store PCK per model.

    Each transformation whose theta is not ``None`` is evaluated. When
    ``theta_det`` is given it is applied as an extra affine step after the
    affine and affine+TPS warps, and the corresponding results are stored
    under the ``'det_aff'`` / ``'det_aff_tps'`` keys instead of ``'aff'`` /
    ``'afftps'``.

    Args:
        batch: Dict with ``source_im_info``, ``target_im_info``,
            ``source_points``, ``target_points`` and ``L_pck`` entries.
        batch_start_idx: Row in the result arrays where this batch starts.
        theta_det, theta_aff, theta_tps, theta_afftps: Transformation
            parameters, or ``None`` to skip that model. The affine+TPS path
            also uses ``theta_aff``.
        results: Nested mapping ``results[key]['pck']`` that is written in
            place at the rows belonging to this batch.
        args: Object providing ``pck_alpha``, ``cuda`` and ``tps_reg_factor``.

    Returns:
        The ``results`` mapping that was passed in.
    """
    pck_threshold = args.pck_alpha
    has_det = theta_det is not None
    has_aff = theta_aff is not None
    has_tps = theta_tps is not None
    has_afftps = theta_afftps is not None

    src_size = batch['source_im_info'][:, 0:3]
    tgt_size = batch['target_im_info'][:, 0:3]
    src_pts = batch['source_points']
    tgt_pts = batch['target_points']

    # Instantiate point transformer
    point_tnf = PointTnf(use_cuda=args.cuda,
                         tps_reg_factor=args.tps_reg_factor)

    # Warp points with the estimated transformations
    tgt_pts_norm = PointsToUnitCoords(P=tgt_pts, im_size=tgt_size)

    def to_source_pixels(points_norm):
        return PointsToPixelCoords(P=points_norm, im_size=src_size)

    # (results key, warped pixel coordinates), kept in evaluation order.
    warped = []
    if has_det:
        # Affine transformation only based on object detection
        det_norm = point_tnf.affPointTnf(theta=theta_det,
                                         points=tgt_pts_norm)
        warped.append(('det', to_source_pixels(det_norm)))
    if has_aff:
        # Affine only
        aff_norm = point_tnf.affPointTnf(theta=theta_aff,
                                         points=tgt_pts_norm)
        if has_det:
            aff_norm = point_tnf.affPointTnf(theta=theta_det,
                                             points=aff_norm)
        warped.append(('det_aff' if has_det else 'aff',
                       to_source_pixels(aff_norm)))
    if has_tps:
        # TPS only
        tps_norm = point_tnf.tpsPointTnf(theta=theta_tps,
                                         points=tgt_pts_norm)
        warped.append(('tps', to_source_pixels(tps_norm)))
    if has_afftps:
        # TPS followed by affine
        afftps_norm = point_tnf.tpsPointTnf(theta=theta_afftps,
                                            points=tgt_pts_norm)
        afftps_norm = point_tnf.affPointTnf(theta=theta_aff,
                                            points=afftps_norm)
        if has_det:
            afftps_norm = point_tnf.affPointTnf(theta=theta_det,
                                                points=afftps_norm)
        warped.append(('det_aff_tps' if has_det else 'afftps',
                       to_source_pixels(afftps_norm)))

    ref_length = batch['L_pck']
    n_items = batch['source_im_info'].size(0)
    rows = range(batch_start_idx, batch_start_idx + n_items)

    # Score every model first, then write, so nothing is stored if a
    # score computation fails.
    scored = [(key, pck(src_pts, points, ref_length, pck_threshold))
              for key, points in warped]
    for key, score in scored:
        results[key]['pck'][rows] = score.unsqueeze(1).cpu().numpy()

    return results
def vis_pf(vis, dataloader, theta, theta_weak, theta_inver, theta_weak_inver,
           results, results_weak, dataset_name, use_cuda=True):
    """Render six annotated panels per batch and send the grid to ``vis``.

    For every batch the panels are, in order: source image, affine warp,
    affine+TPS warp, target image, and the affine and affine+TPS warps
    computed from ``theta_weak`` (labelled 'Rocco_aff' / 'Rocco_aff_tps').
    Each panel is 280x240: the image occupies rows 0-239 and the strip below
    holds the panel name and the PCK caption.

    NOTE(review): another ``vis_pf`` with a different signature is defined
    later in this source; if both live in the same module the later one
    replaces this one -- confirm which is intended.

    Args:
        vis: Object exposing ``image(tensor, opts=...)``.
        dataloader: Sized iterable of batch dicts with ``source_image``,
            ``target_image``, ``source_im_info``, ``target_im_info``,
            ``source_points`` and ``target_points``. One panel group is
            produced per batch, so a batch size of 1 is assumed -- TODO
            confirm.
        theta, theta_weak: Dicts with ``'aff'`` and ``'aff_tps'`` entries
            indexed by batch index; used to warp the source image.
        theta_inver, theta_weak_inver: Same layout; used to warp the source
            keypoints into the target frame.
        results, results_weak: Unused; kept for interface compatibility.
        dataset_name: Forwarded to ``pck`` and used as the window title.
        use_cuda: Move buffers and batches to the GPU when true.
    """
    affTnf = GeometricTnf(geometric_model='affine', use_cuda=use_cuda)
    tpsTnf = GeometricTnf(geometric_model='tps', use_cuda=use_cuda)
    pt = PointTnf(use_cuda=use_cuda)

    group_size = 6
    n_panels = len(dataloader) * group_size
    watch_images = torch.ones(n_panels, 3, 280, 240)
    # -1 marks keypoint slots that are never filled.
    watch_keypoints = -torch.ones(n_panels, 2, 20)
    if use_cuda:
        watch_images = watch_images.cuda()
        watch_keypoints = watch_keypoints.cuda()
    num_points = np.ones(n_panels).astype(np.int8)
    correct_index = []
    image_names = []
    metrics = []

    # Colors for keypoints, stored in (b, g, r) order
    cmap = plt.get_cmap('tab20')
    colors = []
    for c in range(20):
        r, g, b = (channel * 255 for channel in cmap(c)[:3])
        colors.append((b, g, r))
    fnt = cv2.FONT_HERSHEY_COMPLEX

    def warp_and_score(points_norm, target_points, target_im_size,
                       theta_aff_inv, theta_tps_inv=None):
        # Warp normalized source points into the target frame (TPS first
        # when given, then affine), score them with ``pck`` and relocate
        # them for drawing.
        if theta_tps_inv is not None:
            points_norm = pt.tpsPointTnf(theta=theta_tps_inv,
                                         points=points_norm)
        points_norm = pt.affPointTnf(theta=theta_aff_inv, points=points_norm)
        points = PointsToPixelCoords(P=points_norm, im_size=target_im_size)
        # pck presumably returns (score, indices of correct keypoints,
        # number of keypoints) -- verify against its definition.
        score, index, n_pts = pck(target_points, points, dataset_name)
        return relocate(points, target_im_size), score, index, n_pts

    for batch_idx, batch in enumerate(dataloader):
        if use_cuda:
            batch = batch_cuda(batch)
        base = batch_idx * group_size

        # Theta and theta_inver
        theta_aff = theta['aff'][batch_idx].unsqueeze(0)
        theta_aff_tps = theta['aff_tps'][batch_idx].unsqueeze(0)
        theta_weak_aff = theta_weak['aff'][batch_idx].unsqueeze(0)
        theta_weak_aff_tps = theta_weak['aff_tps'][batch_idx].unsqueeze(0)
        theta_aff_inver = theta_inver['aff'][batch_idx].unsqueeze(0)
        theta_aff_tps_inver = theta_inver['aff_tps'][batch_idx].unsqueeze(0)
        theta_weak_aff_inver = theta_weak_inver['aff'][batch_idx].unsqueeze(0)
        theta_weak_aff_tps_inver = theta_weak_inver['aff_tps'][
            batch_idx].unsqueeze(0)

        # Warped image
        warped_aff = affTnf(batch['source_image'], theta_aff)
        warped_aff_tps = tpsTnf(warped_aff, theta_aff_tps)
        warped_weak_aff = affTnf(batch['source_image'], theta_weak_aff)
        warped_weak_aff_tps = tpsTnf(warped_weak_aff, theta_weak_aff_tps)

        watch_images[base, :, 0:240, :] = batch['source_image']
        watch_images[base + 1, :, 0:240, :] = warped_aff
        watch_images[base + 2, :, 0:240, :] = warped_aff_tps
        watch_images[base + 3, :, 0:240, :] = batch['target_image']
        watch_images[base + 4, :, 0:240, :] = warped_weak_aff
        watch_images[base + 5, :, 0:240, :] = warped_weak_aff_tps

        # Warped keypoints
        source_im_size = batch['source_im_info'][:, 0:3]
        target_im_size = batch['target_im_info'][:, 0:3]
        source_points = batch['source_points']
        target_points = batch['target_points']
        source_points_norm = PointsToUnitCoords(P=source_points,
                                                im_size=source_im_size)

        warped_points_aff, pck_aff, index_aff, N_pts = warp_and_score(
            source_points_norm, target_points, target_im_size,
            theta_aff_inver)
        warped_points_aff_tps, pck_aff_tps, index_aff_tps, _ = warp_and_score(
            source_points_norm, target_points, target_im_size,
            theta_aff_inver, theta_aff_tps_inver)
        warped_points_weak_aff, pck_weak_aff, index_weak_aff, _ = \
            warp_and_score(source_points_norm, target_points, target_im_size,
                           theta_weak_aff_inver)
        (warped_points_weak_aff_tps, pck_weak_aff_tps,
         index_weak_aff_tps, _) = warp_and_score(
            source_points_norm, target_points, target_im_size,
            theta_weak_aff_inver, theta_weak_aff_tps_inver)

        watch_keypoints[base, :, :N_pts] = relocate(
            batch['source_points'], source_im_size)[:, :, :N_pts]
        watch_keypoints[base + 1, :, :N_pts] = warped_points_aff[:, :, :N_pts]
        watch_keypoints[base + 2, :, :N_pts] = \
            warped_points_aff_tps[:, :, :N_pts]
        watch_keypoints[base + 3, :, :N_pts] = relocate(
            batch['target_points'], target_im_size)[:, :, :N_pts]
        watch_keypoints[base + 4, :, :N_pts] = \
            warped_points_weak_aff[:, :, :N_pts]
        watch_keypoints[base + 5, :, :N_pts] = \
            warped_points_weak_aff_tps[:, :, :N_pts]

        num_points[base:base + group_size] = N_pts
        correct_index.extend([np.arange(N_pts), index_aff, index_aff_tps,
                              np.arange(N_pts), index_weak_aff,
                              index_weak_aff_tps])
        image_names.extend(['Source', 'Aff', 'Aff_tps',
                            'Target', 'Rocco_aff', 'Rocco_aff_tps'])
        metrics.extend(['',
                        'PCK: {:.2%}'.format(pck_aff),
                        'PCK: {:.2%}'.format(pck_aff_tps),
                        '',
                        'PCK: {:.2%}'.format(pck_weak_aff),
                        'PCK: {:.2%}'.format(pck_weak_aff_tps)])

    opts = dict(jpgquality=100, title=dataset_name)

    # Un-normalize only the image area; the caption strip stays at 1.0 so it
    # becomes 255 after scaling.
    watch_images[:, :, 0:240, :] = normalize_image(
        watch_images[:, :, 0:240, :], forward=False)
    watch_images *= 255.0
    watch_images = watch_images.permute(0, 2, 3, 1).cpu().numpy().astype(
        np.uint8)
    watch_keypoints = watch_keypoints.cpu().numpy()

    def keypoint(row, j):
        # OpenCV point arguments must be integers; newer bindings reject
        # numpy floats instead of truncating them.
        return int(watch_keypoints[row, 0, j]), int(watch_keypoints[row, 1, j])

    for i in range(watch_images.shape[0]):
        slot = i % group_size
        # Source (slot 0) and target (slot 3) panels carry an empty metric.
        pos_pck = (0, 0) if slot in (0, 3) else (70, 275)
        cv2.putText(watch_images[i], image_names[i], (80, 255), fnt, 0.5,
                    (0, 0, 0), 1)
        cv2.putText(watch_images[i], metrics[i], pos_pck, fnt, 0.5,
                    (0, 0, 0), 1)
        if slot == 3:
            for j in range(num_points[i]):
                cv2.drawMarker(watch_images[i], keypoint(i, j), colors[j],
                               cv2.MARKER_DIAMOND, 12, 2, cv2.LINE_AA)
        else:
            # Row of the target panel belonging to the same group.
            target_row = i + 3 - slot
            for j in correct_index[i]:
                cv2.drawMarker(watch_images[i], keypoint(i, j), colors[j],
                               cv2.MARKER_CROSS, 12, 2, cv2.LINE_AA)
                cv2.drawMarker(watch_images[i], keypoint(target_row, j),
                               colors[j], cv2.MARKER_DIAMOND, 12, 2,
                               cv2.LINE_AA)

    watch_images = torch.Tensor(watch_images.astype(np.float32))
    watch_images = watch_images.permute(0, 3, 1, 2)
    vis.image(torchvision.utils.make_grid(watch_images, nrow=3, padding=3),
              opts=opts)
def vis_pf(vis, dataloader, theta_1, theta_2, theta_inver_1, theta_inver_2,
           results_1, results_2, dataset_name, use_cuda=True):
    """Render four annotated panels per batch comparing two TPS models.

    For every batch the panels are, in order: source image, the warp from
    ``GeometricTnf`` with ``theta_1`` ('TPS'), the warp from
    ``GeometricTnf2`` with ``theta_2`` ('TPS_Jitter'), and the target image.
    Each panel is 280x240: the image occupies rows 0-239 and the strip below
    holds the panel name and the PCK caption.

    Args:
        vis: Object exposing ``image(tensor, opts=...)``.
        dataloader: Sized iterable of batch dicts with ``source_image``,
            ``target_image``, ``source_im_info``, ``target_im_info``,
            ``source_points`` and ``target_points``. One panel group is
            produced per batch, so a batch size of 1 is assumed -- TODO
            confirm.
        theta_1, theta_2: Dicts with a ``'tps'`` entry indexed by batch
            index; used to warp the source image.
        theta_inver_1, theta_inver_2: Same layout; used to warp the source
            keypoints into the target frame.
        results_1, results_2: Unused; kept for interface compatibility.
        dataset_name: Forwarded to ``pck`` and used as the window title.
        use_cuda: Move buffers and batches to the GPU when true.
    """
    tpsTnf_1 = GeometricTnf(geometric_model='tps', use_cuda=use_cuda)
    tpsTnf_2 = GeometricTnf2(geometric_model='tps', use_cuda=use_cuda)
    pt_1 = PointTnf(use_cuda=use_cuda)
    pt_2 = PointTPS(use_cuda=use_cuda)

    group_size = 4
    n_panels = len(dataloader) * group_size
    watch_images = torch.ones(n_panels, 3, 280, 240)
    # -1 marks keypoint slots that are never filled.
    watch_keypoints = -torch.ones(n_panels, 2, 20)
    if use_cuda:
        watch_images = watch_images.cuda()
        watch_keypoints = watch_keypoints.cuda()
    # One entry per panel (previously sized with a hard-coded 6 per batch,
    # which did not match group_size).
    num_points = np.ones(n_panels).astype(np.int8)
    correct_index = []
    image_names = []
    metrics = []

    # Colors for keypoints, stored in (b, g, r) order
    cmap = plt.get_cmap('tab20')
    colors = []
    for c in range(20):
        r, g, b = (channel * 255 for channel in cmap(c)[:3])
        colors.append((b, g, r))
    fnt = cv2.FONT_HERSHEY_COMPLEX

    for batch_idx, batch in enumerate(dataloader):
        if use_cuda:
            batch = batch_cuda(batch)
        base = batch_idx * group_size

        # Theta and theta_inver
        theta_tps_1 = theta_1['tps'][batch_idx].unsqueeze(0)
        theta_tps_2 = theta_2['tps'][batch_idx].unsqueeze(0)
        thetai_tps_1 = theta_inver_1['tps'][batch_idx].unsqueeze(0)
        thetai_tps_2 = theta_inver_2['tps'][batch_idx].unsqueeze(0)

        # Warped image
        warped_tps_1 = tpsTnf_1(batch['source_image'], theta_tps_1)
        warped_tps_2 = tpsTnf_2(batch['source_image'], theta_tps_2)

        watch_images[base, :, 0:240, :] = batch['source_image']
        watch_images[base + 1, :, 0:240, :] = warped_tps_1
        watch_images[base + 2, :, 0:240, :] = warped_tps_2
        watch_images[base + 3, :, 0:240, :] = batch['target_image']

        # Warped keypoints
        source_im_size = batch['source_im_info'][:, 0:3]
        target_im_size = batch['target_im_info'][:, 0:3]
        source_points = batch['source_points']
        target_points = batch['target_points']
        source_points_norm = PointsToUnitCoords(P=source_points,
                                                im_size=source_im_size)

        # pck presumably returns (score, indices of correct keypoints,
        # number of keypoints) -- verify against its definition.
        warped_points_tps_norm_1 = pt_1.tpsPointTnf(theta=thetai_tps_1,
                                                    points=source_points_norm)
        warped_points_tps_1 = PointsToPixelCoords(P=warped_points_tps_norm_1,
                                                  im_size=target_im_size)
        pck_tps_1, index_tps_1, N_pts = pck(target_points,
                                            warped_points_tps_1, dataset_name)
        warped_points_tps_1 = relocate(warped_points_tps_1, target_im_size)

        warped_points_tps_norm_2 = pt_2.tpsPointTnf(theta=thetai_tps_2,
                                                    points=source_points_norm)
        warped_points_tps_2 = PointsToPixelCoords(P=warped_points_tps_norm_2,
                                                  im_size=target_im_size)
        pck_tps_2, index_tps_2, _ = pck(target_points, warped_points_tps_2,
                                        dataset_name)
        warped_points_tps_2 = relocate(warped_points_tps_2, target_im_size)

        watch_keypoints[base, :, :N_pts] = relocate(
            batch['source_points'], source_im_size)[:, :, :N_pts]
        watch_keypoints[base + 1, :, :N_pts] = \
            warped_points_tps_1[:, :, :N_pts]
        watch_keypoints[base + 2, :, :N_pts] = \
            warped_points_tps_2[:, :, :N_pts]
        watch_keypoints[base + 3, :, :N_pts] = relocate(
            batch['target_points'], target_im_size)[:, :, :N_pts]

        num_points[base:base + group_size] = N_pts
        correct_index.extend([np.arange(N_pts), index_tps_1, index_tps_2,
                              np.arange(N_pts)])
        image_names.extend(['Source', 'TPS', 'TPS_Jitter', 'Target'])
        metrics.extend(['',
                        'PCK: {:.2%}'.format(pck_tps_1),
                        'PCK: {:.2%}'.format(pck_tps_2),
                        ''])

    opts = dict(jpgquality=100, title=dataset_name)

    # Un-normalize only the image area; the caption strip stays at 1.0 so it
    # becomes 255 after scaling.
    watch_images[:, :, 0:240, :] = normalize_image(
        watch_images[:, :, 0:240, :], forward=False)
    watch_images *= 255.0
    watch_images = watch_images.permute(0, 2, 3, 1).cpu().numpy().astype(
        np.uint8)
    watch_keypoints = watch_keypoints.cpu().numpy()

    def keypoint(row, j):
        # OpenCV point arguments must be integers; newer bindings reject
        # numpy floats instead of truncating them.
        return int(watch_keypoints[row, 0, j]), int(watch_keypoints[row, 1, j])

    target_slot = group_size - 1
    for i in range(watch_images.shape[0]):
        slot = i % group_size
        # Source (slot 0) and target (last slot) carry an empty metric.
        pos_pck = (0, 0) if slot in (0, target_slot) else (70, 275)
        cv2.putText(watch_images[i], image_names[i], (80, 255), fnt, 0.5,
                    (0, 0, 0), 1)
        cv2.putText(watch_images[i], metrics[i], pos_pck, fnt, 0.5,
                    (0, 0, 0), 1)
        if slot == target_slot:
            for j in range(num_points[i]):
                cv2.drawMarker(watch_images[i], keypoint(i, j), colors[j],
                               cv2.MARKER_DIAMOND, 12, 2, cv2.LINE_AA)
        else:
            # Row of the target panel belonging to the same group.
            target_row = i + target_slot - slot
            for j in correct_index[i]:
                cv2.drawMarker(watch_images[i], keypoint(i, j), colors[j],
                               cv2.MARKER_CROSS, 12, 2, cv2.LINE_AA)
                cv2.drawMarker(watch_images[i], keypoint(target_row, j),
                               colors[j], cv2.MARKER_DIAMOND, 12, 2,
                               cv2.LINE_AA)

    watch_images = torch.Tensor(watch_images.astype(np.float32))
    watch_images = watch_images.permute(0, 3, 1, 2)
    vis.image(torchvision.utils.make_grid(watch_images, nrow=4, padding=5),
              opts=opts)
def vis_control(vis, dataloader, theta_1, theta_2, dataset_name,
                use_cuda=True):
    """Render five panels per batch with TPS control-point grids overlaid.

    Each panel is 340x340 with the 240x240 image pasted at offset (50, 50).
    Per batch the panels are, in order:

    0. source image, overlaid with the control points in ``theta_1['tps']``
    1. warp from ``GeometricTnf`` with ``theta_1``, overlaid with a regular
       3x3 grid
    2. source image, overlaid with the first 18 values of ``theta_2['tps']``
    3. warp from ``GeometricTnf2`` with ``theta_2``, overlaid with the values
       of ``theta_2['tps']`` from index 18 on
    4. target image, no overlay

    Args:
        vis: Object exposing ``image(tensor, opts=...)``.
        dataloader: Sized iterable of batch dicts with ``source_image`` and
            ``target_image``. One panel group is produced per batch, so a
            batch size of 1 is assumed -- TODO confirm.
        theta_1, theta_2: Dicts with a ``'tps'`` entry indexed by batch index.
        dataset_name: Used as the window title.
        use_cuda: Move buffers and batches to the GPU when true.
    """
    tpsTnf_1 = GeometricTnf(geometric_model='tps', use_cuda=use_cuda)
    tpsTnf_2 = GeometricTnf2(geometric_model='tps', use_cuda=use_cuda)

    group_size = 5
    watch_images = torch.ones(len(dataloader) * group_size, 3, 340, 340)
    if use_cuda:
        watch_images = watch_images.cuda()

    for batch_idx, batch in enumerate(dataloader):
        if use_cuda:
            batch = batch_cuda(batch)
        base = batch_idx * group_size

        theta_tps_1 = theta_1['tps'][batch_idx].unsqueeze(0)
        theta_tps_2 = theta_2['tps'][batch_idx].unsqueeze(0)

        # Warped image
        warped_tps_1 = tpsTnf_1(batch['source_image'], theta_tps_1)
        warped_tps_2 = tpsTnf_2(batch['source_image'], theta_tps_2)

        watch_images[base, :, 50:290, 50:290] = batch['source_image']
        watch_images[base + 1, :, 50:290, 50:290] = warped_tps_1
        watch_images[base + 2, :, 50:290, 50:290] = batch['source_image']
        watch_images[base + 3, :, 50:290, 50:290] = warped_tps_2
        watch_images[base + 4, :, 50:290, 50:290] = batch['target_image']

    opts = dict(jpgquality=100, title=dataset_name)

    # Un-normalize only the pasted image area; the border stays at 1.0 so it
    # becomes 255 after scaling.
    watch_images[:, :, 50:290, 50:290] = normalize_image(
        watch_images[:, :, 50:290, 50:290], forward=False)
    watch_images *= 255.0
    watch_images = watch_images.permute(0, 2, 3, 1).cpu().numpy().astype(
        np.uint8)

    # Regular 3x3 grid in normalized coordinates; honours use_cuda instead of
    # always being moved to the GPU.
    regular_grid = torch.Tensor(
        [-1, -1, -1, 0, 0, 0, 1, 1, 1, -1, 0, 1, -1, 0, 1, -1, 0, 1])
    if use_cuda:
        regular_grid = regular_grid.cuda()
    regular_grid = regular_grid.view(1, 2, -1)

    def draw_control_grid(image, cp_norm, n_passes):
        # Convert normalized control points to panel pixels (240x240 image
        # shifted by the 50 px border) and draw markers plus grid lines.
        im_size = torch.Tensor([[240, 240]])
        if cp_norm.is_cuda:
            im_size = im_size.cuda()
        cp = PointsToPixelCoords(P=cp_norm, im_size=im_size)
        cp = cp.squeeze().cpu().numpy() + 50

        def pixel(idx):
            # OpenCV point arguments must be integers; newer bindings reject
            # numpy floats instead of truncating them.
            return int(cp[0, idx]), int(cp[1, idx])

        color = (0, 0, 255)
        for idx in range(9):
            cv2.drawMarker(image, pixel(idx), color,
                           cv2.MARKER_TILTED_CROSS, 12, 2, cv2.LINE_AA)
        for j in range(n_passes):
            for k in range(3):
                # vertical grid
                cv2.line(image, pixel(j + k * 3), pixel(j + k * 3 + 1),
                         color, 2, cv2.LINE_AA)
                # horizontal grid
                cv2.line(image, pixel(j * 3 + k), pixel(j * 3 + k + 3),
                         color, 2, cv2.LINE_AA)

    for i in range(watch_images.shape[0]):
        pair_idx = i // group_size
        slot = i % group_size
        if slot == 0:
            draw_control_grid(watch_images[i],
                              theta_1['tps'][pair_idx].view(1, 2, -1), 2)
        elif slot == 1:
            # NOTE(review): only one pass is drawn here, so half of the grid
            # lines are missing compared with the other panels. Behaviour is
            # kept as found -- confirm whether this is intentional.
            draw_control_grid(watch_images[i], regular_grid, 1)
        elif slot == 2:
            draw_control_grid(watch_images[i],
                              theta_2['tps'][pair_idx][:18].view(1, 2, -1), 2)
        elif slot == 3:
            draw_control_grid(watch_images[i],
                              theta_2['tps'][pair_idx][18:].view(1, 2, -1), 2)

    watch_images = torch.Tensor(watch_images.astype(np.float32))
    watch_images = watch_images.permute(0, 3, 1, 2)
    vis.image(torchvision.utils.make_grid(watch_images, nrow=5, padding=5),
              opts=opts)
def vis_fn_dual(vis, train_loss, val_pck, train_lr, epoch, num_epochs,
                dataloader, theta, thetai, results, title, use_cuda=True):
    """Visualise warps in the swapped direction and, at the end, the curves.

    ``theta``/``thetai`` and the source/target entries of every batch are
    exchanged with ``swap`` before use. For every batch four 280x240 panels
    are produced: source image, affine warp, affine+TPS warp and target
    image, annotated with keypoints and the PCK values stored in
    ``results``. When ``epoch == num_epochs`` the training loss, validation
    PCK and learning-rate curves are plotted as well.

    Args:
        vis: Object exposing ``image(tensor, opts=...)`` and
            ``line(Y, X, opts=...)``.
        train_loss, val_pck, train_lr: Values plotted against epochs
            ``1..num_epochs`` on the final epoch.
        epoch: Current epoch; used in the image window title.
        num_epochs: Total number of epochs.
        dataloader: Sized iterable of batch dicts with ``source_image``,
            ``target_image``, ``source_im_info``, ``target_im_info``,
            ``source_points`` and ``target_points``. One panel group is
            produced per batch, so a batch size of 1 is assumed -- TODO
            confirm. When ``use_cuda`` is false the batch dicts are modified
            in place by the swap.
        theta, thetai: Dicts with ``'aff'`` and ``'afftps'`` entries indexed
            by batch index.
        results: Mapping with ``results['aff']['pck']`` and
            ``results['afftps']['pck']`` indexed by batch index.
        title: Inserted into the titles of the three curve plots.
        use_cuda: Move buffers and batches to the GPU when true.
    """
    affTnf = GeometricTnf(geometric_model='affine', use_cuda=use_cuda)
    tpsTnf = GeometricTnf(geometric_model='tps', use_cuda=use_cuda)
    pt = PointTnf(use_cuda=use_cuda)

    group_size = 4
    n_panels = len(dataloader) * group_size
    watch_images = torch.ones(n_panels, 3, 280, 240)
    # -1 marks keypoint slots that are never filled.
    watch_keypoints = -torch.ones(n_panels, 2, 20)
    if use_cuda:
        watch_images = watch_images.cuda()
        watch_keypoints = watch_keypoints.cuda()
    num_points = np.ones(n_panels).astype(np.int8)
    correct_index = []
    image_names = []
    metrics = []

    # Colors for keypoints, stored in (b, g, r) order
    cmap = plt.get_cmap('tab20')
    colors = []
    for c in range(20):
        r, g, b = (channel * 255 for channel in cmap(c)[:3])
        colors.append((b, g, r))
    fnt = cv2.FONT_HERSHEY_COMPLEX

    theta, thetai = swap(theta, thetai)

    def warp_and_score(points_norm, target_points, target_im_size,
                       theta_aff_inv, theta_tps_inv=None):
        # Warp normalized source points into the target frame (TPS first
        # when given, then affine), run ``pck`` on them and relocate them
        # for drawing.
        if theta_tps_inv is not None:
            points_norm = pt.tpsPointTnf(theta=theta_tps_inv,
                                         points=points_norm)
        points_norm = pt.affPointTnf(theta=theta_aff_inv, points=points_norm)
        points = PointsToPixelCoords(P=points_norm, im_size=target_im_size)
        # pck presumably returns (score, indices of correct keypoints,
        # number of keypoints) -- verify against its definition.
        _, index, n_pts = pck(target_points, points)
        return relocate(points, target_im_size), index, n_pts

    for batch_idx, batch in enumerate(dataloader):
        if use_cuda:
            batch = batch_cuda(batch)
        base = batch_idx * group_size

        batch['source_image'], batch['target_image'] = swap(
            batch['source_image'], batch['target_image'])
        batch['source_im_info'], batch['target_im_info'] = swap(
            batch['source_im_info'], batch['target_im_info'])
        batch['source_points'], batch['target_points'] = swap(
            batch['source_points'], batch['target_points'])

        # Theta and thetai
        theta_aff = theta['aff'][batch_idx].unsqueeze(0)
        theta_aff_tps = theta['afftps'][batch_idx].unsqueeze(0)
        theta_aff_inver = thetai['aff'][batch_idx].unsqueeze(0)
        theta_aff_tps_inver = thetai['afftps'][batch_idx].unsqueeze(0)

        # Warped image
        warped_aff = affTnf(batch['source_image'], theta_aff)
        warped_aff_tps = tpsTnf(warped_aff, theta_aff_tps)

        watch_images[base, :, 0:240, :] = batch['source_image']
        watch_images[base + 1, :, 0:240, :] = warped_aff
        watch_images[base + 2, :, 0:240, :] = warped_aff_tps
        watch_images[base + 3, :, 0:240, :] = batch['target_image']

        # Warped keypoints
        source_im_size = batch['source_im_info'][:, 0:3]
        target_im_size = batch['target_im_info'][:, 0:3]
        source_points = batch['source_points']
        target_points = batch['target_points']
        source_points_norm = PointsToUnitCoords(P=source_points,
                                                im_size=source_im_size)

        warped_points_aff, index_aff, N_pts = warp_and_score(
            source_points_norm, target_points, target_im_size,
            theta_aff_inver)
        warped_points_aff_tps, index_aff_tps, _ = warp_and_score(
            source_points_norm, target_points, target_im_size,
            theta_aff_inver, theta_aff_tps_inver)

        watch_keypoints[base, :, :N_pts] = relocate(
            batch['source_points'], source_im_size)[:, :, :N_pts]
        watch_keypoints[base + 1, :, :N_pts] = warped_points_aff[:, :, :N_pts]
        watch_keypoints[base + 2, :, :N_pts] = \
            warped_points_aff_tps[:, :, :N_pts]
        watch_keypoints[base + 3, :, :N_pts] = relocate(
            batch['target_points'], target_im_size)[:, :, :N_pts]

        num_points[base:base + group_size] = N_pts
        correct_index.extend([np.arange(N_pts), index_aff, index_aff_tps,
                              np.arange(N_pts)])
        image_names.extend(['Source', 'Aff', 'AffTPS', 'Target'])
        metrics.extend([
            '',
            'PCK: {:.2%}'.format(float(results['aff']['pck'][batch_idx])),
            'PCK: {:.2%}'.format(float(results['afftps']['pck'][batch_idx])),
            '',
        ])

    opts = dict(jpgquality=100,
                title='Epoch ' + str(epoch) + ' source warped target')

    # Un-normalize only the image area; the caption strip stays at 1.0 so it
    # becomes 255 after scaling.
    watch_images[:, :, 0:240, :] = normalize_image(
        watch_images[:, :, 0:240, :], forward=False)
    watch_images *= 255.0
    watch_images = watch_images.permute(0, 2, 3, 1).cpu().numpy().astype(
        np.uint8)
    watch_keypoints = watch_keypoints.cpu().numpy()

    def keypoint(row, j):
        # OpenCV point arguments must be integers; newer bindings reject
        # numpy floats instead of truncating them.
        return int(watch_keypoints[row, 0, j]), int(watch_keypoints[row, 1, j])

    target_slot = group_size - 1
    for i in range(watch_images.shape[0]):
        slot = i % group_size
        # Source (slot 0) and target (last slot) carry an empty metric.
        pos_pck = (0, 0) if slot in (0, target_slot) else (70, 275)
        cv2.putText(watch_images[i], image_names[i], (80, 255), fnt, 0.5,
                    (0, 0, 0), 1)
        cv2.putText(watch_images[i], metrics[i], pos_pck, fnt, 0.5,
                    (0, 0, 0), 1)
        if slot == target_slot:
            for j in range(num_points[i]):
                cv2.drawMarker(watch_images[i], keypoint(i, j), colors[j],
                               cv2.MARKER_CROSS, 12, 2, cv2.LINE_AA)
        else:
            # Row of the target panel belonging to the same group.
            target_row = i + target_slot - slot
            for j in correct_index[i]:
                cv2.drawMarker(watch_images[i], keypoint(i, j), colors[j],
                               cv2.MARKER_DIAMOND, 12, 2, cv2.LINE_AA)
                cv2.drawMarker(watch_images[i], keypoint(target_row, j),
                               colors[j], cv2.MARKER_CROSS, 12, 2,
                               cv2.LINE_AA)

    watch_images = torch.Tensor(watch_images.astype(np.float32))
    watch_images = watch_images.permute(0, 3, 1, 2)
    vis.image(torchvision.utils.make_grid(watch_images, nrow=4, padding=3),
              opts=opts)

    if epoch == num_epochs:
        epochs = np.arange(1, num_epochs + 1)
        # Visualize train loss
        opts_loss = dict(xlabel='Epoch', ylabel='Loss',
                         title='GM ResNet101 ' + title + ' Training Loss',
                         legend=['Loss'], width=2000)
        vis.line(train_loss, epochs, opts=opts_loss)
        # Visualize val pck
        opts_pck = dict(xlabel='Epoch', ylabel='Val PCK',
                        title='GM ResNet101 ' + title + ' Val PCK',
                        legend=['PCK'], width=2000)
        vis.line(val_pck, epochs, opts=opts_pck)
        # Visualize train lr
        opts_lr = dict(xlabel='Epoch', ylabel='Learning Rate',
                       title='GM ResNet101 ' + title
                       + ' Training Learning Rate',
                       legend=['LR'], width=2000)
        vis.line(train_lr, epochs, opts=opts_lr)
def pck_metric(batch, batch_start_idx, theta_aff, theta_tps, theta_aff_tps,
               stats, args, use_cuda=True):
    """Warp target keypoints into the source frame and store PCK per model.

    Each transformation whose theta is not ``None`` is evaluated and its
    score written to ``stats[key]['pck']`` for ``key`` in ``'aff'``,
    ``'tps'`` and ``'aff_tps'``.

    Args:
        batch: Dict with ``source_im_size``, ``target_im_size``,
            ``source_points``, ``target_points`` and ``L_pck`` entries.
        batch_start_idx: Row in the stats arrays where this batch starts.
        theta_aff, theta_tps, theta_aff_tps: Transformation parameters, or
            ``None`` to skip that model. The affine+TPS path also uses
            ``theta_aff``.
        stats: Nested mapping that is written in place at the rows belonging
            to this batch.
        args: Object providing ``pck_alpha``.
        use_cuda: Forwarded to ``PointTnf``.

    Returns:
        The ``stats`` mapping that was passed in.
    """
    pck_threshold = args.pck_alpha
    has_aff = theta_aff is not None
    has_tps = theta_tps is not None
    has_aff_tps = theta_aff_tps is not None

    src_size = batch['source_im_size']
    tgt_size = batch['target_im_size']
    src_pts = batch['source_points']
    tgt_pts = batch['target_points']

    # Instantiate point transformer
    point_tnf = PointTnf(use_cuda=use_cuda)

    # Warp points with the estimated transformations
    tgt_pts_norm = PointsToUnitCoords(tgt_pts, tgt_size)

    # (stats key, warped pixel coordinates), kept in evaluation order.
    warped = []
    if has_aff:
        # Affine only
        aff_norm = point_tnf.affPointTnf(theta_aff, tgt_pts_norm)
        warped.append(('aff', PointsToPixelCoords(aff_norm, src_size)))
    if has_tps:
        # TPS only
        tps_norm = point_tnf.tpsPointTnf(theta_tps, tgt_pts_norm)
        warped.append(('tps', PointsToPixelCoords(tps_norm, src_size)))
    if has_aff_tps:
        # TPS followed by affine
        aff_tps_norm = point_tnf.tpsPointTnf(theta_aff_tps, tgt_pts_norm)
        aff_tps_norm = point_tnf.affPointTnf(theta_aff, aff_tps_norm)
        warped.append(('aff_tps',
                       PointsToPixelCoords(aff_tps_norm, src_size)))

    ref_length = batch['L_pck'].data
    n_items = batch['source_im_size'].size(0)
    rows = range(batch_start_idx, batch_start_idx + n_items)

    # Score every model first, then write, so nothing is stored if a
    # score computation fails.
    scored = [(key, pck(src_pts.data, points.data, ref_length, pck_threshold))
              for key, points in warped]
    for key, score in scored:
        stats[key]['pck'][rows] = score.unsqueeze(1).cpu().numpy()

    return stats