def forward(self, images, proj_matricies):
    device = images.device
    batch_size, n_views = images.shape[:2]

    # reshape for backbone forward
    images = images.view(-1, *images.shape[2:])

    # forward backbone
    heatmaps, features = self.backbone(images)

    # 2D soft-argmax over the heatmaps
    keypoints_2d = op.integrate_tensor_2d(
        heatmaps * self.heatmap_multiplier, self.heatmap_softmax)

    # reshape back
    keypoints_2d = keypoints_2d.view(batch_size, n_views,
                                     *keypoints_2d.shape[1:])

    # triangulate algebraically
    keypoints_3d_alg = multiview.triangulate_batch_of_points(
        proj_matricies, keypoints_2d)

    # build coord volumes around the triangulated pelvis
    coord_volumes = torch.zeros(batch_size,
                                self.volume_size,
                                self.volume_size,
                                self.volume_size,
                                3,
                                device=device)  # B x 64 x 64 x 64 x 3
    for batch_i in range(batch_size):
        keypoints_3d = keypoints_3d_alg[batch_i].detach().cpu().numpy()
        base_point = keypoints_3d[6, :3]  # pelvis joint (MPII-style indexing)

        # build cuboid
        sides = np.array(
            [self.cuboid_side, self.cuboid_side, self.cuboid_side])
        position = base_point - sides / 2

        # build coord volume
        xxx, yyy, zzz = torch.meshgrid(
            torch.arange(self.volume_size, device=device),
            torch.arange(self.volume_size, device=device),
            torch.arange(self.volume_size, device=device))
        grid = torch.stack([xxx, yyy, zzz], dim=-1).type(torch.float)
        grid = grid.reshape((-1, 3))

        grid_coord = torch.zeros_like(grid)
        grid_coord[:, 0] = position[0] + (sides[0] / (self.volume_size - 1)) * grid[:, 0]
        grid_coord[:, 1] = position[1] + (sides[1] / (self.volume_size - 1)) * grid[:, 1]
        grid_coord[:, 2] = position[2] + (sides[2] / (self.volume_size - 1)) * grid[:, 2]

        coord_volumes[batch_i] = grid_coord.reshape(
            self.volume_size, self.volume_size, self.volume_size, 3)

    # process features before unprojecting; they are still flattened to
    # (batch_size * n_views, C, H, W) from the backbone forward
    features = self.process_features(features)
    features = features.view(batch_size, n_views, *features.shape[1:])

    # lift to volume
    volumes = op.unproject_heatmaps(
        features,
        proj_matricies,
        coord_volumes,
        volume_aggregation_method=self.volume_aggregation_method)

    # integral 3d
    volumes = self.volume_net(volumes)
    vol_keypoints_3d = op.integrate_tensor_3d_with_coordinates(
        volumes * self.volume_multiplier,
        coord_volumes,
        softmax=self.volume_softmax)

    return vol_keypoints_3d, features, volumes, coord_volumes
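
# The algebraic branch above relies on multiview.triangulate_batch_of_points.
# Below is a minimal, hedged sketch of the underlying idea (the standard
# homogeneous DLT, not necessarily the repo's exact implementation): each view
# contributes two linear constraints, u*P[2] - P[0] and v*P[2] - P[1], on the
# homogeneous 3D point, which is recovered as the right singular vector with
# the smallest singular value.
import torch

def triangulate_point_dlt(proj_matricies, points_2d):
    """proj_matricies: (n_views, 3, 4); points_2d: (n_views, 2) pixel coords."""
    n_views = proj_matricies.shape[0]
    A = proj_matricies[:, 2:3] * points_2d.view(n_views, 2, 1)  # u*P[2], v*P[2]
    A = (A - proj_matricies[:, :2]).reshape(2 * n_views, 4)
    _, _, vh = torch.linalg.svd(A)
    point_hom = vh[-1]  # right singular vector of the smallest singular value
    return point_hom[:3] / point_hom[3]  # dehomogenize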
def forward(self, images, proj_matricies, batch):
    device = images.device
    batch_size, n_views = images.shape[:2]

    # reshape for backbone forward
    images = images.view(-1, *images.shape[2:])

    # forward backbone
    heatmaps, features, _, vol_confidences = self.backbone(images)

    # reshape back
    images = images.view(batch_size, n_views, *images.shape[1:])
    heatmaps = heatmaps.view(batch_size, n_views, *heatmaps.shape[1:])
    features = features.view(batch_size, n_views, *features.shape[1:])

    if vol_confidences is not None:
        vol_confidences = vol_confidences.view(batch_size, n_views,
                                               *vol_confidences.shape[1:])

    # calculate shapes
    image_shape, heatmap_shape = tuple(images.shape[3:]), tuple(
        heatmaps.shape[3:])
    n_joints = heatmaps.shape[2]

    # norm vol confidences
    # presumably the per-view weights used during unprojection
    if self.volume_aggregation_method == 'conf_norm':
        vol_confidences = vol_confidences / vol_confidences.sum(
            dim=1, keepdim=True)

    # change camera intrinsics
    new_cameras = deepcopy(batch['cameras'])
    for view_i in range(n_views):
        for batch_i in range(batch_size):
            # rescale camera parameters from image to heatmap resolution
            new_cameras[view_i][batch_i].update_after_resize(
                image_shape, heatmap_shape)

    proj_matricies = torch.stack(
        [
            torch.stack([
                torch.from_numpy(camera.projection)
                for camera in camera_batch
            ], dim=0) for camera_batch in new_cameras
        ],
        dim=0).transpose(1, 0)  # shape (batch_size, n_views, 3, 4)
    proj_matricies = proj_matricies.float().to(device)

    # build coord volumes
    cuboids = []
    base_points = torch.zeros(batch_size, 3, device=device)
    # coord_volumes is what the features get unprojected onto
    coord_volumes = torch.zeros(batch_size,
                                self.volume_size,
                                self.volume_size,
                                self.volume_size,
                                3,
                                device=device)
    for batch_i in range(batch_size):
        # determine base_point:
        if self.use_gt_pelvis:
            # TODO: the ground-truth keypoints here should be in world coordinates
            keypoints_3d = batch['keypoints_3d'][batch_i]
        else:
            keypoints_3d = batch['pred_keypoints_3d'][batch_i]

        # pelvis base point
        if self.kind == "coco":
            base_point = (keypoints_3d[11, :3] + keypoints_3d[12, :3]) / 2
        elif self.kind == "mpii":
            base_point = keypoints_3d[6, :3]

        # store this sample's pelvis position
        base_points[batch_i] = torch.from_numpy(base_point).to(device)

        # build cuboid; cuboid_side is the edge length of the constructed
        # cuboid (default 2500), usually finer-grained than the heatmap
        # TODO: the default of 2500 should depend on the camera placement;
        # check the camera parameters
        sides = np.array(
            [self.cuboid_side, self.cuboid_side, self.cuboid_side])
        # shift base_point by half the sides to get the cuboid's corner
        position = base_point - sides / 2
        cuboid = volumetric.Cuboid3D(position, sides)
        cuboids.append(cuboid)

        # build coord volume; volume_size is the grid resolution (default 64)
        xxx, yyy, zzz = torch.meshgrid(
            torch.arange(self.volume_size, device=device),
            torch.arange(self.volume_size, device=device),
            torch.arange(self.volume_size, device=device))
        grid = torch.stack([xxx, yyy, zzz], dim=-1).type(torch.float)
        grid = grid.reshape((-1, 3))

        grid_coord = torch.zeros_like(grid)
        # world coordinates of every grid point around position
        grid_coord[:, 0] = position[0] + (sides[0] / (self.volume_size - 1)) * grid[:, 0]
        grid_coord[:, 1] = position[1] + (sides[1] / (self.volume_size - 1)) * grid[:, 1]
        grid_coord[:, 2] = position[2] + (sides[2] / (self.volume_size - 1)) * grid[:, 2]

        coord_volume = grid_coord.reshape(self.volume_size,
                                          self.volume_size,
                                          self.volume_size, 3)

        # random rotation
        if self.training:
            theta = np.random.uniform(0.0, 2 * np.pi)
        else:
            theta = 0.0

        if self.kind == "coco":
            axis = [0, 1, 0]  # y axis
        elif self.kind == "mpii":
            axis = [0, 0, 1]  # z axis

        center = torch.from_numpy(base_point).type(torch.float).to(device)

        # rotate
        coord_volume = coord_volume - center
        coord_volume = volumetric.rotate_coord_volume(
            coord_volume, theta, axis)
        coord_volume = coord_volume + center

        # transfer
        if self.transfer_cmu_to_human36m:  # different world coordinates
            coord_volume = coord_volume.permute(0, 2, 1, 3)
            inv_idx = torch.arange(coord_volume.shape[1] - 1, -1,
                                   -1).long().to(device)
            coord_volume = coord_volume.index_select(1, inv_idx)

        coord_volumes[batch_i] = coord_volume

    # process features before unprojecting
    features = features.view(-1, *features.shape[2:])
    # channel reduction of the feature maps
    features = self.process_features(features)
    features = features.view(batch_size, n_views, *features.shape[1:])

    # lift to volume
    volumes = op.unproject_heatmaps(
        features,
        proj_matricies,
        coord_volumes,
        volume_aggregation_method=self.volume_aggregation_method,
        vol_confidences=vol_confidences)

    # integral 3d
    volumes = self.volume_net(volumes)
    vol_keypoints_3d, volumes = op.integrate_tensor_3d_with_coordinates(
        volumes * self.volume_multiplier,
        coord_volumes,
        softmax=self.volume_softmax)

    return (vol_keypoints_3d, features, volumes, vol_confidences, cuboids,
            coord_volumes, base_points)
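
# The rotation step above delegates to volumetric.rotate_coord_volume. Here is
# a minimal sketch of one way such a helper could work (an assumption, not the
# repo's exact code): rotate every point of a (S, S, S, 3) grid by theta
# around a unit axis k with the Rodrigues formula
#   p' = p*cos(theta) + (k x p)*sin(theta) + k*(k . p)*(1 - cos(theta)).
import math
import torch

def rotate_coord_volume_sketch(coord_volume, theta, axis):
    k = torch.as_tensor(axis, dtype=coord_volume.dtype,
                        device=coord_volume.device)
    k = k / torch.norm(k)
    cos_t, sin_t = math.cos(theta), math.sin(theta)
    points = coord_volume.reshape(-1, 3)
    cross = torch.cross(k.expand_as(points), points, dim=1)  # k x p, per point
    dot = (points * k).sum(dim=1, keepdim=True)              # k . p, per point
    rotated = points * cos_t + cross * sin_t + k * dot * (1.0 - cos_t)
    return rotated.reshape(coord_volume.shape)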
def forward(self, images, proj_matricies, batch, keypoints_3d_gt):
    device = images.device
    batch_size, n_views = images.shape[:2]  # images [batch_size, n_views, 3, 384, 384]

    # reshape for backbone forward
    images = images.view(-1, *images.shape[2:])  # images [batch_size*n_views, 3, 384, 384]

    # forward backbone
    heatmaps, features, _, vol_confidences = self.backbone(images)

    # reshape back
    images = images.view(batch_size, n_views, *images.shape[1:])  # images [batch_size, n_views, 3, 384, 384]
    heatmaps = heatmaps.view(batch_size, n_views, *heatmaps.shape[1:])  # heatmaps [batch_size, n_views, 17, 96, 96]
    features = features.view(batch_size, n_views, *features.shape[1:])  # features [batch_size, n_views, 256, 96, 96]

    if vol_confidences is not None:
        vol_confidences = vol_confidences.view(batch_size, n_views,
                                               *vol_confidences.shape[1:])

    # calculate shapes
    image_shape, heatmap_shape = tuple(images.shape[3:]), tuple(heatmaps.shape[3:])
    n_joints = heatmaps.shape[2]  # 17

    # norm vol confidences
    if self.volume_aggregation_method == 'conf_norm':
        vol_confidences = vol_confidences / vol_confidences.sum(dim=1, keepdim=True)

    # change camera intrinsics
    new_cameras = deepcopy(batch['cameras'])
    for view_i in range(n_views):
        for batch_i in range(batch_size):
            new_cameras[view_i][batch_i].update_after_resize(image_shape, heatmap_shape)

    proj_matricies = torch.stack([
        torch.stack([torch.from_numpy(camera.projection) for camera in camera_batch], dim=0)
        for camera_batch in new_cameras
    ], dim=0).transpose(1, 0)  # shape (batch_size, n_views, 3, 4)
    proj_matricies = proj_matricies.float().to(device)

    # build coord volumes
    cuboids = []
    base_points = torch.zeros(batch_size, 3, device=device)
    coord_volumes = torch.zeros(batch_size, self.volume_size, self.volume_size,
                                self.volume_size, 3, device=device)
    coord_volumes_aux = torch.zeros(batch_size, self.volume_size // 4, self.volume_size // 4,
                                    self.volume_size // 4, 3, device=device)
    for batch_i in range(batch_size):
        if self.use_gt_pelvis:
            keypoints_3d = batch['keypoints_3d'][batch_i]
        else:
            keypoints_3d = batch['pred_keypoints_3d'][batch_i]

        if self.kind == "coco":
            base_point = (keypoints_3d[11, :3] + keypoints_3d[12, :3]) / 2
        elif self.kind == "mpii":
            base_point = keypoints_3d[6, :3]

        base_points[batch_i] = torch.from_numpy(base_point).to(device)

        # build cuboid
        sizes = np.array([self.cuboid_size, self.cuboid_size, self.cuboid_size])
        aux_sizes = sizes - 3 * sizes / (self.volume_size - 1)
        position = base_point - sizes / 2
        cuboid = volumetric.Cuboid3D(position, sizes)
        cuboids.append(cuboid)

        # random rotation
        if self.training:
            theta = np.random.uniform(0.0, 2 * np.pi)
        else:
            theta = 0.0

        if self.kind == "coco":
            axis = [0, 1, 0]  # y axis
        elif self.kind == "mpii":
            axis = [0, 0, 1]  # z axis

        # build coord volume
        coord_volumes[batch_i] = self.build_coord_volume(
            self.volume_size, position, sizes, base_point, theta, axis, device)
        coord_volumes_aux[batch_i] = self.build_coord_volume(
            self.volume_size // 4, position, aux_sizes, base_point, theta, axis, device)

    # compute gt global attention, using keypoints_3d_gt
    ga_mask_gt = self.calc_ga_mask(keypoints_3d_gt, coord_volumes_aux)

    # process features before unprojecting
    if self.use_feature:
        features = features.view(-1, *features.shape[2:])  # features [batch_size*n_views, 256, 96, 96]
        features = self.process_features(features)  # conv2d 1x1 kernel [256 -> 32]
        features = features.view(batch_size, n_views, *features.shape[1:])  # features [batch_size, n_views, 32, 96, 96]
        v2v_input = features
    else:
        v2v_input = heatmaps

    # lift to volume
    volumes = op.unproject_heatmaps(
        v2v_input, proj_matricies, coord_volumes,
        volume_aggregation_method=self.volume_aggregation_method,
        vol_confidences=vol_confidences)  # volumes [batch_size, 32, 64, 64, 64]

    # integral 3d
    volumes, atten_global = self.volume_net(volumes, None)  # volumes [batch_size, 17, 64, 64, 64]
    voxel_keypoints_3d, _ = op.integrate_tensor_3d(
        volumes * self.volume_multiplier,
        softmax=self.volume_softmax)  # keypoints in voxel coordinates [batch_size, 17, 3]
    vol_keypoints_3d, volumes = op.integrate_tensor_3d_with_coordinates(
        volumes * self.volume_multiplier, coord_volumes,
        softmax=self.volume_softmax)  # vol_keypoints_3d [batch_size, 17, 3]

    return (voxel_keypoints_3d, vol_keypoints_3d, heatmaps, volumes, ga_mask_gt,
            atten_global, vol_confidences, cuboids, coord_volumes, base_points)
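
# The variant above calls self.build_coord_volume and self.calc_ga_mask, which
# are not shown here. Below is a hedged sketch of what build_coord_volume
# plausibly does (an assumption), consolidating the grid construction and
# rotation that the other variants inline; it reuses the repo's
# volumetric.rotate_coord_volume as they do.
import numpy as np
import torch

def build_coord_volume_sketch(volume_size, position, sides, base_point,
                              theta, axis, device):
    xxx, yyy, zzz = torch.meshgrid(
        torch.arange(volume_size, device=device),
        torch.arange(volume_size, device=device),
        torch.arange(volume_size, device=device))
    grid = torch.stack([xxx, yyy, zzz], dim=-1).float().reshape(-1, 3)
    # map voxel indices to world coordinates spanning the cuboid
    step = torch.as_tensor(sides / (volume_size - 1), dtype=torch.float, device=device)
    corner = torch.as_tensor(position, dtype=torch.float, device=device)
    coord_volume = (corner + step * grid).reshape(
        volume_size, volume_size, volume_size, 3)
    # rotate the grid around the pelvis, as the inlined variants do
    center = torch.as_tensor(base_point, dtype=torch.float, device=device)
    coord_volume = volumetric.rotate_coord_volume(
        coord_volume - center, theta, axis) + center
    return coord_volume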
def forward(self, images, proj_matricies, batch):
    device = images.device
    batch_size, n_views = images.shape[:2]

    # reshape for backbone forward
    images = images.view(-1, *images.shape[2:])

    # forward backbone
    heatmaps, features, _, vol_confidences = self.backbone(images)

    # reshape back
    images = images.view(batch_size, n_views, *images.shape[1:])
    heatmaps = heatmaps.view(batch_size, n_views, *heatmaps.shape[1:])
    features = features.view(batch_size, n_views, *features.shape[1:])

    if vol_confidences is not None:
        vol_confidences = vol_confidences.view(batch_size, n_views,
                                               *vol_confidences.shape[1:])

    # calculate shapes
    image_shape, heatmap_shape = tuple(images.shape[3:]), tuple(
        heatmaps.shape[3:])
    n_joints = heatmaps.shape[2]

    # norm vol confidences
    if self.volume_aggregation_method == 'conf_norm':
        vol_confidences = vol_confidences / vol_confidences.sum(
            dim=1, keepdim=True)

    # change camera intrinsics
    new_cameras = deepcopy(batch['cameras'])
    for view_i in range(n_views):
        for batch_i in range(batch_size):
            new_cameras[view_i][batch_i].update_after_resize(
                image_shape, heatmap_shape)

    proj_matricies = torch.stack(
        [
            torch.stack([
                torch.from_numpy(camera.projection)
                for camera in camera_batch
            ], dim=0) for camera_batch in new_cameras
        ],
        dim=0).transpose(1, 0)  # shape (batch_size, n_views, 3, 4)
    proj_matricies = proj_matricies.float().to(device)

    # build coord volumes
    cuboids = []
    base_points = torch.zeros(batch_size, 3, device=device)
    coord_volumes = torch.zeros(batch_size,
                                self.volume_size,
                                self.volume_size,
                                self.volume_size,
                                3,
                                device=device)
    for batch_i in range(batch_size):
        if self.use_gt_pelvis:
            keypoints_3d = batch['keypoints_3d'][batch_i]
        else:
            keypoints_3d = batch['pred_keypoints_3d'][batch_i]

        if self.kind == "coco":
            base_point = (keypoints_3d[11, :3] + keypoints_3d[12, :3]) / 2
        elif self.kind == "mpii":
            base_point = keypoints_3d[6, :3]
        elif self.kind == "cmu":
            base_point = keypoints_3d[2, :3]

        base_points[batch_i] = torch.from_numpy(base_point).to(device)

        # build cuboid
        # NOTE: this is the part of the paper where the cuboid used for
        # volumetric unprojection is built around the pelvis
        sides = np.array(
            [self.cuboid_side, self.cuboid_side, self.cuboid_side])
        position = base_point - sides / 2
        cuboid = volumetric.Cuboid3D(position, sides)
        cuboids.append(cuboid)

        # build coord volume
        xxx, yyy, zzz = torch.meshgrid(
            torch.arange(self.volume_size, device=device),
            torch.arange(self.volume_size, device=device),
            torch.arange(self.volume_size, device=device))
        grid = torch.stack([xxx, yyy, zzz], dim=-1).type(torch.float)
        grid = grid.reshape((-1, 3))

        grid_coord = torch.zeros_like(grid)
        grid_coord[:, 0] = position[0] + (sides[0] / (self.volume_size - 1)) * grid[:, 0]
        grid_coord[:, 1] = position[1] + (sides[1] / (self.volume_size - 1)) * grid[:, 1]
        grid_coord[:, 2] = position[2] + (sides[2] / (self.volume_size - 1)) * grid[:, 2]

        coord_volume = grid_coord.reshape(self.volume_size,
                                          self.volume_size,
                                          self.volume_size, 3)

        # random rotation
        if self.training:
            theta = np.random.uniform(0.0, 2 * np.pi)
        else:
            theta = 0.0

        if self.kind == "coco":
            axis = [0, 1, 0]  # y axis
        elif self.kind in ("mpii", "cmu"):
            axis = [0, 0, 1]  # z axis

        center = torch.from_numpy(base_point).type(torch.float).to(device)

        # rotate
        coord_volume = coord_volume - center
        coord_volume = volumetric.rotate_coord_volume(
            coord_volume, theta, axis)
        coord_volume = coord_volume + center

        # transfer
        if self.transfer_cmu_to_human36m or self.kind == "cmu":
            # different world coordinates
            coord_volume = coord_volume.permute(0, 2, 1, 3)
            inv_idx = torch.arange(coord_volume.shape[1] - 1, -1,
                                   -1).long().to(device)
            coord_volume = coord_volume.index_select(1, inv_idx)

        coord_volumes[batch_i] = coord_volume

    # process features before unprojecting
    features = features.view(-1, *features.shape[2:])
    features = self.process_features(features)
    features = features.view(batch_size, n_views, *features.shape[1:])

    # lift to volume
    volumes = op.unproject_heatmaps(
        features,
        proj_matricies,
        coord_volumes,
        volume_aggregation_method=self.volume_aggregation_method,
        vol_confidences=vol_confidences)

    # integral 3d
    volumes = self.volume_net(volumes)
    vol_keypoints_3d, volumes = op.integrate_tensor_3d_with_coordinates(
        volumes * self.volume_multiplier,
        coord_volumes,
        softmax=self.volume_softmax)

    return (vol_keypoints_3d, features, volumes, vol_confidences, cuboids,
            coord_volumes, base_points)
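
# Every variant finishes with op.integrate_tensor_3d_with_coordinates. Below
# is a minimal sketch of the 3D soft-argmax it is assumed to perform (not
# necessarily the repo's exact code): softmax-normalize each joint's volume,
# then take the expectation of the world-space grid coordinates under that
# distribution.
import torch
import torch.nn.functional as F

def integrate_tensor_3d_with_coordinates_sketch(volumes, coord_volumes, softmax=True):
    """volumes: (B, J, S, S, S); coord_volumes: (B, S, S, S, 3)."""
    batch_size, n_joints = volumes.shape[:2]
    flat = volumes.reshape(batch_size, n_joints, -1)
    flat = F.softmax(flat, dim=2) if softmax else F.relu(flat)
    coords = coord_volumes.reshape(batch_size, 1, -1, 3)
    # expectation of the grid coordinates under each joint's distribution
    keypoints_3d = (flat.unsqueeze(-1) * coords).sum(dim=2)  # (B, J, 3)
    return keypoints_3d, flat.reshape(*volumes.shape)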