from collections import defaultdict

import numpy as np
import torch

# Note: kabsch_transformation_estimation and transform_point_cloud used below
# are project-local helpers, assumed to be importable from the repo utilities.


def _infer_clusters(self, st_s, st_t, sem_label_s, sem_label_t):
    # Cluster the source and target point clouds (only source clusters will be used)
    running_idx_s = 0
    running_idx_t = 0
    clusters_s = defaultdict(list)
    clusters_t = defaultdict(list)
    clusters_s_rot = defaultdict(list)
    clusters_s_trans = defaultdict(list)

    batch_size = torch.max(st_s.coordinates[:, 0]) + 1

    for b_idx in range(batch_size):
        # Foreground indices of the current batch element (the first column of
        # the sparse tensor coordinates holds the batch index)
        n_pts_s = st_s.C[st_s.C[:, 0] == b_idx, 1:].shape[0]
        n_pts_t = st_t.C[st_t.C[:, 0] == b_idx, 1:].shape[0]
        b_fgrnd_idx_s = torch.where(sem_label_s[running_idx_s:(running_idx_s + n_pts_s)] == 1)[0]
        b_fgrnd_idx_t = torch.where(sem_label_t[running_idx_t:(running_idx_t + n_pts_t)] == 1)[0]

        coor_s = st_s.C[st_s.C[:, 0] == b_idx, 1:].to(self.device) * self.voxel_size
        coor_t = st_t.C[st_t.C[:, 0] == b_idx, 1:].to(self.device) * self.voxel_size

        # Only cluster if foreground points are present in both source and target
        if b_fgrnd_idx_s.shape[0] and b_fgrnd_idx_t.shape[0]:
            xyz_fgrnd_s = coor_s[b_fgrnd_idx_s, :].cpu().numpy()
            xyz_fgrnd_t = coor_t[b_fgrnd_idx_t, :].cpu().numpy()

            # Perform clustering
            labels_s = self.cluster_estimator.fit_predict(xyz_fgrnd_s)
            labels_t = self.cluster_estimator.fit_predict(xyz_fgrnd_t)

            # Map cluster labels to point indices (keep only clusters with at least min_p_cluster points)
            for class_label in np.unique(labels_s):
                if class_label != -1 and np.where(labels_s == class_label)[0].shape[0] >= self.min_p_cluster:
                    clusters_s[str(b_idx)].append(b_fgrnd_idx_s[np.where(labels_s == class_label)[0]] + running_idx_s)

            for class_label in np.unique(labels_t):
                if class_label != -1 and np.where(labels_t == class_label)[0].shape[0] >= self.min_p_cluster:
                    clusters_t[str(b_idx)].append(b_fgrnd_idx_t[np.where(labels_t == class_label)[0]] + running_idx_t)

            # Estimate the relative transformation parameters of each cluster
            if self.test_flag:
                for c_idx in clusters_s[str(b_idx)]:
                    cluster_xyz_s = (st_s.C[c_idx, 1:] * self.voxel_size).unsqueeze(0).to(self.device)
                    cluster_flow = self.inferred_values['refined_flow'][c_idx, :].unsqueeze(0)
                    reconstructed_xyz = cluster_xyz_s + cluster_flow

                    R_cluster, t_cluster, _, _ = kabsch_transformation_estimation(cluster_xyz_s, reconstructed_xyz)

                    clusters_s_rot[str(b_idx)].append(R_cluster.squeeze(0))
                    clusters_s_trans[str(b_idx)].append(t_cluster.squeeze(0))

        running_idx_s += coor_s.shape[0]
        running_idx_t += coor_t.shape[0]

    self.inferred_values['clusters_s'] = clusters_s
    self.inferred_values['clusters_t'] = clusters_t
    self.inferred_values['clusters_s_R'] = clusters_s_rot
    self.inferred_values['clusters_s_t'] = clusters_s_trans
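# A minimal, self-contained sketch of the clustering step above. It assumes a
# DBSCAN estimator in place of self.cluster_estimator; the eps/min_samples
# values are illustrative defaults, not taken from this repo.
import numpy as np
from sklearn.cluster import DBSCAN


def cluster_foreground(xyz_fgrnd, min_p_cluster=30, eps=0.75, min_samples=5):
    """Cluster foreground points; return one index array per retained cluster."""
    labels = DBSCAN(eps=eps, min_samples=min_samples).fit_predict(xyz_fgrnd)

    clusters = []
    for class_label in np.unique(labels):
        idx = np.where(labels == class_label)[0]
        # Skip noise points (label -1) and clusters that are too small
        if class_label != -1 and idx.shape[0] >= min_p_cluster:
            clusters.append(idx)
    return clusters


# Usage example: two well-separated blobs yield two clusters
# xyz = np.concatenate([np.random.randn(100, 3), np.random.randn(100, 3) + 10.0])
# print(len(cluster_foreground(xyz)))  # -> 2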
def forward(self, score_matrix, mask, xyz_s, xyz_t):
    # Convert the score matrix into an affinity matrix using the learned
    # parameters alpha (threshold) and beta (temperature)
    affinity = -(score_matrix - self.softplus(self.alpha)) / (torch.exp(self.beta) + 0.02)

    # Run Sinkhorn normalization in log space to obtain a soft permutation matrix
    log_perm_matrix = self.sinkhorn(affinity, n_iters=self.sinkhorn_iter, slack=self.slack)
    perm_matrix = torch.exp(log_perm_matrix) * mask

    # Compute the weighted target coordinates (soft correspondences)
    weighted_t = perm_matrix @ xyz_t / (torch.sum(perm_matrix, dim=2, keepdim=True) + _EPS)

    # Estimate the rigid transformation that aligns the source points to the soft correspondences
    R_est, t_est, _, _ = kabsch_transformation_estimation(xyz_s, weighted_t, weights=torch.sum(perm_matrix, dim=2))

    return R_est, t_est, perm_matrix
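# A minimal sketch of the log-domain Sinkhorn normalization that self.sinkhorn
# is assumed to perform (unslacked variant; the slack row/column that absorbs
# outliers in the full implementation is omitted for brevity).
import torch


def sinkhorn_log(affinity, n_iters=5):
    """Alternate row/column normalization of exp(affinity), done in log space."""
    log_alpha = affinity  # [B, N, M]
    for _ in range(n_iters):
        # Row normalization: each row of exp(log_alpha) sums to 1
        log_alpha = log_alpha - torch.logsumexp(log_alpha, dim=2, keepdim=True)
        # Column normalization: each column of exp(log_alpha) sums to 1
        log_alpha = log_alpha - torch.logsumexp(log_alpha, dim=1, keepdim=True)
    return log_alpha  # log of an (approximately) doubly-stochastic matrix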
def forward(self, data, xs):
    # data: b*c*n*1
    x1_1 = self.conv1(data)
    x1_1 = self.l1_1(x1_1)
    x_down = self.down1(x1_1)
    x2 = self.l2(x_down)
    x_up = self.up1(x1_1, x2)
    out = self.l1_2(torch.cat([x1_1, x_up], dim=1))

    # Per-point inlier confidences in [0, 1)
    logits = torch.squeeze(torch.squeeze(self.output(out), 3), 1)
    weights = torch.relu(torch.tanh(logits))

    # Guard against all-zero weight rows, which would make the weighted
    # Kabsch estimation degenerate
    if torch.any(torch.sum(weights, dim=1) == 0.0):
        weights = weights + 1 / weights.shape[1]

    # Split the input correspondences into source and target coordinates
    pts_s, pts_t = xs[:, 0, :, :3], xs[:, 0, :, 3:]
    rotation_est, translation_est, residuals, gradient_not_valid = kabsch_transformation_estimation(
        pts_s, pts_t, weights)

    return logits, weights, rotation_est, translation_est, residuals, out, gradient_not_valid
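# A minimal sketch of the weighted Kabsch solver that
# kabsch_transformation_estimation is assumed to implement (batched SVD
# solution of the weighted orthogonal Procrustes problem; the helper's extra
# return values, residuals and gradient_not_valid, are omitted here).
import torch


def weighted_kabsch(x_s, x_t, weights):
    """x_s, x_t: [B, N, 3]; weights: [B, N]. Returns R: [B, 3, 3], t: [B, 3, 1]."""
    w = weights / (weights.sum(dim=1, keepdim=True) + 1e-8)   # normalized weights
    mu_s = (w.unsqueeze(-1) * x_s).sum(dim=1, keepdim=True)   # weighted centroids
    mu_t = (w.unsqueeze(-1) * x_t).sum(dim=1, keepdim=True)
    xc_s, xc_t = x_s - mu_s, x_t - mu_t                       # centered points

    cov = xc_s.transpose(1, 2) @ (w.unsqueeze(-1) * xc_t)     # 3x3 cross-covariance
    U, _, V = torch.svd(cov)

    # Resolve the reflection ambiguity so that det(R) = +1
    d = torch.det(V @ U.transpose(1, 2))
    S = torch.diag_embed(torch.stack([torch.ones_like(d), torch.ones_like(d), d], dim=1))

    R = V @ S @ U.transpose(1, 2)
    t = mu_t.transpose(1, 2) - R @ mu_s.transpose(1, 2)
    return R, t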
def __call__(self, inferred_values, gt_data):
    # Initialize the dictionary of individual losses
    losses = {}

    if self.args['method']['flow'] and self.args['loss']['flow_loss']:
        assert ('coarse_flow' in inferred_values) and ('flow' in gt_data), \
            'Flow loss selected but either estimated or ground-truth flow not provided'

        losses['refined_flow_loss'] = self.flow_criterion(
            inferred_values['refined_flow'],
            gt_data['flow']) * self.args['loss'].get('flow_loss_w', 1.0)
        losses['coarse_flow_loss'] = self.flow_criterion(
            inferred_values['coarse_flow'],
            gt_data['flow']) * self.args['loss'].get('flow_loss_w', 1.0)

    if self.args['method']['ego_motion'] and self.args['loss']['ego_loss']:
        assert ('R_est' in inferred_values) and ('R_ego' in gt_data), \
            'Ego-motion loss selected but either estimated or ground-truth ego motion not provided'
        assert 'permutation' in inferred_values, \
            'Outlier loss selected but the permutation matrix is not provided'

        # Only evaluate the ego-motion loss on the background points
        mask = (gt_data['fg_labels_s'] == 0)

        prev_idx = 0
        pc_t_gt, pc_t_est = [], []

        # Iterate over the samples in the batch
        for batch_idx in range(gt_data['R_ego'].shape[0]):
            # Convert the voxel indices back to metric coordinates
            p_s_temp = gt_data['sinput_s_C'][
                prev_idx:prev_idx + gt_data['len_batch'][batch_idx][0], :].to(
                    self.device) * self.args['misc']['voxel_size']
            mask_temp = mask[prev_idx:prev_idx + gt_data['len_batch'][batch_idx][0]]

            # Transform the point cloud with the gt and estimated ego-motion parameters
            pc_t_gt_temp = transform_point_cloud(
                p_s_temp[mask_temp, :3], gt_data['R_ego'][batch_idx, :, :],
                gt_data['t_ego'][batch_idx, :, :])
            pc_t_est_temp = transform_point_cloud(
                p_s_temp[mask_temp, :3], inferred_values['R_est'][batch_idx, :, :],
                inferred_values['t_est'][batch_idx, :, :])

            pc_t_gt.append(pc_t_gt_temp.squeeze(0))
            pc_t_est.append(pc_t_est_temp.squeeze(0))

            prev_idx += gt_data['len_batch'][batch_idx][0]

        pc_t_est = torch.cat(pc_t_est, 0)
        pc_t_gt = torch.cat(pc_t_gt, 0)

        losses['ego_loss'] = self.ego_l1_criterion(
            pc_t_est, pc_t_gt) * self.args['loss'].get('ego_loss_w', 1.0)
        losses['outlier_loss'] = self.ego_outlier_criterion(
            inferred_values['permutation']) * self.args['loss'].get('inlier_loss_w', 1.0)

    # Background segmentation loss
    if self.args['method']['semantic'] and self.args['loss']['background_loss']:
        assert ('semantic_logits_s' in inferred_values) and ('fg_labels_s' in gt_data), \
            'Background loss selected but either estimated or ground-truth labels not provided'

        semantic_loss = torch.tensor(0.0).to(self.device)
        semantic_loss += self.seg_criterion(
            inferred_values['semantic_logits_s'].F,
            gt_data['fg_labels_s']) * self.args['loss'].get('bg_loss_w', 1.0)

        # If background labels for the target point cloud are available,
        # also use them for the loss computation and average both terms
        if 'semantic_logits_t' in inferred_values:
            semantic_loss += self.seg_criterion(
                inferred_values['semantic_logits_t'].F,
                gt_data['fg_labels_t']) * self.args['loss'].get('bg_loss_w', 1.0)
            semantic_loss = semantic_loss / 2

        losses['semantic_loss'] = semantic_loss

    # Foreground loss
    if self.args['method']['clustering'] and self.args['loss']['foreground_loss']:
        assert 'clusters_s' in inferred_values, \
            'Foreground loss selected but inferred cluster labels not provided'

        rigidity_loss = torch.tensor(0.0).to(self.device)
        chamfer_loss = torch.tensor(0.0).to(self.device)

        xyz_s = torch.cat(gt_data['pcd_s'], 0).to(self.device)
        xyz_t = torch.cat(gt_data['pcd_t'], 0).to(self.device)

        # Two-way chamfer distance for the foreground points (only computed
        # if both point clouds have more than 50 foreground points)
        if torch.where(gt_data['fg_labels_s'] == 1)[0].shape[0] > 50 and \
                torch.where(gt_data['fg_labels_t'] == 1)[0].shape[0] > 50:
            foreground_mask_s = (gt_data['fg_labels_s'] == 1)
            foreground_mask_t = (gt_data['fg_labels_t'] == 1)

            foreground_xyz_s = xyz_s[foreground_mask_s, :]
            foreground_flow = inferred_values['refined_flow'][foreground_mask_s, :]
            foreground_xyz_t = xyz_t[foreground_mask_t, :]

            dist1, dist2 = self.chamfer_criterion(
                foreground_xyz_t.unsqueeze(0),
                (foreground_xyz_s + foreground_flow).unsqueeze(0))

            # Clamp the distances to limit the influence of outliers
            # (objects that appear in or disappear from the scene)
            dist1 = torch.clamp(torch.sqrt(dist1), max=1.0)
            dist2 = torch.clamp(torch.sqrt(dist2), max=1.0)

            chamfer_loss += (torch.mean(dist1) + torch.mean(dist2)) / 2.0

        losses['chamfer_loss'] = chamfer_loss * self.args['loss'].get('cd_loss_w', 1.0)

        # Rigidity loss (the flow vectors of each cluster should describe a rigid motion)
        n_clusters = 0

        # Iterate over the clusters and enforce rigidity within each of them
        for batch_idx in inferred_values['clusters_s']:
            for cluster in inferred_values['clusters_s'][batch_idx]:
                cluster_xyz_s = xyz_s[cluster, :].unsqueeze(0)
                cluster_flow = inferred_values['refined_flow'][cluster, :].unsqueeze(0)
                reconstructed_xyz = cluster_xyz_s + cluster_flow

                # Unweighted Kabsch estimation: the rigid transformation that
                # best explains the cluster's flow vectors
                R_cluster, t_cluster, _, _ = kabsch_transformation_estimation(
                    cluster_xyz_s, reconstructed_xyz)

                # Detach the gradients such that they do not flow through the
                # transformation parameters but only through the flow vectors
                rigid_xyz = (torch.matmul(R_cluster, cluster_xyz_s.transpose(1, 2)) +
                             t_cluster).detach().squeeze(0).transpose(0, 1)

                rigidity_loss += self.rigidity_criterion(reconstructed_xyz.squeeze(0), rigid_xyz)
                n_clusters += 1

        n_clusters = 1.0 if n_clusters == 0 else n_clusters
        losses['rigidity_loss'] = (rigidity_loss / n_clusters) * self.args['loss'].get('rigid_loss_w', 1.0)

    # Compute the total loss as the sum of the individual losses
    total_loss = 0.0
    for key in losses:
        total_loss += losses[key]
    losses['total_loss'] = total_loss

    return losses
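# A minimal sketch of the clamped two-way Chamfer term above. It assumes
# self.chamfer_criterion returns squared nearest-neighbor distances in both
# directions; here a brute-force O(M*N) torch.cdist version is used instead,
# so the distances are already Euclidean and the sqrt is dropped.
import torch


def clamped_chamfer(xyz_t, xyz_s_warped, max_dist=1.0):
    """xyz_t: [1, M, 3]; xyz_s_warped: [1, N, 3] (source points moved by the flow)."""
    d = torch.cdist(xyz_t, xyz_s_warped)  # [1, M, N] pairwise Euclidean distances
    dist1 = d.min(dim=2).values           # target -> warped source
    dist2 = d.min(dim=1).values           # warped source -> target

    # Clamp to limit the influence of points without a true correspondence
    # (objects that appear in or disappear from the scene)
    dist1 = torch.clamp(dist1, max=max_dist)
    dist2 = torch.clamp(dist2, max=max_dist)

    return (dist1.mean() + dist2.mean()) / 2.0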