def forward(self, pts, one_hot_vec):
        """Full Frustum-PointNets forward pass.

        :param pts: (bs, 4, n) frustum point cloud -- XYZ + intensity in the
            point channels; XYZs are in frustum coordinates.
        :param one_hot_vec: (bs, 3) one-hot vector of the predicted object
            class.
        :return: tuple of
            (logits, mask, stage1_center, center_boxnet, object_pts_xyz_new,
             heading_scores, heading_residuals_normalized, heading_residuals,
             size_scores, size_residuals_normalized, size_residuals, center).
        """
        # 3D Instance Segmentation PointNet: per-point fg/bg logits.
        logits = self.InsSeg(pts, one_hot_vec)  # (bs, n, 2)

        # Keep the predicted-object points and recentre on their centroid.
        # NOTE(review): logits is passed undetached, so segmentation
        # gradients flow through the masking stage -- confirm intended.
        object_pts_xyz, mask_xyz_mean, mask = \
                 point_cloud_masking(pts, logits)

        # T-Net regresses a residual translation from the mask centroid.
        center_delta = self.STN(object_pts_xyz, one_hot_vec)  # (bs, 3)
        stage1_center = center_delta + mask_xyz_mean  # (bs, 3)

        # Shift object points into the T-Net-centred frame; broadcasting
        # over the point dimension replaces the old explicit repeat().
        object_pts_xyz_new = object_pts_xyz - center_delta.unsqueeze(2)

        # Amodal 3D Box Estimation PointNet.
        box_pred = self.est(object_pts_xyz_new, one_hot_vec)  # (bs, 59)

        center_boxnet, \
        heading_scores, heading_residuals_normalized, heading_residuals, \
        size_scores, size_residuals_normalized, size_residuals = \
                parse_output_to_tensors(box_pred, logits, mask, stage1_center)

        # Final box centre in the original frustum frame.
        center = center_boxnet + stage1_center  # (bs, 3)
        return logits, mask, stage1_center, center_boxnet, object_pts_xyz_new, \
            heading_scores, heading_residuals_normalized, heading_residuals, \
            size_scores, size_residuals_normalized, size_residuals, center
def get_model(point_cloud,
              one_hot_vec,
              is_training,
              bn_decay=None,
              track=False,
              lstm_params=None):
    ''' Frustum PointNets model: predicts a 3D instance mask and an amodal
    3D bounding box for the object inside a frustum point cloud.

    Input:
        point_cloud: TF tensor in shape (B,N,4)
            frustum point clouds with XYZ and intensity in point channels
            XYZs are in frustum coordinate
        one_hot_vec: TF tensor in shape (B,3)
            length-3 vectors indicating predicted object type
        is_training: TF boolean scalar
        bn_decay: TF float scalar
        track: True to use track ids inside the amodal box estimation net
        lstm_params: dict of LSTM parameters (n_batch, tau, feat_vec_len);
            only consulted when track is True.
    Output:
        end_points: dict (map from name strings to TF tensors)
    '''
    end_points = {}

    # Stage 1 -- per-point instance segmentation.
    seg_logits, end_points = get_instance_seg_v1_net(
        point_cloud, one_hot_vec, is_training, bn_decay, end_points)
    end_points['mask_logits'] = seg_logits

    # Stage 2 -- keep the masked points, translated to their centroid.
    obj_pts_xyz, centroid_xyz, end_points = point_cloud_masking(
        point_cloud, seg_logits, end_points)

    # Stage 3 -- T-Net refines the centroid towards the true box centre.
    delta, end_points = get_center_regression_net(
        obj_pts_xyz, one_hot_vec, is_training, bn_decay, end_points)
    coarse_center = delta + centroid_xyz  # Bx3
    end_points['stage1_center'] = coarse_center
    # Express the object points in the refined (object) frame.
    obj_pts_recentred = obj_pts_xyz - tf.expand_dims(delta, 1)

    # Stage 4 -- amodal box estimation.
    box_out, end_points = get_3d_box_estimation_v1_net(
        obj_pts_recentred,
        one_hot_vec,
        is_training,
        bn_decay,
        end_points,
        track=track,
        lstm_params=lstm_params,
        center_est=coarse_center)
    # Decode raw regression output into named box tensors.
    end_points = parse_output_to_tensors(box_out, end_points)
    end_points['center'] = end_points['center_boxnet'] + coarse_center  # Bx3

    return end_points
    def forward(self, point_cloud, one_hot_vec):
        '''Frustum PointNets forward pass.

        :param point_cloud: tensor in shape (B,4,N) -- frustum point clouds
            with XYZ and intensity in the point channels; XYZs are in
            frustum coordinates.
        :param one_hot_vec: tensor in shape (B,3) -- one-hot vector of the
            predicted object type.
        :return: self.end_points, dict mapping name strings to tensors.
        '''
        # Per-point instance segmentation logits.
        # NOTE(review): original author flagged this call's interface as
        # possibly problematic -- verify against the helper's signature.
        seg_logits = self.get_instance_seg_v1_net(point_cloud, one_hot_vec)
        self.end_points['mask_logits'] = seg_logits

        # Keep the masked points and shift them to their centroid.
        obj_xyz, centroid, self.end_points = point_cloud_masking(
            point_cloud, seg_logits, self.end_points)

        # T-Net: regress the residual from the centroid to the box centre.
        delta = self.get_center_regression_net(obj_xyz, one_hot_vec)  # (B,3)
        coarse_center = delta + centroid
        self.end_points['stage1_center'] = coarse_center
        # Re-express the object points in the T-Net-centred frame.
        obj_xyz_centred = obj_xyz - torch.unsqueeze(delta, dim=2)  # -(B,3,1)

        # Amodal box estimation.
        box_out = self.get_3d_box_estimation_v1_net(obj_xyz_centred,
                                                    one_hot_vec)

        # Split the raw regression output into named box parameters.
        self.end_points = parse_output_to_tensors(box_out, self.end_points)
        self.end_points['center'] = (self.end_points['center_boxnet']
                                     + coarse_center)  # Bx3
        return self.end_points
def get_model(point_cloud, one_hot_vec, is_training, bn_decay=None):
    ''' Frustum PointNets model. Predicts a 3D object mask and an amodal
    bounding box for the object in a frustum point cloud.

    Input:
        point_cloud: TF tensor in shape (B,N,4)
            frustum point clouds with XYZ and intensity in point channels
            XYZs are in frustum coordinate
        one_hot_vec: TF tensor in shape (B,3)
            length-3 vectors indicating predicted object type
        is_training: TF boolean scalar
        bn_decay: TF float scalar
    Output:
        end_points: dict (map from name strings to TF tensors)
    '''
    end_points = {}

    # 3D instance segmentation: per-point foreground logits.
    logits, end_points = get_instance_seg_v1_net(
        point_cloud, one_hot_vec, is_training, bn_decay, end_points)
    end_points['mask_logits'] = logits

    # Gather the masked points and translate them to their centroid; the
    # intermediates are exposed in end_points for downstream consumers.
    obj_xyz, obj_centroid, end_points = point_cloud_masking(
        point_cloud, logits, end_points)
    end_points['object_point_cloud_xyz'] = obj_xyz
    end_points['mask_xyz_mean'] = obj_centroid

    # T-Net: residual from the centroid to the box centre.
    delta, end_points = get_center_regression_net(
        obj_xyz, one_hot_vec, is_training, bn_decay, end_points)
    end_points['center_delta'] = delta
    stage1 = delta + obj_centroid  # Bx3
    end_points['stage1_center'] = stage1
    # Object points expressed in the object coordinate frame.
    obj_xyz_local = obj_xyz - tf.expand_dims(delta, 1)

    # Amodal box estimation.
    box_out, end_points = get_3d_box_estimation_v1_net(
        obj_xyz_local, one_hot_vec, is_training, bn_decay, end_points)

    # Decode raw regression output into named box parameters.
    end_points = parse_output_to_tensors(box_out, end_points)
    end_points['center'] = end_points['center_boxnet'] + stage1  # Bx3

    return end_points
def get_model(point_cloud, one_hot_vec, is_training, bn_decay=None):
    ''' Frustum PointNets model. Given frustum point clouds, predicts a
    per-point instance mask and an amodal 3D bounding box.

    Input:
        point_cloud: TF tensor in shape (B,N,4)
            frustum point clouds with XYZ and intensity in point channels
            XYZs are in frustum coordinate
        one_hot_vec: TF tensor in shape (B,3)
            length-3 vectors indicating predicted object type
        is_training: TF boolean scalar
        bn_decay: TF float scalar
    Output:
        end_points: dict (map from name strings to TF tensors)
    '''
    end_points = {}

    # Stage 1 -- instance segmentation over the frustum cloud.
    seg_logits, end_points = get_instance_seg_v1_net(
        point_cloud, one_hot_vec, is_training, bn_decay, end_points)
    end_points['mask_logits'] = seg_logits

    # Stage 2 -- mask out background and recentre on the mask centroid.
    masked_xyz, masked_mean, end_points = point_cloud_masking(
        point_cloud, seg_logits, end_points)

    # Stage 3 -- T-Net translation regression.
    tnet_delta, end_points = get_center_regression_net(
        masked_xyz, one_hot_vec, is_training, bn_decay, end_points)
    stage1_center = tnet_delta + masked_mean  # Bx3
    end_points['stage1_center'] = stage1_center
    # Object points expressed relative to the regressed centre.
    masked_xyz_obj = masked_xyz - tf.expand_dims(tnet_delta, 1)

    # Stage 4 -- amodal box estimation.
    box_params, end_points = get_3d_box_estimation_v1_net(
        masked_xyz_obj, one_hot_vec, is_training, bn_decay, end_points)

    # Decode raw outputs into named box tensors.
    end_points = parse_output_to_tensors(box_params, end_points)
    end_points['center'] = end_points['center_boxnet'] + stage1_center  # Bx3

    return end_points
def get_model(point_cloud, one_hot_vec, is_training, bn_decay=None):
    ''' Frustum PointNets model. The model predicts 3D object masks and
    amodal bounding boxes for objects in frustum point clouds.

    Input:
        point_cloud: TF tensor in shape (B,N,4)
            frustum point clouds with XYZ and intensity in point channels
            XYZs are in frustum coordinate
        one_hot_vec: TF tensor in shape (B,3)
            length-3 vectors indicating predicted object type
        is_training: TF boolean scalar
        bn_decay: TF float scalar
    Output:
        end_points: dict (map from name strings to TF tensors)
    '''
    end_points = {}

    # 3D instance segmentation PointNet.
    mask_logits, end_points = get_instance_seg_v1_net(
        point_cloud, one_hot_vec, is_training, bn_decay, end_points)
    end_points['mask_logits'] = mask_logits

    # Filter the cloud by the predicted mask and shift to the mask centroid.
    seg_xyz, seg_centroid, end_points = point_cloud_masking(
        point_cloud, mask_logits, end_points)

    # T-Net centre-residual regression and coordinate translation.
    residual, end_points = get_center_regression_net(
        seg_xyz, one_hot_vec, is_training, bn_decay, end_points)
    stage1_center = residual + seg_centroid  # Bx3
    # Position of the object-frame origin in the original coordinates.
    end_points['stage1_center'] = stage1_center
    # Object point cloud expressed in the object coordinate frame.
    seg_xyz_object = seg_xyz - tf.expand_dims(residual, 1)

    # Bounding-box estimation PointNet.
    box_output, end_points = get_3d_box_estimation_v1_net(
        seg_xyz_object, one_hot_vec, is_training, bn_decay, end_points)

    # Split the output into named tensors and store them.
    end_points = parse_output_to_tensors(box_output, end_points)
    # center_boxnet is the box centre predicted in the object frame; adding
    # stage1_center yields the box centre in the original frame.
    end_points['center'] = end_points['center_boxnet'] + stage1_center  # Bx3

    return end_points
def get_model(point_cloud, is_training, bn_decay=None):
    '''Box estimation model without a segmentation stage: a T-Net first
    recentres the input cloud, then an estimation net regresses the box.

    Input:
        point_cloud: TF tensor in shape (B,N,3+C); the first three point
            channels are XYZ (inferred from the slicing below -- confirm).
        is_training: TF boolean scalar
        bn_decay: TF float scalar
    Output:
        end_points: dict (map from name strings to TF tensors)
    '''
    end_points = {}

    # T-Net: regress a translation towards the object centre.
    delta = get_center_regression_net(point_cloud, is_training, bn_decay)
    end_points['center_delta'] = delta

    # Recentre only the XYZ channels; extra features pass through untouched.
    xyz = tf.slice(point_cloud, [0, 0, 0], [-1, -1, 3])  # BxNx3
    extra = tf.slice(point_cloud, [0, 0, 3], [-1, -1, -1])
    recentred = tf.concat([xyz - tf.expand_dims(delta, 1), extra], axis=-1)

    # Amodal box estimation on the recentred cloud.
    box_out = get_3d_box_estimation_net(recentred, is_training, bn_decay)

    # Decode, then move the regressed centre back into the input frame.
    end_points = parse_output_to_tensors(box_out, end_points)
    end_points['center'] = end_points['center_res'] + end_points['center_delta']

    return end_points
def get_model(point_cloud, one_hot_vec, is_training, bn_decay=None):
    ''' Frustum PointNets model. The model predict 3D object masks and
    amodel bounding boxes for objects in frustum point clouds.

    The segmentation input is augmented with a learned input transform and
    invariance-transformation features before the standard pipeline runs.

    Input:
        point_cloud: TF tensor in shape (B,N,4)
            frustum point clouds with XYZ and intensity in point channels
            XYZs are in frustum coordinate
        one_hot_vec: TF tensor in shape (B,3)
            length-3 vectors indicating predicted object type
        is_training: TF boolean scalar
        bn_decay: TF float scalar
    Output:
        end_points: dict (map from name strings to TF tensors)
    '''

    #############Invariance transformation Net###########################################

    ### Add Neighboring feature

    ### generate new only xyz coordinate point cloud tensor -- no intensity

    # Drop the intensity channel: keep only the first three (XYZ) columns.
    point_cloud_xyz = tf.slice(point_cloud, [0, 0, 0], [-1, -1, 3])

    print("point_cloud shape", point_cloud.get_shape())
    print("point_cloud_xyz", point_cloud_xyz.get_shape())

    # Run the invariance-transformation network on bare XYZ and take the
    # feature map of its last layer.
    # NOTE(review): invariants_trans_param_7_layer is a module-level config
    # not visible here -- confirm it matches the intended 7-layer variant.
    features_initial = None
    invariance_transformation_net = Invariance_Transformation_Net(
        point_cloud=point_cloud_xyz,
        features=features_initial,
        is_training=is_training,
        invarians_trans_param=invariants_trans_param_7_layer).layer_fts[-1]

    print("invariance_transformation_net",
          tf.shape(invariance_transformation_net))

    print("invariance_transformation_net", type(invariance_transformation_net))
    print(
        '----------------------------------done----------------------------------------------'
    )

    end_points = {}

    batch_size = point_cloud.get_shape()[0].value
    num_point = point_cloud.get_shape()[1].value

    # Number of neighbours for the kNN graph built below.
    k = 4  ###################Set the number of neighboring #################################################################################################

    # Build a kNN graph over XYZ and extract per-edge features.
    adj_matrix = pairwise_distance(point_cloud_xyz)
    print("adj_matrix", adj_matrix.get_shape())
    nn_idx = knn(adj_matrix, k=k)
    print("nn_idx", nn_idx.get_shape())
    #edge_feature=get_edge_feature(point_cloud,point_cloud_xyz,nn_idx=nn_idx,k=k)
    edge_feature = get_edge_feature(point_cloud_xyz, nn_idx=nn_idx, k=k)
    print("edge_feature", edge_feature.get_shape())
    # Learn a KxK (K=3) input transform from the edge features.
    with tf.variable_scope('transform_net1') as sc:
        transform = input_transform_net(edge_feature,
                                        is_training,
                                        bn_decay,
                                        K=3)

    # Apply the learned transform to the XYZ cloud.
    poinr_cloud_transformed = tf.matmul(point_cloud_xyz, transform)

    print("edge_transform_feature", poinr_cloud_transformed.get_shape())

    # Append the transformed XYZ as extra point channels.
    point_cloud_concat = tf.concat([point_cloud, poinr_cloud_transformed],
                                   axis=-1)

    print("point_cloud_concat", point_cloud_concat.get_shape())

    ####################################Invariance transformation features added##############################################################################

    # Also append the invariance-transformation features per point.
    point_cloud_invari = tf.concat(
        [point_cloud_concat, invariance_transformation_net], axis=-1)
    #point_cloud_invari= tf.concat([point_cloud,invariance_transformation_net],axis=-1)
    print("point_cloud_invari", point_cloud_invari.get_shape())
    # 3D Instance Segmentation PointNet
    #logits, end_points = get_instance_seg_v1_net(\
    #    point_cloud_concat, one_hot_vec,
    #    is_training, bn_decay, end_points)
    #end_points['mask_logits'] = logits


    # Segmentation runs on the augmented cloud (XYZ+intensity+transformed
    # XYZ+invariance features).
    logits, end_points = get_instance_seg_v1_net(\
        point_cloud_invari, one_hot_vec,
        is_training, bn_decay, end_points)
    end_points['mask_logits'] = logits

    # Masking
    # select masked points and translate to masked points' centroid
    # (masking is applied to the original 4-channel cloud, not the
    # augmented one)
    object_point_cloud_xyz, mask_xyz_mean, end_points = \
        point_cloud_masking(point_cloud, logits, end_points)

    # T-Net and coordinate translation
    center_delta, end_points = get_center_regression_net(\
        object_point_cloud_xyz, one_hot_vec,
        is_training, bn_decay, end_points)
    stage1_center = center_delta + mask_xyz_mean  # Bx3
    end_points['stage1_center'] = stage1_center
    # Get object point cloud in object coordinate
    object_point_cloud_xyz_new = \
        object_point_cloud_xyz - tf.expand_dims(center_delta, 1)

    # Amodel Box Estimation PointNet
    output, end_points = get_3d_box_estimation_v1_net(\
        object_point_cloud_xyz_new, one_hot_vec,
        is_training, bn_decay, end_points)

    # Parse output to 3D box parameters
    end_points = parse_output_to_tensors(output, end_points)
    end_points['center'] = end_points['center_boxnet'] + stage1_center  # Bx3

    return end_points
Ejemplo n.º 9
0
def get_model(point_cloud, one_hot_vec, is_training, bn_decay=None):
    ''' Frustum PointNets model. The model predict 3D object masks and
    amodel bounding boxes for objects in frustum point clouds.

    The segmentation input is augmented with per-point DGCNN edge-conv
    features computed over a kNN graph before the standard pipeline runs.

    Input:
        point_cloud: TF tensor in shape (B,N,4)
            frustum point clouds with XYZ and intensity in point channels
            XYZs are in frustum coordinate
        one_hot_vec: TF tensor in shape (B,3)
            length-3 vectors indicating predicted object type
        is_training: TF boolean scalar
        bn_decay: TF float scalar
    Output:
        end_points: dict (map from name strings to TF tensors)
    '''

    #############Invariance transformation Net###########################################

    ### Add Neighboring feature

    ### generate new only xyz coordinate point cloud tensor -- no intensity

    # Drop the intensity channel: keep only the first three (XYZ) columns.
    point_cloud_xyz = tf.slice(point_cloud, [0, 0, 0], [-1, -1, 3])

    print("point_cloud shape", point_cloud.get_shape())
    print("point_cloud_xyz", point_cloud_xyz.get_shape())

    end_points = {}

    batch_size = point_cloud.get_shape()[0].value
    num_point = point_cloud.get_shape()[1].value

    # Number of neighbours used for every kNN graph below.
    k = 2  ###################Set the number of neighboring #################################################################################################

    # Build a kNN graph over raw XYZ and extract per-edge features.
    adj_matrix = pairwise_distance(point_cloud_xyz)
    print("adj_matrix", adj_matrix.get_shape())
    nn_idx = knn(adj_matrix, k=k)
    print("nn_idx", nn_idx.get_shape())
    #edge_feature=get_edge_feature(point_cloud,point_cloud_xyz,nn_idx=nn_idx,k=k)
    edge_feature = get_edge_feature(point_cloud_xyz, nn_idx=nn_idx, k=k)
    print("edge_feature", edge_feature.get_shape())
    # Learn a KxK (K=3) input transform from the edge features.
    with tf.variable_scope('transform_net1') as sc:
        transform = input_transform_net(edge_feature,
                                        is_training,
                                        bn_decay,
                                        K=3)

    # Apply the learned transform to the XYZ cloud.
    poinr_cloud_transformed = tf.matmul(point_cloud_xyz, transform)

    print("edge_transform_feature", poinr_cloud_transformed.get_shape())

    # --- EdgeConv layer 0: kNN graph on the transformed cloud. ---
    adj_matrix_edge = pairwise_distance(poinr_cloud_transformed)

    print("adj_matrix_edg_0", adj_matrix_edge.get_shape())

    nn_idx = knn(adj_matrix_edge, k=k)

    print("nn_idx_0", nn_idx.get_shape())

    edge_feature_edge = get_edge_feature(poinr_cloud_transformed,
                                         nn_idx=nn_idx,
                                         k=k)

    print("edge_feature_edge_0", edge_feature_edge.get_shape())

    edge_net = tf_util.conv2d_dgcnn(edge_feature_edge,
                                    64, [1, 1],
                                    padding='VALID',
                                    stride=[1, 1],
                                    bn=True,
                                    is_training=is_training,
                                    bn_decay=bn_decay,
                                    scope="edge_conv_0")
    print("edge_feature_conv_0", edge_net.get_shape())

    # Max-pool over the neighbour dimension (keep_dims is the TF1 arg name).
    edge_net = tf.reduce_max(edge_net, axis=-2, keep_dims=True)

    print("edge_net_change_channel_0", edge_net.get_shape())

    # NOTE(review): net1 and net2 below are saved but never used further in
    # this function -- possibly leftovers from a multi-scale concat.
    net1 = edge_net

    # --- EdgeConv layer 1: graph rebuilt in feature space. ---
    adj_matrix_edge = pairwise_distance(edge_net)

    print("adj_matrix_edg_1", adj_matrix_edge.get_shape())
    nn_idx = knn(adj_matrix_edge, k=k)

    edge_net = get_edge_feature(edge_net, nn_idx=nn_idx, k=k)

    edge_net = tf_util.conv2d_dgcnn(edge_net,
                                    64, [1, 1],
                                    padding='VALID',
                                    stride=[1, 1],
                                    bn=True,
                                    is_training=is_training,
                                    bn_decay=bn_decay,
                                    scope="edge_conv_1")
    edge_net = tf.reduce_max(edge_net, axis=-2, keep_dims=True)

    print("edge_net_change_channel_1", edge_net.get_shape())

    net2 = edge_net

    # --- EdgeConv layer 2. ---
    adj_matrix_edge = pairwise_distance(edge_net)

    print("adj_matrix_edg_2", adj_matrix_edge.get_shape())
    nn_idx = knn(adj_matrix_edge, k=k)

    edge_net = get_edge_feature(edge_net, nn_idx=nn_idx, k=k)

    edge_net = tf_util.conv2d_dgcnn(edge_net,
                                    64, [1, 1],
                                    padding='VALID',
                                    stride=[1, 1],
                                    bn=True,
                                    is_training=is_training,
                                    bn_decay=bn_decay,
                                    scope="edge_conv_2")
    edge_net = tf.reduce_max(edge_net, axis=-2, keep_dims=True)
    print("edge_net_change_channel_2", edge_net.get_shape())
    net3 = edge_net

    print("net3", net3.get_shape())

    # Drop the singleton neighbour axis and append the edge features as
    # extra point channels for segmentation.
    net4 = tf.squeeze(net3, axis=-2)

    point_cloud_concat = tf.concat([point_cloud, net4], axis=-1)

    print("point_cloud_concat", point_cloud_concat.get_shape())





    # 3D Instance Segmentation PointNet on the augmented cloud.
    logits, end_points = get_instance_seg_v1_net(\
        point_cloud_concat, one_hot_vec,
        is_training, bn_decay, end_points)
    end_points['mask_logits'] = logits

    # Masking
    # select masked points and translate to masked points' centroid
    # (masking uses the original 4-channel cloud, not the augmented one)
    object_point_cloud_xyz, mask_xyz_mean, end_points = \
        point_cloud_masking(point_cloud, logits, end_points)

    # T-Net and coordinate translation
    center_delta, end_points = get_center_regression_net(\
        object_point_cloud_xyz, one_hot_vec,
        is_training, bn_decay, end_points)
    stage1_center = center_delta + mask_xyz_mean  # Bx3
    end_points['stage1_center'] = stage1_center
    # Get object point cloud in object coordinate
    object_point_cloud_xyz_new = \
        object_point_cloud_xyz - tf.expand_dims(center_delta, 1)

    # Amodel Box Estimation PointNet
    output, end_points = get_3d_box_estimation_v1_net(\
        object_point_cloud_xyz_new, one_hot_vec,
        is_training, bn_decay, end_points)

    # Parse output to 3D box parameters
    end_points = parse_output_to_tensors(output, end_points)
    end_points['center'] = end_points['center_boxnet'] + stage1_center  # Bx3

    return end_points
    def forward(self, data_dicts):
        """Run segmentation, T-Net and box estimation on a batch, then
        compute training losses and evaluation metrics.

        :param data_dicts: dict with keys 'point_cloud' and 'one_hot', plus
            the label keys listed in the comment below for loss computation.
        :return: (losses, metrics) -- both dicts; each loss term is divided
            by the batch size.
        """
        #dict_keys(['point_cloud', 'rot_angle', 'box3d_center', 'size_class', 'size_residual', 'angle_class', 'angle_residual', 'one_hot', 'seg'])

        point_cloud = data_dicts.get('point_cloud')  #torch.Size([32, 4, 1024])
        # Keep only the first n_channel point features.
        point_cloud = point_cloud[:, :self.n_channel, :]
        one_hot = data_dicts.get('one_hot')  #torch.Size([32, 3])
        bs = point_cloud.shape[0]
        # If not None, use to Compute Loss
        seg_label = data_dicts.get('seg')  #torch.Size([32, 1024])
        box3d_center_label = data_dicts.get(
            'box3d_center')  #torch.Size([32, 3])
        size_class_label = data_dicts.get('size_class')  #torch.Size([32, 1])
        size_residual_label = data_dicts.get(
            'size_residual')  #torch.Size([32, 3])
        heading_class_label = data_dicts.get(
            'angle_class')  #torch.Size([32, 1])
        heading_residual_label = data_dicts.get(
            'angle_residual')  #torch.Size([32, 1])

        # 3D Instance Segmentation PointNet
        logits = self.InsSeg(point_cloud, one_hot)  #bs,n,2

        # Mask Point Centroid
        object_pts_xyz, mask_xyz_mean, mask = \
                 point_cloud_masking(point_cloud, logits)

        # T-Net
        object_pts_xyz = object_pts_xyz.cuda()
        center_delta = self.STN(object_pts_xyz, one_hot)  #(32,3)
        stage1_center = center_delta + mask_xyz_mean  #(32,3)

        # Debug trap: drop into ipdb if the T-Net centre went NaN.
        if (np.isnan(stage1_center.cpu().detach().numpy()).any()):
            ipdb.set_trace()
        # Shift object points into the T-Net-centred frame.
        object_pts_xyz_new = object_pts_xyz - \
                    center_delta.view(center_delta.shape[0],-1,1).repeat(1,1,object_pts_xyz.shape[-1])

        # 3D Box Estimation
        box_pred = self.est(object_pts_xyz_new, one_hot)  #(32, 59)

        center_boxnet, \
        heading_scores, heading_residual_normalized, heading_residual, \
        size_scores, size_residual_normalized, size_residual = \
                parse_output_to_tensors(box_pred, logits, mask, stage1_center)

        # Final box centre in the frustum frame.
        box3d_center = center_boxnet + stage1_center  #bs,3

        losses = self.Loss(logits, seg_label, \
                 box3d_center, box3d_center_label, stage1_center, \
                 heading_scores, heading_residual_normalized, \
                 heading_residual, \
                 heading_class_label, heading_residual_label, \
                 size_scores, size_residual_normalized, \
                 size_residual, \
                 size_class_label, size_residual_label)

        # Normalise every loss term by the batch size.
        for key in losses.keys():
            losses[key] = losses[key] / bs

        with torch.no_grad():
            seg_correct = torch.argmax(logits.detach().cpu(),
                                       2).eq(seg_label.detach().cpu()).numpy()
            # NOTE(review): divides by the number of points (n) only, not
            # bs*n -- a true per-batch accuracy would also divide by bs;
            # verify how callers aggregate this value.
            seg_accuracy = np.sum(seg_correct) / float(point_cloud.shape[-1])

            iou2ds, iou3ds = compute_box3d_iou( \
                box3d_center.detach().cpu().numpy(),
                heading_scores.detach().cpu().numpy(),
                heading_residual.detach().cpu().numpy(),
                size_scores.detach().cpu().numpy(),
                size_residual.detach().cpu().numpy(),
                box3d_center_label.detach().cpu().numpy(),
                heading_class_label.detach().cpu().numpy(),
                heading_residual_label.detach().cpu().numpy(),
                size_class_label.detach().cpu().numpy(),
                size_residual_label.detach().cpu().numpy())
        metrics = {
            'seg_acc': seg_accuracy,
            'iou2d': iou2ds.mean(),
            'iou3d': iou3ds.mean(),
            'iou3d_0.7': np.sum(iou3ds >= 0.7) / bs
        }
        return losses, metrics