def __init__(self, model_cfg, IN_DIM, dropout=0.1):
    """Create two self-attention blocks and two 1x1-conv reduction stacks.

    Args:
        model_cfg: Config object; ``PC_RANGE``, ``VOXEL_SIZE``, ``GRID_SIZE``
            and ``ATTN_DIM`` are read from it.
        IN_DIM: Input channel count of the first convolution in
            ``reduce_dim``.
        dropout: Not referenced anywhere in this constructor.
    """
    super().__init__()

    def conv_bn_relu_x2(c_in, c_out):
        # Two (Conv1d k=1 -> BatchNorm1d -> ReLU) stages: c_in -> c_out -> c_out.
        return nn.Sequential(
            nn.Conv1d(c_in, c_out, kernel_size=1),
            nn.BatchNorm1d(c_out),
            nn.ReLU(inplace=True),
            nn.Conv1d(c_out, c_out, kernel_size=1),
            nn.BatchNorm1d(c_out),
            nn.ReLU(inplace=True),
        )

    # Plain bookkeeping copied from the config / arguments.
    self.model_cfg = model_cfg
    self.pc_range = model_cfg.PC_RANGE
    self.voxel_size = model_cfg.VOXEL_SIZE
    self.grid_size = model_cfg.GRID_SIZE
    self.IN_DIM = IN_DIM

    attn_dim = model_cfg.ATTN_DIM
    wide_dim = 2 * attn_dim

    # Self-attention layers: the second one works on twice the width.
    self.self_attn1 = SA_block(inplanes=attn_dim, planes=attn_dim)
    self.self_attn2 = SA_block(inplanes=wide_dim, planes=wide_dim)

    # MLP layers: IN_DIM -> ATTN_DIM and 2*ATTN_DIM -> ATTN_DIM.
    self.reduce_dim = conv_bn_relu_x2(IN_DIM, attn_dim)
    self.reduce_dim_cat = conv_bn_relu_x2(wide_dim, attn_dim)
def __init__(self, model_cfg, grid_size, voxel_size, point_cloud_range, dropout=0.3):
    """Build the deform/aggregate, decode and self-attention submodules.

    Args:
        model_cfg: Config object; ``LOCAL_CONTEXT``, ``DECODE``,
            ``NUM_BEV_FEATURES`` and ``IN_DIM`` are read from it.
        grid_size: Sequence unpacked into ``nx, ny, nz``.
        voxel_size: Indexable with at least three entries (x, y, z).
        point_cloud_range: Indexable; entries 0..2 are added to half the
            voxel size to form the stored offsets.
        dropout: Not referenced anywhere in this constructor.
    """
    super().__init__()
    self.model_cfg = model_cfg
    self.nx, self.ny, self.nz = grid_size
    self.voxel_size = voxel_size
    self.point_cloud_range = point_cloud_range

    self.voxel_x = voxel_size[0]
    self.voxel_y = voxel_size[1]
    self.voxel_z = voxel_size[2]
    # Half a voxel plus the matching entry of point_cloud_range
    # (presumably the voxel-centre offset; confirm against forward()).
    self.x_offset = self.voxel_x / 2 + point_cloud_range[0]
    self.y_offset = self.voxel_y / 2 + point_cloud_range[1]
    self.z_offset = self.voxel_z / 2 + point_cloud_range[2]

    in_dim = self.model_cfg.IN_DIM

    # layers to deform + aggregate local features
    local_cfg = self.model_cfg.LOCAL_CONTEXT
    # Build new lists rather than writing back into the config: mutating
    # LOCAL_CONTEXT.MLPS in place would prepend the input width again each
    # time a module is constructed from the same config object.
    local_mlps = [
        [self.model_cfg.NUM_BEV_FEATURES, *mlp] for mlp in local_cfg.MLPS
    ]
    self.adapt_context = pointnet2_stack_modules.StackSAModuleMSGAdapt(
        radii=local_cfg.POOL_RADIUS,
        deform_radii=local_cfg.DEFORM_RADIUS,
        nsamples=local_cfg.NSAMPLE,
        mlps=local_mlps,
        use_xyz=True,
        pool_method=local_cfg.POOL_METHOD,
        pc_range=self.point_cloud_range,
    )

    # UnPool layers
    decode_cfg = self.model_cfg.DECODE
    # Same reasoning as above: leave DECODE.MLPS untouched.
    decode_mlps = [[in_dim, *mlp] for mlp in decode_cfg.MLPS]
    self.decode = pointnet2_stack_modules.StackSAModuleMSGDecode(
        radii=decode_cfg.POOL_RADIUS,
        nsamples=decode_cfg.NSAMPLE,
        mlps=decode_mlps,
        use_xyz=True,
        pool_method=decode_cfg.POOL_METHOD,
    )

    # self-attention layers to operate on deformed pillars
    self.self_full_fast_attn = SA_block(inplanes=in_dim, planes=in_dim)
    # 1x1-conv stack reducing 2*IN_DIM channels back to IN_DIM.
    self.reduce_dim = nn.Sequential(
        nn.Conv1d(2 * in_dim, in_dim, kernel_size=1),
        nn.BatchNorm1d(in_dim),
        nn.ReLU(inplace=True),
        nn.Conv1d(in_dim, in_dim, kernel_size=1),
        nn.BatchNorm1d(in_dim),
        nn.ReLU(inplace=True),
    )
    self.self_attn_ms1 = SA_block(inplanes=2 * in_dim, planes=2 * in_dim)
    self.self_attn_ms2 = SA_block(inplanes=2 * in_dim, planes=2 * in_dim)
def __init__(self, model_cfg, IN_DIM, dropout=0.1):
    """Create two equal-width self-attention blocks and one reduction MLP.

    Args:
        model_cfg: Config object; only ``ATTN_DIM`` is read from it here.
        IN_DIM: Input channel count of the first convolution in
            ``reduce_dim``.
        dropout: Not referenced anywhere in this constructor.
    """
    super().__init__()
    self.model_cfg = model_cfg
    self.IN_DIM = IN_DIM

    width = model_cfg.ATTN_DIM

    # Self attention layers (both ATTN_DIM -> ATTN_DIM).
    self.self_attn1 = SA_block(inplanes=width, planes=width)
    self.self_attn2 = SA_block(inplanes=width, planes=width)

    # MLP layer: IN_DIM -> ATTN_DIM -> ATTN_DIM, each stage followed by
    # BatchNorm1d and an in-place ReLU.
    stages = [
        nn.Conv1d(IN_DIM, width, kernel_size=1),
        nn.BatchNorm1d(width),
        nn.ReLU(inplace=True),
        nn.Conv1d(width, width, kernel_size=1),
        nn.BatchNorm1d(width),
        nn.ReLU(inplace=True),
    ]
    self.reduce_dim = nn.Sequential(*stages)
def __init__(self, model_cfg, grid_size, voxel_size, point_cloud_range, dropout=0.1):
    """Create a positional encoding, a layer norm and two self-attention blocks.

    Args:
        model_cfg: Config object; only ``IN_DIM`` is read from it here.
        grid_size: Sequence of exactly three entries, unpacked into
            ``nx, ny, nz``; entry 1 is the encoding height and entry 0 the
            encoding width.
        voxel_size: Not referenced anywhere in this constructor.
        point_cloud_range: Not referenced anywhere in this constructor.
        dropout: Not referenced anywhere in this constructor.
    """
    super().__init__()
    self.model_cfg = model_cfg

    size_x, size_y, size_z = grid_size
    self.nx, self.ny, self.nz = size_x, size_y, size_z

    channels = model_cfg.IN_DIM

    # Positional encoding is sized from the y (height) and x (width) extents.
    self.position_enc = PositionalEncoding(channels, height=size_y, width=size_x)
    self.layer_norm = nn.LayerNorm(channels, eps=1e-6)

    # Two self-attention blocks that keep the channel width at IN_DIM.
    self.self_attn1 = SA_block(inplanes=channels, planes=channels)
    self.self_attn2 = SA_block(inplanes=channels, planes=channels)
def __init__(self, model_cfg, grid_size, voxel_size, point_cloud_range, dropout=0.1):
    """Create positional encoding, dropout, layer norm and two attention blocks.

    Args:
        model_cfg: Config object; ``IN_DIM`` and ``downsampled`` are read
            from it.
        grid_size: Indexable; entries 1 and 0 are floor-divided by
            ``model_cfg.downsampled`` to give the encoding height and width.
        voxel_size: Indexable with at least three entries (x, y, z).
        point_cloud_range: Indexable; entries 0..2 are added to half the
            voxel size to form the stored offsets.
        dropout: Probability passed to ``nn.Dropout``.
    """
    super().__init__()
    self.model_cfg = model_cfg
    self.voxel_size = voxel_size
    self.point_cloud_range = point_cloud_range

    vx, vy, vz = voxel_size[0], voxel_size[1], voxel_size[2]
    self.voxel_x = vx
    self.voxel_y = vy
    self.voxel_z = vz
    # Half a voxel plus the matching entry of point_cloud_range.
    self.x_offset = vx / 2 + point_cloud_range[0]
    self.y_offset = vy / 2 + point_cloud_range[1]
    self.z_offset = vz / 2 + point_cloud_range[2]

    channels = model_cfg.IN_DIM

    # Positional encoding layers, sized on the downsampled grid.
    enc_height = grid_size[1] // model_cfg.downsampled
    enc_width = grid_size[0] // model_cfg.downsampled
    self.position_enc = PositionalEncoding(
        channels, height=enc_height, width=enc_width, depth=2
    )
    self.dropout = nn.Dropout(p=dropout)
    self.layer_norm = nn.LayerNorm(channels, eps=1e-6)

    # Self-attention layers (IN_DIM -> IN_DIM).
    self.self_attn1 = SA_block(inplanes=channels, planes=channels)
    self.self_attn2 = SA_block(inplanes=channels, planes=channels)
def __init__(self, model_cfg, grid_size, voxel_size, point_cloud_range, dropout=0.1):
    """Build three self-attention blocks plus the adapt and decode modules.

    Args:
        model_cfg: Config object; ``IN_DIM``, ``NUM_BEV_FEATURES``,
            ``LOCAL_CONTEXT`` and ``DECODE`` are read from it.
        grid_size: Not referenced anywhere in this constructor.
        voxel_size: Stored on the instance unchanged.
        point_cloud_range: Stored on the instance and forwarded to the
            adapt module as ``pc_range``.
        dropout: Not referenced anywhere in this constructor.
    """
    super().__init__()
    self.model_cfg = model_cfg
    self.voxel_size = voxel_size
    self.point_cloud_range = point_cloud_range

    in_dim = self.model_cfg.IN_DIM
    self.self_attn1 = SA_block(inplanes=in_dim, planes=in_dim)

    # Deform and aggregate local features
    local_cfg = self.model_cfg.LOCAL_CONTEXT
    # Build new lists rather than writing back into the config: mutating
    # LOCAL_CONTEXT.MLPS in place would prepend the input width again each
    # time a module is constructed from the same config object.
    local_mlps = [
        [self.model_cfg.NUM_BEV_FEATURES, *mlp] for mlp in local_cfg.MLPS
    ]
    self.adapt_context = pointnet2_stack_modules.StackSAModuleMSGAdapt(
        radii=local_cfg.POOL_RADIUS,
        deform_radii=local_cfg.DEFORM_RADIUS,
        nsamples=local_cfg.NSAMPLE,
        mlps=local_mlps,
        use_xyz=True,
        pool_method=local_cfg.POOL_METHOD,
        pc_range=self.point_cloud_range,
    )

    # Self-attention layers
    self.self_attn2 = SA_block(inplanes=in_dim, planes=in_dim)
    self.self_attn3 = SA_block(inplanes=in_dim, planes=in_dim)

    # UnPool layers
    decode_cfg = self.model_cfg.DECODE
    # Same reasoning as above: leave DECODE.MLPS untouched.
    decode_mlps = [[in_dim, *mlp] for mlp in decode_cfg.MLPS]
    self.decode = pointnet2_stack_modules.StackSAModuleMSGDecode(
        radii=decode_cfg.POOL_RADIUS,
        nsamples=decode_cfg.NSAMPLE,
        mlps=decode_mlps,
        use_xyz=True,
        pool_method=decode_cfg.POOL_METHOD,
    )
def __init__(self, model_cfg, grid_size, voxel_size, point_cloud_range, dropout=0.3):
    """Build the adapt module and three self-attention blocks.

    Args:
        model_cfg: Config object; ``LOCAL_CONTEXT``, ``NUM_BEV_FEATURES``
            and ``IN_DIM`` are read from it.
        grid_size: Sequence unpacked into ``nx, ny, nz``.
        voxel_size: Indexable with at least three entries (x, y, z).
        point_cloud_range: Indexable; entries 0..2 are added to half the
            voxel size to form the stored offsets. Also forwarded to the
            adapt module as ``pc_range``.
        dropout: Not referenced anywhere in this constructor.
    """
    super().__init__()
    self.model_cfg = model_cfg
    self.nx, self.ny, self.nz = grid_size
    self.voxel_size = voxel_size
    self.point_cloud_range = point_cloud_range

    self.voxel_x = voxel_size[0]
    self.voxel_y = voxel_size[1]
    self.voxel_z = voxel_size[2]
    # Half a voxel plus the matching entry of point_cloud_range
    # (presumably the voxel-centre offset; confirm against forward()).
    self.x_offset = self.voxel_x / 2 + point_cloud_range[0]
    self.y_offset = self.voxel_y / 2 + point_cloud_range[1]
    self.z_offset = self.voxel_z / 2 + point_cloud_range[2]

    # layers to deform + aggregate local features
    local_cfg = self.model_cfg.LOCAL_CONTEXT
    # Build new lists rather than writing back into the config: mutating
    # LOCAL_CONTEXT.MLPS in place would prepend the input width again each
    # time a module is constructed from the same config object.
    local_mlps = [
        [self.model_cfg.NUM_BEV_FEATURES, *mlp] for mlp in local_cfg.MLPS
    ]
    self.adapt_context = pointnet2_stack_modules.StackSAModuleMSGAdapt(
        radii=local_cfg.POOL_RADIUS,
        deform_radii=local_cfg.DEFORM_RADIUS,
        nsamples=local_cfg.NSAMPLE,
        mlps=local_mlps,
        use_xyz=True,
        pool_method=local_cfg.POOL_METHOD,
        pc_range=self.point_cloud_range,
    )

    in_dim = self.model_cfg.IN_DIM
    self.self_full_fast_attn = SA_block(inplanes=in_dim, planes=in_dim)
    # These two use the SA_block_def variant rather than SA_block.
    self.self_attn1 = SA_block_def(inplanes=in_dim, planes=in_dim)
    self.self_attn2 = SA_block_def(inplanes=in_dim, planes=in_dim)
def __init__(self, model_cfg, voxel_size, point_cloud_range, num_bev_features=None,
             num_rawpoint_features=None, **kwargs):
    """Build per-source set-abstraction layers, the fusion MLP and FSA blocks.

    Args:
        model_cfg: Config object; ``SA_LAYER``, ``FEATURES_SOURCE`` and
            ``NUM_OUTPUT_FEATURES`` are read from it.
        voxel_size: Stored on the instance unchanged.
        point_cloud_range: Stored on the instance unchanged.
        num_bev_features: Added to the fused input width when ``'bev'`` is
            listed in ``FEATURES_SOURCE``; must be a number in that case.
        num_rawpoint_features: Used as ``num_rawpoint_features - 3`` for the
            raw-point MLP input width when ``'raw_points'`` is listed in
            ``FEATURES_SOURCE``; must be a number in that case.
        **kwargs: Accepted and ignored.
    """
    super().__init__()
    self.model_cfg = model_cfg
    self.voxel_size = voxel_size
    self.point_cloud_range = point_cloud_range

    SA_cfg = self.model_cfg.SA_LAYER
    feature_sources = self.model_cfg.FEATURES_SOURCE

    self.SA_layers = nn.ModuleList()
    self.SA_layer_names = []
    self.downsample_times_map = {}

    # Running total of channels that will be concatenated before fusion.
    c_in = 0
    for src_name in feature_sources:
        if src_name in ('bev', 'raw_points'):
            # These two sources are handled separately below.
            continue
        src_cfg = SA_cfg[src_name]
        self.downsample_times_map[src_name] = src_cfg.DOWNSAMPLE_FACTOR

        # Each MLP spec gets its own first entry prepended as the input
        # width. Build new lists instead of writing back into the config so
        # that constructing the module twice from one config does not
        # prepend the entry a second time.
        mlps = [[mlp[0], *mlp] for mlp in src_cfg.MLPS]
        cur_layer = pointnet2_stack_modules.StackSAModuleMSG(
            radii=src_cfg.POOL_RADIUS,
            nsamples=src_cfg.NSAMPLE,
            mlps=mlps,
            use_xyz=True,
            pool_method='max_pool',
        )
        self.SA_layers.append(cur_layer)
        self.SA_layer_names.append(src_name)

        # The last entry of every MLP spec is its output width.
        c_in += sum(mlp[-1] for mlp in mlps)

    if 'bev' in feature_sources:
        c_in += num_bev_features

    if 'raw_points' in feature_sources:
        raw_cfg = SA_cfg['raw_points']
        # Same non-mutating construction as above; the input width is the
        # raw point feature count minus 3.
        mlps = [[num_rawpoint_features - 3, *mlp] for mlp in raw_cfg.MLPS]
        self.SA_rawpoints = pointnet2_stack_modules.StackSAModuleMSG(
            radii=raw_cfg.POOL_RADIUS,
            nsamples=raw_cfg.NSAMPLE,
            mlps=mlps,
            use_xyz=True,
            pool_method='max_pool',
        )
        c_in += sum(mlp[-1] for mlp in mlps)

    out_dim = self.model_cfg.NUM_OUTPUT_FEATURES
    self.vsa_point_feature_fusion = nn.Sequential(
        nn.Linear(c_in, out_dim, bias=False),
        nn.BatchNorm1d(out_dim),
        nn.ReLU(),
    )
    self.num_point_features = out_dim
    self.num_point_features_before_fusion = c_in

    # FSA layers
    self.self_attn1 = SA_block(inplanes=out_dim, planes=out_dim // 2)
    self.self_attn2 = SA_block(inplanes=out_dim, planes=out_dim // 2)
def __init__(self, model_cfg, voxel_size, point_cloud_range, num_bev_features=None,
             num_rawpoint_features=None, **kwargs):
    """Build adaptive set-abstraction layers, fusion MLPs and attention blocks.

    Args:
        model_cfg: Config object; ``SA_LAYER``, ``FEATURES_SOURCE`` and
            ``NUM_OUTPUT_FEATURES`` are read from it.
        voxel_size: Stored on the instance unchanged.
        point_cloud_range: Stored on the instance unchanged.
        num_bev_features: Input channel count of ``pred_bev_offset`` and
            ``mod_bev_offset`` (always used, so it must be a number), and
            added to the fused input width when ``'bev'`` is listed in
            ``FEATURES_SOURCE``.
        num_rawpoint_features: Used as ``num_rawpoint_features - 3`` for the
            raw-point MLP input width when ``'raw_points'`` is listed in
            ``FEATURES_SOURCE``; must be a number in that case.
        **kwargs: Accepted and ignored.
    """
    super().__init__()
    self.model_cfg = model_cfg
    self.voxel_size = voxel_size
    self.point_cloud_range = point_cloud_range

    SA_cfg = self.model_cfg.SA_LAYER
    feature_sources = self.model_cfg.FEATURES_SOURCE

    self.SA_layers = nn.ModuleList()
    self.SA_layer_names = []
    self.downsample_times_map = {}

    # Running total of channels that will be concatenated before fusion.
    c_in = 0
    for src_name in feature_sources:
        if src_name in ('bev', 'raw_points'):
            # These two sources are handled separately below.
            continue
        src_cfg = SA_cfg[src_name]
        self.downsample_times_map[src_name] = src_cfg.DOWNSAMPLE_FACTOR

        # Each MLP spec gets its own first entry prepended as the input
        # width. Build new lists instead of writing back into the config so
        # that constructing the module twice from one config does not
        # prepend the entry a second time.
        mlps = [[mlp[0], *mlp] for mlp in src_cfg.MLPS]

        # Deform keypoints adaptively.
        # NOTE(review): this used to be guarded by
        # `if src_name in FEATURES_SOURCE` with a plain StackSAModuleMSG
        # fallback. Inside a loop over FEATURES_SOURCE that test is always
        # true, so the fallback never ran; every source handled here gets
        # the adaptive layer. If only some sources were meant to be
        # deformable, that needs a separate config key.
        cur_layer = pointnet2_stack_modules.StackSAModuleMSGAdapt(
            radii=src_cfg.POOL_RADIUS,
            deform_radii=src_cfg.POOL_RADIUS,
            nsamples=src_cfg.NSAMPLE,
            mlps=mlps,
            use_xyz=True,
            pool_method='max_pool',
        )
        self.SA_layers.append(cur_layer)
        self.SA_layer_names.append(src_name)

        # The last entry of every MLP spec is its output width.
        c_in += sum(mlp[-1] for mlp in mlps)

    if 'bev' in feature_sources:
        c_in += num_bev_features

    if 'raw_points' in feature_sources:
        raw_cfg = SA_cfg['raw_points']
        # Same non-mutating construction as above; the input width is the
        # raw point feature count minus 3.
        mlps = [[num_rawpoint_features - 3, *mlp] for mlp in raw_cfg.MLPS]
        self.SA_rawpoints = pointnet2_stack_modules.StackSAModuleMSGGated(
            radii=raw_cfg.POOL_RADIUS,
            nsamples=raw_cfg.NSAMPLE,
            mlps=mlps,
            use_xyz=True,
            pool_method='max_pool',
        )
        c_in += sum(mlp[-1] for mlp in mlps)

    in_dim = self.model_cfg.NUM_OUTPUT_FEATURES
    self.vsa_point_feature_fusion = nn.Sequential(
        nn.Linear(c_in, in_dim, bias=False),
        nn.BatchNorm1d(in_dim),
        nn.ReLU(),
    )
    self.num_point_features = in_dim
    self.num_point_features_before_fusion = c_in

    # Deform and self-attention layers
    # Two-channel head squashed by Tanh, and a one-channel head, both
    # taking num_bev_features input channels.
    self.pred_bev_offset = nn.Sequential(
        nn.Conv1d(num_bev_features, 2, kernel_size=1, bias=False),
        nn.Tanh(),
    )
    self.mod_bev_offset = nn.Conv1d(num_bev_features, 1, kernel_size=1, bias=False)

    self.self_attn1 = SA_block(inplanes=in_dim, planes=in_dim)
    self.self_attn2 = SA_block(inplanes=in_dim, planes=in_dim)

    self.sa_point_feature_fusion = nn.Sequential(
        nn.Linear(in_dim, in_dim, bias=False),
        nn.BatchNorm1d(in_dim),
        nn.ReLU(),
    )
    self.layer_norm1 = nn.LayerNorm(in_dim)
    self.layer_norm2 = nn.LayerNorm(in_dim)