def _make_layer(self, inplanes, planes, num_blocks, idx, stride=1):
    if self._use_norm:
        BatchNorm2d = change_default_args(
            eps=1e-3, momentum=0.01)(ME.MinkowskiBatchNorm)
        Conv2d = change_default_args(
            bias=False, dimension=2)(ME.MinkowskiConvolution)
        SubMConv2d = change_default_args(
            bias=False, dimension=2)(ME.MinkowskiConvolution)
        ConvTranspose2d = change_default_args(bias=False, dimension=2)(
            ME.MinkowskiConvolutionTranspose)
    else:
        BatchNorm2d = Empty
        Conv2d = change_default_args(
            bias=True, dimension=2)(ME.MinkowskiConvolution)
        SubMConv2d = change_default_args(
            bias=True, dimension=2)(ME.MinkowskiConvolution)
        ConvTranspose2d = change_default_args(bias=True, dimension=2)(
            ME.MinkowskiConvolutionTranspose)
    ReLU = ME.MinkowskiReLU()
    block = Sequential(
        Conv2d(inplanes, planes, 2, stride=stride),
        BatchNorm2d(planes),
        ReLU,
    )
    for j in range(num_blocks):
        block.add(SubMConv2d(planes, planes, 3))
        block.add(BatchNorm2d(planes))
        block.add(ReLU)
    return block, planes
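# A hedged usage sketch for the MinkowskiEngine variant of _make_layer
# above. `net` is a hypothetical instance of the owning module; the
# SparseTensor construction (features (N, C), integer coordinates
# (N, 1 + 2) with the batch index first) follows the public
# MinkowskiEngine API, and the sizes are arbitrary.
def _example_minkowski_layer(net):
    block, planes = net._make_layer(
        inplanes=64, planes=128, num_blocks=3, idx=0, stride=2)
    feats = torch.randn(100, 64)
    # a unique 10x10 grid of (batch, y, x) integer coordinates
    coords = torch.stack(
        [torch.zeros(100, dtype=torch.long),
         torch.arange(100) // 10,
         torch.arange(100) % 10], dim=1).int()
    x = ME.SparseTensor(feats, coordinates=coords)
    y = block(x)  # kernel-2/stride-2 downsample, then 3 conv/BN/ReLU stages
    return y.F.shape  # (M, 128) per-voxel features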
def _make_layer(self, inplanes, planes, num_blocks, stride=1):
    if self._use_norm:
        if self._use_groupnorm:
            BatchNorm2d = change_default_args(
                num_groups=self._num_groups, eps=1e-3)(GroupNorm)
        else:
            BatchNorm2d = change_default_args(
                eps=1e-3, momentum=0.01)(nn.BatchNorm2d)
        Conv2d = change_default_args(bias=False)(nn.Conv2d)
        ConvTranspose2d = change_default_args(bias=False)(
            nn.ConvTranspose2d)
    else:
        BatchNorm2d = Empty
        Conv2d = change_default_args(bias=True)(nn.Conv2d)
        ConvTranspose2d = change_default_args(bias=True)(
            nn.ConvTranspose2d)
    block = Sequential(
        nn.ZeroPad2d(1),
        Conv2d(inplanes, planes, 3, stride=stride),
        BatchNorm2d(planes),
        nn.ReLU(),
    )
    for j in range(num_blocks):
        block.add(Conv2d(planes, planes, 3, padding=1))
        block.add(BatchNorm2d(planes))
        block.add(nn.ReLU())
    return block, planes
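# A minimal shape check for the dense _make_layer above; `net` is a
# hypothetical owning instance and the sizes are arbitrary.
def _example_dense_layer(net):
    block, planes = net._make_layer(
        inplanes=128, planes=128, num_blocks=3, stride=2)
    y = block(torch.zeros(1, 128, 200, 176))
    # ZeroPad2d(1) + stride-2 3x3 conv halve H and W: (1, 128, 100, 88)
    return y.shape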
def _make_layer(self, inplanes, planes, num_blocks, idx, stride=1):
    if self._use_norm:
        if self._use_groupnorm:
            SparseBatchNorm2d = change_default_args(
                num_groups=self._num_groups, eps=1e-3)(GroupNorm)
            DenseBatchNorm2d = change_default_args(
                num_groups=self._num_groups, eps=1e-3)(GroupNorm)
        else:
            SparseBatchNorm2d = change_default_args(
                eps=1e-3, momentum=0.01)(nn.BatchNorm1d)
            DenseBatchNorm2d = change_default_args(
                eps=1e-3, momentum=0.01)(nn.BatchNorm2d)
        SparseConv2d = change_default_args(bias=False)(spconv.SparseConv2d)
        DenseConv2d = change_default_args(bias=False)(nn.Conv2d)
        ConvTranspose2d = change_default_args(bias=False)(
            spconv.SparseConvTranspose2d)
    else:
        SparseBatchNorm2d = Empty
        DenseBatchNorm2d = Empty
        SparseConv2d = change_default_args(bias=True)(spconv.SparseConv2d)
        DenseConv2d = change_default_args(bias=True)(nn.Conv2d)
        ConvTranspose2d = change_default_args(bias=True)(
            spconv.SparseConvTranspose2d)
    if idx <= LAST_SPARSE_IDX:
        # early, high-resolution stages stay sparse (spconv)
        block = spconv.SparseSequential(
            SparseZeroPad2d(1),
            SparseConv2d(inplanes, planes, 3, stride=stride),
            SparseBatchNorm2d(planes),
            nn.ReLU(),
        )
        for j in range(num_blocks):
            block.add(SparseConv2d(planes, planes, 3, padding=1))
            block.add(SparseBatchNorm2d(planes))
            block.add(nn.ReLU())
    else:
        # later, low-resolution stages run dense
        block = Sequential(
            nn.ZeroPad2d(1),
            DenseConv2d(inplanes, planes, 3, stride=stride),
            DenseBatchNorm2d(planes),
            nn.ReLU(),
        )
        for j in range(num_blocks):
            block.add(DenseConv2d(planes, planes, 3, padding=1))
            block.add(DenseBatchNorm2d(planes))
            block.add(nn.ReLU())
    return block, planes
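# Hedged sketch of the sparse-to-dense handoff implied by LAST_SPARSE_IDX:
# stages with idx <= LAST_SPARSE_IDX consume spconv.SparseConvTensor, later
# stages consume dense NCHW tensors, so the last sparse output must be
# densified in between. sparse_block/dense_block are hypothetical blocks
# returned by _make_layer; SparseConvTensor and .dense() are spconv API.
def _example_sparse_dense_handoff(sparse_block, dense_block):
    feats = torch.randn(100, 64)
    indices = torch.stack(
        [torch.zeros(100, dtype=torch.long),
         torch.arange(100) // 10,
         torch.arange(100) % 10], dim=1).int()  # (batch, y, x)
    x = spconv.SparseConvTensor(feats, indices, spatial_shape=[200, 176],
                                batch_size=1)
    x = sparse_block(x)  # stays sparse
    x = x.dense()        # (N, C, H, W) for the dense stages
    return dense_block(x)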
class PSA(nn.Module):
    def __init__(self,
                 use_norm=True,
                 num_class=2,
                 layer_nums=[3, 5, 5],
                 layer_strides=[2, 2, 2],
                 num_filters=[128, 128, 256],
                 upsample_strides=[1, 2, 4],
                 num_upsample_filters=[256, 256, 256],
                 num_input_filters=128,
                 num_anchor_per_loc=2,
                 encode_background_as_zeros=True,
                 use_direction_classifier=True,
                 use_groupnorm=False,
                 num_groups=32,
                 use_bev=False,
                 box_code_size=7,
                 name='psa'):
        """PSA head: a three-stage conv/deconv RPN backbone with a coarse
        detection head plus a multi-scale fusion/refinement branch that
        produces a second set of cls/loc/dir predictions.
        """
        super(PSA, self).__init__()
        self._num_anchor_per_loc = num_anchor_per_loc  # 2
        self._use_direction_classifier = use_direction_classifier  # True
        self._use_bev = use_bev  # False
        assert len(layer_nums) == 3
        assert len(layer_strides) == len(layer_nums)
        assert len(num_filters) == len(layer_nums)
        assert len(upsample_strides) == len(layer_nums)
        assert len(num_upsample_filters) == len(layer_nums)
        factors = []
        for i in range(len(layer_nums)):
            assert int(np.prod(
                layer_strides[:i + 1])) % upsample_strides[i] == 0
            factors.append(
                np.prod(layer_strides[:i + 1]) // upsample_strides[i])
        assert all([x == factors[0] for x in factors])
        if use_norm:  # True
            if use_groupnorm:
                BatchNorm2d = change_default_args(
                    num_groups=num_groups, eps=1e-3)(GroupNorm)
            else:
                BatchNorm2d = change_default_args(
                    eps=1e-3, momentum=0.01)(nn.BatchNorm2d)
            Conv2d = change_default_args(bias=False)(nn.Conv2d)
            ConvTranspose2d = change_default_args(bias=False)(
                nn.ConvTranspose2d)
        else:
            BatchNorm2d = Empty
            Conv2d = change_default_args(bias=True)(nn.Conv2d)
            ConvTranspose2d = change_default_args(bias=True)(
                nn.ConvTranspose2d)
        # note that when stride > 1, conv2d with same padding isn't
        # equal to pad-conv2d. we should use pad-conv2d.
        block2_input_filters = num_filters[0]
        if use_bev:
            self.bev_extractor = Sequential(
                Conv2d(6, 32, 3, padding=1),
                BatchNorm2d(32),
                nn.ReLU(),
                # nn.MaxPool2d(2, 2),
                Conv2d(32, 64, 3, padding=1),
                BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            block2_input_filters += 64
        self.block1 = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(num_input_filters,
                   num_filters[0],
                   3,
                   stride=layer_strides[0]),
            BatchNorm2d(num_filters[0]),
            nn.ReLU(),
        )
        for i in range(layer_nums[0]):
            self.block1.add(
                Conv2d(num_filters[0], num_filters[0], 3, padding=1))
            self.block1.add(BatchNorm2d(num_filters[0]))
            self.block1.add(nn.ReLU())
        self.deconv1 = Sequential(
            ConvTranspose2d(num_filters[0],
                            num_upsample_filters[0],
                            upsample_strides[0],
                            stride=upsample_strides[0]),
            BatchNorm2d(num_upsample_filters[0]),
            nn.ReLU(),
        )
        self.block2 = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(block2_input_filters,
                   num_filters[1],
                   3,
                   stride=layer_strides[1]),
            BatchNorm2d(num_filters[1]),
            nn.ReLU(),
        )
        for i in range(layer_nums[1]):
            self.block2.add(
                Conv2d(num_filters[1], num_filters[1], 3, padding=1))
            self.block2.add(BatchNorm2d(num_filters[1]))
            self.block2.add(nn.ReLU())
        self.deconv2 = Sequential(
            ConvTranspose2d(num_filters[1],
                            num_upsample_filters[1],
                            upsample_strides[1],
                            stride=upsample_strides[1]),
            BatchNorm2d(num_upsample_filters[1]),
            nn.ReLU(),
        )
        self.block3 = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(num_filters[1], num_filters[2], 3,
                   stride=layer_strides[2]),
            BatchNorm2d(num_filters[2]),
            nn.ReLU(),
        )
        for i in range(layer_nums[2]):
            self.block3.add(
                Conv2d(num_filters[2], num_filters[2], 3, padding=1))
            self.block3.add(BatchNorm2d(num_filters[2]))
            self.block3.add(nn.ReLU())
        self.deconv3 = Sequential(
            ConvTranspose2d(num_filters[2],
                            num_upsample_filters[2],
                            upsample_strides[2],
                            stride=upsample_strides[2]),
            BatchNorm2d(num_upsample_filters[2]),
            nn.ReLU(),
        )
        if encode_background_as_zeros:
            num_cls = num_anchor_per_loc * num_class
        else:
            num_cls = num_anchor_per_loc * (num_class + 1)
        self.conv_cls = nn.Conv2d(sum(num_upsample_filters), num_cls, 1)
        self.conv_box = nn.Conv2d(sum(num_upsample_filters),
                                  num_anchor_per_loc * box_code_size, 1)
        if use_direction_classifier:
            self.conv_dir_cls = nn.Conv2d(sum(num_upsample_filters),
                                          num_anchor_per_loc * 2, 1)
        # --- refinement branch ---
        self.bottle_conv = nn.Conv2d(sum(num_upsample_filters),
                                     sum(num_upsample_filters) // 3, 1)
        self.block1_dec2x = nn.MaxPool2d(kernel_size=2)  # C=64
        self.block1_dec4x = nn.MaxPool2d(kernel_size=4)  # C=64
        self.block2_dec2x = nn.MaxPool2d(kernel_size=2)  # C=128
        self.block2_inc2x = ConvTranspose2d(num_filters[1],
                                            num_filters[0] // 2,
                                            upsample_strides[1],
                                            stride=upsample_strides[1])  # C=32
        self.block3_inc2x = ConvTranspose2d(num_filters[2],
                                            num_filters[1] // 2,
                                            upsample_strides[1],
                                            stride=upsample_strides[1])  # C=64
        self.block3_inc4x = ConvTranspose2d(num_filters[2],
                                            num_filters[0] // 2,
                                            upsample_strides[2],
                                            stride=upsample_strides[2])  # C=32
        self.fusion_block1 = nn.Conv2d(
            num_filters[0] + num_filters[0] // 2 + num_filters[0] // 2,
            num_filters[0], 1)
        self.fusion_block2 = nn.Conv2d(
            num_filters[0] + num_filters[1] + num_filters[1] // 2,
            num_filters[1], 1)
        self.fusion_block3 = nn.Conv2d(
            num_filters[0] + num_filters[1] + num_filters[2],
            num_filters[2], 1)
        self.refine_up1 = Sequential(
            ConvTranspose2d(num_filters[0],
                            num_upsample_filters[0],
                            upsample_strides[0],
                            stride=upsample_strides[0]),
            BatchNorm2d(num_upsample_filters[0]),
            nn.ReLU(),
        )
        self.refine_up2 = Sequential(
            ConvTranspose2d(num_filters[1],
                            num_upsample_filters[1],
                            upsample_strides[1],
                            stride=upsample_strides[1]),
            BatchNorm2d(num_upsample_filters[1]),
            nn.ReLU(),
        )
        self.refine_up3 = Sequential(
            ConvTranspose2d(num_filters[2],
                            num_upsample_filters[2],
                            upsample_strides[2],
                            stride=upsample_strides[2]),
            BatchNorm2d(num_upsample_filters[2]),
            nn.ReLU(),
        )
        C_Bottle = cfg.PSA.C_Bottle
        C = cfg.PSA.C_Reudce  # attribute name kept as spelled in the config
        self.RF1 = Sequential(  # 3x3 receptive field
            Conv2d(C_Bottle * 2, C, kernel_size=1, stride=1),
            BatchNorm2d(C),
            nn.ReLU(inplace=True),
            Conv2d(C, C_Bottle * 2, kernel_size=3, stride=1, padding=1,
                   dilation=1),
            BatchNorm2d(C_Bottle * 2),
            nn.ReLU(inplace=True),
        )
        self.RF2 = Sequential(  # 5x5 receptive field
            Conv2d(C_Bottle, C, kernel_size=3, stride=1, padding=1),
            BatchNorm2d(C),
            nn.ReLU(inplace=True),
            Conv2d(C, C_Bottle, kernel_size=3, stride=1, padding=1,
                   dilation=1),
            BatchNorm2d(C_Bottle),
            nn.ReLU(inplace=True),
        )
        self.RF3 = Sequential(  # 7x7 receptive field
            Conv2d(C_Bottle // 2, C, kernel_size=3, stride=1, padding=1),
            BatchNorm2d(C),
            nn.ReLU(inplace=True),
            Conv2d(C, C, kernel_size=3, stride=1, padding=1),
            BatchNorm2d(C),
            nn.ReLU(inplace=True),
            Conv2d(C, C_Bottle // 2, kernel_size=3, stride=1, padding=1),
            BatchNorm2d(C_Bottle // 2),
            nn.ReLU(inplace=True),
        )
        self.concat_conv1 = nn.Conv2d(num_filters[1],
                                      num_filters[1],
                                      kernel_size=3,
                                      padding=1)
        self.concat_conv2 = nn.Conv2d(num_filters[1],
                                      num_filters[1],
                                      kernel_size=3,
                                      padding=1)
        self.concat_conv3 = nn.Conv2d(num_filters[1],
                                      num_filters[1],
                                      kernel_size=3,
                                      padding=1)
        self.refine_cls = nn.Conv2d(sum(num_upsample_filters), num_cls, 1)
        self.refine_loc = nn.Conv2d(sum(num_upsample_filters),
                                    num_anchor_per_loc * box_code_size, 1)
        if use_direction_classifier:
            self.refine_dir = nn.Conv2d(sum(num_upsample_filters),
                                        num_anchor_per_loc * 2, 1)

    def forward(self, x, bev=None):
        x1 = self.block1(x)
        up1 = self.deconv1(x1)
        x2 = self.block2(x1)
        up2 = self.deconv2(x2)
        x3 = self.block3(x2)
        up3 = self.deconv3(x3)
        coarse_feat = torch.cat([up1, up2, up3], dim=1)
        box_preds = self.conv_box(coarse_feat)
        cls_preds = self.conv_cls(coarse_feat)
        # [N, C, y(H), x(W)] -> [N, H, W, C]
        box_preds = box_preds.permute(0, 2, 3, 1).contiguous()
        cls_preds = cls_preds.permute(0, 2, 3, 1).contiguous()
        ret_dict = {
            "box_preds": box_preds,
            "cls_preds": cls_preds,
        }
        if self._use_direction_classifier:
            dir_cls_preds = self.conv_dir_cls(coarse_feat)
            dir_cls_preds = dir_cls_preds.permute(0, 2, 3, 1).contiguous()
            ret_dict["dir_cls_preds"] = dir_cls_preds
        # --- refinement branch ---
        bottle_conv = self.bottle_conv(coarse_feat)
        x1_dec2x = self.block1_dec2x(x1)
        x1_dec4x = self.block1_dec4x(x1)
        x2_dec2x = self.block2_dec2x(x2)
        x2_inc2x = self.block2_inc2x(x2)
        x3_inc2x = self.block3_inc2x(x3)
        x3_inc4x = self.block3_inc4x(x3)
        concat_block1 = torch.cat([x1, x2_inc2x, x3_inc4x], dim=1)
        fusion_block1 = self.fusion_block1(concat_block1)
        concat_block2 = torch.cat([x1_dec2x, x2, x3_inc2x], dim=1)
        fusion_block2 = self.fusion_block2(concat_block2)
        concat_block3 = torch.cat([x1_dec4x, x2_dec2x, x3], dim=1)
        fusion_block3 = self.fusion_block3(concat_block3)
        refine_up1 = self.RF3(fusion_block1)
        refine_up1 = self.refine_up1(refine_up1)
        refine_up2 = self.RF2(fusion_block2)
        refine_up2 = self.refine_up2(refine_up2)
        refine_up3 = self.RF1(fusion_block3)
        refine_up3 = self.refine_up3(refine_up3)
        branch1_sum_wise = refine_up1 + bottle_conv
        branch2_sum_wise = refine_up2 + bottle_conv
        branch3_sum_wise = refine_up3 + bottle_conv
        concat_conv1 = self.concat_conv1(branch1_sum_wise)
        concat_conv2 = self.concat_conv2(branch2_sum_wise)
        concat_conv3 = self.concat_conv3(branch3_sum_wise)
        PSA_output = torch.cat([concat_conv1, concat_conv2, concat_conv3],
                               dim=1)
        refine_cls_preds = self.refine_cls(PSA_output)
        refine_loc_preds = self.refine_loc(PSA_output)
        refine_loc_preds = refine_loc_preds.permute(0, 2, 3, 1).contiguous()
        refine_cls_preds = refine_cls_preds.permute(0, 2, 3, 1).contiguous()
        ret_dict["Refine_loc_preds"] = refine_loc_preds
        ret_dict["Refine_cls_preds"] = refine_cls_preds
        if self._use_direction_classifier:
            refine_dir_preds = self.refine_dir(PSA_output)
            refine_dir_preds = refine_dir_preds.permute(0, 2, 3,
                                                        1).contiguous()
            ret_dict["Refine_dir_preds"] = refine_dir_preds
        return ret_dict
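# Hedged smoke test for PSA. The channel choices below satisfy the refine
# branch's arithmetic (per scale, RF3/RF2/RF1 expect C_Bottle // 2,
# C_Bottle and C_Bottle * 2 input channels, and bottle_conv needs
# sum(num_upsample_filters) == 3 * num_filters[1]); it assumes
# cfg.PSA.C_Bottle == 128, which this module reads at construction time.
# The input size is arbitrary; H and W must be divisible by 8.
def _example_psa_forward():
    psa = PSA(num_input_filters=128,
              num_filters=[64, 128, 256],
              num_upsample_filters=[128, 128, 128])
    ret = psa(torch.zeros(1, 128, 200, 176))
    # coarse heads plus their Refine_* counterparts, all in NHWC layout
    return sorted(ret.keys())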
class RPN(nn.Module):
    def __init__(self,
                 use_norm=True,
                 num_class=2,
                 layer_nums=(3, 5, 5),
                 layer_strides=(2, 2, 2),
                 num_filters=(128, 128, 256),
                 upsample_strides=(1, 2, 4),
                 num_upsample_filters=(256, 256, 256),
                 num_input_features=128,
                 num_anchor_per_loc=2,
                 encode_background_as_zeros=True,
                 use_direction_classifier=True,
                 use_groupnorm=False,
                 num_groups=32,
                 box_code_size=7,
                 num_direction_bins=2,
                 use_rc_net=False,
                 name='rpn'):
        """Deprecated. Exists for checkpoint backward compatibility
        (SECOND v1.0).
        """
        super(RPN, self).__init__()
        self._num_anchor_per_loc = num_anchor_per_loc
        self._use_direction_classifier = use_direction_classifier
        # _use_rc_net was read below but never assigned in the original;
        # it is now an explicit constructor argument (default False).
        self._use_rc_net = use_rc_net
        assert len(layer_nums) == 3
        assert len(layer_strides) == len(layer_nums)
        assert len(num_filters) == len(layer_nums)
        assert len(upsample_strides) == len(layer_nums)
        assert len(num_upsample_filters) == len(layer_nums)
        upsample_strides = [
            np.round(u).astype(np.int64) for u in upsample_strides
        ]
        factors = []
        for i in range(len(layer_nums)):
            assert int(np.prod(
                layer_strides[:i + 1])) % upsample_strides[i] == 0
            factors.append(
                np.prod(layer_strides[:i + 1]) // upsample_strides[i])
        assert all([x == factors[0] for x in factors])
        if use_norm:
            if use_groupnorm:
                BatchNorm2d = change_default_args(
                    num_groups=num_groups, eps=1e-3)(GroupNorm)
            else:
                BatchNorm2d = change_default_args(
                    eps=1e-3, momentum=0.01)(nn.BatchNorm2d)
            Conv2d = change_default_args(bias=False)(nn.Conv2d)
            ConvTranspose2d = change_default_args(bias=False)(
                nn.ConvTranspose2d)
        else:
            BatchNorm2d = Empty
            Conv2d = change_default_args(bias=True)(nn.Conv2d)
            ConvTranspose2d = change_default_args(bias=True)(
                nn.ConvTranspose2d)
        # note that when stride > 1, conv2d with same padding isn't
        # equal to pad-conv2d. we should use pad-conv2d.
        block2_input_filters = num_filters[0]
        self.block1 = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(num_input_features,
                   num_filters[0],
                   3,
                   stride=layer_strides[0]),
            BatchNorm2d(num_filters[0]),
            nn.ReLU(),
        )
        for i in range(layer_nums[0]):
            self.block1.add(
                Conv2d(num_filters[0], num_filters[0], 3, padding=1))
            self.block1.add(BatchNorm2d(num_filters[0]))
            self.block1.add(nn.ReLU())
        self.deconv1 = Sequential(
            ConvTranspose2d(num_filters[0],
                            num_upsample_filters[0],
                            upsample_strides[0],
                            stride=upsample_strides[0]),
            BatchNorm2d(num_upsample_filters[0]),
            nn.ReLU(),
        )
        self.block2 = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(block2_input_filters,
                   num_filters[1],
                   3,
                   stride=layer_strides[1]),
            BatchNorm2d(num_filters[1]),
            nn.ReLU(),
        )
        for i in range(layer_nums[1]):
            self.block2.add(
                Conv2d(num_filters[1], num_filters[1], 3, padding=1))
            self.block2.add(BatchNorm2d(num_filters[1]))
            self.block2.add(nn.ReLU())
        self.deconv2 = Sequential(
            ConvTranspose2d(num_filters[1],
                            num_upsample_filters[1],
                            upsample_strides[1],
                            stride=upsample_strides[1]),
            BatchNorm2d(num_upsample_filters[1]),
            nn.ReLU(),
        )
        self.block3 = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(num_filters[1], num_filters[2], 3,
                   stride=layer_strides[2]),
            BatchNorm2d(num_filters[2]),
            nn.ReLU(),
        )
        for i in range(layer_nums[2]):
            self.block3.add(
                Conv2d(num_filters[2], num_filters[2], 3, padding=1))
            self.block3.add(BatchNorm2d(num_filters[2]))
            self.block3.add(nn.ReLU())
        self.deconv3 = Sequential(
            ConvTranspose2d(num_filters[2],
                            num_upsample_filters[2],
                            upsample_strides[2],
                            stride=upsample_strides[2]),
            BatchNorm2d(num_upsample_filters[2]),
            nn.ReLU(),
        )
        if encode_background_as_zeros:
            num_cls = num_anchor_per_loc * num_class
        else:
            num_cls = num_anchor_per_loc * (num_class + 1)
        self.conv_cls = nn.Conv2d(sum(num_upsample_filters), num_cls, 1)
        self.conv_box = nn.Conv2d(sum(num_upsample_filters),
                                  num_anchor_per_loc * box_code_size, 1)
        if use_direction_classifier:
            self.conv_dir_cls = nn.Conv2d(
                sum(num_upsample_filters),
                num_anchor_per_loc * num_direction_bins, 1)
        if self._use_rc_net:
            self.conv_rc = nn.Conv2d(sum(num_upsample_filters),
                                     num_anchor_per_loc * box_code_size, 1)

    def forward(self, x):
        x = self.block1(x)
        up1 = self.deconv1(x)
        x = self.block2(x)
        up2 = self.deconv2(x)
        x = self.block3(x)
        up3 = self.deconv3(x)
        x = torch.cat([up1, up2, up3], dim=1)
        box_preds = self.conv_box(x)
        cls_preds = self.conv_cls(x)
        # [N, C, y(H), x(W)] -> [N, H, W, C]
        box_preds = box_preds.permute(0, 2, 3, 1).contiguous()
        cls_preds = cls_preds.permute(0, 2, 3, 1).contiguous()
        ret_dict = {
            "box_preds": box_preds,
            "cls_preds": cls_preds,
        }
        if self._use_direction_classifier:
            dir_cls_preds = self.conv_dir_cls(x)
            dir_cls_preds = dir_cls_preds.permute(0, 2, 3, 1).contiguous()
            ret_dict["dir_cls_preds"] = dir_cls_preds
        if self._use_rc_net:
            rc_preds = self.conv_rc(x)
            rc_preds = rc_preds.permute(0, 2, 3, 1).contiguous()
            ret_dict["rc_preds"] = rc_preds
        return ret_dict
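# Hedged smoke test for the deprecated v1.0 RPN above. The default argument
# binds this class at definition time, since the name RPN is re-bound by
# the second RPN class defined below. The input size is arbitrary; H and W
# need only be divisible by 8.
def _example_rpn_v1_forward(rpn_cls=RPN):
    rpn = rpn_cls(num_input_features=128)
    ret = rpn(torch.zeros(1, 128, 200, 176))
    # all three deconvs restore the stride-2 scale; heads are NHWC:
    assert ret["box_preds"].shape == (1, 100, 88, 2 * 7)
    assert ret["cls_preds"].shape == (1, 100, 88, 2 * 2)
    return ret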
# NOTE: this second definition re-binds the name RPN, shadowing the
# deprecated SECOND-v1.0 class of the same name above.
class RPN(nn.Module):
    def __init__(self,
                 use_norm=True,
                 num_class=2,
                 layer_nums=[3, 5, 5],
                 layer_strides=[2, 2, 2],
                 num_filters=[128, 128, 256],
                 upsample_strides=[1, 2, 4],
                 num_upsample_filters=[256, 256, 256],
                 num_input_filters=128,
                 num_anchor_per_loc=2,
                 encode_background_as_zeros=True,
                 use_direction_classifier=True,
                 use_groupnorm=False,
                 num_groups=32,
                 use_bev=False,
                 box_code_size=7,
                 name='rpn'):
        super(RPN, self).__init__()
        self._num_anchor_per_loc = num_anchor_per_loc
        self._use_direction_classifier = use_direction_classifier
        self._use_bev = use_bev
        assert len(layer_nums) == 3
        assert len(layer_strides) == len(layer_nums)
        assert len(num_filters) == len(layer_nums)
        assert len(upsample_strides) == len(layer_nums)
        assert len(num_upsample_filters) == len(layer_nums)
        factors = []
        for i in range(len(layer_nums)):
            assert int(np.prod(
                layer_strides[:i + 1])) % upsample_strides[i] == 0
            factors.append(
                np.prod(layer_strides[:i + 1]) // upsample_strides[i])
        assert all([x == factors[0] for x in factors])
        if use_norm:
            if use_groupnorm:
                BatchNorm2d = change_default_args(
                    num_groups=num_groups, eps=1e-3)(GroupNorm)
            else:
                BatchNorm2d = change_default_args(
                    eps=1e-3, momentum=0.01)(nn.BatchNorm2d)
            Conv2d = change_default_args(bias=False)(nn.Conv2d)
            ConvTranspose2d = change_default_args(bias=False)(
                nn.ConvTranspose2d)
        else:
            BatchNorm2d = Empty
            Conv2d = change_default_args(bias=True)(nn.Conv2d)
            ConvTranspose2d = change_default_args(bias=True)(
                nn.ConvTranspose2d)
        # note that when stride > 1, conv2d with same padding isn't
        # equal to pad-conv2d. we should use pad-conv2d.
        block2_input_filters = num_filters[0]
        if use_bev:
            self.bev_extractor = Sequential(
                Conv2d(6, 32, 3, padding=1),
                BatchNorm2d(32),
                nn.ReLU(),
                # nn.MaxPool2d(2, 2),
                Conv2d(32, 64, 3, padding=1),
                BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            block2_input_filters += 64
        self.block1 = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(num_input_filters,
                   num_filters[0],
                   3,
                   stride=layer_strides[0]),
            BatchNorm2d(num_filters[0]),
            nn.ReLU(),
        )
        for i in range(layer_nums[0]):
            self.block1.add(
                Conv2d(num_filters[0], num_filters[0], 3, padding=1))
            self.block1.add(BatchNorm2d(num_filters[0]))
            self.block1.add(nn.ReLU())
        self.deconv1 = Sequential(
            ConvTranspose2d(num_filters[0],
                            num_upsample_filters[0],
                            upsample_strides[0],
                            stride=upsample_strides[0]),
            BatchNorm2d(num_upsample_filters[0]),
            nn.ReLU(),
        )
        self.block2 = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(block2_input_filters,
                   num_filters[1],
                   3,
                   stride=layer_strides[1]),
            BatchNorm2d(num_filters[1]),
            nn.ReLU(),
        )
        for i in range(layer_nums[1]):
            self.block2.add(
                Conv2d(num_filters[1], num_filters[1], 3, padding=1))
            self.block2.add(BatchNorm2d(num_filters[1]))
            self.block2.add(nn.ReLU())
        self.deconv2 = Sequential(
            ConvTranspose2d(num_filters[1],
                            num_upsample_filters[1],
                            upsample_strides[1],
                            stride=upsample_strides[1]),
            BatchNorm2d(num_upsample_filters[1]),
            nn.ReLU(),
        )
        self.block3 = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(num_filters[1], num_filters[2], 3,
                   stride=layer_strides[2]),
            BatchNorm2d(num_filters[2]),
            nn.ReLU(),
        )
        for i in range(layer_nums[2]):
            self.block3.add(
                Conv2d(num_filters[2], num_filters[2], 3, padding=1))
            self.block3.add(BatchNorm2d(num_filters[2]))
            self.block3.add(nn.ReLU())
        self.deconv3 = Sequential(
            ConvTranspose2d(num_filters[2],
                            num_upsample_filters[2],
                            upsample_strides[2],
                            stride=upsample_strides[2]),
            BatchNorm2d(num_upsample_filters[2]),
            nn.ReLU(),
        )
        if encode_background_as_zeros:
            num_cls = num_anchor_per_loc * num_class
        else:
            num_cls = num_anchor_per_loc * (num_class + 1)
        self.conv_cls = nn.Conv2d(sum(num_upsample_filters), num_cls, 1)
        self.conv_box = nn.Conv2d(sum(num_upsample_filters),
                                  num_anchor_per_loc * box_code_size, 1)
        if use_direction_classifier:
            self.conv_dir_cls = nn.Conv2d(sum(num_upsample_filters),
                                          num_anchor_per_loc * 2, 1)

    def forward(self, x, bev=None):
        x = self.block1(x)
        up1 = self.deconv1(x)
        if self._use_bev:
            bev[:, -1] = torch.clamp(
                torch.log(1 + bev[:, -1]) / np.log(16.0), max=1.0)
            x = torch.cat([x, self.bev_extractor(bev)], dim=1)
        x = self.block2(x)
        up2 = self.deconv2(x)
        x = self.block3(x)
        up3 = self.deconv3(x)
        x = torch.cat([up1, up2, up3], dim=1)
        box_preds = self.conv_box(x)
        cls_preds = self.conv_cls(x)
        # [N, C, y(H), x(W)] -> [N, H, W, C]
        box_preds = box_preds.permute(0, 2, 3, 1).contiguous()
        cls_preds = cls_preds.permute(0, 2, 3, 1).contiguous()
        ret_dict = {
            "box_preds": box_preds,
            "cls_preds": cls_preds,
        }
        if self._use_direction_classifier:
            dir_cls_preds = self.conv_dir_cls(x)
            dir_cls_preds = dir_cls_preds.permute(0, 2, 3, 1).contiguous()
            ret_dict["dir_cls_preds"] = dir_cls_preds
        return ret_dict
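# The BEV branch above log-compresses the last BEV channel (assumed to be a
# point-count/density map) before feature extraction; a standalone sketch
# of the same normalization:
def _example_bev_density_norm(bev):
    # bev: (N, 6, H, W); counts of 15 or more saturate at 1.0
    bev = bev.clone()
    bev[:, -1] = torch.clamp(
        torch.log(1 + bev[:, -1]) / np.log(16.0), max=1.0)
    return bev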
class RPN_refine(nn.Module):
    def __init__(self,
                 use_norm=True,
                 num_class=2,
                 layer_nums=(3, 5, 5),
                 layer_strides=(2, 2, 2),
                 num_filters=(128, 128, 256),
                 upsample_strides=(1, 2, 4),
                 num_upsample_filters=(256, 256, 256),
                 num_input_features=128,
                 num_anchor_per_loc=2,
                 encode_background_as_zeros=True,
                 use_direction_classifier=True,
                 use_groupnorm=False,
                 num_groups=32,
                 box_code_size=7,
                 num_direction_bins=2,
                 name='rpn'):
        super(RPN_refine, self).__init__()
        self._num_anchor_per_loc = num_anchor_per_loc
        self._use_direction_classifier = use_direction_classifier
        assert len(layer_nums) == 3
        assert len(layer_strides) == len(layer_nums)
        assert len(num_filters) == len(layer_nums)
        assert len(upsample_strides) == len(layer_nums)
        assert len(num_upsample_filters) == len(layer_nums)
        self._box_code_size = box_code_size
        self._num_class = num_class
        self._num_direction_bins = num_direction_bins
        upsample_strides = [
            np.round(u).astype(np.int64) for u in upsample_strides
        ]
        if use_norm:
            if use_groupnorm:
                BatchNorm2d = change_default_args(
                    num_groups=num_groups, eps=1e-3)(GroupNorm)
            else:
                BatchNorm2d = change_default_args(
                    eps=1e-3, momentum=0.01)(nn.BatchNorm2d)
            Conv2d = change_default_args(bias=False)(nn.Conv2d)
            ConvTranspose2d = change_default_args(bias=False)(
                nn.ConvTranspose2d)
        else:
            BatchNorm2d = Empty
            Conv2d = change_default_args(bias=True)(nn.Conv2d)
            ConvTranspose2d = change_default_args(bias=True)(
                nn.ConvTranspose2d)
        # note that when stride > 1, conv2d with same padding isn't
        # equal to pad-conv2d. we should use pad-conv2d.
        block2_input_filters = num_filters[0]
        self.block1 = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(num_input_features,
                   num_filters[0],
                   3,
                   stride=layer_strides[0]),
            BatchNorm2d(num_filters[0]),
            nn.ReLU(),
        )
        for i in range(layer_nums[0]):
            self.block1.add(
                Conv2d(num_filters[0], num_filters[0], 3, padding=1))
            self.block1.add(BatchNorm2d(num_filters[0]))
            self.block1.add(nn.ReLU())
        self.deconv1 = Sequential(
            ConvTranspose2d(num_filters[0],
                            num_upsample_filters[0],
                            upsample_strides[0],
                            stride=upsample_strides[0]),
            BatchNorm2d(num_upsample_filters[0]),
            nn.ReLU(),
        )
        self.block2 = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(block2_input_filters,
                   num_filters[1],
                   3,
                   stride=layer_strides[1]),
            BatchNorm2d(num_filters[1]),
            nn.ReLU(),
        )
        for i in range(layer_nums[1]):
            self.block2.add(
                Conv2d(num_filters[1], num_filters[1], 3, padding=1))
            self.block2.add(BatchNorm2d(num_filters[1]))
            self.block2.add(nn.ReLU())
        self.deconv2 = Sequential(
            ConvTranspose2d(num_filters[1],
                            num_upsample_filters[1],
                            upsample_strides[1],
                            stride=upsample_strides[1]),
            BatchNorm2d(num_upsample_filters[1]),
            nn.ReLU(),
        )
        self.block3 = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(num_filters[1], num_filters[2], 3,
                   stride=layer_strides[2]),
            BatchNorm2d(num_filters[2]),
            nn.ReLU(),
        )
        for i in range(layer_nums[2]):
            self.block3.add(
                Conv2d(num_filters[2], num_filters[2], 3, padding=1))
            self.block3.add(BatchNorm2d(num_filters[2]))
            self.block3.add(nn.ReLU())
        self.deconv3 = Sequential(
            ConvTranspose2d(num_filters[2],
                            num_upsample_filters[2],
                            upsample_strides[2],
                            stride=upsample_strides[2]),
            BatchNorm2d(num_upsample_filters[2]),
            nn.ReLU(),
        )
        if encode_background_as_zeros:
            num_cls = num_anchor_per_loc * num_class
        else:
            num_cls = num_anchor_per_loc * (num_class + 1)
        self.conv_cls_coarse = nn.Conv2d(num_upsample_filters[0], num_cls, 1)
        self.conv_box_coarse = nn.Conv2d(num_upsample_filters[0],
                                         num_anchor_per_loc * box_code_size,
                                         1)
        self.conv_cls = nn.Conv2d(num_upsample_filters[0], num_cls, 1)
        self.conv_box = nn.Conv2d(num_upsample_filters[0],
                                  num_anchor_per_loc * box_code_size, 1)
        if use_direction_classifier:
            self.conv_dir_cls = nn.Conv2d(
                num_upsample_filters[0],
                num_anchor_per_loc * num_direction_bins, 1)

    def forward(self, x):
        H, W = x.shape[2:]
        # coarse predictions straight off the input features
        box_refine = self.conv_box_coarse(x)
        box_refine = box_refine.view(-1, self._num_anchor_per_loc,
                                     self._box_code_size, H,
                                     W).permute(0, 1, 3, 4, 2).contiguous()
        cls_constraint = self.conv_cls_coarse(x)
        cls_constraint = cls_constraint.view(-1, self._num_anchor_per_loc,
                                             self._num_class, H,
                                             W).permute(0, 1, 3, 4,
                                                        2).contiguous()
        x = self.block1(x)
        up1 = self.deconv1(x)
        x = self.block2(x)
        up2 = self.deconv2(x)
        x = self.block3(x)
        up3 = self.deconv3(x)
        # element-wise sum (not concat): all deconvs share the same width
        x = up1 + up2 + up3
        box_preds = self.conv_box(x)
        cls_preds = self.conv_cls(x)
        # [N, C, y(H), x(W)] -> (N, A, H, W, code)
        box_preds = box_preds.view(-1, self._num_anchor_per_loc,
                                   self._box_code_size, H,
                                   W).permute(0, 1, 3, 4, 2).contiguous()
        cls_preds = cls_preds.view(-1, self._num_anchor_per_loc,
                                   self._num_class, H,
                                   W).permute(0, 1, 3, 4, 2).contiguous()
        ret_dict = {
            "box_refine": box_refine,
            "cls_constraint": cls_constraint,
            "box_preds": box_preds,
            "cls_preds": cls_preds,
        }
        if self._use_direction_classifier:
            dir_cls_preds = self.conv_dir_cls(x)
            dir_cls_preds = dir_cls_preds.view(
                -1, self._num_anchor_per_loc, self._num_direction_bins, H,
                W).permute(0, 1, 3, 4, 2).contiguous()
            ret_dict["dir_cls_preds"] = dir_cls_preds
        return x, ret_dict
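# Standalone illustration of the per-anchor reshape used throughout
# RPN_refine.forward above: (N, A*C, H, W) -> (N, A, H, W, C), so each
# anchor's box code sits on the trailing axis. Purely illustrative.
def _example_anchor_reshape(raw, num_anchors=2, code_size=7):
    N, _, H, W = raw.shape
    return raw.view(N, num_anchors, code_size, H,
                    W).permute(0, 1, 3, 4, 2).contiguous()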
def __init__(self,
             use_norm=True,
             num_class=2,
             layer_nums=[3, 5, 5],
             layer_strides=[2, 2, 2],
             num_filters=[128, 128, 256],
             upsample_strides=[1, 2, 4],
             num_upsample_filters=[256, 256, 256],
             num_input_features=128,
             num_anchor_per_loc=2,
             encode_background_as_zeros=True,
             use_direction_classifier=True,
             use_groupnorm=False,
             num_groups=32,
             use_bev=False,
             box_code_size=7,
             use_rc_net=False,
             name='rpn'):
    super(RPNV2, self).__init__()
    self._num_anchor_per_loc = num_anchor_per_loc
    self._use_direction_classifier = use_direction_classifier
    self._use_bev = use_bev
    self._use_rc_net = use_rc_net
    # assert len(layer_nums) == 3
    assert len(layer_strides) == len(layer_nums)
    assert len(num_filters) == len(layer_nums)
    assert len(upsample_strides) == len(layer_nums)
    assert len(num_upsample_filters) == len(layer_nums)
    # disabled stride/upsample consistency check:
    # factors = []
    # for i in range(len(layer_nums)):
    #     assert int(np.prod(layer_strides[:i + 1])) % upsample_strides[i] == 0
    #     factors.append(np.prod(layer_strides[:i + 1]) // upsample_strides[i])
    # assert all([x == factors[0] for x in factors])
    if use_norm:
        if use_groupnorm:
            BatchNorm2d = change_default_args(
                num_groups=num_groups, eps=1e-3)(GroupNorm)
        else:
            BatchNorm2d = change_default_args(
                eps=1e-3, momentum=0.01)(nn.BatchNorm2d)
        Conv2d = change_default_args(bias=False)(nn.Conv2d)
        ConvTranspose2d = change_default_args(bias=False)(
            nn.ConvTranspose2d)
    else:
        BatchNorm2d = Empty
        Conv2d = change_default_args(bias=True)(nn.Conv2d)
        ConvTranspose2d = change_default_args(bias=True)(
            nn.ConvTranspose2d)
    in_filters = [num_input_features, *num_filters[:-1]]
    # note that when stride > 1, conv2d with same padding isn't
    # equal to pad-conv2d. we should use pad-conv2d.
    blocks = []
    deblocks = []
    for i, layer_num in enumerate(layer_nums):
        block = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(in_filters[i], num_filters[i], 3,
                   stride=layer_strides[i]),
            BatchNorm2d(num_filters[i]),
            nn.ReLU(),
        )
        for j in range(layer_num):
            block.add(Conv2d(num_filters[i], num_filters[i], 3, padding=1))
            block.add(BatchNorm2d(num_filters[i]))
            block.add(nn.ReLU())
        blocks.append(block)
        deblock = Sequential(
            ConvTranspose2d(num_filters[i],
                            num_upsample_filters[i],
                            upsample_strides[i],
                            stride=upsample_strides[i]),
            BatchNorm2d(num_upsample_filters[i]),
            nn.ReLU(),
        )
        deblocks.append(deblock)
    self.blocks = nn.ModuleList(blocks)
    self.deblocks = nn.ModuleList(deblocks)
    if encode_background_as_zeros:
        num_cls = num_anchor_per_loc * num_class
    else:
        num_cls = num_anchor_per_loc * (num_class + 1)
    self.conv_cls = nn.Conv2d(sum(num_upsample_filters), num_cls, 1)
    self.conv_box = nn.Conv2d(sum(num_upsample_filters),
                              num_anchor_per_loc * box_code_size, 1)
    if use_direction_classifier:
        self.conv_dir_cls = nn.Conv2d(sum(num_upsample_filters),
                                      num_anchor_per_loc * 2, 1)
    if self._use_rc_net:
        self.conv_rc = nn.Conv2d(sum(num_upsample_filters),
                                 num_anchor_per_loc * box_code_size, 1)
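# RPNV2's forward is not shown in this excerpt; this is a hedged sketch of
# how the paired blocks/deblocks ModuleLists are typically consumed,
# modeled on the explicit three-stage RPN.forward above (an assumption,
# not the repo's actual forward).
def _example_rpnv2_forward(rpnv2, x):
    ups = []
    for block, deblock in zip(rpnv2.blocks, rpnv2.deblocks):
        x = block(x)            # downsample by layer_strides[i]
        ups.append(deblock(x))  # upsample back to the common scale
    x = torch.cat(ups, dim=1)   # (N, sum(num_upsample_filters), H', W')
    return rpnv2.conv_box(x), rpnv2.conv_cls(x)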
def __init__(self,
             use_norm=True,
             num_class=2,
             layer_nums=[3, 5, 5],
             layer_strides=[2, 2, 2],
             num_filters=[128, 128, 256],
             upsample_strides=[1, 2, 4],
             num_upsample_filters=[256, 256, 256],
             num_input_features=128,
             num_anchor_per_loc=2,
             encode_background_as_zeros=True,
             use_direction_classifier=True,
             use_groupnorm=False,
             num_groups=32,
             use_bev=False,
             box_code_size=7,
             use_rc_net=False,
             name='rpn'):
    super(RPN_FUSION, self).__init__()
    self._num_anchor_per_loc = num_anchor_per_loc
    self._use_direction_classifier = use_direction_classifier
    self._use_bev = use_bev
    self._use_rc_net = use_rc_net
    # assert len(layer_nums) == 3
    assert len(layer_strides) == len(layer_nums)
    assert len(num_filters) == len(layer_nums)
    assert len(upsample_strides) == len(layer_nums)
    assert len(num_upsample_filters) == len(layer_nums)
    # disabled stride/upsample consistency check:
    # factors = []
    # for i in range(len(layer_nums)):
    #     assert int(np.prod(layer_strides[:i + 1])) % upsample_strides[i] == 0
    #     factors.append(np.prod(layer_strides[:i + 1]) // upsample_strides[i])
    # assert all([x == factors[0] for x in factors])
    if use_norm:
        if use_groupnorm:
            BatchNorm2d = change_default_args(
                num_groups=num_groups, eps=1e-3)(GroupNorm)
        else:
            BatchNorm2d = change_default_args(
                eps=1e-3, momentum=0.01)(nn.BatchNorm2d)
        Conv2d = change_default_args(bias=False)(nn.Conv2d)
        ConvTranspose2d = change_default_args(bias=False)(
            nn.ConvTranspose2d)
    else:
        BatchNorm2d = Empty
        Conv2d = change_default_args(bias=True)(nn.Conv2d)
        ConvTranspose2d = change_default_args(bias=True)(
            nn.ConvTranspose2d)
    in_filters = [num_input_features, *num_filters[:-1]]
    # note that when stride > 1, conv2d with same padding isn't
    # equal to pad-conv2d. we should use pad-conv2d.
    blocks = []
    deblocks = []
    for i, layer_num in enumerate(layer_nums):
        # in_f = 256 if i == 0 else in_filters[i]
        in_f = in_filters[i]
        block = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(in_f, num_filters[i], 3, stride=layer_strides[i]),
            BatchNorm2d(num_filters[i]),
            nn.ReLU(),
        )
        for j in range(layer_num):
            block.add(Conv2d(num_filters[i], num_filters[i], 3, padding=1))
            block.add(BatchNorm2d(num_filters[i]))
            block.add(nn.ReLU())
        blocks.append(block)
        deblock = Sequential(
            ConvTranspose2d(num_filters[i],
                            num_upsample_filters[i],
                            upsample_strides[i],
                            stride=upsample_strides[i]),
            BatchNorm2d(num_upsample_filters[i]),
            nn.ReLU(),
        )
        deblocks.append(deblock)
    self.blocks = nn.ModuleList(blocks)
    self.deblocks = nn.ModuleList(deblocks)
    if encode_background_as_zeros:
        num_cls = num_anchor_per_loc * num_class
    else:
        num_cls = num_anchor_per_loc * (num_class + 1)
    # detection heads
    det_num = sum(num_upsample_filters)
    self.conv_cls = nn.Conv2d(det_num, num_cls, 1)
    self.conv_box = nn.Conv2d(det_num, num_anchor_per_loc * box_code_size, 1)
    if use_direction_classifier:
        self.conv_dir_cls = nn.Conv2d(det_num, num_anchor_per_loc * 2, 1)
    if self._use_rc_net:
        self.conv_rc = nn.Conv2d(det_num,
                                 num_anchor_per_loc * box_code_size, 1)
    # RGB feature network (ResNet + FPN backbone)
    self.f_in_planes_det = 64
    net_type = 'FPN18'
    if net_type == 'FPN50':
        num_blocks = [3, 4, 6, 3]
        bb_block = Bottleneck
    elif net_type == 'FPN18':
        num_blocks = [2, 2, 2, 2]
        bb_block = BasicBlock
    self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                           bias=False)
    self.bn1 = nn.BatchNorm2d(64)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    self.layer1 = self._make_layer_det(bb_block, 64, num_blocks[0], stride=1)
    self.layer2 = self._make_layer_det(bb_block, 128, num_blocks[1],
                                       stride=2)
    self.layer3 = self._make_layer_det(bb_block, 256, num_blocks[2],
                                       stride=2)
    self.layer4 = self._make_layer_det(bb_block, 512, num_blocks[3],
                                       stride=2)
    if net_type == 'FPN18':
        fpn_sizes = [
            self.layer2[1].conv2.out_channels,
            self.layer3[1].conv2.out_channels,
            self.layer4[1].conv2.out_channels
        ]
    else:
        fpn_sizes = [
            self.layer2[num_blocks[1] - 1].conv3.out_channels,
            self.layer3[num_blocks[2] - 1].conv3.out_channels,
            self.layer4[num_blocks[3] - 1].conv3.out_channels
        ]
    self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2])
    # fusion layers
    num_z_feat = 3
    n_feats = 128
    self.rgb_refine = Sequential(
        nn.Conv2d(256 * num_z_feat, 256, kernel_size=3, stride=1,
                  padding=1),
        nn.BatchNorm2d(256),
        nn.ReLU(),
        nn.Conv2d(256, n_feats, kernel_size=1, stride=1, padding=0),
        nn.BatchNorm2d(n_feats),
        nn.ReLU(),
    )
    self.fusion_refine = Sequential(
        nn.Conv2d(n_feats * 2, n_feats * 2, kernel_size=3, stride=1,
                  padding=1),
        nn.BatchNorm2d(n_feats * 2),
        nn.ReLU(),
        nn.Conv2d(n_feats * 2, n_feats, kernel_size=1, stride=1, padding=0),
        nn.BatchNorm2d(n_feats),
        nn.ReLU(),
    )
    self.bev_gate = BasicGate(n_feats)
    self.crop_gate = BasicGate(n_feats)
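# For the hard-coded net_type == 'FPN18' above, the BasicBlock stages end
# in conv2 layers of 128/256/512 channels, so fpn_sizes resolves to
# [128, 256, 512]; the FPN50 branch reads conv3 of the last Bottleneck
# instead. A hedged check (`net` is a constructed RPN_FUSION instance):
def _example_fpn18_sizes(net):
    sizes = [net.layer2[-1].conv2.out_channels,
             net.layer3[-1].conv2.out_channels,
             net.layer4[-1].conv2.out_channels]
    return sizes  # expected [128, 256, 512] for ResNet-18-style stages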
class Sp2RPN(nn.Module):
    def __init__(self,
                 use_norm=True,
                 num_class=2,
                 layer_nums=(1, 1),
                 layer_strides=(1, 2),
                 num_filters=(256, 256),
                 upsample_strides=(1, 2),
                 num_upsample_filters=(128, 128),
                 num_input_features=128,
                 num_anchor_per_loc=2,
                 encode_background_as_zeros=True,
                 use_direction_classifier=True,
                 use_groupnorm=False,
                 num_groups=32,
                 use_bev=False,
                 box_code_size=7,
                 num_direction_bins=2):
        super().__init__()
        self.name = 'Sp2RPN2'
        self._num_class = num_class
        self._num_anchor_per_loc = num_anchor_per_loc
        self._use_direction_classifier = use_direction_classifier
        self._num_direction_bins = num_direction_bins
        self._box_code_size = box_code_size
        self._use_bev = use_bev
        assert len(layer_strides) == len(layer_nums)
        assert len(num_filters) == len(layer_nums)
        assert len(upsample_strides) == len(layer_nums)
        assert len(num_upsample_filters) == len(layer_nums)
        # upsample_strides may arrive as floats from the config; cast to int
        upsample_strides = [int(s) for s in upsample_strides]
        if use_norm:
            if use_groupnorm:
                BatchNorm2d = change_default_args(
                    num_groups=num_groups, eps=1e-3)(GroupNorm)
            else:
                BatchNorm2d = change_default_args(
                    eps=1e-3, momentum=0.01)(nn.BatchNorm2d)
            Conv2d = change_default_args(bias=False)(nn.Conv2d)
            ConvTranspose2d = change_default_args(bias=False)(
                nn.ConvTranspose2d)
        else:
            BatchNorm2d = Empty
            Conv2d = change_default_args(bias=True)(nn.Conv2d)
            ConvTranspose2d = change_default_args(bias=True)(
                nn.ConvTranspose2d)
        self.block1 = Sequential()
        for i in range(layer_nums[0]):
            self.block1.add(
                Conv2d(num_filters[0], num_filters[0], 3, padding=1))
            self.block1.add(BatchNorm2d(num_filters[0]))
            self.block1.add(nn.ReLU())
        self.deconv1 = Sequential(
            ConvTranspose2d(num_filters[0],
                            num_upsample_filters[0],
                            upsample_strides[0],
                            stride=upsample_strides[0]),
            BatchNorm2d(num_upsample_filters[0]),
            nn.ReLU(),
        )
        self.block2 = Sequential()
        for i in range(layer_nums[1]):
            self.block2.add(
                Conv2d(num_filters[1], num_filters[1], 3, padding=1))
            self.block2.add(BatchNorm2d(num_filters[1]))
            self.block2.add(nn.ReLU())
        self.deconv2 = Sequential(
            ConvTranspose2d(num_filters[1],
                            num_upsample_filters[1],
                            upsample_strides[1],
                            stride=upsample_strides[1]),
            BatchNorm2d(num_upsample_filters[1]),
            nn.ReLU(),
        )
        if encode_background_as_zeros:
            num_cls = num_anchor_per_loc * num_class
        else:
            num_cls = num_anchor_per_loc * (num_class + 1)
        self.conv_cls = nn.Conv2d(sum(num_upsample_filters), num_cls, 1)
        self.conv_box = nn.Conv2d(sum(num_upsample_filters),
                                  num_anchor_per_loc * box_code_size, 1)
        if use_direction_classifier:
            self.conv_dir_cls = nn.Conv2d(sum(num_upsample_filters),
                                          num_anchor_per_loc * 2, 1)

    def forward(self, x):
        x1, x2 = x
        x1 = self.block1(x1)
        up1 = self.deconv1(x1)
        x2 = self.block2(x2)
        up2 = self.deconv2(x2)
        x_cat = torch.cat([up1, up2], dim=1)
        box_preds = self.conv_box(x_cat)
        cls_preds = self.conv_cls(x_cat)
        # [N, C, y(H), x(W)] -> (N, A, H, W, code)
        C, H, W = box_preds.shape[1:]
        box_preds = box_preds.view(-1, self._num_anchor_per_loc,
                                   self._box_code_size, H,
                                   W).permute(0, 1, 3, 4, 2).contiguous()
        cls_preds = cls_preds.view(-1, self._num_anchor_per_loc,
                                   self._num_class, H,
                                   W).permute(0, 1, 3, 4, 2).contiguous()
        ret_dict = {
            "box_preds": box_preds,
            "cls_preds": cls_preds,
        }
        if self._use_direction_classifier:
            dir_cls_preds = self.conv_dir_cls(x_cat)
            dir_cls_preds = dir_cls_preds.view(
                -1, self._num_anchor_per_loc, self._num_direction_bins, H,
                W).permute(0, 1, 3, 4, 2).contiguous()
            ret_dict["dir_cls_preds"] = dir_cls_preds
        return ret_dict
class res_fpn(nn.Module):
    def __init__(self,
                 in_channels=128,
                 num_of_convs=4,
                 prior_prob=0.01,
                 use_norm=True,
                 num_class=2,
                 num_convs=12,
                 layer_nums=(3, 5, 5),
                 layer_strides=(1, 2, 2),
                 num_filters=(128, 128, 256),
                 upsample_strides=(1, 2, 4),
                 num_upsample_filters=(256, 256, 256),
                 num_input_features=128,
                 num_anchor_per_loc=2,
                 encode_background_as_zeros=True,
                 use_direction_classifier=True,
                 use_groupnorm=False,
                 num_groups=32,
                 box_code_size=7,
                 name='rpn'):
        """Three-stage conv/deconv FPN backbone.

        Input: a (batch, channel, x, y) feature map.
        Output: a list of head feature maps (a single concatenated map on
        the current code path).
        in_channels (int): number of channels of the input feature.
        """
        super(res_fpn, self).__init__()
        # convs_fpn head
        factors = []
        for i in range(len(layer_nums)):
            assert int(np.prod(
                layer_strides[:i + 1])) % upsample_strides[i] == 0
            factors.append(
                np.prod(layer_strides[:i + 1]) // upsample_strides[i])
        assert all([x == factors[0] for x in factors])
        if use_norm:
            if use_groupnorm:
                BatchNorm2d = change_default_args(
                    num_groups=num_groups, eps=1e-3)(GroupNorm)
            else:
                BatchNorm2d = change_default_args(
                    eps=1e-3, momentum=0.01)(nn.BatchNorm2d)
            Conv2d = change_default_args(bias=False)(nn.Conv2d)
            ConvTranspose2d = change_default_args(bias=False)(
                nn.ConvTranspose2d)
        else:
            BatchNorm2d = Empty
            Conv2d = change_default_args(bias=True)(nn.Conv2d)
            ConvTranspose2d = change_default_args(bias=True)(
                nn.ConvTranspose2d)
        # note that when stride > 1, conv2d with same padding isn't
        # equal to pad-conv2d. we should use pad-conv2d.
        block2_input_filters = num_filters[0]
        self.block1 = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(num_input_features,
                   num_filters[0],
                   3,
                   stride=layer_strides[0]),
            BatchNorm2d(num_filters[0]),
            nn.ReLU(),
        )
        for i in range(layer_nums[0]):
            self.block1.add(
                Conv2d(num_filters[0], num_filters[0], 3, padding=1))
            self.block1.add(BatchNorm2d(num_filters[0]))
            self.block1.add(nn.ReLU())
        self.deconv1 = Sequential(
            ConvTranspose2d(num_filters[0],
                            num_upsample_filters[0],
                            upsample_strides[0],
                            stride=upsample_strides[0]),
            BatchNorm2d(num_upsample_filters[0]),
            nn.ReLU(),
        )
        self.block2 = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(block2_input_filters,
                   num_filters[1],
                   3,
                   stride=layer_strides[1]),
            BatchNorm2d(num_filters[1]),
            nn.ReLU(),
        )
        for i in range(layer_nums[1]):
            self.block2.add(
                Conv2d(num_filters[1], num_filters[1], 3, padding=1))
            self.block2.add(BatchNorm2d(num_filters[1]))
            self.block2.add(nn.ReLU())
        self.deconv2 = Sequential(
            ConvTranspose2d(num_filters[1],
                            num_upsample_filters[1],
                            upsample_strides[1],
                            stride=upsample_strides[1]),
            BatchNorm2d(num_upsample_filters[1]),
            nn.ReLU(),
        )
        self.block3 = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(num_filters[1], num_filters[2], 3,
                   stride=layer_strides[2]),
            BatchNorm2d(num_filters[2]),
            nn.ReLU(),
        )
        for i in range(layer_nums[2]):
            self.block3.add(
                Conv2d(num_filters[2], num_filters[2], 3, padding=1))
            self.block3.add(BatchNorm2d(num_filters[2]))
            self.block3.add(nn.ReLU())
        self.deconv3 = Sequential(
            ConvTranspose2d(num_filters[2],
                            num_upsample_filters[2],
                            upsample_strides[2],
                            stride=upsample_strides[2]),
            BatchNorm2d(num_upsample_filters[2]),
            nn.ReLU(),
        )

    def forward(self, x):
        fpn_head = []
        x = self.block1(x)
        up1 = self.deconv1(x)
        x = self.block2(x)
        up2 = self.deconv2(x)
        x = self.block3(x)
        up3 = self.deconv3(x)
        # per-scale heads (disabled):
        # fpn_head.append(up1)
        # fpn_head.append(up2)
        # fpn_head.append(up3)
        # single concatenated head:
        x = torch.cat([up1, up2, up3], dim=1)
        fpn_head.append(x)
        return fpn_head
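# res_fpn returns its heads in a list; with the per-scale appends disabled
# above, the list holds one concatenated map. A hedged consumer sketch
# using the defaults (layer_strides[0] == 1, so heads sit at input scale):
def _example_res_fpn_forward():
    fpn = res_fpn()
    heads = fpn(torch.zeros(1, 128, 100, 88))
    assert len(heads) == 1
    assert heads[0].shape == (1, 256 * 3, 100, 88)
    return heads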
class img_extractor_VGG16(nn.Module):
    def __init__(self,
                 use_norm=True,
                 num_class=2,
                 img_input_channel=3,
                 # The original defaults had only two entries, which tripped
                 # the length asserts and the block3 indexing below; they
                 # are extended here to cover all three conv blocks.
                 img_extractor_layer_nums=[2, 3, 3],
                 layer_strides=[2, 2, 2],
                 num_filters=[32, 64, 128],
                 upsample_strides=[1, 2, 4],
                 num_upsample_filters=[128, 128, 128],
                 num_anchor_per_loc=2,
                 encode_background_as_zeros=True,
                 use_direction_classifier=True,
                 use_groupnorm=False,
                 num_groups=32,
                 box_code_size=7,
                 name='img_extractor_SSD_like'):
        super(img_extractor_VGG16, self).__init__()
        self._num_anchor_per_loc = num_anchor_per_loc
        self._use_direction_classifier = use_direction_classifier
        assert len(layer_strides) == len(img_extractor_layer_nums)
        assert len(num_filters) == len(img_extractor_layer_nums)
        assert len(upsample_strides) == len(img_extractor_layer_nums)
        assert len(num_upsample_filters) == len(img_extractor_layer_nums)
        if use_norm:
            if use_groupnorm:
                BatchNorm2d = change_default_args(
                    num_groups=num_groups, eps=1e-3)(GroupNorm)
            else:
                BatchNorm2d = change_default_args(
                    eps=1e-3, momentum=0.01)(nn.BatchNorm2d)
            Conv2d = change_default_args(bias=False)(nn.Conv2d)
        else:
            BatchNorm2d = Empty
            Conv2d = change_default_args(bias=True)(nn.Conv2d)
        self.block1 = Sequential()
        for i in range(img_extractor_layer_nums[0]):
            block1_in = img_input_channel if i == 0 else num_filters[0]
            self.block1.add(Conv2d(block1_in, num_filters[0], 3, padding=1))
            self.block1.add(BatchNorm2d(num_filters[0]))
            self.block1.add(nn.ReLU(inplace=False))
        self.block1.add(nn.MaxPool2d(kernel_size=2, stride=2))
        self.block2 = Sequential()
        for i in range(img_extractor_layer_nums[1]):
            block2_in = num_filters[0] if i == 0 else num_filters[1]
            self.block2.add(Conv2d(block2_in, num_filters[1], 3, padding=1))
            self.block2.add(BatchNorm2d(num_filters[1]))
            self.block2.add(nn.ReLU(inplace=False))
        self.block2.add(nn.MaxPool2d(kernel_size=2, stride=2))
        self.block3 = Sequential()
        for i in range(img_extractor_layer_nums[2]):
            # The original loop mistakenly appended these layers to
            # self.block2, leaving self.block3 empty; they belong here.
            block3_in = num_filters[1] if i == 0 else num_filters[2]
            self.block3.add(Conv2d(block3_in, num_filters[2], 3, padding=1))
            self.block3.add(BatchNorm2d(num_filters[2]))
            self.block3.add(nn.ReLU(inplace=False))
        self.block3.add(nn.MaxPool2d(kernel_size=2, stride=2))

    def forward(self, inputs, bev=None):
        # inputs: (N, 3, H, W); each block halves H and W via its max-pool
        img_feat_block1 = self.block1(inputs)
        img_feat_block2 = self.block2(img_feat_block1)
        img_feat_block3 = self.block3(img_feat_block2)
        return img_feat_block3
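# Hedged smoke test for the VGG16-style extractor (with the block3 wiring
# and default lists fixed above): three stride-2 max-pools downsample the
# image by 8. The input size is arbitrary but chosen divisible by 8.
def _example_img_extractor():
    ext = img_extractor_VGG16()
    feat = ext(torch.zeros(1, 3, 376, 1240))
    assert feat.shape == (1, 128, 47, 155)
    return feat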