def __init__(self, input_dim=(128, 256), pred_input_dim=(256, 256), pred_inter_dim=(256, 256)):
    """Build the modulation / prediction layers of the width-height head.

    args:
        input_dim: Channel counts of the two backbone feature maps.
        pred_input_dim: Channels fed into the prediction linear blocks.
        pred_inter_dim: Channels of the intermediate linear features.
    """
    super().__init__()

    # Suffix convention: _r = reference branch, _t = test branch.
    self.conv3_1r = conv(input_dim[0], 128, kernel_size=3, stride=1)
    self.conv3_1t = conv(input_dim[0], 256, kernel_size=3, stride=1)
    self.conv3_2t = conv(256, pred_input_dim[0], kernel_size=3, stride=1)

    # ROI pooling at 1/8 feature stride.
    self.prroi_pool3r = PrRoIPool2D(3, 3, 1 / 8)
    self.prroi_pool3t = PrRoIPool2D(5, 5, 1 / 8)

    self.fc3_1r = conv(128, 256, kernel_size=3, stride=1, padding=0)

    self.conv4_1r = conv(input_dim[1], 256, kernel_size=3, stride=1)
    self.conv4_1t = conv(input_dim[1], 256, kernel_size=3, stride=1)
    self.conv4_2t = conv(256, pred_input_dim[1], kernel_size=3, stride=1)

    # ROI pooling at 1/16 feature stride.
    self.prroi_pool4r = PrRoIPool2D(1, 1, 1 / 16)
    self.prroi_pool4t = PrRoIPool2D(3, 3, 1 / 16)

    self.fc34_3r = conv(256 + 256, pred_input_dim[0], kernel_size=1, stride=1, padding=0)
    self.fc34_4r = conv(256 + 256, pred_input_dim[1], kernel_size=1, stride=1, padding=0)

    # Spatial sizes are 36 / 18 here (the base variant uses 5 / 3).
    self.fc3_rt = LinearBlock(pred_input_dim[0], pred_inter_dim[0], 36)
    self.fc4_rt = LinearBlock(pred_input_dim[1], pred_inter_dim[1], 18)

    # This variant predicts a 2-vector (width/height) rather than a scalar IoU.
    self.wh_predictor = nn.Linear(pred_inter_dim[0] + pred_inter_dim[1], 2, bias=True)
    self.sigmoid = nn.Sigmoid()

    # Initialize weights.
    for module in self.modules():
        if isinstance(module, (nn.Conv2d, nn.ConvTranspose2d, nn.Linear)):
            nn.init.kaiming_normal_(module.weight.data, mode='fan_in')
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.BatchNorm2d):
            # Pre-1.2 PyTorch initialized BatchNorm weight to U(0, 1);
            # keep the same initialization here for reproducibility.
            module.weight.data.uniform_()
            module.bias.data.zero_()
def __init__(self, input_dim=(128,256), pred_input_dim=(256,256), pred_inter_dim=(256,256)):
    """Build the modulation / prediction layers of the IoU head.

    args:
        input_dim: Channel counts of the two backbone feature maps.
        pred_input_dim: Channels fed into the prediction linear blocks.
        pred_inter_dim: Channels of the intermediate linear features.
    """
    super().__init__()

    # Suffix convention: _r = reference branch, _t = test branch.
    self.conv3_1r = conv(input_dim[0], 128, kernel_size=3, stride=1)
    self.conv3_1t = conv(input_dim[0], 256, kernel_size=3, stride=1)
    self.conv3_2t = conv(256, pred_input_dim[0], kernel_size=3, stride=1)

    # ROI pooling at 1/8 feature stride.
    self.prroi_pool3r = PrRoIPool2D(3, 3, 1/8)
    self.prroi_pool3t = PrRoIPool2D(5, 5, 1/8)

    self.fc3_1r = conv(128, 256, kernel_size=3, stride=1, padding=0)

    self.conv4_1r = conv(input_dim[1], 256, kernel_size=3, stride=1)
    self.conv4_1t = conv(input_dim[1], 256, kernel_size=3, stride=1)
    self.conv4_2t = conv(256, pred_input_dim[1], kernel_size=3, stride=1)

    # ROI pooling at 1/16 feature stride.
    self.prroi_pool4r = PrRoIPool2D(1, 1, 1/16)
    self.prroi_pool4t = PrRoIPool2D(3, 3, 1 / 16)

    self.fc34_3r = conv(256 + 256, pred_input_dim[0], kernel_size=1, stride=1, padding=0)
    self.fc34_4r = conv(256 + 256, pred_input_dim[1], kernel_size=1, stride=1, padding=0)

    # Linear blocks over the 5x5 and 3x3 pooled test features.
    self.fc3_rt = LinearBlock(pred_input_dim[0], pred_inter_dim[0], 5)
    self.fc4_rt = LinearBlock(pred_input_dim[1], pred_inter_dim[1], 3)

    # Final scalar IoU prediction from the concatenated features.
    self.iou_predictor = nn.Linear(pred_inter_dim[0]+pred_inter_dim[1], 1, bias=True)

    # Initialize weights.
    for module in self.modules():
        if isinstance(module, (nn.Conv2d, nn.ConvTranspose2d, nn.Linear)):
            nn.init.kaiming_normal_(module.weight.data, mode='fan_in')
            if module.bias is not None:
                module.bias.data.zero_()
def __init__(self, input_dim=(32, 64), pred_input_dim=(64, 64), pred_inter_dim=(64, 64), cpu=False):
    """Build the lightweight (32/64-channel) IoU head.

    args:
        input_dim: Channel counts of the two backbone feature maps.
        pred_input_dim: Channels fed into the prediction linear blocks.
        pred_inter_dim: Channels of the intermediate linear features.
        cpu: If True, use torchvision RoIPool instead of PrRoIPool2D
            (presumably PrRoIPool2D needs CUDA — the sibling class with
            the same flag makes the same substitution).
    """
    super().__init__(input_dim, pred_input_dim, pred_inter_dim)
    # _r for reference, _t for test
    self.conv3_1r = conv(input_dim[0], 32, kernel_size=3, stride=1)
    self.conv3_1t = conv(input_dim[0], 64, kernel_size=3, stride=1)
    self.conv3_2t = conv(64, pred_input_dim[0], kernel_size=3, stride=1)

    if cpu:
        self.prroi_pool3r = RoIPool((3, 3), 1 / 8)
        self.prroi_pool3t = RoIPool((5, 5), 1 / 8)
    else:
        self.prroi_pool3r = PrRoIPool2D(3, 3, 1 / 8)
        self.prroi_pool3t = PrRoIPool2D(5, 5, 1 / 8)

    self.fc3_1r = conv(32, 64, kernel_size=3, stride=1, padding=0)

    self.conv4_1r = conv(input_dim[1], 64, kernel_size=3, stride=1)
    self.conv4_1t = conv(input_dim[1], 64, kernel_size=3, stride=1)
    self.conv4_2t = conv(64, pred_input_dim[1], kernel_size=3, stride=1)

    # FIX: the layer-4 pools previously ignored the `cpu` flag and always
    # instantiated PrRoIPool2D; mirror the layer-3 handling (and the other
    # cpu-aware variant of this class) so the CPU path is fully usable.
    if cpu:
        self.prroi_pool4r = RoIPool((1, 1), 1 / 16)
        self.prroi_pool4t = RoIPool((3, 3), 1 / 16)
    else:
        self.prroi_pool4r = PrRoIPool2D(1, 1, 1 / 16)
        self.prroi_pool4t = PrRoIPool2D(3, 3, 1 / 16)

    self.fc34_3r = conv(64 + 64, pred_input_dim[0], kernel_size=1, stride=1, padding=0)
    self.fc34_4r = conv(64 + 64, pred_input_dim[1], kernel_size=1, stride=1, padding=0)

    self.fc3_rt = LinearBlock(pred_input_dim[0], pred_inter_dim[0], 5)
    self.fc4_rt = LinearBlock(pred_input_dim[1], pred_inter_dim[1], 3)

    # Predicts a 4-vector box from the concatenated features.
    self.box_predictor = nn.Linear(pred_inter_dim[0] + pred_inter_dim[1], 4, bias=True)

    # Init weights
    for m in self.modules():
        if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d, nn.Linear)):
            nn.init.kaiming_normal_(m.weight.data, mode='fan_in')
            if m.bias is not None:
                m.bias.data.zero_()
def __init__(self, pool_size=8, use_NL=True):
    """Pooled-correlation attention module.

    args:
        pool_size: Side length of the PrRoI-pooled region (1/16 scale).
        use_NL: Flag stored on the module; the non-local block itself is
            constructed unconditionally here.
    """
    super().__init__()
    self.prroi_pool = PrRoIPool2D(pool_size, pool_size, 1 / 16)
    # One correlation channel per pooled spatial position.
    corr_channels = pool_size * pool_size
    self.channel_attention = SEModule(corr_channels, reduction=4)
    self.spatial_attention = NONLocalBlock2D(in_channels=corr_channels)
    self.use_NL = use_NL
def __init__(self, input_dim=(128,256), pred_input_dim=(128,256)):
    """Two-level modulation network: two conv stages per level plus pooling."""
    super().__init__()
    c3, c4 = pred_input_dim[0], pred_input_dim[1]

    self.conv3_1 = conv(input_dim[0], c3, kernel_size=3, stride=1)
    self.conv3_2 = conv(c3, c3, kernel_size=3, stride=1)
    self.conv4_1 = conv(input_dim[1], c4, kernel_size=3, stride=1)
    self.conv4_2 = conv(c4, c4, kernel_size=3, stride=1)

    self.prroi_pool3 = PrRoIPool2D(8, 8, 1/8)
    self.prroi_pool4 = PrRoIPool2D(4, 4, 1/16)

    # Features are L2-normalized, so batch norm is not needed.
    # With relu=True the linear system is easily non-invertible.
    self.fc3 = LinearBlock(c3, 512, 8, batch_norm=False, relu=False)
    self.fc4 = LinearBlock(c4, 512, 4, batch_norm=False, relu=False)

    # Initialize conv / linear weights.
    for module in self.modules():
        if isinstance(module, (nn.Conv2d, nn.ConvTranspose2d, nn.Linear)):
            nn.init.kaiming_normal_(module.weight.data, mode='fan_in')
            if module.bias is not None:
                module.bias.data.zero_()
def __init__(self, input_dim=(128,256), pred_input_dim=(128,256)):
    """Deeper modulation network: four conv stages per level plus pooling."""
    super().__init__()
    c3, c4 = pred_input_dim[0], pred_input_dim[1]

    self.conv3_1 = conv(input_dim[0], c3, kernel_size=3, stride=1)
    self.conv3_2 = conv(c3, c3, kernel_size=3, stride=1)
    self.conv3_3 = conv(c3, c3, kernel_size=3, stride=1)
    self.conv3_4 = conv(c3, c3, kernel_size=3, stride=1)

    self.conv4_1 = conv(input_dim[1], c4, kernel_size=3, stride=1)
    self.conv4_2 = conv(c4, c4, kernel_size=3, stride=1)
    self.conv4_3 = conv(c4, c4, kernel_size=3, stride=1)
    self.conv4_4 = conv(c4, c4, kernel_size=3, stride=1)

    self.prroi_pool3 = PrRoIPool2D(8, 8, 1/8)
    self.prroi_pool4 = PrRoIPool2D(4, 4, 1/16)

    self.fc3 = LinearBlock(c3, 512, 8, batch_norm=False, relu=False)
    self.fc4 = LinearBlock(c4, 512, 4, batch_norm=False, relu=False)

    # Initialize conv / linear weights.
    for module in self.modules():
        if isinstance(module, (nn.Conv2d, nn.ConvTranspose2d, nn.Linear)):
            nn.init.kaiming_normal_(module.weight.data, mode='fan_in')
            if module.bias is not None:
                module.bias.data.zero_()
def __init__(self, pool_size=8, use_post_corr=True, use_NL=True):
    """Correlation post-processing with channel and spatial attention.

    args:
        pool_size: Side length of the PrRoI-pooled region.
        use_post_corr: Build an extra 1x1-conv refinement stack for the
            correlation map.
        use_NL: True -> non-local spatial attention; False -> plain conv
            stack; any other value -> identity (empty Sequential).
    """
    super().__init__()
    # The third argument of PrRoIPool2D is the spatial scale of the current
    # feature map relative to the original image (1 / downsampling stride).
    # layer2 has stride 8, layer3 has stride 16. With a 256x256 input the
    # layer3 output is 16x16 and the target is roughly 8x8.
    # NOTE: if the input resolution or the backbone layer changes, these
    # parameters must be re-tuned.
    self.prroi_pool = PrRoIPool2D(pool_size, pool_size, 1 / 16)
    num_corr_channel = pool_size * pool_size

    # Newly added: project 1024 backbone channels down to 64.
    self.adjust_layer = conv(1024, 64)
    # FIX: this module was previously constructed twice with identical
    # arguments; the redundant second construction has been removed.
    self.channel_attention = SEModule(num_corr_channel, reduction=4)

    self.use_post_corr = use_post_corr
    if use_post_corr:
        self.post_corr = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=(1, 1), padding=0, stride=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(128, 128, kernel_size=(1, 1), padding=0, stride=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(128, 64, kernel_size=(1, 1), padding=0, stride=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
        )

    self.use_NL = use_NL
    if self.use_NL is True:
        self.spatial_attention = NONLocalBlock2D(in_channels=num_corr_channel)
    elif self.use_NL is False:
        # Plain convolutional fallback when non-local attention is disabled.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=(1, 1), padding=0, stride=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=(1, 1), padding=0, stride=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=(1, 1), padding=0, stride=1),
            nn.BatchNorm2d(64),
        )
    else:
        # Any non-bool value (e.g. None) yields a no-op module.
        self.spatial_attention = nn.Sequential()
def __init__(self, filter_size=1, feature_stride=16, pool_square=False):
    """ROI pooling used to extract a target filter from a feature map.

    args:
        filter_size: Spatial size of the pooled filter.
        feature_stride: Stride of the feature map w.r.t. the image;
            the pool's spatial scale is its reciprocal.
        pool_square: Stored on the module; consumed elsewhere
            (presumably by the forward pass — not visible here).
    """
    super().__init__()
    self.prroi_pool = PrRoIPool2D(filter_size, filter_size, 1/feature_stride)
    self.pool_square = pool_square
def __init__(self, settings=None, input_dim=(128, 256), pred_input_dim=(256, 256), pred_inter_dim=(256, 256)):
    """IoU head with optional depth-aware convolutions.

    args:
        settings: Must expose ``depthaware_for_iounet`` (bool).
            NOTE(review): the default of None raises AttributeError on the
            attribute access below — a real settings object is required.
        input_dim: Channel counts of the two backbone feature maps.
        pred_input_dim: Channels fed into the prediction linear blocks.
        pred_inter_dim: Channels of the intermediate linear features.
    """
    super().__init__()
    self.settings = settings
    # Select depth-aware convs for the first stage of each level.
    self.depthconv = self.settings.depthaware_for_iounet

    # Suffix convention: _r = reference branch, _t = test branch.
    # conv signature: conv(in_planes, out_planes, kernel_size=3, stride=1, padding=1, dilation=1)
    if self.depthconv:
        self.conv3_1r = DepthConvModule(input_dim[0], 128, kernel_size=3, stride=1)
        self.conv3_1t = DepthConvModule(input_dim[0], 256, kernel_size=3, stride=1)
    else:
        self.conv3_1r = conv(input_dim[0], 128, kernel_size=3, stride=1)
        self.conv3_1t = conv(input_dim[0], 256, kernel_size=3, stride=1)
    # conv3_2t was identical in both configuration branches — hoisted out.
    self.conv3_2t = conv(256, pred_input_dim[0], kernel_size=3, stride=1)

    self.prroi_pool3r = PrRoIPool2D(3, 3, 1 / 8)
    self.prroi_pool3t = PrRoIPool2D(5, 5, 1 / 8)

    # Dead `if False:` depth-aware alternatives removed; these layers are
    # always standard convolutions.
    self.fc3_1r = conv(128, 256, kernel_size=3, stride=1, padding=0)

    if self.depthconv:
        self.conv4_1r = DepthConvModule(input_dim[1], 256, kernel_size=3, stride=1)
        self.conv4_1t = DepthConvModule(input_dim[1], 256, kernel_size=3, stride=1)
    else:
        self.conv4_1r = conv(input_dim[1], 256, kernel_size=3, stride=1)
        self.conv4_1t = conv(input_dim[1], 256, kernel_size=3, stride=1)
    self.conv4_2t = conv(256, pred_input_dim[1], kernel_size=3, stride=1)

    self.prroi_pool4r = PrRoIPool2D(1, 1, 1 / 16)
    self.prroi_pool4t = PrRoIPool2D(3, 3, 1 / 16)

    self.fc34_3r = conv(256 + 256, pred_input_dim[0], kernel_size=1, stride=1, padding=0)
    self.fc34_4r = conv(256 + 256, pred_input_dim[1], kernel_size=1, stride=1, padding=0)

    self.fc3_rt = LinearBlock(pred_input_dim[0], pred_inter_dim[0], 5)
    self.fc4_rt = LinearBlock(pred_input_dim[1], pred_inter_dim[1], 3)

    self.iou_predictor = nn.Linear(pred_inter_dim[0] + pred_inter_dim[1], 1, bias=True)

    # Init weights
    for m in self.modules():
        if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d, nn.Linear)):
            nn.init.kaiming_normal_(m.weight.data, mode='fan_in')
            if m.bias is not None:
                m.bias.data.zero_()
        elif isinstance(m, nn.BatchNorm2d):
            # In earlier versions batch norm parameters was initialized with default initialization,
            # which changed in pytorch 1.2. In 1.1 and earlier the weight was set to U(0,1).
            # So we use the same initialization here.
            m.weight.data.uniform_()
            m.bias.data.zero_()
def __init__(self, input_dim=(16, 32), pred_input_dim=(32, 32), pred_inter_dim=(32, 32), cpu=False):
    """Very light (16/32-channel) IoU head.

    args:
        input_dim: Channel counts of the two backbone feature maps.
        pred_input_dim: Channels fed into the prediction linear blocks.
        pred_inter_dim: Channels of the intermediate linear features.
        cpu: If True, use torchvision RoIPool instead of PrRoIPool2D.
    """
    super().__init__(input_dim, pred_input_dim, pred_inter_dim)
    # Suffix convention: _r = reference branch, _t = test branch.
    # Shape comments assume 36x36 (stride-8) and 18x18 (stride-16) inputs.

    self.conv3_1r = conv(input_dim[0], 16, kernel_size=3, stride=1)       # 36x36x16 -> 36x36x16
    self.conv3_1t = conv(input_dim[0], 32, kernel_size=3, stride=1)       # 36x36x16 -> 36x36x32
    self.conv3_2t = conv(32, pred_input_dim[0], kernel_size=3, stride=1)  # 36x36x32 -> 36x36x32

    if cpu:
        self.prroi_pool3r = RoIPool((3, 3), 1 / 8)
        self.prroi_pool3t = RoIPool((5, 5), 1 / 8)
    else:
        self.prroi_pool3r = PrRoIPool2D(3, 3, 1 / 8)  # 36x36x16 -> 3x3x16
        self.prroi_pool3t = PrRoIPool2D(5, 5, 1 / 8)  # 36x36x32 -> 5x5x32

    self.fc3_1r = conv(16, 32, kernel_size=3, stride=1, padding=0)        # 3x3x16 -> 1x1x32

    self.conv4_1r = conv(input_dim[1], 32, kernel_size=3, stride=1)       # 18x18x32 -> 18x18x32
    self.conv4_1t = conv(input_dim[1], 32, kernel_size=3, stride=1)       # 18x18x32 -> 18x18x32
    self.conv4_2t = conv(32, pred_input_dim[1], kernel_size=3, stride=1)  # 18x18x32 -> 18x18x32

    if cpu:
        self.prroi_pool4r = RoIPool((1, 1), 1 / 16)
        self.prroi_pool4t = RoIPool((3, 3), 1 / 16)
    else:
        self.prroi_pool4r = PrRoIPool2D(1, 1, 1 / 16)  # 18x18x32 -> 1x1x32
        self.prroi_pool4t = PrRoIPool2D(3, 3, 1 / 16)  # 18x18x32 -> 3x3x32

    self.fc34_3r = conv(32 + 32, pred_input_dim[0], kernel_size=1, stride=1, padding=0)  # 1x1x64 -> 1x1x32
    self.fc34_4r = conv(32 + 32, pred_input_dim[1], kernel_size=1, stride=1, padding=0)  # 1x1x64 -> 1x1x32

    self.fc3_rt = LinearBlock(pred_input_dim[0], pred_inter_dim[0], 5)  # 5x5x32 -> 1x1x32
    self.fc4_rt = LinearBlock(pred_input_dim[1], pred_inter_dim[1], 3)  # 3x3x32 -> 1x1x32

    # Scalar IoU from the concatenated features: 1x1x64 -> 1x1x1.
    self.iou_predictor = nn.Linear(pred_inter_dim[0] + pred_inter_dim[1], 1, bias=True)

    # Initialize weights.
    for module in self.modules():
        if isinstance(module, (nn.Conv2d, nn.ConvTranspose2d, nn.Linear)):
            nn.init.kaiming_normal_(module.weight.data, mode='fan_in')
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.BatchNorm2d):
            # Pre-1.2 PyTorch initialized BatchNorm weight to U(0, 1);
            # keep the same initialization here for reproducibility.
            module.weight.data.uniform_()
            module.bias.data.zero_()
def __init__(self, pool_size=5, filter_dim=4, filter_channel=256, inner_channel=256, input_features_size=72, input_features_channel=256, filter_optimizer=None, train_reg_optimizer=False, train_cls_72_and_reg_init=True):
    """Build the regression-filter initializers, heads and merge convs.

    args:
        pool_size: Spatial size of the PrRoI-pooled filter.
        filter_dim: Number of filter components per channel.
        filter_channel: Channels of the produced filter.
        inner_channel: Hidden channels of the conv/DCN towers.
        input_features_size: Stored; size of the input feature map.
        input_features_channel: Channels of the input feature maps.
        filter_optimizer: Optional filter-optimizer module, stored as-is.
        train_reg_optimizer: If True, freeze initializers/heads so only
            the optimizer is trained.
        train_cls_72_and_reg_init: Stored training-mode flag.
    """
    super().__init__()
    self.pool_size = pool_size
    self.filter_channel = filter_channel
    self.filter_dim = filter_dim
    self.input_features_size = input_features_size
    self.input_features_channel = input_features_channel
    self.filter_optimizer = filter_optimizer
    self.train_cls_72_and_reg_init = train_cls_72_and_reg_init

    def _make_tower(out_channel):
        # Conv+GN+ReLU followed by two deformable-conv stages. The two
        # initializer towers and the two head towers were previously
        # duplicated verbatim except for the output channel count (DRY).
        return nn.Sequential(
            nn.Conv2d(input_features_channel, inner_channel, 3, 1, 1, bias=False),
            nn.GroupNorm(32, inner_channel),
            nn.ReLU(),
            DCN(inner_channel, inner_channel, kernel_size=(3, 3), stride=1, padding=1, dilation=1, deformable_groups=1),
            nn.GroupNorm(32, inner_channel),
            nn.ReLU(),
            DCN(inner_channel, out_channel, kernel_size=(3, 3), stride=1, padding=1, dilation=1, deformable_groups=1),
            nn.ReLU(),
        )

    # Filter initializers for the 72- and 36-sized feature levels
    # (spatial scales 72/288 and 36/288 w.r.t. a 288 input).
    self.reg_initializer_72 = _make_tower(filter_channel * filter_dim)
    self.prroipool_72 = PrRoIPool2D(pool_size, pool_size, 72 / 288.0)
    self.reg_initializer_36 = _make_tower(filter_channel * filter_dim)
    self.prroipool_36 = PrRoIPool2D(pool_size, pool_size, 36 / 288.0)
    self.reg_initializer_merge = nn.Conv2d(filter_channel * filter_dim * 2, filter_channel * filter_dim, 1, 1, bias=False)

    # Regression feature heads for the same two levels.
    self.reg_head_72 = _make_tower(filter_channel)
    self.reg_head_36 = _make_tower(filter_channel)
    self.reg_head_merge = nn.Conv2d(filter_channel * 2, filter_channel, 1, 1)

    if train_reg_optimizer:
        # When training the filter optimizer, freeze the (pre-trained)
        # initializer and head towers.
        for func_name in [
                'reg_initializer_36', 'reg_initializer_72', 'reg_initializer_merge',
                'reg_head_36', 'reg_head_72', 'reg_head_merge'
        ]:
            for p in getattr(self, func_name).parameters():
                p.requires_grad_(False)

    # Init weights: He-style normal for convs, standard for BatchNorm.
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            m.weight.data.normal_(0, math.sqrt(2. / n))
            if m.bias is not None:
                m.bias.data.zero_()
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()

    def _init_merge_weight(weight):
        # Set a 1x1 merge conv to average its two stacked inputs:
        # out[i] = 0.5 * in[i] + 0.5 * in[n_out + i].
        n_out = weight.size(0)
        for i in range(n_out):
            for j in range(weight.size(1)):
                weight[i, j, 0, 0] = 0.5 if (j == i or j == n_out + i) else 0.0

    # DRY: this fill loop was previously written out twice verbatim.
    _init_merge_weight(self.reg_initializer_merge.weight.data)
    _init_merge_weight(self.reg_head_merge.weight.data)
def __init__(self, input_dim=(128, 256), pred_input_dim=(256, 256), pred_inter_dim=(256, 256), fpn_inter_dim=None, share_rt=False):
    """IoU head with an FPN-style top-down fusion of backbone features.

    args:
        input_dim: Channels of the backbone maps; a third entry enables
            the extra conv5 level.
        pred_input_dim: Channels fed into the prediction linear blocks.
        pred_inter_dim: Channels of the intermediate linear features.
        fpn_inter_dim: Lateral channel counts; defaults to ``input_dim``.
        share_rt: If True, reference and test branches share the FPN
            lateral/fusion modules.
    """
    super().__init__()
    # Suffix convention: _r = reference branch, _t = test branch.

    # =============== FPN ===============
    if fpn_inter_dim is None:
        fpn_inter_dim = input_dim
    self.add_conv5 = len(input_dim) == 3
    if self.add_conv5:
        self.conv5_lat_r = nn.Conv2d(input_dim[2], fpn_inter_dim[2], kernel_size=1)
        self.conv5_lat_t = self.conv5_lat_r if share_rt else nn.Conv2d(input_dim[2], fpn_inter_dim[2], kernel_size=1)
        self.conv5_ct_r = ContextTexture(up=fpn_inter_dim[2], main=input_dim[1])
        self.conv5_ct_t = self.conv5_ct_r if share_rt else ContextTexture(up=fpn_inter_dim[2], main=input_dim[1])
    self.conv4_lat_r = nn.Conv2d(input_dim[1], fpn_inter_dim[1], kernel_size=1)
    self.conv4_lat_t = self.conv4_lat_r if share_rt else nn.Conv2d(input_dim[1], fpn_inter_dim[1], kernel_size=1)
    self.conv4_ct_r = ContextTexture(up=fpn_inter_dim[1], main=input_dim[0])
    self.conv4_ct_t = self.conv4_ct_r if share_rt else ContextTexture(up=fpn_inter_dim[1], main=input_dim[0])
    # =============== FPN END ===========

    self.conv3_1r = conv(input_dim[0], 128, kernel_size=3, stride=1)
    self.conv3_1t = conv(input_dim[0], 256, kernel_size=3, stride=1)
    self.conv3_2t = conv(256, pred_input_dim[0], kernel_size=3, stride=1)

    # ROI pooling at 1/8 feature stride.
    self.prroi_pool3r = PrRoIPool2D(3, 3, 1 / 8)
    self.prroi_pool3t = PrRoIPool2D(5, 5, 1 / 8)

    self.fc3_1r = conv(128, 256, kernel_size=3, stride=1, padding=0)

    self.conv4_1r = conv(input_dim[1], 256, kernel_size=3, stride=1)
    self.conv4_1t = conv(input_dim[1], 256, kernel_size=3, stride=1)
    self.conv4_2t = conv(256, pred_input_dim[1], kernel_size=3, stride=1)

    # ROI pooling at 1/16 feature stride.
    self.prroi_pool4r = PrRoIPool2D(1, 1, 1 / 16)
    self.prroi_pool4t = PrRoIPool2D(3, 3, 1 / 16)

    self.fc34_3r = conv(256 + 256, pred_input_dim[0], kernel_size=1, stride=1, padding=0)
    self.fc34_4r = conv(256 + 256, pred_input_dim[1], kernel_size=1, stride=1, padding=0)

    self.fc3_rt = LinearBlock(pred_input_dim[0], pred_inter_dim[0], 5)
    self.fc4_rt = LinearBlock(pred_input_dim[1], pred_inter_dim[1], 3)

    # Scalar IoU from the concatenated features.
    self.iou_predictor = nn.Linear(pred_inter_dim[0] + pred_inter_dim[1], 1, bias=True)

    # Initialize weights.
    for module in self.modules():
        if isinstance(module, (nn.Conv2d, nn.ConvTranspose2d, nn.Linear)):
            nn.init.kaiming_normal_(module.weight.data, mode='fan_in')
            if module.bias is not None:
                module.bias.data.zero_()