Esempio n. 1
0
    def __init__(self,
                 input_dim=(128, 256),
                 pred_input_dim=(256, 256),
                 pred_inter_dim=(256, 256)):
        """Build reference (_r) / test (_t) feature towers and a width/height head.

        Args:
            input_dim: channels of the layer3/layer4 backbone features.
            pred_input_dim: channels fed into the linear blocks.
            pred_inter_dim: channels after the linear blocks.
        """
        super().__init__()

        # Layer3 towers (stride-8 features).
        self.conv3_1r = conv(input_dim[0], 128, kernel_size=3, stride=1)
        self.conv3_1t = conv(input_dim[0], 256, kernel_size=3, stride=1)
        self.conv3_2t = conv(256, pred_input_dim[0], kernel_size=3, stride=1)

        # RoI pooling on the stride-8 map: 3x3 reference, 5x5 test.
        self.prroi_pool3r = PrRoIPool2D(3, 3, 1 / 8)
        self.prroi_pool3t = PrRoIPool2D(5, 5, 1 / 8)

        self.fc3_1r = conv(128, 256, kernel_size=3, stride=1, padding=0)

        # Layer4 towers (stride-16 features).
        self.conv4_1r = conv(input_dim[1], 256, kernel_size=3, stride=1)
        self.conv4_1t = conv(input_dim[1], 256, kernel_size=3, stride=1)
        self.conv4_2t = conv(256, pred_input_dim[1], kernel_size=3, stride=1)

        # RoI pooling on the stride-16 map: 1x1 reference, 3x3 test.
        self.prroi_pool4r = PrRoIPool2D(1, 1, 1 / 16)
        self.prroi_pool4t = PrRoIPool2D(3, 3, 1 / 16)

        # 1x1 convs fusing the pooled reference features from both layers.
        self.fc34_3r = conv(256 + 256, pred_input_dim[0], kernel_size=1, stride=1, padding=0)
        self.fc34_4r = conv(256 + 256, pred_input_dim[1], kernel_size=1, stride=1, padding=0)

        # Linear blocks over the pooled test features (36 / 18 spatial cells).
        self.fc3_rt = LinearBlock(pred_input_dim[0], pred_inter_dim[0], 36)
        self.fc4_rt = LinearBlock(pred_input_dim[1], pred_inter_dim[1], 18)

        # Predict (width, height) from the concatenated linear-block outputs.
        self.wh_predictor = nn.Linear(pred_inter_dim[0] + pred_inter_dim[1],
                                      2,
                                      bias=True)
        self.sigmoid = nn.Sigmoid()

        # He init for conv/transposed-conv/linear weights, zero biases.
        for module in self.modules():
            if isinstance(module, (nn.Conv2d, nn.ConvTranspose2d, nn.Linear)):
                nn.init.kaiming_normal_(module.weight.data, mode='fan_in')
                if module.bias is not None:
                    module.bias.data.zero_()
            elif isinstance(module, nn.BatchNorm2d):
                # Match the pre-1.2 PyTorch default BN init: weight ~ U(0, 1).
                module.weight.data.uniform_()
                module.bias.data.zero_()
Esempio n. 2
0
    def __init__(self, input_dim=(128,256), pred_input_dim=(256,256), pred_inter_dim=(256,256)):
        """Build reference (_r) / test (_t) feature towers and an IoU predictor."""
        super().__init__()

        # Layer3 towers (stride-8 features).
        self.conv3_1r = conv(input_dim[0], 128, kernel_size=3, stride=1)
        self.conv3_1t = conv(input_dim[0], 256, kernel_size=3, stride=1)
        self.conv3_2t = conv(256, pred_input_dim[0], kernel_size=3, stride=1)

        # RoI pooling on the stride-8 map: 3x3 reference, 5x5 test.
        self.prroi_pool3r = PrRoIPool2D(3, 3, 1 / 8)
        self.prroi_pool3t = PrRoIPool2D(5, 5, 1 / 8)

        self.fc3_1r = conv(128, 256, kernel_size=3, stride=1, padding=0)

        # Layer4 towers (stride-16 features).
        self.conv4_1r = conv(input_dim[1], 256, kernel_size=3, stride=1)
        self.conv4_1t = conv(input_dim[1], 256, kernel_size=3, stride=1)
        self.conv4_2t = conv(256, pred_input_dim[1], kernel_size=3, stride=1)

        # RoI pooling on the stride-16 map: 1x1 reference, 3x3 test.
        self.prroi_pool4r = PrRoIPool2D(1, 1, 1 / 16)
        self.prroi_pool4t = PrRoIPool2D(3, 3, 1 / 16)

        # 1x1 convs fusing the pooled reference features from both layers.
        self.fc34_3r = conv(256 + 256, pred_input_dim[0], kernel_size=1, stride=1, padding=0)
        self.fc34_4r = conv(256 + 256, pred_input_dim[1], kernel_size=1, stride=1, padding=0)

        # Linear blocks over the 5x5 / 3x3 pooled test features.
        self.fc3_rt = LinearBlock(pred_input_dim[0], pred_inter_dim[0], 5)
        self.fc4_rt = LinearBlock(pred_input_dim[1], pred_inter_dim[1], 3)

        # Scalar IoU prediction from the concatenated linear-block outputs.
        self.iou_predictor = nn.Linear(pred_inter_dim[0] + pred_inter_dim[1], 1, bias=True)

        # He init for conv/transposed-conv/linear weights, zero biases.
        for module in self.modules():
            if isinstance(module, (nn.Conv2d, nn.ConvTranspose2d, nn.Linear)):
                nn.init.kaiming_normal_(module.weight.data, mode='fan_in')
                if module.bias is not None:
                    module.bias.data.zero_()
Esempio n. 3
0
    def __init__(self,
                 input_dim=(32, 64),
                 pred_input_dim=(64, 64),
                 pred_inter_dim=(64, 64),
                 cpu=False):
        """Lightweight IoU head with a 4-value box predictor.

        Args:
            input_dim: channels of the layer3/layer4 backbone features.
            pred_input_dim: channels fed into the linear blocks.
            pred_inter_dim: channels after the linear blocks.
            cpu: use RoIPool instead of the CUDA-only PrRoIPool2D.
        """
        super().__init__(input_dim, pred_input_dim, pred_inter_dim)
        # _r for reference, _t for test
        self.conv3_1r = conv(input_dim[0], 32, kernel_size=3, stride=1)
        self.conv3_1t = conv(input_dim[0], 64, kernel_size=3, stride=1)

        self.conv3_2t = conv(64, pred_input_dim[0], kernel_size=3, stride=1)

        if cpu:
            self.prroi_pool3r = RoIPool((3, 3), 1 / 8)
            self.prroi_pool3t = RoIPool((5, 5), 1 / 8)
        else:
            self.prroi_pool3r = PrRoIPool2D(3, 3, 1 / 8)
            self.prroi_pool3t = PrRoIPool2D(5, 5, 1 / 8)

        self.fc3_1r = conv(32, 64, kernel_size=3, stride=1, padding=0)

        self.conv4_1r = conv(input_dim[1], 64, kernel_size=3, stride=1)
        self.conv4_1t = conv(input_dim[1], 64, kernel_size=3, stride=1)

        self.conv4_2t = conv(64, pred_input_dim[1], kernel_size=3, stride=1)

        # Bug fix: previously the cpu flag only swapped the layer3 pooling,
        # leaving the CUDA-only PrRoIPool2D in the layer4 path and breaking
        # CPU-only execution (the sibling head in this file guards both).
        if cpu:
            self.prroi_pool4r = RoIPool((1, 1), 1 / 16)
            self.prroi_pool4t = RoIPool((3, 3), 1 / 16)
        else:
            self.prroi_pool4r = PrRoIPool2D(1, 1, 1 / 16)
            self.prroi_pool4t = PrRoIPool2D(3, 3, 1 / 16)

        # 1x1 convs fusing the pooled reference features from both layers.
        self.fc34_3r = conv(64 + 64,
                            pred_input_dim[0],
                            kernel_size=1,
                            stride=1,
                            padding=0)
        self.fc34_4r = conv(64 + 64,
                            pred_input_dim[1],
                            kernel_size=1,
                            stride=1,
                            padding=0)

        # Linear blocks over the 5x5 / 3x3 pooled test features.
        self.fc3_rt = LinearBlock(pred_input_dim[0], pred_inter_dim[0], 5)
        self.fc4_rt = LinearBlock(pred_input_dim[1], pred_inter_dim[1], 3)

        # Predict the 4 box values from the concatenated linear-block outputs.
        self.box_predictor = nn.Linear(pred_inter_dim[0] + pred_inter_dim[1],
                                       4,
                                       bias=True)

        # He init for conv/transposed-conv/linear weights, zero biases.
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d, nn.Linear)):
                nn.init.kaiming_normal_(m.weight.data, mode='fan_in')
                if m.bias is not None:
                    m.bias.data.zero_()
Esempio n. 4
0
 def __init__(self, pool_size=8, use_NL=True):
     """Pooled-correlation attention: SE channel attention + non-local spatial attention."""
     super().__init__()
     # Pool on the stride-16 feature map; one correlation channel per pooled cell.
     self.prroi_pool = PrRoIPool2D(pool_size, pool_size, 1 / 16)
     corr_channels = pool_size * pool_size
     self.channel_attention = SEModule(corr_channels, reduction=4)
     self.spatial_attention = NONLocalBlock2D(in_channels=corr_channels)
     self.use_NL = use_NL
Esempio n. 5
0
    def __init__(self, input_dim=(128,256), pred_input_dim=(128,256)):
        """Two-scale feature towers with PrRoI pooling and linear projections."""
        super().__init__()
        # Layer3 tower (stride 8) and layer4 tower (stride 16), two convs each.
        self.conv3_1 = conv(input_dim[0], pred_input_dim[0], kernel_size=3, stride=1)
        self.conv3_2 = conv(pred_input_dim[0], pred_input_dim[0], kernel_size=3, stride=1)
        self.conv4_1 = conv(input_dim[1], pred_input_dim[1], kernel_size=3, stride=1)
        self.conv4_2 = conv(pred_input_dim[1], pred_input_dim[1], kernel_size=3, stride=1)

        self.prroi_pool3 = PrRoIPool2D(8, 8, 1 / 8)
        self.prroi_pool4 = PrRoIPool2D(4, 4, 1 / 16)

        # Features are L2-normalized, so batch norm is unnecessary; with
        # relu=True the resulting linear system easily becomes non-invertible.
        self.fc3 = LinearBlock(pred_input_dim[0], 512, 8, batch_norm=False, relu=False)
        self.fc4 = LinearBlock(pred_input_dim[1], 512, 4, batch_norm=False, relu=False)

        # He init for conv/transposed-conv/linear weights, zero biases.
        for module in self.modules():
            if isinstance(module, (nn.Conv2d, nn.ConvTranspose2d, nn.Linear)):
                nn.init.kaiming_normal_(module.weight.data, mode='fan_in')
                if module.bias is not None:
                    module.bias.data.zero_()
Esempio n. 6
0
    def __init__(self, input_dim=(128,256), pred_input_dim=(128,256)):
        """Deeper two-scale feature towers (four convs each) with linear projections."""
        super().__init__()
        # Four stacked 3x3 convs per scale; the first adapts the channel count.
        # Construction order (conv3_1..conv3_4, then conv4_1..conv4_4) matches
        # the original explicit assignments.
        for branch, in_ch, out_ch in (('conv3', input_dim[0], pred_input_dim[0]),
                                      ('conv4', input_dim[1], pred_input_dim[1])):
            setattr(self, branch + '_1', conv(in_ch, out_ch, kernel_size=3, stride=1))
            for idx in (2, 3, 4):
                setattr(self, f'{branch}_{idx}',
                        conv(out_ch, out_ch, kernel_size=3, stride=1))

        self.prroi_pool3 = PrRoIPool2D(8, 8, 1 / 8)
        self.prroi_pool4 = PrRoIPool2D(4, 4, 1 / 16)

        # No batch norm / relu on the linear blocks.
        self.fc3 = LinearBlock(pred_input_dim[0], 512, 8, batch_norm=False, relu=False)
        self.fc4 = LinearBlock(pred_input_dim[1], 512, 4, batch_norm=False, relu=False)

        # He init for conv/transposed-conv/linear weights, zero biases.
        for module in self.modules():
            if isinstance(module, (nn.Conv2d, nn.ConvTranspose2d, nn.Linear)):
                nn.init.kaiming_normal_(module.weight.data, mode='fan_in')
                if module.bias is not None:
                    module.bias.data.zero_()
Esempio n. 7
0
 def __init__(self, pool_size=8, use_post_corr=True, use_NL=True):
     """Correlation refinement: PrRoI pooling plus channel/spatial attention.

     The third PrRoIPool2D argument is the ratio of the feature map to the
     original image (the down-sampling rate). layer2 has stride 8 and
     layer3 has stride 16: with a 256x256 input, layer3 outputs 16x16 and
     the target covers roughly 8x8.
     NOTE: re-tune these parameters if the input resolution or the backbone
     layer in use changes.
     """
     super().__init__()
     self.prroi_pool = PrRoIPool2D(pool_size, pool_size, 1 / 16)
     num_corr_channel = pool_size * pool_size
     # Newly added: project backbone features into the 64-d correlation space.
     self.adjust_layer = conv(1024, 64)
     # Fix: channel_attention was constructed twice (the second assignment
     # discarded this instance's twin); it is now built exactly once.
     self.channel_attention = SEModule(num_corr_channel, reduction=4)
     self.use_post_corr = use_post_corr
     if use_post_corr:
         # 1x1 conv stack refining the raw correlation response.
         self.post_corr = nn.Sequential(
             nn.Conv2d(64, 128, kernel_size=(1, 1), padding=0, stride=1),
             nn.BatchNorm2d(128),
             nn.ReLU(),
             nn.Conv2d(128, 128, kernel_size=(1, 1), padding=0, stride=1),
             nn.BatchNorm2d(128),
             nn.ReLU(),
             nn.Conv2d(128, 64, kernel_size=(1, 1), padding=0, stride=1),
             nn.BatchNorm2d(64),
             nn.ReLU(),
         )
     self.use_NL = use_NL
     # `is True` / `is False` kept on purpose: any non-bool value falls
     # through to an identity (empty) spatial attention, as before.
     if self.use_NL is True:
         self.spatial_attention = NONLocalBlock2D(
             in_channels=num_corr_channel)
     elif self.use_NL is False:
         # Plain 1x1 conv stack as a non-local replacement.
         self.spatial_attention = nn.Sequential(
             nn.Conv2d(64, 64, kernel_size=(1, 1), padding=0, stride=1),
             nn.BatchNorm2d(64),
             nn.ReLU(),
             nn.Conv2d(64, 64, kernel_size=(1, 1), padding=0, stride=1),
             nn.BatchNorm2d(64),
             nn.ReLU(),
             nn.Conv2d(64, 64, kernel_size=(1, 1), padding=0, stride=1),
             nn.BatchNorm2d(64),
         )
     else:
         self.spatial_attention = nn.Sequential()
Esempio n. 8
0
 def __init__(self, filter_size=1, feature_stride=16, pool_square=False):
     """PrRoI-pool a `filter_size` window from a stride-`feature_stride` feature map."""
     super().__init__()
     spatial_scale = 1 / feature_stride
     self.prroi_pool = PrRoIPool2D(filter_size, filter_size, spatial_scale)
     self.pool_square = pool_square
Esempio n. 9
0
    def __init__(self,
                 settings=None,
                 input_dim=(128, 256),
                 pred_input_dim=(256, 256),
                 pred_inter_dim=(256, 256)):
        """IoU head with optional depth-aware convolutions.

        Args:
            settings: config object; `settings.depthaware_for_iounet` selects
                DepthConvModule over plain conv for the feature towers.
            input_dim: channels of the layer3/layer4 backbone features.
            pred_input_dim: channels fed into the linear blocks.
            pred_inter_dim: channels after the linear blocks.
        """
        super().__init__()

        self.settings = settings
        self.depthconv = self.settings.depthaware_for_iounet
        # _r for reference, _t for test

        # Layer3 towers: depth-aware variants when enabled.
        if self.depthconv:
            self.conv3_1r = DepthConvModule(input_dim[0],
                                            128,
                                            kernel_size=3,
                                            stride=1)
            self.conv3_1t = DepthConvModule(input_dim[0],
                                            256,
                                            kernel_size=3,
                                            stride=1)
        else:
            self.conv3_1r = conv(input_dim[0], 128, kernel_size=3, stride=1)
            self.conv3_1t = conv(input_dim[0], 256, kernel_size=3, stride=1)
        # conv3_2t was identical in both branches, so it is built once.
        self.conv3_2t = conv(256, pred_input_dim[0], kernel_size=3, stride=1)

        self.prroi_pool3r = PrRoIPool2D(3, 3, 1 / 8)
        self.prroi_pool3t = PrRoIPool2D(5, 5, 1 / 8)

        # Dead `if False:` branch (DepthConvModule variant) removed.
        self.fc3_1r = conv(128, 256, kernel_size=3, stride=1, padding=0)

        # Layer4 towers: depth-aware variants when enabled.
        if self.depthconv:
            self.conv4_1r = DepthConvModule(input_dim[1],
                                            256,
                                            kernel_size=3,
                                            stride=1)
            self.conv4_1t = DepthConvModule(input_dim[1],
                                            256,
                                            kernel_size=3,
                                            stride=1)
        else:
            self.conv4_1r = conv(input_dim[1], 256, kernel_size=3, stride=1)
            self.conv4_1t = conv(input_dim[1], 256, kernel_size=3, stride=1)

        self.conv4_2t = conv(256, pred_input_dim[1], kernel_size=3, stride=1)

        self.prroi_pool4r = PrRoIPool2D(1, 1, 1 / 16)
        self.prroi_pool4t = PrRoIPool2D(3, 3, 1 / 16)

        # Dead `if False:` branches removed: the plain convs were always used.
        self.fc34_3r = conv(256 + 256,
                            pred_input_dim[0],
                            kernel_size=1,
                            stride=1,
                            padding=0)
        self.fc34_4r = conv(256 + 256,
                            pred_input_dim[1],
                            kernel_size=1,
                            stride=1,
                            padding=0)

        # Linear blocks over the 5x5 / 3x3 pooled test features.
        self.fc3_rt = LinearBlock(pred_input_dim[0], pred_inter_dim[0], 5)
        self.fc4_rt = LinearBlock(pred_input_dim[1], pred_inter_dim[1], 3)

        # Scalar IoU prediction from the concatenated linear-block outputs.
        self.iou_predictor = nn.Linear(pred_inter_dim[0] + pred_inter_dim[1],
                                       1,
                                       bias=True)

        # Init weights: He init for conv/linear, pre-1.2 PyTorch default for BN.
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d, nn.Linear)):
                nn.init.kaiming_normal_(m.weight.data, mode='fan_in')
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                # In pytorch 1.1 and earlier BN weight defaulted to U(0, 1);
                # keep that initialization for reproducibility.
                m.weight.data.uniform_()
                m.bias.data.zero_()
Esempio n. 10
0
    def __init__(self,
                 input_dim=(16, 32),
                 pred_input_dim=(32, 32),
                 pred_inter_dim=(32, 32),
                 cpu=False):
        """Tiny IoU head (_r reference / _t test branches).

        The spatial-size comments assume 36x36 layer3 and 18x18 layer4
        inputs — confirm against the backbone if the resolution changes.
        """
        super().__init__(input_dim, pred_input_dim, pred_inter_dim)

        # Layer3 branch: 36x36 maps, 16 (_r) / 32 (_t) channels.
        self.conv3_1r = conv(input_dim[0], 16, kernel_size=3, stride=1)
        self.conv3_1t = conv(input_dim[0], 32, kernel_size=3, stride=1)
        self.conv3_2t = conv(32, pred_input_dim[0], kernel_size=3, stride=1)

        # Stride-8 pooling: 3x3 for reference, 5x5 for test.
        if cpu:
            self.prroi_pool3r = RoIPool((3, 3), 1 / 8)
            self.prroi_pool3t = RoIPool((5, 5), 1 / 8)
        else:
            self.prroi_pool3r = PrRoIPool2D(3, 3, 1 / 8)
            self.prroi_pool3t = PrRoIPool2D(5, 5, 1 / 8)

        # 3x3x16 -> 1x1x32 (padding=0 collapses the 3x3 window).
        self.fc3_1r = conv(16, 32, kernel_size=3, stride=1, padding=0)

        # Layer4 branch: 18x18 maps, 32 channels.
        self.conv4_1r = conv(input_dim[1], 32, kernel_size=3, stride=1)
        self.conv4_1t = conv(input_dim[1], 32, kernel_size=3, stride=1)
        self.conv4_2t = conv(32, pred_input_dim[1], kernel_size=3, stride=1)

        # Stride-16 pooling: 1x1 for reference, 3x3 for test.
        if cpu:
            self.prroi_pool4r = RoIPool((1, 1), 1 / 16)
            self.prroi_pool4t = RoIPool((3, 3), 1 / 16)
        else:
            self.prroi_pool4r = PrRoIPool2D(1, 1, 1 / 16)
            self.prroi_pool4t = PrRoIPool2D(3, 3, 1 / 16)

        # 1x1 convs fusing the pooled reference features (64 -> 32 channels).
        self.fc34_3r = conv(32 + 32, pred_input_dim[0], kernel_size=1, stride=1, padding=0)
        self.fc34_4r = conv(32 + 32, pred_input_dim[1], kernel_size=1, stride=1, padding=0)

        # Linear blocks over the 5x5 / 3x3 pooled test features.
        self.fc3_rt = LinearBlock(pred_input_dim[0], pred_inter_dim[0], 5)
        self.fc4_rt = LinearBlock(pred_input_dim[1], pred_inter_dim[1], 3)

        # Scalar IoU prediction from the concatenated linear-block outputs.
        self.iou_predictor = nn.Linear(pred_inter_dim[0] + pred_inter_dim[1], 1, bias=True)

        # He init for conv/linear weights; pre-1.2 PyTorch U(0,1) for BN weights.
        for module in self.modules():
            if isinstance(module, (nn.Conv2d, nn.ConvTranspose2d, nn.Linear)):
                nn.init.kaiming_normal_(module.weight.data, mode='fan_in')
                if module.bias is not None:
                    module.bias.data.zero_()
            elif isinstance(module, nn.BatchNorm2d):
                module.weight.data.uniform_()
                module.bias.data.zero_()
Esempio n. 11
0
    def __init__(self,
                 pool_size=5,
                 filter_dim=4,
                 filter_channel=256,
                 inner_channel=256,
                 input_features_size=72,
                 input_features_channel=256,
                 filter_optimizer=None,
                 train_reg_optimizer=False,
                 train_cls_72_and_reg_init=True):
        """Regression-filter module with 72- and 36-resolution branches.

        The four conv/GN/ReLU/DCN towers were structurally identical
        copy-pasted literals; they are now built by one local factory, and
        the duplicated merge-weight initialization is shared by a helper.
        Construction order is unchanged, so random init draws match the
        original implementation.
        """
        super(RegFilter, self).__init__()
        self.pool_size = pool_size
        self.filter_channel = filter_channel
        self.filter_dim = filter_dim
        self.input_features_size = input_features_size
        self.input_features_channel = input_features_channel
        self.filter_optimizer = filter_optimizer
        self.train_cls_72_and_reg_init = train_cls_72_and_reg_init

        def _make_tower(out_channels):
            # conv -> GN -> ReLU -> DCN -> GN -> ReLU -> DCN -> ReLU tower.
            return nn.Sequential(
                nn.Conv2d(input_features_channel,
                          inner_channel,
                          3,
                          1,
                          1,
                          bias=False),
                nn.GroupNorm(32, inner_channel),
                nn.ReLU(),
                DCN(inner_channel,
                    inner_channel,
                    kernel_size=(3, 3),
                    stride=1,
                    padding=1,
                    dilation=1,
                    deformable_groups=1),
                nn.GroupNorm(32, inner_channel),
                nn.ReLU(),
                DCN(inner_channel,
                    out_channels,
                    kernel_size=(3, 3),
                    stride=1,
                    padding=1,
                    dilation=1,
                    deformable_groups=1),
                nn.ReLU(),
            )

        # Filter initializers at the two resolutions; the pooling scale is
        # the branch resolution relative to the 288-px input.
        self.reg_initializer_72 = _make_tower(filter_channel * filter_dim)
        self.prroipool_72 = PrRoIPool2D(pool_size, pool_size, 72 / 288.0)

        self.reg_initializer_36 = _make_tower(filter_channel * filter_dim)
        self.prroipool_36 = PrRoIPool2D(pool_size, pool_size, 36 / 288.0)
        self.reg_initializer_merge = nn.Conv2d(filter_channel * filter_dim * 2,
                                               filter_channel * filter_dim,
                                               1,
                                               1,
                                               bias=False)

        # Feature heads at the two resolutions, plus their 1x1 merge.
        self.reg_head_72 = _make_tower(filter_channel)
        self.reg_head_36 = _make_tower(filter_channel)
        self.reg_head_merge = nn.Conv2d(filter_channel * 2, filter_channel, 1,
                                        1)

        if train_reg_optimizer:
            # Freeze initializer/head weights when only the optimizer trains.
            for func_name in [
                    'reg_initializer_36', 'reg_initializer_72',
                    'reg_initializer_merge', 'reg_head_36', 'reg_head_72',
                    'reg_head_merge'
            ]:
                for p in getattr(self, func_name).parameters():
                    p.requires_grad_(False)

        # Init weights: He-style normal for convs, standard fill for BN.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

        def _init_avg_merge(w):
            # Make the 1x1 merge average the two stacked input halves:
            # out[i] = 0.5 * in[i] + 0.5 * in[i + out_channels].
            out_ch = w.size(0)
            w.zero_()
            for i in range(out_ch):
                w[i, i, 0, 0] = 0.5
                w[i, out_ch + i, 0, 0] = 0.5

        _init_avg_merge(self.reg_initializer_merge.weight.data)
        _init_avg_merge(self.reg_head_merge.weight.data)
Esempio n. 12
0
    def __init__(self,
                 input_dim=(128, 256),
                 pred_input_dim=(256, 256),
                 pred_inter_dim=(256, 256),
                 fpn_inter_dim=None,
                 share_rt=False):
        """IoU head with an FPN top-down pathway (_r reference / _t test).

        When ``share_rt`` is True the reference and test branches share the
        same lateral and context-texture modules.
        """
        super().__init__()
        # =============== FPN ===============
        if fpn_inter_dim is None:
            fpn_inter_dim = input_dim
        # A third input level enables the conv5 top-down step.
        self.add_conv5 = len(input_dim) == 3
        if self.add_conv5:
            self.conv5_lat_r = nn.Conv2d(input_dim[2], fpn_inter_dim[2], kernel_size=1)
            self.conv5_lat_t = self.conv5_lat_r if share_rt else nn.Conv2d(
                input_dim[2], fpn_inter_dim[2], kernel_size=1)
            self.conv5_ct_r = ContextTexture(up=fpn_inter_dim[2], main=input_dim[1])
            self.conv5_ct_t = self.conv5_ct_r if share_rt else ContextTexture(
                up=fpn_inter_dim[2], main=input_dim[1])

        self.conv4_lat_r = nn.Conv2d(input_dim[1], fpn_inter_dim[1], kernel_size=1)
        self.conv4_lat_t = self.conv4_lat_r if share_rt else nn.Conv2d(
            input_dim[1], fpn_inter_dim[1], kernel_size=1)
        self.conv4_ct_r = ContextTexture(up=fpn_inter_dim[1], main=input_dim[0])
        self.conv4_ct_t = self.conv4_ct_r if share_rt else ContextTexture(
            up=fpn_inter_dim[1], main=input_dim[0])
        # =============== FPN END ===========

        # Layer3 towers (stride-8 features).
        self.conv3_1r = conv(input_dim[0], 128, kernel_size=3, stride=1)
        self.conv3_1t = conv(input_dim[0], 256, kernel_size=3, stride=1)
        self.conv3_2t = conv(256, pred_input_dim[0], kernel_size=3, stride=1)
        self.prroi_pool3r = PrRoIPool2D(3, 3, 1 / 8)
        self.prroi_pool3t = PrRoIPool2D(5, 5, 1 / 8)
        self.fc3_1r = conv(128, 256, kernel_size=3, stride=1, padding=0)

        # Layer4 towers (stride-16 features).
        self.conv4_1r = conv(input_dim[1], 256, kernel_size=3, stride=1)
        self.conv4_1t = conv(input_dim[1], 256, kernel_size=3, stride=1)
        self.conv4_2t = conv(256, pred_input_dim[1], kernel_size=3, stride=1)
        self.prroi_pool4r = PrRoIPool2D(1, 1, 1 / 16)
        self.prroi_pool4t = PrRoIPool2D(3, 3, 1 / 16)

        # 1x1 convs fusing the pooled reference features from both layers.
        self.fc34_3r = conv(256 + 256, pred_input_dim[0], kernel_size=1, stride=1, padding=0)
        self.fc34_4r = conv(256 + 256, pred_input_dim[1], kernel_size=1, stride=1, padding=0)

        # Linear blocks over the 5x5 / 3x3 pooled test features.
        self.fc3_rt = LinearBlock(pred_input_dim[0], pred_inter_dim[0], 5)
        self.fc4_rt = LinearBlock(pred_input_dim[1], pred_inter_dim[1], 3)

        # Scalar IoU prediction from the concatenated linear-block outputs.
        self.iou_predictor = nn.Linear(pred_inter_dim[0] + pred_inter_dim[1], 1, bias=True)

        # He init for conv/transposed-conv/linear weights, zero biases.
        for module in self.modules():
            if isinstance(module, (nn.Conv2d, nn.ConvTranspose2d, nn.Linear)):
                nn.init.kaiming_normal_(module.weight.data, mode='fan_in')
                if module.bias is not None:
                    module.bias.data.zero_()