Example #1
    def __init__(self, maxdisp):
        super(PSMNet, self).__init__()
        self.maxdisp = maxdisp

        self.feature_extraction = feature_extraction()

        self.dres0 = nn.Sequential(convbn_3d(64, 32, 3, 1, 1),
                                   nn.ReLU(inplace=True),
                                   convbn_3d(32, 32, 3, 1, 1),
                                   nn.ReLU(inplace=True))

        self.dres1 = nn.Sequential(convbn_3d(32, 32, 3, 1, 1),
                                   nn.ReLU(inplace=True),
                                   convbn_3d(32, 32, 3, 1, 1))

        self.dres2 = hourglass(32)

        self.dres3 = hourglass(32)

        self.dres4 = hourglass(32)

        self.classif1 = nn.Sequential(
            convbn_3d(32, 32, 3, 1, 1), nn.ReLU(inplace=True),
            nn.Conv3d(32, 1, kernel_size=3, padding=1, stride=1, bias=False))

        self.classif2 = nn.Sequential(
            convbn_3d(32, 32, 3, 1, 1), nn.ReLU(inplace=True),
            nn.Conv3d(32, 1, kernel_size=3, padding=1, stride=1, bias=False))

        self.classif3 = nn.Sequential(
            convbn_3d(32, 32, 3, 1, 1), nn.ReLU(inplace=True),
            nn.Conv3d(32, 1, kernel_size=3, padding=1, stride=1, bias=False))
        net_init_v0(self)
        print("[***] PSMNet Weights inilization done!")
Example #2
    def __init__(
            self,
            maxdisp=192,
            sigma_s=0.7,  # spatial sigma; e.g., 1.7 -> 13 x 13 window, 0.3 -> 3 x 3 window
            sigma_v=0.1,
            isEmbed=True,
            dilation=1,
            cost_filter_grad=False):
        super(AttenStereoNet, self).__init__(maxdisp=maxdisp)
        #self.maxdisp = maxdisp

        self.isEmbed = isEmbed  # True or False
        self.sigma_s = sigma_s
        self.sigma_v = sigma_v
        self.dilation = dilation
        self.cost_filter_grad = cost_filter_grad
        """ embedding network """
        if self.isEmbed:
            print(' Enable Embedding Network!!!')
            self.embednet = embed_net()
            #the module layer:
            self.bifilter = bilateralFilter(sigma_s,
                                            sigma_v,
                                            isCUDA=True,
                                            dilation=self.dilation)
        else:
            self.embednet = None
            self.bifilter = None

        net_init_v0(self)
        print("[***] AttenStereoNet weights inilization done!")
    def __init__(
            self,
            maxdisp=192,
            kernel_size=5,
            isPAC=True,
            isEmbed=False,
            pac_in_channels=64,  # e.g., == cost_volume_channel
            pac_out_channels=64,
            dilation=2,
            cost_filter_grad=True,
            native_impl=True):

        super(AttenStereoNet, self).__init__(maxdisp=maxdisp)

        self.isPAC = isPAC  # True or False
        self.isEmbed = isEmbed  # True or False
        self.k = kernel_size
        self.pac_in_ch = pac_in_channels
        self.pac_out_ch = pac_out_channels
        self.d = dilation
        self.pad = dilation * (kernel_size - 1) // 2
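        # "same" padding for an odd kernel at stride 1: with the defaults kernel_size=5, dilation=2, pad = 2*(5-1)//2 = 4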
        self.cost_filter_grad = cost_filter_grad
        """ pixel-adaptive convolution (PAC) network """
        if self.isPAC:
            self.pacconv = PacConv2d(self.pac_in_ch,
                                     self.pac_out_ch,
                                     kernel_size=self.k,
                                     stride=1,
                                     padding=self.pad,
                                     dilation=self.d,
                                     native_impl=native_impl)
            if native_impl:
                print(' Enable native-implementation Pixel-Adaptive Convolution (NPAC) Network!!!')
            else:
                print(' Enable filter-implementation Pixel-Adaptive Convolution (PAC) Network!!!')

        else:
            print('[!!!] No PAC Network!!')
            self.pacconv = None
        """ embedding network """
        if self.isEmbed:
            print(' PAC adapting feature f comes from a separate Embedding Network!!!')
            self.embednet = embed_net()

        else:
            self.embednet = None

        net_init_v0(self)
        print("[***] AttenStereoNet weights inilization done!")
        """ the GANet initilization is omitted due to inheritance from GANet """
    def __init__(
        self,
        maxdisp=192,
        kernel_size=9,
        isPAC=True,
        isEmbed=False,
        pac_in_channels=64,  # e.g., == cost_volume_channel
        pac_out_channels=64,
        dilation=1,
        cost_filter_grad=True,
        native_impl=True,
        is_kendall_version=True,  # exactly following the structure in Kendall's GCNet paper;
        is_quarter_size_cost_volume_gcnet=False  # cost volume in quarter image size, i.e., [D/4, H/4, W/4]
    ):

        super(AttenStereoNet, self).__init__(
            maxdisp=maxdisp,
            #newly added arguments:
            is_kendall_version=is_kendall_version,
            is_quarter_size_cost_volume_gcnet=is_quarter_size_cost_volume_gcnet
        )
        self.isPAC = isPAC  # True or False
        self.isEmbed = isEmbed  # True or False
        self.k = kernel_size
        self.pac_in_ch = pac_in_channels
        self.pac_out_ch = pac_out_channels
        self.d = dilation
        self.pad = dilation * (kernel_size - 1) // 2
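        # "same" padding for an odd kernel at stride 1: with the defaults kernel_size=9, dilation=1, pad = 1*(9-1)//2 = 4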
        self.cost_filter_grad = cost_filter_grad
        """ pixel-adaptive convolution (PAC) network """
        if self.isPAC:
            self.pacconv = PacConv2d(self.pac_in_ch,
                                     self.pac_out_ch,
                                     kernel_size=self.k,
                                     stride=1,
                                     padding=self.pad,
                                     dilation=self.d,
                                     native_impl=native_impl)
            if native_impl:
                print(' Enable native-implementation Pixel-Adaptive Convolution (NPAC) Network!!!')
            else:
                print(' Enable filter-implementation Pixel-Adaptive Convolution (PAC) Network!!!')

        else:
            print('[!!!] No PAC Network!!')
            self.pacconv = None
        """ the followind initilization is omitted due to inheritance from GCNet """
        net_init_v0(self)
        print("[***] attenStereoNet_pac_gcnet weights inilization done!")
Example #5
    def __init__(
        self,
        maxdisp=192,
        is_sga_guide_from_img=True,
        #is_quarter_size = True, # feature in 1/4 image size (i.e., H/4 x W/4) or 1/3 size (i.e., H/3 x W/3)
        downsample_scale=4,
        is_lga=False,  # generate LGA(Local Guided Aggregation) weights or not
        cost_filter_grad=False,
        is_kendall_version=True,  # exactly following the structure in Kendall's GCNet paper;
        is_quarter_size_cost_volume_gcnet=False  # cost volume in quarter image size, i.e., [D/4, H/4, W/4]
    ):

        super(AttenStereoNet, self).__init__(
            maxdisp=maxdisp,
            #newly added arguments:
            is_kendall_version=is_kendall_version,
            is_quarter_size_cost_volume_gcnet=is_quarter_size_cost_volume_gcnet
        )

        self.downsample_scale = downsample_scale  # dummy value; overridden on the next line
        self.downsample_scale = 4 if is_quarter_size_cost_volume_gcnet else 2
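        # GCNet cost volume is at 1/4 image resolution when is_quarter_size_cost_volume_gcnet is True, otherwise at 1/2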
        print("SGA + GCNet: set downsample_scale = %d" % self.downsample_scale)
        self.is_sga_guide_from_img = is_sga_guide_from_img  # True or False
        self.cost_filter_grad = cost_filter_grad
        #self.is_quarter_size = is_quarter_size
        self.is_lga = is_lga
        if self.is_sga_guide_from_img:
            print('is_sga_guide_from_img = True !!!')
            self.embednet = None
        else:
            """ embedding network """
            print('is_sga_guide_from_img = False !!!')
            print('SGA_CostAggregation uses Embedding Network!!!')
            self.embednet = embed_net()

        self.sga_costAgg = SGA_CostAggregation(
            self.is_sga_guide_from_img,
            #self.is_quarter_size,
            self.downsample_scale,
            self.is_lga,
            cost_volume_in_channels=64)
        """ the followind initilization is omitted due to inheritance from GCNet """
        net_init_v0(self)
        print("[***] attenStereoNet_sga_gcnet weights inilization done!")
    def __init__(
        self,
        maxdisp=192,
        kernel_size=5,
        crop_img_h=256,
        crop_img_w=512,
        isDFN=True,
        dilation=2,
        cost_filter_grad=False,
        is_kendall_version=True,  # exactly following the structure in Kendall's GCNet paper;
        is_quarter_size_cost_volume_gcnet=False  # cost volume in quarter image size, i.e., [D/4, H/4, W/4]
    ):

        super(AttenStereoNet, self).__init__(
            maxdisp=maxdisp,
            #newly added arguments:
            is_kendall_version=is_kendall_version,
            is_quarter_size_cost_volume_gcnet=is_quarter_size_cost_volume_gcnet
        )

        self.isDFN = isDFN  # True or False
        self.kernel_size = kernel_size
        self.dilation = dilation
        self.cost_filter_grad = cost_filter_grad
        """ dynamic filter network """
        if self.isDFN:
            print(' Enable Dynamic Filter Network!!!')
            self.dfn_generator = filterGenerator(
                F=32,
                dynamic_filter_size=(kernel_size, kernel_size),
                in_channels=3)
            #the module layer:
            self.dfn_layer = DynamicFilterLayer(kernel_size, dilation)
        else:
            print('[!!!] No dfn_generator and dfn_layer!!')
            self.dfn_generator = None
            self.dfn_layer = None
        """ the followind initilization is omitted due to inheritance from GCNet """
        net_init_v0(self)
        print("[***] attenStereoNet_dfn_gcnet weights inilization done!")
Example #7
    def __init__(
        self,
        maxdisp=192,
        sigma_s=0.7,  # spatial sigma; e.g., 1.7 -> 13 x 13 window, 0.3 -> 3 x 3 window
        sigma_v=0.1,
        isEmbed=True,
        dilation=1,
        cost_filter_grad=False,
        is_kendall_version=True,  # exactly following the structure in Kendall's GCNet paper;
        is_quarter_size_cost_volume_gcnet=False  # cost volume in quarter image size, i.e., [D/4, H/4, W/4]
    ):

        super(AttenStereoNet, self).__init__(
            maxdisp=maxdisp,
            #newly added arguments:
            is_kendall_version=is_kendall_version,
            is_quarter_size_cost_volume_gcnet=is_quarter_size_cost_volume_gcnet
        )

        self.isEmbed = isEmbed  # True or False
        self.sigma_s = sigma_s
        self.sigma_v = sigma_v
        self.dilation = dilation
        self.cost_filter_grad = cost_filter_grad
        """ embedding network """
        if self.isEmbed:
            print(' Enable Embedding Network!!!')
            self.embednet = embed_net()
            self.bifilter = bilateralFilter(sigma_s,
                                            sigma_v,
                                            isCUDA=True,
                                            dilation=self.dilation)
        else:
            self.embednet = None
            self.bifilter = None
        """ the followind initilization is omitted due to inheritance from GCNet """
        net_init_v0(self)
        print("[***] attenStereoNet_embed_gcnet weights inilization done!")
    def __init__(
        self,
        maxdisp=192,
        is_sga_guide_from_img=True,
        #is_quarter_size = True, # feature in 1/4 image size (i.e., H/4 x W/4) or 1/3 size (i.e., H/3 x W/3)
        downsample_scale=4,  # dummy argument; overridden in __init__
        is_lga=False,  # generate LGA(Local Guided Aggregation) weights or not
        cost_filter_grad=False):
        super(AttenStereoNet, self).__init__(maxdisp=maxdisp)
        # the downsample_scale argument is a dummy; the value is hard-coded to 4 here
        self.downsample_scale = 4
        print("SGA + PSMNet: set downsample_scale = %d" %
              self.downsample_scale)
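        # PSMNet builds its cost volume at 1/4 image resolution, hence the fixed value of 4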

        self.is_sga_guide_from_img = is_sga_guide_from_img  # True or False
        self.cost_filter_grad = cost_filter_grad
        #self.is_quarter_size = is_quarter_size
        self.is_lga = is_lga

        if self.is_sga_guide_from_img:
            print('is_sga_guide_from_img = True !!!')
            self.embednet = None
        else:
            """ embedding network """
            print('is_sga_guide_from_img = False !!!')
            print('SGA_CostAggregation uses Embedding Network!!!')
            self.embednet = embed_net()

        self.sga_costAgg = SGA_CostAggregation(
            self.is_sga_guide_from_img,
            #self.is_quarter_size,
            self.downsample_scale,
            self.is_lga,
            cost_volume_in_channels=64)
        """ the followind initilization is omitted due to inheritance from PSMNet """
        net_init_v0(self)
        print("[***] attenStereoNet_sga_psmnet weights inilization done!")
Example #9
    def __init__(
            self,
            F=32,
            dynamic_filter_size=(9, 9),
            #img_size = (256, 512),
            in_channels=3,
            is_sync_bn=False):
        super(filterGenerator, self).__init__()
        self.F = F
        self.kernel_h = dynamic_filter_size[0]
        self.kernel_w = dynamic_filter_size[1]
        self.in_channels = in_channels
        """encoder"""
        self.conv1 = BasicConv2d(self.in_channels,
                                 self.F,
                                 stride=1,
                                 scopename='conv1')  # 32
        # > see https://pytorch.org/docs/stable/nn.html#convolution-layers for the input-output size relationship;
        # downsampling happens here (conv2 has stride 2):
        # O = (W - F + 2P) / S + 1
        # with F = 3, S = 2: O = (W - 3 + 2P)/2 + 1 = (W - 3 + 2P + 2)/2 = W/2 + (2P - 1)/2
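        # e.g., W = 512, P = 1: O = floor((512 - 3 + 2)/2) + 1 = 256, i.e., conv2 halves H and W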
        self.conv2 = BasicConv2d(self.F, self.F, stride=2,
                                 scopename='conv2')  # 32
        self.conv3 = BasicConv2d(self.F,
                                 2 * self.F,
                                 stride=1,
                                 scopename='conv3')  # 64
        self.conv4 = BasicConv2d(2 * self.F,
                                 2 * self.F,
                                 stride=1,
                                 scopename='conv4')  # 64
        """ !!!Problem: using untie_biases will requrie the input image size is 
            the same as the one during training!
        """
        # untie_biases
        #self.conv5 = Conv2dUntiedBias(
        #        height = img_size[0] // 2, # due to the stride = 2 in conv2
        #        width = img_size[1] // 2, # due to the stride = 2 in conv2
        #        in_channels = 2*self.F, out_channels= 4*self.F, stride = 1,
        #        kernel_size = 3, padding = 1, is_relu = True, scopename = 'conv5_UntiedBias') # 128
        """ just use regular convolution """
        self.conv5 = BasicConv2d(2 * self.F,
                                 4 * self.F,
                                 stride=1,
                                 scopename='conv5')  # 64
        """decoder"""
        self.conv6 = BasicConv2d(4 * self.F,
                                 2 * self.F,
                                 stride=1,
                                 scopename='conv6')  # 64
        self.conv7 = BasicConv2d(2 * self.F,
                                 2 * self.F,
                                 stride=1,
                                 scopename='conv7')  # 64

        # deconv
        self.deconv8 = BasicDeConv2d(2 * self.F,
                                     2 * self.F,
                                     stride=2,
                                     padding=1,
                                     output_padding=1,
                                     scopename='deconv8')  # F=64, S=2

        self.conv9 = BasicConv2d(2 * self.F,
                                 2 * self.F,
                                 stride=1,
                                 scopename='conv9')  # 64
        self.conv10 = BasicConv2d(2 * self.F,
                                  4 * self.F,
                                  stride=1,
                                  kernel_size=1,
                                  padding=0,
                                  scopename='conv10')  # 128, K = 1
        """filter-generating layers"""
        # NOTE:
        # 1) for example, with a filter kernel size of [13, 13],
        #    out_channels will be 13*13 + 1 (for the bias) = 170;
        # 2) for stereo prediction, if only a horizontal filter is used, i.e., filter size = 1 x 13,
        #    out_channels will be 1*13 + 1 (for the bias) = 14;
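        # 3) with this module's default 9 x 9 kernel, out_channels = 9*9 + 1 = 82.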
        self.conv11 = BasicConv2d(4 * self.F,
                                  self.kernel_h * self.kernel_w + 1,
                                  stride=1,
                                  kernel_size=1,
                                  padding=0,
                                  scopename='conv11')  # kernel_h*kernel_w + 1 output channels, K = 1
        self.soft_max = nn.Softmax2d()  # softmax over the channel dimension C, given input of size [N, C, H, W]

        if is_sync_bn:
            net_init_SyncBN(self)
            print("[***] calling net_init_SyncBN(): filterGenerator weights initialization done!")
        else:
            net_init_v0(self)
            print("[***] calling net_init_v0(): filterGenerator weights initialization done!")
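
As a quick, hypothetical sanity check on the filter-generating head above (this snippet is not from the repository and only assumes PyTorch is installed), a plain nn.Conv2d with the same output-channel arithmetic shows that a 9 x 9 dynamic filter plus one bias channel yields 82 channels per pixel:

import torch
import torch.nn as nn

kernel_h, kernel_w = 9, 9               # default dynamic_filter_size above
out_ch = kernel_h * kernel_w + 1        # 81 filter weights + 1 bias channel = 82

# 1x1 conv head analogous to conv11 (4*F = 128 input channels for F = 32)
head = nn.Conv2d(in_channels=4 * 32, out_channels=out_ch, kernel_size=1, padding=0)
x = torch.randn(1, 128, 64, 128)        # dummy [N, C, H, W] feature map
print(head(x).shape)                    # torch.Size([1, 82, 64, 128])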