def __init__(self, maxdisp):
    super(PSMNet, self).__init__()
    self.maxdisp = maxdisp
    self.feature_extraction = feature_extraction()

    self.dres0 = nn.Sequential(
        convbn_3d(64, 32, 3, 1, 1),
        nn.ReLU(inplace=True),
        convbn_3d(32, 32, 3, 1, 1),
        nn.ReLU(inplace=True))

    self.dres1 = nn.Sequential(
        convbn_3d(32, 32, 3, 1, 1),
        nn.ReLU(inplace=True),
        convbn_3d(32, 32, 3, 1, 1))

    self.dres2 = hourglass(32)
    self.dres3 = hourglass(32)
    self.dres4 = hourglass(32)

    self.classif1 = nn.Sequential(
        convbn_3d(32, 32, 3, 1, 1),
        nn.ReLU(inplace=True),
        nn.Conv3d(32, 1, kernel_size=3, padding=1, stride=1, bias=False))

    self.classif2 = nn.Sequential(
        convbn_3d(32, 32, 3, 1, 1),
        nn.ReLU(inplace=True),
        nn.Conv3d(32, 1, kernel_size=3, padding=1, stride=1, bias=False))

    self.classif3 = nn.Sequential(
        convbn_3d(32, 32, 3, 1, 1),
        nn.ReLU(inplace=True),
        nn.Conv3d(32, 1, kernel_size=3, padding=1, stride=1, bias=False))

    net_init_v0(self)
    print("[***] PSMNet weights initialization done!")
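A minimal usage sketch (not part of the original source): how this constructor might be exercised, assuming the module defining PSMNet is importable; the forward-pass call `model(left, right)` is an assumption, since only the constructor is shown here.

import torch

model = PSMNet(maxdisp=192)          # 192 is a common maximum disparity for KITTI-sized images
left = torch.randn(1, 3, 256, 512)   # [N, C, H, W] left image
right = torch.randn(1, 3, 256, 512)  # right image
# disp = model(left, right)          # assumed forward signature; returns the disparity prediction(s)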
def __init__(self,
             maxdisp=192,
             sigma_s=0.7,  # spatial sigma: 1.7 -> 13 x 13 window; 0.3 -> 3 x 3 window
             sigma_v=0.1,  # range (value) sigma
             isEmbed=True,
             dilation=1,
             cost_filter_grad=False):
    super(AttenStereoNet, self).__init__(maxdisp=maxdisp)
    #self.maxdisp = maxdisp
    self.isEmbed = isEmbed  # True or False
    self.sigma_s = sigma_s
    self.sigma_v = sigma_v
    self.dilation = dilation
    self.cost_filter_grad = cost_filter_grad

    """ embedding network """
    if self.isEmbed:
        print(' Enable Embedding Network!!!')
        self.embednet = embed_net()
        # the module layer:
        self.bifilter = bilateralFilter(sigma_s, sigma_v, isCUDA=True,
                                        dilation=self.dilation)
    else:
        self.embednet = None
        self.bifilter = None

    net_init_v0(self)
    print("[***] AttenStereoNet weights initialization done!")
def __init__(self,
             maxdisp=192,
             kernel_size=5,
             isPAC=True,
             isEmbed=False,
             pac_in_channels=64,  # e.g., == cost volume channels
             pac_out_channels=64,
             dilation=2,
             cost_filter_grad=True,
             native_impl=True):
    super(AttenStereoNet, self).__init__(maxdisp=maxdisp)
    self.isPAC = isPAC  # True or False
    self.isEmbed = isEmbed  # True or False
    self.k = kernel_size
    self.pac_in_ch = pac_in_channels
    self.pac_out_ch = pac_out_channels
    self.d = dilation
    self.pad = dilation * (kernel_size - 1) // 2
    self.cost_filter_grad = cost_filter_grad

    """ pixel-adaptive convolution (PAC) network """
    if self.isPAC:
        self.pacconv = PacConv2d(self.pac_in_ch, self.pac_out_ch,
                                 kernel_size=self.k, stride=1,
                                 padding=self.pad, dilation=self.d,
                                 native_impl=native_impl)
        if native_impl:
            print(' Enable native-implementation Pixel-Adaptive Convolution (NPAC) Network!!!')
        else:
            print(' Enable filter-implementation Pixel-Adaptive Convolution (PAC) Network!!!')
    else:
        print('[!!!] No PAC Network!!')
        self.pacconv = None

    """ embedding network """
    if self.isEmbed:
        print(' PAC adapting feature f comes from another Embedding Network!!!')
        self.embednet = embed_net()
    else:
        self.embednet = None

    net_init_v0(self)
    print("[***] AttenStereoNet weights initialization done!")
    """ the GANet initialization is omitted due to inheritance from GANet """
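A quick standalone check (not from the original source) of the `pad = dilation * (kernel_size - 1) // 2` rule used above: with stride 1 and an odd kernel it preserves the spatial size. The sketch uses a plain `nn.Conv2d` only to verify the arithmetic; the actual layer here is `PacConv2d`.

import torch
import torch.nn as nn

k, d = 5, 2                       # kernel_size and dilation defaults from the constructor above
pad = d * (k - 1) // 2            # = 4
conv = nn.Conv2d(64, 64, kernel_size=k, stride=1, padding=pad, dilation=d)
x = torch.randn(1, 64, 64, 128)   # stand-in for a 64-channel cost-volume slice
print(conv(x).shape)              # torch.Size([1, 64, 64, 128]) -- spatial size preserved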
def __init__(self,
             maxdisp=192,
             kernel_size=9,
             isPAC=True,
             isEmbed=False,
             pac_in_channels=64,  # e.g., == cost volume channels
             pac_out_channels=64,
             dilation=1,
             cost_filter_grad=True,
             native_impl=True,
             is_kendall_version=True,  # exactly following the structure in Kendall's GCNet paper
             is_quarter_size_cost_volume_gcnet=False  # cost volume in quarter image size, i.e., [D/4, H/4, W/4]
             ):
    super(AttenStereoNet, self).__init__(
        maxdisp=maxdisp,
        # newly added arguments:
        is_kendall_version=is_kendall_version,
        is_quarter_size_cost_volume_gcnet=is_quarter_size_cost_volume_gcnet)
    self.isPAC = isPAC  # True or False
    self.isEmbed = isEmbed  # True or False
    self.k = kernel_size
    self.pac_in_ch = pac_in_channels
    self.pac_out_ch = pac_out_channels
    self.d = dilation
    self.pad = dilation * (kernel_size - 1) // 2
    self.cost_filter_grad = cost_filter_grad

    """ pixel-adaptive convolution (PAC) network """
    if self.isPAC:
        self.pacconv = PacConv2d(self.pac_in_ch, self.pac_out_ch,
                                 kernel_size=self.k, stride=1,
                                 padding=self.pad, dilation=self.d,
                                 native_impl=native_impl)
        if native_impl:
            print(' Enable native-implementation Pixel-Adaptive Convolution (NPAC) Network!!!')
        else:
            print(' Enable filter-implementation Pixel-Adaptive Convolution (PAC) Network!!!')
    else:
        print('[!!!] No PAC Network!!')
        self.pacconv = None

    """ the following initialization is omitted due to inheritance from GCNet """
    net_init_v0(self)
    print("[***] attenStereoNet_pac_gcnet weights initialization done!")
def __init__(self,
             maxdisp=192,
             is_sga_guide_from_img=True,
             #is_quarter_size=True,  # feature in 1/4 image size (i.e., H/4 x W/4) or 1/3 size (i.e., H/3 x W/3)
             downsample_scale=4,  # dummy one!!!
             is_lga=False,  # generate LGA (Local Guided Aggregation) weights or not
             cost_filter_grad=False,
             is_kendall_version=True,  # exactly following the structure in Kendall's GCNet paper
             is_quarter_size_cost_volume_gcnet=False  # cost volume in quarter image size, i.e., [D/4, H/4, W/4]
             ):
    super(AttenStereoNet, self).__init__(
        maxdisp=maxdisp,
        # newly added arguments:
        is_kendall_version=is_kendall_version,
        is_quarter_size_cost_volume_gcnet=is_quarter_size_cost_volume_gcnet)
    # `downsample_scale` is a dummy argument; it is always overwritten here:
    #self.downsample_scale = downsample_scale
    self.downsample_scale = 4 if is_quarter_size_cost_volume_gcnet else 2
    print("SGA + GCNet: set downsample_scale = %d" % self.downsample_scale)
    self.is_sga_guide_from_img = is_sga_guide_from_img  # True or False
    self.cost_filter_grad = cost_filter_grad
    #self.is_quarter_size = is_quarter_size
    self.is_lga = is_lga

    if self.is_sga_guide_from_img:
        print('is_sga_guide_from_img = True !!!')
        self.embednet = None
    else:
        """ embedding network """
        print('is_sga_guide_from_img = False !!!')
        print('SGA_CostAggregation uses Embedding Network!!!')
        self.embednet = embed_net()

    self.sga_costAgg = SGA_CostAggregation(
        self.is_sga_guide_from_img,
        #self.is_quarter_size,
        self.downsample_scale,
        self.is_lga,
        cost_volume_in_channels=64)

    """ the following initialization is omitted due to inheritance from GCNet """
    net_init_v0(self)
    print("[***] attenStereoNet_sga_gcnet weights initialization done!")
def __init__(self,
             maxdisp=192,
             kernel_size=5,
             crop_img_h=256,
             crop_img_w=512,
             isDFN=True,
             dilation=2,
             cost_filter_grad=False,
             is_kendall_version=True,  # exactly following the structure in Kendall's GCNet paper
             is_quarter_size_cost_volume_gcnet=False  # cost volume in quarter image size, i.e., [D/4, H/4, W/4]
             ):
    super(AttenStereoNet, self).__init__(
        maxdisp=maxdisp,
        # newly added arguments:
        is_kendall_version=is_kendall_version,
        is_quarter_size_cost_volume_gcnet=is_quarter_size_cost_volume_gcnet)
    self.isDFN = isDFN  # True or False
    self.kernel_size = kernel_size
    self.dilation = dilation
    self.cost_filter_grad = cost_filter_grad

    """ dynamic filter network """
    if self.isDFN:
        print(' Enable Dynamic Filter Network!!!')
        self.dfn_generator = filterGenerator(
            F=32,
            dynamic_filter_size=(kernel_size, kernel_size),
            in_channels=3)
        # the module layer:
        self.dfn_layer = DynamicFilterLayer(kernel_size, dilation)
    else:
        print('[!!!] No dfn_generator and dfn_layer!!')
        self.dfn_generator = None
        self.dfn_layer = None

    """ the following initialization is omitted due to inheritance from GCNet """
    net_init_v0(self)
    print("[***] attenStereoNet_dfn_gcnet weights initialization done!")
def __init__(self,
             maxdisp=192,
             sigma_s=0.7,  # spatial sigma: 1.7 -> 13 x 13 window; 0.3 -> 3 x 3 window
             sigma_v=0.1,  # range (value) sigma
             isEmbed=True,
             dilation=1,
             cost_filter_grad=False,
             is_kendall_version=True,  # exactly following the structure in Kendall's GCNet paper
             is_quarter_size_cost_volume_gcnet=False  # cost volume in quarter image size, i.e., [D/4, H/4, W/4]
             ):
    super(AttenStereoNet, self).__init__(
        maxdisp=maxdisp,
        # newly added arguments:
        is_kendall_version=is_kendall_version,
        is_quarter_size_cost_volume_gcnet=is_quarter_size_cost_volume_gcnet)
    self.isEmbed = isEmbed  # True or False
    self.sigma_s = sigma_s
    self.sigma_v = sigma_v
    self.dilation = dilation
    self.cost_filter_grad = cost_filter_grad

    """ embedding network """
    if self.isEmbed:
        print(' Enable Embedding Network!!!')
        self.embednet = embed_net()
        self.bifilter = bilateralFilter(sigma_s, sigma_v, isCUDA=True,
                                        dilation=self.dilation)
    else:
        self.embednet = None
        self.bifilter = None

    """ the following initialization is omitted due to inheritance from GCNet """
    net_init_v0(self)
    print("[***] attenStereoNet_embed_gcnet weights initialization done!")
def __init__(self,
             maxdisp=192,
             is_sga_guide_from_img=True,
             #is_quarter_size=True,  # feature in 1/4 image size (i.e., H/4 x W/4) or 1/3 size (i.e., H/3 x W/3)
             downsample_scale=4,  # dummy one!!!
             is_lga=False,  # generate LGA (Local Guided Aggregation) weights or not
             cost_filter_grad=False):
    super(AttenStereoNet, self).__init__(maxdisp=maxdisp)
    #self.downsample_scale = downsample_scale  # dummy one!!!
    self.downsample_scale = 4
    print("SGA + PSMNet: set downsample_scale = %d" % self.downsample_scale)
    self.is_sga_guide_from_img = is_sga_guide_from_img  # True or False
    self.cost_filter_grad = cost_filter_grad
    #self.is_quarter_size = is_quarter_size
    self.is_lga = is_lga

    if self.is_sga_guide_from_img:
        print('is_sga_guide_from_img = True !!!')
        self.embednet = None
    else:
        """ embedding network """
        print('is_sga_guide_from_img = False !!!')
        print('SGA_CostAggregation uses Embedding Network!!!')
        self.embednet = embed_net()

    self.sga_costAgg = SGA_CostAggregation(
        self.is_sga_guide_from_img,
        #self.is_quarter_size,
        self.downsample_scale,
        self.is_lga,
        cost_volume_in_channels=64)

    """ the following initialization is omitted due to inheritance from PSMNet """
    net_init_v0(self)
    print("[***] attenStereoNet_sga_psmnet weights initialization done!")
def __init__(self,
             F=32,
             dynamic_filter_size=(9, 9),
             #img_size=(256, 512),
             in_channels=3,
             is_sync_bn=False):
    super(filterGenerator, self).__init__()
    self.F = F
    self.kernel_h = dynamic_filter_size[0]
    self.kernel_w = dynamic_filter_size[1]
    self.in_channels = in_channels

    """ encoder """
    self.conv1 = BasicConv2d(self.in_channels, self.F, stride=1, scopename='conv1')  # 32
    # See https://pytorch.org/docs/stable/nn.html#convolution-layers for the
    # input-output size relationship. Downsampling here:
    # O = (W - F + 2P) / S + 1, so with kernel size 3 and stride 2:
    # O = (W - 3 + 2P)/2 + 1 = (W - 3 + 2P + 2)/2 = W/2 + (2P - 1)/2
    self.conv2 = BasicConv2d(self.F, self.F, stride=2, scopename='conv2')  # 32
    self.conv3 = BasicConv2d(self.F, 2 * self.F, stride=1, scopename='conv3')  # 64
    self.conv4 = BasicConv2d(2 * self.F, 2 * self.F, stride=1, scopename='conv4')  # 64

    """ !!! Problem: using untie_biases would require the input image size to be
        the same as the one used during training! """
    # untie_biases
    #self.conv5 = Conv2dUntiedBias(
    #    height=img_size[0] // 2,  # due to the stride = 2 in conv2
    #    width=img_size[1] // 2,   # due to the stride = 2 in conv2
    #    in_channels=2 * self.F, out_channels=4 * self.F, stride=1,
    #    kernel_size=3, padding=1, is_relu=True, scopename='conv5_UntiedBias')  # 128
    """ just use a regular convolution instead """
    self.conv5 = BasicConv2d(2 * self.F, 4 * self.F, stride=1, scopename='conv5')  # 128

    """ decoder """
    self.conv6 = BasicConv2d(4 * self.F, 2 * self.F, stride=1, scopename='conv6')  # 64
    self.conv7 = BasicConv2d(2 * self.F, 2 * self.F, stride=1, scopename='conv7')  # 64
    # deconv
    self.deconv8 = BasicDeConv2d(2 * self.F, 2 * self.F, stride=2, padding=1,
                                 output_padding=1, scopename='deconv8')  # 64, S = 2
    self.conv9 = BasicConv2d(2 * self.F, 2 * self.F, stride=1, scopename='conv9')  # 64
    self.conv10 = BasicConv2d(2 * self.F, 4 * self.F, stride=1, kernel_size=1,
                              padding=0, scopename='conv10')  # 128, K = 1

    """ filter-generating layers """
    # NOTE:
    # 1) For example, with a filter kernel size of [13, 13], the out_channels
    #    will be 13*13 + 1 (for the bias) = 170;
    # 2) for stereo prediction, if only a horizontal filter is used, i.e.,
    #    filter size = 1 x 13, the out_channels will be 1*13 + 1 (for the bias) = 14.
    self.conv11 = BasicConv2d(4 * self.F, self.kernel_h * self.kernel_w + 1,
                              stride=1, kernel_size=1, padding=0,
                              scopename='conv11')  # 170, K = 1
    # softmax over the channel dimension, given input of size [N, C, H, W]
    self.soft_max = nn.Softmax2d()

    if is_sync_bn:
        net_init_SyncBN(self)
        print("[***] calling net_init_SyncBN(): filterGenerator weights initialization done!")
    else:
        net_init_v0(self)
        print("[***] calling net_init_v0(): filterGenerator weights initialization done!")
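A small sketch (an assumption about the downstream consumer, not code from the repo) of how the `kernel_h * kernel_w + 1` channels produced by `conv11` are typically split: the first k*k channels act as per-pixel dynamic-filter weights (normalized with the softmax above) and the last channel as a per-pixel bias.

import torch
import torch.nn as nn

N, H, W, k = 1, 64, 128, 9
raw = torch.randn(N, k * k + 1, H, W)      # stand-in for the conv11 output
weights = nn.Softmax2d()(raw[:, :k * k])   # per-pixel filter weights; each location's k*k weights sum to 1
bias = raw[:, k * k:]                      # per-pixel bias, shape [N, 1, H, W]
print(weights.shape, bias.shape)           # torch.Size([1, 81, 64, 128]) torch.Size([1, 1, 64, 128])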