Example #1
    def forward(self, imgLeft, imgRight):
        N, C, H, W = imgLeft.size()
        assert C == 3, 'should be RGB images as input'

        #NOTE: newly added for quarter size cost volume;
        # add one downsample operation:
        if self.is_quarter_size_cost_volume_gcnet:
            img_ds_scale = 2
            imgl = F.interpolate(imgLeft, [H // 2, W // 2],
                                 mode='bilinear',
                                 align_corners=True)
            imgr = F.interpolate(imgRight, [H // 2, W // 2],
                                 mode='bilinear',
                                 align_corners=True)
        else:
            img_ds_scale = 1
            imgl = imgLeft
            imgr = imgRight

        # feature extraction;
        f_imgl = self.feature_extraction(imgl)
        f_imgr = self.feature_extraction(imgr)

        # cost volume
        cv = cost_volume_faster(f_imgl,
                                f_imgr,
                                d=self.maxdisp // (2 * img_ds_scale))
        #print ("[???] cv shape: ", cv.shape)
        # cost volume aggregation
        if self.is_kendall_version:
            out = self.cost_aggregation_kendall(cv)
        else:
            out = self.cost_aggregation(cv)

        out = out.view(N, self.maxdisp // img_ds_scale, H // img_ds_scale,
                       W // img_ds_scale)
        #NOTE: This is right!!! Updated on 04/12/2020;
        # We should upsample the cost volume (now at quarter size) to full size before the soft-argmin operation,
        # which guarantees that the regressed disparity range is in [0, D) (instead of in [0, D/4));
        if self.is_quarter_size_cost_volume_gcnet:
            # corresponding to the downsampling applied to the input image pair at the beginning;
            out = out[:, None, ...]  # add a channel dim first, i.e., change [N,D,H,W] to [N,C=1,D,H,W];
            out = F.interpolate(out, [self.maxdisp, H, W],
                                mode='trilinear',
                                align_corners=True)  # in size [N,C=1,D,H,W];
            out = torch.squeeze(out, 1)  # in size [N,D,H,W]
        prob = F.softmax(out, 1)
        #disp = self.disparityregression(prob, maxdisp=self.maxdisp//img_ds_scale)
        #NOTE: This is right!!! Updated on 04/12/2020;
        disp = self.disparityregression(prob, maxdisp=self.maxdisp)

        #NOTE: This is wrong!!!
        #if self.is_quarter_size_cost_volume_gcnet:
        #    # NOTE: newly added for SGA: upsampling operation,
        #    # corresponding to the first downsampling at the beginning to the input image pair;
        #    disp = F.interpolate(disp[:,None,...], [H, W], mode='bilinear', align_corners=True)
        #    disp = torch.squeeze(disp,1) # [N,H,W]
        return disp
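# Note: `disparityregression` is not shown in these snippets. A minimal
# soft-argmin sketch (a hypothetical stand-in for the repo's module, assuming
# `prob` is softmax-normalized along the disparity dimension of [N, D, H, W]):
import torch
import torch.nn as nn

class SoftArgminRegression(nn.Module):
    """Expected disparity under the softmax distribution: sum_d d * prob[d]."""
    def forward(self, prob, maxdisp):
        # prob: [N, D, H, W] with D == maxdisp; the result lies in [0, maxdisp).
        disp_values = torch.arange(maxdisp, dtype=prob.dtype,
                                   device=prob.device).view(1, maxdisp, 1, 1)
        return torch.sum(prob * disp_values, dim=1)  # [N, H, W]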
Example #2
    def forward(self, imgLeft, imgRight):
        N, C, H, W = imgLeft.size()
        assert C == 3, 'should be RGB images as input'

        #NOTE: newly added for quarter size cost volume;
        # add one downsample operation:
        if self.is_quarter_size_cost_volume_gcnet:
            img_ds_scale = 2
            imgl = F.interpolate(imgLeft, [H // 2, W // 2],
                                 mode='bilinear',
                                 align_corners=True)
            imgr = F.interpolate(imgRight, [H // 2, W // 2],
                                 mode='bilinear',
                                 align_corners=True)
        else:
            img_ds_scale = 1
            imgl = imgLeft
            imgr = imgRight

        #print ("[???] imgLeft shape: ", imgLeft.shape)
        imgl0 = self.relu(self.convbn0(imgl))
        imgr0 = self.relu(self.convbn0(imgr))

        imgl_block = self.res_block(imgl0)
        imgr_block = self.res_block(imgr0)
        #print ("[???] imgl_block shape: ", imgl_block.shape)

        imgl1 = self.conv1(imgl_block)
        imgr1 = self.conv1(imgr_block)
        #print ("[???] imgl1 shape: ", imgl1.shape)
        # cost volume
        #cv = self.get_costVolume(imgl1,imgr1)
        #cv = self.cost_volume(imgl1,imgr1)
        #cv = self.cost_volume_faster(imgl1,imgr1)
        cv = cost_volume_faster(imgl1,
                                imgr1,
                                d=self.maxdisp // (2 * img_ds_scale))
        #print ("[???] cv shape: ", cv.shape)
        out = self.relu(self.conv3dbn_1(cv))  # conv3d_19
        out = self.relu(self.conv3dbn_2(out))  # conv3d_20

        #conv3d block
        res_l20 = out  # from layer conv3d_20;
        out = self.block_3d_1(out)  # conv3d_21,22,23
        res_l23 = out
        out = self.block_3d_2(out)  # conv3d_24,25,26
        res_l26 = out
        out = self.block_3d_3(out)  # conv3d_27,28,29
        res_l29 = out
        out = self.block_3d_4(out)  # conv3d_30,31,32
        #print ("[???] after conv3d_32 out shape = ", out.shape)

        #deconv3d
        #print ("[???] res_l29: ", res_l29.shape)
        out = self.relu(self.deconvbn1(out) + res_l29)
        out = self.relu(self.deconvbn2(out) + res_l26)
        out = self.relu(self.deconvbn3(out) + res_l23)
        out = self.relu(self.deconvbn4(out) + res_l20)
        #last deconv3d, no BN or ReLU
        out = self.deconv5(out)  # [N, 1, D, H, W]
        #print ("[???] out shape = ", out.shape)

        out = out.view(N, self.maxdisp // img_ds_scale, H // img_ds_scale,
                       W // img_ds_scale)
        prob = F.softmax(out, 1)
        #disp = self.disparityregression(prob)
        disp = self.disparityregression(prob,
                                        maxdisp=self.maxdisp // img_ds_scale)
        if self.is_quarter_size_cost_volume_gcnet:
            # NOTE: newly added for SGA: upsampling operation,
            # corresponding to the first downsampling at the beginning to the input image pair;
            disp = F.interpolate(disp[:, None, ...], [H, W],
                                 mode='bilinear',
                                 align_corners=True)
            disp = torch.squeeze(disp, 1)  # [N,H,W]
        return disp
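# Note: `cost_volume_faster` is defined elsewhere in the repo. A hedged sketch
# of a GC-Net-style concatenation cost volume (an assumption, not the repo's
# exact implementation), for features f_left, f_right of shape [N, C, H, W]:
import torch

def cost_volume_concat(f_left, f_right, d):
    """Build a [N, 2C, d, H, W] volume; at disparity i, the right features
    are shifted i pixels to the right before being concatenated with the
    left features."""
    N, C, H, W = f_left.size()
    cv = f_left.new_zeros(N, 2 * C, d, H, W)
    for i in range(d):
        cv[:, :C, i, :, :] = f_left
        if i > 0:
            cv[:, C:, i, :, i:] = f_right[:, :, :, :-i]
        else:
            cv[:, C:, i, :, :] = f_right
    return cv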
Example #3
    def forward(self, imgLeft, imgRight):
        N, C, H, W = imgLeft.size()
        assert C == 3, 'should be RGB images as input'
        #print ("[???] imgLeft shape: ", imgLeft.shape)

        if self.is_quarter_size_cost_volume_gcnet:
            img_ds_scale = 2
            imgl = F.interpolate(
                imgLeft, [H // 2, W // 2], mode='bilinear',
                align_corners=True)  #in size [N, 3, H/2, W/2];
            imgr = F.interpolate(
                imgRight, [H // 2, W // 2],
                mode='bilinear',
                align_corners=True)  #in size [N, 3, H/2, W/2];
        else:
            img_ds_scale = 1
            imgl = imgLeft
            imgr = imgRight

        # feature extraction;
        f_imgl = self.feature_extraction(imgl)
        f_imgr = self.feature_extraction(imgr)
        # cost volume
        cv = cost_volume_faster(f_imgl,
                                f_imgr,
                                d=self.maxdisp // (2 * img_ds_scale))
        #print ("[???] cv shape: ", cv.shape)

        if not self.isDFN:
            dfn_filter = None
            dfn_bias = None
        else:
            # downscale the left image to match the cost volume resolution before feeding it to the
            # DFN generator; the cost volume generated above is [N, C, D/(2*s), H/(2*s), W/(2*s)] for s = img_ds_scale
            left_scale = F.interpolate(imgLeft, [
                imgLeft.size()[2] // (2 * img_ds_scale),
                imgLeft.size()[3] // (2 * img_ds_scale)
            ],
                                       mode='bilinear',
                                       align_corners=True)
            #print ('[???] left shape', left.shape)
            #print ('[???] left_scale shape', left_scale.shape)
            dfn_filter, dfn_bias = self.dfn_generator(left_scale)
            D = cv.size()[2]
            #print ('[???] cost size = ', cost.size())

            # NOTE: this might be memory consuming!!!
            # Not sure whether torch.no_grad() would break the training or not!!!!
            #with torch.set_grad_enabled(False):
            #with torch.set_grad_enabled(True):
            with torch.set_grad_enabled(self.cost_filter_grad):
                for d in range(0, D):
                    #for d in range(0,1):
                    #print ('DFN filtering cost volume slice %d/%d' %(d+1, D))
                    # apply DFN filter to cost volume [N,C,H,W];
                    cv_d_slice = cv[:, :, d, :, :].contiguous()
                    #print ('[???] cv_d_slice shape', cv_d_slice.shape)
                    cv[:, :, d, :, :] = self.dfn_layer(cv_d_slice, dfn_filter,
                                                       dfn_bias)

        cv = cv.contiguous()
        # cost volume aggregation
        if self.is_kendall_version:
            out = self.cost_aggregation_kendall(cv)
        else:
            out = self.cost_aggregation(cv)

        out = out.view(N, self.maxdisp // img_ds_scale, H // img_ds_scale,
                       W // img_ds_scale)
        #NOTE: This is right!!! Updated on 04/12/2020;
        # We should upsample the cost volume (now at quarter size) to full size before the soft-argmin operation,
        # which guarantees that the regressed disparity range is in [0, D) (instead of in [0, D/4));
        if self.is_quarter_size_cost_volume_gcnet:
            # corresponding to the downsampling applied to the input image pair at the beginning;
            out = out[:, None, ...]  # add a channel dim first, i.e., change [N,D,H,W] to [N,C=1,D,H,W];
            out = F.interpolate(out, [self.maxdisp, H, W],
                                mode='trilinear',
                                align_corners=True)  # in size [N,C=1,D,H,W];
            out = torch.squeeze(out, 1)  # in size [N,D,H,W]
        prob = F.softmax(out, 1)
        #disp = self.disparityregression(prob, maxdisp=self.maxdisp//img_ds_scale)
        #NOTE: This is right!!! Updated on 04/12/2020;
        disp = self.disparityregression(prob, maxdisp=self.maxdisp)

        #if self.is_quarter_size_cost_volume_gcnet:
        #    # NOTE: newly added for PAC: upsampling operation,
        #    # corresponding to the first downsampling at the beginning to the input image pair;
        #    disp = F.interpolate(disp[:,None,...], [H, W], mode='bilinear', align_corners=True)
        #    disp = torch.squeeze(disp,1) # [N,H,W]

        if self.training:
            return disp, dfn_filter, dfn_bias
        else:
            return disp, [dfn_filter, dfn_bias]
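# Note: the in-place slice writes above, combined with the grad toggle, are the
# memory concern mentioned in the comments. A hedged alternative sketch that
# stacks filtered slices instead of writing in place (`filter_fn` stands in
# for `self.dfn_layer` and is an assumption):
import torch

def filter_cost_volume(cv, filter_fn, requires_grad=True):
    """Filter each disparity slice of a [N, C, D, H, W] cost volume without
    in-place assignment, at the price of one extra volume in memory."""
    with torch.set_grad_enabled(requires_grad):
        slices = [filter_fn(cv[:, :, d].contiguous())
                  for d in range(cv.size(2))]
    return torch.stack(slices, dim=2)  # back to [N, C, D, H, W]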
Example #4
    def forward(self, left, right):

        x = self.feature_extraction(left)  # left feature
        y = self.feature_extraction(right)  # right feature

        # matching volume, in size [N,2C,D/4, H/4, W/4];
        cost = cost_volume_faster(x, y, self.maxdisp // 4)

        if not self.isEmbed:
            embed = None
        else:
            # downscale the left image to [N, C, H/4, W/4] and feed it into the embedding net,
            # because the cost volume generated above is in shape [N, C, D/4, H/4, W/4]
            left_scale = F.interpolate(
                left,
                [left.size()[2] // 4, left.size()[3] // 4],
                mode='bilinear',
                align_corners=True)
            #print ('[???] left shape', left.shape)
            #print ('[???] left_scale shape', left_scale.shape)
            """ embed shape [2, 64, 64, 128]"""
            embed = self.embednet(left_scale)
            #print ('[???] embed shape', embed.shape)
            """ cost shape [2, 64, 36, 64, 128]"""
            N, C, D, H, W = cost.size()
            #print ('[???] cost shape', cost.shape)

            # NOTE: this might be memory consuming!!!
            # Not sure whether torch.no_grad() would break the training or not!!!!
            #with torch.set_grad_enabled(False):
            #with torch.set_grad_enabled(True):
            with torch.set_grad_enabled(self.cost_filter_grad):
                for d in range(0, D):
                    #for d in range(0,1):
                    #print ('bilateral filtering cost volume slice %d/%d' %(d+1, D))
                    # apply bilateral filter to cost volume [N,C,H,W];
                    cv_d_slice = cost[:, :, d, :, :].contiguous()
                    #print ('[???] cv_d_slice shape', cv_d_slice.shape)
                    cost[:, :, d, :, :] = self.bifilter(embed, cv_d_slice)

        cost = cost.contiguous()

        cost0 = self.dres0(cost)
        cost0 = self.dres1(cost0) + cost0

        out1, pre1, post1 = self.dres2(cost0, None, None)
        out1 = out1 + cost0

        out2, pre2, post2 = self.dres3(out1, pre1, post1)
        out2 = out2 + cost0

        out3, pre3, post3 = self.dres4(out2, pre1, post2)
        out3 = out3 + cost0

        cost1 = self.classif1(out1)
        cost2 = self.classif2(out2) + cost1
        cost3 = self.classif3(out3) + cost2

        if self.training:
            # updated by CCJ: F.upsample is deprecated, replaced with F.interpolate!
            #cost1 = F.upsample(cost1, [self.maxdisp,left.size()[2],left.size()[3]], mode='trilinear')
            #cost2 = F.upsample(cost2, [self.maxdisp,left.size()[2],left.size()[3]], mode='trilinear')
            cost1 = F.interpolate(
                cost1,
                [self.maxdisp, left.size()[2],
                 left.size()[3]],
                mode='trilinear',
                align_corners=True)
            cost2 = F.interpolate(
                cost2,
                [self.maxdisp, left.size()[2],
                 left.size()[3]],
                mode='trilinear',
                align_corners=True)

            cost1 = torch.squeeze(cost1, 1)
            pred1 = F.softmax(cost1, dim=1)
            pred1 = disparityregression(self.maxdisp)(pred1)

            cost2 = torch.squeeze(cost2, 1)
            pred2 = F.softmax(cost2, dim=1)
            pred2 = disparityregression(self.maxdisp)(pred2)

        #cost3 = F.upsample(cost3, [self.maxdisp,left.size()[2],left.size()[3]], mode='trilinear')
        cost3 = F.interpolate(
            cost3, [self.maxdisp, left.size()[2],
                    left.size()[3]],
            mode='trilinear',
            align_corners=True)
        cost3 = torch.squeeze(cost3, 1)
        pred3 = F.softmax(cost3, dim=1)
        # For your information: the formulation softmax(c) learns "similarity",
        # while softmax(-c) learns "matching cost", as mentioned in the paper.
        # However, 'c' vs. '-c' does not affect performance, because the feature-based cost volume provides that flexibility.
        pred3 = disparityregression(self.maxdisp)(pred3)

        if self.training:
            return pred1, pred2, pred3, embed
        else:
            return pred3, embed
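# Note: during training this example returns three intermediate predictions.
# A hedged sketch of the weighted smooth-L1 supervision used by PSMNet-style
# models (the 0.5/0.7/1.0 weights follow the PSMNet paper; the valid-pixel
# masking convention here is an assumption):
import torch.nn.functional as F

def psmnet_style_loss(pred1, pred2, pred3, disp_gt, maxdisp):
    """Weighted smooth-L1 loss over the three stacked-hourglass outputs."""
    mask = (disp_gt > 0) & (disp_gt < maxdisp)  # keep valid disparities only
    return (0.5 * F.smooth_l1_loss(pred1[mask], disp_gt[mask])
            + 0.7 * F.smooth_l1_loss(pred2[mask], disp_gt[mask])
            + 1.0 * F.smooth_l1_loss(pred3[mask], disp_gt[mask]))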
Example #5
    def forward(self, imgLeft, imgRight):
        N, C, H, W = imgLeft.size()
        assert C == 3, 'should be RGB images as input'

        #NOTE: newly added for quarter size cost volume;
        # add one downsample operation:
        if self.is_quarter_size_cost_volume_gcnet:
            img_ds_scale = 2
            imgl = F.interpolate(imgLeft, [H // 2, W // 2],
                                 mode='bilinear',
                                 align_corners=True)
            imgr = F.interpolate(imgRight, [H // 2, W // 2],
                                 mode='bilinear',
                                 align_corners=True)
        else:
            img_ds_scale = 1
            imgl = imgLeft
            imgr = imgRight

        # feature extraction;
        f_imgl = self.feature_extraction(imgl)
        f_imgr = self.feature_extraction(imgr)

        # cost volume
        cv = cost_volume_faster(f_imgl,
                                f_imgr,
                                d=self.maxdisp // (2 * img_ds_scale))
        #print ("[???] cv shape: ", cv.shape)

        if self.is_sga_guide_from_img:
            g_in = None
        else:
            # downscale the left image to [N, C, H/4, W/4] and feed it into the embedding net,
            # because the cost volume generated above is in shape [N, C, D/4, H/4, W/4]
            left_scale = F.interpolate(imgLeft, [H // 4, W // 4],
                                       mode='bilinear',
                                       align_corners=True)
            """ embed shape [2, 64, 64, 128]"""
            g_in = self.embednet(left_scale)
            #print ('[???] embed shape', embed.shape)
        """ apply SGA_CostAggregation() """
        # NOTE: this might be memory consuming!!!
        with torch.set_grad_enabled(self.cost_filter_grad):
            cv = self.sga_costAgg(cv, g_in, img_for_g=imgLeft)
            #print ('[???] cost shape', cv.shape)

        cv = cv.contiguous()

        # cost volume aggregation
        if self.is_kendall_version:
            out = self.cost_aggregation_kendall(cv)
        else:
            out = self.cost_aggregation(cv)

        out = out.view(N, self.maxdisp // img_ds_scale, H // img_ds_scale,
                       W // img_ds_scale)
        #NOTE: This is right!!! Updated on 04/12/2020;
        # We should upsample the cost volume (now at quarter size) to full size before the soft-argmin operation,
        # which guarantees that the regressed disparity range is in [0, D) (instead of in [0, D/4));
        if self.is_quarter_size_cost_volume_gcnet:
            # corresponding to the downsampling applied to the input image pair at the beginning;
            out = out[:, None, ...]  # add a channel dim first, i.e., change [N,D,H,W] to [N,C=1,D,H,W];
            out = F.interpolate(out, [self.maxdisp, H, W],
                                mode='trilinear',
                                align_corners=True)  # in size [N,C=1,D,H,W];
            out = torch.squeeze(out, 1)  # in size [N,D,H,W]
        prob = F.softmax(out, 1)
        #disp = self.disparityregression(prob, maxdisp=self.maxdisp//img_ds_scale)
        #NOTE: This is right!!! Updated on 04/12/2020;
        disp = self.disparityregression(prob, maxdisp=self.maxdisp)
        return disp, g_in
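# Note: the "upsample before soft-argmin" fix repeated in these examples can be
# checked with a small shape demo (the sizes below are illustrative only):
import torch
import torch.nn.functional as F

N, D, H, W = 1, 192, 256, 512
out = torch.randn(N, D // 4, H // 4, W // 4)  # quarter-size cost volume

# Upsampling to full resolution BEFORE softmax gives D disparity bins, so the
# regressed value lies in [0, D). Bilinearly upsampling a quarter-size
# disparity map AFTER regression would leave values in [0, D/4) instead.
out = F.interpolate(out[:, None], [D, H, W],
                    mode='trilinear', align_corners=True).squeeze(1)
prob = F.softmax(out, dim=1)  # [N, D, H, W]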
Example #6
    def forward(self, left, right):
        x = self.feature_extraction(left)  # left feature
        y = self.feature_extraction(right)  # right feature

        # matching volume, in size [N,2C,D/4, H/4, W/4];
        cost = cost_volume_faster(x, y, self.maxdisp // 4)

        if self.is_sga_guide_from_img:
            g_in = None
        else:
            # downscale the left image to [N, C, H/4, W/4] and feed it into the embedding net,
            # because the cost volume generated above is in shape [N, C, D/4, H/4, W/4]
            left_scale = F.interpolate(
                left,
                [left.size()[2] // 4, left.size()[3] // 4],
                mode='bilinear',
                align_corners=True)
            #print ('[???] left shape', left.shape)
            #print ('[???] left_scale shape', left_scale.shape)
            """ embed shape [2, 64, 64, 128]"""
            g_in = self.embednet(left_scale)
            #print ('[???] embed shape', embed.shape)
        """ apply SGA_CostAggregation() """
        # NOTE: this might be memory consuming!!!
        with torch.set_grad_enabled(self.cost_filter_grad):
            cost = self.sga_costAgg(cost, g_in, img_for_g=left)
            #print ('[???] cost shape', cost.shape)

        cost0 = self.dres0(cost)
        cost0 = self.dres1(cost0) + cost0

        out1, pre1, post1 = self.dres2(cost0, None, None)
        out1 = out1 + cost0

        out2, pre2, post2 = self.dres3(out1, pre1, post1)
        out2 = out2 + cost0

        out3, pre3, post3 = self.dres4(out2, pre1, post2)
        out3 = out3 + cost0

        cost1 = self.classif1(out1)
        cost2 = self.classif2(out2) + cost1
        cost3 = self.classif3(out3) + cost2

        if self.training:
            # updated by CCJ: F.upsample is deprecated, replaced with F.interpolate!
            cost1 = F.interpolate(
                cost1,
                [self.maxdisp, left.size()[2],
                 left.size()[3]],
                mode='trilinear',
                align_corners=True)
            cost2 = F.interpolate(
                cost2,
                [self.maxdisp, left.size()[2],
                 left.size()[3]],
                mode='trilinear',
                align_corners=True)

            cost1 = torch.squeeze(cost1, 1)
            pred1 = F.softmax(cost1, dim=1)
            pred1 = disparityregression(self.maxdisp)(pred1)

            cost2 = torch.squeeze(cost2, 1)
            pred2 = F.softmax(cost2, dim=1)
            pred2 = disparityregression(self.maxdisp)(pred2)

        #cost3 = F.upsample(cost3, [self.maxdisp,left.size()[2],left.size()[3]], mode='trilinear')
        cost3 = F.interpolate(
            cost3, [self.maxdisp, left.size()[2],
                    left.size()[3]],
            mode='trilinear',
            align_corners=True)
        cost3 = torch.squeeze(cost3, 1)
        pred3 = F.softmax(cost3, dim=1)
        # For your information: the formulation softmax(c) learns "similarity",
        # while softmax(-c) learns "matching cost", as mentioned in the paper.
        # However, 'c' vs. '-c' does not affect performance, because the feature-based cost volume provides that flexibility.
        pred3 = disparityregression(self.maxdisp)(pred3)

        if self.training:
            return pred1, pred2, pred3, g_in
        else:
            return pred3, g_in