def forward(self, l, r, P, pre1, pre2):
        """Build and aggregate a per-object matching cost volume on CUDA device 2.

        Features are extracted from ``l`` and ``r`` with
        ``self.feature_extraction2`` followed by ``self.feature_extraction``.
        For every object label ``i`` (up to ``torch.max(P[..., 1])``, capped at
        150 iterations) a zero volume of shape
        ``(x2 - x1, y2 - y1, max_d - min_d)`` is allocated, filled with
        ``cosine_s`` scores between left-side features and right-side features
        shifted by each value in ``[min_d, max_d)``, and then blended per
        plane label (``P[..., 2]``) using weights from ``self.cluster``.

        Args:
            l, r: Inputs handed to the feature extractors (presumably the
                left/right images of a stereo pair -- TODO confirm).
            P: Tensor whose last axis is read at indices 0, 1, 2 and 3:
                index 0 becomes ``s_mask``, index 3 minus index 0 becomes
                ``l_mask``, index 1 is compared against the object id ``i``
                and index 2 against the plane id ``j``.
            pre1: Read as ``pre1[0, 0, i]`` (``min_d``) and ``pre1[0, 1, i]``
                (``max_d``) for object ``i``.
            pre2: ``pre2[0, i]`` is unpacked as ``(x1, y1, x2, y2, size)``.

        Returns:
            The ``cost_volume`` of the last object processed; volumes of
            earlier objects are overwritten on each iteration. If no object is
            processed, the initial empty list is returned.

        Notes:
            The whole object loop is repeated 10 times with the elapsed time
            printed, and the method sleeps for 100 seconds before returning.
            NOTE(review): this looks like benchmarking/debug scaffolding --
            confirm before using this method in a real pipeline.
        """
        # Original author's notes (meaning not evident from this code):
        #   self.P=P[1,0]
        #   0 l to r, 1 min, 2 max
        #   [l_box, r_box, match], [min_d, max_d]
        # This timestamp is overwritten below before it is ever read.
        start_time = time.time()
        with torch.no_grad():
            # Device-2 copies are stored on the module; the loops below read
            # the `pre1` / `pre2` arguments directly, not these copies.
            self.pre = pre1.cuda(2)
            self.pre2 = pre2.cuda(2)
        # Split the last axis of P into its four channels on device 2.
        P1 = P[..., 0].cuda(2)
        P2 = P[..., 3].cuda(2)
        P3 = P[..., 1].cuda(2)  # object id per pixel (compared with `i`)
        P4 = P[..., 2].cuda(2)  # plane id per pixel (compared with `j`)
        # Masks choosing between the two feature maps:
        # `s_mask` pairs with `l_sf`, `l_mask` with `l_lf` (see `feature`).
        l_mask = P2 - P1
        s_mask = P1
        # Author-recorded figures, presumably memory readings (unverified):
        # "basic cuda 524", 1923, 2727.

        # Two-stage feature extraction for the `l` input.
        l_sf = self.feature_extraction2(l)
        l_lf = self.feature_extraction(l_sf)

        # Author's note: "the cuda won't copy the volume to the new gpu".
        # Same two-stage extraction for the `r` input.
        r_sf = self.feature_extraction2(r)
        r_lf = self.feature_extraction(r_sf)
        # Author-recorded figure (unverified): 3267.

        # `disparity` is allocated here but never used in the active code.
        disparity = torch.zeros([540, 960]).cuda(2)
        # One-element constants used with torch.where below.
        one = torch.ones(1).cuda(2)
        zero = torch.zeros(1).cuda(2)
        # Placeholder; replaced by a tensor inside the object loop.
        cost_volume = []
        # Author-recorded figure (unverified): 5710.
        # Make sure all feature maps live on device 2.
        l_lf = l_lf.cuda(2)
        r_lf = r_lf.cuda(2)
        r_sf = r_sf.cuda(2)
        l_sf = l_sf.cuda(2)
        # Author-recorded figure (unverified): 985.
        # Combine both left feature maps according to the masks; where
        # neither mask is positive, fall back to `l_lf`.
        feature = l_lf * l_mask + l_sf * s_mask
        feature = torch.where((l_mask + s_mask) > 0, feature, l_lf)

        # `count` is not read anywhere in the active code.
        count = 0
        start_time = time.time()
        # Repeat the full computation 10 times and print the elapsed time of
        # each repetition (see Notes in the docstring).
        for z in range(10):
            start_time = time.time()
            for i in range(150):
                # Author's notes: "ground 0-270, sky 0-40";
                # "initial 0.46, after 0.18" (presumably timings).

                with torch.no_grad():
                    # Stop once `i` exceeds the largest object id in P3.
                    if i > torch.max(P3).type(torch.int32):
                        break
                    # Search range and bounding box of object `i`.
                    min_d = pre1[0, 0, i].long()
                    max_d = pre1[0, 1, i].long()
                    x1, y1, x2, y2, size = pre2[0, i].long()
                    # Fresh zero volume: (box rows, box cols, max_d - min_d).
                    cost_volume = torch.zeros(x2 - x1, y2 - y1,
                                              max_d - min_d).cuda(2)
                    # 0/1 mask of pixels inside the box that belong to `i`.
                    object_mask = P3[0, x1:x2, y1:y2]
                    object_mask = torch.where(object_mask == i, one, zero)
                    s_mask_o = object_mask * s_mask[0, x1:x2, y1:y2]
                    l_mask_o = object_mask * l_mask[0, x1:x2, y1:y2]
                    # Coordinates (relative to the box) of the selected pixels.
                    s_match = s_mask_o.nonzero().cuda(2)
                    l_match = l_mask_o.nonzero().cuda(2)
                    # If a mask selects nothing, use every object pixel.
                    if s_match.shape[0] == 0:
                        s_match = object_mask.nonzero().cuda(2)
                    if l_match.shape[0] == 0:
                        l_match = object_mask.nonzero().cuda(2)
                # Gather left features at the selected coordinates.
                # NOTE(review): the coordinates are relative to the
                # (x1, y1) box but index the uncropped `feature` (and
                # `r_sf` / `r_lf` below) -- confirm whether an offset is
                # intended.
                s_l_o = feature[..., s_match[:, 0], s_match[:, 1]]
                l_l_o = feature[..., l_match[:, 0], l_match[:, 1]]
                # `num_s` / `num_l` are not read in the active code.
                num_s = s_match.shape[0]
                num_l = l_match.shape[0]

                with torch.no_grad():
                    sy_match = s_match[:, 1]
                    sx_match = s_match[:, 0]
                    ly_match = l_match[:, 1]
                    lx_match = l_match[:, 0]
                    # Number of candidate shifts for this object.
                    d = max_d - min_d
                    # Tile the coordinates `d` times along a new leading row.
                    sx_match = sx_match.repeat(1, d)
                    sy_match = sy_match.repeat(1, d)
                    # One shift value per tiled coordinate, laid out to line
                    # up with `sy_match`.
                    range_d_s = torch.arange(min_d, max_d).cuda(2).repeat(
                        s_match.shape[0],
                        1).transpose(1, 0).contiguous().view_as(sy_match)
                    # Column to sample on the right side: col - shift.
                    sy_match -= range_d_s
                    lx_match = lx_match.repeat(1, d)
                    ly_match = ly_match.repeat(1, d)
                    range_d_l = torch.arange(min_d, max_d).cuda(2).repeat(
                        l_match.shape[0],
                        1).transpose(1, 0).contiguous().view_as(ly_match)
                    ly_match -= range_d_l
                # Gather right features at the shifted columns and tile the
                # left features to the same shape. The literal 32 hard-codes
                # the number of feature channels.
                s_r_o_t = r_sf[..., sx_match,
                               sy_match].reshape(1, 32, s_l_o.shape[-1], d)
                s_l_o = s_l_o.repeat(1, 1, 1,
                                     d).reshape(1, 32, s_l_o.shape[-1], d)
                l_r_o_t = r_lf[..., lx_match,
                               ly_match].reshape(1, 32, l_l_o.shape[-1], d)
                l_l_o = l_l_o.repeat(1, 1, 1,
                                     d).reshape(1, 32, l_l_o.shape[-1], d)
                # Earlier variant kept by the author (accumulated in lists):
                # cost_s.append(torch.where(sy_match.reshape(1,s_l_o.shape[-2],d)>=0,cosine_s(s_l_o,s_r_o_t),zero))
                # cost_l.append(torch.where(ly_match.reshape(1,l_l_o.shape[-2],d)>=0,cosine_s(l_l_o,l_r_o_t),zero))
                # Score is `cosine_s(left, right)` where the shifted column
                # is non-negative, and zero otherwise.
                cost_s = torch.where(
                    sy_match.reshape(1, s_l_o.shape[-2], d) >= 0,
                    cosine_s(s_l_o, s_r_o_t), zero)
                cost_l = torch.where(
                    ly_match.reshape(1, l_l_o.shape[-2], d) >= 0,
                    cosine_s(l_l_o, l_r_o_t), zero)
                # Convert back to volume coordinates: undo the column shift
                # and turn the shift value into a 0-based depth index.
                with torch.no_grad():
                    sy_match = sy_match + range_d_s
                    range_d_s = range_d_s - min_d
                # Scatter the `s` scores into the volume.
                cost_volume[sx_match.squeeze(),
                            sy_match.squeeze(),
                            range_d_s.squeeze()] = cost_s.view_as(
                                sy_match).squeeze()
                with torch.no_grad():
                    ly_match = ly_match + range_d_l
                    range_d_l = range_d_l - min_d
                # Scatter the `l` scores into the volume.
                cost_volume[lx_match.squeeze(),
                            ly_match.squeeze(),
                            range_d_l.squeeze()] = cost_l.view_as(
                                ly_match).squeeze()

                # ---- aggregation over plane ids inside this object ----
                # `a_volume` is allocated but not read in the active code.
                a_volume = torch.zeros_like(cost_volume).cuda(2)
                # Plane id for pixels of object `i`, -1 everywhere else.
                object_r = torch.where(P3[0, x1:x2, y1:y2] == i,
                                       P4[0, x1:x2, y1:y2], -one)
                max_r = torch.max(object_r).long()
                for j in range(1, max_r + 1):
                    with torch.no_grad():
                        # 0/1 mask of plane `j` within the object box.
                        plane_mask = torch.where(object_r == j, one, zero)
                        # NOTE(review): `.cuda()` without an index targets
                        # the current default device, unlike the explicit
                        # `.cuda(2)` used elsewhere -- confirm intent.
                        index = plane_mask.nonzero().long().cuda()
                        # Skip plane ids that have no pixels.
                        if index.shape[0] < 1:
                            continue
                        # Tight bounding box of the plane (end-exclusive).
                        xp1, xp2, yp1, yp2 = torch.min(
                            index[:,
                                  0]), torch.max(index[:, 0]) + 1, torch.min(
                                      index[:, 1]), torch.max(index[:, 1]) + 1
                        plane_mask = plane_mask[..., xp1:xp2, yp1:yp2]
                        s_plane_mask = plane_mask * s_mask[..., x1:x2,
                                                           y1:y2][..., xp1:xp2,
                                                                  yp1:yp2]
                        l_plane_mask = plane_mask * l_mask[..., x1:x2,
                                                           y1:y2][..., xp1:xp2,
                                                                  yp1:yp2]
                    # Slice of the volume covering the plane's bounding box.
                    # NOTE(review): this is obtained by slicing
                    # `cost_volume`, so it presumably aliases it -- verify
                    # that the successive writes below interact as intended.
                    plane = cost_volume[..., xp1:xp2, yp1:yp2, :]
                    # Weighted mean of the plane's scores using weights from
                    # `self.cluster` on the `l_sf` features.
                    s_weights = self.cluster(
                        l_sf[..., x1:x2, y1:y2][..., xp1:xp2, yp1:yp2],
                        s_plane_mask)
                    s_cost = torch.sum(torch.sum(
                        plane * s_weights, -2, keepdim=True),
                                       -3,
                                       keepdim=True) / torch.sum(s_weights)
                    # Same, using the `l_lf` features.
                    l_weights = self.cluster(
                        l_lf[..., x1:x2, y1:y2][..., xp1:xp2, yp1:yp2],
                        l_plane_mask)
                    l_cost = torch.sum(torch.sum(
                        plane * l_weights, -2, keepdim=True),
                                       -3,
                                       keepdim=True) / torch.sum(l_weights)
                    with torch.no_grad():
                        # Expand the 2-D masks along a leading batch axis
                        # and a trailing depth axis matching `plane`.
                        plane_mask=plane_mask.view(1,plane_mask.shape[0],plane_mask.shape[1],1) \
                                  .expand(1,plane_mask.shape[0],plane_mask.shape[1],plane.shape[-1])
                        s_plane_mask=s_plane_mask.view(plane_mask.shape[0],plane_mask.shape[1],plane_mask.shape[2],1) \
                                  .expand(plane_mask.shape[0],plane_mask.shape[1],plane_mask.shape[2],plane.shape[-1])
                        l_plane_mask=l_plane_mask.view(plane_mask.shape[0],plane_mask.shape[1],plane_mask.shape[2],1) \
                                  .expand(plane_mask.shape[0],plane_mask.shape[1],plane_mask.shape[2],plane.shape[-1])
                    # Earlier variant kept by the author (weights swapped):
                    # plane=torch.where(s_plane_mask==1,s_cost*(1-s_weights)+s_weights*plane,plane)
                    # plane=torch.where(l_plane_mask==1,l_cost*(1-l_weights)+l_weights*plane,plane)
                    # Where the `s` mask is set, blend the plane mean with
                    # the per-pixel scores; elsewhere keep `plane`.
                    cost_volume[..., xp1:xp2, yp1:yp2, :] = torch.where(
                        s_plane_mask == 1,
                        s_cost * s_weights + (1 - s_weights) * plane, plane)
                    # Same blend for pixels where the `l` mask is set.
                    cost_volume[..., xp1:xp2, yp1:yp2, :] = torch.where(
                        l_plane_mask == 1,
                        l_cost * l_weights + (1 - l_weights) * plane, plane)
                    # 1 where `s_plane_mask - l_plane_mask` is positive.
                    exist = torch.where(s_plane_mask - l_plane_mask > 0, one,
                                        zero)
                    # Weights from both feature maps concatenated on axis -3.
                    weights = self.cluster(
                        torch.cat([
                            l_lf[..., x1:x2, y1:y2][..., xp1:xp2, yp1:yp2],
                            l_sf[..., x1:x2, y1:y2][..., xp1:xp2, yp1:yp2]
                        ], -3), plane_mask[..., 0])
                    # Weighted sum of the plane, normalised by the number of
                    # `exist` entries.
                    costs = torch.sum(torch.sum(
                        plane * weights, -2, keepdim=True),
                                      -3,
                                      keepdim=True) / torch.sum(exist)
                    # Remaining plane pixels (those not flagged in `exist`)
                    # receive the weighted plane cost.
                    plane_mask = plane_mask - exist
                    cost_volume[..., xp1:xp2, yp1:yp2, :] = torch.where(
                        plane_mask == 1, costs * weights, plane)
            # Elapsed time of this repetition.
            print(time.time() - start_time)
        # NOTE(review): blocks for 100 seconds before returning -- looks like
        # a debugging pause; confirm it is still wanted.
        time.sleep(100)
        # Disabled follow-up steps kept by the author (argmin + refinement):
        # #ss_argmin
        # disparity[...,x1:x2,y1:y2]=ss_argmin(cost_volume,min_d,max_d)
        # #refinement
        # refine=torch.zeros_like(disparity)[...,x1:x2,y1:y2]
        # for j in range(min_r,max_r+1):
        #     plane_mask=torch.where(object_r==j,one,zero)[x1:x2,y1:y2]
        #     xp1,xp2,yp1,yp2=crop(plane_mask)
        #     plane_mask=plane_mask[xp1:xp2,yp1:yp2]
        #     s_plane_mask=plane_mask*s_mask[x1:x2,y1:y2][xp1:xp2,yp1:yp2]
        #     l_plane_mask=plane_mask*l_mask[x1:x2,y1:y2][xp1:xp2,yp1:yp2]
        #     plane_mask=plane_mask-torch.where(s_plane_mask+l_plane_mask>0,one,zero)
        #     plane=disparity[...,x1:x2,y1:y2][...,xp1:xp2,yp1:yp2]*plane_mask
        #     s_weights=self.cluster(l_sf[...,x1:x2,y1:y2][...,xp1:xp2,yp1:yp2],s_plane_mask)
        #     s_cost=torch.sum(torch.sum(plane*s_weights,-2,keepdim=True),-3,keepdim=True)/torch.sum(s_weights)
        #     l_weights=self.cluster(l_lf[...,x1:x2,y1:y2][...,xp1:xp2,yp1:yp2],l_plane_mask)
        #     l_cost=torch.sum(torch.sum(plane*l_weights,-2),-2)/torch.sum(l_weights)
        #     weights=self.cluster(l_lf[...,x1:x2,y1:y2][...,xp1:xp2,yp1:yp2],plane_mask)
        #     costs=torch.sum(torch.sum(plane*weights,-2,keepdim=True),-3,keepdim=True)/torch.sum(weights)
        #     plane=torch.where(s_plane_mask==1,s_cost*s_weights,plane)
        #     plane=torch.where(l_plane_mask==1,l_cost*l_weights,plane)
        #     plane=torch.where(plane_mask==1,cost*weights,plane)
        #     disparity[...,x1:x2,y1:y2][...,xp1:xp2,yp1:yp2]=plane

        # Only the volume of the last processed object is returned.
        return cost_volume
    def forward(self, l, r, P, pre, matching):
        """Fill a per-object matching cost volume on CUDA device 1.

        Features are extracted from ``l`` and ``r`` with
        ``self.feature_extraction2`` followed by ``self.feature_extraction``.
        For each object index ``i`` in ``range(torch.max(P[..., 1]))`` a zero
        volume of shape ``(x2 - x1, y2 - y1, max_d - min_d + 1)`` is
        allocated and filled with ``cosine_s`` scores at the positions listed
        in ``matching``.

        Args:
            l, r: Inputs handed to the feature extractors (presumably the
                left/right images of a stereo pair -- TODO confirm).
            P: Tensor whose last axis is read at indices 0, 1, 2 and 3; only
                index 1 (object id per pixel) affects the active code.
            pre: ``pre[0, i]`` is unpacked as ``(x1, y1, x2, y2, size)``.
            matching: Indexable of per-object index collections.
                ``matching[0..2][i]`` are used as row, column and shift for
                the ``l_sf`` / ``r_sf`` features, ``matching[3..5][i]`` the
                same for ``l_lf`` / ``r_lf``; the min and max of
                ``matching[-1][i]`` define the depth range of the volume.
                Presumably these are integer tensors on the same device as
                the features -- TODO confirm.

        Returns:
            The ``cost_volume`` of the last object processed; earlier
            volumes are overwritten on each iteration.

        Notes:
            The object loop is repeated 10 times with the elapsed time
            printed, and the method sleeps for 100 seconds before returning.
            NOTE(review): this looks like benchmarking/debug scaffolding.
            NOTE(review): if ``torch.max(P[..., 1])`` is not positive the
            object loop never runs and ``cost_volume`` is unbound at the
            ``return`` statement.
        """
        # Original author's notes (meaning not evident from this code):
        #   self.P=P[1,0]
        #   0 l to r, 1 min, 2 max
        #   [l_box, r_box, match], [min_d, max_d]
        # This timestamp is overwritten inside the loop before it is read.
        start_time = time.time()
        with torch.no_grad():
            # Device-1 copy stored on the module; the loop below reads the
            # `pre` argument directly, not this copy.
            self.pre = pre.cuda(1)
        # Split the last axis of P into its four channels on device 1.
        P1 = P[..., 0].cuda(1)
        P2 = P[..., 3].cuda(1)
        P3 = P[..., 1].cuda(1)  # object id per pixel
        P4 = P[..., 2].cuda(1)  # not read in the active code
        # `l_mask` / `s_mask` are computed but not read in the active code.
        l_mask = P2 - P1
        s_mask = P1
        # Author-recorded figures, presumably memory readings (unverified):
        # "basic cuda 524", 1923, 2727.

        # Two-stage feature extraction for the `l` input.
        l_sf = self.feature_extraction2(l)
        l_lf = self.feature_extraction(l_sf)

        # Author's note: "the cuda won't copy the volume to the new gpu".
        # Same two-stage extraction for the `r` input.
        r_sf = self.feature_extraction2(r)
        r_lf = self.feature_extraction(r_sf)
        # Author-recorded figure (unverified): 3267.

        # `disparity` and `one` are not read in the active code.
        disparity = torch.zeros([540, 960]).cuda(1)
        one = torch.ones(1).cuda(1)
        # One-element zero used as the fallback value in torch.where.
        zero = torch.zeros(1).cuda(1)
        # Author-recorded figure (unverified): 5710.
        # Make sure all feature maps live on device 1.
        l_lf = l_lf.cuda(1)
        r_lf = r_lf.cuda(1)
        r_sf = r_sf.cuda(1)
        l_sf = l_sf.cuda(1)
        # Author-recorded figure (unverified): 985.
        # Disabled mask-based feature fusion kept by the author:
        # feature=l_lf*l_mask+l_sf*s_mask
        # feature=torch.where((l_mask+s_mask)>0,feature,l_lf)

        # `count` is not read anywhere in the active code.
        count = 0
        # Repeat the full computation 10 times and print the elapsed time of
        # each repetition (see Notes in the docstring).
        for z in range(10):
            start_time = time.time()
            for i in range(torch.max(P3).type(torch.int32)):
                # Bounding box of object `i`.
                x1, y1, x2, y2, size = pre[0, i].long()
                # Depth range covered by this object's volume.
                max_d = torch.max(matching[-1][i])
                min_d = torch.min(matching[-1][i])

                # Fresh zero volume; the +1 makes `max_d` itself addressable.
                cost_volume = torch.zeros(x2 - x1, y2 - y1,
                                          max_d - min_d + 1).cuda(1)
                # Author's notes: "ground 0-270, sky 0-40";
                # "initial 0.46, after 0.18, volume 0.3";
                # "cost computation initial 0.20, after 0.14, volume 0.3"
                # (presumably timings).
                # Only process the `s` entries if any row index is positive.
                if torch.max(matching[0][i]) > 0:
                    # Left features at (row, col) inside the object box.
                    s_feature = l_sf[..., x1:x2, y1:y2][..., matching[0][i],
                                                        matching[1][i]]
                    # Right-side column = col - shift, clamped to at least
                    # -1 so that out-of-range positions can be detected by
                    # the `>= 0` test below.
                    s_r_y = torch.max(matching[1][i] - matching[2][i],
                                      -torch.ones_like(matching[2][i]))
                    s_r_o_t = r_sf[..., x1:x2, y1:y2][..., matching[0][i],
                                                      s_r_y]
                    # Score is `cosine_s(left, right)` where the shifted
                    # column is valid, and zero otherwise.
                    s_cost = torch.where(s_r_y >= 0,
                                         cosine_s(s_feature, s_r_o_t), zero)
                    # 0-based depth index inside the volume.
                    d = matching[2][i] - min_d
                    cost_volume[matching[0][i], matching[1][i], d] = s_cost
                # Same procedure for the `l` entries and the `*_lf` features.
                if torch.max(matching[3][i]) > 0:
                    l_feature = l_lf[..., x1:x2, y1:y2][..., matching[3][i],
                                                        matching[4][i]]
                    l_r_y = torch.max(matching[4][i] - matching[5][i],
                                      -torch.ones_like(matching[5][i]))
                    l_r_o_t = r_lf[..., x1:x2, y1:y2][..., matching[3][i],
                                                      l_r_y]
                    d = matching[5][i] - min_d
                    l_cost = torch.where(l_r_y >= 0,
                                         cosine_s(l_feature, l_r_o_t), zero)
                    cost_volume[matching[3][i], matching[4][i], d] = l_cost
            # Elapsed time of this repetition.
            print(time.time() - start_time)

        # Printed once more after the loop; `start_time` still holds the
        # start of the last repetition.
        print(time.time() - start_time)
        # Author's note: "3s, 4600mb".
        # NOTE(review): blocks for 100 seconds before returning -- looks like
        # a debugging pause; confirm it is still wanted.
        time.sleep(100)
        # Disabled follow-up steps kept by the author (argmin + refinement):
        # #ss_argmin
        # disparity[...,x1:x2,y1:y2]=ss_argmin(cost_volume,min_d,max_d)
        # #refinement
        # refine=torch.zeros_like(disparity)[...,x1:x2,y1:y2]
        # for j in range(min_r,max_r+1):
        #     plane_mask=torch.where(object_r==j,one,zero)[x1:x2,y1:y2]
        #     xp1,xp2,yp1,yp2=crop(plane_mask)
        #     plane_mask=plane_mask[xp1:xp2,yp1:yp2]
        #     s_plane_mask=plane_mask*s_mask[x1:x2,y1:y2][xp1:xp2,yp1:yp2]
        #     l_plane_mask=plane_mask*l_mask[x1:x2,y1:y2][xp1:xp2,yp1:yp2]
        #     plane_mask=plane_mask-torch.where(s_plane_mask+l_plane_mask>0,one,zero)
        #     plane=disparity[...,x1:x2,y1:y2][...,xp1:xp2,yp1:yp2]*plane_mask
        #     s_weights=self.cluster(l_sf[...,x1:x2,y1:y2][...,xp1:xp2,yp1:yp2],s_plane_mask)
        #     s_cost=torch.sum(torch.sum(plane*s_weights,-2,keepdim=True),-3,keepdim=True)/torch.sum(s_weights)
        #     l_weights=self.cluster(l_lf[...,x1:x2,y1:y2][...,xp1:xp2,yp1:yp2],l_plane_mask)
        #     l_cost=torch.sum(torch.sum(plane*l_weights,-2),-2)/torch.sum(l_weights)
        #     weights=self.cluster(l_lf[...,x1:x2,y1:y2][...,xp1:xp2,yp1:yp2],plane_mask)
        #     costs=torch.sum(torch.sum(plane*weights,-2,keepdim=True),-3,keepdim=True)/torch.sum(weights)
        #     plane=torch.where(s_plane_mask==1,s_cost*s_weights,plane)
        #     plane=torch.where(l_plane_mask==1,l_cost*l_weights,plane)
        #     plane=torch.where(plane_mask==1,cost*weights,plane)
        #     disparity[...,x1:x2,y1:y2][...,xp1:xp2,yp1:yp2]=plane

        # Only the volume of the last processed object is returned.
        return cost_volume
Exemple #3
0
    def forward(self, l, r, P, pre1, pre2):
        #self.P=P[1,0]
        #0 l to r,1 min,2 max
        #[l_box,r_box,match],[min_d,max_d]
        self.pre = pre1
        self.pre2 = pre2
        P1 = P[..., 0]
        P2 = P[..., 3]
        P3 = P[..., 1]
        P4 = P[..., 2]
        #feature extraction
        l_mask = P2 - P1
        s_mask = P1
        #basic cuda 524
        #print(l.type)
        #1923
        #print(torch.cuda.memory_allocated(1))
        #2727
        l_sf = self.feature_extraction2(l)
        l_lf = self.feature_extraction(l_sf)

        #print(torch.cuda.memory_allocated(2))
        #the cuda won't copy the volume to the new gpu
        # a=l_lf.cuda(1)
        # b=l_lf.cuda(2)
        # c=l_sf.cuda(3)
        r_sf = self.feature_extraction2(r)
        r_lf = self.feature_extraction(r_sf)
        #print(torch.cuda.memory_allocated(1))
        #3267

        #print(torch.cuda.memory_allocated(2))
        #reshape the mask to batch and channel

        disparity = torch.zeros([540, 960]).cuda(2)
        one = torch.ones(1).cuda(2)
        zero = torch.zeros(1).cuda(2)
        cost_volume = []
        #5710
        #print(value)
        l_lf = l_lf.cuda(2)
        r_lf = r_lf.cuda(2)
        r_sf = r_sf.cuda(2)
        l_sf = l_sf.cuda(2)
        #985
        feature = l_lf * l_mask + l_sf * s_mask
        feature = torch.where((l_mask + s_mask) > 0, feature, l_lf)
        # for i in range(100):
        #   cost_volume.append(cosine_s(l_lf,torch.cat([r_lf[...,i:],r_lf[...,:i]],-1)))
        # cost_volume=torch.stack(cost_volume)
        # print(torch.cuda.memory_allocated(2))
        #time.sleep(100)

        #promotion
        #we can segment with bounding box and divide the whole image into many parts
        #each single bounding box will be managed through network not the whole image
        #matching cost computation
        count = 0
        start_time = time.time()
        for i in range(torch.max(P3).type(torch.int32) + 1):
            #ground 0-270, sky 0-40
            # if i==13 or i == 14:
            #   continue
            # i=60
            #print(pre2.shape)
            #i=14
            min_d = pre1[0, 0, i].long()
            max_d = pre1[0, 1, i].long()
            object_mask = torch.where(P3 == i, one, zero)
            #x1,y1,x2,y2=crop(object_mask)
            x1, y1, x2, y2, size = pre2[0, i].long()
            if min_d > y2:
                min_d = zero.long()
            else:
                min_d = torch.max(min_d, zero.long()).long()
            if max_d <= min_d:
                max_d = min_d + one.long() * 200
                max_d = max_d.long()
            max_d = torch.min(max_d, y2).long()

            object_mask = object_mask[0, x1:x2, y1:y2]
            #print(y1,y2)
            s_mask_o = object_mask * s_mask[0, x1:x2, y1:y2]
            l_mask_o = object_mask * l_mask[0, x1:x2, y1:y2]

            s_l_o = feature[..., x1:x2, y1:y2] * s_mask_o
            l_l_o = feature[..., x1:x2, y1:y2] * l_mask_o
            #print(torch.max(min_d,zero).long())

            # s_r_o=r_sf[...,x1:x2,y1-max_d:y2-min_d]
            # l_r_o=r_lf[...,x1:x2,y1-max_d:y2-min_d]
            cost_s = []
            cost_l = []
            #ground and sky
            if (y2 - y1) > 640:
                if x2 > 500:
                    print('ground')
                    cost_l = []
                    count = 0
                    m = x1.item()
                    while (m < x2):
                        #print(m)
                        min_d = np.max([count * 20 - 5, 0])
                        max_d = min_d + 30
                        cost_slice = []
                        for j in range(min_d, max_d):
                            if y1 - j > 0:
                                l_r_o_t = r_lf[..., m:m + 20, y1 - j:y2 - j]

                                cost_slice.append(
                                    torch.where(
                                        l_mask_o[...,
                                                 m - x1:m - x1 + 20, :] == 1,
                                        cosine_s(
                                            l_l_o[..., m - x1:m - x1 + 20, :],
                                            l_r_o_t), zero))
                            else:

                                l_r_o_t = torch.cat([
                                    r_lf[..., m:m + 20, 0:j - y1] * zero,
                                    r_lf[..., m:m + 20, 0:y2 - j]
                                ], -1)
                                #print(j-y1,y2-j)
                                cost_slice.append(
                                    torch.where(
                                        l_mask_o[...,
                                                 m - x1:m - x1 + 20, :] == 1,
                                        cosine_s(
                                            l_l_o[..., m - x1:m - x1 + 20, :],
                                            l_r_o_t), zero))
                        cost_slice = torch.stack(cost_slice, -1)
                        if m == x1:
                            cost_l = cost_slice
                        else:
                            cost_l = torch.cat([cost_l, cost_slice], -2)
                        m += 20
                        #print(m,x1,m-x1)
                        count += 1
                        if m + 20 > x2:
                            #m-=20
                            min_d = np.max([count * 20 - 5, 0])
                            max_d = min_d + 30
                            cost_slice = []
                            for j in range(min_d, max_d):
                                if y1 - j > 0:
                                    l_r_o_t = r_lf[..., m:, y1 - j:y2 - j]
                                    cost_slice.append(
                                        torch.where(
                                            l_mask_o[..., m - x1:m - x1 +
                                                     20, :] == 1,
                                            cosine_s(l_l_o[..., m - x1:, :],
                                                     l_r_o_t), zero))
                                else:
                                    #
                                    l_r_o_t = torch.cat([
                                        r_lf[..., m:, 0:j - y1] * zero,
                                        r_lf[..., m:, 0:y2 - j]
                                    ], -1)
                                    #print(m-x1,l_l_o[...,m-x1:,:].shape)
                                    cost_slice.append(
                                        torch.where(
                                            l_mask_o[..., m - x1:m - x1 +
                                                     20, :] == 1,
                                            cosine_s(l_l_o[..., m - x1:, :],
                                                     l_r_o_t), zero))
                            cost_slice = torch.stack(cost_slice, -1)
                        if count == 0:
                            cost_l = cost_slice
                        else:
                            cost_l = torch.cat([cost_l, cost_slice], -2)
                        break
                    cost_volume = cost_l
                else:
                    print('sky')
                    if x1 < 10:
                        cost_l = []
                        count = 0
                        m = x1.item()
                        while (m < x2):
                            min_d = np.max([count * 5 - 5, 0])
                            max_d = min_d + 15
                            cost_slice = []
                            for j in range(min_d, max_d):
                                if y1 - j > 0:
                                    l_r_o_t = r_lf[..., m:m + 20,
                                                   y1 - j:y2 - j]
                                    print(
                                        l_mask_o[...,
                                                 m - x1:m - x1 + 20, :].shape,
                                        l_r_o_t.shape,
                                        l_l_o[...,
                                              m - x1:m - x1 + 20, :].shape)
                                    cost_slice.append(
                                        torch.where(
                                            l_mask_o[..., m - x1:m - x1 +
                                                     20, :] == 1,
                                            cosine_s(
                                                l_l_o[...,
                                                      m - x1:m - x1 + 20, :],
                                                l_r_o_t), zero))
                                else:

                                    l_r_o_t = torch.cat([
                                        r_lf[..., m:m + 20, 0:j - y1] * zero,
                                        r_lf[..., m:m + 20, 0:y2 - j]
                                    ], -1)
                                    #print(j-y1,y2-j)
                                    cost_slice.append(
                                        torch.where(
                                            l_mask_o[..., m - x1:m - x1 +
                                                     20, :] == 1,
                                            cosine_s(
                                                l_l_o[...,
                                                      m - x1:m - x1 + 20, :],
                                                l_r_o_t), zero))
                            cost_slice = torch.stack(cost_slice, -1)
                            if m == x1:
                                cost_l = cost_slice
                            else:
                                cost_l = torch.cat([cost_l, cost_slice], -2)
                            m += 20
                            #print(m,x1,m-x1)
                            count += 1
                            if m + 20 > x2:
                                #m-=20
                                min_d = np.max([count * 20 - 5, 0])
                                max_d = min_d + 30
                                cost_slice = []
                                for j in range(min_d, max_d):
                                    if y1 - j > 0:
                                        l_r_o_t = r_lf[..., m:, y1 - j:y2 - j]
                                        cost_slice.append(
                                            torch.where(
                                                l_mask_o[..., m - x1:m - x1 +
                                                         20, :] == 1,
                                                cosine_s(
                                                    l_l_o[..., m - x1:, :],
                                                    l_r_o_t), zero))
                                    else:
                                        #
                                        l_r_o_t = torch.cat([
                                            r_lf[..., m:, 0:j - y1] * zero,
                                            r_lf[..., m:, 0:y2 - j]
                                        ], -1)
                                        #print(m-x1,l_l_o[...,m-x1:,:].shape)
                                        cost_slice.append(
                                            torch.where(
                                                l_mask_o[..., m - x1:m - x1 +
                                                         20, :] == 1,
                                                cosine_s(
                                                    l_l_o[..., m - x1:, :],
                                                    l_r_o_t), zero))
                                cost_slice = torch.stack(cost_slice, -1)
                            if count == 0:
                                cost_l = cost_slice
                            else:
                                cost_l = torch.cat([cost_l, cost_slice], -2)
                            break
                        cost_volume = cost_l
                continue
            #print(i)
            #print(x1,x2,y1,y2,min_d,max_d)
            for j in range(min_d, max_d):
                #print(j)
                #count+=1
                #print(count)
                if y1 - j > 0:
                    #print(y1-y2-i,-i)
                    s_r_o_t = r_sf[..., x1:x2, y1 - j:y2 - j]
                    #print(s_r_o_t.shape)
                    cost_s.append(
                        torch.where(s_mask_o == 1, cosine_s(s_l_o, s_r_o_t),
                                    zero))
                else:
                    #print(i-y1,y2-i)
                    s_r_o_t = torch.cat([
                        r_sf[..., x1:x2, 0:j - y1] * zero, r_sf[..., x1:x2,
                                                                0:y2 - j]
                    ], -1)
                    #print(y2-j)
                    cost_s.append(
                        torch.where(s_mask_o == 1, cosine_s(s_l_o, s_r_o_t),
                                    zero))
            cost_s = torch.stack(cost_s, -1)
            for j in range(min_d, max_d):
                if y1 - j > 0:
                    l_r_o_t = r_lf[..., x1:x2, y1 - j:y2 - j]
                    cost_l.append(
                        torch.where(l_mask_o == 1, cosine_s(l_l_o, l_r_o_t),
                                    zero))
                else:
                    #print(r_lf.shape,r_sf.shape)
                    l_r_o_t = torch.cat([
                        r_lf[..., x1:x2, 0:j - y1] * zero, r_lf[..., x1:x2,
                                                                0:y2 - j]
                    ], -1)
                    cost_l.append(
                        torch.where(l_mask_o == 1, cosine_s(l_l_o, l_r_o_t),
                                    zero))
            cost_l = torch.stack(cost_l, -1)
            cost_volume = cost_s + cost_l
            print(torch.cuda.memory_allocated(2) / 1e+6)
            #time.sleep(30)
            """
            
            #aggregation
            a_volume=torch.zeros_like(cost_volume)
            object_r=torch.where(P3==i,P4,zero)
            max_r=torch.max(object_r).long()
            object_r=torch.where(P3==i,P4,max_r+1)
            min_r=torch.min(object_r).long()
            for j in range(min_r,max_r+1):
                plane_mask=torch.where(object_r==j,one,zero)[x1:x2,y1:y2]
                xp1,xp2,yp1,yp2=crop(plane_mask).long()
                #xp1,xp2,yp1,yp2.r_size=self.pre[0,0][1]
                plane_mask=plane_mask[xp1:xp2,yp1:yp2]
                plane=cost_volume[...,xp1:xp2,yp1:yp2,:]
                s_plane_mask=plane_mask*s_mask[x1:x2,y1:y2][xp1:xp2,yp1:yp2]
                l_plane_mask=plane_mask*l_mask[x1:x2,y1:y2][xp1:xp2,yp1:yp2]
                s_weights=self.cluster(l_sf[...,x1:x2,y1:y2][...,xp1:xp2,yp1:yp2],s_plane_mask)
                s_cost=torch.sum(torch.sum(plane*s_weights,-2,keepdim=True),-3,keepdim=True)/torch.sum(s_weights)
                l_weights=self.cluster(l_lf[...,x1:x2,y1:y2][...,xp1:xp2,yp1:yp2],l_plane_mask)
                l_cost=torch.sum(torch.sum(plane*l_weights,-2),-2)/torch.sum(l_weights)
                plane_mask=plane_mask-torch.where(s_plane_mask+l_plane_mask>0,one,zero)
                plane_mask=plane_mask.view(plane_mask.shape[0],plane_mask.shape[1],plane_mask.shape[2],1) \
                          .expand(plane_mask.shape[0],plane_mask.shape[1],plane_mask.shape[2],plane.shape[-1])
                s_plane_mask=s_plane_mask.view(plane_mask.shape[0],plane_mask.shape[1],plane_mask.shape[2],1) \
                          .expand(plane_mask.shape[0],plane_mask.shape[1],plane_mask.shape[2],plane.shape[-1])  
                l_plane_mask=l_plane_mask.view(plane_mask.shape[0],plane_mask.shape[1],plane_mask.shape[2],1) \
                          .expand(plane_mask.shape[0],plane_mask.shape[1],plane_mask.shape[2],plane.shape[-1])  
                plane=torch.where(s_plane_mask==1,s_cost*s_weights,plane)
                plane=torch.where(l_plane_mask==1,l_cost*l_weights,plane)
                weights=self.cluster(l_lf[...,x1:x2,y1:y2][...,xp1:xp2,yp1:yp2],plane_mask)
                costs=torch.sum(torch.sum(plane*weights,-2,keepdim=True),-3,keepdim=True)/torch.sum(weights)
                plane=torch.where(plane_mask==1,cost*weights,plane)
                cost_volume[...,xp1:xp2,yp1:yp2,:]=plane
            #ss_argmin
            disparity[...,x1:x2,y1:y2]=ss_argmin(cost_volume,min_d,max_d)
            #refinement
            refine=torch.zeros_like(disparity)[...,x1:x2,y1:y2]
            for j in range(min_r,max_r+1):
                plane_mask=torch.where(object_r==j,one,zero)[x1:x2,y1:y2]
                xp1,xp2,yp1,yp2=crop(plane_mask)
                plane_mask=plane_mask[xp1:xp2,yp1:yp2]
                s_plane_mask=plane_mask*s_mask[x1:x2,y1:y2][xp1:xp2,yp1:yp2]
                l_plane_mask=plane_mask*l_mask[x1:x2,y1:y2][xp1:xp2,yp1:yp2]
                plane_mask=plane_mask-torch.where(s_plane_mask+l_plane_mask>0,one,zero)
                plane=disparity[...,x1:x2,y1:y2][...,xp1:xp2,yp1:yp2]*plane_mask
                s_weights=self.cluster(l_sf[...,x1:x2,y1:y2][...,xp1:xp2,yp1:yp2],s_plane_mask)
                s_cost=torch.sum(torch.sum(plane*s_weights,-2,keepdim=True),-3,keepdim=True)/torch.sum(s_weights)
                l_weights=self.cluster(l_lf[...,x1:x2,y1:y2][...,xp1:xp2,yp1:yp2],l_plane_mask)
                l_cost=torch.sum(torch.sum(plane*l_weights,-2),-2)/torch.sum(l_weights)
                weights=self.cluster(l_lf[...,x1:x2,y1:y2][...,xp1:xp2,yp1:yp2],plane_mask)
                costs=torch.sum(torch.sum(plane*weights,-2,keepdim=True),-3,keepdim=True)/torch.sum(weights)
                plane=torch.where(s_plane_mask==1,s_cost*s_weights,plane)
                plane=torch.where(l_plane_mask==1,l_cost*l_weights,plane)                           
                plane=torch.where(plane_mask==1,cost*weights,plane)
                disparity[...,x1:x2,y1:y2][...,xp1:xp2,yp1:yp2]=plane
            """
        print(time.time() - start_time)
        time.sleep(100)
        return cost_volume
    def forward(self, l,r,P,matching,plane,s_plane,l_plane):
        """Build a matching-cost volume from precomputed index triples.

        Features are extracted from both inputs, cosine similarities are
        evaluated only at the (row, column, disparity) triples listed in
        ``matching`` and scattered into a dense ``(540, 960, D + 1)`` volume
        on CUDA device 1. The entries selected by ``s_plane`` are then blended
        with a weighted mean of their costs.

        Args:
            l: Input passed to ``self.feature_extraction2`` (presumably the
                left image -- confirm against the caller).
            r: Input passed to ``self.feature_extraction2`` (presumably the
                right image -- confirm against the caller).
            P: Tensor read along its last axis at indices 0 and 3. Index 0 is
                used as the "s" mask; ``P[..., 3] - P[..., 0] > 0`` gives the
                "l" mask. It is combined with tensors on CUDA device 1.
            matching: Indexable holding six index tensors. Entries 0-2 are the
                row, column and disparity indices for the "s" features,
                entries 3-5 the same for the "l" features.
            plane: Unused by this method.
            s_plane: Indexable; ``s_plane[0]`` / ``s_plane[1]`` index rows and
                columns of the cost volume, ``s_plane[-3]`` is forwarded to
                ``self.cluster_vector``.
            l_plane: Unused by this method.

        Returns:
            The cost volume tensor allocated on CUDA device 1.
        """
        one=torch.ones(1).cuda(1)
        zero=torch.zeros(1).cuda(1)
        start_time=time.time()

        s_mask=P[...,0]
        l_mask=torch.where(P[...,3]-s_mask>zero,one,zero)

        # The "l" features are computed on top of the "s" features.
        l_sf=self.feature_extraction2(l)
        l_lf=self.feature_extraction(l_sf)
        r_sf=self.feature_extraction2(r)
        r_lf=self.feature_extraction(r_sf)

        l_lf=l_lf.cuda(1)
        r_lf=r_lf.cuda(1)
        r_sf=r_sf.cuda(1)
        l_sf=l_sf.cuda(1)

        # Largest disparity index referenced by either index set.
        D=torch.max(torch.max(matching[5]),torch.max(matching[2])).int()
        cost_volume=torch.zeros(540,960,D+1).cuda(1)

        # A negative right-hand column wraps around when used as an index, so
        # those entries are overwritten with zero by the torch.where below.
        s_feature=l_sf[...,matching[0],matching[1]]
        s_r_y=matching[1]-matching[2]
        s_r_o_t=r_sf[...,matching[0],s_r_y]
        s_cost=torch.where(s_r_y>=0,cosine_s(s_feature,s_r_o_t),zero)
        l_feature=l_lf[...,matching[3],matching[4]]
        l_r_y=matching[4]-matching[5]
        l_r_o_t=r_lf[...,matching[3],l_r_y]
        l_cost=torch.where(l_r_y>=0,cosine_s(l_feature,l_r_o_t),zero)

        cost_volume[matching[0],matching[1],matching[2]]=s_cost
        cost_volume[matching[3],matching[4],matching[5]]=l_cost

        #aggregation
        # `feature` was referenced below while its definition was commented
        # out, which raised NameError. The commented-out definition is restored.
        # NOTE(review): confirm the fused map (rather than l_sf) is what
        # cluster_vector is meant to receive.
        feature=l_lf*l_mask+l_sf*s_mask
        feature=torch.where((l_mask+s_mask)>0,feature,l_lf)
        s_weights_0=self.cluster_vector(feature, s_plane[0], s_plane[1], 1600, s_plane[-3].shape[0], s_plane[-3])
        zero_cost=torch.zeros_like(D)
        # NOTE(review): the volume is D+1 deep but the view below asks for D,
        # and the mask s_plane[1]>0 has no trailing axis to broadcast over the
        # disparity dimension -- verify the intended shapes.
        costs=torch.where(s_plane[1]>0,cost_volume[s_plane[0], s_plane[1],:],zero_cost).view(1600,s_plane[-3].shape[0],D)
        s_cost_0=torch.sum(costs*s_weights_0,0,keepdim=True)/torch.sum(s_weights_0,0)
        cost_volume[s_plane[0], s_plane[1]]=torch.where(s_plane[1]>0,s_cost_0*s_weights_0+(1-s_weights_0)*costs,costs)

        print(time.time()-start_time)

        return cost_volume
Exemple #5
0
 def forward(self, l,r):
     """Estimate a 540x960 disparity map, one labelled region at a time.

     For every label found in channel 1 of ``self.P`` the method builds a
     cosine-similarity cost volume over that label's disparity range,
     aggregates it per plane id (channel 2 of ``self.P``), takes
     ``ss_argmin`` to get disparities, and smooths the result plane by plane.

     Args:
         l: Input passed to both feature extractors (presumably the left
             image -- confirm against the caller).
         r: Input passed to both feature extractors (presumably the right
             image -- confirm against the caller).

     Returns:
         The ``(540, 960)`` disparity tensor.
     """
     # The method has no `P` parameter; the mask tensor lives on the module.
     P=self.P
     #feature extraction
     l_mask=P[:,:,3]-P[:,:,0]
     s_mask=P[:,:,0]
     l_lf=self.feature_extraction(l)
     l_sf=self.feature_extraction2(l)
     r_lf=self.feature_extraction(r)
     r_sf=self.feature_extraction2(r)
     # "s" features where the s mask is set, "l" features where the l mask is
     # set, and plain "l" features where neither mask is set.
     feature=l_lf*l_mask+l_sf*s_mask
     feature=torch.where((l_mask+s_mask)>0,feature,l_lf)
     disparity=torch.zeros([540,960])
     one=torch.ones(1)
     zero=torch.zeros(1)
     #matching cost computation
     for i in range(torch.max(P[:,:,1]).type(torch.int32)+1):
         min_d=self.pre2[i][0]
         max_d=self.pre2[i][1]
         object_mask=torch.where(P[:,:,1]==i,one,zero)
         # NOTE(review): the box is read from a fixed entry rather than one
         # indexed by i -- confirm this is intended.
         x1,y1,x2,y2,_size=self.pre[0,0][0]
         object_mask=object_mask[x1:x2,y1:y2]
         s_mask_o=object_mask*s_mask[x1:x2,y1:y2]
         l_mask_o=object_mask*l_mask[x1:x2,y1:y2]
         s_l_o=feature[...,x1:x2,y1:y2]*s_mask_o
         l_l_o=feature[...,x1:x2,y1:y2]*l_mask_o
         # torch.min(tensor, int) treats the int as a dim and returns a
         # (values, indices) pair, so clamp the bound with Python's min.
         col_hi=min(int(max_d),r_lf.shape[-1])
         s_r_o=r_sf[...,x1:x2,min_d:col_hi]
         l_r_o=r_lf[...,x1:x2,min_d:col_hi]
         cost_s=[]
         cost_l=[]
         # The disparity counter has its own name so the region index `i`
         # stays valid for the plane lookup after this loop.
         for d in range(min_d,max_d):
             if y1-d>0:
                 s_r_o_t=s_r_o[...,y1-d:y2-d]
                 l_r_o_t=l_r_o[...,y1-d:y2-d]
             else:
                 # Shifted window leaves the image: pad with zeros on the left.
                 s_r_o_t=torch.cat([torch.zeros_like(s_r_o[...,:d]),s_r_o[...,0:y2-d]],-1)
                 l_r_o_t=torch.cat([torch.zeros_like(l_r_o[...,:d]),l_r_o[...,0:y2-d]],-1)
             cost_s.append(torch.where(s_mask_o==1,cosine_s(s_l_o,s_r_o_t),zero))
             cost_l.append(torch.where(l_mask_o==1,cosine_s(l_l_o,l_r_o_t),zero))
         cost_s=torch.stack(cost_s,-1)
         cost_l=torch.stack(cost_l,-1)
         cost_volume=cost_s+cost_l
         #aggregation
         object_r=torch.where(P[:,:,1]==i,P[:,:,2],zero)
         max_r=torch.max(object_r)
         # Pixels outside the region get max_r+1 so they never match a plane id.
         object_r=torch.where(P[:,:,1]==i,P[:,:,2],max_r+1)
         min_r=torch.min(object_r)
         # range() needs Python ints; the reductions above yield tensors.
         plane_ids=range(int(min_r),int(max_r)+1)
         for j in plane_ids:
             plane_mask=torch.where(object_r==j,one,zero)[x1:x2,y1:y2]
             xp1,xp2,yp1,yp2=crop(plane_mask)
             plane_mask=plane_mask[xp1:xp2,yp1:yp2]
             plane=cost_volume[...,xp1:xp2,yp1:yp2,:]
             s_mask_p=s_mask[x1:x2,y1:y2][xp1:xp2,yp1:yp2]
             l_mask_p=l_mask[x1:x2,y1:y2][xp1:xp2,yp1:yp2]
             l_sf_p=l_sf[...,x1:x2,y1:y2][...,xp1:xp2,yp1:yp2]
             l_lf_p=l_lf[...,x1:x2,y1:y2][...,xp1:xp2,yp1:yp2]
             feature_p=feature[...,x1:x2,y1:y2][...,xp1:xp2,yp1:yp2]
             for m in range(plane.shape[-1]):
                 s_var,l_var=self.aggregation_sparse(l_sf_p,l_lf_p,plane[...,m]*plane_mask)
                 plane[...,m]=s_var*s_mask_p+l_var*l_mask_p
                 plane[...,m]=plane[...,m]*plane_mask
                 plane[...,m]=self.aggregation_dense(feature_p,plane[...,m])*plane_mask
             cost_volume[...,xp1:xp2,yp1:yp2,:]=plane
         #ss_argmin
         disparity[...,x1:x2,y1:y2]=ss_argmin(cost_volume,min_d,max_d)
         #refinement
         for j in plane_ids:
             plane_mask=torch.where(object_r==j,one,zero)[x1:x2,y1:y2]
             xp1,xp2,yp1,yp2=crop(plane_mask)
             plane_mask=plane_mask[xp1:xp2,yp1:yp2]
             feature_p=feature[...,x1:x2,y1:y2][...,xp1:xp2,yp1:yp2]
             plane=disparity[...,x1:x2,y1:y2][...,xp1:xp2,yp1:yp2]*plane_mask
             plane=self.aggregation_dense(feature_p,plane)*plane_mask
             disparity[...,x1:x2,y1:y2][...,xp1:xp2,yp1:yp2]=plane

     return disparity
    def forward(self, l, r):
        """Estimate a 540x960 disparity map, one labelled region at a time.

        For every label found in channel 1 of ``self.P`` the method builds a
        cosine-similarity cost volume, blends the costs of each plane id
        (channel 2 of ``self.P``) using weights from ``self.cluster``, takes
        ``ss_argmin`` to get disparities, and applies the same blending to
        the disparities themselves.

        Args:
            l: Input passed to both feature extractors (presumably the left
                image -- confirm against the caller).
            r: Input passed to both feature extractors (presumably the right
                image -- confirm against the caller).

        Returns:
            The ``(540, 960)`` disparity tensor on the default CUDA device.
        """
        P1 = self.P[..., 0]
        P2 = self.P[..., 3]
        P3 = self.P[..., 1]
        P4 = self.P[..., 2]
        #feature extraction
        l_mask = P2 - P1
        s_mask = P1
        l_lf = self.feature_extraction(l)
        l_sf = self.feature_extraction2(l)
        r_lf = self.feature_extraction(r)
        r_sf = self.feature_extraction2(r)
        # "s" features where the s mask is set, "l" features where the l mask
        # is set, and plain "l" features where neither mask is set.
        feature = l_lf * l_mask + l_sf * s_mask
        feature = torch.where((l_mask + s_mask) > 0, feature, l_lf)
        disparity = torch.zeros([540, 960]).cuda()
        one = torch.ones(1).cuda()
        zero = torch.zeros(1).cuda()
        #matching cost computation
        for i in range(torch.max(self.P[:, :, 1]).type(torch.int32) + 1):
            min_d = self.pre[1, 0][0, i]
            max_d = self.pre[1, 0][1, i]
            # The method has no `P` parameter; P3 is the same channel of self.P.
            object_mask = torch.where(P3 == i, one, zero)
            # NOTE(review): the box is read from a fixed entry rather than one
            # indexed by i -- confirm this is intended.
            x1, y1, x2, y2, _size = self.pre2[0]
            object_mask = object_mask[x1:x2, y1:y2]
            s_mask_o = object_mask * s_mask[x1:x2, y1:y2]
            l_mask_o = object_mask * l_mask[x1:x2, y1:y2]
            s_l_o = feature[..., x1:x2, y1:y2] * s_mask_o
            l_l_o = feature[..., x1:x2, y1:y2] * l_mask_o
            # Slice bounds must be integers; clamping against the float
            # `zero` / `one` tensors produced float tensors.
            col_lo = max(int(min_d), 0)
            col_hi = min(int(max_d), 960)
            s_r_o = r_sf[..., x1:x2, col_lo:col_hi]
            l_r_o = r_lf[..., x1:x2, col_lo:col_hi]
            # NOTE(review): max_d is derived from the already-updated min_d;
            # confirm whether the original min_d was intended here.
            min_d = y2 - max_d
            max_d = y1 - min_d
            cost_s = []
            cost_l = []
            # The shift counter has its own name so the region index `i`
            # stays valid for the plane lookup after this loop.
            for d in range(0, max_d - min_d):
                # NOTE(review): for d == 0 the upper bound `-d` is 0, which
                # yields an empty slice -- verify the intended window.
                if y2 - y1 - d > 0:
                    s_r_o_t = s_r_o[..., y2 - y1 - d:-d]
                    l_r_o_t = l_r_o[..., y2 - y1 - d:-d]
                else:
                    s_r_o_t = torch.cat([
                        torch.zeros_like(s_r_o[..., :y2 - y1 - d]),
                        s_r_o[..., 0:-d]
                    ], -1)
                    l_r_o_t = torch.cat([
                        torch.zeros_like(l_r_o[..., :y2 - y1 - d]),
                        l_r_o[..., 0:-d]
                    ], -1)
                cost_s.append(
                    torch.where(s_mask_o == 1, cosine_s(s_l_o, s_r_o_t),
                                zero))
                cost_l.append(
                    torch.where(l_mask_o == 1, cosine_s(l_l_o, l_r_o_t),
                                zero))
            cost_s = torch.stack(cost_s, -1)
            cost_l = torch.stack(cost_l, -1)
            cost_volume = cost_s + cost_l
            #aggregation
            object_r = torch.where(P3 == i, P4, zero)
            max_r = torch.max(object_r)
            # Pixels outside the region get max_r + 1 so they never match a
            # plane id.
            object_r = torch.where(P3 == i, P4, max_r + 1)
            min_r = torch.min(object_r)
            # range() needs Python ints; the reductions above yield tensors.
            plane_ids = range(int(min_r), int(max_r) + 1)
            for j in plane_ids:
                plane_mask = torch.where(object_r == j, one, zero)[x1:x2,
                                                                   y1:y2]
                xp1, xp2, yp1, yp2 = crop(plane_mask)
                plane_mask = plane_mask[xp1:xp2, yp1:yp2]
                plane = cost_volume[..., xp1:xp2, yp1:yp2, :]
                s_plane_mask = plane_mask * s_mask[x1:x2, y1:y2][xp1:xp2,
                                                                 yp1:yp2]
                l_plane_mask = plane_mask * l_mask[x1:x2, y1:y2][xp1:xp2,
                                                                 yp1:yp2]
                l_sf_p = l_sf[..., x1:x2, y1:y2][..., xp1:xp2, yp1:yp2]
                l_lf_p = l_lf[..., x1:x2, y1:y2][..., xp1:xp2, yp1:yp2]
                s_weights = self.cluster(l_sf_p, s_plane_mask)
                s_cost = torch.sum(
                    torch.sum(plane * s_weights, -2, keepdim=True),
                    -3,
                    keepdim=True) / torch.sum(s_weights)
                l_weights = self.cluster(l_lf_p, l_plane_mask)
                l_cost = torch.sum(torch.sum(plane * l_weights, -2),
                                   -2) / torch.sum(l_weights)
                # What remains of the plane after removing s/l pixels.
                plane_mask = plane_mask - torch.where(
                    s_plane_mask + l_plane_mask > 0, one, zero)
                # NOTE(review): the views below need a mask with three axes;
                # confirm the mask rank against how self.P is laid out.
                n0 = plane_mask.shape[0]
                n1 = plane_mask.shape[1]
                n2 = plane_mask.shape[2]
                depth = plane.shape[-1]
                plane_mask = plane_mask.view(n0, n1, n2,
                                             1).expand(n0, n1, n2, depth)
                s_plane_mask = s_plane_mask.view(n0, n1, n2,
                                                 1).expand(n0, n1, n2, depth)
                l_plane_mask = l_plane_mask.view(n0, n1, n2,
                                                 1).expand(n0, n1, n2, depth)
                plane = torch.where(s_plane_mask == 1, s_cost * s_weights,
                                    plane)
                plane = torch.where(l_plane_mask == 1, l_cost * l_weights,
                                    plane)
                weights = self.cluster(l_lf_p, plane_mask)
                costs = torch.sum(torch.sum(plane * weights, -2, keepdim=True),
                                  -3,
                                  keepdim=True) / torch.sum(weights)
                # `costs` is the weighted mean computed just above.
                plane = torch.where(plane_mask == 1, costs * weights, plane)
                cost_volume[..., xp1:xp2, yp1:yp2, :] = plane
            #ss_argmin
            disparity[..., x1:x2, y1:y2] = ss_argmin(cost_volume, min_d, max_d)
            #refinement
            for j in plane_ids:
                plane_mask = torch.where(object_r == j, one, zero)[x1:x2,
                                                                   y1:y2]
                xp1, xp2, yp1, yp2 = crop(plane_mask)
                plane_mask = plane_mask[xp1:xp2, yp1:yp2]
                s_plane_mask = plane_mask * s_mask[x1:x2, y1:y2][xp1:xp2,
                                                                 yp1:yp2]
                l_plane_mask = plane_mask * l_mask[x1:x2, y1:y2][xp1:xp2,
                                                                 yp1:yp2]
                plane_mask = plane_mask - torch.where(
                    s_plane_mask + l_plane_mask > 0, one, zero)
                plane = disparity[..., x1:x2, y1:y2][..., xp1:xp2,
                                                     yp1:yp2] * plane_mask
                l_sf_p = l_sf[..., x1:x2, y1:y2][..., xp1:xp2, yp1:yp2]
                l_lf_p = l_lf[..., x1:x2, y1:y2][..., xp1:xp2, yp1:yp2]
                s_weights = self.cluster(l_sf_p, s_plane_mask)
                s_cost = torch.sum(
                    torch.sum(plane * s_weights, -2, keepdim=True),
                    -3,
                    keepdim=True) / torch.sum(s_weights)
                l_weights = self.cluster(l_lf_p, l_plane_mask)
                l_cost = torch.sum(torch.sum(plane * l_weights, -2),
                                   -2) / torch.sum(l_weights)
                weights = self.cluster(l_lf_p, plane_mask)
                costs = torch.sum(torch.sum(plane * weights, -2, keepdim=True),
                                  -3,
                                  keepdim=True) / torch.sum(weights)
                plane = torch.where(s_plane_mask == 1, s_cost * s_weights,
                                    plane)
                plane = torch.where(l_plane_mask == 1, l_cost * l_weights,
                                    plane)
                # `costs` is the weighted mean computed just above.
                plane = torch.where(plane_mask == 1, costs * weights, plane)
                disparity[..., x1:x2, y1:y2][..., xp1:xp2, yp1:yp2] = plane

        return disparity
    def forward(self, l, r, P, pre1, pre2):
        #self.P=P[1,0]
        #0 l to r,1 min,2 max
        #[l_box,r_box,match],[min_d,max_d]
        with torch.no_grad():
            self.pre = pre1
            self.pre2 = pre2
        P1 = P[..., 0]
        P2 = P[..., 3]
        P3 = P[..., 1]
        P4 = P[..., 2]
        #feature extraction
        l_mask = P2 - P1
        s_mask = P1
        #l_mask=l_mask.byte()
        #s_mask=s_mask.byte()
        #basic cuda 524
        #print(l.type)
        #1923
        #print(torch.cuda.memory_allocated(1))
        #2727
        l_sf = self.feature_extraction2(l)
        l_lf = self.feature_extraction(l_sf)

        #print(torch.cuda.memory_allocated(2))
        #the cuda won't copy the volume to the new gpu
        # a=l_lf.cuda(1)
        # b=l_lf.cuda(2)
        # c=l_sf.cuda(3)
        r_sf = self.feature_extraction2(r)
        r_lf = self.feature_extraction(r_sf)
        #print(torch.cuda.memory_allocated(1))
        #3267

        #print(torch.cuda.memory_allocated(2))
        #reshape the mask to batch and channel

        disparity = torch.zeros([540, 960]).cuda(2)
        one = torch.ones(1).cuda(2)
        zero = torch.zeros(1).cuda(2)
        cost_volume = []
        #5710
        #print(value)
        l_lf = l_lf.cuda(2)
        r_lf = r_lf.cuda(2)
        r_sf = r_sf.cuda(2)
        l_sf = l_sf.cuda(2)
        #985
        #feature=torch.masked_select(l_sf,s_mask)
        #feature=torch.masked_select(l_lf,l_mask)+torch.masked_select(l_sf,s_mask)
        feature = l_lf * l_mask + l_sf * s_mask
        feature = torch.where((l_mask + s_mask) > 0, feature, l_lf)
        #0.48
        # s_mask=torch.squeeze(s_mask).cuda(2)
        # l_mask=torch.squeeze(l_mask).cuda(2)
        #for i in range(10):
        # start_time=time.time()
        # a=torch.rand_like(P1)
        # print(time.time()-start_time)
        # #initial 0.44, next 0.18
        # start_time=time.time()
        # #0.27
        # # s_match=s_mask.nonzero()
        # # s_feature=l_sf[...,s_match[:,0],s_match[:,1]]
        # # l_match=l_mask.nonzero()
        # # l_feature=l_lf[...,l_match[:,0],l_match[:,1]]
        # with torch.no_grad():
        #   s_match=s_mask.nonzero()
        # s_feature=l_sf[...,s_match[:,0],s_match[:,1]]
        # with torch.no_grad():
        #   l_match=l_mask.nonzero()
        # l_feature=l_lf[...,l_match[:,0],l_match[:,1]]
        # print(time.time()-start_time)
        # time.sleep(100)
        # #0.18
        # sy_match=s_match[:,1]
        # sx_match=s_match[:,0]
        # with torch.no_grad():
        #   # for i in range(1,192):
        #   #   sy_match=torch.cat([sy_match,s_match[:,1]-i],0)
        #   d=192
        #   sx_match=sx_match.repeat(1,d)
        #   sy_match=sy_match.repeat(1,d)
        #   #print(sy_match.shape)
        #   sy_match-=torch.arange(0,d).repeat(s_match.shape[0],1).transpose(1,0).contiguous().view_as(sy_match).cuda(2)
        # #192,0.09s,30,0.01
        # s_r_o_t=r_sf[...,sx_match,sy_match].reshape(1,32,s_feature.shape[-1],d)
        # s_feature=s_feature.repeat(1,1,1,d).reshape(1,32,s_feature.shape[-1],d)
        # #print(s_feature.shape,s_r_o_t.shape)
        # cost_volume.append(torch.where(sy_match.reshape(1,s_feature.shape[-2],d)>=0,cosine_s(s_feature,s_r_o_t),zero))
        # ly_match=l_match[:,1]
        # lx_match=l_match[:,0]
        # with torch.no_grad():
        #   # for i in range(1,192):
        #   #   sy_match=torch.cat([sy_match,s_match[:,1]-i],0)
        #   d=192
        #   lx_match=lx_match.repeat(1,d)
        #   ly_match=ly_match.repeat(1,d)
        #   #print(sy_match.shape)
        #   ly_match-=torch.arange(0,d).repeat(l_match.shape[0],1).transpose(1,0).contiguous().view_as(ly_match).cuda(2)
        # #192,0.09s,30,0.01,lf0.19
        # l_r_o_t=r_lf[...,lx_match,ly_match].reshape(1,32,l_feature.shape[-1],d)
        # l_feature=l_feature.repeat(1,1,1,d).reshape(1,32,l_feature.shape[-1],d)
        # #print(s_feature.shape,s_r_o_t.shape)
        # cost_volume.append(torch.where(ly_match.reshape(1,l_feature.shape[-2],d)>=0,cosine_s(l_feature,l_r_o_t),zero))
        # print(time.time()-start_time)
        # time.sleep(100)

        # #0.0003
        # #s_r_o_t=r_sf[...,s_match[:,0],s_match[:,1]]
        # #1,32,n
        # #print(time.time()-start_time)
        # #print(s_match.shape)
        # #time 10
        # # for i in range(s_match.shape[0]):
        # #   min_d=torch.max(s_match[i,1]-300,zero.long())
        # #   #print(min_d)
        # #   s_r_o_t=r_sf[...,s_match[i,0],min_d:s_match[i,1]]

        # #   a=s_feature[...,i].reshape(1,32,1)
        # #   #print(a.shape,s_r_o_t.shape)
        # #   cost_volume.append(torch.where(s_match[i,1]-300>=0,cosine_s(a,s_r_o_t),zero))

        # #time 0.23,192,0.035,30, the number of the match points won't influence the time,only the iteration

        # for i in range(30):
        #   s_r_o_t=r_sf[...,s_match[:,0],s_match[:,1]-i]
        #   #s_r_o_t=torch.take(r_sf,[...,s_match[:,0],s_match[:,1]-i])
        #   cost_volume.append(torch.where(s_match[:,1]-i>=0,cosine_s(s_feature,s_r_o_t),zero))
        #   l_r_o_t=r_sf[...,l_match[:,0],l_match[:,1]-i]
        #   cost_volume.append(torch.where(l_match[:,1]-i>=0,cosine_s(l_feature,l_r_o_t),zero))
        #cost_volume=torch.stack(cost_volume)
        # print(torch.cuda.memory_allocated(2))
        # print(time.time()-start_time)
        # time.sleep(100)

        count = 0
        start_time = time.time()
        sx = []
        sy = []
        lx = []
        ly = []
        sy_r = []
        ly_r = []
        with torch.no_grad():
            for i in range(150):
                #ground 0-270, sky 0-40
                #0.38
                if i > torch.max(P3):
                    break

                min_d = pre1[0, 0, i].long()
                max_d = pre1[0, 1, i].long()
                #object_mask=torch.where(P3==i,one,zero)
                x1, y1, x2, y2, size = pre2[0, i].long()
                object_mask = P3[0, x1:x2, y1:y2]
                object_mask = torch.where(object_mask == i, one, zero)
                s_mask_o = object_mask * s_mask[0, x1:x2, y1:y2]
                l_mask_o = object_mask * l_mask[0, x1:x2, y1:y2]
                s_match = s_mask_o.nonzero()
                l_match = l_mask_o.nonzero()
                if s_match.shape[0] == 0:
                    s_match = object_mask.nonzero()
                if l_match.shape[0] == 0:
                    l_match = object_mask.nonzero()
                sy_match = s_match[:, 1]
                sx_match = s_match[:, 0]
                ly_match = l_match[:, 1]
                lx_match = l_match[:, 0]
                #print(sy_match.shape)

                d = max_d - min_d
                #print(d)
                sx_match = sx_match.repeat(1, d)
                sy_match = sy_match.repeat(1, d)
                sy_match_r = sy_match - torch.arange(
                    min_d, max_d).cuda(2).repeat(
                        s_match.shape[0], 1).transpose(
                            1, 0).contiguous().view_as(sy_match)
                lx_match = lx_match.repeat(1, d)
                ly_match = ly_match.repeat(1, d)
                ly_match_r = ly_match - torch.arange(
                    min_d, max_d).cuda(2).repeat(
                        l_match.shape[0], 1).transpose(
                            1, 0).contiguous().view_as(ly_match)
                #print(ly_match.shape)
                sx.append(sx_match)
                sy.append(sy_match)
                sy_r.append(sy_match_r)
                lx.append(lx_match)
                ly.append(ly_match)
                ly_r.append(ly_match_r)
        #print(cosine_s(feature[...,sx_match,sy_match].squeeze(),r_sf[...,sx_match,sy_match_r].squeeze(),0).shape,sy_match_r.squeeze().shape)
        sx_match = torch.cat(sx, 1)
        sy_match = torch.cat(sy, 1)
        lx_match = torch.cat(lx, 1)
        ly_match = torch.cat(ly, 1)
        sy_match_r = torch.cat(sy_r, 1)
        ly_match_r = torch.cat(ly_r, 1)
        cost_s = torch.where(
            sy_match_r.squeeze() >= 0,
            cosine_s(feature[..., sx_match, sy_match].squeeze(),
                     r_sf[..., sx_match, sy_match_r].squeeze(), 0), zero)
        cost_l = torch.where(
            ly_match_r.squeeze() >= 0,
            cosine_s(feature[..., lx_match, ly_match].squeeze(),
                     r_sf[..., lx_match, ly_match_r].squeeze(), 0), zero)
        print(time.time() - start_time)
        time.sleep(100)

        #start_time=time.time()
        #with torch.no_grad():
        for i in range(150):
            #ground 0-270, sky 0-40
            #0.46
            if i > torch.max(P3):
                break
            min_d = pre1[0, 0, i].long()
            max_d = pre1[0, 1, i].long()
            #object_mask=torch.where(P3==i,one,zero)
            x1, y1, x2, y2, size = pre2[0, i].long()
            object_mask = P3[0, x1:x2, y1:y2]
            object_mask = torch.where(object_mask == i, one, zero)
            s_mask_o = object_mask * s_mask[0, x1:x2, y1:y2]
            l_mask_o = object_mask * l_mask[0, x1:x2, y1:y2]
            s_match = s_mask_o.nonzero()
            l_match = l_mask_o.nonzero()
            if s_match.shape[0] == 0:
                s_match = object_mask.nonzero()
            if l_match.shape[0] == 0:
                l_match = object_mask.nonzero()
            s_l_o = feature[..., s_match[:, 0], s_match[:, 1]]
            l_l_o = feature[..., l_match[:, 0], l_match[:, 1]]
            cost_s = []
            cost_l = []
            sy_match = s_match[:, 1]
            sx_match = s_match[:, 0]
            ly_match = l_match[:, 1]
            lx_match = l_match[:, 0]
            #print(sy_match.shape)
            with torch.no_grad():
                d = max_d - min_d
                #print(d)
                sx_match = sx_match.repeat(1, d)
                sy_match = sy_match.repeat(1, d)
                sy_match -= torch.arange(min_d, max_d).cuda(2).repeat(
                    s_match.shape[0],
                    1).transpose(1, 0).contiguous().view_as(sy_match)
                lx_match = lx_match.repeat(1, d)
                ly_match = ly_match.repeat(1, d)
                ly_match -= torch.arange(min_d, max_d).cuda(2).repeat(
                    l_match.shape[0],
                    1).transpose(1, 0).contiguous().view_as(ly_match)
            s_r_o_t = r_sf[..., sx_match,
                           sy_match].reshape(1, 32, s_l_o.shape[-1], d)
            s_l_o = s_l_o.repeat(1, 1, 1, d).reshape(1, 32, s_l_o.shape[-1], d)
            l_r_o_t = r_lf[..., lx_match,
                           ly_match].reshape(1, 32, l_l_o.shape[-1], d)
            l_l_o = l_l_o.repeat(1, 1, 1, d).reshape(1, 32, l_l_o.shape[-1], d)
            cost_s.append(
                torch.where(
                    sy_match.reshape(1, s_l_o.shape[-2], d) >= 0,
                    cosine_s(s_l_o, s_r_o_t), zero))
            cost_l.append(
                torch.where(
                    ly_match.reshape(1, l_l_o.shape[-2], d) >= 0,
                    cosine_s(l_l_o, l_r_o_t), zero))
            #cost_volume=cost_s+cost_l
            #print(torch.cuda.memory_allocated(2)/1e+6)
            #time.sleep(30)
        print(time.time() - start_time)
        time.sleep(100)

        # count=0
        # start_time=time.time()
        # for i in range(150):
        #     #ground 0-270, sky 0-40
        #     #0.50
        #     if i> torch.max(P3).type(torch.int32):
        #       break
        #     min_d=pre1[0,0,i].long()
        #     max_d=pre1[0,1,i].long()
        #     #object_mask=torch.where(P3==i,one,zero)
        #     x1,y1,x2,y2,size=pre2[0,i].long()
        #     object_mask=P3[0,x1:x2,y1:y2]
        #     object_mask=torch.where(object_mask==i,one,zero)
        #     s_mask_o=object_mask*s_mask[0,x1:x2,y1:y2]
        #     l_mask_o=object_mask*l_mask[0,x1:x2,y1:y2]
        #     s_match=s_mask_o.nonzero()
        #     l_match=l_mask_o.nonzero()
        #     if s_match.shape[0]==0:
        #       s_match=object_mask.nonzero()
        #     if l_match.shape[0]==0:
        #       l_match=object_mask.nonzero()
        #     s_l_o=feature[...,s_match[:,0],s_match[:,1]]
        #     l_l_o=feature[...,l_match[:,0],l_match[:,1]]
        #     cost_s=[]
        #     cost_l=[]
        #     sy_match=s_match[:,1]
        #     sx_match=s_match[:,0]
        #     ly_match=l_match[:,1]
        #     lx_match=l_match[:,0]
        #     #print(sy_match.shape)
        #     with torch.no_grad():
        #       d=max_d-min_d
        #       print(d)
        #       sx_match=sx_match.repeat(1,d)
        #       sy_match=sy_match.repeat(1,d)
        #       sy_match-=torch.arange(min_d,max_d).repeat(s_match.shape[0],1).transpose(1,0).contiguous().view_as(sy_match).cuda(2)
        #       lx_match=lx_match.repeat(1,d)
        #       ly_match=ly_match.repeat(1,d)
        #       ly_match-=torch.arange(min_d,max_d).repeat(l_match.shape[0],1).transpose(1,0).contiguous().view_as(ly_match).cuda(2)
        #     s_r_o_t=r_sf[...,sx_match,sy_match].reshape(1,32,s_l_o.shape[-1],d)
        #     s_l_o=s_l_o.repeat(1,1,1,d).reshape(1,32,s_l_o.shape[-1],d)
        #     l_r_o_t=r_lf[...,lx_match,ly_match].reshape(1,32,l_l_o.shape[-1],d)
        #     l_l_o=l_l_o.repeat(1,1,1,d).reshape(1,32,l_l_o.shape[-1],d)
        #     cost_s.append(torch.where(sy_match.reshape(1,s_l_o.shape[-2],d)>=0,cosine_s(s_l_o,s_r_o_t),zero))
        #     cost_l.append(torch.where(ly_match.reshape(1,l_l_o.shape[-2],d)>=0,cosine_s(l_l_o,l_r_o_t),zero))
        #     #cost_volume=cost_s+cost_l
        #     #print(torch.cuda.memory_allocated(2)/1e+6)
        #     #time.sleep(30)
        # print(time.time()-start_time)
        # time.sleep(100)
        return cost_volume
Exemple #8
0
    def forward(self, l, r, P, pre1, pre2):
        """Compute cosine matching costs between left and right features.

        Features are extracted from ``l`` and ``r`` with
        ``self.feature_extraction2`` followed by ``self.feature_extraction``.
        Two timed experiments are then run:

        1. Whole image: for every non-zero entry of ``s_mask`` and of
           ``l_mask`` the left feature is compared (``cosine_s``) with the
           right feature at the same first index and at the second index
           shifted by ``0 .. 191``. Both results are appended to
           ``cost_volume``.
        2. Per object: for every label ``i`` up to ``max(P3)`` the same
           comparison is done for the ``s_mask`` positions inside the
           object's box over the shifts ``min_d .. max_d - 1``. The result
           (``cost_s``) is rebuilt on every iteration and never stored.

        Each experiment prints its elapsed wall-clock time and is followed by
        ``time.sleep(100)``.

        Args:
            l: Input passed to ``self.feature_extraction2`` (presumably the
                left image -- TODO confirm).
            r: Input passed to ``self.feature_extraction2`` (presumably the
                right image -- TODO confirm).
            P: Tensor whose last axis is read at indices 0, 1, 2 and 3.
                Index 0 is used as ``s_mask``, index 3 minus index 0 as
                ``l_mask`` and index 1 as the object-label map ``P3``.
            pre1: ``pre1[0, 0, i]`` and ``pre1[0, 1, i]`` are read as the
                lower and upper shift bound of object ``i``.
            pre2: ``pre2[0, i]`` is unpacked into ``x1, y1, x2, y2, size``.

        Returns:
            list: ``cost_volume`` containing the two tensors appended by the
            whole-image experiment.
        """
        #self.P=P[1,0]
        #0 l to r,1 min,2 max
        #[l_box,r_box,match],[min_d,max_d]
        with torch.no_grad():
            self.pre = pre1
            self.pre2 = pre2
        # NOTE(review): unlike the feature tensors below, P/pre1/pre2 are not
        # moved with .cuda(2) here; presumably the caller already placed them
        # on that device -- confirm.
        P1 = P[..., 0]
        P2 = P[..., 3]
        P3 = P[..., 1]
        P4 = P[..., 2]  # not used by the active code in this method
        #feature extraction
        l_mask = P2 - P1
        s_mask = P1
        #l_mask=l_mask.byte()
        #s_mask=s_mask.byte()
        #basic cuda 524
        #print(l.type)
        #1923
        #print(torch.cuda.memory_allocated(1))
        #2727
        # Two-stage extraction: *_sf is the output of feature_extraction2,
        # *_lf is feature_extraction applied on top of *_sf.
        l_sf = self.feature_extraction2(l)
        l_lf = self.feature_extraction(l_sf)

        #print(torch.cuda.memory_allocated(2))
        #the cuda won't copy the volume to the new gpu
        # a=l_lf.cuda(1)
        # b=l_lf.cuda(2)
        # c=l_sf.cuda(3)
        r_sf = self.feature_extraction2(r)
        r_lf = self.feature_extraction(r_sf)
        #print(torch.cuda.memory_allocated(1))
        #3267

        #print(torch.cuda.memory_allocated(2))
        #reshape the mask to batch and channel

        # `disparity` is allocated but not used by the active code below.
        disparity = torch.zeros([540, 960]).cuda(2)
        # Scalar constants on CUDA device 2, used as torch.where operands.
        one = torch.ones(1).cuda(2)
        zero = torch.zeros(1).cuda(2)
        cost_volume = []
        #5710
        #print(value)
        # All feature maps are moved to CUDA device index 2 (hard-coded).
        l_lf = l_lf.cuda(2)
        r_lf = r_lf.cuda(2)
        r_sf = r_sf.cuda(2)
        l_sf = l_sf.cuda(2)
        #985
        #feature=torch.masked_select(l_sf,s_mask)
        #feature=torch.masked_select(l_lf,l_mask)+torch.masked_select(l_sf,s_mask)
        # Mask-weighted blend of the two left feature maps; where both masks
        # are zero the blend is replaced by l_lf.
        feature = l_lf * l_mask + l_sf * s_mask
        feature = torch.where((l_mask + s_mask) > 0, feature, l_lf)
        # NOTE(review): s_mask/l_mask are indexed as [0, x1:x2, y1:y2] further
        # down, so they look at least 3-D; nonzero() would then return
        # (dim0, dim1, dim2) columns and columns 0/1 used below would be the
        # leading index and the first spatial index -- confirm intended.
        with torch.no_grad():
            s_match = s_mask.nonzero()
        s_feature = l_sf[..., s_match[:, 0], s_match[:, 1]]
        with torch.no_grad():
            l_match = l_mask.nonzero()
        l_feature = l_lf[..., l_match[:, 0], l_match[:, 1]]
        #0.18
        start_time = time.time()
        # #0.04s
        sy_match = s_match[:, 1]
        sx_match = s_match[:, 0]
        with torch.no_grad():
            # for i in range(1,192):
            #   sy_match=torch.cat([sy_match,s_match[:,1]-i],0)
            d = 192
            # repeat(1, d) tiles the N indices d times into shape (1, N*d).
            sx_match = sx_match.repeat(1, d)
            sy_match = sy_match.repeat(1, d)
            #print(sy_match.shape)
            # Subtract shift k from the k-th tile: the (N, d) arange grid is
            # transposed to (d, N) so its flat order is N copies of 0, then
            # N copies of 1, ... matching the tiling above.
            sy_match -= torch.arange(0, d).repeat(
                s_match.shape[0],
                1).transpose(1, 0).contiguous().view_as(sy_match).cuda(2)
        #192,0.09s,30,0.01
        # NOTE(review): the gathered data is laid out shift-major (d tiles of
        # N), yet it is reshaped to (..., N, d); left/right/mask stay aligned
        # element-wise, but the last two axes may not mean (point, shift) --
        # confirm. The literal 32 presumably is the channel count.
        s_r_o_t = r_sf[..., sx_match,
                       sy_match].reshape(1, 32, s_feature.shape[-1], d)
        s_feature = s_feature.repeat(1, 1, 1,
                                     d).reshape(1, 32, s_feature.shape[-1], d)
        #print(s_feature.shape,s_r_o_t.shape)
        # Entries whose shifted index became negative get cost `zero`.
        cost_volume.append(
            torch.where(
                sy_match.reshape(1, s_feature.shape[-2], d) >= 0,
                cosine_s(s_feature, s_r_o_t), zero))
        # Same procedure for the l_mask positions, using the *_lf features.
        ly_match = l_match[:, 1]
        lx_match = l_match[:, 0]
        with torch.no_grad():
            # for i in range(1,192):
            #   sy_match=torch.cat([sy_match,s_match[:,1]-i],0)
            d = 192
            lx_match = lx_match.repeat(1, d)
            ly_match = ly_match.repeat(1, d)
            #print(sy_match.shape)
            ly_match -= torch.arange(0, d).repeat(
                l_match.shape[0],
                1).transpose(1, 0).contiguous().view_as(ly_match).cuda(2)
        #192,0.09s,30,0.01,lf0.19
        l_r_o_t = r_lf[..., lx_match,
                       ly_match].reshape(1, 32, l_feature.shape[-1], d)
        l_feature = l_feature.repeat(1, 1, 1,
                                     d).reshape(1, 32, l_feature.shape[-1], d)
        #print(s_feature.shape,s_r_o_t.shape)
        cost_volume.append(
            torch.where(
                ly_match.reshape(1, l_feature.shape[-2], d) >= 0,
                cosine_s(l_feature, l_r_o_t), zero))
        print(time.time() - start_time)
        # NOTE(review): blocks the caller for 100 s; looks like a debugging
        # pause left in place -- confirm before using outside benchmarks.
        time.sleep(100)
        # #0.0003
        # #s_r_o_t=r_sf[...,s_match[:,0],s_match[:,1]]
        # #1,32,n
        # #print(time.time()-start_time)
        # #print(s_match.shape)
        # #time 10
        # # for i in range(s_match.shape[0]):
        # #   min_d=torch.max(s_match[i,1]-300,zero.long())
        # #   #print(min_d)
        # #   s_r_o_t=r_sf[...,s_match[i,0],min_d:s_match[i,1]]

        # #   a=s_feature[...,i].reshape(1,32,1)
        # #   #print(a.shape,s_r_o_t.shape)
        # #   cost_volume.append(torch.where(s_match[i,1]-300>=0,cosine_s(a,s_r_o_t),zero))

        # #time 0.23,192,0.035,30, the number of the match points won't influence the time,only the iteration

        # for i in range(30):
        #   s_r_o_t=r_sf[...,s_match[:,0],s_match[:,1]-i]
        #   #s_r_o_t=torch.take(r_sf,[...,s_match[:,0],s_match[:,1]-i])
        #   cost_volume.append(torch.where(s_match[:,1]-i>=0,cosine_s(s_feature,s_r_o_t),zero))
        #   l_r_o_t=r_sf[...,l_match[:,0],l_match[:,1]-i]
        #   cost_volume.append(torch.where(l_match[:,1]-i>=0,cosine_s(l_feature,l_r_o_t),zero))
        #cost_volume=torch.stack(cost_volume)
        # print(torch.cuda.memory_allocated(2))
        # print(time.time()-start_time)
        # time.sleep(100)

        #promotion
        #we can segment with bounding box and divide the whole image into many parts
        #each single bounding box will be managed through network not the whole image
        #matching cost computation
        count = 0  # not used by the active code below
        start_time = time.time()
        for i in range(150):
            #ground 0-270, sky 0-40
            #0.42
            # Stop once i exceeds the largest label value present in P3
            # (the maximum is recomputed on every iteration).
            if i > torch.max(P3).type(torch.int32):
                break
            # Shift range [min_d, max_d) for object i.
            min_d = pre1[0, 0, i].long()
            max_d = pre1[0, 1, i].long()
            #object_mask=torch.where(P3==i,one,zero)
            # Box of object i; `size` is unpacked but not used.
            x1, y1, x2, y2, size = pre2[0, i].long()
            object_mask = P3[0, x1:x2, y1:y2]
            # 1 where the cropped label map equals i, else 0.
            object_mask = torch.where(object_mask == i, one, zero)
            s_mask_o = object_mask * s_mask[0, x1:x2, y1:y2]
            l_mask_o = object_mask * l_mask[0, x1:x2, y1:y2]
            s_match = s_mask_o.nonzero()
            l_match = l_mask_o.nonzero()
            # Fall back to every pixel of the object when a mask selects none.
            if s_match.shape[0] == 0:
                s_match = object_mask.nonzero()
            if l_match.shape[0] == 0:
                l_match = object_mask.nonzero()
            # NOTE(review): s_match/l_match are relative to the crop, but they
            # index the uncropped `feature`/`r_sf` with no x1/y1 offset added
            # -- confirm intended.
            s_l_o = feature[..., s_match[:, 0], s_match[:, 1]]
            l_l_o = feature[..., l_match[:, 0], l_match[:, 1]]  # unused below
            cost_s = []
            cost_l = []  # unused below
            sy_match = s_match[:, 1]
            sx_match = s_match[:, 0]
            #print(sy_match.shape)
            with torch.no_grad():
                # d is a 0-dim tensor here (difference of two .long() values).
                d = max_d - min_d
                print(d)
                sx_match = sx_match.repeat(1, d)
                sy_match = sy_match.repeat(1, d)
                # Same shift-major tiling as in the whole-image experiment,
                # with shifts min_d .. max_d - 1.
                sy_match -= torch.arange(min_d, max_d).repeat(
                    s_match.shape[0],
                    1).transpose(1, 0).contiguous().view_as(sy_match).cuda(2)
            s_r_o_t = r_sf[..., sx_match,
                           sy_match].reshape(1, 32, s_l_o.shape[-1], d)
            s_l_o = s_l_o.repeat(1, 1, 1, d).reshape(1, 32, s_l_o.shape[-1], d)
            # The per-object cost is built but never added to cost_volume.
            cost_s.append(
                torch.where(
                    sy_match.reshape(1, s_l_o.shape[-2], d) >= 0,
                    cosine_s(s_l_o, s_r_o_t), zero))

            #cost_volume=cost_s+cost_l
            #print(torch.cuda.memory_allocated(2)/1e+6)
            #time.sleep(30)
        print(time.time() - start_time)
        # NOTE(review): second 100 s pause; see the note on the first one.
        time.sleep(100)
        return cost_volume
    def forward(self, l, r, P, pre1, pre2):
        """Time a per-object, per-shift loop of cosine matching costs.

        Features are extracted from ``l`` and ``r`` with
        ``self.feature_extraction2`` followed by ``self.feature_extraction``.
        For every object label ``i`` in ``0 .. max(P3)`` the method selects
        the object's pixels inside its box, and for every shift ``j`` in
        ``range(min_d, max_d)`` scores the left features against the right
        features at second index minus ``j`` with ``cosine_s``. The stacked
        per-object results (``cost_s``, ``cost_l``) are overwritten on each
        iteration and never stored, so the returned list is empty.

        The elapsed wall-clock time of the loop is printed, followed by
        ``time.sleep(100)``.

        Args:
            l: Input passed to ``self.feature_extraction2`` (presumably the
                left image -- TODO confirm).
            r: Input passed to ``self.feature_extraction2`` (presumably the
                right image -- TODO confirm).
            P: Tensor whose last axis is read at indices 0, 1, 2 and 3.
                Index 0 is used as ``s_mask``, index 3 minus index 0 as
                ``l_mask`` and index 1 as the object-label map ``P3``.
            pre1: ``pre1[0, 0, i]`` and ``pre1[0, 1, i]`` are read as the
                lower and upper shift bound of object ``i``.
            pre2: ``pre2[0, i]`` is unpacked into ``x1, y1, x2, y2, size``.

        Returns:
            list: ``cost_volume``, which the active code never appends to.
        """
        #self.P=P[1,0]
        #0 l to r,1 min,2 max
        #[l_box,r_box,match],[min_d,max_d]
        self.pre = pre1
        self.pre2 = pre2
        P1 = P[..., 0]
        P2 = P[..., 3]
        P3 = P[..., 1]
        P4 = P[..., 2]  # not used by the active code in this method
        #feature extraction
        l_mask = P2 - P1
        s_mask = P1
        #l_mask=l_mask.byte()
        #s_mask=s_mask.byte()
        #basic cuda 524
        #print(l.type)
        #1923
        #print(torch.cuda.memory_allocated(1))
        #2727
        # Two-stage extraction: *_sf is the output of feature_extraction2,
        # *_lf is feature_extraction applied on top of *_sf.
        l_sf = self.feature_extraction2(l)
        l_lf = self.feature_extraction(l_sf)

        #print(torch.cuda.memory_allocated(2))
        #the cuda won't copy the volume to the new gpu
        # a=l_lf.cuda(1)
        # b=l_lf.cuda(2)
        # c=l_sf.cuda(3)
        r_sf = self.feature_extraction2(r)
        r_lf = self.feature_extraction(r_sf)
        #print(torch.cuda.memory_allocated(1))
        #3267

        #print(torch.cuda.memory_allocated(2))
        #reshape the mask to batch and channel

        # `disparity` is allocated but not used by the active code below.
        disparity = torch.zeros([540, 960]).cuda(2)
        # Scalar constants on CUDA device 2, used as torch.where operands.
        one = torch.ones(1).cuda(2)
        zero = torch.zeros(1).cuda(2)
        cost_volume = []
        #5710
        #print(value)
        # All feature maps are moved to CUDA device index 2 (hard-coded).
        l_lf = l_lf.cuda(2)
        r_lf = r_lf.cuda(2)
        r_sf = r_sf.cuda(2)
        l_sf = l_sf.cuda(2)
        #985
        #feature=torch.masked_select(l_sf,s_mask)
        #feature=torch.masked_select(l_lf,l_mask)+torch.masked_select(l_sf,s_mask)
        # Mask-weighted blend of the two left feature maps; where both masks
        # are zero the blend is replaced by l_lf.
        feature = l_lf * l_mask + l_sf * s_mask
        feature = torch.where((l_mask + s_mask) > 0, feature, l_lf)

        # Whole-image selections; s_feature/l_feature are only referenced by
        # the commented-out experiments below.
        # NOTE(review): the masks are indexed as [0, x1:x2, y1:y2] later, so
        # nonzero() presumably yields three columns and columns 0/1 would be
        # the leading index and the first spatial index -- confirm.
        s_match = s_mask.long().nonzero()
        s_feature = l_sf[..., s_match[:, 0], s_match[:, 1]]
        l_match = l_mask.long().nonzero()
        l_feature = l_lf[..., l_match[:, 0], l_match[:, 1]]
        start_time = time.time()

        #0.0003
        #s_r_o_t=r_sf[...,s_match[:,0],s_match[:,1]]
        #1,32,n
        #print(time.time()-start_time)
        #print(s_match.shape)
        #time 10
        # for i in range(s_match.shape[0]):
        #   min_d=torch.max(s_match[i,1]-300,zero.long())
        #   #print(min_d)
        #   s_r_o_t=r_sf[...,s_match[i,0],min_d:s_match[i,1]]

        #   a=s_feature[...,i].view(1,32,1)
        #   #print(a.shape,s_r_o_t.shape)
        #   cost_volume.append(torch.where(s_match[i,1]-300>=0,cosine_s(a,s_r_o_t),zero))
        #time 0.23,192,0.035,30, the number of the match points won't influence the time,only the iteration

        # for i in range(300):
        #   s_r_o_t=r_sf[...,s_match[:,0],s_match[:,1]-i]
        #   cost_volume.append(torch.where(s_match[:,1]-i>=0,cosine_s(s_feature,s_r_o_t),zero))
        #   l_r_o_t=r_sf[...,l_match[:,0],l_match[:,1]-i]
        #   cost_volume.append(torch.where(l_match[:,1]-i>=0,cosine_s(l_feature,l_r_o_t),zero))
        # #cost_volume=torch.stack(cost_volume)
        # print(torch.cuda.memory_allocated(2))
        # print(time.time()-start_time)
        # time.sleep(100)

        #promotion
        #we can segment with bounding box and divide the whole image into many parts
        #each single bounding box will be managed through network not the whole image
        #matching cost computation
        count = 0  # not used by the active code below
        start_time = time.time()
        # One iteration per label value 0 .. max(P3).
        for i in range(torch.max(P3).type(torch.int32) + 1):
            #ground 0-270, sky 0-40
            # if i==13 or i == 14:
            #   continue
            # i=60
            #print(pre2.shape)
            #i=14
            # Shift range [min_d, max_d) for object i.
            min_d = pre1[0, 0, i].long()
            max_d = pre1[0, 1, i].long()
            # 1 where the label map equals i, else 0 (built on the full map,
            # then cropped to the object's box).
            object_mask = torch.where(P3 == i, one, zero)
            # Box of object i; `size` is unpacked but not used.
            x1, y1, x2, y2, size = pre2[0, i].long()
            object_mask = object_mask[0, x1:x2, y1:y2]
            s_mask_o = object_mask * s_mask[0, x1:x2, y1:y2]
            l_mask_o = object_mask * l_mask[0, x1:x2, y1:y2]
            s_match = s_mask_o.long().nonzero()
            l_match = l_mask_o.long().nonzero()
            # Fall back to every pixel of the object when a mask selects none.
            if s_match.shape[0] == 0:
                s_match = object_mask.nonzero()
            if l_match.shape[0] == 0:
                l_match = object_mask.nonzero()
            # NOTE(review): s_match/l_match are relative to the crop, but they
            # index the uncropped `feature`/`r_sf`/`r_lf` with no x1/y1 offset
            # added -- confirm intended.
            s_l_o = feature[..., s_match[:, 0], s_match[:, 1]]
            l_l_o = feature[..., l_match[:, 0], l_match[:, 1]]
            #print(torch.max(min_d,zero).long())
            #s_r_o=feature[...,s_match[:,0],s_match[:,1]]

            # s_r_o=r_sf[...,x1:x2,y1-max_d:y2-min_d]
            # l_r_o=r_lf[...,x1:x2,y1-max_d:y2-min_d]
            cost_s = []
            cost_l = []
            #ground and sky
            #print(s_match.shape[0],l_match.shape[0],min_d,max_d)

            # One cosine_s evaluation per shift j; positions whose shifted
            # second index is negative get cost `zero`.
            for j in range(min_d, max_d):
                s_r_o_t = r_sf[..., s_match[:, 0], s_match[:, 1] - j]
                cost_s.append(
                    torch.where(s_match[:, 1] - j >= 0,
                                cosine_s(s_l_o, s_r_o_t), zero))
                l_r_o_t = r_lf[..., l_match[:, 0], l_match[:, 1] - j]
                cost_l.append(
                    torch.where(l_match[:, 1] - j >= 0,
                                cosine_s(l_l_o, l_r_o_t), zero))
            # Shifts become the new last axis.
            # NOTE(review): if max_d <= min_d the lists are empty and
            # torch.stack is called on an empty list -- confirm inputs
            # always give a non-empty range.
            cost_s = torch.stack(cost_s, -1)
            cost_l = torch.stack(cost_l, -1)

            #cost_volume=cost_s+cost_l
            #print(torch.cuda.memory_allocated(2)/1e+6)
            #time.sleep(30)
        print(time.time() - start_time)
        # NOTE(review): blocks the caller for 100 s; looks like a debugging
        # pause left in place -- confirm before using outside benchmarks.
        time.sleep(100)
        return cost_volume
    def forward(self, l, r, P, pre1, pre2):
        """Time a batched variant of the per-object cosine matching cost.

        Features are extracted from ``l`` and ``r`` with
        ``self.feature_extraction2`` followed by ``self.feature_extraction``.
        A first loop (under ``torch.no_grad()``) collects, for every object
        label up to ``max(P3)``, the tiled pixel indices and their shifted
        counterparts for all shifts in ``[min_d, max_d)``. The collected
        indices are concatenated and ``cosine_s`` is evaluated once for the
        ``s`` positions and once for the ``l`` positions. ``cost_s`` and
        ``cost_l`` are not stored, so the returned list is empty.

        The elapsed wall-clock time is printed, followed by
        ``time.sleep(100)``.

        Args:
            l: Input passed to ``self.feature_extraction2`` (presumably the
                left image -- TODO confirm).
            r: Input passed to ``self.feature_extraction2`` (presumably the
                right image -- TODO confirm).
            P: Tensor whose last axis is read at indices 0, 1, 2 and 3.
                Index 0 is used as ``s_mask``, index 3 minus index 0 as
                ``l_mask`` and index 1 as the object-label map ``P3``.
            pre1: ``pre1[0, 0, i]`` and ``pre1[0, 1, i]`` are read as the
                lower and upper shift bound of object ``i``.
            pre2: ``pre2[0, i]`` is unpacked into ``x1, y1, x2, y2, size``.

        Returns:
            list: ``cost_volume``, which the active code never appends to.
        """
        #self.P=P[1,0]
        #0 l to r,1 min,2 max
        #[l_box,r_box,match],[min_d,max_d]
        with torch.no_grad():
            self.pre = pre1
            self.pre2 = pre2
        P1 = P[..., 0]
        P2 = P[..., 3]
        P3 = P[..., 1]
        P4 = P[..., 2]  # not used by the active code in this method
        #feature extraction
        l_mask = P2 - P1
        s_mask = P1
        #l_mask=l_mask.byte()
        #s_mask=s_mask.byte()
        #basic cuda 524
        #print(l.type)
        #1923
        #print(torch.cuda.memory_allocated(1))
        #2727
        # Two-stage extraction: *_sf is the output of feature_extraction2,
        # *_lf is feature_extraction applied on top of *_sf.
        l_sf = self.feature_extraction2(l)
        l_lf = self.feature_extraction(l_sf)

        #print(torch.cuda.memory_allocated(2))
        #the cuda won't copy the volume to the new gpu
        # a=l_lf.cuda(1)
        # b=l_lf.cuda(2)
        # c=l_sf.cuda(3)
        r_sf = self.feature_extraction2(r)
        r_lf = self.feature_extraction(r_sf)
        #print(torch.cuda.memory_allocated(1))
        #3267

        #print(torch.cuda.memory_allocated(2))
        #reshape the mask to batch and channel

        # `disparity` is allocated but not used by the active code below.
        disparity = torch.zeros([540, 960]).cuda(2)
        # Scalar constants on CUDA device 2, used as torch.where operands.
        one = torch.ones(1).cuda(2)
        zero = torch.zeros(1).cuda(2)
        cost_volume = []
        #5710
        #print(value)
        # All feature maps are moved to CUDA device index 2 (hard-coded).
        l_lf = l_lf.cuda(2)
        r_lf = r_lf.cuda(2)
        r_sf = r_sf.cuda(2)
        l_sf = l_sf.cuda(2)
        # Mask-weighted blend of the two left feature maps; where both masks
        # are zero the blend is replaced by l_lf.
        feature = l_lf * l_mask + l_sf * s_mask
        feature = torch.where((l_mask + s_mask) > 0, feature, l_lf)
        count = 0  # not used by the active code below
        start_time = time.time()
        # Per-object index tensors, concatenated after the loop:
        # sx/sy, lx/ly hold the tiled left indices, sy_r/ly_r the shifted
        # second indices used to sample the right features.
        sx = []
        sy = []
        lx = []
        ly = []
        sy_r = []
        ly_r = []
        with torch.no_grad():
            for i in range(150):
                #ground 0-270, sky 0-40
                #0.38
                # Stop once i exceeds the largest label value present in P3
                # (the maximum is recomputed on every iteration).
                if i > torch.max(P3):
                    break

                # Shift range [min_d, max_d) for object i.
                min_d = pre1[0, 0, i].long()
                max_d = pre1[0, 1, i].long()
                #object_mask=torch.where(P3==i,one,zero)
                # Box of object i; `size` is unpacked but not used.
                x1, y1, x2, y2, size = pre2[0, i].long()
                object_mask = P3[0, x1:x2, y1:y2]
                # 1 where the cropped label map equals i, else 0.
                object_mask = torch.where(object_mask == i, one, zero)
                s_mask_o = object_mask * s_mask[0, x1:x2, y1:y2]
                l_mask_o = object_mask * l_mask[0, x1:x2, y1:y2]
                s_match = s_mask_o.nonzero()
                l_match = l_mask_o.nonzero()
                # Fall back to every pixel of the object when a mask selects
                # none.
                if s_match.shape[0] == 0:
                    s_match = object_mask.nonzero()
                if l_match.shape[0] == 0:
                    l_match = object_mask.nonzero()
                # NOTE(review): these indices are relative to the crop; no
                # x1/y1 offset is added before they index the uncropped
                # feature maps after the loop -- confirm intended.
                sy_match = s_match[:, 1]
                sx_match = s_match[:, 0]
                ly_match = l_match[:, 1]
                lx_match = l_match[:, 0]
                # d is a 0-dim tensor (difference of two .long() values).
                d = max_d - min_d
                # repeat(1, d) tiles the N indices d times into (1, N*d).
                sx_match = sx_match.repeat(1, d)
                sy_match = sy_match.repeat(1, d)
                # Shifted copy: the k-th tile has shift min_d + k subtracted.
                # Unlike the in-place variants, sy_match itself is kept
                # unshifted for indexing the left features.
                sy_match_r = sy_match - torch.arange(
                    min_d, max_d).cuda(2).repeat(
                        s_match.shape[0], 1).transpose(
                            1, 0).contiguous().view_as(sy_match)
                lx_match = lx_match.repeat(1, d)
                ly_match = ly_match.repeat(1, d)
                ly_match_r = ly_match - torch.arange(
                    min_d, max_d).cuda(2).repeat(
                        l_match.shape[0], 1).transpose(
                            1, 0).contiguous().view_as(ly_match)
                #print(ly_match.shape)
                sx.append(sx_match)
                sy.append(sy_match)
                sy_r.append(sy_match_r)
                lx.append(lx_match)
                ly.append(ly_match)
                ly_r.append(ly_match_r)
            # Join all objects along dim 1 (the tiled index axis).
            sx_match = torch.cat(sx, 1)
            sy_match = torch.cat(sy, 1)
            lx_match = torch.cat(lx, 1)
            ly_match = torch.cat(ly, 1)
            sy_match_r = torch.cat(sy_r, 1)
            ly_match_r = torch.cat(ly_r, 1)
        # One cosine_s call per mask type; entries whose shifted index is
        # negative get cost `zero`. The third argument 0 is passed through to
        # cosine_s (presumably the reduction dim -- TODO confirm).
        cost_s = torch.where(
            sy_match_r.squeeze() >= 0,
            cosine_s(feature[..., sx_match, sy_match].squeeze(),
                     r_sf[..., sx_match, sy_match_r].squeeze(), 0), zero)
        # NOTE(review): the `l` cost samples r_sf here, whereas the sibling
        # variants of this method sample r_lf for the l positions -- confirm
        # which is intended.
        cost_l = torch.where(
            ly_match_r.squeeze() >= 0,
            cosine_s(feature[..., lx_match, ly_match].squeeze(),
                     r_sf[..., lx_match, ly_match_r].squeeze(), 0), zero)
        # NOTE(review): overwrites `feature` in place at the s positions with
        # 1; looks like an experiment -- confirm.
        feature[..., sx_match, sy_match] = 1
        # Second pass over the objects: recomputes the per-object matches but
        # does nothing with them (s_match/l_match are not used afterwards).
        for i in range(150):
            with torch.no_grad():
                if i > torch.max(P3):
                    break
                min_d = pre1[0, 0, i].long()
                max_d = pre1[0, 1, i].long()
                #object_mask=torch.where(P3==i,one,zero)
                x1, y1, x2, y2, size = pre2[0, i].long()
                object_mask = P3[0, x1:x2, y1:y2]
                object_mask = torch.where(object_mask == i, one, zero)
                s_mask_o = object_mask * s_mask[0, x1:x2, y1:y2]
                l_mask_o = object_mask * l_mask[0, x1:x2, y1:y2]
                s_match = s_mask_o.nonzero()
                l_match = l_mask_o.nonzero()

        print(time.time() - start_time)
        # NOTE(review): blocks the caller for 100 s; looks like a debugging
        # pause left in place -- confirm before using outside benchmarks.
        time.sleep(100)
        return cost_volume