def forward(self, l, r, P, pre1, pre2):
    """Build and aggregate a sparse per-object matching cost volume on GPU 2.

    NOTE(review): this definition was recovered from source whose line breaks
    and indentation had been lost. Statement order is unchanged, but the
    extent of every ``with torch.no_grad()`` block and the nesting of the
    trailing ``print`` / ``time.sleep`` calls are a best-effort reconstruction
    and must be confirmed against the original file.

    Args:
        l: Input run through ``feature_extraction2`` and then
            ``feature_extraction`` (presumably the left image -- confirm).
        r: Second input, processed the same way (presumably the right image).
        P: Tensor indexed on its last axis. Channel 0 is used as ``s_mask``,
            channel 3 minus channel 0 as ``l_mask``, channel 1 is compared
            with the object index ``i`` and channel 2 with the plane index
            ``j``.
        pre1: ``pre1[0, 0, i]`` / ``pre1[0, 1, i]`` are read as the minimum /
            maximum disparity of object ``i``.
        pre2: ``pre2[0, i]`` is unpacked as ``x1, y1, x2, y2, size`` and used
            as the crop window of object ``i``.

    Returns:
        The cost volume of the last object processed, allocated as
        ``(x2 - x1, y2 - y1, max_d - min_d)``.
    """
    # Original author's layout notes, kept verbatim (not verifiable here):
    #   self.P=P[1,0] / 0 l to r,1 min,2 max / [l_box,r_box,match],[min_d,max_d]
    start_time = time.time()
    with torch.no_grad():
        # Keep device copies of the metadata and split P into its channels.
        self.pre = pre1.cuda(2)
        self.pre2 = pre2.cuda(2)
        P1 = P[..., 0].cuda(2)
        P2 = P[..., 3].cuda(2)
        P3 = P[..., 1].cuda(2)
        P4 = P[..., 2].cuda(2)
    # feature extraction
    l_mask = P2 - P1
    s_mask = P1
    # Author's profiling notes (presumably MB of GPU memory): basic cuda 524,
    # 1923 / 2727 around the first branch, 3267 after the second.
    l_sf = self.feature_extraction2(l)
    l_lf = self.feature_extraction(l_sf)
    # Author's note: "the cuda won't copy the volume to the new gpu".
    r_sf = self.feature_extraction2(r)
    r_lf = self.feature_extraction(r_sf)
    # NOTE(review): `disparity` is allocated but never read in this variant.
    disparity = torch.zeros([540, 960]).cuda(2)
    one = torch.ones(1).cuda(2)
    zero = torch.zeros(1).cuda(2)
    cost_volume = []
    # Make sure all four feature maps live on GPU 2.
    l_lf = l_lf.cuda(2)
    r_lf = r_lf.cuda(2)
    r_sf = r_sf.cuda(2)
    l_sf = l_sf.cuda(2)
    # Blend the two feature maps by mask; where neither mask is set, fall back
    # to l_lf.
    feature = l_lf * l_mask + l_sf * s_mask
    feature = torch.where((l_mask + s_mask) > 0, feature, l_lf)
    count = 0  # NOTE(review): never read afterwards
    start_time = time.time()
    # The loop body does not depend on z, so the same work is repeated ten
    # times -- this looks like a timing loop. NOTE(review): confirm.
    for z in range(10):
        start_time = time.time()
        for i in range(150):
            # Author's notes: ground 0-270, sky 0-40; initial 0.46, after 0.18
            with torch.no_grad():
                # Stop once i exceeds the largest id present in P3.
                if i > torch.max(P3).type(torch.int32):
                    break
                min_d = pre1[0, 0, i].long()
                max_d = pre1[0, 1, i].long()
                x1, y1, x2, y2, size = pre2[0, i].long()
                cost_volume = torch.zeros(x2 - x1, y2 - y1,
                                          max_d - min_d).cuda(2)
                # Binary mask of object i inside its crop.
                object_mask = P3[0, x1:x2, y1:y2]
                object_mask = torch.where(object_mask == i, one, zero)
                s_mask_o = object_mask * s_mask[0, x1:x2, y1:y2]
                l_mask_o = object_mask * l_mask[0, x1:x2, y1:y2]
                # Coordinates (relative to the crop) of the masked pixels.
                s_match = s_mask_o.nonzero().cuda(2)
                l_match = l_mask_o.nonzero().cuda(2)
                # If a mask is empty for this object, use every object pixel.
                if s_match.shape[0] == 0:
                    s_match = object_mask.nonzero().cuda(2)
                if l_match.shape[0] == 0:
                    l_match = object_mask.nonzero().cuda(2)
            # NOTE(review): s_match / l_match are crop-relative, yet `feature`
            # (and r_sf / r_lf below) are indexed without an x1 / y1 offset
            # or crop -- confirm this is intended.
            s_l_o = feature[..., s_match[:, 0], s_match[:, 1]]
            l_l_o = feature[..., l_match[:, 0], l_match[:, 1]]
            num_s = s_match.shape[0]  # NOTE(review): unused
            num_l = l_match.shape[0]  # NOTE(review): unused
            with torch.no_grad():
                sy_match = s_match[:, 1]
                sx_match = s_match[:, 0]
                ly_match = l_match[:, 1]
                lx_match = l_match[:, 0]
                d = max_d - min_d
                # Tile every match once per disparity candidate and subtract
                # the candidate from the column index.
                sx_match = sx_match.repeat(1, d)
                sy_match = sy_match.repeat(1, d)
                range_d_s = torch.arange(min_d, max_d).cuda(2).repeat(
                    s_match.shape[0],
                    1).transpose(1, 0).contiguous().view_as(sy_match)
                sy_match -= range_d_s
                lx_match = lx_match.repeat(1, d)
                ly_match = ly_match.repeat(1, d)
                range_d_l = torch.arange(min_d, max_d).cuda(2).repeat(
                    l_match.shape[0],
                    1).transpose(1, 0).contiguous().view_as(ly_match)
                ly_match -= range_d_l
            # Gather the candidate features from the second input. The 32 is
            # hard-coded -- presumably the feature channel count (confirm).
            s_r_o_t = r_sf[..., sx_match, sy_match].reshape(1, 32,
                                                            s_l_o.shape[-1], d)
            s_l_o = s_l_o.repeat(1, 1, 1, d).reshape(1, 32, s_l_o.shape[-1], d)
            l_r_o_t = r_lf[..., lx_match, ly_match].reshape(1, 32,
                                                            l_l_o.shape[-1], d)
            l_l_o = l_l_o.repeat(1, 1, 1, d).reshape(1, 32, l_l_o.shape[-1], d)
            # Negative column indices wrap around when used for indexing, so
            # those entries are overwritten with `zero` here.
            cost_s = torch.where(
                sy_match.reshape(1, s_l_o.shape[-2], d) >= 0,
                cosine_s(s_l_o, s_r_o_t), zero)
            cost_l = torch.where(
                ly_match.reshape(1, l_l_o.shape[-2], d) >= 0,
                cosine_s(l_l_o, l_r_o_t), zero)
            # convert to volume: undo the shift to recover the original
            # column and turn the disparity into a 0-based depth index.
            with torch.no_grad():
                sy_match = sy_match + range_d_s
                range_d_s = range_d_s - min_d
            # sparse tensor
            cost_volume[sx_match.squeeze(),
                        sy_match.squeeze(),
                        range_d_s.squeeze()] = cost_s.view_as(
                            sy_match).squeeze()
            with torch.no_grad():
                ly_match = ly_match + range_d_l
                range_d_l = range_d_l - min_d
            cost_volume[lx_match.squeeze(),
                        ly_match.squeeze(),
                        range_d_l.squeeze()] = cost_l.view_as(
                            ly_match).squeeze()
            # aggregation
            a_volume = torch.zeros_like(cost_volume).cuda(2)  # NOTE(review): unused
            # Plane id per pixel of object i; -1 outside the object.
            object_r = torch.where(P3[0, x1:x2, y1:y2] == i,
                                   P4[0, x1:x2, y1:y2], -one)
            max_r = torch.max(object_r).long()
            for j in range(1, max_r + 1):
                with torch.no_grad():
                    plane_mask = torch.where(object_r == j, one, zero)
                    # NOTE(review): `.cuda()` without an index targets the
                    # current device, not GPU 2 as everywhere else.
                    index = plane_mask.nonzero().long().cuda()
                    if index.shape[0] < 1:
                        continue
                    # Tight bounding box of plane j inside the object crop.
                    xp1, xp2, yp1, yp2 = torch.min(
                        index[:, 0]), torch.max(index[:, 0]) + 1, torch.min(
                            index[:, 1]), torch.max(index[:, 1]) + 1
                    plane_mask = plane_mask[..., xp1:xp2, yp1:yp2]
                    s_plane_mask = plane_mask * s_mask[..., x1:x2, y1:y2][
                        ..., xp1:xp2, yp1:yp2]
                    l_plane_mask = plane_mask * l_mask[..., x1:x2, y1:y2][
                        ..., xp1:xp2, yp1:yp2]
                plane = cost_volume[..., xp1:xp2, yp1:yp2, :]
                # Weighted mean cost over the plane, separately for the two
                # masks; the weights come from self.cluster.
                s_weights = self.cluster(
                    l_sf[..., x1:x2, y1:y2][..., xp1:xp2, yp1:yp2],
                    s_plane_mask)
                s_cost = torch.sum(torch.sum(
                    plane * s_weights, -2, keepdim=True), -3,
                                   keepdim=True) / torch.sum(s_weights)
                l_weights = self.cluster(
                    l_lf[..., x1:x2, y1:y2][..., xp1:xp2, yp1:yp2],
                    l_plane_mask)
                l_cost = torch.sum(torch.sum(
                    plane * l_weights, -2, keepdim=True), -3,
                                   keepdim=True) / torch.sum(l_weights)
                with torch.no_grad():
                    # Broadcast the masks along the disparity axis.
                    plane_mask=plane_mask.view(1,plane_mask.shape[0],plane_mask.shape[1],1) \
                        .expand(1,plane_mask.shape[0],plane_mask.shape[1],plane.shape[-1])
                    s_plane_mask=s_plane_mask.view(plane_mask.shape[0],plane_mask.shape[1],plane_mask.shape[2],1) \
                        .expand(plane_mask.shape[0],plane_mask.shape[1],plane_mask.shape[2],plane.shape[-1])
                    l_plane_mask=l_plane_mask.view(plane_mask.shape[0],plane_mask.shape[1],plane_mask.shape[2],1) \
                        .expand(plane_mask.shape[0],plane_mask.shape[1],plane_mask.shape[2],plane.shape[-1])
                # Blend the plane-level cost into the masked pixels.
                cost_volume[..., xp1:xp2, yp1:yp2, :] = torch.where(
                    s_plane_mask == 1,
                    s_cost * s_weights + (1 - s_weights) * plane, plane)
                cost_volume[..., xp1:xp2, yp1:yp2, :] = torch.where(
                    l_plane_mask == 1,
                    l_cost * l_weights + (1 - l_weights) * plane, plane)
                # NOTE(review): `exist` is s minus l (not s plus l) -- confirm.
                exist = torch.where(s_plane_mask - l_plane_mask > 0, one, zero)
                weights = self.cluster(
                    torch.cat([
                        l_lf[..., x1:x2, y1:y2][..., xp1:xp2, yp1:yp2],
                        l_sf[..., x1:x2, y1:y2][..., xp1:xp2, yp1:yp2]
                    ], -3), plane_mask[..., 0])
                costs = torch.sum(torch.sum(
                    plane * weights, -2, keepdim=True), -3,
                                  keepdim=True) / torch.sum(exist)
                # Remaining plane pixels (not in `exist`) get the plane cost.
                plane_mask = plane_mask - exist
                cost_volume[..., xp1:xp2, yp1:yp2, :] = torch.where(
                    plane_mask == 1, costs * weights, plane)
        # Elapsed time of this repetition.
        print(time.time() - start_time)
    # NOTE(review): blocks for 100 s before returning -- looks like a
    # debugging / profiling pause; confirm before real use.
    time.sleep(100)
    # TODO: disparity selection (ss_argmin) and per-plane refinement are not
    # performed in this variant; the raw cost volume is returned.
    return cost_volume
def forward(self, l, r, P, pre, matching):
    """Fill a per-object cost volume from precomputed match indices (GPU 1).

    NOTE(review): this definition was recovered from source whose line breaks
    and indentation had been lost. Statement order is unchanged, but the
    extent of the ``with torch.no_grad()`` block and the nesting of the
    trailing ``print`` / ``time.sleep`` calls are a best-effort reconstruction
    and must be confirmed against the original file.

    Args:
        l: Input run through ``feature_extraction2`` and then
            ``feature_extraction`` (presumably the left image -- confirm).
        r: Second input, processed the same way (presumably the right image).
        P: Tensor split on its last axis into four channels; only channel 1
            (``P3``) is read afterwards, for the number of objects.
        pre: ``pre[0, i]`` is unpacked as ``x1, y1, x2, y2, size``, the crop
            window of object ``i``.
        matching: Sequence indexed as ``matching[k][i]``. Entries 0, 1, 2 are
            used as row, column and disparity indices for the ``l_sf`` /
            ``r_sf`` pair, entries 3, 4, 5 for the ``l_lf`` / ``r_lf`` pair,
            and ``matching[-1][i]`` supplies the disparity range.

    Returns:
        The cost volume of the last object processed.
    """
    # Original author's layout notes, kept verbatim (not verifiable here):
    #   self.P=P[1,0] / 0 l to r,1 min,2 max / [l_box,r_box,match],[min_d,max_d]
    start_time = time.time()
    with torch.no_grad():
        self.pre = pre.cuda(1)
        P1 = P[..., 0].cuda(1)
        P2 = P[..., 3].cuda(1)
        P3 = P[..., 1].cuda(1)
        P4 = P[..., 2].cuda(1)
    # feature extraction
    # NOTE(review): l_mask, s_mask, P4, disparity and count are computed but
    # never read in this variant.
    l_mask = P2 - P1
    s_mask = P1
    l_sf = self.feature_extraction2(l)
    l_lf = self.feature_extraction(l_sf)
    r_sf = self.feature_extraction2(r)
    r_lf = self.feature_extraction(r_sf)
    disparity = torch.zeros([540, 960]).cuda(1)
    one = torch.ones(1).cuda(1)
    zero = torch.zeros(1).cuda(1)
    # Make sure all four feature maps live on GPU 1.
    l_lf = l_lf.cuda(1)
    r_lf = r_lf.cuda(1)
    r_sf = r_sf.cuda(1)
    l_sf = l_sf.cuda(1)
    count = 0
    # The loop body does not depend on z, so the same work is repeated ten
    # times -- this looks like a timing loop. NOTE(review): confirm.
    for z in range(10):
        start_time = time.time()
        # NOTE(review): if max(P3) is 0 the loop never runs and `cost_volume`
        # is unbound at the return statement.
        for i in range(torch.max(P3).type(torch.int32)):
            # NOTE(review): reads the `pre` argument, not the self.pre copy
            # that was moved to GPU 1 above.
            x1, y1, x2, y2, size = pre[0, i].long()
            max_d = torch.max(matching[-1][i])
            min_d = torch.min(matching[-1][i])
            cost_volume = torch.zeros(x2 - x1, y2 - y1,
                                      max_d - min_d + 1).cuda(1)
            # Author's notes: ground 0-270, sky 0-40; initial 0.46, after
            # 0.18, volume 0.3; cost computation initial 0.20, after 0.14.
            if torch.max(matching[0][i]) > 0:
                s_feature = l_sf[..., x1:x2, y1:y2][..., matching[0][i],
                                                    matching[1][i]]
                # Shifted column inside the crop, clamped to -1 so that
                # out-of-range candidates can be detected below.
                s_r_y = torch.max(matching[1][i] - matching[2][i],
                                  -torch.ones_like(matching[2][i]))
                s_r_o_t = r_sf[..., x1:x2, y1:y2][..., matching[0][i], s_r_y]
                # Index -1 wraps to the last column when gathering; those
                # entries are replaced by `zero` here.
                s_cost = torch.where(s_r_y >= 0,
                                     cosine_s(s_feature, s_r_o_t), zero)
                # 0-based position along the disparity axis.
                d = matching[2][i] - min_d
                cost_volume[matching[0][i], matching[1][i], d] = s_cost
            if torch.max(matching[3][i]) > 0:
                l_feature = l_lf[..., x1:x2, y1:y2][..., matching[3][i],
                                                    matching[4][i]]
                l_r_y = torch.max(matching[4][i] - matching[5][i],
                                  -torch.ones_like(matching[5][i]))
                l_r_o_t = r_lf[..., x1:x2, y1:y2][..., matching[3][i], l_r_y]
                d = matching[5][i] - min_d
                l_cost = torch.where(l_r_y >= 0,
                                     cosine_s(l_feature, l_r_o_t), zero)
                cost_volume[matching[3][i], matching[4][i], d] = l_cost
        # Elapsed time of this repetition.
        print(time.time() - start_time)
    # Author's note: 3s, 4600mb
    print(time.time() - start_time)
    # NOTE(review): blocks for 100 s before returning -- looks like a
    # debugging / profiling pause; confirm before real use.
    time.sleep(100)
    # TODO: aggregation, disparity selection (ss_argmin) and refinement are
    # not performed in this variant; the raw cost volume is returned.
    return cost_volume
def forward(self, l, r, P, pre1, pre2):
    """Compute a dense per-object cost volume by shifting the second input.

    For every object id in ``P[..., 1]`` the object's crop of the blended
    feature map is compared (``cosine_s``) against the second input's feature
    maps shifted by each candidate disparity. Crops wider than 640 columns
    take a separate strip-wise path.

    NOTE(review): this definition was recovered from source whose line breaks
    and indentation had been lost. Statement order is unchanged, but the
    nesting of the ground / sky branches, of ``continue`` and of the trailing
    ``print`` / ``time.sleep`` calls is a best-effort reconstruction and must
    be confirmed against the original file.

    Args:
        l: Input run through ``feature_extraction2`` and then
            ``feature_extraction`` (presumably the left image -- confirm).
        r: Second input, processed the same way (presumably the right image).
        P: Tensor indexed on its last axis. Channel 0 is used as ``s_mask``,
            channel 3 minus channel 0 as ``l_mask``, channel 1 is the object
            id map.
        pre1: ``pre1[0, 0, i]`` / ``pre1[0, 1, i]`` are read as the minimum /
            maximum disparity of object ``i``.
        pre2: ``pre2[0, i]`` is unpacked as ``x1, y1, x2, y2, size``.

    Returns:
        The cost volume of the last object processed.
    """
    # Original author's layout notes, kept verbatim (not verifiable here):
    #   self.P=P[1,0] / 0 l to r,1 min,2 max / [l_box,r_box,match],[min_d,max_d]
    self.pre = pre1
    self.pre2 = pre2
    P1 = P[..., 0]
    P2 = P[..., 3]
    P3 = P[..., 1]
    P4 = P[..., 2]
    # feature extraction
    l_mask = P2 - P1
    s_mask = P1
    l_sf = self.feature_extraction2(l)
    l_lf = self.feature_extraction(l_sf)
    r_sf = self.feature_extraction2(r)
    r_lf = self.feature_extraction(r_sf)
    # NOTE(review): `disparity` is allocated but never read in this variant.
    disparity = torch.zeros([540, 960]).cuda(2)
    one = torch.ones(1).cuda(2)
    zero = torch.zeros(1).cuda(2)
    cost_volume = []
    l_lf = l_lf.cuda(2)
    r_lf = r_lf.cuda(2)
    r_sf = r_sf.cuda(2)
    l_sf = l_sf.cuda(2)
    # Blend the two feature maps by mask; where neither mask is set, fall back
    # to l_lf.
    feature = l_lf * l_mask + l_sf * s_mask
    feature = torch.where((l_mask + s_mask) > 0, feature, l_lf)
    # Author's notes ("promotion"): we can segment with bounding box and
    # divide the whole image into many parts; each single bounding box will
    # be managed through network, not the whole image.
    # matching cost computation
    count = 0
    start_time = time.time()
    for i in range(torch.max(P3).type(torch.int32) + 1):
        # Author's note: ground 0-270, sky 0-40
        min_d = pre1[0, 0, i].long()
        max_d = pre1[0, 1, i].long()
        object_mask = torch.where(P3 == i, one, zero)
        x1, y1, x2, y2, size = pre2[0, i].long()
        # Sanitise the disparity range: reset min_d to 0 when it exceeds y2,
        # otherwise clamp it at 0; widen an empty range to 200 candidates;
        # cap max_d at y2.
        if min_d > y2:
            min_d = zero.long()
        else:
            min_d = torch.max(min_d, zero.long()).long()
        if max_d <= min_d:
            max_d = min_d + one.long() * 200
        max_d = max_d.long()
        max_d = torch.min(max_d, y2).long()
        object_mask = object_mask[0, x1:x2, y1:y2]
        s_mask_o = object_mask * s_mask[0, x1:x2, y1:y2]
        l_mask_o = object_mask * l_mask[0, x1:x2, y1:y2]
        s_l_o = feature[..., x1:x2, y1:y2] * s_mask_o
        l_l_o = feature[..., x1:x2, y1:y2] * l_mask_o
        cost_s = []
        cost_l = []
        # ground and sky: crops wider than 640 columns are processed in
        # strips of 20 rows, each with its own disparity window.
        if (y2 - y1) > 640:
            if x2 > 500:
                print('ground')
                cost_l = []
                count = 0
                m = x1.item()
                while (m < x2):
                    # Strip `count` searches disparities
                    # [max(20*count - 5, 0), +30).
                    min_d = np.max([count * 20 - 5, 0])
                    max_d = min_d + 30
                    cost_slice = []
                    for j in range(min_d, max_d):
                        if y1 - j > 0:
                            l_r_o_t = r_lf[..., m:m + 20, y1 - j:y2 - j]
                            cost_slice.append(
                                torch.where(
                                    l_mask_o[..., m - x1:m - x1 + 20, :] == 1,
                                    cosine_s(
                                        l_l_o[..., m - x1:m - x1 + 20, :],
                                        l_r_o_t), zero))
                        else:
                            # The shift runs past the left border: pad the
                            # missing columns with zeros.
                            l_r_o_t = torch.cat([
                                r_lf[..., m:m + 20, 0:j - y1] * zero,
                                r_lf[..., m:m + 20, 0:y2 - j]
                            ], -1)
                            cost_slice.append(
                                torch.where(
                                    l_mask_o[..., m - x1:m - x1 + 20, :] == 1,
                                    cosine_s(
                                        l_l_o[..., m - x1:m - x1 + 20, :],
                                        l_r_o_t), zero))
                    cost_slice = torch.stack(cost_slice, -1)
                    if m == x1:
                        cost_l = cost_slice
                    else:
                        cost_l = torch.cat([cost_l, cost_slice], -2)
                    m += 20
                    count += 1
                    if m + 20 > x2:
                        # Last (possibly shorter) strip: rows m .. end of
                        # crop, then leave the while loop.
                        min_d = np.max([count * 20 - 5, 0])
                        max_d = min_d + 30
                        cost_slice = []
                        for j in range(min_d, max_d):
                            if y1 - j > 0:
                                l_r_o_t = r_lf[..., m:, y1 - j:y2 - j]
                                cost_slice.append(
                                    torch.where(
                                        l_mask_o[..., m - x1:m - x1 +
                                                 20, :] == 1,
                                        cosine_s(l_l_o[..., m - x1:, :],
                                                 l_r_o_t), zero))
                            else:
                                l_r_o_t = torch.cat([
                                    r_lf[..., m:, 0:j - y1] * zero,
                                    r_lf[..., m:, 0:y2 - j]
                                ], -1)
                                cost_slice.append(
                                    torch.where(
                                        l_mask_o[..., m - x1:m - x1 +
                                                 20, :] == 1,
                                        cosine_s(l_l_o[..., m - x1:, :],
                                                 l_r_o_t), zero))
                        cost_slice = torch.stack(cost_slice, -1)
                        # `count` was incremented just above, so this
                        # condition is never true here.
                        if count == 0:
                            cost_l = cost_slice
                        else:
                            cost_l = torch.cat([cost_l, cost_slice], -2)
                        break
                cost_volume = cost_l
            else:
                print('sky')
                if x1 < 10:
                    cost_l = []
                    count = 0
                    m = x1.item()
                    while (m < x2):
                        # Strip `count` searches disparities
                        # [max(5*count - 5, 0), +15).
                        min_d = np.max([count * 5 - 5, 0])
                        max_d = min_d + 15
                        cost_slice = []
                        for j in range(min_d, max_d):
                            if y1 - j > 0:
                                l_r_o_t = r_lf[..., m:m + 20, y1 - j:y2 - j]
                                # Debug output of the three operand shapes.
                                print(
                                    l_mask_o[..., m - x1:m - x1 + 20, :].shape,
                                    l_r_o_t.shape,
                                    l_l_o[..., m - x1:m - x1 + 20, :].shape)
                                cost_slice.append(
                                    torch.where(
                                        l_mask_o[..., m - x1:m - x1 +
                                                 20, :] == 1,
                                        cosine_s(
                                            l_l_o[..., m - x1:m - x1 + 20, :],
                                            l_r_o_t), zero))
                            else:
                                l_r_o_t = torch.cat([
                                    r_lf[..., m:m + 20, 0:j - y1] * zero,
                                    r_lf[..., m:m + 20, 0:y2 - j]
                                ], -1)
                                cost_slice.append(
                                    torch.where(
                                        l_mask_o[..., m - x1:m - x1 +
                                                 20, :] == 1,
                                        cosine_s(
                                            l_l_o[..., m - x1:m - x1 + 20, :],
                                            l_r_o_t), zero))
                        cost_slice = torch.stack(cost_slice, -1)
                        if m == x1:
                            cost_l = cost_slice
                        else:
                            cost_l = torch.cat([cost_l, cost_slice], -2)
                        m += 20
                        count += 1
                        if m + 20 > x2:
                            # NOTE(review): this tail uses the ground
                            # constants (20 / 30) although the loop above
                            # uses 5 / 15, so the last strip has 30
                            # disparity slices instead of 15 -- looks like a
                            # copy-paste leftover; confirm.
                            min_d = np.max([count * 20 - 5, 0])
                            max_d = min_d + 30
                            cost_slice = []
                            for j in range(min_d, max_d):
                                if y1 - j > 0:
                                    l_r_o_t = r_lf[..., m:, y1 - j:y2 - j]
                                    cost_slice.append(
                                        torch.where(
                                            l_mask_o[..., m - x1:m - x1 +
                                                     20, :] == 1,
                                            cosine_s(
                                                l_l_o[..., m - x1:, :],
                                                l_r_o_t), zero))
                                else:
                                    l_r_o_t = torch.cat([
                                        r_lf[..., m:, 0:j - y1] * zero,
                                        r_lf[..., m:, 0:y2 - j]
                                    ], -1)
                                    cost_slice.append(
                                        torch.where(
                                            l_mask_o[..., m - x1:m - x1 +
                                                     20, :] == 1,
                                            cosine_s(
                                                l_l_o[..., m - x1:, :],
                                                l_r_o_t), zero))
                            cost_slice = torch.stack(cost_slice, -1)
                            if count == 0:
                                cost_l = cost_slice
                            else:
                                cost_l = torch.cat([cost_l, cost_slice], -2)
                            break
                    cost_volume = cost_l
            # Wide crops never take the generic path below.
            # NOTE(review): nesting level of this `continue` is reconstructed.
            continue
        # Generic path: one cost slice per candidate disparity, first against
        # r_sf with the s-mask, then against r_lf with the l-mask.
        for j in range(min_d, max_d):
            if y1 - j > 0:
                s_r_o_t = r_sf[..., x1:x2, y1 - j:y2 - j]
                cost_s.append(
                    torch.where(s_mask_o == 1, cosine_s(s_l_o, s_r_o_t),
                                zero))
            else:
                # The shift runs past the left border: pad with zeros.
                s_r_o_t = torch.cat([
                    r_sf[..., x1:x2, 0:j - y1] * zero,
                    r_sf[..., x1:x2, 0:y2 - j]
                ], -1)
                cost_s.append(
                    torch.where(s_mask_o == 1, cosine_s(s_l_o, s_r_o_t),
                                zero))
        cost_s = torch.stack(cost_s, -1)
        for j in range(min_d, max_d):
            if y1 - j > 0:
                l_r_o_t = r_lf[..., x1:x2, y1 - j:y2 - j]
                cost_l.append(
                    torch.where(l_mask_o == 1, cosine_s(l_l_o, l_r_o_t),
                                zero))
            else:
                l_r_o_t = torch.cat([
                    r_lf[..., x1:x2, 0:j - y1] * zero,
                    r_lf[..., x1:x2, 0:y2 - j]
                ], -1)
                cost_l.append(
                    torch.where(l_mask_o == 1, cosine_s(l_l_o, l_r_o_t),
                                zero))
        cost_l = torch.stack(cost_l, -1)
        cost_volume = cost_s + cost_l
        # Allocated memory on GPU 2, in units of 1e6 bytes.
        print(torch.cuda.memory_allocated(2) / 1e+6)
        # The bare string below is disabled code (aggregation, ss_argmin,
        # refinement) kept by the author; it is evaluated and discarded.
        """
        #aggregation
        a_volume=torch.zeros_like(cost_volume)
        object_r=torch.where(P3==i,P4,zero)
        max_r=torch.max(object_r).long()
        object_r=torch.where(P3==i,P4,max_r+1)
        min_r=torch.min(object_r).long()
        for j in range(min_r,max_r+1):
          plane_mask=torch.where(object_r==j,one,zero)[x1:x2,y1:y2]
          xp1,xp2,yp1,yp2=crop(plane_mask).long()
          #xp1,xp2,yp1,yp2.r_size=self.pre[0,0][1]
          plane_mask=plane_mask[xp1:xp2,yp1:yp2]
          plane=cost_volume[...,xp1:xp2,yp1:yp2,:]
          s_plane_mask=plane_mask*s_mask[x1:x2,y1:y2][xp1:xp2,yp1:yp2]
          l_plane_mask=plane_mask*l_mask[x1:x2,y1:y2][xp1:xp2,yp1:yp2]
          s_weights=self.cluster(l_sf[...,x1:x2,y1:y2][...,xp1:xp2,yp1:yp2],s_plane_mask)
          s_cost=torch.sum(torch.sum(plane*s_weights,-2,keepdim=True),-3,keepdim=True)/torch.sum(s_weights)
          l_weights=self.cluster(l_lf[...,x1:x2,y1:y2][...,xp1:xp2,yp1:yp2],l_plane_mask)
          l_cost=torch.sum(torch.sum(plane*l_weights,-2),-2)/torch.sum(l_weights)
          plane_mask=plane_mask-torch.where(s_plane_mask+l_plane_mask>0,one,zero)
          plane_mask=plane_mask.view(plane_mask.shape[0],plane_mask.shape[1],plane_mask.shape[2],1) \
                       .expand(plane_mask.shape[0],plane_mask.shape[1],plane_mask.shape[2],plane.shape[-1])
          s_plane_mask=s_plane_mask.view(plane_mask.shape[0],plane_mask.shape[1],plane_mask.shape[2],1) \
                       .expand(plane_mask.shape[0],plane_mask.shape[1],plane_mask.shape[2],plane.shape[-1])
          l_plane_mask=l_plane_mask.view(plane_mask.shape[0],plane_mask.shape[1],plane_mask.shape[2],1) \
                       .expand(plane_mask.shape[0],plane_mask.shape[1],plane_mask.shape[2],plane.shape[-1])
          plane=torch.where(s_plane_mask==1,s_cost*s_weights,plane)
          plane=torch.where(l_plane_mask==1,l_cost*l_weights,plane)
          weights=self.cluster(l_lf[...,x1:x2,y1:y2][...,xp1:xp2,yp1:yp2],plane_mask)
          costs=torch.sum(torch.sum(plane*weights,-2,keepdim=True),-3,keepdim=True)/torch.sum(weights)
          plane=torch.where(plane_mask==1,cost*weights,plane)
          cost_volume[...,xp1:xp2,yp1:yp2,:]=plane
        #ss_argmin
        disparity[...,x1:x2,y1:y2]=ss_argmin(cost_volume,min_d,max_d)
        #refinement
        refine=torch.zeros_like(disparity)[...,x1:x2,y1:y2]
        for j in range(min_r,max_r+1):
          plane_mask=torch.where(object_r==j,one,zero)[x1:x2,y1:y2]
          xp1,xp2,yp1,yp2=crop(plane_mask)
          plane_mask=plane_mask[xp1:xp2,yp1:yp2]
          s_plane_mask=plane_mask*s_mask[x1:x2,y1:y2][xp1:xp2,yp1:yp2]
          l_plane_mask=plane_mask*l_mask[x1:x2,y1:y2][xp1:xp2,yp1:yp2]
          plane_mask=plane_mask-torch.where(s_plane_mask+l_plane_mask>0,one,zero)
          plane=disparity[...,x1:x2,y1:y2][...,xp1:xp2,yp1:yp2]*plane_mask
          s_weights=self.cluster(l_sf[...,x1:x2,y1:y2][...,xp1:xp2,yp1:yp2],s_plane_mask)
          s_cost=torch.sum(torch.sum(plane*s_weights,-2,keepdim=True),-3,keepdim=True)/torch.sum(s_weights)
          l_weights=self.cluster(l_lf[...,x1:x2,y1:y2][...,xp1:xp2,yp1:yp2],l_plane_mask)
          l_cost=torch.sum(torch.sum(plane*l_weights,-2),-2)/torch.sum(l_weights)
          weights=self.cluster(l_lf[...,x1:x2,y1:y2][...,xp1:xp2,yp1:yp2],plane_mask)
          costs=torch.sum(torch.sum(plane*weights,-2,keepdim=True),-3,keepdim=True)/torch.sum(weights)
          plane=torch.where(s_plane_mask==1,s_cost*s_weights,plane)
          plane=torch.where(l_plane_mask==1,l_cost*l_weights,plane)
          plane=torch.where(plane_mask==1,cost*weights,plane)
          disparity[...,x1:x2,y1:y2][...,xp1:xp2,yp1:yp2]=plane
        """
    # Total elapsed time for all objects.
    print(time.time() - start_time)
    # NOTE(review): blocks for 100 s before returning -- looks like a
    # debugging / profiling pause; confirm before real use.
    time.sleep(100)
    return cost_volume
def forward(self, l,r,P,matching,plane,s_plane,l_plane):
    """Fill a full-frame cost volume from match indices, then aggregate.

    NOTE(review): this definition was recovered from source whose line breaks
    and indentation had been lost; statement order is unchanged. The
    aggregation tail looks unfinished: it reads ``feature`` and ``s_weights``,
    neither of which is assigned in this function, so it raises ``NameError``
    once the first ``time.sleep(100)`` has elapsed. The intended definitions
    cannot be determined from this code and are left for the author.

    Args:
        l: Input run through ``feature_extraction2`` and then
            ``feature_extraction`` (presumably the left image -- confirm).
        r: Second input, processed the same way (presumably the right image).
        P: Tensor split on its last axis into four channels.
        matching: Sequence of index tensors. Entries 0, 1, 2 are used as row,
            column and disparity for the ``l_sf`` / ``r_sf`` pair, entries
            3, 4, 5 for the ``l_lf`` / ``r_lf`` pair.
        plane: Not read in this function.
        s_plane: Sequence; entries 0 and 1 are used as row / column indices
            into the cost volume, entry -3 is passed to ``cluster_vector``.
        l_plane: Not read in this function.

    Returns:
        The ``540 x 960 x (D + 1)`` cost volume, where ``D`` is the largest
        value in ``matching[2]`` and ``matching[5]``.
    """
    # Original author's layout notes, kept verbatim (not verifiable here):
    #   self.P=P[1,0] / 0 l to r,1 min,2 max / [l_box,r_box,match],[min_d,max_d]
    one=torch.ones(1).cuda(1)
    zero=torch.zeros(1).cuda(1)
    start_time=time.time()
    P1=P[...,0]
    P2=P[...,3]
    P3=P[...,1]
    P4=P[...,2]
    #feature extraction
    # NOTE(review): l_mask, s_mask, P3, P4 and disparity are computed but
    # never read afterwards.
    l_mask=torch.where(P2-P1>zero,one,zero)
    s_mask=P1
    l_sf=self.feature_extraction2(l)
    l_lf=self.feature_extraction(l_sf)
    r_sf=self.feature_extraction2(r)
    r_lf=self.feature_extraction(r_sf)
    disparity=torch.zeros([540,960]).cuda(1)
    # Make sure all four feature maps live on GPU 1.
    l_lf=l_lf.cuda(1)
    r_lf=r_lf.cuda(1)
    r_sf=r_sf.cuda(1)
    l_sf=l_sf.cuda(1)
    # Largest disparity over both match sets fixes the depth of the volume.
    D=torch.max(torch.max(matching[5]),torch.max(matching[2])).int()
    cost_volume=torch.zeros(540,960,D+1).cuda(1)
    # Gather features at the match positions and at the shifted column of
    # the second input; negative shifted columns wrap around when indexing
    # and are replaced by `zero` in torch.where.
    s_feature=l_sf[...,matching[0],matching[1]]
    s_r_y=matching[1]-matching[2]
    s_r_o_t=r_sf[...,matching[0],s_r_y]
    s_cost=torch.where(s_r_y>=0,cosine_s(s_feature,s_r_o_t),zero)
    l_feature=l_lf[...,matching[3],matching[4]]
    l_r_y=matching[4]-matching[5]
    l_r_o_t=r_lf[...,matching[3],l_r_y]
    l_cost=torch.where(l_r_y>=0,cosine_s(l_feature,l_r_o_t),zero)
    # Scatter the costs into the volume at (row, column, disparity).
    cost_volume[matching[0],matching[1],matching[2]]=s_cost
    cost_volume[matching[3],matching[4],matching[5]]=l_cost
    # NOTE(review): blocks for 100 s -- looks like a debugging / profiling
    # pause placed in front of the unfinished aggregation code.
    time.sleep(100)
    #aggregation
    # NOTE(review): `feature` is not defined in this function (NameError).
    s_weights_0=self.cluster_vector(feature, s_plane[0], s_plane[1], 1600, s_plane[-3].shape[0], s_plane[-3])
    zero_cost=torch.zeros_like(D)
    # NOTE(review): the volume has D+1 disparity slots but the view below
    # uses D -- confirm the intended size.
    costs=torch.where(s_plane[1]>0,cost_volume[s_plane[0], s_plane[1],:],zero_cost).view(1600,s_plane[-3].shape[0],D)
    # NOTE(review): `s_weights` is not defined in this function (NameError);
    # possibly `s_weights_0` was meant.
    s_cost_0=torch.sum(costs*s_weights_0,0,keepdim=True)/torch.sum(s_weights,0)
    cost_volume[s_plane[0], s_plane[1]]=torch.where(s_plane[1]>0,s_cost_0*s_weights_0+(1-s_weights_0)*costs,costs)
    print(time.time()-start_time)
    # NOTE(review): second 100 s pause before returning.
    time.sleep(100)
    return cost_volume
def forward(self, l, r):
    """Estimate a disparity map object by object from two inputs.

    For every object id in ``self.P[:, :, 1]`` a cost volume is built by
    comparing (``cosine_s``) the object's crop of the blended feature map
    with shifted crops of the second input's features. The volume is
    aggregated plane by plane, reduced with ``ss_argmin`` and refined.

    NOTE(review): this definition was recovered from source whose line breaks
    and indentation had been lost; the nesting below is a best-effort
    reconstruction. Fixed relative to the original:

    * ``for m in range(planes.shape[-1])`` lacked its colon (SyntaxError)
      and used the undefined name ``planes``; it now iterates ``plane``.
    * bare ``P`` (not a parameter of this method) now refers to ``self.P``.
    * ``self.l_sf`` was replaced by the local ``l_sf`` computed just above.
    * the two inner disparity loops reused ``i`` and clobbered the object
      index that the aggregation step compares against.
    * ``torch.min(max_d, <int>)`` treats the int as a ``dim`` argument; the
      upper column bound is now a plain ``min`` of two ints.
    * ``range()`` bounds are converted with ``int()`` because ``range``
      rejects non-integer tensors.

    Args:
        l: Input passed to both feature extractors (presumably the left
            image -- confirm).
        r: Second input, processed the same way (presumably the right image).

    Returns:
        The ``540 x 960`` disparity tensor.
    """
    P = self.P
    # feature extraction
    l_mask = P[:, :, 3] - P[:, :, 0]
    s_mask = P[:, :, 0]
    l_lf = self.feature_extraction(l)
    l_sf = self.feature_extraction2(l)
    r_lf = self.feature_extraction(r)
    r_sf = self.feature_extraction2(r)
    # Blend the two feature maps by mask; where neither mask is set, fall back
    # to l_lf.
    feature = l_lf * l_mask + l_sf * s_mask
    feature = torch.where((l_mask + s_mask) > 0, feature, l_lf)
    disparity = torch.zeros([540, 960])
    one = torch.ones(1)
    zero = torch.zeros(1)
    cost_volume = []
    # matching cost computation, one object at a time
    for i in range(torch.max(P[:, :, 1]).type(torch.int32) + 1):
        min_d = self.pre2[i][0]
        max_d = self.pre2[i][1]
        # Plain ints for range() and slicing; ss_argmin still receives the
        # original min_d / max_d objects.
        d_lo, d_hi = int(min_d), int(max_d)
        object_mask = torch.where(P[:, :, 1] == i, one, zero)
        # NOTE(review): this box does not depend on i, so every object uses
        # the same crop -- confirm the intended index into self.pre.
        x1, y1, x2, y2, size = self.pre[0, 0][0]
        object_mask = object_mask[x1:x2, y1:y2]
        s_mask_o = object_mask * s_mask[x1:x2, y1:y2]
        l_mask_o = object_mask * l_mask[x1:x2, y1:y2]
        s_l_o = feature[..., x1:x2, y1:y2] * s_mask_o
        l_l_o = feature[..., x1:x2, y1:y2] * l_mask_o
        col_hi = min(d_hi, r_lf.shape[-1])
        s_r_o = r_sf[..., x1:x2, d_lo:col_hi]
        l_r_o = r_lf[..., x1:x2, d_lo:col_hi]
        cost_s = []
        cost_l = []
        # NOTE(review): the zero padding below is `shift` columns wide, which
        # gives a total width of y2 rather than y2 - y1 -- confirm.
        for shift in range(d_lo, d_hi):
            if y1 - shift > 0:
                s_r_o_t = s_r_o[..., y1 - shift:y2 - shift]
            else:
                s_r_o_t = torch.cat([
                    torch.zeros_like(s_r_o[..., :shift]),
                    s_r_o[..., 0:y2 - shift]
                ], -1)
            cost_s.append(
                torch.where(s_mask_o == 1, cosine_s(s_l_o, s_r_o_t), zero))
        cost_s = torch.stack(cost_s, -1)
        for shift in range(d_lo, d_hi):
            if y1 - shift > 0:
                l_r_o_t = l_r_o[..., y1 - shift:y2 - shift]
            else:
                l_r_o_t = torch.cat([
                    torch.zeros_like(l_r_o[..., :shift]),
                    l_r_o[..., 0:y2 - shift]
                ], -1)
            cost_l.append(
                torch.where(l_mask_o == 1, cosine_s(l_l_o, l_r_o_t), zero))
        cost_l = torch.stack(cost_l, -1)
        cost_volume = cost_s + cost_l
        # aggregation
        # Plane ids of object i. Pixels outside the object are first set to 0
        # to find the maximum, then to max + 1 so they cannot be the minimum.
        object_r = torch.where(P[:, :, 1] == i, P[:, :, 2], zero)
        max_r = torch.max(object_r)
        object_r = torch.where(P[:, :, 1] == i, P[:, :, 2], max_r + 1)
        min_r = torch.min(object_r)
        plane_ids = range(int(min_r), int(max_r) + 1)
        for j in plane_ids:
            plane_mask = torch.where(object_r == j, one, zero)[x1:x2, y1:y2]
            xp1, xp2, yp1, yp2 = crop(plane_mask)
            plane_mask = plane_mask[xp1:xp2, yp1:yp2]
            plane = cost_volume[..., xp1:xp2, yp1:yp2, :]
            # Aggregate each disparity slice: sparse pass per mask, then a
            # dense pass over the blended features.
            for m in range(plane.shape[-1]):
                s_var, l_var = self.aggregation_sparse(
                    l_sf[..., x1:x2, y1:y2][..., xp1:xp2, yp1:yp2],
                    l_lf[..., x1:x2, y1:y2][..., xp1:xp2, yp1:yp2],
                    plane[..., m] * plane_mask)
                plane[..., m] = (
                    s_var * s_mask[x1:x2, y1:y2][xp1:xp2, yp1:yp2] +
                    l_var * l_mask[x1:x2, y1:y2][xp1:xp2, yp1:yp2])
                plane[..., m] = plane[..., m] * plane_mask
                plane[..., m] = self.aggregation_dense(
                    feature[..., x1:x2, y1:y2][..., xp1:xp2, yp1:yp2],
                    plane[..., m]) * plane_mask
            cost_volume[..., xp1:xp2, yp1:yp2, :] = plane
        # ss_argmin
        disparity[..., x1:x2, y1:y2] = ss_argmin(cost_volume, min_d, max_d)
        # refinement: one more dense aggregation per plane on the disparity
        for j in plane_ids:
            plane_mask = torch.where(object_r == j, one, zero)[x1:x2, y1:y2]
            xp1, xp2, yp1, yp2 = crop(plane_mask)
            plane_mask = plane_mask[xp1:xp2, yp1:yp2]
            plane = disparity[..., x1:x2, y1:y2][..., xp1:xp2,
                                                 yp1:yp2] * plane_mask
            plane = self.aggregation_dense(
                feature[..., x1:x2, y1:y2][..., xp1:xp2, yp1:yp2],
                plane) * plane_mask
            disparity[..., x1:x2, y1:y2][..., xp1:xp2, yp1:yp2] = plane
    return disparity
def forward(self, l, r):
    """Estimate a disparity map object by object, with cluster aggregation.

    For every object id in ``self.P[..., 1]`` a cost volume is built from
    shifted crops of the second input's features. It is aggregated per plane
    with weights from ``self.cluster``, reduced with ``ss_argmin`` and
    refined per plane.

    NOTE(review): this definition was recovered from source whose line breaks
    and indentation had been lost; the nesting below is a best-effort
    reconstruction. Fixed relative to the original:

    * ``P[:, :, 1]`` used the undefined name ``P``; it now uses ``P3``
      (``self.P[..., 1]``).
    * ``cost * weights`` (twice) used the undefined name ``cost``; it now
      uses ``costs`` computed on the preceding line.
    * the two inner shift loops reused ``i`` and clobbered the object index
      that the aggregation step compares against.
    * slice and ``range()`` bounds derived from float tensors (``zero`` and
      ``one`` are float) are converted with ``int()``, since float tensors
      cannot be used as indices.

    Known open points, left untouched because the intent is not determinable
    from this code: ``l_cost`` is reduced without ``keepdim`` while
    ``s_cost`` keeps its dims; the ``view`` calls read ``plane_mask.shape[2]``
    although ``plane_mask`` is sliced like a 2-D tensor; the first shift
    (``shift == 0``) slices ``[...:-0]``, which is empty.

    Args:
        l: Input passed to both feature extractors (presumably the left
            image -- confirm).
        r: Second input, processed the same way (presumably the right image).

    Returns:
        The ``540 x 960`` disparity tensor (allocated on the current CUDA
        device).
    """
    P1 = self.P[..., 0]
    P2 = self.P[..., 3]
    P3 = self.P[..., 1]
    P4 = self.P[..., 2]
    # feature extraction
    l_mask = P2 - P1
    s_mask = P1
    l_lf = self.feature_extraction(l)
    l_sf = self.feature_extraction2(l)
    r_lf = self.feature_extraction(r)
    r_sf = self.feature_extraction2(r)
    # Blend the two feature maps by mask; where neither mask is set, fall back
    # to l_lf.
    feature = l_lf * l_mask + l_sf * s_mask
    feature = torch.where((l_mask + s_mask) > 0, feature, l_lf)
    disparity = torch.zeros([540, 960]).cuda()
    one = torch.ones(1).cuda()
    zero = torch.zeros(1).cuda()
    cost_volume = []
    # matching cost computation, one object at a time
    for i in range(torch.max(self.P[:, :, 1]).type(torch.int32) + 1):
        min_d = self.pre[1, 0][0, i]
        max_d = self.pre[1, 0][1, i]
        object_mask = torch.where(P3 == i, one, zero)
        # NOTE(review): this box does not depend on i -- confirm the intended
        # index into self.pre2.
        x1, y1, x2, y2, size = self.pre2[0]
        object_mask = object_mask[x1:x2, y1:y2]
        s_mask_o = object_mask * s_mask[x1:x2, y1:y2]
        l_mask_o = object_mask * l_mask[x1:x2, y1:y2]
        s_l_o = feature[..., x1:x2, y1:y2] * s_mask_o
        l_l_o = feature[..., x1:x2, y1:y2] * l_mask_o
        # Column window of the second input, clamped to [0, 960].
        col_lo = int(torch.max(min_d, zero))
        col_hi = int(torch.min(max_d, one * 960))
        s_r_o = r_sf[..., x1:x2, col_lo:col_hi]
        l_r_o = r_lf[..., x1:x2, col_lo:col_hi]
        # NOTE(review): min_d / max_d are redefined relative to the box here;
        # the formulas are kept exactly as in the original.
        min_d = y2 - max_d
        max_d = y1 - min_d
        n_shifts = int(max_d - min_d)
        cost_s = []
        cost_l = []
        for shift in range(0, n_shifts):
            if y2 - y1 - shift > 0:
                s_r_o_t = s_r_o[..., y2 - y1 - shift:-shift]
            else:
                s_r_o_t = torch.cat([
                    torch.zeros_like(s_r_o[..., :y2 - y1 - shift]),
                    s_r_o[..., 0:-shift]
                ], -1)
            cost_s.append(
                torch.where(s_mask_o == 1, cosine_s(s_l_o, s_r_o_t), zero))
        cost_s = torch.stack(cost_s, -1)
        for shift in range(0, n_shifts):
            if y2 - y1 - shift > 0:
                l_r_o_t = l_r_o[..., y2 - y1 - shift:-shift]
            else:
                l_r_o_t = torch.cat([
                    torch.zeros_like(l_r_o[..., :y2 - y1 - shift]),
                    l_r_o[..., 0:-shift]
                ], -1)
            cost_l.append(
                torch.where(l_mask_o == 1, cosine_s(l_l_o, l_r_o_t), zero))
        cost_l = torch.stack(cost_l, -1)
        cost_volume = cost_s + cost_l
        # aggregation
        # Plane ids of object i. Pixels outside the object are first set to 0
        # to find the maximum, then to max + 1 so they cannot be the minimum.
        object_r = torch.where(P3 == i, P4, zero)
        max_r = torch.max(object_r)
        object_r = torch.where(P3 == i, P4, max_r + 1)
        min_r = torch.min(object_r)
        plane_ids = range(int(min_r), int(max_r) + 1)
        for j in plane_ids:
            plane_mask = torch.where(object_r == j, one, zero)[x1:x2, y1:y2]
            xp1, xp2, yp1, yp2 = crop(plane_mask)
            plane_mask = plane_mask[xp1:xp2, yp1:yp2]
            plane = cost_volume[..., xp1:xp2, yp1:yp2, :]
            s_plane_mask = plane_mask * s_mask[x1:x2, y1:y2][xp1:xp2, yp1:yp2]
            l_plane_mask = plane_mask * l_mask[x1:x2, y1:y2][xp1:xp2, yp1:yp2]
            # Weighted mean cost of the plane for each of the two masks.
            s_weights = self.cluster(
                l_sf[..., x1:x2, y1:y2][..., xp1:xp2, yp1:yp2], s_plane_mask)
            s_cost = torch.sum(torch.sum(
                plane * s_weights, -2, keepdim=True), -3,
                               keepdim=True) / torch.sum(s_weights)
            l_weights = self.cluster(
                l_lf[..., x1:x2, y1:y2][..., xp1:xp2, yp1:yp2], l_plane_mask)
            l_cost = torch.sum(torch.sum(plane * l_weights, -2),
                               -2) / torch.sum(l_weights)
            # Pixels of the plane covered by neither mask.
            plane_mask = plane_mask - torch.where(
                s_plane_mask + l_plane_mask > 0, one, zero)
            # Broadcast the masks along the disparity axis.
            plane_mask=plane_mask.view(plane_mask.shape[0],plane_mask.shape[1],plane_mask.shape[2],1) \
                .expand(plane_mask.shape[0],plane_mask.shape[1],plane_mask.shape[2],plane.shape[-1])
            s_plane_mask=s_plane_mask.view(plane_mask.shape[0],plane_mask.shape[1],plane_mask.shape[2],1) \
                .expand(plane_mask.shape[0],plane_mask.shape[1],plane_mask.shape[2],plane.shape[-1])
            l_plane_mask=l_plane_mask.view(plane_mask.shape[0],plane_mask.shape[1],plane_mask.shape[2],1) \
                .expand(plane_mask.shape[0],plane_mask.shape[1],plane_mask.shape[2],plane.shape[-1])
            plane = torch.where(s_plane_mask == 1, s_cost * s_weights, plane)
            plane = torch.where(l_plane_mask == 1, l_cost * l_weights, plane)
            weights = self.cluster(
                l_lf[..., x1:x2, y1:y2][..., xp1:xp2, yp1:yp2], plane_mask)
            costs = torch.sum(torch.sum(plane * weights, -2, keepdim=True),
                              -3,
                              keepdim=True) / torch.sum(weights)
            plane = torch.where(plane_mask == 1, costs * weights, plane)
            cost_volume[..., xp1:xp2, yp1:yp2, :] = plane
        # ss_argmin
        disparity[..., x1:x2, y1:y2] = ss_argmin(cost_volume, min_d, max_d)
        # refinement: same weighting scheme applied to the disparity itself
        for j in plane_ids:
            plane_mask = torch.where(object_r == j, one, zero)[x1:x2, y1:y2]
            xp1, xp2, yp1, yp2 = crop(plane_mask)
            plane_mask = plane_mask[xp1:xp2, yp1:yp2]
            s_plane_mask = plane_mask * s_mask[x1:x2, y1:y2][xp1:xp2, yp1:yp2]
            l_plane_mask = plane_mask * l_mask[x1:x2, y1:y2][xp1:xp2, yp1:yp2]
            plane_mask = plane_mask - torch.where(
                s_plane_mask + l_plane_mask > 0, one, zero)
            plane = disparity[..., x1:x2, y1:y2][..., xp1:xp2,
                                                 yp1:yp2] * plane_mask
            s_weights = self.cluster(
                l_sf[..., x1:x2, y1:y2][..., xp1:xp2, yp1:yp2], s_plane_mask)
            s_cost = torch.sum(torch.sum(
                plane * s_weights, -2, keepdim=True), -3,
                               keepdim=True) / torch.sum(s_weights)
            l_weights = self.cluster(
                l_lf[..., x1:x2, y1:y2][..., xp1:xp2, yp1:yp2], l_plane_mask)
            l_cost = torch.sum(torch.sum(plane * l_weights, -2),
                               -2) / torch.sum(l_weights)
            weights = self.cluster(
                l_lf[..., x1:x2, y1:y2][..., xp1:xp2, yp1:yp2], plane_mask)
            costs = torch.sum(torch.sum(plane * weights, -2, keepdim=True),
                              -3,
                              keepdim=True) / torch.sum(weights)
            plane = torch.where(s_plane_mask == 1, s_cost * s_weights, plane)
            plane = torch.where(l_plane_mask == 1, l_cost * l_weights, plane)
            plane = torch.where(plane_mask == 1, costs * weights, plane)
            disparity[..., x1:x2, y1:y2][..., xp1:xp2, yp1:yp2] = plane
    return disparity
def forward(self, l, r, P, pre1, pre2):
    """Time two ways of computing per-object matching costs.

    Pass 1 collects the pixel/disparity index tensors of every object and
    calls ``cosine_s`` once on the concatenated indices.  Pass 2 calls
    ``cosine_s`` separately for every object.  After each pass the elapsed
    time is printed and the method sleeps for 100 seconds.

    Args:
        l: Left input, fed to ``self.feature_extraction2``.
        r: Right input, fed to ``self.feature_extraction2``.
        P: Label tensor whose last axis is read at indices 0, 1, 2 and 3.
            Index 0 is used as the "s" mask, index 3 minus index 0 as the
            "l" mask, and index 1 is compared against the object id ``i``.
        pre1: ``pre1[0, 0, i]`` and ``pre1[0, 1, i]`` are read as the
            minimum and maximum disparity of object ``i``.
        pre2: ``pre2[0, i]`` is unpacked into ``x1, y1, x2, y2, size``.

    Returns:
        ``cost_volume``.  Nothing in this variant appends to it, so it is
        the empty list created near the top of the method.
    """
    #self.P=P[1,0]
    #0 l to r,1 min,2 max
    #[l_box,r_box,match],[min_d,max_d]
    with torch.no_grad():
        self.pre = pre1
        self.pre2 = pre2
        P1 = P[..., 0]
        P2 = P[..., 3]
        P3 = P[..., 1]  # compared with the object id i below
        P4 = P[..., 2]  # not used in this variant
    #feature extraction
    l_mask = P2 - P1
    s_mask = P1
    # Author's memory readings while profiling: "basic cuda 524", 1923, 2727.
    l_sf = self.feature_extraction2(l)
    l_lf = self.feature_extraction(l_sf)
    # Author's note: "the cuda won't copy the volume to the new gpu".
    r_sf = self.feature_extraction2(r)
    r_lf = self.feature_extraction(r_sf)
    # Author's memory reading: 3267.
    #reshape the mask to batch and channel
    # NOTE(review): device index 2 is hard-coded for every tensor created or
    # moved here.  ``disparity`` (fixed 540x960) is never used in this variant.
    disparity = torch.zeros([540, 960]).cuda(2)
    one = torch.ones(1).cuda(2)
    zero = torch.zeros(1).cuda(2)
    cost_volume = []
    # Author's memory reading: 5710.
    l_lf = l_lf.cuda(2)
    r_lf = r_lf.cuda(2)
    r_sf = r_sf.cuda(2)
    l_sf = l_sf.cuda(2)
    # Author's memory reading: 985.
    # Where either mask is positive use the mask-weighted mix of both feature
    # maps, elsewhere fall back to l_lf.
    feature = l_lf * l_mask + l_sf * s_mask
    feature = torch.where((l_mask + s_mask) > 0, feature, l_lf)
    # Earlier, now disabled, experiments at this point (author's timings,
    # presumably seconds): feature step 0.48; whole-image nonzero + gather
    # 0.27 / 0.18 ("initial 0.44, next 0.18"); vectorised gather over d
    # disparities "192,0.09s,30,0.01" (lf 0.19); per-pixel Python loop
    # "time 10"; per-disparity loop "0.23 for 192, 0.035 for 30 -- the number
    # of the match points won't influence the time, only the iteration".
    count = 0  # not used in this variant
    start_time = time.time()
    # Per-object index tensors, concatenated after the loop.
    sx = []
    sy = []
    lx = []
    ly = []
    sy_r = []
    ly_r = []
    with torch.no_grad():
        for i in range(150):
            #ground 0-270, sky 0-40
            # Author's timing for this pass: 0.38.
            # Stop after the largest id in P3.  torch.max(P3) is re-evaluated
            # on every iteration although P3 is not modified in the loop.
            if i > torch.max(P3):
                break
            min_d = pre1[0, 0, i].long()
            max_d = pre1[0, 1, i].long()
            #object_mask=torch.where(P3==i,one,zero)
            x1, y1, x2, y2, size = pre2[0, i].long()
            # 1 where the cropped label equals i, 0 elsewhere.
            object_mask = P3[0, x1:x2, y1:y2]
            object_mask = torch.where(object_mask == i, one, zero)
            s_mask_o = object_mask * s_mask[0, x1:x2, y1:y2]
            l_mask_o = object_mask * l_mask[0, x1:x2, y1:y2]
            s_match = s_mask_o.nonzero()
            l_match = l_mask_o.nonzero()
            # If a mask selects no pixel inside the object, use every pixel
            # of the object instead.
            if s_match.shape[0] == 0:
                s_match = object_mask.nonzero()
            if l_match.shape[0] == 0:
                l_match = object_mask.nonzero()
            # NOTE(review): these indices come from nonzero() on the crop, so
            # they are relative to (x1, y1); they are later used to index the
            # un-cropped ``feature`` / ``r_sf`` without adding the offset --
            # confirm this is intended.
            sy_match = s_match[:, 1]
            sx_match = s_match[:, 0]
            ly_match = l_match[:, 1]
            lx_match = l_match[:, 0]
            d = max_d - min_d  # number of disparity candidates
            # Tile the n indices d times -> shape (1, n*d); the flat order is
            # all pixels at min_d, then all pixels at min_d + 1, and so on.
            sx_match = sx_match.repeat(1, d)
            sy_match = sy_match.repeat(1, d)
            # Column index shifted by the candidate disparity.
            sy_match_r = sy_match - torch.arange(
                min_d, max_d).cuda(2).repeat(
                    s_match.shape[0], 1).transpose(
                        1, 0).contiguous().view_as(sy_match)
            lx_match = lx_match.repeat(1, d)
            ly_match = ly_match.repeat(1, d)
            ly_match_r = ly_match - torch.arange(
                min_d, max_d).cuda(2).repeat(
                    l_match.shape[0], 1).transpose(
                        1, 0).contiguous().view_as(ly_match)
            sx.append(sx_match)
            sy.append(sy_match)
            sy_r.append(sy_match_r)
            lx.append(lx_match)
            ly.append(ly_match)
            ly_r.append(ly_match_r)
    sx_match = torch.cat(sx, 1)
    sy_match = torch.cat(sy, 1)
    lx_match = torch.cat(lx, 1)
    ly_match = torch.cat(ly, 1)
    sy_match_r = torch.cat(sy_r, 1)
    ly_match_r = torch.cat(ly_r, 1)
    # One cosine_s call over all objects; entries whose shifted column is
    # negative are replaced by zero.  The third argument 0 is presumably the
    # reduction dim of cosine_s (defined elsewhere) -- TODO confirm.
    cost_s = torch.where(
        sy_match_r.squeeze() >= 0,
        cosine_s(feature[..., sx_match, sy_match].squeeze(),
                 r_sf[..., sx_match, sy_match_r].squeeze(), 0), zero)
    # NOTE(review): this reads r_sf, whereas the per-object loop below pairs
    # the "l" indices with r_lf -- confirm which one is intended.
    cost_l = torch.where(
        ly_match_r.squeeze() >= 0,
        cosine_s(feature[..., lx_match, ly_match].squeeze(),
                 r_sf[..., lx_match, ly_match_r].squeeze(), 0), zero)
    print(time.time() - start_time)
    # Debug pause; blocks the caller for 100 seconds.
    time.sleep(100)
    # NOTE(review): start_time is not reset here (the reset is commented
    # out), so the second print also includes the pause above.
    #start_time=time.time()
    #with torch.no_grad():
    for i in range(150):
        #ground 0-270, sky 0-40
        # Author's timing for this pass: 0.46.
        if i > torch.max(P3):
            break
        min_d = pre1[0, 0, i].long()
        max_d = pre1[0, 1, i].long()
        #object_mask=torch.where(P3==i,one,zero)
        x1, y1, x2, y2, size = pre2[0, i].long()
        object_mask = P3[0, x1:x2, y1:y2]
        object_mask = torch.where(object_mask == i, one, zero)
        s_mask_o = object_mask * s_mask[0, x1:x2, y1:y2]
        l_mask_o = object_mask * l_mask[0, x1:x2, y1:y2]
        s_match = s_mask_o.nonzero()
        l_match = l_mask_o.nonzero()
        if s_match.shape[0] == 0:
            s_match = object_mask.nonzero()
        if l_match.shape[0] == 0:
            l_match = object_mask.nonzero()
        # Left features at the selected pixels (crop-relative indices, see
        # the note in the first loop).
        s_l_o = feature[..., s_match[:, 0], s_match[:, 1]]
        l_l_o = feature[..., l_match[:, 0], l_match[:, 1]]
        # Re-created on every iteration, so only the last object's costs
        # survive the loop.
        cost_s = []
        cost_l = []
        sy_match = s_match[:, 1]
        sx_match = s_match[:, 0]
        ly_match = l_match[:, 1]
        lx_match = l_match[:, 0]
        with torch.no_grad():
            d = max_d - min_d
            sx_match = sx_match.repeat(1, d)
            sy_match = sy_match.repeat(1, d)
            # In-place shift of the tiled column indices by each disparity.
            sy_match -= torch.arange(min_d, max_d).cuda(2).repeat(
                s_match.shape[0],
                1).transpose(1, 0).contiguous().view_as(sy_match)
            lx_match = lx_match.repeat(1, d)
            ly_match = ly_match.repeat(1, d)
            ly_match -= torch.arange(min_d, max_d).cuda(2).repeat(
                l_match.shape[0],
                1).transpose(1, 0).contiguous().view_as(ly_match)
        # The second dimension is hard-coded to 32 in the reshapes below.
        s_r_o_t = r_sf[..., sx_match, sy_match].reshape(1, 32,
                                                        s_l_o.shape[-1], d)
        s_l_o = s_l_o.repeat(1, 1, 1, d).reshape(1, 32, s_l_o.shape[-1], d)
        l_r_o_t = r_lf[..., lx_match, ly_match].reshape(1, 32,
                                                        l_l_o.shape[-1], d)
        l_l_o = l_l_o.repeat(1, 1, 1, d).reshape(1, 32, l_l_o.shape[-1], d)
        cost_s.append(
            torch.where(
                sy_match.reshape(1, s_l_o.shape[-2], d) >= 0,
                cosine_s(s_l_o, s_r_o_t), zero))
        cost_l.append(
            torch.where(
                ly_match.reshape(1, l_l_o.shape[-2], d) >= 0,
                cosine_s(l_l_o, l_r_o_t), zero))
        #cost_volume=cost_s+cost_l
    print(time.time() - start_time)
    # Debug pause; blocks the caller for 100 seconds.
    time.sleep(100)
    # A third, fully commented-out copy of the per-object loop followed here
    # (author's timing 0.50); it applied .cuda(2) after view_as instead of
    # right after torch.arange.
    return cost_volume
def forward(self, l, r, P, pre1, pre2):
    """Time a whole-image cost gather and a per-object "s" cost loop.

    First the matching cost of every pixel selected by the "s" and "l" masks
    is computed for a fixed range of 192 disparities and appended to
    ``cost_volume``.  Then a per-object loop recomputes the "s" cost over each
    object's own disparity range.  After each stage the elapsed time is
    printed and the method sleeps for 100 seconds.

    Args:
        l: Left input, fed to ``self.feature_extraction2``.
        r: Right input, fed to ``self.feature_extraction2``.
        P: Label tensor whose last axis is read at indices 0, 1, 2 and 3.
        pre1: ``pre1[0, 0, i]`` and ``pre1[0, 1, i]`` are read as the
            minimum and maximum disparity of object ``i``.
        pre2: ``pre2[0, i]`` is unpacked into ``x1, y1, x2, y2, size``.

    Returns:
        ``cost_volume``: a list with two tensors, the whole-image "s" cost
        followed by the whole-image "l" cost.  The per-object loop does not
        add to it.
    """
    #self.P=P[1,0]
    #0 l to r,1 min,2 max
    #[l_box,r_box,match],[min_d,max_d]
    with torch.no_grad():
        self.pre = pre1
        self.pre2 = pre2
        P1 = P[..., 0]
        P2 = P[..., 3]
        P3 = P[..., 1]  # compared with the object id i below
        P4 = P[..., 2]  # not used in this variant
    #feature extraction
    l_mask = P2 - P1
    s_mask = P1
    # Author's memory readings while profiling: "basic cuda 524", 1923, 2727.
    l_sf = self.feature_extraction2(l)
    l_lf = self.feature_extraction(l_sf)
    # Author's note: "the cuda won't copy the volume to the new gpu".
    r_sf = self.feature_extraction2(r)
    r_lf = self.feature_extraction(r_sf)
    # Author's memory reading: 3267.
    #reshape the mask to batch and channel
    # NOTE(review): device index 2 is hard-coded; ``disparity`` (fixed
    # 540x960) is never used in this variant.
    disparity = torch.zeros([540, 960]).cuda(2)
    one = torch.ones(1).cuda(2)
    zero = torch.zeros(1).cuda(2)
    cost_volume = []
    # Author's memory reading: 5710.
    l_lf = l_lf.cuda(2)
    r_lf = r_lf.cuda(2)
    r_sf = r_sf.cuda(2)
    l_sf = l_sf.cuda(2)
    # Author's memory reading: 985.
    # Where either mask is positive use the mask-weighted mix of both feature
    # maps, elsewhere fall back to l_lf.
    feature = l_lf * l_mask + l_sf * s_mask
    feature = torch.where((l_mask + s_mask) > 0, feature, l_lf)
    # NOTE(review): the masks are indexed as [0, x1:x2, y1:y2] further down,
    # which suggests they are 3-D.  If so, nonzero() yields three columns and
    # [:, 0] / [:, 1] below pick the first two of them rather than the two
    # spatial ones -- confirm.
    with torch.no_grad():
        s_match = s_mask.nonzero()
    s_feature = l_sf[..., s_match[:, 0], s_match[:, 1]]
    with torch.no_grad():
        l_match = l_mask.nonzero()
    l_feature = l_lf[..., l_match[:, 0], l_match[:, 1]]
    # Author's timing for the gathers above: 0.18.
    start_time = time.time()
    # Author's timing: 0.04s.
    sy_match = s_match[:, 1]
    sx_match = s_match[:, 0]
    with torch.no_grad():
        # Fixed disparity range 0..191 for the whole image.
        d = 192
        # Tile the n indices d times -> shape (1, n*d); the flat order is all
        # pixels at disparity 0, then all pixels at disparity 1, and so on.
        sx_match = sx_match.repeat(1, d)
        sy_match = sy_match.repeat(1, d)
        sy_match -= torch.arange(0, d).repeat(
            s_match.shape[0],
            1).transpose(1, 0).contiguous().view_as(sy_match).cuda(2)
    # Author's timing: "192,0.09s,30,0.01".
    # The second dimension is hard-coded to 32 in the reshapes below.
    s_r_o_t = r_sf[..., sx_match, sy_match].reshape(1, 32,
                                                    s_feature.shape[-1], d)
    s_feature = s_feature.repeat(1, 1, 1, d).reshape(1, 32,
                                                     s_feature.shape[-1], d)
    # Entries whose shifted column index is negative are replaced by zero.
    cost_volume.append(
        torch.where(
            sy_match.reshape(1, s_feature.shape[-2], d) >= 0,
            cosine_s(s_feature, s_r_o_t), zero))
    ly_match = l_match[:, 1]
    lx_match = l_match[:, 0]
    with torch.no_grad():
        d = 192
        lx_match = lx_match.repeat(1, d)
        ly_match = ly_match.repeat(1, d)
        ly_match -= torch.arange(0, d).repeat(
            l_match.shape[0],
            1).transpose(1, 0).contiguous().view_as(ly_match).cuda(2)
    # Author's timing: "192,0.09s,30,0.01,lf0.19".
    l_r_o_t = r_lf[..., lx_match, ly_match].reshape(1, 32,
                                                    l_feature.shape[-1], d)
    l_feature = l_feature.repeat(1, 1, 1, d).reshape(1, 32,
                                                     l_feature.shape[-1], d)
    cost_volume.append(
        torch.where(
            ly_match.reshape(1, l_feature.shape[-2], d) >= 0,
            cosine_s(l_feature, l_r_o_t), zero))
    print(time.time() - start_time)
    # Debug pause; blocks the caller for 100 seconds.
    time.sleep(100)
    # Earlier, now disabled, experiments at this point (author's timings):
    # plain gather 0.0003; per-pixel Python loop "time 10"; per-disparity
    # loop "0.23 for 192, 0.035 for 30 -- the number of the match points
    # won't influence the time, only the iteration".
    #promotion
    #we can segment with bounding box and divide the whole image into many parts
    #each single bounding box will be managed through network not the whole image
    #matching cost computation
    count = 0  # not used in this variant
    start_time = time.time()
    for i in range(150):
        #ground 0-270, sky 0-40
        # Author's timing for this loop: 0.42.
        # Stop after the largest id in P3.  torch.max(P3) is re-evaluated on
        # every iteration although P3 is not modified in the loop.
        if i > torch.max(P3).type(torch.int32):
            break
        min_d = pre1[0, 0, i].long()
        max_d = pre1[0, 1, i].long()
        #object_mask=torch.where(P3==i,one,zero)
        x1, y1, x2, y2, size = pre2[0, i].long()
        # 1 where the cropped label equals i, 0 elsewhere.
        object_mask = P3[0, x1:x2, y1:y2]
        object_mask = torch.where(object_mask == i, one, zero)
        s_mask_o = object_mask * s_mask[0, x1:x2, y1:y2]
        l_mask_o = object_mask * l_mask[0, x1:x2, y1:y2]
        s_match = s_mask_o.nonzero()
        l_match = l_mask_o.nonzero()
        # If a mask selects no pixel inside the object, use every pixel of
        # the object instead.
        if s_match.shape[0] == 0:
            s_match = object_mask.nonzero()
        if l_match.shape[0] == 0:
            l_match = object_mask.nonzero()
        # NOTE(review): the indices are relative to the crop origin (x1, y1)
        # but index the un-cropped ``feature`` / ``r_sf`` -- confirm.
        s_l_o = feature[..., s_match[:, 0], s_match[:, 1]]
        l_l_o = feature[..., l_match[:, 0], l_match[:, 1]]
        # Re-created on every iteration.  l_l_o and cost_l are prepared but
        # no "l" cost is computed in this loop.
        cost_s = []
        cost_l = []
        sy_match = s_match[:, 1]
        sx_match = s_match[:, 0]
        with torch.no_grad():
            d = max_d - min_d  # number of disparity candidates
            print(d)  # debug output, once per object
            sx_match = sx_match.repeat(1, d)
            sy_match = sy_match.repeat(1, d)
            sy_match -= torch.arange(min_d, max_d).repeat(
                s_match.shape[0],
                1).transpose(1, 0).contiguous().view_as(sy_match).cuda(2)
        s_r_o_t = r_sf[..., sx_match, sy_match].reshape(1, 32,
                                                        s_l_o.shape[-1], d)
        s_l_o = s_l_o.repeat(1, 1, 1, d).reshape(1, 32, s_l_o.shape[-1], d)
        cost_s.append(
            torch.where(
                sy_match.reshape(1, s_l_o.shape[-2], d) >= 0,
                cosine_s(s_l_o, s_r_o_t), zero))
        #cost_volume=cost_s+cost_l
    print(time.time() - start_time)
    # Debug pause; blocks the caller for 100 seconds.
    time.sleep(100)
    return cost_volume
def forward(self, l, r, P, pre1, pre2):
    """Time a per-object matching cost built with a Python loop over disparities.

    For every object id from 0 to ``max(P3)`` the "s" and "l" costs are
    computed one disparity at a time and stacked along a new last axis.  The
    elapsed time is printed and the method then sleeps for 100 seconds.

    Args:
        l: Left input, fed to ``self.feature_extraction2``.
        r: Right input, fed to ``self.feature_extraction2``.
        P: Label tensor whose last axis is read at indices 0, 1, 2 and 3.
        pre1: ``pre1[0, 0, i]`` and ``pre1[0, 1, i]`` are read as the
            minimum and maximum disparity of object ``i``.
        pre2: ``pre2[0, i]`` is unpacked into ``x1, y1, x2, y2, size``.

    Returns:
        ``cost_volume``.  Nothing in this variant appends to it, so it is
        the empty list created near the top of the method.
    """
    #self.P=P[1,0]
    #0 l to r,1 min,2 max
    #[l_box,r_box,match],[min_d,max_d]
    self.pre = pre1
    self.pre2 = pre2
    P1 = P[..., 0]
    P2 = P[..., 3]
    P3 = P[..., 1]  # compared with the object id i below
    P4 = P[..., 2]  # not used in this variant
    #feature extraction
    l_mask = P2 - P1
    s_mask = P1
    # Author's memory readings while profiling: "basic cuda 524", 1923, 2727.
    l_sf = self.feature_extraction2(l)
    l_lf = self.feature_extraction(l_sf)
    # Author's note: "the cuda won't copy the volume to the new gpu".
    r_sf = self.feature_extraction2(r)
    r_lf = self.feature_extraction(r_sf)
    # Author's memory reading: 3267.
    #reshape the mask to batch and channel
    # NOTE(review): device index 2 is hard-coded; ``disparity`` (fixed
    # 540x960) is never used in this variant.
    disparity = torch.zeros([540, 960]).cuda(2)
    one = torch.ones(1).cuda(2)
    zero = torch.zeros(1).cuda(2)
    cost_volume = []
    # Author's memory reading: 5710.
    l_lf = l_lf.cuda(2)
    r_lf = r_lf.cuda(2)
    r_sf = r_sf.cuda(2)
    l_sf = l_sf.cuda(2)
    # Author's memory reading: 985.
    # Where either mask is positive use the mask-weighted mix of both feature
    # maps, elsewhere fall back to l_lf.
    feature = l_lf * l_mask + l_sf * s_mask
    feature = torch.where((l_mask + s_mask) > 0, feature, l_lf)
    # Whole-image gathers; s_feature and l_feature are not used by the active
    # code below (only by the disabled experiments).
    s_match = s_mask.long().nonzero()
    s_feature = l_sf[..., s_match[:, 0], s_match[:, 1]]
    l_match = l_mask.long().nonzero()
    l_feature = l_lf[..., l_match[:, 0], l_match[:, 1]]
    start_time = time.time()  # overwritten before the timed loop
    # Earlier, now disabled, experiments at this point (author's timings):
    # plain gather 0.0003; per-pixel Python loop "time 10"; per-disparity
    # loop over the whole image "0.23 for 192, 0.035 for 30 -- the number of
    # the match points won't influence the time, only the iteration".
    #promotion
    #we can segment with bounding box and divide the whole image into many parts
    #each single bounding box will be managed through network not the whole image
    #matching cost computation
    count = 0  # not used in this variant
    start_time = time.time()
    # Visit every object id from 0 up to and including the largest value in P3.
    for i in range(torch.max(P3).type(torch.int32) + 1):
        #ground 0-270, sky 0-40
        # (Disabled debugging aids: skipping ids 13/14, forcing i=60 or i=14.)
        min_d = pre1[0, 0, i].long()
        max_d = pre1[0, 1, i].long()
        # Full-size 0/1 mask of object i, then cropped to its box.
        object_mask = torch.where(P3 == i, one, zero)
        x1, y1, x2, y2, size = pre2[0, i].long()
        object_mask = object_mask[0, x1:x2, y1:y2]
        s_mask_o = object_mask * s_mask[0, x1:x2, y1:y2]
        l_mask_o = object_mask * l_mask[0, x1:x2, y1:y2]
        s_match = s_mask_o.long().nonzero()
        l_match = l_mask_o.long().nonzero()
        # If a mask selects no pixel inside the object, use every pixel of
        # the object instead.
        if s_match.shape[0] == 0:
            s_match = object_mask.nonzero()
        if l_match.shape[0] == 0:
            l_match = object_mask.nonzero()
        # NOTE(review): the indices are relative to the crop origin (x1, y1)
        # but index the un-cropped ``feature`` / ``r_sf`` / ``r_lf`` --
        # confirm this is intended.
        s_l_o = feature[..., s_match[:, 0], s_match[:, 1]]
        l_l_o = feature[..., l_match[:, 0], l_match[:, 1]]
        # (Disabled alternative: slicing r_sf / r_lf as
        # [..., x1:x2, y1-max_d:y2-min_d].)
        cost_s = []
        cost_l = []
        #ground and sky
        # One gather and one cosine_s call per candidate disparity j; entries
        # whose shifted column index is negative are replaced by zero.
        for j in range(min_d, max_d):
            s_r_o_t = r_sf[..., s_match[:, 0], s_match[:, 1] - j]
            cost_s.append(
                torch.where(s_match[:, 1] - j >= 0,
                            cosine_s(s_l_o, s_r_o_t), zero))
            l_r_o_t = r_lf[..., l_match[:, 0], l_match[:, 1] - j]
            cost_l.append(
                torch.where(l_match[:, 1] - j >= 0,
                            cosine_s(l_l_o, l_r_o_t), zero))
        # Stack the per-disparity costs along a new last axis.  The results
        # are overwritten by the next object and never stored.
        cost_s = torch.stack(cost_s, -1)
        cost_l = torch.stack(cost_l, -1)
        #cost_volume=cost_s+cost_l
    print(time.time() - start_time)
    # Debug pause; blocks the caller for 100 seconds.
    time.sleep(100)
    return cost_volume
def forward(self, l, r, P, pre1, pre2):
    """Time a batched cost gather, an indexed write, and a mask-only loop.

    The index tensors of every object are collected and ``cosine_s`` is
    called once over the concatenated indices.  ``feature`` is then
    overwritten with 1 at the "s" positions, and a second loop recomputes the
    per-object masks without using them.  The total elapsed time is printed
    and the method sleeps for 100 seconds.

    Args:
        l: Left input, fed to ``self.feature_extraction2``.
        r: Right input, fed to ``self.feature_extraction2``.
        P: Label tensor whose last axis is read at indices 0, 1, 2 and 3.
        pre1: ``pre1[0, 0, i]`` and ``pre1[0, 1, i]`` are read as the
            minimum and maximum disparity of object ``i``.
        pre2: ``pre2[0, i]`` is unpacked into ``x1, y1, x2, y2, size``.

    Returns:
        ``cost_volume``.  Nothing in this variant appends to it, so it is
        the empty list created near the top of the method.
    """
    #self.P=P[1,0]
    #0 l to r,1 min,2 max
    #[l_box,r_box,match],[min_d,max_d]
    with torch.no_grad():
        self.pre = pre1
        self.pre2 = pre2
        P1 = P[..., 0]
        P2 = P[..., 3]
        P3 = P[..., 1]  # compared with the object id i below
        P4 = P[..., 2]  # not used in this variant
    #feature extraction
    l_mask = P2 - P1
    s_mask = P1
    # Author's memory readings while profiling: "basic cuda 524", 1923, 2727.
    l_sf = self.feature_extraction2(l)
    l_lf = self.feature_extraction(l_sf)
    # Author's note: "the cuda won't copy the volume to the new gpu".
    r_sf = self.feature_extraction2(r)
    r_lf = self.feature_extraction(r_sf)
    # Author's memory reading: 3267.
    #reshape the mask to batch and channel
    # NOTE(review): device index 2 is hard-coded; ``disparity`` (fixed
    # 540x960) is never used in this variant.
    disparity = torch.zeros([540, 960]).cuda(2)
    one = torch.ones(1).cuda(2)
    zero = torch.zeros(1).cuda(2)
    cost_volume = []
    # Author's memory reading: 5710.
    l_lf = l_lf.cuda(2)
    r_lf = r_lf.cuda(2)  # moved to the device but not read again here
    r_sf = r_sf.cuda(2)
    l_sf = l_sf.cuda(2)
    # Where either mask is positive use the mask-weighted mix of both feature
    # maps, elsewhere fall back to l_lf.
    feature = l_lf * l_mask + l_sf * s_mask
    feature = torch.where((l_mask + s_mask) > 0, feature, l_lf)
    count = 0  # not used in this variant
    start_time = time.time()
    # Per-object index tensors, concatenated after the loop.
    sx = []
    sy = []
    lx = []
    ly = []
    sy_r = []
    ly_r = []
    with torch.no_grad():
        for i in range(150):
            #ground 0-270, sky 0-40
            # Author's timing for this pass: 0.38.
            # Stop after the largest id in P3.  torch.max(P3) is re-evaluated
            # on every iteration although P3 is not modified in the loop.
            if i > torch.max(P3):
                break
            min_d = pre1[0, 0, i].long()
            max_d = pre1[0, 1, i].long()
            #object_mask=torch.where(P3==i,one,zero)
            x1, y1, x2, y2, size = pre2[0, i].long()
            # 1 where the cropped label equals i, 0 elsewhere.
            object_mask = P3[0, x1:x2, y1:y2]
            object_mask = torch.where(object_mask == i, one, zero)
            s_mask_o = object_mask * s_mask[0, x1:x2, y1:y2]
            l_mask_o = object_mask * l_mask[0, x1:x2, y1:y2]
            s_match = s_mask_o.nonzero()
            l_match = l_mask_o.nonzero()
            # If a mask selects no pixel inside the object, use every pixel
            # of the object instead.
            if s_match.shape[0] == 0:
                s_match = object_mask.nonzero()
            if l_match.shape[0] == 0:
                l_match = object_mask.nonzero()
            # NOTE(review): these indices are relative to the crop origin
            # (x1, y1) but later index the un-cropped ``feature`` / ``r_sf``
            # -- confirm this is intended.
            sy_match = s_match[:, 1]
            sx_match = s_match[:, 0]
            ly_match = l_match[:, 1]
            lx_match = l_match[:, 0]
            d = max_d - min_d  # number of disparity candidates
            # Tile the n indices d times -> shape (1, n*d); the flat order is
            # all pixels at min_d, then all pixels at min_d + 1, and so on.
            sx_match = sx_match.repeat(1, d)
            sy_match = sy_match.repeat(1, d)
            # Column index shifted by the candidate disparity.
            sy_match_r = sy_match - torch.arange(
                min_d, max_d).cuda(2).repeat(
                    s_match.shape[0], 1).transpose(
                        1, 0).contiguous().view_as(sy_match)
            lx_match = lx_match.repeat(1, d)
            ly_match = ly_match.repeat(1, d)
            ly_match_r = ly_match - torch.arange(
                min_d, max_d).cuda(2).repeat(
                    l_match.shape[0], 1).transpose(
                        1, 0).contiguous().view_as(ly_match)
            sx.append(sx_match)
            sy.append(sy_match)
            sy_r.append(sy_match_r)
            lx.append(lx_match)
            ly.append(ly_match)
            ly_r.append(ly_match_r)
    sx_match = torch.cat(sx, 1)
    sy_match = torch.cat(sy, 1)
    lx_match = torch.cat(lx, 1)
    ly_match = torch.cat(ly, 1)
    sy_match_r = torch.cat(sy_r, 1)
    ly_match_r = torch.cat(ly_r, 1)
    # One cosine_s call over all objects; entries whose shifted column is
    # negative are replaced by zero.  The third argument 0 is presumably the
    # reduction dim of cosine_s (defined elsewhere) -- TODO confirm.
    # cost_s and cost_l are not used after being computed.
    cost_s = torch.where(
        sy_match_r.squeeze() >= 0,
        cosine_s(feature[..., sx_match, sy_match].squeeze(),
                 r_sf[..., sx_match, sy_match_r].squeeze(), 0), zero)
    # NOTE(review): the "l" cost reads r_sf, not r_lf -- confirm.
    cost_l = torch.where(
        ly_match_r.squeeze() >= 0,
        cosine_s(feature[..., lx_match, ly_match].squeeze(),
                 r_sf[..., lx_match, ly_match_r].squeeze(), 0), zero)
    # In-place write: sets ``feature`` to 1 at every gathered "s" position.
    # NOTE(review): presumably a timing probe for indexed assignment; it
    # destroys the feature values at those positions.
    feature[..., sx_match, sy_match] = 1
    # Recomputes the per-object masks and match indices only; nothing
    # computed in this loop is used afterwards.
    for i in range(150):
        with torch.no_grad():
            if i > torch.max(P3):
                break
            min_d = pre1[0, 0, i].long()
            max_d = pre1[0, 1, i].long()
            #object_mask=torch.where(P3==i,one,zero)
            x1, y1, x2, y2, size = pre2[0, i].long()
            object_mask = P3[0, x1:x2, y1:y2]
            object_mask = torch.where(object_mask == i, one, zero)
            s_mask_o = object_mask * s_mask[0, x1:x2, y1:y2]
            l_mask_o = object_mask * l_mask[0, x1:x2, y1:y2]
            s_match = s_mask_o.nonzero()
            l_match = l_mask_o.nonzero()
    print(time.time() - start_time)
    # Debug pause; blocks the caller for 100 seconds.
    time.sleep(100)
    return cost_volume