def pooler(self, fms, bbox=None, level=2):
    """Select a feature map, optionally cropping it to a bounding box.

    Args:
        fms: Indexable collection of feature maps (indexed by pyramid level).
        bbox: Optional box; ``bbox[2]`` and ``bbox[3]`` are multiplied to get
            the box area (presumably width and height -- TODO confirm against
            ``format_box`` / ``format_ops_box``). When ``None`` no cropping is
            done.
        level: Index into ``fms`` used only when ``bbox`` is ``None``.

    Returns:
        ``fms[level]`` when ``bbox`` is ``None``; otherwise the RoI-aligned
        crop of the feature map chosen from the box's relative size.
    """
    # Identity check: `bbox == None` is evaluated elementwise for array-like
    # boxes and can raise when used as a condition.
    if bbox is None:
        return fms[level]

    # Ratio of the box's geometric-mean side to that of the full image.
    scale = math.sqrt(bbox[2] * bbox[3]) / math.sqrt(self.img_width * self.img_height)
    if scale == 0:
        # Avoid log2(0) for degenerate (zero-area) boxes.
        scale += 0.01

    # Clamp the level to [0, 2] and truncate to an integer index.
    roi_level = int(min(2, max(0, 5 + math.log2(scale))))
    fm = fms[roi_level]
    fh, fw = fm.shape[-2:]

    if not self.training:
        boxes = self.format_ops_box(bbox, fw, fh).cuda()
        pooled = torchvision.ops.roi_align(fm, boxes, (self.crop_height, self.crop_width))
        # Keep only the first RoI and restore a leading dimension of size 1.
        return pooled[0].unsqueeze(0)

    # Only the training path uses this value. It is forwarded as the third
    # positional argument of RoIAlign; what that argument means depends on the
    # RoIAlign implementation in use -- TODO confirm.
    sampling_ratio = 0.03125 / (2 ** roi_level)
    roi_align = RoIAlign(self.crop_height, self.crop_width, sampling_ratio)
    boxes = self.format_box(bbox, fw, fh).cuda()
    # The input must be a tensor, not a numpy array.
    crops = roi_align(fm, boxes, self.box_index)
    return crops
def __init__(
        self,
        inputRes=None,
        seqs_list_file=r'E:\Challenge\MOTSChallenge\train\instances_txt',
        img_file_root=r'E:\Challenge\MOTSChallenge\train\images',
        transform=None,
        random_rev_thred=0.4,
        level=3):
    """Build a shuffled list of (image path, annotation, sequence) samples.

    Sequences 2, 5, 9 and 11 are read. For each one the annotation file
    ``<seqs_list_file>/<NNNN>.txt`` is parsed with ``load_txt`` and one sample
    is appended per entry, using 1-based frame numbers both for the image
    file name and as the key into the parsed annotations.

    Args:
        inputRes: Stored as ``self.inputRes``.
        seqs_list_file: Directory holding the per-sequence annotation files.
        img_file_root: Directory holding the per-sequence image folders.
        transform: Stored as ``self.transform``.
        random_rev_thred: Stored as ``self.random_rev_thred``.
        level: Stored as ``self.level``.
    """
    self.transform = transform
    self.inputRes = inputRes
    self.random_rev_thred = random_rev_thred
    self.tr_image = transforms.Compose([transforms.ToTensor()])
    self.level = level
    self.width = 1088
    self.height = 608
    self.nID = 0
    self.img_list = []

    for sequence in (2, 5, 9, 11):
        seq_tag = "{:04}".format(sequence)
        imgPath = os.path.join(img_file_root, seq_tag)
        instance = load_txt(os.path.join(seqs_list_file, seq_tag + ".txt"))
        # Frames are numbered from 1 in both file names and annotation keys.
        for frame in range(1, len(instance) + 1):
            frame_file = os.path.join(imgPath, "{:06}.jpg".format(frame))
            self.img_list.append((frame_file, instance[frame], sequence))

    # Hard-coded identity count (not derived from the loaded data).
    self.nID = 14455
    random.shuffle(self.img_list)
    self.roi_align = RoIAlign(56, 56, 0.25)
def __init__(self, opt, frame_rate=30):
    """Set up the model, track bookkeeping and auxiliary detector.

    Args:
        opt: Options object. Reads ``gpus``, ``arch``, ``heads``,
            ``head_conv``, ``load_model``, ``conf_thres``, ``track_buffer``,
            ``K``, ``mean`` and ``std``; writes ``opt.device``.
        frame_rate: Scales the lost-track buffer relative to 30 fps.
    """
    self.opt = opt
    # A non-negative first GPU id selects CUDA, otherwise CPU.
    opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu')

    print('Creating model...')
    self.model = create_model(opt.arch, opt.heads, opt.head_conv)
    self.model = load_model(self.model, opt.load_model)
    self.model = self.model.to(opt.device)
    self.model.eval()

    # Track containers.
    self.tracked_stracks = []  # type: list[STrack]
    self.lost_stracks = []  # type: list[STrack]
    self.removed_stracks = []  # type: list[STrack]

    self.frame_id = 0
    self.det_thresh = opt.conf_thres
    self.buffer_size = int(frame_rate / 30.0 * opt.track_buffer)
    self.max_time_lost = self.buffer_size
    self.max_per_image = opt.K

    # Per-channel statistics shaped (1, 1, 3).
    self.mean = np.array(opt.mean, dtype=np.float32).reshape(1, 1, 3)
    self.std = np.array(opt.std, dtype=np.float32).reshape(1, 1, 3)

    self.kalman_filter = KalmanFilter()
    self.roi_align = RoIAlign(7, 7)

    # NOTE(review): these absolute paths are machine-specific; the files are
    # merged into the config in this order.
    cfg = get_config()
    for cfg_file in (
            "/home/hongwei/track-human/FairMOT/src/lib/tracker/deep_configs/yolov3.yaml",
            "/home/hongwei/track-human/FairMOT/src/lib/tracker/deep_configs/deep_sort.yaml",
    ):
        cfg.merge_from_file(cfg_file)
    self.detector = build_detector(cfg, True)
def forward(self, fms, bbox_list=None, gts=None, level=2):
    """Run the mask head on whole feature maps or on per-box crops.

    Args:
        fms: Feature maps, passed through to ``self.pooler``.
        bbox_list: Optional iterable of boxes. When ``None`` the head runs on
            levels 0, 1 and 2 of ``fms`` without cropping.
        gts: Training targets. Indexed per level (``gts[l]``) when
            ``bbox_list`` is ``None``; otherwise compared against the
            concatenation of all per-box outputs.
        level: Unused; kept for interface compatibility.

    Returns:
        In training mode a tuple ``(weighted_loss, raw_loss_value)`` where
        ``weighted_loss = loss * exp(-s_mask) * 0.5 + s_mask * 0.5``.
        In eval mode a list of squeezed mask logits, one per level or box.

    Raises:
        AssertionError: If a box fails the sanity check on its coordinates.
    """

    def mask_head(feat):
        # Shared head: ReLU-activated blocks, then the two output layers.
        for layer_name in self.blocks:
            feat = F.relu(getattr(self, layer_name)(feat))
        feat = self.conv5_mask(feat)
        return self.mask_fcn_logits(feat)

    def weighted(maskloss):
        # Weight the loss by the s_mask term; also return the raw value.
        return (maskloss * torch.exp(-self.s_mask) * 0.5 + self.s_mask * 0.5,
                maskloss.item())

    # Identity check: `bbox_list == None` is evaluated elementwise for
    # array-like inputs and then fails when used as a condition.
    if bbox_list is None:
        losses = []
        x = []
        for l in range(3):
            out = mask_head(self.pooler(fms, level=l))
            if self.training:
                losses.append(F.binary_cross_entropy_with_logits(out, gts[l]))
            else:
                x.append(out.squeeze())
        if self.training:
            return weighted(sum(losses))
        return x

    x = []
    for bbox in bbox_list:
        bbox = bbox.squeeze()
        assert bbox[0] >= 0 and bbox[1] >= 0 and bbox[0] < bbox[2] and bbox[1] < bbox[3]
        out = mask_head(self.pooler(fms, bbox))
        if not self.training:
            # Resample the predicted mask to a bbox[3] x bbox[2] output.
            zoom_roi_align = RoIAlign(bbox[3], bbox[2], 0.25)
            out = zoom_roi_align(out, self.zoomboxes, self.box_index)
        x.append(out.squeeze())

    if self.training:
        out = torch.cat(x)
        return weighted(F.binary_cross_entropy_with_logits(out, gts))
    return x
def __init__(
        self,
        inputRes=None,
        seqs_list_file=r'E:\Challenge\MOTSChallenge\train\instances_txt',
        transform=None,
        sequence=2,
        random_rev_thred=0.4):
    """Load the annotations of one sequence and remember its image folder.

    Args:
        inputRes: Stored as ``self.inputRes``.
        seqs_list_file: Directory containing ``<NNNN>.txt`` annotation files.
        transform: Stored as ``self.transform``.
        sequence: Sequence number; zero-padded to four digits for both the
            image folder and the annotation file name.
        random_rev_thred: Stored as ``self.random_rev_thred``.
    """
    seq_tag = "{:04}".format(sequence)
    self.imgPath = os.path.join(r'E:\Challenge\MOTSChallenge\train\images', seq_tag)
    self.instance = load_txt(os.path.join(seqs_list_file, seq_tag + ".txt"))

    self.transform = transform
    self.inputRes = inputRes
    self.random_rev_thred = random_rev_thred
    self.roi_align = RoIAlign(56, 56, 0.25)
def __init__(
        self,
        inputRes=None,
        seqs_list_file=r'E:\Challenge\Multi-Object-Tracking-and-Segmentation-with-Pytorch\results',
        transform=None,
        sequence=2,
        random_rev_thred=0.4):
    """Load MOT-format results for one sequence and set its frame size.

    Args:
        inputRes: Stored as ``self.inputRes``.
        seqs_list_file: Directory containing ``<NNNN>.txt`` result files.
        transform: Accepted but not stored by this constructor.
        sequence: Sequence number; zero-padded to four digits for both the
            image folder and the result file name.
        random_rev_thred: Stored as ``self.random_rev_thred``.
    """
    seq_tag = "{:04}".format(sequence)
    self.imgPath = os.path.join(r'E:\Challenge\MOTSChallenge\train\images', seq_tag)

    # Sequences 5 and 6 use 640x480 frames; all others 1920x1080.
    if sequence in (5, 6):
        self.width, self.height = 640, 480
    else:
        self.width, self.height = 1920, 1080

    filename = os.path.join(seqs_list_file, seq_tag + ".txt")
    # NOTE(review): the loader is given a fixed 2048x1024 size rather than
    # self.width / self.height -- confirm this is intended.
    self.instance = load_MOT_txt(filename, 2048, 1024, sequence)

    self.inputRes = inputRes
    self.random_rev_thred = random_rev_thred
    self.roi_align = RoIAlign(56, 56, 0.25)
mask = rletools.decode(obj.mask) mask = torch.from_numpy(mask) mask = mask.float() mask = mask[None] mask = mask[None] mask = mask.contiguous() boxes = format_box(rletools.toBbox(obj.mask)) # 做好坐标比例变化 box_index = torch.tensor([0], dtype=torch.int) # index of bbox in batch # RoIAlign layer with crop sizes: crop_height = 196 crop_width = 84 roi_align = RoIAlign(crop_height, crop_width, 0.25) print(mask.shape) # make crops: crops = roi_align(mask, boxes, box_index) # 输入必须是tensor,不能是numpy plt.imshow(crops[0][0]) plt.show() print(crops.shape) # RoIAlign layer with crop sizes: boxes = torch.Tensor([[0, 0, 84, 196]]) print(rletools.toBbox(obj.mask)) crop_height = int(rletools.toBbox(obj.mask)[3]) crop_width = int(rletools.toBbox(obj.mask)[2]) roi_align = RoIAlign(crop_height, crop_width) crops = roi_align(crops.clone(), boxes,
def __init__(self, img_size):
    """Create the cropper with a square RoIAlign layer, starting on CPU.

    Args:
        img_size: Used for both size arguments of the RoIAlign layer; the
            third RoIAlign argument is fixed at 1.0.
    """
    super(imgCropper, self).__init__()
    self.img_size = img_size
    # Flipped elsewhere once the layer has been moved to the GPU.
    self.isCuda = False
    self.roi_align_model = RoIAlign(img_size, img_size, 1.)
class imgCropper(nn.Module):
    """Crop image regions by running them through an RoIAlign layer.

    Boxes are given as ``[x, y, w, h]`` rows and converted to
    ``[batch_index, x1, y1, x2, y2]`` before being passed to the layer.
    """

    def __init__(self, img_size):
        """Create the cropper with a square RoIAlign layer, on CPU.

        Args:
            img_size: Used for both size arguments of the RoIAlign layer.
        """
        super(imgCropper, self).__init__()
        self.isCuda = False
        self.img_size = img_size
        self.roi_align_model = RoIAlign(img_size, img_size, 1.)

    def gpuEnable(self):
        """Move the RoIAlign layer to CUDA and send later inputs there too."""
        self.roi_align_model = self.roi_align_model.cuda()
        self.isCuda = True

    def forward(self, image, roi):
        """Apply the RoIAlign layer to ``image`` with the given ``roi`` rows."""
        return self.roi_align_model(image, roi)

    def crop_image(self, image, box, result_size):
        """Crop several boxes out of one image.

        Args:
            image: NumPy array indexed as (height, width, channels).
            box: 2-D NumPy array of ``[x, y, w, h]`` rows; not modified.
            result_size: ``(width, height)`` of the crops. Written onto the
                shared RoIAlign layer, so it persists for later calls.

        Returns:
            Tuple ``(cropped_image, image_tensor)`` where ``image_tensor`` is
            the float image with shape (1, channels, height, width).
        """
        ishape = image.shape
        # Add a batch dimension, then go from NHWC to NCHW.
        cur_image_var = np.reshape(image, (1, ishape[0], ishape[1], ishape[2]))
        cur_image_var = cur_image_var.transpose(0, 3, 1, 2).astype('float32')
        cur_image_var = Variable(torch.from_numpy(cur_image_var).float())

        # Work on a copy so the caller's boxes stay untouched.
        roi = np.copy(box)
        roi[:, 2:4] += roi[:, 0:2]  # (w, h) -> (x2, y2)
        # Every box refers to image 0 of the batch.
        roi = np.concatenate((np.zeros((roi.shape[0], 1)), roi), axis=1)
        roi = Variable(torch.from_numpy(roi).float())

        if self.isCuda:
            cur_image_var = cur_image_var.cuda()
            roi = roi.cuda()

        self.roi_align_model.aligned_width = result_size[0]
        self.roi_align_model.aligned_height = result_size[1]
        cropped_image = self.forward(cur_image_var, roi)
        return cropped_image, cur_image_var

    def crop_several_image(self, img_list, target_list):
        """Crop one box from each image (one-to-one image/target matching).

        Args:
            img_list: List of image tensors to stack into a batch.
            target_list: One ``[x, y, w, h]`` box per image.

        Returns:
            The RoIAlign output for the stacked images and their boxes.

        Raises:
            AssertionError: If the two lists differ in length.
        """
        assert (len(target_list) == len(img_list))

        # Synchronise only when CUDA exists; the unconditional call fails on
        # CPU-only machines.
        use_cuda_sync = torch.cuda.is_available()

        if use_cuda_sync:
            torch.cuda.synchronize()
        start_time = time.time()
        cur_images = torch.squeeze(torch.stack(img_list, 0))
        if len(img_list) == 1:
            # squeeze() also removed the size-1 batch dimension; restore it.
            cur_images = cur_images.unsqueeze(0)
        if use_cuda_sync:
            torch.cuda.synchronize()
        print('10 image stacking time:{}'.format(time.time() - start_time))

        # np.array copies, so the caller's targets are not modified.
        sample_rois = np.array(target_list)
        sample_rois[:, 2:4] += sample_rois[:, 0:2]  # (w, h) -> (x2, y2)
        # Box i belongs to image i of the batch.
        batch_num = np.reshape(np.arange(0, len(sample_rois)), (len(sample_rois), 1))
        sample_rois = np.concatenate((batch_num, sample_rois), axis=1)
        sample_rois = Variable(torch.from_numpy(sample_rois.astype('float32')))

        if self.isCuda:
            sample_rois = sample_rois.cuda()
            cur_images = cur_images.cuda()

        cropped_images = self.forward(cur_images, sample_rois)
        return cropped_images
from torch.autograd import gradcheck

import os.path as osp
import sys
# Make the package two levels above this file importable.
sys.path.append(osp.abspath(osp.join(__file__, '../../')))
from roi_align import RoIAlign

# Problem size: a 15x15 feature map at 1/8 of the image resolution.
feat_size = 15
spatial_scale = 1.0 / 8
img_size = feat_size / spatial_scale
num_imgs = 2
num_rois = 20

# Random RoIs as rows of [batch_index, c1, c2, c3, c4]: the first two
# coordinates fall in the lower half of the image range, the last two are
# shifted into the upper half so each box has positive extent.
batch_ind = np.random.randint(num_imgs, size=(num_rois, 1))
rois = np.random.rand(num_rois, 4) * img_size * 0.5
rois[:, 2:] += img_size * 0.5
rois = np.hstack((batch_ind, rois))

feat = torch.randn(
    num_imgs, 16, feat_size, feat_size, requires_grad=True, device='cuda:0')
rois = torch.from_numpy(rois).float().cuda()
inputs = (feat, rois)

print('Gradcheck for roi align...')
# Check gradients first without, then with, a third argument of 2.
for extra_args in ((), (2,)):
    test = gradcheck(
        RoIAlign(3, spatial_scale, *extra_args), inputs, atol=1e-3, eps=1e-3)
    print(test)