def siamese_init(im, target_pos, target_sz, model, hp=None, device='cpu'):
    """
    Initialize the tracker and build the state dict from the target information.
    :param im: current image
    :param target_pos: target position
    :param target_sz: target size
    :param model: trained network model
    :param hp: hyper-parameters
    :param device: device to run on
    :return: the tracker's state dict
    """
    # Initialize the state dict
    state = dict()
    # Record the image height and width
    state['im_h'] = im.shape[0]
    state['im_w'] = im.shape[1]

    # Configure the tracker parameters
    p = TrackerConfig()
    # Update the parameters from the hyper-parameters and the model anchors
    p.update(hp, model.anchors)
    p.renew()

    # Get the network model
    net = model
    # Update the tracker parameters from the network, mainly the anchors
    p.scales = model.anchors['scales']
    p.ratios = model.anchors['ratios']
    p.anchor_num = model.anchor_num
    # Generate the anchors
    p.anchor = generate_anchor(model.anchors, p.score_size)

    # Per-channel image mean, used to pad the crops
    avg_chans = np.mean(im, axis=(0, 1))

    # Derive the exemplar crop size from the configured context amount
    wc_z = target_sz[0] + p.context_amount * sum(target_sz)
    hc_z = target_sz[1] + p.context_amount * sum(target_sz)
    s_z = round(np.sqrt(wc_z * hc_z))

    # initialize the exemplar
    z_crop = get_subwindow_tracking(im, target_pos, p.exemplar_size, s_z, avg_chans)

    # Wrap in a Variable so PyTorch can backpropagate through it
    z = Variable(z_crop.unsqueeze(0))
    # Compute and cache the template features
    net.template(z.to(device))

    # Build the penalty window
    if p.windowing == 'cosine':
        # The outer product of two Hanning windows gives the cosine window
        window = np.outer(np.hanning(p.score_size), np.hanning(p.score_size))
    elif p.windowing == 'uniform':
        window = np.ones((p.score_size, p.score_size))
    # Each anchor gets its own copy of the penalty window
    window = np.tile(window.flatten(), p.anchor_num)

    # Store everything in the state dict
    state['p'] = p
    state['net'] = net
    state['avg_chans'] = avg_chans
    state['window'] = window
    state['target_pos'] = target_pos
    state['target_sz'] = target_sz
    return state
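# --- Illustrative usage sketch (added; not from the original sources) ---
# How siamese_init is typically called. Assumes `model` is a SiamMask-style
# network exposing `.anchors`, `.anchor_num` and `.template(z)`, already
# loaded via load_pretrain and moved to `device`; the frame path and the box
# values are placeholders.
def _example_siamese_init(model, device='cpu'):
    import cv2
    import numpy as np

    frame = cv2.imread('frames/00000.jpg')    # BGR image, H x W x 3 (placeholder path)
    target_pos = np.array([320.0, 240.0])     # target center (cx, cy) in pixels
    target_sz = np.array([80.0, 60.0])        # target (width, height) in pixels
    # Builds the tracker state: caches the template features inside the net
    # and precomputes the penalty window.
    state = siamese_init(frame, target_pos, target_sz, model, hp=None, device=device)
    return state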
def siamese_init(im, target_pos, target_sz, model, hp=None, device='cpu'):
    # print("------siamese_init-------")
    state = dict()
    state['im_h'] = im.shape[0]
    state['im_w'] = im.shape[1]
    # print("im.shape[0] ", im.shape[0])
    p = TrackerConfig()
    p.update(hp, model.anchors)
    p.renew()
    net = model
    p.scales = model.anchors['scales']
    p.ratios = model.anchors['ratios']
    p.anchor_num = model.anchor_num
    p.anchor = generate_anchor(model.anchors, p.score_size)
    avg_chans = np.mean(im, axis=(0, 1))

    wc_z = target_sz[0] + p.context_amount * sum(target_sz)
    hc_z = target_sz[1] + p.context_amount * sum(target_sz)
    s_z = round(np.sqrt(wc_z * hc_z))

    # initialize the exemplar
    z_crop = get_subwindow_tracking(im, target_pos, p.exemplar_size, s_z, avg_chans)
    # print("z size (patch) ", z_crop.size())
    z = Variable(z_crop.unsqueeze(0))
    # The network caches the features (self.zf) obtained by passing the patch z through the Siamese backbone
    net.template(z.to(device))

    if p.windowing == 'cosine':
        window = np.outer(np.hanning(p.score_size), np.hanning(p.score_size))
    elif p.windowing == 'uniform':
        window = np.ones((p.score_size, p.score_size))
    window = np.tile(window.flatten(), p.anchor_num)

    state['p'] = p
    state['net'] = net
    state['avg_chans'] = avg_chans
    state['window'] = window
    state['target_pos'] = target_pos
    state['target_sz'] = target_sz
    # print("window = ", state['window'])
    return state
def __init__(self, args):
    super(PatchTrainer, self).__init__()

    # Setup device
    self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    torch.backends.cudnn.benchmark = True

    # Setup tracker cfg
    cfg = load_config(args)
    p = TrackerConfig()
    p.renew()
    self.p = p

    # Setup tracker
    siammask = Tracker(p=p, anchors=cfg['anchors'])
    if args.resume:
        assert isfile(args.resume), 'Please download {} first.'.format(args.resume)
        siammask = load_pretrain(siammask, args.resume)
    siammask.eval().to(self.device)
    self.model = siammask
def siamese_init(im, target_pos, target_sz, model, hp=None, device='cpu'):
    state = dict()
    state['im_h'] = im.shape[0]
    state['im_w'] = im.shape[1]
    p = TrackerConfig()
    p.update(hp, model.anchors)
    p.renew()
    net = model
    p.scales = model.anchors['scales']
    p.ratios = model.anchors['ratios']
    p.anchor_num = model.anchor_num
    p.anchor = generate_anchor(model.anchors, p.score_size)
    avg_chans = np.mean(im, axis=(0, 1))

    wc_z = target_sz[0] + p.context_amount * sum(target_sz)
    hc_z = target_sz[1] + p.context_amount * sum(target_sz)
    s_z = round(np.sqrt(wc_z * hc_z))

    # initialize the exemplar
    z_crop = get_subwindow_tracking(im, target_pos, p.exemplar_size, s_z, avg_chans)
    z = Variable(z_crop.unsqueeze(0))
    net.template(z.to(device))

    if p.windowing == 'cosine':
        # the outer product yields a matrix; the inner product yields a scalar
        window = np.outer(np.hanning(p.score_size), np.hanning(p.score_size))
    elif p.windowing == 'uniform':
        window = np.ones((p.score_size, p.score_size))
    window = np.tile(window.flatten(), p.anchor_num)

    state['p'] = p
    state['net'] = net
    state['avg_chans'] = avg_chans
    state['window'] = window
    state['target_pos'] = target_pos
    state['target_sz'] = target_sz
    return state
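# --- Illustrative check (added; not from the original sources) ---
# The cosine window used above is the outer product of two 1-D Hanning
# windows: a 2-D taper that peaks at the center of the score map, of which
# one flattened copy is tiled per anchor. score_size=25 and anchor_num=5
# mirror the usual SiamMask VOT configuration.
import numpy as np

score_size, anchor_num = 25, 5
window_2d = np.outer(np.hanning(score_size), np.hanning(score_size))
assert window_2d.shape == (25, 25)
assert np.argmax(window_2d) == 12 * 25 + 12   # peak at the map center
window = np.tile(window_2d.flatten(), anchor_num)
assert window.shape == (3125,)                # 25 * 25 * 5 penalty weights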
def siamese_init(im, model, hp=None, device='cpu', targets=None, detector=None):
    custom_objects = detector.CustomObjects(car=True, person=True)
    state = dict()
    state['im_h'] = im.shape[0]
    state['im_w'] = im.shape[1]
    p = TrackerConfig()
    p.update(hp, model.anchors)
    p.renew()
    net = model
    p.scales = model.anchors['scales']
    p.ratios = model.anchors['ratios']
    p.anchor_num = model.anchor_num
    p.anchor = generate_anchor(model.anchors, p.score_size)
    avg_chans = np.mean(im, axis=(0, 1))

    # s_z = [round(np.sqrt(target["target_sz"][1] + 0.123 * sum(target["target_sz"]) * target["target_sz"][0] + 0.123 * sum(target["target_sz"]))) for target in targets]
    # s_z = np.array(s_z)
    # print(targe)
    # targets.append(targe)
    # print(targets)
    BLUE = [255, 255, 255]
    for i, target in enumerate(targets):
        wc_z = target["target_sz"][0] + p.context_amount * sum(target["target_sz"])
        hc_z = target["target_sz"][1] + p.context_amount * sum(target["target_sz"])
        target["s_z"] = round(np.sqrt(wc_z * hc_z))
    print("out")

    # initialize the exemplar
    targets = get_subwindow_tracking(im, p.exemplar_size, avg_chans, targets=targets)
    # z_f = [net.template(Variable(target["im_to_torch"].unsqueeze(0)).to(device)) for target in targets]
    for i, target in enumerate(targets):
        # detections = detector.detectCustomObjectsFromImage(custom_objects=custom_objects, input_image=target["im_patch"], input_type="array", output_image_path=os.path.join("image {} custom.jpg".format(i)), output_type="file", minimum_percentage_probability=30)
        # detections = detector.detectCustomObjectsFromImage(custom_objects=custom_objects, input_image=target["img"], input_type="array", output_image_path=os.path.join(execution_path, "images.jpg"), output_type="file", minimum_percentage_probability=30)
        z = Variable(target["im_to_torch"].unsqueeze(0))
        target["zf"] = net.template(z.to(device))
        del target["im_to_torch"]
        # for eachObject in detections:
        #     print(eachObject["name"], " : ", eachObject["percentage_probability"], " : ", eachObject["box_points"])
        #     target["detection"] = eachObject["box_points"]
        #     print("--------------------------------")

    if p.windowing == 'cosine':
        window = np.outer(np.hanning(p.score_size), np.hanning(p.score_size))
    elif p.windowing == 'uniform':
        window = np.ones((p.score_size, p.score_size))
    window = np.tile(window.flatten(), p.anchor_num)

    state['p'] = p
    state['net'] = net
    state['avg_chans'] = avg_chans
    state['window'] = window
    state["targets"] = targets
    state["detector"] = detector
    # state["s_z"] = s_z
    # state["z_f"] = z_f
    return state
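# --- Illustrative sketch (added; not from the original sources) ---
# A guess at the `targets` structure the multi-target variant above consumes:
# each entry is a dict carrying at least "target_sz" (and presumably a
# position key used by this repo's get_subwindow_tracking, assumed here to be
# "target_pos"); siamese_init then fills in "s_z" and "zf" per target. All
# values below are placeholders.
import numpy as np

targets = [
    {"target_pos": np.array([320.0, 240.0]), "target_sz": np.array([80.0, 60.0])},
    {"target_pos": np.array([100.0, 150.0]), "target_sz": np.array([40.0, 90.0])},
]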
class SingleTracker(object):
    def __init__(self, config_path, model_path):
        args = TrackArgs()
        args.config = config_path
        args.resume = model_path
        cfg = load_config(args)
        if args.arch == 'Custom':
            from custom import Custom
            self.model = Custom(anchors=cfg['anchors'])
        else:
            parser.error('invalid architecture: {}'.format(args.arch))
        if args.resume:
            assert isfile(args.resume), '{} is not a valid file'.format(args.resume)
            self.model = load_pretrain(self.model, args.resume)
        self.model.eval()
        self.device = torch.device('cuda' if (torch.cuda.is_available() and not args.cpu) else 'cpu')
        self.model = self.model.to(self.device)

        ################# Dangerous
        self.p = TrackerConfig()
        self.p.update(cfg['hp'] if 'hp' in cfg.keys() else None, self.model.anchors)
        self.p.renew()
        self.p.scales = self.model.anchors['scales']
        self.p.ratios = self.model.anchors['ratios']
        self.p.anchor_num = self.model.anchor_num
        self.p.anchor = generate_anchor(self.model.anchors, self.p.score_size)
        if self.p.windowing == 'cosine':
            self.window = np.outer(np.hanning(self.p.score_size), np.hanning(self.p.score_size))
        elif self.p.windowing == 'uniform':
            self.window = np.ones((self.p.score_size, self.p.score_size))
        self.window = np.tile(self.window.flatten(), self.p.anchor_num)
        ################

    def get_examplar_feature(self, img, target_pos, target_sz):
        avg_chans = np.mean(img, axis=(0, 1))
        wc_z = target_sz[0] + self.p.context_amount * sum(target_sz)
        hc_z = target_sz[1] + self.p.context_amount * sum(target_sz)
        s_z = round(np.sqrt(wc_z * hc_z))
        # initialize the exemplar
        examplar = get_subwindow_tracking(img, target_pos, self.p.exemplar_size, s_z, avg_chans)
        z = Variable(examplar.unsqueeze(0))
        return self.model.template(z.to(self.device))

    def siamese_track(self, img, target_pos, target_sz, examplar_feature, debug=False,
                      mask_enable=True, refine_enable=True):
        avg_chans = np.mean(img, axis=(0, 1))
        im_h = img.shape[0]
        im_w = img.shape[1]
        wc_x = target_sz[0] + self.p.context_amount * sum(target_sz)
        hc_x = target_sz[1] + self.p.context_amount * sum(target_sz)
        s_x = np.sqrt(wc_x * hc_x)
        '''
        scale_x = self.p.exemplar_size / s_x
        d_search = (self.p.instance_size - self.p.exemplar_size) / 2
        pad = d_search / scale_x
        s_x = s_x + 2 * pad
        crop_box = [target_pos[0] - round(s_x) / 2, target_pos[1] - round(s_x) / 2, round(s_x), round(s_x)]
        '''
        # myy: the original author's commented-out code above simplifies to the three lines below
        scale_x = self.p.exemplar_size / s_x
        s_x = self.p.instance_size / self.p.exemplar_size * s_x
        crop_box = [target_pos[0] - round(s_x) / 2, target_pos[1] - round(s_x) / 2, round(s_x), round(s_x)]

        # extract scaled crops for search region x at previous target position
        x_crop = Variable(get_subwindow_tracking(img, target_pos, self.p.instance_size,
                                                 round(s_x), avg_chans).unsqueeze(0))
        if mask_enable:
            score, delta, mask = self.model.track_mask(examplar_feature, x_crop.to(self.device))
        else:
            score, delta = self.model.track(examplar_feature, x_crop.to(self.device))

        delta = delta.permute(1, 2, 3, 0).contiguous().view(4, -1).data.cpu().numpy()
        score = F.softmax(score.permute(1, 2, 3, 0).contiguous().view(2, -1).permute(1, 0),
                          dim=1).data[:, 1].cpu().numpy()

        delta[0, :] = delta[0, :] * self.p.anchor[:, 2] + self.p.anchor[:, 0]
        delta[1, :] = delta[1, :] * self.p.anchor[:, 3] + self.p.anchor[:, 1]
        delta[2, :] = np.exp(delta[2, :]) * self.p.anchor[:, 2]
        delta[3, :] = np.exp(delta[3, :]) * self.p.anchor[:, 3]

        def change(r):
            return np.maximum(r, 1. / r)

        def sz(w, h):
            pad = (w + h) * 0.5
            sz2 = (w + pad) * (h + pad)
            return np.sqrt(sz2)

        def sz_wh(wh):
            pad = (wh[0] + wh[1]) * 0.5
            sz2 = (wh[0] + pad) * (wh[1] + pad)
            return np.sqrt(sz2)

        # size penalty
        target_sz_in_crop = target_sz * scale_x
        s_c = change(sz(delta[2, :], delta[3, :]) / (sz_wh(target_sz_in_crop)))  # scale penalty
        r_c = change((target_sz_in_crop[0] / target_sz_in_crop[1]) / (delta[2, :] / delta[3, :]))  # ratio penalty

        penalty = np.exp(-(r_c * s_c - 1) * self.p.penalty_k)
        pscore = penalty * score

        # cos window (motion model)
        pscore = pscore * (1 - self.p.window_influence) + self.window * self.p.window_influence
        best_pscore_id = np.argmax(pscore)

        pred_in_crop = delta[:, best_pscore_id] / scale_x
        lr = penalty[best_pscore_id] * score[best_pscore_id] * self.p.lr  # lr for OTB

        res_x = pred_in_crop[0] + target_pos[0]
        res_y = pred_in_crop[1] + target_pos[1]
        res_w = target_sz[0] * (1 - lr) + pred_in_crop[2] * lr
        res_h = target_sz[1] * (1 - lr) + pred_in_crop[3] * lr

        target_pos = np.array([res_x, res_y])
        target_sz = np.array([res_w, res_h])

        # for Mask Branch
        if mask_enable:
            best_pscore_id_mask = np.unravel_index(best_pscore_id,
                                                   (5, self.p.score_size, self.p.score_size))
            delta_x, delta_y = best_pscore_id_mask[2], best_pscore_id_mask[1]
            if refine_enable:
                mask = self.model.track_refine((delta_y, delta_x)).to(self.device).sigmoid().squeeze().view(
                    self.p.out_size, self.p.out_size).cpu().data.numpy()
            else:
                mask = mask[0, :, delta_y, delta_x].sigmoid(). \
                    squeeze().view(self.p.out_size, self.p.out_size).cpu().data.numpy()

            def crop_back(image, bbox, out_sz, padding=-1):
                a = (out_sz[0] - 1) / bbox[2]
                b = (out_sz[1] - 1) / bbox[3]
                c = -a * bbox[0]
                d = -b * bbox[1]
                mapping = np.array([[a, 0, c],
                                    [0, b, d]]).astype(float)  # np.float was removed in recent NumPy
                crop = cv2.warpAffine(image, mapping, (out_sz[0], out_sz[1]),
                                      flags=cv2.INTER_LINEAR,
                                      borderMode=cv2.BORDER_CONSTANT,
                                      borderValue=padding)
                return crop

            s = crop_box[2] / self.p.instance_size
            sub_box = [crop_box[0] + (delta_x - self.p.base_size / 2) * self.p.total_stride * s,
                       crop_box[1] + (delta_y - self.p.base_size / 2) * self.p.total_stride * s,
                       s * self.p.exemplar_size,
                       s * self.p.exemplar_size]
            s = self.p.out_size / sub_box[2]
            back_box = [-sub_box[0] * s, -sub_box[1] * s, im_w * s, im_h * s]
            mask_in_img = crop_back(mask, back_box, (im_w, im_h))
            target_mask = (mask_in_img > self.p.seg_thr).astype(np.uint8)
            if cv2.__version__[-5] == '4':
                contours, _ = cv2.findContours(target_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
            else:
                _, contours, _ = cv2.findContours(target_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
            cnt_area = [cv2.contourArea(cnt) for cnt in contours]
            if len(contours) != 0 and np.max(cnt_area) > 100:
                contour = contours[np.argmax(cnt_area)]  # use max area polygon
                polygon = contour.reshape(-1, 2)
                # pbox = cv2.boundingRect(polygon)  # Min Max Rectangle
                prbox = cv2.boxPoints(cv2.minAreaRect(polygon))  # Rotated Rectangle
                # box_in_img = pbox
                rbox_in_img = prbox
            else:  # empty mask
                location = cxy_wh_2_rect(target_pos, target_sz)
                rbox_in_img = np.array([[location[0], location[1]],
                                        [location[0] + location[2], location[1]],
                                        [location[0] + location[2], location[1] + location[3]],
                                        [location[0], location[1] + location[3]]])

        target_pos[0] = max(0, min(im_w, target_pos[0]))
        target_pos[1] = max(0, min(im_h, target_pos[1]))
        target_sz[0] = max(10, min(im_w, target_sz[0]))
        target_sz[1] = max(10, min(im_h, target_sz[1]))

        score = score[best_pscore_id]
        mask = mask_in_img if mask_enable else []
        return target_pos, target_sz, score, mask
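# --- Illustrative usage sketch (added; not from the original sources) ---
# The intended flow for SingleTracker: compute the exemplar features once on
# the first frame, then reuse them for every subsequent frame. The config and
# checkpoint paths are placeholders; `frames` is assumed to be a list of BGR
# images.
def _example_single_tracker(frames):
    import numpy as np

    tracker = SingleTracker('config_davis.json', 'SiamMask_DAVIS.pth')  # placeholder paths
    target_pos = np.array([320.0, 240.0])
    target_sz = np.array([80.0, 60.0])
    # Template features are computed once from the first frame...
    zf = tracker.get_examplar_feature(frames[0], target_pos, target_sz)
    results = []
    # ...then reused while tracking through the remaining frames.
    for frame in frames[1:]:
        target_pos, target_sz, score, mask = tracker.siamese_track(
            frame, target_pos, target_sz, zf)
        results.append((target_pos.copy(), target_sz.copy(), score))
    return results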
def main():
    # args.base_path = base_path
    args.resume = "../SiamMask/experiments/siammask_sharp/SiamMask_DAVIS.pth"
    args.config = "../SiamMask/experiments/siammask_sharp/config_davis.json"
    print(join(args.base_path, 'groundtruth_rect.txt'))

    # Setup device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    torch.backends.cudnn.benchmark = True

    # Setup Model
    cfg = load_config(args)
    p = TrackerConfig()
    p.renew()
    siammask = Tracker(p=p, anchors=cfg['anchors'])
    if args.resume:
        assert isfile(args.resume), 'Please download {} first.'.format(args.resume)
        siammask = load_pretrain(siammask, args.resume)
    siammask.eval().to(device)

    # Parse Image file
    img_files = sorted(glob.glob(join(join(args.base_path, 'imgs'), '*.jp*')))
    ims = [cv2.imread(imf) for imf in img_files]

    # Select ROI
    cv2.namedWindow("SiamMask", cv2.WND_PROP_FULLSCREEN)
    try:
        init_rect = cv2.selectROI('SiamMask', ims[0], False, False)
        gts = None
        x, y, w, h = init_rect
    except:
        exit()

    file1 = open(join(args.base_path, 'groundtruth_rect.txt'), 'w')
    file1.write('{0:d},{1:d},{2:d},{3:d}\n'.format(x, y, w, h))

    toc = 0
    for f, im in enumerate(ims):
        tic = cv2.getTickCount()
        if f == 0:  # init
            target_pos = np.array([x + w / 2, y + h / 2])
            target_sz = np.array([w, h])
            state = tracker_init(im, target_pos, target_sz, siammask, device=device)  # init tracker
            state['gts'] = gts
            state['device'] = device
        elif f > 0:  # tracking
            state = tracker_track(state, im, siammask, device=device)  # track
            target_pos, target_sz = state['target_pos'], state['target_sz']
            x, y = (target_pos - target_sz / 2).astype(int)
            x2, y2 = (target_pos + target_sz / 2).astype(int)
            cv2.rectangle(im, (x, y), (x2, y2), (0, 255, 0), 4)
            cv2.imshow('SiamMask', im)
            key = cv2.waitKey(1)
            if key == ord('q'):
                break
            file1.write('{0:d},{1:d},{2:d},{3:d}\n'.format(x, y, x2 - x, y2 - y))
        toc += cv2.getTickCount() - tic
    file1.close()
    toc /= cv2.getTickFrequency()
    fps = f / toc
    print('SiamMask Time: {:02.1f}s Speed: {:3.1f}fps (with visualization!)'.format(toc, fps))
if __name__ == '__main__':
    # Setup config and model file
    args.resume = "../SiamMask/experiments/siammask_sharp/SiamMask_DAVIS.pth"
    args.config = "../SiamMask/experiments/siammask_sharp/config_davis.json"
    cv2.namedWindow("template", cv2.WND_PROP_FULLSCREEN)
    cv2.namedWindow("SiamMask", cv2.WND_PROP_FULLSCREEN)

    # Setup device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    torch.backends.cudnn.benchmark = True

    # Setup Model
    cfg = load_config(args)
    p = TrackerConfig()
    p.renew()
    siammask = Tracker(p=p, anchors=cfg['anchors'])
    if args.resume:
        assert isfile(args.resume), 'Please download {} first.'.format(args.resume)
        siammask = load_pretrain(siammask, args.resume)
    siammask.eval().to(device)
    model = siammask

    # Setup Dataset
    dataloader = DataLoader(AttackDataset(root_dir='data/Phone1', step=1, test=True), batch_size=100)
def siamese_init(im, search_shape, target_pos, target_sz, model, hp=None, device='cpu'):
    """
    generate anchors, inference the template image, set up window
    :param im: whole image
    :param search_shape: shape of the search image, stored as state['im_h'] / state['im_w']
    :param target_pos: target position that was selected
    :param target_sz: target size that was selected
    :param model: SiamMask model
    :param hp: hyper parameters
    :param device:
    :return:
    """
    state = dict()
    state['im_h'] = search_shape[0]
    state['im_w'] = search_shape[1]
    p = TrackerConfig()
    p.update(hp, model.anchors)
    p.renew()
    net = model
    p.scales = model.anchors['scales']
    p.ratios = model.anchors['ratios']
    p.anchor_num = model.anchor_num
    p.anchor = generate_anchor(model.anchors, p.score_size)  # anchor size: (25*25*5, 4) --> (3125, 4)
    avg_chans = np.mean(im, axis=(0, 1))

    # wc_z = target_sz[0] + p.context_amount * sum(target_sz)
    # hc_z = target_sz[1] + p.context_amount * sum(target_sz)
    # s_z = round(np.sqrt(wc_z * hc_z))  # crop size = sqrt((w+(w+h)/2)*(h+(w+h)/2))

    # initialize the exemplar
    # im_patch = get_subwindow_tracking(im, target_pos, p.exemplar_size, s_z, avg_chans, out_mode="numpy")
    im_patch = im
    im_patch = cv2.resize(im_patch, (p.exemplar_size, p.exemplar_size))
    cv2.imshow('crop_template', im_patch)
    cv2.waitKey(0)
    z_crop = im_to_torch(im_patch)
    z = Variable(z_crop.unsqueeze(0))
    net.template(z.to(device))

    if p.windowing == 'cosine':
        window = np.outer(np.hanning(p.score_size), np.hanning(p.score_size))
    elif p.windowing == 'uniform':
        window = np.ones((p.score_size, p.score_size))
    window = np.tile(window.flatten(), p.anchor_num)

    state['p'] = p
    state['net'] = net
    state['avg_chans'] = avg_chans
    state['window'] = window
    state['target_pos'] = target_pos
    state['target_sz'] = target_sz
    return state
def siamese_init(im, target_pos, target_sz, model, hp=None, device='cpu'):
    # target_pos, target_sz come in as the axis-aligned box derived from the ground truth
    state = dict()
    state['im_h'] = im.shape[0]
    state['im_w'] = im.shape[1]
    p = TrackerConfig()  # tracker parameters
    p.update(hp, model.anchors)  # update p from hp and model.anchors, i.e. from config_vot.json
    p.renew()  # p.score_size = 25
    net = model
    p.scales = model.anchors['scales']  # attributes of Custom's parent class SiamMask
    p.ratios = model.anchors['ratios']
    p.anchor_num = model.anchor_num  # 5 on the VOT dataset
    # generate_anchor builds the anchors; p.anchor.shape = (p.anchor_num * p.score_size * p.score_size, 4)
    p.anchor = generate_anchor(model.anchors, p.score_size)
    # im is a single image; take the per-channel mean, shape (3,) in (B, G, R) order
    avg_chans = np.mean(im, axis=(0, 1))

    # Pre-processing: enlarge the target box proportionally to gather context; p.context_amount = 0.5
    wc_z = target_sz[0] + p.context_amount * sum(target_sz)  # wc_z = w + p.context_amount * (w + h)
    hc_z = target_sz[1] + p.context_amount * sum(target_sz)  # hc_z = h + p.context_amount * (w + h)
    # Enlarge the box roughly 2x around the object and cut out a square crop of side s_z
    s_z = round(np.sqrt(wc_z * hc_z))  # round() uses banker's rounding: round(2.5) = 2, round(2.51) = 3

    # initialize the exemplar
    # tensor<(3, 127, 127), float32, cpu>; TrackerConfig defines the input z size as 127
    z_crop = get_subwindow_tracking(im, target_pos, p.exemplar_size, s_z, avg_chans)
    # unsqueeze adds the batch dimension: tensor<(1, 3, 127, 127), float32, cpu>
    z = Variable(z_crop.unsqueeze(0))
    net.template(z.to(device))  # move z to the device and extract the template features (ResNet-50 output)

    if p.windowing == 'cosine':  # default
        # outer product: ndarray of shape (p.score_size, p.score_size), i.e. <(25, 25), float64>
        window = np.outer(np.hanning(p.score_size), np.hanning(p.score_size))
    elif p.windowing == 'uniform':
        window = np.ones((p.score_size, p.score_size))
    # repeat window.flatten() p.anchor_num times: ndarray<(3125,), float64> with p.anchor_num = 5
    window = np.tile(window.flatten(), p.anchor_num)

    state['p'] = p
    state['net'] = net
    state['avg_chans'] = avg_chans
    state['window'] = window
    state['target_pos'] = target_pos  # unchanged from the input
    state['target_sz'] = target_sz    # unchanged from the input
    return state
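# --- Worked example (added; not from the original sources) ---
# The context padding above with p.context_amount = 0.5: for a 100 x 60
# target, wc_z = 100 + 0.5 * 160 = 180, hc_z = 60 + 0.5 * 160 = 140, and
# s_z = round(sqrt(180 * 140)) = 159, i.e. roughly a 2x enlarged square crop
# centered on the object, later resized to the 127 x 127 exemplar.
import numpy as np

w, h, context_amount = 100.0, 60.0, 0.5
wc_z = w + context_amount * (w + h)
hc_z = h + context_amount * (w + h)
s_z = round(np.sqrt(wc_z * hc_z))
assert (wc_z, hc_z, s_z) == (180.0, 140.0, 159)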
def trackres(cfg, outputs12, track12):
    delta = outputs12['predict'][0]
    score = outputs12['predict'][1]
    delta = delta.contiguous().view(delta.shape[0], 4, -1).data.cpu().numpy()
    score = F.softmax(score.contiguous().view(score.shape[0], 2, -1), dim=1).data[:, 1].cpu().numpy()

    anchor = generate_anchor(cfg['anchors'], 3)  # anchors: anchor cfg
    delta[:, 0, :] = delta[:, 0, :] * anchor[:, 2] + anchor[:, 0]
    delta[:, 1, :] = delta[:, 1, :] * anchor[:, 3] + anchor[:, 1]
    delta[:, 2, :] = np.exp(delta[:, 2, :]) * anchor[:, 2]
    delta[:, 3, :] = np.exp(delta[:, 3, :]) * anchor[:, 3]

    p = TrackerConfig()

    def change(r):
        return np.maximum(r, 1. / r)

    def sz(w, h):
        pad = (w + h) * 0.5
        sz2 = (w + pad) * (h + pad)
        return np.sqrt(sz2)

    def sz_wh(wh):
        pad = (wh[:, 0] + wh[:, 1]) * 0.5
        sz2 = (wh[:, 0] + pad) * (wh[:, 1] + pad)
        return np.sqrt(sz2)

    target_sz = np.array(track12['template_bbox'].cpu().numpy()[:, 2:])
    scale_x = np.ones(target_sz.shape[0])

    # size penalty
    target_sz_in_crop = target_sz * scale_x[:, None]
    s_c = change(sz(delta[:, 2, :], delta[:, 3, :]) / (sz_wh(target_sz_in_crop))[:, None])  # scale penalty
    r_c = change((target_sz_in_crop[:, 0] / target_sz_in_crop[:, 1])[:, None] /
                 (delta[:, 2, :] / delta[:, 3, :]))  # ratio penalty

    penalty = np.exp(-(r_c * s_c - 1) * p.penalty_k)
    pscore = penalty * score

    if p.windowing == 'cosine':
        window = np.outer(np.hanning(3), np.hanning(3))
    elif p.windowing == 'uniform':
        window = np.ones((3, 3))
    window = np.tile(window.flatten(), p.anchor_num)

    # cos window (motion model)
    pscore = pscore * (1 - p.window_influence) + window * p.window_influence
    best_pscore_id = np.argmax(pscore, 1)

    pred_in_crop = delta[range(best_pscore_id.shape[0]), :, best_pscore_id] / scale_x[:, None]
    # lr = penalty[range(best_pscore_id.shape[0]), best_pscore_id] * score[range(best_pscore_id.shape[0]), best_pscore_id] * p.lr  # lr for OTB

    res_cx = pred_in_crop[:, 0] + (track12['search'].shape[2] + 1) // 2
    res_cy = pred_in_crop[:, 1] + (track12['search'].shape[3] + 1) // 2
    res_w = pred_in_crop[:, 2]
    res_h = pred_in_crop[:, 3]

    target_pos = np.array([res_cx, res_cy]).T
    target_sz = np.array([res_w, res_h]).T

    def draw(image, box, name):
        image = np.transpose(image, (1, 2, 0)).copy()
        x1, y1, x2, y2 = map(lambda x: int(round(x)), box)
        image = cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0))
        cv2.imwrite(name, image)

    debug = False
    if debug:
        batch_id = 0
        img_search = track12['search'][batch_id].cpu().numpy()
        box = [res_cx[batch_id] - res_w[batch_id] / 2.,
               res_cy[batch_id] - res_h[batch_id] / 2.,
               res_cx[batch_id] + res_w[batch_id] / 2.,
               res_cy[batch_id] + res_h[batch_id] / 2.]
        # NOTE: `iter` here is expected to be a frame counter supplied by the enclosing scope
        draw(img_search, box, "debug/{:06d}_pred.jpg".format(iter))
        img_temp = track12['template'][batch_id].cpu().numpy()
        box_temp = track12['template_bbox'][batch_id].cpu().numpy()
        draw(img_temp, box_temp, "debug/{:06d}_temp.jpg".format(iter))

    im_sz = track12['search'].shape[-2:]
    avg_chans = np.mean(track12['search'].cpu().numpy(), axis=(2, 3))
    sz = p.exemplar_size
    c = (sz + 1) / 2
    context_xmin = (target_pos[:, 0] - c).round()
    context_xmax = context_xmin + sz - 1
    context_ymin = (target_pos[:, 1] - c).round()
    context_ymax = context_ymin + sz - 1
    left_pad = int(max(0., -context_xmin.min()))
    top_pad = int(max(0., -context_ymin.min()))
    right_pad = int(max(0., context_xmax.max() - im_sz[1] + 1))
    bottom_pad = int(max(0., context_ymax.max() - im_sz[0] + 1))

    context_xmin = context_xmin + left_pad
    context_xmax = context_xmax + left_pad
    context_ymin = context_ymin + top_pad
    context_ymax = context_ymax + top_pad

    # zzp: a more easy speed version
    im = track12['search'].cpu().numpy()
    k, r, c = im.shape[-3:]  # note: this rebinds c, the center offset above, which is no longer needed
    if any([top_pad, bottom_pad, left_pad, right_pad]):
        te_im = np.zeros((args.batch, k, r + top_pad + bottom_pad, c + left_pad + right_pad), np.uint8)
        te_im[:, :, top_pad:top_pad + r, left_pad:left_pad + c] = im
        if top_pad:
            te_im[:, :, 0:top_pad, left_pad:left_pad + c] = avg_chans[:, :, None, None]
        if bottom_pad:
            te_im[:, :, r + top_pad:, left_pad:left_pad + c] = avg_chans[:, :, None, None]
        if left_pad:
            te_im[:, :, :, 0:left_pad] = avg_chans[:, :, None, None]
        if right_pad:
            te_im[:, :, :, c + left_pad:] = avg_chans[:, :, None, None]
        im_patch_original = np.zeros((args.batch, k, sz, sz), np.uint8)
        for id in range(args.batch):
            im_patch_original[id] = te_im[id, :,
                                          int(context_ymin[id]):int(context_ymax[id] + 1),
                                          int(context_xmin[id]):int(context_xmax[id] + 1)]
    else:
        im_patch_original = np.zeros((args.batch, k, sz, sz), np.uint8)
        for id in range(args.batch):
            im_patch_original[id] = im[id, :,
                                       int(context_ymin[id]):int(context_ymax[id] + 1),
                                       int(context_xmin[id]):int(context_xmax[id] + 1)]
    im_patch = im_patch_original
    return im_patch
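# --- Illustrative check (added; not from the original sources) ---
# The penalty terms used in trackres: change(r) = max(r, 1/r) is symmetric
# around 1, so proposals whose scale or aspect ratio drift from the previous
# target are penalized exponentially. penalty_k, s_c and r_c below are
# made-up example values; s_c and r_c stand in for outputs of change().
import numpy as np

def change(r):
    return np.maximum(r, 1. / r)

assert change(2.0) == change(0.5) == 2.0      # 2x growth and 2x shrink treated alike
penalty_k = 0.04
s_c, r_c = 1.5, 1.2
penalty = np.exp(-(r_c * s_c - 1) * penalty_k)
assert np.isclose(penalty, np.exp(-0.032))    # 1.2 * 1.5 - 1 = 0.8; 0.8 * 0.04 = 0.032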