def __init__(self, params, model_path=None, name='SiamRPN', **kargs):
    """Build the late-fusion SiamRPN tracker and load its weights.

    Args:
        params: not read by this constructor.
        model_path: path of the checkpoint handed to ``torch.load``. The
            checkpoint may either be a bare state dict or a dict holding
            the state dict under the ``'model'`` key.
        name: tracker name forwarded to the base-class constructor.
        **kargs: accepted for call compatibility; not read here.
    """
    super(TrackerSiamRPNLate, self).__init__(name=name, is_deterministic=True)
    self.model = SiameseAlexNetLate()
    self.cuda = torch.cuda.is_available()
    self.device = torch.device('cuda:0' if self.cuda else 'cpu')

    # Deserialize the checkpoint once and reuse it; the file was previously
    # read from disk a second time just to fetch the state dict.
    checkpoint = torch.load(model_path, map_location=self.device)
    if 'model' in checkpoint:
        self.model.load_state_dict(checkpoint['model'])
    else:
        self.model.load_state_dict(checkpoint)

    if self.cuda:
        self.model = self.model.cuda()
    self.model.eval()

    self.transforms = transforms.Compose([
        ToTensor()
    ])

    valid_scope = 2 * config.valid_scope + 1
    self.anchors = util.generate_anchors(
        config.total_stride, config.anchor_base_size, config.anchor_scales,
        config.anchor_ratios, valid_scope)
    # 2-D Hanning window, repeated once per anchor and flattened so it can
    # be blended element-wise with the per-anchor scores.
    hanning = np.hanning(config.score_size)
    self.window = np.tile(np.outer(hanning, hanning)[None, :],
                          [config.anchor_num, 1, 1]).flatten()

    self.data_loader = TrackerRGBTDataLoader()
    self.old_loader = TrackerDataLoader()
class TrackerSiamRPNLate(Tracker):
    """SiamRPN tracker that drives a ``SiameseAlexNetLate`` model.

    The tracker keeps the current target as a centre/size box, crops an
    exemplar on ``init`` and a search region on every ``update``, and picks
    the best anchor after applying scale/ratio penalties and a cosine window.

    NOTE(review): both ``init`` and ``update`` replace the IR tensor with
    zeros right before calling the model, so IR pixel values never reach the
    network here. This looks like a deliberate ablation; confirm.
    """

    def __init__(self, params, model_path=None, name='SiamRPN', **kargs):
        """Build the model, load its weights and precompute anchors/window.

        Args:
            params: not read by this constructor.
            model_path: path of the checkpoint handed to ``torch.load``. The
                checkpoint may either be a bare state dict or a dict holding
                the state dict under the ``'model'`` key.
            name: tracker name forwarded to the base-class constructor.
            **kargs: accepted for call compatibility; not read here.
        """
        super(TrackerSiamRPNLate, self).__init__(name=name, is_deterministic=True)
        self.model = SiameseAlexNetLate()
        self.cuda = torch.cuda.is_available()
        self.device = torch.device('cuda:0' if self.cuda else 'cpu')

        # Deserialize the checkpoint once and reuse it; the file was
        # previously read from disk a second time to fetch the state dict.
        checkpoint = torch.load(model_path, map_location=self.device)
        if 'model' in checkpoint:
            self.model.load_state_dict(checkpoint['model'])
        else:
            self.model.load_state_dict(checkpoint)

        if self.cuda:
            self.model = self.model.cuda()
        self.model.eval()

        self.transforms = transforms.Compose([
            ToTensor()
        ])

        valid_scope = 2 * config.valid_scope + 1
        self.anchors = util.generate_anchors(
            config.total_stride, config.anchor_base_size, config.anchor_scales,
            config.anchor_ratios, valid_scope)
        # 2-D Hanning window, repeated once per anchor and flattened so it
        # can be blended element-wise with the per-anchor scores.
        hanning = np.hanning(config.score_size)
        self.window = np.tile(np.outer(hanning, hanning)[None, :],
                              [config.anchor_num, 1, 1]).flatten()

        self.data_loader = TrackerRGBTDataLoader()
        self.old_loader = TrackerDataLoader()

    def _cosine_window(self, size):
        """Return a float32 2-D Hanning window of shape ``size`` that sums to 1."""
        cos_window = np.hanning(int(size[0]))[:, np.newaxis].dot(
            np.hanning(int(size[1]))[np.newaxis, :])
        cos_window = cos_window.astype(np.float32)
        cos_window /= np.sum(cos_window)
        return cos_window

    def init(self, exemplar_rgb_img, exemplar_ir_img, bbox):
        """Initialize the tracker from the first frame pair.

        Args:
            exemplar_rgb_img: RGB frame containing the target.
            exemplar_ir_img: IR frame containing the target.
            bbox: one-based bounding box [x, y, width, height].
        """
        center_x = bbox[0] + bbox[2] / 2 - 1 / 2
        center_y = bbox[1] + bbox[3] / 2 - 1 / 2
        self.pos = np.array([center_x, center_y])  # center x, center y, zero based
        self.target_sz = np.array([bbox[2], bbox[3]])  # width, height
        self.bbox = np.array([center_x, center_y, bbox[2], bbox[3]])
        self.origin_target_sz = np.array([bbox[2], bbox[3]])

        # The per-channel mean is passed to the loaders together with the crop.
        self.img_mean = np.mean(exemplar_rgb_img, axis=(0, 1))
        exemplar_rgb_img = np.asarray(exemplar_rgb_img)
        exemplar_rgb_img, _, _ = self.old_loader.get_exemplar_image(
            exemplar_rgb_img, self.bbox, config.template_img_size,
            config.context_amount, self.img_mean)

        self.img_mean_ir = np.mean(exemplar_ir_img, axis=(0, 1))
        exemplar_ir_img, _, _ = self.data_loader.get_exemplar_image(
            exemplar_ir_img, self.bbox, config.template_img_size,
            config.context_amount, self.img_mean_ir)

        # get exemplar feature
        exemplar_rgb_img = self.transforms(exemplar_rgb_img)[None, :, :, :]
        exemplar_ir_img = self.transforms(exemplar_ir_img)[None, :, :, :]
        # The IR crop is only used for its shape; the model receives zeros.
        exemplar_ir_img = torch.from_numpy(np.zeros(exemplar_ir_img.size())).float()

        if self.cuda:
            exemplar_rgb_img = exemplar_rgb_img.cuda()
            exemplar_ir_img = exemplar_ir_img.cuda()
        # The CPU path used to pass the undefined name ``exemplar_img`` and
        # raised NameError; both devices now share one call.
        with torch.no_grad():
            self.model.track_init(exemplar_rgb_img, exemplar_ir_img)
        print('bbox', self.bbox)

    def update(self, instance_rgb_img, instance_ir_img):
        """Track the target in a new frame pair, based on the previous state.

        Args:
            instance_rgb_img: current RGB frame.
            instance_ir_img: current IR frame.

        Returns:
            numpy array ``[x1, y1, width, height]`` where ``(x1, y1)`` is the
            predicted centre shifted by half the predicted size.
        """
        instance_rgb_img = np.asarray(instance_rgb_img)
        frame = instance_rgb_img  # kept for the image bounds used when clipping

        self.img_mean = np.mean(instance_rgb_img, axis=(0, 1))
        instance_rgb_img, _, _, scale_x = self.old_loader.get_instance_image(
            instance_rgb_img, self.bbox, config.template_img_size,
            config.detection_img_size, config.context_amount, self.img_mean)

        self.img_mean_ir = np.mean(instance_ir_img, axis=(0, 1))
        instance_ir_img, _, _, _ = self.data_loader.get_instance_image(
            instance_ir_img, self.bbox, config.template_img_size,
            config.detection_img_size, config.context_amount, self.img_mean_ir)

        instance_rgb_img = self.transforms(instance_rgb_img)[None, :, :, :]
        instance_ir_img = self.transforms(instance_ir_img)[None, :, :, :]
        # The IR crop is only used for its shape; the model receives zeros.
        instance_ir_img = torch.from_numpy(np.zeros(instance_ir_img.size())).float()

        if self.cuda:
            instance_rgb_img = instance_rgb_img.cuda()
            instance_ir_img = instance_ir_img.cuda()
        # Pure inference: no autograd graph is needed.
        with torch.no_grad():
            pred_score, pred_regression = self.model.track(
                instance_rgb_img, instance_ir_img)

        pred_conf = pred_score.reshape(-1, 2, config.size).permute(0, 2, 1)
        pred_offset = pred_regression.reshape(-1, 4, config.size).permute(0, 2, 1)

        delta = pred_offset[0].cpu().detach().numpy()
        box_pred = util.box_transform_inv(self.anchors, delta)
        score_pred = F.softmax(pred_conf, dim=2)[0, :, 1].cpu().detach().numpy()

        s_c = util.change(util.sz(box_pred[:, 2], box_pred[:, 3])
                          / (util.sz_wh(self.target_sz * scale_x)))  # scale penalty
        r_c = util.change((self.target_sz[0] / self.target_sz[1])
                          / (box_pred[:, 2] / box_pred[:, 3]))  # ratio penalty
        penalty = np.exp(-(r_c * s_c - 1.) * config.penalty_k)
        pscore = penalty * score_pred
        # Blend the penalized score with the cosine window.
        pscore = pscore * (1 - config.window_influence) + self.window * config.window_influence
        best_pscore_id = np.argmax(pscore)

        # Map the winning box from crop scale back to frame scale.
        target = box_pred[best_pscore_id, :] / scale_x
        lr = penalty[best_pscore_id] * score_pred[best_pscore_id] * config.lr_box

        res_x = np.clip(target[0] + self.pos[0], 0, frame.shape[1])
        res_y = np.clip(target[1] + self.pos[1], 0, frame.shape[0])
        # The size is smoothed with rate ``lr`` and bounded relative to the
        # initial target size.
        res_w = np.clip(self.target_sz[0] * (1 - lr) + target[2] * lr,
                        config.min_scale * self.origin_target_sz[0],
                        config.max_scale * self.origin_target_sz[0])
        res_h = np.clip(self.target_sz[1] * (1 - lr) + target[3] * lr,
                        config.min_scale * self.origin_target_sz[1],
                        config.max_scale * self.origin_target_sz[1])

        self.pos = np.array([res_x, res_y])
        self.target_sz = np.array([res_w, res_h])
        bbox = np.array([res_x, res_y, res_w, res_h])
        # State used for the next crop: centre/size clipped to the frame,
        # with a minimum side of 10.
        self.bbox = (
            np.clip(bbox[0], 0, frame.shape[1]).astype(np.float64),
            np.clip(bbox[1], 0, frame.shape[0]).astype(np.float64),
            np.clip(bbox[2], 10, frame.shape[1]).astype(np.float64),
            np.clip(bbox[3], 10, frame.shape[0]).astype(np.float64))

        res_x = res_x - res_w / 2  # x -> x1
        res_y = res_y - res_h / 2  # y -> y1
        bbox = np.array([res_x, res_y, res_w, res_h])
        return bbox
class TrackerSiamRPNEval(Tracker):
    """SiamRPN evaluation tracker for RGB-only or RGB + IR input.

    With ``modality == 1`` a ``SiameseAlexNet`` is built and only the RGB
    crop is passed to the model; otherwise a ``SiameseAlexNetMultimodal`` is
    built and receives both the RGB and the IR crop.
    """

    def __init__(self, modality=1, model_path=None, **kargs):
        """Build the model, load its weights and precompute anchors/window.

        Args:
            modality: ``1`` selects the single-input model; any other value
                selects the multimodal model.
            model_path: path of the checkpoint handed to ``torch.load``. The
                checkpoint may either be a bare state dict or a dict holding
                the state dict under the ``'model'`` key.
            **kargs: accepted for call compatibility; not read here.
        """
        super(TrackerSiamRPNEval, self).__init__(name='SiamRPN', is_deterministic=True)
        self.modality = modality
        if modality == 1:
            self.model = SiameseAlexNet()
        else:
            self.model = SiameseAlexNetMultimodal()

        self.cuda = torch.cuda.is_available()
        self.device = torch.device('cuda:0' if self.cuda else 'cpu')

        # Deserialize the checkpoint once and reuse it; the file was
        # previously read from disk a second time to fetch the state dict.
        checkpoint = torch.load(model_path, map_location=self.device)
        if 'model' in checkpoint:
            self.model.load_state_dict(checkpoint['model'])
        else:
            self.model.load_state_dict(checkpoint)

        if self.cuda:
            self.model = self.model.cuda()
        self.model.eval()

        self.transforms = transforms.Compose([
            ToTensor()
        ])

        valid_scope = 2 * config.valid_scope + 1
        self.anchors = util.generate_anchors(
            config.total_stride, config.anchor_base_size, config.anchor_scales,
            config.anchor_ratios, valid_scope)
        # 2-D Hanning window, repeated once per anchor and flattened so it
        # can be blended element-wise with the per-anchor scores.
        hanning = np.hanning(config.score_size)
        self.window = np.tile(np.outer(hanning, hanning)[None, :],
                              [config.anchor_num, 1, 1]).flatten()

        self.data_loader = TrackerRGBTDataLoader()

    def _cosine_window(self, size):
        """Return a float32 2-D Hanning window of shape ``size`` that sums to 1."""
        cos_window = np.hanning(int(size[0]))[:, np.newaxis].dot(
            np.hanning(int(size[1]))[np.newaxis, :])
        cos_window = cos_window.astype(np.float32)
        cos_window /= np.sum(cos_window)
        return cos_window

    def init(self, frame_rgb, frame_ir, bbox):
        """Initialize the tracker from the first frame pair.

        Args:
            frame_rgb: RGB frame containing the target.
            frame_ir: IR frame containing the target.
            bbox: one-based bounding box [x, y, width, height].
        """
        frame_rgb = np.asarray(frame_rgb)
        frame_ir = np.asarray(frame_ir)

        center_x = bbox[0] + bbox[2] / 2 - 1 / 2
        center_y = bbox[1] + bbox[3] / 2 - 1 / 2
        self.pos = np.array([center_x, center_y])  # center x, center y, zero based
        self.target_sz = np.array([bbox[2], bbox[3]])  # width, height
        self.bbox = np.array([center_x, center_y, bbox[2], bbox[3]])
        self.origin_target_sz = np.array([bbox[2], bbox[3]])

        # Means are computed on the first frame only and reused by update().
        self.img_rgb_mean = np.mean(frame_rgb, axis=(0, 1))
        self.img_ir_mean = np.mean(frame_ir)

        exemplar_img_rgb, _, _ = self.data_loader.get_exemplar_image(
            frame_rgb, self.bbox, config.template_img_size,
            config.context_amount, self.img_rgb_mean)
        exemplar_img_ir, _, _ = self.data_loader.get_exemplar_image(
            frame_ir, self.bbox, config.template_img_size,
            config.context_amount, self.img_ir_mean)

        # get exemplar feature
        exemplar_img_rgb = self.transforms(exemplar_img_rgb)[None, :, :, :]
        exemplar_img_ir = self.transforms(exemplar_img_ir)[None, :, :, :]
        if self.cuda:
            exemplar_img_rgb = exemplar_img_rgb.cuda()
            exemplar_img_ir = exemplar_img_ir.cuda()

        # Pure inference: no autograd graph is needed.
        with torch.no_grad():
            if self.modality == 1:
                self.model.track_init(exemplar_img_rgb)
            else:
                self.model.track_init(exemplar_img_rgb, exemplar_img_ir)

    def update(self, frame_rgb, frame_ir):
        """Track the target in a new frame pair, based on the previous state.

        Args:
            frame_rgb: current RGB frame.
            frame_ir: current IR frame.

        Returns:
            numpy array ``[x1, y1, width, height]`` where ``(x1, y1)`` is the
            predicted centre shifted by half the predicted size.
        """
        frame_rgb = np.asarray(frame_rgb)
        frame_ir = np.asarray(frame_ir)

        instance_img_rgb, _, _, scale_x = self.data_loader.get_instance_image(
            frame_rgb, self.bbox, config.template_img_size,
            config.detection_img_size, config.context_amount, self.img_rgb_mean)
        # ``scale_x`` is rebound here, so the value from the IR crop is the
        # one used below (unchanged from the original behaviour).
        instance_img_ir, _, _, scale_x = self.data_loader.get_instance_image(
            frame_ir, self.bbox, config.template_img_size,
            config.detection_img_size, config.context_amount, self.img_ir_mean)

        instance_img_rgb = self.transforms(instance_img_rgb)[None, :, :, :]
        instance_img_ir = self.transforms(instance_img_ir)[None, :, :, :]
        if self.cuda:
            instance_img_rgb = instance_img_rgb.cuda()
            instance_img_ir = instance_img_ir.cuda()

        # Pure inference: no autograd graph is needed.
        with torch.no_grad():
            if self.modality == 1:
                pred_score, pred_regression = self.model.track(instance_img_rgb)
            else:
                pred_score, pred_regression = self.model.track(
                    instance_img_rgb, instance_img_ir)

        pred_conf = pred_score.reshape(-1, 2, config.size).permute(0, 2, 1)
        pred_offset = pred_regression.reshape(-1, 4, config.size).permute(0, 2, 1)

        delta = pred_offset[0].cpu().detach().numpy()
        box_pred = util.box_transform_inv(self.anchors, delta)
        score_pred = F.softmax(pred_conf, dim=2)[0, :, 1].cpu().detach().numpy()

        s_c = util.change(util.sz(box_pred[:, 2], box_pred[:, 3])
                          / (util.sz_wh(self.target_sz * scale_x)))  # scale penalty
        r_c = util.change((self.target_sz[0] / self.target_sz[1])
                          / (box_pred[:, 2] / box_pred[:, 3]))  # ratio penalty
        penalty = np.exp(-(r_c * s_c - 1.) * config.penalty_k)
        pscore = penalty * score_pred
        # Blend the penalized score with the cosine window.
        pscore = pscore * (1 - config.window_influence) + self.window * config.window_influence
        best_pscore_id = np.argmax(pscore)

        # Map the winning box from crop scale back to frame scale.
        target = box_pred[best_pscore_id, :] / scale_x
        lr = penalty[best_pscore_id] * score_pred[best_pscore_id] * config.lr_box

        res_x = np.clip(target[0] + self.pos[0], 0, frame_rgb.shape[1])
        res_y = np.clip(target[1] + self.pos[1], 0, frame_rgb.shape[0])
        # The size is smoothed with rate ``lr`` and bounded relative to the
        # initial target size.
        res_w = np.clip(self.target_sz[0] * (1 - lr) + target[2] * lr,
                        config.min_scale * self.origin_target_sz[0],
                        config.max_scale * self.origin_target_sz[0])
        res_h = np.clip(self.target_sz[1] * (1 - lr) + target[3] * lr,
                        config.min_scale * self.origin_target_sz[1],
                        config.max_scale * self.origin_target_sz[1])

        self.pos = np.array([res_x, res_y])
        self.target_sz = np.array([res_w, res_h])
        bbox = np.array([res_x, res_y, res_w, res_h])
        # State used for the next crop: centre/size clipped to the frame,
        # with a minimum side of 10.
        self.bbox = (
            np.clip(bbox[0], 0, frame_rgb.shape[1]).astype(np.float64),
            np.clip(bbox[1], 0, frame_rgb.shape[0]).astype(np.float64),
            np.clip(bbox[2], 10, frame_rgb.shape[1]).astype(np.float64),
            np.clip(bbox[3], 10, frame_rgb.shape[0]).astype(np.float64))

        res_x = res_x - res_w / 2  # x -> x1
        res_y = res_y - res_h / 2  # y -> y1
        bbox = np.array([res_x, res_y, res_w, res_h])
        return bbox

    def track(self, img_rgb_files, img_ir_files, box, visualize=False):
        """Run the tracker over paired RGB/IR image sequences.

        Args:
            img_rgb_files: sequence of RGB image paths.
            img_ir_files: sequence of IR image paths, paired with the RGB
                paths by position.
            box: initial bounding box, stored as the result for frame 0 and
                passed to ``init``.
            visualize: when true, every frame is handed to ``show_frame``.

        Returns:
            ``(boxes, times)``: an array of shape ``(len(img_rgb_files), 4)``
            with one box per frame, and an array with the per-frame time in
            seconds spent in ``init``/``update``.
        """
        frame_num = len(img_rgb_files)
        boxes = np.zeros((frame_num, 4))
        boxes[0] = box
        times = np.zeros(frame_num)
        plotter = None

        for f, (img_rgb_file, img_ir_file) in enumerate(zip(img_rgb_files, img_ir_files)):
            # Context managers close the underlying file handles promptly.
            with Image.open(img_rgb_file) as rgb_file:
                img_rgb = np.asarray(rgb_file.convert('RGB')) / 255.
            with Image.open(img_ir_file) as ir_file:
                img_ir = np.asarray(ir_file.convert('L')) / 255.

            start_time = time.time()
            if f == 0:
                self.init(img_rgb, img_ir, box)
            else:
                boxes[f, :] = self.update(img_rgb, img_ir)
            times[f] = time.time() - start_time

            if visualize:
                plotter = show_frame(img_ir, img_rgb, self.name, plotter, boxes[f, :])

        return boxes, times