def get_image_info(self, im_id):
    """Build the annotation dict for the image at index ``im_id``.

    The stored annotation ``self.anno_list[im_id]`` is compared against 255
    to obtain the target mask, and the bounding box is derived from that
    mask with ``masks_to_bboxes(..., fmt='t')``.

    :param im_id: Index into ``self.anno_list``.
    :return: dict with keys ``'bbox'``, ``'mask'``, ``'valid'`` and
        ``'visible'``.
    """
    raw_anno = self.anno_list[im_id]
    # Entries equal to 255 are treated as the target.
    target_mask = torch.Tensor(raw_anno == 255)

    box = masks_to_bboxes(target_mask, fmt='t').view(4, )

    # The annotation counts as valid only if box entries 2 and 3 are both
    # positive (presumably width and height -- confirm against
    # masks_to_bboxes).
    is_valid = (box[2] > 0) & (box[3] > 0)

    return dict(bbox=box,
                mask=target_mask,
                valid=is_valid,
                visible=is_valid.clone().byte())
def get_image_info(self, im_id):
    """Build the annotation dict for the image at index ``im_id``.

    The mask is read with ``imread_indexed`` from
    ``<self.root>/Imgs/<self.image_list[im_id]>.png``; entries equal to 255
    form the target mask, from which the bounding box is derived.

    :param im_id: Index into ``self.image_list``.
    :return: dict with keys ``'bbox'``, ``'mask'``, ``'valid'`` and
        ``'visible'``.
    """
    png_name = '{}.png'.format(self.image_list[im_id])
    png_path = os.path.join(self.root, 'Imgs', png_name)
    indexed = imread_indexed(png_path)

    # Entries equal to 255 are treated as the target.
    target_mask = torch.Tensor(indexed == 255)

    box = masks_to_bboxes(target_mask, fmt='t').view(4, )

    # Valid only if box entries 2 and 3 are both positive (presumably width
    # and height -- confirm against masks_to_bboxes).
    is_valid = (box[2] > 0) & (box[3] > 0)

    return dict(bbox=box,
                mask=target_mask,
                valid=is_valid,
                visible=is_valid.clone().byte())
def get_image_info(self, im_id):
    """Build the annotation dict for one object instance of one image.

    ``self.image_list[im_id]`` yields an ``(image_id, instance_id)`` pair.
    The annotation file ``self.anno_file_list[image_id]`` is loaded with
    ``loadmat`` and converted by ``self._get_mask_from_mat``; entries equal
    to ``instance_id`` form the target mask.

    :param im_id: Index into ``self.image_list``.
    :return: dict with keys ``'bbox'``, ``'mask'``, ``'valid'`` and
        ``'visible'``.
    """
    image_id, instance_id = self.image_list[im_id]

    label_map = self._get_mask_from_mat(loadmat(self.anno_file_list[image_id]))
    # Keep only the entries belonging to the requested instance.
    instance_mask = (label_map == instance_id).float()

    box = masks_to_bboxes(instance_mask, fmt='t')

    # Valid only if box entries 2 and 3 are both positive (presumably width
    # and height -- confirm against masks_to_bboxes).
    is_valid = (box[2] > 0) & (box[3] > 0)

    return dict(bbox=box,
                mask=instance_mask,
                valid=is_valid,
                visible=is_valid.clone().byte())
def get_frames(self, seq_id, frame_ids, anno=None):
    """Create one frame per entry of ``frame_ids`` from a single base image.

    The image and its annotation are fetched from
    ``self.base_image_dataset``, copied once per requested frame, and then
    passed through ``self.transform`` (if set) with ``joint=False``.

    :param seq_id: Index of the image in the base image dataset.
    :param frame_ids: Iterable of frame ids; only the number of entries is
        used, the values themselves are not read.
    :param anno: Optional annotation forwarded to the base dataset.
    :return: tuple ``(frame_list, anno_frames, object_meta)``.
    """
    # The meta returned by the base dataset is not used; a fresh one is
    # built at the end.
    frame, anno, _ = self.base_image_dataset.get_image(seq_id, anno=anno)

    frame_list = [frame.copy() for _ in frame_ids]

    if anno is None:
        anno = self.get_sequence_info(seq_id)

    # Replicate element 0 of every annotation entry for each frame.
    anno_frames = {
        key: [value[0].clone() for _ in frame_ids]
        for key, value in anno.items()
    }

    if self.transform is not None:
        if 'mask' in anno_frames:
            frame_list, boxes, masks = self.transform(
                image=frame_list,
                bbox=anno_frames['bbox'],
                mask=anno_frames['mask'],
                joint=False)
            anno_frames['bbox'] = boxes
            anno_frames['mask'] = masks
            # Boxes are recomputed from the transformed masks, replacing
            # the ones returned by the transform.
            anno_frames['bbox'] = [masks_to_bboxes(m, fmt='t') for m in masks]
        else:
            frame_list, boxes = self.transform(
                image=frame_list,
                bbox=anno_frames['bbox'],
                joint=False)
            anno_frames['bbox'] = boxes

    class_name = self.get_class_name(seq_id)
    object_meta = OrderedDict({
        'object_class_name': class_name,
        'motion_class': None,
        'major_class': None,
        'root_class': None,
        'motion_adverb': None
    })

    return frame_list, anno_frames, object_meta
def run_vot2020(self, debug=None, visdom_info=None):
    """Run tracker on VOT.

    Configures the tracker parameters, opens a VOT handle in ``"mask"`` or
    ``"rectangle"`` mode depending on
    ``tracker.predicts_segmentation_mask()``, initializes the tracker on the
    first frame and then reports one prediction per subsequent frame until
    ``handle.frame()`` returns a falsy value.

    :param debug: Debug level. When ``None``, ``debug`` and
        ``visualization`` are taken from the tracker parameters (defaults 0
        and ``False``); otherwise visualization is ``bool(debug)``.
    :param visdom_info: Forwarded to ``self._init_visdom``.
    :return: ``None``.
    """
    params = self.get_parameters()
    params.tracker_name = self.name
    params.param_name = self.parameter_name
    params.run_id = self.run_id

    if debug is None:
        debug_level = getattr(params, 'debug', 0)
        show_visualization = getattr(params, 'visualization', False)
    else:
        debug_level = debug
        show_visualization = bool(debug)

    params.visualization = show_visualization
    params.debug = debug_level

    self._init_visdom(visdom_info, debug_level)

    tracker = self.create_tracker(params)
    tracker.initialize_features()

    output_segmentation = tracker.predicts_segmentation_mask()

    import pytracking.evaluation.vot2020 as vot

    handle = vot.VOT("mask" if output_segmentation else "rectangle")

    init_region = handle.region()

    first_path = handle.frame()
    if not first_path:
        # No frame available: nothing to track.
        return

    image = self._read_image(first_path)

    if output_segmentation:
        # The size is passed as (image.shape[1], image.shape[0]).
        init_mask = vot.make_full_size(init_region,
                                       (image.shape[1], image.shape[0]))
        init_bbox = masks_to_bboxes(torch.from_numpy(init_mask),
                                    fmt='t').squeeze().tolist()
    else:
        init_mask = None
        # Only the first four entries of the region are used.
        init_bbox = [init_region[k] for k in range(4)]

    out = tracker.initialize(image, {'init_mask': init_mask,
                                     'init_bbox': init_bbox})
    prev_output = OrderedDict({} if out is None else out)

    # Track
    while True:
        frame_path = handle.frame()
        if not frame_path:
            break

        image = self._read_image(frame_path)

        info = OrderedDict(previous_output=prev_output)

        out = tracker.track(image, info)
        prev_output = OrderedDict(out)

        if output_segmentation:
            pred = out['segmentation'].astype(np.uint8)
        else:
            pred = vot.Rectangle(*out['target_bbox'])
        # The second argument is a constant 1.0 (presumably a confidence
        # value -- confirm against the vot module).
        handle.report(pred, 1.0)

        if 'segmentation' in out:
            segmentation = out['segmentation']
        else:
            segmentation = None

        # Visdom drawing takes precedence over local visualization.
        if self.visdom is not None:
            tracker.visdom_draw_tracking(image, out['target_bbox'],
                                         segmentation)
        elif tracker.params.visualization:
            self.visualize(image, out['target_bbox'], segmentation)
def get_frames(self, sample_id, frame_ids, anno=None):
    """ Fetch frames with the given ids.

    :param sample_id:  Sample to get.
    :param frame_ids:  List of frame indices in the sequence belonging to the sample_id
    :param anno:       Optional sequence info; when None it is obtained from
                       self.get_sequence_info(sample_id)
    :return: tuple (images, anno_frames, object_meta):
        images:       List of loaded images, one per frame id
        anno_frames:  dict with keys 'bbox', 'mask', 'object_sizes', 'visible', 'valid',
                      each holding one entry per frame id. In multi-object mode every
                      'bbox' entry is a dict mapping object id to box; otherwise it is
                      the box of the single object.
        object_meta:  dict with the 'sequence', 'frame_shape', 'frame_names' and
                      'object_ids' entries of the sequence info
    """
    seq_name, obj_ids = self._samples[sample_id]

    if anno is None:
        meta = self.get_sequence_info(sample_id)
    else:
        meta = anno
    frame_names = meta['frame_names']

    # All images are loaded first, then all annotation masks.
    images = []
    for f in frame_ids:
        images.append(self._load_image(self._jpeg_path / seq_name / (frame_names[f] + ".jpg")))

    labels = []
    for f in frame_ids:
        labels.append(self._load_anno(self._anno_path / seq_name / (frame_names[f] + ".png")))

    # Generate bounding boxes for the requested objects
    bboxes = []
    for label in labels:
        label_t = torch.from_numpy(label.squeeze())
        per_object = {}
        for obj_id in obj_ids:
            box = masks_to_bboxes(label_t == int(obj_id), fmt='t')
            # Prints a marker when box entry 3 or entry 2 is zero.
            if box[3] == 0 or box[2] == 0:
                print("!")
            per_object[obj_id] = box
        bboxes.append(per_object)

    # Insert empty bboxes for missing object ids
    for per_object in bboxes:
        for obj_id in obj_ids:
            if obj_id not in per_object:
                per_object[obj_id] = torch.zeros(4, dtype=torch.float32)

    # Remap to object id 1, if requested - for training
    if self.multiobj:
        labels = [torch.Tensor(label) for label in labels]
    else:
        assert len(obj_ids) == 1
        obj_id = obj_ids[0]
        labels = [torch.Tensor(label == int(obj_id)) for label in labels]
        bboxes = [per_object[obj_id] for per_object in bboxes]

    object_meta = {}
    for key in ['sequence', 'frame_shape', 'frame_names', 'object_ids']:
        object_meta[key] = meta[key]

    anno_frames = {'bbox': bboxes, 'mask': labels}
    for key in ['object_sizes', 'visible', 'valid']:
        per_sequence = meta[key]
        anno_frames[key] = [per_sequence[f_id, ...].clone() for f_id in frame_ids]

    return images, anno_frames, object_meta
def get_frames(self, seq_id, frame_ids, anno=None):
    """Create frames by pasting a foreground object onto a random background.

    A foreground image is fetched from ``self.foreground_image_dataset`` and
    a randomly chosen background from ``self.background_image_dataset``.
    Both are replicated once per entry of ``frame_ids`` and passed through
    their respective transforms (if set). For every frame the foreground is
    then pasted onto the background at a random location via
    ``self._paste_target``, and the box is recomputed from the resulting mask.

    :param seq_id: Index of the image in the foreground image dataset.
    :param frame_ids: List of frame ids; only its length is used.
    :param anno: Optional annotation forwarded to the foreground dataset.
    :return: tuple ``(fg_frame_list, fg_anno_frames, object_meta)``.
    """
    # Handle foreground
    fg_frame, fg_anno, _ = self.foreground_image_dataset.get_image(seq_id, anno=anno)

    fg_frame_list = [fg_frame.copy() for _ in frame_ids]

    # Foreground annotations are indexed with [0] before being replicated.
    fg_anno_frames = {
        key: [value[0].clone() for _ in frame_ids]
        for key, value in fg_anno.items()
    }

    if self.foreground_transform is not None:
        fg_frame_list, fg_boxes, fg_masks = self.foreground_transform(
            image=fg_frame_list,
            bbox=fg_anno_frames['bbox'],
            mask=fg_anno_frames['mask'],
            joint=False)
        fg_anno_frames['bbox'] = fg_boxes
        fg_anno_frames['mask'] = fg_masks

    # Sample a random background
    last_bg_index = self.background_image_dataset.get_num_images() - 1
    bg_seq_id = random.randint(0, last_bg_index)

    bg_frame, bg_anno, _ = self.background_image_dataset.get_image(bg_seq_id)

    bg_frame_list = [bg_frame.copy() for _ in frame_ids]

    # Note: Since we get bg anno from image dataset, it does not has frame dimension
    bg_anno_frames = {
        key: [value.clone() for _ in frame_ids]
        for key, value in bg_anno.items()
    }

    if self.background_transform is not None:
        if 'mask' in bg_anno_frames:
            bg_frame_list, bg_boxes, bg_masks = self.background_transform(
                image=bg_frame_list,
                bbox=bg_anno_frames['bbox'],
                mask=bg_anno_frames['mask'],
                joint=False)
            bg_anno_frames['bbox'] = bg_boxes
            bg_anno_frames['mask'] = bg_masks
        else:
            bg_frame_list, bg_boxes = self.background_transform(
                image=bg_frame_list,
                bbox=bg_anno_frames['bbox'],
                joint=False)
            bg_anno_frames['bbox'] = bg_boxes

    for idx in range(len(frame_ids)):
        background = bg_frame_list[idx]
        current_mask = fg_anno_frames['mask'][idx]

        # To be safe, get target bb for the mask
        target_box = masks_to_bboxes(current_mask, fmt='t')

        # Row is drawn before column; the paste location is (x, y).
        loc_y = random.randint(0, background.shape[0] - 1)
        loc_x = random.randint(0, background.shape[1] - 1)

        pasted_frame, pasted_mask = self._paste_target(
            fg_frame_list[idx], target_box, current_mask, background,
            (loc_x, loc_y))
        fg_frame_list[idx] = pasted_frame
        fg_anno_frames['mask'][idx] = pasted_mask

        fg_anno_frames['bbox'][idx] = masks_to_bboxes(
            fg_anno_frames['mask'][idx], fmt='t')

    class_name = self.get_class_name(seq_id)
    object_meta = OrderedDict({
        'object_class_name': class_name,
        'motion_class': None,
        'major_class': None,
        'root_class': None,
        'motion_adverb': None
    })

    return fg_frame_list, fg_anno_frames, object_meta