def update(self, image):
    """
    Track the target into a new frame and return its box.

    Both the stored previous frame and the new frame are cropped around
    the previously predicted box, rescaled with ``self.scale``, converted
    with ``self.transform_tensor`` and handed to ``self._get_rect``.

    Side effects: ``self.opts``, ``self.curr_img``, ``self.prev_img`` and
    ``self.prev_box`` are overwritten.

    Returns the predicted box as ``[xmin, ymin, width, height]``.
    """
    frame = np.array(image)
    if frame.ndim == 2:
        # two-dimensional (single channel) input: expand to three channels
        frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)

    # both crops are taken at the location of the previous box
    last_box = self.prev_box
    prev_crop, prev_opts = crop_sample({'image': self.prev_img,
                                        'bb': last_box})
    curr_crop, curr_opts = crop_sample({'image': frame, 'bb': last_box})
    self.opts = curr_opts
    self.curr_img = frame

    scaled_curr = self.scale(curr_crop, curr_opts)['image']
    scaled_prev = self.scale(prev_crop, prev_opts)['image']
    net_input = self.transform_tensor({'previmg': scaled_prev,
                                       'currimg': scaled_curr})

    # forward pass yields the box as [xmin, ymin, xmax, ymax]
    corners = np.array(self._get_rect(net_input))

    # remember the corner-format box and this frame for the next call
    self.prev_img = frame
    self.prev_box = np.copy(corners)

    # turn (xmax, ymax) into (width, height) in place, as expected by the
    # got10k toolkit evaluation
    corners[2:4] -= corners[0:2]
    return corners
def get_sample(self, idx):
    """
    Returns sample without transformation for visualization.

    The sample consists of the resized previous and current frame crops
    (at the regular context and at a larger context, keys ``*_x2``)
    together with the target box that is passed to the network. Per the
    original notes, bounding box values are normalized between 0 and 1
    with respect to the target frame and then scaled by a factor of 10
    (that step happens inside ``Rescale`` -- not visible here).

    Args:
        idx: index of the frame pair in the dataset.

    Returns:
        tuple ``(training_sample, opts_curr)`` where ``training_sample``
        is a dict with keys ``previmg``, ``currimg``, ``previmg_x2``,
        ``currimg_x2`` and ``currbb``, and ``opts_curr`` holds the crop
        options of the current frame (``edge_spacing_x``,
        ``edge_spacing_y``, ``search_location``, ``search_region``).
    """
    # Fetch every frame exactly once; the original code asked the dataset
    # for the same two frames five times just to read different fields.
    curr_orig = self.get_orig_sample(idx, 1)
    prev_orig = self.get_orig_sample(idx, 0)
    curr_img = curr_orig['image']
    currbb = curr_orig['bb']
    prevbb = prev_orig['bb']

    # The current frame is cropped around the *previous* box.
    bbox_curr_shift = BoundingBox(prevbb[0], prevbb[1], prevbb[2], prevbb[3])
    (rand_search_region, rand_search_location,
     edge_spacing_x, edge_spacing_y) = cropPadImage(bbox_curr_shift, curr_img)

    # Express the ground-truth box relative to the cropped search region.
    bbox_curr_gt = BoundingBox(currbb[0], currbb[1], currbb[2], currbb[3])
    bbox_gt_recentered = BoundingBox(0, 0, 0, 0)
    bbox_gt_recentered = bbox_curr_gt.recenter(rand_search_location,
                                               edge_spacing_x,
                                               edge_spacing_y,
                                               bbox_gt_recentered)

    # get larger context: same box, context factor raised to 4
    bbox_curr_shift.kContextFactor = 4
    (rand_search_region_x2, _, _, _) = cropPadImage(bbox_curr_shift,
                                                    curr_img)

    curr_sample = {
        'image': rand_search_region,
        'image_x2': rand_search_region_x2,
        'bb': bbox_gt_recentered.get_bb_list(),
    }

    # additional options for visualization
    opts_curr = {
        'edge_spacing_x': edge_spacing_x,
        'edge_spacing_y': edge_spacing_y,
        'search_location': rand_search_location,
        'search_region': rand_search_region,
    }

    # build prev sample: regular crop plus the larger-context crop, both
    # taken from the single previous-frame sample fetched above
    prev_sample, opts_prev = crop_sample(prev_orig)
    prev_sample_x2, _ = crop_sample(prev_orig, 4)
    prev_sample['image_x2'] = prev_sample_x2['image']

    # scale
    scale = Rescale((self.input_size, self.input_size))
    scaled_curr_obj = scale(curr_sample, opts_curr)
    scaled_prev_obj = scale(prev_sample, opts_prev)
    training_sample = {
        'previmg': scaled_prev_obj['image'],
        'currimg': scaled_curr_obj['image'],
        'previmg_x2': scaled_prev_obj['image_x2'],
        'currimg_x2': scaled_curr_obj['image_x2'],
        'currbb': scaled_curr_obj['bb'],
    }
    return training_sample, opts_curr
def make_transformed_samples(dataset, args):
    '''
    Build one training batch from a randomly chosen dataset sample.

    The batch holds (kGeneratedExamplesPerImage + 1) entries: slot 0 is
    the true sample from the dataset, the remaining slots are artificially
    generated by shifting/cropping the original sample (GOTURN smooth
    motion model augmentation).

    `args` is not used by this function.

    Returns (x1_batch, x2_batch, x1_batch_x2, x2_batch_x2, y_batch):
    previous crops, current crops, their larger-context counterparts and
    the target boxes.
    '''
    idx = np.random.randint(dataset.len, size=1)[0]
    n_total = kGeneratedExamplesPerImage + 1

    # unscaled original sample (single image and bb)
    orig_sample = dataset.get_orig_sample(idx)
    # cropped scaled sample (two frames and bb), converted by `transform`
    true_sample, _ = dataset.get_sample(idx)
    true_tensor = transform(true_sample)

    x1_batch = torch.Tensor(n_total, 3, input_size, input_size)
    x2_batch = torch.Tensor(n_total, 3, input_size, input_size)
    x1_batch_x2 = torch.Tensor(n_total, 3, input_size * 2, input_size * 2)
    x2_batch_x2 = torch.Tensor(n_total, 3, input_size * 2, input_size * 2)
    y_batch = torch.Tensor(n_total, 4)

    # which key of a transformed sample goes into which batch tensor
    destinations = (
        ('previmg', x1_batch),
        ('currimg', x2_batch),
        ('previmg_x2', x1_batch_x2),
        ('currimg_x2', x2_batch_x2),
        ('currbb', y_batch),
    )

    def _store(slot, tensors):
        # copy one transformed sample into row `slot` of every batch tensor
        for key, batch in destinations:
            batch[slot] = tensors[key]

    # slot 0 is the true sample
    _store(0, true_tensor)

    scale = Rescale((input_size, input_size))
    for slot in range(1, n_total):
        # unscaled current image crop with box
        curr_crop, curr_opts = shift_crop_training_sample(orig_sample,
                                                          bb_params)
        # unscaled previous image crops (regular and larger context)
        prev_crop, prev_opts = crop_sample(orig_sample)
        prev_crop_x2, _ = crop_sample(orig_sample, contextFactor=4)
        prev_crop['image_x2'] = prev_crop_x2['image']

        scaled_curr = scale(curr_crop, curr_opts)
        scaled_prev = scale(prev_crop, prev_opts)
        _store(slot, transform({
            'previmg': scaled_prev['image'],
            'currimg': scaled_curr['image'],
            'previmg_x2': scaled_prev['image_x2'],
            'currimg_x2': scaled_curr['image_x2'],
            'currbb': scaled_curr['bb'],
        }))

    return x1_batch, x2_batch, x1_batch_x2, x2_batch_x2, y_batch
def _get_sample(self, idx):
    """
    Build the network input for frame pair `idx`.

    The previous and current frame are both cropped at the previously
    predicted location (``self.prev_rect``) and rescaled with
    ``self.scale`` (the original notes state the result is (224,224,3)).

    Side effects: stores the unscaled current frame in ``self.curr_img``
    and its crop options in ``self.opts``.

    Returns a dict with keys ``previmg`` and ``currimg``.
    """
    prev_frame, curr_frame = self.img[idx][0], self.img[idx][1]
    last_rect = self.prev_rect

    prev_crop, prev_opts = crop_sample({'image': prev_frame,
                                        'bb': last_rect})
    curr_crop, curr_opts = crop_sample({'image': curr_frame,
                                        'bb': last_rect})

    scaled_curr = self.scale(curr_crop, curr_opts)['image']
    scaled_prev = self.scale(prev_crop, prev_opts)['image']
    pair = {'previmg': scaled_prev, 'currimg': scaled_curr}

    self.curr_img = curr_frame
    self.opts = curr_opts
    return pair
def _get_sample(self):
    """
    Build the network input from the stored previous and current frame.

    Both frames are cropped at the previously predicted location
    (``self._last_bbox``), rescaled with ``self.scale`` (the original
    notes state the result is (224,224,3)) and passed through
    ``bgr2rgb``.

    Side effects: re-assigns ``self.curr_img`` and stores the crop options
    of the current frame in ``self.opts``.

    Returns a dict with keys ``previmg`` and ``currimg``.
    """
    prev_frame = self.prev_img
    curr_frame = self.curr_img
    last_box = self._last_bbox

    prev_crop, prev_opts = crop_sample({'image': prev_frame,
                                        'bb': last_box})
    curr_crop, curr_opts = crop_sample({'image': curr_frame,
                                        'bb': last_box})

    scaled_prev = self.scale(prev_crop, prev_opts)['image']
    prev_rgb = bgr2rgb(scaled_prev)
    scaled_curr = self.scale(curr_crop, curr_opts)['image']
    curr_rgb = bgr2rgb(scaled_curr)
    pair = {'previmg': prev_rgb, 'currimg': curr_rgb}

    self.curr_img = curr_frame
    self.opts = curr_opts
    return pair
def make_transformed_samples(dataset, args):
    """
    Build one training batch from a randomly chosen dataset sample.

    The batch holds (kGeneratedExamplesPerImage + 1) entries: row 0 is the
    true sample returned by ``dataset.get_sample``, every further row is
    generated from the original sample via ``shift_crop_training_sample``.

    `args` is not used by this function.

    Returns (x1_batch, x2_batch, y_batch): previous crops, current crops
    and target boxes.
    """
    idx = np.random.randint(dataset.len, size=1)[0]
    n_total = kGeneratedExamplesPerImage + 1

    # unscaled original sample (single image and bb)
    orig_sample = dataset.get_orig_sample(idx)
    # cropped scaled sample (two frames and bb), converted by `transform`
    true_sample, _ = dataset.get_sample(idx)
    true_tensor = transform(true_sample)

    x1_batch = torch.Tensor(n_total, 3, input_size, input_size)
    x2_batch = torch.Tensor(n_total, 3, input_size, input_size)
    y_batch = torch.Tensor(n_total, 4)

    # row 0 carries the true sample
    x1_batch[0] = true_tensor['previmg']
    x2_batch[0] = true_tensor['currimg']
    y_batch[0] = true_tensor['currbb']

    scale = Rescale((input_size, input_size))
    for row in range(1, n_total):
        # unscaled current image crop with box
        curr_crop, curr_opts = shift_crop_training_sample(orig_sample,
                                                          bb_params)
        # unscaled previous image crop with box
        prev_crop, prev_opts = crop_sample(orig_sample)

        scaled_curr = scale(curr_crop, curr_opts)
        scaled_prev = scale(prev_crop, prev_opts)
        generated = transform({
            'previmg': scaled_prev['image'],
            'currimg': scaled_curr['image'],
            'currbb': scaled_curr['bb'],
        })

        x1_batch[row] = generated['previmg']
        x2_batch[row] = generated['currimg']
        y_batch[row] = generated['currbb']

    return x1_batch, x2_batch, y_batch
def get_sample(self, idx):
    """
    Return the untransformed training sample `idx` for visualization.

    The sample holds the resized previous and current frame crops plus
    the target box that is passed to the network. Per the original notes,
    bounding box values are normalized between 0 and 1 with respect to
    the target frame and then scaled by a factor of 10 (that step is not
    visible in this function).

    Returns a tuple ``(training_sample, opts_curr)``: a dict with keys
    ``previmg``, ``currimg`` and ``currbb``, and the crop options of the
    current frame.
    """
    orig = self.get_orig_sample(idx)

    # unscaled current image crop with box (shifted crop)
    curr_crop, opts_curr = shift_crop_training_sample(orig, self.bb_params)
    # unscaled previous image crop with box
    prev_crop, opts_prev = crop_sample(orig)

    rescale = Rescale((self.sz, self.sz))
    curr_scaled = rescale(curr_crop, opts_curr)
    prev_scaled = rescale(prev_crop, opts_prev)

    return {
        'previmg': prev_scaled['image'],
        'currimg': curr_scaled['image'],
        'currbb': curr_scaled['bb'],
    }, opts_curr