def next(self):
    """Draw one random frame, perturb it, and build a SegmentationSample.

    Returns a SegmentationSample holding a CHW float image tensor and a
    KxHxW stack of per-class binary masks (K = number of object types).

    NOTE(review): named ``next`` rather than ``__next__`` — sibling
    samplers use the Python-3 iterator protocol name; confirm whether
    callers invoke this explicitly before renaming.
    """
    # 1) pick a random frame
    frame = self.source[random.choice(self.frame_ids)]

    # 2) generate a random perturbation and perturb the frame; this also
    # perturbs the objects, including segmentation polygons
    perturbed_frame = RandomPerturber.perturb_frame(frame, {})

    # 3) scale the perturbed frame to the desired input resolution
    crop_affine = resize_image_center_crop(perturbed_frame.image, self.crop_size)
    perturbed_frame = apply_affine_to_frame(perturbed_frame, crop_affine, self.crop_size)
    # visualize the perturbed_frame along with its perturbed objects and masks here
    # perturbed_frame.visualize(display=True)

    # 4) for each object type, produce a merged binary mask over the frame;
    # this results in an h x w x k target map where k is the number of
    # classes in consideration.  pycocotools' merge and polygon mapping
    # functions (wrapped by Polygon.create_mask) are used since they are
    # implemented in C, although the dependency is not ideal.
    # Hoisted out of the loop: image size is invariant per sample.
    width, height = perturbed_frame.image.get_wh()
    masks = np.zeros(perturbed_frame.image.get_hw().tolist() +
                     [len(self.obj_types_to_ids)])
    for obj_type, channel in self.obj_types_to_ids.items():
        # merge all polygons of objects of this type into one binary mask
        polygons = []
        for obj in perturbed_frame.get_objects():
            if obj.obj_type == obj_type:
                polygons.extend(obj.polygons)
        masks[:, :, channel] = Polygon.create_mask(polygons, width, height)

    # 5) create the segmentation sample
    chw_image = perturbed_frame.image.to_order_and_class(
        Ordering.CHW, ValueClass.FLOAT01)
    # HWK -> KHW so the mask layout matches the CHW image
    chw_mask = np.transpose(masks, axes=(2, 0, 1))
    # chw_image.visualize(title='chw_image')
    sample = SegmentationSample(
        [torch.Tensor(chw_image.get_data().astype(float))],
        [torch.Tensor(chw_mask)], self.ids_to_obj_types)
    return sample
def __next__(self):
    """Draw one random frame, perturb it, and build a MultiObjectDetectionSample.

    Targets are a fixed-length vector of ``max_objects`` entries, each of the
    form [class(1), bbox(4)]; unused slots keep the padding value
    [-1, -1, -1, -1, -1].

    NOTE(review): ``class_encoding`` is rebuilt per sample, so class ids are
    only stable within one sample (the decoding dict travels with it) —
    confirm downstream consumers expect this.
    """
    # 1) pick a random frame
    frame = self.source[random.choice(self.frame_ids)]

    # 2) generate a random perturbation and perturb the frame
    perturb_params = {
        'translation_range': [-0.1, 0.1],
        'scaling_range': [0.9, 1.1]
    }
    perturbed_frame = RandomPerturber.perturb_frame(frame, perturb_params)
    crop_affine = resize_image_center_crop(perturbed_frame.image, self.crop_size)
    output_size = [self.crop_size[1], self.crop_size[0]]
    perturbed_frame = apply_affine_to_frame(perturbed_frame, crop_affine,
                                            output_size)
    # perturbed_frame.visualize(title='chw_image',display=True)

    # 3) encode the objects into targets with size that does not exceed
    # max_objects; if there are more matching objects than max_objects the
    # remaining ones are dropped.  A padding target represents a
    # non-existent object and has the form [-1,-1,-1,-1,-1].
    class_encoding, class_decoding = dict(), dict()
    padvec = [np.array([-1] * 5) for _ in range(self.max_objects)]
    slot = 0  # next free target slot
    for obj in perturbed_frame.objects:
        if slot >= self.max_objects:
            break  # target vector is full; drop the rest
        # BUGFIX: previously the object list was truncated to max_objects
        # BEFORE type filtering and the raw enumerate index was used as the
        # slot, so filtered-out objects consumed slots (leaving padding
        # holes between real targets) and could crowd out valid objects
        # appearing later in the list.  Filtered objects now cost nothing.
        if obj.obj_type not in self.obj_types:
            continue
        if obj.obj_type not in class_encoding:
            code = len(class_encoding)
            class_encoding[obj.obj_type] = code
            class_decoding[code] = obj.obj_type
        box_coords = obj.box.to_single_array()
        padvec[slot] = np.concatenate(
            (np.array([class_encoding[obj.obj_type]]), box_coords), axis=0)
        slot += 1

    chw_image = perturbed_frame.image.to_order_and_class(
        Ordering.CHW, ValueClass.FLOAT01)
    sample = MultiObjectDetectionSample(
        [torch.Tensor(chw_image.get_data().astype(float))],
        [torch.Tensor(padvec)], class_decoding)
    return sample
def __next__(self):
    """Sample ``self.num_frames`` consecutive frames, perturb each one the
    same way, and pack them into a SequenceVideoSample.
    """
    total_frames = len(self.source)
    # pick a window start so the whole run of frames fits inside the source
    start = random.randint(0, total_frames - self.num_frames)
    window = [self.source[start + offset] for offset in range(self.num_frames)]

    # identity perturbation — identical params are applied to every frame
    # in the window so the sequence stays geometrically consistent
    perturb_params = {
        'translation_range': [0.0, 0.0],
        'scaling_range': [1.0, 1.0]
    }
    processed = []
    for raw_frame in window:
        warped = RandomPerturber.perturb_frame(raw_frame, perturb_params)
        affine = resize_image_center_crop(warped.image, self.crop_size)
        out_size = [self.crop_size[1], self.crop_size[0]]
        warped = apply_affine_to_frame(warped, affine, out_size)
        processed.append(warped)
        # warped.visualize(title='chw_image',display=True)

    # collect one CHW image tensor and one calibration matrix per frame
    image_stack = []
    calib_stack = []
    for frame in processed:
        chw = frame.image.to_order_and_class(Ordering.CHW, ValueClass.FLOAT01)
        image_stack.append(torch.Tensor(chw.get_data().astype(float)))
        calib_stack.append(torch.Tensor(frame.calib_mat))

    # the stacked input has shape (num_frames, C, H, W)
    return SequenceVideoSample([torch.stack(image_stack, dim=0)],
                               [torch.stack(calib_stack)])