Exemple #1
0
    def next(self):
        """Return a SegmentationSample built from one randomly chosen frame.

        The frame is drawn at random from ``self.frame_ids``, perturbed with
        ``RandomPerturber`` (an empty parameter dict is passed), then
        resized/center-cropped to ``self.crop_size``. For every entry of
        ``self.obj_types_to_ids`` the polygons of all objects of that type
        are merged into a single mask, stored in the channel given by the
        mapped id.

        Returns:
            SegmentationSample holding the CHW float image tensor, the
            class-first mask tensor and ``self.ids_to_obj_types``.
        """
        # Pick a frame and perturb it; the perturbation is applied to the
        # frame as a whole, so object segmentation polygons follow the image.
        source_frame = self.source[random.choice(self.frame_ids)]
        augmented = RandomPerturber.perturb_frame(source_frame, {})

        # Bring the perturbed frame to the requested input resolution.
        # NOTE(review): the other samplers in this file pass
        # [crop_size[1], crop_size[0]] as the output size while this one
        # passes crop_size unchanged - confirm which order
        # apply_affine_to_frame expects for non-square crops.
        affine = resize_image_center_crop(augmented.image, self.crop_size)
        augmented = apply_affine_to_frame(augmented, affine, self.crop_size)
        # To inspect the perturbed frame with its objects and masks:
        # augmented.visualize(display=True)

        # One mask channel per object type; the leading dimensions come from
        # image.get_hw() and the channel index is the type's mapped id.
        mask_shape = augmented.image.get_hw().tolist()
        mask_shape = mask_shape + [len(self.obj_types_to_ids)]
        masks = np.zeros(mask_shape)
        for type_name, channel in self.obj_types_to_ids.items():
            # Gather every polygon of every object of this type so that they
            # are rasterized together into one merged mask.
            merged_polygons = [
                polygon
                for candidate in augmented.get_objects()
                if candidate.obj_type == type_name
                for polygon in candidate.polygons
            ]
            mask_w = augmented.image.get_wh()[0]
            mask_h = augmented.image.get_wh()[1]
            masks[:, :, channel] = Polygon.create_mask(merged_polygons,
                                                       mask_w, mask_h)

        # Assemble the sample: image in CHW / FLOAT01 form, masks moved to a
        # class-first layout to match.
        image_chw = augmented.image.to_order_and_class(Ordering.CHW,
                                                       ValueClass.FLOAT01)
        masks_chw = np.transpose(masks, axes=(2, 0, 1))
        image_tensor = torch.Tensor(image_chw.get_data().astype(float))
        mask_tensor = torch.Tensor(masks_chw)

        return SegmentationSample([image_tensor], [mask_tensor],
                                  self.ids_to_obj_types)
    def __next__(self):
        """Return a MultiObjectDetectionSample built from one random frame.

        The frame is drawn at random from ``self.frame_ids``, randomly
        translated/scaled, and resized/center-cropped to ``self.crop_size``.
        Objects whose ``obj_type`` is in ``self.obj_types`` are encoded as
        rows ``[class_code, box...]`` of a ``(self.max_objects, 5)`` target;
        unused rows are filled with ``-1`` padding.

        Objects are filtered by type *before* the ``max_objects`` limit is
        applied, so objects of other types never consume target slots, and
        the encoded rows are packed at the front with padding at the end.

        Returns:
            MultiObjectDetectionSample holding the CHW float image tensor,
            the target tensor and the code -> object type mapping for this
            sample.
        """
        # 1) pick a random frame
        frame = self.source[random.choice(self.frame_ids)]

        # 2) generate a random perturbation and perturb the frame, then
        #    resize/center-crop to the requested resolution
        perturb_params = {
            'translation_range': [-0.1, 0.1],
            'scaling_range': [0.9, 1.1]
        }
        perturbed_frame = RandomPerturber.perturb_frame(frame, perturb_params)
        crop_affine = resize_image_center_crop(perturbed_frame.image,
                                               self.crop_size)
        # the output size is passed with the two crop dimensions swapped
        output_size = [self.crop_size[1], self.crop_size[0]]
        perturbed_frame = apply_affine_to_frame(perturbed_frame, crop_affine,
                                                output_size)
        # perturbed_frame.visualize(title='chw_image',display=True)

        # 3) encode the objects into a fixed-size target.
        # Keep only the requested object types first; any objects beyond
        # max_objects are then dropped.
        wanted_objects = [
            obj for obj in perturbed_frame.objects
            if obj.obj_type in self.obj_types
        ]

        # Preallocate every row as the padding target [-1, -1, -1, -1, -1].
        # A single float array avoids handing torch a list of ndarrays with
        # mixed int/float dtypes, which it converts element by element.
        targets = np.full((self.max_objects, 5), -1.0)

        # NOTE(review): class codes are assigned in first-seen order within
        # this sample, so the same object type can receive a different code
        # in another sample; consumers must use the returned decoding map.
        class_encoding, class_decoding = {}, {}
        for slot, obj in enumerate(wanted_objects[:self.max_objects]):
            code = class_encoding.setdefault(obj.obj_type,
                                             len(class_encoding))
            class_decoding[code] = obj.obj_type
            targets[slot, 0] = code
            # assumes to_single_array() yields the 4 box values - TODO confirm
            targets[slot, 1:] = obj.box.to_single_array()

        chw_image = perturbed_frame.image.to_order_and_class(
            Ordering.CHW, ValueClass.FLOAT01)
        sample = MultiObjectDetectionSample(
            [torch.Tensor(chw_image.get_data().astype(float))],
            [torch.Tensor(targets)], class_decoding)
        return sample
    def __next__(self):
        """Return a SequenceVideoSample of consecutive frames from the source.

        A start index is drawn uniformly so that ``self.num_frames``
        consecutive frames fit inside ``self.source``. Each frame is passed
        through ``RandomPerturber`` and resized/center-cropped to
        ``self.crop_size``. The frame images are stacked along a new leading
        dimension, and the per-frame ``calib_mat`` matrices are stacked as
        the target.

        Returns:
            SequenceVideoSample holding the stacked image tensor and the
            stacked calibration matrices.
        """
        # 1) choose a window of num_frames consecutive frames; randint is
        #    inclusive, so the last valid start is len(source) - num_frames
        source_len = len(self.source)
        start = random.randint(0, source_len - self.num_frames)
        clip = [self.source[start + offset]
                for offset in range(self.num_frames)]

        # 2) perturb and crop every frame of the window.
        # NOTE(review): perturb_frame is invoked once per frame, so each
        # frame gets its own draw. The ranges below are degenerate
        # (presumably no translation and unit scale - confirm), but widening
        # them would no longer apply the same perturbation to all frames.
        perturb_params = {
            'translation_range': [0.0, 0.0],
            'scaling_range': [1.0, 1.0]
        }
        prepared = []
        for raw_frame in clip:
            jittered = RandomPerturber.perturb_frame(raw_frame, perturb_params)
            affine = resize_image_center_crop(jittered.image, self.crop_size)
            # the output size is passed with the two crop dimensions swapped
            target_size = [self.crop_size[1], self.crop_size[0]]
            prepared.append(
                apply_affine_to_frame(jittered, affine, target_size))
            # prepared[-1].visualize(title='chw_image',display=True)

        # 3) convert to tensors: one CHW / FLOAT01 image and one calibration
        #    matrix per frame
        image_tensors, calib_tensors = [], []
        for item in prepared:
            chw = item.image.to_order_and_class(Ordering.CHW,
                                                ValueClass.FLOAT01)
            image_tensors.append(torch.Tensor(chw.get_data().astype(float)))
            calib_tensors.append(torch.Tensor(item.calib_mat))

        # frames are stacked on dim 0, ahead of each image's CHW dimensions
        stacked_images = torch.stack(image_tensors, dim=0)
        stacked_calibs = torch.stack(calib_tensors)
        return SequenceVideoSample([stacked_images], [stacked_calibs])