Code Example #1
File: autoencoder_loader.py  Project: dsp6414/pytlib
    def next(self):
        # just grab the next random frame
        frame = self.source[random.choice(self.frame_ids)]
        # frame.show_image_with_labels()
        # get a random crop object
        # wrap filter() in list() so random.choice works on Python 3
        crop_objs = list(filter(lambda x: not self.obj_types or x.obj_type in self.obj_types, frame.get_objects()))
        # print 'Num crop objs in sample: {0}'.format(len(crop_objs))
        crop = random.choice(crop_objs)
        # print 'crop_box: ' + str(crop.box)

        # frame.show_image_with_labels()

        # 1) Randomly perturb crop box (scale and translation)
        transformed_box = RandomPerturber.perturb_crop_box(crop.box,{})

        # 2) Take the crop; TODO: change to a center crop to preserve aspect ratio.
        # If the affine is identity within some tolerance, don't bother applying it.
        affine = Affine()
        scalex = float(self.crop_size[0])/transformed_box.edges()[0]
        scaley = float(self.crop_size[1])/transformed_box.edges()[1]
        affine.append(Affine.translation(-transformed_box.xy_min()))
        affine.append(Affine.scaling((scalex,scaley)))

        transformed_image = affine.apply_to_image(frame.image,self.crop_size) 
        # transformed_image.visualize(title='transformed image')

        # 3) Randomly perturb the cropped image (rotation only) (not implemented in this snippet)

        chw_image = transformed_image.to_order_and_class(Ordering.CHW,ValueClass.FLOAT01)
        # chw_image.visualize(title='chw_image')
        # for an autoencoder, the input and the target are the same image
        sample = AutoEncoderSample([torch.Tensor(chw_image.get_data().astype(float))],
                                   [torch.Tensor(chw_image.get_data().astype(float))])
        return sample
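
The translate-then-scale composition in step 2 is easy to sanity-check in isolation. Below is a minimal, self-contained numpy sketch of the same mapping using 3x3 matrices; crop_box_to_affine and its arguments are illustrative stand-ins, not pytlib's Affine API.

import numpy as np

def crop_box_to_affine(xy_min, edges, crop_size):
    # translate the box's top-left corner (xy_min) to the origin,
    # then scale its edges (width, height) up to crop_size
    translate = np.array([[1.0, 0.0, -xy_min[0]],
                          [0.0, 1.0, -xy_min[1]],
                          [0.0, 0.0, 1.0]])
    scale = np.array([[crop_size[0] / edges[0], 0.0, 0.0],
                      [0.0, crop_size[1] / edges[1], 0.0],
                      [0.0, 0.0, 1.0]])
    # scale is applied after translate, matching the append order above
    return scale @ translate

# the box corner (10, 20) should land at the output origin
A = crop_box_to_affine((10, 20), (64, 48), (128, 128))
assert np.allclose(A @ np.array([10.0, 20.0, 1.0]), [0.0, 0.0, 1.0])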
Code Example #2
    def next(self):
        # 1) pick a random frame
        frame = self.source[random.choice(self.frame_ids)]
        # 2) generate a random perturbation and perturb the frame; this also perturbs the objects, including segmentation polygons
        perturbed_frame = RandomPerturber.perturb_frame(frame, {})
        # 3) scale the perturbed frame to the desired input resolution
        crop_affine = resize_image_center_crop(perturbed_frame.image,
                                               self.crop_size)
        perturbed_frame = apply_affine_to_frame(perturbed_frame, crop_affine,
                                                self.crop_size)
        # visualize the perturbed_frame along with its perturbed objects and masks here
        # perturbed_frame.visualize(display=True)

        # 4) for each object type, produce a merged binary mask over the frame;
        # this yields an h x w x k target map, where k is the number of classes under consideration.
        # For now we use pycocotools' merge and polygon mapping functions since they are implemented in C,
        # although it would be nice to avoid that dependency.
        # Loop over all object types and create a binary mask for each,
        # filling an h x w x k numpy array.
        masks = np.zeros(perturbed_frame.image.get_hw().tolist() +
                         [len(self.obj_types_to_ids)])
        for k, v in self.obj_types_to_ids.items():
            # a) for all objs in the frame that belong to this type, create a merged mask
            polygons = []
            for obj in perturbed_frame.get_objects():
                if obj.obj_type == k:
                    polygons.extend(obj.polygons)
            masks[:, :,
                  v] = Polygon.create_mask(polygons,
                                           perturbed_frame.image.get_wh()[0],
                                           perturbed_frame.image.get_wh()[1])

        # 5) create the segmentation sample
        chw_image = perturbed_frame.image.to_order_and_class(
            Ordering.CHW, ValueClass.FLOAT01)
        # transpose the mask from h x w x k to k x h x w to match the CHW image ordering

        chw_mask = np.transpose(masks, axes=(2, 0, 1))

        # chw_image.visualize(title='chw_image')
        sample = SegmentationSample(
            [torch.Tensor(chw_image.get_data().astype(float))],
            [torch.Tensor(chw_mask)], self.ids_to_obj_types)

        return sample
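
As a quick illustration of the mask layout built in steps 4 and 5, the snippet below mocks up an h x w x k stack of binary masks and the transpose to k x h x w; the shapes are invented for the example and the real masks come from Polygon.create_mask.

import numpy as np

h, w, k = 4, 6, 3
masks = np.zeros((h, w, k))
masks[1:3, 2:5, 0] = 1.0  # pretend class 0 covers a small rectangle
chw_mask = np.transpose(masks, axes=(2, 0, 1))
assert chw_mask.shape == (k, h, w)
assert chw_mask[0].sum() == masks[:, :, 0].sum()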
Code Example #3
    def __next__(self):
        # 1) pick a random frame
        frame = self.source[random.choice(self.frame_ids)]

        # 2) generate a random perturbation and perturb the frame
        perturb_params = {
            'translation_range': [-0.1, 0.1],
            'scaling_range': [0.9, 1.1]
        }
        perturbed_frame = RandomPerturber.perturb_frame(frame, perturb_params)
        crop_affine = resize_image_center_crop(perturbed_frame.image,
                                               self.crop_size)
        output_size = [self.crop_size[1], self.crop_size[0]]
        perturbed_frame = apply_affine_to_frame(perturbed_frame, crop_affine,
                                                output_size)
        # perturbed_frame.visualize(title='chw_image',display=True)

        # 3) encode the objects into a fixed-size target of length max_objects;
        # if there are more objects than max_objects, the remainder are dropped.
        # Each component vector has the form [class(1), bbox(4)].
        # A padding target of the form [-1,-1,-1,-1,-1] represents a non-existent object.

        # create the padding vectors and the class-code maps
        class_encoding, class_decoding = dict(), dict()
        padvec = [np.array([-1] * 5) for _ in range(self.max_objects)]
        # slicing clamps automatically, so [:max_objects] drops any overflow
        for i, obj in enumerate(perturbed_frame.objects[:self.max_objects]):
            if obj.obj_type not in self.obj_types:
                continue
            if obj.obj_type not in class_encoding:
                code = len(class_encoding)
                class_encoding[obj.obj_type] = code
                class_decoding[code] = obj.obj_type
            box_coords = obj.box.to_single_array()
            padvec[i] = np.concatenate(
                (np.array([class_encoding[obj.obj_type]]), box_coords), axis=0)
        chw_image = perturbed_frame.image.to_order_and_class(
            Ordering.CHW, ValueClass.FLOAT01)
        sample = MultiObjectDetectionSample(
            [torch.Tensor(chw_image.get_data().astype(float))],
            [torch.Tensor(padvec)], class_decoding)
        return sample
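
The fixed-length target encoding from step 3 can be exercised on its own. The standalone sketch below mirrors the same padding and class-code logic; encode_targets and its (obj_type, box) tuples are hypothetical stand-ins for the frame objects used above.

import numpy as np

def encode_targets(objects, max_objects):
    # objects: list of (obj_type, [x_min, y_min, x_max, y_max]) tuples
    encoding, decoding = {}, {}
    padvec = [np.full(5, -1.0) for _ in range(max_objects)]
    for i, (obj_type, box) in enumerate(objects[:max_objects]):
        if obj_type not in encoding:
            code = len(encoding)
            encoding[obj_type] = code
            decoding[code] = obj_type
        padvec[i] = np.concatenate(([encoding[obj_type]], box))
    return np.stack(padvec), decoding

targets, decoding = encode_targets(
    [('car', [0, 0, 10, 10]), ('person', [5, 5, 8, 9])], max_objects=4)
assert targets.shape == (4, 5)
assert (targets[2] == -1).all()  # unused slots stay as padding
assert decoding == {0: 'car', 1: 'person'}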
Code Example #4
    def __next__(self):
        # randomly pick num_frames consecutive frames
        num_frames_in_src = len(self.source)
        # print("Number of frames in src {}".format(num_frames_in_src))

        # 1) choose the first frame index uniformly from [0, N - num_frames]
        frames = []
        first_frame = random.randint(0, num_frames_in_src - self.num_frames)
        for i in range(0, self.num_frames):
            frames.append(self.source[first_frame + i])

        # 2) generate a random perturbation and perturb all the frames;
        # note: the same perturbation must be applied to every frame
        perturb_params = {
            'translation_range': [0.0, 0.0],
            'scaling_range': [1.0, 1.0]
        }
        perturbed_frames = []
        for f in frames:
            perturbed_frame = RandomPerturber.perturb_frame(f, perturb_params)
            crop_affine = resize_image_center_crop(perturbed_frame.image,
                                                   self.crop_size)
            output_size = [self.crop_size[1], self.crop_size[0]]
            perturbed_frame = apply_affine_to_frame(perturbed_frame,
                                                    crop_affine, output_size)
            perturbed_frames.append(perturbed_frame)
            # perturbed_frame.visualize(title='chw_image',display=True)

        # 3) prepare tensors
        # - stack the num_frames frame tensors into a single input tensor
        # - add the calibration matrices to the target
        input_tensors = []
        calib_mats = []
        for f in perturbed_frames:
            img = f.image.to_order_and_class(Ordering.CHW, ValueClass.FLOAT01)
            input_tensors.append(torch.Tensor(img.get_data().astype(float)))
            calib_mats.append(torch.Tensor(f.calib_mat))

        # the input is now num_frames x C x H x W
        sample = SequenceVideoSample([torch.stack(input_tensors, dim=0)],
                                     [torch.stack(calib_mats)])
        return sample
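
To confirm the input shape produced in step 3, here is a tiny check that stacking num_frames CHW tensors yields a num_frames x C x H x W batch; the tensor contents and sizes are dummies.

import torch

num_frames, c, h, w = 3, 3, 32, 32
frames = [torch.rand(c, h, w) for _ in range(num_frames)]
stacked = torch.stack(frames, dim=0)
assert stacked.shape == (num_frames, c, h, w)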
Code Example #5
    def load_train(self):
        frame1, frame2, neg_box, pos_box, anchor_box = None, None, None, None, None
        # TODO: this should probably give up if no negative box is found after some number of attempts
        while neg_box is None:
            indices = random.sample(self.frame_ids, 2)
            frame1, frame2 = [self.source[x] for x in indices]
            # wrap filter() in list() so the results can be sampled and reused
            frame1_objs = list(filter(lambda x: x.obj_type in self.obj_types,
                                      frame1.get_objects()))
            frame2_objs = list(filter(lambda x: x.obj_type in self.obj_types,
                                      frame2.get_objects()))
            # pick random positive and anchor boxes
            pos_box = random.choice(frame1_objs).box
            anchor_box = random.choice(frame2_objs).box

            # find random neg crop
            neg_box = self.find_negative_crop(frame1, frame1_objs)

        perturbed_pos_box = RandomPerturber.perturb_crop_box(
            pos_box, self.perturbations)
        affine_crop0 = crop_image_resize(frame1.image, perturbed_pos_box,
                                         self.crop_size)
        pos_crop = affine_crop0.apply_to_image(frame1.image, self.crop_size)

        affine_crop1 = crop_image_resize(frame2.image, anchor_box,
                                         self.anchor_size)
        anchor_crop = affine_crop1.apply_to_image(frame2.image,
                                                  self.anchor_size)

        affine_crop2 = crop_image_resize(frame1.image, neg_box, self.crop_size)
        neg_crop = affine_crop2.apply_to_image(frame1.image, self.crop_size)
        # neg_crop.visualize(display=True,title='neg')

        # now find all the boxes that intersect with the perturbed_pos_box
        intersected_boxes = []
        for obj in filter(lambda x: x.obj_type in self.obj_types,
                          frame1.get_objects()):
            if Box.intersection(obj.box, perturbed_pos_box) is not None:
                intersected_boxes.append(obj.box)

        intersected_boxes = list(
            map(lambda x: affine_crop0.apply_to_box(x), intersected_boxes))
        # test display
        # disp_frame = Frame.from_image_and_objects(pos_crop,[Object(box_crop)])
        # disp_frame.visualize(display=True,title='pos frame')
        # pos_crop.visualize(display=True,title='pos crop')

        pos = torch.Tensor(
            pos_crop.to_order_and_class(
                Ordering.CHW, ValueClass.FLOAT01).get_data().astype(float))
        neg = torch.Tensor(
            neg_crop.to_order_and_class(
                Ordering.CHW, ValueClass.FLOAT01).get_data().astype(float))
        anchor = torch.Tensor(
            anchor_crop.to_order_and_class(
                Ordering.CHW, ValueClass.FLOAT01).get_data().astype(float))

        # pos_map = generate_response_map_from_boxes(pos_crop.get_hw(),intersected_boxes)
        # PTImage.from_2d_numpy(pos_map).visualize(display=True,title='pos frame')
        pos_map = torch.Tensor(
            generate_response_map_from_boxes(pos_crop.get_hw(),
                                             intersected_boxes))
        neg_map = torch.Tensor(
            generate_response_map_from_boxes(pos_crop.get_hw()))

        data = [pos, neg, anchor]
        target = [pos_map, neg_map, anchor]
        return TripletDetectionSample(data, target)
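
One plausible way the crops in a TripletDetectionSample might be consumed downstream is a standard triplet margin loss over embeddings of the anchor, positive, and negative crops. The sketch below uses a toy embedding network and dummy crop sizes; it is not pytlib's actual model or training code.

import torch
import torch.nn as nn

# toy embedding: conv + global pool so crops of different sizes map to 8-d vectors
embed = nn.Sequential(nn.Conv2d(3, 8, 3, stride=2), nn.ReLU(),
                      nn.AdaptiveAvgPool2d(1), nn.Flatten())
loss_fn = nn.TripletMarginLoss(margin=1.0)

anchor = torch.rand(1, 3, 127, 127)  # anchor_size crop (illustrative)
pos = torch.rand(1, 3, 255, 255)     # crop_size crops (illustrative)
neg = torch.rand(1, 3, 255, 255)
loss = loss_fn(embed(anchor), embed(pos), embed(neg))
loss.backward()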