예제 #1
0
    def preprocess(roidb):
        fname, boxes, klass, is_crowd = roidb['file_name'], roidb['boxes'], roidb['class'], roidb['is_crowd']
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        ret = {'image': im}
        # rpn anchor:
        try:
            if cfg.MODE_FPN:
                multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(im, boxes, is_crowd)
                for i, (anchor_labels, anchor_boxes) in enumerate(multilevel_anchor_inputs):
                    ret['anchor_labels_lvl{}'.format(i + 2)] = anchor_labels
                    ret['anchor_boxes_lvl{}'.format(i + 2)] = anchor_boxes
            else:
                # anchor_labels, anchor_boxes
                ret['anchor_labels'], ret['anchor_boxes'] = get_rpn_anchor_input(im, boxes, is_crowd)

            boxes = boxes[is_crowd == 0]    # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            ret['gt_boxes'] = boxes
            ret['gt_labels'] = klass
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once("Input {} is filtered for training: {}".format(fname, str(e)), 'warn')
            return None

        if cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(roidb['segmentation'])
            segmentation = [segmentation[k] for k in range(len(segmentation)) if not is_crowd[k]]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            for polys in segmentation:
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')    # values in {0, 1}
            ret['gt_masks'] = masks

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret
예제 #2
0
    def preprocess(roidb):
        fname, boxes, klass, is_crowd = roidb['file_name'], roidb['boxes'], roidb['class'], roidb['is_crowd']
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        ret = {'image': im}
        # rpn anchor:
        try:
            if cfg.MODE_FPN:
                multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(im, boxes, is_crowd)
                for i, (anchor_labels, anchor_boxes) in enumerate(multilevel_anchor_inputs):
                    ret['anchor_labels_lvl{}'.format(i + 2)] = anchor_labels
                    ret['anchor_boxes_lvl{}'.format(i + 2)] = anchor_boxes
            else:
                # anchor_labels, anchor_boxes
                ret['anchor_labels'], ret['anchor_boxes'] = get_rpn_anchor_input(im, boxes, is_crowd)

            boxes = boxes[is_crowd == 0]    # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            ret['gt_boxes'] = boxes
            ret['gt_labels'] = klass
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once("Input {} is filtered for training: {}".format(fname, str(e)), 'warn')
            return None

        if cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(roidb['segmentation'])
            segmentation = [segmentation[k] for k in range(len(segmentation)) if not is_crowd[k]]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            for polys in segmentation:
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')    # values in {0, 1}
            ret['gt_masks'] = masks

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret
예제 #3
0
파일: data.py 프로젝트: yuxwind/tensorpack
    def preprocess(img):
        fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img['class'], img['is_crowd']
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        # rpn anchor:
        try:
            if config.MODE_FPN:
                multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(im, boxes, is_crowd)
                anchor_inputs = itertools.chain.from_iterable(multilevel_anchor_inputs)
            else:
                # anchor_labels, anchor_boxes
                anchor_inputs = get_rpn_anchor_input(im, boxes, is_crowd)
                assert len(anchor_inputs) == 2

            boxes = boxes[is_crowd == 0]    # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once("Input {} is filtered for training: {}".format(fname, str(e)), 'warn')
            return None

        ret = [im] + list(anchor_inputs) + [boxes, klass]
        # TODO pad im when FPN

        if add_mask:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(img['segmentation'])
            segmentation = [segmentation[k] for k in range(len(segmentation)) if not is_crowd[k]]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            for polys in segmentation:
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')    # values in {0, 1}
            ret.append(masks)

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret
예제 #4
0
    def preprocess(img):
        fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img[
            'class'], img['is_crowd']
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)

        # rpn anchor:
        try:
            fm_labels, fm_boxes = get_rpn_anchor_input(im, boxes, klass,
                                                       is_crowd)
            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is invalid for training: {}".format(fname, str(e)),
                'warn')
            return None

        ret = [im, fm_labels, fm_boxes, boxes, klass]

        # masks
        segmentation = img.get('segmentation', None)
        if segmentation is not None:
            segmentation = [
                segmentation[k] for k in range(len(segmentation))
                if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes)

            # one image-sized binary mask per box
            masks = []
            for polys in segmentation:
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(
                    segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret.append(masks)

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret
예제 #5
0
파일: data.py 프로젝트: caiwenpu/tensorpack
    def preprocess(img):
        fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img['class'], img['is_crowd']
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        # rpn anchor:
        try:
            fm_labels, fm_boxes = get_rpn_anchor_input(im, boxes, is_crowd)
            boxes = boxes[is_crowd == 0]    # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once("Input {} is filtered for training: {}".format(fname, str(e)), 'warn')
            return None

        ret = [im, fm_labels, fm_boxes, boxes, klass]

        if add_mask:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(img['segmentation'])
            segmentation = [segmentation[k] for k in range(len(segmentation)) if not is_crowd[k]]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            for polys in segmentation:
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')    # values in {0, 1}
            ret.append(masks)

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret
    def preprocess(img):
        print("start preproc coco")
        start = time.time()
        if config.USE_SECOND_HEAD:
            fname, boxes, klass, second_klass, is_crowd = img['file_name'], img['boxes'], img['class'], \
                                                          img['second_class'], img['is_crowd']
        else:
            fname, boxes, klass, is_crowd = img['file_name'], img[
                'boxes'], img['class'], img['is_crowd']
            second_klass = None
        res = preproc_img(fname, boxes, klass, second_klass, is_crowd, aug)
        if res is None:
            print("coco: preproc_img returned None on", fname)
            return None

        ret, params = res
        im = ret[0]
        boxes = ret[3]
        # masks
        if add_mask:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(img.get('segmentation', None))
            segmentation = [
                segmentation[k] for k in range(len(segmentation))
                if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes), (len(segmentation),
                                                     len(boxes))

            # one image-sized binary mask per box
            masks = []
            for polys in segmentation:
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(
                    segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret.append(masks)

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        end = time.time()
        elapsed = end - start
        print("coco example done, elapsed:", elapsed)
        return ret
예제 #7
0
def draw_mask(im, mask, alpha=0.5, color=None):
    """
    Overlay a mask on top of the image.

    Args:
        im: a 3-channel uint8 image in BGR
        mask: a binary 1-channel image of the same size
        color: if None, will choose automatically
    """
    x, y, width, height, angle = mask
    rect = (x,y), (width, height), angle
    box = cv2.boxPoints(rect)
    mask = segmentation_to_mask([box], im.shape[0], im.shape[1])

    if color is None:
        color = PALETTE_RGB[np.random.choice(len(PALETTE_RGB))][::-1]
    im = np.where(np.repeat((mask > 0)[:, :, None], 3, axis=2),
                  im * (1 - alpha) + color * alpha, im)
    im = im.astype('uint8')
    return im
예제 #8
0
    def __call__(self, roidb):
        fname, boxes, klass, is_crowd = roidb["file_name"], roidb["boxes"], roidb["class"], roidb["is_crowd"]
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype("float32")
        height, width = im.shape[:2]
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        if not self.cfg.DATA.ABSOLUTE_COORD:
            boxes[:, 0::2] *= width
            boxes[:, 1::2] *= height

        # augmentation:
        im, params = self.aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = self.aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        ret = {"image": im}
        # Add rpn data to dataflow:
        try:
            if self.cfg.MODE_FPN:
                multilevel_anchor_inputs = self.get_multilevel_rpn_anchor_input(im, boxes, is_crowd)
                for i, (anchor_labels, anchor_boxes) in enumerate(multilevel_anchor_inputs):
                    ret["anchor_labels_lvl{}".format(i + 2)] = anchor_labels
                    ret["anchor_boxes_lvl{}".format(i + 2)] = anchor_boxes
            else:
                ret["anchor_labels"], ret["anchor_boxes"] = self.get_rpn_anchor_input(im, boxes, is_crowd)

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            ret["gt_boxes"] = boxes
            ret["gt_labels"] = klass
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once("Input {} is filtered for training: {}".format(fname, str(e)), "warn")
            return None

        if self.cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(roidb["segmentation"])
            segmentation = [segmentation[k] for k in range(len(segmentation)) if not is_crowd[k]]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            width_height = np.asarray([width, height], dtype=np.float32)
            gt_mask_width = int(np.ceil(im.shape[1] / 8.0) * 8)   # pad to 8 in order to pack mask into bits
            for polys in segmentation:
                if not self.cfg.DATA.ABSOLUTE_COORD:
                    polys = [p * width_height for p in polys]
                polys = [self.aug.augment_coords(p, params) for p in polys]
                masks.append(segmentation_to_mask(polys, im.shape[0], gt_mask_width))
            masks = np.asarray(masks, dtype='uint8')    # values in {0, 1}
            masks = np.packbits(masks, axis=-1)
            ret['gt_masks_packed'] = masks

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret
예제 #9
0
    def preprocess(roidb_batch):
        datapoint_list = []
        for roidb in roidb_batch:
            fname, boxes, klass, is_crowd = roidb['file_name'], roidb['boxes'], roidb['class'], roidb['is_crowd']
            boxes = np.copy(boxes)
            im = cv2.imread(fname, cv2.IMREAD_COLOR)
            assert im is not None, fname
            im = im.astype('float32')
            # assume floatbox as input
            assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

            # augmentation:
            im, params = aug.augment_return_params(im)
            points = box_to_point8(boxes)
            points = aug.augment_coords(points, params)
            boxes = point8_to_box(points)
            assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

            ret = {'images': im}
            # rpn anchor:
            try:
                if cfg.MODE_FPN:
                    multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(im, boxes, is_crowd)
                    for i, (anchor_labels, anchor_boxes) in enumerate(multilevel_anchor_inputs):
                        ret['anchor_labels_lvl{}'.format(i + 2)] = anchor_labels
                        ret['anchor_boxes_lvl{}'.format(i + 2)] = anchor_boxes
                else:
                    raise NotImplementedError("[armand] Batch mode only available for FPN")

                boxes = boxes[is_crowd == 0]    # skip crowd boxes in training target
                klass = klass[is_crowd == 0]
                ret['gt_boxes'] = boxes
                ret['gt_labels'] = klass
                ret['filename'] = fname
                if not len(boxes):
                    raise MalformedData("No valid gt_boxes!")
            except MalformedData as e:
                log_once("Input {} is filtered for training: {}".format(fname, str(e)), 'warn')
                return None

            if cfg.MODE_MASK:
                # augmentation will modify the polys in-place
                segmentation = copy.deepcopy(roidb['segmentation'])
                segmentation = [segmentation[k] for k in range(len(segmentation)) if not is_crowd[k]]
                assert len(segmentation) == len(boxes)

                # Apply augmentation on polygon coordinates.
                # And produce one image-sized binary mask per box.
                masks = []
                for polys in segmentation:
                    polys = [aug.augment_coords(p, params) for p in polys]
                    masks.append(segmentation_to_mask(polys, im.shape[0], im.shape[1]))
                masks = np.asarray(masks, dtype='uint8')    # values in {0, 1}
                ret['gt_masks'] = masks

            datapoint_list.append(ret)

        #################################################################################################################
        # Batchify the output
        #################################################################################################################

        # Now we need to batch the various fields

        # Easily stackable:
        # - anchor_labels_lvl2
        # - anchor_boxes_lvl2
        # - anchor_labels_lvl3
        # - anchor_boxes_lvl3
        # - anchor_labels_lvl4
        # - anchor_boxes_lvl4
        # - anchor_labels_lvl5
        # - anchor_boxes_lvl5
        # - anchor_labels_lvl6
        # - anchor_boxes_lvl6

        batched_datapoint = {}
        for stackable_field in ["anchor_labels_lvl2",
                                "anchor_boxes_lvl2",
                                "anchor_labels_lvl3",
                                "anchor_boxes_lvl3",
                                "anchor_labels_lvl4",
                                "anchor_boxes_lvl4",
                                "anchor_labels_lvl5",
                                "anchor_boxes_lvl5",
                                "anchor_labels_lvl6",
                                "anchor_boxes_lvl6"]:
            batched_datapoint[stackable_field] = np.stack([d[stackable_field] for d in datapoint_list])



        # Require padding and original dimension storage
        # - image (HxWx3)
        # - gt_boxes (?x4)
        # - gt_labels (?)
        # - gt_masks (?xHxW)

        """
        Find the minimum container size for images (maxW x maxH)
        Find the maximum number of ground truth boxes
        For each image, save original dimension and pad
        """

        if cfg.PREPROC.PREDEFINED_PADDING:
            padding_shapes = [get_padding_shape(*(d["images"].shape[:2])) for d in datapoint_list]
            max_height = max([shp[0] for shp in padding_shapes])
            max_width = max([shp[1] for shp in padding_shapes])
        else:
            image_dims = [d["images"].shape for d in datapoint_list]
            heights = [dim[0] for dim in image_dims]
            widths = [dim[1] for dim in image_dims]

            max_height = max(heights)
            max_width = max(widths)


        # image
        padded_images = []
        original_image_dims = []
        for datapoint in datapoint_list:
            image = datapoint["images"]
            original_image_dims.append(image.shape)

            h_padding = max_height - image.shape[0]
            w_padding = max_width - image.shape[1]

            padded_image = np.pad(image,
                                  [[0, h_padding],
                                   [0, w_padding],
                                   [0, 0]],
                                  'constant')

            padded_images.append(padded_image)

        batched_datapoint["images"] = np.stack(padded_images)
        #print(batched_datapoint["images"].shape)
        batched_datapoint["orig_image_dims"] = np.stack(original_image_dims)


        # gt_boxes and gt_labels
        max_num_gts = max([d["gt_labels"].size for d in datapoint_list])

        gt_counts = []
        padded_gt_labels = []
        padded_gt_boxes = []
        padded_gt_masks = []
        for datapoint in datapoint_list:
            gt_count_for_image = datapoint["gt_labels"].size
            gt_counts.append(gt_count_for_image)

            gt_padding = max_num_gts - gt_count_for_image

            padded_gt_labels_for_img = np.pad(datapoint["gt_labels"], [0, gt_padding], 'constant', constant_values=-1)
            padded_gt_labels.append(padded_gt_labels_for_img)

            padded_gt_boxes_for_img = np.pad(datapoint["gt_boxes"],
                                             [[0, gt_padding],
                                              [0,0]],
                                             'constant')
            padded_gt_boxes.append(padded_gt_boxes_for_img)




            h_padding = max_height - datapoint["images"].shape[0]
            w_padding = max_width - datapoint["images"].shape[1]



            if cfg.MODE_MASK:
                padded_gt_masks_for_img = np.pad(datapoint["gt_masks"],
                                         [[0, gt_padding],
                                          [0, h_padding],
                                          [0, w_padding]],
                                         'constant')
                padded_gt_masks.append(padded_gt_masks_for_img)


        batched_datapoint["orig_gt_counts"] = np.stack(gt_counts)
        batched_datapoint["gt_labels"] = np.stack(padded_gt_labels)
        batched_datapoint["gt_boxes"] = np.stack(padded_gt_boxes)
        batched_datapoint["filenames"] = [d["filename"] for d in datapoint_list]

        if cfg.MODE_MASK:
            batched_datapoint["gt_masks"] = np.stack(padded_gt_masks)



        return batched_datapoint
예제 #10
0
    def preprocess(roidb):
        fname, boxes, klass, is_crowd = roidb['file_name'], roidb[
            'boxes'], roidb['class'], roidb['is_crowd']
        boxes = np.copy(boxes)
        im = imread(fname)
        assert im is not None, fname
        im = np.expand_dims(im, axis=2)
        im = np.repeat(im, 3, axis=2)
        im = im.astype('float32')
        #height, width = im.shape[:2]
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        if not cfg.DATA.ABSOLUTE_COORD:
            boxes[:, 0::2] *= width
            boxes[:, 1::2] *= height

        #source_image = Image.fromarray(im.astype('uint8'))
        #imsave('./input_image1', im[:,:,1].astype(np.float32), imagej=True)
        """
        draw = ImageDraw.Draw(source_image)
        for i, bbox in enumerate(boxes):
            # tmp_x = bbox[2] - bbox[0]
            # tmp_y = bbox[3] - bbox[1]
            # draw.rectangle((bbox[0], bbox[1], tmp_x, tmp_y), outline='red')
            draw.rectangle((bbox[0], bbox[1], bbox[2], bbox[3]), outline='red')
            #draw.text((bbox[0] + 5, bbox[1] + 5), str(klass_tmp[i]))
        source_image.save('./input_image1', "JPEG")

        """
        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        klass_tmp = np.copy(klass)
        #print(klass)

        #imsave('./input_image2', im[:,:,1].astype(np.float32), imagej=True)
        """
        source_image = Image.fromarray(im.astype('uint8'))
        draw = ImageDraw.Draw(source_image)
        for i, bbox in enumerate(boxes):
            # tmp_x = bbox[2] - bbox[0]
            # tmp_y = bbox[3] - bbox[1]
            # draw.rectangle((bbox[0], bbox[1], tmp_x, tmp_y), outline='red')
            draw.rectangle((bbox[0], bbox[1], bbox[2], bbox[3]), outline='red')
            #draw.text((bbox[0]+5, bbox[1]+5), str(klass_tmp[i]))
        source_image.save('./input_image2', "JPEG")
        """

        ret = {'image': im}
        # rpn anchor:
        try:
            if cfg.MODE_FPN:
                multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(
                    im, boxes, is_crowd)
                for i, (anchor_labels,
                        anchor_boxes) in enumerate(multilevel_anchor_inputs):
                    ret['anchor_labels_lvl{}'.format(i + 2)] = anchor_labels
                    ret['anchor_boxes_lvl{}'.format(i + 2)] = anchor_boxes
            else:
                # anchor_labels, anchor_boxes
                ret['anchor_labels'], ret[
                    'anchor_boxes'] = get_rpn_anchor_input(
                        im, boxes, is_crowd)

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            ret['gt_boxes'] = boxes
            ret['gt_labels'] = klass
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                'warn')
            return None

        if cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(roidb['segmentation'])
            segmentation = [
                segmentation[k] for k in range(len(segmentation))
                if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            width_height = np.asarray([width, height], dtype=np.float32)
            for polys in segmentation:
                if not cfg.DATA.ABSOLUTE_COORD:
                    polys = [p * width_height for p in polys]
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(
                    segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret['gt_masks'] = masks

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret