Ejemplo n.º 1
0
def draw_final_outputs(img, results):
    """
    Overlay detection masks, boxes and "class,score" tags on an image.

    Args:
        img: image to draw on; handed back untouched when `results` is empty.
        results: [DetectionResult]

    Returns:
        Whatever `viz.draw_boxes` returns after the masks have been drawn.
    """
    if not len(results):
        return img

    all_boxes = np.asarray([det.box for det in results])

    # Masks are painted from the largest box to the smallest one so that
    # small instances are not hidden underneath big ones.
    canvas = img
    for idx in np.argsort(-np_area(all_boxes)):
        det = results[idx]
        if det.mask is None:
            continue
        canvas = draw_mask(canvas, det.mask)

    # Boxes and their tags stay in the original order of `results`.
    labels = [
        "{},{:.2f}".format(cfg.DATA.CLASS_NAMES[det.class_id], det.score)
        for det in results
    ]
    return viz.draw_boxes(canvas, all_boxes, labels)
Ejemplo n.º 2
0
    def preprocess(img):
        """
        Cut one image into multi-scale chips, save them and log their labels.

        Every chip that holds at least one box is written to OUTPUT_IMG_DIR
        as "<image name>_<chip index>", and (when it has classes) gets one
        line in `out_file`: the chip name followed by
        " <class> <box[0]> <box[1]> <box[2]> <box[3]>" per object.

        Args:
            img (dict): record with 'file_name', 'boxes', 'class', 'is_crowd'.

        Returns:
            list: one `[chip, boxes, classes]` entry per saved chip.
        """
        fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img[
            'class'], img['is_crowd']
        img_name = fname.split('/')[-1]
        # NOTE(review): assumes a 3-character prefix and a 4-character
        # extension around the numeric id -- confirm against the dataset.
        img_id = int(img_name[3:-4])

        # Proposals looked up by image id (per the original comment they come
        # from a pre-trained RPN and serve negative chip extraction).
        # Work on a copy: the conversion below is in place and would
        # otherwise be applied to the shared `proposal_pickle` entry again
        # every time the same image is visited.
        proposals = np.copy(
            proposal_pickle['boxes'][proposal_pickle['ids'].index(img_id)])
        # NOTE(review): this slices the FIRST axis. If `proposals` is an
        # (N, 4) array the conversion should use [:, 2:4] / [:, 0:2] --
        # confirm the stored layout before changing it.
        proposals[2:4] += proposals[0:2]  # from [x,y,w,h] to [x1,y1,x2,y2]

        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"
        chip_generator = Im2Chip(im,
                                 boxes,
                                 klass,
                                 proposals,
                                 cfg.SNIPER.SCALES,
                                 cfg.SNIPER.VALID_RANGES,
                                 is_crowd=is_crowd,
                                 chip_size=cfg.SNIPER.CHIP_SIZE,
                                 chip_stride=cfg.SNIPER.CHIP_STRIDE)
        im, boxes, klass, scale_indices, is_crowd = chip_generator.genChipMultiScale(
        )

        rets = []
        for i, chip in enumerate(im):
            chip_boxes, chip_klass = boxes[i], klass[i]
            # Chips without any ground-truth box are dropped silently.
            if len(chip_boxes) == 0:
                continue

            new_name = '%s_%d' % (img_name, i)
            cv2.imwrite('%s/%s' % (OUTPUT_IMG_DIR, new_name), chip)

            # One annotation line per chip; nothing is written when the chip
            # has no classes (same as the original per-object loop).
            if len(chip_klass):
                fields = ''.join(
                    ' %d %f %f %f %f' %
                    (chip_klass[j], chip_boxes[j][0], chip_boxes[j][1],
                     chip_boxes[j][2], chip_boxes[j][3])
                    for j in range(len(chip_klass)))
                out_file.write(new_name + fields + '\n')

            rets.append([chip, chip_boxes, chip_klass])
        return rets
Ejemplo n.º 3
0
    def preprocess(roidb):
        """
        Build one training datapoint (augmented image + RPN targets).

        Args:
            roidb (dict): record with 'file_name', 'boxes', 'class', 'is_crowd'.

        Returns:
            dict with 'image', 'anchor_labels', 'anchor_boxes', 'gt_boxes'
            and 'gt_labels'; None when MalformedData is raised (for instance
            when no non-crowd box is left).
        """
        path = roidb['file_name']
        gt_boxes = np.copy(roidb['boxes'])
        gt_classes = roidb['class']
        crowd_flags = roidb['is_crowd']

        im = cv2.imread(path, cv2.IMREAD_COLOR)
        assert im is not None, path
        im = im.astype('float32')
        # boxes are expected to be float32 already
        assert gt_boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # Augment the image, then push the boxes through the same transform
        # by way of their corner points.
        im, params = aug.augment_return_params(im)
        corners = aug.augment_coords(box_to_point8(gt_boxes), params)
        gt_boxes = point8_to_box(corners)
        assert np.min(np_area(gt_boxes)) > 0, "Some boxes have zero area!"

        datapoint = {'image': im}
        try:
            rpn_labels, rpn_boxes = get_rpn_anchor_input(
                im, gt_boxes, crowd_flags)
            datapoint['anchor_labels'] = rpn_labels
            datapoint['anchor_boxes'] = rpn_boxes

            # Crowd boxes are not used as training targets.
            non_crowd = crowd_flags == 0
            gt_boxes = gt_boxes[non_crowd]
            gt_classes = gt_classes[non_crowd]
            datapoint['gt_boxes'] = gt_boxes
            datapoint['gt_labels'] = gt_classes
            if len(gt_boxes) == 0:
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as err:
            log_once(
                "Input {} is filtered for training: {}".format(path, str(err)),
                'warn')
            return None
        return datapoint
Ejemplo n.º 4
0
    def preprocess(roidb):
        """
        Build one training datapoint: augmented image, RPN targets, gt boxes
        and labels, plus per-instance masks when cfg.MODE_MASK is set.

        Args:
            roidb (dict): record with 'file_name', 'boxes', 'class',
                'is_crowd' (and 'segmentation' when masks are enabled).

        Returns:
            dict datapoint, or None when MalformedData is raised (for
            instance when no non-crowd box is left).
        """
        path, gt_boxes, gt_classes, crowd_flags = (
            roidb['file_name'], roidb['boxes'], roidb['class'], roidb['is_crowd'])
        gt_boxes = np.copy(gt_boxes)
        im = cv2.imread(path, cv2.IMREAD_COLOR)
        assert im is not None, path
        im = im.astype('float32')
        # boxes are expected to be float32 already
        assert gt_boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # Augment the image, then move the boxes through the same transform
        # via their corner points.
        im, params = aug.augment_return_params(im)
        corners = aug.augment_coords(box_to_point8(gt_boxes), params)
        gt_boxes = point8_to_box(corners)
        assert np.min(np_area(gt_boxes)) > 0, "Some boxes have zero area!"

        datapoint = {'image': im}
        try:
            if not cfg.MODE_FPN:
                # single-level RPN: (anchor_labels, anchor_boxes)
                datapoint['anchor_labels'], datapoint['anchor_boxes'] = \
                    get_rpn_anchor_input(im, gt_boxes, crowd_flags)
            else:
                # one (labels, boxes) pair per level; key suffixes start at 2
                per_level = get_multilevel_rpn_anchor_input(im, gt_boxes, crowd_flags)
                for lvl, (lvl_labels, lvl_boxes) in enumerate(per_level, 2):
                    datapoint['anchor_labels_lvl{}'.format(lvl)] = lvl_labels
                    datapoint['anchor_boxes_lvl{}'.format(lvl)] = lvl_boxes

            # Crowd boxes are not used as training targets.
            non_crowd = crowd_flags == 0
            gt_boxes = gt_boxes[non_crowd]
            gt_classes = gt_classes[non_crowd]
            datapoint['gt_boxes'] = gt_boxes
            datapoint['gt_labels'] = gt_classes
            if len(gt_boxes) == 0:
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as err:
            log_once("Input {} is filtered for training: {}".format(path, str(err)), 'warn')
            return None

        if cfg.MODE_MASK:
            # Deep copy first: augmentation will modify the polys in-place.
            polygons = copy.deepcopy(roidb['segmentation'])
            polygons = [polys for k, polys in enumerate(polygons) if not crowd_flags[k]]
            assert len(polygons) == len(gt_boxes)

            # Transform every polygon like the image, then rasterise one
            # image-sized binary mask per box.
            im_h, im_w = im.shape[0], im.shape[1]
            instance_masks = [
                segmentation_to_mask(
                    [aug.augment_coords(p, params) for p in polys], im_h, im_w)
                for polys in polygons
            ]
            datapoint['gt_masks'] = np.asarray(instance_masks, dtype='uint8')    # values in {0, 1}
        return datapoint
Ejemplo n.º 5
0
    def preprocess(roidb):
        """
        Convert a roidb record into a training datapoint.

        The image is loaded and augmented, the boxes follow the same
        transform, RPN anchor targets are attached (per level when
        cfg.MODE_FPN is set) and, with cfg.MODE_MASK, one binary mask per
        non-crowd box is added.

        Args:
            roidb (dict): record with 'file_name', 'boxes', 'class',
                'is_crowd' (and 'segmentation' when masks are enabled).

        Returns:
            dict datapoint, or None when MalformedData is raised (for
            instance when no non-crowd box is left).
        """
        image_path = roidb['file_name']
        target_boxes = np.copy(roidb['boxes'])
        target_classes = roidb['class']
        crowd_mask = roidb['is_crowd']

        im = cv2.imread(image_path, cv2.IMREAD_COLOR)
        assert im is not None, image_path
        im = im.astype('float32')
        # boxes are expected to be float32 already
        assert target_boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # Image augmentation; boxes are transformed through their corners.
        im, params = aug.augment_return_params(im)
        box_corners = box_to_point8(target_boxes)
        box_corners = aug.augment_coords(box_corners, params)
        target_boxes = point8_to_box(box_corners)
        assert np.min(np_area(target_boxes)) > 0, "Some boxes have zero area!"

        sample = {'image': im}
        try:
            if cfg.MODE_FPN:
                level_inputs = get_multilevel_rpn_anchor_input(im, target_boxes, crowd_mask)
                # key suffixes start at level 2
                for level, pair in enumerate(level_inputs, 2):
                    level_labels, level_boxes = pair
                    sample['anchor_labels_lvl{}'.format(level)] = level_labels
                    sample['anchor_boxes_lvl{}'.format(level)] = level_boxes
            else:
                # (anchor_labels, anchor_boxes)
                sample['anchor_labels'], sample['anchor_boxes'] = \
                    get_rpn_anchor_input(im, target_boxes, crowd_mask)

            # Crowd boxes are skipped in the training target.
            is_instance = crowd_mask == 0
            target_boxes = target_boxes[is_instance]
            target_classes = target_classes[is_instance]
            sample['gt_boxes'] = target_boxes
            sample['gt_labels'] = target_classes
            if len(target_boxes) == 0:
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as err:
            log_once("Input {} is filtered for training: {}".format(image_path, str(err)), 'warn')
            return None

        if not cfg.MODE_MASK:
            return sample

        # Deep copy first: augmentation will modify the polys in-place.
        all_polys = copy.deepcopy(roidb['segmentation'])
        kept_polys = [polys for k, polys in enumerate(all_polys) if not crowd_mask[k]]
        assert len(kept_polys) == len(target_boxes)

        # Augment the polygon coordinates and rasterise one image-sized
        # binary mask per box.
        rows, cols = im.shape[0], im.shape[1]
        rasters = []
        for polys in kept_polys:
            moved = [aug.augment_coords(p, params) for p in polys]
            rasters.append(segmentation_to_mask(moved, rows, cols))
        sample['gt_masks'] = np.asarray(rasters, dtype='uint8')    # values in {0, 1}
        return sample
Ejemplo n.º 6
0
def _augment_boxes(boxes, aug, params):
    """
    Run boxes through an augmentor's coordinate transform.

    Args:
        boxes: boxes accepted by `box_to_point8`.
        aug: augmentor providing `augment_coords(points, params)`.
        params: augmentation parameters forwarded to `aug.augment_coords`.

    Returns:
        The transformed boxes, or None when the smallest transformed box
        area is <= 0.
    """
    corners = aug.augment_coords(box_to_point8(boxes), params)
    transformed = point8_to_box(corners)
    # Degenerate results are reported with None instead of an assertion.
    return None if np.min(np_area(transformed)) <= 0 else transformed
Ejemplo n.º 7
0
    def preprocess(img):
        """
        Build one list-style training datapoint.

        Args:
            img (dict): record with 'file_name', 'boxes', 'class', 'is_crowd'
                (and 'segmentation' when `add_mask` is set).

        Returns:
            list `[image, <anchor inputs...>, boxes, classes]`, with the
            instance masks appended when `add_mask` is set; None when
            MalformedData is raised (for instance no non-crowd box left).
        """
        path = img['file_name']
        gt_boxes, gt_classes, crowd_flags = img['boxes'], img['class'], img['is_crowd']
        im = cv2.imread(path, cv2.IMREAD_COLOR)
        assert im is not None, path
        im = im.astype('float32')
        # boxes are expected to be float32 already
        assert gt_boxes.dtype == np.float32

        # Augment the image, then move the boxes through the same transform
        # via their corner points.
        im, params = aug.augment_return_params(im)
        corners = aug.augment_coords(box_to_point8(gt_boxes), params)
        gt_boxes = point8_to_box(corners)
        assert np.min(np_area(gt_boxes)) > 0, "Some boxes have zero area!"

        try:
            if config.MODE_FPN:
                # flatten the per-level anchor inputs into one sequence
                anchor_inputs = itertools.chain.from_iterable(
                    get_multilevel_rpn_anchor_input(im, gt_boxes, crowd_flags))
            else:
                # (anchor_labels, anchor_boxes)
                anchor_inputs = get_rpn_anchor_input(im, gt_boxes, crowd_flags)
                assert len(anchor_inputs) == 2

            # Crowd boxes are not used as training targets.
            non_crowd = crowd_flags == 0
            gt_boxes = gt_boxes[non_crowd]
            gt_classes = gt_classes[non_crowd]
            if len(gt_boxes) == 0:
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as err:
            log_once("Input {} is filtered for training: {}".format(path, str(err)), 'warn')
            return None

        datapoint = [im]
        datapoint.extend(anchor_inputs)
        datapoint += [gt_boxes, gt_classes]
        # TODO pad im when FPN

        if add_mask:
            # Deep copy first: augmentation will modify the polys in-place.
            polygons = copy.deepcopy(img['segmentation'])
            polygons = [polys for k, polys in enumerate(polygons) if not crowd_flags[k]]
            assert len(polygons) == len(gt_boxes)

            # Transform the polygons like the image and rasterise one
            # image-sized binary mask per box.
            im_h, im_w = im.shape[0], im.shape[1]
            instance_masks = [
                segmentation_to_mask(
                    [aug.augment_coords(p, params) for p in polys], im_h, im_w)
                for polys in polygons
            ]
            datapoint.append(np.asarray(instance_masks, dtype='uint8'))    # values in {0, 1}
        return datapoint
Ejemplo n.º 8
0
    def preprocess(img):
        """
        Build one list-style training datapoint with single-level RPN targets.

        Args:
            img (dict): record with 'file_name', 'boxes', 'class', 'is_crowd'
                (and 'segmentation' when `add_mask` is set).

        Returns:
            list `[image, fm_labels, fm_boxes, boxes, classes]`, with the
            instance masks appended when `add_mask` is set; None when
            MalformedData is raised (for instance no non-crowd box left).
        """
        path = img['file_name']
        gt_boxes, gt_classes, crowd_flags = img['boxes'], img['class'], img['is_crowd']
        im = cv2.imread(path, cv2.IMREAD_COLOR)
        assert im is not None, path
        im = im.astype('float32')
        # boxes are expected to be float32 already
        assert gt_boxes.dtype == np.float32

        # Augment the image; boxes follow through their corner points.
        im, params = aug.augment_return_params(im)
        corners = aug.augment_coords(box_to_point8(gt_boxes), params)
        gt_boxes = point8_to_box(corners)
        assert np.min(np_area(gt_boxes)) > 0, "Some boxes have zero area!"

        try:
            rpn_labels, rpn_boxes = get_rpn_anchor_input(im, gt_boxes, crowd_flags)
            # Crowd boxes are not used as training targets.
            non_crowd = crowd_flags == 0
            gt_boxes = gt_boxes[non_crowd]
            gt_classes = gt_classes[non_crowd]
            if len(gt_boxes) == 0:
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as err:
            log_once("Input {} is filtered for training: {}".format(path, str(err)), 'warn')
            return None

        datapoint = [im, rpn_labels, rpn_boxes, gt_boxes, gt_classes]
        if not add_mask:
            return datapoint

        # Deep copy first: augmentation will modify the polys in-place.
        polygons = copy.deepcopy(img['segmentation'])
        polygons = [polys for k, polys in enumerate(polygons) if not crowd_flags[k]]
        assert len(polygons) == len(gt_boxes)

        # Transform the polygons like the image and rasterise one
        # image-sized binary mask per box.
        im_h, im_w = im.shape[0], im.shape[1]
        instance_masks = []
        for polys in polygons:
            moved = [aug.augment_coords(p, params) for p in polys]
            instance_masks.append(segmentation_to_mask(moved, im_h, im_w))
        datapoint.append(np.asarray(instance_masks, dtype='uint8'))    # values in {0, 1}
        return datapoint
Ejemplo n.º 9
0
    def preprocess(img):
        """
        Build one training datapoint from an image and its multi-instance mask.

        Args:
            img (dict): record with 'image_data' (handed to cv2.imread, i.e.
                used as a file path) and 'id' (used to look up the
                annotation in `df`).

        Returns:
            list `[image, fm_labels, fm_boxes, boxes, classes, masks]`, or
            None when the image has no annotation, when it yields no box or
            a box with non-positive area after augmentation, or when the
            anchor computation raises MalformedData.
        """
        im_path, fname = img['image_data'], img['id']
        multi_mask = getAnnotation(df, fname)
        if multi_mask is None:
            return None

        im = cv2.imread(im_path)
        # cv2.imread returns None for unreadable files; fail here with the
        # offending path instead of deep inside the augmentation pipeline.
        assert im is not None, im_path

        # Joint augmentation of image and mask.
        augmented = strong_aug()(image=im, mask=multi_mask)
        im, multi_mask = augmented['image'], augmented['mask']

        # Resize, then pad both image and mask.
        im, multi_mask = fix_resize_transform_range(im, multi_mask, [768, 2000], 1.0)
        im = pad_to_factor(im)
        multi_mask = pad_to_factor(multi_mask)

        boxes, klass, masks, is_crowd = multi_mask_to_annotation(multi_mask)
        if len(boxes) == 0 or np.min(np_area(boxes)) <= 0:
            log_once("Input have zero area box: {}".format(fname), 'warn')
            return None

        # rpn anchor. `boxes` is known to be non-empty at this point, so only
        # the anchor computation itself can still reject the sample.
        try:
            if config.FPN:
                fm_labels, fm_boxes = get_rpn_anchor_input_FPN(im, boxes, is_crowd)
            else:
                fm_labels, fm_boxes = get_rpn_anchor_input(im, boxes, is_crowd)
        except MalformedData as e:
            log_once("Input {} is filtered for training: {}".format(fname, str(e)), 'warn')
            return None

        return [im, fm_labels, fm_boxes, boxes, klass, masks]
Ejemplo n.º 10
0
    def preprocess(img):
        """
        Load and augment one image together with its boxes.

        Args:
            img (dict): record with 'file_name', 'boxes' and 're_id_class'.

        Returns:
            list `[image, boxes, re_id_class]` after augmentation.
        """
        path = img['file_name']
        gt_boxes = np.copy(img['boxes'])
        reid_labels = img['re_id_class']

        im = cv2.imread(path, cv2.IMREAD_COLOR)
        assert im is not None, path
        im = im.astype('float32')
        # boxes are expected to be float32 already
        assert gt_boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # Augment the image; boxes follow through their corner points.
        im, params = aug.augment_return_params(im)
        corners = aug.augment_coords(box_to_point8(gt_boxes), params)
        gt_boxes = point8_to_box(corners)
        assert np.min(np_area(gt_boxes)) > 0, "Some boxes have zero area!"

        return [im, gt_boxes, reid_labels]
Ejemplo n.º 11
0
    def preprocess(img):
        """
        Build one list-style datapoint that also carries the original image.

        Args:
            img (dict): record with 'file_name', 'boxes', 'class', 'is_crowd'
                and 're_id_class'.

        Returns:
            list `[image, anchor_labels, anchor_boxes, boxes, classes,
            re_id_class, orig_shape, orig_im]`, where `orig_shape` and
            `orig_im` describe the image as loaded (before float conversion
            and augmentation); None when MalformedData is raised (for
            instance no non-crowd box left).
        """
        fname, boxes, klass, is_crowd, re_id_class = img['file_name'], img['boxes'], \
                                                     img['class'], img['is_crowd'], img['re_id_class']
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        # Check the read result BEFORE touching `im`: cv2.imread returns None
        # on failure, and accessing `.shape` first would raise an
        # AttributeError that hides the filename.
        assert im is not None, fname
        orig_shape = im.shape[:2]
        orig_im = np.copy(im)
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation: boxes follow the image transform via their corners
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        # rpn anchor:
        try:
            # anchor_labels, anchor_boxes
            anchor_inputs = get_rpn_anchor_input(im, boxes, is_crowd)
            assert len(anchor_inputs) == 2

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                'warn')
            return None

        # NOTE(review): `re_id_class` is not filtered by `is_crowd`, unlike
        # `boxes` and `klass` -- confirm the consumer expects that.
        ret = [im] + list(anchor_inputs) + [
            boxes, klass, re_id_class, orig_shape, orig_im
        ]

        return ret
Ejemplo n.º 12
0
    def __call__(self, roidb):
        """
        Convert a roidb record into a training datapoint.

        Args:
            roidb (dict): record with "file_name", "boxes", "class",
                "is_crowd" (and "segmentation" when cfg.MODE_MASK is set).
                Boxes and polygons are scaled by the image width/height
                first when cfg.DATA.ABSOLUTE_COORD is false.

        Returns:
            dict with "image", the RPN anchor targets, "gt_boxes",
            "gt_labels" and, with masks enabled, "gt_masks_packed"
            (bit-packed along the last axis); None when MalformedData is
            raised (for instance no non-crowd box left).
        """
        path = roidb["file_name"]
        gt_boxes = np.copy(roidb["boxes"])
        gt_classes = roidb["class"]
        crowd_flags = roidb["is_crowd"]

        im = cv2.imread(path, cv2.IMREAD_COLOR)
        assert im is not None, path
        im = im.astype("float32")
        orig_h, orig_w = im.shape[:2]
        # boxes are expected to be float32 already
        assert gt_boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        if not self.cfg.DATA.ABSOLUTE_COORD:
            # even columns are scaled by the width, odd columns by the height
            gt_boxes[:, 0::2] *= orig_w
            gt_boxes[:, 1::2] *= orig_h

        # Augment the image; boxes follow through their corner points.
        im, params = self.aug.augment_return_params(im)
        corners = self.aug.augment_coords(box_to_point8(gt_boxes), params)
        gt_boxes = point8_to_box(corners)
        assert np.min(np_area(gt_boxes)) > 0, "Some boxes have zero area!"

        datapoint = {"image": im}
        # RPN targets and ground truth:
        try:
            if not self.cfg.MODE_FPN:
                datapoint["anchor_labels"], datapoint["anchor_boxes"] = \
                    self.get_rpn_anchor_input(im, gt_boxes, crowd_flags)
            else:
                per_level = self.get_multilevel_rpn_anchor_input(im, gt_boxes, crowd_flags)
                # key suffixes start at level 2
                for lvl, (lvl_labels, lvl_boxes) in enumerate(per_level, 2):
                    datapoint["anchor_labels_lvl{}".format(lvl)] = lvl_labels
                    datapoint["anchor_boxes_lvl{}".format(lvl)] = lvl_boxes

            # Crowd boxes are not used as training targets.
            non_crowd = crowd_flags == 0
            gt_boxes = gt_boxes[non_crowd]
            gt_classes = gt_classes[non_crowd]
            datapoint["gt_boxes"] = gt_boxes
            datapoint["gt_labels"] = gt_classes
            if len(gt_boxes) == 0:
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as err:
            log_once("Input {} is filtered for training: {}".format(path, str(err)), "warn")
            return None

        if self.cfg.MODE_MASK:
            # Deep copy first: augmentation will modify the polys in-place.
            polygons = copy.deepcopy(roidb["segmentation"])
            polygons = [polys for k, polys in enumerate(polygons) if not crowd_flags[k]]
            assert len(polygons) == len(gt_boxes)

            wh_scale = np.asarray([orig_w, orig_h], dtype=np.float32)
            # Mask width is rounded up to a multiple of 8 so that every row
            # packs into whole bytes.
            padded_w = int(np.ceil(im.shape[1] / 8.0) * 8)

            # One binary mask per box, rasterised from the augmented polygons.
            instance_masks = []
            for polys in polygons:
                if not self.cfg.DATA.ABSOLUTE_COORD:
                    polys = [p * wh_scale for p in polys]
                moved = [self.aug.augment_coords(p, params) for p in polys]
                instance_masks.append(segmentation_to_mask(moved, im.shape[0], padded_w))
            packed = np.packbits(np.asarray(instance_masks, dtype='uint8'), axis=-1)
            datapoint['gt_masks_packed'] = packed
        return datapoint
Ejemplo n.º 13
0
 # Load the ship segmentation table, drop rows with missing values and index
 # it by 'ImageId'; `df` is what getAnnotation() is queried with below.
 csv_path = os.path.join(config.BASEDIR, 'train_ship_segmentations_v2.csv')
 df = pd.read_csv(csv_path, engine="python")
 df = df.dropna(axis=0)
 df = df.set_index('ImageId')
 from tqdm import tqdm
 # Sanity pass over `imgs`: augment every image and stop the process at the
 # first one whose annotation comes out empty or with a zero-area box.
 for img in tqdm(imgs, total=len(imgs)):
     # 'image_data' is handed to cv2.imread below, i.e. it is used as a path.
     im, fname = img['image_data'], img['id']
     # NOTE(review): the result is not checked for None here -- confirm that
     # every entry of `imgs` has an annotation.
     multi_mask = getAnnotation(df, fname)
     
     im = cv2.imread(im)
     #============================
     # Joint augmentation of image and mask (no resize is done in this loop).
     augmented = strong_aug()(image=im, mask=multi_mask)
     im, multi_mask = augmented['image'], augmented['mask']
     boxes, klass, masks, is_crowd = multi_mask_to_annotation(multi_mask)
     if len(boxes) == 0 or np.min(np_area(boxes)) <= 0:
         # Report the offending image, dump its boxes and abort the run.
         log_once("Input have zero area box: {}".format(fname), 'warn')
         print(boxes)
         exit()
     """
     from viz import draw_annotation, draw_mask
     viz = draw_annotation(im, boxes, klass)
     for ind, mask in enumerate(masks):
         viz = draw_mask(viz, mask)
         cv2.imwrite("./eval_gt/{}.jpg".format(fname), viz)
     """
 """    
 # for each gt, find all those anchors (including ties) that has the max ious with it
 ANCHOR_SIZES = (32,64,128,256,512)
 RAIOS = (0.5,1,2)
 #ANCHOR_SIZES = (16, 32, 64, 128, 256)
Ejemplo n.º 14
0
def draw_final_outputs(img, results):
    """
    Draw the detections that survive `box_class_nms` on an image.

    Masks are painted first, from the largest box to the smallest, with a
    color id picked from the detection's class id; then the kept boxes are
    drawn with a "class, score" tag each.

    Args:
        img: image to draw on; returned untouched when `results` is empty.
        results: [DetectionResult]

    Returns:
        Whatever `viz.draw_boxes` returns after the masks have been drawn.
    """
    if len(results) == 0:
        return img

    # Display in largest to smallest order to reduce occlusion
    boxes = np.asarray([r.box for r in results])
    areas = np_area(boxes)
    sorted_inds = np.argsort(-areas)

    # Indices of detections to drop.
    rm_lst = box_class_nms(results, sorted_inds)
    print("rm_lst = ", rm_lst)

    # Surviving detections, largest first; shared by the mask and box passes.
    kept = [results[result_id] for result_id in sorted_inds
            if result_id not in rm_lst]

    ret = img
    for r in kept:
        if r.mask is None:
            continue
        # NOTE(review): this is a substring test on the first token of the
        # class id, so e.g. "12" matches the "1" branch -- confirm the class
        # id scheme.
        level = str(r.class_id).split(" ")[0]
        if "1" in level:
            color_id = 23
        elif "2" in level:
            color_id = 22
        elif "3" in level:
            color_id = 9
        else:
            print("error level!")
            # The original only assigned an unused `color` here and left
            # `color_id` unset, so draw_mask got a stale id from the previous
            # detection or raised UnboundLocalError on the first one.
            # NOTE(review): falls back to the level-1 id because the original
            # comments pair both with green -- confirm the intended color.
            color_id = 23
        ret = draw_mask(ret, r.mask, color=None, color_id=color_id)

    new_boxes = [r.box for r in kept]
    tags = [
        "{}, {:.2f}".format(cfg.DATA.CLASS_NAMES[r.class_id], r.score)
        for r in kept
    ]
    ret = viz.draw_boxes(ret, new_boxes, tags)
    return ret
Ejemplo n.º 15
0
    def __call__(self, roidb):
        """
        Convert a roidb record with two box sets ("house" and "damage") into
        a training datapoint.

        Args:
            roidb (dict): record with "file_name", "boxes_house",
                "boxes_damage", "class", "is_crowd" (and "segmentation" when
                cfg.MODE_MASK is set). Boxes and polygons are scaled by the
                image width/height first when cfg.DATA.ABSOLUTE_COORD is
                false.

        Returns:
            dict with "image", RPN anchor targets for both box sets,
            "gt_boxes_house", "gt_boxes_damage", "gt_labels" and, with masks
            enabled, "gt_masks_packed"; None when MalformedData is raised
            while computing the targets.
        """
        fname, boxes_house, boxes_damage, klass, is_crowd = roidb[
            "file_name"], roidb["boxes_house"], roidb["boxes_damage"], roidb[
                "class"], roidb["is_crowd"]
        # NOTE(review): only the damage boxes are shape-checked (and
        # dtype-checked further down); the house boxes are not.
        assert boxes_damage.ndim == 2 and boxes_damage.shape[
            1] == 4, boxes_damage.shape

        # Copies, because the boxes may be rescaled in place below.
        boxes_house = np.copy(boxes_house)
        boxes_damage = np.copy(boxes_damage)

        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype("float32")
        height, width = im.shape[:2]
        # assume floatbox as input
        assert boxes_damage.dtype == np.float32, "Loader has to return float32 boxes!"

        if not self.cfg.DATA.ABSOLUTE_COORD:
            # even columns are scaled by the width, odd columns by the height
            boxes_house[:, 0::2] *= width
            boxes_house[:, 1::2] *= height
            boxes_damage[:, 0::2] *= width
            boxes_damage[:, 1::2] *= height

        # augmentation: one transform is drawn for the image and reused for
        # every coordinate set below
        tfms = self.aug.get_transform(im)
        im = tfms.apply_image(im)

        # House boxes follow the image transform via their corner points.
        points_house = box_to_point4(boxes_house)
        points_house = tfms.apply_coords(points_house)
        boxes_house = point4_to_box(points_house)
        if len(boxes_house):
            assert klass.max() <= self.cfg.DATA.NUM_CATEGORY, \
                "Invalid category {}!".format(klass.max())
            assert np.min(
                np_area(boxes_house)) > 0, "Some boxes have zero area!"

        # Same treatment for the damage boxes.
        points_damage = box_to_point4(boxes_damage)
        points_damage = tfms.apply_coords(points_damage)
        boxes_damage = point4_to_box(points_damage)
        if len(boxes_damage):
            assert klass.max() <= self.cfg.DATA.NUM_CATEGORY, \
                "Invalid category {}!".format(klass.max())
            assert np.min(
                np_area(boxes_damage)) > 0, "Some boxes have zero area!"

        ret = {"image": im}
        # Add rpn data to dataflow:
        try:
            if self.cfg.MODE_FPN:
                # Two sets of per-level RPN targets, one per box set; key
                # suffixes start at level 2.
                multilevel_anchor_inputs_house = self.get_multilevel_rpn_anchor_input(
                    im, boxes_house, is_crowd)
                for i, (anchor_labels, anchor_boxes_house
                        ) in enumerate(multilevel_anchor_inputs_house):
                    ret["anchor_labels_lvl{}_house".format(i +
                                                           2)] = anchor_labels
                    ret["anchor_boxes_lvl{}_house".format(
                        i + 2)] = anchor_boxes_house

                multilevel_anchor_inputs_damage = self.get_multilevel_rpn_anchor_input(
                    im, boxes_damage, is_crowd)
                for i, (anchor_labels, anchor_boxes_damage
                        ) in enumerate(multilevel_anchor_inputs_damage):
                    ret["anchor_labels_lvl{}_damage".format(i +
                                                            2)] = anchor_labels
                    ret["anchor_boxes_lvl{}_damage".format(
                        i + 2)] = anchor_boxes_damage
            else:
                # NOTE(review): both assignments write ret["anchor_labels"],
                # so the house labels are overwritten by the damage labels
                # (the FPN branch keeps separate *_house / *_damage label
                # keys). Looks unintended -- confirm with the model inputs
                # before renaming any key.
                ret["anchor_labels"], ret[
                    "anchor_boxes_house"] = self.get_rpn_anchor_input(
                        im, boxes_house, is_crowd)
                ret["anchor_labels"], ret[
                    "anchor_boxes_damage"] = self.get_rpn_anchor_input(
                        im, boxes_damage, is_crowd)
            # The same is_crowd mask indexes both box sets and the classes,
            # so all three presumably have one row per annotation -- TODO
            # confirm.
            boxes_house = boxes_house[is_crowd ==
                                      0]  # skip crowd boxes in training target
            boxes_damage = boxes_damage[
                is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            ret["gt_boxes_house"] = boxes_house
            ret["gt_boxes_damage"] = boxes_damage
            ret["gt_labels"] = klass
            # No "no valid gt_boxes" check here: a record without boxes is
            # kept (see the empty-mask branch further down).
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                "warn")
            return None

        if self.cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(roidb["segmentation"])
            segmentation = [
                segmentation[k] for k in range(len(segmentation))
                if not is_crowd[k]
            ]
            # The polygons are matched against the house boxes only.
            assert len(segmentation) == len(boxes_house)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            width_height = np.asarray([width, height], dtype=np.float32)
            gt_mask_width = int(np.ceil(im.shape[1] / 8.0) *
                                8)  # pad to 8 in order to pack mask into bits

            for polys in segmentation:
                if not self.cfg.DATA.ABSOLUTE_COORD:
                    polys = [p * width_height for p in polys]
                polys = [tfms.apply_coords(p) for p in polys]
                masks.append(
                    polygons_to_mask(polys, im.shape[0], gt_mask_width))

            if len(masks):
                masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
                # pack 8 mask columns into one byte along the last axis
                masks = np.packbits(masks, axis=-1)
            else:  # no gt on the image
                masks = np.zeros((0, im.shape[0], gt_mask_width // 8),
                                 dtype='uint8')

            ret['gt_masks_packed'] = masks

            # Debug visualisation, kept disabled:
            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret
Ejemplo n.º 16
0
    def preprocess(roidb_batch):
        """
        Turn a batch of roidb entries into one padded, stacked datapoint.

        Every image is loaded, augmented and given its multilevel RPN anchor
        targets; the per-image results are then padded to a common size and
        stacked along a new leading axis.

        Args:
            roidb_batch: iterable of dicts, each providing 'file_name', 'boxes',
                'class' and 'is_crowd' (and 'segmentation' when cfg.MODE_MASK
                is set).

        Returns:
            dict with the stacked 'anchor_labels_lvl{2..6}' /
            'anchor_boxes_lvl{2..6}' fields, 'images', 'orig_image_dims',
            'orig_gt_counts', 'gt_labels', 'gt_boxes', 'filenames' and, when
            cfg.MODE_MASK is set, 'gt_masks'. None is returned for the whole
            batch as soon as MalformedData is raised for any image (e.g. no
            non-crowd box is left).

        Raises:
            NotImplementedError: if cfg.MODE_FPN is disabled.
        """
        per_image = []
        for entry in roidb_batch:
            fname = entry['file_name']
            raw_boxes = entry['boxes']
            klass = entry['class']
            is_crowd = entry['is_crowd']
            boxes = np.copy(raw_boxes)

            im = cv2.imread(fname, cv2.IMREAD_COLOR)
            assert im is not None, fname
            im = im.astype('float32')
            # boxes are expected to arrive as float32
            assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

            # Augment the image, then push the boxes through the same transform.
            im, params = aug.augment_return_params(im)
            boxes = point8_to_box(aug.augment_coords(box_to_point8(boxes), params))
            assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

            datapoint = {'images': im}
            try:
                if not cfg.MODE_FPN:
                    raise NotImplementedError("[armand] Batch mode only available for FPN")

                # One (labels, boxes) pair per returned level; keys are numbered from 2.
                anchors_per_level = get_multilevel_rpn_anchor_input(im, boxes, is_crowd)
                for level, (anchor_labels, anchor_boxes) in enumerate(anchors_per_level, start=2):
                    datapoint['anchor_labels_lvl{}'.format(level)] = anchor_labels
                    datapoint['anchor_boxes_lvl{}'.format(level)] = anchor_boxes

                # Crowd boxes are excluded from the training targets.
                not_crowd = is_crowd == 0
                boxes = boxes[not_crowd]
                klass = klass[not_crowd]
                datapoint['gt_boxes'] = boxes
                datapoint['gt_labels'] = klass
                datapoint['filename'] = fname
                if len(boxes) == 0:
                    raise MalformedData("No valid gt_boxes!")
            except MalformedData as err:
                log_once("Input {} is filtered for training: {}".format(fname, str(err)), 'warn')
                return None

            if cfg.MODE_MASK:
                # Deep copy first: augmentation will modify the polys in-place.
                all_polys = copy.deepcopy(entry['segmentation'])
                kept_polys = [polys for k, polys in enumerate(all_polys) if not is_crowd[k]]
                assert len(kept_polys) == len(boxes)

                # One image-sized binary mask per remaining box, built from the
                # augmented polygon coordinates.
                mask_list = [
                    segmentation_to_mask(
                        [aug.augment_coords(p, params) for p in polys],
                        im.shape[0], im.shape[1])
                    for polys in kept_polys
                ]
                datapoint['gt_masks'] = np.asarray(mask_list, dtype='uint8')    # values in {0, 1}

            per_image.append(datapoint)

        # ------------------------------------------------------------------
        # Batchify the output
        # ------------------------------------------------------------------

        batch = {}

        # The anchor fields of levels 2..6 are stacked directly.
        for level in range(2, 7):
            for prefix in ("anchor_labels", "anchor_boxes"):
                field = "{}_lvl{}".format(prefix, level)
                batch[field] = np.stack([d[field] for d in per_image])

        # The remaining fields need padding, and the original sizes are stored:
        # - image (HxWx3)
        # - gt_boxes (?x4)
        # - gt_labels (?)
        # - gt_masks (?xHxW)
        #
        # Find the minimum container size for images (maxW x maxH), find the
        # maximum number of ground truth boxes, then pad every image and keep
        # its original dimension.
        if cfg.PREPROC.PREDEFINED_PADDING:
            target_shapes = [get_padding_shape(*(d["images"].shape[:2])) for d in per_image]
            max_height = max(shape[0] for shape in target_shapes)
            max_width = max(shape[1] for shape in target_shapes)
        else:
            max_height = max(d["images"].shape[0] for d in per_image)
            max_width = max(d["images"].shape[1] for d in per_image)

        # Images: zero-pad at the bottom/right up to the container size.
        images = [d["images"] for d in per_image]
        batch["images"] = np.stack([
            np.pad(image,
                   [[0, max_height - image.shape[0]],
                    [0, max_width - image.shape[1]],
                    [0, 0]],
                   'constant')
            for image in images
        ])
        batch["orig_image_dims"] = np.stack([image.shape for image in images])

        # Ground truth: pad every image up to the largest gt count in the batch.
        max_num_gts = max(d["gt_labels"].size for d in per_image)

        gt_counts, label_rows, box_rows, mask_rows = [], [], [], []
        for d in per_image:
            num_gt = d["gt_labels"].size
            gt_counts.append(num_gt)
            missing = max_num_gts - num_gt

            # Padded label slots are filled with -1, padded boxes with zeros.
            label_rows.append(
                np.pad(d["gt_labels"], [0, missing], 'constant', constant_values=-1))
            box_rows.append(
                np.pad(d["gt_boxes"], [[0, missing], [0, 0]], 'constant'))

            if cfg.MODE_MASK:
                image_shape = d["images"].shape
                mask_rows.append(
                    np.pad(d["gt_masks"],
                           [[0, missing],
                            [0, max_height - image_shape[0]],
                            [0, max_width - image_shape[1]]],
                           'constant'))

        batch["orig_gt_counts"] = np.stack(gt_counts)
        batch["gt_labels"] = np.stack(label_rows)
        batch["gt_boxes"] = np.stack(box_rows)
        batch["filenames"] = [d["filename"] for d in per_image]

        if cfg.MODE_MASK:
            batch["gt_masks"] = np.stack(mask_rows)

        return batch
Ejemplo n.º 17
0
    def preprocess(roidb):
        """
        Build one training datapoint from a single roidb entry.

        Args:
            roidb: dict providing 'file_name', 'boxes', 'class' and 'is_crowd'
                (and 'segmentation' when cfg.MODE_MASK is set).

        Returns:
            dict with 'image', the RPN anchor targets ('anchor_labels_lvl{k}' /
            'anchor_boxes_lvl{k}' when cfg.MODE_FPN is set, otherwise
            'anchor_labels' / 'anchor_boxes'), 'gt_boxes', 'gt_labels' and,
            when cfg.MODE_MASK is set, 'gt_masks'. None is returned when
            MalformedData is raised (e.g. no non-crowd box is left).
        """
        fname, boxes, klass, is_crowd = (roidb['file_name'], roidb['boxes'],
                                         roidb['class'], roidb['is_crowd'])
        boxes = np.copy(boxes)
        im = imread(fname)
        assert im is not None, fname
        # Add a trailing channel axis and replicate it three times.
        im = np.expand_dims(im, axis=2)
        im = np.repeat(im, 3, axis=2)
        im = im.astype('float32')
        # Size of the image before augmentation. Needed below to scale relative
        # box / polygon coordinates; leaving these undefined caused a NameError.
        height, width = im.shape[:2]
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        if not cfg.DATA.ABSOLUTE_COORD:
            # Relative coordinates: scale x columns by width, y columns by height.
            boxes[:, 0::2] *= width
            boxes[:, 1::2] *= height

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        ret = {'image': im}
        # rpn anchor:
        try:
            if cfg.MODE_FPN:
                multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(
                    im, boxes, is_crowd)
                # One (labels, boxes) pair per returned level; keys start at lvl2.
                for i, (anchor_labels,
                        anchor_boxes) in enumerate(multilevel_anchor_inputs):
                    ret['anchor_labels_lvl{}'.format(i + 2)] = anchor_labels
                    ret['anchor_boxes_lvl{}'.format(i + 2)] = anchor_boxes
            else:
                # anchor_labels, anchor_boxes
                ret['anchor_labels'], ret[
                    'anchor_boxes'] = get_rpn_anchor_input(
                        im, boxes, is_crowd)

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            ret['gt_boxes'] = boxes
            ret['gt_labels'] = klass
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                'warn')
            return None

        if cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(roidb['segmentation'])
            segmentation = [
                polys for k, polys in enumerate(segmentation)
                if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            width_height = np.asarray([width, height], dtype=np.float32)
            for polys in segmentation:
                if not cfg.DATA.ABSOLUTE_COORD:
                    # Relative polygons are scaled by the pre-augmentation size.
                    polys = [p * width_height for p in polys]
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(
                    segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret['gt_masks'] = masks

        return ret
Ejemplo n.º 18
0
    def preprocess(img):
        """
        Cut one image into multi-scale chips and build a datapoint per chip.

        Args:
            img: dict providing 'file_name', 'boxes', 'class' and 'is_crowd'.

        Returns:
            list (possibly empty) with one entry per kept chip, each of the form
            [chip_image, *anchor_inputs, gt_boxes, gt_labels]. Chips without any
            box, or for which MalformedData is raised, are left out.
        """
        fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img[
            'class'], img['is_crowd']
        img_name = fname.split('/')[-1]
        # The numeric id is the base name minus its first 3 and last 4 characters.
        img_id = int(img_name[3:-4])

        # Proposals from a pretrained RPN, used for negative chip extraction.
        # Work on a copy: converting in place would modify the array stored in
        # proposal_pickle, so the widths/heights would be added again every
        # time the same image is visited.
        proposals = np.array(
            proposal_pickle['boxes'][proposal_pickle['ids'].index(img_id)])
        # from [x,y,w,h] to [x1,y1,x2,y2]; slice the last axis so that every
        # box is converted (a plain [2:4] would select rows of an N x 4 array).
        proposals[..., 2:4] += proposals[..., 0:2]
        # NOTE(review): proposals are handed to Im2Chip without the augmentation
        # applied to the image and boxes below -- confirm this is intended.

        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"
        chip_generator = Im2Chip(im,
                                 boxes,
                                 klass,
                                 proposals,
                                 cfg.SNIPER.SCALES,
                                 cfg.SNIPER.VALID_RANGES,
                                 is_crowd=is_crowd,
                                 chip_size=cfg.SNIPER.CHIP_SIZE,
                                 chip_stride=cfg.SNIPER.CHIP_STRIDE)
        # From here on im / boxes / klass / is_crowd are indexed per chip.
        im, boxes, klass, scale_indices, is_crowd = chip_generator.genChipMultiScale(
        )
        rets = []
        for i in range(len(im)):
            try:
                if len(boxes[i]) == 0:
                    continue
                # Size limits for this chip's scale; -1 disables a limit.
                valid_range = cfg.SNIPER.VALID_RANGES[scale_indices[i]]
                maxbox = sys.maxsize if valid_range[0] == -1 else valid_range[0]
                minbox = 0 if valid_range[1] == -1 else valid_range[1]
                # Boxes outside the limits are passed on as invalid gt.
                gt_invalid = []
                for box in boxes[i]:
                    w = box[2] - box[0]
                    h = box[3] - box[1]
                    if w >= maxbox or h >= maxbox or (w < minbox
                                                      and h < minbox):
                        gt_invalid.append(box)
                # anchor_labels, anchor_boxes
                anchor_inputs = get_sniper_rpn_anchor_input(
                    im[i], boxes[i], is_crowd[i], gt_invalid)
                assert len(anchor_inputs) == 2

                boxes[i] = boxes[i][is_crowd[i] ==
                                    0]  # skip crowd boxes in training target
                klass[i] = klass[i][is_crowd[i] == 0]

                if not len(boxes[i]):
                    raise MalformedData("No valid gt_boxes!")
            except MalformedData as e:
                # Drop only this chip; the remaining chips are still returned.
                log_once(
                    "Input {} is filtered for training: {}".format(
                        fname, str(e)), 'warn')
                continue

            rets.append([im[i]] + list(anchor_inputs) + [boxes[i], klass[i]])
        return rets