def preproc_img(fname, boxes, klass, second_klass, is_crowd, aug):
    im = cv2.imread(fname, cv2.IMREAD_COLOR)
    assert im is not None, fname
    im = im.astype('float32')
    # assume floatbox as input
    assert boxes.dtype == np.float32

    # augmentation:
    im, params = aug.augment_return_params(im)
    points = box_to_point8(boxes)
    points = aug.augment_coords(points, params)
    boxes = point8_to_box(points)

    # rpn anchor:
    try:
        fm_labels, fm_boxes = get_rpn_anchor_input(im, boxes, is_crowd)
        boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
        klass = klass[is_crowd == 0]
        if config.USE_SECOND_HEAD:
            second_klass = second_klass[is_crowd == 0]
        if not len(boxes):
            raise MalformedData("No valid gt_boxes!")
    except MalformedData as e:
        log_once("Input {} is filtered for training: {}".format(fname, str(e)),
                 'warn')
        return None

    if config.USE_SECOND_HEAD:
        ret = [im, fm_labels, fm_boxes, boxes, klass, second_klass]
    else:
        ret = [im, fm_labels, fm_boxes, boxes, klass]
    return ret, params
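These preprocessors all augment boxes by converting them to corner points, warping the points with the image augmentor, and taking the bounding box of the result. A minimal sketch of the two helpers this relies on, modeled on tensorpack's FasterRCNN example (treat the exact indexing as an assumption):

import numpy as np

def box_to_point8(boxes):
    # nx4 (x1, y1, x2, y2) -> (4n)x2: the four corners of every box
    b = boxes[:, [0, 1, 2, 3, 0, 3, 2, 1]]
    return b.reshape((-1, 2))

def point8_to_box(points):
    # (4n)x2 -> nx4: the axis-aligned bounding box of each 4-point group
    p = points.reshape((-1, 4, 2))
    minxy = p.min(axis=1)  # nx2
    maxxy = p.max(axis=1)  # nx2
    return np.concatenate((minxy, maxxy), axis=1)

Taking the min/max after warping means a rotated box is replaced by its enclosing axis-aligned box, which can grow the area slightly.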
Example #2
    def preprocess(img):

        fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img[
            'class'], img['is_crowd']
        img_name = fname.split('/')[-1]
        img_id = int(img_name[3:-4])
        # proposals from a pretrained RPN are used for negative chip extraction

        proposals = proposal_pickle['boxes'][proposal_pickle['ids'].index(
            img_id)]
        proposals[:, 2:4] += proposals[:, 0:2]  # from [x,y,w,h] to [x1,y1,x2,y2]

        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"
        chip_generator = Im2Chip(im,
                                 boxes,
                                 klass,
                                 proposals,
                                 cfg.SNIPER.SCALES,
                                 cfg.SNIPER.VALID_RANGES,
                                 is_crowd=is_crowd,
                                 chip_size=cfg.SNIPER.CHIP_SIZE,
                                 chip_stride=cfg.SNIPER.CHIP_STRIDE)
        im, boxes, klass, scale_indices, is_crowd = chip_generator.genChipMultiScale()
        rets = []
        for i in range(len(im)):
            # chips without any gt box are filtered out of the training set
            if len(boxes[i]) == 0:
                log_once(
                    "Input {} is filtered for training: no valid gt_boxes!".format(fname),
                    'warn')
                continue
            # ret = [im[i]] + list(anchor_inputs) + [boxes[i], klass[i]
            #                                        ] + [scale_indices[i]*len(boxes[i])]
            new_name = '%s_%d' % (img_name, i)
            cv2.imwrite('%s/%s' % (OUTPUT_IMG_DIR, new_name), im[i])

            ret = [im[i]] + [boxes[i], klass[i]]
            for j in range(len(klass[i])):
                if j == 0:
                    out_file.write(new_name)
                out_file.write(' %d %f %f %f %f' %
                               (klass[i][j], boxes[i][j][0], boxes[i][j][1],
                                boxes[i][j][2], boxes[i][j][3]))
                if j == len(klass[i]) - 1:
                    out_file.write('\n')
            rets.append(ret)
        return rets
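For reference, each line written to out_file above has the form

<chip_name> <class> <x1> <y1> <x2> <y2> [<class> <x1> <y1> <x2> <y2> ...]

i.e. the chip's file name followed by one group of five numbers per ground-truth box on that chip.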
Example #3
    def preprocess(roidb):
        fname, boxes, klass, is_crowd = roidb['file_name'], roidb[
            'boxes'], roidb['class'], roidb['is_crowd']
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        ret = {'image': im}
        # rpn anchor:
        try:
            ret['anchor_labels'], ret['anchor_boxes'] = get_rpn_anchor_input(
                im, boxes, is_crowd)
            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            ret['gt_boxes'] = boxes
            ret['gt_labels'] = klass
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                'warn')
            return None
        return ret
Example #4
    def _add_detection_gt(self, img, add_mask):
        """
        Add 'boxes', 'class', 'is_crowd' of this image to the dict, used by detection.
        If add_mask is True, also add 'segmentation' in coco poly format.
        """
        ann_ids = self.coco.getAnnIds(imgIds=img['id'], iscrowd=None)
        objs = self.coco.loadAnns(ann_ids)

        # clean-up boxes
        valid_objs = []
        width = img['width']
        height = img['height']
        for obj in objs:
            if obj.get('ignore', 0) == 1:
                continue
            x1, y1, w, h = obj['bbox']
            # bbox is originally in float
            # x1/y1 means upper-left corner and w/h means true w/h. This can be verified by segmentation pixels.
            # But we do assume that (0.0, 0.0) is upper-left corner of the first pixel
            box = FloatBox(float(x1), float(y1), float(x1 + w), float(y1 + h))
            box.clip_by_shape([height, width])
            # Require non-zero seg area and more than 1x1 box size
            if obj['area'] > 1 and box.is_box() and box.area() >= 4:
                obj['bbox'] = [box.x1, box.y1, box.x2, box.y2]
                valid_objs.append(obj)

                if add_mask:
                    segs = obj['segmentation']
                    if not isinstance(segs, list):
                        assert obj['iscrowd'] == 1
                        obj['segmentation'] = None
                    else:
                        valid_segs = [
                            np.asarray(p).reshape(-1, 2) for p in segs
                            if len(p) >= 6
                        ]
                        if len(valid_segs) < len(segs):
                            log_once(
                                "Image {} has invalid polygons!".format(
                                    img['file_name']), 'warn')

                        obj['segmentation'] = valid_segs

        # all geometrically-valid boxes are returned
        boxes = np.asarray([obj['bbox'] for obj in valid_objs],
                           dtype='float32')  # (n, 4)
        cls = np.asarray([
            COCOMeta.category_id_to_class_id[obj['category_id']]
            for obj in valid_objs
        ],
                         dtype='int32')  # (n,)
        is_crowd = np.asarray([obj['iscrowd'] for obj in valid_objs],
                              dtype='int8')

        # add the keys
        img['boxes'] = boxes  # nx4
        img['class'] = cls  # n, always >0
        img['is_crowd'] = is_crowd  # n,
        if add_mask:
            img['segmentation'] = [obj['segmentation'] for obj in valid_objs]
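FloatBox comes from tensorpack.utils.rect; the three methods used above behave roughly as in this stand-in sketch (an illustration of the assumed semantics, not the actual class):

class FloatBoxSketch:
    def __init__(self, x1, y1, x2, y2):
        self.x1, self.y1, self.x2, self.y2 = x1, y1, x2, y2

    def clip_by_shape(self, shape):
        # shape: [h, w]; clamp all coordinates into the image
        h, w = shape
        self.x1 = min(max(self.x1, 0), w)
        self.x2 = min(max(self.x2, 0), w)
        self.y1 = min(max(self.y1, 0), h)
        self.y2 = min(max(self.y2, 0), h)

    def is_box(self):
        # valid only if it has positive width and height
        return self.x2 > self.x1 and self.y2 > self.y1

    def area(self):
        return (self.x2 - self.x1) * (self.y2 - self.y1)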
Example #5
    def __init__(self, input_tensors, output_tensors,
                 return_input=False, sess=None):
        """
        Args:
            input_tensors (list): list of names.
            output_tensors (list): list of names.
            return_input (bool): same as :attr:`PredictorBase.return_input`.
            sess (tf.Session): the session this predictor runs in. If None,
                will use the default session at the first call.
        """
        self.return_input = return_input
        self.input_tensors = input_tensors
        self.output_tensors = output_tensors
        self.sess = sess
        self._use_callable = get_tf_version_number() >= 1.2

        if self._use_callable:
            if sess is not None:
                self._callable = sess.make_callable(
                    fetches=output_tensors,
                    feed_list=input_tensors,
                    accept_options=self.ACCEPT_OPTIONS)
            else:
                self._callable = None
        else:
            log_once(
                "TF>=1.2 is recommended for better performance of predictor!", 'warn')
Example #6
    def preprocess(roidb):
        fname, boxes, klass, is_crowd = roidb['file_name'], roidb['boxes'], roidb['class'], roidb['is_crowd']
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        ret = {'image': im}
        # rpn anchor:
        try:
            if cfg.MODE_FPN:
                multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(im, boxes, is_crowd)
                for i, (anchor_labels, anchor_boxes) in enumerate(multilevel_anchor_inputs):
                    ret['anchor_labels_lvl{}'.format(i + 2)] = anchor_labels
                    ret['anchor_boxes_lvl{}'.format(i + 2)] = anchor_boxes
            else:
                # anchor_labels, anchor_boxes
                ret['anchor_labels'], ret['anchor_boxes'] = get_rpn_anchor_input(im, boxes, is_crowd)

            boxes = boxes[is_crowd == 0]    # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            ret['gt_boxes'] = boxes
            ret['gt_labels'] = klass
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once("Input {} is filtered for training: {}".format(fname, str(e)), 'warn')
            return None

        if cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(roidb['segmentation'])
            segmentation = [segmentation[k] for k in range(len(segmentation)) if not is_crowd[k]]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            for polys in segmentation:
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')    # values in {0, 1}
            ret['gt_masks'] = masks

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret
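segmentation_to_mask rasterizes the augmented polygons into one binary, image-sized mask per instance; a sketch using pycocotools (following the tensorpack FasterRCNN helper; treat the exact calls as an assumption):

import numpy as np
import pycocotools.mask as cocomask

def segmentation_to_mask(polys, height, width):
    # polys: a list of nx2 float arrays -> (height, width) uint8 mask in {0, 1}
    polys = [p.flatten().tolist() for p in polys]
    rles = cocomask.frPyObjects(polys, height, width)
    rle = cocomask.merge(rles)
    return cocomask.decode(rle)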
Example #8
    def _add_detection_gt(self, img, add_mask):
        """
        Add 'boxes', 'class', 'is_crowd' of this image to the dict, used by detection.
        If add_mask is True, also add 'segmentation' in mafat poly format.
        """
        # ann_ids = self.mafat.getAnnIds(imgIds=img['id'])
        # objs = self.mafat.loadAnns(ann_ids)
        objs = self.coco.imgToAnns[img['id']]  # equivalent but faster than the above two lines

        # clean-up boxes
        valid_objs = []
        width = img['width']
        height = img['height']
        for obj in objs:
            if obj.get('ignore', 0) == 1:
                continue
            x1, y1, w, h = obj['bbox']
            # bbox is originally in float
            # x1/y1 means upper-left corner and w/h means true w/h. This can be verified by segmentation pixels.
            # But we do make an assumption here that (0.0, 0.0) is upper-left corner of the first pixel

            x1 = np.clip(float(x1), 0, width)
            y1 = np.clip(float(y1), 0, height)
            w = np.clip(float(x1 + w), 0, width) - x1
            h = np.clip(float(y1 + h), 0, height) - y1
            # Require non-zero seg area and more than 1x1 box size
            if obj['area'] > 1 and w > 0 and h > 0 and w * h >= 4:
                obj['bbox'] = [x1, y1, x1 + w, y1 + h]
                valid_objs.append(obj)

                if add_mask:
                    segs = obj['segmentation']
                    if not isinstance(segs, list):
                        assert obj['iscrowd'] == 1
                        obj['segmentation'] = None
                    else:
                        valid_segs = [np.asarray(p).reshape(-1, 2).astype('float32') for p in segs if len(p) >= 6]
                        if len(valid_segs) < len(segs):
                            log_once("Image {} has invalid polygons!".format(img['file_name']), 'warn')

                        obj['segmentation'] = valid_segs

        # all geometrically-valid boxes are returned
        boxes = np.asarray([obj['bbox'] for obj in valid_objs], dtype='float32')  # (n, 4)
        cls = np.asarray([
            MAFATMeta.category_id_to_class_id[obj['category_id']]
            for obj in valid_objs], dtype='int32')  # (n,)
        is_crowd = np.asarray([obj['iscrowd'] for obj in valid_objs], dtype='int8')

        # add the keys
        img['boxes'] = boxes        # nx4
        img['class'] = cls          # n, always >0
        img['is_crowd'] = is_crowd  # n,
        if add_mask:
            # also required to be float32
            img['segmentation'] = [
                obj['segmentation'] for obj in valid_objs]
Example #9
    def preprocess(img):
        fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img['class'], img['is_crowd']
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        # rpn anchor:
        try:
            if config.MODE_FPN:
                multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(im, boxes, is_crowd)
                anchor_inputs = itertools.chain.from_iterable(multilevel_anchor_inputs)
            else:
                # anchor_labels, anchor_boxes
                anchor_inputs = get_rpn_anchor_input(im, boxes, is_crowd)
                assert len(anchor_inputs) == 2

            boxes = boxes[is_crowd == 0]    # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once("Input {} is filtered for training: {}".format(fname, str(e)), 'warn')
            return None

        ret = [im] + list(anchor_inputs) + [boxes, klass]
        # TODO pad im when FPN

        if add_mask:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(img['segmentation'])
            segmentation = [segmentation[k] for k in range(len(segmentation)) if not is_crowd[k]]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            for polys in segmentation:
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')    # values in {0, 1}
            ret.append(masks)

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret
Example #10
    def preprocess(img):
        fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img[
            'class'], img['is_crowd']
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)

        # rpn anchor:
        try:
            fm_labels, fm_boxes = get_rpn_anchor_input(im, boxes, klass,
                                                       is_crowd)
            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is invalid for training: {}".format(fname, str(e)),
                'warn')
            return None

        ret = [im, fm_labels, fm_boxes, boxes, klass]

        # masks
        segmentation = img.get('segmentation', None)
        if segmentation is not None:
            segmentation = [
                segmentation[k] for k in range(len(segmentation))
                if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes)

            # one image-sized binary mask per box
            masks = []
            for polys in segmentation:
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(
                    segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret.append(masks)

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret
Example #11
    def _add_detection_gt(self, img, add_mask):
        """
        Add 'boxes', 'class', 'is_crowd' of this image to the dict, used by detection.
        If add_mask is True, also add 'segmentation' in coco poly format.
        """
        # ann_ids = self.coco.getAnnIds(imgIds=img['id'])
        # objs = self.coco.loadAnns(ann_ids)
        objs = self.coco.imgToAnns[img['id']]  # equivalent but faster than the above two lines

        # clean-up boxes
        valid_objs = []
        width = img['width']
        height = img['height']
        for obj in objs:
            if obj.get('ignore', 0) == 1:
                continue
            x1, y1, w, h = obj['bbox']
            # bbox is originally in float
            # x1/y1 means upper-left corner and w/h means true w/h. This can be verified by segmentation pixels.
            # But we do assume that (0.0, 0.0) is upper-left corner of the first pixel
            box = FloatBox(float(x1), float(y1),
                           float(x1 + w), float(y1 + h))
            box.clip_by_shape([height, width])
            # Require non-zero seg area and more than 1x1 box size
            if obj['area'] > 1 and box.is_box() and box.area() >= 4:
                obj['bbox'] = [box.x1, box.y1, box.x2, box.y2]
                valid_objs.append(obj)

                if add_mask:
                    segs = obj['segmentation']
                    if not isinstance(segs, list):
                        assert obj['iscrowd'] == 1
                        obj['segmentation'] = None
                    else:
                        valid_segs = [np.asarray(p).reshape(-1, 2).astype('float32') for p in segs if len(p) >= 6]
                        if len(valid_segs) < len(segs):
                            log_once("Image {} has invalid polygons!".format(img['file_name']), 'warn')

                        obj['segmentation'] = valid_segs

        # all geometrically-valid boxes are returned
        boxes = np.asarray([obj['bbox'] for obj in valid_objs], dtype='float32')  # (n, 4)
        cls = np.asarray([
            COCOMeta.category_id_to_class_id[obj['category_id']]
            for obj in valid_objs], dtype='int32')  # (n,)
        is_crowd = np.asarray([obj['iscrowd'] for obj in valid_objs], dtype='int8')

        # add the keys
        img['boxes'] = boxes        # nx4
        img['class'] = cls          # n, always >0
        img['is_crowd'] = is_crowd  # n,
        if add_mask:
            # also required to be float32
            img['segmentation'] = [
                obj['segmentation'] for obj in valid_objs]
Example #12
    def preprocess(img):
        fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img['class'], img['is_crowd']
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        # rpn anchor:
        try:
            fm_labels, fm_boxes = get_rpn_anchor_input(im, boxes, is_crowd)
            boxes = boxes[is_crowd == 0]    # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once("Input {} is filtered for training: {}".format(fname, str(e)), 'warn')
            return None

        ret = [im, fm_labels, fm_boxes, boxes, klass]

        if add_mask:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(img['segmentation'])
            segmentation = [segmentation[k] for k in range(len(segmentation)) if not is_crowd[k]]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            for polys in segmentation:
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')    # values in {0, 1}
            ret.append(masks)

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret
Example #13
    def add_anchor_to_dp(dp):
        im, boxes, klass, is_crowd, fname = dp
        try:
            fm_labels, fm_boxes = get_rpn_anchor_input(im, boxes, klass, is_crowd)

            boxes = boxes[is_crowd == 0]    # skip crowd boxes in training target
            klass = klass[is_crowd == 0]

            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once("Input {} is invalid for training: {}".format(fname, str(e)), 'warn')
            return None

        return [im, fm_labels, fm_boxes, boxes, klass]
Example #14
    def preprocess(img):
        im, fname = img['image_data'], img['id']
        multi_mask = getAnnotation(df, fname)
        if multi_mask is None:
            return None
        im = cv2.imread(im)
        #============================
        #if random.random() > 0.5:
        #    im = np.fliplr(im) # h, w, 3
        #    multi_mask = np.fliplr(multi_mask)
        #im, multi_mask = do_flip_transpose2(im, multi_mask, type=random.randint(0,7))
        augmented = strong_aug()(image=im, mask=multi_mask)
        im, multi_mask = augmented['image'], augmented['mask']
        #============================
        # Resize
        im, multi_mask = fix_resize_transform_range(im, multi_mask, [768, 2000], 1.0)
        im = pad_to_factor(im)
        multi_mask = pad_to_factor(multi_mask)

        boxes, klass, masks, is_crowd = multi_mask_to_annotation(multi_mask)
        if len(boxes) == 0 or np.min(np_area(boxes)) <= 0:
            log_once("Input have zero area box: {}".format(fname), 'warn')
            return None
        # rpn anchor:
        try:
            if config.FPN:
                fm_labels, fm_boxes = get_rpn_anchor_input_FPN(im, boxes, is_crowd)
            else:
                fm_labels, fm_boxes = get_rpn_anchor_input(im, boxes, is_crowd)
                
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once("Input {} is filtered for training: {}".format(fname, str(e)), 'warn')
            return None

        ret = [im, fm_labels, fm_boxes, boxes, klass, masks]
        """
        from viz import draw_annotation, draw_mask
        viz = draw_annotation(im, boxes, klass)
        for ind, mask in enumerate(masks):
            viz = draw_mask(viz, mask)
            cv2.imwrite("./test_{}.jpg".format(np.random.rand()), viz)
        if (len(boxes) > 3):
            exit()
        """
        return ret
Example #15
    def preprocess(img):
        fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img[
            'class'], img['is_crowd']
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32
        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)

        # rpn anchor:
        try:
            fm_labels, fm_boxes = get_rpn_anchor_input(im, boxes, is_crowd)
            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                'warn')
            return None

        ret = [im, fm_labels, fm_boxes, boxes, klass]

        # masks
        if add_mask:
            masks = img['masks']
            masks = [np.dstack([m, m, m]) for m in masks]
            masks = [aug.augment(m)[..., 0] for m in masks]
            assert len(boxes) == np.asarray(masks).shape[0]
            ret.append(masks)


#           from viz import draw_annotation, draw_mask
#           viz = draw_annotation(im, boxes, klass)
#           for mask in masks:
#               viz = draw_mask(viz, mask)
#           tpviz.interactive_imshow(viz)
#           input()

        return ret
Example #16
 def fp16_getter(getter, *args, **kwargs):
     name = args[0] if len(args) else kwargs['name']
     if not name.endswith('/W') and not name.endswith('/b'):
         # Following convention, only convolution & fc parameters (``W``/``b``)
         # are quantized; BatchNorm (gamma & beta) is not.
         return getter(*args, **kwargs)
     else:
         if kwargs['dtype'] == tf.float16:
             kwargs['dtype'] = tf.float32
             ret = getter(*args, **kwargs)
             ret = tf.cast(ret, tf.float16)
             log_once("Variable {} cast to fp16 ...".format(name))
             return ret
         else:
             return getter(*args, **kwargs)
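A usage sketch for such a getter: install it on a variable scope via TF1's custom_getter, so that every variable named .../W or .../b created inside is stored in fp32 but handed back as fp16 (the scope name and shape here are made up):

 with tf.variable_scope('conv1', custom_getter=fp16_getter):
     # full name 'conv1/W' ends with '/W': created as float32, returned as float16
     W = tf.get_variable('W', [3, 3, 16, 32], dtype=tf.float16)
 assert W.dtype == tf.float16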
Example #17
    def preprocess(roidb):
        fname, boxes, klass, is_crowd = roidb['file_name'], roidb[
            'boxes'], roidb['class'], roidb['is_crowd']
        boxes = np.copy(boxes)
        im = imread(fname)
        assert im is not None, fname
        im = np.expand_dims(im, axis=2)
        im = np.repeat(im, 3, axis=2)
        im = im.astype('float32')
        height, width = im.shape[:2]  # needed below for relative-coordinate boxes and masks
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        if not cfg.DATA.ABSOLUTE_COORD:
            boxes[:, 0::2] *= width
            boxes[:, 1::2] *= height

        #source_image = Image.fromarray(im.astype('uint8'))
        #imsave('./input_image1', im[:,:,1].astype(np.float32), imagej=True)
        """
        draw = ImageDraw.Draw(source_image)
        for i, bbox in enumerate(boxes):
            # tmp_x = bbox[2] - bbox[0]
            # tmp_y = bbox[3] - bbox[1]
            # draw.rectangle((bbox[0], bbox[1], tmp_x, tmp_y), outline='red')
            draw.rectangle((bbox[0], bbox[1], bbox[2], bbox[3]), outline='red')
            #draw.text((bbox[0] + 5, bbox[1] + 5), str(klass_tmp[i]))
        source_image.save('./input_image1', "JPEG")

        """
        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        klass_tmp = np.copy(klass)
        #print(klass)

        #imsave('./input_image2', im[:,:,1].astype(np.float32), imagej=True)
        """
        source_image = Image.fromarray(im.astype('uint8'))
        draw = ImageDraw.Draw(source_image)
        for i, bbox in enumerate(boxes):
            # tmp_x = bbox[2] - bbox[0]
            # tmp_y = bbox[3] - bbox[1]
            # draw.rectangle((bbox[0], bbox[1], tmp_x, tmp_y), outline='red')
            draw.rectangle((bbox[0], bbox[1], bbox[2], bbox[3]), outline='red')
            #draw.text((bbox[0]+5, bbox[1]+5), str(klass_tmp[i]))
        source_image.save('./input_image2', "JPEG")
        """

        ret = {'image': im}
        # rpn anchor:
        try:
            if cfg.MODE_FPN:
                multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(
                    im, boxes, is_crowd)
                for i, (anchor_labels,
                        anchor_boxes) in enumerate(multilevel_anchor_inputs):
                    ret['anchor_labels_lvl{}'.format(i + 2)] = anchor_labels
                    ret['anchor_boxes_lvl{}'.format(i + 2)] = anchor_boxes
            else:
                # anchor_labels, anchor_boxes
                ret['anchor_labels'], ret[
                    'anchor_boxes'] = get_rpn_anchor_input(
                        im, boxes, is_crowd)

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            ret['gt_boxes'] = boxes
            ret['gt_labels'] = klass
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                'warn')
            return None

        if cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(roidb['segmentation'])
            segmentation = [
                segmentation[k] for k in range(len(segmentation))
                if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            width_height = np.asarray([width, height], dtype=np.float32)
            for polys in segmentation:
                if not cfg.DATA.ABSOLUTE_COORD:
                    polys = [p * width_height for p in polys]
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(
                    segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret['gt_masks'] = masks

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret
Example #18
def Conv(inputs,
         filters,
         kernel_size,
         strides=(1, 1),
         padding='same',
         data_format='channels_last',
         dilation_rate=(1, 1),
         activation=None,
         use_bias=True,
         kernel_initializer=None,
         bias_initializer=tf.zeros_initializer(),
         kernel_regularizer=None,
         bias_regularizer=None,
         activity_regularizer=None,
         split=1,
         norm=False):
    """
    Similar to `tf.layers.Conv2D`, but with some differences:
    1. Default kernel initializer is variance_scaling_initializer(2.0).
    2. Default padding is 'same'.
    3. Support 'split' argument to do group convolution.
    Variable Names:
    * ``W``: weights
    * ``b``: bias
    """
    if kernel_initializer is None:
        if get_tf_version_tuple() <= (1, 12):
            kernel_initializer = tf.contrib.layers.variance_scaling_initializer(
                2.0)  # deprecated
        else:
            kernel_initializer = tf.keras.initializers.VarianceScaling(
                2.0, distribution='untruncated_normal')
    dilation_rate = shape2d(dilation_rate)

    if True:
        # group conv implementation
        data_format = get_data_format(data_format, keras_mode=False)
        in_shape = inputs.get_shape().as_list()
        channel_axis = 3 if data_format == 'NHWC' else 1
        in_channel = in_shape[channel_axis]
        assert in_channel is not None, "[Conv2D] Input cannot have unknown channel!"
        assert in_channel % split == 0

        assert kernel_regularizer is None and bias_regularizer is None and activity_regularizer is None, \
            "Not supported by group conv or dilated conv!"

        out_channel = filters
        assert out_channel % split == 0
        assert dilation_rate == [1, 1] or get_tf_version_tuple() >= (
            1, 5), 'TF>=1.5 required for dilated conv.'

        kernel_shape = shape2d(kernel_size)
        filter_shape = kernel_shape + [in_channel // split, out_channel]
        stride = shape4d(strides, data_format=data_format)

        kwargs = {"data_format": data_format}
        if get_tf_version_tuple() >= (1, 5):
            kwargs['dilations'] = shape4d(dilation_rate,
                                          data_format=data_format)

        # match the input dtype (e.g. tf.float16), since the default dtype of variables is tf.float32
        inputs_dtype = inputs.dtype
        W = tf.get_variable('parseweigth',
                            filter_shape,
                            dtype=inputs_dtype,
                            initializer=kernel_initializer)
        if norm:
            use_bias = False
            W = tf.reshape(W, kernel_shape + [4, in_channel // 4, out_channel])
            W = tf.nn.softmax(W, 2)
            W = tf.reshape(W, filter_shape)
        #dynamics = tf.reduce_mean(inputs, 0)
        #dynamics = tf.transpose(dynamics, [1,2,0])
        #dynamics = tf.image.resize_images(dynamics, kernel_shape)
        #dynamics = tf.expand_dims(dynamics, -1)
        #W = W  +  0.001 * dynamics #tf.random_normal(shape = tf.shape(W), mean = 0.0, stddev = 0.012, dtype = tf.float32)

        #W = W *tf.random_uniform(shape=W.get_shape().as_list(), minval=0., maxval=2.)

        if use_bias:
            b = tf.get_variable('parsebias', [out_channel],
                                dtype=inputs_dtype,
                                initializer=bias_initializer)

        if split == 1:
            conv = tf.nn.conv2d(inputs, W, stride, padding.upper(), **kwargs)
        else:
            try:
                conv = tf.nn.conv2d(inputs, W, stride, padding.upper(),
                                    **kwargs)
            except ValueError:
                log_once(
                    "CUDNN group convolution support is only available with "
                    "https://github.com/tensorflow/tensorflow/pull/25818 . "
                    "Will fall back to a loop-based slow implementation instead!",
                    'warn')
                # loop-based fallback: convolve each channel group separately,
                # then concatenate (otherwise `conv` would stay undefined here)
                input_groups = tf.split(inputs, split, channel_axis)
                kernel_groups = tf.split(W, split, 3)
                outputs = [
                    tf.nn.conv2d(i, k, stride, padding.upper(), **kwargs)
                    for i, k in zip(input_groups, kernel_groups)
                ]
                conv = tf.concat(outputs, channel_axis)

        ret = tf.nn.bias_add(conv, b,
                             data_format=data_format) if use_bias else conv
        if activation is not None:
            ret = activation(ret)
        ret = tf.identity(ret, name='output')

        ret.variables = VariableHolder(W=W)
        if use_bias:
            ret.variables.b = b
    return ret
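A call sketch for the group-convolution path (assuming NHWC input and that the tensorpack helpers shape2d/shape4d/get_data_format/VariableHolder are importable; the shapes are made up):

x = tf.placeholder(tf.float32, [None, 32, 32, 64])
with tf.variable_scope('block1'):
    # 128 output channels in 4 groups: each group convolves 16 input
    # channels into 32 output channels with a 3x3 kernel
    y = Conv(x, filters=128, kernel_size=3, split=4)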
Example #19
 df = pd.read_csv(csv_path, engine="python")
 df = df.dropna(axis=0)
 df = df.set_index('ImageId')
 from tqdm import tqdm
 for img in tqdm(imgs, total=len(imgs)):
     im, fname = img['image_data'], img['id']
     multi_mask = getAnnotation(df, fname)
     
     im = cv2.imread(im)
     #============================
     # Resize
     augmented = strong_aug()(image=im, mask=multi_mask)
     im, multi_mask = augmented['image'], augmented['mask']
     boxes, klass, masks, is_crowd = multi_mask_to_annotation(multi_mask)
     if len(boxes) == 0 or np.min(np_area(boxes)) <= 0:
         log_once("Input have zero area box: {}".format(fname), 'warn')
         print(boxes)
         exit()
     """
     from viz import draw_annotation, draw_mask
     viz = draw_annotation(im, boxes, klass)
     for ind, mask in enumerate(masks):
         viz = draw_mask(viz, mask)
         cv2.imwrite("./eval_gt/{}.jpg".format(fname), viz)
     """
 """    
 # for each gt, find all those anchors (including ties) that has the max ious with it
 ANCHOR_SIZES = (32,64,128,256,512)
 RAIOS = (0.5,1,2)
 #ANCHOR_SIZES = (16, 32, 64, 128, 256)
 from tensorpack.dataflow import PrintData
Example #20
    def __call__(self, roidb):  #
        fname, boxes, klass, is_crowd = roidb["file_name"], roidb[
            "boxes"], roidb["class"], roidb["is_crowd"]
        assert boxes.ndim == 2 and boxes.shape[1] == 4, boxes.shape
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype("float32")
        height, width = im.shape[:2]
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return float32 boxes!"

        if not self.cfg.DATA.ABSOLUTE_COORD:
            boxes[:, 0::2] *= width
            boxes[:, 1::2] *= height

        ret = {}

        tfms = self.aug_weak.get_transform(im)
        im = tfms.apply_image(im)
        points = box_to_point8(boxes)
        points = tfms.apply_coords(points)
        boxes = point8_to_box(points)
        h, w = im.shape[:2]
        if self.aug_type != "default":
            boxes_backup = boxes.copy()
            try:
                assert len(boxes) > 0, "boxes became empty after resizing"
                assert np.sum(np_area(boxes)) > 0, "boxes are all zero area!"
                bbs = array_to_bb(boxes)
                images_aug, bbs_aug, _ = self.aug_strong(images=[im],
                                                         bounding_boxes=[bbs],
                                                         n_real_box=len(bbs))

                # convert to gt boxes array
                boxes = bb_to_array(bbs_aug[0])
                boxes[:, 0] = np.clip(boxes[:, 0], 0, w)
                boxes[:, 1] = np.clip(boxes[:, 1], 0, h)
                boxes[:, 2] = np.clip(boxes[:, 2], 0, w)
                boxes[:, 3] = np.clip(boxes[:, 3], 0, h)

                # after affine, some boxes can be zero area. Let's remove them and their corresponding info
                boxes, mask = remove_empty_boxes(boxes)
                klass = klass[mask]
                assert len(klass) > 0, "Empty boxes and klass after removing empty ones"
                is_crowd = np.array([0] * len(klass))  # we do not have crowd annotations
                assert klass.max() <= self.cfg.DATA.NUM_CATEGORY, \
                    "Invalid category {}!".format(klass.max())
                assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"
                im = images_aug[0]
            except Exception as e:
                logger.warn("Error caught: " + str(e) +
                            "\nUsing non-augmented data.")
                boxes = boxes_backup

        ret["image"] = im

        try:
            # Add rpn data to dataflow:
            if self.cfg.MODE_FPN:
                multilevel_anchor_inputs = self.get_multilevel_rpn_anchor_input(
                    im, boxes, is_crowd)
                for i, (anchor_labels,
                        anchor_boxes) in enumerate(multilevel_anchor_inputs):
                    ret["anchor_labels_lvl{}".format(i + 2)] = anchor_labels
                    ret["anchor_boxes_lvl{}".format(i + 2)] = anchor_boxes
            else:
                ret["anchor_labels"], ret[
                    "anchor_boxes"] = self.get_rpn_anchor_input(
                        im, boxes, is_crowd)

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            ret["gt_boxes"] = boxes
            ret["gt_labels"] = klass

        except Exception as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                "warn")
            return None

        return ret
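remove_empty_boxes is not shown above; based on how it is used (it returns the surviving boxes plus a mask that also filters klass), a plausible sketch:

def remove_empty_boxes(boxes):
    # keep boxes with positive width and height; also return the keep mask
    mask = (boxes[:, 2] > boxes[:, 0]) & (boxes[:, 3] > boxes[:, 1])
    return boxes[mask], mask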
def _preprocess_common(ref_box, target_box, ref_im, target_im, aug):
    ref_boxes = np.array([ref_box], dtype=np.float32)
    target_boxes = np.array([target_box], dtype=np.float32)
    klass = np.array([1], dtype=np.int32)

    # augmentation:
    target_im, target_params = aug.augment_return_params(target_im)
    ref_im, ref_params = aug.augment_return_params(ref_im)
    ref_boxes = _augment_boxes(ref_boxes, aug, ref_params)
    target_boxes = _augment_boxes(target_boxes, aug, target_params)
    if ref_boxes is None or target_boxes is None:
        return None

    # additional augmentations:
    # motion blur
    if cfg.DATA.MOTION_BLUR_AUGMENTATIONS:
        do_motion_blur_ref = np.random.rand() < 0.25
        if do_motion_blur_ref:
            # generating the kernel
            kernel_size = np.random.randint(5, 15)
            kernel_motion_blur = np.zeros((kernel_size, kernel_size))
            kernel_motion_blur[int(
                (kernel_size - 1) / 2), :] = np.ones(kernel_size)
            kernel_motion_blur = kernel_motion_blur / kernel_size
            # applying the kernel
            ref_im = cv2.filter2D(ref_im, -1, kernel_motion_blur)
        do_motion_blur_target = np.random.rand() < 0.25
        if do_motion_blur_target:
            # generating the kernel
            kernel_size = np.random.randint(5, 15)
            kernel_motion_blur = np.zeros((kernel_size, kernel_size))
            kernel_motion_blur[int(
                (kernel_size - 1) / 2), :] = np.ones(kernel_size)
            kernel_motion_blur = kernel_motion_blur / kernel_size
            # applying the kernel
            target_im = cv2.filter2D(target_im, -1, kernel_motion_blur)

    # grayscale
    if cfg.DATA.GRAYSCALE_AUGMENTATIONS:
        do_grayscale = np.random.rand() < 0.25
        if do_grayscale:
            grayscale_aug = imgaug.Grayscale()
            ref_im = np.tile(grayscale_aug.augment(ref_im), [1, 1, 3])
            target_im = np.tile(grayscale_aug.augment(target_im), [1, 1, 3])

    if cfg.DATA.DEBUG_VIS:
        import matplotlib.pyplot as plt
        ref_im_vis = ref_im.copy()
        #ref_im_vis[int(ref_boxes[0][1]):int(ref_boxes[0][3]), int(ref_boxes[0][0]):int(ref_boxes[0][2]), 0] = 255
        ref_im_vis[int(ref_boxes[0][1]):int(ref_boxes[0][3]), int(ref_boxes[0][0]):int(ref_boxes[0][2]), 2] = \
            (0.5 * ref_im_vis[int(ref_boxes[0][1]):int(ref_boxes[0][3]), int(ref_boxes[0][0]):int(ref_boxes[0][2]), 2] + 120).astype(np.uint8)
        plt.imshow(ref_im_vis[..., ::-1])
        plt.show()
        target_im_vis = target_im.copy()
        target_im_vis[int(target_boxes[0][1]):int(target_boxes[0][3]), int(target_boxes[0][0]):int(target_boxes[0][2]), 2] = \
            (0.5 * target_im_vis[int(target_boxes[0][1]):int(target_boxes[0][3]), int(target_boxes[0][0]):int(target_boxes[0][2]), 2] + 120).astype(np.uint8)
        plt.imshow(target_im_vis[..., ::-1])
        plt.show()

    is_crowd = np.array([0], dtype=np.int32)
    ret = {'ref_image': ref_im, 'ref_box': ref_boxes[0], 'image': target_im}
    if cfg.DATA.DEBUG_VIS:
        return ret

    # rpn anchor:
    try:
        if cfg.MODE_FPN:
            multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(
                target_im, target_boxes, is_crowd)
            for i, (anchor_labels,
                    anchor_boxes) in enumerate(multilevel_anchor_inputs):
                ret['anchor_labels_lvl{}'.format(i + 2)] = anchor_labels
                ret['anchor_boxes_lvl{}'.format(i + 2)] = anchor_boxes
        else:
            # anchor_labels, anchor_boxes
            ret['anchor_labels'], ret['anchor_boxes'] = get_rpn_anchor_input(
                target_im, target_boxes, is_crowd)
        ret['gt_boxes'] = target_boxes
        ret['gt_labels'] = klass
        if not len(target_boxes):
            raise MalformedData("No valid gt_boxes!")
    except MalformedData as e:
        log_once("Input is filtered for training: {}".format(str(e)), 'warn')
        return None
    return ret
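The motion-blur branch above duplicates the same kernel construction for the reference and target frames; a sketch of that logic factored into a helper (a possible refactor, not the original code):

def _maybe_motion_blur(im, prob=0.25):
    # with probability `prob`, convolve with a random horizontal-line kernel
    if np.random.rand() >= prob:
        return im
    kernel_size = np.random.randint(5, 15)
    kernel = np.zeros((kernel_size, kernel_size), dtype=np.float32)
    kernel[(kernel_size - 1) // 2, :] = 1.0 / kernel_size
    return cv2.filter2D(im, -1, kernel)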
def _maybe_add_hard_example_data(data, ref_fname, vid_name, hard_example_index,
                                 hard_example_names, dataset_name):
    if not cfg.MODE_HARD_MINING:
        return data
    data = data.copy()
    name_for_idx = dataset_name + "/" + vid_name + "/"

    if dataset_name == "GOT10k":
        name_for_idx += ref_fname.split("/")[-1].replace(".jpg", "")
        this_fmt = "%08d"
    elif dataset_name == "ImageNetVID":
        name_for_idx += str(int(ref_fname.split("/")[-1].replace(".JPEG", "")))
        this_fmt = "%06d"
    elif dataset_name == "LaSOT":
        name_for_idx += str(int(ref_fname.split("/")[-1].replace(".jpg", "")))
        this_fmt = "%08d"
    elif dataset_name == "YouTubeVOS":
        name_for_idx += str(int(ref_fname.split("/")[-1].replace(".jpg", "")))
        this_fmt = "%05d"
    else:
        assert False, ("unknown dataset", dataset_name)

    try:
        idx = hard_example_names["all"].index(name_for_idx)
    except ValueError:
        log_once("Not found in index: {}".format(name_for_idx), 'warn')
        return None
    if dataset_name == "LaSOT":
        nns = hard_example_index.get_nns_by_item(idx,
                                                 cfg.HARD_MINING_KNN_LASOT)
    else:
        nns = hard_example_index.get_nns_by_item(idx, cfg.HARD_MINING_KNN)
    if cfg.MODE_HARD_NEGATIVES_ONLY_CROSSOVER or \
          (cfg.MODE_HARD_NEGATIVES_ONLY_CROSSOVER_YOUTUBEVOS and dataset_name == "YouTubeVOS"):
        nn_names = [hard_example_names["all"][nn] for nn in nns]
        nn_datasets = [x.split("/")[0] for x in nn_names]
        nns = [nn for nn, ds_ in zip(nns, nn_datasets) if ds_ != dataset_name]
        remove_query = False
    else:
        remove_query = True
    nns = subsample_nns(vid_name,
                        nns,
                        hard_example_names["all"],
                        cfg.N_HARD_NEGATIVES_TO_SAMPLE,
                        remove_query=remove_query)

    feats = []
    for nn in nns:
        sp = hard_example_names["all"][nn].split("/")
        if sp[0] == "GOT10k":
            fmt = "%08d"
        elif sp[0] == "ImageNetVID":
            fmt = "%06d"
        elif sp[0] == "LaSOT":
            fmt = "%08d"
        elif sp[0] == "YouTubeVOS":
            fmt = "%05d"
        else:
            assert False, ("unknown dataset", sp[0])

        feat_fn = os.path.join(cfg.HARD_MINING_DATA_PATH, sp[0],
                               "det_feats_compressed", sp[1],
                               fmt % int(sp[2]) + ".npz")
        feat = np.load(feat_fn)
        feat = feat["f"]
        feats.append(feat)
    feats = np.stack(feats, axis=0)
    data['hard_negative_features'] = feats

    if cfg.MODE_IF_HARD_MINING_THEN_ALSO_POSITIVES:
        hard_example_names_dataset = hard_example_names[dataset_name]

        #hpens_oldversion = [x for x in hard_example_names_dataset if x.startswith(vid_name)]
        left = right = bisect.bisect_left(hard_example_names_dataset, vid_name)
        while left > 0:
            if hard_example_names_dataset[left - 1].startswith(vid_name):
                left -= 1
            else:
                break
        while right < len(hard_example_names_dataset):
            if hard_example_names_dataset[right].startswith(vid_name):
                right += 1
            else:
                break
        hpens = hard_example_names_dataset[left:right]

        assert len(hpens) > 0, vid_name
        random.shuffle(hpens)
        hpens = hpens[:cfg.N_HARD_POS_TO_SAMPLE]
        feats = []
        ious = []
        gt_boxes = []
        jitter_boxes = []
        for hpen in hpens:
            sp = hpen.split("/")
            feat_fn = os.path.join(cfg.HARD_MINING_DATA_PATH, dataset_name,
                                   "det_feats_compressed", sp[0],
                                   this_fmt % int(sp[1]) + ".npz")
            npz_data = np.load(feat_fn)
            feat = npz_data["f"]
            iou_data = npz_data["i"]

            feats.append(feat)
            iou = [float(x) for x in iou_data[-3:]]
            ious.append(iou)
            box_xyxy = [float(x) for x in iou_data[:4]]
            gt_boxes.append(box_xyxy)
            jitter_box_xyxy = np.array([float(x) for x in iou_data[4:16]
                                        ]).reshape(3, 4)
            jitter_boxes.append(jitter_box_xyxy)
        feats = np.stack(feats, axis=0)
        # atm just sample from same sequence, does not need to be hard
        data['hard_positive_features'] = feats
        data['hard_positive_ious'] = np.stack(ious, axis=0)
        data['hard_positive_gt_boxes'] = np.stack(gt_boxes, axis=0)
        data['hard_positive_jitter_boxes'] = np.stack(jitter_boxes, axis=0)
    return data
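The bisect loop in _maybe_add_hard_example_data collects the contiguous run of sorted names sharing the vid_name prefix; an equivalent compact sketch:

import bisect

def prefix_range(sorted_names, prefix):
    # all entries of the sorted list that start with `prefix`
    left = bisect.bisect_left(sorted_names, prefix)
    right = bisect.bisect_left(sorted_names, prefix + '\uffff', left)
    return sorted_names[left:right]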
Example #23
    def preprocess(roidb):
        fname = roidb['img']
        x1, y1, w, h = np.split(roidb['bbox'], 4, axis=1)
        boxes = np.concatenate([x1, y1, x1 + w, y1 + h], axis=1)

        klass = np.ones(len(roidb['bbox']), dtype=np.int32)

        # the 14 binary person-attribute labels, passed through to the output
        attributes = ['male', 'longhair', 'sunglass', 'hat', 'tshirt',
                      'longsleeve', 'formal', 'shorts', 'jeans', 'longpants',
                      'skirt', 'facemask', 'logo', 'stripe']

        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        ret = {'image': im}
        # rpn anchor:
        try:

            # anchor_labels, anchor_boxes
            ret['anchor_labels'], ret['anchor_boxes'] = get_rpn_anchor_input(
                im, boxes, np.zeros(len(boxes), dtype=int))
            ret['gt_boxes'] = boxes
            ret['gt_labels'] = klass

            for attr in attributes:
                ret[attr] = roidb[attr]

            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                'warn')
            return None
        return ret
Example #24
    def _add_detection_gt(
            self, img, add_mask
    ):  # 디텍션을 위해 박스와 클래스와 is_crowd를 만든다. 이것인 ground truth인가?@@@@@
        """
        Add 'boxes', 'class', 'is_crowd' of this image to the dict, used by detection.
        If add_mask is True, also add 'segmentation' in coco poly format.
        """
        # ann_ids = self.coco.getAnnIds(imgIds=img['id'])
        # objs = self.coco.loadAnns(ann_ids)
        objs = self.coco.imgToAnns[img[
            'id']]  # equivalent but faster than the above two lines # id 값을 통해 이미지 객체 만든다.

        # clean up the boxes
        valid_objs = []
        width = img['width']
        height = img['height']
        for obj in objs:
            # Skip annotations explicitly flagged to be ignored.
            if obj.get('ignore', 0) == 1:
                continue
            x1, y1, w, h = obj['bbox']  # COCO stores boxes as [x, y, w, h]
            # bbox is originally in float
            # x1/y1 means upper-left corner and w/h means true w/h. This can be verified by segmentation pixels.
            # But we do assume that (0.0, 0.0) is upper-left corner of the first pixel
            # Convert [x1, y1, w, h] to a float [x1, y1, x2, y2] box and
            # clip it to the image boundary.
            box = FloatBox(
                float(x1), float(y1), float(x1 + w), float(y1 + h))
            box.clip_by_shape([height, width])
            # Require non-zero seg area and more than 1x1 box size
            if obj['area'] > 1 and box.is_box() and box.area() >= 4:
                obj['bbox'] = [box.x1, box.y1, box.x2, box.y2]
                valid_objs.append(obj)

                if add_mask:
                    # For Mask R-CNN, also clean up the segmentation.
                    segs = obj['segmentation']
                    if not isinstance(segs, list):
                        # RLE (non-list) segmentation only appears on crowd
                        # annotations, which are not used as mask targets.
                        assert obj['iscrowd'] == 1
                        obj['segmentation'] = None
                    else:
                        # Keep only polygons with at least 3 points (6 coords).
                        valid_segs = [
                            np.asarray(p).reshape(-1, 2).astype('float32')
                            for p in segs if len(p) >= 6
                        ]
                        if len(valid_segs) < len(segs):
                            log_once(
                                "Image {} has invalid polygons!".format(
                                    img['file_name']), 'warn')
                        obj['segmentation'] = valid_segs

        # all geometrically-valid boxes are returned
        boxes = np.asarray(
            [obj['bbox'] for obj in valid_objs], dtype='float32')  # (n, 4)
        cls = np.asarray(
            [COCOMeta.category_id_to_class_id[obj['category_id']]
             for obj in valid_objs], dtype='int32')  # (n,)
        is_crowd = np.asarray(
            [obj['iscrowd'] for obj in valid_objs], dtype='int8')  # (n,)

        # add the keys to the image dict
        img['boxes'] = boxes  # (n, 4)
        img['class'] = cls  # (n,), always > 0
        img['is_crowd'] = is_crowd  # (n,)
        if add_mask:
            # also required to be float32
            img['segmentation'] = [obj['segmentation'] for obj in valid_objs]
Example #25
    def __call__(self, roidb):
        fname, boxes_house, boxes_damage, klass, is_crowd = (
            roidb["file_name"], roidb["boxes_house"], roidb["boxes_damage"],
            roidb["class"], roidb["is_crowd"])
        assert boxes_damage.ndim == 2 and boxes_damage.shape[1] == 4, \
            boxes_damage.shape

        boxes_house = np.copy(boxes_house)
        boxes_damage = np.copy(boxes_damage)

        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype("float32")
        height, width = im.shape[:2]
        # assume floatbox as input
        assert boxes_damage.dtype == np.float32, "Loader has to return float32 boxes!"

        if not self.cfg.DATA.ABSOLUTE_COORD:
            boxes_house[:, 0::2] *= width
            boxes_house[:, 1::2] *= height
            boxes_damage[:, 0::2] *= width
            boxes_damage[:, 1::2] *= height

        # augmentation:
        tfms = self.aug.get_transform(im)
        im = tfms.apply_image(im)

        points_house = box_to_point4(boxes_house)
        points_house = tfms.apply_coords(points_house)
        boxes_house = point4_to_box(points_house)
        if len(boxes_house):
            assert klass.max() <= self.cfg.DATA.NUM_CATEGORY, \
                "Invalid category {}!".format(klass.max())
            assert np.min(
                np_area(boxes_house)) > 0, "Some boxes have zero area!"

        points_damage = box_to_point4(boxes_damage)
        points_damage = tfms.apply_coords(points_damage)
        boxes_damage = point4_to_box(points_damage)
        if len(boxes_damage):
            assert klass.max() <= self.cfg.DATA.NUM_CATEGORY, \
                "Invalid category {}!".format(klass.max())
            assert np.min(
                np_area(boxes_damage)) > 0, "Some boxes have zero area!"

        ret = {"image": im}
        # Add rpn data to dataflow:
        try:
            if self.cfg.MODE_FPN:
                # Two sets of RPN anchors: one for house boxes, one for damage boxes.
                multilevel_anchor_inputs_house = self.get_multilevel_rpn_anchor_input(
                    im, boxes_house, is_crowd)
                for i, (anchor_labels, anchor_boxes_house) in enumerate(
                        multilevel_anchor_inputs_house):
                    ret["anchor_labels_lvl{}_house".format(i + 2)] = anchor_labels
                    ret["anchor_boxes_lvl{}_house".format(i + 2)] = anchor_boxes_house

                multilevel_anchor_inputs_damage = self.get_multilevel_rpn_anchor_input(
                    im, boxes_damage, is_crowd)
                for i, (anchor_labels, anchor_boxes_damage) in enumerate(
                        multilevel_anchor_inputs_damage):
                    ret["anchor_labels_lvl{}_damage".format(i + 2)] = anchor_labels
                    ret["anchor_boxes_lvl{}_damage".format(i + 2)] = anchor_boxes_damage
            else:
                ret["anchor_labels"], ret["anchor_boxes_house"] = \
                    self.get_rpn_anchor_input(im, boxes_house, is_crowd)
                # NOTE: this second call overwrites the 'anchor_labels' produced
                # for the house boxes; only the damage labels are kept.
                ret["anchor_labels"], ret["anchor_boxes_damage"] = \
                    self.get_rpn_anchor_input(im, boxes_damage, is_crowd)
            # skip crowd boxes in training targets
            boxes_house = boxes_house[is_crowd == 0]
            boxes_damage = boxes_damage[is_crowd == 0]
            klass = klass[is_crowd == 0]
            ret["gt_boxes_house"] = boxes_house
            ret["gt_boxes_damage"] = boxes_damage
            ret["gt_labels"] = klass
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                "warn")
            return None

        if self.cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(roidb["segmentation"])
            segmentation = [
                segmentation[k] for k in range(len(segmentation))
                if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes_house)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            width_height = np.asarray([width, height], dtype=np.float32)
            # pad width to a multiple of 8 so the mask can be packed into bits
            gt_mask_width = int(np.ceil(im.shape[1] / 8.0) * 8)

            for polys in segmentation:
                if not self.cfg.DATA.ABSOLUTE_COORD:
                    polys = [p * width_height for p in polys]
                polys = [tfms.apply_coords(p) for p in polys]
                masks.append(
                    polygons_to_mask(polys, im.shape[0], gt_mask_width))

            if len(masks):
                masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
                masks = np.packbits(masks, axis=-1)
            else:  # no gt on the image
                masks = np.zeros((0, im.shape[0], gt_mask_width // 8),
                                 dtype='uint8')

            ret['gt_masks_packed'] = masks

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret
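# Why gt_mask_width is padded to a multiple of 8: np.packbits stores eight
# mask pixels per byte along the last axis, cutting gt-mask memory by 8x.
# A small round-trip demo (ours, independent of this repo):
import numpy as np

mask = np.random.randint(0, 2, size=(2, 5, 16)).astype('uint8')  # width % 8 == 0
packed = np.packbits(mask, axis=-1)        # (2, 5, 2): 16 bits -> 2 bytes
restored = np.unpackbits(packed, axis=-1)  # back to (2, 5, 16)
assert (restored == mask).all()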
Example #26
    def preprocess(img):

        fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img[
            'class'], img['is_crowd']
        img_name = fname.split('/')[-1]
        img_id = int(img_name[3:-4])
        # proposals from a pretrained RPN, used for negative chip extraction

        proposals = proposal_pickle['boxes'][
            proposal_pickle['ids'].index(img_id)]
        proposals[2:4] += proposals[0:2]  # from [x, y, w, h] to [x1, y1, x2, y2]

        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"
        chip_generator = Im2Chip(im,
                                 boxes,
                                 klass,
                                 proposals,
                                 cfg.SNIPER.SCALES,
                                 cfg.SNIPER.VALID_RANGES,
                                 is_crowd=is_crowd,
                                 chip_size=cfg.SNIPER.CHIP_SIZE,
                                 chip_stride=cfg.SNIPER.CHIP_STRIDE)
        im, boxes, klass, scale_indices, is_crowd = \
            chip_generator.genChipMultiScale()
        rets = []
        for i in range(len(im)):
            try:
                if len(boxes[i]) == 0:
                    continue
                # anchor_labels, anchor_boxes
                gt_invalid = []
                maxbox = cfg.SNIPER.VALID_RANGES[scale_indices[i]][0]
                minbox = cfg.SNIPER.VALID_RANGES[scale_indices[i]][1]
                maxbox = sys.maxsize if maxbox == -1 else maxbox
                minbox = 0 if minbox == -1 else minbox
                for box in boxes[i]:
                    w = box[2] - box[0]
                    h = box[3] - box[1]
                    if w >= maxbox or h >= maxbox or (w < minbox and h < minbox):
                        gt_invalid.append(box)
                anchor_inputs = get_sniper_rpn_anchor_input(
                    im[i], boxes[i], is_crowd[i], gt_invalid)
                assert len(anchor_inputs) == 2

                # skip crowd boxes in training targets
                boxes[i] = boxes[i][is_crowd[i] == 0]
                klass[i] = klass[i][is_crowd[i] == 0]

                if not len(boxes[i]):
                    raise MalformedData("No valid gt_boxes!")
            except MalformedData as e:
                log_once(
                    "Input {} is filtered for training: {}".format(
                        fname, str(e)), 'warn')
                continue

            # ret = [im[i]] + list(anchor_inputs) + [boxes[i], klass[i]
            #                                        ] + [scale_indices[i]*len(boxes[i])]
            ret = [im[i]] + list(anchor_inputs) + [boxes[i], klass[i]]
            rets.append(ret)
        return rets
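# The valid-range filter above marks gt boxes whose size falls outside the
# chip's scale range as invalid, which is the core of SNIPER's scale-aware
# training. A standalone numpy sketch of that test (the range values are
# illustrative, not cfg.SNIPER.VALID_RANGES):
import numpy as np

def invalid_gt_boxes(boxes, min_size, max_size):
    w = boxes[:, 2] - boxes[:, 0]
    h = boxes[:, 3] - boxes[:, 1]
    too_big = (w >= max_size) | (h >= max_size)
    too_small = (w < min_size) & (h < min_size)
    return boxes[too_big | too_small]

boxes = np.array([[0., 0., 8., 8.], [0., 0., 100., 100.]])
print(invalid_gt_boxes(boxes, min_size=16, max_size=80))  # both are invalid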
Example #27
def MaskedConv2D(
        inputs,
        filters,
        kernel_size,
        strides=(1, 1),
        padding='same',
        data_format='channels_last',
        dilation_rate=(1, 1),
        activation=None,
        use_bias=True,
        kernel_initializer=None,
        bias_initializer=tf.zeros_initializer(),
        kernel_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        split=1,
        masking=False):
    """
    A wrapper around `tf.layers.Conv2D`.
    Some differences to maintain backward-compatibility:

    1. Default kernel initializer is variance_scaling_initializer(2.0).
    2. Default padding is 'same'.
    3. Support 'split' argument to do group conv.

    Variable Names:

    * ``W``: weights
    * ``b``: bias
    """
    if kernel_initializer is None:
        if get_tf_version_tuple() <= (1, 12):
            kernel_initializer = tf.contrib.layers.variance_scaling_initializer(2.0)
        else:
            kernel_initializer = tf.keras.initializers.VarianceScaling(2.0, distribution='untruncated_normal')
    dilation_rate = shape2d(dilation_rate)

    if not masking and split == 1 and dilation_rate == [1, 1]:
        # tf.layers.Conv2D has bugs with dilations (https://github.com/tensorflow/tensorflow/issues/26797)
        with rename_get_variable({'kernel': 'W', 'bias': 'b'}):
            layer = tf.layers.Conv2D(
                filters,
                kernel_size,
                strides=strides,
                padding=padding,
                data_format=data_format,
                dilation_rate=dilation_rate,
                activation=activation,
                use_bias=use_bias,
                kernel_initializer=kernel_initializer,
                bias_initializer=bias_initializer,
                kernel_regularizer=kernel_regularizer,
                bias_regularizer=bias_regularizer,
                activity_regularizer=activity_regularizer,
                _reuse=tf.get_variable_scope().reuse)
            ret = layer.apply(inputs, scope=tf.get_variable_scope())
            ret = tf.identity(ret, name='output')

        ret.variables = VariableHolder(W=layer.kernel)
        if use_bias:
            ret.variables.b = layer.bias

    else:
        if masking:
            assert split == 1, "Pruning group conv is not supported yet"

        # group conv implementation
        data_format = get_data_format(data_format, keras_mode=False)
        in_shape = inputs.get_shape().as_list()
        channel_axis = 3 if data_format == 'NHWC' else 1
        in_channel = in_shape[channel_axis]
        assert in_channel is not None, "[Conv2D] Input cannot have unknown channel!"
        assert in_channel % split == 0

        assert kernel_regularizer is None and bias_regularizer is None and activity_regularizer is None, \
            "Not supported by group conv or dilated conv!"

        out_channel = filters
        assert out_channel % split == 0
        assert dilation_rate == [1, 1] or get_tf_version_tuple() >= (1, 5), 'TF>=1.5 required for dilated conv.'

        kernel_shape = shape2d(kernel_size)
        # integer division keeps the filter shape integral under Python 3
        filter_shape = kernel_shape + [in_channel // split, out_channel]
        stride = shape4d(strides, data_format=data_format)

        kwargs = dict(data_format=data_format)
        if get_tf_version_tuple() >= (1, 5):
            kwargs['dilations'] = shape4d(dilation_rate, data_format=data_format)

        W = tf.get_variable(
            'W', filter_shape, initializer=kernel_initializer)

        if use_bias:
            b = tf.get_variable('b', [out_channel], initializer=bias_initializer)

        if split == 1:
            if masking:
                W = pruning.apply_mask(W)
            conv = tf.nn.conv2d(inputs, W, stride, padding.upper(), **kwargs)
        else:
            conv = None
            if get_tf_version_tuple() >= (1, 13):
                try:
                    conv = tf.nn.conv2d(inputs, W, stride, padding.upper(), **kwargs)
                except ValueError:
                    log_once("CUDNN group convolution support is only available with "
                             "https://github.com/tensorflow/tensorflow/pull/25818 . "
                             "Will fall back to a loop-based slow implementation instead!", 'warn')
            if conv is None:
                inputs = tf.split(inputs, split, channel_axis)
                kernels = tf.split(W, split, 3)
                outputs = [tf.nn.conv2d(i, k, stride, padding.upper(), **kwargs)
                           for i, k in zip(inputs, kernels)]
                conv = tf.concat(outputs, channel_axis)

        ret = tf.nn.bias_add(conv, b, data_format=data_format) if use_bias else conv
        if activation is not None:
            ret = activation(ret)
        ret = tf.identity(ret, name='output')

        ret.variables = VariableHolder(W=W)
        if use_bias:
            ret.variables.b = b
    return ret
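# The split/concat branch above implements group convolution by hand. A
# self-contained sketch of that equivalence (assuming TensorFlow 1.x, same
# as the code above; shapes are illustrative):
import tensorflow as tf

x = tf.random_normal([1, 8, 8, 4])   # NHWC input, 4 channels, split = 2
W = tf.random_normal([3, 3, 2, 8])   # [kh, kw, in_channel // split, out_channel]

xs = tf.split(x, 2, axis=3)          # two channel groups of the input
ks = tf.split(W, 2, axis=3)          # two halves of the output channels
outs = [tf.nn.conv2d(i, k, [1, 1, 1, 1], 'SAME') for i, k in zip(xs, ks)]
y = tf.concat(outs, axis=3)          # (1, 8, 8, 8), one group conv result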
Example #28
    def __call__(self, roidb):
        fname, boxes, klass, is_crowd = roidb["file_name"], roidb["boxes"], roidb["class"], roidb["is_crowd"]
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype("float32")
        height, width = im.shape[:2]
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        if not self.cfg.DATA.ABSOLUTE_COORD:
            boxes[:, 0::2] *= width
            boxes[:, 1::2] *= height

        # augmentation:
        im, params = self.aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = self.aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        ret = {"image": im}
        # Add rpn data to dataflow:
        try:
            if self.cfg.MODE_FPN:
                multilevel_anchor_inputs = self.get_multilevel_rpn_anchor_input(im, boxes, is_crowd)
                for i, (anchor_labels, anchor_boxes) in enumerate(multilevel_anchor_inputs):
                    ret["anchor_labels_lvl{}".format(i + 2)] = anchor_labels
                    ret["anchor_boxes_lvl{}".format(i + 2)] = anchor_boxes
            else:
                ret["anchor_labels"], ret["anchor_boxes"] = self.get_rpn_anchor_input(im, boxes, is_crowd)

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            ret["gt_boxes"] = boxes
            ret["gt_labels"] = klass
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once("Input {} is filtered for training: {}".format(fname, str(e)), "warn")
            return None

        if self.cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(roidb["segmentation"])
            segmentation = [segmentation[k] for k in range(len(segmentation)) if not is_crowd[k]]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            width_height = np.asarray([width, height], dtype=np.float32)
            gt_mask_width = int(np.ceil(im.shape[1] / 8.0) * 8)   # pad to 8 in order to pack mask into bits
            for polys in segmentation:
                if not self.cfg.DATA.ABSOLUTE_COORD:
                    polys = [p * width_height for p in polys]
                polys = [self.aug.augment_coords(p, params) for p in polys]
                masks.append(segmentation_to_mask(polys, im.shape[0], gt_mask_width))
            masks = np.asarray(masks, dtype='uint8')    # values in {0, 1}
            masks = np.packbits(masks, axis=-1)
            ret['gt_masks_packed'] = masks

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret
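# box_to_point8 / point8_to_box let axis-aligned boxes survive arbitrary
# coordinate augmentations: expand each box to its four corners, transform
# the points, then take the bounding box of the result. A plain-numpy
# sketch of that round trip (function names are ours):
import numpy as np

def boxes_to_corners(boxes):             # (n, 4) -> (4n, 2)
    x1, y1, x2, y2 = np.split(boxes, 4, axis=1)
    corners = np.concatenate([x1, y1, x2, y1, x1, y2, x2, y2], axis=1)
    return corners.reshape(-1, 2)

def corners_to_boxes(points):            # (4n, 2) -> (n, 4)
    p = points.reshape(-1, 4, 2)
    return np.concatenate([p.min(axis=1), p.max(axis=1)], axis=1)

boxes = np.array([[10., 20., 30., 40.]], dtype=np.float32)
pts = boxes_to_corners(boxes)
pts[:, 0] = 100 - pts[:, 0]              # e.g. horizontal flip, 100px image
print(corners_to_boxes(pts))             # [[70. 20. 90. 40.]]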
Example #29
    def preprocess(roidb_batch):
        datapoint_list = []
        for roidb in roidb_batch:
            fname, boxes, klass, is_crowd = roidb['file_name'], roidb['boxes'], roidb['class'], roidb['is_crowd']
            boxes = np.copy(boxes)
            im = cv2.imread(fname, cv2.IMREAD_COLOR)
            assert im is not None, fname
            im = im.astype('float32')
            # assume floatbox as input
            assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

            # augmentation:
            im, params = aug.augment_return_params(im)
            points = box_to_point8(boxes)
            points = aug.augment_coords(points, params)
            boxes = point8_to_box(points)
            assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

            ret = {'images': im}
            # rpn anchor:
            try:
                if cfg.MODE_FPN:
                    multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(im, boxes, is_crowd)
                    for i, (anchor_labels, anchor_boxes) in enumerate(multilevel_anchor_inputs):
                        ret['anchor_labels_lvl{}'.format(i + 2)] = anchor_labels
                        ret['anchor_boxes_lvl{}'.format(i + 2)] = anchor_boxes
                else:
                    raise NotImplementedError("[armand] Batch mode only available for FPN")

                boxes = boxes[is_crowd == 0]    # skip crowd boxes in training target
                klass = klass[is_crowd == 0]
                ret['gt_boxes'] = boxes
                ret['gt_labels'] = klass
                ret['filename'] = fname
                if not len(boxes):
                    raise MalformedData("No valid gt_boxes!")
            except MalformedData as e:
                log_once("Input {} is filtered for training: {}".format(fname, str(e)), 'warn')
                return None

            if cfg.MODE_MASK:
                # augmentation will modify the polys in-place
                segmentation = copy.deepcopy(roidb['segmentation'])
                segmentation = [segmentation[k] for k in range(len(segmentation)) if not is_crowd[k]]
                assert len(segmentation) == len(boxes)

                # Apply augmentation on polygon coordinates.
                # And produce one image-sized binary mask per box.
                masks = []
                for polys in segmentation:
                    polys = [aug.augment_coords(p, params) for p in polys]
                    masks.append(segmentation_to_mask(polys, im.shape[0], im.shape[1]))
                masks = np.asarray(masks, dtype='uint8')    # values in {0, 1}
                ret['gt_masks'] = masks

            datapoint_list.append(ret)

        #################################################################################################################
        # Batchify the output
        #################################################################################################################

        # Now we need to batch the various fields

        # Easily stackable: anchor_labels_lvl{2..6} and anchor_boxes_lvl{2..6}
        batched_datapoint = {}
        for lvl in range(2, 7):
            for field in ("anchor_labels_lvl{}".format(lvl),
                          "anchor_boxes_lvl{}".format(lvl)):
                batched_datapoint[field] = np.stack(
                    [d[field] for d in datapoint_list])



        # Require padding and original dimension storage
        # - image (HxWx3)
        # - gt_boxes (?x4)
        # - gt_labels (?)
        # - gt_masks (?xHxW)

        """
        Find the minimum container size for images (maxW x maxH)
        Find the maximum number of ground truth boxes
        For each image, save original dimension and pad
        """

        if cfg.PREPROC.PREDEFINED_PADDING:
            padding_shapes = [get_padding_shape(*(d["images"].shape[:2])) for d in datapoint_list]
            max_height = max([shp[0] for shp in padding_shapes])
            max_width = max([shp[1] for shp in padding_shapes])
        else:
            image_dims = [d["images"].shape for d in datapoint_list]
            heights = [dim[0] for dim in image_dims]
            widths = [dim[1] for dim in image_dims]

            max_height = max(heights)
            max_width = max(widths)


        # image
        padded_images = []
        original_image_dims = []
        for datapoint in datapoint_list:
            image = datapoint["images"]
            original_image_dims.append(image.shape)

            h_padding = max_height - image.shape[0]
            w_padding = max_width - image.shape[1]

            padded_image = np.pad(image,
                                  [[0, h_padding],
                                   [0, w_padding],
                                   [0, 0]],
                                  'constant')

            padded_images.append(padded_image)

        batched_datapoint["images"] = np.stack(padded_images)
        #print(batched_datapoint["images"].shape)
        batched_datapoint["orig_image_dims"] = np.stack(original_image_dims)


        # gt_boxes and gt_labels
        max_num_gts = max([d["gt_labels"].size for d in datapoint_list])

        gt_counts = []
        padded_gt_labels = []
        padded_gt_boxes = []
        padded_gt_masks = []
        for datapoint in datapoint_list:
            gt_count_for_image = datapoint["gt_labels"].size
            gt_counts.append(gt_count_for_image)

            gt_padding = max_num_gts - gt_count_for_image

            padded_gt_labels_for_img = np.pad(datapoint["gt_labels"], [0, gt_padding], 'constant', constant_values=-1)
            padded_gt_labels.append(padded_gt_labels_for_img)

            padded_gt_boxes_for_img = np.pad(datapoint["gt_boxes"],
                                             [[0, gt_padding],
                                              [0,0]],
                                             'constant')
            padded_gt_boxes.append(padded_gt_boxes_for_img)




            h_padding = max_height - datapoint["images"].shape[0]
            w_padding = max_width - datapoint["images"].shape[1]

            if cfg.MODE_MASK:
                padded_gt_masks_for_img = np.pad(datapoint["gt_masks"],
                                                 [[0, gt_padding],
                                                  [0, h_padding],
                                                  [0, w_padding]],
                                                 'constant')
                padded_gt_masks.append(padded_gt_masks_for_img)


        batched_datapoint["orig_gt_counts"] = np.stack(gt_counts)
        batched_datapoint["gt_labels"] = np.stack(padded_gt_labels)
        batched_datapoint["gt_boxes"] = np.stack(padded_gt_boxes)
        batched_datapoint["filenames"] = [d["filename"] for d in datapoint_list]

        if cfg.MODE_MASK:
            batched_datapoint["gt_masks"] = np.stack(padded_gt_masks)



        return batched_datapoint
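# Consumer-side sketch (ours, names follow the batching code above): the
# padding is undone by slicing each padded field with 'orig_gt_counts',
# which records how many entries per image are real.
import numpy as np

def unbatch_gt(batched_datapoint):
    per_image = []
    for labels, boxes, n in zip(batched_datapoint["gt_labels"],
                                batched_datapoint["gt_boxes"],
                                batched_datapoint["orig_gt_counts"]):
        per_image.append((labels[:n], boxes[:n]))  # drop -1 / zero padding
    return per_image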