Example #1
def get_train_dataflow():
    imgs = COCODetection.load_many(config.BASEDIR, config.TRAIN_DATASET)
    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    imgs = list(filter(lambda img: len(img['boxes']) > 0, imgs))    # drop images without boxes

    ds = DataFromListOfDict(
        imgs,
        ['file_name', 'boxes', 'class', 'is_crowd'],  # we need only these four keys
        shuffle=True)
    ds = read_and_augment_images(ds)

    def add_anchor_to_dp(dp):
        im, boxes, klass, is_crowd, fname = dp
        try:
            fm_labels, fm_boxes = get_rpn_anchor_input(im, boxes, klass, is_crowd)

            boxes = boxes[is_crowd == 0]    # skip crowd boxes in training target
            klass = klass[is_crowd == 0]

            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once("Input {} is invalid for training: {}".format(fname, str(e)), 'warn')
            return None

        return [im, fm_labels, fm_boxes, boxes, klass]

    ds = MapData(ds, add_anchor_to_dp)
    return ds
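
A minimal usage sketch for the dataflow above (an assumption of how it is consumed, based on tensorpack's older DataFlow API with reset_state() and get_data(); recent tensorpack versions iterate the dataflow directly):

df = get_train_dataflow()
df.reset_state()  # tensorpack requires this once before iteration
for im, fm_labels, fm_boxes, boxes, klass in df.get_data():
    print(im.shape, fm_labels.shape, fm_boxes.shape, boxes.shape, klass.shape)
    break
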
Example #2
def get_eval_dataflow(shard=0, num_shards=1):
    """
    Args:
        shard, num_shards: to get subset of evaluation data
    """
    imgs = COCODetection.load_many(cfg.DATA.BASEDIR,
                                   cfg.DATA.VAL,
                                   add_gt=False)
    num_imgs = len(imgs)
    img_per_shard = num_imgs // num_shards
    img_range = (shard * img_per_shard, (shard + 1) *
                 img_per_shard if shard + 1 < num_shards else num_imgs)

    # no filtering here, unlike training
    ds = DataFromListOfDict(imgs[img_range[0]:img_range[1]],
                            ['file_name', 'id'])

    def f(fname):
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        return im

    ds = MapDataComponent(ds, f, 0)
    # Evaluation itself may be multi-threaded, therefore don't add prefetch here.
    return ds
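
The shard arithmetic above gives every shard num_imgs // num_shards images, with the last shard absorbing the remainder. A self-contained restatement of the same computation, with a small sanity check:

def shard_range(shard, num_shards, num_imgs):
    img_per_shard = num_imgs // num_shards
    end = (shard + 1) * img_per_shard if shard + 1 < num_shards else num_imgs
    return shard * img_per_shard, end

# 10 images over 3 shards: the last shard gets the extra image
assert [shard_range(s, 3, 10) for s in range(3)] == [(0, 3), (3, 6), (6, 10)]
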
Example #3
def get_eval_dataflow(name, shard=0, num_shards=1):
    """
    Args:
        name (str): name of the dataset to evaluate
        shard, num_shards: to get subset of evaluation data
    """
    roidbs = COCODetection.load_many(cfg.DATA.BASEDIR, name, add_gt=False)
    """
    To run inference on your own data, change this to your loader.
    Produce "roidbs" as a list of dicts; each dict needs the following keys for evaluation:
    file_name: str, full path to the image
    id: an id of this image
    """

    num_imgs = len(roidbs)
    img_per_shard = num_imgs // num_shards
    img_range = (shard * img_per_shard, (shard + 1) * img_per_shard if shard + 1 < num_shards else num_imgs)

    # no filtering here, unlike training
    ds = DataFromListOfDict(roidbs[img_range[0]: img_range[1]], ['file_name', 'id'])

    def f(fname):
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        return im
    ds = MapDataComponent(ds, f, 0)
    # Evaluation itself may be multi-threaded, therefore don't add prefetch here.
    return ds
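
As the docstring says, evaluating on custom data only requires dicts with 'file_name' and 'id'. A hypothetical drop-in loader (the directory layout and helper name are illustrative assumptions, not part of the original code):

import glob
import os

def load_my_roidbs(img_dir):
    # 'file_name' must be a full path; 'id' just needs to be unique per image
    paths = sorted(glob.glob(os.path.join(img_dir, '*.jpg')))
    return [{'file_name': p, 'id': i} for i, p in enumerate(paths)]
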
Example #4
def get_eval_dataflow():
    imgs = COCODetection.load_many(config.BASEDIR, config.VAL_DATASET, add_gt=False)
    # no filtering here, unlike training
    ds = DataFromListOfDict(imgs, ['file_name', 'id'])

    def f(fname):
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        return im
    ds = MapDataComponent(ds, f, 0)
    return ds
Example #5
def get_eval_dataflow():
    imgs = COCODetection.load_many(config.BASEDIR, config.VAL_DATASET, add_gt=False)
    # no filtering here, unlike training
    ds = DataFromListOfDict(imgs, ['file_name', 'id'])

    def f(fname):
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        return im
    ds = MapDataComponent(ds, f, 0)
    ds = PrefetchDataZMQ(ds, 1)
    return ds
Example #6
def get_eval_dataflow():
    imgs = COCODetection.load_many(cfg.DATA.BASEDIR, cfg.DATA.VAL, add_gt=False)
    # no filtering here, unlike training
    ds = DataFromListOfDict(imgs, ['file_name', 'id'])

    def f(fname):
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        return im
    ds = MapDataComponent(ds, f, 0)
    if cfg.TRAINER != 'horovod':
        ds = PrefetchDataZMQ(ds, 1)
    return ds
Example #7
def get_eval_dataflow(shard=0, num_shards=1):
    """
    Args:
        shard, num_shards: to get subset of evaluation data
    """
    roidbs = COCODetection.load_many(cfg.DATA.BASEDIR, cfg.DATA.VAL, add_gt=False)
    num_imgs = len(roidbs)
    img_per_shard = num_imgs // num_shards
    img_range = (shard * img_per_shard, (shard + 1) * img_per_shard if shard + 1 < num_shards else num_imgs)

    # no filtering here, unlike training
    ds = DataFromListOfDict(roidbs[img_range[0]: img_range[1]], ['file_name', 'id'])

    def f(fname):
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        return im
    ds = MapDataComponent(ds, f, 0)
    # Evaluation itself may be multi-threaded, therefore don't add prefetch here.
    return ds
Example #8
def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
    anchor_labels: (h', w', NA)
    anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """

    imgs = COCODetection.load_many(cfg.DATA.BASEDIR,
                                   cfg.DATA.TRAIN,
                                   add_gt=True,
                                   add_mask=cfg.MODE_MASK)
    """
    To train on your own data, change this to your loader.
    Produce "imgs" as a list of dict, in the dict the following keys are needed for training:
    height, width: integer
    file_name: str, full path to the image
    boxes: numpy array of kx4 floats
    class: numpy array of k integers
    is_crowd: k booleans. Use k False if you don't know what it means.
    segmentation: k lists of numpy arrays (one for each box).
        Each list of numpy arrays corresponds to the mask for one instance.
        Each numpy array in the list is a polygon of shape Nx2,
        because one mask can be represented by N polygons.

        If your segmentation annotations are originally masks rather than polygons,
        either convert them, or the augmentation code below will need to be
        changed or skipped accordingly.
    """

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(imgs)
    imgs = list(
        filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0, imgs))
    logger.info(
        "Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}"
        .format(num - len(imgs), len(imgs)))

    ds = DataFromList(imgs, shuffle=True)

    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(img):
        fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img[
            'class'], img['is_crowd']
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        # rpn anchor:
        try:
            if cfg.MODE_FPN:
                multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(
                    im, boxes, is_crowd)
                anchor_inputs = itertools.chain.from_iterable(
                    multilevel_anchor_inputs)
            else:
                # anchor_labels, anchor_boxes
                anchor_inputs = get_rpn_anchor_input(im, boxes, is_crowd)
                assert len(anchor_inputs) == 2

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                'warn')
            return None

        ret = [im] + list(anchor_inputs) + [boxes, klass]

        if cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(img['segmentation'])
            segmentation = [
                segmentation[k] for k in range(len(segmentation))
                if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            for polys in segmentation:
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(
                    segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret.append(masks)

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret

    if cfg.TRAINER == 'horovod':
        ds = MultiThreadMapData(ds, 5, preprocess)
        # MPI does not like fork()
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    return ds
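
A hypothetical skeleton of such a custom loader, matching the dict format described in the docstring above (my_annotations and its fields are illustrative assumptions; only the keys and dtypes matter):

import numpy as np

def load_my_training_data():
    imgs = []
    for ann in my_annotations():  # hypothetical iterator over your annotations
        imgs.append({
            'height': ann.height, 'width': ann.width,           # integers
            'file_name': ann.path,                              # full path to the image
            'boxes': np.asarray(ann.boxes, dtype=np.float32),   # (k, 4) float boxes
            'class': np.asarray(ann.labels, dtype=np.int64),    # (k,) integer labels
            'is_crowd': np.zeros(len(ann.boxes), dtype=bool),   # k times False if unsure
            'segmentation': ann.polygons,  # k lists of (N, 2) float polygons, one list per box
        })
    return imgs
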
Example #9
        if args.visualize:
            visualize(MODEL, args.load)
        else:
            pred = OfflinePredictor(
                PredictConfig(
                    model=MODEL,
                    session_init=get_model_loader(args.load),
                    input_names=MODEL.get_inference_tensor_names()[0],
                    output_names=MODEL.get_inference_tensor_names()[1]))
            if args.evaluate:
                assert args.evaluate.endswith('.json'), args.evaluate
                offline_evaluate(pred, args.evaluate)
            elif args.predict:
                COCODetection(
                    cfg.DATA.BASEDIR,
                    'val2014')  # Only to load the class names into caches
                predict(pred, args.predict)
    else:
        is_horovod = cfg.TRAINER == 'horovod'
        if is_horovod:
            hvd.init()
            logger.info("Horovod Rank={}, Size={}".format(
                hvd.rank(), hvd.size()))

        if not is_horovod or hvd.rank() == 0:
            logger.set_logger_dir(args.logdir, 'd')

        finalize_configs(is_training=True)
        stepnum = cfg.TRAIN.STEPS_PER_EPOCH
Example #10
        assert args.load
        print_config()
        if args.visualize:
            visualize(args.load)
        else:
            pred = OfflinePredictor(
                PredictConfig(model=Model(),
                              session_init=get_model_loader(args.load),
                              input_names=['image'],
                              output_names=get_model_output_names()))
            if args.evaluate:
                assert args.evaluate.endswith('.json')
                offline_evaluate(pred, args.evaluate)
            elif args.predict:
                COCODetection(
                    config.BASEDIR,
                    'train2014')  # to load the class names into caches
                predict(pred, args.predict)
    else:
        logger.set_logger_dir(args.logdir)
        print_config()
        stepnum = 500
        warmup_epoch = 3
        factor = get_batch_factor()

        cfg = TrainConfig(
            model=Model(),
            data=QueueInput(get_train_dataflow(add_mask=config.MODE_MASK)),
            callbacks=[
                ModelSaver(max_to_keep=10, keep_checkpoint_every_n_hours=1),
                # linear warmup
Example #11
def get_sniper_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
    anchor_labels: (h', w', NA)
    anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    scale_index: i

    If MODE_MASK, gt_masks: (N, h, w)
    """

    OUTPUT_FILE = 'train_512_annotation.txt'
    OUTPUT_IMG_DIR = 'out'
    out_file = open(OUTPUT_FILE, 'w')

    class SniperDataFlow(ProxyDataFlow):
        def __init__(self, ds):
            super(SniperDataFlow, self).__init__(ds)
            # self.ds = ds
        def size(self):
            raise NotImplementedError()

        def get_data(self):
            for img in self.ds.get_data():
                for chip in img:
                    yield chip

    imgs = COCODetection.load_many(cfg.DATA.BASEDIR,
                                   cfg.DATA.TRAIN,
                                   add_gt=True,
                                   add_mask=cfg.MODE_MASK)

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(imgs)
    imgs = list(
        filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0, imgs))
    logger.info(
        "Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}"
        .format(num - len(imgs), len(imgs)))

    ds = DataFromList(imgs, shuffle=False)
    # aug = imgaug.AugmentorList([
    #     CustomResize(cfg.PREPROC.SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
    #     imgaug.Flip(horiz=True)
    # ])

    assert os.path.isfile(cfg.SNIPER.PRN_PRE)
    proposal_pickle = pandas.read_pickle(cfg.SNIPER.PRN_PRE)

    def preprocess(img):

        fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img[
            'class'], img['is_crowd']
        img_name = fname.split('/')[-1]
        img_id = int(img_name[3:-4])
        # proposals from a pretrained RPN, used for negative chip extraction

        proposals = proposal_pickle['boxes'][proposal_pickle['ids'].index(
            img_id)]
        proposals[2:4] += proposals[0:2]  # from [x,y,w,h] to [x1,y1,x2,y2]

        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"
        chip_generator = Im2Chip(im,
                                 boxes,
                                 klass,
                                 proposals,
                                 cfg.SNIPER.SCALES,
                                 cfg.SNIPER.VALID_RANGES,
                                 is_crowd=is_crowd,
                                 chip_size=cfg.SNIPER.CHIP_SIZE,
                                 chip_stride=cfg.SNIPER.CHIP_STRIDE)
        im, boxes, klass, scale_indices, is_crowd = chip_generator.genChipMultiScale()
        rets = []
        for i in range(len(im)):
            try:
                if not len(boxes[i]):
                    raise MalformedData("No valid gt_boxes!")
            except MalformedData as e:
                log_once(
                    "Input {} is filtered for training: {}".format(
                        fname, str(e)), 'warn')
                continue
            # ret = [im[i]] + list(anchor_inputs) + [boxes[i], klass[i]
            #                                        ] + [scale_indices[i]*len(boxes[i])]
            new_name = '%s_%d' % (img_name, i)
            cv2.imwrite('%s/%s' % (OUTPUT_IMG_DIR, new_name), im[i])

            ret = [im[i]] + [boxes[i], klass[i]]
            for j in range(len(klass[i])):
                if j == 0:
                    out_file.write(new_name)
                out_file.write(' %d %f %f %f %f' %
                               (klass[i][j], boxes[i][j][0], boxes[i][j][1],
                                boxes[i][j][2], boxes[i][j][3]))
                if j == len(klass[i]) - 1:
                    out_file.write('\n')
            rets.append(ret)
        return rets

    if cfg.TRAINER == 'horovod':
        ds = MultiThreadMapData(ds, 5, preprocess)
        # ds = PrefetchDataZM
        # MPI does not like fork()
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    # ds = SniperDataFlow(ds)
    return ds
Example #12
def get_train_dataflow_coco(add_mask=False):
    """
    Return a training dataflow. Each datapoint is:
    image, fm_labels, fm_boxes, gt_boxes, gt_class [, masks]
    """
    imgs = COCODetection.load_many(config.BASEDIR,
                                   config.TRAIN_DATASET,
                                   add_gt=True,
                                   add_mask=add_mask)
    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    imgs = list(filter(lambda img: len(img['boxes']) > 0,
                       imgs))  # drop images without boxes

    ds = DataFromList(imgs, shuffle=True)

    aug = imgaug.AugmentorList([
        CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(img):
        print("start preproc coco")
        start = time.time()
        if config.USE_SECOND_HEAD:
            fname, boxes, klass, second_klass, is_crowd = img['file_name'], img['boxes'], img['class'], \
                                                          img['second_class'], img['is_crowd']
        else:
            fname, boxes, klass, is_crowd = img['file_name'], img[
                'boxes'], img['class'], img['is_crowd']
            second_klass = None
        res = preproc_img(fname, boxes, klass, second_klass, is_crowd, aug)
        if res is None:
            print("coco: preproc_img returned None on", fname)
            return None

        ret, params = res
        im = ret[0]
        boxes = ret[3]
        # masks
        if add_mask:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(img.get('segmentation', None))
            segmentation = [
                segmentation[k] for k in range(len(segmentation))
                if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes), (len(segmentation),
                                                     len(boxes))

            # one image-sized binary mask per box
            masks = []
            for polys in segmentation:
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(
                    segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret.append(masks)

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        end = time.time()
        elapsed = end - start
        print("coco example done, elapsed:", elapsed)
        return ret

    #ds = MapData(ds, preprocess)
    ds = MultiProcessMapData(ds,
                             nr_proc=4,
                             map_func=preprocess,
                             buffer_size=20)
    return ds
Example #13
def get_train_dataflow(add_mask=False):
    """
    Return a training dataflow. Each datapoint is:
    image, fm_labels, fm_boxes, gt_boxes, gt_class [, masks]
    """

    imgs = COCODetection.load_many(
        config.BASEDIR, config.TRAIN_DATASET, add_gt=True, add_mask=add_mask)
    """
    To train on your own data, change this to your loader.
    Produce "imgs" as a list of dict, in the dict the following keys are needed for training:
    height, width: integer
    file_name: str
    boxes: kx4 floats
    class: k integers
    is_crowd: k booleans. Use k False if you don't know what it means.
    segmentation: k numpy arrays. Each array is a polygon of shape Nx2.
        If your segmentation annotations are masks rather than polygons,
        either convert them, or the augmentation code below will need to be
        changed or skipped accordingly.
    """

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    imgs = list(filter(lambda img: len(img['boxes']) > 0, imgs))    # drop images without boxes

    ds = DataFromList(imgs, shuffle=True)

    aug = imgaug.AugmentorList(
        [CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE),
         imgaug.Flip(horiz=True)])

    def preprocess(img):
        fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img['class'], img['is_crowd']
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)

        # rpn anchor:
        try:
            fm_labels, fm_boxes = get_rpn_anchor_input(im, boxes, is_crowd)
            boxes = boxes[is_crowd == 0]    # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once("Input {} is filtered for training: {}".format(fname, str(e)), 'warn')
            return None

        ret = [im, fm_labels, fm_boxes, boxes, klass]

        if add_mask:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(img['segmentation'])
            segmentation = [segmentation[k] for k in range(len(segmentation)) if not is_crowd[k]]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            for polys in segmentation:
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')    # values in {0, 1}
            ret.append(masks)

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret

    ds = MultiProcessMapData(ds, 3, preprocess)
    return ds
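
The box_to_point8 / point8_to_box round trip used in preprocess() lets any coordinate augmentation apply to boxes: each box expands into its four corner points, the points are transformed, and the axis-aligned bounding box of the transformed corners becomes the new box. A plausible minimal implementation, assuming (k, 4) float arrays in (x1, y1, x2, y2) order:

import numpy as np

def box_to_point8(boxes):
    # (k, 4) -> (k*4, 2): the four corners of each box
    corners = boxes[:, [0, 1, 2, 3, 0, 3, 2, 1]]  # x1,y1  x2,y2  x1,y2  x2,y1
    return corners.reshape((-1, 2))

def point8_to_box(points):
    # (k*4, 2) -> (k, 4): axis-aligned extent of each corner quadruple
    p = points.reshape((-1, 4, 2))
    minxy = p.min(axis=1)  # (k, 2)
    maxxy = p.max(axis=1)  # (k, 2)
    return np.concatenate((minxy, maxxy), axis=1)
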
Example #14
def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
    anchor_labels: (h', w', NA)
    anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """

    imgs = COCODetection.load_many(
        config.BASEDIR, config.TRAIN_DATASET, add_gt=True, add_mask=config.MODE_MASK)
    """
    To train on your own data, change this to your loader.
    Produce "imgs" as a list of dict, in the dict the following keys are needed for training:
    height, width: integer
    file_name: str
    boxes: kx4 floats
    class: k integers
    is_crowd: k booleans. Use k False if you don't know what it means.
    segmentation: k lists of numpy arrays (one for each box).
        Each list of numpy arrays corresponds to the mask for one instance.
        Each numpy array in the list is a polygon of shape Nx2,
        because one mask can be represented by N polygons.

        If your segmentation annotations are originally masks rather than polygons,
        either convert them, or the augmentation code below will need to be
        changed or skipped accordingly.
    """

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(imgs)
    imgs = list(filter(lambda img: len(img['boxes']) > 0, imgs))    # drop images without boxes
    logger.info("Filtered {} images which contain no groundtruth boxes. Total #images for training: {}".format(
        num - len(imgs), len(imgs)))

    ds = DataFromList(imgs, shuffle=True)

    aug = imgaug.AugmentorList(
        [CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE),
         imgaug.Flip(horiz=True)])

    def preprocess(img):
        fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img['class'], img['is_crowd']
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        # rpn anchor:
        try:
            if config.MODE_FPN:
                multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(im, boxes, is_crowd)
                anchor_inputs = itertools.chain.from_iterable(multilevel_anchor_inputs)
            else:
                # anchor_labels, anchor_boxes
                anchor_inputs = get_rpn_anchor_input(im, boxes, is_crowd)
                assert len(anchor_inputs) == 2

            boxes = boxes[is_crowd == 0]    # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once("Input {} is filtered for training: {}".format(fname, str(e)), 'warn')
            return None

        ret = [im] + list(anchor_inputs) + [boxes, klass]

        if config.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(img['segmentation'])
            segmentation = [segmentation[k] for k in range(len(segmentation)) if not is_crowd[k]]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            for polys in segmentation:
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')    # values in {0, 1}
            ret.append(masks)

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret

    if config.TRAINER == 'horovod':
        ds = MultiThreadMapData(ds, 5, preprocess)
        # MPI does not like fork()
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    return ds
Example #15
def get_sniper_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
    anchor_labels: (h', w', NA)
    anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    scale_index: i

    If MODE_MASK, gt_masks: (N, h, w)
    """
    class SniperDataFlow(ProxyDataFlow):
        def __init__(self, ds):
            super(SniperDataFlow, self).__init__(ds)
            # self.ds = ds
        def size(self):
            raise NotImplementedError()

        def get_data(self):
            for img in self.ds.get_data():
                for chip in img:
                    yield chip

    imgs = COCODetection.load_many(cfg.DATA.BASEDIR,
                                   cfg.DATA.TRAIN,
                                   add_gt=True,
                                   add_mask=cfg.MODE_MASK)

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(imgs)
    imgs = list(
        filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0, imgs))
    logger.info(
        "Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}"
        .format(num - len(imgs), len(imgs)))

    ds = DataFromList(imgs, shuffle=True)
    aug = imgaug.AugmentorList([imgaug.Flip(horiz=True)])

    assert os.path.isfile(cfg.SNIPER.PRN_PRE)
    proposal_pickle = pandas.read_pickle(cfg.SNIPER.PRN_PRE)

    def preprocess(img):

        fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img[
            'class'], img['is_crowd']
        img_name = fname.split('/')[-1]
        img_id = int(img_name[3:-4])
        # proposals from a pretrained RPN, used for negative chip extraction

        proposals = proposal_pickle['boxes'][proposal_pickle['ids'].index(
            img_id)]
        proposals[2:4] += proposals[0:2]  # from [x,y,w,h] to [x1,y1,x2,y2]

        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"
        chip_generator = Im2Chip(im,
                                 boxes,
                                 klass,
                                 proposals,
                                 cfg.SNIPER.SCALES,
                                 cfg.SNIPER.VALID_RANGES,
                                 is_crowd=is_crowd,
                                 chip_size=cfg.SNIPER.CHIP_SIZE,
                                 chip_stride=cfg.SNIPER.CHIP_STRIDE)
        im, boxes, klass, scale_indices, is_crowd = chip_generator.genChipMultiScale()
        rets = []
        for i in range(len(im)):
            try:
                if len(boxes[i]) == 0:
                    continue
                # anchor_labels, anchor_boxes
                gt_invalid = []
                maxbox = cfg.SNIPER.VALID_RANGES[scale_indices[i]][0]
                minbox = cfg.SNIPER.VALID_RANGES[scale_indices[i]][1]
                maxbox = sys.maxsize if maxbox == -1 else maxbox
                minbox = 0 if minbox == -1 else minbox
                for box in boxes[i]:
                    w = box[2] - box[0]
                    h = box[3] - box[1]
                    if w >= maxbox or h >= maxbox or (w < minbox
                                                      and h < minbox):
                        gt_invalid.append(box)
                anchor_inputs = get_sniper_rpn_anchor_input(
                    im[i], boxes[i], is_crowd[i], gt_invalid)
                assert len(anchor_inputs) == 2

                boxes[i] = boxes[i][is_crowd[i] ==
                                    0]  # skip crowd boxes in training target
                klass[i] = klass[i][is_crowd[i] == 0]

                if not len(boxes[i]):
                    raise MalformedData("No valid gt_boxes!")
            except MalformedData as e:
                log_once(
                    "Input {} is filtered for training: {}".format(
                        fname, str(e)), 'warn')
                continue

            # ret = [im[i]] + list(anchor_inputs) + [boxes[i], klass[i]
            #                                        ] + [scale_indices[i]*len(boxes[i])]
            ret = [im[i]] + list(anchor_inputs) + [boxes[i], klass[i]]
            rets.append(ret)
        return rets

    if cfg.TRAINER == 'horovod':
        ds = MultiThreadMapData(ds, 5, preprocess)
        # ds = PrefetchDataZM
        # MPI does not like fork()
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    ds = SniperDataFlow(ds)
    return ds
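
The valid-range test inside the loop above marks a ground-truth box invalid for a chip scale when it is too large (width or height >= maxbox) or too small (both sides < minbox), with -1 meaning unbounded. A standalone restatement of that predicate, with illustrative numbers:

import sys

def invalid_for_scale(box, valid_range):
    maxbox, minbox = valid_range  # same (max, min) order the loop reads from cfg
    maxbox = sys.maxsize if maxbox == -1 else maxbox
    minbox = 0 if minbox == -1 else minbox
    w, h = box[2] - box[0], box[3] - box[1]
    return w >= maxbox or h >= maxbox or (w < minbox and h < minbox)

# with a hypothetical valid_range of (160, 32):
assert invalid_for_scale((0, 0, 200, 50), (160, 32))      # too large: w = 200
assert invalid_for_scale((0, 0, 20, 20), (160, 32))       # too small on both sides
assert not invalid_for_scale((0, 0, 100, 80), (160, 32))  # within range
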
Example #16
def get_train_dataflow(add_mask=False):
    """
    Return a training dataflow. Each datapoint is:
    image, fm_labels, fm_boxes, gt_boxes, gt_class [, masks]
    """

    imgs = COCODetection.load_many(config.BASEDIR,
                                   config.TRAIN_DATASET,
                                   add_gt=True,
                                   add_mask=add_mask)
    """
    To train on your own data, change this to your loader.
    Produce "igms" as a list of dict, in the dict the following keys are needed for training:
    height, width: integer
    file_name: str
    boxes: kx4 floats
    class: k integers
    is_crowd: k booleans. Use k False if you don't know what it means.
    segmentation: k numpy arrays. Each array is a polygon of shape Nx2.
        If your segmentation annotations are masks rather than polygons,
        either convert them, or the augmentation code below will need to be
        changed or skipped accordingly.
    """

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    imgs = list(filter(lambda img: len(img['boxes']) > 0,
                       imgs))  # drop images without boxes

    ds = DataFromList(imgs, shuffle=True)

    aug = imgaug.AugmentorList([
        CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(img):
        fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img[
            'class'], img['is_crowd']
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)

        # rpn anchor:
        try:
            fm_labels, fm_boxes = get_rpn_anchor_input(im, boxes, is_crowd)
            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                'warn')
            return None

        ret = [im, fm_labels, fm_boxes, boxes, klass]

        if add_mask:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(img.get('segmentation', None))
            segmentation = [
                segmentation[k] for k in range(len(segmentation))
                if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            for polys in segmentation:
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(
                    segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret.append(masks)

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret

    ds = MultiProcessMapData(ds, 3, preprocess)
    return ds
Example #17
def get_train_dataflow(add_mask=False):
    """
    Return a training dataflow. Each datapoint is:
    image, fm_labels, fm_boxes, gt_boxes, gt_class [, masks]
    """
    imgs = COCODetection.load_many(
        config.BASEDIR, config.TRAIN_DATASET, add_gt=True, add_mask=add_mask)
    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    imgs = list(filter(lambda img: len(img['boxes']) > 0, imgs))    # drop images without boxes

    ds = DataFromList(imgs, shuffle=True)

    aug = imgaug.AugmentorList(
        [CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE),
         imgaug.Flip(horiz=True)])

    def preprocess(img):
        fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img['class'], img['is_crowd']
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)

        # rpn anchor:
        try:
            fm_labels, fm_boxes = get_rpn_anchor_input(im, boxes, is_crowd)
            boxes = boxes[is_crowd == 0]    # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once("Input {} is filtered for training: {}".format(fname, str(e)), 'warn')
            return None

        ret = [im, fm_labels, fm_boxes, boxes, klass]

        if add_mask:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(img.get('segmentation', None))
            segmentation = [segmentation[k] for k in range(len(segmentation)) if not is_crowd[k]]
            assert len(segmentation) == len(boxes)

            # one image-sized binary mask per box
            masks = []
            for polys in segmentation:
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')    # values in {0, 1}
            ret.append(masks)

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret

    ds = MultiProcessMapData(ds, 3, preprocess)
    return ds
Example #18
def evalImage_fix(model_path, save_path, num_samples):

    # Load TFLite model and allocate tensors
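    # (tf.contrib was removed in TensorFlow 2.x; the equivalent there is tf.lite.Interpreter)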
    interpreter = tf.contrib.lite.Interpreter(model_path)
    interpreter.allocate_tensors()

    # Get input and output tensors
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    all_boxes = [[np.array([]) for _ in range(num_samples)] for _ in range(80)]

    class_90_80_map = {
        1: 0,
        2: 1,
        3: 2,
        4: 3,
        5: 4,
        6: 5,
        7: 6,
        8: 7,
        9: 8,
        10: 9,
        11: 10,
        13: 11,
        14: 12,
        15: 13,
        16: 14,
        17: 15,
        18: 16,
        19: 17,
        20: 18,
        21: 19,
        22: 20,
        23: 21,
        24: 22,
        25: 23,
        27: 24,
        28: 25,
        31: 26,
        32: 27,
        33: 28,
        34: 29,
        35: 30,
        36: 31,
        37: 32,
        38: 33,
        39: 34,
        40: 35,
        41: 36,
        42: 37,
        43: 38,
        44: 39,
        46: 40,
        47: 41,
        48: 42,
        49: 43,
        50: 44,
        51: 45,
        52: 46,
        53: 47,
        54: 48,
        55: 49,
        56: 50,
        57: 51,
        58: 52,
        59: 53,
        60: 54,
        61: 55,
        62: 56,
        63: 57,
        64: 58,
        65: 59,
        67: 60,
        70: 61,
        72: 62,
        73: 63,
        74: 64,
        75: 65,
        76: 66,
        77: 67,
        78: 68,
        79: 69,
        80: 70,
        81: 71,
        82: 72,
        84: 73,
        85: 74,
        86: 75,
        87: 76,
        88: 77,
        89: 78,
        90: 79
    }

    testset = COCODetection(COCOroot, ['instances_minval_L'], None)

    for i in range(num_samples):
        #if(i>10): break
        print(i)
        image = testset.pull_image(i)
        init_shape = image.shape
        img_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        img_rgb = cv2.resize(img_rgb, (320, 320))
        input_image = np.array([img_rgb], dtype=np.uint8)
        pred = tfliteDetect(init_shape, input_image, interpreter,
                            input_details, output_details)

        for j in range(80):
            temp = [
                x[0:5] for x in pred if class_90_80_map[int(x[-1]) + 1] == j
            ]
            all_boxes[j][i] = np.array(temp)

    print('Evaluating detections')
    testset.evaluate_detections(np.array(all_boxes), save_path)
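
The 90-to-80 table above is the standard COCO remapping: ten category ids are unused in the 90-id label scheme, and the remaining ids are compacted to contiguous indices. A sketch that rebuilds the same dict programmatically (it reproduces the literal above):

# category ids absent from COCO's 90-id scheme
MISSING_IDS = {12, 26, 29, 30, 45, 66, 68, 69, 71, 83}

class_90_80_map = {}
for cat_id in range(1, 91):
    if cat_id not in MISSING_IDS:
        class_90_80_map[cat_id] = len(class_90_80_map)

assert len(class_90_80_map) == 80 and class_90_80_map[90] == 79
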
Example #19
def get_sniper_train_dataflow():

    imgs = COCODetection.load_many(cfg.DATA.BASEDIR,
                                   cfg.DATA.TRAIN,
                                   add_gt=True,
                                   add_mask=cfg.MODE_MASK)

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(imgs)
    imgs = list(
        filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0, imgs))
    print(imgs[0])

    proposal_pickle = pandas.read_pickle(cfg.SNIPER.PRN_PRE)

    def preprocess(img):

        fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img[
            'class'], img['is_crowd']
        img_name = fname.split('/')[-1]
        print(img_name)
        img_id = int(img_name[3:-4])
        # proposals from a pretrained RPN, used for negative chip extraction

        proposals = proposal_pickle['boxes'][proposal_pickle['ids'].index(
            img_id)]
        proposals[2:4] += proposals[0:2]  # from [x,y,w,h] to [x1,y1,x2,y2]

        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"
        chip_generator = Im2Chip(im,
                                 boxes,
                                 klass,
                                 proposals,
                                 cfg.SNIPER.SCALES,
                                 cfg.SNIPER.VALID_RANGES,
                                 is_crowd=is_crowd,
                                 chip_size=cfg.SNIPER.CHIP_SIZE,
                                 chip_stride=cfg.SNIPER.CHIP_STRIDE)
        im, boxes, klass, scale_indices, is_crowd = chip_generator.genChipMultiScale()
        rets = []
        for i in range(len(im)):
            try:
                if not len(boxes[i]):
                    raise MalformedData("No valid gt_boxes!")
            except MalformedData as e:
                log_once(
                    "Input {} is filtered for training: {}".format(
                        fname, str(e)), 'warn')
                continue

            # ret = [im[i]] + list(anchor_inputs) + [boxes[i], klass[i]
            #                                        ] + [scale_indices[i]*len(boxes[i])]
            ret = [im[i]] + [boxes[i], klass[i]]
            rets.append(ret)
        return rets

    for img in imgs:
        preprocess(img)

    return imgs
Example #20
def get_train_dataflow(add_mask=False):
    """
    Return a training dataflow. Each datapoint is:
    image, fm_labels, fm_boxes, gt_boxes, gt_class [, masks]
    """
    imgs = COCODetection.load_many(config.BASEDIR,
                                   config.TRAIN_DATASET,
                                   add_gt=True,
                                   add_mask=add_mask)
    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    imgs = list(filter(lambda img: len(img['boxes']) > 0,
                       imgs))  # drop images without boxes

    ds = DataFromList(imgs, shuffle=True)

    aug = imgaug.AugmentorList([
        CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(img):
        fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img[
            'class'], img['is_crowd']
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)

        # rpn anchor:
        try:
            fm_labels, fm_boxes = get_rpn_anchor_input(im, boxes, klass,
                                                       is_crowd)
            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is invalid for training: {}".format(fname, str(e)),
                'warn')
            return None

        ret = [im, fm_labels, fm_boxes, boxes, klass]

        # masks
        segmentation = img.get('segmentation', None)
        if segmentation is not None:
            segmentation = [
                segmentation[k] for k in range(len(segmentation))
                if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes)

            # one image-sized binary mask per box
            masks = []
            for polys in segmentation:
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(
                    segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret.append(masks)

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret

    ds = MapData(ds, preprocess)
    ds = PrefetchDataZMQ(ds, 1)
    return ds
Example #21
def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
    anchor_labels: (h', w', NA)
    anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """

    roidbs = COCODetection.load_many(cfg.DATA.BASEDIR,
                                     cfg.DATA.TRAIN,
                                     add_gt=True,
                                     add_mask=cfg.MODE_MASK)
    """
    To train on your own data, change this to your loader.
    Produce "roidbs" as a list of dict, in the dict the following keys are needed for training:
    height, width: integer
    file_name: str, full path to the image
    boxes: numpy array of kx4 floats
    class: numpy array of k integers
    is_crowd: k booleans. Use k False if you don't know what it means.
    segmentation: k lists of numpy arrays (one for each box).
        Each list of numpy arrays corresponds to the mask for one instance.
        Each numpy array in the list is a polygon of shape Nx2,
        because one mask can be represented by N polygons.

        If your segmentation annotations are originally masks rather than polygons,
        either convert them, or the augmentation code below will need to be
        changed or skipped accordingly.
    """

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(roidbs)
    roidbs = list(
        filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0,
               roidbs))
    logger.info(
        "Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}"
        .format(num - len(roidbs), len(roidbs)))

    ds = DataFromList(roidbs, shuffle=True)

    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(roidb):
        fname, boxes, klass, is_crowd = roidb['file_name'], roidb[
            'boxes'], roidb['class'], roidb['is_crowd']
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        ret = {'image': im}
        # rpn anchor:
        try:
            ret['anchor_labels'], ret['anchor_boxes'] = get_rpn_anchor_input(
                im, boxes, is_crowd)
            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            ret['gt_boxes'] = boxes
            ret['gt_labels'] = klass
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                'warn')
            return None
        return ret

    if cfg.TRAINER == 'horovod':
        ds = MultiThreadMapData(ds, 5, preprocess)
        # MPI does not like fork()
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    return ds
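
Unlike the earlier examples, which yield list datapoints, this variant yields dicts keyed by tensor name. A minimal consumption sketch, again assuming tensorpack's older get_data() API (recent versions iterate the dataflow directly):

df = get_train_dataflow()
df.reset_state()
for dp in df.get_data():
    print(dp['image'].shape, dp['anchor_labels'].shape,
          dp['gt_boxes'].shape, dp['gt_labels'].shape)
    break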