def get_imagenet_dataflow(datadir, name, batch_size, parallel=None):
    """
    Get a standard imagenet training/evaluation dataflow, for linear classifier tuning.
    """
    assert name in ['train', 'val']
    isTrain = name == 'train'
    assert datadir is not None
    augmentors = get_basic_augmentor(isTrain)
    augmentors = imgaug.AugmentorList(augmentors)
    if parallel is None:
        parallel = min(50, mp.cpu_count())

    def mapper(dp):
        fname, label = dp
        img = cv2.imread(fname)
        img = augmentors.augment(img)
        return img, label

    if isTrain:
        ds = dataset.ILSVRC12Files(datadir, name, shuffle=True)
        ds = MultiProcessMapAndBatchDataZMQ(ds,
                                            parallel,
                                            mapper,
                                            batch_size,
                                            buffer_size=7000)
    else:
        ds = dataset.ILSVRC12Files(datadir, name, shuffle=False)
        ds = MultiThreadMapData(ds,
                                parallel,
                                mapper,
                                buffer_size=2000,
                                strict=True)
        ds = BatchData(ds, batch_size, remainder=True)
        ds = MultiProcessRunnerZMQ(ds, 1)
    return ds
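
A minimal usage sketch (the path is a placeholder): tensorpack dataflows need reset_state() called once before iteration.

if __name__ == '__main__':
    df = get_imagenet_dataflow('/path/to/imagenet', 'val', batch_size=64)
    df.reset_state()  # required once before iterating any tensorpack DataFlow
    for images, labels in df:
        print(images.shape, labels.shape)
        break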
Example #2
    def __init__(self, cfg):
        self.cfg = cfg
        self.aug = imgaug.AugmentorList([
            CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE,
                         cfg.PREPROC.MAX_SIZE),
            imgaug.Flip(horiz=True)
        ])
Example #3
def get_train_aseval_dataflow():
    """
    Args:
        shard, num_shards: to get subset of evaluation data
    """
    prw = PRWDataset(cfg.DATA.BASEDIR)
    imgs = prw.load()

    # no filter for training
    # test if it can repeat keys
    ds = DataFromList(imgs, shuffle=False)

    aug = imgaug.AugmentorList(
        [CustomResize(cfg.PREPROC.SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE)])

    def preprocess(img):
        fname = img['file_name']
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        orig_shape = im.shape[:2]
        im = im.astype('float32')

        # augmentation:
        im, params = aug.augment_return_params(im)

        ret = [fname, im, orig_shape]

        return ret

    ds = MapData(ds, preprocess)
    return ds
Example #4
    def __init__(self, cfg):
        self.cfg = cfg
        self.aug = imgaug.AugmentorList([
            imgaug.RandomApplyAug(SquareAspectRatioResize(), 0.075),
            # imgaug.RandomApplyAug(imgaug.RandomCropRandomShape(wmin=int(
            #     0.75*cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE[0]), hmin=int(0.75*cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE[0])), 0.25),
            CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
            imgaug.RandomApplyAug(imgaug.Flip(horiz=True), 0.5),
        ])
Example #5
def get_moco_dataflow(datadir, batch_size, augmentors):
    """
    Dataflow for training MOCO.
    """
    augmentors = imgaug.AugmentorList(augmentors)
    parallel = min(30, mp.cpu_count())  # tuned on a machine with 40 physical cores (80 hyperthreads)
    ds = dataset.ILSVRC12Files(datadir, 'train', shuffle=True)
    ds = MultiProcessMapAndBatchDataZMQ(ds, parallel, MoCoMapper(augmentors), batch_size, buffer_size=5000)
    return ds
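
MoCoMapper is defined elsewhere in the MOCO training code; a plausible sketch, assuming it follows the usual MoCo recipe of returning two independently augmented views per image (the class layout here is an assumption):

class MoCoMapper:
    """Hypothetical sketch: map (filename, label) to two augmented views."""
    def __init__(self, augmentors):
        self.augmentors = augmentors  # an imgaug.AugmentorList

    def __call__(self, dp):
        fname, _ = dp
        img = cv2.imread(fname, cv2.IMREAD_COLOR)
        # two independent draws of the random augmentations -> query/key views
        return self.augmentors.augment(np.copy(img)), self.augmentors.augment(img)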
Example #6
    def __init__(self, cfg):
        self.cfg = cfg
        self.aug_weak = imgaug.AugmentorList([
            CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE,
                         cfg.PREPROC.MAX_SIZE),
            imgaug.Flip(horiz=True)
        ])
        self.aug_type = cfg.TRAIN.AUGTYPE_LAB
        self.aug_strong = RandomAugmentBBox(aug_type=cfg.TRAIN.AUGTYPE_LAB)
        logger.info("Use affine-enabled TrainingDataPreprocessor_aug")
Example #7
def get_imagenet_dataflow(datadir,
                          name,
                          batch_size,
                          augmentors=None,
                          parallel=None):
    """
    Args:
        augmentors (list[imgaug.Augmentor]): Defaults to `fbresnet_augmentor(isTrain)`

    Returns: A DataFlow which produces BGR images and labels.

    See explanations in the tutorial:
    http://tensorpack.readthedocs.io/tutorial/efficient-dataflow.html
    """
    assert name in ['train', 'val', 'test']
    isTrain = name == 'train'
    assert datadir is not None
    if augmentors is None:
        augmentors = fbresnet_augmentor(isTrain)
    assert isinstance(augmentors, list)
    if parallel is None:
        parallel = min(40,
                       multiprocessing.cpu_count() //
                       2)  # assuming hyperthreading

    if isTrain:
        ds = dataset.ILSVRC12(datadir, name, shuffle=True)
        ds = AugmentImageComponent(ds, augmentors, copy=False)
        if parallel < 16:
            logger.warn(
                "DataFlow may become the bottleneck when too few processes are used."
            )
        ds = PrefetchDataZMQ(ds, parallel)
        ds = BatchData(ds, batch_size, remainder=False)
    else:
        ds = dataset.ILSVRC12Files(datadir, name, shuffle=False)
        aug = imgaug.AugmentorList(augmentors)

        def mapf(dp):
            fname, cls = dp
            im = cv2.imread(fname, cv2.IMREAD_COLOR)
            im = aug.augment(im)
            return im, cls

        ds = MultiThreadMapData(ds,
                                parallel,
                                mapf,
                                buffer_size=2000,
                                strict=True)
        ds = BatchData(ds, batch_size, remainder=True)
        ds = PrefetchDataZMQ(ds, 1)
    return ds
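
fbresnet_augmentor comes from tensorpack's ImageNet examples. A simplified sketch of the kind of list it returns (the real training list additionally applies brightness/contrast/saturation jitter and PCA lighting noise):

def fbresnet_augmentor_sketch(isTrain):
    if isTrain:
        return [
            imgaug.GoogleNetRandomCropAndResize(),  # random crop + resize to 224
            imgaug.Flip(horiz=True),
        ]
    return [
        imgaug.ResizeShortestEdge(256, cv2.INTER_CUBIC),
        imgaug.CenterCrop((224, 224)),
    ]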
Example #8
def get_data(name, meta_dir, gpu_nums):
    isTrain = 'train' in name

    m = np.array([104, 116, 122])
    const_arr = np.resize(m, (1, 1, 3))  # HWC, per-channel BGR mean
    const_arr = np.zeros(
        (args.crop_size[0], args.crop_size[1], 3)) + const_arr  # broadcast

    if isTrain:
        #ds = FakeData([[1024, 2048, 3], [ 1024, 2048]], 5000, random=False, dtype='uint8')
        #ds = FakeData([[CROP_HEIGHT, CROP_HEIGHT, 3], [CROP_HEIGHT, CROP_HEIGHT]], 5000,random=False, dtype='uint8')
        ds = CityscapesFiles(base_dir, meta_dir, name, shuffle=True)
        parallel = min(3, multiprocessing.cpu_count())
        augmentors = [
            RandomCropWithPadding(args.crop_size),
            Flip(horiz=True),
        ]
        aug = imgaug.AugmentorList(augmentors)

        def mapf(dp):
            img, label = dp
            img = cv2.imread(img, cv2.IMREAD_COLOR)
            label = cv2.imread(label, cv2.IMREAD_GRAYSCALE)
            img, params = aug.augment_return_params(img)
            label = aug._augment(label, params)  # reuse the same params on the label map (private tensorpack API)
            img = img - const_arr  # very time-consuming
            return img, label

        #ds = MapData(ds, mapf)
        ds = MultiThreadMapData(ds,
                                parallel,
                                mapf,
                                buffer_size=500,
                                strict=True)
        #ds = MapData(ds, reduce_mean_rgb)

        ds = BatchData(ds, args.batch_size * gpu_nums)
        #ds = PrefetchDataZMQ(ds, 1)
    else:

        def imgread(dp):
            img, label = dp
            img = cv2.imread(img, cv2.IMREAD_COLOR)
            label = cv2.imread(label, cv2.IMREAD_GRAYSCALE)
            return [img, label]

        ds = CityscapesFiles(base_dir, meta_dir, name, shuffle=False)
        ds = MapData(ds, imgread)
        ds = BatchData(ds, 1)

    return ds
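
RandomCropWithPadding is a custom augmentor. A minimal sketch under tensorpack's old ImageAugmentor interface (the same interface that makes augment_return_params and _augment above work), assuming zero padding on the bottom/right when the image is smaller than the crop; the [[0, 0]] * (ndim - 2) tail lets it handle both the 3-channel image and the 2-D label map:

class RandomCropWithPadding(imgaug.ImageAugmentor):
    def __init__(self, crop_size):
        super(RandomCropWithPadding, self).__init__()
        self.crop_size = crop_size  # (h, w)

    def _get_augment_params(self, img):
        h, w = img.shape[:2]
        ph = max(self.crop_size[0] - h, 0)  # pad if image is too small
        pw = max(self.crop_size[1] - w, 0)
        y0 = self.rng.randint(0, h + ph - self.crop_size[0] + 1)
        x0 = self.rng.randint(0, w + pw - self.crop_size[1] + 1)
        return (y0, x0, ph, pw)

    def _augment(self, img, param):
        y0, x0, ph, pw = param
        if ph or pw:
            pad = [[0, ph], [0, pw]] + [[0, 0]] * (img.ndim - 2)
            img = np.pad(img, pad, 'constant')
        return img[y0:y0 + self.crop_size[0], x0:x0 + self.crop_size[1]]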
Example #9
    def __init__(self, cfg):
        self.cfg = cfg
        self.aug = imgaug.AugmentorList([
            # imgaug.RandomApplyAug(imgaug.RandomResize(xrange=(0.8, 1.5), minimum=(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE[0], cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE[0]), aspect_ratio_thres=0.0), prob=0.5),
            imgaug.Flip(horiz=True, prob=0.5),
            imgaug.Flip(vert=True, prob=0.5),
            imgaug.RandomApplyAug(imgaug.Rotation(max_deg=180.0,
                                                  step_deg=30.0,
                                                  center_range=(0.5, 0.5)),
                                  prob=0.5),
            imgaug.RandomApplyAug(imgaug.Grayscale(keepshape=True), prob=0.5),
            CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE,
                         cfg.PREPROC.MAX_SIZE),
        ])
Example #10
    def __init__(self, cfg, is_aws, is_gcs):
        self.cfg = cfg
        self.aug = imgaug.AugmentorList([
            CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
            imgaug.Flip(horiz=True),
        ])
        self.is_aws = is_aws
        self.is_gcs = is_gcs
        if self.is_aws:
            self.s3 = boto3.resource("s3")
        elif self.is_gcs:
            self.storage_client = storage.Client.create_anonymous_client()
            self.bucket = self.storage_client.get_bucket("determined-ai-coco-dataset")
Example #11
    def __init__(self, cfg, confidence, pseudo_targets):
        self.cfg = cfg
        self.aug = imgaug.AugmentorList([
            CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE,
                         cfg.PREPROC.MAX_SIZE),
            imgaug.Flip(horiz=True)
        ])

        self.resize = imgaug.AugmentorList([
            CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE,
                         cfg.PREPROC.MAX_SIZE),
        ])

        self.aug_strong = RandomAugmentBBox(aug_type=cfg.TRAIN.AUGTYPE)
        self.aug_strong_labeled = RandomAugmentBBox(
            aug_type=cfg.TRAIN.AUGTYPE_LAB)
        self.labeled_augment_type = cfg.TRAIN.AUGTYPE_LAB
        self.unlabeled_augment_type = cfg.TRAIN.AUGTYPE

        self.confidence = confidence
        logger.info(
            "Use TrainingDataPreprocessor6 (using offline generated pseudo labels)"
        )
        self.pseudo_targets = pseudo_targets
Example #12
def build_dataflow(files):
    train_ds = DataFromList(files)
    aug = imgaug.AugmentorList(get_basic_augmentor(isTrain=False))

    def mapper(dp):
        idx, fname, label = dp
        img = cv2.imread(fname)
        img = aug.augment(img)
        return img, idx

    train_ds = MultiProcessMapDataZMQ(train_ds,
                                      num_proc=8,
                                      map_func=mapper,
                                      strict=True)
    train_ds = BatchData(train_ds, local_batch_size)
    train_ds.reset_state()
    return train_ds
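
A hedged usage sketch for build_dataflow; files is assumed to be a list of (idx, filename, label) tuples (that is how mapper unpacks it) and local_batch_size a module-level global:

files = [(0, '/path/a.jpg', 3), (1, '/path/b.jpg', 7)]  # hypothetical inputs
local_batch_size = 2

ds = build_dataflow(files)  # reset_state() is already called inside
for imgs, idxs in ds:
    print(imgs.shape, idxs)
    break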
Example #13
def get_imagenet_dataflow(datadir,
                          is_train,
                          batch_size,
                          augmentors,
                          parallel=None):
    """
    See explanations in the tutorial:
    http://tensorpack.readthedocs.io/en/latest/tutorial/efficient-dataflow.html
    """
    assert datadir is not None
    assert isinstance(augmentors, list)
    if parallel is None:
        parallel = min(40,
                       multiprocessing.cpu_count() //
                       2)  # assuming hyperthreading
    if is_train:
        ds = dataset.ILSVRC12(datadir, "train", shuffle=True)
        ds = AugmentImageComponent(ds, augmentors, copy=False)
        if parallel < 16:
            logging.warning(
                "DataFlow may become the bottleneck when too few processes are used."
            )
        ds = PrefetchDataZMQ(ds, parallel)
        ds = BatchData(ds, batch_size, remainder=False)
    else:
        ds = dataset.ILSVRC12Files(datadir, "val", shuffle=False)
        aug = imgaug.AugmentorList(augmentors)

        def mapf(dp):
            fname, cls = dp
            im = cv2.imread(fname, cv2.IMREAD_COLOR)
            im = np.flip(im, axis=2)  # convert BGR to RGB
            im = aug.augment(im)
            return im, cls

        ds = MultiThreadMapData(ds,
                                parallel,
                                mapf,
                                buffer_size=2000,
                                strict=True)
        # ds = MapData(ds, mapf)
        ds = BatchData(ds, batch_size, remainder=True)
        ds = PrefetchDataZMQ(ds, 1)
        # ds = PrefetchData(ds, 1)
    return ds
Example #14
    def get_data(self, name, num_gpu):
        gpu_batch = self.batch_size // num_gpu

        assert name in ['train', 'val', 'test']
        isTrain = name == 'train'

        augmentors = fbresnet_augmentor(isTrain)
        assert isinstance(augmentors, list)

        parallel = min(40,
                       multiprocessing.cpu_count() //
                       2)  # assuming hyperthreading

        if isTrain:
            ds = dataset.ILSVRC12(self.datadir,
                                  name,
                                  shuffle=True,
                                  dir_structure='train')
            ds = AugmentImageComponent(ds, augmentors, copy=False)
            ds = MultiProcessRunnerZMQ(ds, parallel)
            ds = BatchData(ds, gpu_batch, remainder=False)
            #ds = QueueInput(ds)
        else:
            ds = dataset.ILSVRC12Files(self.datadir,
                                       name,
                                       shuffle=False,
                                       dir_structure='train')
            aug = imgaug.AugmentorList(augmentors)

            def mapf(dp):
                fname, cls = dp
                im = cv2.imread(fname, cv2.IMREAD_COLOR)
                im = aug.augment(im)
                return im, cls

            ds = MultiThreadMapData(ds,
                                    parallel,
                                    mapf,
                                    buffer_size=2000,
                                    strict=True)
            ds = BatchData(ds, gpu_batch, remainder=True)
            ds = MultiProcessRunnerZMQ(ds, 1)

            if num_gpu == 1:
                ds = QueueInput(ds)
        return ds
Example #15
def get_query_dataflow():
    """
    Args:
        shard, num_shards: to get subset of evaluation data
    """
    prw = PRWDataset(cfg.DATA.BASEDIR)
    imgs = prw.load_query()

    # no filter for training
    # test if it can repeat keys
    ds = DataFromList(imgs, shuffle=False)

    aug = imgaug.AugmentorList(
        [CustomResize(cfg.PREPROC.SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE)])

    def preprocess(img):
        fname, boxes, re_id_class = img['file_name'], img['boxes'], img['re_id_class']
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        ret = [im, boxes, re_id_class]

        return ret

    ds = MapData(ds, preprocess)
    return ds
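
box_to_point8 / point8_to_box come from the FasterRCNN example's common.py: they round-trip boxes through corner points so that coordinate augmentors can transform them. A sketch consistent with that behavior:

def box_to_point8(boxes):
    # (n, 4) xyxy boxes -> (4n, 2) corner points
    b = boxes[:, [0, 1, 2, 3, 0, 3, 2, 1]]
    return b.reshape((-1, 2))


def point8_to_box(points):
    # (4n, 2) corner points -> (n, 4) axis-aligned enclosing boxes
    p = points.reshape((-1, 4, 2))
    minxy = p.min(axis=1)
    maxxy = p.max(axis=1)
    return np.concatenate((minxy, maxxy), axis=1)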
Example #16
def process_avatar_synth_data(df, batch_size, num_threads):
    """
    Perform preprocessing for the avatar synth data.

    :param df: An AvatarSynthDataFlow.
    :param batch_size: The minibatch size.
    :param num_threads: The number of threads to read and process data.

    :return: A dataflow with extra processing steps applied.
    """
    augmentor = imgaug.AugmentorList([imgaug.MinMaxNormalize(min=-1, max=1)])

    df = MultiThreadMapData(
        df,
        nr_thread=num_threads,
        map_func=lambda dp: [np.load(dp[0]),
                             augmentor.augment(imread(dp[1]))])

    # df = MapData(df, lambda dp: [np.load(dp[0]), augmentor.augment(imread(dp[1]))])
    df = PrefetchDataZMQ(df, nr_proc=num_threads)
    df = BatchData(df, batch_size, remainder=True)

    return df
Example #17
def get_imagenet_dataflow(
        datadir, name, batch_size,
        augmentors, parallel=None):
    """
    See explanations in the tutorial:
    http://tensorpack.readthedocs.io/en/latest/tutorial/efficient-dataflow.html
    """
    assert name in ['train', 'val', 'test']
    assert datadir is not None
    assert isinstance(augmentors, list)
    isTrain = name == 'train'
    if parallel is None:
        parallel = min(40, 16)  # hard-coded to 16 workers; other variants use cpu_count() // 2
    if isTrain:
        ds1 = ilsvrcsemi.ILSVRC12(datadir, name, shuffle=True, labeled=True)
        ds2 = ilsvrcsemi.ILSVRC12(datadir, name, shuffle=True, labeled=False)
        ds1 = AugmentImageComponent(ds1, augmentors, copy=False)
        ds2 = AugmentImageComponent(ds2, augmentors, copy=False)
        ds = JoinData([ds1, ds2])

        if parallel < 16:
            logger.warn("DataFlow may become the bottleneck when too few processes are used.")
        ds = PrefetchDataZMQ(ds, parallel)
        ds = BatchData(ds, batch_size, remainder=False)
    else:
        ds = dataset.ILSVRC12Files(datadir, name, shuffle=False)
        aug = imgaug.AugmentorList(augmentors)

        def mapf(dp):
            fname, cls = dp
            im = cv2.imread(fname, cv2.IMREAD_COLOR)
            im = aug.augment(im)
            return im, cls, im, cls  # duplicated to match the labeled+unlabeled JoinData layout used in training
        ds = MultiThreadMapData(ds, parallel, mapf, buffer_size=2000, strict=True)
        ds = BatchData(ds, batch_size, remainder=True)
        ds = PrefetchDataZMQ(ds, 1)
    return ds
Example #18
def process_s2b_data(df, batch_size, num_threads):
    """
    Perform preprocessing for the avatar synth data.

    :param df: An AvatarSynthDataFlow.
    :param batch_size: The minibatch size.
    :param num_threads: The number of threads to read and process data.

    :return: A dataflow with extra processing steps applied.
    """
    augmentor = imgaug.AugmentorList([imgaug.MinMaxNormalize(min=-1, max=1)])

    def get_imgs(dp):
        """
        :param dp: A datapoint tuple, (path_to_face.jpg, path_to_bitmoji.jpg)
        """
        face_img = augmentor.augment(imread(dp[0]))
        bitmoji_img = augmentor.augment(imread(dp[1]))
        if len(face_img.shape) == 2:
            face_img = np.stack([face_img] * 3, axis=-1)
        if len(bitmoji_img.shape) == 2:
            bitmoji_img = np.stack([bitmoji_img] * 3, axis=-1)

        return [face_img, bitmoji_img]

    df = MultiThreadMapData(df,
                            nr_thread=num_threads,
                            map_func=get_imgs,
                            buffer_size=min(df.size(), 200))
    df = PrefetchDataZMQ(df, nr_proc=num_threads)

    # TODO: switch back to remainder=True when s2b input batch size switched back to None
    df = BatchData(df, batch_size, remainder=False)
    # df = BatchData(df, batch_size, remainder=True)

    return df
Example #19
def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
    anchor_labels: (h', w', NA)
    anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """

    roidbs = DetectionDataset().load_training_roidbs(cfg.DATA.TRAIN)
    print_class_histogram(roidbs)

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(roidbs)
    roidbs = list(
        filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0,
               roidbs))
    logger.info(
        "Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}"
        .format(num - len(roidbs), len(roidbs)))

    ds = DataFromList(roidbs, shuffle=True)

    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(roidb):
        fname, boxes, klass, is_crowd = roidb['file_name'], roidb['boxes'], roidb['class'], roidb['is_crowd']
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        height, width = im.shape[:2]
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        if not cfg.DATA.ABSOLUTE_COORD:
            boxes[:, 0::2] *= width
            boxes[:, 1::2] *= height

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        ret = {'image': im}
        # rpn anchor:
        try:
            if cfg.MODE_FPN:
                multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(
                    im, boxes, is_crowd)
                for i, (anchor_labels,
                        anchor_boxes) in enumerate(multilevel_anchor_inputs):
                    ret['anchor_labels_lvl{}'.format(i + 2)] = anchor_labels
                    ret['anchor_boxes_lvl{}'.format(i + 2)] = anchor_boxes
            else:
                # anchor_labels, anchor_boxes
                ret['anchor_labels'], ret['anchor_boxes'] = get_rpn_anchor_input(
                    im, boxes, is_crowd)

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            ret['gt_boxes'] = boxes
            ret['gt_labels'] = klass
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                'warn')
            return None

        if cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(roidb['segmentation'])
            segmentation = [
                segmentation[k] for k in range(len(segmentation))
                if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            width_height = np.asarray([width, height], dtype=np.float32)
            for polys in segmentation:
                if not cfg.DATA.ABSOLUTE_COORD:
                    polys = [p * width_height for p in polys]
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(
                    segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret['gt_masks'] = masks

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret

    if cfg.TRAINER == 'horovod':
        ds = MultiThreadMapData(ds, 5, preprocess)
        # MPI does not like fork()
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    return ds
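
The dict layout documented in the docstring can be sanity-checked by pulling a single datapoint (a sketch; the anchor keys present depend on cfg.MODE_FPN):

ds = get_train_dataflow()
ds.reset_state()
for dp in ds:
    print(dp['image'].shape)      # (h, w, 3)
    print(dp['gt_boxes'].shape)   # (N, 4)
    print(dp['gt_labels'].shape)  # (N,)
    if cfg.MODE_MASK:
        print(dp['gt_masks'].shape)  # (N, h, w)
    break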
def get_train_dataflow_mapillary(add_mask=False, map_to_coco=False):
    train_img_path = config.MAPILLARY_PATH + "training/images/"
    train_label_path = config.MAPILLARY_PATH + "training/instances/"
    imgs = glob.glob(train_img_path + "*.jpg")

    ds = DataFromList(imgs, shuffle=True)
    aug = imgaug.AugmentorList([
        CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(fname):
        print("start preproc mapillary")
        start = time.time()

        label_fname = fname.replace(train_img_path,
                                    train_label_path).replace(".jpg", ".png")
        pil_label = Image.open(label_fname)
        label = np.array(pil_label)
        instances = np.unique(label)
        instance_classes = [x // 256 for x in instances]

        # filter by categories we use
        instances_valid = [
            cls in config.MAPILLARY_CAT_IDS_TO_USE for cls in instance_classes
        ]
        instances = [
            inst for inst, valid in zip(instances, instances_valid) if valid
        ]
        instance_classes = [
            cls for cls, valid in zip(instance_classes, instances_valid)
            if valid
        ]

        if len(instances) == 0:
            print("no instances")
            pil_label.close()
            return None

        if map_to_coco:
            instance_classes = [
                config.MAPILLARY_TO_COCO_MAP[cls] for cls in instance_classes
            ]
            instance_classes = [
                config.VOID_LABEL if cls == config.VOID_LABEL else
                COCOMeta.category_id_to_class_id[cls]
                for cls in instance_classes
            ]
        else:
            # remap to contiguous numbers starting with 1
            instance_classes = [
                config.MAPILLARY_CAT_IDS_TO_USE.index(cls) + 1
                for cls in instance_classes
            ]

        masks = np.array([label == inst for inst in instances], dtype=np.uint8)

        #import cProfile
        #start1 = time.time()
        boxes1 = np.array(
            [get_bbox_from_segmentation_mask(mask) for mask in masks],
            dtype=np.float32)
        #boxes1_time = time.time() - start1
        #pr = cProfile.Profile()
        #pr.enable()
        #start1 = time.time()
        #boxes2 = get_bboxes_from_segmentation_masks(masks)
        #print("boxes1", boxes1_time, "boxes2", time.time() - start1)
        #pr.disable()
        #pr.print_stats(sort="cumulative")
        #assert (boxes1 == boxes2).all(), (boxes1, boxes2)
        boxes = boxes1

        second_klass = np.array(instance_classes, dtype=np.int64)  # np.int was removed in NumPy 1.24
        klass = np.ones_like(second_klass)
        is_crowd = np.zeros_like(second_klass)

        res = preproc_img(fname, boxes, klass, second_klass, is_crowd, aug)
        if res is None:
            print("mapillary: preproc_img returned None on", fname)
            pil_label.close()
            return None
        ret, params = res
        if add_mask:
            do_flip, h, w = params[1]
            assert do_flip in (True, False), do_flip
            # augment label
            label = np.array(pil_label.resize((w, h), Image.NEAREST))
            if do_flip:
                label = label[:, ::-1]
            # create augmented masks
            masks = np.array([label == inst for inst in instances],
                             dtype=np.uint8)
            ret.append(masks)

        end = time.time()
        elapsed = end - start
        print("mapillary example done, elapsed:", elapsed)

        VISUALIZE = False
        if VISUALIZE:
            from viz import draw_annotation, draw_mask
            config.CLASS_NAMES = [str(idx) for idx in range(81)]
            im = ret[0]
            boxes = ret[3]
            draw_klass = ret[-2]
            viz = draw_annotation(im, boxes, draw_klass)
            for mask in masks:
                viz = draw_mask(viz, mask)
            tpviz.interactive_imshow(viz)

        pil_label.close()
        return ret

    #ds = MapData(ds, preprocess)
    ds = MultiProcessMapData(ds,
                             nr_proc=8,
                             map_func=preprocess,
                             buffer_size=35)
    return ds
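
get_bbox_from_segmentation_mask is defined elsewhere; a minimal sketch, assuming xyxy float boxes derived from the nonzero extent of a binary mask:

def get_bbox_from_segmentation_mask(mask):
    ys, xs = np.where(mask)
    # xyxy with an exclusive max edge, so single-pixel masks keep nonzero area
    return np.array([xs.min(), ys.min(), xs.max() + 1, ys.max() + 1],
                    dtype=np.float32)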
def get_train_dataflow_coco(add_mask=False):
    """
    Return a training dataflow. Each datapoint is:
    image, fm_labels, fm_boxes, gt_boxes, gt_class [, masks]
    """
    imgs = COCODetection.load_many(config.BASEDIR,
                                   config.TRAIN_DATASET,
                                   add_gt=True,
                                   add_mask=add_mask)
    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    imgs = list(filter(lambda img: len(img['boxes']) > 0,
                       imgs))  # log invalid training

    ds = DataFromList(imgs, shuffle=True)

    aug = imgaug.AugmentorList([
        CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(img):
        print("start preproc coco")
        start = time.time()
        if config.USE_SECOND_HEAD:
            fname, boxes, klass, second_klass, is_crowd = img['file_name'], img['boxes'], img['class'], \
                                                          img['second_class'], img['is_crowd']
        else:
            fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img['class'], img['is_crowd']
            second_klass = None
        res = preproc_img(fname, boxes, klass, second_klass, is_crowd, aug)
        if res is None:
            print("coco: preproc_img returned None on", fname)
            return None

        ret, params = res
        im = ret[0]
        boxes = ret[3]
        # masks
        if add_mask:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(img.get('segmentation', None))
            segmentation = [
                segmentation[k] for k in range(len(segmentation))
                if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes), (len(segmentation),
                                                     len(boxes))

            # one image-sized binary mask per box
            masks = []
            for polys in segmentation:
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(
                    segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret.append(masks)

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        end = time.time()
        elapsed = end - start
        print("coco example done, elapsed:", elapsed)
        return ret

    #ds = MapData(ds, preprocess)
    ds = MultiProcessMapData(ds,
                             nr_proc=4,
                             map_func=preprocess,
                             buffer_size=20)
    return ds
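
segmentation_to_mask rasterizes the augmented polygons into one image-sized binary mask per instance. Tensorpack's version does this with pycocotools, roughly:

import pycocotools.mask as cocomask

def segmentation_to_mask(polys, height, width):
    # polys: list of (N, 2) float arrays, one polygon per array
    polys = [p.flatten().tolist() for p in polys]
    rles = cocomask.frPyObjects(polys, height, width)
    rle = cocomask.merge(rles)
    return cocomask.decode(rle)  # (height, width) uint8, values in {0, 1}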
def get_train_dataflow(src):
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
    anchor_labels: (h', w', NA)
    anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """

    #imgs = COCODetection.load_many(cfg.DATA.BASEDIR, cfg.DATA.TRAIN, add_gt=True, add_mask=cfg.MODE_MASK)

    classes = (
        'BG',  # always index 0
        'bathtub',
        'bed',
        'bookshelf',
        'box',
        'chair',
        'counter',
        'desk',
        'door',
        'dresser',
        'garbage_bin',
        'lamp',
        'monitor',
        'night_stand',
        'pillow',
        'sink',
        'sofa',
        'table',
        'toilet',
        'tv')

    class_to_ind = dict(zip(classes, range(len(classes))))
    #src = '/media/ayan/Drive/IMI-Research/Datasets/Datasets_OP_Train/'
    textfile_index = natsorted(
        [src + f for f in np.sort(os.listdir(src)) if f.endswith('.txt')])
    imgs = []
    count = 0
    for fn in textfile_index:
        each_file = {}
        count = count + 1
        print(str(count) + ':::', fn)
        with open(fn, 'r') as F:
            file_F = F.read()
        file_F = file_F.split('\n')
        each_file['file_name'] = file_F[0]
        im = cv2.imread(each_file['file_name'])
        each_file['height'] = im.shape[0]
        each_file['width'] = im.shape[1]
        objects = file_F[2:len(file_F) - 1]
        boxes = []
        class_ = []
        for obj in objects:
            objs_line = obj.split(' ')
            x1 = float(objs_line[1]) - 1.0
            y1 = float(objs_line[2]) - 1.0
            x2 = float(objs_line[3]) - 1.0
            y2 = float(objs_line[4]) - 1.0
            if x1 >= x2:
                x2 = x1 + 1
            boxes.append([x1, y1, x2, y2])
            cls = class_to_ind[objs_line[0]]
            class_.append(cls)
        each_file['boxes'] = np.array(boxes).astype(np.float32)
        each_file['class'] = np.array(class_).astype(np.int32)
        each_file['is_crowd'] = np.zeros_like(each_file['class']).astype(
            np.int8)
        imgs.append(each_file)
    """ 
    To train on your own data, change this to your loader.
    Produce "imgs" as a list of dict, in the dict the following keys are needed for training:
    height, width: integer
    file_name: str, full path to the image
    boxes: numpy array of kx4 floats
    class: numpy array of k integers
    is_crowd: k booleans. Use k False if you don't know what it means.
    segmentation: k lists of numpy arrays (one for each box).
        Each list of numpy array corresponds to the mask for one instance.
        Each numpy array in the list is a polygon of shape Nx2,
        because one mask can be represented by N polygons.

        If your segmentation annotations are originally masks rather than polygons,
        either convert it, or the augmentation code below will need to be
        changed or skipped accordingly.
    """

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(imgs)
    imgs = list(
        filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0, imgs))
    logger.info(
        "Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}"
        .format(num - len(imgs), len(imgs)))

    ds = DataFromList(imgs, shuffle=False)

    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(img):
        fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img['class'], img['is_crowd']
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        # rpn anchor:
        try:
            if cfg.MODE_FPN:
                multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(
                    im, boxes, is_crowd)
                anchor_inputs = itertools.chain.from_iterable(
                    multilevel_anchor_inputs)
            else:
                # anchor_labels, anchor_boxes
                anchor_inputs = get_rpn_anchor_input(im, boxes, is_crowd)
                assert len(anchor_inputs) == 2

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                'warn')
            return None

        ret = [im] + list(anchor_inputs) + [boxes, klass]

        if cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(img['segmentation'])
            segmentation = [
                segmentation[k] for k in range(len(segmentation))
                if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            for polys in segmentation:
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(
                    segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret.append(masks)

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret

    if cfg.TRAINER == 'horovod':
        ds = MultiThreadMapData(ds, 5, preprocess)
        # MPI does not like fork()
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    return ds
def get_train_dataflow_davis(add_mask=False):
    # train_img_path = config.DAVIS_PATH + "train/"
    # train_label_path = config.DAVIS_PATH + "train-gt/"
    # imgs = glob.glob(train_img_path + "*/*.jpg")

    # train_img_path = "/home/luiten/vision/PReMVOS/data/first/bike-trial/lucid_data_dreaming/"
    # train_label_path = "/home/luiten/vision/PReMVOS/data/first/bike-trial/lucid_data_dreaming/"

    # train_img_path = "/home/luiten/vision/PReMVOS/data/"+config.DAVIS_NAME+"/lucid_data_dreaming/"
    # train_label_path = "/home/luiten/vision/PReMVOS/data/"+config.DAVIS_NAME+"/lucid_data_dreaming/"

    # train_img_path = "/home/luiten/vision/youtubevos/ytvos_data/together/generated/augment_images/"
    # train_label_path = "/home/luiten/vision/youtubevos/ytvos_data/together/generated/augment_gt/"

    train_img_path = "/home/luiten/vision/youtubevos/DAVIS/davis_together/augment_images/"
    train_label_path = "/home/luiten/vision/youtubevos/DAVIS/davis_together/augment_gt/"

    imgs = sorted(glob.glob(train_img_path + "*/*.jpg"))

    ds = DataFromList(imgs, shuffle=True)
    aug = imgaug.AugmentorList([
        CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(fname):
        # print("start preproc mapillary")
        start = time.time()

        label_fname = fname.replace(train_img_path,
                                    train_label_path).replace(".jpg", ".png")
        pil_label = Image.open(label_fname)
        label = np.array(pil_label)
        instances = np.unique(label)
        instance_classes = [x // 256 for x in instances]

        if len(instances) == 0:
            print("no instances")
            pil_label.close()
            return None

        masks = np.array([label == inst for inst in instances], dtype=np.uint8)

        boxes1 = np.array(
            [get_bbox_from_segmentation_mask(mask) for mask in masks],
            dtype=np.float32)
        boxes = boxes1

        # second_klass = np.array(instance_classes, dtype=np.int64)
        second_klass = np.zeros_like(instance_classes, dtype=np.int64)
        klass = np.ones_like(second_klass)
        is_crowd = np.zeros_like(second_klass)

        res = preproc_img(fname, boxes, klass, second_klass, is_crowd, aug)
        if res is None:
            print("davis: preproc_img returned None on", fname)
            pil_label.close()
            return None
        ret, params = res
        if add_mask:
            do_flip, h, w = params[1]
            assert do_flip in (True, False), do_flip
            # augment label
            label = np.array(pil_label.resize((w, h), Image.NEAREST))
            if do_flip:
                label = label[:, ::-1]
            # create augmented masks
            masks = np.array([label == inst for inst in instances],
                             dtype=np.uint8)
            ret.append(masks)

        end = time.time()
        elapsed = end - start
        # print("davis example done, elapsed:", elapsed)

        VISUALIZE = False
        if VISUALIZE:
            from viz import draw_annotation, draw_mask
            config.CLASS_NAMES = [str(idx) for idx in range(81)]
            im = ret[0]
            boxes = ret[3]
            draw_klass = ret[-2]
            viz = draw_annotation(im, boxes, draw_klass)
            for mask in masks:
                viz = draw_mask(viz, mask)
            tpviz.interactive_imshow(viz)

        pil_label.close()
        return ret

    ds = MapData(ds, preprocess)
    # ds = MultiProcessMapData(ds, nr_proc=8, map_func=preprocess, buffer_size=35)
    # ds = MultiProcessMapData(ds, nr_proc=8, map_func=preprocess)
    return ds
Example #24
def get_train_dataflow(add_mask=False):
    """
    Return a training dataflow. Each datapoint is:
    image, fm_labels, fm_boxes, gt_boxes, gt_class [, masks]
    """

    imgs = COCODetection.load_many(config.BASEDIR,
                                   config.TRAIN_DATASET,
                                   add_gt=True,
                                   add_mask=add_mask)
    """
    To train on your own data, change this to your loader.
    Produce "igms" as a list of dict, in the dict the following keys are needed for training:
    height, width: integer
    file_name: str
    boxes: kx4 floats
    class: k integers
    is_crowd: k booleans. Use k False if you don't know what it means.
    segmentation: k numpy arrays. Each array is a polygon of shape Nx2.
        If your segmentation annotations are masks rather than polygons,
        either convert it, or the augmentation code below will need to be
        changed or skipped accordingly.
    """

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    imgs = list(filter(lambda img: len(img['boxes']) > 0,
                       imgs))  # log invalid training

    ds = DataFromList(imgs, shuffle=True)

    aug = imgaug.AugmentorList([
        CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(img):
        fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img['class'], img['is_crowd']
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)

        # rpn anchor:
        try:
            fm_labels, fm_boxes = get_rpn_anchor_input(im, boxes, is_crowd)
            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                'warn')
            return None

        ret = [im, fm_labels, fm_boxes, boxes, klass]

        if add_mask:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(img.get('segmentation', None))
            segmentation = [
                segmentation[k] for k in range(len(segmentation))
                if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            for polys in segmentation:
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(
                    segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret.append(masks)

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret

    ds = MultiProcessMapData(ds, 3, preprocess)
    return ds
Example #25
def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
    anchor_labels: (h', w', NA)
    anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """

    roidbs = COCODetection.load_many(cfg.DATA.BASEDIR,
                                     cfg.DATA.TRAIN,
                                     add_gt=True,
                                     add_mask=cfg.MODE_MASK)
    """
    To train on your own data, change this to your loader.
    Produce "roidbs" as a list of dict, in the dict the following keys are needed for training:
    height, width: integer
    file_name: str, full path to the image
    boxes: numpy array of kx4 floats
    class: numpy array of k integers
    is_crowd: k booleans. Use k False if you don't know what it means.
    segmentation: k lists of numpy arrays (one for each box).
        Each list of numpy arrays corresponds to the mask for one instance.
        Each numpy array in the list is a polygon of shape Nx2,
        because one mask can be represented by N polygons.

        If your segmentation annotations are originally masks rather than polygons,
        either convert it, or the augmentation code below will need to be
        changed or skipped accordingly.
    """

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(roidbs)
    roidbs = list(
        filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0,
               roidbs))
    logger.info(
        "Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}"
        .format(num - len(roidbs), len(roidbs)))

    ds = DataFromList(roidbs, shuffle=True)

    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(roidb):
        fname, boxes, klass, is_crowd = roidb['file_name'], roidb['boxes'], roidb['class'], roidb['is_crowd']
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        ret = {'image': im}
        # rpn anchor:
        try:
            ret['anchor_labels'], ret['anchor_boxes'] = get_rpn_anchor_input(
                im, boxes, is_crowd)
            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            ret['gt_boxes'] = boxes
            ret['gt_labels'] = klass
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                'warn')
            return None
        return ret

    if cfg.TRAINER == 'horovod':
        ds = MultiThreadMapData(ds, 5, preprocess)
        # MPI does not like fork()
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    return ds
def get_batch_train_dataflow(batch_size):
    """
    Return a training dataflow. Each datapoint consists of the following:

    A batch of images: (BS, h, w, 3),

    For each image

    1 or more pairs of (anchor_labels, anchor_boxes) :
    anchor_labels: (BS, h', w', maxNumAnchors)
    anchor_boxes: (BS, h', w', maxNumAnchors, 4)

    gt_boxes: (BS, maxNumAnchors, 4)
    gt_labels: (BS, maxNumAnchors)

    If MODE_MASK, gt_masks: (BS, maxNumAnchors, h, w)
    """
    print("In train dataflow")
    roidbs = DetectionDataset().load_training_roidbs(cfg.DATA.TRAIN)
    print("Done loading roidbs")

    # print_class_histogram(roidbs)

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(roidbs)
    roidbs = list(filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0, roidbs))
    logger.info("Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}".format(
        num - len(roidbs), len(roidbs)))

    roidbs = sorted(roidbs, key=lambda x: float(x['width']) / float(x['height']), reverse=True)     # will shuffle it later at every rank

    print("Batching roidbs")
    batched_roidbs = []

    if cfg.PREPROC.PREDEFINED_PADDING:
        taken = [False for _ in roidbs]
        done = False

        for i, d in enumerate(roidbs):
            batch = []
            if not taken[i]:
                batch.append(d)
                padding_shape = get_padding_shape(d['height'], d['width'])
                while len(batch) < batch_size:
                    k = get_next_roidb(roidbs, i, padding_shape, taken)
                    if k is None:
                        done = True
                        break
                    batch.append(roidbs[k])
                    taken[i], taken[k] = True, True
                if not done:
                    batched_roidbs.append(batch)
    else:
        batch = []
        for i, d in enumerate(roidbs):
            if i % batch_size == 0:
                if len(batch) == batch_size:
                    batched_roidbs.append(batch)
                batch = []
            batch.append(d)

    #batched_roidbs = sort_by_aspect_ratio(roidbs, batch_size)
    #batched_roidbs = group_by_aspect_ratio(roidbs, batch_size)
    print("Done batching roidbs")


    # Notes:
    #   - discard any leftover images
    #   - The batches will be shuffled, but the contents of each batch will always be the same
    #   - TODO: Fix lack of batch contents shuffling


    aug = imgaug.AugmentorList(
         [CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
          imgaug.Flip(horiz=True)])

    # aug = imgaug.AugmentorList([CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE)])


    def preprocess(roidb_batch):
        datapoint_list = []
        for roidb in roidb_batch:
            fname, boxes, klass, is_crowd = roidb['file_name'], roidb['boxes'], roidb['class'], roidb['is_crowd']
            boxes = np.copy(boxes)
            im = cv2.imread(fname, cv2.IMREAD_COLOR)
            assert im is not None, fname
            im = im.astype('float32')
            # assume floatbox as input
            assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

            # augmentation:
            im, params = aug.augment_return_params(im)
            points = box_to_point8(boxes)
            points = aug.augment_coords(points, params)
            boxes = point8_to_box(points)
            assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

            ret = {'images': im}
            # rpn anchor:
            try:
                if cfg.MODE_FPN:
                    multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(im, boxes, is_crowd)
                    for i, (anchor_labels, anchor_boxes) in enumerate(multilevel_anchor_inputs):
                        ret['anchor_labels_lvl{}'.format(i + 2)] = anchor_labels
                        ret['anchor_boxes_lvl{}'.format(i + 2)] = anchor_boxes
                else:
                    raise NotImplementedError("[armand] Batch mode only available for FPN")

                boxes = boxes[is_crowd == 0]    # skip crowd boxes in training target
                klass = klass[is_crowd == 0]
                ret['gt_boxes'] = boxes
                ret['gt_labels'] = klass
                ret['filename'] = fname
                if not len(boxes):
                    raise MalformedData("No valid gt_boxes!")
            except MalformedData as e:
                log_once("Input {} is filtered for training: {}".format(fname, str(e)), 'warn')
                return None

            if cfg.MODE_MASK:
                # augmentation will modify the polys in-place
                segmentation = copy.deepcopy(roidb['segmentation'])
                segmentation = [segmentation[k] for k in range(len(segmentation)) if not is_crowd[k]]
                assert len(segmentation) == len(boxes)

                # Apply augmentation on polygon coordinates.
                # And produce one image-sized binary mask per box.
                masks = []
                for polys in segmentation:
                    polys = [aug.augment_coords(p, params) for p in polys]
                    masks.append(segmentation_to_mask(polys, im.shape[0], im.shape[1]))
                masks = np.asarray(masks, dtype='uint8')    # values in {0, 1}
                ret['gt_masks'] = masks

            datapoint_list.append(ret)

        #################################################################################################################
        # Batchify the output
        #################################################################################################################

        # Now we need to batch the various fields

        # Easily stackable:
        # - anchor_labels_lvl2
        # - anchor_boxes_lvl2
        # - anchor_labels_lvl3
        # - anchor_boxes_lvl3
        # - anchor_labels_lvl4
        # - anchor_boxes_lvl4
        # - anchor_labels_lvl5
        # - anchor_boxes_lvl5
        # - anchor_labels_lvl6
        # - anchor_boxes_lvl6

        batched_datapoint = {}
        for stackable_field in ["anchor_labels_lvl2",
                                "anchor_boxes_lvl2",
                                "anchor_labels_lvl3",
                                "anchor_boxes_lvl3",
                                "anchor_labels_lvl4",
                                "anchor_boxes_lvl4",
                                "anchor_labels_lvl5",
                                "anchor_boxes_lvl5",
                                "anchor_labels_lvl6",
                                "anchor_boxes_lvl6"]:
            batched_datapoint[stackable_field] = np.stack([d[stackable_field] for d in datapoint_list])

        # Require padding and original dimension storage
        # - image (HxWx3)
        # - gt_boxes (?x4)
        # - gt_labels (?)
        # - gt_masks (?xHxW)

        """
        Find the minimum container size for images (maxW x maxH)
        Find the maximum number of ground truth boxes
        For each image, save original dimension and pad
        """

        if cfg.PREPROC.PREDEFINED_PADDING:
            padding_shapes = [get_padding_shape(*(d["images"].shape[:2])) for d in datapoint_list]
            max_height = max([shp[0] for shp in padding_shapes])
            max_width = max([shp[1] for shp in padding_shapes])
        else:
            image_dims = [d["images"].shape for d in datapoint_list]
            heights = [dim[0] for dim in image_dims]
            widths = [dim[1] for dim in image_dims]

            max_height = max(heights)
            max_width = max(widths)


        # image
        padded_images = []
        original_image_dims = []
        for datapoint in datapoint_list:
            image = datapoint["images"]
            original_image_dims.append(image.shape)

            h_padding = max_height - image.shape[0]
            w_padding = max_width - image.shape[1]

            padded_image = np.pad(image,
                                  [[0, h_padding],
                                   [0, w_padding],
                                   [0, 0]],
                                  'constant')

            padded_images.append(padded_image)

        batched_datapoint["images"] = np.stack(padded_images)
        #print(batched_datapoint["images"].shape)
        batched_datapoint["orig_image_dims"] = np.stack(original_image_dims)


        # gt_boxes and gt_labels
        max_num_gts = max([d["gt_labels"].size for d in datapoint_list])

        gt_counts = []
        padded_gt_labels = []
        padded_gt_boxes = []
        padded_gt_masks = []
        for datapoint in datapoint_list:
            gt_count_for_image = datapoint["gt_labels"].size
            gt_counts.append(gt_count_for_image)

            gt_padding = max_num_gts - gt_count_for_image
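
            # Labels are padded with -1 (not 0) so padded slots cannot be
            # mistaken for real class ids; orig_gt_counts below records how
            # many real entries each image has.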

            padded_gt_labels_for_img = np.pad(datapoint["gt_labels"], [0, gt_padding], 'constant', constant_values=-1)
            padded_gt_labels.append(padded_gt_labels_for_img)

            padded_gt_boxes_for_img = np.pad(datapoint["gt_boxes"],
                                             [[0, gt_padding],
                                              [0,0]],
                                             'constant')
            padded_gt_boxes.append(padded_gt_boxes_for_img)

            h_padding = max_height - datapoint["images"].shape[0]
            w_padding = max_width - datapoint["images"].shape[1]

            if cfg.MODE_MASK:
                padded_gt_masks_for_img = np.pad(datapoint["gt_masks"],
                                         [[0, gt_padding],
                                          [0, h_padding],
                                          [0, w_padding]],
                                         'constant')
                padded_gt_masks.append(padded_gt_masks_for_img)


        batched_datapoint["orig_gt_counts"] = np.stack(gt_counts)
        batched_datapoint["gt_labels"] = np.stack(padded_gt_labels)
        batched_datapoint["gt_boxes"] = np.stack(padded_gt_boxes)
        batched_datapoint["filenames"] = [d["filename"] for d in datapoint_list]

        if cfg.MODE_MASK:
            batched_datapoint["gt_masks"] = np.stack(padded_gt_masks)

        return batched_datapoint

    ds = DataFromList(batched_roidbs, shuffle=True)

    if cfg.TRAINER == 'horovod':
        # Horovod/MPI does not like fork(), so use threads instead of processes.
        ds = MultiThreadMapData(ds, 5, preprocess)
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    return ds
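
# Usage sketch for the batched dataflow built above. The enclosing function's
# name is not shown here, so "get_batched_train_dataflow" is a placeholder;
# reset_state() and plain iteration are the standard tensorpack DataFlow
# protocol:
#
#   ds = get_batched_train_dataflow()
#   ds.reset_state()                       # required before iterating
#   for batch in ds:
#       print(batch["images"].shape)       # (B, maxH, maxW, 3)
#       print(batch["orig_image_dims"][0]) # per-image (h, w, 3) before padding
#       break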
Beispiel #27
0
def get_train_dataflow(add_mask=False):
    """
    Return a training dataflow. Each datapoint is:
    image, fm_labels, fm_boxes, gt_boxes, gt_class [, masks]
    """
    imgs = COCODetection.load_many(config.BASEDIR,
                                   config.TRAIN_DATASET,
                                   add_gt=True,
                                   add_mask=add_mask)
    # Valid training images should have at least one fg box.
    # This filter must not be applied for testing.
    imgs = [img for img in imgs if len(img['boxes']) > 0]

    ds = DataFromList(imgs, shuffle=True)

    aug = imgaug.AugmentorList([
        CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(img):
        fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img[
            'class'], img['is_crowd']
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)

        # rpn anchor:
        try:
            fm_labels, fm_boxes = get_rpn_anchor_input(im, boxes, klass,
                                                       is_crowd)
            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is invalid for training: {}".format(fname, str(e)),
                'warn')
            return None

        ret = [im, fm_labels, fm_boxes, boxes, klass]

        # masks
        segmentation = img.get('segmentation', None)
        if segmentation is not None:
            segmentation = [
                segmentation[k] for k in range(len(segmentation))
                if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes)

            # one image-sized binary mask per box
            masks = []
            for polys in segmentation:
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(
                    segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret.append(masks)

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret

    ds = MapData(ds, preprocess)
    ds = PrefetchDataZMQ(ds, 1)
    return ds
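
# The helpers box_to_point8 / point8_to_box used above convert boxes to their
# four corner points (so coordinate augmentation can be applied to them) and
# back to axis-aligned boxes. A sketch consistent with that usage:
import numpy as np

def box_to_point8(boxes):
    # (n, 4) x1y1x2y2 boxes -> (n*4, 2) corner points
    b = boxes[:, [0, 1, 2, 3, 0, 3, 2, 1]]
    return b.reshape((-1, 2))

def point8_to_box(points):
    # (n*4, 2) augmented corner points -> (n, 4) axis-aligned boxes
    p = points.reshape((-1, 4, 2))
    minxy = p.min(axis=1)   # (n, 2)
    maxxy = p.max(axis=1)   # (n, 2)
    return np.concatenate((minxy, maxxy), axis=1)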
Beispiel #28
0
def get_train_dataflow():
    roidbs = DetectionDataset().load_training_roidbs(cfg.DATA.TRAIN)
    ds = DataFromList(roidbs, shuffle=True)
    # For now, skip flipping to keep things simple.
    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        # imgaug.Flip(horiz=True),
    ])

    if cfg.MODE_HARD_MINING:
        from annoy import AnnoyIndex
        hard_mining_index = AnnoyIndex(128, 'euclidean')
        hard_mining_index.load(cfg.HARD_MINING_DATA_PATH +
                               "/index_all/index.ann")
        names_path = cfg.HARD_MINING_DATA_PATH + "/index_all/names.txt"
        hard_mining_names_all = []
        with open(names_path) as f:
            for l in f:
                hard_mining_names_all.append(l.strip())
        hard_example_names_got = [
            x[7:] for x in hard_mining_names_all if x.startswith("GOT10k/")
        ]
        hard_example_names_vid = [
            x[12:] for x in hard_mining_names_all
            if x.startswith("ImageNetVID/")
        ]
        hard_example_names_ytbvos = [
            x[11:] for x in hard_mining_names_all
            if x.startswith("YouTubeVOS/")
        ]
        hard_example_names_lasot = [
            x[6:] for x in hard_mining_names_all if x.startswith("LaSOT/")
        ]
        assert len(hard_example_names_got) > 0
        assert len(hard_example_names_vid) > 0
        assert len(hard_example_names_ytbvos) > 0
        assert len(hard_example_names_lasot) > 0
        hard_example_names_got.sort()
        hard_example_names_vid.sort()
        hard_example_names_ytbvos.sort()
        hard_example_names_lasot.sort()
        hard_mining_names = {
            "all": hard_mining_names_all,
            "GOT10k": hard_example_names_got,
            "ImageNetVID": hard_example_names_vid,
            "YouTubeVOS": hard_example_names_ytbvos,
            "LaSOT": hard_example_names_lasot
        }
    else:
        hard_mining_index = None
        hard_mining_names = None

    def preprocess(roidb):
        if roidb.startswith("VID/"):
            return _preprocess_imagenet_vid(roidb[4:], aug, hard_mining_index,
                                            hard_mining_names)
        elif roidb.startswith("DAVIS/"):
            return _preprocess_davis_like(
                roidb[6:], aug,
                os.path.join(cfg.DATA.DAVIS2017_ROOT, "Annotations", "480p"))
        elif roidb.startswith("YouTubeVOS/"):
            return _preprocess_davis_like(
                roidb[11:], aug,
                os.path.join(cfg.DATA.YOUTUBE_VOS_ROOT, "train",
                             "Annotations"), "YouTubeVOS", hard_mining_index,
                hard_mining_names)
        elif roidb.startswith("GOT10K/"):
            return _preprocess_got10k(roidb[7:], aug, hard_mining_index,
                                      hard_mining_names)
        elif roidb.startswith("LaSOT/"):
            return _preprocess_lasot(roidb[6:], aug, hard_mining_index,
                                     hard_mining_names)
        elif roidb.startswith("YouTube-BB/"):
            return _preprocess_youtube_bb(roidb[11:], aug)
        elif roidb.startswith("TrackingNet/"):
            return _preprocess_trackingnet(roidb[12:], aug)
        else:
            assert False, "Unknown dataset prefix: " + roidb

    if cfg.DATA.DEBUG_VIS or not cfg.DATA.MULTITHREAD:
        ds = MapData(ds, preprocess)
    else:
        ds = MultiThreadMapData(ds, 8, preprocess, buffer_size=80)
    return ds
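
# Sketch of how the Annoy index loaded above might be queried for hard
# examples; the 128-d query embedding and k are assumptions, while
# get_nns_by_vector is Annoy's actual nearest-neighbour call:
def find_hard_example_names(index, names, query_embedding, k=10):
    # Return the names of the k entries closest to the query embedding.
    nn_ids = index.get_nns_by_vector(query_embedding, k)
    return [names[i] for i in nn_ids]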
Beispiel #29
0
def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
    anchor_labels: (h', w', NA)
    anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """

    imgs = COCODetection.load_many(cfg.DATA.BASEDIR,
                                   cfg.DATA.TRAIN,
                                   add_gt=True,
                                   add_mask=cfg.MODE_MASK)
    """
    To train on your own data, change this to your loader.
    Produce "imgs" as a list of dict, in the dict the following keys are needed for training:
    height, width: integer
    file_name: str, full path to the image
    boxes: numpy array of kx4 floats
    class: numpy array of k integers
    is_crowd: k booleans. Use k False if you don't know what it means.
    segmentation: k lists of numpy arrays (one for each box).
        Each list of numpy array corresponds to the mask for one instance.
        Each numpy array in the list is a polygon of shape Nx2,
        because one mask can be represented by N polygons.

        If your segmentation annotations are originally masks rather than polygons,
        either convert it, or the augmentation code below will need to be
        changed or skipped accordingly.
    """

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(imgs)
    imgs = list(
        filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0, imgs))
    logger.info(
        "Filtered {} images which contain no non-crowd groundtruth boxes. Total #images for training: {}"
        .format(num - len(imgs), len(imgs)))

    ds = DataFromList(imgs, shuffle=True)

    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(img):
        fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img[
            'class'], img['is_crowd']
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        # rpn anchor:
        try:
            if cfg.MODE_FPN:
                multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(
                    im, boxes, is_crowd)
                anchor_inputs = itertools.chain.from_iterable(
                    multilevel_anchor_inputs)
            else:
                # anchor_labels, anchor_boxes
                anchor_inputs = get_rpn_anchor_input(im, boxes, is_crowd)
                assert len(anchor_inputs) == 2

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                'warn')
            return None

        ret = [im] + list(anchor_inputs) + [boxes, klass]

        if cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(img['segmentation'])
            segmentation = [
                segmentation[k] for k in range(len(segmentation))
                if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            for polys in segmentation:
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(
                    segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret.append(masks)

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret

    if cfg.TRAINER == 'horovod':
        ds = MultiThreadMapData(ds, 5, preprocess)
        # MPI does not like fork()
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    return ds
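
# segmentation_to_mask, used above, rasterizes an instance's polygons into one
# image-sized binary mask. A sketch along the lines of tensorpack's helper,
# assuming pycocotools is available:
import pycocotools.mask as cocomask

def segmentation_to_mask(polys, height, width):
    # polys: list of (N, 2) float arrays, each one polygon of the instance
    polys = [p.flatten().tolist() for p in polys]
    rles = cocomask.frPyObjects(polys, height, width)
    rle = cocomask.merge(rles)
    return cocomask.decode(rle)  # (height, width) uint8 mask, values in {0, 1}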
Beispiel #30
0
def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    input image: (h, w, 3),
    semantic label image: (h, w, 1)
    """
    # imgs is a list, where each element is a dict containing 'fn_img', and 'fn_label'
    imgs = load_many_from_db(cfg.DATA.NAME, add_gt=True, is_train=True)
    """
    To train on your own data, change this to your loader.
    Produce "imgs" as a list of dict, in the dict the following keys are needed for training:
    height, width: integer
    file_name: str
    boxes: kx4 floats
    class: k integers
    difficult: k booleans. Use k False if you don't know what it means.
    segmentation: k lists of numpy arrays (one for each box).
        Each list of numpy array corresponds to the mask for one instance.
        Each numpy array in the list is a polygon of shape Nx2,
        because one mask can be represented by N polygons.

        If your segmentation annotations are originally masks rather than polygons,
        either convert it, or the augmentation code below will need to be
        changed or skipped accordingly.
    """

    ds = DataFromList(imgs, shuffle=True)

    mean_bgr = np.array(cfg.PREPROC.PIXEL_MEAN[::-1])

    if cfg.DATA.NAME == 'cityscapes':
        aspect_exp = 1.1
    elif cfg.DATA.NAME == 'cocostuff':
        aspect_exp = 1.1
    else:
        raise ValueError('Unknown dataset name: {}'.format(cfg.DATA.NAME))

    aug = imgaug.AugmentorList([
        SSDCropRandomShape(cfg.PREPROC.INPUT_SHAPE_TRAIN,
                           aspect_exp=aspect_exp, mean_rgbgr=mean_bgr),
        SSDResize(cfg.PREPROC.INPUT_SHAPE_TRAIN),
        imgaug.Flip(horiz=True),
        SSDColorJitter(mean_rgbgr=mean_bgr)
    ])
    aug_label = imgaug.AugmentorList([
        SSDCropRandomShape(cfg.PREPROC.INPUT_SHAPE_TRAIN,
                           aspect_exp=aspect_exp, mean_rgbgr=[255, ]),
        SSDResize(cfg.PREPROC.INPUT_SHAPE_TRAIN, interp=cv2.INTER_NEAREST),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(img):
        fn_img, fn_label = img['fn_img'], img['fn_label']
        im = cv2.imread(fn_img, cv2.IMREAD_COLOR)
        assert im is not None, fn_img
        if fn_label.endswith('.mat'):  # cocostuff
            label = loadmat(fn_label)['S'].astype(int)
            label = (label - 1).astype(np.uint8)  # -1 becomes 255
        else:
            label = cv2.imread(fn_label, cv2.IMREAD_GRAYSCALE)
        assert label is not None, fn_label
        label = np.expand_dims(label, 2)
        im = im.astype('float32')

        # Augmentation: the label must undergo exactly the same geometric
        # transforms as the image, so reuse the image's augmentation params,
        # dropping the final color-jitter step and swapping in the label's
        # fill value and nearest-neighbour interpolation.
        im, params = aug.augment_return_params(im)
        params_label = deepcopy(params[:-1])
        params_label[0].mean_rgbgr = [255, ]
        params_label[1].interp = cv2.INTER_NEAREST
        label = aug_label.augment_with_params(label, params_label)
        label = label.astype('int32')

        ret = [im, label]
        return ret

    if cfg.TRAINER == 'horovod':
        ds = MultiThreadMapData(ds, 5, preprocess)
        # MPI does not like fork()
    else:
        # ds = MapData(ds, preprocess) # for debugging
        ds = MultiProcessMapDataZMQ(ds, cfg.PREPROC.NUM_WORKERS, preprocess)
    ds = BatchData(ds, cfg.PREPROC.BATCH_SIZE)
    return ds
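
# Usage sketch for the dataflow above (reset_state() and plain iteration are
# the standard tensorpack DataFlow protocol; batch shapes follow from
# cfg.PREPROC.INPUT_SHAPE_TRAIN and cfg.PREPROC.BATCH_SIZE):
if __name__ == '__main__':
    ds = get_train_dataflow()
    ds.reset_state()    # must be called once before iterating
    for im_batch, label_batch in ds:
        print(im_batch.shape, label_batch.shape)  # (B, H, W, 3), (B, H, W, 1)
        break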