Code Example #1
def get_pascal_voc_train_dataflow(batch_size=1):
    from dataset import register_pascal_voc

    # register_coco(os.path.expanduser("/media/ubuntu/Working/common_data/coco"))
    register_pascal_voc(os.path.expanduser("/media/ubuntu/Working/voc2012/VOC2012/"))

    print("In train dataflow")
    roidbs = list(itertools.chain.from_iterable(DatasetRegistry.get(x).training_roidbs() for x in cfg.DATA.TRAIN))
    print_class_histogram(roidbs)
    print("Done loading roidbs")

    # Filter out images that have no gt boxes, but this filter shall not be applied for testing.
    # The model does support training with empty images, but it is not useful for COCO.
    num = len(roidbs)
    roidbs = list(filter(lambda img: len(img["boxes"][img["is_crowd"] == 0]) > 0, roidbs))
    logger.info(
        "Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}".format(
            num - len(roidbs), len(roidbs)
        )
    )

    aspect_grouping = [1]
    aspect_ratios = [float(x["height"]) / float(x["width"]) for x in roidbs]
    group_ids = _quantize(aspect_ratios, aspect_grouping)

    ds = DataFromList(np.arange(len(roidbs)), shuffle=True)
    ds.reset_state()
    ds = AspectGroupingDataFlow(roidbs, ds, group_ids, batch_size=batch_size, drop_uneven=True).__iter__()
    preprocess = TrainingDataPreprocessor()

    while True:
        batch_roidbs = next(ds)
        yield preprocess(batch_roidbs)
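A minimal consumption sketch (an addition, not part of the original example): the function above is a plain Python generator, so it can be driven directly with next().

# Hedged usage sketch; assumes the function above and its dependencies are in scope.
train_gen = get_pascal_voc_train_dataflow(batch_size=2)
first_batch = next(train_gen)  # one aspect-grouped batch from TrainingDataPreprocessor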
Code Example #2
File: data.py Project: hakillha/maria03
def get_train_aseval_dataflow():
    """
    Args:
        shard, num_shards: to get subset of evaluation data
    """
    prw = PRWDataset(cfg.DATA.BASEDIR)
    imgs = prw.load()

    # no filter for training
    # test if it can repeat keys
    ds = DataFromList(imgs, shuffle=False)

    aug = imgaug.AugmentorList(
        [CustomResize(cfg.PREPROC.SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE)])

    def preprocess(img):
        fname = img['file_name']
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname  # validate the read before accessing im.shape
        orig_shape = im.shape[:2]
        im = im.astype('float32')

        # augmentation:
        im, params = aug.augment_return_params(im)

        ret = [fname, im, orig_shape]

        return ret

    ds = MapData(ds, preprocess)
    return ds
Code Example #3
def get_resnet_train_dataflow():
    imgs = ResnetDetection.load_many(
        config.BASEDIR, config.TRAIN_DATASET)
    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    imgs = list(imgs)

    ds = DataFromList(imgs, shuffle=True)
    augmentors = get_resnet_augmentor()
    def preprocess(img):
        im, fname, label = img['image_data'], img['id'], img['with_ship']
        im = cv2.imread(im)
        #============Aug================
        im = cv2.resize(im, (config.RESNET_SIZE, config.RESNET_SIZE))
        augmented = strong_aug()(image=im)
        im = augmented['image']
        # im, multi_mask = do_flip_transpose2(im, multi_mask, type=random.randint(0,7))
        #============================
        ret = [im, label]
        return ret
    ds = MapData(ds, preprocess)
    ds = AugmentImageComponent(ds, augmentors, copy=False)
    ds = BatchData(ds, config.RESNET_BATCH)
    ds = PrefetchDataZMQ(ds, 6)
    return ds
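A usage sketch for the returned tensorpack DataFlow (assumed usage, not shown in the source): reset_state() must be called once before iteration, which also starts the PrefetchDataZMQ worker processes. On older tensorpack versions, iterate with ds.get_data() instead of a plain for-loop.

# Hedged usage sketch for the DataFlow returned above.
ds = get_resnet_train_dataflow()
ds.reset_state()                    # start prefetch workers before iterating
for im_batch, label_batch in ds:    # each datapoint is the batched [im, label] pair
    break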
Code Example #4
def get_debug_dataflow(add_mask=True, imageHW=768):
    """
    Return a training dataflow. Each datapoint is:
    image, fm_labels, fm_boxes, gt_boxes, gt_class [, masks]
    """
    imgs = Detection.load_many(
        config.BASEDIR, config.TRAIN_DATASET, add_gt=True, add_mask=add_mask)
    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    imgs = list(imgs)
    import os
    import pandas as pd
    csv_path = os.path.join(config.BASEDIR, 'train_ship_segmentations_v2.csv')
    df = pd.read_csv(csv_path, engine="python")
    df = df.dropna(axis=0)
    df = df.set_index('ImageId')

    ds = DataFromList(imgs, shuffle=True)
    def preprocess(img):
        im, fname = img['image_data'], img['id']
        multi_mask = getAnnotation(df, fname)
        im = cv2.imread(im)
        im, multi_mask = fix_resize_transform_range(im, multi_mask, [imageHW, imageHW], 1.0)
        boxes, klass, masks, is_crowd = multi_mask_to_annotation(multi_mask)
        return boxes
    ds = MapData(ds, preprocess)
    ds = PrefetchDataZMQ(ds, 6)
    return ds
Code Example #5
File: imagenet_utils.py Project: shlpu/benchmarks
def get_val_dataflow(
        datadir, batch_size,
        augmentors, parallel=None,
        num_splits=None, split_index=None):
    assert datadir is not None
    assert isinstance(augmentors, list)
    if parallel is None:
        parallel = min(40, multiprocessing.cpu_count())

    if num_splits is None:
        ds = dataset.ILSVRC12Files(datadir, 'val', shuffle=False)
    else:
        assert split_index < num_splits
        files = dataset.ILSVRC12Files(datadir, 'val', shuffle=False)
        files.reset_state()
        files = list(files.get_data())
        logger.info("#ValidationData = {}".format(len(files)))
        split_size = len(files) // num_splits
        start, end = split_size * split_index, split_size * (split_index + 1)
        end = min(end, len(files))
        logger.info("#ValidationSplit = {} - {}".format(start, end))
        files = files[start: end]
        ds = DataFromList(files, shuffle=False)
    aug = imgaug.AugmentorList(augmentors)

    def mapf(dp):
        fname, cls = dp
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        im = aug.augment(im)
        return im, cls
    ds = MultiThreadMapData(ds, parallel, mapf, buffer_size=2000, strict=True)
    ds = BatchData(ds, batch_size, remainder=True)
    # ds = PrefetchDataZMQ(ds, 1)
    # do not fork() under MPI
    return ds
Code Example #6
def get_train_dataflow(add_mask=True):
    """
    
    """
    if config.CROSS_VALIDATION:
        imgs = BRATS_SEG.load_from_file(config.BASEDIR, config.TRAIN_DATASET)
    else:
        imgs = BRATS_SEG.load_many(config.BASEDIR,
                                   config.TRAIN_DATASET,
                                   add_gt=False,
                                   add_mask=add_mask)
    # no filter for training
    imgs = list(imgs)

    ds = DataFromList(imgs, shuffle=True)

    def preprocess(data):
        if config.NO_CACHE:
            fname, gt, im = data['file_name'], data['gt'], data['image_data']
            volume_list, label, weight, _, _ = crop_brain_region(im, gt)
            batch = sampler3d(volume_list, label, weight)
        else:
            volume_list, label, weight, _, _ = data['preprocessed']
            batch = sampler3d(volume_list, label, weight)
        return [batch['images'], batch['weights'], batch['labels']]

    ds = BatchData(MapData(ds, preprocess), config.BATCH_SIZE)
    ds = PrefetchDataZMQ(ds, 6)
    return ds
Code Example #7
def get_batch_train_dataflow(roidbs, batch_size):
    """
    Tensorpack batch text dataflow.
    """
    # Group roidbs into fixed-size batches; any trailing incomplete batch is dropped.
    batched_roidbs = []
    batch = []
    for d in roidbs:
        batch.append(d)
        if len(batch) == batch_size:
            batched_roidbs.append(batch)
            batch = []

    def preprocess(roidb_batch):
        """
        Tensorpack batch text data preprocess function.
        """
        datapoint_list = []
        for roidb in roidb_batch:
            filename, label, mask, bbox, polygon = roidb['filename'], roidb[
                'label'], roidb['mask'], roidb['bbox'], roidb['polygon']
            img = cv2.imread(filename)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            image = affine_transform(img, polygon)
            # img = img[bbox[0]:bbox[2], bbox[1]:bbox[3], :] if image.shape[0]<cfg.stride/2 or image.shape[1]<cfg.stride/2 else image
            img = img if image.shape[0] < cfg.stride / 2 or image.shape[
                1] < cfg.stride / 2 else image

            largest_side = np.random.randint(cfg.crop_min_size, cfg.image_size)
            img = aspect_preserving_resize(img, largest_side)

            img, crop_bbox = padding_image(img, cfg.image_size)

            normalized_bbox = [coord / cfg.image_size for coord in crop_bbox]

            img = img.astype("float32") / 255.

            ret = {
                "image": img,
                "label": label,
                "mask": mask,
                "normalized_bbox": normalized_bbox
            }
            datapoint_list.append(ret)

        batched_datapoint = {"is_training": True, "dropout_keep_prob": 0.5}
        for stackable_field in ["image", "label", "mask", "normalized_bbox"]:
            batched_datapoint[stackable_field] = np.stack(
                [d[stackable_field] for d in datapoint_list])
        return batched_datapoint

    ds = DataFromList(batched_roidbs, shuffle=True)
    ds = MultiThreadMapData(ds, cfg.num_threads, preprocess)
    # ds = PrefetchData(ds, 100, multiprocessing.cpu_count() // 4)
    return ds
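A consumption sketch (assumed usage, not in the original): each datapoint from this dataflow is the dict built by preprocess above, with all listed fields stacked along axis 0.

# Hedged usage sketch; roidbs and cfg are assumed to be configured as in the example.
ds = get_batch_train_dataflow(roidbs, batch_size=8)   # batch_size=8 is hypothetical
ds.reset_state()
for dp in ds:
    images = dp["image"]   # (8, cfg.image_size, cfg.image_size, 3), float32 in [0, 1]
    labels = dp["label"]   # one label per roidb in the batch
    break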
Code Example #8
    def build_iter(self,samples):

        map_func=partial(self._map_func,is_training=self.training_flag)
        ds = DataFromList(samples, shuffle=True)

        ds = MultiThreadMapData(ds, self.thread_num, map_func, buffer_size=self.buffer_size)

        ds = BatchData(ds, self.num_gpu *  self.batch_size)
        ds = MultiProcessPrefetchData(ds, self.prefetch_size, self.process_num)
        ds.reset_state()
        ds = ds.get_data()
        return ds
Code Example #9
def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
    anchor_labels: (h', w', NA)
    anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """
    roidbs = list(
        itertools.chain.from_iterable(
            DatasetRegistry.get(x).training_roidbs() for x in cfg.DATA.TRAIN))
    print(
        "---------------------------------------------------------------- data.py:343"
    )
    print_class_histogram(roidbs)

    # Filter out images that have no gt boxes, but this filter shall not be applied for testing.
    # The model does support training with empty images, but it is not useful for COCO.
    num = len(roidbs)
    roidbs = list(
        filter(lambda img: len(img["boxes"][img["is_crowd"] == 0]) > 0,
               roidbs))
    logger.info(
        "Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}"
        .format(num - len(roidbs), len(roidbs)))

    ds = DataFromList(roidbs, shuffle=True)

    preprocess = TrainingDataPreprocessor(cfg)

    if cfg.DATA.NUM_WORKERS > 0:
        if cfg.TRAINER == "horovod":
            # one dataflow for each process, therefore don't need large buffer
            buffer_size = cfg.DATA.NUM_WORKERS * 10
            ds = MultiThreadMapData(ds,
                                    cfg.DATA.NUM_WORKERS,
                                    preprocess,
                                    buffer_size=buffer_size)
            # MPI does not like fork()
        else:
            buffer_size = cfg.DATA.NUM_WORKERS * 20
            ds = MultiProcessMapData(ds,
                                     cfg.DATA.NUM_WORKERS,
                                     preprocess,
                                     buffer_size=buffer_size)
    else:
        ds = MapData(ds, preprocess)
    return ds
Code Example #10
File: serialize.py Project: ai-med/almgig
def create_dataflow(graphs: List[GraphAdjacencyTuple],
                    max_nodes: int,
                    metrics_fn: Callable[[np.ndarray, np.ndarray], float],
                    validator: Optional[GraphValidatorFn] = None,
                    shuffle: bool = False) -> SelectComponent:
    ds = DataFromList(graphs, shuffle)
    ds = AppendNodeFeatures(ds, data_key='AtomCode')
    ds_conv = GraphConvEmbedding(ds, max_nodes, validator)
    ds = AppendMolMetrics(ds_conv, metrics_fn, index_edges=0, index_node=2)
    ds = SelectComponent(ds, [0, 2, 3])

    return ds
Code Example #11
def get_plain_train_dataflow(batch_size=2):
    # no aspect ratio grouping

    print("In train dataflow")
    roidbs = list(itertools.chain.from_iterable(DatasetRegistry.get(x).training_roidbs() for x in cfg.DATA.TRAIN))
    print_class_histogram(roidbs)
    print("Done loading roidbs")

    # Filter out images that have no gt boxes, but this filter shall not be applied for testing.
    # The model does support training with empty images, but it is not useful for COCO.
    num = len(roidbs)
    roidbs = list(filter(lambda img: len(img["boxes"][img["is_crowd"] == 0]) > 0, roidbs))
    logger.info(
        "Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}".format(
            num - len(roidbs), len(roidbs)
        )
    )

    ds = DataFromList(roidbs, shuffle=True)
    preprocess = TrainingDataPreprocessor()
    buffer_size = cfg.DATA.NUM_WORKERS * 20
    ds = MultiProcessMapData(ds, cfg.DATA.NUM_WORKERS, preprocess, buffer_size=buffer_size)
    ds.reset_state()
    dataiter = ds.__iter__()
    return dataiter
Code Example #12
File: train.py Project: bBobxx/pose_estimate
def make_data():
    from COCOAllJoints import COCOJoints
    from dataset import Preprocessing
    d = COCOJoints()
    train_data, _ = d.load_data(1)
    from tensorpack.dataflow import DataFromList, MapData, BatchData
    dp = DataFromList(train_data)
    dp = MapData(dp, Preprocessing)
    dp = BatchData(dp, cfg.batch_size, use_list=True)
    dp.reset_state()
    dataiter = dp.get_data()
    return dataiter
Code Example #13
def get_train_dataflow(roidb):
    """
    Tensorpack text dataflow.
    """
    ds = DataFromList(roidb, shuffle=True)
    preprocess = TextDataPreprocessor(cfg)

    buffer_size = cfg.num_threads * 10
    ds = MultiThreadMapData(ds, cfg.num_threads, preprocess, buffer_size=buffer_size)
    # ds = MultiProcessMapData(ds, cfg.num_workers, preprocess, buffer_size=buffer_size)
    ds = PrefetchData(ds, 100, multiprocessing.cpu_count() // 4)

    #ds = BatchData(ds, cfg.batch_size, remainder=True)

    return ds
Code Example #14
def get_resnet_val_dataflow():
    imgs = ResnetDetection.load_many(
        config.BASEDIR, config.VAL_DATASET)
    imgs = list(imgs)
    # ds = DataFromListOfDict(imgs, ['image_data', 'with_ship', 'id'])
    ds = DataFromList(imgs, shuffle=False)
    def f(img):
        image, label = img['image_data'], img['with_ship']
        im = cv2.imread(image)
        im = cv2.resize(im, (config.RESNET_SIZE, config.RESNET_SIZE))
        return [im, label]

    ds = MapData(ds, f)
    ds = BatchData(ds, config.RESNET_BATCH)
    ds = PrefetchDataZMQ(ds, 1)
    return ds
Code Example #15
def get_val_dataflow(datadir,
                     batch_size,
                     augmentors=None,
                     parallel=None,
                     num_splits=None,
                     split_index=None,
                     dataname="val"):
    if augmentors is None:
        augmentors = fbresnet_augmentor(False)
    assert datadir is not None
    assert isinstance(augmentors, list)
    if parallel is None:
        parallel = min(40, multiprocessing.cpu_count())

    if num_splits is None:
        ds = dataset.ILSVRC12Files(datadir, dataname, shuffle=True)
    else:
        # shard validation data
        assert False
        assert split_index < num_splits
        files = dataset.ILSVRC12Files(datadir, dataname, shuffle=True)
        files.reset_state()
        files = list(files.get_data())
        logger.info("Number of validation data = {}".format(len(files)))
        split_size = len(files) // num_splits
        start, end = split_size * split_index, split_size * (split_index + 1)
        end = min(end, len(files))
        logger.info("Local validation split = {} - {}".format(start, end))
        files = files[start:end]
        ds = DataFromList(files, shuffle=True)

    aug = imgaug.AugmentorList(augmentors)

    def mapf(dp):
        fname, cls = dp
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        #from BGR to RGB
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        im = aug.augment(im)
        return im, cls

    ds = MultiThreadMapData(ds,
                            parallel,
                            mapf,
                            buffer_size=min(2000, ds.size()),
                            strict=True)
    ds = BatchData(ds, batch_size, remainder=False)
    ds = RepeatedData(ds, num=-1)
    # do not fork() under MPI
    return ds
Code Example #16
File: eval_knn.py Project: ppwwyyxx/moco.tensorflow
def build_dataflow(files):
    train_ds = DataFromList(files)
    aug = imgaug.AugmentorList(get_basic_augmentor(isTrain=False))

    def mapper(dp):
        idx, fname, label = dp
        img = cv2.imread(fname)
        img = aug.augment(img)
        return img, idx

    train_ds = MultiProcessMapDataZMQ(train_ds,
                                      num_proc=8,
                                      map_func=mapper,
                                      strict=True)
    train_ds = BatchData(train_ds, local_batch_size)
    train_ds.reset_state()
    return train_ds
Code Example #17
File: data.py Project: hakillha/maria03
def get_query_dataflow():
    """
    Args:
        shard, num_shards: to get subset of evaluation data
    """
    prw = PRWDataset(cfg.DATA.BASEDIR)
    imgs = prw.load_query()

    # no filter for training
    # test if it can repeat keys
    ds = DataFromList(imgs, shuffle=False)

    aug = imgaug.AugmentorList(
        [CustomResize(cfg.PREPROC.SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE)])

    def preprocess(img):
        fname, boxes, re_id_class = img['file_name'], img['boxes'], img[
            're_id_class']
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        ret = [im, boxes, re_id_class]

        return ret

    ds = MapData(ds, preprocess)
    return ds
Code Example #18
def get_train_dataflow_coco(add_mask=False):
    """
    Return a training dataflow. Each datapoint is:
    image, fm_labels, fm_boxes, gt_boxes, gt_class [, masks]
    """
    imgs = COCODetection.load_many(config.BASEDIR,
                                   config.TRAIN_DATASET,
                                   add_gt=True,
                                   add_mask=add_mask)
    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    imgs = list(filter(lambda img: len(img['boxes']) > 0,
                       imgs))  # log invalid training

    ds = DataFromList(imgs, shuffle=True)

    aug = imgaug.AugmentorList([
        CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(img):
        print("start preproc coco")
        start = time.time()
        if config.USE_SECOND_HEAD:
            fname, boxes, klass, second_klass, is_crowd = img['file_name'], img['boxes'], img['class'], \
                                                          img['second_class'], img['is_crowd']
        else:
            fname, boxes, klass, is_crowd = img['file_name'], img[
                'boxes'], img['class'], img['is_crowd']
            second_klass = None
        res = preproc_img(fname, boxes, klass, second_klass, is_crowd, aug)
        if res is None:
            print("coco: preproc_img returned None on", fname)
            return None

        ret, params = res
        im = ret[0]
        boxes = ret[3]
        # masks
        if add_mask:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(img.get('segmentation', None))
            segmentation = [
                segmentation[k] for k in range(len(segmentation))
                if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes), (len(segmentation),
                                                     len(boxes))

            # one image-sized binary mask per box
            masks = []
            for polys in segmentation:
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(
                    segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret.append(masks)

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        end = time.time()
        elapsed = end - start
        print("coco example done, elapsed:", elapsed)
        return ret

    #ds = MapData(ds, preprocess)
    ds = MultiProcessMapData(ds,
                             nr_proc=4,
                             map_func=preprocess,
                             buffer_size=20)
    return ds
Code Example #19
def get_train_dataflow_mapillary(add_mask=False, map_to_coco=False):
    train_img_path = config.MAPILLARY_PATH + "training/images/"
    train_label_path = config.MAPILLARY_PATH + "training/instances/"
    imgs = glob.glob(train_img_path + "*.jpg")

    ds = DataFromList(imgs, shuffle=True)
    aug = imgaug.AugmentorList([
        CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(fname):
        print("start preproc mapillary")
        start = time.time()

        label_fname = fname.replace(train_img_path,
                                    train_label_path).replace(".jpg", ".png")
        pil_label = Image.open(label_fname)
        label = np.array(pil_label)
        instances = np.unique(label)
        instance_classes = [x // 256 for x in instances]

        # filter by categories we use
        instances_valid = [
            cls in config.MAPILLARY_CAT_IDS_TO_USE for cls in instance_classes
        ]
        instances = [
            inst for inst, valid in zip(instances, instances_valid) if valid
        ]
        instance_classes = [
            cls for cls, valid in zip(instance_classes, instances_valid)
            if valid
        ]

        if len(instances) == 0:
            print("no instances")
            pil_label.close()
            return None

        if map_to_coco:
            instance_classes = [
                config.MAPILLARY_TO_COCO_MAP[cls] for cls in instance_classes
            ]
            instance_classes = [
                config.VOID_LABEL if cls == config.VOID_LABEL else
                COCOMeta.category_id_to_class_id[cls]
                for cls in instance_classes
            ]
        else:
            # remap to contiguous numbers starting with 1
            instance_classes = [
                config.MAPILLARY_CAT_IDS_TO_USE.index(cls) + 1
                for cls in instance_classes
            ]

        masks = np.array([label == inst for inst in instances], dtype=np.uint8)

        #import cProfile
        #start1 = time.time()
        boxes1 = np.array(
            [get_bbox_from_segmentation_mask(mask) for mask in masks],
            dtype=np.float32)
        #boxes1_time = time.time() - start1
        #pr = cProfile.Profile()
        #pr.enable()
        #start1 = time.time()
        #boxes2 = get_bboxes_from_segmentation_masks(masks)
        #print("boxes1", boxes1_time, "boxes2", time.time() - start1)
        #pr.disable()
        #pr.print_stats(sort="cumulative")
        #assert (boxes1 == boxes2).all(), (boxes1, boxes2)
        boxes = boxes1

        second_klass = np.array(instance_classes, dtype=np.int64)  # np.int was removed in NumPy 1.24
        klass = np.ones_like(second_klass)
        is_crowd = np.zeros_like(second_klass)

        res = preproc_img(fname, boxes, klass, second_klass, is_crowd, aug)
        if res is None:
            print("mapillary: preproc_img returned None on", fname)
            pil_label.close()
            return None
        ret, params = res
        if add_mask:
            do_flip, h, w = params[1]
            assert do_flip in (True, False), do_flip
            # augment label
            label = np.array(pil_label.resize((w, h), Image.NEAREST))
            if do_flip:
                label = label[:, ::-1]
            # create augmented masks
            masks = np.array([label == inst for inst in instances],
                             dtype=np.uint8)
            ret.append(masks)

        end = time.time()
        elapsed = end - start
        print("mapillary example done, elapsed:", elapsed)

        VISUALIZE = False
        if VISUALIZE:
            from viz import draw_annotation, draw_mask
            config.CLASS_NAMES = [str(idx) for idx in range(81)]
            im = ret[0]
            boxes = ret[3]
            draw_klass = ret[-2]
            viz = draw_annotation(im, boxes, draw_klass)
            for mask in masks:
                viz = draw_mask(viz, mask)
            tpviz.interactive_imshow(viz)

        pil_label.close()
        return ret

    #ds = MapData(ds, preprocess)
    ds = MultiProcessMapData(ds,
                             nr_proc=8,
                             map_func=preprocess,
                             buffer_size=35)
    return ds
Code Example #20
def get_train_dataflow_davis(add_mask=False):
    # train_img_path = config.DAVIS_PATH + "train/"
    # train_label_path = config.DAVIS_PATH + "train-gt/"
    # imgs = glob.glob(train_img_path + "*/*.jpg")

    # train_img_path = "/home/luiten/vision/PReMVOS/data/first/bike-trial/lucid_data_dreaming/"
    # train_label_path = "/home/luiten/vision/PReMVOS/data/first/bike-trial/lucid_data_dreaming/"

    # train_img_path = "/home/luiten/vision/PReMVOS/data/"+config.DAVIS_NAME+"/lucid_data_dreaming/"
    # train_label_path = "/home/luiten/vision/PReMVOS/data/"+config.DAVIS_NAME+"/lucid_data_dreaming/"

    # train_img_path = "/home/luiten/vision/youtubevos/ytvos_data/together/generated/augment_images/"
    # train_label_path = "/home/luiten/vision/youtubevos/ytvos_data/together/generated/augment_gt/"

    train_img_path = "/home/luiten/vision/youtubevos/DAVIS/davis_together/augment_images/"
    train_label_path = "/home/luiten/vision/youtubevos/DAVIS/davis_together/augment_gt/"

    imgs = sorted(glob.glob(train_img_path + "*/*.jpg"))

    ds = DataFromList(imgs, shuffle=True)
    aug = imgaug.AugmentorList([
        CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(fname):
        # print("start preproc mapillary")
        start = time.time()

        label_fname = fname.replace(train_img_path,
                                    train_label_path).replace(".jpg", ".png")
        pil_label = Image.open(label_fname)
        label = np.array(pil_label)
        instances = np.unique(label)
        instance_classes = [x // 256 for x in instances]

        if len(instances) == 0:
            print("no instances")
            pil_label.close()
            return None

        masks = np.array([label == inst for inst in instances], dtype=np.uint8)

        boxes1 = np.array(
            [get_bbox_from_segmentation_mask(mask) for mask in masks],
            dtype=np.float32)
        boxes = boxes1

        # second_klass = np.array(instance_classes, dtype=np.int)
        second_klass = np.zeros_like(instance_classes, dtype=np.int64)
        klass = np.ones_like(second_klass)
        is_crowd = np.zeros_like(second_klass)

        res = preproc_img(fname, boxes, klass, second_klass, is_crowd, aug)
        if res is None:
            print("davis: preproc_img returned None on", fname)
            pil_label.close()
            return None
        ret, params = res
        if add_mask:
            do_flip, h, w = params[1]
            assert do_flip in (True, False), do_flip
            # augment label
            label = np.array(pil_label.resize((w, h), Image.NEAREST))
            if do_flip:
                label = label[:, ::-1]
            # create augmented masks
            masks = np.array([label == inst for inst in instances],
                             dtype=np.uint8)
            ret.append(masks)

        end = time.time()
        elapsed = end - start
        # print("davis example done, elapsed:", elapsed)

        VISUALIZE = False
        if VISUALIZE:
            from viz import draw_annotation, draw_mask
            config.CLASS_NAMES = [str(idx) for idx in range(81)]
            im = ret[0]
            boxes = ret[3]
            draw_klass = ret[-2]
            viz = draw_annotation(im, boxes, draw_klass)
            for mask in masks:
                viz = draw_mask(viz, mask)
            tpviz.interactive_imshow(viz)

        pil_label.close()
        return ret

    ds = MapData(ds, preprocess)
    # ds = MultiProcessMapData(ds, nr_proc=8, map_func=preprocess, buffer_size=35)
    # ds = MultiProcessMapData(ds, nr_proc=8, map_func=preprocess)
    return ds
Code Example #21
def get_train_dataflow(src):
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
    anchor_labels: (h', w', NA)
    anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """

    #imgs = COCODetection.load_many(cfg.DATA.BASEDIR, cfg.DATA.TRAIN, add_gt=True, add_mask=cfg.MODE_MASK)

    classes = (
        'BG',  # always index 0
        'bathtub',
        'bed',
        'bookshelf',
        'box',
        'chair',
        'counter',
        'desk',
        'door',
        'dresser',
        'garbage_bin',
        'lamp',
        'monitor',
        'night_stand',
        'pillow',
        'sink',
        'sofa',
        'table',
        'toilet',
        'tv')

    class_to_ind = dict(list(zip(classes, list(range(len(classes))))))
    #src = '/media/ayan/Drive/IMI-Research/Datasets/Datasets_OP_Train/'
    textfile_index = natsorted(
        [src + f for f in np.sort(os.listdir(src)) if f.endswith('.txt')])
    imgs = []
    count = 0
    for fn in textfile_index:
        each_file = {}
        count = count + 1
        print(str(count) + ':::', fn)
        with open(fn, 'r') as F:
            file_F = F.read()
        file_F = file_F.split('\n')
        each_file['file_name'] = file_F[0]
        im = cv2.imread(each_file['file_name'])
        each_file['height'] = im.shape[0]
        each_file['width'] = im.shape[1]
        objects = file_F[2:len(file_F) - 1]
        boxes = []
        class_ = []
        for obj in objects:
            objs_line = obj.split(' ')
            x1 = float(objs_line[1]) - 1.0
            y1 = float(objs_line[2]) - 1.0
            x2 = float(objs_line[3]) - 1.0
            y2 = float(objs_line[4]) - 1.0
            if x1 >= x2:
                x2 = x1 + 1
            boxes.append([x1, y1, x2, y2])
            cls = class_to_ind[objs_line[0]]
            class_.append(cls)
        each_file['boxes'] = np.array(boxes).astype(np.float32)
        each_file['class'] = np.array(class_).astype(np.int32)
        each_file['is_crowd'] = np.zeros_like(each_file['class']).astype(
            np.int8)
        imgs.append(each_file)
    """ 
    To train on your own data, change this to your loader.
    Produce "imgs" as a list of dict, in the dict the following keys are needed for training:
    height, width: integer
    file_name: str, full path to the image
    boxes: numpy array of kx4 floats
    class: numpy array of k integers
    is_crowd: k booleans. Use k False if you don't know what it means.
    segmentation: k lists of numpy arrays (one for each box).
        Each list of numpy array corresponds to the mask for one instance.
        Each numpy array in the list is a polygon of shape Nx2,
        because one mask can be represented by N polygons.

        If your segmentation annotations are originally masks rather than polygons,
        either convert it, or the augmentation code below will need to be
        changed or skipped accordingly.
    """

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(imgs)
    imgs = list(
        filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0, imgs))
    logger.info(
        "Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}"
        .format(num - len(imgs), len(imgs)))

    ds = DataFromList(imgs, shuffle=False)

    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(img):
        fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img[
            'class'], img['is_crowd']
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        # rpn anchor:
        try:
            if cfg.MODE_FPN:
                multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(
                    im, boxes, is_crowd)
                anchor_inputs = itertools.chain.from_iterable(
                    multilevel_anchor_inputs)
            else:
                # anchor_labels, anchor_boxes
                anchor_inputs = get_rpn_anchor_input(im, boxes, is_crowd)
                assert len(anchor_inputs) == 2

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                'warn')
            return None

        ret = [im] + list(anchor_inputs) + [boxes, klass]

        if cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(img['segmentation'])
            segmentation = [
                segmentation[k] for k in range(len(segmentation))
                if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            for polys in segmentation:
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(
                    segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret.append(masks)

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret

    if cfg.TRAINER == 'horovod':
        ds = MultiThreadMapData(ds, 5, preprocess)
        # MPI does not like fork()
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    return ds
Code Example #22
def get_sniper_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
    anchor_labels: (h', w', NA)
    anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    scale_index: i

    If MODE_MASK, gt_masks: (N, h, w)
    """

    OUTPUT_FILE = 'train_512_annotation.txt'
    OUTPUT_IMG_DIR = 'out'
    out_file = open(OUTPUT_FILE, 'w')

    class SniperDataFlow(ProxyDataFlow):
        def __init__(self, ds):
            super(SniperDataFlow, self).__init__(ds)
            # self.ds = ds
        def size(self):
            raise NotImplementedError()

        def get_data(self):
            for img in self.ds.get_data():
                for chip in img:
                    yield chip

    imgs = COCODetection.load_many(cfg.DATA.BASEDIR,
                                   cfg.DATA.TRAIN,
                                   add_gt=True,
                                   add_mask=cfg.MODE_MASK)

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(imgs)
    imgs = list(
        filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0, imgs))
    logger.info(
        "Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}"
        .format(num - len(imgs), len(imgs)))

    ds = DataFromList(imgs, shuffle=False)
    # aug = imgaug.AugmentorList([
    #     CustomResize(cfg.PREPROC.SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
    #     imgaug.Flip(horiz=True)
    # ])

    assert os.path.isfile(cfg.SNIPER.PRN_PRE)
    proposal_pickle = pandas.read_pickle(cfg.SNIPER.PRN_PRE)

    def preprocess(img):

        fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img[
            'class'], img['is_crowd']
        img_name = fname.split('/')[-1]
        img_id = int(img_name[3:-4])
        # proposals from a pretrained RPN, used for negative chip extraction

        proposals = proposal_pickle['boxes'][proposal_pickle['ids'].index(
            img_id)]
        proposals[:, 2:4] += proposals[:, 0:2]  # from [x, y, w, h] to [x1, y1, x2, y2]

        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"
        chip_generator = Im2Chip(im,
                                 boxes,
                                 klass,
                                 proposals,
                                 cfg.SNIPER.SCALES,
                                 cfg.SNIPER.VALID_RANGES,
                                 is_crowd=is_crowd,
                                 chip_size=cfg.SNIPER.CHIP_SIZE,
                                 chip_stride=cfg.SNIPER.CHIP_STRIDE)
        im, boxes, klass, scale_indices, is_crowd = chip_generator.genChipMultiScale(
        )
        rets = []
        for i in range(len(im)):
            # Skip chips that contain no gt boxes.
            if len(boxes[i]) == 0:
                log_once(
                    "Input {} is filtered for training: No valid gt_boxes!".format(
                        fname), 'warn')
                continue
            # ret = [im[i]] + list(anchor_inputs) + [boxes[i], klass[i]
            #                                        ] + [scale_indices[i]*len(boxes[i])]
            new_name = '%s_%d' % (img_name, i)
            cv2.imwrite('%s/%s' % (OUTPUT_IMG_DIR, new_name), im[i])

            ret = [im[i]] + [boxes[i], klass[i]]
            for j in range(len(klass[i])):
                if j == 0:
                    out_file.write(new_name)
                out_file.write(' %d %f %f %f %f' %
                               (klass[i][j], boxes[i][j][0], boxes[i][j][1],
                                boxes[i][j][2], boxes[i][j][3]))
                if j == len(klass[i]) - 1:
                    out_file.write('\n')
            rets.append(ret)
        return rets

    if cfg.TRAINER == 'horovod':
        ds = MultiThreadMapData(ds, 5, preprocess)
        # ds = PrefetchDataZM
        # MPI does not like fork()
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    # ds = SniperDataFlow(ds)
    return ds
Code Example #23
def get_train_dataflow(add_mask=False):
    """
    Return a training dataflow. Each datapoint is:
    image, fm_labels, fm_boxes, gt_boxes, gt_class [, masks]
    """
    imgs = COCODetection.load_many(config.BASEDIR,
                                   config.TRAIN_DATASET,
                                   add_gt=True,
                                   add_mask=add_mask)
    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    imgs = list(filter(lambda img: len(img['boxes']) > 0,
                       imgs))  # log invalid training

    ds = DataFromList(imgs, shuffle=True)

    aug = imgaug.AugmentorList([
        CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(img):
        fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img[
            'class'], img['is_crowd']
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)

        # rpn anchor:
        try:
            fm_labels, fm_boxes = get_rpn_anchor_input(im, boxes, klass,
                                                       is_crowd)
            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is invalid for training: {}".format(fname, str(e)),
                'warn')
            return None

        ret = [im, fm_labels, fm_boxes, boxes, klass]

        # masks
        segmentation = img.get('segmentation', None)
        if segmentation is not None:
            segmentation = [
                segmentation[k] for k in range(len(segmentation))
                if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes)

            # one image-sized binary mask per box
            masks = []
            for polys in segmentation:
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(
                    segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret.append(masks)

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret

    ds = MapData(ds, preprocess)
    ds = PrefetchDataZMQ(ds, 1)
    return ds
Code Example #24
def get_train_dataflow():
    roidbs = DetectionDataset().load_training_roidbs(cfg.DATA.TRAIN)
    ds = DataFromList(roidbs, shuffle=True)
    # for now let's not do flipping to keep things simple
    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE)
    ])  #,
    #imgaug.Flip(horiz=True)])

    if cfg.MODE_HARD_MINING:
        from annoy import AnnoyIndex
        hard_mining_index = AnnoyIndex(128, 'euclidean')
        hard_mining_index.load(cfg.HARD_MINING_DATA_PATH +
                               "/index_all/index.ann")
        names_path = cfg.HARD_MINING_DATA_PATH + "/index_all/names.txt"
        hard_mining_names_all = []
        with open(names_path) as f:
            for l in f:
                hard_mining_names_all.append(l.strip())
            hard_example_names_got = [
                x[7:] for x in hard_mining_names_all if x.startswith("GOT10k/")
            ]
            hard_example_names_vid = [
                x[12:] for x in hard_mining_names_all
                if x.startswith("ImageNetVID/")
            ]
            hard_example_names_ytbvos = [
                x[11:] for x in hard_mining_names_all
                if x.startswith("YouTubeVOS/")
            ]
            hard_example_names_lasot = [
                x[6:] for x in hard_mining_names_all if x.startswith("LaSOT/")
            ]
            assert len(hard_example_names_got) > 0
            assert len(hard_example_names_vid) > 0
            assert len(hard_example_names_ytbvos) > 0
            assert len(hard_example_names_lasot) > 0
            hard_example_names_got.sort()
            hard_example_names_vid.sort()
            hard_example_names_ytbvos.sort()
            hard_example_names_lasot.sort()
            hard_mining_names = {
                "all": hard_mining_names_all,
                "GOT10k": hard_example_names_got,
                "ImageNetVID": hard_example_names_vid,
                "YouTubeVOS": hard_example_names_ytbvos,
                "LaSOT": hard_example_names_lasot
            }
    else:
        hard_mining_index = None
        hard_mining_names = None

    def preprocess(roidb):
        if roidb.startswith("VID/"):
            return _preprocess_imagenet_vid(roidb[4:], aug, hard_mining_index,
                                            hard_mining_names)
        elif roidb.startswith("DAVIS/"):
            return _preprocess_davis_like(
                roidb[6:], aug,
                os.path.join(cfg.DATA.DAVIS2017_ROOT, "Annotations", "480p"))
        elif roidb.startswith("YouTubeVOS/"):
            return _preprocess_davis_like(
                roidb[11:], aug,
                os.path.join(cfg.DATA.YOUTUBE_VOS_ROOT, "train",
                             "Annotations"), "YouTubeVOS", hard_mining_index,
                hard_mining_names)
        elif roidb.startswith("GOT10K/"):
            return _preprocess_got10k(roidb[7:], aug, hard_mining_index,
                                      hard_mining_names)
        elif roidb.startswith("LaSOT/"):
            return _preprocess_lasot(roidb[6:], aug, hard_mining_index,
                                     hard_mining_names)
        elif roidb.startswith("YouTube-BB/"):
            return _preprocess_youtube_bb(roidb[11:], aug)
        elif roidb.startswith("TrackingNet/"):
            return _preprocess_trackingnet(roidb[12:], aug)
        else:
            assert False

    #ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    #ds = MapData(ds, preprocess)
    if cfg.DATA.DEBUG_VIS or not cfg.DATA.MULTITHREAD:
        ds = MapData(ds, preprocess)
    else:
        #ds = MultiThreadMapData(ds, 6, preprocess)
        ds = MultiThreadMapData(ds, 8, preprocess, buffer_size=80)
    return ds
Code Example #25
def get_data_set(root_path, ana_path):
    data_list = get_train_data_list(root_path, ana_path)
    dataset = DataFromList(data_list, shuffle=True)
    return dataset
Code Example #26
def get_dataflow(is_train=True):
    train_df = pd.read_csv(os.path.join('/data/kaggle/HPA', 'train.csv'))
    #train_df = oversample(train_df)
    labels = [[int(i) for i in s.split()] for s in train_df['Target']]
    fnames = train_df['Id'].tolist()
    fnames = [os.path.join(config.TRAIN_DATASET, f) for f in fnames]
    sprase_label = [
        np.eye(config.NUM_CLASS, dtype=float)[np.array(la)].sum(axis=0)
        for la in labels
    ]

    extra_df = pd.read_csv(
        os.path.join('/data/kaggle/HPA',
                     'HPAv18RGBY_WithoutUncertain_wodpl.csv'))
    #extra_df = oversample(extra_df)
    extra_labels = [[int(i) for i in s.split()] for s in extra_df['Target']]
    extra_labels = [
        np.eye(config.NUM_CLASS, dtype=float)[np.array(la)].sum(axis=0)
        for la in extra_labels
    ]
    extra_fnames = extra_df['Id'].tolist()
    extra_fnames = [
        os.path.join(config.EXTRA_DATASET, f) for f in extra_fnames
    ]
    fnames = fnames + extra_fnames
    sprase_label = sprase_label + extra_labels

    fnames = np.array(fnames)
    sprase_label = np.array(sprase_label)
    msss = MultilabelStratifiedShuffleSplit(n_splits=1,
                                            test_size=0.15,
                                            random_state=42)

    for train_index, test_index in msss.split(fnames, sprase_label):
        x_train, x_test = fnames[train_index], fnames[test_index]
        y_train, y_test = sprase_label[train_index], sprase_label[test_index]

    holdout_data = list(zip(x_test, y_test))
    # 5 fold the rest
    mskf = MultilabelStratifiedKFold(n_splits=5, random_state=1)
    for fold_num, (train_index,
                   test_index) in enumerate(mskf.split(x_train, y_train)):
        if fold_num == config.FOLD:
            foldx_train, foldx_test = x_train[train_index], x_train[test_index]
            foldy_train, foldy_test = y_train[train_index], y_train[test_index]
            break

    train_data = list(zip(foldx_train, foldy_train))
    val_data = list(zip(foldx_test, foldy_test))

    train_data = oversample_2(train_data)

    pseudo_df = pd.read_csv(os.path.join('/data/kaggle/HPA', 'LB623.csv'))
    pseudo_fnames = pseudo_df['Id'].tolist()
    pseudo_fnames = [
        os.path.join(config.TEST_DATASET, f) for f in pseudo_fnames
    ]
    #pseudo_labels = np.load("./SOTA.npy")
    #pseudo_labels = [np.array(_) for _ in pseudo_labels]
    pseudo_labels = [[int(i) for i in s.split()]
                     for s in pseudo_df['Predicted']]
    pseudo_labels = [
        np.eye(config.NUM_CLASS, dtype=float)[np.array(la)].sum(axis=0)
        for la in pseudo_labels
    ]
    pseudo_data = list(zip(pseudo_fnames, pseudo_labels))
    train_data = train_data + pseudo_data

    print("train: ", len(train_data), len(val_data))

    if not is_train:
        return val_data

    ds = DataFromList(train_data, shuffle=True)
    ds = BatchData(MapData(ds, preprocess), config.BATCH)
    ds = PrefetchDataZMQ(ds, 6)
    return ds
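The np.eye(...)[ids].sum(axis=0) idiom used above turns a list of class ids into one multi-hot target vector. A small self-contained illustration (with a hypothetical class count of 5):

import numpy as np

# Rows of the identity matrix are one-hot vectors; summing the selected
# rows yields a single multi-hot vector for a multi-label sample.
num_class = 5                  # hypothetical; the example uses config.NUM_CLASS
label_ids = [0, 3]             # e.g. parsed from the string "0 3" in the CSV
multi_hot = np.eye(num_class, dtype=float)[np.array(label_ids)].sum(axis=0)
print(multi_hot)               # [1. 0. 0. 1. 0.]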
Code Example #27
File: data.py Project: chpohl/tensorpack
def get_train_dataflow_YCBV():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
    anchor_labels: (h', w', NA)
    anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """

    img_ids = YCBVDetectionDataset().load_training_image_ids(cfg.DATA.TRAIN)
    # print_class_histogram(roidbs)

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    # num = len(img_ids)
    # roidbs = list(filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0, roidbs))
    # logger.info("Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}".format(
    #     num - len(roidbs), len(roidbs)))

    ds = DataFromList(img_ids, shuffle=True)

    # aug = imgaug.AugmentorList(
    #     [CustomResize(cfg.PREPROC.TRAIN_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE)])

    def preprocess(image_id):
        roidb = YCBVDetectionDataset().load_single_roidb(image_id)
        fname, boxes, klass, is_crowd = roidb['file_name'], roidb[
            'boxes'], roidb['class'], roidb['is_crowd']
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        height, width = im.shape[:2]
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        if not cfg.DATA.ABSOLUTE_COORD:
            boxes[:, 0::2] *= width
            boxes[:, 1::2] *= height

        # augmentation:
        # im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        # points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        ret = {'image': im}
        # rpn anchor:
        try:
            if cfg.MODE_FPN:
                multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(
                    im, boxes, is_crowd)
                for i, (anchor_labels,
                        anchor_boxes) in enumerate(multilevel_anchor_inputs):
                    ret['anchor_labels_lvl{}'.format(i + 2)] = anchor_labels
                    ret['anchor_boxes_lvl{}'.format(i + 2)] = anchor_boxes
            else:
                # anchor_labels, anchor_boxes
                ret['anchor_labels'], ret[
                    'anchor_boxes'] = get_rpn_anchor_input(
                        im, boxes, is_crowd)

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            ret['gt_boxes'] = boxes
            ret['gt_labels'] = klass
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                'warn')
            return None

        if cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(roidb['segmentation'])
            segmentation = [
                segmentation[k] for k in range(len(segmentation))
                if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            ret['gt_masks'] = segmentation

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret

    if cfg.TRAINER == 'horovod':
        ds = MultiThreadMapData(ds, 5, preprocess)
        # MPI does not like fork()
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    return ds
Code Example #28
def get_eval_dataflow(name, shard=0, num_shards=1):
    seqs = []
    with open("davis2017_fast_val_ids.txt") as f:
        for l in f:
            seqs.append(l.strip())

    seqs_timesteps = []
    for seq in seqs:
        files = sorted(
            glob.glob(cfg.DATA.DAVIS2017_ROOT + "/JPEGImages/480p/" +
                      seq.split("__")[0] + "/*.jpg"))[1:-1]
        timesteps = [f.split('/')[-1].replace(".jpg", "") for f in files]

        for timestep in timesteps:
            ann_fn = cfg.DATA.DAVIS2017_ROOT + "/Annotations/480p/" + seq.split(
                "__")[0] + '/' + timestep + ".png"
            ann = np.array(PIL.Image.open(ann_fn))
            ann_mask = ann == int(seq.split("__")[1])
            if ann_mask.any():
                seqs_timesteps.append(
                    (seq.split('__')[0], seq.split('__')[1], timestep))

        # seqs_timesteps += [(seq.split('__')[0], seq.split('__')[1], timestep) for timestep in timesteps]

    num_seqs_timesteps = len(seqs_timesteps)
    seqs_timesteps_per_shard = num_seqs_timesteps // num_shards
    seqs_timesteps_range = (shard * seqs_timesteps_per_shard,
                            (shard + 1) * seqs_timesteps_per_shard
                            if shard + 1 < num_shards else num_seqs_timesteps)
    ds = DataFromList(
        seqs_timesteps[seqs_timesteps_range[0]:seqs_timesteps_range[1]])

    def preprocess(seq_timestep):

        seq, obj_id, timestep = seq_timestep
        ann_fn = cfg.DATA.DAVIS2017_ROOT + "/Annotations/480p/" + seq + '/' + timestep + ".png"
        ann = np.array(PIL.Image.open(ann_fn))
        ann_mask = ann == int(obj_id)
        if not ann_mask.any():
            return None, None, None, None, None
            # ann_box = np.array([-1000000, -1000000, 100000, 100000])
        else:
            ann_box = get_bbox_from_segmentation_mask_np(ann_mask)

        ff_fn = cfg.DATA.DAVIS2017_ROOT + "/Annotations/480p/" + seq + '/' + str(
            0).zfill(5) + ".png"
        ff = np.array(PIL.Image.open(ff_fn))
        ff_mask = ff == int(obj_id)
        ff_box = get_bbox_from_segmentation_mask_np(ff_mask)

        x1, y1, x2, y2 = [float(x) for x in ann_box]
        target_bbox = np.array([x1, y1, x2, y2], dtype=np.float32)

        x1, y1, x2, y2 = [float(x) for x in ff_box]
        ref_bbox = np.array([x1, y1, x2, y2], dtype=np.float32)

        target_img_fn = cfg.DATA.DAVIS2017_ROOT + "/JPEGImages/480p/" + seq + "/" + timestep + ".jpg"
        ref_img_fn = cfg.DATA.DAVIS2017_ROOT + "/JPEGImages/480p/" + seq + "/" + str(
            0).zfill(5) + ".jpg"
        target_img = cv2.imread(target_img_fn, cv2.IMREAD_COLOR)
        ref_img = cv2.imread(ref_img_fn, cv2.IMREAD_COLOR)
        return ref_img, ref_bbox, target_img, target_bbox, "__".join(
            seq_timestep)

    ds = MapData(ds, preprocess)
    return ds
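get_bbox_from_segmentation_mask_np is not shown in this example; a plausible sketch, assuming it returns (x1, y1, x2, y2) pixel coordinates as consumed above:

import numpy as np

def get_bbox_from_segmentation_mask_np(mask):
    # mask: (h, w) boolean array with at least one True pixel.
    # x2/y2 are taken as the max foreground pixel; an exclusive
    # box convention would add 1 to them.
    ys, xs = np.where(mask)
    return np.array([xs.min(), ys.min(), xs.max(), ys.max()], dtype=np.float32)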
Code example #29
def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    An image: (h, w, 3),

    1 or more pairs of (anchor_labels, anchor_boxes):
    anchor_labels: (h', w', NA)
    anchor_boxes: (h', w', NA, 4)

    gt_boxes: (N, 4)
    gt_labels: (N,)

    If MODE_MASK, gt_masks: (N, h, w)
    """

    imgs = COCODetection.load_many(cfg.DATA.BASEDIR,
                                   cfg.DATA.TRAIN,
                                   add_gt=True,
                                   add_mask=cfg.MODE_MASK)
    """
    To train on your own data, change this to your loader.
    Produce "imgs" as a list of dict, in the dict the following keys are needed for training:
    height, width: integer
    file_name: str, full path to the image
    boxes: numpy array of kx4 floats
    class: numpy array of k integers
    is_crowd: k booleans. Use k False if you don't know what it means.
    segmentation: k lists of numpy arrays (one for each box).
        Each list of numpy array corresponds to the mask for one instance.
        Each numpy array in the list is a polygon of shape Nx2,
        because one mask can be represented by N polygons.

        If your segmentation annotations are originally masks rather than polygons,
        either convert it, or the augmentation code below will need to be
        changed or skipped accordingly.
    """

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    num = len(imgs)
    imgs = list(
        filter(lambda img: len(img['boxes'][img['is_crowd'] == 0]) > 0, imgs))
    logger.info(
        "Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}"
        .format(num - len(imgs), len(imgs)))

    ds = DataFromList(imgs, shuffle=True)

    aug = imgaug.AugmentorList([
        CustomResize(cfg.PREPROC.SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(img):
        fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img[
            'class'], img['is_crowd']
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation:
        im, params = aug.augment_return_params(im)
        points = box_to_point8(boxes)
        points = aug.augment_coords(points, params)
        boxes = point8_to_box(points)
        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        # rpn anchor:
        try:
            if cfg.MODE_FPN:
                multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(
                    im, boxes, is_crowd)
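                # get_multilevel_rpn_anchor_input returns one
                # (anchor_labels, anchor_boxes) pair per FPN level;
                # chain.from_iterable flattens them into a single sequence.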
                anchor_inputs = itertools.chain.from_iterable(
                    multilevel_anchor_inputs)
            else:
                # anchor_labels, anchor_boxes
                anchor_inputs = get_rpn_anchor_input(im, boxes, is_crowd)
                assert len(anchor_inputs) == 2

            boxes = boxes[is_crowd == 0]  # skip crowd boxes in training target
            klass = klass[is_crowd == 0]
            if not len(boxes):
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once(
                "Input {} is filtered for training: {}".format(fname, str(e)),
                'warn')
            return None

        ret = [im] + list(anchor_inputs) + [boxes, klass]

        if cfg.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(img['segmentation'])
            segmentation = [
                segmentation[k] for k in range(len(segmentation))
                if not is_crowd[k]
            ]
            assert len(segmentation) == len(boxes)

            # Apply augmentation on polygon coordinates.
            # And produce one image-sized binary mask per box.
            masks = []
            for polys in segmentation:
                polys = [aug.augment_coords(p, params) for p in polys]
                masks.append(
                    segmentation_to_mask(polys, im.shape[0], im.shape[1]))
            masks = np.asarray(masks, dtype='uint8')  # values in {0, 1}
            ret.append(masks)

            # from viz import draw_annotation, draw_mask
            # viz = draw_annotation(im, boxes, klass)
            # for mask in masks:
            #     viz = draw_mask(viz, mask)
            # tpviz.interactive_imshow(viz)
        return ret

    if cfg.TRAINER == 'horovod':
        ds = MultiThreadMapData(ds, 5, preprocess)
        # MPI does not like fork()
    else:
        ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    return ds
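A minimal consumption sketch for the dataflow above (tensorpack's DataFlow iteration protocol), shown for the non-FPN, no-mask configuration where each datapoint is [im, anchor_labels, anchor_boxes, gt_boxes, gt_labels]:

ds = get_train_dataflow()
ds.reset_state()
for im, anchor_labels, anchor_boxes, gt_boxes, gt_labels in ds:
    # im: (h, w, 3) float32; gt_boxes: (N, 4) float32; gt_labels: (N,)
    break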
Code example #30
File: data.py Project: eldercrow/segmentation-tf
def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

    input image: (h, w, 3),
    semantic label image: (h, w, 1)
    """
    # imgs is a list, where each element is a dict containing 'fn_img', and 'fn_label'
    imgs = load_many_from_db(cfg.DATA.NAME, add_gt=True, is_train=True)
    # imgs = COCODetection.load_many(
    #     cfg.DATA.BASEDIR, cfg.DATA.TRAIN, add_gt=True, add_mask=cfg.MODE_MASK)
    """
    To train on your own data, change this to your loader.
    Produce "imgs" as a list of dict, in the dict the following keys are needed for training:
    height, width: integer
    file_name: str
    boxes: kx4 floats
    class: k integers
    difficult: k booleans. Use k False if you don't know what it means.
    segmentation: k lists of numpy arrays (one for each box).
        Each list of numpy array corresponds to the mask for one instance.
        Each numpy array in the list is a polygon of shape Nx2,
        because one mask can be represented by N polygons.

        If your segmentation annotations are originally masks rather than polygons,
        either convert it, or the augmentation code below will need to be
        changed or skipped accordingly.
    """

    # Unlike the detection dataflows above, no box-based filtering is applied
    # here: every (image, label) pair is kept for training.

    ds = DataFromList(imgs, shuffle=True)

    mean_bgr = np.array(cfg.PREPROC.PIXEL_MEAN[::-1])

    if cfg.DATA.NAME == 'cityscapes':
        aspect_exp = 1.1
    elif cfg.DATA.NAME == 'cocostuff':
        aspect_exp = 1.1 #2.0
    else:
        raise ValueError('Unknown dataset name: {}'.format(cfg.DATA.NAME))

    aug = imgaug.AugmentorList([
        SSDCropRandomShape(cfg.PREPROC.INPUT_SHAPE_TRAIN, aspect_exp=aspect_exp, mean_rgbgr=mean_bgr),
        SSDResize(cfg.PREPROC.INPUT_SHAPE_TRAIN),
        imgaug.Flip(horiz=True),
        SSDColorJitter(mean_rgbgr=mean_bgr)
    ])
    aug_label = imgaug.AugmentorList([
        SSDCropRandomShape(cfg.PREPROC.INPUT_SHAPE_TRAIN, aspect_exp=aspect_exp, mean_rgbgr=[255,]),
        SSDResize(cfg.PREPROC.INPUT_SHAPE_TRAIN, interp=cv2.INTER_NEAREST),
        imgaug.Flip(horiz=True)
    ])

    def preprocess(img):
        fn_img, fn_label = img['fn_img'], img['fn_label']
        # load head (and landmark) data as well
        im = cv2.imread(fn_img, cv2.IMREAD_COLOR)
        if fn_label.endswith('.mat'): # cocostuff
            label = loadmat(fn_label)['S'].astype(int)
            label = (label - 1).astype(np.uint8) # -1 becomes 255
        else:
            label = cv2.imread(fn_label, cv2.IMREAD_GRAYSCALE)
        label = np.expand_dims(label, 2)
        assert (im is not None) and (label is not None), fn_img
        im = im.astype('float32')
        # label = label.astype('int32')
        # augmentation
        im, params = aug.augment_return_params(im)
        # TODO: better way to adjust label?
        params_label = deepcopy(params[:-1])
        params_label[0].mean_rgbgr = [255,]
        params_label[1].interp = cv2.INTER_NEAREST
        label = aug_label.augment_with_params(label, params_label)
        label = label.astype('int32')

        ret = [im, label]
        return ret

    if cfg.TRAINER == 'horovod':
        ds = MultiThreadMapData(ds, 5, preprocess)
        # MPI does not like fork()
    else:
        # ds = MapData(ds, preprocess) # for debugging
        ds = MultiProcessMapDataZMQ(ds, cfg.PREPROC.NUM_WORKERS, preprocess)
    ds = BatchData(ds, cfg.PREPROC.BATCH_SIZE)
    return ds
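A matching consumption sketch, again assuming tensorpack's DataFlow iteration protocol; after BatchData each datapoint stacks cfg.PREPROC.BATCH_SIZE preprocessed pairs:

ds = get_train_dataflow()
ds.reset_state()
for im_batch, label_batch in ds:
    # im_batch: (B, H, W, 3) float32; label_batch: (B, H, W, 1) int32,
    # with H, W fixed by SSDResize(cfg.PREPROC.INPUT_SHAPE_TRAIN).
    break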