Python DataFromListOfDict Examples, common.DataFromListOfDict Python Examples

Example #1

0

Show file

File: data.py Project: chpohl/tensorpack

def get_eval_dataflow_YCBV(name, shard=0, num_shards=1):
    """
    Build the evaluation dataflow for one shard of a YCBV dataset.

    Args:
        name (str): name of the dataset to evaluate
        shard (int): index of the shard to produce
        num_shards (int): number of shards the image list is divided into

    Returns:
        A dataflow built from the 'file_name' and 'image_id' entries of each
        record, with component 0 (the file name) replaced by the image that
        cv2 reads from it.
    """
    all_roidbs = YCBVDetectionDataset().load_inference_image_ids(name)

    total = len(all_roidbs)
    per_shard = total // num_shards
    start = shard * per_shard
    # The last shard also takes whatever the integer division left over.
    if shard + 1 < num_shards:
        stop = (shard + 1) * per_shard
    else:
        stop = total

    def load_image(path):
        img = cv2.imread(path, cv2.IMREAD_COLOR)
        # cv2.imread signals a failed read by returning None.
        assert img is not None, path
        return img

    # The records are used as-is; no filtering is applied here.
    dataflow = DataFromListOfDict(all_roidbs[start:stop],
                                  ['file_name', 'image_id'])
    dataflow = MapDataComponent(dataflow, load_image, 0)
    # Evaluation itself may be multi-threaded, therefore don't add prefetch here.
    return dataflow

Example #2

0

Show file

def get_eval_dataflow(shard=0, num_shards=1):
    """
    Build the evaluation dataflow for one shard of the ``cfg.DATA.VAL`` set.

    Args:
        shard (int): index of the shard to produce
        num_shards (int): number of shards the image list is divided into

    Returns:
        A dataflow built from the 'file_name' and 'id' entries of each record,
        with component 0 (the file name) replaced by the image that cv2 reads
        from it.
    """
    all_imgs = COCODetection.load_many(
        cfg.DATA.BASEDIR, cfg.DATA.VAL, add_gt=False)

    total = len(all_imgs)
    per_shard = total // num_shards
    start = shard * per_shard
    # The last shard also takes whatever the integer division left over.
    if shard + 1 < num_shards:
        stop = (shard + 1) * per_shard
    else:
        stop = total

    def load_image(path):
        img = cv2.imread(path, cv2.IMREAD_COLOR)
        # cv2.imread signals a failed read by returning None.
        assert img is not None, path
        return img

    # The records are used as-is; no filtering is applied here.
    dataflow = DataFromListOfDict(all_imgs[start:stop], ['file_name', 'id'])
    dataflow = MapDataComponent(dataflow, load_image, 0)
    # Evaluation itself may be multi-threaded, therefore don't add prefetch here.
    return dataflow

Example #3

0

Show file

def get_eval_dataflow(name, shard=0, num_shards=1):
    """
    Build the evaluation dataflow for one shard of the named COCO dataset.

    Args:
        name (str): name of the dataset to evaluate
        shard (int): index of the shard to produce
        num_shards (int): number of shards the image list is divided into

    Returns:
        A dataflow built from the 'file_name' and 'id' entries of each record,
        with component 0 (the file name) replaced by the image that cv2 reads
        from it.
    """
    # To run inference on your own data, change this to your loader.
    # It must produce a list of dicts; the keys read below are:
    #   file_name: str, full path to the image
    #   id: an id of this image
    all_roidbs = COCODetection.load_many(cfg.DATA.BASEDIR, name, add_gt=False)

    total = len(all_roidbs)
    per_shard = total // num_shards
    start = shard * per_shard
    # The last shard also takes whatever the integer division left over.
    if shard + 1 < num_shards:
        stop = (shard + 1) * per_shard
    else:
        stop = total

    def load_image(path):
        img = cv2.imread(path, cv2.IMREAD_COLOR)
        # cv2.imread signals a failed read by returning None.
        assert img is not None, path
        return img

    # The records are used as-is; no filtering is applied here.
    dataflow = DataFromListOfDict(all_roidbs[start:stop], ['file_name', 'id'])
    dataflow = MapDataComponent(dataflow, load_image, 0)
    # Evaluation itself may be multi-threaded, therefore don't add prefetch here.
    return dataflow

Example #4

0

Show file

File: data.py Project: leeshien/mytensorpack

def get_eval_dataflow(name, shard=0, num_shards=1):
    """
    Build the evaluation dataflow for one shard of a registered dataset.

    Args:
        name (str): name of the dataset to evaluate
        shard (int): index of the shard to produce
        num_shards (int): number of shards the image list is divided into

    Returns:
        A dataflow built from the "file_name" and "image_id" entries of each
        record, with component 0 (the file name) replaced by the image that
        cv2 reads from it.
    """
    all_roidbs = DatasetRegistry.get(name).inference_roidbs()
    total = len(all_roidbs)
    logger.info("Found {} images for inference.".format(total))

    per_shard = total // num_shards
    start = shard * per_shard
    # The last shard also takes whatever the integer division left over.
    if shard + 1 < num_shards:
        stop = (shard + 1) * per_shard
    else:
        stop = total

    def load_image(path):
        img = cv2.imread(path, cv2.IMREAD_COLOR)
        # cv2.imread signals a failed read by returning None.
        assert img is not None, path
        return img

    # The records are used as-is; no filtering is applied here.
    dataflow = DataFromListOfDict(all_roidbs[start:stop],
                                  ["file_name", "image_id"])
    dataflow = MapDataComponent(dataflow, load_image, 0)
    # Evaluation itself may be multi-threaded, therefore don't add prefetch here.
    return dataflow

Example #5

0

Show file

File: data.py Project: tigerneil/tensorpack

def get_train_dataflow():
    """
    Build the training dataflow from ``config.TRAIN_DATASET``.

    Records without any box are dropped before the dataflow is created. The
    remaining records are shuffled, passed through
    ``read_and_augment_images`` and then mapped to
    ``[im, fm_labels, fm_boxes, boxes, klass]``, where ``boxes`` and ``klass``
    keep only the entries whose ``is_crowd`` flag is 0.

    The mapper returns None (after logging a warning once) for a datapoint
    that raises ``MalformedData`` or has no non-crowd boxes left.
    """
    all_imgs = COCODetection.load_many(config.BASEDIR, config.TRAIN_DATASET)
    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    train_imgs = [img for img in all_imgs if len(img['boxes']) > 0]

    # Only these four keys are needed.
    wanted_keys = ['file_name', 'boxes', 'class', 'is_crowd']
    flow = DataFromListOfDict(train_imgs, wanted_keys, shuffle=True)
    flow = read_and_augment_images(flow)

    def attach_anchors(dp):
        # presumably read_and_augment_images swaps in the image and appends
        # the file name as a fifth component -- confirm against that helper
        im, boxes, klass, is_crowd, fname = dp
        try:
            fm_labels, fm_boxes = get_rpn_anchor_input(im, boxes, klass, is_crowd)

            # skip crowd boxes in training target
            not_crowd = is_crowd == 0
            boxes = boxes[not_crowd]
            klass = klass[not_crowd]

            if len(boxes) == 0:
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as err:
            log_once("Input {} is invalid for training: {}".format(fname, str(err)), 'warn')
            return None

        return [im, fm_labels, fm_boxes, boxes, klass]

    return MapData(flow, attach_anchors)

Example #6

0

Show file

File: data.py Project: hakillha/maria03

def get_eval_dataflow(shard=0, num_shards=1):
    """
    Build the evaluation dataflow for one shard of the PRW 'test' split.

    Args:
        shard (int): index of the shard to produce
        num_shards (int): number of shards the image list is divided into

    Returns:
        A dataflow built from the 'file_name' entry of each record, taken
        twice: component 0 is replaced by the image that cv2 reads from it,
        component 1 stays the file name.
    """
    dataset = PRWDataset(cfg.DATA.BASEDIR)
    all_imgs = dataset.load('test')

    total = len(all_imgs)
    per_shard = total // num_shards
    start = shard * per_shard
    # The last shard also takes whatever the integer division left over.
    if shard + 1 < num_shards:
        stop = (shard + 1) * per_shard
    else:
        stop = total

    def load_image(path):
        img = cv2.imread(path, cv2.IMREAD_COLOR)
        # cv2.imread signals a failed read by returning None.
        assert img is not None, path
        return img

    # The records are used as-is; no filtering is applied here.
    # test if it can repeat keys
    dataflow = DataFromListOfDict(all_imgs[start:stop],
                                  ['file_name', 'file_name'])
    dataflow = MapDataComponent(dataflow, load_image, 0)
    # Evaluation itself may be multi-threaded, therefore don't add prefetch here.
    return dataflow

Example #7

0

Show file

File: data.py Project: tigerneil/tensorpack

def get_eval_dataflow():
    """
    Build the evaluation dataflow over all of ``config.VAL_DATASET``.

    Returns:
        A dataflow built from the 'file_name' and 'id' entries of each record,
        with component 0 (the file name) replaced by the image that cv2 reads
        from it.
    """
    val_imgs = COCODetection.load_many(
        config.BASEDIR, config.VAL_DATASET, add_gt=False)

    def load_image(path):
        img = cv2.imread(path, cv2.IMREAD_COLOR)
        # cv2.imread signals a failed read by returning None.
        assert img is not None, path
        return img

    # The records are used as-is; no filtering is applied here.
    dataflow = DataFromListOfDict(val_imgs, ['file_name', 'id'])
    return MapDataComponent(dataflow, load_image, 0)

Example #8

0

Show file

File: data.py Project: zhuguangqiang/tensorpack

def get_eval_dataflow():
    """
    Build the evaluation dataflow over all of ``cfg.DATA.VAL``.

    Returns:
        A dataflow built from the 'file_name' and 'id' entries of each record,
        with component 0 (the file name) replaced by the image that cv2 reads
        from it. Unless ``cfg.TRAINER`` is 'horovod', the result is wrapped in
        ``PrefetchDataZMQ`` with 1 as its second argument.
    """
    val_imgs = COCODetection.load_many(
        cfg.DATA.BASEDIR, cfg.DATA.VAL, add_gt=False)

    def load_image(path):
        img = cv2.imread(path, cv2.IMREAD_COLOR)
        # cv2.imread signals a failed read by returning None.
        assert img is not None, path
        return img

    # The records are used as-is; no filtering is applied here.
    dataflow = DataFromListOfDict(val_imgs, ['file_name', 'id'])
    dataflow = MapDataComponent(dataflow, load_image, 0)
    if cfg.TRAINER == 'horovod':
        return dataflow
    return PrefetchDataZMQ(dataflow, 1)

Example #9

0

Show file

File: data.py Project: zzhxyz/Kaggle_Ship_Detection_2018

def get_test_dataflow(add_mask=True):
    """
    Return a test dataflow over ``config.VAL_DATASET``.

    Args:
        add_mask (bool): forwarded unchanged to ``Detection.load_many``.

    Returns:
        A dataflow built from the 'image_data' and 'id' entries of each
        record, with component 0 replaced by the image that cv2 reads from it,
        wrapped in ``PrefetchDataZMQ`` with 1 as its second argument.

    Raises:
        AssertionError: if cv2 cannot read an image; the message is the
            offending 'image_data' value.
    """
    imgs = Detection.load_many(
        config.BASEDIR, config.VAL_DATASET, add_gt=False, add_mask=add_mask)
    # The records are used as-is; no filtering is applied here.
    ds = DataFromListOfDict(imgs, ['image_data', 'id'])

    def f(image):
        # NOTE(review): 'image_data' is handed to cv2.imread, so it is
        # presumably a file path -- confirm against Detection.load_many.
        im = cv2.imread(image)
        # cv2.imread returns None instead of raising when the read fails;
        # fail here with the offending value rather than passing None on.
        assert im is not None, image
        return im

    ds = MapDataComponent(ds, f, 0)
    ds = PrefetchDataZMQ(ds, 1)
    return ds

Example #10

0

Show file

def get_eval_dataflow(name, is_aws, is_gcs, shard=0, num_shards=1):
    """
    Build the evaluation dataflow for one shard of a registered dataset,
    reading the images from S3, from GCS or from the local filesystem.

    Args:
        name (str): name of the dataset to evaluate
        is_aws: if truthy, fetch each image from the S3 bucket
            "determined-ai-coco-dataset" (takes precedence over ``is_gcs``)
        is_gcs: if truthy (and ``is_aws`` is not), fetch each image from the
            GCS bucket "determined-ai-coco-dataset" with an anonymous client
        shard (int): index of the shard to produce
        num_shards (int): number of shards the image list is divided into

    Returns:
        A dataflow built from the "file_name" and "image_id" entries of each
        record, with component 0 (the file name) replaced by the decoded image.
    """
    all_roidbs = DatasetRegistry.get(name).inference_roidbs()
    total = len(all_roidbs)
    logger.info("Found {} images for inference.".format(total))

    per_shard = total // num_shards
    start = shard * per_shard
    # The last shard also takes whatever the integer division left over.
    if shard + 1 < num_shards:
        stop = (shard + 1) * per_shard
    else:
        stop = total

    # The records are used as-is; no filtering is applied here.
    dataflow = DataFromListOfDict(all_roidbs[start:stop], ["file_name", "image_id"])

    # Create the remote handle once; the loader below closes over it.
    if is_aws:
        s3_resource = boto3.resource("s3")
    elif is_gcs:
        gcs_client = storage.Client.create_anonymous_client()
        gcs_bucket = gcs_client.get_bucket("determined-ai-coco-dataset")

    def decode(raw):
        # Turn the encoded image bytes into an image array.
        buf = np.asarray(bytearray(raw), dtype=np.uint8)
        return cv2.imdecode(buf, cv2.IMREAD_COLOR)

    def load_image(path):
        if is_aws:
            response = s3_resource.meta.client.get_object(
                Bucket="determined-ai-coco-dataset", Key=path)
            img = decode(response["Body"].read())
        elif is_gcs:
            payload = download_gcs_blob_with_backoff(gcs_bucket.blob(path))
            img = decode(payload)
        else:
            img = cv2.imread(path, cv2.IMREAD_COLOR)
        assert img is not None, path
        return img

    dataflow = MapDataComponent(dataflow, load_image, 0)
    # Evaluation itself may be multi-threaded, therefore don't add prefetch here.
    return dataflow

Example #11

0

Show file

File: data.py Project: eldercrow/segmentation-tf

def get_eval_dataflow(batch_size=0, shard=0, num_shards=1):
    """
    Build a batched evaluation dataflow for one shard of ``cfg.DATA.NAME``.

    Args:
        batch_size (int): number of datapoints per batch; a value <= 0 selects
            ``cfg.PREPROC.EVAL_BATCH_SIZE``.
        shard (int): index of the shard to produce
        num_shards (int): number of shards; sharding is only applied when > 1

    Returns:
        A dataflow built from the 'fn_img' and 'id' entries of each record,
        with component 0 replaced by the image read from it and resized to
        ``cfg.PREPROC.INPUT_SHAPE_EVAL``, grouped by ``BatchData`` with
        ``use_list=False``.

    Raises:
        AssertionError: if the effective batch size is not positive, if the
            number of images in the shard is not a multiple of it, or if an
            image cannot be read.
    """
    # Resolve the effective batch size before it is used: with the default
    # batch_size=0 the divisibility check below would divide by zero.
    if batch_size <= 0:
        batch_size = cfg.PREPROC.EVAL_BATCH_SIZE
    assert batch_size > 0, 'Batch size should be greater than 0'

    imgs = load_many_from_db(cfg.DATA.NAME, add_gt=True, is_train=False)

    if num_shards > 1:
        num_imgs = len(imgs)
        img_per_shard = num_imgs // num_shards
        # Every shard gets exactly img_per_shard images; images past
        # num_shards * img_per_shard are not assigned to any shard.
        s, e = shard * img_per_shard, min(num_imgs, (shard + 1) * img_per_shard)
        imgs = imgs[s:e]

    assert len(imgs) % batch_size == 0, \
        'len(img) must be multiples of batch_size, {}, {}'.format(len(imgs), batch_size)
    # The records are used as-is; no filtering is applied here.
    ds = DataFromListOfDict(imgs, ['fn_img', 'id'])

    hh, ww = cfg.PREPROC.INPUT_SHAPE_EVAL
    # PIXEL_MEAN reversed -- presumably RGB -> BGR to match cv2's channel
    # order; confirm against the config.
    mean_bgr = np.array(cfg.PREPROC.PIXEL_MEAN[::-1])
    aug = CropPadTransform(0, 0, ww, hh, mean_bgr)

    def f(fname):
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        # cv2.imread signals a failed read by returning None.
        assert im is not None, fname
        # Scale uniformly so the image fits inside (ww, hh).
        scale = min(ww / float(im.shape[1]), hh / float(im.shape[0]))
        im = cv2.resize(im, (0, 0), fx=scale, fy=scale)
        im = aug.apply_image(im)
        # Final resize forces the exact (ww, hh) output size.
        im = cv2.resize(im, (ww, hh))
        return im

    ds = MapDataComponent(ds, f, 0)
    ds = BatchData(ds, batch_size, use_list=False)
    return ds