def get_eval_dataflow_YCBV(name, shard=0, num_shards=1):
    """
    Build the inference dataflow for one shard of a YCBV image list.

    Args:
        name (str): name of the dataset to evaluate
        shard (int): index of the shard to produce
        num_shards (int): number of shards the image list is divided into

    Returns:
        A dataflow built from the 'file_name' and 'image_id' entries of each
        record, with the first component replaced by the image decoded by cv2.
    """
    roidbs = YCBVDetectionDataset().load_inference_image_ids(name)

    # Every shard receives the same number of images, except the last one,
    # which also takes whatever the integer division left over.
    total = len(roidbs)
    per_shard = total // num_shards
    begin = shard * per_shard
    if shard + 1 < num_shards:
        end = (shard + 1) * per_shard
    else:
        end = total

    # Unlike training, no filtering of the records is done here.
    ds = DataFromListOfDict(roidbs[begin:end], ['file_name', 'image_id'])

    def read_image(fname):
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        # cv2.imread signals failure by returning None; report the offending path.
        assert im is not None, fname
        return im

    # Evaluation itself may be multi-threaded, therefore don't add prefetch here.
    return MapDataComponent(ds, read_image, 0)
def get_eval_dataflow(shard=0, num_shards=1):
    """
    Build the inference dataflow for one shard of the cfg.DATA.VAL images.

    Args:
        shard (int): index of the shard to produce
        num_shards (int): number of shards the image list is divided into

    Returns:
        A dataflow built from the 'file_name' and 'id' entries of each record,
        with the first component replaced by the image decoded by cv2.
    """
    records = COCODetection.load_many(cfg.DATA.BASEDIR, cfg.DATA.VAL, add_gt=False)
    count = len(records)
    shard_size = count // num_shards

    # The last shard runs to the end of the list so that the remainder of the
    # integer division is not lost.
    first = shard * shard_size
    is_last_shard = not (shard + 1 < num_shards)
    last = count if is_last_shard else (shard + 1) * shard_size

    # Unlike training, no filtering of the records is done here.
    ds = DataFromListOfDict(records[first:last], ['file_name', 'id'])

    def load_bgr(path):
        img = cv2.imread(path, cv2.IMREAD_COLOR)
        # cv2.imread signals failure by returning None; report the offending path.
        assert img is not None, path
        return img

    # Evaluation itself may be multi-threaded, therefore don't add prefetch here.
    return MapDataComponent(ds, load_bgr, 0)
def get_eval_dataflow(name, shard=0, num_shards=1):
    """
    Build the inference dataflow for one shard of the named dataset.

    Args:
        name (str): name of the dataset to evaluate
        shard (int): index of the shard to produce
        num_shards (int): number of shards the image list is divided into

    Returns:
        A dataflow built from the 'file_name' and 'id' entries of each record,
        with the first component replaced by the image decoded by cv2.
    """
    # To run inference on your own data, change this to your loader.
    # Produce "roidbs" as a list of dict; each dict needs the following keys:
    #   file_name: str, full path to the image
    #   id: an id of this image
    roidbs = COCODetection.load_many(cfg.DATA.BASEDIR, name, add_gt=False)

    total = len(roidbs)
    chunk = total // num_shards
    lo = shard * chunk
    # All shards but the last hold `chunk` images; the last one absorbs the rest.
    hi = (shard + 1) * chunk
    if not shard + 1 < num_shards:
        hi = total

    # Unlike training, no filtering of the records is done here.
    ds = DataFromListOfDict(roidbs[lo:hi], ['file_name', 'id'])

    def decode(fname):
        image = cv2.imread(fname, cv2.IMREAD_COLOR)
        # cv2.imread signals failure by returning None; report the offending path.
        assert image is not None, fname
        return image

    # Evaluation itself may be multi-threaded, therefore don't add prefetch here.
    ds = MapDataComponent(ds, decode, 0)
    return ds
def get_eval_dataflow(name, shard=0, num_shards=1):
    """
    Build the inference dataflow for one shard of a registered dataset.

    Args:
        name (str): name of the dataset to evaluate
        shard (int): index of the shard to produce
        num_shards (int): number of shards the image list is divided into

    Returns:
        A dataflow built from the "file_name" and "image_id" entries of each
        record, with the first component replaced by the image decoded by cv2.
    """
    roidbs = DatasetRegistry.get(name).inference_roidbs()
    num_imgs = len(roidbs)
    logger.info("Found {} images for inference.".format(num_imgs))

    # Equal-sized shards; the final shard is extended to the end of the list
    # so the remainder of the integer division is still evaluated.
    img_per_shard = num_imgs // num_shards
    if shard + 1 < num_shards:
        bounds = slice(shard * img_per_shard, (shard + 1) * img_per_shard)
    else:
        bounds = slice(shard * img_per_shard, num_imgs)

    # Unlike training, no filtering of the records is done here.
    ds = DataFromListOfDict(roidbs[bounds], ["file_name", "image_id"])

    def imread_or_fail(fname):
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        # cv2.imread signals failure by returning None; report the offending path.
        assert im is not None, fname
        return im

    # Evaluation itself may be multi-threaded, therefore don't add prefetch here.
    return MapDataComponent(ds, imread_or_fail, 0)
def get_train_dataflow():
    """
    Build the training dataflow over config.TRAIN_DATASET.

    Returns:
        A dataflow whose mapping function yields
        [image, fm_labels, fm_boxes, gt_boxes, gt_class] for usable inputs and
        None for inputs that raise MalformedData.
    """
    records = COCODetection.load_many(config.BASEDIR, config.TRAIN_DATASET)

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
    records = [rec for rec in records if len(rec['boxes']) > 0]  # log invalid training

    ds = DataFromListOfDict(
        records,
        ['file_name', 'boxes', 'class', 'is_crowd'],  # only these four keys are needed
        shuffle=True)
    ds = read_and_augment_images(ds)

    def add_anchor_to_dp(dp):
        im, boxes, klass, is_crowd, fname = dp
        try:
            # Anchor targets are computed from all boxes, crowd ones included.
            fm_labels, fm_boxes = get_rpn_anchor_input(im, boxes, klass, is_crowd)

            # Crowd boxes are skipped in the training target.
            non_crowd = is_crowd == 0
            boxes = boxes[non_crowd]
            klass = klass[non_crowd]
            if len(boxes) == 0:
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as err:
            log_once("Input {} is invalid for training: {}".format(fname, str(err)), 'warn')
            return None
        else:
            return [im, fm_labels, fm_boxes, boxes, klass]

    return MapData(ds, add_anchor_to_dp)
def get_eval_dataflow(shard=0, num_shards=1):
    """
    Build the inference dataflow for one shard of the PRW 'test' split.

    Args:
        shard (int): index of the shard to produce
        num_shards (int): number of shards the image list is divided into

    Returns:
        A dataflow built from each record's 'file_name' taken twice: the first
        copy is replaced by the image decoded by cv2, the second is left as is.
    """
    dataset = PRWDataset(cfg.DATA.BASEDIR)
    test_imgs = dataset.load('test')

    n_total = len(test_imgs)
    n_each = n_total // num_shards
    start = shard * n_each
    # Only the last shard is allowed to run past its nominal size, so the
    # remainder of the integer division is still covered.
    if shard + 1 < num_shards:
        stop = (shard + 1) * n_each
    else:
        stop = n_total
    subset = test_imgs[start:stop]

    # Unlike training, no filtering of the records is done here.
    # test if it can repeat keys
    ds = DataFromListOfDict(subset, ['file_name', 'file_name'])

    def read_color(fname):
        frame = cv2.imread(fname, cv2.IMREAD_COLOR)
        # cv2.imread signals failure by returning None; report the offending path.
        assert frame is not None, fname
        return frame

    # Evaluation itself may be multi-threaded, therefore don't add prefetch here.
    return MapDataComponent(ds, read_color, 0)
def get_eval_dataflow():
    """
    Build the inference dataflow over all images of config.VAL_DATASET.

    Returns:
        A dataflow built from the 'file_name' and 'id' entries of each record,
        with the first component replaced by the image decoded by cv2.
    """
    def load_image(fname):
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        # cv2.imread signals failure by returning None; report the offending path.
        assert im is not None, fname
        return im

    val_records = COCODetection.load_many(
        config.BASEDIR, config.VAL_DATASET, add_gt=False)
    # Unlike training, no filtering of the records is done here.
    records_ds = DataFromListOfDict(val_records, ['file_name', 'id'])
    return MapDataComponent(records_ds, load_image, 0)
def get_eval_dataflow():
    """
    Build the inference dataflow over all images of cfg.DATA.VAL.

    Returns:
        A dataflow built from the 'file_name' and 'id' entries of each record,
        with the first component replaced by the image decoded by cv2. It is
        wrapped in PrefetchDataZMQ(ds, 1) unless cfg.TRAINER is 'horovod'.
    """
    def read_image(fname):
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        # cv2.imread signals failure by returning None; report the offending path.
        assert im is not None, fname
        return im

    records = COCODetection.load_many(cfg.DATA.BASEDIR, cfg.DATA.VAL, add_gt=False)
    # Unlike training, no filtering of the records is done here.
    ds = MapDataComponent(DataFromListOfDict(records, ['file_name', 'id']), read_image, 0)

    # The horovod trainer gets the plain dataflow, without the prefetch wrapper.
    if cfg.TRAINER == 'horovod':
        return ds
    return PrefetchDataZMQ(ds, 1)
def get_test_dataflow(add_mask=True):
    """
    Return the inference dataflow over config.VAL_DATASET.

    Args:
        add_mask (bool): forwarded unchanged to ``Detection.load_many``.

    Returns:
        A dataflow built from the 'image_data' and 'id' entries of each record,
        with the first component replaced by the result of ``cv2.imread`` on
        it, wrapped in ``PrefetchDataZMQ(ds, 1)``.
    """
    imgs = Detection.load_many(
        config.BASEDIR, config.VAL_DATASET, add_gt=False, add_mask=add_mask)
    # Unlike training, no filtering of the records is done here.
    ds = DataFromListOfDict(imgs, ['image_data', 'id'])

    def f(image):
        # NOTE(review): 'image_data' is handed to cv2.imread, so it is presumably
        # a path to the image file - confirm against Detection.load_many.
        im = cv2.imread(image)
        # cv2.imread returns None instead of raising when the file cannot be
        # read; fail here with the offending value rather than passing None on.
        assert im is not None, image
        return im

    ds = MapDataComponent(ds, f, 0)
    ds = PrefetchDataZMQ(ds, 1)
    return ds
def get_eval_dataflow(name, is_aws, is_gcs, shard=0, num_shards=1):
    """
    Build the inference dataflow for one shard of a registered dataset,
    reading the images from S3, from GCS, or from the local filesystem.

    Args:
        name (str): name of the dataset to evaluate
        is_aws: if true, each "file_name" is used as a key into the S3 bucket
            "determined-ai-coco-dataset"
        is_gcs: consulted only when ``is_aws`` is false; if true, each
            "file_name" names a blob in the GCS bucket of the same name,
            accessed through an anonymous client
        shard (int): index of the shard to produce
        num_shards (int): number of shards the image list is divided into

    Returns:
        A dataflow built from the "file_name" and "image_id" entries of each
        record, with the first component replaced by the image decoded by cv2.
    """
    bucket_name = "determined-ai-coco-dataset"

    roidbs = DatasetRegistry.get(name).inference_roidbs()
    num_imgs = len(roidbs)
    logger.info("Found {} images for inference.".format(num_imgs))

    # Equal-sized shards; the final shard is extended to the end of the list
    # so the remainder of the integer division is still evaluated.
    img_per_shard = num_imgs // num_shards
    start = shard * img_per_shard
    if shard + 1 < num_shards:
        stop = (shard + 1) * img_per_shard
    else:
        stop = num_imgs

    # Unlike training, no filtering of the records is done here.
    ds = DataFromListOfDict(roidbs[start:stop], ["file_name", "image_id"])

    # The remote handle is created once, up front, and shared by every read.
    if is_aws:
        s3 = boto3.resource("s3")
    elif is_gcs:
        gcs_client = storage.Client.create_anonymous_client()
        bucket = gcs_client.get_bucket(bucket_name)

    def decode_bytes(raw):
        # Turn an in-memory encoded image into a colour image.
        return cv2.imdecode(np.asarray(bytearray(raw), dtype=np.uint8), cv2.IMREAD_COLOR)

    def fetch_image(fname):
        if is_aws:
            s3_object = s3.meta.client.get_object(Bucket=bucket_name, Key=fname)
            im = decode_bytes(s3_object["Body"].read())
        elif is_gcs:
            im = decode_bytes(download_gcs_blob_with_backoff(bucket.blob(fname)))
        else:
            im = cv2.imread(fname, cv2.IMREAD_COLOR)
        # Both cv2.imread and cv2.imdecode yield None on failure.
        assert im is not None, fname
        return im

    # Evaluation itself may be multi-threaded, therefore don't add prefetch here.
    return MapDataComponent(ds, fetch_image, 0)
def get_eval_dataflow(batch_size=0, shard=0, num_shards=1):
    """
    Build the batched evaluation dataflow for one shard of cfg.DATA.NAME.

    Args:
        batch_size (int): number of images per batch. A value <= 0 selects
            cfg.PREPROC.EVAL_BATCH_SIZE.
        shard, num_shards: to get subset of evaluation data. Sharding is only
            applied when num_shards > 1; every shard then holds exactly
            ``len(imgs) // num_shards`` images, so trailing images that do not
            fill a whole shard are not assigned to any shard.

    Returns:
        A dataflow built from the 'fn_img' and 'id' entries of each record,
        with the first component replaced by the image resized to
        cfg.PREPROC.INPUT_SHAPE_EVAL, grouped by
        ``BatchData(ds, batch_size, use_list=False)``.

    Raises:
        AssertionError: if the effective batch size is not positive, if the
            number of selected images is not a multiple of it, or if an image
            cannot be read.
    """
    # Resolve the effective batch size before it is used: the divisibility
    # check below would otherwise divide by the default of 0.
    if batch_size <= 0:
        batch_size = cfg.PREPROC.EVAL_BATCH_SIZE
    assert batch_size > 0, 'Batch size should be greater than 0'

    imgs = load_many_from_db(cfg.DATA.NAME, add_gt=True, is_train=False)

    if num_shards > 1:
        num_imgs = len(imgs)
        img_per_shard = num_imgs // num_shards
        s, e = shard * img_per_shard, min(num_imgs, (shard + 1) * img_per_shard)
        imgs = imgs[s:e]

    assert len(imgs) % batch_size == 0, \
        'len(img) must be multiples of batch_size, {}, {}'.format(len(imgs), batch_size)

    # Unlike training, no filtering of the records is done here.
    ds = DataFromListOfDict(imgs, ['fn_img', 'id'])

    hh, ww = cfg.PREPROC.INPUT_SHAPE_EVAL
    # NOTE(review): the reversal suggests PIXEL_MEAN is stored in RGB order and
    # is flipped to match cv2's BGR images - confirm against the config.
    mean_bgr = np.array(cfg.PREPROC.PIXEL_MEAN[::-1])
    # NOTE(review): presumably crops/pads to a ww x hh canvas filled with
    # mean_bgr - confirm against CropPadTransform.
    aug = CropPadTransform(0, 0, ww, hh, mean_bgr)

    def f(fname):
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        # cv2.imread signals failure by returning None; report the offending path.
        assert im is not None, fname
        # Uniform scale so that the image fits inside (ww, hh) with its aspect
        # ratio preserved.
        scale = min(ww / float(im.shape[1]), hh / float(im.shape[0]))
        im = cv2.resize(im, (0, 0), fx=scale, fy=scale)
        im = aug.apply_image(im)
        # Final resize guarantees the exact (ww, hh) output size.
        im = cv2.resize(im, (ww, hh))
        return im

    ds = MapDataComponent(ds, f, 0)
    ds = BatchData(ds, batch_size, use_list=False)
    return ds