def test_train(self):
    """Check that the training reader yields exactly ``maxiter`` batches.

    Builds a ``Reader`` from ``self.coco_conf`` with ``maxiter=1000``, drains
    the training reader once and asserts that the number of items seen
    equals ``coco._maxiter``.  Whenever at least one second has passed since
    the previous progress line, a new line with throughput figures is logged.
    """
    coco = Reader(
        self.coco_conf['DATA'], self.coco_conf['TRANSFORM'], maxiter=1000)
    train_rd = coco.train()
    self.assertTrue(train_rd is not None)

    window_count = 0  # items seen since the last progress line
    window_bytes = 0  # bytes seen since the last progress line
    total = 0  # items accounted for in earlier progress lines
    start_ts = None
    window_ts = None
    for sample in train_rd():
        if window_ts is None:
            # Both clocks start when the first item arrives, so the time
            # spent before that is not part of any reported rate.
            start_ts = time.time()
            window_ts = start_ts

        window_count += 1
        # NOTE(review): assumes 4-byte elements and that every entry of
        # sample[0] has the same size as sample[0][0] -- confirm.
        window_bytes += 4 * sample[0][0].size * len(sample[0])
        self.assertTrue(sample is not None)

        now = time.time()
        cost = now - window_ts
        if cost >= 1.0:
            total += window_count
            # qps is cumulative: everything seen so far over total runtime.
            qps = total / (now - start_ts)
            # window_bytes is reset after every progress line, so it has to
            # be divided by the window length; dividing it by the total
            # runtime made the reported rate shrink as the run got longer.
            bps = window_bytes / cost

            logging.info('got %d/%d samples in %.3fsec with qps:%d bps:%d' %
                         (window_count, total, cost, qps, bps))
            window_bytes = 0
            window_count = 0
            window_ts = time.time()

    # Items seen after the last progress line have not been added yet.
    total += window_count
    self.assertEqual(total, coco._maxiter)
def test_loader_multi_threads(self):
    """Iterate a two-worker COCO reader twice and check every sample's shapes.

    The reader is built with ``worker_num=2`` and ``use_process=False`` and
    is reset after each full pass.
    """
    dataset = COCODataSet(
        dataset_dir=self.root_path,
        image_dir=self.image_dir,
        anno_path=self.anno_path,
        sample_num=10)
    per_sample_ops = [
        DecodeImage(to_rgb=True),
        ResizeImage(
            target_size=800, max_size=1333, interp=1),
        Permute(to_bgr=False),
    ]
    per_batch_ops = [PadBatch(pad_to_stride=32, use_padded_im_info=True)]
    inputs_def = {
        'fields': [
            'image',
            'im_info',
            'im_id',
            'gt_bbox',
            'gt_class',
            'is_crowd',
            'gt_mask',
        ],
    }
    data_loader = Reader(
        dataset,
        sample_transforms=per_sample_ops,
        batch_transforms=per_batch_ops,
        batch_size=2,
        shuffle=True,
        drop_empty=True,
        worker_num=2,
        use_process=False,
        bufsize=8,
        inputs_def=inputs_def)()

    for _ in range(2):
        for batch in data_loader:
            for item in batch:
                # image: 3 leading channels, both trailing dims padded to 32
                image_shape = item[0].shape
                self.assertEqual(image_shape[0], 3)
                for axis in (1, 2):
                    self.assertEqual(image_shape[axis] % 32, 0)

                # im_info / im_id: fixed trailing dimension
                self.assertEqual(item[1].shape[-1], 3)
                self.assertEqual(item[2].shape[-1], 1)

                # gt_bbox: four values per row
                bbox_shape = item[3].shape
                self.assertEqual(bbox_shape[-1], 4)

                # gt_class and is_crowd: one value per row, one row per box
                class_shape = item[4].shape
                self.assertEqual(class_shape[-1], 1)
                self.assertEqual(class_shape[0], bbox_shape[0])
                crowd_shape = item[5].shape
                self.assertEqual(crowd_shape[-1], 1)
                self.assertEqual(crowd_shape[0], bbox_shape[0])

                # gt_mask: one entry per box, trailing dimension of 2
                masks = item[6]
                self.assertEqual(len(masks), bbox_shape[0])
                self.assertEqual(masks[0][0].shape[-1], 2)
        # Rewind the loader before the next pass.
        data_loader.reset()
def test_val(self):
    """Check that the validation reader yields at least ``_maxiter`` items.

    The reader is created once and then consumed for three consecutive
    epochs; the count is verified after each epoch.
    """
    coco = Reader(self.coco_conf['DATA'], self.coco_conf['TRANSFORM'], 10)
    val_rd = coco.val()
    self.assertTrue(val_rd is not None)

    # run three epochs
    for _ in range(3):
        seen = 0
        for seen, batch in enumerate(val_rd(), 1):
            self.assertTrue(batch is not None)
        self.assertGreaterEqual(seen, coco._maxiter)
def test_create(self):
    """Create a reader backed by a caller-supplied source and check shapes.

    A generator over ``build_source(...)`` is wrapped in ``IteratorSource``
    and handed to ``Reader.create`` as ``my_source``; the shapes of the last
    item produced by the resulting reader are then verified.
    """

    def _my_data_reader():
        # Re-yield every sample of the configured training source.
        for item in build_source(self.rcnn_conf['DATA']['TRAIN']):
            yield item

    source = IteratorSource(_my_data_reader)
    mode = 'TRAIN'
    data_conf = self.rcnn_conf['DATA'][mode]
    transform_conf = self.rcnn_conf['TRANSFORM'][mode]
    train_rd = Reader.create(
        mode, data_conf, transform_conf, max_iter=10, my_source=source)

    last = None
    for batch in train_rd():
        last = batch
        self.assertTrue(batch is not None)

    # (field index, axis, expected extent) for the first entry of the last item
    expectations = (
        (0, 0, 3),
        (1, 0, 3),
        (3, 1, 4),
        (4, 1, 1),
        (5, 1, 1),
    )
    for field, axis, extent in expectations:
        self.assertEqual(last[0][field].shape[axis], extent)
def test_loader(self):
    """Build a batch-size-1 reader over a ``VOCDataSet`` and return it.

    No assertions are made here; the constructed loader is handed back to
    the caller.
    """
    # NOTE(review): dataset_dir receives self.image_dir and image_dir
    # receives self.root_path -- confirm these are not swapped.
    voc_dataset = VOCDataSet(
        dataset_dir=self.image_dir,
        image_dir=self.root_path,
        anno_path=self.anno_path,
        sample_num=240,
        use_default_label=False,
        label_list='/path/to/your/fl_fruit/label_list.txt')

    decode = DecodeImage(to_rgb=True)
    flip = RandomFlipImage()
    normalize = NormalizeImage(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
        is_scale=True,
        is_channel_first=False)
    resize = ResizeImage(target_size=800, max_size=1333, interp=1)
    permute = Permute(to_bgr=False)
    per_sample_ops = [decode, flip, normalize, resize, permute]

    per_batch_ops = [PadBatch(pad_to_stride=32, use_padded_im_info=True)]

    inputs_def = {
        'fields': [
            'image',
            'im_info',
            'im_id',
            'gt_bbox',
            'gt_class',
            'is_crowd',
        ],
    }

    reader = Reader(
        voc_dataset,
        sample_transforms=per_sample_ops,
        batch_transforms=per_batch_ops,
        batch_size=1,
        shuffle=True,
        drop_empty=True,
        inputs_def=inputs_def)
    return reader()
def test_rcnn_train(self):
    """Check the training reader built from ``self.rcnn_conf``.

    The test logs an error and returns early when the configured annotation
    file does not exist.  Otherwise it drains the reader, verifies the shapes
    of the last item and that at least ``_maxiter`` items were produced.
    """
    anno = self.rcnn_conf['DATA']['TRAIN']['ANNO_FILE']
    if not os.path.exists(anno):
        logging.error('exit test_rcnn for not found file[%s]' % (anno))
        return

    rcnn = Reader(self.rcnn_conf['DATA'], self.rcnn_conf['TRANSFORM'], 10)
    rcnn_rd = rcnn.train()
    self.assertTrue(rcnn_rd is not None)

    seen = 0
    last = None
    for seen, batch in enumerate(rcnn_rd(), 1):
        last = batch
        self.assertTrue(batch is not None)

    # (field index, axis, expected extent) for the first entry of the last item
    for field, axis, extent in ((0, 0, 3), (1, 0, 3), (3, 1, 4), (4, 1, 1),
                                (5, 1, 1)):
        self.assertEqual(last[0][field].shape[axis], extent)
    self.assertGreaterEqual(seen, rcnn._maxiter)
def create_reader(feed, max_iter=0, args_path=None, my_source=None):
    """
    Return iterable data reader.

    Args:
        feed: data feed description. The attributes `num_workers`,
            `batch_size`, `drop_last`, `use_padded_im_info`,
            `batch_transforms`, `sample_transforms` and `mode` are read;
            `bufsize`, `use_process` and `memsize` are optional and default
            to 10, False and '3G'. `num_scale` is read only when a
            `PadMSTest` batch transform is present.
        max_iter (int): number of iterations.
        args_path: passed through to `_prepare_data_config` with `feed`.
        my_source (callable): callable function to create a source iterator
            which is used to provide source data in 'ppdet.data.reader'

    Returns:
        The result of `Reader.create` for the assembled data and transform
        configuration.
    """

    # if `DATASET_DIR` does not exist, search ~/.paddle/dataset for a directory
    # named `DATASET_DIR` (e.g., coco, pascal); if not present either, download
    data_config = _prepare_data_config(feed, args_path)

    transform_config = {
        'WORKER_CONF': {
            'bufsize': getattr(feed, 'bufsize', 10),
            'worker_num': feed.num_workers,
            'use_process': getattr(feed, 'use_process', False),
            'memsize': getattr(feed, 'memsize', '3G'),
        },
        'BATCH_SIZE': feed.batch_size,
        'DROP_LAST': feed.drop_last,
        'USE_PADDED_IM_INFO': feed.use_padded_im_info,
    }

    batch_transforms = feed.batch_transforms

    def _first_of(kind):
        # First batch transform that is an instance of `kind`, else None.
        for transform in batch_transforms:
            if isinstance(transform, kind):
                return transform
        return None

    pad = _first_of(PadBatch)
    rand_shape = _first_of(RandomShape)
    multi_scale = _first_of(MultiScale)
    pad_ms_test = _first_of(PadMSTest)

    # Presence is checked with `is not None` rather than `any(matches)`:
    # `any` looks at the truthiness of the matched objects themselves, which
    # is not the same question as "was a transform of this kind configured".
    if pad is not None:
        transform_config['IS_PADDING'] = True
        if pad.pad_to_stride != 0:
            transform_config['COARSEST_STRIDE'] = pad.pad_to_stride
    if rand_shape is not None:
        transform_config['RANDOM_SHAPES'] = rand_shape.sizes
    if multi_scale is not None:
        transform_config['MULTI_SCALES'] = multi_scale.scales
    if pad_ms_test is not None:
        transform_config['ENABLE_MULTISCALE_TEST'] = True
        transform_config['NUM_SCALE'] = feed.num_scale
        # Evaluated after the PadBatch branch, so this stride wins when both
        # kinds of transform are present.
        transform_config['COARSEST_STRIDE'] = pad_ms_test.pad_to_stride

    # `getfullargspec` is used when available; otherwise fall back to
    # `getargspec`.
    if hasattr(inspect, 'getfullargspec'):
        argspec = inspect.getfullargspec
    else:
        argspec = inspect.getargspec

    # Describe each sample transform as a dict holding the instance
    # attributes that share a name with an `__init__` argument, plus the
    # class name under 'op'.
    ops = []
    for op in feed.sample_transforms:
        argnames = [
            arg for arg in argspec(type(op).__init__).args if arg != 'self'
        ]
        op_dict = {k: v for k, v in op.__dict__.items() if k in argnames}
        op_dict['op'] = op.__class__.__name__
        ops.append(op_dict)
    transform_config['OPS'] = ops

    return Reader.create(feed.mode, data_config, transform_config, max_iter,
                         my_source)