Example #1
0
    def test_train(self):
        """ Test reader for training

        Iterates the training reader created with ``maxiter=1000``, logs
        throughput about once per second, and finally checks that the
        number of samples yielded equals ``coco._maxiter``.
        """
        coco = Reader(
            self.coco_conf['DATA'], self.coco_conf['TRANSFORM'], maxiter=1000)
        train_rd = coco.train()
        self.assertIsNotNone(train_rd)

        ct = 0  # samples seen in the current logging window
        total = 0  # samples seen in all windows already logged
        window_bytes = 0  # estimated payload size of the current window
        start_ts = None
        prev_ts = None
        for sample in train_rd():
            if prev_ts is None:
                # Start both clocks when the first sample arrives so reader
                # start-up time is not counted in the throughput figures.
                start_ts = time.time()
                prev_ts = start_ts

            # Validate before indexing into the sample below.
            self.assertIsNotNone(sample)
            ct += 1
            # Assumes 4-byte elements (e.g. float32) -- TODO confirm.
            window_bytes += 4 * sample[0][0].size * len(sample[0])

            now = time.time()
            cost = now - prev_ts
            if cost >= 1.0:
                total += ct
                # qps is the cumulative average since the first sample.
                qps = total / (now - start_ts)
                # The byte counter is reset every window, so it must be
                # divided by the window duration, not the total elapsed time.
                bps = window_bytes / cost

                logging.info('got %d/%d samples in %.3fsec with qps:%d bps:%d',
                             ct, total, cost, qps, bps)
                window_bytes = 0
                ct = 0
                prev_ts = time.time()

        # Account for the samples of the last, not yet logged, window.
        total += ct
        self.assertEqual(total, coco._maxiter)
    def test_loader_multi_threads(self):
        """Check the field shapes produced by a reader with two thread workers.

        A COCO dataset is wrapped in a ``Reader`` configured with
        ``worker_num=2`` and ``use_process=False``. The loader is consumed
        twice, with a ``reset()`` after each pass, and every sample is
        checked field by field in the order given by ``inputs_def``.
        """
        dataset = COCODataSet(
            dataset_dir=self.root_path,
            image_dir=self.image_dir,
            anno_path=self.anno_path,
            sample_num=10)
        per_sample_ops = [
            DecodeImage(to_rgb=True),
            ResizeImage(target_size=800, max_size=1333, interp=1),
            Permute(to_bgr=False),
        ]
        per_batch_ops = [PadBatch(pad_to_stride=32, use_padded_im_info=True)]
        field_names = [
            'image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd',
            'gt_mask'
        ]

        reader = Reader(
            dataset,
            sample_transforms=per_sample_ops,
            batch_transforms=per_batch_ops,
            batch_size=2,
            shuffle=True,
            drop_empty=True,
            worker_num=2,
            use_process=False,
            bufsize=8,
            inputs_def={'fields': field_names})
        loader = reader()

        for _ in range(2):
            for batch in loader:
                for item in batch:
                    # Positions follow the order of `field_names`.
                    (image, im_info, im_id, gt_bbox, gt_class, is_crowd,
                     gt_mask) = (item[pos] for pos in range(7))

                    # Image: 3 in the leading axis, the next two axes are
                    # multiples of the padding stride (32).
                    self.assertEqual(image.shape[0], 3)
                    for axis in (1, 2):
                        self.assertEqual(image.shape[axis] % 32, 0)

                    # Expected size of the trailing axis of each array field.
                    trailing = (
                        (im_info, 3),
                        (im_id, 1),
                        (gt_bbox, 4),
                        (gt_class, 1),
                        (is_crowd, 1),
                    )
                    for array, width in trailing:
                        self.assertEqual(array.shape[-1], width)

                    # Per-box fields must have one entry per ground-truth box.
                    box_count = gt_bbox.shape[0]
                    self.assertEqual(gt_class.shape[0], box_count)
                    self.assertEqual(is_crowd.shape[0], box_count)
                    self.assertEqual(len(gt_mask), box_count)
                    self.assertEqual(gt_mask[0][0].shape[-1], 2)
            loader.reset()
    def test_val(self):
        """ Test reader for validation

        Runs the validation reader for three epochs and checks that each
        epoch yields at least ``_maxiter`` non-None samples.
        """
        reader = Reader(
            self.coco_conf['DATA'], self.coco_conf['TRANSFORM'], 10)
        val_rd = reader.val()
        self.assertIsNotNone(val_rd)

        # test 3 epochs
        epochs = 3
        for _ in range(epochs):
            seen = 0  # stays 0 when the epoch yields nothing
            for seen, item in enumerate(val_rd(), 1):
                self.assertIsNotNone(item)
            self.assertGreaterEqual(seen, reader._maxiter)
    def test_create(self):
        """ Test create a reader using my source

        Wraps a generator over ``build_source`` in an ``IteratorSource``,
        passes it to ``Reader.create`` as ``my_source`` and checks the
        shapes of the last sample the resulting reader yields.
        """

        def _source_samples():
            # The source is built lazily, when the generator is first advanced.
            for item in build_source(self.rcnn_conf['DATA']['TRAIN']):
                yield item

        custom_source = IteratorSource(_source_samples)
        mode = 'TRAIN'
        train_rd = Reader.create(
            mode,
            self.rcnn_conf['DATA'][mode],
            self.rcnn_conf['TRANSFORM'][mode],
            max_iter=10,
            my_source=custom_source)

        # The loop variable keeps the final sample; it stays None if the
        # reader yields nothing.
        last = None
        for last in train_rd():
            self.assertIsNotNone(last)

        # (field position, axis, expected size) for the first item of `last`.
        expected_dims = (
            (0, 0, 3),
            (1, 0, 3),
            (3, 1, 4),
            (4, 1, 1),
            (5, 1, 1),
        )
        for field, axis, size in expected_dims:
            self.assertEqual(last[0][field].shape[axis], size)
Example #5
0
    def test_loader(self):
        """Build and return a reader over a VOC dataset.

        Despite the ``test_`` prefix this method makes no assertions: it
        assembles the dataset, the sample/batch transforms and the input
        definition, then returns the object obtained by calling the
        configured ``Reader``.
        """
        # NOTE(review): `dataset_dir` receives self.image_dir and `image_dir`
        # receives self.root_path -- this looks swapped; confirm against
        # VOCDataSet before relying on it.
        voc_dataset = VOCDataSet(
            dataset_dir=self.image_dir,
            image_dir=self.root_path,
            anno_path=self.anno_path,
            sample_num=240,
            use_default_label=False,
            label_list='/path/to/your/fl_fruit/label_list.txt')

        per_sample_ops = [
            DecodeImage(to_rgb=True),
            RandomFlipImage(),
            NormalizeImage(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
                is_scale=True,
                is_channel_first=False),
            ResizeImage(target_size=800, max_size=1333, interp=1),
            Permute(to_bgr=False),
        ]
        per_batch_ops = [PadBatch(pad_to_stride=32, use_padded_im_info=True)]
        field_names = [
            'image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd'
        ]

        reader = Reader(
            voc_dataset,
            sample_transforms=per_sample_ops,
            batch_transforms=per_batch_ops,
            batch_size=1,
            shuffle=True,
            drop_empty=True,
            inputs_def={'fields': field_names})
        return reader()
    def test_rcnn_train(self):
        """ Test reader for training

        Returns early (after logging an error) when the configured
        annotation file does not exist. Otherwise iterates the training
        reader, checks the shapes of the last sample and that at least
        ``_maxiter`` samples were yielded.
        """
        anno = self.rcnn_conf['DATA']['TRAIN']['ANNO_FILE']
        if not os.path.exists(anno):
            logging.error('exit test_rcnn for not found file[%s]', anno)
            return

        reader = Reader(
            self.rcnn_conf['DATA'], self.rcnn_conf['TRANSFORM'], 10)
        rcnn_rd = reader.train()
        self.assertIsNotNone(rcnn_rd)

        seen = 0
        last = None  # stays None if the reader yields nothing
        for seen, last in enumerate(rcnn_rd(), 1):
            self.assertIsNotNone(last)

        # (field position, axis, expected size) for the first item of `last`.
        expected_dims = (
            (0, 0, 3),
            (1, 0, 3),
            (3, 1, 4),
            (4, 1, 1),
            (5, 1, 1),
        )
        for field, axis, size in expected_dims:
            self.assertEqual(last[0][field].shape[axis], size)
        self.assertGreaterEqual(seen, reader._maxiter)
Example #7
0
def create_reader(feed, max_iter=0, args_path=None, my_source=None):
    """
    Return iterable data reader.

    Translates the settings held by `feed` into the `data_config` /
    `transform_config` pair expected by `Reader.create`.

    Args:
        feed: data feed configuration object. The attributes `num_workers`,
            `batch_size`, `drop_last`, `use_padded_im_info`,
            `batch_transforms`, `sample_transforms` and `mode` are required;
            `bufsize`, `use_process` and `memsize` are optional and default
            to 10, False and '3G'. `num_scale` is read only when a
            `PadMSTest` batch transform is present.
        max_iter (int): number of iterations.
        args_path: forwarded unchanged to `_prepare_data_config`.
        my_source (callable): callable function to create a source iterator
            which is used to provide source data in 'ppdet.data.reader'

    Returns:
        The result of `Reader.create(feed.mode, data_config,
        transform_config, max_iter, my_source)`.
    """

    # if `DATASET_DIR` does not exist, search ~/.paddle/dataset for a
    # directory named `DATASET_DIR` (e.g., coco, pascal); if not present
    # either, download
    data_config = _prepare_data_config(feed, args_path)

    transform_config = {
        'WORKER_CONF': {
            'bufsize': getattr(feed, 'bufsize', 10),
            'worker_num': feed.num_workers,
            'use_process': getattr(feed, 'use_process', False),
            'memsize': getattr(feed, 'memsize', '3G'),
        },
        'BATCH_SIZE': feed.batch_size,
        'DROP_LAST': feed.drop_last,
        'USE_PADDED_IM_INFO': feed.use_padded_im_info,
    }

    batch_transforms = feed.batch_transforms
    pad = [t for t in batch_transforms if isinstance(t, PadBatch)]
    rand_shape = [t for t in batch_transforms if isinstance(t, RandomShape)]
    multi_scale = [t for t in batch_transforms if isinstance(t, MultiScale)]
    pad_ms_test = [t for t in batch_transforms if isinstance(t, PadMSTest)]

    # Test the lists for emptiness directly. `any(lst)` checks the
    # truthiness of the *elements*, so a transform instance that happens to
    # evaluate falsy would be silently ignored. Only the first transform of
    # each kind is consulted.
    if pad:
        transform_config['IS_PADDING'] = True
        if pad[0].pad_to_stride != 0:
            transform_config['COARSEST_STRIDE'] = pad[0].pad_to_stride
    if rand_shape:
        transform_config['RANDOM_SHAPES'] = rand_shape[0].sizes
    if multi_scale:
        transform_config['MULTI_SCALES'] = multi_scale[0].scales
    if pad_ms_test:
        transform_config['ENABLE_MULTISCALE_TEST'] = True
        transform_config['NUM_SCALE'] = feed.num_scale
        # Runs after the PadBatch branch, so it overrides that stride.
        transform_config['COARSEST_STRIDE'] = pad_ms_test[0].pad_to_stride

    # `getfullargspec` is unavailable on Python 2; fall back to `getargspec`.
    if hasattr(inspect, 'getfullargspec'):
        argspec = inspect.getfullargspec
    else:
        argspec = inspect.getargspec

    # Describe every sample transform as a dict holding the instance
    # attributes that are named after a positional `__init__` parameter,
    # plus its class name under the key 'op'.
    ops = []
    for op in feed.sample_transforms:
        init_args = set(argspec(type(op).__init__).args)
        init_args.discard('self')
        op_dict = {k: v for k, v in op.__dict__.items() if k in init_args}
        op_dict['op'] = op.__class__.__name__
        ops.append(op_dict)
    transform_config['OPS'] = ops

    return Reader.create(feed.mode, data_config, transform_config, max_iter,
                         my_source)