def prepare_datalist(args):
    dimensions = args.dimensions
    dataset_json = os.path.join(args.output, 'dataset.json')

    if not os.path.exists(dataset_json):
        logging.info('Processing dataset...')
        with open(args.dataset_json) as f:
            datalist = json.load(f)

        # Convert the source datalist into Deepgrow-ready 2D/3D samples under args.output
        datalist = create_dataset(
            datalist=datalist[args.datalist_key],
            base_dir=args.dataset_root,
            output_dir=args.output,
            dimension=dimensions,
            pixdim=[1.0] * dimensions,
            limit=args.limit,
            relative_path=args.relative_path,
        )

        with open(dataset_json, 'w') as fp:
            json.dump(datalist, fp, indent=2)
    else:
        logging.info('Pre-load existing dataset.json')

    # Re-read the generated dataset.json so both branches end up with the same datalist
    with open(dataset_json) as f:
        datalist = json.load(f)

    logging.info('+++ Dataset File: {}'.format(dataset_json))
    logging.info('+++ Total Records: {}'.format(len(datalist)))
    logging.info('')
def test_create_dataset_3d(self):
    with tempfile.TemporaryDirectory() as tempdir:
        datalist = self._create_data(tempdir)
        output_dir = os.path.join(tempdir, "3d")
        deepgrow_datalist = create_dataset(datalist=datalist, output_dir=output_dir, dimension=3, pixdim=(1, 1, 1))
        self.assertEqual(len(deepgrow_datalist), 1)
        self.assertEqual(deepgrow_datalist[0]["region"], 1)
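# A minimal sketch of the _create_data helper the test above assumes: it writes one synthetic
# image/label pair to disk with nibabel and returns a datalist of {"image", "label"} entries for
# create_dataset. The file names, array shape, and labelled voxels are illustrative assumptions,
# not the actual test fixture.
import os

import nibabel as nib
import numpy as np


def _create_data(self, tempdir):
    affine = np.eye(4)

    image = np.random.randint(0, 2, size=(128, 128, 40)).astype(np.float32)
    image_file = os.path.join(tempdir, "img1.nii.gz")
    nib.save(nib.Nifti1Image(image, affine), image_file)

    # A single labelled region (value 1), which is what test_create_dataset_3d checks via "region"
    label = np.zeros((128, 128, 40), dtype=np.float32)
    label[48:64, 48:64, 20] = 1
    label_file = os.path.join(tempdir, "label1.nii.gz")
    nib.save(nib.Nifti1Image(label, affine), label_file)

    return [{"image": image_file, "label": label_file}]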
def pre_process(self, request, datastore: Datastore):
    self.cleanup(request)

    cache_dir = self.get_cache_dir(request)
    output_dir = os.path.join(cache_dir, f"deepgrow_{self.dimension}D_train")
    logger.info(f"Preparing Dataset for Deepgrow-{self.dimension}D:: {output_dir}")

    datalist = create_dataset(
        datalist=datastore.datalist(),
        base_dir=None,
        output_dir=output_dir,
        dimension=self.dimension,
        pixdim=[1.0] * self.dimension,
    )

    logger.info(f"+++ Total Records: {len(datalist)}")
    return datalist
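# For reference, each record returned by create_dataset points at a pre-processed image/label pair
# saved under output_dir, plus the label index it was extracted for. The "region" key is the one
# the tests in this section assert on; the sample file paths below are illustrative assumptions.
#
# [
#     {"image": ".../deepgrow_3D_train/images/img1.npy",
#      "label": ".../deepgrow_3D_train/labels/img1.npy",
#      "region": 1},
#     ...
# ]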
def prepare_datalist(args):
    dimensions = args.dimensions
    dataset_json = os.path.join(args.output, 'dataset.json')

    logging.info('Processing dataset...')
    with open(args.dataset_json) as f:
        datalist = json.load(f)

    datalist = create_dataset(
        datalist=datalist[args.datalist_key],
        base_dir=args.dataset_root,
        output_dir=args.output,
        dimension=dimensions,
        pixdim=[1.0] * dimensions,
        limit=args.limit,
        relative_path=args.relative_path,
    )

    with open(dataset_json, 'w') as fp:
        json.dump(datalist, fp, indent=2)

    with open(dataset_json) as f:
        datalist = json.load(f)

    logging.info('+++ Dataset File: {}'.format(dataset_json))
    logging.info('+++ Total Records: {}'.format(len(datalist)))
    logging.info('')

    train_ds, val_ds = partition_dataset(datalist, ratios=[args.split, (1 - args.split)], shuffle=True, seed=args.seed)

    dataset_json = os.path.join(args.output, 'dataset_0.json')
    with open(dataset_json, 'w') as fp:
        json.dump({'training': train_ds, 'validation': val_ds}, fp, indent=2)

    logging.info('*** Dataset File: {}'.format(dataset_json))
    logging.info('*** Total Records for Training: {}'.format(len(train_ds)))
    logging.info('*** Total Records for Validation: {}'.format(len(val_ds)))

    assert len(train_ds) > 0, "Train Dataset/Records is EMPTY"
    assert len(val_ds) > 0, "Validation Dataset/Records is EMPTY"
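# A runnable sketch of a command-line entry point around prepare_datalist. The flag names mirror
# the attributes the function reads (args.dimensions, args.dataset_json, args.datalist_key, ...);
# the defaults and the __main__ wiring are illustrative assumptions, not the original script.
import argparse
import json
import logging
import os

from monai.apps.deepgrow.dataset import create_dataset
from monai.data import partition_dataset


def main():
    parser = argparse.ArgumentParser(description="Prepare a Deepgrow training datalist")
    parser.add_argument("--dimensions", type=int, default=3, choices=[2, 3])
    parser.add_argument("--dataset_root", required=True, help="folder containing the source images/labels")
    parser.add_argument("--dataset_json", required=True, help="path to the source dataset.json")
    parser.add_argument("--datalist_key", default="training")
    parser.add_argument("--output", required=True, help="folder to write the Deepgrow samples into")
    parser.add_argument("--limit", type=int, default=0, help="0 means no limit on processed records")
    parser.add_argument("--relative_path", action="store_true")
    parser.add_argument("--split", type=float, default=0.9, help="training fraction for partition_dataset")
    parser.add_argument("--seed", type=int, default=42)
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO)
    prepare_datalist(args)


if __name__ == "__main__":
    main()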
def test_empty_datalist(self):
    with self.assertRaises(ValueError):
        create_dataset(datalist=[], output_dir=self.tempdir, dimension=3, pixdim=(1, 1, 1))
def test_invalid_dim(self):
    with self.assertRaises(ValueError):
        create_dataset(datalist=self._create_data(), output_dir=self.tempdir, dimension=4, pixdim=(1, 1, 1, 1))
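# A minimal sketch of the fixture the two tests above depend on: self.tempdir is assumed to be a
# fresh scratch directory per test, cleaned up afterwards. The class name and setup/teardown
# details are assumptions; the real test class may differ.
import shutil
import tempfile
import unittest


class TestCreateDataset(unittest.TestCase):
    def setUp(self):
        self.tempdir = tempfile.mkdtemp()

    def tearDown(self):
        shutil.rmtree(self.tempdir)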
def test_create_dataset(self, args, data_args, expected_length, expected_region):
    datalist = self._create_data(**data_args)
    deepgrow_datalist = create_dataset(datalist=datalist, output_dir=self.tempdir, **args)
    self.assertEqual(len(deepgrow_datalist), expected_length)
    if expected_region is not None:
        self.assertEqual(deepgrow_datalist[0]["region"], expected_region)
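# The test above is driven by parameterized cases supplying (args, data_args, expected_length,
# expected_region). A sketch of one case, assuming the parameterized package; the concrete values
# are illustrative only.
from parameterized import parameterized

TEST_CASE_3D = [
    {"dimension": 3, "pixdim": (1, 1, 1)},  # args forwarded to create_dataset
    {},                                     # data_args forwarded to self._create_data
    1,                                      # expected_length
    1,                                      # expected_region
]

# Applied on the test method, e.g.:
# @parameterized.expand([TEST_CASE_3D])
# def test_create_dataset(self, args, data_args, expected_length, expected_region): ...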