def __init__(self, args):
    """Resolve the data/results directories and gather the input file lists.

    Args:
        args: Parsed options. This constructor reads ``exec_mode``,
            ``data``, ``results`` and ``vpf`` from it.
    """
    self.args = args
    self.mode = self.args.exec_mode
    task_code = get_task_code(self.args)

    # In "test" mode both directory trees gain a trailing "test" component;
    # otherwise nothing is appended after the task code.
    mode_suffix = ("test",) if self.mode == "test" else ()
    self.data = os.path.join(self.args.data, task_code, *mode_suffix)
    self.results = os.path.join(self.args.results, task_code, *mode_suffix)

    self.vpf = self.args.vpf

    # File lists come from the instance's own load_files helper
    # (presumably image and label arrays -- confirm against load_files).
    self.imgs = self.load_files("*x.npy")
    self.lbls = self.load_files("*y.npy")
def __init__(self, args):
    """Initialise preprocessing state from parsed options and dataset.json.

    Args:
        args: Parsed options. This constructor reads ``task``,
            ``exec_mode``, ``data`` and ``results`` from it.

    Raises:
        OSError: If ``dataset.json`` cannot be opened under the data path.
        json.JSONDecodeError: If ``dataset.json`` is not valid JSON.
        KeyError: If the task/task-code lookups or the ``modality`` entry
            of the metadata are missing.
    """
    self.args = args

    # The ct_* fields start at zero; nothing in this constructor computes them.
    self.ct_min = 0
    self.ct_max = 0
    self.ct_mean = 0
    self.ct_std = 0
    self.target_spacing = None

    self.task = args.task
    self.task_code = get_task_code(args)
    self.patch_size = patch_size[self.task_code]
    self.training = args.exec_mode == "training"

    self.data_path = os.path.join(args.data, task[args.task])
    self.results = os.path.join(args.results, self.task_code)
    if not self.training:
        # Any non-training mode writes into a "test" sub-directory.
        self.results = os.path.join(self.results, "test")

    # Use a context manager so the metadata file handle is closed promptly
    # instead of being left for the garbage collector.
    with open(os.path.join(self.data_path, "dataset.json"), "r") as metadata_file:
        self.metadata = json.load(metadata_file)
    self.modality = self.metadata["modality"]["0"]

    self.crop_foreg = transforms.CropForegroundd(keys=["image", "label"], source_key="image")
    self.normalize_intensity = transforms.NormalizeIntensity(nonzero=True, channel_wise=True)
def __init__(self, args):
    """Prepare empty split containers, the fold splitter and loader options.

    Args:
        args: Parsed options. This constructor reads ``nfolds``, ``data``,
            ``exec_mode``, ``benchmark``, ``dim``, ``seed``, ``gpus``,
            ``num_workers``, ``oversampling`` and ``create_idx`` from it.
    """
    super().__init__()
    self.args = args

    # Record lists and index lists for every split start out empty.
    self.tfrecords_train, self.tfrecords_val, self.tfrecords_test = [], [], []
    self.train_idx, self.val_idx, self.test_idx = [], [], []

    # Fixed random_state keeps the shuffled fold assignment reproducible.
    self.kfold = KFold(n_splits=self.args.nfolds, shuffle=True, random_state=12345)

    data_root = os.path.join(self.args.data, get_task_code(self.args))
    predicting = self.args.exec_mode == "predict"
    if predicting and not args.benchmark:
        # Prediction outside benchmark mode reads from the "test" sub-directory.
        data_root = os.path.join(data_root, "test")
    self.data_path = data_root

    cfg = get_config_file(self.args)
    # Options bundled into one mapping; patch_size comes from the config
    # file, everything else directly from the parsed options.
    self.kwargs = dict(
        dim=self.args.dim,
        patch_size=cfg["patch_size"],
        seed=self.args.seed,
        gpus=self.args.gpus,
        num_workers=self.args.num_workers,
        oversampling=self.args.oversampling,
        create_idx=self.args.create_idx,
        benchmark=self.args.benchmark,
    )
def __init__(self, args):
    """Initialise preprocessing state from parsed options and dataset.json.

    Side effect: when ``args.exec_mode`` is ``"val"``, ``dataset.json`` is
    rewritten on disk with a ``"val"`` entry that mirrors its ``"training"``
    entry.

    Args:
        args: Parsed options. This constructor reads ``task``, ``verbose``,
            ``exec_mode``, ``data`` and ``results`` from it.

    Raises:
        OSError: If ``dataset.json`` cannot be opened (or rewritten in
            ``"val"`` mode).
        json.JSONDecodeError: If ``dataset.json`` is not valid JSON.
        KeyError: If the task/task-code lookups or the ``modality`` /
            ``training`` metadata entries are missing.
    """
    self.args = args
    self.target_spacing = None
    self.task = args.task
    self.task_code = get_task_code(args)
    self.verbose = args.verbose
    self.patch_size = patch_size[self.task_code]
    self.training = args.exec_mode == "training"

    self.data_path = os.path.join(args.data, task[args.task])
    metadata_path = os.path.join(self.data_path, "dataset.json")
    # Context manager guarantees the file handle is closed after parsing.
    with open(metadata_path, "r") as metadata_file:
        self.metadata = json.load(metadata_file)
    self.modality = self.metadata["modality"]["0"]

    self.results = os.path.join(args.results, self.task_code)
    # The ct_* fields start at zero; nothing in this constructor computes them.
    self.ct_min, self.ct_max, self.ct_mean, self.ct_std = (0,) * 4
    if not self.training:
        # Non-training runs write into a sub-directory named after the mode.
        self.results = os.path.join(self.results, self.args.exec_mode)

    self.crop_foreg = transforms.CropForegroundd(keys=["image", "label"], source_key="image")
    # Normalize only the non-zero region for non-CT modalities (e.g. MRI).
    nonzero = self.modality != "CT"
    self.normalize_intensity = transforms.NormalizeIntensity(nonzero=nonzero, channel_wise=True)

    if self.args.exec_mode == "val":
        # Shallow copy so that self.metadata itself does not gain the "val"
        # key; the content is the same as re-reading the file just parsed.
        dataset_json = dict(self.metadata)
        dataset_json["val"] = dataset_json["training"]
        with open(metadata_path, "w") as outfile:
            json.dump(dataset_json, outfile)
default="training", choices=["training", "test"], help="Mode for data preprocessing", ) parser.add_argument("--task", type=str, help="Number of task to be run. MSD uses numbers 01-10") parser.add_argument("--dim", type=int, default=3, choices=[2, 3], help="Data dimension to prepare") parser.add_argument("--n_jobs", type=int, default=-1, help="Number of parallel jobs for data preprocessing") parser.add_argument("--vpf", type=int, default=1, help="Volumes per tfrecord") if __name__ == "__main__": args = parser.parse_args() start = time.time() Preprocessor(args).run() Converter(args).run() task_code = get_task_code(args) path = os.path.join(args.data, task_code) if args.exec_mode == "test": path = os.path.join(path, "test") call(f'find {path} -name "*.npy" -print0 | xargs -0 rm', shell=True) end = time.time() print(f"Preprocessing time: {(end - start):.2f}")