def make_metrics(self, net, conv=True):
    """Calculate a table of dose metrics.

    :return: the DVH score and dose score for the "new_dose" relative to the "reference_dose"
    """
    num_batches = len(self.data_loader)
    dose_score_vec = np.zeros(num_batches)

    # Only make calculations if data_loader is not empty
    if num_batches == 0:
        print('No patient information was given to calculate metrics')
    else:
        # Batch sizes must be 1 so metrics are evaluated one patient at a time
        assert self.data_loader.batch_size == 1  # Loads data related to ground truth patient information
        if self.dose_loader is not None:
            assert self.dose_loader.batch_size == 1  # Loads data related to the predicted dose

        for idx in tqdm(range(num_batches)):
            # Get roi masks for patient
            self.get_constant_patient_features(idx)
            # Get dose tensors for reference dose and evaluate criteria
            reference_dose = self.get_patient_dose_tensor(self.data_loader)
            if reference_dose is not None:
                self.reference_dose_metric_df = self.calculate_metrics(
                    self.reference_dose_metric_df, reference_dose)
            else:
                raise ValueError('No reference dose could be loaded for patient index {}'.format(idx))

            # If a dose loader was provided, calculate the score
            if self.dose_loader is not None:
                new_dose = self.predict_patient_dose_tensor(self.dose_loader, net, conv=conv)
                # Make metric data frames
                self.new_dose_metric_df = self.calculate_metrics(self.new_dose_metric_df, new_dose)
                # Evaluate mean absolute error of the 3D dose within the possible dose mask
                dose_score_vec[idx] = np.sum(
                    np.abs(reference_dose - new_dose * self.possible_dose_mask.flatten())
                ) / np.sum(self.possible_dose_mask)
                # Save metrics at the patient level (this is a template for how DVH stream
                # participants could save their files)
                # self.dose_metric_df.loc[self.patient_list[0]].to_csv('{}.csv'.format(self.patient_list[0]))
            else:
                raise ValueError('A dose loader is required to score a new dose')

        if self.dose_loader is not None:
            dvh_score = np.nanmean(np.abs(self.reference_dose_metric_df - self.new_dose_metric_df).values)
            dose_score = dose_score_vec.mean()
            return dvh_score, dose_score
        else:
            print('No new dose provided. Metrics were only calculated for the provided dose.')
            raise ValueError('No new dose was provided, so no scores can be returned')
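
# The dose score above is just a masked mean absolute error. A minimal
# standalone sketch of the same computation (illustrative names, not part of
# the codebase); it assumes the reference dose is already zero outside the
# possible dose mask, as in the OpenKBP data:
def dose_score_sketch(reference_dose, new_dose, possible_dose_mask):
    mask = possible_dose_mask.flatten().astype('float64')
    # Mean absolute voxel error, normalized by the number of maskable voxels
    return np.sum(np.abs(reference_dose - new_dose * mask)) / np.sum(mask)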
def make_submission2D(config, net, data_loader, save_dir):
    assert data_loader.batch_size == 1
    with torch.no_grad():
        for img, (possible_dose_mask, item) in tqdm(data_loader):
            # Get patient ID and make a prediction
            pat_id = item['patient_list'][0][0]
            img = img[0].detach().cpu().numpy()
            # dose_pred_gy = net(img)
            dose_pred_gy = TTAflip(net, img)
            # Zero out negative doses and voxels outside the possible dose mask
            dose_pred_gy = (dose_pred_gy * (dose_pred_gy >= 0.)).astype('float64')
            dose_pred_gy = dose_pred_gy * possible_dose_mask.detach().cpu().numpy().astype('float64')
            # Prepare the dose to save
            dose_pred_gy = np.squeeze(dose_pred_gy)
            dose_to_save = sparse_vector_function(dose_pred_gy)
            dose_df = pd.DataFrame(data=dose_to_save['data'].squeeze(),
                                   index=dose_to_save['indices'].squeeze(),
                                   columns=['data'])
            dose_df.to_csv('{}/{}.csv'.format(save_dir, pat_id))
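
# sparse_vector_function is consumed above via its 'data' and 'indices' keys.
# A sketch of the sparse encoding it is assumed to produce (mirroring the
# OpenKBP starter-kit helper): keep only non-zero voxels and their flat indices.
def sparse_vector_function_sketch(x):
    flat = x.flatten()
    indices = np.nonzero(flat)[0]  # flat indices of non-zero voxels
    return {'data': flat[indices], 'indices': indices}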
def save_pred_csvs(net, dl3D, dl2D, offset_lists, args, setname='test', fold=0):
    # Make predictions
    dose_evaluator = EvaluateDose2D(config,
                                    net=net,
                                    data_loader=dl3D,
                                    dose_loader=dl2D,
                                    offset_lists=offset_lists,
                                    load_cache=not args.noloadc,
                                    store_cache=not args.nostorec,
                                    cache_dir='{}/{}/'.format(setname, args.metric),
                                    conv=False,
                                    evalbs=args.bs)
    if not args.nocsv:
        # Save in csv; check 'maintest' first because it also contains 'test'
        if 'maintest' in setname:
            SAVE_DIR = './subm/{}/{}_main_fold{}'.format(config.exp_name, config.exp_name, fold)
        elif 'test' in setname:
            SAVE_DIR = './subm/{}/{}_fold{}'.format(config.exp_name, config.exp_name, fold)
        elif 'localval' in setname:
            SAVE_DIR = './subm/{}/{}_localval_fold{}'.format(config.exp_name, config.exp_name, fold)
        else:
            raise ValueError
        if not os.path.exists(SAVE_DIR):
            os.makedirs(SAVE_DIR)
        with torch.no_grad():
            is_train = dl3D.dataset.training
            dl3D.dataset.training = False
            for i, (_, (possible_dose_mask, item)) in enumerate(tqdm(dl3D)):
                pat_id = item['patient_list'][0][0]
                dose_pred_gy = dose_evaluator.preds[i]  # (1, 128, 128, 128)
                assert dose_pred_gy.shape == (1, 128, 128, 128), dose_pred_gy.shape
                dose_pred_gy = (dose_pred_gy * (dose_pred_gy >= 0.)).astype('float64')
                dose_pred_gy = dose_pred_gy * possible_dose_mask.detach().cpu().numpy().astype('float64')
                dose_pred_gy = np.squeeze(dose_pred_gy)
                dose_to_save = sparse_vector_function(dose_pred_gy)
                dose_df = pd.DataFrame(data=dose_to_save['data'].squeeze(),
                                       index=dose_to_save['indices'].squeeze(),
                                       columns=['data'])
                dose_df.to_csv('{}/{}.csv'.format(SAVE_DIR, pat_id))
            dl3D.dataset.training = is_train
        if not args.nozip:
            # Zip dose to submit
            save_path = shutil.make_archive('{}_{}'.format(SAVE_DIR, args.metric), 'zip', SAVE_DIR)
            print('Saved to: ', '/'.join(save_path.split('/')[-3:]))
        else:
            print('Saved to: ', '/'.join(SAVE_DIR.split('/')[-3:]))
        gc.collect()
        # Exit after writing CSVs; the evaluator is only returned when csv saving is disabled
        sys.exit()
    return dose_evaluator
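
# The CSVs written above can be decoded back into a dense volume. A sketch of
# the round trip (load_dense_dose is a hypothetical helper; the fold-ensembling
# script further below uses the same np.put pattern):
def load_dense_dose(csv_path):
    df = pd.read_csv(csv_path, index_col=0)  # index holds flat voxel indices
    dose = np.zeros((128, 128, 128), dtype='float64')
    np.put(dose, df.index.values, df['data'].values)  # scatter sparse values
    return dose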
def __init__(self, config, net, data_loader, dose_loader=None, TTA_shift_by=4,
             offset_lists=[np.arange(-3, 4)], conv=True, load_cache=True,
             store_cache=True, cache_dir='', evalbs=128):
    """
    Prepare the class for evaluating dose distributions
    :param data_loader: a data loader object that loads data from the reference dataset
    :param dose_loader: a data loader object that loads a dose tensor from any dataset (e.g., predictions)
    """
    # Initialize objects
    self.config = config
    self.data_loader = data_loader  # Loads data related to ground truth patient information
    self.dose_loader = dose_loader  # Loads the data for a benchmark dose
    self.TTA_shift_by = TTA_shift_by

    # Initialize objects for later
    self.patient_list = None
    self.roi_mask = None
    self.new_dose = None
    self.reference_dose = None
    self.voxel_size = None
    self.possible_dose_mask = None

    # Set metrics to be evaluated
    self.oar_eval_metrics = ['D_0.1_cc', 'mean']
    self.tar_eval_metrics = ['D_99', 'D_95', 'D_1']

    # Name metrics for data frame
    oar_metrics = list(it_product(self.oar_eval_metrics,
                                  self.data_loader.dataset.defdataset.rois['oars']))
    target_metrics = list(it_product(self.tar_eval_metrics,
                                     self.data_loader.dataset.defdataset.rois['targets']))

    # Make data frame to store dose metrics and the difference data frame
    self.metric_difference_df = pd.DataFrame(
        index=self.data_loader.dataset.defdataset.patient_id_list,
        columns=[*oar_metrics, *target_metrics])
    self.reference_dose_metric_df = self.metric_difference_df.copy()
    self.new_dose_metric_df = self.metric_difference_df.copy()

    if net is not None:
        net.eval()
        with torch.no_grad():
            # Accumulate offset-averaged predictions for every patient
            self.preds = [np.zeros((1, 128, 128, 128)) for i in range(len(data_loader))]
            for off in offset_lists:
                print("Offset list: ", off)
                off_id = make_offset_id(off)
                CACHE_PATH = './preds/{}/{}{}'.format(config.exp_name, cache_dir, off_id)
                if store_cache and not os.path.exists(CACHE_PATH):
                    os.makedirs(CACHE_PATH)
                loaded = False
                if load_cache:
                    if len(glob.glob(os.path.join(CACHE_PATH, '*.npy'))) != len(data_loader):
                        print(CACHE_PATH)
                        print('Did not find enough cached files for offset {}, predicting...'.format(off_id))
                    else:
                        print('Loading offset {} from cache...'.format(off_id))
                        for i in range(len(data_loader)):
                            pat_id = os.path.basename(data_loader.dataset.data_df.loc[i, 'Id'])
                            curpred = np.load(os.path.join(CACHE_PATH, '{}.npy'.format(pat_id))).astype('float32')
                            self.preds[i] += curpred / len(offset_lists)
                        loaded = True
                if not loaded:
                    evalds = EvalDataset(dose_loader.dataset, off)
                    evaldl = DataLoader(evalds, batch_size=1, shuffle=False, num_workers=2)
                    print('Making predictions from network...')
                    for i, img in enumerate(tqdm(evaldl)):
                        curpred = TTAflip(net, img[0], axis=self.config.axis,
                                          shift_by=self.TTA_shift_by, conv=conv, evalbs=evalbs)
                        if type(curpred) == torch.Tensor:
                            curpred = curpred.numpy()
                        curpred = np.moveaxis(curpred, 0, config.axis)  # (1, 128, 128, 128)
                        if conv:
                            print('conv...')
                            curpred = convolve(curpred[0], gau_filter3, mode='same')[None, :]
                        if config.resample is not None:
                            # Resample the prediction back to the original voxel spacing
                            voxel_sz = evaldl.dataset.ds.originalvoxelsz[i]
                            resampled_sz = config.resample.copy()
                            resampled_sz[2] = voxel_sz[0, 2]
                            curpred = resample(curpred, np.array(resampled_sz)[None], voxel_sz[0])
                        if store_cache:
                            # Cache at half precision to keep the prediction files small
                            curpredhalf = curpred.astype('float16')
                            pat_id = os.path.basename(dose_loader.dataset.data_df.loc[i, 'Id'])
                            np.save(os.path.join(CACHE_PATH, '{}.npy'.format(pat_id)), curpredhalf)
                        self.preds[i] += curpred / len(offset_lists)
            print('Done inference! Making metrics...')
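
# TTAflip is used above but not defined here. A minimal sketch of flip-based
# test-time augmentation in its spirit (the real helper also handles the slice
# axis, shift_by offsets, optional smoothing, and evalbs batching):
def tta_flip_sketch(net, img):
    # img: (N, C, H, W) tensor of input slices
    with torch.no_grad():
        pred = net(img)
        # Predict on the mirrored input, then un-mirror the prediction
        pred_flip = torch.flip(net(torch.flip(img, dims=[-1])), dims=[-1])
    return 0.5 * (pred + pred_flip)  # average the two views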
loss_func = KBPLoss(config)
history_df = []
if not os.path.exists('./model_weights/{}/models'.format(config.exp_name)):
    os.makedirs('./model_weights/{}/models'.format(config.exp_name))
if not os.path.exists('./logs/{}'.format(config.exp_name)):
    os.makedirs('./logs/{}'.format(config.exp_name))
best_loss, best_dose, best_dvh = np.inf, np.inf, np.inf
evalbatchaccum = EvalBatchAccumulator(config, target_bs=128, num_metrics=4)

for epoch in range(opt.epoch_count, opt.niter + opt.niter_decay + 1):
    epoch_start_time = time.time()
    iter_data_time = time.time()
    epoch_iter = 0
    for i, data in enumerate(tqdm(dataset, total=len(dataset.dataloader))):
        iter_start_time = time.time()
        if total_steps % opt.print_freq == 0:
            t_data = iter_start_time - iter_data_time
        # visualizer.reset()
        total_steps += opt.batchSize
        epoch_iter += opt.batchSize
        model.set_input(data)
        model.optimize_parameters()
        # if total_steps % opt.display_freq == 0:
        #     save_result = total_steps % opt.update_html_freq == 0
        #     visualizer.display_current_results(model.get_current_visuals(),
        #                                        epoch, save_result)
        # if total_steps % opt.print_freq == 0:
        #     errors = model.get_current_errors()
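
# The epoch range opt.niter + opt.niter_decay follows the pix2pix/CycleGAN
# option convention: a constant learning rate for `niter` epochs, then a linear
# decay to zero over `niter_decay` epochs. A sketch of that schedule, assuming
# these option names carry their usual meaning here:
def make_linear_decay_scheduler(optimizer, opt):
    def lambda_rule(epoch):
        return 1.0 - max(0, epoch + opt.epoch_count - opt.niter) / float(opt.niter_decay + 1)
    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_rule)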
                    help='Evaluate on the maintest dataset')
args = parser.parse_args()
assert args.exp is not None

if args.maintest:
    pats = ['pt_{}'.format(i) for i in range(241, 341)]
    SAVE_DIR = './subm/{}/{}_main_ensemble'.format(args.exp, args.exp)
else:
    pats = ['pt_{}'.format(i) for i in range(201, 241)]
    SAVE_DIR = './subm/{}/{}_ensemble'.format(args.exp, args.exp)
if not os.path.exists(SAVE_DIR):
    os.makedirs(SAVE_DIR)

for pt in tqdm(pats):
    pt_preds = []
    for fold in range(5):
        if args.maintest:
            pt_fold = './subm/{}/{}_main_fold{}/{}.csv'.format(args.exp, args.exp, fold, pt)
        else:
            pt_fold = './subm/{}/{}_fold{}/{}.csv'.format(args.exp, args.exp, fold, pt)
        pt_pred_csv = load_file(pt_fold)
        # Scatter the sparse CSV values back into a dense volume
        pt_pred = np.zeros((128, 128, 128, 1), dtype='float64')
        np.put(pt_pred, pt_pred_csv['indices'], pt_pred_csv['data'])
        pt_preds.append(pt_pred[None, :, :, :, 0])
    pt_preds = np.concatenate(pt_preds)
    assert pt_preds.shape == (5, 128, 128, 128), pt_preds.shape
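    # The listing stops at the shape assert above; a sketch (not the repo's
    # verbatim code) of the step that would naturally follow: average the five
    # folds and save in the same sparse-CSV format used by the per-fold scripts.
    pt_pred_mean = pt_preds.mean(axis=0)  # (128, 128, 128)
    dose_to_save = sparse_vector_function(pt_pred_mean)
    dose_df = pd.DataFrame(data=dose_to_save['data'].squeeze(),
                           index=dose_to_save['indices'].squeeze(),
                           columns=['data'])
    dose_df.to_csv('{}/{}.csv'.format(SAVE_DIR, pt))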
def __init__(self, config, data_df, training=True, valid=False, transform=None):
    print('Using Concat Dataset')
    self.config = copy.deepcopy(config)
    self.data_df = data_df.copy()
    self.training = training
    self.valid = valid
    self.transform = transform
    self.data_df['loadj'] = config.loadjaehee
    if config.loadjaehee and config.nloadjaehee > 0:
        # Skip the extra features for the last nloadjaehee patients
        self.data_df.loc[len(self.data_df) - config.nloadjaehee:, 'loadj'] = False
    assert config.axis in [1, 2, 3]
    self.axis = config.axis
    print('Loading data along axis: {}'.format(self.axis))
    self.mode = 'training_model' if training else 'dose_prediction'
    self.defdataset = DefaultDataLoader(self.data_df['Id'], batch_size=1, shuffle=False,
                                        mode_name=self.mode, pseudo_path=config.pseudo_path)
    self.imgcache = []
    self.targetcache = []
    self.pdmcache = []
    self.smcache = []
    self.voxelcache = []
    self.originalvoxelsz = []
    self.jaeheecache = []
    # Map a flat sample index to a (patient, slice) pair
    self.pos_map = [(i, j) for i in range(len(self.data_df)) for j in range(128)]
    for i in tqdm(range(len(self.data_df))):
        item = self.defdataset.get_batch(index=i)
        img = item['ct'][:, :, :, :, 0].astype('int16')
        if self.training:
            target = item['dose'][:, :, :, :, 0].astype('float16')
        else:
            target = img.copy()
        pdm = item['possible_dose_mask'][:, :, :, :, 0].astype('bool')
        sm = np.moveaxis(item['structure_masks'][0].astype('bool'), -1, 0)
        voxel = item['voxel_dimensions'].astype('float32')
        self.originalvoxelsz.append(np.copy(voxel))
        # Resampling
        if config.resample is not None:
            img = resample(img, voxel, config.resample.copy())
            pdm = resample(pdm, voxel, config.resample.copy())
            sm = resample(sm, voxel, config.resample.copy())
            target = resample(target, voxel, config.resample.copy())
            voxel[0, :2] = config.resample[:2]
        if config.imgmulpdm:
            img *= pdm
        self.imgcache.append(np.ascontiguousarray(img))
        self.targetcache.append(np.ascontiguousarray(target))
        self.pdmcache.append(np.ascontiguousarray(pdm))
        self.smcache.append(np.ascontiguousarray(sm))
        self.voxelcache.append(np.ascontiguousarray(voxel))
        if config.addjaehee and self.data_df.loc[i, 'loadj']:
            assert self.config.axis == 3
            pat_id = self.data_df.loc[i, 'Id'].split('/')[-1].split('_')[1]
            while len(pat_id) < 3:
                pat_id = '0' + pat_id
            jc = []
            for sl in range(128):
                jc.append(np.load('./data/data_Jaehee/comb_data/{}_Feature_{}.npy'.format(pat_id, sl)))
            self.jaeheecache.append(np.concatenate(jc, axis=3))
            if self.config.addtargets:
                assert self.jaeheecache[-1].shape == (27, 128, 128, 128), self.jaeheecache[-1].shape
            else:
                assert self.jaeheecache[-1].shape == (23, 128, 128, 128), self.jaeheecache[-1].shape
            assert self.jaeheecache[-1].dtype == np.float16

    self.notargetreplace = config.notargetreplace
    if self.training and not self.valid and self.notargetreplace is not None:
        # Weight slices by how much of the possible dose mask / structure masks they contain
        self.notargetreplaceweights = {'pdm': [], 'sm': []}
        for i in range(len(self.pdmcache)):
            pdmsum = self.pdmcache[i].sum(tuple([ax for ax in range(4) if ax != config.axis]))
            smsum = self.smcache[i].sum(tuple([ax for ax in range(4) if ax != config.axis]))
            self.notargetreplaceweights['pdm'].append(pdmsum / pdmsum.sum())
            self.notargetreplaceweights['sm'].append(smsum / smsum.sum())
        # Keep only slices that intersect the possible dose mask
        self.pos_map = []
        for i in range(len(self.pdmcache)):
            nzind = np.where(self.notargetreplaceweights['pdm'][i])[0]
            for j in nzind:
                self.pos_map.append((i, j))

    # For profiling: a small shared array so dataloader workers can report timing
    shared_array_base = mp.Array(ctypes.c_float, 3)
    shared_array = np.ctypeslib.as_array(shared_array_base.get_obj())
    self.timeindexing = shared_array
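
# pos_map maps a flat dataset index to a (patient, slice) pair. A sketch of how
# a __getitem__ could consume it (illustrative only; the repo's real method also
# builds the neighbouring-slice stack and applies self.transform):
def getitem_sketch(self, index):
    patient_idx, slice_idx = self.pos_map[index]
    img3d = self.imgcache[patient_idx]                 # (1, 128, 128, 128)
    img2d = np.take(img3d, slice_idx, axis=self.axis)  # one 2D slice along config.axis
    return img2d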
def __init__(self, config, data_df, training=True, valid=False, transform=None):
    print('Using Stack Dataset')
    self.config = config
    self.data_df = data_df
    self.training = training
    self.valid = valid
    self.transform = transform
    assert config.axis in [1, 2, 3]
    self.axis = config.axis
    print('Loading data along axis: {}'.format(self.axis))
    assert config.notargetreplace is None
    self.mode = 'training_model' if training else 'dose_prediction'
    self.defdataset = DefaultDataLoader(self.data_df['Id'], batch_size=1, shuffle=False,
                                        mode_name=self.mode)
    # The offset list must be symmetric around zero, with zero at its center
    assert np.array(config.offset_list).sum() == 0
    assert config.offset_list[len(config.offset_list) // 2] == 0
    # 12 channels per slice (CT + possible dose mask + 10 structure masks),
    # with zero padding between patients so offsets never cross patient boundaries
    totalch = 12 * (128 * len(data_df) + len(config.offset_list) // 2 * (len(data_df) + 1))
    self.imgcache = np.zeros((totalch, 128, 128), dtype='int16')
    self.targetcache = []
    self.voxelcache = []
    self.originalvoxelsz = []
    cache_idx = 12 * (len(config.offset_list) // 2)
    for i in tqdm(range(len(self.data_df))):
        item = self.defdataset.get_batch(index=i)
        img, pdm, sm = item['ct'], item['possible_dose_mask'], item['structure_masks']
        # The inputs must be exactly representable as int16
        assert np.array_equal(img, img.astype('int16').astype('float64'))
        assert np.array_equal(pdm, pdm.astype('int16').astype('float64'))
        assert np.array_equal(sm, sm.astype('int16').astype('float64'))
        if self.training:
            target = item['dose'][:, :, :, :, 0].astype('float32')
        else:
            target = np.zeros_like(img[:, :, :, :, 0])
        voxel = item['voxel_dimensions'].astype('float32')
        self.originalvoxelsz.append(np.copy(voxel))
        if config.resample is not None:
            img = resample(img, voxel, config.resample.copy())
            pdm = resample(pdm, voxel, config.resample.copy())
            sm = resample(sm, voxel, config.resample.copy())
            target = resample(target, voxel, config.resample.copy())
            voxel[0, :2] = config.resample[:2]
        if config.imgmulpdm:
            img *= pdm
        img = img.astype('int16')[0]
        pdm = pdm.astype('int16')[0]
        sm = sm.astype('int16')[0]
        # Stack CT, mask and structures into 12 channels, then flatten the slice
        # axis so each patient occupies a contiguous run of 12 * 128 channels
        img = np.moveaxis(np.concatenate((img, pdm, sm), axis=3), 3, 0)
        img = np.reshape(np.moveaxis(img, config.axis, 1), (12 * 128, 128, 128), order='F')
        self.imgcache[cache_idx:cache_idx + 12 * 128] = img
        cache_idx += 12 * (128 + len(config.offset_list) // 2)
        self.targetcache.append(np.ascontiguousarray(target))
        self.voxelcache.append(np.ascontiguousarray(voxel))

    # For profiling: a small shared array so dataloader workers can report timing
    shared_array_base = mp.Array(ctypes.c_float, 3)
    shared_array = np.ctypeslib.as_array(shared_array_base.get_obj())
    self.timeindexing = shared_array
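
# A worked check of the imgcache index arithmetic above, assuming an offset
# list of length 7, i.e. pad = 7 // 2 = 3 zero slices between patients:
n_patients, pad = 4, 3
totalch = 12 * (128 * n_patients + pad * (n_patients + 1))
cache_idx = 12 * pad  # leading zero-pad before the first patient
for i in range(n_patients):
    # patient i's 12-channel slices occupy [cache_idx, cache_idx + 12 * 128)
    cache_idx += 12 * (128 + pad)  # skip the patient block plus one pad block
assert cache_idx == totalch  # the layout exactly fills the cache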
test_dl3D = DataLoader(test_ds3D, batch_size=1, shuffle=False,
                       num_workers=configs[0].num_workers)
dose_evaluators = [
    get_eval_exp(config, test_dl3D, fpath, setname='test') for config in configs
]
# Average the predictions of all experiments, in place on the first evaluator
for pt in range(len(dose_evaluators[0].preds)):
    for exp in range(1, len(dose_evaluators)):
        dose_evaluators[0].preds[pt] += dose_evaluators[exp].preds[pt]
    dose_evaluators[0].preds[pt] /= len(dose_evaluators)

for i, (_, (possible_dose_mask, item)) in enumerate(tqdm(test_dl3D)):
    pat_id = item['patient_list'][0][0]
    dose_pred_gy = dose_evaluators[0].preds[i]  # (1, 128, 128, 128)
    assert dose_pred_gy.shape == (1, 128, 128, 128), dose_pred_gy.shape
    dose_pred_gy = (dose_pred_gy * (dose_pred_gy >= 0.)).astype('float64')
    dose_pred_gy = dose_pred_gy * possible_dose_mask.detach().cpu().numpy().astype('float64')
    dose_pred_gy = np.squeeze(dose_pred_gy)
    dose_to_save = sparse_vector_function(dose_pred_gy)
    dose_df = pd.DataFrame(data=dose_to_save['data'].squeeze(),
                           index=dose_to_save['indices'].squeeze(),
                           columns=['data'])
    dose_df.to_csv('{}/{}.csv'.format(SAVE_DIR, pat_id))

# Zip dose to submit
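# A sketch of the zipping step the trailing comment points to, following the
# same shutil.make_archive pattern used in save_pred_csvs:
save_path = shutil.make_archive(SAVE_DIR, 'zip', SAVE_DIR)
print('Saved to: ', '/'.join(save_path.split('/')[-3:]))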