def make_metrics(self, net, conv=True):
        """Calculate a table of
        :return: the DVH score and dose score for the "new_dose" relative to the "reference_dose"
        """
        num_batches = len(self.data_loader)
        dose_score_vec = np.zeros(num_batches)

        # Only make calculations if data_loader is not empty
        if num_batches == 0:
            print('No patient information was given to calculate metrics')
        else:
            # Both loaders must use a batch size of 1
            assert self.data_loader.batch_size == 1  # Loads ground truth patient information
            if self.dose_loader is not None:
                assert self.dose_loader.batch_size == 1  # Loads the dose to be evaluated

            for idx in tqdm(range(num_batches)):
                # Get roi masks for patient
                self.get_constant_patient_features(idx)
                # Get dose tensors for reference dose and evaluate criteria
                reference_dose = self.get_patient_dose_tensor(self.data_loader)
                if reference_dose is not None:
                    self.reference_dose_metric_df = self.calculate_metrics(
                        self.reference_dose_metric_df, reference_dose)
                else:
                    raise ValueError(
                        'Could not load a reference dose for patient index {}'.format(idx))
                # If a dose loader was provided, calculate the score
                if self.dose_loader is not None:
                    new_dose = self.predict_patient_dose_tensor(
                        self.dose_loader, net, conv=conv)
                    # Make metric data frames
                    self.new_dose_metric_df = self.calculate_metrics(
                        self.new_dose_metric_df, new_dose)
                    # Evaluate the mean absolute error of the 3D dose inside the
                    # possible-dose mask (the reference dose is zero outside the
                    # mask, so masking only new_dose is sufficient)
                    dose_score_vec[idx] = np.sum(
                        np.abs(reference_dose - new_dose *
                               self.possible_dose_mask.flatten())) / np.sum(
                                   self.possible_dose_mask)
                    # Save metrics at the patient level (this is a template for how DVH stream
                    # participants could save their files)
                    # self.dose_metric_df.loc[self.patient_list[0]].to_csv('{}.csv'.format(self.patient_list[0]))
                else:
                    raise ValueError('A dose_loader must be provided to compute the dose score')

            if self.dose_loader is not None:
                dvh_score = np.nanmean(
                    np.abs(self.reference_dose_metric_df -
                           self.new_dose_metric_df).values)
                dose_score = dose_score_vec.mean()
                return dvh_score, dose_score
            else:
                raise ValueError(
                    'No new dose provided. Metrics were only calculated for the provided dose.'
                )
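# For context, the DVH score computed above is the mean absolute difference
# between corresponding entries of the two metric data frames, ignoring NaNs.
# A minimal, self-contained sketch with toy values (metric names illustrative,
# not the full metric set):
def _dvh_score_example():
    import numpy as np
    import pandas as pd
    ref_df = pd.DataFrame({'D_95': [60.0, 58.0], 'mean': [20.0, np.nan]})
    new_df = pd.DataFrame({'D_95': [59.0, 57.5], 'mean': [21.0, np.nan]})
    # nanmean skips metrics that do not apply to a given patient
    return np.nanmean(np.abs(ref_df - new_df).values)  # (1.0 + 0.5 + 1.0) / 3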
def make_submission2D(config, net, data_loader, save_dir):
    assert data_loader.batch_size == 1
    with torch.no_grad():
        for img, (possible_dose_mask, item) in tqdm(data_loader):
            # Get patient ID and make a prediction
            pat_id = item['patient_list'][0][0]
            img = img[0].detach().cpu().numpy()
            # dose_pred_gy = net(img)  # plain forward pass, replaced by flip TTA
            dose_pred_gy = TTAflip(net, img)
            dose_pred_gy = (dose_pred_gy *
                            (dose_pred_gy >= 0.)).astype('float64')
            dose_pred_gy = dose_pred_gy * possible_dose_mask.detach().cpu(
            ).numpy().astype('float64')
            # Prepare the dose to save
            dose_pred_gy = np.squeeze(dose_pred_gy)
            dose_to_save = sparse_vector_function(dose_pred_gy)
            dose_df = pd.DataFrame(data=dose_to_save['data'].squeeze(),
                                   index=dose_to_save['indices'].squeeze(),
                                   columns=['data'])
            dose_df.to_csv('{}/{}.csv'.format(save_dir, pat_id))
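# sparse_vector_function is defined elsewhere in the repo. Based on how its
# output is consumed here (and rebuilt with np.put in the ensembling code
# below), a minimal sketch of the expected behaviour: keep only the nonzero
# voxels of the flattened dose, returning their values and flat indices.
def _sparse_vector_function_sketch(dose_3d):
    import numpy as np
    flat = dose_3d.flatten()
    indices = np.nonzero(flat)[0]
    return {'data': flat[indices], 'indices': indices}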
def save_pred_csvs(net,
                   dl3D,
                   dl2D,
                   offset_lists,
                   args,
                   setname='test',
                   fold=0):
    # Make Predictions
    dose_evaluator = EvaluateDose2D(config,
                                    net=net,
                                    data_loader=dl3D,
                                    dose_loader=dl2D,
                                    offset_lists=offset_lists,
                                    load_cache=not args.noloadc,
                                    store_cache=not args.nostorec,
                                    cache_dir='{}/{}/'.format(
                                        setname, args.metric),
                                    conv=False,
                                    evalbs=args.bs)

    if not args.nocsv:
        # Save in csv
        if 'maintest' in setname:
            SAVE_DIR = './subm/{}/{}_main_fold{}'.format(
                config.exp_name, config.exp_name, fold)
        elif 'test' in setname:
            SAVE_DIR = './subm/{}/{}_fold{}'.format(config.exp_name,
                                                    config.exp_name, fold)
        elif 'localval' in setname:
            SAVE_DIR = './subm/{}/{}_localval_fold{}'.format(
                config.exp_name, config.exp_name, fold)
        else:
            raise ValueError

        if not os.path.exists(SAVE_DIR):
            os.makedirs(SAVE_DIR)
        with torch.no_grad():
            is_train = dl3D.dataset.training
            dl3D.dataset.training = False
            for i, (_, (possible_dose_mask, item)) in enumerate(tqdm(dl3D)):
                pat_id = item['patient_list'][0][0]
                dose_pred_gy = dose_evaluator.preds[i]  # (1, 128, 128, 128)
                assert dose_pred_gy.shape == (1, 128, 128,
                                              128), dose_pred_gy.shape
                dose_pred_gy = (dose_pred_gy *
                                (dose_pred_gy >= 0.)).astype('float64')
                dose_pred_gy = dose_pred_gy * possible_dose_mask.detach().cpu(
                ).numpy().astype('float64')
                dose_pred_gy = np.squeeze(dose_pred_gy)
                dose_to_save = sparse_vector_function(dose_pred_gy)
                dose_df = pd.DataFrame(data=dose_to_save['data'].squeeze(),
                                       index=dose_to_save['indices'].squeeze(),
                                       columns=['data'])
                dose_df.to_csv('{}/{}.csv'.format(SAVE_DIR, pat_id))
            dl3D.dataset.training = is_train

        if not args.nozip:
            # Zip dose to submit
            save_path = shutil.make_archive(
                '{}_{}'.format(SAVE_DIR, args.metric), 'zip', SAVE_DIR)
            print('Saved to: ', '/'.join(save_path.split('/')[-3:]))
        else:
            print('Saved to: ', '/'.join(SAVE_DIR.split('/')[-3:]))
        gc.collect()
        sys.exit()
    return dose_evaluator
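# Hypothetical usage of save_pred_csvs (the loaders, args namespace, and
# offset lists are assumed to be built elsewhere in the training script):
#
#   evaluator = save_pred_csvs(net, dl3D=test_dl3D, dl2D=test_dl2D,
#                              offset_lists=[np.arange(-3, 4)],
#                              args=args, setname='test', fold=0)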
    def __init__(self,
                 config,
                 net,
                 data_loader,
                 dose_loader=None,
                 TTA_shift_by=4,
                 offset_lists=[np.arange(-3, 4)],
                 conv=True,
                 load_cache=True,
                 store_cache=True,
                 cache_dir='',
                 evalbs=128):
        """
        Prepare the class for evaluating dose distributions
        :param data_loader: a data loader object that loads data from the reference dataset
        :param dose_loader: a data loader object that loads a dose tensor from any dataset (e.g., predictions)
        """
        # Initialize objects
        self.config = config
        self.data_loader = data_loader  # Loads data related to ground truth patient information
        self.dose_loader = dose_loader  # Loads the data for a benchmark dose
        self.TTA_shift_by = TTA_shift_by

        # Initialize objects for later
        self.patient_list = None
        self.roi_mask = None
        self.new_dose = None
        self.reference_dose = None
        self.voxel_size = None
        self.possible_dose_mask = None

        # Set metrics to be evaluated
        self.oar_eval_metrics = ['D_0.1_cc', 'mean']
        self.tar_eval_metrics = ['D_99', 'D_95', 'D_1']

        # Name metrics for data frame
        oar_metrics = list(it_product(self.oar_eval_metrics, self.data_loader.dataset.defdataset.rois['oars']))
        target_metrics = list(it_product(self.tar_eval_metrics, self.data_loader.dataset.defdataset.rois['targets']))

        # Make data frame to store dose metrics and the difference data frame
        self.metric_difference_df = pd.DataFrame(index=self.data_loader.dataset.defdataset.patient_id_list,
                                                 columns=[*oar_metrics, *target_metrics])
        self.reference_dose_metric_df = self.metric_difference_df.copy()
        self.new_dose_metric_df = self.metric_difference_df.copy()
                
        if net is not None:
            net.eval()
        with torch.no_grad():
            self.preds = [np.zeros((1, 128, 128, 128)) for i in range(len(data_loader))]
            for off in offset_lists:
                print("Offset list: ", off)
                off_id = make_offset_id(off)
                CACHE_PATH = './preds/{}/{}{}'.format(config.exp_name, cache_dir, off_id)
                if store_cache and not os.path.exists(CACHE_PATH):
                    os.makedirs(CACHE_PATH)
                
                loaded = False
                if load_cache:
                    if len(glob.glob(os.path.join(CACHE_PATH, '*.npy'))) != len(data_loader):
                        print(CACHE_PATH)
                        print('Not found sufficient files for loading offset {}, predicting...'.format(off_id))
                    else:
                        print('Loading offset {} from cache...'.format(off_id))
                        for i in range(len(data_loader)):
                            pat_id = os.path.basename(data_loader.dataset.data_df.loc[i, 'Id'])
                            curpred = np.load(os.path.join(CACHE_PATH, '{}.npy'.format(pat_id))).astype('float32')
                            self.preds[i] += curpred/len(offset_lists)
                        loaded = True
                if not loaded:
                    evalds = EvalDataset(dose_loader.dataset, off)
                    evaldl = DataLoader(evalds, batch_size=1, shuffle=False, num_workers=2)
                    print('Making predictions from network...')
                    for i, img in enumerate(tqdm(evaldl)):
                        curpred = TTAflip(net, img[0], axis=self.config.axis, shift_by=self.TTA_shift_by, conv=conv, evalbs=evalbs)
                        if isinstance(curpred, torch.Tensor):
                            curpred = curpred.numpy()
                        curpred = np.moveaxis(curpred, 0, config.axis)  # (1, 128, 128, 128)
                        if conv:
                            print('conv...')
                            curpred = convolve(curpred[0], gau_filter3, mode='same')[None, :]
                        
                        if config.resample is not None:
                            voxel_sz = evaldl.dataset.ds.originalvoxelsz[i]
                            resampled_sz = config.resample.copy()
                            resampled_sz[2] = voxel_sz[0,2]
                            curpred = resample(curpred, np.array(resampled_sz)[None], voxel_sz[0])
                        
                        if store_cache:
                            curpredhalf = curpred.astype('float16')
                            pat_id = os.path.basename(dose_loader.dataset.data_df.loc[i, 'Id'])
                            np.save(os.path.join(CACHE_PATH, '{}.npy'.format(pat_id)), curpredhalf)
                        self.preds[i] += curpred/len(offset_lists)
        print('Done inference! Making metrics...')
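# TTAflip is defined elsewhere in the repo. Judging from its call sites above
# (network, image batch, slicing axis, shift amount, optional smoothing, eval
# batch size), it performs flip-based test-time augmentation. A minimal sketch
# of the core idea for a torch tensor batch, ignoring the axis/shift handling
# (the signature and exact behaviour are assumptions):
def _tta_flip_sketch(net, img):
    import torch
    with torch.no_grad():
        pred = net(img)
        # predict on the flipped input, then flip the prediction back
        pred_flipped = torch.flip(net(torch.flip(img, dims=[-1])), dims=[-1])
    return 0.5 * (pred + pred_flipped)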
loss_func = KBPLoss(config)
history_df = []
if not os.path.exists('./model_weights/{}/models'.format(config.exp_name)):
    os.makedirs('./model_weights/{}/models'.format(config.exp_name))
if not os.path.exists('./logs/{}'.format(config.exp_name)):
    os.makedirs('./logs/{}'.format(config.exp_name))
best_loss, best_dose, best_dvh = np.inf, np.inf, np.inf

evalbatchaccum = EvalBatchAccumulator(config, target_bs=128, num_metrics=4)

for epoch in range(opt.epoch_count, opt.niter + opt.niter_decay + 1):
    epoch_start_time = time.time()
    iter_data_time = time.time()
    epoch_iter = 0

    for i, data in enumerate(tqdm(dataset, total=len(dataset.dataloader))):
        iter_start_time = time.time()
        if total_steps % opt.print_freq == 0:
            t_data = iter_start_time - iter_data_time
        # visualizer.reset()
        total_steps += opt.batchSize
        epoch_iter += opt.batchSize
        model.set_input(data)
        model.optimize_parameters()
        # if total_steps % opt.display_freq == 0:
        #     save_result = total_steps % opt.update_html_freq == 0
        #     visualizer.display_current_results(model.get_current_visuals(),
        #                                        epoch, save_result)

        # if total_steps % opt.print_freq == 0:
        #     errors = model.get_current_errors()
    parser.add_argument('--maintest',
                        action='store_true',
                        help='Evaluate on the maintest dataset')
    args = parser.parse_args()

    assert args.exp is not None

    if args.maintest:
        pats = ['pt_{}'.format(i) for i in range(241, 341)]
        SAVE_DIR = './subm/{}/{}_main_ensemble'.format(args.exp, args.exp)
    else:
        pats = ['pt_{}'.format(i) for i in range(201, 241)]
        SAVE_DIR = './subm/{}/{}_ensemble'.format(args.exp, args.exp)

    if not os.path.exists(SAVE_DIR):
        os.makedirs(SAVE_DIR)

    for pt in tqdm(pats):
        pt_preds = []
        for fold in range(5):
            if args.maintest:
                pt_fold = './subm/{}/{}_main_fold{}/{}.csv'.format(
                    args.exp, args.exp, fold, pt)
            else:
                pt_fold = './subm/{}/{}_fold{}/{}.csv'.format(
                    args.exp, args.exp, fold, pt)
            pt_pred_csv = load_file(pt_fold)
            pt_pred = np.zeros((128, 128, 128, 1), dtype='float64')
            np.put(pt_pred, pt_pred_csv['indices'], pt_pred_csv['data'])
            pt_preds.append(pt_pred[None, :, :, :, 0])
        pt_preds = np.concatenate(pt_preds)
        assert pt_preds.shape == (5, 128, 128, 128), pt_preds.shape
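        # The snippet is truncated here; a minimal sketch of the step that
        # would follow, assuming the folds are averaged and written out with
        # the same sparse CSV convention used elsewhere in this file:
        pt_pred_mean = pt_preds.mean(axis=0)  # (128, 128, 128)
        dose_to_save = sparse_vector_function(pt_pred_mean)
        dose_df = pd.DataFrame(data=dose_to_save['data'].squeeze(),
                               index=dose_to_save['indices'].squeeze(),
                               columns=['data'])
        dose_df.to_csv('{}/{}.csv'.format(SAVE_DIR, pt))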
    def __init__(self, config, data_df, training=True, valid=False, transform=None):
        print('Using Concat Dataset')
        self.config = copy.deepcopy(config)
        self.data_df = data_df.copy()
        self.training = training
        self.valid = valid
        self.transform = transform
        
        self.data_df['loadj'] = config.loadjaehee
        if config.loadjaehee and config.nloadjaehee > 0:
            self.data_df.loc[len(self.data_df)-config.nloadjaehee:, 'loadj'] = False
        
        assert config.axis in [1, 2, 3]
        self.axis = config.axis
        print('Loading data along axis: {}'.format(self.axis))

        self.mode = 'training_model' if training else 'dose_prediction'
        self.defdataset = DefaultDataLoader(self.data_df['Id'], batch_size=1, shuffle=False, mode_name=self.mode, pseudo_path=config.pseudo_path)
        
        self.imgcache = []
        self.targetcache = []
        self.pdmcache = []
        self.smcache = []
        self.voxelcache = []
        self.originalvoxelsz = []
        self.jaeheecache = []
        
        self.pos_map = [(i, j) for i in range(len(self.data_df)) for j in range(128)]
        for i in tqdm(range(len(self.data_df))):
            item = self.defdataset.get_batch(index=i)
            img = item['ct'][:,:,:,:,0].astype('int16')
            if self.training:
                target = item['dose'][:,:,:,:,0].astype('float16')
            else:
                target = img.copy()
            pdm = item['possible_dose_mask'][:,:,:,:,0].astype('bool')
            sm = np.moveaxis(item['structure_masks'][0].astype('bool'), -1, 0)
            voxel = item['voxel_dimensions'].astype('float32')
            self.originalvoxelsz.append(np.copy(voxel))
            
            if config.resample is not None:
                img = resample(img, voxel, config.resample.copy())
                pdm = resample(pdm, voxel, config.resample.copy())
                sm = resample(sm, voxel, config.resample.copy())
                target = resample(target, voxel, config.resample.copy())
                voxel[0,:2] = config.resample[:2]
            
            if config.imgmulpdm:
                img *= pdm
            self.imgcache.append(np.ascontiguousarray(img))
            self.targetcache.append(np.ascontiguousarray(target))
            self.pdmcache.append(np.ascontiguousarray(pdm))
            self.smcache.append(np.ascontiguousarray(sm))
            self.voxelcache.append(np.ascontiguousarray(voxel))
            if config.addjaehee and self.data_df.loc[i, 'loadj']:
                assert self.config.axis == 3
                pat_id = self.data_df.loc[i, 'Id'].split('/')[-1].split('_')[1]
                pat_id = pat_id.zfill(3)  # zero-pad to three digits
                jc = []
                for sl in range(128):
                    jc.append(np.load('./data/data_Jaehee/comb_data/{}_Feature_{}.npy'.format(pat_id, sl)))
                self.jaeheecache.append(np.concatenate(jc, axis=3))
                if self.config.addtargets:
                    assert self.jaeheecache[-1].shape == (27, 128, 128, 128), self.jaeheecache[-1].shape
                else:
                    assert self.jaeheecache[-1].shape == (23, 128, 128, 128), self.jaeheecache[-1].shape
                assert self.jaeheecache[-1].dtype == np.float16
        
        
        # Resampling: restrict pos_map to slice positions with nonzero possible-dose mass
        self.notargetreplace = config.notargetreplace
        if self.training and not self.valid and self.notargetreplace is not None:
            self.notargetreplaceweights = {'pdm': [], 'sm': []}
            for i in range(len(self.pdmcache)):
                pdmsum = self.pdmcache[i].sum(tuple([ax for ax in range(4) if ax != config.axis]))
                smsum = self.smcache[i].sum(tuple([ax for ax in range(4) if ax != config.axis]))
                self.notargetreplaceweights['pdm'].append(pdmsum/pdmsum.sum())
                self.notargetreplaceweights['sm'].append(smsum/smsum.sum())
            
            self.pos_map = []
            for i in range(len(self.pdmcache)):
                nzind = np.where(self.notargetreplaceweights['pdm'][i])[0]
                for j in nzind:
                    self.pos_map.append((i, j))
        
        # For profiling: a small shared array lets DataLoader workers report timings
        shared_array_base = mp.Array(ctypes.c_float, 3)
        shared_array = np.ctypeslib.as_array(shared_array_base.get_obj())
        self.timeindexing = shared_array
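# resample is defined elsewhere in the repo. From its call sites
# (resample(array, current_voxel_size, new_voxel_size)), a plausible minimal
# sketch for a 3D volume using scipy's zoom; the interpolation order and the
# repo's exact shape handling are assumptions:
def _resample_sketch(arr, cur_voxel_sz, new_voxel_sz):
    import numpy as np
    from scipy.ndimage import zoom
    # scale factor per axis: coarser target voxels -> fewer output samples
    factors = np.asarray(cur_voxel_sz, dtype='float64') / np.asarray(
        new_voxel_sz, dtype='float64')
    return zoom(arr, factors, order=1)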
    def __init__(self,
                 config,
                 data_df,
                 training=True,
                 valid=False,
                 transform=None):
        print('Using Stack Dataset')
        self.config = config
        self.data_df = data_df
        self.training = training
        self.valid = valid
        self.transform = transform

        assert config.axis in [1, 2, 3]
        self.axis = config.axis
        print('Loading data along axis: {}'.format(self.axis))

        assert config.notargetreplace is None

        self.mode = 'training_model' if training else 'dose_prediction'
        self.defdataset = DefaultDataLoader(self.data_df['Id'],
                                            batch_size=1,
                                            shuffle=False,
                                            mode_name=self.mode)

        assert np.array(config.offset_list).sum() == 0
        assert config.offset_list[len(config.offset_list) // 2] == 0
        # 12 channels per slice (CT + possible-dose mask + 10 structure masks);
        # pad = len(offset_list) // 2 zero rows surround each patient so offset
        # slices never cross patient boundaries
        totalch = 12 * (128 * len(data_df) + len(config.offset_list) // 2 *
                        (len(data_df) + 1))
        self.imgcache = np.zeros((totalch, 128, 128), dtype='int16')
        self.targetcache = []
        self.voxelcache = []
        self.originalvoxelsz = []

        cache_idx = 12 * (len(config.offset_list) // 2)
        for i in tqdm(range(len(self.data_df))):
            item = self.defdataset.get_batch(index=i)
            img, pdm, sm = item['ct'], item['possible_dose_mask'], item[
                'structure_masks']
            assert np.array_equal(img, img.astype('int16').astype('float64'))
            assert np.array_equal(pdm, pdm.astype('int16').astype('float64'))
            assert np.array_equal(sm, sm.astype('int16').astype('float64'))
            if self.training:
                target = item['dose'][:, :, :, :, 0].astype('float32')
            else:
                target = np.zeros_like(img[:, :, :, :, 0])
            voxel = item['voxel_dimensions'].astype('float32')
            self.originalvoxelsz.append(np.copy(voxel))

            if config.resample is not None:
                img = resample(img, voxel, config.resample.copy())
                pdm = resample(pdm, voxel, config.resample.copy())
                sm = resample(sm, voxel, config.resample.copy())
                target = resample(target, voxel, config.resample.copy())
                voxel[0, :2] = config.resample[:2]

            if config.imgmulpdm:
                img *= pdm

            img = img.astype('int16')[0]
            pdm = pdm.astype('int16')[0]
            sm = sm.astype('int16')[0]
            img = np.moveaxis(np.concatenate((img, pdm, sm), axis=3), 3, 0)
            img = np.reshape(np.moveaxis(img, config.axis, 1),
                             (12 * 128, 128, 128),
                             order='F')

            self.imgcache[cache_idx:cache_idx + 12 * 128] = img
            cache_idx += 12 * (128 + len(config.offset_list) // 2)

            self.targetcache.append(np.ascontiguousarray(target))
            self.voxelcache.append(np.ascontiguousarray(voxel))

        # For profiling: a small shared array lets DataLoader workers report timings
        shared_array_base = mp.Array(ctypes.c_float, 3)
        shared_array = np.ctypeslib.as_array(shared_array_base.get_obj())
        self.timeindexing = shared_array
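# A worked example of the stacked cache layout above: with a 7-element
# offset_list, pad = 3, so the cache holds 12*3 leading zero rows, then for
# each patient 12*128 image rows followed by 12*3 padding rows, i.e.
# totalch = 12*(128*N + 3*(N + 1)). Patient i's block then starts at:
def _stack_cache_start_row(i, n_offsets=7, n_channels=12, n_slices=128):
    pad = n_offsets // 2
    # matches the cache_idx initialisation and per-patient increment above
    return n_channels * (pad + i * (n_slices + pad))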
        test_dl3D = DataLoader(test_ds3D,
                               batch_size=1,
                               shuffle=False,
                               num_workers=configs[0].num_workers)

        dose_evaluators = [
            get_eval_exp(config, test_dl3D, fpath, setname='test')
            for config in configs
        ]

        for pt in range(len(dose_evaluators[0].preds)):
            for exp in range(1, len(dose_evaluators)):
                dose_evaluators[0].preds[pt] += dose_evaluators[exp].preds[pt]
            dose_evaluators[0].preds[pt] /= len(dose_evaluators)

        for i, (_, (possible_dose_mask, item)) in enumerate(tqdm(test_dl3D)):
            pat_id = item['patient_list'][0][0]
            dose_pred_gy = dose_evaluators[0].preds[i]  # (1, 128, 128, 128)
            assert dose_pred_gy.shape == (1, 128, 128, 128), dose_pred_gy.shape
            dose_pred_gy = (dose_pred_gy *
                            (dose_pred_gy >= 0.)).astype('float64')
            dose_pred_gy = dose_pred_gy * possible_dose_mask.detach().cpu(
            ).numpy().astype('float64')
            dose_pred_gy = np.squeeze(dose_pred_gy)
            dose_to_save = sparse_vector_function(dose_pred_gy)
            dose_df = pd.DataFrame(data=dose_to_save['data'].squeeze(),
                                   index=dose_to_save['indices'].squeeze(),
                                   columns=['data'])
            dose_df.to_csv('{}/{}.csv'.format(SAVE_DIR, pat_id))

        # Zip dose to submit
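        # The snippet ends here; presumably the archive step mirrors the one
        # in save_pred_csvs above (an assumption based on that code):
        save_path = shutil.make_archive(SAVE_DIR, 'zip', SAVE_DIR)
        print('Saved to: ', '/'.join(save_path.split('/')[-3:]))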