def write_cati_csv():
    import pandas as pd
    import numpy as np
    from utils_file import gdir, gfile, get_parent_path

    data_path = '/network/lustre/iss01/cenir/analyse/irm/users/romain.valabregue/QCcnn/CATI_datasets/'
    fcsv = data_path + 'all_cati.csv'
    res = pd.read_csv(fcsv)
    ser_dir = res.cenir_QC_path[res.globalQualitative > 3].values
    dcat = gdir(ser_dir, 'cat12')
    fT1 = gfile(dcat, '^s.*nii')
    fms = gfile(dcat, '^ms.*nii')
    fs_brain = gfile(dcat, '^brain_s.*nii')
    # return fT1, fms, fs_brain

    ind_perm = np.random.permutation(range(0, len(fT1)))
    itrain = ind_perm[0:100]
    ival = ind_perm[100:]

    for files, suffix in zip([fT1, fms, fs_brain], ['T1', 'ms', 'brain']):
        dd = pd.DataFrame({'filename': files})
        dd.to_csv(data_path + 'cati_cenir_QC4_all_{}.csv'.format(suffix), index=False)
        dd.loc[ival, :].to_csv(data_path + 'cati_cenir_QC4_val_{}.csv'.format(suffix), index=False)
        dd.loc[itrain, :].to_csv(data_path + 'cati_cenir_QC4_train_{}.csv'.format(suffix), index=False)
        dd.to_csv(data_path + 'cati_cenir_all_{}.csv'.format(suffix), index=False)

    # add a brain_mask column to each cati_cenir csv
    allcsv = gfile('/home/romain.valabregue/datal/QCcnn/CATI_datasets', 'cati_cenir.*csv')
    for onecsv in allcsv:
        res = pd.read_csv(onecsv)
        resout = onecsv[:-4] + '_mask.csv'
        fmask = []
        for ft1 in res.filename:
            d = get_parent_path(ft1)[0]
            fmask += gfile(d, '^mask', opts={"items": 1})
        res['brain_mask'] = fmask
        res.to_csv(resout, index=False)
def __getitem__(self, index: int) -> Subject:
    if not isinstance(index, int):
        raise ValueError(f'Index "{index}" must be int, not {type(index)}')

    if self.load_from_dir:
        subject = torch.load(self._subjects[index])
        if self.add_to_load is not None:
            ii = subject.get_images()
            image_path = ii[0]['path']
            if 'original' in self.add_to_load:
                ss = Subject(image=Image(image_path, INTENSITY))
                subject['original'] = ss['image']
                if self.add_to_load == 'original':
                    # trick to allow loading both the original and a mask
                    add_to_load = None
                else:
                    add_to_load = self.add_to_load[8:]  # keep what follows 'original'
            else:
                add_to_load = self.add_to_load
            if add_to_load is not None:
                image_add = gfile(get_parent_path(image_path), self.add_to_load_regexp)[0]
                sss = Subject(image=Image(image_add, LABEL))
                # replay any elastic deformation recorded in the subject history
                for hhh in subject.history:
                    if 'RandomElasticDeformation' in hhh[0]:
                        from torchio.transforms import RandomElasticDeformation
                        num_cp = hhh[1]['coarse_grid'].shape[1]
                        rr = RandomElasticDeformation(num_control_points=num_cp)
                        sss = rr.apply_given_transform(sss, hhh[1]['coarse_grid'])
                subject[add_to_load] = sss['image']
    else:
        subject = self._subjects[index]
        subject = copy.deepcopy(subject)  # cheap since images are not loaded yet
        if self.load_getitem:
            subject.load()

    # Apply transform (this is usually the bottleneck)
    if self._transform is not None:
        subject = self._transform(subject)

    if self.save_to_dir is not None:
        fname = self.save_to_dir + '/subject{:05d}'.format(index)
        if 'image_orig' in subject:
            subject.pop('image_orig')
        torch.save(subject, fname + '_subject.pt')

    return subject
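# Hedged round-trip sketch (not part of the original code) for the load_from_dir
# branch above: subjects written by save_to_dir as <dir>/subjectNNNNN_subject.pt
# can be reloaded by pointing load_from_dir at the same directory. The paths and
# the subjects_list variable are hypothetical, hence the file's 0 == 3 guard.
if 0 == 3:
    ds_save = ImagesDataset(subjects_list, save_to_dir='/tmp/suj_cache')
    _ = [ds_save[i] for i in range(len(ds_save))]  # triggers the torch.save calls above
    fsamples = gfile('/tmp/suj_cache', 'subject.*pt')
    ds_load = ImagesDataset(fsamples, load_from_dir='/tmp/suj_cache')
    subject0 = ds_load[0]  # a Subject restored with torch.load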
def load_existing_weights_if_exist(resdir, model, model_name='model', log=None,
                                   device='cuda', index_mod=-1, res_model_file=None):
    import numpy as np
    import torch
    from collections import OrderedDict
    from utils_file import gfile, get_parent_path

    ep_start = 0
    if res_model_file is None:
        resume_mod = gfile(resdir, '.*pt$')
    else:
        resume_mod = [res_model_file]

    if len(resume_mod) > 0:
        dir_mod, fn = get_parent_path(resume_mod)
        # checkpoint names look like <prefix>_ep<epoch>[_it<iteration>].pt
        ffn = [ff[ff.find('_ep') + 3:-3] for ff in fn]
        key_list = []
        for fff, fffn in zip(ffn, fn):
            if '_it' in fff:
                ind = fff.find('_it')
                ep = int(fff[0:ind])
                it = int(fff[ind + 3:])
            else:
                ep = int(fff)
                it = 100000000
            key_list.append([fffn, ep, it])
        aa = np.array(sorted(key_list, key=lambda x: (x[1], x[2])))
        name_sorted, ep_sorted = aa[:, 0], aa[:, 1]
        ep_start = int(ep_sorted[index_mod])
        thelast = dir_mod[0] + '/' + name_sorted[index_mod]
        log.info('RESUME model from epoch {} weight loaded from {}'.format(ep_start, thelast))
        tl = torch.load(thelast, map_location=device)
        if model_name not in tl:
            model_name = list(tl.items())[0][0]
        prefix = 'model.'
        state_dict = tl[model_name]
        aa = next(iter(state_dict))
        if prefix in aa:
            new_state_dict = OrderedDict()
            for k, v in state_dict.items():
                name = k[len(prefix):]  # strip the 'model.' prefix (e.g. from a wrapping module)
                new_state_dict[name] = v
            model.load_state_dict(new_state_dict)
        else:
            model.load_state_dict(tl[model_name])
    else:
        log.info('New training starting epoch {}'.format(ep_start))
        thelast = resdir

    log.info('Resdir is {}'.format(resdir))
    return ep_start, get_parent_path([thelast])[1][0]
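# Hedged usage sketch (not from the original repo): exercise
# load_existing_weights_if_exist on a throw-away checkpoint. The only
# assumptions are the naming scheme parsed above,
# '<anything>_ep<epoch>_it<iteration>.pt', and that utils_file is importable.
import logging
import tempfile
import torch
import torch.nn as nn

logging.basicConfig(level=logging.INFO)
demo_log = logging.getLogger('resume_demo')
demo_net = nn.Linear(4, 2)
demo_dir = tempfile.mkdtemp()
torch.save({'model': demo_net.state_dict()}, demo_dir + '/demo_ep3_it2000.pt')
ep_start, last_name = load_existing_weights_if_exist(demo_dir, demo_net,
                                                     log=demo_log, device='cpu')
assert ep_start == 3  # last_name should be 'demo_ep3_it2000.pt'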
def get_subject_list_from_file_list(fin, mask_regex=None, mask_key='brain'):
    subjects_list = []
    for ff in fin:
        one_suj = {'image': Image(ff, INTENSITY)}
        if mask_regex is not None:
            dir_file = get_parent_path(ff)[0]
            fmask = gfile(dir_file, mask_regex, {"items": 1})
            one_suj[mask_key] = Image(fmask[0], LABEL)
        subjects_list.append(Subject(one_suj))
    return subjects_list
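# Hedged usage sketch, with the old torchio-style API (Image/INTENSITY/LABEL,
# ImagesDataset) used throughout this file; the directory and the '^mask'
# regex are hypothetical, hence the file's 0 == 3 guard.
if 0 == 3:
    fin = gfile('/data/some_dataset', '^s.*nii.gz')
    subjects = get_subject_list_from_file_list(fin, mask_regex='^mask', mask_key='brain')
    dataset = ImagesDataset(subjects)
    sample = dataset[0]  # Subject with an 'image' and a 'brain' entry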
res = gdir('/home/romain.valabregue/datal/PVsynth/jzay/training/RES1mm', 'data')
res = gdir('/home/romain.valabregue/datal/PVsynth/training/RES_14mm_tissue', 'data')
res = gdir('/home/romain.valabregue/datal/PVsynth/jzay/training/RES1mm_prob', 'pve_synth_mod3_P128$')
res = gdir('/home/romain.valabregue/datal/PVsynth/jzay/training/RES1mm_prob', 'aniso')
res = gdir(res, 'results_cluster')
res = ['/network/lustre/iss01/cenir/analyse/irm/users/romain.valabregue/PVsynth/jzay/training/RES1mm_prob/pve_synth_mod3_P128_aniso_LogLkd_reg_multi/result',
       '/network/lustre/iss01/cenir/analyse/irm/users/romain.valabregue/PVsynth/jzay/training/RES1mm_prob/pve_synth_mod3_P128_aniso_LogLkd_reg_unis_lam1/results_cluster',
       '/network/lustre/iss01/cenir/analyse/irm/users/romain.valabregue/PVsynth/jzay/training/RES1mm_prob/pve_synth_mod3_P128_aniso_LogLkd_classif/results_cluster',
       ]
res = ['/home/romain.valabregue/datal/PVsynth/jzay/training/RES1mm_prob/pve_synth_mod3_P128/results_cluster/']
report_learning_curves(res)

# explore synthetic data histograms
results_dirs = glob.glob('/home/romain.valabregue/datal/PVsynth/RES_1.4mm/t*/513130')
f = gfile(results_dirs, '^5.*nii')
resname = get_parent_path(results_dirs, 2)[1]
resfig = '/home/romain.valabregue/datal/PVsynth/figure/volume_synth/'
for i, ff in enumerate(f):
    img = nb.load(ff)
    data = img.get_fdata(dtype=np.float32)
    fig = plt.figure(resname[i])
    hh = plt.hist(data.flatten(), bins=500)
    axes = plt.gca()
    axes.set_ylim([0, 40000])
    # fig.savefig(resfig + resname[i] + '.png')

# concat eval.csv
def plot_train_val_results(dres, train_csv_regex='Train.*csv', val_csv_regex='Val.*csv',
                           prediction_column_name='prediction', target_column_name='targets',
                           target_scale=1, fign='Res', sresname=None):
    legend_str = []
    col = ['b', 'g', 'r', 'c', 'm', 'y', 'k', 'w']
    for ii, oneres in enumerate(dres):
        fresT = gfile(oneres, train_csv_regex)
        fresV = gfile(oneres, val_csv_regex)
        if len(fresV) == 0:
            print('{} empty dir {} '.format(colored('Skipping', 'red'), get_parent_path(oneres)[1]))
            continue
        is_train = len(fresT) > 0
        if is_train:
            resT = [pd.read_csv(ff) for ff in fresT]

        resdir, resname = get_parent_path(fresV)
        nbite = len(resT[0]) if is_train else 80000
        fresV_sorted, b, c = get_ep_iter_from_res_name(resname, nbite)
        ite_tot = c + b * nbite
        ite_tottt = np.hstack([0, ite_tot])
        print(ite_tot)
        resV = [pd.read_csv(resdir[0] + '/' + ff) for ff in fresV_sorted]
        df_val = pd.concat(resV, ignore_index=True, sort=False)

        for rr in resV:
            if 'sample_time' not in rr:
                rr['sample_time'] = rr['batch_time'] / 4  # old results were always run with batch size 4
            if isinstance(rr[prediction_column_name][0], str):
                rr[prediction_column_name] = rr[prediction_column_name].apply(
                    lambda s: convert_string_array_to_array(s))
                rr[target_column_name] = rr[target_column_name].apply(
                    lambda s: convert_string_array_to_array(s))
        if is_train:
            for rr in resT:
                if 'sample_time' not in rr:
                    rr['sample_time'] = rr['batch_time'] / 4
                if isinstance(rr[prediction_column_name][0], str):
                    rr[prediction_column_name] = rr[prediction_column_name].apply(
                        lambda s: convert_string_array_to_array(s))
                    rr[target_column_name] = rr[target_column_name].apply(
                        lambda s: convert_string_array_to_array(s))

        if is_train:
            df_train = pd.concat(resT, ignore_index=True, sort=False)
            errorT = np.abs(df_train.loc[:, prediction_column_name].values
                            - df_train.loc[:, target_column_name].values * target_scale)
            train_time = df_train.loc[:, 'sample_time']
            # average between consecutive validation points
            LmTrain = [np.mean(errorT[ite_tottt[jj]:ite_tottt[jj + 1]]) for jj in range(0, len(ite_tot))]
            TimeTrain = [np.mean(train_time[ite_tottt[jj]:ite_tottt[jj + 1]]) for jj in range(0, len(ite_tot))]

        LmVal = [np.mean(np.abs(rr.loc[:, prediction_column_name]
                                - rr.loc[:, target_column_name].values * target_scale)) for rr in resV]
        TimeVal = [np.mean(rr.loc[:, 'sample_time']) for rr in resV]

        plt.figure('MeanL1_' + fign)
        legend_str.append('V{}'.format(sresname[ii]))
        if is_train:
            legend_str.append('T{}'.format(sresname[ii]))
        plt.plot(ite_tot, LmVal, '--', color=col[ii])
        if is_train:
            plt.plot(ite_tot, LmTrain, color=col[ii], linewidth=6)
        plt.figure('Time_' + fign)
        plt.plot(ite_tot, TimeVal, '--', color=col[ii])
        if is_train:
            plt.plot(ite_tot, TimeTrain, color=col[ii], linewidth=6)

        # print a short summary of the results
        if not is_train:
            TimeTrain = 0
        nb_res = len(resT) if is_train else 0
        np_iter = len(resT[0]) if is_train else 0
        totiter, mbtt, mbtv = ite_tot[-1] / 1000, np.nanmean(TimeTrain), np.mean(TimeVal)
        tot_time = nb_res * np_iter * mbtt + len(resV) * len(resV[0]) * mbtv
        percent_train = nb_res * np_iter * mbtt / tot_time
        tot_time_day = np.floor(tot_time / 60 / 60 / 24)
        tot_time_hour = (tot_time - tot_time_day * 24 * 60 * 60) / 60 / 60
        print('Result : {} \t {} '.format(colored(get_parent_path(resdir[0])[1], 'green'), sresname[ii]))
        print('\t{} epoch of {} vol {} val on {} vol Tot ({:.1f}%train) {} d {:.1f} h'.format(
            nb_res, np_iter, len(resV), len(resV[0]), percent_train * 100, tot_time_day, tot_time_hour))

        fj = gfile(oneres, 'data.json')
        if len(fj) == 1:
            data_struc = cc.read_json(fj[0])
            bs, nw = data_struc['batch_size'], data_struc['num_workers']
        else:
            bs, nw = 0, -1
        print('\tBatch size {} \tNum worker {} \t{:.1f} mille iter \t train/val meanTime {:.2f} / {:.2f} '.format(
            bs, nw, totiter, mbtt, mbtv))

    plt.figure('MeanL1_' + fign)
    plt.legend(legend_str)
    plt.grid()
    ff = plt.gcf()
    ff.set_size_inches([15, 7])
    plt.subplots_adjust(left=0.05, right=1, bottom=0.05, top=1, wspace=0, hspace=0)
    plt.ylabel('L1 loss')

    plt.figure('Time_' + fign)
    plt.legend(legend_str)
    plt.grid()
    plt.ylabel('time in seconds')
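# Hedged usage sketch for plot_train_val_results (hypothetical result
# directories; dqc and the regex are placeholders, following the calls made
# elsewhere in this file):
if 0 == 3:
    dres = gdir(dqc, 'RegMotNew.*L1.*0001')
    sresname = get_parent_path(dres)[1]
    plot_train_val_results(dres, train_csv_regex='res_train.*csv', val_csv_regex='res_val_.*csv',
                           prediction_column_name='model_out', target_column_name='ssim',
                           target_scale=1, fign='demo', sresname=sresname)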
def get_pandadf_from_res_valOn_csv(dres, resname, csv_regex='res_valOn', data_name_list=None,
                                   select_last=None, target='ssim', target_scale=1):
    if len(dres) != len(resname):
        raise ValueError('length mismatch between dres and resname')

    resdf_list = []
    for oneres, resn in zip(dres, resname):
        fres_valOn = gfile(oneres, csv_regex)
        print('Found {} <{}> for {} '.format(len(fres_valOn), csv_regex, resn))
        if len(fres_valOn) == 0:
            continue

        ftrain = gfile(oneres, 'res_train_ep01.csv')
        rrt = pd.read_csv(ftrain[0])
        nb_it = rrt.shape[0]
        resdir, resname_val = get_parent_path(fres_valOn)
        resname_sorted, b, c = get_ep_iter_from_res_name(resname_val, 0)
        if select_last is not None:
            if select_last < 0:
                resname_sorted = resname_sorted[select_last:]
            else:
                nb_iter = b * nb_it + c
                resname_sorted = resname_sorted[np.argwhere(nb_iter > select_last)[1:8]]
        resV = [pd.read_csv(resdir[0] + '/' + ff) for ff in resname_sorted]

        resdf = pd.DataFrame()
        for ii, fres in enumerate(resname_sorted):
            iind = [i for i, s in enumerate(data_name_list) if s in fres]
            if len(iind) == 1:
                data_name = data_name_list[iind[0]]
            else:
                data_name = 'res_valds'
            iind = fres.find(data_name)
            ddn = remove_extension(fres[iind + len(data_name) + 1:])
            new_col_name = 'Mout_' + ddn
            iind = ddn.find('model_ep')
            if iind == 0:
                transfo = 'raw'
            else:
                transfo = ddn[:iind - 1]
                if transfo[0] == '_':  # a leading _ would give an empty legend
                    transfo = transfo[1:]
            model_name = ddn[iind:]
            aa, bb, cc = get_ep_iter_from_res_name([fres], nb_it)
            nb_iter = bb[0] * nb_it + cc[0]
            rr = resV[ii].copy()
            rr['evalOn'], rr['transfo'] = data_name, transfo
            rr['model_name'], rr['submodel_name'], rr['nb_iter'] = resn, model_name, str(nb_iter)
            rr[target] = rr[target] * target_scale
            resdf = pd.concat([resdf, rr], axis=0, sort=True)

        resdf['error'] = resdf[target] - resdf['model_out']
        resdf['error_abs'] = np.abs(resdf[target] - resdf['model_out'])
        resdf_list.append(resdf)

    return resdf_list
rlabel.index = rid
rlabel = rlabel.sort_index()  # alphabetical order
labelsujid = rlabel.index
rr = rlabell.reindex(rlabel.index).loc[:, ['lesion_PV', 'lesion_WM']]
rlabel = pd.concat([rlabel, rr], axis=1, sort=True)
# reorder the labels as in res[0]
# rlabel = rlabel.loc[sujid]
# ytrue = rlabel.QCOK.values
ytrue = rlabel.globalQualitative.values
print_accuracy_df(rlabel, ytrue)

# mriqc predictions
rd = '/network/lustre/dtlake01/opendata/data/ABIDE/mriqc_data/retrain'
resfile = gfile(rd, 'data_CATI.*csv$')
resname = get_parent_path(resfile, 1)[1]
res = [pd.read_csv(f) for f in resfile]
for ii, rr in enumerate(res):
    sujid = []
    for ff in rr.subject_id:
        dd = ff.split('/')
        if dd[-1] == '':  # drop the empty element left by a trailing slash
            dd.pop()
        nn = len(dd)
        sujid.append(dd[nn - 3] + '+' + dd[nn - 2] + '+' + dd[nn - 1])
    rr.index = sujid
    res[ii] = rr.loc[labelsujid]  # rr.loc[sujid[::-1]]
print_accuracy(res, resname)
dres = gdir(dqc, 'RegMotNew.*hcp400_ms.*B4.*L1.*0001')
dres = gdir(dqc, 'R.*')
resname = get_parent_path(dres)[1]
# sresname = [rr[rr.find('hcp400_')+7: rr.find('hcp400_')+17] for rr in resname]; sresname[2] += 'le-4'
sresname = resname
commonstr, sresname = reduce_name_list(sresname)
print('common str {}'.format(commonstr))

target = 'ssim'
target_scale = 1
# target = 'random_noise'; target_scale = 10
legend_str = []
col = ['b', 'g', 'r', 'c', 'm', 'y', 'k', 'w']
for ii, oneres in enumerate(dres):
    fresV = gfile(oneres, 'res_val')
    fresT = gfile(oneres, 'train.*csv')
    is_train = len(fresT) > 0
    if is_train:
        resT = [pd.read_csv(ff) for ff in fresT]
    resV = [pd.read_csv(ff) for ff in fresV]
    resname = get_parent_path(fresV)[1]
    nbite = len(resT[0]) if is_train else 80000
    a, b, c = get_ep_iter_from_res_name(resname, nbite)
    ite_tot = c + b * nbite
    if is_train:
        errorT = np.hstack([np.abs(rr.model_out.values - rr.loc[:, target].values * target_scale)
                            for rr in resT])
        ite_tottt = np.hstack([0, ite_tot])
root_dir = '/network/lustre/iss01/cenir/analyse/irm/users/romain.valabregue/QCcnn/NN_regres_motion/'
prefix = "/network/lustre/iss01/cenir/analyse/irm/users/romain.valabregue/QCcnn/job/job_eval_again/"

# model = root_dir + 'RegMotNew_mvt_train_hcp400_ms_B4_nw0_Size182_ConvN_C16_256_Lin40_50_D0_DC0.5_BN_Loss_L1_lr0.0001/'
model = root_dir + 'RegMotNew_ela1_train200_hcp400_ms_B4_nw0_Size182_ConvN_C16_256_Lin40_50_D0_BN_Loss_L1_lr0.0001/'
# model = root_dir + 'RegMotNew_mvt_train_hcp400_ms_B4_nw0_Size182_ConvN_C16_256_Lin40_50_D0_BN_Loss_L1_lr0.0001'
# model = gdir(root_dir, 'New_ela1_train_hcp400_T1')
# model = gdir(root_dir, 'New_resc.*hcp.*T1.*DC0.1')
model = gdir(root_dir, 'rescal.*cati')
model = gdir(root_dir, 'RegMotNew_ela1.*cati_ms')
model = gdir(root_dir, 'Mask_rescale_ela1.*hcp.*T1')
model = gdir(root_dir, 'Mask.*ela1.*hcp.*ms')
model = gdir(root_dir, 'Reg.*D0')

# saved_models = gfile(model, '_ep(30|20|10)_.*000.pt$')
saved_models = gfile(model, '_ep9_.*000.pt$')
saved_models = saved_models[0:10]
# saved_models = gfile(model, '_ep(24|46|48)_.*000.pt$')
# saved_models = gfile(model, '_ep(10|11)_.*000.pt$')
# saved_models = gfile(model, '_ep(10|9)_.*000.pt$')

saved_models = []
for mm in model:
    ss_models = gfile(mm, '_ep.*pt$')
    fresV_sorted, b, c = get_ep_iter_from_res_name(ss_models, 20000)
    ss_models = list(fresV_sorted[-10:])
    saved_models = ss_models + saved_models

# name_list_val = ['ela1_val_hcp200_ms']
name_list_val = ['mvt_val_hcp200_ms', 'ela1_val_hcp200_ms']
name_list_val = ['ela1_val_hcp200_T1', 'mvt_val_hcp200_T1']
name_list_val = ['ela1_val_cati_ms', 'mvt_train_cati_ms', 'mvt_val_cati_ms']
rotation_matrices = np.apply_along_axis(create_rotation_matrix_3d, axis=0, arr=rot).transpose([-1, 0, 1])
tt = fitpars[0:3, :].transpose([1, 0])
affs = np.tile(affine, [fitpars.shape[1], 1, 1])
affs[:, 0:3, 0:3] = rotation_matrices
affs[:, 0:3, 3] = tt

from scipy.linalg import logm, expm

weights, matrices = ss[0], affs
logs = [w * logm(A) for (w, A) in zip(weights, matrices)]
logs = np.array(logs)
logs_sum = logs.sum(axis=0)
expm(logs_sum / np.sum(weights, axis=0))  # up to ~1e-2 this is indeed the identity!

rp_files = gfile('/data/romain/HCPdata/suj_274542/Motion_ms', '^rp')
rp_files = gfile('/data/romain/HCPdata/suj_274542/mot_separate', '^rp')
rpf = rp_files[10]
res = pd.DataFrame()
for rpf in rp_files:
    dirpath, name = get_parent_path([rpf])
    fout = dirpath[0] + '/check/' + name[0][3:-4] + '.nii'
    t = RandomMotionFromTimeCourse(fitpars=rpf, nufft=True, oversampling_pct=0,
                                   keep_original=True, verbose=True)
    dataset = ImagesDataset(suj, transform=t)
    sample = dataset[0]
    dicm = sample['T1']['metrics']
    dicm['fname'] = fout
    res = res.append(dicm, ignore_index=True)
    dataset.save_sample(sample, dict(T1=fout))
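# Hedged sanity check (not in the original) of the weighted log-Euclidean mean
# computed above: with equal weights, averaging a rotation with its inverse
# must give the identity, matching the "identity up to ~1e-2" observation.
import numpy as np
from scipy.linalg import logm, expm

theta = 0.3
R = np.array([[np.cos(theta), -np.sin(theta), 0.],
              [np.sin(theta),  np.cos(theta), 0.],
              [0., 0., 1.]])
mats, w = [R, R.T], np.array([1., 1.])  # R.T == inv(R) for a rotation matrix
mean_aff = expm(sum(wi * logm(A) for wi, A in zip(w, mats)) / w.sum())
assert np.allclose(mean_aff, np.eye(3), atol=1e-6)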
root_dir = '/network/lustre/iss01/cenir/analyse/irm/users/romain.valabregue/QCcnn/NN_regres_motion/'
prefix = "/network/lustre/iss01/cenir/analyse/irm/users/romain.valabregue/QCcnn/job/job_eval_again/"

# model = root_dir + 'RegMotNew_mvt_train_hcp400_ms_B4_nw0_Size182_ConvN_C16_256_Lin40_50_D0_DC0.5_BN_Loss_L1_lr0.0001/'
model = root_dir + 'RegMotNew_ela1_train200_hcp400_ms_B4_nw0_Size182_ConvN_C16_256_Lin40_50_D0_BN_Loss_L1_lr0.0001/'
# model = root_dir + 'RegMotNew_mvt_train_hcp400_ms_B4_nw0_Size182_ConvN_C16_256_Lin40_50_D0_BN_Loss_L1_lr0.0001'
# model = gdir(root_dir, 'New_ela1_train_hcp400_T1')
# model = gdir(root_dir, 'New_resc.*hcp.*T1.*DC0.1')
model = gdir(root_dir, 'rescal.*cati')
model = gdir(root_dir, 'RegMotNew_ela1.*cati_ms')
model = gdir(root_dir, 'Mask_rescale_ela1.*hcp.*T1')
model = gdir(root_dir, 'Mask.*ela1.*hcp.*ms')
model = gdir(root_dir, 'Reg.*D0')

# saved_models = gfile(model, '_ep(30|20|10)_.*000.pt$')
saved_models = gfile(model, '_ep9_.*000.pt$')
saved_models = saved_models[0:10]
# saved_models = gfile(model, '_ep(24|46|48)_.*000.pt$')
# saved_models = gfile(model, '_ep(10|11)_.*000.pt$')
# saved_models = gfile(model, '_ep(10|9)_.*000.pt$')
saved_models = gfile(model, '_ep([789]|10)_.*4500.pt$')

saved_models = []
for mm in model:
    ss_models = gfile(mm, '_ep.*pt$')
    fresV_sorted, b, c = get_ep_iter_from_res_name(ss_models, 20000)
    ss_models = list(fresV_sorted[-10:])
    saved_models = ss_models + saved_models

# name_list_val = ['ela1_val_hcp200_ms']
name_list_val = ['mvt_val_hcp200_ms', 'ela1_val_hcp200_ms']
name_list_val = ['ela1_val_hcp200_T1', 'mvt_val_hcp200_T1']
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.transforms import Compose
import torchio as tio
from torchio.data.io import write_image, read_image
from utils_file import get_parent_path, gfile, gdir
from slices_2 import do_figures_from_file
from utils import reduce_name_list, remove_string_from_name_list
from utils_plot_results import get_ep_iter_from_res_name, plot_resdf, plot_train_val_results, \
    transform_history_to_factor, parse_history
import commentjson as json

# res_valOn
dd = gfile('/network/lustre/dtlake01/opendata/data/ds000030/rrr/CNN_cache_new', '_')
dir_fig = '/network/lustre/iss01/cenir/analyse/irm/users/romain.valabregue/QCcnn/NN_regres_motion/figure/motion_regress/eval2/'
dir_fig = '/network/lustre/iss01/cenir/analyse/irm/users/romain.valabregue/QCcnn/NN_regres_random_noise/figure2/'
data_name_list = get_parent_path(dd)[1]

dres_reg_exp, figname = ['Reg.*D0_DC0'], ['noise']
dres_reg_exp, figname = ['.*hcp.*ms', '.*hcp.*T1', 'cati.*ms', 'cati.*T1'], ['hcp_ms', 'hcp_T1', 'cati_ms', 'cati_T1']

sns.set(style="whitegrid")
csv_regex = 'res_valOn_'
for rr, fign in zip(dres_reg_exp, figname):
    dres = gdir(dqc, rr)
    resname = get_parent_path(dres)[1]
    resname = remove_string_from_name_list(resname, [
        'RegMotNew_',
        'Size182_ConvN_C16_256_Lin40_50_',
        '_B4',
    ])
for rr, fign in zip(dres_reg_exp, figname):
    dres = gdir(dqc, rr)
    resname = get_parent_path(dres)[1]
    print(len(resname))
    print(resname)
    # sresname = [rr[rr.find('hcp400_')+7: rr.find('hcp400_')+17] for rr in resname]; sresname[2] += 'le-4'
    sresname = resname
    commonstr, sresname = reduce_name_list(sresname)
    print('common str {}'.format(commonstr))

    target = 'ssim'
    target_scale = 1
    # target = 'random_noise'; target_scale = 10
    legend_str = []
    col = ['b', 'g', 'r', 'c', 'm', 'y', 'k', 'w']
    for ii, oneres in enumerate(dres):
        fresV = gfile(oneres, 'res_val')
        fresT = gfile(oneres, 'train.*csv')
        fresV = gfile(oneres, 'res_val_')
        is_train = len(fresT) > 0
        if is_train:
            resT = [pd.read_csv(ff) for ff in fresT]
        resdir, resname = get_parent_path(fresV)
        nbite = len(resT[0]) if is_train else 80000
        fresV_sorted, b, c = get_ep_iter_from_res_name(resname, nbite)
        resV = [pd.read_csv(resdir[0] + '/' + ff) for ff in fresV_sorted]
        ite_tot = c + b * nbite
        if is_train:
            errorT = np.hstack([np.abs(rr.model_out.values - rr.loc[:, target].values * target_scale)
                                for rr in resT])
            ite_tottt = np.hstack([0, ite_tot])
if __name__ == "__main__":
    import pandas as pd
    import utils_file as uf
    from slices_2 import *

    if 0 == 3:
        ds = pd.read_csv('/home/romain.valabregue/datal/QCcnn/res/res_cat12seg_18999.csv')
        rootdir = '/network/lustre/iss01/scratch/CENIR/users/romain.valabregue/dicom/nifti_proc'
        ind_sel = np.random.randint(0, ds.shape[0], 2)
        din = ds.iloc[ind_sel, 1]  # [ds.iloc[ii, 1] for ii in ind_sel]
        din_list = din.tolist()
        din_list = ["/network/lustre/iss01/" + s for s in din_list]
        fin = uf.gfile(din_list, '^s.*nii.gz')
        print(din)
        faff = uf.gfile(din_list, '^aff.*txt')
        fmask = uf.gfile(din_list, '^niw_Mean')
        # fmask = [None for i in range(0, 3, 1)]
        l_view = [
            ("sag", "vox", 0.5), ("sag", "voxmm", -32), ("sag", "mm", -32), ("sag", "mm_mni", -32),
            ("ax", "vox", 0.5), ("ax", "voxmm", -43), ("ax", "mm", -43), ("ax", "mm_mni", -43),
            ("cor", "vox", 0.5), ("cor", "voxmm", 54),
        ]
# write separate json files
res = '/home/romain.valabregue/datal/PVsynth/validation_set/RES_1mm/'
res = ''
for k in conf_all.keys():
    filename = res + k + '.json'
    with open(filename, 'w') as file:
        json.dump(conf_all[k], file, indent=4, sort_keys=True)

filename = res + 'grid_search.json'
with open(filename, 'w') as file:
    json.dump(gs, file, indent=4, sort_keys=True)

# generating jobs for validation
from utils_file import gdir, gfile, get_parent_path

f = gfile('/network/lustre/iss01/cenir/analyse/irm/users/romain.valabregue/PVsynth/training/RES_1mm_tissue/pve_synth_data_92_common_noise_no_gamma/results_cluster',
          'model.*tar')
d = '/network/lustre/iss01/cenir/analyse/irm/users/romain.valabregue/PVsynth/training/RES_1mm_tissue/'
dres = gdir(d, ['.*', 'result'])
dresname = get_parent_path(dres, level=2)[1]
dresname = [dd.split('_')[0] + '_' + dd.split('_')[1] for dd in dresname]

for one_res, resn in zip(dres, dresname):
    f = gfile(one_res, 'model.*tar')
    fname = get_parent_path(f)[1]
    for ff in f:
        print('"{}",'.format(ff))

for one_res, resn in zip(dres, dresname):
    f = gfile(one_res, 'model.*tar')
    fname = get_parent_path(f)[1]
def set_data_loader(self, train_csv_file='', val_csv_file='', transforms=None,
                    batch_size=1, num_workers=0, par_queue=None,
                    save_to_dir=None, load_from_dir=None, replicate_suj=0,
                    shuffel_train=True, get_condition_csv=None, get_condition_field='',
                    get_condition_nb_wanted=1 / 4, collate_fn=None,
                    add_to_load=None, add_to_load_regexp=None):

    if not isinstance(transforms, torchvision.transforms.transforms.Compose) and transforms is not None:
        transforms = Compose(transforms)

    if load_from_dir is not None:
        if type(load_from_dir) == str:
            load_from_dir = [load_from_dir, load_from_dir]
        fsample_train = gfile(load_from_dir[0], 'sample.*pt')
        fsample_val = gfile(load_from_dir[1], 'sample.*pt')
        # random.shuffle(fsample_train)
        # fsample_train = fsample_train[0:10000]

        if get_condition_csv is not None:
            res = pd.read_csv(load_from_dir[0] + '/' + get_condition_csv)
            cond_val = res[get_condition_field].values
            y = np.linspace(np.min(cond_val), np.max(cond_val), 101)
            nb_wanted_per_interval = int(np.round(len(cond_val) * get_condition_nb_wanted / 100))
            y_select = []
            for i in range(len(y) - 1):
                indsel = np.where((cond_val > y[i]) & (cond_val < y[i + 1]))[0]
                nb_select = len(indsel)
                if nb_select < nb_wanted_per_interval:
                    print(' only {} / {} for interval {} {:,.3f} | {:,.3f} '.format(
                        nb_select, nb_wanted_per_interval, i, y[i], y[i + 1]))
                    y_select.append(indsel)
                else:
                    pind = np.random.permutation(range(0, nb_select))
                    y_select.append(indsel[pind[0:nb_wanted_per_interval]])
                # print('{} selecting {}'.format(i, len(y_select[-1])))
            ind_select = np.hstack(y_select)
            y = cond_val[ind_select]
            fsample_train = [fsample_train[ii] for ii in ind_select]
            self.log_string += '\nfinal selection {} i.e. {:,.3f} % instead of {:,.3f} %'.format(
                len(y), len(y) / len(cond_val) * 100, get_condition_nb_wanted * 100)
            # conditions = [("MSE", ">", 0.0028), ]
            # select_ind = apply_conditions_on_dataset(res, conditions)
            # fsel = [fsample_train[ii] for ii, jj in enumerate(select_ind) if jj]

        self.log_string += '\nloading {} train sample from {}'.format(len(fsample_train), load_from_dir[0])
        self.log_string += '\nloading {} val sample from {}'.format(len(fsample_val), load_from_dir[1])

        train_dataset = ImagesDataset(fsample_train, load_from_dir=load_from_dir[0],
                                      transform=transforms, add_to_load=add_to_load,
                                      add_to_load_regexp=add_to_load_regexp)
        self.train_csv_load_file_train = fsample_train
        val_dataset = ImagesDataset(fsample_val, load_from_dir=load_from_dir[1],
                                    transform=transforms, add_to_load=add_to_load,
                                    add_to_load_regexp=add_to_load_regexp)
        self.train_csv_load_file_val = fsample_val

    else:
        data_parameters = {'image': {'csv_file': train_csv_file}, }
        data_parameters_val = {'image': {'csv_file': val_csv_file}, }
        paths_dict, info = get_subject_list_and_csv_info_from_data_prameters(
            data_parameters, fpath_idx='filename')
        paths_dict_val, info_val = get_subject_list_and_csv_info_from_data_prameters(
            data_parameters_val, fpath_idx='filename', shuffle_order=False)

        if replicate_suj:
            lll = []
            for i in range(0, replicate_suj):
                lll.extend(paths_dict)
            paths_dict = lll
            self.log_string += 'Replicating train dataSet {} times, new length is {}'.format(
                replicate_suj, len(lll))

        train_dataset = ImagesDataset(paths_dict, transform=transforms, save_to_dir=save_to_dir)
        val_dataset = ImagesDataset(paths_dict_val, transform=transforms, save_to_dir=save_to_dir)

    self.res_name += '_B{}_nw{}'.format(batch_size, num_workers)

    if par_queue is not None:
        self.patch = True
        windows_size = par_queue['windows_size']
        if len(windows_size) == 1:
            windows_size = [windows_size[0], windows_size[0], windows_size[0]]
        train_queue = Queue(train_dataset, par_queue['queue_length'],
                            par_queue['samples_per_volume'], windows_size,
                            ImageSampler, num_workers=num_workers, verbose=self.verbose)
        val_queue = Queue(val_dataset, par_queue['queue_length'], 1, windows_size,
                          ImageSampler, num_workers=num_workers,
                          shuffle_subjects=False, shuffle_patches=False, verbose=self.verbose)
        self.res_name += '_spv{}'.format(par_queue['samples_per_volume'])
        self.train_dataloader = DataLoader(train_queue, batch_size=batch_size,
                                           shuffle=shuffel_train, collate_fn=collate_fn)
        self.val_dataloader = DataLoader(val_queue, batch_size=batch_size,
                                         shuffle=False, collate_fn=collate_fn)
    else:
        self.train_dataset = train_dataset
        self.train_dataloader = DataLoader(train_dataset, batch_size=batch_size,
                                           shuffle=shuffel_train, num_workers=num_workers,
                                           collate_fn=collate_fn)
        self.val_dataloader = DataLoader(val_dataset, batch_size=batch_size,
                                         shuffle=False, num_workers=num_workers,
                                         collate_fn=collate_fn)
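# Hedged usage sketch (assumes the do_training class imported from doit_train
# elsewhere in this file owns set_data_loader; the CSV paths are hypothetical):
if 0 == 3:
    doit = do_training('/tmp/res', 'demo', verbose=True)
    doit.set_data_loader(train_csv_file='/path/to/train.csv', val_csv_file='/path/to/val.csv',
                         transforms=None, batch_size=4, num_workers=2)
    batch = next(iter(doit.train_dataloader))  # one batch, to sanity-check shapes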
# plt.imshow(fig, origin="lower")
# rrr
import pandas as pd
import utils_file as uf
import importlib

importlib.reload(uf)

ds = pd.read_csv('/network/lustre/iss01/cenir/analyse/irm/users/romain.valabregue/dicom/res/res_cat12seg_18999.csv')
rootdir = '/network/lustre/iss01/scratch/CENIR/users/romain.valabregue/dicom/nifti_proc'
ind_sel = np.random.randint(0, ds.shape[0], 2)
din = ds.iloc[ind_sel, 1]  # [ds.iloc[ii, 1] for ii in ind_sel]
fin = uf.gfile(din.tolist(), '^s.*nii')
faff = uf.gfile(din.tolist(), '^aff.*txt')

l_view = [("sag", "mm", 0) for i in range(0, 3, 1)]
l_view = [
    ("sag", "vox", 0.5), ("sag", "mm", 0), ("sag", "mm_mni", 0),
    ("ax", "vox", 0.5), ("ax", "mm", 0), ("ax", "mm_mni", 0),
    ("cor", "vox", 0.5), ("cor", "mm", 0), ("cor", "mm_mni", 0),
]
weights1 = prefix + "NN_saved_pytorch/modelV2_one256_msbrain_098_equal_BN0_b4_BCEWithLogitsLoss_SDG/quadriview_ep10.pt"
weights1 = prefix + "modelV2_last128_msbrain_098_equal_BN0_b4_BCEWithLogitsLoss_SDG/quadriview_ep10.pt"
name = "cati_modelV2_last128_msbrain_098_equal_BN05_b4_BCEWithLogitsLoss_SDG"
resdir = prefix + "predict_torch/" + name + '/'
py_options = ' --BN_momentum 0.5 --do_reslice --apply_mask --model_type=2'  # --use_gpu 0

# for CATI
tab = pd.read_csv(prefix + "CATI_datasets/all_cati.csv", index_col=0)
clip_val = tab.meanW.values + 3 * tab.stdW.values
dcat = gdir(tab.cenir_QC_path, 'cat12')
# dspm = gdir(tab.cenir_QC_path, 'spm')
fms = gfile(dcat, '^ms.*ni', opts={"items": 1})
fmask = gfile(dcat, '^mask_brain.*gz', opts={"items": 1})
# fms = gfile(dspm, '^ms.*ni', opts={"items": 1})
faff = gfile(dcat, '^aff.*txt', opts={"items": 1})
fref = '/network/lustre/iss01/cenir/analyse/irm/users/romain.valabregue/HCPdata/suj_100307/T1w_1mm.nii.gz'

# for ABIDE
dcat = gdir('/network/lustre/dtlake01/opendata/data/ABIDE/cat12', ['^su', 'anat'])
# for ds30
dcat = gdir('/network/lustre/dtlake01/opendata/data/ds000030/cat12', ['^su', 'anat'])

# for validation
tab = pd.read_csv(prefix + "Motion_brain_ms_val_hcp200.csv", index_col=0)
fms = tab.img_file.values
fmask = fms
faff = fms
root_dir = '/network/lustre/iss01/cenir/analyse/irm/users/romain.valabregue/QCcnn/NN_regres_random_noise/'
model = gdir(root_dir, 'Reg.*D0_DC')

# saved_models = []
# for mm in model:
#     ss_models = gfile(mm, '_ep.*pt$')
#     nb_it = 8000
#     fresV_sorted, b, c = get_ep_iter_from_res_name(ss_models, nb_it)
#     nb_iter = b * nb_it + c
#     ii = np.where(nb_iter > 200000)[1:8]
#     ss_models = list(ss_models[ii])
#     # ss_models = list(fresV_sorted[-8:])
#     saved_models = ss_models + saved_models
saved_models = gfile(model, '_ep27_.*pt$')

tlist, tname = get_tranformation_list(choice=[1, 2])

for saved_model in saved_models:
    doit.set_model_from_file(saved_model, cuda=cuda)
    doit.val_dataloader = doit.train_dataloader
    basename = 'res_valOn_val_hcp_ms' + name_suffix
    # doit.eval_regress_motion(1000, 10, target='random_noise', basename=basename)
    doit.eval_multiple_transform(1000, 10, target='random_noise', basename=basename,
                                 transform_list=tlist, transform_list_name=tname)
resdir_mvt = res_dir + '/mvt_param/'
resdir_fig = res_dir + '/fig/'
try:  # on the cluster, all jobs may run the mkdir at the same time ...
    if not os.path.isdir(resdir_mvt):
        os.mkdir(resdir_mvt)
    if not os.path.isdir(resdir_fig):
        os.mkdir(resdir_fig)
except Exception:
    pass

transfo = get_motion_transform(type=motion_type)

torch.manual_seed(seed)
np.random.seed(seed)

dir_img = get_parent_path([fin])[0]
fm = gfile(dir_img, '^mask', {"items": 1})
fp1 = gfile(dir_img, '^p1', {"items": 1})
fp2 = gfile(dir_img, '^p2', {"items": 1})
if len(fm) == 0:  # may be in a cat12 subdir (like for HCP)
    fm = gfile(dir_img, '^brain_T1', {"items": 1})
    # dir_cat = gdir(dir_img, 'cat12')
    # fm = gfile(dir_cat, '^mask_brain', {"items": 1})
    # fp1 = gfile(dir_cat, '^p1', {"items": 1})
    # fp2 = gfile(dir_cat, '^p2', {"items": 1})

one_suj = {'image': Image(fin, INTENSITY), 'brain': Image(fm[0], LABEL)}
if len(fp1) == 1:
    one_suj['p1'] = Image(fp1[0], LABEL)
if len(fp2) == 1:
    one_suj['p2'] = Image(fp2[0], LABEL)
transforms = [
    # ZNormalization(verbose=verbose),
    RandomMotion(proportion_to_augment=1, seed=1, verbose=True),
]
sample = dataset[0]
for i, transform in enumerate(transforms):
    transformed = transform(sample)
    name = transform.__class__.__name__
    path = f'/tmp/{i}_{name}_abs.nii.gz'
    dataset.save_sample(transformed, dict(T1=path))

# histogram normalization
from torchio.transforms.preprocessing.histogram_standardization import train, normalize

suj = gdir('/network/lustre/iss01/cenir/analyse/irm/users/romain.valabregue/HCPdata', '^suj')
allfiles = gfile(suj, '^T1w_1mm.nii.gz')
allfiles_mask = gfile(suj, '^brain_T1w_1mm.nii.gz')
testf = allfiles[0:300]

outname = '/data/romain/data_exemple/landmarks_hcp300_res100.npy'
# outname = '/data/romain/data_exemple/landmarks_hcp300_res100_cutof01.npy'
landmark = train(testf, output_path=outname, mask_path=allfiles_mask, cutoff=(0, 1))

nii = nib.load(testf[0]).get_fdata(dtype=np.float32)
niim = normalize(nii, landmark)
perc_database = np.load(outname)
mm = np.mean(perc_database, axis=1)
mr = np.tile(mm, (perc_database.shape[1], 1)).T
""" import nibabel as nb import numpy as np import pandas as pd import sys, os, logging from utils_file import get_parent_path, gfile from doit_train import do_training, get_motion_transform, get_train_and_val_csv, get_cache_dir from torchio.transforms import CropOrPad root_dir = '/network/lustre/iss01/cenir/analyse/irm/users/romain.valabregue/QCcnn/NN_regres_motion/' if __name__ == '__main__': model = root_dir + 'RegMotNew_ela1_train200_hcp400_ms_B4_nw0_Size182_ConvN_C16_256_Lin40_50_D0_BN_Loss_L1_lr0.0001/' saved_models = gfile(model, '_ep9_.*000.pt$') name_list_val = ['mvt_val_hcp200_ms', 'ela1_val_hcp200_ms'] dir_cache = '/network/lustre/dtlake01/opendata/data/ds000030/rrr/CNN_cache/' batch_size, num_workers = 4, 0 cuda, verbose = True, True target_shape, mask_key = (182, 218, 182), 'brain' tc = None # [CropOrPad(target_shape=target_shape, mask_name=mask_key), ] for data_name_val in name_list_val: dir_sample = '{}/{}/'.format(dir_cache, data_name_val) doit = do_training('/tmp/', 'not_use', verbose=True)