def select_and_write_data_bvals_bvecs(data_fname, bvals_file, bvecs_file, out_dir=None, bval_max_cutoff=3500, CLOBBER=False, IN_MEM=False):
    """
    Create a subset of the data containing only the volumes whose bvals fall below bval_max_cutoff.
    Writes the selected data and the matching bvals/bvecs to out_dir (defaults to the input directory).
    Uses fslselectvols on the command line by default; set IN_MEM=True to select the volumes in memory instead.
    Returns the output filename, the selected bvals, and the selected bvecs.
    """
    import os
    import subprocess
    import numpy as np
    import nibabel as nb

    if out_dir is None:
        out_dir = os.path.dirname(data_fname)
    create_dir(out_dir)
    bvals = np.loadtxt(bvals_file)
    bvecs = np.loadtxt(bvecs_file)
    vol_list = [i for i, v in enumerate(bvals) if v < bval_max_cutoff]

    #rename and point to the correct directory
    out_fname = os.path.basename(data_fname).split(".nii")[0] + "_bvals_under" + str(bval_max_cutoff) + ".nii.gz"
    bvals_fname = os.path.basename(bvals_file).split(".")[0] + "_bvals_under" + str(bval_max_cutoff)
    bvecs_fname = os.path.basename(bvecs_file).split(".")[0] + "_bvals_under" + str(bval_max_cutoff)
    out_fname = os.path.join(out_dir, out_fname)
    bvals_fname = os.path.join(out_dir, bvals_fname)
    bvecs_fname = os.path.join(out_dir, bvecs_fname)

    print('Selecting appropriate volumes and bvals/bvecs for DKE.')
    if len(vol_list) == nb.load(data_fname).shape[3]:  #if we are going to select all of the volumes anyway, don't bother copying them!
        print("All bvals selected, using original data file as input")
        out_fname = data_fname
        np.savetxt(bvals_fname, bvals[bvals < bval_max_cutoff])
        np.savetxt(bvecs_fname, bvecs[:, bvals < bval_max_cutoff])
    else:
        print('Output to file: ' + out_fname)
        if not IN_MEM:  #if the data may be too big for memory, use the fsl command-line tool
            vol_list = str(vol_list).strip('[]').replace(" ", "")  #strip the []s and remove spaces to format as expected by fslselectvols
            cmd_input = ['fslselectvols', '-i', data_fname, '-o', out_fname, '--vols=' + vol_list]
            print(cmd_input)
            if not os.path.isfile(out_fname) or CLOBBER:
                np.savetxt(bvals_fname, bvals[bvals < bval_max_cutoff])
                np.savetxt(bvecs_fname, bvecs[:, bvals < bval_max_cutoff])
                subprocess.call(cmd_input)
            else:
                print("File exists, not overwriting.")
        else:
            if not os.path.isfile(out_fname) or CLOBBER:
                data, aff = imgLoad(data_fname)
                niiSave(out_fname, data[..., vol_list], aff, CLOBBER=CLOBBER)
                np.savetxt(bvals_fname, bvals[bvals < bval_max_cutoff])
                np.savetxt(bvecs_fname, bvecs[:, bvals < bval_max_cutoff])
            else:
                print("File exists, not overwriting.")
    return out_fname, bvals[bvals < bval_max_cutoff], bvecs[:, bvals < bval_max_cutoff]
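# A minimal usage sketch for select_and_write_data_bvals_bvecs. The paths, cutoff, and output
# directory below are hypothetical and only illustrate the calling convention; the function also
# relies on the module-level helpers create_dir/imgLoad/niiSave.
def _example_select_and_write_data_bvals_bvecs():
    out_fname, sel_bvals, sel_bvecs = select_and_write_data_bvals_bvecs(
        '/data/subj001/dwi.nii.gz',   # hypothetical 4D diffusion dataset
        '/data/subj001/bvals',        # hypothetical FSL-style bvals file
        '/data/subj001/bvecs',        # hypothetical FSL-style bvecs file
        out_dir='/data/subj001/dke',
        bval_max_cutoff=2500,         # keep only volumes with b < 2500
        IN_MEM=True)                  # select volumes in memory rather than via fslselectvols
    print(out_fname, sel_bvals.shape, sel_bvecs.shape)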
def run_amico_noddi_dipy_v2(subject_root_dir, dwi_fnames, brain_mask_fnames, bvals_fnames, bvecs_fnames, out_root_dir,
                            subject_dirs=None, b0_thr=0, bStep=[0, 1000, 2000, 3000], nthreads=8, mem=2.5,
                            CLOBBER=False, SUBMIT=False):
    """
    Updated version that takes the parameters individually so that the files can be stored however you want.

    :param subject_root_dir:   root directory of the subject data (passed to amico.Evaluation)
    :param dwi_fnames:         list of DWI filenames (a single filename string is also accepted)
    :param brain_mask_fnames:  list of brain mask filenames, matched to dwi_fnames
    :param bvals_fnames:       list of bvals filenames, or a single filename used for all datasets
    :param bvecs_fnames:       list of bvecs filenames, or a single filename used for all datasets
    :param out_root_dir:       root directory for output (a subdirectory is created per ID)
    :param subject_dirs:       currently unused (see the commented-out block below)
    :param b0_thr:             b0 threshold passed to ae.load_data
    :param bStep:              b-value shells passed to amico.util.fsl2scheme
    :param nthreads:           number of threads requested in the qsub submission
    :param mem:                memory requested in the qsub submission
    :param CLOBBER:            force overwrite (currently only changes the reported message)
    :param SUBMIT:             submit to SGE (False = only create the .py and .sub files)
    :return:                   nothing; prints the path of the generated .py submission script
    """
    #No... requires closer to 36GB for the HCP data
    #when requesting cores, select 24 and take the whole memory (time it...)
    #currently requires the compiled version of spams that I have installed locally
    import os
    import sys
    from TractREC import create_dir
    spams_path = '/home/cic/stechr/Documents/code/spams-python'
    #import spams #this is added here so that the requirements.txt is updated
    import amico #this is added here so that the requirements.txt is updated
    #working_amico_path='/home/cic/stechr/Documents/code/amico_cjs/AMICO/python/amico'
    caller_path = os.path.dirname(os.path.abspath(__file__))  #path to this script, so we can add it to a sys.path.append statement
    #caller_path="caller_path_test"
    #spams is required by amico, along with a specific numpy version (1.10 has a fortran issue that crops up here)
    sys.path.append(spams_path)  #EW! TODO: make me permanent at some point...

    # if subject_dirs is None:
    #     #subject_dirs=['100307'] #TODO, create a list of subjects from the subject_root_dir
    #     subject_dirs=os.listdir(subject_root_dir)
    #     if "kernels" in subject_dirs: subject_dirs.remove("kernels") #don't try to do this for the kernels directory, which AMICO hard-codes here

    single_bvals = False
    single_bvecs = False
    if isinstance(dwi_fnames, basestring):
        dwi_fnames = [dwi_fnames]
    if isinstance(bvals_fnames, basestring):  #just to keep the same calling convention as the other lists, convert to a list of one
        bvals_fnames = [bvals_fnames]
        single_bvals = True
    if isinstance(bvecs_fnames, basestring):
        bvecs_fnames = [bvecs_fnames]
        single_bvecs = True
    #amico.core.setup()

    for idx, dwi_fname in enumerate(dwi_fnames):  #ID is the subdirectory off of subject_root_dir that contains each subject
        #The equivalent in-process calls, kept here for reference:
        #  ae=amico.Evaluation(subject_root_dir,ID)
        #  amico.util.fsl2scheme(bvals_fname, bvecs_fname,scheme_fname,bStep)
        #  ae.load_data(dwi_filename='data.nii.gz',scheme_filename='sanitised.scheme',mask_filename='nodif_brain_mask.nii.gz',b0_thr=0) #loading takes about 4-5 mins (HCP) and about 8GB
        #  ae.set_model("NODDI")
        #  ae.generate_kernels() #single core only, 2.5mins (HCP); only needs to be done once if all data was acquired the same way
        #  ae.load_kernels()     #resamples the LUT for this subject - uses a lot of mem and all available cores - 2.5mins on 8
        #  ae.fit()              #numpy 1.10.0 will not work; uses all cores and about 8GB for HCP data
        #  ae.save_results()     #then move the results to the proper output directory so that we keep with our processing stream
        #dwi_fname=os.path.join(subject_root_dir,ID,"data.nii.gz")
        #bvals_fname=os.path.join(subject_root_dir,ID,"bvals")
        #bvecs_fname=os.path.join(subject_root_dir,ID,"bvecs")
        if single_bvals:
            bvals_fname = bvals_fnames[0]
        else:
            bvals_fname = bvals_fnames[idx]
        if single_bvecs:
            bvecs_fname = bvecs_fnames[0]
        else:
            bvecs_fname = bvecs_fnames[idx]
        ID = os.path.basename(dwi_fname).split(".")[0]
        scheme_fname = os.path.join(os.path.dirname(bvals_fname), "bvals_bvecs_sanitised.scheme")
        mask_fname = brain_mask_fnames[idx]
        out_dir = os.path.join(out_root_dir, ID)
        create_dir(out_dir)
        #bStep=[0,1000,2000,3000]
        #b0_thr=0
        model = "NODDI"

        code = ["#!/usr/bin/python",
                "",
                "import sys",
                "sys.path.append('{0}')".format(caller_path),
                "sys.path.append('{0}')".format(spams_path),
                "import spams",
                "import amico"]
        code.append("")
        code.append("print(amico.__file__)")
        code.append("amico.core.setup()")
        code.append("ae=amico.Evaluation('{subject_root_dir}','{ID}',output_path='{output_dir}')"
                    .format(subject_root_dir=subject_root_dir, ID=ID, output_dir=out_dir))
        code.append("amico.util.fsl2scheme('{bvals_fname}', '{bvecs_fname}','{scheme_fname}',{bStep})"
                    .format(bvals_fname=bvals_fname, bvecs_fname=bvecs_fname, scheme_fname=scheme_fname, bStep=bStep))
        code.append("ae.load_data(dwi_filename='{dwi_fname}',scheme_filename='{scheme_fname}',mask_filename='{mask_fname}',b0_thr={b0_thr})"
                    .format(dwi_fname=dwi_fname, scheme_fname=scheme_fname, mask_fname=mask_fname, b0_thr=b0_thr))
        code.append("ae.set_model('{model}')".format(model=model))
        code.append("ae.set_config('OUTPUT_path','{OUTPUT_path}')".format(OUTPUT_path=out_dir))
        code.append("ae.generate_kernels()")  #single core only, 2.5mins (HCP); only needs to be done once if all data was acquired the same way (the kernels are fit to the bvecs), but there is no obvious way to save it, so we do it every time
        code.append("ae.load_kernels()")  #resamples the LUT for this subject - uses a lot of mem and all available cores - 2.5mins on 8
        code.append("ae.fit()")  #fit the model
        code.append("ae.save_results()")
        #return code

        py_sub_full_fname = create_python_exec(out_dir=out_root_dir, code=code, name='NOD_' + ID)  #write the .py outside of the dir where NODDI will write, because it clears that dir!
        print("Creating submission files and following your instructions for submission to the queue. (CLOBBER=" + str(CLOBBER) + ")")
        print(" (SUBMIT=" + str(SUBMIT) + ")")
        submit_via_qsub(template_text=None, code="python " + py_sub_full_fname, name='NOD_' + ID,
                        nthreads=nthreads, mem=mem, outdir=out_dir,
                        description="NODDI estimation with AMICO", SUBMIT=SUBMIT)
        print(py_sub_full_fname)
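# A minimal usage sketch for run_amico_noddi_dipy_v2. All paths and the ID below are hypothetical;
# the function assumes a working qsub/SGE environment plus the locally compiled spams and amico
# packages referenced above.
def _example_run_amico_noddi_dipy_v2():
    run_amico_noddi_dipy_v2('/data/hcp',                                   # hypothetical subject root
                            ['/data/hcp/100307/data.nii.gz'],              # DWI file(s)
                            ['/data/hcp/100307/nodif_brain_mask.nii.gz'],  # matching brain mask(s)
                            '/data/hcp/100307/bvals',                      # single bvals used for all datasets
                            '/data/hcp/100307/bvecs',                      # single bvecs used for all datasets
                            '/results/noddi',                              # output root; one subdir per ID
                            bStep=[0, 1000, 2000, 3000],
                            nthreads=8, mem=4,
                            SUBMIT=False)                                  # only create the .py/.sub files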
def run_diffusion_kurtosis_estimator_dipy(data_fnames, bvals_fnames, bvecs_fnames, out_root_dir, IDs, bval_max_cutoff=3200,
                                          slices='all', nthreads=4, mem=3.75, SMTH_DEN=None, IN_MEM=True, SUBMIT=False, CLOBBER=False):
    """
    Creates .py and .sub submission files for submission of DKE to SGE, and submits them if SUBMIT=True.
    Pass matched lists of data filenames, bval filenames, and bvec filenames, along with a root directory for the output.
    NOTE: CLOBBER and SUBMIT interact; both need to be True to submit to SGE when the .py file already exists in the output dir.

    INPUT:
        - data_fnames       list of diffusion data files
        - bvals_fnames      list of bvals files
        - bvecs_fnames      list of bvecs files
        - out_root_dir      root directory of output (see below for subdir name)
        - IDs               list of IDs used to create subdir names and to confirm the full filenames of data, bvals, and bvecs,
                            so they must be unique (order does not matter, full search); only IDs included in this list are
                            processed (even if they are only a subset of data_fnames etc.)
        - bval_max_cutoff   bval cutoff for selecting the volumes included in DKE; output filenames are derived from it
        - slices            list of slice indices to process, or 'all' (XXX currently only 'all' works)
        - SMTH_DEN          list selecting smoothing and/or denoising ['smth','nlmeans',''];
                            may run out of memory with large datasets (i.e., HCP)
        - IN_MEM            perform the volume selection (based on bval_max_cutoff) in memory, or with fslselectvols on the command line
        - SUBMIT            submit to SGE (False = just create the .py and .sub submission files)
        - CLOBBER           force overwrite of output files (.py and .sub files are always overwritten regardless)
    RETURNS:
        - nothing, but dumps all DKE calcs (MK, RK, AK) in out_dir/ID
    """
    import os
    caller_path = os.path.dirname(os.path.abspath(__file__))  #path to this script, so we can add it to a sys.path.append statement
    print("Running the dipy-based diffusion kurtosis estimator.")

    for idx, ID in enumerate(IDs):
        #we use the IDs as our master list to look up files in the provided lists; the full filename must contain the ID SOMEWHERE!
        fname = [s for s in data_fnames if ID in s]
        bvals = [s for s in bvals_fnames if ID in s]
        bvecs = [s for s in bvecs_fnames if ID in s]
        out_dir = os.path.join(out_root_dir, ID)
        create_dir(out_dir)

        #check that we are pulling the correct files
        if len(fname) > 1 or len(bvals) > 1 or len(bvecs) > 1:
            print("ERROR: more than one file matches this ID, which should not happen: " + ID)
            DATA_EXISTS = False
        elif len(fname) < 1 or len(bvals) < 1 or len(bvecs) < 1:
            print("ERROR: no matching file for this ID: " + ID)
            DATA_EXISTS = False
        else:
            fname = fname[0]  #break it out of the list of one
            bvals = bvals[0]
            bvecs = bvecs[0]
            DATA_EXISTS = True
            print(ID)
            print(" input:  " + fname)
            print(" input:  " + bvals)
            print(" input:  " + bvecs)
            print(" output: " + out_dir)

        if DATA_EXISTS:
            code = ["#!/usr/bin/python",
                    "",
                    "import sys",
                    "sys.path.append('{0}')".format(caller_path),
                    "import preprocessing as pr"]
            code.append("pr.DKE('{data_fname}','{bvals_fname}','{bvecs_fname}',bval_max_cutoff={bval_max_cutoff},out_dir='{out_dir}',slices='{slices}',SMTH_DEN={SMTH_DEN},IN_MEM={IN_MEM})"
                        .format(data_fname=fname, bvals_fname=bvals, bvecs_fname=bvecs, bval_max_cutoff=bval_max_cutoff,
                                out_dir=out_dir, slices=slices, SMTH_DEN=SMTH_DEN, IN_MEM=IN_MEM))
            py_sub_full_fname = create_python_exec(out_dir=out_dir, code=code, name='DKE_' + ID)

            #XXX CLOBBER currently does nothing here, because the .py file is always created
            print("Creating submission files and following your instructions for submission to the queue. (CLOBBER=" + str(CLOBBER) + ")")
            print(" (SUBMIT=" + str(SUBMIT) + ")")
            submit_via_qsub(template_text=None, code="python " + py_sub_full_fname, name='DKE_' + ID,
                            nthreads=nthreads, mem=mem, outdir=out_dir,
                            description="Diffusion kurtosis estimation with dipy", SUBMIT=SUBMIT)
        print("")
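# A minimal usage sketch for run_diffusion_kurtosis_estimator_dipy. The file lists and IDs below are
# hypothetical; each ID must appear somewhere in its matching data/bvals/bvecs filenames.
def _example_run_diffusion_kurtosis_estimator_dipy():
    run_diffusion_kurtosis_estimator_dipy(['/data/dwi/subj001_dwi.nii.gz', '/data/dwi/subj002_dwi.nii.gz'],
                                          ['/data/dwi/subj001.bvals', '/data/dwi/subj002.bvals'],
                                          ['/data/dwi/subj001.bvecs', '/data/dwi/subj002.bvecs'],
                                          '/results/dke',              # one subdirectory per ID is created here
                                          ['subj001', 'subj002'],      # IDs used to match the files above
                                          bval_max_cutoff=3200,
                                          SMTH_DEN=['nlmeans'],        # also run DKE on denoised data
                                          IN_MEM=True,
                                          SUBMIT=False)                # only write the .py/.sub files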
def DKE(data_fname, bvals_fname, bvecs_fname, bval_max_cutoff=3200, out_dir=None, slices='all', SMTH_DEN=None, IN_MEM=False):
    """
    DKE with dipy (dipy.__version__>=0.10.0); outputs MK, AK, and RK for the native data and, optionally,
    for denoised and/or smoothed versions of it.
    SMTH_DEN can take multiple arguments in list format ['smth','nlmeans'] - DKE is always run on the
    native data as well (XXX could add this as 'natv').
    """
    from dipy.core.gradients import gradient_table
    from dipy.segment.mask import median_otsu
    from dipy.denoise.noise_estimate import estimate_sigma
    from dipy.denoise.nlmeans import nlmeans
    import os

    GAUSS_SMTH_MULTIPLIER = 1.25  #taken from the DKI papers
    if SMTH_DEN is None:
        SMTH_DEN = []
    if out_dir is None:
        out_dir = os.path.dirname(data_fname)
    create_dir(out_dir)
    out_fname_base = os.path.join(out_dir, "DKE_")

    print("Selecting appropriate data and writing to disk")
    selected_data_fname, bvals, bvecs = select_and_write_data_bvals_bvecs(data_fname, bvals_fname, bvecs_fname, out_dir=out_dir,
                                                                          bval_max_cutoff=bval_max_cutoff, IN_MEM=IN_MEM)
    data, aff = imgLoad(selected_data_fname)
    bvals = sanitize_bvals(bvals)
    gtab = gradient_table(bvals, bvecs)

    #XXX vol_idx could be set according to b0 if you like, but this seems to work for now
    print("Creating brain mask")
    maskdata, mask = median_otsu(data, 4, 2, False, vol_idx=[0, 1], dilate=1)

    #denoising may be necessary because DKE is sensitive to outliers; look to be able to skip this for HCP data, which also runs out of memory here
    if 'nlmeans' in SMTH_DEN:
        sigma = estimate_sigma(data, N=4)
        den = nlmeans(data, sigma=sigma, mask=mask.astype('bool'))
    if 'smth' in SMTH_DEN:
        import nibabel as nb
        vox_dims = nb.load(selected_data_fname).get_header()['pixdim'][1:4]
        smth = smooth_data_array(data, aff, fwhm=vox_dims * GAUSS_SMTH_MULTIPLIER, ensure_finite=True, copy=True)
        smth[mask == 0] = 0
    del data

    #initiate and run the DKE model
    print("Running model on raw data")
    print("=========================")
    DK_stats = DKE_by_slice(maskdata, gtab, slices=slices)
    del maskdata  #clear this from mem, just in case it is huuuuge!

    out_fname = out_fname_base + "MK.nii.gz"
    niiSave(out_fname, DK_stats[..., 0], aff)
    out_fname = out_fname_base + "AK.nii.gz"
    niiSave(out_fname, DK_stats[..., 1], aff)
    out_fname = out_fname_base + "RK.nii.gz"
    niiSave(out_fname, DK_stats[..., 2], aff)
    del DK_stats  #remove from mem

    if 'nlmeans' in SMTH_DEN:
        print("")
        print("Running the model on denoised data")
        print("==================================")
        DK_stats_den = DKE_by_slice(den, gtab, slices=slices)
        out_fname = out_fname_base + "MK_den.nii.gz"
        niiSave(out_fname, DK_stats_den[..., 0], aff)
        out_fname = out_fname_base + "AK_den.nii.gz"
        niiSave(out_fname, DK_stats_den[..., 1], aff)
        out_fname = out_fname_base + "RK_den.nii.gz"
        niiSave(out_fname, DK_stats_den[..., 2], aff)
        del DK_stats_den

    if 'smth' in SMTH_DEN:
        print("")
        print("Running the model on smoothed data (vox_dim*" + str(GAUSS_SMTH_MULTIPLIER) + ")")
        print("================================================")
        DK_stats_smth = DKE_by_slice(smth, gtab, slices=slices)
        out_fname = out_fname_base + "MK_smth.nii.gz"
        niiSave(out_fname, DK_stats_smth[..., 0], aff)
        out_fname = out_fname_base + "AK_smth.nii.gz"
        niiSave(out_fname, DK_stats_smth[..., 1], aff)
        out_fname = out_fname_base + "RK_smth.nii.gz"
        niiSave(out_fname, DK_stats_smth[..., 2], aff)
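# A minimal usage sketch for DKE. The paths below are hypothetical; the function also relies on the
# module-level helpers imgLoad/niiSave/sanitize_bvals/DKE_by_slice/smooth_data_array, and writes the
# DKE_MK/AK/RK (plus _den/_smth variants) NIfTI files into out_dir.
def _example_DKE():
    DKE('/data/subj001/dwi.nii.gz',      # hypothetical 4D diffusion dataset
        '/data/subj001/bvals',
        '/data/subj001/bvecs',
        bval_max_cutoff=3200,
        out_dir='/results/dke/subj001',
        SMTH_DEN=['nlmeans', 'smth'],    # additionally run DKE on denoised and smoothed data
        IN_MEM=True)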