def crawl_bucket(bucket, path, group=False):
    """
    Gets subject list for a given S3 bucket and path
    """
    if group:
        cmd = 'aws s3 ls s3://{}/{}/graphs/'.format(bucket, path)
        out, err = mgu().execute_cmd(cmd)
        atlases = re.findall('PRE (.+)/', out)
        print("Atlas IDs: " + ", ".join(atlases))
        return atlases
    else:
        cmd = 'aws s3 ls s3://{}/{}/'.format(bucket, path)
        out, err = mgu().execute_cmd(cmd)
        subjs = re.findall('PRE sub-(.+)/', out)
        cmd = 'aws s3 ls s3://{}/{}/sub-{}/'
        seshs = OrderedDict()
        for subj in subjs:
            out, err = mgu().execute_cmd(cmd.format(bucket, path, subj))
            sesh = re.findall('ses-(.+)/', out)
            seshs[subj] = sesh if sesh != [] else [None]
        print("Session IDs: " + ", ".join([subj + '-' + sesh
                                           if sesh is not None
                                           else subj
                                           for subj in subjs
                                           for sesh in seshs[subj]]))
        return seshs

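# Usage sketch (illustrative only; the bucket and path names below are
# hypothetical, and the AWS CLI is assumed to be installed and configured):
#
#   seshs = crawl_bucket('mybucket', 'data/HNU1')
#   # -> OrderedDict mapping subject IDs to session IDs, e.g.
#   #    OrderedDict([('0025427', ['1', '2']), ('0025428', [None])])
#   atlases = crawl_bucket('mybucket', 'outputs/ndmg', group=True)
#   # -> list of atlas names found under outputs/ndmg/graphs/
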
def group_level(inDir, outDir, dataset=None, atlas=None, minimal=False,
                log=False, hemispheres=False):
    """
    Crawls the output directory from ndmg and computes qc metrics on the
    derivatives produced
    """
    # Make output dir
    outDir += "/qa/graphs/"
    mgu().execute_cmd("mkdir -p " + outDir)
    inDir += '/graphs/'

    # Get list of graphs
    labels = next(os.walk(inDir))[1]

    # Run for each
    if atlas is not None:
        labels = [atlas]

    for label in labels:
        print("Parcellation: " + label)
        tmp_in = op.join(inDir, label)
        fs = [op.join(tmp_in, fl)
              for root, dirs, files in os.walk(tmp_in)
              for fl in files
              if fl.endswith(".graphml") or fl.endswith(".gpickle")]
        tmp_out = op.join(outDir, label)
        mgu().execute_cmd("mkdir -p " + tmp_out)
        compute_metrics(fs, tmp_out, label)
        outf = op.join(tmp_out, 'plot')
        make_panel_plot(tmp_out, outf, dataset=dataset, atlas=label,
                        minimal=minimal, log=log, hemispheres=hemispheres)

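# Usage sketch (illustrative only; the paths and dataset name are
# hypothetical):
#
#   group_level('/data/ndmg_outputs', '/data/ndmg_outputs',
#               dataset='HNU1', log=True)
#   # For each parcellation under /data/ndmg_outputs/graphs/, computes QC
#   # metrics and writes a panel plot to /data/ndmg_outputs/qa/graphs/<label>/
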
def kill_jobs(jobdir, reason='"Killing job"'):
    """
    Given a list of jobs, kills them all.
    """
    cmd_template1 = 'aws batch cancel-job --job-id {} --reason {}'
    cmd_template2 = 'aws batch terminate-job --job-id {} --reason {}'

    print("Cancelling/Terminating jobs in {}/ids/...".format(jobdir))
    jobs = os.listdir(jobdir + '/ids/')
    for job in jobs:
        with open('{}/ids/{}'.format(jobdir, job), 'r') as inf:
            submission = json.load(inf)
        jid = submission['jobId']
        name = submission['jobName']
        status = get_status(jobdir, jid)
        if status in ['SUCCEEDED', 'FAILED']:
            print("... No action needed for {}...".format(name))
        elif status in ['SUBMITTED', 'PENDING', 'RUNNABLE']:
            # Jobs not yet running can only be cancelled
            cmd = cmd_template1.format(jid, reason)
            print("... Cancelling job {}...".format(name))
            out, err = mgu().execute_cmd(cmd)
        elif status in ['STARTING', 'RUNNING']:
            # Running jobs must be terminated instead
            cmd = cmd_template2.format(jid, reason)
            print("... Terminating job {}...".format(name))
            out, err = mgu().execute_cmd(cmd)
        else:
            print("... Unknown status??")

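# Usage sketch (illustrative only; the job directory is hypothetical):
#
#   kill_jobs('/tmp/ndmg_jobs', reason='"Batch misconfigured"')
#   # Cancels queued jobs and terminates running ones recorded under
#   # /tmp/ndmg_jobs/ids/; finished jobs are left alone.
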
def get_status(jobdir, jobid=None):
    """
    Given list of jobs, returns status of each.
    """
    cmd_template = 'aws batch describe-jobs --jobs {}'

    if jobid is None:
        print("Describing jobs in {}/ids/...".format(jobdir))
        jobs = os.listdir(jobdir + '/ids/')
        for job in jobs:
            with open('{}/ids/{}'.format(jobdir, job), 'r') as inf:
                submission = json.load(inf)
            cmd = cmd_template.format(submission['jobId'])
            print("... Checking job {}...".format(submission['jobName']))
            out, err = mgu().execute_cmd(cmd)
            status = re.findall('"status": "([A-Za-z]+)",', out)[0]
            print("... ... Status: {}".format(status))
        return 0
    else:
        print("Describing job id {}...".format(jobid))
        cmd = cmd_template.format(jobid)
        out, err = mgu().execute_cmd(cmd)
        status = re.findall('"status": "([A-Za-z]+)",', out)[0]
        print("... Status: {}".format(status))
        return status

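# Usage sketch (illustrative only; the job directory and job id are
# hypothetical):
#
#   get_status('/tmp/ndmg_jobs')                 # prints status of all jobs
#   status = get_status('/tmp/ndmg_jobs',
#                       jobid='11111111-2222-3333-4444-555555555555')
#   # -> e.g. 'RUNNING'
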
def multigraphs(fibers, labels, outdir):
    """
    Creates a brain graph from fiber streamlines
    """
    startTime = datetime.now()
    fiber_name = mgu().get_filename(fibers)
    base = fiber_name.split('_fibers', 1)[0]

    # Create output directories for graphs
    label_name = [mgu().get_filename(x) for x in labels]
    for label in label_name:
        p = Popen("mkdir -p " + outdir + "/graphs/" + label,
                  stdout=PIPE, stderr=PIPE, shell=True)

    # Create names of files to be produced
    graphs = [outdir + "/graphs/" + x + '/' + base + "_" + x + ".graphml"
              for x in label_name]
    print("Graphs of streamlines downsampled to given labels: " +
          ", ".join(graphs))

    # Load fibers
    print("Loading fibers...")
    fiber_npz = np.load(fibers)
    tracks = fiber_npz[list(fiber_npz.keys())[0]]

    # Generate graphs from streamlines for each parcellation
    for idx, label in enumerate(label_name):
        print("Generating graph for " + label + " parcellation...")
        labels_im = nb.load(labels[idx])
        g1 = mgg(len(np.unique(labels_im.get_data())) - 1, labels[idx])
        g1.make_graph(tracks)
        g1.summary()
        g1.save_graph(graphs[idx])

    print("Execution took: " + str(datetime.now() - startTime))
    print("Complete!")

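# Usage sketch (illustrative only; the file paths are hypothetical, and the
# fibers file is assumed to be an .npz produced by this pipeline):
#
#   multigraphs('/out/fibers/sub-01_fibers.npz',
#               ['/atlases/label/desikan.nii.gz',
#                '/atlases/label/aal.nii.gz'],
#               '/out')
#   # Writes /out/graphs/desikan/sub-01_desikan.graphml and
#   #        /out/graphs/aal/sub-01_aal.graphml
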
def get_data(bucket, remote_path, local, subj=None, public=True):
    """
    Given an s3 bucket, data location on the bucket, and a download location,
    crawls the bucket and recursively pulls all data.
    """
    client = boto3.client('s3')
    if not public:
        bkts = [bk['Name'] for bk in client.list_buckets()['Buckets']]
        if bucket not in bkts:
            sys.exit("Error: could not locate bucket. Available buckets: " +
                     ", ".join(bkts))

    cmd = 'aws'
    if public:
        cmd += ' --no-sign-request --region=us-east-1'

    cmd = "".join([cmd, ' s3 cp --recursive s3://', bucket, '/', remote_path])
    if subj is not None:
        cmd = "".join([cmd, '/sub-', subj])
        std, err = mgu().execute_cmd('mkdir -p ' + local + '/sub-' + subj)
        local += '/sub-' + subj

    cmd = "".join([cmd, ' ', local])
    std, err = mgu().execute_cmd(cmd)
    return

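# Usage sketch (illustrative only; the bucket, path, and local directory are
# hypothetical):
#
#   get_data('mybucket', 'data/HNU1', '/data/inputs', subj='0025427')
#   # Runs: aws --no-sign-request --region=us-east-1 s3 cp --recursive
#   #       s3://mybucket/data/HNU1/sub-0025427 /data/inputs/sub-0025427
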
def submit_jobs(jobs, jobdir):
    """
    Given a list of jobs to submit, submits them to AWS Batch
    """
    cmd_template = 'aws batch submit-job --cli-input-json file://{}'

    for job in jobs:
        cmd = cmd_template.format(job)
        print("... Submitting job {}...".format(job))
        out, err = mgu().execute_cmd(cmd)
        submission = ast.literal_eval(out)
        print("Job Name: {}, Job ID: {}".format(submission['jobName'],
                                                submission['jobId']))
        sub_file = os.path.join(jobdir, 'ids', submission['jobName'] + '.json')
        with open(sub_file, 'w') as outfile:
            json.dump(submission, outfile)
    return 0

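# Usage sketch (illustrative only; assumes job jsons were produced by
# create_json into the hypothetical directory /tmp/ndmg_jobs):
#
#   jobs = create_json('mybucket', 'data/HNU1', seshs, '/tmp/ndmg_jobs')
#   submit_jobs(jobs, '/tmp/ndmg_jobs')
#   # Each AWS Batch response is saved as /tmp/ndmg_jobs/ids/<jobName>.json
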
def dti2atlas(self, dti, gtab, mprage, atlas, aligned_dti, outdir,
              clean=False):
    """
    Aligns two images and stores the transform between them

    **Positional Arguments:**

            dti:
                - Input image to be aligned as a nifti image file
            gtab:
                - Gradient table corresponding to the dti image
            mprage:
                - Intermediate image being aligned to as a nifti image file
            atlas:
                - Terminal image being aligned to as a nifti image file
            aligned_dti:
                - Aligned output dti image as a nifti image file
            outdir:
                - Output directory in which temp files are placed
    """
    # Creates names for all intermediate files used
    # GK TODO: come up with smarter way to create these temp file names
    dti_name = op.splitext(op.splitext(op.basename(dti))[0])[0]
    mprage_name = op.splitext(op.splitext(op.basename(mprage))[0])[0]
    atlas_name = op.splitext(op.splitext(op.basename(atlas))[0])[0]

    dti2 = outdir + "/tmp/" + dti_name + "_t2.nii.gz"
    temp_aligned = outdir + "/tmp/" + dti_name + "_ta.nii.gz"
    b0 = outdir + "/tmp/" + dti_name + "_b0.nii.gz"
    xfm1 = outdir + "/tmp/" + dti_name + "_" + mprage_name + "_xfm.mat"
    xfm2 = outdir + "/tmp/" + mprage_name + "_" + atlas_name + "_xfm.mat"
    xfm3 = outdir + "/tmp/" + dti_name + "_" + atlas_name + "_xfm.mat"

    # Align DTI volumes to each other
    self.align_slices(dti, dti2, np.where(gtab.b0s_mask)[0])

    # Loads DTI image in as data and extracts B0 volume
    import ndmg.utils as mgu
    dti_im = nb.load(dti2)
    b0_im = mgu().get_b0(gtab, dti_im.get_data())
    # GK TODO: why doesn't top import work?

    # Wraps B0 volume in new nifti image
    b0_head = dti_im.get_header()
    b0_head.set_data_shape(b0_head.get_data_shape()[0:3])
    b0_out = nb.Nifti1Image(b0_im, affine=dti_im.get_affine(),
                            header=b0_head)
    b0_out.update_header()
    nb.save(b0_out, b0)

    # Aligns B0 volume to MPRAGE, and MPRAGE to Atlas
    self.align(b0, mprage, xfm1)
    self.align(mprage, atlas, xfm2)

    # Combines transforms from previous registrations in proper order
    cmd = "convert_xfm -omat " + xfm3 + " -concat " + xfm2 + " " + xfm1
    p = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
    p.communicate()

    # Applies combined transform to dti image volume
    self.applyxfm(dti2, atlas, xfm3, temp_aligned)
    self.resample(temp_aligned, aligned_dti, atlas)

    # Clean temp files (skipped when clean is False, e.g. in debug mode)
    if clean:
        cmd = "rm -f " + dti2 + " " + temp_aligned + " " + b0 + " " +\
              xfm1 + " " + xfm2 + " " + xfm3
        print("Cleaning temporary registration files...")
        p = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
        p.communicate()

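# Usage sketch (illustrative only; the paths are hypothetical, and gtab is
# assumed to expose a b0s_mask attribute, as the gradient table returned by
# mgu().load_bval_bvec_dti does):
#
#   mgr().dti2atlas('/out/tmp/sub-01_t1.nii.gz', gtab,
#                   '/data/sub-01_T1w.nii.gz',
#                   '/atlases/atlas/MNI152.nii.gz',
#                   '/out/reg_dti/sub-01_aligned.nii.gz', '/out')
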
def participant_level(inDir, outDir, subjs, sesh=None, debug=False):
    """
    Crawls the given BIDS organized directory for data pertaining to the
    given subject and session, and passes necessary files to ndmg_pipeline
    for processing.
    """
    # Get atlases (atlas, atlas_mask, labels, and atlas_dir are expected to
    # be defined at module scope)
    ope = op.exists
    if any(not ope(l) for l in labels) or not (ope(atlas) and
                                               ope(atlas_mask)):
        print("Cannot find atlas information; downloading...")
        mgu().execute_cmd('mkdir -p ' + atlas_dir)
        cmd = " ".join(['wget -rnH --cut-dirs=3 --no-parent -P ' + atlas_dir,
                        'http://openconnecto.me/mrdata/share/atlases/'])
        mgu().execute_cmd(cmd)

    # Make output dir
    mgu().execute_cmd("mkdir -p " + outDir + " " + outDir + "/tmp")

    # Get subjects
    if subjs is None:
        subj_dirs = glob(op.join(inDir, "sub-*"))
        subjs = [subj_dir.split("-")[-1] for subj_dir in subj_dirs]

    bvec = []
    bval = []
    anat = []
    dwi = []

    # Get all image data for each subject
    for subj in subjs:
        if sesh is not None:
            anat_t = glob(op.join(inDir, "sub-{}/ses-{}".format(subj, sesh),
                                  "anat", "*_T1w.nii*"))
            dwi_t = glob(op.join(inDir, "sub-{}/ses-{}".format(subj, sesh),
                                 "dwi", "*_dwi.nii*"))
        else:
            anat_t = glob(op.join(inDir, "sub-%s" % subj, "anat",
                                  "*_T1w.nii*")) +\
                     glob(op.join(inDir, "sub-%s" % subj, "ses-*", "anat",
                                  "*_T1w.nii*"))
            dwi_t = glob(op.join(inDir, "sub-%s" % subj, "dwi",
                                 "*_dwi.nii*")) +\
                    glob(op.join(inDir, "sub-%s" % subj, "ses-*", "dwi",
                                 "*_dwi.nii*"))
        anat = anat + anat_t
        dwi = dwi + dwi_t

    bvec_t = []
    bval_t = []
    # Look for bval, bvec files for each DWI file, walking up the BIDS tree
    for scan in dwi:
        step = op.dirname(scan)
        while not bval_t or not bvec_t:
            bval_t = glob(op.join(step, "*dwi.bval"))
            bvec_t = glob(op.join(step, "*dwi.bvec"))
            if step == op.abspath(op.join(inDir, os.pardir)):
                sys.exit("Error: No b-values or b-vectors found..\n"
                         "Please review BIDS spec (bids.neuroimaging.io).")
            step = op.abspath(op.join(step, os.pardir))
        bvec.append(bvec_t[0])
        bval.append(bval_t[0])
        bvec_t = []
        bval_t = []

    assert(len(anat) == len(dwi))
    assert(len(bvec) == len(dwi))
    assert(len(bval) == len(dwi))

    print(dwi)
    # Run the pipeline for each scan
    for i, scans in enumerate(anat):
        print("T1 file: " + anat[i])
        print("DWI file: " + dwi[i])
        print("Bval file: " + bval[i])
        print("Bvec file: " + bvec[i])

        ndmg_pipeline(dwi[i], bval[i], bvec[i], anat[i], atlas, atlas_mask,
                      labels, outDir, clean=(not debug))

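# Usage sketch (illustrative only; the directories and subject label are
# hypothetical):
#
#   participant_level('/data/inputs', '/data/outputs', ['0025427'], sesh='1')
#   # Locates sub-0025427's T1w, DWI, bval, and bvec files in the BIDS tree
#   # and hands them to ndmg_pipeline.
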
def main():
    parser = ArgumentParser(description="This is an end-to-end connectome "
                                        "estimation pipeline from sMRI and "
                                        "DTI images")
    parser.add_argument('bids_dir', help='The directory with the input '
                        'dataset formatted according to the BIDS standard.')
    parser.add_argument('output_dir', help='The directory where the output '
                        'files should be stored. If you are running group '
                        'level analysis this folder should be prepopulated '
                        'with the results of the participant level analysis.')
    parser.add_argument('analysis_level', help='Level of the analysis that '
                        'will be performed. Multiple participant level '
                        'analyses can be run independently (in parallel) '
                        'using the same output_dir.',
                        choices=['participant', 'group'])
    parser.add_argument('--participant_label', help='The label(s) of the '
                        'participant(s) that should be analyzed. The label '
                        'corresponds to sub-<participant_label> from the '
                        'BIDS spec (so it does not include "sub-"). If this '
                        'parameter is not provided all subjects should be '
                        'analyzed. Multiple participants can be specified '
                        'with a space separated list.', nargs="+")
    parser.add_argument('--session_label', help='The label(s) of the '
                        'session that should be analyzed. The label '
                        'corresponds to ses-<session_label> from the BIDS '
                        'spec (so it does not include "ses-"). If this '
                        'parameter is not provided all sessions should be '
                        'analyzed. Multiple sessions can be specified '
                        'with a space separated list.')
    parser.add_argument('--bucket', action='store', help='The name of '
                        'an S3 bucket which holds BIDS organized data. You '
                        'must have built your bucket with credentials to the '
                        'S3 bucket you wish to access.')
    parser.add_argument('--remote_path', action='store', help='The path to '
                        'the data on your S3 bucket. The data will be '
                        'downloaded to the provided bids_dir on your '
                        'machine.')
    parser.add_argument('--push_data', action='store_true', help='flag to '
                        'push derivatives back up to S3.', default=False)
    parser.add_argument('--dataset', action='store', help='The name of '
                        'the dataset you are performing QC on.')
    parser.add_argument('--atlas', action='store', help='The atlas '
                        'being analyzed in QC (if you only want one).')
    parser.add_argument('--minimal', action='store_true', help='Determines '
                        'whether to show a minimal or full set of plots.',
                        default=False)
    parser.add_argument('--hemispheres', action='store_true', help='Whether '
                        'or not to break degrees into hemispheres.',
                        default=False)
    parser.add_argument('--log', action='store_true', help='Determines '
                        'axis scale for plotting.', default=False)
    parser.add_argument('--debug', action='store_true', help='flag to store '
                        'temp files along the path of processing.',
                        default=False)
    result = parser.parse_args()

    inDir = result.bids_dir
    outDir = result.output_dir
    subj = result.participant_label
    sesh = result.session_label
    buck = result.bucket
    remo = result.remote_path
    push = result.push_data
    level = result.analysis_level
    minimal = result.minimal
    log = result.log
    atlas = result.atlas
    hemi = result.hemispheres

    creds = bool(os.getenv("AWS_ACCESS_KEY_ID", 0) and
                 os.getenv("AWS_SECRET_ACCESS_KEY", 0))

    if level == 'participant':
        if buck is not None and remo is not None:
            print("Retrieving data from S3...")
            if subj is not None:
                for s in subj:
                    bids_s3.get_data(buck, remo, inDir, s, True)
            else:
                bids_s3.get_data(buck, remo, inDir, public=creds)
        modif = 'ndmg_{}'.format(ndmg.version.replace('.', '-'))
        participant_level(inDir, outDir, subj, sesh, result.debug)
    elif level == 'group':
        if buck is not None and remo is not None:
            print("Retrieving data from S3...")
            if atlas is not None:
                bids_s3.get_data(buck, remo + '/graphs/' + atlas,
                                 outDir + '/graphs/' + atlas, public=creds)
            else:
                bids_s3.get_data(buck, remo + '/graphs', outDir + '/graphs',
                                 public=creds)
        modif = 'qa'
        group_level(outDir, outDir, result.dataset, result.atlas, minimal,
                    log, hemi)

    if push and buck is not None and remo is not None:
        print("Pushing results to S3...")
        cmd = "".join(['aws s3 cp --exclude "tmp/*" ', outDir, ' s3://', buck,
                       '/', remo, '/', modif,
                       '/ --recursive --acl public-read-write'])
        if not creds:
            print("Note: no credentials provided, may fail to push big files")
            cmd += ' --no-sign-request'
        print(cmd)
        mgu().execute_cmd(cmd)

    sys.exit(0)

def ndmg_pipeline(dti, bvals, bvecs, mprage, atlas, mask, labels, outdir,
                  clean=False, fmt='gpickle'):
    """
    Creates a brain graph from MRI data
    """
    startTime = datetime.now()

    # Create derivative output directories
    dti_name = mgu().get_filename(dti)
    cmd = "".join(["mkdir -p ", outdir, "/reg_dti ", outdir, "/tensors ",
                   outdir, "/fibers ", outdir, "/graphs"])
    mgu().execute_cmd(cmd)

    # Graphs are different because of multiple atlases; normalize labels to
    # a list so a single atlas is handled the same way as several
    if not isinstance(labels, list):
        labels = [labels]
    label_name = [mgu().get_filename(x) for x in labels]
    for label in label_name:
        p = Popen("mkdir -p " + outdir + "/graphs/" + label,
                  stdout=PIPE, stderr=PIPE, shell=True)

    # Create derivative output file names
    aligned_dti = "".join([outdir, "/reg_dti/", dti_name, "_aligned.nii.gz"])
    tensors = "".join([outdir, "/tensors/", dti_name, "_tensors.npz"])
    fibers = "".join([outdir, "/fibers/", dti_name, "_fibers.npz"])
    print("This pipeline will produce the following derivatives...")
    print("DTI volume registered to atlas: " + aligned_dti)
    print("Diffusion tensors in atlas space: " + tensors)
    print("Fiber streamlines in atlas space: " + fibers)

    # Again, graphs are different
    graphs = ["".join([outdir, "/graphs/", x, '/', dti_name, "_", x, '.',
                       fmt]) for x in label_name]
    print("Graphs of streamlines downsampled to given labels: " +
          ", ".join(graphs))

    # Creates gradient table from bvalues and bvectors
    print("Generating gradient table...")
    dti1 = "".join([outdir, "/tmp/", dti_name, "_t1.nii.gz"])
    bvecs1 = "".join([outdir, "/tmp/", dti_name, "_1.bvec"])
    mgp.rescale_bvec(bvecs, bvecs1)
    gtab = mgu().load_bval_bvec_dti(bvals, bvecs1, dti, dti1)

    # Align DTI volumes to Atlas
    print("Aligning volumes...")
    mgr().dti2atlas(dti1, gtab, mprage, atlas, aligned_dti, outdir, clean)

    print("Beginning tractography...")
    # Compute tensors and track fiber streamlines
    tens, tracks = mgt().eudx_basic(aligned_dti, mask, gtab, stop_val=0.2)

    # And save them to disk
    np.savez(tensors, tens)
    np.savez(fibers, tracks)

    # Generate graphs from streamlines for each parcellation
    for idx, label in enumerate(label_name):
        print("Generating graph for " + label + " parcellation...")
        labels_im = nb.load(labels[idx])
        g1 = mgg(len(np.unique(labels_im.get_data())) - 1, labels[idx])
        g1.make_graph(tracks)
        g1.summary()
        g1.save_graph(graphs[idx], fmt=fmt)

    print("Execution took: " + str(datetime.now() - startTime))

    # Clean temp files
    if clean:
        print("Cleaning up intermediate files... ")
        cmd = "".join(['rm -f ', tensors, ' ', outdir, '/tmp/', dti_name,
                       '*', ' ', aligned_dti, ' ', fibers])
        mgu().execute_cmd(cmd)

    print("Complete!")

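# Usage sketch (illustrative only; all file paths are hypothetical):
#
#   ndmg_pipeline('/data/sub-01_dwi.nii.gz', '/data/sub-01_dwi.bval',
#                 '/data/sub-01_dwi.bvec', '/data/sub-01_T1w.nii.gz',
#                 '/atlases/atlas/MNI152.nii.gz',
#                 '/atlases/mask/MNI152_mask.nii.gz',
#                 ['/atlases/label/desikan.nii.gz'],
#                 '/out', clean=True, fmt='graphml')
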
def create_json(bucket, path, threads, jobdir, group=False, credentials=None,
                debug=False, dataset=None, log=False):
    """
    Takes parameters to make jsons
    """
    mgu().execute_cmd("mkdir -p {}".format(jobdir))
    mgu().execute_cmd("mkdir -p {}/jobs/".format(jobdir))
    mgu().execute_cmd("mkdir -p {}/ids/".format(jobdir))
    if group:
        template = group_templ
        atlases = threads
    else:
        template = participant_templ
        seshs = threads

    if not os.path.isfile('{}/{}'.format(jobdir, template.split('/')[-1])):
        cmd = 'wget --quiet -P {} {}'.format(jobdir, template)
        mgu().execute_cmd(cmd)

    with open('{}/{}'.format(jobdir, template.split('/')[-1]), 'r') as inf:
        template = json.load(inf)

    cmd = template['containerOverrides']['command']
    env = template['containerOverrides']['environment']

    if credentials is not None:
        cred = [line for line in csv.reader(open(credentials))]
        env[0]['value'] = [cred[1][idx]
                           for idx, val in enumerate(cred[0])
                           if "ID" in val][0]  # Adds public key ID to env
        env[1]['value'] = [cred[1][idx]
                           for idx, val in enumerate(cred[0])
                           if "Secret" in val][0]  # Adds secret key to env
    else:
        env = []
    template['containerOverrides']['environment'] = env

    jobs = list()
    cmd[4] = re.sub('(<BUCKET>)', bucket, cmd[4])
    cmd[6] = re.sub('(<PATH>)', path, cmd[6])

    if group:
        if dataset is not None:
            cmd[9] = re.sub('(<DATASET>)', dataset, cmd[9])
        else:
            cmd[9] = re.sub('(<DATASET>)', '', cmd[9])
        batlas = ['slab907', 'DS03231', 'DS06481', 'DS16784', 'DS72784']
        for atlas in atlases:
            if atlas in batlas:
                print("... Skipping {} parcellation".format(atlas))
                continue
            print("... Generating job for {} parcellation".format(atlas))
            job_cmd = deepcopy(cmd)
            job_cmd[11] = re.sub('(<ATLAS>)', atlas, job_cmd[11])
            if log:
                job_cmd += ['--log']
            if atlas == 'desikan':
                job_cmd += ['--hemispheres']

            job_json = deepcopy(template)
            ver = ndmg.version.replace('.', '-')
            if dataset:
                name = 'ndmg_{}_{}_{}'.format(ver, dataset, atlas)
            else:
                name = 'ndmg_{}_{}'.format(ver, atlas)
            job_json['jobName'] = name
            job_json['containerOverrides']['command'] = job_cmd
            job = os.path.join(jobdir, 'jobs', name + '.json')
            with open(job, 'w') as outfile:
                json.dump(job_json, outfile)
            jobs += [job]
    else:
        for subj in seshs.keys():
            print("... Generating job for sub-{}".format(subj))
            for sesh in seshs[subj]:
                job_cmd = deepcopy(cmd)
                job_cmd[8] = re.sub('(<SUBJ>)', subj, job_cmd[8])
                if sesh is not None:
                    job_cmd += [u'--session_label']
                    job_cmd += [u'{}'.format(sesh)]
                if debug:
                    job_cmd += [u'--debug']

                job_json = deepcopy(template)
                ver = ndmg.version.replace('.', '-')
                if dataset:
                    name = 'ndmg_{}_{}_sub-{}'.format(ver, dataset, subj)
                else:
                    name = 'ndmg_{}_sub-{}'.format(ver, subj)
                if sesh is not None:
                    name = '{}_ses-{}'.format(name, sesh)
                job_json['jobName'] = name
                job_json['containerOverrides']['command'] = job_cmd
                job = os.path.join(jobdir, 'jobs', name + '.json')
                with open(job, 'w') as outfile:
                    json.dump(job_json, outfile)
                jobs += [job]
    return jobs

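# Usage sketch (illustrative only; the bucket, path, jobdir, and credentials
# file are hypothetical):
#
#   seshs = crawl_bucket('mybucket', 'data/HNU1')
#   jobs = create_json('mybucket', 'data/HNU1', seshs, '/tmp/ndmg_jobs',
#                      credentials='aws_credentials.csv')
#   # Writes one AWS Batch job json per subject/session under
#   # /tmp/ndmg_jobs/jobs/ and returns their paths.
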