def fieldmaps(conf, paths):
    """Prepare the fieldmaps"""

    logger = logging.getLogger(__name__)
    logger.info("Running fieldmap preparation...")

    epi_run = conf.subj.mot_base - 1

    # set a corrected EPI to define the space to resample to
    ref_epi = paths.func.sts[epi_run].full(".nii[0]")

    # want to calculate a coarse brain mask from the epi
    mask_path = paths.fmap.mask.full(".nii")
    epi_path = paths.summ.st.full(".nii[{n:d}]".format(n=epi_run))

    mask_cmd = [
        "3dAutomask",
        "-SI", "{n:d}".format(n=conf.subj.mask_SI),
        "-overwrite",
        "-prefix", mask_path,
        epi_path
    ]

    runcmd.run_cmd(" ".join(mask_cmd))

    fmri_tools.preproc.make_fieldmap(
        mag_path=paths.fmap.mag.full(),
        ph_path=paths.fmap.ph.full(),
        fmap_path=paths.fmap.fmap.full(),
        delta_te_ms=conf.acq.delta_te_ms,
        ref_img=ref_epi,
        recentre_ph="mean",
        recentre_mask=mask_path,
        strip_params=["-surface_coil"],
        strip_mag=False
    )
def centre_distances(conf, paths):
    "Write the distance of each node to its patch centre"

    for patch_id in conf.ana.valid_patch_ids:
        _patch_cent_dist(conf, paths, patch_id)

    os.chdir(paths.coh_ana.base.full())

    # now to combine
    vf_lookup = {"lh": "R", "rh": "L"}

    # combine all into one
    for hemi in ["lh", "rh"]:

        comb_cmd = [
            "3dMean",
            "-non_zero",
            "-sum",
            "-prefix", paths.coh_ana.patch_dist.full("_" + hemi + "-full.niml.dset"),
            "-overwrite"
        ]

        for patch_id in conf.ana.valid_patch_ids:
            if conf.stim.patches[patch_id]["vf"] == vf_lookup[hemi]:
                comb_cmd.append(
                    paths.coh_ana.patch_dist.file(
                        "-patch_{n:d}".format(n=patch_id) + "_" + hemi + ".niml.dset"
                    )
                )

        runcmd.run_cmd(" ".join(comb_cmd))
def coh_glm(conf, paths):
    "Run the GLM(s) for a given subject"

    # we only want to run GLMs for the patches with acceptable node counts, so
    # remove those identified as lacking
    patch_ids = np.setdiff1d(conf.exp.mod_patches, conf.ana.exclude_patch_ids)

    # run the GLM for each patch
    for patch_id in patch_ids:
        _run_coh_glm(conf, paths, patch_id)

    vf_lookup = {"lh": "R", "rh": "L"}

    # combine all into one
    for hemi in ["lh", "rh"]:

        comb_cmd = [
            "3dMean",
            "-non_zero",
            "-sum",
            "-prefix", paths.coh_ana.comb.full("_" + hemi + "-full.niml.dset"),
            "-overwrite"
        ]

        for patch_id in patch_ids:
            if conf.stim.patches[patch_id]["vf"] == vf_lookup[hemi]:
                comb_cmd.append(
                    paths.coh_ana.psc.file(
                        "-patch_{n:d}".format(n=patch_id) + "_" + hemi + "-full.niml.dset"
                    )
                )

        runcmd.run_cmd(" ".join(comb_cmd))
def _loc_to_mask(subj_id, acq_date, conf):

    subj_dir = os.path.join(conf.ana.base_subj_dir, subj_id)
    loc_glm_dir = os.path.join(subj_dir, conf.ana.loc_glm_dir)

    os.chdir(loc_glm_dir)

    inf_str = subj_id + "_ul_sens_" + acq_date

    mask_paths = {}

    # go through combinations of visual field position and hemisphere
    for (vf, hemi) in itertools.product(("upper", "lower"), ("lh", "rh")):

        # this is the localiser GLM subbrick with the t-statistic for this
        # visual field location
        loc_t_path = "{s:s}-loc_{v:s}-glm-{h:s}_nf.niml.dset".format(
            s=inf_str, v=vf, h=hemi) + "[" + conf.ana.loc_glm_brick + "]"

        # check it is correct
        assert fmri_tools.utils.get_dset_label(loc_t_path)[0] == vf + "#0_Tstat"

        # subject's ROI definitions for this hemisphere
        roi_path = os.path.join(
            conf.ana.roi_dir,
            subj_id,
            "rois",
            "{s:s}_vis_loc_--rois-{h:s}_nf.niml.dset".format(s=subj_id, h=hemi)
        )

        # this is the mask file to write
        mask_path = "{s:s}-loc_{v:s}-mask-{h:s}_nf.niml.dset".format(
            s=inf_str, v=vf, h=hemi)

        # we want the roi file to be 'amongst' the identifiers for V1-V3
        roi_test = "amongst(a," + ",".join(conf.ana.roi_numbers) + ")"

        # we also want the t-value to be above a certain threshold
        loc_test = "step(b-" + conf.ana.loc_glm_thresh + ")"

        # so it is an 'and' operation, and we want it to be labelled with the
        # ROI identified value so we multiply it by the outcome
        expr = "'a*and(" + roi_test + "," + loc_test + ")'"

        cmd = [
            "3dcalc",
            "-overwrite",
            "-a", roi_path,
            "-b", loc_t_path,
            "-expr", expr,
            "-prefix", mask_path
        ]

        runcmd.run_cmd(" ".join(cmd))

        # store the mask path to make it easier to access in the next step
        mask_paths[(vf, hemi)] = mask_path

    return mask_paths
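# A minimal sketch (toy values, not part of the pipeline) of the 3dcalc
# expression assembled by _loc_to_mask above; `roi_numbers` and the threshold
# are hypothetical stand-ins for the conf.ana values.
def _demo_mask_expr():
    roi_numbers = ["1", "2", "3"]  # e.g. identifiers for V1-V3
    loc_glm_thresh = "2.58"
    # node must be within the ROIs...
    roi_test = "amongst(a," + ",".join(roi_numbers) + ")"
    # ...and have a t-value above threshold
    loc_test = "step(b-" + loc_glm_thresh + ")"
    # -> 'a*and(amongst(a,1,2,3),step(b-2.58))'
    return "'a*and(" + roi_test + "," + loc_test + ")'"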
def data_dump_depth(conf, paths):
    "Dump all the relevant data to a single text file"

    # ingredients:
    #  -node coordinates
    #  -patch IDs
    #  -coherent PSC
    #  -incoherent PSC

    patch_ids = np.setdiff1d(conf.exp.mod_patches, conf.ana.exclude_patch_ids)

    vf_lookup = {"lh": "R", "rh": "L"}

    os.chdir(paths.depth_ana.base.full())

    for i_bin in xrange(len(conf.ana.bin_centres)):

        bin_ext = "_bin_" + str(i_bin)

        # open it this way so that we can write to it twice and it will append
        dump_handle = open(paths.depth_ana.comb.full(bin_ext + ".txt"), "w")

        for hemi in ["lh", "rh"]:

            hemi_ext = "_" + hemi

            psc_comb_path = paths.depth_ana.comb.full(
                bin_ext + hemi_ext + "-full.niml.dset")

            # now we need to know the patch IDs
            loc_id = conf.subj.subj_id + "_loc"
            loc_conf = ns_patches.config.get_conf(loc_id)
            loc_paths = ns_patches.paths.get_subj_paths(loc_conf)

            id_path = loc_paths.loc.patch_id_thr.full(
                "_{h:s}-full_Clustered_e1_a{n:.01f}.niml.dset".format(
                    h=hemi, n=conf.loc.area_thr))

            dump_path = paths.depth_ana.comb.full(bin_ext + hemi_ext + ".txt")

            if os.path.exists(dump_path):
                os.remove(dump_path)

            # now we have all our ingredients, we can write out the text file
            dump_cmd = [
                "3dmaskdump",
                "-o", dump_path,
                "-mask", psc_comb_path,
                id_path,
                psc_comb_path
            ]

            runcmd.run_cmd(" ".join(dump_cmd))

            # concatenate across hemis
            np.savetxt(dump_handle, np.loadtxt(dump_path))

        dump_handle.close()
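# A small self-contained check (toy data, hypothetical output path) of the
# append pattern used above: np.savetxt accepts an open file handle, so
# writing to the same handle once per hemisphere concatenates the blocks.
def _demo_savetxt_append(path="demo_dump.txt"):
    with open(path, "w") as handle:
        np.savetxt(handle, np.zeros((2, 3)))  # stands in for the 'lh' dump
        np.savetxt(handle, np.ones((2, 3)))   # stands in for the 'rh' dump
    return np.loadtxt(path)  # -> shape (4, 3)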
def vol_to_surf_depth(conf, paths):
    """Converts the functional volume-based images to SUMA surfaces."""

    logger = logging.getLogger(__name__)
    logger.info("Running volume to surface projection at different depths...")

    start_dir = os.getcwd()

    for (uw_file, surf_file, run_dir) in zip(paths.func.uws,
                                             paths.func.surfs,
                                             paths.func.runs):

        os.chdir(run_dir.full())

        for hemi in ["lh", "rh"]:

            spec_file = paths.reg.spec.full("_{hemi:s}.spec".format(hemi=hemi))

            # replace the subject ID with what FreeSurfer/SUMA considers the
            # subject ID to be
            spec_file = spec_file.replace(conf.subj.subj_id, conf.subj.fs_subj_id)

            for (i_bin, bin_centre) in enumerate(conf.ana.bin_centres):

                white_frac = bin_centre - (conf.ana.bin_width / 2.0)
                grey_frac = bin_centre + (conf.ana.bin_width / 2.0) - 1.0

                surf_path = surf_file.full(
                    "_bin_{i:d}_{h:s}.niml.dset".format(h=hemi, i=i_bin))

                surf_cmd = [
                    "3dVol2Surf",
                    "-spec", spec_file,
                    "-surf_A", "smoothwm",
                    "-surf_B", "pial",
                    "-map_func", "ave",
                    "-f_steps", "15",
                    "-f_index", "nodes",
                    "-f_p1_fr", str(white_frac),
                    "-f_pn_fr", str(grey_frac),
                    "-sv", paths.reg.anat_reg.full("+orig"),
                    "-grid_parent", uw_file.full(".nii"),
                    "-out_niml", surf_path,
                    "-overwrite"
                ]

                runcmd.run_cmd(" ".join(surf_cmd))

                # convert to full
                full_path = surf_file.full(
                    "_bin_{i:d}_{h:s}-full.niml.dset".format(h=hemi, i=i_bin))

                node_str = "{n:d}".format(n=conf.subj.node_k[hemi])

                fmri_tools.utils.sparse_to_full(in_dset=surf_path,
                                                out_dset=full_path,
                                                pad_node=node_str)

    os.chdir(start_dir)
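# Worked example (toy numbers) of the depth fractions computed above. This
# assumes 3dVol2Surf's convention that -f_p1_fr offsets the first segment
# point (on surf_A, the white surface) and -f_pn_fr offsets the last point
# (on surf_B, the pial surface), hence the -1.0 term on the grey fraction.
def _demo_depth_fracs(bin_centre=0.5, bin_width=0.2):
    white_frac = bin_centre - (bin_width / 2.0)       # 0.4 above white
    grey_frac = bin_centre + (bin_width / 2.0) - 1.0  # -0.4 back from pial
    return (white_frac, grey_frac)  # together: the 0.4-0.6 depth band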
def rsq(subj_id, acq_date, conf):

    subj_dir = os.path.join(conf.ana.base_subj_dir, subj_id)
    ana_dir = os.path.join(subj_dir, "analysis")

    post_dir = os.path.join(subj_dir, conf.ana.post_dir)
    rsq_dir = os.path.join(post_dir, "rsq")

    if not os.path.isdir(post_dir):
        os.mkdir(post_dir)
    if not os.path.isdir(rsq_dir):
        os.mkdir(rsq_dir)

    os.chdir(rsq_dir)

    inf_str = subj_id + "_ul_sens_" + acq_date

    for vf in ["upper", "lower"]:

        # in
        glm_path = os.path.join(
            ana_dir,
            "{s:s}-{v:s}-glm-.niml.dset".format(s=inf_str, v=vf)
        )

        bricks = "[184,187]"

        # check the beta bricks are as expected
        dset_labels = fmri_tools.utils.get_dset_label(glm_path + bricks)
        desired_labels = ["above_all_R^2", "below_all_R^2"]
        assert dset_labels == desired_labels

        cmd = ["3dmaskdump", "-noijk", glm_path + bricks]

        cmd_out = runcmd.run_cmd(" ".join(cmd))

        roi_rsq = cmd_out.std_out.splitlines()

        rsq = [map(float, roi_r.split(" ")) for roi_r in roi_rsq]

        # out
        rsq_path = "{s:s}-{v:s}-rsq-.txt".format(s=inf_str, v=vf)

        np.savetxt(rsq_path, rsq)
def patch_count( conf, paths ):
    "Count the number of nodes in each patch"

    logger = logging.getLogger( __name__ )
    logger.info( "Running localiser patch cluster count..." )

    os.chdir( paths.loc.base.full() )

    for hemi in [ "lh", "rh" ]:

        hemi_ext = "_{h:s}-full.niml.dset".format( h = hemi )

        id_ext = "_{h:s}-full_Clustered_e1_a{n:.01f}.niml.dset".format(
            h = hemi, n = conf.loc.area_thr
        )

        id_path = paths.loc.patch_id_thr.full( id_ext )

        out_path = paths.loc.patch_id_thr.full( "_" + hemi + ".txt" )

        if os.path.exists( out_path ):
            os.remove( out_path )

        cmd = [ "3dmaskdump", "-noijk", "-o", out_path, "-nozero", id_path ]

        runcmd.run_cmd( " ".join( cmd ) )

    node_info = np.hstack(
        [
            np.loadtxt( paths.loc.patch_id_thr.full( "_" + hemi + ".txt" ) )
            for hemi in [ "lh", "rh" ]
        ]
    )

    patch_k = [ np.sum( node_info == ( x + 1 ) ) for x in conf.exp.mod_patches ]

    np.savetxt( paths.loc.patch_id_count.full( ".txt" ), patch_k )
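# Toy check (not pipeline data) of the per-patch counting above: the dumped
# patch IDs are 1-based while conf.exp.mod_patches is 0-based, hence (x + 1).
def _demo_patch_count():
    node_info = np.array( [ 1, 1, 3, 3, 3, 2 ] )
    mod_patches = [ 0, 1, 2 ]
    return [ np.sum( node_info == ( x + 1 ) ) for x in mod_patches ]  # [2, 1, 3]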
def plot_top_resp_diff(save_path=None):

    conf = ul_sens_fmri.config.get_conf()
    conf.ana = ul_sens_analysis.config.get_conf()

    # (img * src, 4)
    diff_data = np.load(
        os.path.join(conf.ana.base_group_dir,
                     "ul_sens_group_amp_diffs_sorted.npy"))

    # img x src x LR x rows x cols x colours
    img_frags = get_img_fragments()

    n_to_show = 5

    # they're ranked in ascending order
    i_bottoms = range(n_to_show)
    i_tops = range(-n_to_show, 0)[::-1]

    main_fig = sg.SVGFigure("13.7cm", "16.3cm")

    tmp_files = []
    figs = []
    texts = []

    column = 0

    for (i_rank, rank_type) in zip((i_tops, i_bottoms), ("top", "bottom")):

        row = 0

        for i in i_rank:

            i_img = int(diff_data[i, 0])
            i_src = int(diff_data[i, 2])

            diff = diff_data[i, -1]

            diff_str = "Upper - lower = {n:.2f}".format(n=diff)

            if i_src == 0:
                src = "above"
            else:
                src = "below"

            diff_str += "; source: " + src

            if rank_type == "top":
                col_offset = 0
            else:
                col_offset = 140 * 2

            text = sg.TextElement(col_offset + 16,
                                  row * 117 + 10,
                                  diff_str,
                                  size=11,
                                  font="FreeSans")

            texts.append(text)

            for (i_side, side) in enumerate(("left", "right")):

                if side == "right":
                    col_offset += 110

                img_file = tempfile.NamedTemporaryFile(
                    prefix=rank_type + "_" + str(i), delete=False)
                img_file.close()

                tmp_files.append(img_file.name)

                img = img_frags[i_img, i_src, i_side, ...]

                plt.imsave(fname=img_file.name + ".png",
                           arr=img,
                           vmin=0.0,
                           vmax=1.0)

                cmd = ["convert", img_file.name + ".png", img_file.name + ".svg"]

                runcmd.run_cmd(" ".join(cmd))

                with open(img_file.name + ".svg", "r") as svg_file:
                    svg_data = svg_file.readlines()

                svg_data.insert(
                    3,
                    '<svg xmlns="http://www.w3.org/2000/svg" '
                    'xmlns:xlink="http://www.w3.org/1999/xlink" '
                    'width="128" height="128">'
                )
                del svg_data[4]

                with open(img_file.name + ".svg", "w") as svg_file:
                    svg_file.writelines(svg_data)

                fig = sg.fromfile(img_file.name + ".svg")

                fig_plot = fig.getroot()
                fig_plot.moveto(col_offset, row * 117 + 16, scale=0.7)

                figs.append(fig_plot)

            row += 1

        column += 1

    main_fig.append(figs)

    _ = [main_fig.append(text) for text in texts]

    main_fig.save(save_path + ".svg")

    figutils.svg_to_pdf(svg_path=save_path + ".svg",
                        pdf_path=save_path + ".pdf")

    for tmp_file in tmp_files:
        os.remove(tmp_file)
        os.remove(tmp_file + ".png")
        os.remove(tmp_file + ".svg")
def node_distances(subj_id, acq_date, conf):

    subj_dir = os.path.join(conf.ana.base_subj_dir, subj_id)

    post_dir = os.path.join(subj_dir, conf.ana.post_dir)
    dist_dir = os.path.join(post_dir, "dist")

    os.chdir(dist_dir)

    inf_str = subj_id + "_ul_sens_" + acq_date

    # also hemispheres
    for hemi in ["lh", "rh"]:

        dist_paths = []

        # different ROIs for upper and lower presentation
        for vf in ["upper", "lower"]:

            # each ROI
            for (roi_num, roi_name) in zip(conf.ana.roi_numbers,
                                           conf.ana.roi_names):

                spec_path = os.path.join(
                    "/sci/anat/db_ver1",
                    subj_id,
                    "SUMA",
                    subj_id + "_" + hemi + ".spec"
                )

                mask_path = os.path.join(
                    conf.ana.base_subj_dir,
                    subj_id,
                    conf.ana.loc_glm_dir,
                    "{s:s}-loc_{v:s}-mask-{h:s}_nf.niml.dset".format(
                        s=inf_str, v=vf, h=hemi
                    )
                )

                out_path = "{s:s}-post_{v:s}-{r:s}_mask-{h:s}_nf.niml.dset"
                out_path = out_path.format(s=subj_id, v=vf, r=roi_name, h=hemi)

                cmd = [
                    "3dcalc",
                    "-a", mask_path,
                    "-expr", "equals(a,{n:s})".format(n=roi_num),
                    "-prefix", out_path,
                    "-overwrite"
                ]

                runcmd.run_cmd(" ".join(cmd))

                # now to find the centre node
                centre_node = fmri_tools.utils.get_centre_node(
                    surf_dset=out_path, spec_path=spec_path
                )

                dist_path = "{s:s}-post_{v:s}-{r:s}_dist-{h:s}_nf"
                dist_path = dist_path.format(s=subj_id, v=vf, r=roi_name, h=hemi)

                fmri_tools.utils.write_dist_to_centre(
                    centre_node=centre_node,
                    in_dset=out_path,
                    spec_path=spec_path,
                    dist_dset=dist_path,
                    pad_to="d:" + out_path,
                    inc_centre_node=True
                )

                dist_paths.append(dist_path + ".niml.dset")

        # this is under the hemi
        comb_path = "{s:s}-post-dist-{h:s}_nf.niml.dset"
        comb_path = comb_path.format(s=subj_id, h=hemi)

        comb_cmd = [
            "3dMean",
            "-non_zero",
            "-sum",
            "-prefix", comb_path,
            "-overwrite"
        ]

        comb_cmd.extend(dist_paths)

        runcmd.run_cmd(" ".join(comb_cmd))
def run(options):

    # Check file exists
    if not os.path.exists(options.input):
        print("Error: Input file doesn't exist")
        exit(1)

    EXE_DIR = os.path.dirname(os.path.realpath(__file__))

    if not os.path.exists(options.outDir):
        os.mkdir(options.outDir)
    else:
        shutil.rmtree(options.outDir)
        os.mkdir(options.outDir)

    tmpDir = options.outDir + "/intermediate"
    if not os.path.exists(tmpDir):
        os.mkdir(tmpDir)

    # Logging
    import logging
    logger = logging.getLogger("pipits_process")
    logger.setLevel(logging.DEBUG)

    streamLoggerFormatter = logging.Formatter(
        "%(asctime)s %(levelname)s: %(message)s",
        tc.HEADER + "%Y-%m-%d %H:%M:%S" + tc.ENDC)
    streamLogger = logging.StreamHandler()
    if options.verbose:
        streamLogger.setLevel(logging.DEBUG)
    else:
        streamLogger.setLevel(logging.INFO)
    streamLogger.setFormatter(streamLoggerFormatter)
    logger.addHandler(streamLogger)

    fileLoggerFormatter = logging.Formatter(
        "%(asctime)s %(levelname)s: %(message)s",
        tc.HEADER + "%Y-%m-%d %H:%M:%S" + tc.ENDC)
    fileLogger = logging.FileHandler(options.outDir + "/log.txt", "w")
    fileLogger.setLevel(logging.DEBUG)
    fileLogger.setFormatter(fileLoggerFormatter)
    logger.addHandler(fileLogger)

    # Summary file
    # summary_file = open(options.outDir + "/summary_pipits_process.txt", "w")

    # Start
    logger.info(tc.OKBLUE + "PIPITS PROCESS started" + tc.ENDC)

    # Check if the file is empty
    if os.stat(options.input).st_size == 0:
        logger.error("Input file is empty!")
        exit(0)

    # Dereplicate [VSEARCH]
    logger.info("Dereplicating and removing unique sequences prior to picking OTUs")
    cmd = " ".join([pd.VSEARCH,
                    "--derep_fulllength", options.input,
                    "--output", tmpDir + "/input_nr.fasta",
                    "--minuniquesize 2",
                    "--sizeout",
                    "--threads", options.threads])
    rc.run_cmd_VSEARCH(cmd, logger, options.verbose)

    # filesize = os.path.getsize(tmpDir + "/input_nr.fasta") / 1000.0
    # logger.info("Dereplicating " + tc.OKGREEN + "(Done) " + tc.ENDC)
    # logger.info("\t" + tc.RED + "File size after initial dereplication: " + str(filesize) + " MB" + tc.ENDC)

    # Check if the file is empty
    if os.stat(tmpDir + "/input_nr.fasta").st_size == 0:
        logger.info(tc.OKYELLOW +
                    "After dereplicating and removing unique sequences, "
                    "there are no sequences! Processing stopped." +
                    tc.ENDC)
        exit(0)

    # OTU clustering
    logger.info("Picking OTUs [VSEARCH]")
    cmd = " ".join([pd.VSEARCH,
                    "--cluster_fast", tmpDir + "/input_nr.fasta",
                    "--id", options.VSEARCH_id,
                    "--centroids", tmpDir + "/input_nr_otus.fasta",
                    "--uc", tmpDir + "/input_nr_otus.uc",
                    "--threads", options.threads])
    rc.run_cmd_VSEARCH(cmd, logger, options.verbose)

    # Chimera removal
    logger.info("Removing chimeras [VSEARCH]")
    cmd = " ".join([pd.VSEARCH,
                    "--uchime_ref", tmpDir + "/input_nr_otus.fasta",
                    "--db", pd.UNITE_REFERENCE_DATA_CHIMERA,
                    "--nonchimeras", tmpDir + "/input_nr_otus_nonchimeras.fasta",
                    "--threads", options.threads])
    rc.run_cmd_VSEARCH(cmd, logger, options.verbose)

    # Rename OTUs
    logger.info("Renaming OTUs")

    def renumberOTUS():
        handle_in = open(tmpDir + "/input_nr_otus_nonchimeras.fasta", "rU")
        handle_out = open(tmpDir + "/input_nr_otus_nonchimeras_relabelled.fasta", "w")
        for line in handle_in:
            if line.startswith(">"):
                newlabel = line[1:].split(";")[0]
                handle_out.write(">" + newlabel + "\n")
            else:
                handle_out.write(line.rstrip() + "\n")
        handle_in.close()
        handle_out.close()

    renumberOTUS()

    # Map reads to OTUs
    logger.info("Mapping reads onto centroids [VSEARCH]")
    cmd = " ".join([pd.VSEARCH,
                    "--usearch_global", options.input,
                    "--db", tmpDir + "/input_nr_otus_nonchimeras_relabelled.fasta",
                    "--id", options.VSEARCH_id,
                    "--uc", tmpDir + "/otus.uc",
                    "--threads", options.threads])
    rc.run_cmd_VSEARCH(cmd, logger, options.verbose)

    # OTU table construction
    logger.info("Making OTU table")
    cmd = " ".join(["python", EXE_DIR + "/pipits_uc/uc2otutab.py",
                    tmpDir + "/otus.uc",
                    ">", tmpDir + "/otu_table_prelim.txt"])
    rc.run_cmd_VSEARCH(cmd, logger, options.verbose)

    # Convert to biom
    logger.info("Converting classic tabular OTU table into a BIOM format [BIOM]")
    try:
        os.remove(tmpDir + "/otu_table_prelim.biom")
    except OSError:
        pass
    cmd = " ".join([pd.BIOM, "convert",
                    "-i", tmpDir + "/otu_table_prelim.txt",
                    "-o", tmpDir + "/otu_table_prelim.biom",
                    "--table-type=\"OTU table\""])
    rc.run_cmd(cmd, logger, options.verbose)

    # Classifying OTUs
    # http://sourceforge.net/projects/rdp-classifier/files/RDP_Classifier_TrainingData/
    logger.info("Assigning taxonomy [RDP Classifier]")
    cmd = " ".join(["java", "-jar", pd.RDP_CLASSIFIER_JAR, "classify",
                    "-t", pd.UNITE_RETRAINED_DIR + "/rRNAClassifier.properties",
                    "-o", options.outDir + "/assigned_taxonomy.txt",
                    tmpDir + "/input_nr_otus_nonchimeras_relabelled.fasta"])
    rc.run_cmd(cmd, logger, options.verbose)

    # Reformatting RDP_CLASSIFIER output for biom
    logger.info("Reformatting RDP_Classifier output")
    cmd = " ".join(["python", EXE_DIR + "/reformatAssignedTaxonomy.py",
                    "-i", options.outDir + "/assigned_taxonomy.txt",
                    "-o", options.outDir + "/assigned_taxonomy_reformatted_filtered.txt",
                    "-c", options.RDP_assignment_threshold])
    rc.run_cmd(cmd, logger, options.verbose)

    # Adding RDP_CLASSIFIER output to OTU table
    logger.info("Adding assignment to OTU table [BIOM]")
    try:
        os.remove(options.outDir + "/otu_table.biom")
    except OSError:
        pass
    cmd = " ".join([pd.BIOM, "add-metadata",
                    "-i", tmpDir + "/otu_table_prelim.biom",
                    "-o", options.outDir + "/otu_table.biom",
                    "--observation-metadata-fp",
                    options.outDir + "/assigned_taxonomy_reformatted_filtered.txt",
                    "--observation-header", "OTUID,taxonomy,confidence",
                    "--sc-separated", "taxonomy",
                    "--float-fields", "confidence"])
    rc.run_cmd(cmd, logger, options.verbose)

    # Convert BIOM to TABLE
    logger.info("Converting OTU table with taxa assignment into a classic tabular format [BIOM]")
    try:
        os.remove(options.outDir + "/otu_table.txt")
    except OSError:
        pass
    cmd = " ".join([pd.BIOM, "convert",
                    "-i", options.outDir + "/otu_table.biom",
                    "-o", options.outDir + "/otu_table.txt",
                    "--header-key taxonomy",
                    "-b"])
    rc.run_cmd(cmd, logger, options.verbose)

    # Make phylotype table
    logger.info("Phylotyping OTU table")
    cmd = " ".join(["python", EXE_DIR + "/phylotype_biom.py",
                    "-i", options.outDir + "/otu_table.biom",
                    "-o", options.outDir + "/phylotype_table.txt"])
    rc.run_cmd(cmd, logger, options.verbose)

    try:
        os.remove(options.outDir + "/phylotype_table.biom")
    except OSError:
        pass
    cmd = " ".join([pd.BIOM, "convert",
                    "-i", options.outDir + "/phylotype_table.txt",
                    "-o", options.outDir + "/phylotype_table.biom",
                    "--table-type=\"OTU table\" --process-obs-metadata=\"taxonomy\""])
    rc.run_cmd(cmd, logger, options.verbose)

    # Move representative sequence file to outDir
    shutil.move(tmpDir + "/input_nr_otus_nonchimeras_relabelled.fasta",
                options.outDir + "/repseqs.fasta")

    # Remove tmp
    if options.remove:
        logger.info("Cleaning temporary directory")
        shutil.rmtree(tmpDir)

    # Final stats

    #############################
    # Import json formatted OTU #
    #############################
    # (a toy sketch of the BIOM layout assumed here follows this function)

    def biomstats(BIOMFILE):
        import json
        jsondata = open(BIOMFILE)
        biom = json.load(jsondata)

        sampleSize = int(biom["shape"][1])
        otus = int(biom["shape"][0])

        taxonomies = []
        for i in range(len(biom["rows"])):
            taxonomies.append("; ".join(biom["rows"][i]["metadata"]["taxonomy"]))

        sampleids = []
        for i in range(len(biom["columns"])):
            sampleids.append(biom["columns"][i]["id"])

        import numpy as np

        # BIOM table into matrix
        matrix = np.zeros(shape=(otus, sampleSize))
        for i in biom["data"]:
            matrix[i[0], i[1]] = i[2]

        totalCount = matrix.sum()

        return totalCount, otus, sampleSize

    otu_reads_count, otu_count, otu_sample_count = biomstats(
        options.outDir + "/otu_table.biom")
    phylo_reads_count, phylo_count, phylo_sample_count = biomstats(
        options.outDir + "/phylotype_table.biom")

    outfile = open(options.outDir + "/summary_pipits_process.txt", "w")
    outfile.write("Number of reads after singletons and chimera removal: " + str(int(otu_reads_count)) + "\n")
    outfile.write("Number of OTUs: " + str(otu_count) + "\n")
    outfile.write("Number of phylotypes: " + str(phylo_count) + "\n")
    outfile.write("Number of samples: " + str(otu_sample_count) + "\n")

    logger.info(tc.RED + "\tNumber of reads after singletons and chimera removal: " + str(int(otu_reads_count)) + tc.ENDC)
    logger.info(tc.RED + "\tNumber of OTUs: " + str(otu_count) + tc.ENDC)
    logger.info(tc.RED + "\tNumber of phylotypes: " + str(phylo_count) + tc.ENDC)
    logger.info(tc.RED + "\tNumber of samples: " + str(otu_sample_count) + tc.ENDC)

    # Done!
    logger.info(tc.OKBLUE + "PIPITS PROCESS ended successfully." + tc.ENDC)
    logger.info(tc.OKYELLOW + "Resulting files are in \"" + options.outDir + "\" directory" + tc.ENDC)
def data_dump(conf, paths):
    "Dump all the relevant data to a single text file"

    # ingredients:
    #  -node coordinates
    #  -patch IDs
    #  -patch centre dist
    #  -all > blank from GLM
    #  -coherent PSC
    #  -incoherent PSC

    patch_ids = np.setdiff1d(conf.exp.mod_patches, conf.ana.exclude_patch_ids)

    vf_lookup = {"lh": "R", "rh": "L"}

    # open it this way so that we can write to it twice and it will append
    dump_handle = open(paths.coh_ana.comb.full(".txt"), "w")

    os.chdir(paths.coh_ana.base.full())

    for hemi in ["lh", "rh"]:

        hemi_ext = "_" + hemi

        glm_comb_path = paths.coh_ana.glm_comb.full(hemi_ext + "-full.niml.dset")

        # first, want to combine all the GLM data together
        comb_cmd = [
            "3dMean",
            "-non_zero",
            "-sum",
            "-prefix", glm_comb_path,
            "-overwrite"
        ]

        for patch_id in patch_ids:
            if conf.stim.patches[patch_id]["vf"] == vf_lookup[hemi]:
                buck_file = paths.coh_ana.glm.full(
                    "-patch_{n:d}".format(n=patch_id) + hemi_ext +
                    "-full.niml.dset" + "[6]")
                comb_cmd.append(buck_file)

        runcmd.run_cmd(" ".join(comb_cmd))

        psc_comb_path = paths.coh_ana.comb.full(hemi_ext + "-full.niml.dset")

        # now we need to know the patch IDs
        loc_id = conf.subj.subj_id + "_loc"
        loc_conf = ns_patches.config.get_conf(loc_id)
        loc_paths = ns_patches.paths.get_subj_paths(loc_conf)

        id_path = loc_paths.loc.patch_id_thr.full(
            "_{h:s}-full_Clustered_e1_a{n:.01f}.niml.dset".format(
                h=hemi, n=conf.loc.area_thr))

        dump_path = paths.coh_ana.comb.full(hemi_ext + ".txt")

        if os.path.exists(dump_path):
            os.remove(dump_path)

        # now we have all our ingredients, we can write out the text file
        dump_cmd = [
            "3dmaskdump",
            "-o", dump_path,
            "-mask", psc_comb_path,
            id_path,
            paths.coh_ana.patch_dist.full("_" + hemi + "-full.niml.dset"),
            glm_comb_path,
            psc_comb_path
        ]

        runcmd.run_cmd(" ".join(dump_cmd))

        # concatenate across hemis
        np.savetxt(dump_handle, np.loadtxt(dump_path))

    dump_handle.close()
def _run_coh_glm(conf, paths, patch_id):
    "Run the coh/incoh GLM for a given patch"

    os.chdir(paths.coh_ana.base.full())

    # contralateral organisation
    if conf.stim.patches[patch_id]["vf"] == "L":
        hemi = "rh"
    else:
        hemi = "lh"

    hemi_ext = "_" + hemi

    # [coh, incoh]
    timings = _get_coh_timing(conf, paths, patch_id)

    # write patch timings
    for (cond, cond_name) in zip(timings, ["coh", "incoh"]):

        cond_path = paths.coh_ana.stim_times.full(
            "-patch_{n:d}_{c:s}.txt".format(n=patch_id, c=cond_name))

        with open(cond_path, "w") as cond_file:
            for run_cond in cond:
                cond_file.write("\t".join(
                    ["{t:d}".format(t=x) for x in run_cond]))
                cond_file.write("\n")

    # write out the mask for this patch
    loc_id = conf.subj.subj_id + "_loc"
    loc_conf = ns_patches.config.get_conf(loc_id)
    loc_paths = ns_patches.paths.get_subj_paths(loc_conf)

    id_path = loc_paths.loc.patch_id_thr.full(
        "_{h:s}-full_Clustered_e1_a{n:.01f}.niml.dset".format(
            h=hemi, n=conf.loc.area_thr))

    mask_path = paths.coh_ana.mask.full(
        "-patch_{n:d}".format(n=patch_id) + hemi_ext + "-full.niml.dset")

    # the patch ID has a +1 in the below because they are stored in the niml
    # as 1-based
    mask_cmd = [
        "3dcalc",
        "-a", id_path,
        "-expr", "equals(a,{x:d})".format(x=patch_id + 1),
        "-prefix", mask_path,
        "-overwrite"
    ]

    runcmd.run_cmd(" ".join(mask_cmd))

    # right-o, ready for the GLM
    censor_vols = conf.exp.n_censor_vols - 1
    censor_str = "*:0-{v:.0f}".format(v=censor_vols)

    model_str = "SPMG1({d:.0f})".format(d=conf.exp.img_on_s)

    glm_cmd = ["3dDeconvolve", "-input"]

    surf_paths = [
        surf_path.full(hemi_ext + "-full.niml.dset")
        for surf_path in paths.func.surfs
    ]

    glm_cmd.extend(surf_paths)

    glm_cmd.extend([
        "-force_TR", "{tr:.3f}".format(tr=conf.acq.tr_s),
        "-polort", "a",  # auto baseline degree
        "-local_times",
        "-mask", mask_path,
        "-CENSORTR", censor_str,
        "-xjpeg", "exp_design_patch_{x:d}.png".format(x=patch_id),
        "-x1D", "exp_design_patch_{x:d}".format(x=patch_id),
        "-overwrite",
        "-x1D_stop",  # want to use REML, so don't bother running
        "-num_stimts", "2"
    ])

    for (i_cond, cond_name) in enumerate(["coh", "incoh"]):

        glm_cmd.extend(
            ["-stim_label", "{x:d}".format(x=i_cond + 1), cond_name])

        glm_cmd.extend([
            "-stim_times",
            "{x:d}".format(x=i_cond + 1),
            paths.coh_ana.stim_times.full(
                "-patch_{n:d}_{c:s}.txt".format(n=patch_id, c=cond_name)),
            model_str
        ])

    # all > blank contrast
    con_str = "SYM: +coh +incoh"
    glm_cmd.extend(
        ["-gltsym", "'" + con_str + "'", "-glt_label", "1", "all_gt_bl"])

    runcmd.run_cmd(" ".join(glm_cmd))

    os.remove("Decon.REML_cmd")

    beta_file = paths.coh_ana.beta.full(
        "-patch_{n:d}".format(n=patch_id) + hemi_ext + "-full.niml.dset")
    buck_file = paths.coh_ana.glm.full(
        "-patch_{n:d}".format(n=patch_id) + hemi_ext + "-full.niml.dset")

    reml_cmd = [
        "3dREMLfit",
        "-matrix", "exp_design_patch_{x:d}.xmat.1D".format(x=patch_id),
        "-mask", mask_path,
        "-Rbeta", beta_file,
        "-tout",
        "-Rbuck", buck_file,
        "-overwrite",
        "-input"
    ]

    reml_cmd.append("'" + " ".join(surf_paths) + "'")

    # run the proper GLM
    runcmd.run_cmd(" ".join(reml_cmd))

    # now to convert to PSC, while we're here

    design_path = "exp_design_patch_{x:d}.xmat.1D".format(x=patch_id)

    # to write
    ext = "-patch_{n:d}".format(n=patch_id) + hemi_ext + "-full.niml.dset"
    bltc_path = paths.coh_ana.bltc.file(ext)
    bl_path = paths.coh_ana.bl.file(ext)
    psc_path = paths.coh_ana.psc.file(ext)

    # 4 orthogonal polynomial regressors per run
    n_nuisance = conf.subj.n_runs * 4

    # checked via '-verb'
    beta_bricks = "[{n:d}..$]".format(n=n_nuisance)

    fmri_tools.utils.beta_to_psc(
        beta_file,
        beta_bricks,
        design_path,
        bltc_path,
        bl_path,
        psc_path,
    )
def _run_coh_glm_depth(conf, paths, patch_id, i_bin):
    "Run the coh/incoh GLM for a given patch and depth"

    os.chdir(paths.depth_ana.base.full())

    # contralateral organisation
    if conf.stim.patches[patch_id]["vf"] == "L":
        hemi = "rh"
    else:
        hemi = "lh"

    hemi_ext = "_" + hemi
    bin_ext = "_bin_" + str(i_bin)

    mask_path = paths.coh_ana.mask.full(
        "-patch_{n:d}".format(n=patch_id) + hemi_ext + "-full.niml.dset")

    # right-o, ready for the GLM
    censor_vols = conf.exp.n_censor_vols - 1
    censor_str = "*:0-{v:.0f}".format(v=censor_vols)

    model_str = "SPMG1({d:.0f})".format(d=conf.exp.img_on_s)

    glm_cmd = ["3dDeconvolve", "-input"]

    surf_paths = [
        surf_path.full(bin_ext + hemi_ext + "-full.niml.dset")
        for surf_path in paths.func.surfs
    ]

    glm_cmd.extend(surf_paths)

    glm_cmd.extend([
        "-force_TR", "{tr:.3f}".format(tr=conf.acq.tr_s),
        "-polort", "a",  # auto baseline degree
        "-local_times",
        "-mask", mask_path,
        "-CENSORTR", censor_str,
        "-xjpeg", "exp_design_bin_{n:d}_patch_{x:d}.png".format(n=i_bin, x=patch_id),
        "-x1D", "exp_design_bin_{n:d}_patch_{x:d}".format(n=i_bin, x=patch_id),
        # "-overwrite",
        "-x1D_stop",  # want to use REML, so don't bother running
        "-num_stimts", "2"
    ])

    for (i_cond, cond_name) in enumerate(["coh", "incoh"]):

        glm_cmd.extend(
            ["-stim_label", "{x:d}".format(x=i_cond + 1), cond_name])

        glm_cmd.extend([
            "-stim_times",
            "{x:d}".format(x=i_cond + 1),
            paths.coh_ana.stim_times.full(
                "-patch_{n:d}_{c:s}.txt".format(n=patch_id, c=cond_name)),
            model_str
        ])

    runcmd.run_cmd(" ".join(glm_cmd))

    os.remove("Decon.REML_cmd")

    beta_file = paths.depth_ana.beta.full(
        "-patch_{n:d}".format(n=patch_id) + bin_ext + hemi_ext + "-full.niml.dset")
    buck_file = paths.depth_ana.glm.full(
        "-patch_{n:d}".format(n=patch_id) + bin_ext + hemi_ext + "-full.niml.dset")

    reml_cmd = [
        "3dREMLfit",
        "-matrix", "exp_design_bin_{n:d}_patch_{x:d}.xmat.1D".format(n=i_bin, x=patch_id),
        "-mask", mask_path,
        "-Rbeta", beta_file,
        "-tout",
        "-Rbuck", buck_file,
        # "-overwrite",
        "-input"
    ]

    reml_cmd.append("'" + " ".join(surf_paths) + "'")

    # run the proper GLM
    runcmd.run_cmd(" ".join(reml_cmd))

    # now to convert to PSC, while we're here

    design_path = "exp_design_bin_{n:d}_patch_{x:d}.xmat.1D".format(
        n=i_bin, x=patch_id)

    # to write
    ext = "-patch_{n:d}".format(n=patch_id) + bin_ext + hemi_ext + "-full.niml.dset"
    bltc_path = paths.depth_ana.bltc.file(ext)
    bl_path = paths.depth_ana.bl.file(ext)
    psc_path = paths.depth_ana.psc.file(ext)

    # 4 orthogonal polynomial regressors per run
    n_nuisance = conf.subj.n_runs * 4

    # checked via '-verb'
    beta_bricks = "[{n:d}..$]".format(n=n_nuisance)

    fmri_tools.utils.beta_to_psc(
        beta_file,
        beta_bricks,
        design_path,
        bltc_path,
        bl_path,
        psc_path,
    )
def _run_glm(subj_id, acq_date, conf, log_dir, loc_mask=True):

    inf_str = subj_id + "_ul_sens_" + acq_date

    subj_dir = os.path.join(conf.ana.base_subj_dir, subj_id)

    if loc_mask:
        glm_dir = os.path.join(subj_dir, "analysis")
        mask_descrip = ""
    else:
        glm_dir = os.path.join(subj_dir, "post_analysis", "ret_roi")
        mask_descrip = "_ret_roi"

    os.chdir(glm_dir)

    for vf in ("upper", "lower"):

        cond_details = _write_onsets(
            subj_id=subj_id,
            acq_date=acq_date,
            conf=conf,
            vf=vf,
            runs_type="all",
            log_dir=log_dir
        )

        contrast_details = []

        for src_loc in ["above", "below"]:

            contrast = []

            for curr_cond in cond_details:
                if src_loc in curr_cond["name"]:
                    contrast.append("+" + curr_cond["name"])

            contrast = " ".join(contrast)

            contrast_details.append(
                {"label": src_loc + "_all", "contrast": contrast}
            )

        # these files have three nodes, one for each visual area
        run_paths = [
            os.path.join(
                subj_dir,
                "func",
                "run_{n:02d}".format(n=run_num),
                "{s:s}-run_{n:02d}-uw-{vf:s}{m:s}_data.niml.dset".format(
                    s=inf_str, n=run_num, vf=vf, m=mask_descrip
                )
            )
            for run_num in range(1, conf.exp.n_runs + 1)
        ]

        # to write
        glm_filename = "{s:s}-{v:s}{m:s}-glm-.niml.dset".format(
            s=inf_str, v=vf, m=mask_descrip)

        # to write
        beta_filename = "{s:s}-{v:s}{m:s}-beta-.niml.dset".format(
            s=inf_str, v=vf, m=mask_descrip)

        # to write
        resid_filename = "{s:s}-{v:s}{m:s}-resid-.niml.dset".format(
            s=inf_str, v=vf, m=mask_descrip)

        extra_reml_args = ["-Rerrts", resid_filename]

        # run the GLM on this visual field location
        fmri_tools.analysis.glm(
            run_paths=run_paths,
            output_dir=glm_dir,
            glm_filename=glm_filename,
            beta_filename=beta_filename,
            tr_s=conf.ana.tr_s,
            cond_details=cond_details,
            contrast_details=contrast_details,
            censor_str=conf.ana.censor_str,
            matrix_filename="exp_design_" + vf,
            extra_reml_args=extra_reml_args
        )

        # now to convert the beta weights to percent signal change

        # baseline timecourse
        bltc_filename = "{s:s}-{v:s}{m:s}-bltc-.niml.dset".format(
            s=inf_str, v=vf, m=mask_descrip)

        # baseline
        bl_filename = "{s:s}-{v:s}{m:s}-bl-.niml.dset".format(
            s=inf_str, v=vf, m=mask_descrip)

        # psc
        psc_filename = "{s:s}-{v:s}{m:s}-psc-.niml.dset".format(
            s=inf_str, v=vf, m=mask_descrip)

        beta_bricks = "[40..$]"

        # check the beta bricks are as expected
        dset_labels = fmri_tools.utils.get_dset_label(
            beta_filename + beta_bricks)

        desired_labels = []
        for img_id in conf.exp.img_ids:
            for src_loc in ["above", "below"]:
                desired_labels.append(
                    vf + "_" + src_loc + "_" + str(img_id) + "#0"
                )

        assert dset_labels == desired_labels

        # run the PSC conversion
        fmri_tools.utils.beta_to_psc(
            beta_path=beta_filename,
            beta_bricks=beta_bricks,
            design_path="exp_design_" + vf + ".xmat.1D",
            bltc_path=bltc_filename,
            bl_path=bl_filename,
            psc_path=psc_filename,
        )

        data_filename = "{s:s}-{v:s}{m:s}-data-amp.txt".format(
            s=inf_str, v=vf, m=mask_descrip)

        if os.path.exists(data_filename):
            os.remove(data_filename)

        cmd = ["3dmaskdump", "-noijk", "-o", data_filename, psc_filename]

        runcmd.run_cmd(" ".join(cmd))

        # save the betas as text file also, for exploration / checking
        b_filename = "{s:s}-{v:s}{m:s}-beta-amp.txt".format(
            s=inf_str, v=vf, m=mask_descrip)

        if os.path.exists(b_filename):
            os.remove(b_filename)

        cmd = ["3dmaskdump", "-noijk", "-o", b_filename, beta_filename]

        runcmd.run_cmd(" ".join(cmd))
def run(options):

    PIPITS_PREP_OUTPUT = "prepped.fasta"

    # Make directories (outdir and tmpdir)
    if not os.path.exists(options.outDir):
        os.mkdir(options.outDir)
    else:
        shutil.rmtree(options.outDir)
        os.mkdir(options.outDir)

    tmpDir = options.outDir + "/intermediate"
    if not os.path.exists(tmpDir):
        os.mkdir(tmpDir)

    # Logging
    import logging
    logger = logging.getLogger("pipits_prep")
    logger.setLevel(logging.DEBUG)

    streamLoggerFormatter = logging.Formatter(
        "%(asctime)s %(levelname)s: %(message)s",
        tc.HEADER + "%Y-%m-%d %H:%M:%S" + tc.ENDC)
    streamLogger = logging.StreamHandler()
    if options.verbose:
        streamLogger.setLevel(logging.DEBUG)
    else:
        streamLogger.setLevel(logging.INFO)
    streamLogger.setFormatter(streamLoggerFormatter)
    logger.addHandler(streamLogger)

    fileLoggerFormatter = logging.Formatter(
        "%(asctime)s %(levelname)s: %(message)s",
        tc.HEADER + "%Y-%m-%d %H:%M:%S" + tc.ENDC)
    fileLogger = logging.FileHandler(options.outDir + "/log.txt", "w")
    fileLogger.setLevel(logging.DEBUG)
    fileLogger.setFormatter(fileLoggerFormatter)
    logger.addHandler(fileLogger)

    # Summary file
    summary_file = open(options.outDir + "/summary_pipits_prep.txt", "w")

    # Start!
    logger.info(tc.OKBLUE + "PIPITS PREP started" + tc.ENDC)

    EXE_DIR = os.path.dirname(os.path.realpath(__file__))

    # Check for the presence of rawdata directory
    logger.debug("Checking for presence of input directory")
    if not os.path.exists(options.dataDir):
        logger.error("Cannot find \"" + options.dataDir + "\" directory. "
                     "Ensure you have the correct name of the directory "
                     "where your Illumina sequences are stored")
        exit(1)

    fastqs_l = []
    fastqs_f = []
    fastqs_r = []

    # if list is provided...
    if options.listfile:
        logger.info("Processing user-provided listfile")
        try:
            listfile = open(options.listfile, "r")
        except IOError:
            logger.error("\"" + options.listfile + "\" not found.")
            exit(1)
        for l in listfile:
            if l.strip(" ").strip("\n") != "" and not l.startswith("#"):
                l = l.rstrip().split("\t")
                fastqs_l.append(l[0])
                fastqs_f.append(l[1])
                fastqs_r.append(l[2])
        listfile.close()

    # if not provided
    if not options.listfile:
        logger.info("Getting list of fastq files and sample ID from input folder")
        fastqs = []
        for file in os.listdir(options.dataDir):
            if file.endswith(".fastq.gz") or \
               file.endswith(".bz2") or \
               file.endswith(".fastq"):
                fastqs.append(file)

        if len(fastqs) % 2 != 0:
            logger.error("There are missing pair(s) in the Illumina sequences. "
                         "Check your files and labelling")
            exit(1)

        # after sorting, forward and reverse files alternate
        # (see _demo_pair_fastqs below for a toy illustration)
        coin = True
        for fastq in sorted(fastqs):
            if coin == True:
                fastqs_f.append(fastq)
            else:
                fastqs_r.append(fastq)
            coin = not coin

        for i in range(len(fastqs_f)):
            if fastqs_f[i].split("_")[0] != fastqs_r[i].split("_")[0]:
                logger.error("Problem with labelling FASTQ files.")
                exit(1)
            fastqs_l.append(fastqs_f[i].split("_")[0])

    # Check
    if len(fastqs_f) != len(fastqs_r):
        logger.error("Different number of forward FASTQs and reverse FASTQs")
        exit(1)

    # Done loading. Now check the file extensions.
    filenameextensions = []
    for filename in (fastqs_f + fastqs_r):
        filenameextensions.append(filename.split(".")[-1].rstrip())
    if len(set(filenameextensions)) > 1:
        logger.error("More than one type of file extension found")
        exit(1)
    extensionType = next(iter(filenameextensions))

    # For summary 1:
    logger.info("Counting sequences in rawdata")
    numberofsequences = 0
    for fr in fastqs_f:
        if extensionType == "gz":
            cmd = " ".join(["zcat", options.dataDir + "/" + fr, "|", "wc -l"])
        elif extensionType == "bz2":
            cmd = " ".join(["bzcat", options.dataDir + "/" + fr, "|", "wc -l"])
        elif extensionType == "fastq":
            cmd = " ".join(["cat", options.dataDir + "/" + fr, "|", "wc -l"])
        else:
            logger.error("Unknown extension type.")
            exit(1)
        logger.debug(cmd)
        p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
        numberofsequences += int(p.communicate()[0]) / 4
        p.wait()
    logger.info("\t" + tc.RED + "Number of paired-end reads in rawdata: " + str(numberofsequences) + tc.ENDC)
    summary_file.write("Number of paired-end reads in rawdata: " + str(numberofsequences) + "\n")

    # Join paired-end reads
    logger.info("Joining paired-end reads [" + options.joiner_method + "]")
    if not os.path.exists(tmpDir + "/joined"):
        os.mkdir(tmpDir + "/joined")
    for i in range(len(fastqs_l)):
        if extensionType == "gz":
            cmd = " ".join(["gunzip -c", options.dataDir + "/" + fastqs_f[i],
                            ">", tmpDir + "/joined/" + fastqs_f[i] + ".tmp"])
            rc.run_cmd(cmd, logger, options.verbose)
            cmd = " ".join(["gunzip -c", options.dataDir + "/" + fastqs_r[i],
                            ">", tmpDir + "/joined/" + fastqs_r[i] + ".tmp"])
            rc.run_cmd(cmd, logger, options.verbose)
        elif extensionType == "bz2":
            cmd = " ".join(["bunzip2 -c", options.dataDir + "/" + fastqs_f[i],
                            ">", tmpDir + "/joined/" + fastqs_f[i] + ".tmp"])
            rc.run_cmd(cmd, logger, options.verbose)
            cmd = " ".join(["bunzip2 -c", options.dataDir + "/" + fastqs_r[i],
                            ">", tmpDir + "/joined/" + fastqs_r[i] + ".tmp"])
            rc.run_cmd(cmd, logger, options.verbose)
        elif extensionType == "fastq":
            cmd = " ".join(["ln -sf",
                            os.path.abspath(options.dataDir + "/" + fastqs_f[i]),
                            tmpDir + "/joined/" + fastqs_f[i] + ".tmp"])
            rc.run_cmd(cmd, logger, options.verbose)
            cmd = " ".join(["ln -sf",
                            os.path.abspath(options.dataDir + "/" + fastqs_r[i]),
                            tmpDir + "/joined/" + fastqs_r[i] + ".tmp"])
            rc.run_cmd(cmd, logger, options.verbose)
        else:
            print(extensionType)
            logger.error("Unknown extension found.")
            exit(1)

        # joiner_method = "PEAR"
        if options.joiner_method == "PEAR":
            cmd = " ".join([pd.PEAR,
                            "-f", tmpDir + "/joined/" + fastqs_f[i] + ".tmp",
                            "-r", tmpDir + "/joined/" + fastqs_r[i] + ".tmp",
                            "-o", tmpDir + "/joined/" + fastqs_l[i],
                            "-j", options.threads,
                            "-b", options.base_phred_quality_score,
                            "-q 30",
                            "-p 0.0001"])
            rc.run_cmd(cmd, logger, options.verbose)
            cmd = " ".join(["rm -v",
                            tmpDir + "/joined/" + fastqs_f[i] + ".tmp",
                            tmpDir + "/joined/" + fastqs_r[i] + ".tmp"])
            rc.run_cmd(cmd, logger, options.verbose)
            cmd = " ".join(["mv -f",
                            tmpDir + "/joined/" + fastqs_l[i] + ".assembled.fastq",
                            tmpDir + "/joined/" + fastqs_l[i] + ".joined.fastq"])
            rc.run_cmd(cmd, logger, options.verbose)
        elif options.joiner_method == "FASTQJOIN":
            cmd = " ".join(["fastq-join",
                            tmpDir + "/joined/" + fastqs_f[i] + ".tmp",
                            tmpDir + "/joined/" + fastqs_r[i] + ".tmp",
                            "-o", tmpDir + "/joined/" + fastqs_l[i] + ".joined.fastq"])
            rc.run_cmd(cmd, logger, options.verbose)
            cmd = " ".join(["mv -f",
                            tmpDir + "/joined/" + fastqs_l[i] + ".joined.fastqjoin",
                            tmpDir + "/joined/" + fastqs_l[i] + ".joined.fastq"])
            rc.run_cmd(cmd, logger, options.verbose)

    # For summary 2:
    numberofsequences = 0
    for i in range(len(fastqs_l)):
        cmd = " ".join(["cat", tmpDir + "/joined/" + fastqs_l[i] + ".joined.fastq",
                        "|", "wc -l"])
        logger.debug(cmd)
        p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
        numberofsequences += int(p.communicate()[0]) / 4
        p.wait()
    logger.info("\t" + tc.RED + "Number of joined reads: " + str(numberofsequences) + tc.ENDC)
    summary_file.write("Number of joined reads: " + str(numberofsequences) + "\n")

    # Quality filter
    logger.info("Quality filtering [FASTX]")
    if not os.path.exists(tmpDir + "/fastqqualityfiltered"):
        os.mkdir(tmpDir + "/fastqqualityfiltered")
    for i in range(len(fastqs_f)):
        cmd = " ".join([pd.FASTX_FASTQ_QUALITY_FILTER,
                        "-i", tmpDir + "/joined/" + fastqs_l[i] + ".joined.fastq",
                        "-o", tmpDir + "/fastqqualityfiltered/" + fastqs_l[i] + ".fastq",
                        "-q", options.FASTX_fastq_quality_filter_q,
                        "-p", options.FASTX_fastq_quality_filter_p,
                        "-Q" + options.base_phred_quality_score])
        rc.run_cmd(cmd, logger, options.verbose)

    # For summary 3:
    numberofsequences = 0
    for i in range(len(fastqs_l)):
        cmd = " ".join(["cat", tmpDir + "/fastqqualityfiltered/" + fastqs_l[i] + ".fastq",
                        "|", "wc -l"])
        p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
        numberofsequences += int(p.communicate()[0]) / 4
        p.wait()
    logger.info("\t" + tc.RED + "Number of quality filtered reads: " + str(numberofsequences) + tc.ENDC)
    summary_file.write("Number of quality filtered reads: " + str(numberofsequences) + "\n")

    # Removing reads with "N" and FASTA conversion
    if options.FASTX_fastq_to_fasta_n:
        logger.info("Converting FASTQ to FASTA [FASTX]")
    else:
        logger.info("Converting FASTQ to FASTA and also removing reads with \"N\" nucleotide [FASTX]")
    if not os.path.exists(tmpDir + "/fastqtofasta"):
        os.mkdir(tmpDir + "/fastqtofasta")
    fastq_to_fasta_n = ""
    if options.FASTX_fastq_to_fasta_n:
        fastq_to_fasta_n = "-n"
    for i in range(len(fastqs_f)):
        cmd = " ".join([pd.FASTX_FASTQ_TO_FASTA,
                        "-i", tmpDir + "/fastqqualityfiltered/" + fastqs_l[i] + ".fastq",
                        "-o", tmpDir + "/fastqtofasta/" + fastqs_l[i] + ".fasta",
                        "-Q33",
                        fastq_to_fasta_n])
        rc.run_cmd(cmd, logger, options.verbose)

    # For summary 4:
    numberofsequences = 0
    for i in range(len(fastqs_l)):
        cmd = " ".join(["grep \"^>\"", tmpDir + "/fastqtofasta/" + fastqs_l[i] + ".fasta",
                        "|", "wc -l"])
        p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
        numberofsequences += int(p.communicate()[0])
        p.wait()
    logger.info("\t" + tc.RED + "Number of N-less quality filtered sequences: " + str(numberofsequences) + tc.ENDC)
    summary_file.write("Number of N-less quality filtered sequences: " + str(numberofsequences) + "\n")

    # Re-ID and re-index FASTA and merging them all
    logger.info("Re-IDing and indexing FASTA, and merging all into a single file")
    outfileFinalFASTA = open(options.outDir + "/" + PIPITS_PREP_OUTPUT, "w")
    for i in range(len(fastqs_f)):
        line_index = 1
        logger.debug("Reading " + tmpDir + "/fastqtofasta/" + fastqs_l[i] + ".fasta")
        infile_fasta = open(tmpDir + "/fastqtofasta/" + fastqs_l[i] + ".fasta")
        for line in infile_fasta:
            if line.startswith(">"):
                outfileFinalFASTA.write(">" + fastqs_l[i] + "_" + str(line_index) + "\n")
                line_index += 1
            else:
                outfileFinalFASTA.write(line.rstrip() + "\n")
    outfileFinalFASTA.close()

    # Clean up tmp_directory
    if options.remove:
        logger.info("Cleaning temporary directory")
        shutil.rmtree(tmpDir)

    logger.info(tc.OKBLUE + "PIPITS PREP ended successfully. \"" + PIPITS_PREP_OUTPUT + "\" created in \"" + options.outDir + "\"" + tc.ENDC)
    logger.info(tc.OKYELLOW + "Next Step: PIPITS FUNITS [ Suggestion: pipits_funits -i " + options.outDir + "/" + PIPITS_PREP_OUTPUT + " -o out_funits -x YOUR_ITS_SUBREGION ]" + tc.ENDC)
    print("")
    summary_file.close()
def write_stim_library(save_path):

    conf = ul_sens_fmri.config.get_conf()
    conf.ana = ul_sens_analysis.config.get_conf()

    sshot_dir = "/sci/study/ul_sens/sshots"
    sshot_files = os.listdir(sshot_dir)

    cwd = os.getcwd()
    os.chdir(sshot_dir)

    pdf_list = []

    for img_id in conf.exp.img_ids:
        for (i_src_loc, src_loc) in enumerate(("above", "below")):
            for (pres_loc, floc) in zip(("upper", "lower"), ("a", "b")):

                out_file = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
                out_file.close()

                pdf_list.append(out_file.name)

                for sshot_file in sshot_files:

                    if str(img_id) not in sshot_file:
                        continue

                    src_str = "src_loc_{n:.1f}".format(n=i_src_loc + 1)
                    if src_str not in sshot_file:
                        continue

                    if "id_{n:.1f}".format(n=img_id) not in sshot_file:
                        continue

                    pres_str = "pres_loc_" + floc
                    if pres_str not in sshot_file:
                        continue

                    if "crop" not in sshot_file:
                        continue

                    header = ("Image ID: " + str(img_id) +
                              "; Source location: " + src_loc +
                              "; Presentation location: " + pres_loc)

                    # made it this far, must be OK
                    cmd = [
                        "convert",
                        "-append",
                        "'label:" + header + "'",
                        sshot_file,
                        "-compress", "jpeg",
                        out_file.name
                    ]

                    runcmd.run_cmd(" ".join(cmd))

                    break

    assert len(pdf_list) == 120

    os.chdir(cwd)

    cmd = ["stapler", "cat"]
    cmd.extend(pdf_list)
    cmd.append(save_path)

    runcmd.run_cmd(" ".join(cmd))

    for pdf_file in pdf_list:
        os.remove(pdf_file)
def patch_id( conf, paths ):
    """Form a mask from the GLM output"""

    logger = logging.getLogger( __name__ )
    logger.info( "Running localiser patch identification..." )

    # these are the t-statistics for each patch
    t_bricks = "[2..64(2)]"

    t_cutoff = scipy.stats.t.isf( conf.loc.t_p, conf.loc.dof )

    os.chdir( paths.loc.base.full() )

    for hemi in [ "lh", "rh" ]:

        hemi_ext = "_{h:s}.niml.dset".format( h = hemi )

        glm_path = paths.loc.glm.full( hemi_ext + t_bricks )

        # first, mark each regressor as significant or not
        sig_path = paths.loc.sig.full( hemi_ext )

        sig_cmd = [
            "3dcalc",
            "-a", glm_path,
            "-expr", "ispositive(step(a-{t:.4f}))".format( t = t_cutoff ),
            "-prefix", sig_path,
            "-overwrite"
        ]

        runcmd.run_cmd( " ".join( sig_cmd ) )

        # then, count how many significant regressors there are at each node
        sig_sum_path = paths.loc.sig_sum.full( hemi_ext )

        sig_sum_cmd = [
            "3dTstat",
            "-overwrite",
            "-sum",
            "-prefix", sig_sum_path,
            sig_path
        ]

        runcmd.run_cmd( " ".join( sig_sum_cmd ) )

        # now work out which ID is significant for each node, subject to the
        # constraint that there is only one significant patch
        all_id_path = paths.loc.all_patch_id.full( hemi_ext )

        id_cmd = [
            "3dTstat",
            "-overwrite",
            "-argmax1",
            "-prefix", all_id_path,
            "-mask", sig_sum_path,
            "-mrange", "1", "1",
            glm_path
        ]

        runcmd.run_cmd( " ".join( id_cmd ) )

        full_hemi_ext = "_{h:s}-full.niml.dset".format( h = hemi )

        # need to pad to full for integration with ROIs
        all_id_path_full = paths.loc.all_patch_id.full( full_hemi_ext )

        pad_k = "{n:d}".format( n = conf.subj.node_k[ hemi ] )

        fmri_tools.utils.sparse_to_full(
            in_dset = all_id_path,
            out_dset = all_id_path_full,
            pad_node = pad_k
        )

        # now for V1 only
        id_path_full = paths.loc.patch_id.full( full_hemi_ext )

        roi_path = paths.loc.vl.full( full_hemi_ext )

        cmd = [
            "3dcalc",
            "-a", all_id_path_full,
            "-b", roi_path,
            "-expr", "a*within(b,1,1)",
            "-overwrite",
            "-prefix", id_path_full
        ]

        runcmd.run_cmd( " ".join( cmd ) )
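# Quick numerical check (standalone; toy p and dof values) of the threshold
# rule above: scipy.stats.t.isf is the inverse survival function, so the
# survival function evaluated at t_cutoff recovers the p value.
def _demo_t_cutoff( p = 0.01, dof = 100 ):
    t_cutoff = scipy.stats.t.isf( p, dof )
    return ( t_cutoff, scipy.stats.t.sf( t_cutoff, dof ) )  # (~2.36, 0.01)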
def glm( conf, paths ):
    """Localiser GLMs"""

    logger = logging.getLogger( __name__ )
    logger.info( "Running localiser GLM..." )

    start_dir = os.getcwd()

    os.chdir( paths.loc.base.full() )

    # minus one because the range is inclusive
    censor_vols = conf.loc.n_cull_vol - 1

    # in AFNI-aware format; (runs):start-end
    censor_str = "*:0-{v:.0f}".format( v = censor_vols )

    # model as a typical SPM event
    model_str = "'SPMG1({d:.0f})'".format( d = conf.loc.dur_s )

    for hemi in [ "lh", "rh" ]:

        glm_cmd = [ "3dDeconvolve", "-input" ]

        surf_paths = [
            surf_path.full( "_{h:s}.niml.dset".format( h = hemi ) )
            for surf_path in paths.func.surfs
        ]

        glm_cmd.extend( surf_paths )

        glm_cmd.extend(
            [
                "-force_TR", "{tr:.3f}".format( tr = conf.acq.tr_s ),
                "-polort", "a",  # auto baseline degree
                "-local_times",
                "-CENSORTR", censor_str,
                "-xjpeg", "exp_design.png",
                "-x1D", "exp_design",
                "-overwrite",
                "-x1D_stop",  # want to use REML, so don't bother running
                "-num_stimts", "{n:d}".format( n = conf.stim.n_patches )
            ]
        )

        for i_patch in xrange( conf.stim.n_patches ):

            timing_ext = "_{n:02d}.txt".format( n = i_patch )

            timing_file = paths.loc.timing_base.full( timing_ext )

            glm_cmd.extend(
                [
                    "-stim_times",
                    "{n:d}".format( n = i_patch + 1 ),
                    timing_file,
                    model_str
                ]
            )

            glm_cmd.extend(
                [
                    "-stim_label",
                    "{n:d}".format( n = i_patch + 1 ),
                    "p{n:02d}".format( n = i_patch )
                ]
            )

        # run this first GLM
        runcmd.run_cmd( " ".join( glm_cmd ) )

        # delete the annoying command file that 3dDeconvolve writes
        os.remove( "Decon.REML_cmd" )

        beta_file = paths.loc.beta.file( "_{h:s}.niml.dset".format( h = hemi ) )
        buck_file = paths.loc.glm.file( "_{h:s}.niml.dset".format( h = hemi ) )

        reml_cmd = [
            "3dREMLfit",
            "-matrix", "exp_design.xmat.1D",
            "-Rbeta", beta_file,
            "-tout",
            "-Rbuck", buck_file,
            "-overwrite",
            "-input"
        ]

        reml_cmd.append( "'" + " ".join( surf_paths ) + "'" )

        # run the proper GLM
        runcmd.run_cmd( " ".join( reml_cmd ) )

    os.chdir( start_dir )
def _extract_data(subj_id, acq_date, conf, mask_paths, loc_mask=True):

    inf_str = subj_id + "_ul_sens_" + acq_date

    subj_dir = os.path.join(conf.ana.base_subj_dir, subj_id)

    if loc_mask:
        mask_dir = os.path.join(subj_dir, "loc_analysis")
        analysis_dir = os.path.join(subj_dir, "analysis")
    else:
        mask_dir = os.path.join(subj_dir, "post_analysis", "ret_roi")
        analysis_dir = mask_dir

    n_rois = len(conf.ana.roi_names)
    n_vols_per_run = int(conf.exp.run_len_s / conf.ana.tr_s)

    # initialise our data container; rois x runs x volumes x vf
    data = np.empty((n_rois, conf.exp.n_runs, n_vols_per_run, 2))
    data.fill(np.NAN)

    for (i_vf, vf) in enumerate(("upper", "lower")):

        for run_num in range(1, conf.exp.n_runs + 1):

            run_dir = os.path.join(subj_dir, "func",
                                   "run_{n:02d}".format(n=run_num))

            os.chdir(run_dir)

            # note that the last index here is hemisphere
            hemi_data = np.empty((n_rois, n_vols_per_run, 2))
            hemi_data.fill(np.NAN)

            for (i_hemi, hemi) in enumerate(("lh", "rh")):

                run_path = "{s:s}-run_{n:02d}-uw-{h:s}_nf.niml.dset".format(
                    s=inf_str, n=run_num, h=hemi)

                # average across all nodes in each ROI and dump the timecourse
                # to standard out
                cmd = [
                    "3dROIstats",
                    "-mask", os.path.join(mask_dir, mask_paths[(vf, hemi)]),
                    "-1Dformat",
                    run_path
                ]

                # ... which we don't want to log!
                cmd_out = runcmd.run_cmd(" ".join(cmd), log_stdout=False)

                # check the header for correctness
                roi_header = cmd_out.std_out.splitlines()[1].split("\t")[-3:]

                # make sure that the columns are what I think they are
                for (roi_head, roi_index) in zip(roi_header, conf.ana.roi_numbers):
                    assert roi_head.strip() == "Mean_" + roi_index

                # we want to clip out the header and the info lines
                run_data = cmd_out.std_out.splitlines()[3::2]

                # check we've done this correctly
                assert len(run_data) == n_vols_per_run

                for (i_vol, vol_data) in enumerate(run_data):

                    # so this is just one line of data, tab-separated
                    # we want to pull out our three ROIs, which will be the
                    # last in the file
                    vol_data = vol_data.split("\t")[-n_rois:]

                    # store, for each of the ROIs
                    hemi_data[:, i_vol, i_hemi] = vol_data

            # check that we've filled up the array as expected
            assert np.sum(np.isnan(hemi_data)) == 0

            # average over hemispheres
            hemi_data = np.mean(hemi_data, axis=-1)

            if loc_mask:
                mask_descrip = "_"
            else:
                mask_descrip = "_ret_roi_"

            run_path = "{s:s}-run_{n:02d}-uw-{vf:s}{m:s}data.txt".format(
                s=inf_str, n=run_num, vf=vf, m=mask_descrip)

            # save it out as a text file for this run; rois x vols
            np.savetxt(run_path, hemi_data)

            # we also want to save what 'nodes' in this data corresponds to;
            # ie ROI identifiers
            run_nodes_path = "{s:s}-run_{n:02d}-uw-{vf:s}{m:s}nodes.txt".format(
                s=inf_str, n=run_num, vf=vf, m=mask_descrip)

            np.savetxt(run_nodes_path, map(int, conf.ana.roi_numbers), "%d")

            # now we want to make it into an AFNI dataset so we can run the
            # GLM using their software
            run_path_niml = "{s:s}-run_{n:02d}-uw-{v:s}{m:s}data.niml.dset".format(
                s=inf_str, n=run_num, v=vf, m=mask_descrip)

            cmd = [
                "ConvertDset",
                "-i_1D",
                "-input", run_path,
                "-node_index_1D", run_nodes_path,
                "-o_niml",
                "-prefix", run_path_niml,
                "-overwrite"
            ]

            runcmd.run_cmd(" ".join(cmd))

            data[:, run_num - 1, :, i_vf] = hemi_data

    assert np.sum(np.isnan(data)) == 0

    os.chdir(analysis_dir)

    data_path = "{s:s}-{m:s}data.npy".format(s=inf_str, m=mask_descrip)

    # we save the data here so we can access it independent of AFNI
    np.save(data_path, data)
def get_mvpa_data(subj_id, acq_date, vf):

    conf = ul_sens_fmri.config.get_conf()
    conf.ana = ul_sens_analysis.config.get_conf()

    inf_str = subj_id + "_ul_sens_" + acq_date

    subj_dir = os.path.join(conf.ana.base_subj_dir, subj_id)
    glm_dir = os.path.join(subj_dir, conf.ana.post_dir, "mvpa_glm")

    beta = {}
    loc_t = {}

    raw_beta = []
    raw_loc_t = []

    for hemi in ("lh", "rh"):

        # the localiser mask
        mask_path = os.path.join(
            subj_dir,
            conf.ana.loc_glm_dir,
            "{s:s}-loc_{vf:s}-mask-{h:s}_nf.niml.dset".format(
                s=inf_str, h=hemi, vf=vf))

        # GLM betas
        beta_filename = os.path.join(
            glm_dir,
            "{s:s}-{v:s}-mvpa_beta-{h:s}_nf.niml.dset".format(
                s=inf_str, v=vf, h=hemi))

        # let's start with the betas
        cmd = [
            "3dmaskdump",
            "-noijk",
            "-mask", mask_path,
            mask_path,  # this holds the ROI indices, too
            beta_filename
        ]

        cmd_out = runcmd.run_cmd(" ".join(cmd), log_stdout=False)

        raw_out = cmd_out.std_out.splitlines()

        # keep track of the number of nodes - they should be the same for the
        # localiser t extraction. We'll check to make sure.
        n_beta_nodes = len(raw_out)

        # just extend the list, for now - we'll parse it later on
        raw_beta.extend(raw_out)

        # the localiser T
        loc_t_path = os.path.join(
            subj_dir,
            conf.ana.loc_glm_dir,
            "{s:s}-loc_{vf:s}-glm-{h:s}_nf.niml.dset".format(
                s=inf_str, h=hemi, vf=vf))

        # now for the localiser T values
        cmd = [
            "3dmaskdump",
            "-noijk",
            "-mask", mask_path,
            mask_path,
            loc_t_path
        ]

        cmd_out = runcmd.run_cmd(" ".join(cmd), log_stdout=False)

        raw_out = cmd_out.std_out.splitlines()

        n_loc_t_nodes = len(raw_out)

        # just extend the list, for now - we'll parse it later on
        raw_loc_t.extend(raw_out)

        assert n_loc_t_nodes == n_beta_nodes

    # convert to numpy arrays; n_nodes x dumped vals
    beta_data = np.array(
        [map(float, raw_beta_node.split(" ")) for raw_beta_node in raw_beta])
    loc_t_data = np.array(
        [map(float, raw_loc_t_node.split(" ")) for raw_loc_t_node in raw_loc_t])

    for (roi_num, roi_name) in zip(conf.ana.roi_numbers, conf.ana.roi_names):

        # find the nodes in the ROI
        in_roi = (beta_data[:, 0].astype("int") == int(roi_num))

        # check that the localiser agrees
        assert np.all(in_roi == (loc_t_data[:, 0].astype("int") == int(roi_num)))

        n_roi_nodes = np.sum(in_roi)

        roi_beta_data = np.empty((conf.exp.n_img,
                                  conf.exp.n_src_locs,
                                  conf.exp.n_runs,
                                  n_roi_nodes))
        roi_beta_data.fill(np.NAN)

        # need to farm out the beta data appropriately
        # we can use beta_filename because the hemisphere doesn't matter
        dset_labels = fmri_tools.utils.get_dset_label(beta_filename)

        # the -1 is because we also dumped the ROI index
        assert len(dset_labels) == (beta_data.shape[1] - 1)

        for (i_col, dset_label) in enumerate(dset_labels):

            # if it's one of the noise regressors, move along
            if dset_label[:3] == "Run":
                assert ("Pol" in dset_label)
                continue

            dset_params = dset_label.split("#")[0].split("_")

            (curr_vf, curr_sl, curr_id, curr_run) = dset_params

            # make sure we're looking at the correct file
            assert curr_vf == vf

            if curr_sl == "above":
                i_sl = 0
            elif curr_sl == "below":
                i_sl = 1
            else:
                raise ValueError()

            i_id = list(conf.exp.img_ids).index(int(curr_id))

            i_run = int(curr_run) - 1

            # the +1 is because of the first index being the ROI
            roi_beta_data[i_id, i_sl, i_run, :] = beta_data[in_roi, i_col + 1]

        # check that we've filled up the array
        assert np.sum(np.isnan(roi_beta_data)) == 0

        # now for the localiser - more straightforward
        roi_loc_t_data = loc_t_data[in_roi, -1]

        assert roi_loc_t_data.shape[0] == roi_beta_data.shape[-1]

        beta[roi_name] = roi_beta_data
        loc_t[roi_name] = roi_loc_t_data

    return (beta, loc_t)
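# Toy parse (hypothetical label) of the dataset column labels consumed in
# get_mvpa_data above: "<vf>_<src loc>_<img id>_<run>#0", with the AFNI "#0"
# suffix stripped first.
def _demo_parse_dset_label(label="upper_above_42_03#0"):
    (curr_vf, curr_sl, curr_id, curr_run) = label.split("#")[0].split("_")
    if curr_sl == "above":
        i_sl = 0
    else:
        i_sl = 1
    # -> ("upper", 0, 42, 2)
    return (curr_vf, i_sl, int(curr_id), int(curr_run) - 1)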
def run(options):

    if not os.path.exists(options.outDir):
        os.mkdir(options.outDir)
    else:
        shutil.rmtree(options.outDir)
        os.mkdir(options.outDir)

    tmpDir = options.outDir + "/intermediate"
    if not os.path.exists(tmpDir):
        os.mkdir(tmpDir)

    # Logging
    import logging
    logger = logging.getLogger("pipits_funits")
    logger.setLevel(logging.DEBUG)

    streamLoggerFormatter = logging.Formatter(
        "%(asctime)s %(levelname)s: %(message)s",
        tc.HEADER + "%Y-%m-%d %H:%M:%S" + tc.ENDC)
    streamLogger = logging.StreamHandler()
    if options.verbose:
        streamLogger.setLevel(logging.DEBUG)
    else:
        streamLogger.setLevel(logging.INFO)
    streamLogger.setFormatter(streamLoggerFormatter)
    logger.addHandler(streamLogger)

    fileLoggerFormatter = logging.Formatter(
        "%(asctime)s %(levelname)s: %(message)s",
        tc.HEADER + "%Y-%m-%d %H:%M:%S" + tc.ENDC)
    fileLogger = logging.FileHandler(options.outDir + "/log.txt", "w")
    fileLogger.setLevel(logging.DEBUG)
    fileLogger.setFormatter(fileLoggerFormatter)
    logger.addHandler(fileLogger)

    # Summary file
    summary_file = open(options.outDir + "/summary_pipits_funits.txt", "w")

    # Start!
    logger.info(tc.OKBLUE + "PIPITS FUNITS started" + tc.ENDC)

    # Scripts
    EXE_DIR = os.path.dirname(os.path.realpath(__file__))
    PIPITS_SCRIPTS_DIR = EXE_DIR

    # Check the integrity of the input file
    logger.info("Checking input FASTA for illegal characters")
    record = SeqIO.index(options.input, "fasta")
    for i in record.keys():
        description = record[i].description
        if description.find(" ") != -1:
            logger.error(
                'Error: " " found in the headers. Please remove " " from '
                "headers in your FASTA file before proceeding to the next "
                "stage.")
            exit(1)

    # For summary 1:
    logger.info("Counting input sequences")
    numberofsequences = 0
    cmd = " ".join(['grep "^>"', options.input, "|", "wc -l"])
    p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
    numberofsequences += int(p.communicate()[0])
    p.wait()
    logger.info("\t" + tc.RED +
                "Number of input sequences: " + str(numberofsequences) +
                tc.ENDC)
    summary_file.write("Number of input sequences: " +
                       str(numberofsequences) + "\n")

    # Dereplicate
    logger.info("Dereplicating sequences for efficiency")
    cmd = " ".join([
        "python", PIPITS_SCRIPTS_DIR + "/dereplicate_fasta.py",
        "-i", options.input,
        "-o", tmpDir + "/derep.fasta",
        "--cluster", tmpDir + "/derep.json",
    ])
    rc.run_cmd(cmd, logger, options.verbose)

    # For summary 2:
    logger.debug("Counting dereplicated sequences")
    numberofsequences = 0
    cmd = " ".join(['grep "^>"', tmpDir + "/derep.fasta", "|", "wc -l"])
    p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
    numberofsequences += int(p.communicate()[0])
    p.wait()
    logger.debug("\t" + tc.RED +
                 "Number of dereplicated sequences: " +
                 str(numberofsequences) + tc.ENDC)

    # Run ITSx: chop reads into regions and re-orientate where needed.
    # ITSx always prints something to STDERR and outputs nothing to STDOUT,
    # so STDOUT needs to be suppressed in non-verbose mode. Its return code
    # is always 0 no matter what, so the only way to tell whether it quit
    # with an error would be to capture STDERR and scan for the phrase
    # "FATAL ERROR" - not implemented.
    logger.info("Extracting " + options.ITSx_subregion +
                " from sequences [ITSx]")
    cmd = " ".join([
        pd.ITSx,
        "-i", tmpDir + "/derep.fasta",
        "-o", tmpDir + "/derep",
        "--preserve", "T",
        "-t", "F",
        "--cpu", options.threads,
        "--save_regions", options.ITSx_subregion,
    ])
    rc.run_cmd_ITSx(cmd, logger, options.verbose)

    # Remove short sequences (< 100 bp)
    logger.info("Removing sequences shorter than 100 bp")
    cmd = " ".join([
        "python", PIPITS_SCRIPTS_DIR + "/fasta_filter_by_length.py",
        "-i", tmpDir + "/derep." + options.ITSx_subregion + ".fasta",
        "-o", tmpDir + "/derep." + options.ITSx_subregion +
        ".sizefiltered.fasta",
        "-l 100",
    ])
    rc.run_cmd(cmd, logger, options.verbose)

    # Re-inflate
    logger.info("Re-inflating sequences")
    cmd = " ".join([
        "python", PIPITS_SCRIPTS_DIR + "/inflate_fasta.py",
        "-i", tmpDir + "/derep." + options.ITSx_subregion +
        ".sizefiltered.fasta",
        "-o", options.outDir + "/ITS.fasta",
        "--cluster", tmpDir + "/derep.json",
    ])
    rc.run_cmd(cmd, logger, options.verbose)

    # Count the number of ITS sequences
    logger.info("Counting sequences after re-inflation")
    numberofsequences = 0
    cmd = " ".join(['grep "^>"', options.outDir + "/ITS.fasta",
                    "|", "wc -l"])
    p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
    numberofsequences = int(p.communicate()[0])
    p.wait()

    logger.info(tc.RED + "\tNumber of sequences with ITS subregion: " +
                str(numberofsequences) + tc.ENDC)
    summary_file.write("Number of sequences with ITS subregion: " +
                       str(numberofsequences) + "\n")

    if numberofsequences == 0:
        logger.info(tc.RED +
                    "Have you chosen the right subregion? Exiting as there "
                    "are no sequences to process." + tc.ENDC)
        exit(1)

    """
    # Concatenating ITS1 and ITS2
    logger.info("Concatenating ITS1 and ITS2 ...")
    cmd = " ".join(["python", PIPITS_SCRIPTS_DIR + "/concatenate_fasta.py",
                    "-1", options.outDir + "/ITS1.fasta",
                    "-2", options.outDir + "/ITS2.fasta",
                    "-o", options.outDir + "/ITS.fasta"])
    rc.run_cmd(cmd, logger, options.verbose)
    logger.info("Concatenating ITS1 and ITS2 " +
                tc.OKGREEN + "(Done)" + tc.ENDC)
    """

    # Finally, move the ITSx summary and delete the temporary directory
    if options.remove:
        logger.info("Cleaning temporary directory")
        shutil.move(tmpDir + "/derep.summary.txt",
                    options.outDir + "/ITSx_summary.txt")
        shutil.rmtree(tmpDir)

    logger.info(tc.OKBLUE +
                'PIPITS FUNITS ended successfully. "ITS.fasta" created in "' +
                options.outDir + '"' + tc.ENDC)
    logger.info(tc.OKYELLOW +
                "Next Step: PIPITS PROCESS [ Suggestion: pipits_process -i " +
                options.outDir + "/ITS.fasta -o out_process ]" + tc.ENDC)
    print("")

    summary_file.close()
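# The dereplicate/re-inflate round trip above relies on two helper scripts
# (dereplicate_fasta.py and inflate_fasta.py) that are not shown here. As a
# rough sketch of the idea only - not the actual PIPITS implementation -
# dereplication collapses identical sequences to a single representative and
# records which original headers belong to each representative in a JSON
# "cluster" file; re-inflation reverses that mapping after ITSx has processed
# the much smaller dereplicated file. The function name below is hypothetical
# and Biopython is assumed for FASTA parsing.


def _sketch_dereplicate(in_fasta, out_fasta, cluster_json):
    """Minimal sketch of dereplication; illustrative names and structure."""

    import json
    from Bio import SeqIO

    clusters = {}

    for rec in SeqIO.parse(in_fasta, "fasta"):
        seq = str(rec.seq)
        # the first header seen for a given sequence acts as the
        # representative for all later duplicates
        clusters.setdefault(seq, []).append(rec.id)

    with open(out_fasta, "w") as fasta_out:
        for (seq, members) in clusters.items():
            fasta_out.write(">" + members[0] + "\n" + seq + "\n")

    with open(cluster_json, "w") as json_out:
        # map representative id -> all member ids, for later re-inflation
        json.dump(
            dict((members[0], members) for members in clusters.values()),
            json_out)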
def run(subj_id, acq_date):

    conf = ul_sens_fmri.config.get_conf()
    conf.ana = ul_sens_analysis.config.get_conf()

    inf_str = subj_id + "_ul_sens_" + acq_date

    subj_dir = os.path.join(conf.ana.base_subj_dir, subj_id)

    log_dir = os.path.join(subj_dir, "logs")
    log_path = os.path.join(log_dir, "{s:s}-post-log.txt".format(s=inf_str))

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    fmri_tools.utils.set_logger("screen")
    fmri_tools.utils.set_logger(log_path)

    ana_dir = os.path.join(subj_dir, "analysis")
    post_dir = os.path.join(subj_dir, conf.ana.post_dir)
    resid_dir = os.path.join(post_dir, "resid")

    if not os.path.isdir(post_dir):
        os.mkdir(post_dir)
    if not os.path.isdir(resid_dir):
        os.mkdir(resid_dir)

    os.chdir(resid_dir)

    n_window = 12

    traces = np.zeros((
        3,  # ROI
        2,  # vf
        2,  # src
        n_window  # time
    ))

    for (i_vf, vf) in enumerate(["upper", "lower"]):

        # in - residuals from the GLM analysis
        resid_path = os.path.join(
            ana_dir,
            "{s:s}-{v:s}-resid-.niml.dset".format(s=inf_str, v=vf))

        # get the residuals into an array by dumping from the dataset
        cmd = ["3dmaskdump", "-noijk", resid_path]
        cmd_out = runcmd.run_cmd(" ".join(cmd), log_stdout=False)

        # this converts the output string to ROIs x time
        resid_flat = np.array([
            map(float, roi_resid.split(" "))
            for roi_resid in cmd_out.std_out.splitlines()
        ])

        # want to get the baseline values to normalise the residuals
        bl_path = os.path.join(
            ana_dir,
            "{s:s}-{v:s}-bltc-.niml.dset".format(s=inf_str, v=vf))

        cmd = ["3dmaskdump", "-noijk", bl_path]
        cmd_out = runcmd.run_cmd(" ".join(cmd), log_stdout=False)

        # get a baseline value
        bl = np.array(map(float, cmd_out.std_out.splitlines()))

        # convert the residuals to PSC units
        resid_flat = 100 * (resid_flat / bl[:, np.newaxis])

        # want to split it into runs rather than one flat timecourse, and
        # want to exclude the initial censored volumes
        vols_per_run = (int(resid_flat.shape[1] / conf.exp.n_runs) -
                        (conf.ana.n_to_censor + 1))
        vols_per_run_total = int(resid_flat.shape[1]) / conf.exp.n_runs

        resid = np.empty((
            resid_flat.shape[0],  # rois
            conf.exp.n_runs,
            vols_per_run
        ))
        resid.fill(np.NAN)

        for i_run in xrange(conf.exp.n_runs):
            i_start = i_run * vols_per_run_total + conf.ana.n_to_censor + 1
            i_end = i_start + vols_per_run
            resid[:, i_run, :] = resid_flat[:, i_start:i_end]

        # convert to squared error
        resid = resid ** 2

        for i_run in xrange(conf.exp.n_runs):

            # run seq is (pres loc, trial number, trial info),
            # where trial info is:
            #   0: time, in seconds, when it starts
            #   1: source location; 1 for above, 2 for below, 0 for null
            #   2: image id
            #   3: whether it is in the 'pre' events
            #   4: been prepped
            run_seq = np.load(
                os.path.join(
                    subj_dir,
                    "logs",
                    "{s:s}_ul_sens_fmri_run_{n:02d}_seq.npy".format(
                        s=subj_id, n=i_run + 1)))

            # pull out this visual field location - either upper or lower
            run_seq = run_seq[i_vf, ...]

            # axis 0 is now trials
            n_trials = run_seq.shape[0]

            # keep track of how many trials we go through, just to check
            # that everything is hunky-dory
            trial_count = 0

            for i_trial in xrange(n_trials):

                # check whether it is a trial that we're interested in
                trial_ok = np.all([
                    run_seq[i_trial, 3] == 0,  # not a pre event
                    run_seq[i_trial, 2] > 0.5,  # an image was shown
                    run_seq[i_trial, 1] > 0  # not a null event
                ])

                if not trial_ok:
                    continue

                onset_s = run_seq[i_trial, 0]
                onset_vol = int(onset_s / conf.ana.tr_s)
                onset_vol -= conf.ana.st_vols_to_ignore

                # trial type is 1-based
                trial_type = int(run_seq[i_trial, 1]) - 1

                # shift the residual timecourse so that the first index
                # aligns with the trial onset
                shifted_resid = np.roll(
                    resid[:, i_run, :], -onset_vol, axis=1)

                traces[:, i_vf, trial_type, :] += shifted_resid[:, :n_window]

                trial_count += 1

            assert trial_count == 60

        # convert to an average
        traces[:, i_vf, ...] /= (30.0 * conf.exp.n_runs)

    # out
    traces_path = "{s:s}--traces-.npy".format(s=inf_str)
    np.save(traces_path, traces)
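# A toy demonstration of the np.roll alignment trick used above: rolling the
# (ROIs x time) residual array left by the trial's onset volume puts the
# trial onset at column 0, so the first n_window columns form that trial's
# peri-event trace. The values here are made up for illustration. Note that
# np.roll wraps around, so a window that runs past the end of a run would
# pick up samples from the run's start.

import numpy as np

toy_resid = np.arange(20, dtype=float)[np.newaxis, :]  # 1 ROI, 20 volumes
onset_vol = 7
n_window = 4

trace = np.roll(toy_resid, -onset_vol, axis=1)[:, :n_window]

assert (trace == np.array([[7.0, 8.0, 9.0, 10.0]])).all()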
def run(subj_id, acq_date):

    conf = ul_sens_fmri.config.get_conf()
    conf.ana = ul_sens_analysis.config.get_conf()

    inf_str = subj_id + "_ul_sens_" + acq_date

    subj_dir = os.path.join(conf.ana.base_subj_dir, subj_id)

    log_dir = os.path.join(subj_dir, "logs")
    log_path = os.path.join(log_dir, "{s:s}-post-log.txt".format(s=inf_str))

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    fmri_tools.utils.set_logger("screen")
    fmri_tools.utils.set_logger(log_path)

    ana_dir = os.path.join(subj_dir, "analysis")
    post_dir = os.path.join(subj_dir, conf.ana.post_dir)
    ret_roi_dir = os.path.join(post_dir, "ret_roi")

    if not os.path.isdir(post_dir):
        os.mkdir(post_dir)
    if not os.path.isdir(ret_roi_dir):
        os.mkdir(ret_roi_dir)

    os.chdir(ret_roi_dir)

    # phase ranges for the different ret roi specs
    phases = {
        "lh": {"upper": [0, 90], "lower": [270, 360]},
        "rh": {"upper": [90, 180], "lower": [180, 270]}
    }

    mask_paths = {}

    # first, calculate the masks based on the ret phases
    for hemi in ["lh", "rh"]:

        if subj_id == "p1003":
            vis_loc = "vis_loc_ver1"
        else:
            vis_loc = "vis_loc"

        wedge_path = os.path.join(
            "/sci/vis_loc/db_ver1",
            subj_id,
            conf.ana.subj_wedge_sess[subj_id],
            "dt/wedge",
            "{s:s}_{vl:s}_{a:s}-wedge-angle-{h:s}_nf.niml.dset[0]".format(
                s=subj_id,
                a=conf.ana.subj_wedge_sess[subj_id],
                h=hemi,
                vl=vis_loc))

        # subject's ROI definitions for this hemisphere
        roi_path = os.path.join(
            conf.ana.roi_dir,
            subj_id,
            "rois",
            "{s:s}_vis_loc_--rois-{h:s}_nf.niml.dset".format(
                s=subj_id, h=hemi))

        for pres in ["upper", "lower"]:

            ret_roi_path = "{s:s}-{v:s}-ret_roi_-{h:s}_nf.niml.dset".format(
                s=inf_str, v=pres, h=hemi)

            # we want the roi file to be 'amongst' the identifiers for V1-V3
            roi_test = "(amongst(b," + ",".join(conf.ana.roi_numbers) + ")*b)"

            ret_roi_test = "within(a,{l:d},{u:d})".format(
                l=phases[hemi][pres][0],
                u=phases[hemi][pres][1])

            cmd = [
                "3dcalc",
                "-a", wedge_path,
                "-b", roi_path,
                "-expr", "'" + ret_roi_test + "*" + roi_test + "'",
                "-prefix", ret_roi_path,
                "-overwrite"
            ]

            runcmd.run_cmd(" ".join(cmd))

            mask_paths[(pres, hemi)] = ret_roi_path

    ul_sens_analysis.glm_prep._extract_data(
        subj_id, acq_date, conf, mask_paths, loc_mask=False)

    ul_sens_analysis.glm._run_glm(
        subj_id, acq_date, conf, log_dir, loc_mask=False)
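# For reference, the 3dcalc expression built above computes, per node,
# within(a, lo, hi) * (amongst(b, roi_ids...) * b): the ROI id where the
# wedge phase falls in the presentation location's range and the node
# belongs to one of V1-V3, and zero elsewhere. A numpy rendering of the same
# logic, with made-up inputs, assuming the usual 3dcalc semantics of
# within() and amongst():

import numpy as np

wedge_phase = np.array([45.0, 135.0, 300.0, 80.0])  # degrees, per node
roi_id = np.array([1, 2, 7, 3])  # per-node ROI identifiers
roi_numbers = (1, 2, 3)  # V1-V3, say
lo, hi = 0, 90  # e.g. left hemisphere, upper field

in_phase_range = (wedge_phase >= lo) & (wedge_phase <= hi)
in_rois = np.in1d(roi_id, roi_numbers)

# keep the ROI id where both tests pass, zero elsewhere
mask = np.where(in_phase_range & in_rois, roi_id, 0)

assert (mask == np.array([1, 0, 0, 3])).all()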
def resid(subj_id, acq_date, conf):

    subj_dir = os.path.join(conf.ana.base_subj_dir, subj_id)

    ana_dir = os.path.join(subj_dir, "analysis")
    post_dir = os.path.join(subj_dir, conf.ana.post_dir)
    resid_dir = os.path.join(post_dir, "resid")

    if not os.path.isdir(post_dir):
        os.mkdir(post_dir)
    if not os.path.isdir(resid_dir):
        os.mkdir(resid_dir)

    os.chdir(resid_dir)

    inf_str = subj_id + "_ul_sens_" + acq_date

    n_window = 12

    traces = np.zeros((
        3,  # ROI
        2,  # vf
        2,  # src
        n_window  # time
    ))

    for (i_vf, vf) in enumerate(["upper", "lower"]):

        # in
        resid_path = os.path.join(
            ana_dir,
            "{s:s}-{v:s}-resid-.niml.dset".format(s=inf_str, v=vf))

        cmd = ["3dmaskdump", "-noijk", resid_path]
        cmd_out = runcmd.run_cmd(" ".join(cmd), log_stdout=False)

        # this is ROIs x time
        resid_flat = np.array([
            map(float, roi_resid.split(" "))
            for roi_resid in cmd_out.std_out.splitlines()
        ])

        # baseline
        bl_path = os.path.join(
            ana_dir,
            "{s:s}-{v:s}-bltc-.niml.dset".format(s=inf_str, v=vf))

        cmd = ["3dmaskdump", "-noijk", bl_path]
        cmd_out = runcmd.run_cmd(" ".join(cmd), log_stdout=False)

        # get a baseline value
        bl = np.array(map(float, cmd_out.std_out.splitlines()))

        # convert to PSC
        resid_flat = 100 * (resid_flat / bl[:, np.newaxis])

        # want to split it into runs
        vols_per_run = (int(resid_flat.shape[1] / conf.exp.n_runs) -
                        (conf.ana.n_to_censor + 1))
        vols_per_run_total = int(resid_flat.shape[1]) / conf.exp.n_runs

        resid = np.empty((
            resid_flat.shape[0],
            conf.exp.n_runs,
            vols_per_run
        ))
        resid.fill(np.NAN)

        for i_run in xrange(conf.exp.n_runs):
            i_start = i_run * vols_per_run_total + conf.ana.n_to_censor + 1
            i_end = i_start + vols_per_run
            resid[:, i_run, :] = resid_flat[:, i_start:i_end]

        # convert to squared error
        resid = resid ** 2

        for i_run in xrange(conf.exp.n_runs):

            # run seq is (pres loc, trial number, trial info),
            # where trial info is:
            #   0: time, in seconds, when it starts
            #   1: source location; 1 for above, 2 for below, 0 for null
            #   2: image id
            #   3: whether it is in the 'pre' events
            #   4: been prepped
            run_seq = np.load(
                os.path.join(
                    subj_dir,
                    "logs",
                    "{s:s}_ul_sens_fmri_run_{n:02d}_seq.npy".format(
                        s=subj_id, n=i_run + 1)))

            # pull out this visual field location - either upper or lower
            run_seq = run_seq[i_vf, ...]

            # axis 0 is now trials
            n_trials = run_seq.shape[0]

            trial_count = 0

            for i_trial in xrange(n_trials):

                trial_ok = np.all([
                    run_seq[i_trial, 3] == 0,  # not a pre event
                    run_seq[i_trial, 2] > 0.5,  # an image was shown
                    run_seq[i_trial, 1] > 0  # not a null event
                ])

                if not trial_ok:
                    continue

                onset_s = run_seq[i_trial, 0]
                onset_vol = int(onset_s / conf.ana.tr_s)
                onset_vol -= conf.ana.st_vols_to_ignore

                # trial type is 1-based
                trial_type = int(run_seq[i_trial, 1]) - 1

                shifted_resid = np.roll(
                    resid[:, i_run, :], -onset_vol, axis=1)

                traces[:, i_vf, trial_type, :] += shifted_resid[:, :n_window]

                trial_count += 1

            assert trial_count == 60

        traces[:, i_vf, ...] /= (30.0 * conf.exp.n_runs)

    # out
    traces_path = "{s:s}--traces-.npy".format(s=inf_str)
    np.save(traces_path, traces)
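# The run-splitting arithmetic in resid (and in run above) is easier to see
# with concrete numbers. Suppose the flat timecourse concatenates 2 runs of
# 10 volumes each and the first volumes of every run are censored with
# n_to_censor = 2: each run then contributes 10 - (2 + 1) = 7 usable volumes,
# starting at offset 3 within the run. Illustrative values only.

import numpy as np

n_runs = 2
n_to_censor = 2
flat = np.arange(20, dtype=float)[np.newaxis, :]  # 1 ROI, 2 runs x 10 vols

vols_per_run_total = flat.shape[1] // n_runs  # 10
vols_per_run = vols_per_run_total - (n_to_censor + 1)  # 7

per_run = np.empty((flat.shape[0], n_runs, vols_per_run))
for i_run in range(n_runs):
    i_start = i_run * vols_per_run_total + n_to_censor + 1
    per_run[:, i_run, :] = flat[:, i_start:i_start + vols_per_run]

assert (per_run[0, 0] == np.arange(3, 10)).all()
assert (per_run[0, 1] == np.arange(13, 20)).all()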