Example #1
def fieldmaps(conf, paths):
    """Prepare the fieldmaps"""

    logger = logging.getLogger(__name__)
    logger.info("Running fieldmap preparation...")

    epi_run = conf.subj.mot_base - 1

    # set a corrected EPI to define the space to resample to
    ref_epi = paths.func.sts[epi_run].full(".nii[0]")

    # want to calculate a coarse brain mask from the epi
    mask_path = paths.fmap.mask.full(".nii")
    epi_path = paths.summ.st.full(".nii[{n:d}]".format(n=epi_run))

    mask_cmd = [
        "3dAutomask", "-SI", "{n:d}".format(n=conf.subj.mask_SI), "-overwrite",
        "-prefix", mask_path, epi_path
    ]

    runcmd.run_cmd(" ".join(mask_cmd))

    fmri_tools.preproc.make_fieldmap(mag_path=paths.fmap.mag.full(),
                                     ph_path=paths.fmap.ph.full(),
                                     fmap_path=paths.fmap.fmap.full(),
                                     delta_te_ms=conf.acq.delta_te_ms,
                                     ref_img=ref_epi,
                                     recentre_ph="mean",
                                     recentre_mask=mask_path,
                                     strip_params=["-surface_coil"],
                                     strip_mag=False)
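
All of the examples on this page build their AFNI calls as argument lists, join them with spaces, and pass the string to runcmd.run_cmd. A minimal sketch of that pattern, assuming run_cmd is essentially a thin wrapper around the standard library; the file names and the -SI value below are placeholders, and running it requires AFNI on the path.

import subprocess

def run_cmd(cmd_str):
    """Run a shell command string, raising if it exits non-zero."""
    subprocess.check_call(cmd_str, shell=True)

# build the command as a list of arguments, then join into one string
mask_cmd = [
    "3dAutomask", "-SI", "{n:d}".format(n=50), "-overwrite",
    "-prefix", "mask.nii", "epi.nii[0]"
]

run_cmd(" ".join(mask_cmd))
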
Example #2
def centre_distances(conf, paths):
    "Write the distance of each node to its patch centre"

    for patch_id in conf.ana.valid_patch_ids:
        _patch_cent_dist(conf, paths, patch_id)

    os.chdir(paths.coh_ana.base.full())

    # now to combine
    vf_lookup = {"lh": "R", "rh": "L"}

    # combine all into one
    for hemi in ["lh", "rh"]:

        comb_cmd = [
            "3dMean", "-non_zero", "-sum", "-prefix",
            paths.coh_ana.patch_dist.full("_" + hemi + "-full.niml.dset"),
            "-overwrite"
        ]

        for patch_id in conf.ana.valid_patch_ids:

            if conf.stim.patches[patch_id]["vf"] == vf_lookup[hemi]:

                comb_cmd.append(
                    paths.coh_ana.patch_dist.file("-patch_{n:d}".format(
                        n=patch_id) + "_" + hemi + ".niml.dset"))

        runcmd.run_cmd(" ".join(comb_cmd))
Example #3
def coh_glm(conf, paths):
    "Run the GLM(s) for a given subject"

    # we only want to run GLMs for the patches with acceptable node counts, so
    # remove those identified as lacking
    patch_ids = np.setdiff1d(conf.exp.mod_patches, conf.ana.exclude_patch_ids)

    # run the GLM for each patch
    for patch_id in patch_ids:
        _run_coh_glm(conf, paths, patch_id)

    vf_lookup = {"lh": "R", "rh": "L"}

    # combine all into one
    for hemi in ["lh", "rh"]:

        comb_cmd = [
            "3dMean", "-non_zero", "-sum", "-prefix",
            paths.coh_ana.comb.full("_" + hemi + "-full.niml.dset"),
            "-overwrite"
        ]

        for patch_id in patch_ids:

            if conf.stim.patches[patch_id]["vf"] == vf_lookup[hemi]:

                comb_cmd.append(
                    paths.coh_ana.psc.file("-patch_{n:d}".format(n=patch_id) +
                                           "_" + hemi + "-full.niml.dset"))

        runcmd.run_cmd(" ".join(comb_cmd))
def _loc_to_mask(subj_id, acq_date, conf):

    subj_dir = os.path.join(conf.ana.base_subj_dir, subj_id)

    loc_glm_dir = os.path.join(subj_dir, conf.ana.loc_glm_dir)

    os.chdir(loc_glm_dir)

    inf_str = subj_id + "_ul_sens_" + acq_date

    mask_paths = {}

    # go through combinations of visual field position and hemisphere
    for (vf, hemi) in itertools.product(("upper", "lower"), ("lh", "rh")):

        # this is the localiser GLM subbrick with the t-statistic for this
        # visual field location
        loc_t_path = "{s:s}-loc_{v:s}-glm-{h:s}_nf.niml.dset".format(
            s=inf_str, v=vf, h=hemi) + "[" + conf.ana.loc_glm_brick + "]"

        # check it is correct
        assert fmri_tools.utils.get_dset_label(
            loc_t_path)[0] == vf + "#0_Tstat"

        # subject's ROI definitions for this hemisphere
        roi_path = os.path.join(
            conf.ana.roi_dir, subj_id,
            "rois", "{s:s}_vis_loc_--rois-{h:s}_nf.niml.dset".format(s=subj_id,
                                                                     h=hemi))

        # this is the mask file to write
        mask_path = "{s:s}-loc_{v:s}-mask-{h:s}_nf.niml.dset".format(s=inf_str,
                                                                     v=vf,
                                                                     h=hemi)

        # we want the roi file to be 'amongst' the identifiers for V1-V3
        roi_test = "amongst(a," + ",".join(conf.ana.roi_numbers) + ")"

        # we also want the t-value to be above a certain threshold
        loc_test = "step(b-" + conf.ana.loc_glm_thresh + ")"

        # so it is an 'and' operation, and we want it to be labelled with the
        # ROI identified value so we multiply it by the outcome
        expr = "'a*and(" + roi_test + "," + loc_test + ")'"

        cmd = [
            "3dcalc", "-overwrite", "-a", roi_path, "-b", loc_t_path, "-expr",
            expr, "-prefix", mask_path
        ]

        runcmd.run_cmd(" ".join(cmd))

        # store the mask path to make it easier to access in the next step
        mask_paths[(vf, hemi)] = mask_path

    return mask_paths
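
The mask definition above hinges on the 3dcalc expression string. A small sketch of how that string is assembled, with hypothetical ROI numbers and threshold in place of the values held in conf.ana:

# placeholder values standing in for conf.ana.roi_numbers and
# conf.ana.loc_glm_thresh
roi_numbers = ["1", "2", "3"]
loc_glm_thresh = "3.0"

# the node must belong to one of the listed ROIs...
roi_test = "amongst(a," + ",".join(roi_numbers) + ")"
# ...and its localiser t-value must exceed the threshold
loc_test = "step(b-" + loc_glm_thresh + ")"

# multiply by 'a' so that surviving nodes keep their ROI label
expr = "'a*and(" + roi_test + "," + loc_test + ")'"

print(expr)  # 'a*and(amongst(a,1,2,3),step(b-3.0))'
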
Example #5
def data_dump_depth(conf, paths):
    "Dump all the relevant data to a single text file"

    # ingredients:
    #   -node coordinates
    #   -patch IDs
    #   -coherent PSC
    #   -incoherent PSC

    patch_ids = np.setdiff1d(conf.exp.mod_patches, conf.ana.exclude_patch_ids)

    vf_lookup = {"lh": "R", "rh": "L"}

    os.chdir(paths.depth_ana.base.full())

    for i_bin in xrange(len(conf.ana.bin_centres)):

        bin_ext = "_bin_" + str(i_bin)

        # open it this way so that we can write to it twice and it will append
        dump_handle = open(paths.depth_ana.comb.full(bin_ext + ".txt"), "w")

        for hemi in ["lh", "rh"]:

            hemi_ext = "_" + hemi

            psc_comb_path = paths.depth_ana.comb.full(bin_ext + hemi_ext +
                                                      "-full.niml.dset")

            # now we need to know the patch IDs
            loc_id = conf.subj.subj_id + "_loc"
            loc_conf = ns_patches.config.get_conf(loc_id)
            loc_paths = ns_patches.paths.get_subj_paths(loc_conf)

            id_path = loc_paths.loc.patch_id_thr.full(
                "_{h:s}-full_Clustered_e1_a{n:.01f}.niml.dset".format(
                    h=hemi, n=conf.loc.area_thr))

            dump_path = paths.depth_ana.comb.full(bin_ext + hemi_ext + ".txt")

            if os.path.exists(dump_path):
                os.remove(dump_path)

            # now we have all our ingredients, we can write out the text file
            dump_cmd = [
                "3dmaskdump", "-o", dump_path, "-mask", psc_comb_path, id_path,
                psc_comb_path
            ]

            runcmd.run_cmd(" ".join(dump_cmd))

            # concatenate across hemis
            np.savetxt(dump_handle, np.loadtxt(dump_path))

        dump_handle.close()
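
The cross-hemisphere concatenation above relies on successive np.savetxt calls to an already-open handle appending rather than overwriting. A minimal sketch of that behaviour, with made-up arrays standing in for the per-hemisphere 3dmaskdump outputs:

import numpy as np

# stand-ins for the left and right hemisphere dump files
lh_rows = np.array([[1.0, 0.5], [2.0, 0.25]])
rh_rows = np.array([[3.0, 0.75]])

with open("comb_example.txt", "w") as dump_handle:
    # the second call appends its rows after the first, which is why the
    # handle is opened once and written to per hemisphere
    np.savetxt(dump_handle, lh_rows)
    np.savetxt(dump_handle, rh_rows)
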
Example #6
def vol_to_surf_depth(conf, paths):
    """Converts the functional volume-based images to SUMA surfaces."""

    logger = logging.getLogger(__name__)
    logger.info("Running volume to surface projection at different depths...")

    start_dir = os.getcwd()

    for (uw_file, surf_file, run_dir) in zip(paths.func.uws, paths.func.surfs,
                                             paths.func.runs):

        os.chdir(run_dir.full())

        for hemi in ["lh", "rh"]:

            spec_file = paths.reg.spec.full("_{hemi:s}.spec".format(hemi=hemi))

            # replace the subject ID with what FreeSurfer/SUMA considers the subject
            # ID to be
            spec_file = spec_file.replace(conf.subj.subj_id,
                                          conf.subj.fs_subj_id)

            for (i_bin, bin_centre) in enumerate(conf.ana.bin_centres):

                white_frac = bin_centre - (conf.ana.bin_width / 2.0)
                grey_frac = bin_centre + (conf.ana.bin_width / 2.0) - 1.0

                surf_path = surf_file.full("_bin_{i:d}_{h:s}.niml.dset".format(
                    h=hemi, i=i_bin))

                surf_cmd = [
                    "3dVol2Surf", "-spec", spec_file, "-surf_A", "smoothwm",
                    "-surf_B", "pial", "-map_func", "ave", "-f_steps", "15",
                    "-f_index", "nodes", "-f_p1_fr",
                    str(white_frac), "-f_pn_fr",
                    str(grey_frac), "-sv",
                    paths.reg.anat_reg.full("+orig"), "-grid_parent",
                    uw_file.full(".nii"), "-out_niml", surf_path, "-overwrite"
                ]

                runcmd.run_cmd(" ".join(surf_cmd))

                # convert to full
                full_path = surf_file.full(
                    "_bin_{i:d}_{h:s}-full.niml.dset".format(h=hemi, i=i_bin))

                node_str = "{n:d}".format(n=conf.subj.node_k[hemi])
                fmri_tools.utils.sparse_to_full(in_dset=surf_path,
                                                out_dset=full_path,
                                                pad_node=node_str)

    os.chdir(start_dir)
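
A worked example of the depth-bin fractions passed to 3dVol2Surf above, using hypothetical bin settings in place of conf.ana; the start fraction is measured up from the white (smoothwm) surface and the negative end fraction pulls the endpoint back from the pial surface, so each bin samples a band of cortex centred on bin_centre.

bin_centres = [0.25, 0.5, 0.75]  # placeholder values
bin_width = 0.2

for (i_bin, bin_centre) in enumerate(bin_centres):
    white_frac = bin_centre - (bin_width / 2.0)
    grey_frac = bin_centre + (bin_width / 2.0) - 1.0
    print("bin {i:d}: {w:.2f} to {g:.2f}".format(i=i_bin, w=white_frac, g=grey_frac))

# bin 1 (centre 0.5) gives white_frac = 0.4 and grey_frac = -0.4: sampling
# starts 40% of the way to the pial surface and stops 40% short of it
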
def rsq(subj_id, acq_date, conf):

    subj_dir = os.path.join(conf.ana.base_subj_dir, subj_id)
    ana_dir = os.path.join(subj_dir, "analysis")

    post_dir = os.path.join(subj_dir, conf.ana.post_dir)
    rsq_dir = os.path.join(post_dir, "rsq")

    if not os.path.isdir(post_dir):
        os.mkdir(post_dir)

    if not os.path.isdir(rsq_dir):
        os.mkdir(rsq_dir)

    os.chdir(rsq_dir)

    inf_str = subj_id + "_ul_sens_" + acq_date

    for vf in ["upper", "lower"]:

        # in
        glm_path = os.path.join(
            ana_dir,
            "{s:s}-{v:s}-glm-.niml.dset".format(
                s=inf_str, v=vf
            )
        )

        bricks = "[184,187]"

        # check the beta bricks are as expected
        dset_labels = fmri_tools.utils.get_dset_label(
            glm_path + bricks
        )

        desired_labels = ["above_all_R^2", "below_all_R^2"]

        assert dset_labels == desired_labels

        cmd = [
            "3dmaskdump",
            "-noijk",
            glm_path + bricks
        ]

        cmd_out = runcmd.run_cmd(" ".join(cmd))

        roi_rsq = cmd_out.std_out.splitlines()

        rsq = [map(float, roi_r.split(" ")) for roi_r in roi_rsq]

        # out
        rsq_path = "{s:s}-{v:s}-rsq-.txt".format(
                s=inf_str, v=vf
        )

        np.savetxt(rsq_path, rsq)
Example #8
def patch_count( conf, paths ):
    "Count the number of nodes in each patch"

    logger = logging.getLogger( __name__ )
    logger.info( "Running localiser patch cluster count..." )

    os.chdir( paths.loc.base.full() )

    for hemi in [ "lh", "rh" ]:

        hemi_ext = "_{h:s}-full.niml.dset".format( h = hemi )

        id_ext = "_{h:s}-full_Clustered_e1_a{n:.01f}.niml.dset".format( h = hemi,
                                                                        n = conf.loc.area_thr
                                                                      )

        id_path = paths.loc.patch_id_thr.full( id_ext )

        out_path = paths.loc.patch_id_thr.full( "_" + hemi + ".txt" )

        if os.path.exists( out_path ):
            os.remove( out_path )

        cmd = [ "3dmaskdump",
                "-noijk",
                "-o", out_path,
                "-nozero",
                id_path
              ]

        runcmd.run_cmd( " ".join( cmd ) )

    node_info = np.hstack( [ np.loadtxt( paths.loc.patch_id_thr.full( "_" + hemi + ".txt" ) )
                             for hemi in [ "lh", "rh" ]
                           ]
                         )

    patch_k = [ np.sum( node_info == ( x + 1 ) )
                for x in conf.exp.mod_patches
              ]

    np.savetxt( paths.loc.patch_id_count.full( ".txt" ), patch_k )
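
The counting step above assumes the dumped node values are 1-based patch IDs, hence the + 1. A minimal sketch of the tally with made-up data:

import numpy as np

# hypothetical node values: each entry is the 1-based patch ID of one node
node_info = np.array([1, 1, 2, 3, 3, 3, 5])

# hypothetical 0-based patch IDs of interest
mod_patches = [0, 1, 2, 3, 4]

patch_k = [np.sum(node_info == (x + 1)) for x in mod_patches]

print(patch_k)  # [2, 1, 3, 0, 1]
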
def plot_top_resp_diff(save_path=None):

    conf = ul_sens_fmri.config.get_conf()
    conf.ana = ul_sens_analysis.config.get_conf()

    # (img * src, 4)
    diff_data = np.load(
        os.path.join(conf.ana.base_group_dir,
                     "ul_sens_group_amp_diffs_sorted.npy"))

    # img x src x LR x rows x cols x colours
    img_frags = get_img_fragments()

    n_to_show = 5

    # they're ranked in ascending order
    i_bottoms = range(n_to_show)
    i_tops = range(-n_to_show, 0)[::-1]

    main_fig = sg.SVGFigure("13.7cm", "16.3cm")

    tmp_files = []
    figs = []
    texts = []

    column = 0

    for (i_rank, rank_type) in zip((i_tops, i_bottoms), ("top", "bottom")):

        row = 0

        for i in i_rank:

            i_img = int(diff_data[i, 0])
            i_src = int(diff_data[i, 2])

            diff = diff_data[i, -1]

            diff_str = "Upper - lower = {n:.2f}".format(n=diff)

            if i_src == 0:
                src = "above"
            else:
                src = "below"

            diff_str += "; source: " + src

            if rank_type == "top":
                col_offset = 0
            else:
                col_offset = 140 * 2

            text = sg.TextElement(col_offset + 16,
                                  row * 117 + 10,
                                  diff_str,
                                  size=11,
                                  font="FreeSans")

            texts.append(text)

            for (i_side, side) in enumerate(("left", "right")):

                if side == "right":
                    col_offset += 110

                img_file = tempfile.NamedTemporaryFile(prefix=rank_type + "_" +
                                                       str(i),
                                                       delete=False)
                img_file.close()

                tmp_files.append(img_file.name)

                img = img_frags[i_img, i_src, i_side, ...]

                plt.imsave(fname=img_file.name + ".png",
                           arr=img,
                           vmin=0.0,
                           vmax=1.0)

                cmd = [
                    "convert", img_file.name + ".png", img_file.name + ".svg"
                ]

                runcmd.run_cmd(" ".join(cmd))

                with open(img_file.name + ".svg", "r") as svg_file:
                    svg_data = svg_file.readlines()

                svg_data.insert(
                    3,
                    '<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="128" height="128">'
                )

                del svg_data[4]

                with open(img_file.name + ".svg", "w") as svg_file:
                    svg_file.writelines(svg_data)

                fig = sg.fromfile(img_file.name + ".svg")

                fig_plot = fig.getroot()

                fig_plot.moveto(col_offset, row * 117 + 16, scale=0.7)

                figs.append(fig_plot)

            row += 1

        column += 1

    main_fig.append(figs)
    _ = [main_fig.append(text) for text in texts]

    main_fig.save(save_path + ".svg")

    figutils.svg_to_pdf(svg_path=save_path + ".svg",
                        pdf_path=save_path + ".pdf")

    for tmp_file in tmp_files:
        os.remove(tmp_file)
        os.remove(tmp_file + ".png")
        os.remove(tmp_file + ".svg")
def node_distances(subj_id, acq_date, conf):

    subj_dir = os.path.join(conf.ana.base_subj_dir, subj_id)

    post_dir = os.path.join(subj_dir, conf.ana.post_dir)
    dist_dir = os.path.join(post_dir, "dist")
    os.chdir(dist_dir)

    inf_str = subj_id + "_ul_sens_" + acq_date


    # also hemispheres
    for hemi in ["lh", "rh"]:

        dist_paths = []

        # different ROIs for upper and lower presentation
        for vf in ["upper", "lower"]:

            # each ROI
            for (roi_num, roi_name) in zip(
                conf.ana.roi_numbers,
                conf.ana.roi_names
            ):

                spec_path = os.path.join(
                    "/sci/anat/db_ver1",
                    subj_id,
                    "SUMA",
                    subj_id + "_" + hemi + ".spec"
                )

                mask_path = os.path.join(
                    conf.ana.base_subj_dir,
                    subj_id,
                    conf.ana.loc_glm_dir,
                    "{s:s}-loc_{v:s}-mask-{h:s}_nf.niml.dset".format(
                        s=inf_str, v=vf, h=hemi
                    )
                )

                out_path = "{s:s}-post_{v:s}-{r:s}_mask-{h:s}_nf.niml.dset"
                out_path = out_path.format(
                    s=subj_id, v=vf, r=roi_name, h=hemi
                )

                cmd = [
                    "3dcalc",
                    "-a", mask_path,
                    "-expr", "equals(a,{n:s})".format(n=roi_num),
                    "-prefix", out_path,
                    "-overwrite"
                ]

                runcmd.run_cmd(" ".join(cmd))

                # now to find the centre node
                centre_node = fmri_tools.utils.get_centre_node(
                    surf_dset=out_path,
                    spec_path=spec_path
                )

                dist_path = "{s:s}-post_{v:s}-{r:s}_dist-{h:s}_nf"
                dist_path = dist_path.format(
                    s=subj_id, v=vf, r=roi_name, h=hemi
                )

                fmri_tools.utils.write_dist_to_centre(
                    centre_node=centre_node,
                    in_dset=out_path,
                    spec_path=spec_path,
                    dist_dset=dist_path,
                    pad_to="d:" + out_path,
                    inc_centre_node=True
                )

                dist_paths.append(dist_path + ".niml.dset")

        # combined across visual field and ROI, within this hemisphere
        comb_path = "{s:s}-post-dist-{h:s}_nf.niml.dset"
        comb_path = comb_path.format(
            s=subj_id, h=hemi
        )

        comb_cmd = [
            "3dMean",
            "-non_zero",
            "-sum",
            "-prefix", comb_path,
            "-overwrite"
        ]

        comb_cmd.extend(dist_paths)

        runcmd.run_cmd(" ".join(comb_cmd))
Example #11
def run(options):

    # Check file exists
    if not os.path.exists(options.input):
        print("Error: Input file doesn't exist")
        exit(1)


    EXE_DIR = os.path.dirname(os.path.realpath(__file__))
    if not os.path.exists(options.outDir):
        os.mkdir(options.outDir)
    else:
        shutil.rmtree(options.outDir)
        os.mkdir(options.outDir)
    tmpDir = options.outDir + "/intermediate"
    if not os.path.exists(tmpDir):
        os.mkdir(tmpDir)


    # Logging
    import logging
    logger = logging.getLogger("pipits_process")
    logger.setLevel(logging.DEBUG)

    streamLoggerFormatter = logging.Formatter("%(asctime)s %(levelname)s: %(message)s", tc.HEADER + "%Y-%m-%d %H:%M:%S" + tc.ENDC)

    streamLogger = logging.StreamHandler()
    if options.verbose:
        streamLogger.setLevel(logging.DEBUG)
    else:
        streamLogger.setLevel(logging.INFO)
    streamLogger.setFormatter(streamLoggerFormatter)
    logger.addHandler(streamLogger)

    fileLoggerFormatter = logging.Formatter("%(asctime)s %(levelname)s: %(message)s", tc.HEADER + "%Y-%m-%d %H:%M:%S" + tc.ENDC)
    fileLogger = logging.FileHandler(options.outDir + "/log.txt", "w")
    fileLogger.setLevel(logging.DEBUG)
    fileLogger.setFormatter(fileLoggerFormatter)
    logger.addHandler(fileLogger)

    # Summary file
    #summary_file = open(options.outDir + "/summary_pipits_process.txt", "w")

    # Start
    logger.info(tc.OKBLUE + "PIPITS PROCESS started" + tc.ENDC)


    # Check if the file is empty
    if os.stat(options.input).st_size == 0:
        logger.error("Input file is empty!")
        exit(0)
        
    # Dereplicate with VSEARCH
    logger.info("Dereplicating and removing unique sequences prior to picking OTUs")
    cmd = " ".join([pd.VSEARCH, "--derep_fulllength", options.input, 
                    "--output", tmpDir + "/input_nr.fasta", 
                    "--minuniquesize 2", 
                    "--sizeout",
                    "--threads", options.threads])
    rc.run_cmd_VSEARCH(cmd, logger, options.verbose)
    #filesize = os.path.getsize(tmpDir + "/input_nr.fasta") / 1000.0
    #logger.info("Dereplicating " + tc.OKGREEN + "(Done) " + tc.ENDC)
    #logger.info("\t" + tc.RED + "File size after initial dereplication: " + str(filesize) + " MB" + tc.ENDC)

    # Check if the file is empty
    if os.stat(tmpDir + "/input_nr.fasta").st_size == 0:
        logger.info(tc.OKYELLOW + "After dereplicating and removing unique sequences, there are no sequences left! Processing stopped." + tc.ENDC)
        exit(0)


    # OTU clustering
    logger.info("Picking OTUs [VSEARCH]")
    cmd = " ".join([pd.VSEARCH, 
                    "--cluster_fast", tmpDir + "/input_nr.fasta", 
                    "--id", options.VSEARCH_id,
                    "--centroids", tmpDir + "/input_nr_otus.fasta",
                    "--uc", tmpDir + "/input_nr_otus.uc",
                    "--threads", options.threads])
    rc.run_cmd_VSEARCH(cmd, logger, options.verbose)


    # Chimera removal
    logger.info("Removing chimeras [VSEARCH]")
    cmd = " ".join([pd.VSEARCH, 
                    "--uchime_ref", tmpDir + "/input_nr_otus.fasta", 
                    "--db", pd.UNITE_REFERENCE_DATA_CHIMERA, 
                    "--nonchimeras", tmpDir + "/input_nr_otus_nonchimeras.fasta",
                    "--threads", options.threads])
    rc.run_cmd_VSEARCH(cmd, logger, options.verbose)


    # Rename OTUs
    logger.info("Renaming OTUs")
    def renumberOTUS():
        handle_in = open(tmpDir + "/input_nr_otus_nonchimeras.fasta", "rU")
        handle_out = open(tmpDir + "/input_nr_otus_nonchimeras_relabelled.fasta", "w")
        for line in handle_in:
            if line.startswith(">"):
                newlabel = line[1:].split(";")[0]
                handle_out.write(">" + newlabel + "\n")
            else:
                handle_out.write(line.rstrip() + "\n")
        handle_in.close()
        handle_out.close()
    renumberOTUS()


    # Map reads to OTUs
    logger.info("Mapping reads onto centroids [VSEARCH]")
    cmd = " ".join([pd.VSEARCH, 
                    "--usearch_global", options.input, 
                    "--db", tmpDir + "/input_nr_otus_nonchimeras_relabelled.fasta", 
                    "--id", options.VSEARCH_id, 
                    "--uc", tmpDir + "/otus.uc",
                    "--threads", options.threads])
    rc.run_cmd_VSEARCH(cmd, logger, options.verbose)


    # OTU construction
    logger.info("Making OTU table")
    cmd = " ".join(["python", EXE_DIR + "/pipits_uc/uc2otutab.py", tmpDir + "/otus.uc", 
                    ">", 
                    tmpDir + "/otu_table_prelim.txt"])
    rc.run_cmd_VSEARCH(cmd, logger, options.verbose)


    # Convert to biom
    logger.info("Converting classic tabular OTU into a BIOM format [BIOM]")
    try:
        os.remove(tmpDir + "/otu_table_prelim.biom")
    except OSError:
        pass
    cmd = " ".join([pd.BIOM, "convert", 
                    "-i", tmpDir + "/otu_table_prelim.txt", 
                    "-o", tmpDir + "/otu_table_prelim.biom", 
                    "--table-type=\"OTU table\""])
    rc.run_cmd(cmd, logger, options.verbose)


    # Classifying OTUs
    # http://sourceforge.net/projects/rdp-classifier/files/RDP_Classifier_TrainingData/ 
    logger.info("Assigning taxonomy [RDP Classifier]")
    cmd = " ".join(["java", "-jar", pd.RDP_CLASSIFIER_JAR, "classify", 
                    "-t", pd.UNITE_RETRAINED_DIR + "/rRNAClassifier.properties", 
                    "-o", options.outDir + "/assigned_taxonomy.txt", 
                    tmpDir + "/input_nr_otus_nonchimeras_relabelled.fasta"])
    rc.run_cmd(cmd, logger, options.verbose)


    # Reformatting RDP_CLASSIFIER output for biom
    logger.info("Reformatting RDP_Classifier output")
    cmd = " ".join(["python", EXE_DIR + "/reformatAssignedTaxonomy.py", 
                    "-i", options.outDir + "/assigned_taxonomy.txt" , 
                    "-o", options.outDir + "/assigned_taxonomy_reformatted_filtered.txt",
                    "-c", options.RDP_assignment_threshold])
    rc.run_cmd(cmd, logger, options.verbose)


    # Adding RDP_CLASSIFIER output to OTU table
    logger.info("Adding assignment to OTU table [BIOM]")
    try:
            os.remove(options.outDir + "/otu_table.biom")
    except OSError:
            pass
    cmd = " ".join([pd.BIOM, "add-metadata", 
                    "-i", tmpDir + "/otu_table_prelim.biom", 
                    "-o", options.outDir + "/otu_table.biom", 
                    "--observation-metadata-fp", options.outDir + "/assigned_taxonomy_reformatted_filtered.txt", 
                    "--observation-header", "OTUID,taxonomy,confidence", 
                    "--sc-separated", "taxonomy", 
                    "--float-fields", "confidence"])
    rc.run_cmd(cmd, logger, options.verbose)


    # Convert BIOM to TABLE
    logger.info("Converting OTU table with taxa assignment into a BIOM format [BIOM]")
    try:
        os.remove(options.outDir + "/otu_table.txt")
    except OSError:
        pass
    cmd = " ".join([pd.BIOM, "convert", 
                    "-i", options.outDir + "/otu_table.biom", 
                    "-o", options.outDir + "/otu_table.txt", 
                    "--header-key taxonomy",  
                    "-b"])
    rc.run_cmd(cmd, logger, options.verbose)


    # Make phylotype table
    logger.info("Phylotyping OTU table")
    cmd = " ".join(["python", EXE_DIR + "/phylotype_biom.py", "-i", options.outDir + "/otu_table.biom", "-o", options.outDir + "/phylotype_table.txt"])
    rc.run_cmd(cmd, logger, options.verbose)

    try:
        os.remove(options.outDir + "/phylotype_table.biom")
    except OSError:
        pass
    cmd = " ".join([pd.BIOM, "convert",
                    "-i", options.outDir + "/phylotype_table.txt",
                    "-o", options.outDir + "/phylotype_table.biom",
                    "--table-type=\"OTU table\" --process-obs-metadata=\"taxonomy\""])
    rc.run_cmd(cmd, logger, options.verbose)


    # Move representative sequence file to outDir
    shutil.move(tmpDir + "/input_nr_otus_nonchimeras_relabelled.fasta", options.outDir + "/repseqs.fasta")


    # Remove tmp
    if options.remove:
        logger.info("Cleaning temporary directory")
        shutil.rmtree(tmpDir)


    # Final stats

    #############################
    # Import json formatted OTU #
    #############################

    def biomstats(BIOMFILE):
        import json
        jsondata = open(BIOMFILE)
        biom = json.load(jsondata)

        sampleSize = int(biom["shape"][1])
        otus = int(biom["shape"][0])

        taxonomies = []
        for i in range(len(biom["rows"])):
            taxonomies.append("; ".join(biom["rows"][i]["metadata"]["taxonomy"]))

        sampleids = []
        for i in range(len(biom["columns"])):
            sampleids.append(biom["columns"][i]["id"])

        import numpy as np

        # BIOM table into matrix
        matrix = np.zeros(shape=(otus, sampleSize))
        for i in biom["data"]:
            matrix[i[0], i[1]] = i[2]
        totalCount = matrix.sum()

        return totalCount, otus, sampleSize

    otu_reads_count, otu_count, otu_sample_count = biomstats(options.outDir + "/otu_table.biom")
    phylo_reads_count, phylo_count, phylo_sample_count = biomstats(options.outDir + "/phylotype_table.biom")

    outfile = open(options.outDir + "/summary_pipits_process.txt", "w")

    outfile.write("No.of reads after singletons and chimera removal: " + str(int(otu_reads_count)) + "\n")
    outfile.write("Number of OTUs:                                   " + str(otu_count) + "\n")
    outfile.write("Number of phylotypes:                             " + str(phylo_count) + "\n")
    outfile.write("Number of samples:                                " + str(otu_sample_count) + "\n")

    logger.info(tc.RED + "\tNumber of reads after singletons and chimera removal: " + str(int(otu_reads_count)) + tc.ENDC)
    logger.info(tc.RED + "\tNumber of OTUs:                                       " + str(otu_count) + tc.ENDC)
    logger.info(tc.RED + "\tNumber of phylotypes:                                 " + str(phylo_count) + tc.ENDC)
    logger.info(tc.RED + "\tNumber of samples:                                    " + str(otu_sample_count) + tc.ENDC)


    # Done!
    logger.info(tc.OKBLUE + "PIPITS_PROCESS ended successfully." + tc.ENDC)
    logger.info(tc.OKYELLOW + "Resulting files are in \"" + options.outDir + "\" directory" + tc.ENDC)
Example #12
def data_dump(conf, paths):
    "Dump all the relevant data to a single text file"

    # ingredients:
    #   -node coordinates
    #   -patch IDs
    #   -patch centre dist
    #   -all > blank from GLM
    #   -coherent PSC
    #   -incoherent PSC

    patch_ids = np.setdiff1d(conf.exp.mod_patches, conf.ana.exclude_patch_ids)

    vf_lookup = {"lh": "R", "rh": "L"}

    # open it this way so that we can write to it twice and it will append
    dump_handle = open(paths.coh_ana.comb.full(".txt"), "w")

    os.chdir(paths.coh_ana.base.full())

    for hemi in ["lh", "rh"]:

        hemi_ext = "_" + hemi

        glm_comb_path = paths.coh_ana.glm_comb.full(hemi_ext +
                                                    "-full.niml.dset")

        # first, want to combine all the GLM data together
        comb_cmd = [
            "3dMean", "-non_zero", "-sum", "-prefix", glm_comb_path,
            "-overwrite"
        ]

        for patch_id in patch_ids:

            if conf.stim.patches[patch_id]["vf"] == vf_lookup[hemi]:

                buck_file = paths.coh_ana.glm.full("-patch_{n:d}".format(
                    n=patch_id) + hemi_ext + "-full.niml.dset" + "[6]")

                comb_cmd.append(buck_file)

        runcmd.run_cmd(" ".join(comb_cmd))

        psc_comb_path = paths.coh_ana.comb.full(hemi_ext + "-full.niml.dset")

        # now we need to know the patch IDs
        loc_id = conf.subj.subj_id + "_loc"
        loc_conf = ns_patches.config.get_conf(loc_id)
        loc_paths = ns_patches.paths.get_subj_paths(loc_conf)

        id_path = loc_paths.loc.patch_id_thr.full(
            "_{h:s}-full_Clustered_e1_a{n:.01f}.niml.dset".format(
                h=hemi, n=conf.loc.area_thr))

        dump_path = paths.coh_ana.comb.full(hemi_ext + ".txt")

        if os.path.exists(dump_path):
            os.remove(dump_path)

        # now we have all our ingredients, we can write out the text file
        dump_cmd = [
            "3dmaskdump", "-o", dump_path, "-mask", psc_comb_path, id_path,
            paths.coh_ana.patch_dist.full("_" + hemi + "-full.niml.dset"),
            glm_comb_path, psc_comb_path
        ]

        runcmd.run_cmd(" ".join(dump_cmd))

        # concatenate across hemis
        np.savetxt(dump_handle, np.loadtxt(dump_path))

    dump_handle.close()
Example #13
def _run_coh_glm(conf, paths, patch_id):
    "Run the coh/incoh GLM for a given patch"

    os.chdir(paths.coh_ana.base.full())

    # contralateral organisation
    if conf.stim.patches[patch_id]["vf"] == "L":
        hemi = "rh"
    else:
        hemi = "lh"

    hemi_ext = "_" + hemi

    # [coh, incoh]
    timings = _get_coh_timing(conf, paths, patch_id)

    # write patch timings
    for (cond, cond_name) in zip(timings, ["coh", "incoh"]):

        cond_path = paths.coh_ana.stim_times.full(
            "-patch_{n:d}_{c:s}.txt".format(n=patch_id, c=cond_name))

        with open(cond_path, "w") as cond_file:

            for run_cond in cond:
                cond_file.write("\t".join(
                    ["{t:d}".format(t=x) for x in run_cond]))
                cond_file.write("\n")

    # write out the mask for this patch
    loc_id = conf.subj.subj_id + "_loc"
    loc_conf = ns_patches.config.get_conf(loc_id)
    loc_paths = ns_patches.paths.get_subj_paths(loc_conf)

    id_path = loc_paths.loc.patch_id_thr.full(
        "_{h:s}-full_Clustered_e1_a{n:.01f}.niml.dset".format(
            h=hemi, n=conf.loc.area_thr))

    mask_path = paths.coh_ana.mask.full("-patch_{n:d}".format(n=patch_id) +
                                        hemi_ext + "-full.niml.dset")

    # the patch ID has a +1 in the below because they are stored in the niml as
    # 1-based
    mask_cmd = [
        "3dcalc", "-a", id_path, "-expr",
        "equals(a,{x:d})".format(x=patch_id + 1), "-prefix", mask_path,
        "-overwrite"
    ]

    runcmd.run_cmd(" ".join(mask_cmd))

    # right-o, ready for the GLM
    censor_vols = conf.exp.n_censor_vols - 1
    censor_str = "*:0-{v:.0f}".format(v=censor_vols)

    model_str = "SPMG1({d:.0f})".format(d=conf.exp.img_on_s)

    glm_cmd = ["3dDeconvolve", "-input"]

    surf_paths = [
        surf_path.full(hemi_ext + "-full.niml.dset")
        for surf_path in paths.func.surfs
    ]

    glm_cmd.extend(surf_paths)

    glm_cmd.extend([
        "-force_TR",
        "{tr:.3f}".format(tr=conf.acq.tr_s),
        "-polort",
        "a",  # auto baseline degree
        "-local_times",
        "-mask",
        mask_path,
        "-CENSORTR",
        censor_str,
        "-xjpeg",
        "exp_design_patch_{x:d}.png".format(x=patch_id),
        "-x1D",
        "exp_design_patch_{x:d}".format(x=patch_id),
        "-overwrite",
        "-x1D_stop",  # want to use REML, so don't bother running
        "-num_stimts",
        "2"
    ])

    for (i_cond, cond_name) in enumerate(["coh", "incoh"]):

        glm_cmd.extend(
            ["-stim_label", "{x:d}".format(x=i_cond + 1), cond_name])

        glm_cmd.extend([
            "-stim_times", "{x:d}".format(x=i_cond + 1),
            paths.coh_ana.stim_times.full("-patch_{n:d}_{c:s}.txt".format(
                n=patch_id, c=cond_name)), model_str
        ])

    # all > blank contrast
    con_str = "SYM: +coh +incoh"
    glm_cmd.extend(
        ["-gltsym", "'" + con_str + "'", "-glt_label", "1", "all_gt_bl"])

    runcmd.run_cmd(" ".join(glm_cmd))

    os.remove("Decon.REML_cmd")

    beta_file = paths.coh_ana.beta.full("-patch_{n:d}".format(n=patch_id) +
                                        hemi_ext + "-full.niml.dset")
    buck_file = paths.coh_ana.glm.full("-patch_{n:d}".format(n=patch_id) +
                                       hemi_ext + "-full.niml.dset")

    reml_cmd = [
        "3dREMLfit", "-matrix",
        "exp_design_patch_{x:d}.xmat.1D".format(x=patch_id), "-mask",
        mask_path, "-Rbeta", beta_file, "-tout", "-Rbuck", buck_file,
        "-overwrite", "-input"
    ]

    reml_cmd.append("'" + " ".join(surf_paths) + "'")

    # run the proper GLM
    runcmd.run_cmd(" ".join(reml_cmd))

    # now to convert to PSC, while we're here
    design_path = "exp_design_patch_{x:d}.xmat.1D".format(x=patch_id)

    # to write
    ext = "-patch_{n:d}".format(n=patch_id) + hemi_ext + "-full.niml.dset"
    bltc_path = paths.coh_ana.bltc.file(ext)
    bl_path = paths.coh_ana.bl.file(ext)
    psc_path = paths.coh_ana.psc.file(ext)

    # 4 orthogonal polynomial regressors per run
    n_nuisance = conf.subj.n_runs * 4

    # checked via '-verb'
    beta_bricks = "[{n:d}..$]".format(n=n_nuisance)

    fmri_tools.utils.beta_to_psc(
        beta_file,
        beta_bricks,
        design_path,
        bltc_path,
        bl_path,
        psc_path,
    )
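
The beta_bricks selector above skips the per-run baseline regressors before the PSC conversion. A worked example of that arithmetic, assuming a hypothetical 10-run session in place of conf.subj.n_runs:

n_runs = 10

# '-polort a' is taken to give 4 polynomial baseline regressors per run,
# as noted in the example above
n_nuisance = n_runs * 4

# AFNI sub-brick selector: drop the baseline betas, keep the rest
beta_bricks = "[{n:d}..$]".format(n=n_nuisance)

print(beta_bricks)  # [40..$]
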
Example #14
def _run_coh_glm_depth(conf, paths, patch_id, i_bin):
    "Run the coh/incoh GLM for a given patch and depth"

    os.chdir(paths.depth_ana.base.full())

    # contralateral organisation
    if conf.stim.patches[patch_id]["vf"] == "L":
        hemi = "rh"
    else:
        hemi = "lh"

    hemi_ext = "_" + hemi
    bin_ext = "_bin_" + str(i_bin)

    mask_path = paths.coh_ana.mask.full("-patch_{n:d}".format(n=patch_id) +
                                        hemi_ext + "-full.niml.dset")

    # right-o, ready for the GLM
    censor_vols = conf.exp.n_censor_vols - 1
    censor_str = "*:0-{v:.0f}".format(v=censor_vols)

    model_str = "SPMG1({d:.0f})".format(d=conf.exp.img_on_s)

    glm_cmd = ["3dDeconvolve", "-input"]

    surf_paths = [
        surf_path.full(bin_ext + hemi_ext + "-full.niml.dset")
        for surf_path in paths.func.surfs
    ]

    glm_cmd.extend(surf_paths)

    glm_cmd.extend([
        "-force_TR",
        "{tr:.3f}".format(tr=conf.acq.tr_s),
        "-polort",
        "a",  # auto baseline degree
        "-local_times",
        "-mask",
        mask_path,
        "-CENSORTR",
        censor_str,
        "-xjpeg",
        "exp_design_bin_{n:d}_patch_{x:d}.png".format(n=i_bin, x=patch_id),
        "-x1D",
        "exp_design_bin_{n:d}_patch_{x:d}".format(n=i_bin, x=patch_id),
        #            "-overwrite",
        "-x1D_stop",  # want to use REML, so don't bother running
        "-num_stimts",
        "2"
    ])

    for (i_cond, cond_name) in enumerate(["coh", "incoh"]):

        glm_cmd.extend(
            ["-stim_label", "{x:d}".format(x=i_cond + 1), cond_name])

        glm_cmd.extend([
            "-stim_times", "{x:d}".format(x=i_cond + 1),
            paths.coh_ana.stim_times.full("-patch_{n:d}_{c:s}.txt".format(
                n=patch_id, c=cond_name)), model_str
        ])

    runcmd.run_cmd(" ".join(glm_cmd))

    os.remove("Decon.REML_cmd")

    beta_file = paths.depth_ana.beta.full("-patch_{n:d}".format(n=patch_id) +
                                          bin_ext + hemi_ext +
                                          "-full.niml.dset")
    buck_file = paths.depth_ana.glm.full("-patch_{n:d}".format(n=patch_id) +
                                         bin_ext + hemi_ext +
                                         "-full.niml.dset")

    reml_cmd = [
        "3dREMLfit",
        "-matrix",
        "exp_design_bin_{n:d}_patch_{x:d}.xmat.1D".format(n=i_bin, x=patch_id),
        "-mask",
        mask_path,
        "-Rbeta",
        beta_file,
        "-tout",
        "-Rbuck",
        buck_file,
        #        "-overwrite",
        "-input"
    ]

    reml_cmd.append("'" + " ".join(surf_paths) + "'")

    # run the proper GLM
    runcmd.run_cmd(" ".join(reml_cmd))

    # now to convert to PSC, while we're here
    design_path = "exp_design_bin_{n:d}_patch_{x:d}.xmat.1D".format(n=i_bin,
                                                                    x=patch_id)

    # to write
    ext = "-patch_{n:d}".format(
        n=patch_id) + bin_ext + hemi_ext + "-full.niml.dset"
    bltc_path = paths.depth_ana.bltc.file(ext)
    bl_path = paths.depth_ana.bl.file(ext)
    psc_path = paths.depth_ana.psc.file(ext)

    # 4 orthogonal polynomial regressors per run
    n_nuisance = conf.subj.n_runs * 4

    # checked via '-verb'
    beta_bricks = "[{n:d}..$]".format(n=n_nuisance)

    fmri_tools.utils.beta_to_psc(
        beta_file,
        beta_bricks,
        design_path,
        bltc_path,
        bl_path,
        psc_path,
    )
def _run_glm(subj_id, acq_date, conf, log_dir, loc_mask=True):

    inf_str = subj_id + "_ul_sens_" + acq_date

    subj_dir = os.path.join(conf.ana.base_subj_dir, subj_id)

    if loc_mask:
        glm_dir = os.path.join(subj_dir, "analysis")
        mask_descrip = ""
    else:
        glm_dir = os.path.join(subj_dir, "post_analysis", "ret_roi")
        mask_descrip = "_ret_roi"

    os.chdir(glm_dir)

    for vf in ("upper", "lower"):

        cond_details = _write_onsets(
            subj_id=subj_id,
            acq_date=acq_date,
            conf=conf,
            vf=vf,
            runs_type="all",
            log_dir=log_dir
        )

        contrast_details = []

        for src_loc in ["above", "below"]:

            contrast = []

            for curr_cond in cond_details:

                if src_loc in curr_cond["name"]:

                    contrast.append("+" + curr_cond["name"])

            contrast = " ".join(contrast)

            contrast_details.append(
                {
                    "label": src_loc + "_all",
                    "contrast": contrast
                }
            )

        # these files have three nodes, one for each visual area
        run_paths = [
            os.path.join(
                subj_dir,
                "func",
                "run_{n:02d}".format(n=run_num),
                "{s:s}-run_{n:02d}-uw-{vf:s}{m:s}_data.niml.dset".format(
                    s=inf_str, n=run_num, vf=vf, m=mask_descrip
                )
            )
            for run_num in range(1, conf.exp.n_runs + 1)
        ]

        # to write
        glm_filename = "{s:s}-{v:s}{m:s}-glm-.niml.dset".format(
            s=inf_str, v=vf, m=mask_descrip
        )

        # to write
        beta_filename = "{s:s}-{v:s}{m:s}-beta-.niml.dset".format(
            s=inf_str, v=vf, m=mask_descrip
        )

        # to write
        resid_filename = "{s:s}-{v:s}{m:s}-resid-.niml.dset".format(
            s=inf_str, v=vf, m=mask_descrip
        )

        extra_reml_args = ["-Rerrts", resid_filename]

        # run the GLM on this visual field location
        fmri_tools.analysis.glm(
            run_paths=run_paths,
            output_dir=glm_dir,
            glm_filename=glm_filename,
            beta_filename=beta_filename,
            tr_s=conf.ana.tr_s,
            cond_details=cond_details,
            contrast_details=contrast_details,
            censor_str=conf.ana.censor_str,
            matrix_filename="exp_design_" + vf,
            extra_reml_args=extra_reml_args
        )

        # now to convert the beta weights to percent signal change

        # baseline timecourse
        bltc_filename = "{s:s}-{v:s}{m:s}-bltc-.niml.dset".format(
            s=inf_str, v=vf, m=mask_descrip
        )

        # baseline
        bl_filename = "{s:s}-{v:s}{m:s}-bl-.niml.dset".format(
            s=inf_str, v=vf, m=mask_descrip
        )

        # psc
        psc_filename = "{s:s}-{v:s}{m:s}-psc-.niml.dset".format(
            s=inf_str, v=vf, m=mask_descrip
        )

        beta_bricks = "[40..$]"

        # check the beta bricks are as expected
        dset_labels = fmri_tools.utils.get_dset_label(
            beta_filename + beta_bricks
        )

        desired_labels = []

        for img_id in conf.exp.img_ids:
            for src_loc in ["above", "below"]:
                desired_labels.append(
                    vf + "_" + src_loc + "_" + str(img_id) + "#0"
                )

        assert dset_labels == desired_labels

        # run the PSC conversion
        fmri_tools.utils.beta_to_psc(
            beta_path=beta_filename,
            beta_bricks=beta_bricks,
            design_path="exp_design_" + vf + ".xmat.1D",
            bltc_path=bltc_filename,
            bl_path=bl_filename,
            psc_path=psc_filename,
        )

        data_filename = "{s:s}-{v:s}{m:s}-data-amp.txt".format(
            s=inf_str, v=vf, m=mask_descrip
        )

        if os.path.exists(data_filename):
            os.remove(data_filename)

        cmd = [
            "3dmaskdump",
            "-noijk",
            "-o", data_filename,
            psc_filename
        ]

        runcmd.run_cmd(" ".join(cmd))

        # save the betas as text file also, for exploration / checking
        b_filename = "{s:s}-{v:s}{m:s}-beta-amp.txt".format(
            s=inf_str, v=vf, m=mask_descrip
        )

        if os.path.exists(b_filename):
            os.remove(b_filename)

        cmd = [
            "3dmaskdump",
            "-noijk",
            "-o", b_filename,
            beta_filename
        ]

        runcmd.run_cmd(" ".join(cmd))
Example #16
def run(options):

    PIPITS_PREP_OUTPUT = "prepped.fasta"


    # Make directories (outdir and tmpdir)
    if not os.path.exists(options.outDir):
        os.mkdir(options.outDir)
    else:
        shutil.rmtree(options.outDir)
        os.mkdir(options.outDir)

    tmpDir = options.outDir + "/intermediate"
    if not os.path.exists(tmpDir):
        os.mkdir(tmpDir)


    # Logging
    import logging
    logger = logging.getLogger("pipits_prep")
    logger.setLevel(logging.DEBUG)

    streamLoggerFormatter = logging.Formatter("%(asctime)s %(levelname)s: %(message)s", tc.HEADER + "%Y-%m-%d %H:%M:%S" + tc.ENDC)
    streamLogger = logging.StreamHandler()
    if options.verbose:
        streamLogger.setLevel(logging.DEBUG)
    else:
        streamLogger.setLevel(logging.INFO)
    streamLogger.setFormatter(streamLoggerFormatter)
    logger.addHandler(streamLogger)

    fileLoggerFormatter = logging.Formatter("%(asctime)s %(levelname)s: %(message)s", tc.HEADER + "%Y-%m-%d %H:%M:%S" + tc.ENDC)
    fileLogger = logging.FileHandler(options.outDir + "/log.txt", "w")
    fileLogger.setLevel(logging.DEBUG)
    fileLogger.setFormatter(fileLoggerFormatter)
    logger.addHandler(fileLogger)

    # Summary file
    summary_file = open(options.outDir + "/summary_pipits_prep.txt", "w")

    # Start!
    logger.info(tc.OKBLUE + "PIPITS PREP started" + tc.ENDC)

    EXE_DIR = os.path.dirname(os.path.realpath(__file__))


    # Check for the presence of rawdata directory
    logger.debug("Checking for presence of input directory")
    if not os.path.exists(options.dataDir):
        logger.error("Cannot find \"" + options.dataDir + "\" directory. Ensure you have the correct name of the directory where your Illumina sequences are stored")
        exit(1)


    fastqs_l = []
    fastqs_f = []
    fastqs_r = []

    # if list is provided...
    if options.listfile:
        logger.info("Processing user-provided listfile")
        try:
            listfile = open(options.listfile, "r")
        except IOError:
            logger.error("\"" + options.listfile + "\" not found.")
            exit(1)

        for l in listfile:
            if l.strip(" ").strip("\n") != "" and not l.startswith("#"):
                l = l.rstrip().split("\t")
                fastqs_l.append(l[0])
                fastqs_f.append(l[1])
                fastqs_r.append(l[2])
        listfile.close()


    # if not provided
    if not options.listfile:
        logger.info("Getting list of fastq files and sample ID from input folder")
        fastqs = []
        for file in os.listdir(options.dataDir):
            if \
                    file.endswith(".fastq.gz") or \
                    file.endswith(".bz2") or \
                    file.endswith(".fastq"):
                fastqs.append(file)

        if len(fastqs) % 2 != 0:
            logger.error("There are missing pair(s) in the Illumina sequences. Check your files and labelling")
            exit(1)

        coin = True
        for fastq in sorted(fastqs):
            if coin == True:
                fastqs_f.append(fastq)
            else:
                fastqs_r.append(fastq)
            coin = not coin

        for i in range(len(fastqs_f)):
            if fastqs_f[i].split("_")[0] != fastqs_r[i].split("_")[0]:
                logger.error("Problem with labelling FASTQ files.")
                exit(1)
            fastqs_l.append(fastqs_f[i].split("_")[0])


    # Check
    if len(fastqs_f) != len(fastqs_r):
        logger.error("Different number of forward FASTQs and reverse FASTQs")
        exit(1)


    # Done loading. Now check the file extensions.
    filenameextensions = []
    for filename in (fastqs_f + fastqs_r):
        filenameextensions.append(filename.split(".")[-1].rstrip())
    if len(set(filenameextensions)) > 1:
        logger.error("More than two types of extensions")
        exit(1)
    extensionType = next(iter(filenameextensions))


    # For summary 1:
    logger.info("Counting sequences in rawdata")
    numberofsequences = 0
    for fr in fastqs_f:
        if extensionType == "gz":
            cmd = " ".join(["zcat", options.dataDir + "/" + fr, "|", "wc -l"])
        elif extensionType =="bz2":
            cmd = " ".join(["bzcat", options.dataDir + "/" + fr, "|", "wc -l"])
        elif extensionType =="fastq":
            cmd = " ".join(["cat", options.dataDir + "/" + fr, "|", "wc -l"])
        else:
            logger.error("Unknown extension type.")
            exit(1)

        logger.debug(cmd)
        p = subprocess.Popen(cmd, shell=True, stdout = subprocess.PIPE)
        numberofsequences += int(p.communicate()[0]) / 4
        p.wait()
    logger.info("\t" + tc.RED + "Number of paired-end reads in rawdata: " + str(numberofsequences) + tc.ENDC)
    summary_file.write("Number of paired-end reads in rawdata: " + str(numberofsequences) + "\n")


    # Join paired-end reads                                                                                                                                                             
    logger.info("Joining paired-end reads" + "[" + options.joiner_method + "]")
    if not os.path.exists(tmpDir + "/joined"):
        os.mkdir(tmpDir + "/joined")

    for i in range(len(fastqs_l)):

        if extensionType == "gz":
            cmd = " ".join(["gunzip -c", options.dataDir + "/" + fastqs_f[i], ">", tmpDir + "/joined/" + fastqs_f[i] + ".tmp"])
            rc.run_cmd(cmd, logger, options.verbose)
            cmd = " ".join(["gunzip -c", options.dataDir + "/" + fastqs_r[i], ">", tmpDir + "/joined/" + fastqs_r[i] + ".tmp"])
            rc.run_cmd(cmd, logger, options.verbose)
        elif extensionType == "bz2":
            cmd = " ".join(["bunzip2 -c", options.dataDir + "/" + fastqs_f[i], ">", tmpDir + "/joined/" + fastqs_f[i] + ".tmp"])
            rc.run_cmd(cmd, logger, options.verbose)
            cmd = " ".join(["bunzip2 -c", options.dataDir + "/" + fastqs_r[i], ">", tmpDir + "/joined/" + fastqs_r[i] + ".tmp"])
            rc.run_cmd(cmd, logger, options.verbose)
        elif extensionType == "fastq":
            cmd = " ".join(["ln -sf", 
                            os.path.abspath(options.dataDir + "/" + fastqs_f[i]), 
                            tmpDir + "/joined/" + fastqs_f[i] + ".tmp"])
            rc.run_cmd(cmd, logger, options.verbose)
            cmd = " ".join(["ln -sf",
                            os.path.abspath(options.dataDir + "/" + fastqs_r[i]),
                            tmpDir + "/joined/" + fastqs_r[i] + ".tmp"])
            rc.run_cmd(cmd, logger, options.verbose)
        else:
            print(extensionType)
            logger.error("Unknown extension found.")
            exit(1)
        
#        joiner_method = "PEAR"

        if options.joiner_method == "PEAR":
            cmd = " ".join([pd.PEAR,
                            "-f", tmpDir + "/joined/" + fastqs_f[i] + ".tmp",
                            "-r", tmpDir + "/joined/" + fastqs_r[i] + ".tmp",
                            "-o", tmpDir + "/joined/" + fastqs_l[i],
                            "-j", options.threads,
                            "-b", options.base_phred_quality_score,
                            "-q 30",
                            "-p 0.0001"])
            rc.run_cmd(cmd, logger, options.verbose)

            cmd = " ".join(["rm -v",
                            tmpDir + "/joined/" + fastqs_f[i] + ".tmp",
                            tmpDir + "/joined/" + fastqs_r[i] + ".tmp"])
            rc.run_cmd(cmd, logger, options.verbose)

            cmd = " ".join(["mv -f", 
                            tmpDir + "/joined/" + fastqs_l[i] + ".assembled.fastq", 
                            tmpDir + "/joined/" + fastqs_l[i] + ".joined.fastq"])
            rc.run_cmd(cmd, logger, options.verbose)

        elif options.joiner_method == "FASTQJOIN":
            cmd = " ".join(["fastq-join",
                            tmpDir + "/joined/" + fastqs_f[i] + ".tmp",
                            tmpDir + "/joined/" + fastqs_r[i] + ".tmp",
                            "-o",
                            tmpDir + "/joined/" + fastqs_l[i] + ".joined.fastq"])
            rc.run_cmd(cmd, logger, options.verbose)

            cmd = " ".join(["mv -f",
                            tmpDir + "/joined/" + fastqs_l[i] + ".joined.fastqjoin",
                            tmpDir + "/joined/"+ fastqs_l[i] +".joined.fastq"])
            rc.run_cmd(cmd, logger, options.verbose)


    # For summary 2:
    numberofsequences = 0
    for i in range(len(fastqs_l)):
        cmd = " ".join(["cat", tmpDir + "/joined/" + fastqs_l[i] + ".joined.fastq", "|", "wc -l"])
        logger.debug(cmd)
        p = subprocess.Popen(cmd, shell=True, stdout = subprocess.PIPE)
        numberofsequences += int(p.communicate()[0]) / 4
        p.wait()
    logger.info("\t" + tc.RED + "Number of joined reads: " + str(numberofsequences) + tc.ENDC)
    summary_file.write("Number of joined reads: " + str(numberofsequences) + "\n")

    # Quality filter
    logger.info("Quality filtering [FASTX]")
    if not os.path.exists(tmpDir + "/fastqqualityfiltered"):
        os.mkdir(tmpDir + "/fastqqualityfiltered")

    for i in range(len(fastqs_f)):
        cmd = " ".join([pd.FASTX_FASTQ_QUALITY_FILTER,
                        "-i", tmpDir + "/joined/" + fastqs_l[i] + ".joined.fastq", 
                        "-o", tmpDir + "/fastqqualityfiltered/" + fastqs_l[i] + ".fastq", 
                        "-q", options.FASTX_fastq_quality_filter_q,
                        "-p", options.FASTX_fastq_quality_filter_p,
                        "-Q" + options.base_phred_quality_score])
        rc.run_cmd(cmd, logger, options.verbose)


    # For summary 3:
    numberofsequences = 0
    for i in range(len(fastqs_l)):
        cmd = " ".join(["cat", tmpDir + "/fastqqualityfiltered/" + fastqs_l[i] + ".fastq", "|", "wc -l"])
        p = subprocess.Popen(cmd, shell=True, stdout = subprocess.PIPE)
        numberofsequences += int(p.communicate()[0]) / 4
        p.wait()
    logger.info("\t" + tc.RED + "Number of quality filtered reads: " + str(numberofsequences) + tc.ENDC)
    summary_file.write("Number of quality filtered reads: " + str(numberofsequences) + "\n")


    # Removing reads with \"N\" and FASTA conversion
    if options.FASTX_fastq_to_fasta_n:
        logger.info("Converting FASTQ to FASTA [FASTX]")
    else:
        logger.info("Converting FASTQ to FASTA and also removing reads with \"N\" nucleotide [FASTX]")

    if not os.path.exists(tmpDir + "/fastqtofasta"):
        os.mkdir(tmpDir + "/fastqtofasta")

    fastq_to_fasta_n = ""
    if options.FASTX_fastq_to_fasta_n:
        fastq_to_fasta_n = "-n"

    for i in range(len(fastqs_f)):
        cmd = " ".join([pd.FASTX_FASTQ_TO_FASTA, 
                        "-i", tmpDir + "/fastqqualityfiltered/" + fastqs_l[i] + ".fastq", 
                        "-o", tmpDir + "/fastqtofasta/" + fastqs_l[i] + ".fasta", 
                        "-Q33",
                        fastq_to_fasta_n])
        rc.run_cmd(cmd, logger, options.verbose)


    # For summary 4:
    numberofsequences = 0
    for i in range(len(fastqs_l)):
        cmd = " ".join(["grep \"^>\"", tmpDir + "/fastqtofasta/" + fastqs_l[i] + ".fasta", "|", "wc -l"])
        p = subprocess.Popen(cmd, shell=True, stdout = subprocess.PIPE)
        numberofsequences += int(p.communicate()[0])
        p.wait()
    logger.info("\t" + tc.RED + "Number of N-less quality filtered sequences: " + str(numberofsequences) + tc.ENDC)
    summary_file.write("Number of N-less quality filtered sequences: " + str(numberofsequences) + "\n")


    # Re-ID and re-index FASTA and merging them all
    logger.info("Re-IDing and indexing FASTA, and merging all into a single file")
    outfileFinalFASTA = open(options.outDir + "/" + PIPITS_PREP_OUTPUT, "w")
    for i in range(len(fastqs_f)):
        line_index = 1
        logger.debug("Reading " + tmpDir + "/fastqtofasta/" + fastqs_l[i] + ".fasta")
        infile_fasta = open(tmpDir + "/fastqtofasta/" + fastqs_l[i] + ".fasta")
        for line in infile_fasta:
            if line.startswith(">"):
                outfileFinalFASTA.write(">" + fastqs_l[i] + "_" + str(line_index) + "\n")
                line_index += 1
            else:
                outfileFinalFASTA.write(line.rstrip() + "\n")
    outfileFinalFASTA.close()


    # Clean up tmp_directory
    if options.remove:
        logger.info("Cleaning temporary directory")
        shutil.rmtree(tmpDir)


    logger.info(tc.OKBLUE + "PIPITS PREP ended successfully. \"" + PIPITS_PREP_OUTPUT + "\" created in \"" + options.outDir + "\"" + tc.ENDC)
    logger.info(tc.OKYELLOW + "Next Step: PIPITS FUNITS [ Suggestion: pipits_funits -i " + options.outDir + "/" + PIPITS_PREP_OUTPUT + " -o out_funits -x YOUR_ITS_SUBREGION ]" + tc.ENDC)
    print("")
    summary_file.close()
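
When no listfile is given, the pairing step above relies on a sorted directory listing alternating forward and reverse files, and on the sample label being everything before the first underscore. A minimal sketch of that logic with made-up filenames:

fastqs = [
    "sampleA_R1.fastq.gz", "sampleA_R2.fastq.gz",
    "sampleB_R1.fastq.gz", "sampleB_R2.fastq.gz",
]

fastqs_f = []
fastqs_r = []

# sorted order alternates forward and reverse, so flip a coin each iteration
coin = True
for fastq in sorted(fastqs):
    if coin:
        fastqs_f.append(fastq)
    else:
        fastqs_r.append(fastq)
    coin = not coin

fastqs_l = [fastq.split("_")[0] for fastq in fastqs_f]

print(fastqs_l)  # ['sampleA', 'sampleB']
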
def write_stim_library(save_path):

    conf = ul_sens_fmri.config.get_conf()
    conf.ana = ul_sens_analysis.config.get_conf()

    sshot_dir = "/sci/study/ul_sens/sshots"
    sshot_files = os.listdir(sshot_dir)

    cwd = os.getcwd()
    os.chdir(sshot_dir)

    pdf_list = []

    for img_id in conf.exp.img_ids:
        for (i_src_loc, src_loc) in enumerate(("above", "below")):
            for (pres_loc, floc) in zip(("upper", "lower"), ("a", "b")):

                out_file = tempfile.NamedTemporaryFile(suffix=".pdf",
                                                       delete=False)
                out_file.close()

                pdf_list.append(out_file.name)

                for sshot_file in sshot_files:

                    if str(img_id) not in sshot_file:
                        continue

                    src_str = "src_loc_{n:.1f}".format(n=i_src_loc + 1)
                    if src_str not in sshot_file:
                        continue

                    if "id_{n:.1f}".format(n=img_id) not in sshot_file:
                        continue

                    pres_str = "pres_loc_" + floc
                    if pres_str not in sshot_file:
                        continue

                    if "crop" not in sshot_file:
                        continue

                    header = ("Image ID: " + str(img_id) +
                              "; Source location: " + src_loc +
                              "; Presentation location: " + pres_loc)

                    # made it this far, must be OK
                    cmd = [
                        "convert", "-append", "'label:" + header + "'",
                        sshot_file, "-compress", "jpeg", out_file.name
                    ]

                    runcmd.run_cmd(" ".join(cmd))

                    break

    assert len(pdf_list) == 120

    os.chdir(cwd)

    cmd = ["stapler", "cat"]

    cmd.extend(pdf_list)

    cmd.append(save_path)

    runcmd.run_cmd(" ".join(cmd))

    for pdf_file in pdf_list:
        os.remove(pdf_file)
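A hypothetical call to `write_stim_library`; the output path is illustrative only:

if __name__ == "__main__":
    # builds one labelled page per (image, source location, presentation
    # location) combination and staples the pages into a single PDF
    write_stim_library("/tmp/ul_sens_stim_library.pdf")  # illustrative path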
Exemple #18
0
def patch_id( conf, paths ):
    """Form a mask from the GLM output"""

    logger = logging.getLogger( __name__ )
    logger.info( "Running localiser patch identification..." )

    # these are the t-statistics for each patch
    t_bricks = "[2..64(2)]"

    t_cutoff = scipy.stats.t.isf( conf.loc.t_p, conf.loc.dof )

    os.chdir( paths.loc.base.full() )

    for hemi in [ "lh", "rh" ]:

        hemi_ext = "_{h:s}.niml.dset".format( h = hemi )

        glm_path = paths.loc.glm.full( hemi_ext + t_bricks )

        # first, mark each regressor as significant or not
        sig_path = paths.loc.sig.full( hemi_ext )

        sig_cmd = [ "3dcalc",
                    "-a", glm_path,
                    "-expr", "ispositive(step(a-{t:.4f}))".format( t = t_cutoff ),
                    "-prefix", sig_path,
                    "-overwrite"
                  ]

        runcmd.run_cmd( " ".join( sig_cmd ) )


        # then, count how many significant regressors there are at each node
        sig_sum_path = paths.loc.sig_sum.full( hemi_ext )

        sig_sum_cmd = [ "3dTstat",
                        "-overwrite",
                        "-sum",
                        "-prefix", sig_sum_path,
                        sig_path
                      ]

        runcmd.run_cmd( " ".join( sig_sum_cmd ) )


        # now work out which ID is significant for each node, subject to the
        # constraint that there is only one significant patch
        all_id_path = paths.loc.all_patch_id.full( hemi_ext )

        id_cmd = [ "3dTstat",
                   "-overwrite",
                   "-argmax1",
                   "-prefix", all_id_path,
                   "-mask", sig_sum_path,
                   "-mrange", "1", "1",
                   glm_path
                 ]

        runcmd.run_cmd( " ".join( id_cmd ) )

        full_hemi_ext = "_{h:s}-full.niml.dset".format( h = hemi )

        # need to pad to full for integration with ROIs
        all_id_path_full = paths.loc.all_patch_id.full( full_hemi_ext )

        pad_k = "{n:d}".format( n = conf.subj.node_k[ hemi ] )

        fmri_tools.utils.sparse_to_full( in_dset = all_id_path,
                                         out_dset = all_id_path_full,
                                         pad_node = pad_k
                                       )

        # now for V1 only
        id_path_full = paths.loc.patch_id.full( full_hemi_ext )
        roi_path = paths.loc.vl.full( full_hemi_ext )

        cmd = [ "3dcalc",
                "-a", all_id_path_full,
                "-b", roi_path,
                "-expr", "a*within(b,1,1)",
                "-overwrite",
                "-prefix", id_path_full
              ]

        runcmd.run_cmd( " ".join( cmd ) )
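A small sketch of the significance cutoff used above, with illustrative values standing in for `conf.loc.t_p` and `conf.loc.dof`:

import scipy.stats

t_p = 0.01   # illustrative one-tailed alpha; conf.loc.t_p in the analysis
dof = 100    # illustrative degrees of freedom; conf.loc.dof in the analysis

# inverse survival function: the t value exceeded with probability t_p
t_cutoff = scipy.stats.t.isf(t_p, dof)
print("{t:.4f}".format(t=t_cutoff))  # roughly 2.36 for these values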
Exemple #19
0
def glm( conf, paths ):
    """Localiser GLMs"""

    logger = logging.getLogger( __name__ )
    logger.info( "Running localiser GLM..." )

    start_dir = os.getcwd()

    os.chdir( paths.loc.base.full() )

    # minus one because the range is inclusive
    censor_vols = conf.loc.n_cull_vol - 1
    # in AFNI-aware format; (runs):start-end
    censor_str = "*:0-{v:.0f}".format( v = censor_vols )

    # model as a typical SPM event
    model_str = "'SPMG1({d:.0f})'".format( d = conf.loc.dur_s )

    for hemi in [ "lh", "rh" ]:

        glm_cmd = [ "3dDeconvolve",
                    "-input"
                  ]

        surf_paths = [ surf_path.full( "_{h:s}.niml.dset".format( h = hemi ) )
                       for surf_path in paths.func.surfs
                     ]

        glm_cmd.extend( surf_paths )

        glm_cmd.extend( [ "-force_TR", "{tr:.3f}".format( tr = conf.acq.tr_s ),
                          "-polort", "a",  # auto baseline degree
                          "-local_times",
                          "-CENSORTR", censor_str,
                          "-xjpeg", "exp_design.png",
                          "-x1D", "exp_design",
                          "-overwrite",
                          "-x1D_stop",  # want to use REML, so don't bother running
                          "-num_stimts", "{n:d}".format( n = conf.stim.n_patches )
                        ]
                      )

        for i_patch in xrange( conf.stim.n_patches ):

            timing_ext = "_{n:02d}.txt".format( n = i_patch )

            timing_file = paths.loc.timing_base.full( timing_ext )

            glm_cmd.extend( [ "-stim_times",
                              "{n:d}".format( n = i_patch + 1 ),
                              timing_file,
                              model_str
                            ]
                          )

            glm_cmd.extend( [ "-stim_label",
                              "{n:d}".format( n = i_patch + 1 ),
                              "p{n:02d}".format( n = i_patch )
                            ]
                          )

        # run this first GLM
        runcmd.run_cmd( " ".join( glm_cmd ) )

        # delete the annoying command file that 3dDeconvolve writes
        os.remove( "Decon.REML_cmd" )

        beta_file = paths.loc.beta.file( "_{h:s}.niml.dset".format( h = hemi ) )
        buck_file = paths.loc.glm.file( "_{h:s}.niml.dset".format( h = hemi ) )

        reml_cmd = [ "3dREMLfit",
                     "-matrix", "exp_design.xmat.1D",
                     "-Rbeta", beta_file,
                     "-tout",
                     "-Rbuck", buck_file,
                     "-overwrite",
                     "-input"
                   ]

        reml_cmd.append( "'" + " ".join( surf_paths ) + "'" )

        # run the proper GLM
        runcmd.run_cmd( " ".join( reml_cmd ) )

    os.chdir( start_dir )
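For reference, a sketch of what the censor and model strings above expand to, with illustrative values standing in for the configuration:

n_cull_vol = 5   # illustrative; conf.loc.n_cull_vol in the analysis
dur_s = 16.0     # illustrative; conf.loc.dur_s in the analysis

censor_str = "*:0-{v:.0f}".format(v=n_cull_vol - 1)
model_str = "'SPMG1({d:.0f})'".format(d=dur_s)

print(censor_str)  # *:0-4  -> censor volumes 0-4 of every run
print(model_str)   # 'SPMG1(16)'  -> SPM canonical HRF for a 16 s event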
def _extract_data(subj_id, acq_date, conf, mask_paths, loc_mask=True):

    inf_str = subj_id + "_ul_sens_" + acq_date

    subj_dir = os.path.join(conf.ana.base_subj_dir, subj_id)

    if loc_mask:
        mask_dir = os.path.join(subj_dir, "loc_analysis")
        analysis_dir = os.path.join(subj_dir, "analysis")
    else:
        mask_dir = os.path.join(subj_dir, "post_analysis", "ret_roi")
        analysis_dir = mask_dir

    n_rois = len(conf.ana.roi_names)
    n_vols_per_run = int(conf.exp.run_len_s / conf.ana.tr_s)

    # initialise our data container; rois x runs x volumes x vf
    data = np.empty((n_rois, conf.exp.n_runs, n_vols_per_run, 2))
    data.fill(np.NAN)

    for (i_vf, vf) in enumerate(("upper", "lower")):

        for run_num in range(1, conf.exp.n_runs + 1):

            run_dir = os.path.join(subj_dir, "func",
                                   "run_{n:02d}".format(n=run_num))

            os.chdir(run_dir)

            # note that the last index here is hemisphere
            hemi_data = np.empty((n_rois, n_vols_per_run, 2))
            hemi_data.fill(np.NAN)

            for (i_hemi, hemi) in enumerate(("lh", "rh")):

                run_path = "{s:s}-run_{n:02d}-uw-{h:s}_nf.niml.dset".format(
                    s=inf_str, n=run_num, h=hemi)

                # average across all nodes in each ROI and dump the timecourse
                # to standard out
                cmd = [
                    "3dROIstats", "-mask",
                    os.path.join(mask_dir, mask_paths[(vf, hemi)]),
                    "-1Dformat", run_path
                ]

                # ... which we don't want to log!
                cmd_out = runcmd.run_cmd(" ".join(cmd), log_stdout=False)

                # check the header for correctness
                roi_header = cmd_out.std_out.splitlines()[1].split("\t")[-3:]

                # make sure that the columns are what I think they are
                for (roi_head, roi_index) in zip(roi_header,
                                                 conf.ana.roi_numbers):
                    assert roi_head.strip() == "Mean_" + roi_index

                # we want to clip out the header and the info lines
                run_data = cmd_out.std_out.splitlines()[3::2]

                # check we've done this correctly
                assert len(run_data) == n_vols_per_run

                for (i_vol, vol_data) in enumerate(run_data):

                    # so this is just one line of data, tab-separated
                    # we want to pull out our three ROIs, which will be the
                    # last in the file
                    vol_data = vol_data.split("\t")[-n_rois:]

                    # store, for each of the ROIs
                    hemi_data[:, i_vol, i_hemi] = vol_data

            # check that we've filled up the array as expected
            assert np.sum(np.isnan(hemi_data)) == 0

            # average over hemispheres
            hemi_data = np.mean(hemi_data, axis=-1)

            if loc_mask:
                mask_descrip = "_"
            else:
                mask_descrip = "_ret_roi_"

            run_path = "{s:s}-run_{n:02d}-uw-{vf:s}{m:s}data.txt".format(
                s=inf_str, n=run_num, vf=vf, m=mask_descrip)

            # save it out as a text file for this run; rois x vols
            np.savetxt(run_path, hemi_data)

            # we also want to save what 'nodes' in this data corresponds to; ie
            # ROI identifiers
            run_nodes_path = "{s:s}-run_{n:02d}-uw-{vf:s}{m:s}nodes.txt".format(
                s=inf_str, n=run_num, vf=vf, m=mask_descrip)

            np.savetxt(run_nodes_path, map(int, conf.ana.roi_numbers), "%d")

            # now we want to make it into an AFNI dataset so we can run the GLM
            # using their software
            run_path_niml = "{s:s}-run_{n:02d}-uw-{v:s}{m:s}data.niml.dset".format(
                s=inf_str, n=run_num, v=vf, m=mask_descrip)

            cmd = [
                "ConvertDset", "-i_1D", "-input", run_path, "-node_index_1D",
                run_nodes_path, "-o_niml", "-prefix", run_path_niml,
                "-overwrite"
            ]

            runcmd.run_cmd(" ".join(cmd))

            data[:, run_num - 1, :, i_vf] = hemi_data

    assert np.sum(np.isnan(data)) == 0

    os.chdir(analysis_dir)

    data_path = "{s:s}-{m:s}data.npy".format(s=inf_str, m=mask_descrip)

    # we save the data here so we can access it independent of AFNI
    np.save(data_path, data)
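A minimal sketch of reading the saved array back and summarising it; the file name is hypothetical and the averaging step is only illustrative:

import numpy as np

# rois x runs x volumes x visual field, as saved above
data = np.load("subj_ul_sens_acq_date-_data.npy")  # hypothetical file name

# e.g. the mean timecourse per ROI and visual field, averaged over runs
mean_tc = data.mean(axis=1)
print(mean_tc.shape)  # (n_rois, n_vols_per_run, 2)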
def get_mvpa_data(subj_id, acq_date, vf):

    conf = ul_sens_fmri.config.get_conf()
    conf.ana = ul_sens_analysis.config.get_conf()

    inf_str = subj_id + "_ul_sens_" + acq_date

    subj_dir = os.path.join(conf.ana.base_subj_dir, subj_id)

    glm_dir = os.path.join(subj_dir, conf.ana.post_dir, "mvpa_glm")

    beta = {}
    loc_t = {}

    raw_beta = []
    raw_loc_t = []

    for hemi in ("lh", "rh"):

        # the localiser mask
        mask_path = os.path.join(
            subj_dir, conf.ana.loc_glm_dir,
            "{s:s}-loc_{vf:s}-mask-{h:s}_nf.niml.dset".format(s=inf_str,
                                                              h=hemi,
                                                              vf=vf))

        # GLM betas
        beta_filename = os.path.join(
            glm_dir,
            "{s:s}-{v:s}-mvpa_beta-{h:s}_nf.niml.dset".format(s=inf_str,
                                                              v=vf,
                                                              h=hemi))

        # let's start with the betas
        cmd = [
            "3dmaskdump",
            "-noijk",
            "-mask",
            mask_path,
            mask_path,  # this holds the ROI indices, too
            beta_filename
        ]

        cmd_out = runcmd.run_cmd(" ".join(cmd), log_stdout=False)
        raw_out = cmd_out.std_out.splitlines()

        # keep track of the number of nodes - they should be the same for the
        # localiser t extraction. We'll check to make sure.
        n_beta_nodes = len(raw_out)

        # just extend the list, for now - we'll parse it later on
        raw_beta.extend(raw_out)

        # the localiser T
        loc_t_path = os.path.join(
            subj_dir, conf.ana.loc_glm_dir,
            "{s:s}-loc_{vf:s}-glm-{h:s}_nf.niml.dset".format(s=inf_str,
                                                             h=hemi,
                                                             vf=vf))

        # now for the localiser T values
        cmd = [
            "3dmaskdump", "-noijk", "-mask", mask_path, mask_path, loc_t_path
        ]

        cmd_out = runcmd.run_cmd(" ".join(cmd), log_stdout=False)
        raw_out = cmd_out.std_out.splitlines()

        # keep track of the number of nodes - they should be the same for the
        # localiser t extraction. We'll check to make sure.
        n_loc_t_nodes = len(raw_out)

        # just extend the list, for now - we'll parse it later on
        raw_loc_t.extend(raw_out)

        assert n_loc_t_nodes == n_beta_nodes

    # convert to numpy arrays; n_nodes x dumped vals
    beta_data = np.array(
        [map(float, raw_beta_node.split(" ")) for raw_beta_node in raw_beta])

    loc_t_data = np.array([
        map(float, raw_loc_t_node.split(" ")) for raw_loc_t_node in raw_loc_t
    ])

    for (roi_num, roi_name) in zip(conf.ana.roi_numbers, conf.ana.roi_names):

        # find the nodes in the ROI
        in_roi = (beta_data[:, 0].astype("int") == int(roi_num))

        # check that the localiser agrees
        assert np.all(
            in_roi == (loc_t_data[:, 0].astype("int") == int(roi_num))
        )

        n_roi_nodes = np.sum(in_roi)

        roi_beta_data = np.empty((conf.exp.n_img, conf.exp.n_src_locs,
                                  conf.exp.n_runs, n_roi_nodes))
        roi_beta_data.fill(np.NAN)

        # need to farm out the beta data appropriately

        # we can use beta_filename because the hemisphere doesn't matter
        dset_labels = fmri_tools.utils.get_dset_label(beta_filename)

        # the -1 is because we also dumped the ROI index
        assert len(dset_labels) == (beta_data.shape[1] - 1)

        for (i_col, dset_label) in enumerate(dset_labels):

            # if it's one of the noise regressors, move along
            if dset_label[:3] == "Run":

                assert ("Pol" in dset_label)
                continue

            dset_params = dset_label.split("#")[0].split("_")

            (curr_vf, curr_sl, curr_id, curr_run) = dset_params

            # make sure we're looking at the correct file
            assert curr_vf == vf

            if curr_sl == "above":
                i_sl = 0
            elif curr_sl == "below":
                i_sl = 1
            else:
                raise ValueError()

            i_id = list(conf.exp.img_ids).index(int(curr_id))

            i_run = int(curr_run) - 1

            # the +1 is because of the first index being the ROI
            roi_beta_data[i_id, i_sl, i_run, :] = beta_data[in_roi, i_col + 1]

        # check that we've filled up the array
        assert np.sum(np.isnan(roi_beta_data)) == 0

        # now for the localiser - more straightforward
        roi_loc_t_data = loc_t_data[in_roi, -1]

        assert roi_loc_t_data.shape[0] == roi_beta_data.shape[-1]

        beta[roi_name] = roi_beta_data
        loc_t[roi_name] = roi_loc_t_data

    return (beta, loc_t)
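A hypothetical use of `get_mvpa_data`, keeping only nodes whose localiser t exceeds a threshold; the subject ID, date, and threshold are illustrative:

(beta, loc_t) = get_mvpa_data("p1001", "20140101", "upper")  # illustrative args

t_thresh = 2.0  # illustrative node-selection threshold

for roi_name in beta:
    keep = loc_t[roi_name] > t_thresh
    # images x source locations x runs x selected nodes
    roi_beta = beta[roi_name][..., keep]
    print(roi_name, roi_beta.shape)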
Exemple #22
0
def run(options):

    if not os.path.exists(options.outDir):
        os.mkdir(options.outDir)
    else:
        shutil.rmtree(options.outDir)
        os.mkdir(options.outDir)
    tmpDir = options.outDir + "/intermediate"
    if not os.path.exists(tmpDir):
        os.mkdir(tmpDir)

    # Logging
    import logging

    logger = logging.getLogger("pipits_funits")
    logger.setLevel(logging.DEBUG)

    streamLoggerFormatter = logging.Formatter(
        "%(asctime)s %(levelname)s: %(message)s", tc.HEADER + "%Y-%m-%d %H:%M:%S" + tc.ENDC
    )

    streamLogger = logging.StreamHandler()
    if options.verbose:
        streamLogger.setLevel(logging.DEBUG)
    else:
        streamLogger.setLevel(logging.INFO)
    streamLogger.setFormatter(streamLoggerFormatter)
    logger.addHandler(streamLogger)

    fileLoggerFormatter = logging.Formatter(
        "%(asctime)s %(levelname)s: %(message)s", tc.HEADER + "%Y-%m-%d %H:%M:%S" + tc.ENDC
    )
    fileLogger = logging.FileHandler(options.outDir + "/log.txt", "w")
    fileLogger.setLevel(logging.DEBUG)
    fileLogger.setFormatter(fileLoggerFormatter)
    logger.addHandler(fileLogger)

    # Summary file
    summary_file = open(options.outDir + "/summary_pipits_funits.txt", "w")

    # Start!
    logger.info(tc.OKBLUE + "PIPITS FUNITS started" + tc.ENDC)

    # Scripts
    EXE_DIR = os.path.dirname(os.path.realpath(__file__))
    PIPITS_SCRIPTS_DIR = EXE_DIR

    # Check integrity of the input file
    logger.info("Checking input FASTA for illegal characters")
    # index the FASTA so records can be checked without loading the whole file
    record = SeqIO.index(options.input, "fasta")
    for i in record.keys():
        description = record[i].description
        if description.find(" ") != -1:
            logger.error(
                'Error: space (" ") found in a sequence header. Please remove spaces from the headers in your FASTA file before proceeding to the next stage.'
            )
            exit(1)

    # For summary 1:
    logger.info("Counting input sequences")
    numberofsequences = 0
    cmd = " ".join(['grep "^>"', options.input, "|", "wc -l"])
    p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
    numberofsequences += int(p.communicate()[0])
    p.wait()
    logger.info("\t" + tc.RED + "Number of input sequences: " + str(numberofsequences) + tc.ENDC)
    summary_file.write("Number of input sequences: " + str(numberofsequences) + "\n")

    # Dereplicate
    logger.info("Dereplicating sequences for efficiency")
    cmd = " ".join(
        [
            "python",
            PIPITS_SCRIPTS_DIR + "/dereplicate_fasta.py",
            "-i",
            options.input,
            "-o",
            tmpDir + "/derep.fasta",
            "--cluster",
            tmpDir + "/derep.json",
        ]
    )
    rc.run_cmd(cmd, logger, options.verbose)

    # For summary 2:
    logger.debug("Counting dereplicated sequences")
    numberofsequences = 0
    cmd = " ".join(['grep "^>"', tmpDir + "/derep.fasta", "|", "wc -l"])
    p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
    numberofsequences += int(p.communicate()[0])
    p.wait()
    logger.debug("\t" + tc.RED + "Number of dereplicated sequences: " + str(numberofsequences) + tc.ENDC)

    # Run ITSx. Chop reads into regions. Re-orientate where needed
    # ITSx always prints something to STDERR and outputs nothing to STDOUT, so need to supress stdout in non-verbose mode
    # Returncode is always 0 no matter what...
    # No way to tell whether it quits with an error or not other than by capturing STDERR with a phrase "FATAL ERROR" - not implemented
    logger.info("Extracting " + options.ITSx_subregion + " from sequences [ITSx]")
    cmd = " ".join(
        [
            pd.ITSx,
            "-i",
            tmpDir + "/derep.fasta",
            "-o",
            tmpDir + "/derep",
            "--preserve",
            "T",
            "-t",
            "F",
            "--cpu",
            options.threads,
            "--save_regions",
            options.ITSx_subregion,
        ]
    )
    rc.run_cmd_ITSx(cmd, logger, options.verbose)

    # Removing short sequences (<100bp)
    logger.info("Removing sequences below < 100bp")
    cmd = " ".join(
        [
            "python",
            PIPITS_SCRIPTS_DIR + "/fasta_filter_by_length.py",
            "-i",
            tmpDir + "/derep." + options.ITSx_subregion + ".fasta",
            "-o",
            tmpDir + "/derep." + options.ITSx_subregion + ".sizefiltered.fasta",
            "-l 100",
        ]
    )
    rc.run_cmd(cmd, logger, options.verbose)

    # Re-inflate
    logger.info("Re-inflating sequences")
    cmd = " ".join(
        [
            "python",
            PIPITS_SCRIPTS_DIR + "/inflate_fasta.py",
            "-i",
            tmpDir + "/derep." + options.ITSx_subregion + ".sizefiltered.fasta",
            "-o",
            options.outDir + "/ITS.fasta",
            "--cluster",
            tmpDir + "/derep.json",
        ]
    )
    rc.run_cmd(cmd, logger, options.verbose)

    # Count number of ITS
    logger.info("Counting sequences after re-inflation")
    numberofsequences = 0

    cmd = " ".join(['grep "^>"', options.outDir + "/ITS.fasta", "|", "wc -l"])
    p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    numberofsequences = int(p.communicate()[0])
    p.wait()

    if numberofsequences == 0:
        logger.info(tc.RED + "\tNumber of sequences with ITS subregion: " + str(numberofsequences) + tc.ENDC)
        logger.info(tc.RED + "Have you chosen the right subregion? Exiting as no sequences to process." + tc.ENDC)
        summary_file.write("Number of sequences with ITS subregion: " + str(numberofsequences) + "\n")
        exit(1)
    else:
        logger.info(tc.RED + "\tNumber of sequences with ITS subregion: " + str(numberofsequences) + tc.ENDC)
        summary_file.write("Number of sequences with ITS subregion: " + str(numberofsequences) + "\n")

    """
    # Concatenating ITS1 and ITS2
    logger.info("Concatenating ITS1 and ITS2 ...")
    cmd = " ".join(["python", PIPITS_SCRIPTS_DIR + "/concatenate_fasta.py", 
                    "-1", options.outDir + "/ITS1.fasta" , 
                    "-2", options.outDir + "/ITS2.fasta",
                    "-o", options.outDir + "/ITS.fasta"])
    rc.run_cmd(cmd, logger, options.verbose)
    logger.info("Concatenating ITS1 and ITS2 " + tc.OKGREEN + "(Done)" + tc.ENDC)
    """

    # Finally move and delete tmp
    if options.remove:
        logger.info("Cleaning temporary directory")
        shutil.move(tmpDir + "/derep.summary.txt", options.outDir + "/ITSx_summary.txt")
        shutil.rmtree(tmpDir)

    logger.info(
        tc.OKBLUE
        + 'PIPITS FUNITS ended successfully. "'
        + "ITS.fasta"
        + '" created in "'
        + options.outDir
        + '"'
        + tc.ENDC
    )
    logger.info(
        tc.OKYELLOW
        + "Next Step: PIPITS PROCESS [ Suggestion: pipits_process -i "
        + options.outDir
        + "/"
        + "ITS.fasta -o out_process ]"
        + tc.ENDC
    )
    print("")
    summary_file.close()
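The dereplicate and re-inflate steps above are delegated to helper scripts; conceptually they behave something like this simplified sketch (not the actual PIPITS helpers):

def dereplicate(seqs):
    """Collapse identical sequences, remembering the IDs behind each one."""
    clusters = {}
    for (seq_id, seq) in seqs.items():
        clusters.setdefault(seq, []).append(seq_id)
    return clusters

def inflate(clusters):
    """Expand each representative sequence back out to all of its members."""
    return dict((seq_id, seq) for (seq, ids) in clusters.items() for seq_id in ids)

seqs = {"s1": "ACGT", "s2": "ACGT", "s3": "TTGA"}
assert inflate(dereplicate(seqs)) == seqs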
def run(subj_id, acq_date):

    conf = ul_sens_fmri.config.get_conf()
    conf.ana = ul_sens_analysis.config.get_conf()

    inf_str = subj_id + "_ul_sens_" + acq_date

    subj_dir = os.path.join(conf.ana.base_subj_dir, subj_id)

    log_dir = os.path.join(subj_dir, "logs")
    log_path = os.path.join(log_dir, "{s:s}-post-log.txt".format(s=inf_str))

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    fmri_tools.utils.set_logger("screen")
    fmri_tools.utils.set_logger(log_path)

    ana_dir = os.path.join(subj_dir, "analysis")

    post_dir = os.path.join(subj_dir, conf.ana.post_dir)
    resid_dir = os.path.join(post_dir, "resid")

    if not os.path.isdir(post_dir):
        os.mkdir(post_dir)

    if not os.path.isdir(resid_dir):
        os.mkdir(resid_dir)

    os.chdir(resid_dir)

    n_window = 12

    traces = np.zeros((
        3,  # ROI
        2,  # vf
        2,  # src
        n_window  # time
    ))

    for (i_vf, vf) in enumerate(["upper", "lower"]):

        # in - residuals from the GLM analysis
        resid_path = os.path.join(
            ana_dir, "{s:s}-{v:s}-resid-.niml.dset".format(s=inf_str, v=vf))

        # get the residuals into an array by dumping from the dataset
        cmd = ["3dmaskdump", "-noijk", resid_path]
        cmd_out = runcmd.run_cmd(" ".join(cmd), log_stdout=False)

        # this converts the output string to ROIs x time
        resid_flat = np.array([
            map(float, roi_resid.split(" "))
            for roi_resid in cmd_out.std_out.splitlines()
        ])

        # want to get the baseline values to normalise the residuals
        bl_path = os.path.join(
            ana_dir, "{s:s}-{v:s}-bltc-.niml.dset".format(s=inf_str, v=vf))

        cmd = ["3dmaskdump", "-noijk", bl_path]
        cmd_out = runcmd.run_cmd(" ".join(cmd), log_stdout=False)

        # get a baseline value
        bl = np.array(map(float, cmd_out.std_out.splitlines()))

        # convert the residuals to to PSC units
        resid_flat = 100 * (resid_flat / bl[:, np.newaxis])

        # want to split it into runs rather than one flat timecourse
        # want to exclude the initial censored volumes
        vols_per_run = (int(resid_flat.shape[1] / conf.exp.n_runs) -
                        (conf.ana.n_to_censor + 1))
        vols_per_run_total = int(resid_flat.shape[1]) / conf.exp.n_runs

        resid = np.empty((
            resid_flat.shape[0],  # rois
            conf.exp.n_runs,
            vols_per_run))
        resid.fill(np.NAN)

        for i_run in xrange(conf.exp.n_runs):

            i_start = i_run * vols_per_run_total + conf.ana.n_to_censor + 1
            i_end = i_start + vols_per_run

            resid[:, i_run, :] = resid_flat[:, i_start:i_end]

        # convert to squared error
        resid = resid**2

        for i_run in xrange(conf.exp.n_runs):

            # run seq is (pres loc, trial number, trial info)
            # where trial info is:
            #   0: time, in seconds, when it starts
            #   1: source location 1 for above, 2 for below, 0 for null
            #   2: image id
            #   3: whether it is in the 'pre' events
            #   4: been prepped
            run_seq = np.load(
                os.path.join(
                    subj_dir, "logs",
                    "{s:s}_ul_sens_fmri_run_{n:02d}_seq.npy".format(
                        s=subj_id, n=i_run + 1)))

            # pull out this visual field location - either upper or lower
            run_seq = run_seq[i_vf, ...]

            # axis 0 is now trials
            n_trials = run_seq.shape[0]

            # keep a track of how many trials we go through, just to check
            # everything is hunky dory
            trial_count = 0

            for i_trial in xrange(n_trials):

                # check if its a 'trial' that we're interested in
                trial_ok = np.all([
                    run_seq[i_trial, 3] == 0,  # not a pre event
                    run_seq[i_trial, 2] > 0.5,  # an image was shown
                    run_seq[i_trial, 1] > 0  # not a null event
                ])

                if not trial_ok:
                    continue

                onset_s = run_seq[i_trial, 0]
                onset_vol = int(onset_s / conf.ana.tr_s)
                onset_vol -= conf.ana.st_vols_to_ignore

                # trial type is 1-based
                trial_type = run_seq[i_trial, 1] - 1

                # move the residual timecourse so the first index aligns with
                # the trial onset
                shifted_resid = np.roll(resid[:, i_run, :], -onset_vol, axis=1)

                traces[:, i_vf, trial_type, :] += shifted_resid[:, :n_window]

                trial_count += 1

            assert trial_count == 60

        # convert to an average
        traces[:, i_vf, ...] /= (30.0 * conf.exp.n_runs)

    # out
    traces_path = "{s:s}--traces-.npy".format(s=inf_str)

    np.save(traces_path, traces)
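The trial-locked averaging above uses `np.roll` to align a run's residual timecourse with each trial onset; a toy example of the same trick, with illustrative values:

import numpy as np

timecourse = np.arange(10.0)   # one ROI, ten volumes
onset_vol = 3                  # trial starts at volume 3
n_window = 4                   # volumes to keep from the onset

# shift so the onset volume sits at index 0, then take the window
shifted = np.roll(timecourse, -onset_vol)
print(shifted[:n_window])      # -> volumes 3, 4, 5, 6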
Exemple #24
0
def run(subj_id, acq_date):

    conf = ul_sens_fmri.config.get_conf()
    conf.ana = ul_sens_analysis.config.get_conf()

    inf_str = subj_id + "_ul_sens_" + acq_date

    subj_dir = os.path.join(conf.ana.base_subj_dir, subj_id)

    log_dir = os.path.join(subj_dir, "logs")
    log_path = os.path.join(log_dir, "{s:s}-post-log.txt".format(s=inf_str))

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    fmri_tools.utils.set_logger("screen")
    fmri_tools.utils.set_logger(log_path)

    ana_dir = os.path.join(subj_dir, "analysis")

    post_dir = os.path.join(subj_dir, conf.ana.post_dir)
    ret_roi_dir = os.path.join(post_dir, "ret_roi")

    if not os.path.isdir(post_dir):
        os.mkdir(post_dir)

    if not os.path.isdir(ret_roi_dir):
        os.mkdir(ret_roi_dir)

    os.chdir(ret_roi_dir)

    # phase ranges for the different ret roi specs
    phases = {
        "lh": {
            "upper": [0, 90],
            "lower": [270, 360]
        },
        "rh": {
            "upper": [90, 180],
            "lower": [180, 270]
        }
    }

    mask_paths = {}

    # first, calculate the masks based on the ret phases
    for hemi in ["lh", "rh"]:

        if subj_id == "p1003":
            vis_loc = "vis_loc_ver1"
        else:
            vis_loc = "vis_loc"

        wedge_path = os.path.join(
            "/sci/vis_loc/db_ver1", subj_id, conf.ana.subj_wedge_sess[subj_id],
            "dt/wedge",
            "{s:s}_{vl:s}_{a:s}-wedge-angle-{h:s}_nf.niml.dset[0]".format(
                s=subj_id,
                a=conf.ana.subj_wedge_sess[subj_id],
                h=hemi,
                vl=vis_loc))

        # subject's ROI definitions for this hemisphere
        roi_path = os.path.join(
            conf.ana.roi_dir, subj_id,
            "rois", "{s:s}_vis_loc_--rois-{h:s}_nf.niml.dset".format(s=subj_id,
                                                                     h=hemi))

        for pres in ["upper", "lower"]:

            ret_roi_path = "{s:s}-{v:s}-ret_roi_-{h:s}_nf.niml.dset".format(
                s=inf_str, v=pres, h=hemi)

            # we want the roi file to be 'amongst' the identifiers for V1-V3
            roi_test = "(amongst(b," + ",".join(conf.ana.roi_numbers) + ")*b)"

            ret_roi_test = "within(a,{l:d}, {u:d})".format(
                l=phases[hemi][pres][0], u=phases[hemi][pres][1])

            cmd = [
                "3dcalc", "-a", wedge_path, "-b", roi_path, "-expr",
                "'" + ret_roi_test + "*" + roi_test + "'", "-prefix",
                ret_roi_path, "-overwrite"
            ]

            runcmd.run_cmd(" ".join(cmd))

            mask_paths[(pres, hemi)] = ret_roi_path

    ul_sens_analysis.glm_prep._extract_data(subj_id,
                                            acq_date,
                                            conf,
                                            mask_paths,
                                            loc_mask=False)

    ul_sens_analysis.glm._run_glm(subj_id,
                                  acq_date,
                                  conf,
                                  log_dir,
                                  loc_mask=False)
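For reference, a sketch of the 3dcalc selection expression the loop above builds, with illustrative phase bounds and ROI numbers:

roi_numbers = ["1", "2", "3"]   # illustrative; conf.ana.roi_numbers in the analysis
phase_range = [0, 90]           # illustrative; phases[hemi][pres] in the analysis

roi_test = "(amongst(b," + ",".join(roi_numbers) + ")*b)"
ret_roi_test = "within(a,{l:d}, {u:d})".format(l=phase_range[0], u=phase_range[1])

print(ret_roi_test + "*" + roi_test)
# within(a,0, 90)*(amongst(b,1,2,3)*b)
# i.e. the ROI number where the wedge phase falls in the band, zero elsewhere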
def resid(subj_id, acq_date, conf):

    subj_dir = os.path.join(conf.ana.base_subj_dir, subj_id)
    ana_dir = os.path.join(subj_dir, "analysis")

    post_dir = os.path.join(subj_dir, conf.ana.post_dir)
    resid_dir = os.path.join(post_dir, "resid")

    if not os.path.isdir(post_dir):
        os.mkdir(post_dir)

    if not os.path.isdir(resid_dir):
        os.mkdir(resid_dir)

    os.chdir(resid_dir)

    inf_str = subj_id + "_ul_sens_" + acq_date

    n_window = 12

    traces = np.zeros(
        (
            3,  # ROI
            2,  # vf
            2,  # src
            n_window  # time
        )
    )

    for (i_vf, vf) in enumerate(["upper", "lower"]):

        # in
        resid_path = os.path.join(
            ana_dir,
            "{s:s}-{v:s}-resid-.niml.dset".format(
                s=inf_str, v=vf
            )
        )

        cmd = [
            "3dmaskdump",
            "-noijk",
            resid_path
        ]

        cmd_out = runcmd.run_cmd(" ".join(cmd), log_stdout=False)

        # this is ROIs x time
        resid_flat = np.array(
            [
                map(float, roi_resid.split(" "))
                for roi_resid in cmd_out.std_out.splitlines()
            ]
        )

        # baseline
        bl_path = os.path.join(
            ana_dir,
            "{s:s}-{v:s}-bltc-.niml.dset".format(
                s=inf_str, v=vf
            )
        )

        cmd = [
            "3dmaskdump",
            "-noijk",
            bl_path
        ]

        cmd_out = runcmd.run_cmd(" ".join(cmd), log_stdout=False)

        # get a baseline value
        bl = np.array(map(float, cmd_out.std_out.splitlines()))

        # convert to PSC
        resid_flat = 100 * (resid_flat / bl[:, np.newaxis])

        # want to split it into runs
        vols_per_run = (
            int(resid_flat.shape[1] / conf.exp.n_runs) - (conf.ana.n_to_censor + 1)
        )
        vols_per_run_total = int(resid_flat.shape[1]) / conf.exp.n_runs

        resid = np.empty(
            (
                resid_flat.shape[0],
                conf.exp.n_runs,
                vols_per_run
            )
        )
        resid.fill(np.NAN)

        for i_run in xrange(conf.exp.n_runs):

            i_start = i_run * vols_per_run_total + conf.ana.n_to_censor + 1
            i_end = i_start + vols_per_run

            resid[:, i_run, :] = resid_flat[:, i_start:i_end]

        # convert to squared error
        resid = resid ** 2

        for i_run in xrange(conf.exp.n_runs):

            # run seq is (pres loc, trial number, trial info)
            # where trial info is:
            #   0: time, in seconds, when it starts
            #   1: source location 1 for above, 2 for below, 0 for null
            #   2: image id
            #   3: whether it is in the 'pre' events
            #   4: been prepped
            run_seq = np.load(
                os.path.join(
                    subj_dir,
                    "logs",
                    "{s:s}_ul_sens_fmri_run_{n:02d}_seq.npy".format(
                        s=subj_id, n=i_run + 1
                    )
                )
            )

            # pull out this visual field location - either upper or lower
            run_seq = run_seq[i_vf, ...]

            # axis 0 is now trials
            n_trials = run_seq.shape[0]

            trial_count = 0

            for i_trial in xrange(n_trials):

                trial_ok = np.all(
                    [
                        run_seq[i_trial, 3] == 0,  # not a pre event
                        run_seq[i_trial, 2] > 0.5,  # an image was shown
                        run_seq[i_trial, 1] > 0  # not a null event
                    ]
                )

                if not trial_ok:
                    continue

                onset_s = run_seq[i_trial, 0]
                onset_vol = int(onset_s / conf.ana.tr_s)
                onset_vol -= conf.ana.st_vols_to_ignore

                trial_type = run_seq[i_trial, 1] - 1

                shifted_resid = np.roll(
                    resid[:, i_run, :],
                    -onset_vol,
                    axis=1
                )

                traces[:, i_vf, trial_type, :] += shifted_resid[:, :n_window]

                trial_count += 1

            assert trial_count == 60

        # convert the accumulated sum to an average across trials and runs
        traces[:, i_vf, ...] /= (30.0 * conf.exp.n_runs)

    # out
    traces_path = "{s:s}--traces-.npy".format(
        s=inf_str
    )

    np.save(traces_path, traces)