Beispiel #1
0
def run_clustering(infile, outfile):
    '''Run the configured clustering algorithms through R/CYTOFclust.R.

    One ``Rscript`` command is built for every method listed (comma
    separated) in ``PARAMS["analysis_clust_methods"]``.  Supported methods
    are Rphenograph, FlowSOM and ClusterX:

    * Rphenograph and FlowSOM read the ``.norm.RData`` counterpart of
      ``infile`` (high dimensionality data);
    * ClusterX reads ``infile`` itself (tSNE reduced data);
    * FlowSOM additionally receives ``--noClusters`` taken from
      ``PARAMS["analysis_clust_no"]``.

    The commands are collected in ``statements``, ``P.run()`` is called and
    ``outfile`` is touched afterwards.

    Arguments
    ---------
    infile : string
        Path of the tSNE output; expected to contain ``.tsne.RData``.
    outfile : string
        Sentinel file.  ``.cluster.touch`` in this name is replaced by
        ``.<method>.RData`` / ``.<method>.log`` to name the per-method
        result and log files.

    Raises
    ------
    ValueError
        If no clustering method is configured or an unsupported method is
        requested.
    '''

    # NOTE(review): job_memory and statements are not used directly here;
    # P.run() is called without arguments and presumably collects them from
    # this frame, so the local names must be kept - confirm against P.run().
    job_memory = "20G"
    pipeline_dir = PARAMS["pipeline_dir"]

    # Tolerate "a, b" style lists and stray commas in the config value.
    clust_methods = [
        name.strip()
        for name in PARAMS["analysis_clust_methods"].split(",")
        if name.strip()]
    if not clust_methods:
        raise ValueError(
            "Specify clustering method in pipeline.ini "
            "(Rphenograph, FlowSOM or ClusterX)")

    out_prefix = outfile.replace(".cluster.touch", ".")
    # Computed once from the untouched infile: deriving it inside the loop
    # by rebinding infile made later methods (e.g. ClusterX) silently run
    # on the wrong input, depending on the order of the configured methods.
    norm_infile = infile.replace(".tsne.RData", ".norm.RData")

    statements = []

    for clust in clust_methods:
        outname = out_prefix + clust + ".RData"
        log = out_prefix + clust + ".log"

        if clust == "Rphenograph":
            clust_infile = norm_infile  # runs on high dimensionality data
            no_clust = " "
        elif clust == "FlowSOM":
            clust_infile = norm_infile  # runs on high dimensionality data
            no_clust = "--noClusters " + str(PARAMS["analysis_clust_no"])
        elif clust == "ClusterX":
            clust_infile = infile  # run on tSNE reduced data
            no_clust = " "
        else:
            # Fail loudly instead of submitting a job with stale or
            # undefined options for a method the R script cannot know.
            raise ValueError(
                "Unknown clustering method '%s': specify clustering method "
                "in pipeline.ini (Rphenograph, FlowSOM or ClusterX)" % clust)

        print(clust)

        statements.append('''Rscript %(pipeline_dir)s/R/CYTOFclust.R
                               --infile %(clust_infile)s
                               --outfile %(outname)s
                               --clusterMethod %(clust)s
                               %(no_clust)s
                               &> %(log)s''' % locals())

    print(statements)

    P.run()

    IOTools.touchFile(outfile)
Beispiel #2
0
def run_tsne(infiles, outfile):
    '''Process FCS data and run tSNE through R/cytofkit.R.

    Supports multiple values (comma separated) for the tSNE parameters
    ``PARAMS["tsne_perplexity"]`` and ``PARAMS["tsne_iterations"]``: one
    ``Rscript`` command is built for every perplexity/iterations
    combination.  Each combination writes
    ``<prefix>.<perplexity>_<iterations>.norm.RData``,
    ``<prefix>.<perplexity>_<iterations>.tsne.RData`` and
    ``<prefix>.<perplexity>_<iterations>.log``, where ``<prefix>`` is
    ``outfile`` without ``.matrix.touch``.

    The commands are collected in ``statements``, ``P.run()`` is called and
    ``outfile`` is touched afterwards.

    Arguments
    ---------
    infiles : list
        Input file names; passed to the R script as one comma separated
        value.
    outfile : string
        Sentinel file, expected to end in ``.matrix.touch``.

    Raises
    ------
    ValueError
        If no perplexity or no iterations value is configured.
    '''

    # NOTE(review): job_memory and statements are not used directly here;
    # P.run() is called without arguments and presumably collects them from
    # this frame, so the local names must be kept - confirm against P.run().
    job_memory = "40G"
    pipeline_dir = PARAMS["pipeline_dir"]

    infiles = ','.join(infiles)

    # markers to use for tsne & downstream analysis
    marker_list = PARAMS["analysis_markers"]
    # Strip the entries: "30, 50" would otherwise put a space into the
    # output file names and break the shell command.
    perplexity = [
        value.strip()
        for value in str(PARAMS["tsne_perplexity"]).split(",")
        if value.strip()]
    iterations = [
        value.strip()
        for value in str(PARAMS["tsne_iterations"]).split(",")
        if value.strip()]
    events = PARAMS["analysis_no_events"]  # no events per sample

    if not perplexity or not iterations:
        raise ValueError(
            "tsne_perplexity and tsne_iterations must each list at least "
            "one value")

    out_prefix = outfile.replace(".matrix.touch", "")

    statements = []

    for p in perplexity:
        for i in iterations:
            run_prefix = out_prefix + "." + p + "_" + i
            out_norm = run_prefix + ".norm.RData"
            out_tsne = run_prefix + ".tsne.RData"
            # One log per combination: with a single shared log every
            # command's "&>" redirect overwrote the output of the others.
            log = run_prefix + ".log"

            statements.append('''Rscript %(pipeline_dir)s/R/cytofkit.R
                                   --infiles %(infiles)s
                                   --out_norm %(out_norm)s
                                   --out_tsne %(out_tsne)s
                                   --markers %(marker_list)s
                                   --perplexity %(p)s
                                   --iterations %(i)s
                                   --no_events %(events)s
                                   &> %(log)s''' % locals())

    print(statements)

    P.run()

    IOTools.touchFile(outfile)  # creates sentinel file for task monitoring
Beispiel #3
0
def compute_file_metrics(infile, outfile, metric, suffixes):
    """Apply a tool to compute metrics on files matching a list of suffixes.

    Builds a ``find`` command over ``<track>.dir`` (``track`` is ``infile``
    without its ``.log`` suffix) that selects regular files whose path ends
    in one of ``suffixes``, skipping paths with a component starting with
    ``report`` or ``_``.  For ``metric == "file"`` the sorted list of
    matching paths is written to ``outfile``; for any other metric each
    file is passed to ``cgat_file_apply.sh`` together with ``metric`` and
    the tab-separated, sorted output is written to ``outfile``.

    If ``suffixes`` is empty or None, no command is run and ``outfile`` is
    only touched.

    Arguments
    ---------
    infile : string
        Log file name ending in ``.log``; determines the directory searched.
    outfile : string
        File receiving the result.
    metric : string
        ``"file"`` or the metric name handed to ``cgat_file_apply.sh``.
    suffixes : list
        Regular expression fragments; each one is anchored as a suffix
        match on the file path.
    """

    if not suffixes:
        E.info("No metrics computed for {}".format(outfile))
        IOTools.touchFile(outfile)
        return

    track = P.snip(infile, ".log")

    # convert regex patterns to a suffix match:
    # prepend a .*
    # append a $
    # The complete pattern is quoted (not only the suffix) so that the
    # shell can neither glob-expand ".*" nor interpret the trailing "$".
    regex_pattern = " -or ".join(
        "-regex {}".format(_shell_quote(".*{}$".format(x)))
        for x in suffixes)

    E.debug("applying metric {} to files matching {}".format(metric,
                                                             regex_pattern))

    # NOTE(review): statement is not used directly; P.run() is called
    # without arguments and presumably fills the %(...)s placeholders from
    # this frame's locals and the pipeline configuration (e.g.
    # pipeline_scriptsdir) - keep the local names track, regex_pattern,
    # metric and outfile.
    # Raw strings keep the shell escapes literal without relying on
    # Python's handling of invalid escape sequences.
    if metric == "file":
        statement = r'''find %(track)s.dir
        -type f
        -not -regex '.*\/report.*'
        -not -regex '.*\/_.*'
        \( %(regex_pattern)s \)
        | sort -k1,1
        > %(outfile)s'''
    else:
        statement = r'''find %(track)s.dir
        -type f
        -not -regex '.*\/report.*'
        -not -regex '.*\/_.*'
        \( %(regex_pattern)s \)
        -exec %(pipeline_scriptsdir)s/cgat_file_apply.sh {} %(metric)s \;
        | perl -p -e "s/ +/\t/g"
        | sort -k1,1
        > %(outfile)s'''

    P.run()


def _shell_quote(text):
    """Return *text* quoted so the shell treats it as one literal word."""
    # pipes was removed in Python 3.13; shlex.quote is the same function.
    try:
        from shlex import quote
    except ImportError:  # Python 2: shlex has no quote()
        from pipes import quote
    return quote(text)