def run_clustering(infile, outfile):
    '''Run clustering algorithms on tSNE dim-reduced data.

    Builds one ``CYTOFclust.R`` command per method listed in the
    ``analysis_clust_methods`` PARAMS entry (comma-separated; supported:
    Rphenograph, ClusterX, FlowSOM) and submits them via ``P.run()``.

    Arguments
    ---------
    infile : str
        Path to the ``*.tsne.RData`` file produced by :func:`run_tsne`.
    outfile : str
        Sentinel file (``*.cluster.touch``) created on completion.
    '''
    # picked up from locals() by P.run() to set the cluster job memory
    job_memory = "20G"
    pipeline_dir = PARAMS["pipeline_dir"]
    # e.g. "Rphenograph,ClusterX,FlowSOM"
    clust_methods = PARAMS["analysis_clust_methods"].split(",")

    statements = []
    for clust in clust_methods:
        outname = outfile.replace(".cluster.touch", ".") + str(clust) + ".RData"
        log = outfile.replace(".cluster.touch", ".") + str(clust) + ".log"

        # NOTE: use a per-iteration variable instead of reassigning the
        # `infile` parameter -- the original mutation leaked the
        # ".norm.RData" path from a Rphenograph/FlowSOM iteration into a
        # later ClusterX iteration, which must run on the tSNE file.
        if clust == "Rphenograph":
            # runs on high dimensionality (normalised) data
            clust_infile = infile.replace(".tsne.RData", ".norm.RData")
            no_clust = " "
            print("Rphenograph")
        elif clust == "FlowSOM":
            # runs on high dimensionality (normalised) data
            clust_infile = infile.replace(".tsne.RData", ".norm.RData")
            no_clust = "--noClusters " + str(PARAMS["analysis_clust_no"])
            print("FlowSOM")
        elif clust == "ClusterX":
            # runs on tSNE-reduced data
            clust_infile = infile
            no_clust = " "
            print("ClusterX")
        else:
            # unknown method: warn and skip -- do NOT append a statement
            # built from stale/undefined variables
            print("Specify clustering method in pipeline.ini")
            continue

        statements.append(
            '''Rscript %(pipeline_dir)s/R/CYTOFclust.R
            --infile %(clust_infile)s
            --outfile %(outname)s
            --clusterMethod %(clust)s
            %(no_clust)s
            &> %(log)s''' % locals())

    print(statements)
    P.run()
    # sentinel file for task monitoring
    IOTools.touchFile(outfile)
def run_tsne(infiles, outfile):
    '''Process FCS data and run tSNE.

    Supports multiple comma-separated values for the tSNE parameters
    ``tsne_perplexity`` and ``tsne_iterations``; one ``cytofkit.R`` job is
    submitted per (perplexity, iterations) combination via ``P.run()``.

    Arguments
    ---------
    infiles : sequence of str
        Input FCS file paths; joined into a comma-separated list for R.
    outfile : str
        Sentinel file (``*.matrix.touch``) created on completion.
    '''
    # picked up from locals() by P.run() to set the cluster job memory
    job_memory = "40G"
    pipeline_dir = PARAMS["pipeline_dir"]
    infiles = ','.join(infiles)
    # markers to use for tsne & downstream analysis
    marker_list = PARAMS["analysis_markers"]
    perplexity = str(PARAMS["tsne_perplexity"]).split(",")
    iterations = str(PARAMS["tsne_iterations"]).split(",")
    # number of events per sample
    events = PARAMS["analysis_no_events"]

    statements = []
    for p in perplexity:
        for i in iterations:
            stem = outfile.replace(
                ".matrix.touch", "") + "." + str(p) + "_" + str(i)
            out_norm = stem + ".norm.RData"
            out_tsne = stem + ".tsne.RData"
            # BUGFIX: one log per parameter combination. The previous
            # shared log (outfile.replace(".touch", ".log")) was truncated
            # and clobbered by every concurrently running job (&>).
            log = stem + ".log"
            statements.append(
                '''Rscript %(pipeline_dir)s/R/cytofkit.R
                --infiles %(infiles)s
                --out_norm %(out_norm)s
                --out_tsne %(out_tsne)s
                --markers %(marker_list)s
                --perplexity %(p)s
                --iterations %(i)s
                --no_events %(events)s
                &> %(log)s''' % locals())

    print(statements)
    P.run()
    # sentinel file for task monitoring
    IOTools.touchFile(outfile)
def compute_file_metrics(infile, outfile, metric, suffixes):
    """Apply a tool to compute metrics on files matching the given suffixes.

    Searches ``<track>.dir`` (derived from *infile*) with ``find`` for files
    whose names end in one of *suffixes*, excluding report and underscore
    directories, and writes the sorted result (optionally piped through
    ``cgat_file_apply.sh`` with *metric*) to *outfile*.

    Arguments
    ---------
    infile : str
        A ``*.log`` file; its ``.log``-stripped stem names the directory.
    outfile : str
        Output file; touched as an empty sentinel if *suffixes* is empty.
    metric : str
        ``"file"`` to just list files, otherwise the metric passed to
        ``cgat_file_apply.sh``.
    suffixes : sequence of str or None
        Filename suffixes to match; if empty/None the task is a no-op.
    """
    if suffixes is None or len(suffixes) == 0:
        E.info("No metrics computed for {}".format(outfile))
        IOTools.touchFile(outfile)
        return

    track = P.snip(infile, ".log")

    # convert suffixes to a `find -regex` alternation:
    # prepend a .* and append a $ so each suffix matches at end-of-path
    # NOTE(review): pipes.quote is deprecated (removed in Python 3.13);
    # shlex.quote is the drop-in replacement -- update with the file's imports.
    regex_pattern = " -or ".join(
        ["-regex .*{}$".format(pipes.quote(x)) for x in suffixes])

    E.debug("applying metric {} to files matching {}".format(
        metric, regex_pattern))

    # Backslashes destined for the shell (\/ \( \) \;) are doubled so the
    # Python source contains no invalid escape sequences (SyntaxWarning in
    # modern Python); the runtime statement strings are byte-identical.
    if metric == "file":
        statement = '''find %(track)s.dir -type f -not -regex '.*\\/report.*' -not -regex '.*\\/_.*' \\( %(regex_pattern)s \\) | sort -k1,1 > %(outfile)s'''
    else:
        statement = '''find %(track)s.dir -type f -not -regex '.*\\/report.*' -not -regex '.*\\/_.*' \\( %(regex_pattern)s \\) -exec %(pipeline_scriptsdir)s/cgat_file_apply.sh {} %(metric)s \\; | perl -p -e "s/ +/\\t/g" | sort -k1,1 > %(outfile)s'''

    P.run()