def run_multiple(self, jobs, num_procs=1):
    """Run ``self.run_single`` once for every entry of ``jobs``.

    Args:
        jobs: sized iterable of ``(input_fastq, tophat_options, out_dir)``
            triples.  A falsy ``tophat_options`` is replaced by ``[]`` and a
            falsy ``out_dir`` by ``self.out_dir``.
        num_procs: limit handed to ``_common.wait_for_slot`` after each
            child is started (presumably the maximum number of concurrent
            children -- confirm against ``_common``).

    When ``jobs`` contains exactly one entry, that job is executed directly
    in the current process and the method returns.  Otherwise each job is
    started through ``_common.fork_and_run`` and its pid is recorded.
    """
    children = []
    for fastq, job_options, job_out_dir in jobs:
        run_inline = len(jobs) == 1
        resolved_options = job_options or []
        resolved_out_dir = job_out_dir or self.out_dir

        if run_inline:
            # A lone job does not need a child process.
            self.run_single(fastq, resolved_options, resolved_out_dir)
            return

        children.append(
            _common.fork_and_run(self.run_single, fastq, resolved_options,
                                 resolved_out_dir))
        _common.wait_for_slot(children, num_procs)

    # Final call with a limit of 0 (presumably waits for all remaining
    # children -- confirm against ``_common.wait_for_slot``).
    _common.wait_for_slot(children, 0, True)
def run_multiple(self, jobs, num_procs=1):
    """Execute a batch of TopHat jobs, forking when there is more than one.

    ``jobs`` is a sized iterable of ``(input_fastq, tophat_options,
    out_dir)`` triples.  Falsy options become an empty list and a falsy
    output directory falls back to ``self.out_dir``.

    A batch of exactly one job is run through ``self.run_single`` in the
    calling process, after which the method returns immediately.  For any
    other batch size every job is launched with ``_common.fork_and_run``
    and ``_common.wait_for_slot`` is called with the pid list and
    ``num_procs`` after each launch.
    """
    pids = []
    for reads, options, target_dir in jobs:
        only_job = len(jobs) == 1

        # Normalise the optional parts of the job description.
        if not options:
            options = []
        if not target_dir:
            target_dir = self.out_dir

        if not only_job:
            pid = _common.fork_and_run(self.run_single, reads, options,
                                       target_dir)
            pids.append(pid)
            _common.wait_for_slot(pids, num_procs)
            continue

        # Single job: run it here instead of in a child process.
        self.run_single(reads, options, target_dir)
        return

    # Last call uses a limit of 0 and an extra True flag; its exact meaning
    # is defined in ``_common`` (presumably "wait for everything").
    _common.wait_for_slot(pids, 0, True)
def all_chrom(self, input_bam, scripture_options=[]):
    """Run ``self.single_chrom`` for every chromosome and merge the output.

    One child is started through ``_common.fork_and_run`` per entry of
    ``self.chroms``; ``self.num_procs`` is passed to
    ``_common.wait_for_slot`` after each start (presumably the concurrency
    limit -- confirm against ``_common``).  Afterwards every existing
    ``<chrom>.segments`` file in ``self.out_dir`` is concatenated, in
    ``self.chroms`` order, into ``transcripts.bed`` in the same directory.
    Chromosomes that have no segments file are skipped.

    Args:
        input_bam: forwarded unchanged to ``self.single_chrom``.
        scripture_options: forwarded unchanged to ``self.single_chrom``.
            The default list is only passed along, never mutated here.
    """
    out_file = os.path.join(self.out_dir, "transcripts.bed")

    pid_list = []
    for chrom in self.chroms:
        pid = _common.fork_and_run(self.single_chrom, input_bam, chrom,
                                   scripture_options)
        pid_list.append(pid)
        _common.wait_for_slot(pid_list, self.num_procs)
    _common.wait_for_slot(pid_list, 0, True)

    with open(out_file, "w") as final_fp:
        for chrom in self.chroms:
            in_file = os.path.join(self.out_dir, chrom + ".segments")
            if not os.path.exists(in_file):
                continue
            # Close each per-chromosome file deterministically and stream
            # it line by line instead of loading the whole file at once.
            with open(in_file) as segments_fp:
                final_fp.writelines(segments_fp)
def all_chrom(self, input_bam, scripture_options=[]):
    """Process all chromosomes in child processes, then build one BED file.

    For each chromosome in ``self.chroms`` a child is launched with
    ``_common.fork_and_run(self.single_chrom, ...)`` and
    ``_common.wait_for_slot`` is called with the pid list and
    ``self.num_procs``.  Once the final ``wait_for_slot`` call returns, the
    ``<chrom>.segments`` files found in ``self.out_dir`` are appended, in
    ``self.chroms`` order, to ``transcripts.bed`` in that directory.
    A chromosome whose segments file does not exist is skipped.

    Args:
        input_bam: passed through to ``self.single_chrom``.
        scripture_options: passed through to ``self.single_chrom``; this
            method only forwards the list and never modifies it.
    """
    out_file = os.path.join(self.out_dir, "transcripts.bed")

    pid_list = []
    for chrom in self.chroms:
        pid = _common.fork_and_run(self.single_chrom, input_bam, chrom,
                                   scripture_options)
        pid_list.append(pid)
        _common.wait_for_slot(pid_list, self.num_procs)
    _common.wait_for_slot(pid_list, 0, True)

    with open(out_file, "w") as final_fp:
        for chrom in self.chroms:
            in_file = os.path.join(self.out_dir, chrom + ".segments")
            if not os.path.exists(in_file):
                continue
            # The input handle is closed as soon as it has been copied;
            # writelines() consumes it lazily, one line at a time.
            with open(in_file) as segments_fp:
                final_fp.writelines(segments_fp)
def main():
    """Run the TopHat, Cufflinks and TDF stages selected by the options.

    Command-line arguments are handed to ``parse_options``; the function
    then reads the module-level ``options`` object.  ``options.output_dir``
    is created when missing.  Each stage is skipped when its
    ``options.skip_*`` flag is set:

    * TopHat: one job per ``(label, run)`` pair, written to
      ``<output_dir>/<label>``.
    * Cufflinks: one run per ``<output_dir>/<label>/accepted_hits.bam``,
      written to ``<output_dir>/FPKM_<label>``.
    * TDF: each BAM is converted to a temporary ``.wig`` file and then to
      ``<output_dir>/<label>/<label>.tdf``; the temporary files are
      removed at the end.

    A missing ``accepted_hits.bam`` is reported on stdout and that sample
    is skipped.
    """
    parse_options(sys.argv[1:])

    if not os.path.exists(options.output_dir):
        os.mkdir(options.output_dir)

    # tophat runs
    if not options.skip_tophat:
        jobs = []
        for label, run_params in zip(options.labels, options.runs):
            run_out_dir = os.path.join(options.output_dir, label)
            run_input_fastq = [run_params["left_reads"]]
            extra_params = ["--seed-length", run_params["seed_len"]]

            # The literal string "None" means no right-hand reads were
            # given; mate options are only added when there are some.
            if run_params["right_reads"] != "None":
                run_input_fastq.append(run_params["right_reads"])
                extra_params.extend([
                    "--mate-inner-dist", run_params["inner_dist"],
                    "--mate-std-dev", run_params["inner_dist_sd"],
                ])

            tophat_options = ["--GTF", options.reference,
                              "--no-novel-juncs",
                              "--no-novel-indels",
                              "--no-coverage-search"] + extra_params
            jobs.append((run_input_fastq, tophat_options, run_out_dir))

        th = tophat.TopHat(options.bowtie_index, options.output_dir,
                           options.num_threads)
        th.run_multiple(jobs, options.num_procs)

    # cufflinks runs
    if not options.skip_cufflinks:
        mask_options = [] if not options.mask else ["-M", options.mask]
        for label, _ in zip(options.labels, options.runs):
            input_bam = os.path.join(options.output_dir, label,
                                     "accepted_hits.bam")
            if not os.path.exists(input_bam):
                # Single-argument print(...) emits the same text under the
                # Python 2 print statement and the Python 3 function.
                print("Error: Could not find %s" % (input_bam,))
                continue

            run_out_dir = os.path.join(options.output_dir, "FPKM_" + label)
            cl = cufflinks.Cufflinks(run_out_dir, options.num_threads)
            cl.run(input_bam,
                   ["--min-frags-per-transfrag", "0",
                    "-G", options.reference] + mask_options)

    # make TDF files
    if not options.skip_tdf:
        # collect sample data
        sample_info = []
        for label, _ in zip(options.labels, options.runs):
            input_bam = os.path.join(options.output_dir, label,
                                     "accepted_hits.bam")
            if not os.path.exists(input_bam):
                print("Error: Could not find %s" % (input_bam,))
                continue

            # delete=False: the file has to outlive this handle so that it
            # can be reopened by name in the TDF step below.
            wig_file = tempfile.NamedTemporaryFile("w", suffix=".wig",
                                                   delete=False)
            tdf_out = os.path.join(options.output_dir, label,
                                   label + ".tdf")
            sample_info.append((input_bam, wig_file, tdf_out))

        # make wigs
        # NOTE(review): the wait limit here is options.num_threads, while
        # the TopHat stage uses options.num_procs -- confirm intended.
        st = samtools.SAMTools()
        pid_list = []
        for input_bam, wig_file, tdf_out in sample_info:
            pid = _common.fork_and_run(st.wig, input_bam, wig_file,
                                       options.filter_less_than)
            pid_list.append(pid)
            _common.wait_for_slot(pid_list, options.num_threads)
        _common.wait_for_slot(pid_list, 0, True)

        # make tdfs
        # NOTE(review): ``args`` is not defined in this function; it is
        # presumably a module-level name set by parse_options -- confirm.
        igv = igvtools.IGVTools()
        pid_list = []
        for input_bam, wig_file, tdf_out in sample_info:
            pid = _common.fork_and_run(igv.tile, wig_file.name, tdf_out,
                                       args[0])
            pid_list.append(pid)
            _common.wait_for_slot(pid_list, options.num_threads)
        _common.wait_for_slot(pid_list, 0, True)

        # remove tempfiles
        for _, wig_file, _ in sample_info:
            # Close this process's handle before unlinking so the
            # descriptor is not left open.
            wig_file.close()
            os.unlink(wig_file.name)
def main():
    """Drive the pipeline: TopHat alignment, Cufflinks FPKM, TDF tracks.

    ``parse_options`` is called with ``sys.argv[1:]``; afterwards the
    module-level ``options`` object is read.  ``options.output_dir`` is
    created if it does not exist.  Three stages follow, each guarded by
    its own ``options.skip_*`` flag:

    * TopHat jobs are built from ``zip(options.labels, options.runs)`` and
      run through ``tophat.TopHat.run_multiple``; each job writes to
      ``<output_dir>/<label>``.
    * Cufflinks is run on every ``<output_dir>/<label>/accepted_hits.bam``
      that exists, writing to ``<output_dir>/FPKM_<label>``.
    * For every existing BAM a temporary ``.wig`` file is produced and
      tiled into ``<output_dir>/<label>/<label>.tdf``; the temporary
      files are closed and deleted afterwards.

    Samples whose ``accepted_hits.bam`` is missing are reported on stdout
    and skipped.
    """
    parse_options(sys.argv[1:])

    if not os.path.exists(options.output_dir):
        os.mkdir(options.output_dir)

    # tophat runs
    if not options.skip_tophat:
        jobs = []
        for label, run_params in zip(options.labels, options.runs):
            run_out_dir = os.path.join(options.output_dir, label)
            run_input_fastq = [run_params["left_reads"]]
            extra_params = ["--seed-length", run_params["seed_len"]]

            # right_reads holds the string "None" when there is no second
            # read file; only otherwise are the mate options appended.
            if run_params["right_reads"] != "None":
                run_input_fastq.append(run_params["right_reads"])
                extra_params.extend([
                    "--mate-inner-dist", run_params["inner_dist"],
                    "--mate-std-dev", run_params["inner_dist_sd"],
                ])

            tophat_options = ["--GTF", options.reference,
                              "--no-novel-juncs",
                              "--no-novel-indels",
                              "--no-coverage-search"] + extra_params
            jobs.append((run_input_fastq, tophat_options, run_out_dir))

        th = tophat.TopHat(options.bowtie_index, options.output_dir,
                           options.num_threads)
        th.run_multiple(jobs, options.num_procs)

    # cufflinks runs
    if not options.skip_cufflinks:
        mask_options = [] if not options.mask else ["-M", options.mask]
        for label, _ in zip(options.labels, options.runs):
            input_bam = os.path.join(options.output_dir, label,
                                     "accepted_hits.bam")
            if not os.path.exists(input_bam):
                # print(...) with one argument behaves identically as a
                # Python 2 statement and as a Python 3 function call.
                print("Error: Could not find %s" % (input_bam,))
                continue

            run_out_dir = os.path.join(options.output_dir, "FPKM_" + label)
            cl = cufflinks.Cufflinks(run_out_dir, options.num_threads)
            cl.run(input_bam,
                   ["--min-frags-per-transfrag", "0",
                    "-G", options.reference] + mask_options)

    # make TDF files
    if not options.skip_tdf:
        # collect sample data
        sample_info = []
        for label, _ in zip(options.labels, options.runs):
            input_bam = os.path.join(options.output_dir, label,
                                     "accepted_hits.bam")
            if not os.path.exists(input_bam):
                print("Error: Could not find %s" % (input_bam,))
                continue

            # delete=False keeps the file on disk after the handle closes;
            # it is removed explicitly at the end of this stage.
            wig_file = tempfile.NamedTemporaryFile("w", suffix=".wig",
                                                   delete=False)
            tdf_out = os.path.join(options.output_dir, label,
                                   label + ".tdf")
            sample_info.append((input_bam, wig_file, tdf_out))

        # make wigs
        # NOTE(review): options.num_threads is used as the wait limit
        # here, options.num_procs in the TopHat stage -- confirm intended.
        st = samtools.SAMTools()
        pid_list = []
        for input_bam, wig_file, tdf_out in sample_info:
            pid = _common.fork_and_run(st.wig, input_bam, wig_file,
                                       options.filter_less_than)
            pid_list.append(pid)
            _common.wait_for_slot(pid_list, options.num_threads)
        _common.wait_for_slot(pid_list, 0, True)

        # make tdfs
        # NOTE(review): ``args`` is not bound inside this function;
        # presumably a module-level global from parse_options -- confirm.
        igv = igvtools.IGVTools()
        pid_list = []
        for input_bam, wig_file, tdf_out in sample_info:
            pid = _common.fork_and_run(igv.tile, wig_file.name, tdf_out,
                                       args[0])
            pid_list.append(pid)
            _common.wait_for_slot(pid_list, options.num_threads)
        _common.wait_for_slot(pid_list, 0, True)

        # remove tempfiles
        for _, wig_file, _ in sample_info:
            # Release the open descriptor held by this process before the
            # file is deleted.
            wig_file.close()
            os.unlink(wig_file.name)