Beispiel #1
0
    def run_multiple(self, jobs, num_procs=1):
        """Run every job in ``jobs`` through ``self.run_single``.

        Args:
            jobs: iterable of ``(input_fastq, tophat_options, out_dir)``
                tuples.  A falsy ``tophat_options`` is replaced by an empty
                list and a falsy ``out_dir`` by ``self.out_dir``.  Any
                iterable is accepted (it is copied into a list first).
            num_procs: limit handed to ``_common.wait_for_slot`` after each
                job is started (presumably the maximum number of concurrent
                children -- confirm against ``_common``).

        A single job is executed directly in the calling process.  Otherwise
        each job is started through ``_common.fork_and_run`` and, once all
        are started, ``_common.wait_for_slot(pid_list, 0, True)`` is called.
        """
        # Materialise once so that len() also works for generators and other
        # iterables without __len__, and so the check is not repeated on
        # every loop iteration.
        jobs = list(jobs)

        if len(jobs) == 1:
            input_fastq, tophat_options, out_dir = jobs[0]
            self.run_single(input_fastq,
                            tophat_options or [],
                            out_dir or self.out_dir)
            return

        pid_list = []

        for input_fastq, tophat_options, out_dir in jobs:
            pid = _common.fork_and_run(self.run_single,
                                       input_fastq,
                                       tophat_options or [],
                                       out_dir or self.out_dir)
            pid_list.append(pid)

            _common.wait_for_slot(pid_list, num_procs)

        # Called even when no job was started, as before.
        _common.wait_for_slot(pid_list, 0, True)
Beispiel #2
0
    def run_multiple(self, jobs, num_procs=1):
        """Dispatch a batch of ``run_single`` invocations.

        ``jobs`` is an iterable of ``(input_fastq, tophat_options, out_dir)``
        tuples.  Falsy ``tophat_options`` become ``[]`` and a falsy
        ``out_dir`` falls back to ``self.out_dir``.

        * Exactly one job: ``self.run_single`` is called in this process and
          the method returns.
        * Otherwise: each job is started with ``_common.fork_and_run``,
          followed by ``_common.wait_for_slot(pid_list, num_procs)``; after
          the last job ``_common.wait_for_slot(pid_list, 0, True)`` is
          called (presumably waiting for all children -- confirm against
          ``_common``).

        Returns:
            None.
        """
        def normalise(job):
            # Apply the per-job defaults in one place.
            fastq, opts, dest = job
            return fastq, (opts or []), (dest or self.out_dir)

        # Copy into a list: the length check below would raise TypeError on
        # a generator, and it only needs to be done once, not per iteration.
        queued = list(jobs)

        if len(queued) == 1:
            self.run_single(*normalise(queued[0]))
            return

        pid_list = []

        for job in queued:
            pid_list.append(_common.fork_and_run(self.run_single,
                                                 *normalise(job)))
            _common.wait_for_slot(pid_list, num_procs)

        _common.wait_for_slot(pid_list, 0, True)
Beispiel #3
0
    def all_chrom(self, input_bam, scripture_options=None):
        """Run ``self.single_chrom`` per chromosome, then merge the outputs.

        One call to ``_common.fork_and_run(self.single_chrom, ...)`` is made
        for each entry of ``self.chroms``; ``_common.wait_for_slot`` is
        invoked with ``self.num_procs`` after each start and with
        ``(pid_list, 0, True)`` after the last one.  Afterwards every
        existing ``<out_dir>/<chrom>.segments`` file is appended, in
        ``self.chroms`` order, to ``<out_dir>/transcripts.bed``.  Chromosomes
        without a segments file are skipped silently.

        Args:
            input_bam: forwarded unchanged to ``single_chrom``.
            scripture_options: extra options forwarded to ``single_chrom``.
                ``None`` (the default) is treated as an empty list.
        """
        if scripture_options is None:
            # Fresh list per call rather than one default list object shared
            # by every invocation.
            scripture_options = []

        out_file = os.path.join(self.out_dir, "transcripts.bed")
        pid_list = []

        for chrom in self.chroms:
            pid = _common.fork_and_run(self.single_chrom, input_bam, chrom,
                                       scripture_options)

            pid_list.append(pid)
            _common.wait_for_slot(pid_list, self.num_procs)

        _common.wait_for_slot(pid_list, 0, True)

        with open(out_file, "w") as final_fp:
            for chrom in self.chroms:
                in_file = os.path.join(self.out_dir, chrom + ".segments")

                if not os.path.exists(in_file):
                    continue

                # Close each segment file as soon as it has been copied
                # instead of leaving the handle to the garbage collector.
                with open(in_file) as segment_fp:
                    final_fp.write(segment_fp.read())
Beispiel #4
0
 def all_chrom(self, input_bam, scripture_options=None):
     """Process every chromosome and concatenate the per-chromosome results.

     For each name in ``self.chroms`` a ``self.single_chrom`` call is
     started via ``_common.fork_and_run``, followed by
     ``_common.wait_for_slot(pid_list, self.num_procs)``.  After the final
     ``_common.wait_for_slot(pid_list, 0, True)`` the files
     ``<out_dir>/<chrom>.segments`` that exist are written, in
     ``self.chroms`` order, into ``<out_dir>/transcripts.bed``.

     Args:
         input_bam: passed through to ``single_chrom``.
         scripture_options: option list passed through to ``single_chrom``;
             defaults to an empty list when omitted or ``None``.
     """
     # A None sentinel replaces the former ``[]`` default, which was a
     # single list object shared between all calls.
     if scripture_options is None:
         scripture_options = []

     merged_path = os.path.join(self.out_dir, "transcripts.bed")
     pid_list = []

     for chrom in self.chroms:
         pid_list.append(_common.fork_and_run(self.single_chrom,
                                              input_bam,
                                              chrom,
                                              scripture_options))
         _common.wait_for_slot(pid_list, self.num_procs)

     _common.wait_for_slot(pid_list, 0, True)

     with open(merged_path, "w") as final_fp:
         for chrom in self.chroms:
             segments_path = os.path.join(self.out_dir, chrom + ".segments")

             # Chromosomes without a segments file contribute nothing.
             if not os.path.exists(segments_path):
                 continue

             # The context manager closes the input handle right away; the
             # previous bare open(...).read() left it open.
             with open(segments_path) as segments_fp:
                 final_fp.write(segments_fp.read())
Beispiel #5
0
def _iter_existing_bams():
    """Yield ``(label, input_bam)`` for each run whose BAM file exists.

    The path checked is ``<options.output_dir>/<label>/accepted_hits.bam``.
    A missing file is reported on stdout and that run is skipped.  The check
    happens lazily, immediately before each pair is yielded.
    """
    # zip() is kept so iteration stops at the shorter of labels/runs.
    for label, _run_params in zip(options.labels, options.runs):
        input_bam = os.path.join(options.output_dir, label,
                                 "accepted_hits.bam")

        if not os.path.exists(input_bam):
            # A parenthesised single argument prints the same text whether
            # ``print`` is a statement or a function.
            print("Error: Could not find %s" % (input_bam,))
            continue

        yield label, input_bam


def _fork_each(func, arg_tuples, limit):
    """Start ``func(*args)`` via ``_common.fork_and_run`` for each tuple.

    ``_common.wait_for_slot(pid_list, limit)`` is called after every start
    and ``_common.wait_for_slot(pid_list, 0, True)`` once at the end.
    ``arg_tuples`` may be a generator; it is consumed one item per start.
    """
    pid_list = []

    for call_args in arg_tuples:
        pid_list.append(_common.fork_and_run(func, *call_args))
        _common.wait_for_slot(pid_list, limit)

    _common.wait_for_slot(pid_list, 0, True)


def _run_tophat():
    """Build one TopHat job per labelled run and execute them all."""
    jobs = []

    for label, run_params in zip(options.labels, options.runs):
        run_out_dir = os.path.join(options.output_dir, label)
        run_input_fastq = [run_params["left_reads"]]
        extra_params = ["--seed-length", run_params["seed_len"]]

        # A right_reads value equal to the string "None" means there is no
        # second reads file; otherwise the mate options are added as well.
        if run_params["right_reads"] != "None":
            run_input_fastq.append(run_params["right_reads"])
            extra_params.extend([
                "--mate-inner-dist", run_params["inner_dist"],
                "--mate-std-dev", run_params["inner_dist_sd"],
            ])

        tophat_options = ["--GTF", options.reference,
                          "--no-novel-juncs",
                          "--no-novel-indels",
                          "--no-coverage-search"] + extra_params

        jobs.append((run_input_fastq, tophat_options, run_out_dir))

    th = tophat.TopHat(options.bowtie_index,
                       options.output_dir,
                       options.num_threads)
    th.run_multiple(jobs, options.num_procs)


def _run_cufflinks():
    """Run Cufflinks on every run that has an ``accepted_hits.bam``."""
    for label, input_bam in _iter_existing_bams():
        run_out_dir = os.path.join(options.output_dir, "FPKM_" + label)

        cl = cufflinks.Cufflinks(run_out_dir, options.num_threads)

        cufflinks_options = ["--min-frags-per-transfrag", "0",
                             "-G", options.reference]
        # The mask option is only passed when a mask was supplied.
        if options.mask:
            cufflinks_options += ["-M", options.mask]

        cl.run(input_bam, cufflinks_options)


def _make_tdf_files():
    """Create a ``<label>.tdf`` next to each existing BAM file.

    Each sample gets a temporary ``.wig`` file which is filled by
    ``SAMTools.wig`` and then converted by ``IGVTools.tile``.  The temporary
    files are closed and removed at the end.
    """
    # collect sample data
    sample_info = []

    for label, input_bam in _iter_existing_bams():
        # delete=False: the file has to survive so it can be addressed by
        # name later on; it is removed explicitly below.
        wig_file = tempfile.NamedTemporaryFile("w", suffix=".wig",
                                               delete=False)
        tdf_out = os.path.join(options.output_dir, label, label + ".tdf")

        sample_info.append((input_bam, wig_file, tdf_out))

    # make wigs -- st.wig receives the open file object itself.
    # NOTE(review): the limit here is options.num_threads, whereas the
    # TopHat stage uses options.num_procs -- confirm this is intended.
    st = samtools.SAMTools()
    _fork_each(st.wig,
               ((input_bam, wig_file, options.filter_less_than)
                for input_bam, wig_file, _tdf_out in sample_info),
               options.num_threads)

    # make tdfs -- igv.tile receives the temporary file's *name*.
    # NOTE(review): ``args`` is not defined in this function; presumably a
    # module-level value set by parse_options -- verify.
    igv = igvtools.IGVTools()
    _fork_each(igv.tile,
               ((wig_file.name, tdf_out, args[0])
                for _input_bam, wig_file, tdf_out in sample_info),
               options.num_threads)

    # remove tempfiles; close our own handle first so it is not leaked.
    for _, wig_file, _ in sample_info:
        wig_file.close()
        os.unlink(wig_file.name)


def main():
    """Parse the command line and run the enabled pipeline stages.

    Stages, each skipped when the matching ``options.skip_*`` flag is set:
    TopHat runs, Cufflinks runs, TDF file creation.  The output directory is
    created first when it does not exist yet.
    """
    parse_options(sys.argv[1:])

    if not os.path.exists(options.output_dir):
        os.mkdir(options.output_dir)

    # tophat runs
    if not options.skip_tophat:
        _run_tophat()

    # cufflinks runs
    if not options.skip_cufflinks:
        _run_cufflinks()

    # make TDF files
    if not options.skip_tdf:
        _make_tdf_files()
Beispiel #6
0
def main():
    """Run the TopHat, Cufflinks and TDF stages selected on the command line.

    ``parse_options`` is called with ``sys.argv[1:]``; afterwards the
    module-level ``options`` object drives everything.  The output directory
    is created if missing.  Each stage is skipped when its
    ``options.skip_*`` flag is true:

    1. TopHat: one job per ``(label, run)`` pair, executed through
       ``TopHat.run_multiple``.
    2. Cufflinks: one run per label whose ``accepted_hits.bam`` exists.
    3. TDF: for each existing BAM a temporary ``.wig`` file is produced with
       ``SAMTools.wig`` and converted with ``IGVTools.tile``; the temporary
       files are closed and deleted afterwards.
    """
    parse_options(sys.argv[1:])

    if not os.path.exists(options.output_dir):
        os.mkdir(options.output_dir)

    def find_bam(label):
        """Return the run's accepted_hits.bam path, or None if it is absent.

        A missing file is reported on stdout.
        """
        bam_path = os.path.join(options.output_dir, label,
                                "accepted_hits.bam")
        if os.path.exists(bam_path):
            return bam_path

        # Parenthesised single argument: same output whether ``print`` is a
        # statement or a function.
        print("Error: Could not find %s" % (bam_path,))
        return None

    # tophat runs
    if not options.skip_tophat:
        jobs = []

        for label, run_params in zip(options.labels, options.runs):
            run_out_dir = os.path.join(options.output_dir, label)
            run_input_fastq = [run_params["left_reads"]]
            extra_params = ["--seed-length", run_params["seed_len"]]

            # right_reads == "None" (the string) means no second reads file.
            if run_params["right_reads"] != "None":
                run_input_fastq.append(run_params["right_reads"])
                extra_params.extend([
                    "--mate-inner-dist", run_params["inner_dist"],
                    "--mate-std-dev", run_params["inner_dist_sd"],
                ])

            jobs.append((run_input_fastq,
                         ["--GTF", options.reference,
                          "--no-novel-juncs",
                          "--no-novel-indels",
                          "--no-coverage-search"] + extra_params,
                         run_out_dir))

        th = tophat.TopHat(options.bowtie_index, options.output_dir,
                           options.num_threads)
        th.run_multiple(jobs, options.num_procs)

    # cufflinks runs
    if not options.skip_cufflinks:
        for label, run_params in zip(options.labels, options.runs):
            input_bam = find_bam(label)
            if input_bam is None:
                continue

            run_out_dir = os.path.join(options.output_dir, "FPKM_" + label)

            cl = cufflinks.Cufflinks(run_out_dir, options.num_threads)
            # "-M <mask>" is appended only when a mask was given.
            cl.run(input_bam,
                   ["--min-frags-per-transfrag", "0",
                    "-G", options.reference]
                   + (["-M", options.mask] if options.mask else []))

    # make TDF files
    if not options.skip_tdf:
        # collect sample data
        sample_info = []

        for label, run_params in zip(options.labels, options.runs):
            input_bam = find_bam(label)
            if input_bam is None:
                continue

            # delete=False: the file must outlive close() because it is
            # addressed by name below; it is unlinked explicitly at the end.
            wig_file = tempfile.NamedTemporaryFile("w",
                                                   suffix=".wig",
                                                   delete=False)
            tdf_out = os.path.join(options.output_dir, label, label + ".tdf")

            sample_info.append((input_bam, wig_file, tdf_out))

        # make wigs (st.wig is handed the open file object)
        # NOTE(review): the limit passed to wait_for_slot is
        # options.num_threads here but options.num_procs for TopHat --
        # confirm this is intended.
        st = samtools.SAMTools()
        pid_list = []

        for input_bam, wig_file, tdf_out in sample_info:
            pid_list.append(_common.fork_and_run(st.wig, input_bam, wig_file,
                                                 options.filter_less_than))
            _common.wait_for_slot(pid_list, options.num_threads)

        _common.wait_for_slot(pid_list, 0, True)

        # make tdfs (igv.tile is handed the temporary file's name)
        # NOTE(review): ``args`` is not assigned anywhere in this function;
        # presumably a module-level value set by parse_options -- verify.
        igv = igvtools.IGVTools()
        pid_list = []

        for input_bam, wig_file, tdf_out in sample_info:
            pid_list.append(_common.fork_and_run(igv.tile, wig_file.name,
                                                 tdf_out, args[0]))
            _common.wait_for_slot(pid_list, options.num_threads)

        _common.wait_for_slot(pid_list, 0, True)

        # remove tempfiles; close this process's handle first so that it is
        # not left open.
        for _, wig_file, _ in sample_info:
            wig_file.close()
            os.unlink(wig_file.name)