Code Example #1
File: test_IceOptions.py  Project: wenmm/pbtranscript
def copy_in_fasta_to_out(in_dir, out_dir, filename):
    """copy filename from in_dir (e.g., data) to out_dir,
    return out_fasta
    """
    mknewdir(out_dir)
    cmd = "cp %s %s" % (op.join(in_dir, filename), op.join(out_dir, filename))
    execute(cmd=cmd)
    return op.join(out_dir, filename)
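The helper above shells out to cp; a pure-Python equivalent with shutil avoids the subprocess entirely. A minimal sketch (mknewdir is approximated here with os.makedirs, which may differ from the project's helper if that one recreates an existing directory):

import os
import os.path as op
import shutil

def copy_in_fasta_to_out(in_dir, out_dir, filename):
    """Copy filename from in_dir to out_dir and return the output path.

    Pure-Python variant of the helper above: shutil.copy replaces the
    'cp' subprocess, so there is no shell quoting to worry about.
    """
    if not op.isdir(out_dir):  # stand-in for mknewdir(out_dir)
        os.makedirs(out_dir)
    out_fasta = op.join(out_dir, filename)
    shutil.copy(op.join(in_dir, filename), out_fasta)
    return out_fasta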
Code Example #2
    def setUp(self):
        """Initialize."""
        self.inputDir  = op.join(DATA_DIR, self.testName)
        self.outDir    = op.join(OUT_DIR,  self.testName)
        self.stdoutDir = op.join(STD_DIR,  self.testName)
        self.filename = "test_DazzIDHandler.contigset.xml"

        self.stdout_dazz_fasta = op.join(self.stdoutDir,
                                         self.filename[0:-4] + ".dazz.fasta")
        self.stdout_pickle = self.stdout_dazz_fasta + ".pickle"

        mknewdir(self.outDir)
Code Example #3
    def _test_daligner_against_ref(self, test_name, use_sge, sge_opts,
                                   prob_model_from="fake"):
        """Test daligner_against_ref with and without using sge."""
        copy_dir = op.join(self.dataDir, "test_daligner_against_ref")
        output_dir = op.join(self.outDir, test_name)
        mknewdir(output_dir)

        qname, tname = "test_daligner_query.fasta", "test_daligner_target.fasta"
        query_filename = op.join(output_dir, qname)
        target_filename = op.join(output_dir, tname)

        prob_model = None
        if prob_model_from == "fake":
            prob_model = ProbFromModel(0.01, 0.07, 0.06)
        elif prob_model_from == "fastq":
            fastq_fn = op.join(copy_dir, "test_daligner_reads.fastq")
            prob_model = ProbFromFastq(fastq_fn)
        else:
            self.fail("unknown prob_model_from: %r" % prob_model_from)

        qver_get_func = prob_model.get_smoothed
        qvmean_get_func = prob_model.get_mean

        dummy_o, c, dummy_m = backticks("cp %s %s" % (op.join(copy_dir, qname), query_filename))
        self.assertTrue(c == 0)

        dummy_o, c, dummy_m = backticks("cp %s %s" % (op.join(copy_dir, tname), target_filename))
        self.assertTrue(c == 0)

        old_dir = os.getcwd()
        os.chdir(output_dir)

        runner = DalignerRunner(query_filename=query_filename,
                                target_filename=target_filename,
                                is_FL=True, same_strand_only=True,
                                use_sge=use_sge, sge_opts=sge_opts)
        runner.run(output_dir=op.join(self.outDir, test_name))

        hits = []

        for la4ice_filename in runner.la4ice_filenames:
            hits.extend(daligner_against_ref(query_dazz_handler=runner.query_dazz_handler,
                                             target_dazz_handler=runner.target_dazz_handler,
                                             la4ice_filename=la4ice_filename,
                                             is_FL=True, sID_starts_with_c=False,
                                             qver_get_func=qver_get_func,
                                             qvmean_get_func=qvmean_get_func))
        # Num of hits may change when daligner or parameters change.
        self.assertEqual(len(hits), 706)
        self.assertEqual(str(hits[0]),
                         "m54007_160109_025449/27984844/29_646_CCS/0_617 aligns to m54007_160109_025449/28836279/631_54_CCS")
        os.chdir(old_dir)  # restore the working directory saved above
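The test saves old_dir and must remember to restore it on every exit path (the original ended with os.chdir(output_dir), leaving the cwd changed). A context manager makes the restoration exception-safe; a minimal sketch using only the standard library:

import os
from contextlib import contextmanager

@contextmanager
def working_directory(path):
    """Temporarily chdir into path, restoring the old cwd even on error."""
    old_dir = os.getcwd()
    os.chdir(path)
    try:
        yield
    finally:
        os.chdir(old_dir)

# Usage inside the test: the cwd is restored even if an assertion fails.
# with working_directory(output_dir):
#     runner.run(output_dir=output_dir)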
Code Example #4
    def _test_bin_manual(self, bin_manual, expected_bin_manual):
        """Test SeparateFLNCBySize setting bin manually."""
        out_dir = op.join(OUT_DIR, 'separate_flnc_by_size_bin_manual')
        mknewdir(out_dir)
        with SeparateFLNCBySize(flnc_filename=FLNC_FASTA,
                                bin_manual=bin_manual,
                                root_dir=out_dir) as obj:
            obj.run()

        self.assertEqual(obj.sorted_keys, expected_bin_manual)

        for index, key in enumerate(obj.sorted_keys):
            with FastaReader(obj.out_fasta_files[index]) as reader:
                self.assertTrue(all([key[0].contains(len(r.sequence)) for r in reader]))
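The final assertion depends on each sorted key carrying an interval-like object with a contains method. The real class is defined in pbtranscript; a hypothetical minimal version shows what the check verifies:

class SizeBin(object):
    """Hypothetical stand-in for the interval objects in obj.sorted_keys;
    the real class lives in pbtranscript. A closed size range that
    reports whether a read length falls inside it."""

    def __init__(self, start, end):
        self.start, self.end = start, end

    def contains(self, length):
        return self.start <= length <= self.end

# e.g., SizeBin(0, 1000).contains(617) is True, so every read written to a
# bin's FASTA file should satisfy key[0].contains(len(r.sequence)).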
Code Example #5
    def setUp(self):
        """Initialize."""
        self.inputDir  = op.join(DATA_DIR, self.testName)
        self.outDir    = op.join(OUT_DIR,  self.testName)
        self.stdoutDir = op.join(STD_DIR,  self.testName)
        self.fastaFileName = "test_DazzIDHandler.fasta"

        self.stdout_dazz_fasta = op.join(self.stdoutDir,
                                         self.fastaFileName[0:-6] + ".dazz.fasta")
        self.stdout_pickle = self.stdout_dazz_fasta + ".pickle"

        mknewdir(self.outDir)
        # Copy inputDir/test_DazzIDHandler.fasta to outDir.
        execute("cp %s %s" % (op.join(self.inputDir, self.fastaFileName),
                              op.join(self.outDir,   self.fastaFileName)))
Code Example #6
class TestClusterBins(pbcommand.testkit.PbTestApp):
    """Call python -m pbtranscript.tasks.cluster_bins --resolved-tool-contract rtc.json"""
    out_dir = op.join(OUT_DIR, "test_cluster_bins")
    mknewdir(out_dir)

    out_cluster_chunks_pickle = op.join(out_dir, "cluster_chunks.pickle")
    make_pickle(in_pickle=cluster_chunks_pickle,
                out_pickle=out_cluster_chunks_pickle,
                root_dir=out_dir)

    DRIVER_BASE = "python -m pbtranscript.tasks.cluster_bins"
    INPUT_FILES = [
        out_cluster_chunks_pickle,  # input 0, cluster_chunks.pickle
        ccs_ds
    ]  # idx 1, ccs

    def run_after(self, rtc, output_dir):
        out_dir = op.join(OUT_DIR, "test_cluster_bins")
        cluster_out_dirs = [
            op.join(out_dir, bin_name, "cluster_out") for bin_name in BIN_NAMES
        ]
        self.assertTrue(op.exists(rtc.task.output_files[0]))
        out_consensus_isoforms = [
            op.join(d, "output", "final.consensus.fasta")
            for d in cluster_out_dirs
        ]
        print out_consensus_isoforms
        self.assertTrue(all([op.exists(f) for f in out_consensus_isoforms]))
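The resolved-tool-contract tests in this and the following examples all follow the same pbcommand.testkit pattern: DRIVER_BASE names the task's CLI entry point, INPUT_FILES lists its inputs, and run_after inspects the outputs once the framework has executed the driver. A bare skeleton of that pattern (some_task and the input path are placeholders, not real pbtranscript names):

import os.path as op

import pbcommand.testkit

class TestSomeTask(pbcommand.testkit.PbTestApp):
    """Skeleton of the test pattern above; 'some_task' is a placeholder."""
    DRIVER_BASE = "python -m pbtranscript.tasks.some_task"  # placeholder task
    INPUT_FILES = ["/path/to/input0"]  # placeholder input

    def run_after(self, rtc, output_dir):
        # The framework has already run DRIVER_BASE with a resolved tool
        # contract; verify that the declared outputs were produced.
        self.assertTrue(op.exists(rtc.task.output_files[0]))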
Code Example #7
class TestGatherIcePartialPickle(pbcommand.testkit.PbTestApp):
    """Call python -m pbtranscript.tasks.gather_ice_partial_cluster_bins_pickle --resolved-tool-contract rtc.json"""
    out_dir = op.join(OUT_DIR, "test_gather_ice_partial_cluster_bins_pickle")
    mknewdir(out_dir)

    out_partial_chunks_pickle = op.join(out_dir, "partial_chunks.pickle")
    make_pickle(in_pickle=partial_chunks_pickle,
                out_pickle=out_partial_chunks_pickle,
                root_dir=out_dir,
                copy_consensus_isoforms=True,
                copy_nfl_pickle=True)

    DRIVER_BASE = "python -m pbtranscript.tasks.gather_ice_partial_cluster_bins_pickle"
    INPUT_FILES = [
        out_partial_chunks_pickle,  # input 0, partial_chunk.pickle
        done_txt
    ]  # idx 1, sentinel file

    def run_after(self, rtc, output_dir):
        self.assertTrue(op.exists(rtc.task.output_files[0]))

        out_dir = op.join(OUT_DIR,
                          "test_gather_ice_partial_cluster_bins_pickle")
        cluster_out_dirs = [
            op.join(out_dir, bin_name, "cluster_out") for bin_name in BIN_NAMES
        ]
        out_pickles = [
            IceFiles(prog_name="", root_dir=d).nfl_all_pickle_fn
            for d in cluster_out_dirs
        ]
        print "output nfl pickles are %s" % out_pickles
        self.assertTrue(all([op.exists(f) for f in out_pickles]))
Code Example #8
    def test_run(self):
        """Test run(output_dir, min_match_len, sensitive_mode).
        running on sge and locally.
        """
        run_on_sge = (backticks('qstat')[1] == 0)

        if run_on_sge:
            self.runner.use_sge = True
            self.runner.sge_opts = SgeOptions(100)
            mknewdir(self.out_dir)
            self.runner.run(output_dir=self.out_dir)

            for las_filename in self.runner.las_filenames:
                print "Checking existance of " + las_filename
                self.assertTrue(op.exists(las_filename))

            for la4ice_filename in self.runner.la4ice_filenames:
                print "Checking existance of " + la4ice_filename
                self.assertTrue(op.exists(la4ice_filename))

        # Run locally
        self.runner.use_sge = False
        mknewdir(self.out_dir)
        self.runner.run(output_dir=self.out_dir)

        for las_filename in self.runner.las_filenames:
            print "Checking existance of " + las_filename
            self.assertTrue(op.exists(las_filename))

        for la4ice_filename in self.runner.la4ice_filenames:
            print "Checking existance of " + la4ice_filename
            self.assertTrue(op.exists(la4ice_filename))

        # clean all output
        self.runner.clean_run()

        for las_filename in self.runner.las_filenames:
            print "Checking %s has been removed.\n" % las_filename
            self.assertTrue(not op.exists(las_filename))

        for la4ice_filename in self.runner.la4ice_filenames:
            print "Checking %s has been removed.\n" % la4ice_filename
            self.assertTrue(not op.exists(la4ice_filename))
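Probing for SGE with backticks('qstat')[1] == 0 and branching inside the test works, but the guard can also be expressed declaratively with unittest.skipUnless. A sketch using subprocess in place of the backticks helper:

import os
import subprocess
import unittest

def sge_available():
    """True if 'qstat' runs and exits 0, i.e. an SGE master is reachable
    (the same probe the test above performs via backticks)."""
    try:
        with open(os.devnull, "w") as devnull:
            return subprocess.call(["qstat"], stdout=devnull,
                                   stderr=devnull) == 0
    except OSError:  # qstat not installed
        return False

class TestOnSge(unittest.TestCase):
    """Hypothetical class showing the declarative guard."""

    @unittest.skipUnless(sge_available(), "requires an SGE cluster")
    def test_run_on_sge(self):
        pass  # the SGE branch of test_run would go here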
Code Example #9
    def test_as_contigset(self):
        """Test as_contigset"""
        out_dir = op.join(OUT_DIR, 'test_Utils')
        mknewdir(out_dir)
        fa = op.join(out_dir, "empty.fasta")
        xml = op.join(out_dir, "empty.contigset.xml")
        fai = fa + ".fai"

        execute("touch %s" % fa)
        as_contigset(fa, xml)
        self.assertTrue(op.exists(xml))
        self.assertTrue(op.exists(fai))

        fn = 'reads_of_insert.fasta'
        shutil.copy(src=op.join(DATA_DIR, fn), dst=op.join(out_dir, fn))
        fa = op.join(out_dir, fn)
        as_contigset(fa, fa)

        fai = fa + ".fai"
        xml = op.join(out_dir, 'reads_of_insert.contigset.xml')
        as_contigset(fa, xml)
        self.assertTrue(op.exists(xml))
        self.assertTrue(op.exists(fai))
Code Example #10
class TestGatherPolishedIsoforms(pbcommand.testkit.PbTestApp):
    """Call python -m pbtranscript.tasks.gather_polished_isoforms_in_each_bin --resolved-tool-contract rtc.json"""
    out_dir = op.join(OUT_DIR, "test_gather_polished_isoforms_in_each_bin")
    mknewdir(out_dir)

    out_polish_chunks_pickle = op.join(out_dir, "polish_chunks.pickle")
    make_pickle(in_pickle=polish_chunks_pickle,
                out_pickle=out_polish_chunks_pickle,
                root_dir=out_dir,
                copy_consensus_isoforms=True,
                copy_flnc_pickle=True,
                copy_nfl_pickle=True,
                copy_quivered=True)

    DRIVER_BASE = "python -m pbtranscript.tasks.gather_polished_isoforms_in_each_bin"
    INPUT_FILES = [
        out_polish_chunks_pickle,  # input 0, polish_chunk.pickle
        done_txt
    ]  # idx 1, sentinel file

    def run_after(self, rtc, output_dir):
        self.assertTrue(op.exists(rtc.task.output_files[0]))

        out_dir = op.join(OUT_DIR, "test_gather_polished_isoforms_in_each_bin")
        cluster_out_dirs = [
            op.join(out_dir, bin_name, "cluster_out") for bin_name in BIN_NAMES
        ]
        out_hq_fns = [
            op.join(d, fn) for d in cluster_out_dirs for fn in HQ_ISOFORMS_FNS
        ]
        print "out_hq_fns %s" % out_hq_fns
        self.assertTrue(all([op.exists(f) for f in out_hq_fns]))

        out_lq_fns = [
            op.join(d, fn) for d in cluster_out_dirs for fn in LQ_ISOFORMS_FNS
        ]
        print "out_lq_fns %s" % out_lq_fns
        self.assertTrue(all([op.exists(f) for f in out_lq_fns]))

        print "out_lq_fa %s is not empty" % out_lq_fns[0]
        n = len([r for r in FastaReader(out_lq_fns[0])])
        self.assertTrue(n > 0)

        out_logs = [
            IceFiles(prog_name="", root_dir=d).submitted_quiver_jobs_log
            for d in cluster_out_dirs
        ]
        print "out_logs %s" % out_logs
        self.assertTrue(all([op.exists(f) for f in out_logs]))
Code Example #11
class TestCombineClusterBins(pbcommand.testkit.PbTestApp):
    """Call python -m pbtranscript.tasks.combine_cluster_bins --resolved-tool-contract rtc.json"""
    out_dir = op.join(OUT_DIR, "test_combine_cluster_bins")
    mknewdir(out_dir)

    out_cluster_chunks_pickle = op.join(out_dir, "cluster_chunks.pickle")
    make_pickle(in_pickle=cluster_chunks_pickle,
                out_pickle=out_cluster_chunks_pickle,
                root_dir=out_dir,
                copy_consensus_isoforms=True,
                copy_flnc_pickle=True,
                copy_nfl_pickle=True)

    cluster_out_dirs = [
        op.join(out_dir, bin_name, "cluster_out") for bin_name in BIN_NAMES
    ]

    for D, d in zip(CLUSTER_OUT_DIRS, cluster_out_dirs):
        polish_log = op.join("log", "submitted_quiver_jobs.txt")
        shutil.copy(op.join(D, polish_log), op.join(d, polish_log))
        for fn in HQ_ISOFORMS_FNS + LQ_ISOFORMS_FNS:
            shutil.copy(op.join(D, fn), op.join(d, fn))

    DRIVER_BASE = "python -m pbtranscript.tasks.combine_cluster_bins"
    INPUT_FILES = [
        out_cluster_chunks_pickle,  # input 0, cluster_chunk.pickle
        done_txt
    ]  # idx 1, sentinel file

    def run_after(self, rtc, output_dir):
        self.assertTrue(all(op.exists(rtc.task.output_files[i])
                            for i in range(7)))

        out_dir = op.join(OUT_DIR, "test_combine_cluster_bins")
        cluster_out_dirs = [
            op.join(out_dir, bin_name, "cluster_out") for bin_name in BIN_NAMES
        ]

        combined_lq_cs = rtc.task.output_files[5]
        print "combined_lq_fa %s must not be empty" % combined_lq_cs
        n = len([r for r in ContigSet(combined_lq_cs)])
        self.assertTrue(n > 0)

        out_logs = [
            IceFiles(prog_name="", root_dir=d).submitted_quiver_jobs_log
            for d in cluster_out_dirs
        ]
        print "out_logs %s" % out_logs
        self.assertTrue(all([op.exists(f) for f in out_logs]))
Code Example #12
class TestCreateChunks(pbcommand.testkit.PbTestApp):
    """Call python -m pbtranscript.tasks.create_chunks --resolved-tool-contract rtc.json"""
    out_dir = op.join(OUT_DIR, "test_create_chunks")
    mknewdir(out_dir)
    separate_flnc_pickle = op.join(out_dir, "separate_flnc.pickle")
    call_separate_flnc(flnc_ds=flnc_ds,
                       out_dir=out_dir,
                       out_pickle=separate_flnc_pickle)

    DRIVER_BASE = "python -m pbtranscript.tasks.create_chunks"
    INPUT_FILES = [
        separate_flnc_pickle,  # input 0, separate_flnc.pickle
        nfl_ds
    ]  # input 1, nfl.xml

    def run_after(self, rtc, output_dir):
        print rtc.task.output_files[0]
        print rtc.task.output_files[1]
        print rtc.task.output_files[2]
Code Example #13
class TestIcePolishClusterBins(pbcommand.testkit.PbTestApp):
    """Call python -m pbtranscript.tasks.ice_polish_cluster_bins --resolved-tool-contract rtc.json"""
    out_dir = op.join(OUT_DIR, "test_ice_polish_cluster_bins")
    mknewdir(out_dir)

    out_polish_chunks_pickle = op.join(out_dir, "polish_chunks.pickle")
    make_pickle(in_pickle=polish_chunks_pickle,
                out_pickle=out_polish_chunks_pickle,
                root_dir=out_dir,
                copy_consensus_isoforms=True,
                copy_flnc_pickle=True,
                copy_nfl_pickle=True)

    DRIVER_BASE = "python -m pbtranscript.tasks.ice_polish_cluster_bins"
    INPUT_FILES = [
        out_polish_chunks_pickle,  # input 0, polish_chunk.pickle
        done_txt,  # idx 1, sentinel file
        subreads_ds
    ]  # idx 2, subreads.bam

    def run_after(self, rtc, output_dir):
        self.assertTrue(op.exists(rtc.task.output_files[0]))
コード例 #20
0
    def _test_daligner_against_ref(self,
                                   test_name,
                                   use_sge,
                                   sge_opts,
                                   prob_model_from="fake"):
        """Test daligner_against_ref with and without using sge."""
        copy_dir = op.join(self.dataDir, "test_daligner_against_ref")
        output_dir = op.join(self.outDir, test_name)
        mknewdir(output_dir)

        qname, tname = "test_daligner_query.fasta", "test_daligner_target.fasta"
        query_filename = op.join(output_dir, qname)
        target_filename = op.join(output_dir, tname)

        prob_model = None
        if prob_model_from == "fake":
            prob_model = ProbFromModel(0.01, 0.07, 0.06)
        elif prob_model_from == "fastq":
            fastq_fn = op.join(copy_dir, "test_daligner_reads.fastq")
            prob_model = ProbFromFastq(fastq_fn)
        else:
            self.assertTrue(False)

        qver_get_func = prob_model.get_smoothed
        qvmean_get_func = prob_model.get_mean

        dummy_o, c, dummy_m = backticks(
            "cp %s %s" % (op.join(copy_dir, qname), query_filename))
        self.assertTrue(c == 0)

        dummy_o, c, dummy_m = backticks(
            "cp %s %s" % (op.join(copy_dir, tname), target_filename))
        self.assertTrue(c == 0)

        old_dir = os.getcwd()
        os.chdir(output_dir)

        runner = DalignerRunner(query_filename=query_filename,
                                target_filename=target_filename,
                                is_FL=True,
                                same_strand_only=True,
                                use_sge=use_sge,
                                sge_opts=sge_opts)
        runner.run(output_dir=op.join(self.outDir, test_name))

        hits = []

        for la4ice_filename in runner.la4ice_filenames:
            hits.extend(
                daligner_against_ref(
                    query_dazz_handler=runner.query_dazz_handler,
                    target_dazz_handler=runner.target_dazz_handler,
                    la4ice_filename=la4ice_filename,
                    is_FL=True,
                    sID_starts_with_c=False,
                    qver_get_func=qver_get_func,
                    qvmean_get_func=qvmean_get_func))
        # Num of hits may change when daligner or parameters change.
        self.assertTrue(len(hits), 706)
        self.assertEqual(
            str(hits[0]),
            "m54007_160109_025449/27984844/29_646_CCS/0_617 aligns to m54007_160109_025449/28836279/631_54_CCS"
        )
        os.chdir(output_dir)
Code Example #14
File: IceDalign.py  Project: wenmore/cDNA_Cupcake
    def run(self, output_dir='.', min_match_len=300, sensitive_mode=False):
        """
        if self.use_sge --- writes to <scripts>/daligner_job_#.sh
        else --- run locally, dividing into self.cpus/4 tasks (capped max at 4)

        NOTE 1: when using SGE, be careful that multiple calls to this might
        end up writing to the SAME job.sh files, this should be avoided by
        changing <scripts> directory

        NOTE 2: more commonly this should be invoked locally
        (since ice_partial.py i/one be qsub-ed),
        in that case it is more recommended to keep self.cpus = 4 so that
        each daligner job is run consecutively and that the original qsub job
        should have been called with qsub -pe smp 4 (set by --blasr_nproc 4)
        In this way, the daligner jobs are called consecutively, but LA4Ice
        is parallelized 4X
        """
        self.output_dir = realpath(output_dir) # Reset output_dir
        old_dir = realpath(op.curdir)
        mkdir(output_dir)
        os.chdir(output_dir)

        if self.use_sge:
            mknewdir(self.script_dir)

        # Preparing "done" scripts is no longer necessary.
        #self.write_daligner_done_script()
        #self.write_la4ice_done_script()

        # (a) run all daligner jobs
        daligner_cmds = self.daligner_cmds(min_match_len=min_match_len,
                                           sensitive_mode=sensitive_mode)

        logging.info("Start daligner cmds " +
                     ("using sge." if self.use_sge else "locally."))
        logging.debug("CMD: " + "\n".join(daligner_cmds))

        start_t = time.time()
        failed = []
        if self.use_sge:
            failed.extend(
                sge_job_runner(cmds_list=daligner_cmds,
                               script_files=self.daligner_scripts,
                               #done_script=self.daligner_done_script,
                               num_threads_per_job=DALIGNER_NUM_THREADS,
                               sge_opts=self.sge_opts, qsub_try_times=3,
                               wait_timeout=600, run_timeout=600,
                               rescue="sge", rescue_times=3))
        else:
            # max 4 at a time to avoid running out of memory...
            failed.extend(
                local_job_runner(cmds_list=daligner_cmds,
                                 num_threads=max(1, min(self.cpus/4, 4))))
        logging.info("daligner jobs took " + str(time.time()-start_t) + " sec.")

        # (b) run all LA4Ice jobs
        start_t = time.time()
        logging.info("Start LA4Ice cmds " +
                     ("using sge." if self.use_sge else "locally."))
        la4ice_cmds = self.la4ice_cmds
        logging.debug("CMD: " + "\n".join(la4ice_cmds))

        if self.use_sge:
            failed.extend(
                sge_job_runner(cmds_list=la4ice_cmds,
                               script_files=self.la4ice_scripts,
                               #done_script=self.la4ice_done_script,
                               num_threads_per_job=DALIGNER_NUM_THREADS,
                               sge_opts=self.sge_opts, qsub_try_times=3,
                               wait_timeout=600, run_timeout=600,
                               rescue="sge", rescue_times=3))
        else:
            # max 4 at a time to avoid running out of memory...
            failed.extend(
                local_job_runner(cmds_list=la4ice_cmds,
                                 num_threads=max(1, min(self.cpus, 4))))
        logging.info("LA4Ice jobs took " + str(time.time()-start_t) + " sec.")
        os.chdir(old_dir)

        if len(failed) == 0:
            return 0
        else:
            raise RuntimeError("%s.run failed, %s." %
                               (self.__class__.__name__,
                                "\n".join([x[0] for x in failed])))