コード例 #1
0
 def test_getBestFrontBackRecord(self):
     """Test function _getBestFrontBackRecord().

     Runs Classifier._getBestFrontBackRecord() on
     data/test_parseHmmDom.dom, dumps the two returned mappings to
     out/test_parseHmmDom_dFront.txt and out/test_parseHmmDom_dBack.txt,
     and checks each dump against the file of the same name under
     stdout/.
     """
     obj = Classifier()
     domFN = op.join(self.testDir, "data/test_parseHmmDom.dom")
     front, back = obj._getBestFrontBackRecord(domFN)

     # In the following, verify the front and back are equivalent
     # to stdout/test_parseHmmDom_dFront/Back.txt
     def prettystr(d):
         """Return a pretty-print string for a front or back mapping.

         d maps a string key to an inner mapping; each outer key is
         emitted as "key:" followed by one "k:v" line per inner item.
         """
         return "\n".join(
             [key + ":\n" + "\n".join(
                 [k + ":" + str(v) for k, v in val.iteritems()])
              for key, val in d.iteritems()])

     frontFN = op.join(self.testDir, "out/test_parseHmmDom_dFront.txt")
     backFN = op.join(self.testDir, "out/test_parseHmmDom_dBack.txt")
     # Context managers close (and therefore flush) each dump even if
     # formatting or writing raises, so no file handle is leaked.
     with open(frontFN, 'w') as f:
         f.write(prettystr(front))
     with open(backFN, 'w') as f:
         f.write(prettystr(back))

     stdoutFrontFN = op.join(self.testDir,
                             "stdout/test_parseHmmDom_dFront.txt")
     stdoutBackFN = op.join(self.testDir,
                            "stdout/test_parseHmmDom_dBack.txt")
     self.assertTrue(filecmp.cmp(frontFN, stdoutFrontFN))
     self.assertTrue(filecmp.cmp(backFN, stdoutBackFN))
コード例 #2
0
    def test_chunkReads(self):
        """Test function _chunkReads(readsFN, chunkSize, chunkedReadsFNs).

        Chunks data/test_chunkReads_1.fa with a chunk size of 10 into a
        single output file and compares that file with the expected one
        under stdout/.
        """
        classifier = Classifier()
        test_dir = self.testDir
        source_fa = op.join(test_dir, "data/test_chunkReads_1.fa")
        produced_fa = op.join(test_dir, "out/test_chunkReads_1.fa")

        # Drop any output left over from an earlier run before regenerating.
        if op.exists(produced_fa):
            os.remove(produced_fa)

        expected_fa = op.join(test_dir, "stdout/test_chunkReads_1.fa")

        classifier._chunkReads(source_fa, 10, [produced_fa])
        matches_expected = filecmp.cmp(produced_fa, expected_fa)
        self.assertTrue(matches_expected)
コード例 #3
0
    def test_processPrimers(self):
        """Test function _processPrimers().

        Each scenario runs _processPrimers() with a window size of 50 and
        compares the generated primer file with the expected file under
        stdout/.
        """
        classifier = Classifier()

        # (input primers, generated output, expected output, revcmp_primers)
        scenarios = (
            # Artificial example.
            ("data/test_primers_in.fa",
             "out/test_primers_out.fa",
             "stdout/test_primers_out.fa",
             False),
            # Real PacBio primers.fa, output used for primer detection.
            ("data/primers.fa",
             "out/test_primers_out_2.fa",
             "stdout/test_primers_out_2.fa",
             False),
            # Real PacBio primers.fa, output used for chimera detection.
            ("data/primers.fa",
             "out/test_primers_out_3.fa",
             "stdout/test_primers_out_3.fa",
             True),
        )

        for in_rel, out_rel, expected_rel, revcmp in scenarios:
            primers_in = op.join(self.testDir, in_rel)
            primers_out = op.join(self.testDir, out_rel)
            primers_expected = op.join(self.testDir, expected_rel)
            classifier._processPrimers(primer_fn=primers_in,
                                       window_size=50,
                                       primer_out_fn=primers_out,
                                       revcmp_primers=revcmp)
            self.assertTrue(filecmp.cmp(primers_out, primers_expected))
コード例 #4
0
 def test_findPolyA(self):
     """Test function _findPolyA(seq, minANum, p3Start).

     Calls _findPolyA() with only the sequence argument on three
     sequences and checks the returned value for each (188, 196, -1).
     """
     classifier = Classifier()
     first_seq = (
         "GTGAAGTAGGTGTCCCGCACCAAGGCACGGAGCCAGAGAGGTGTGGGTGC"
         "TAAAAGCCACCCGTTAGGACCCAGAGCAGCTGAAGCTGGATGCGAAAGGA"
         "TACAGGCTTAGTAGCCATGGAGACCAAACTGGAACAAATGCCGACTGGAA"
         "AGTGTATCTTATAACTTATTAAATAAAATGTTTGCTCCACGAAAAAAAAA"
         "AAAAAAAAAAAAAAGTACTCTGCGTTGATACCACTGCTT"
     )
     second_seq = (
         "TGGTTGGTCGGCGTTTAGCTTTGTGAGGCTCCCTGAACAGAAACACTGTT"
         "GGAAGAAGAGTCCCCTGACATCACCCAGCGTCAAGTGGGAGTTAGCCTCT"
         "GAAGTTCAGTGTATCACGTTAATGCTAATATGCTTTGTGGTGGCAGAATT"
         "TATTTTGGCTTTTTGTCATTTAGCCAAATTAAAGGCAAACGCGTTTCTAA"
         "AAAAAAAAAAAAAAAAAAAAGTAGCTCTGCGTTTGATACCACTGCTT"
     )
     third_seq = "TATTTTGGCTTTTTGTCATTTAGCCAAATTAAAGGCAAACGCGTTTCTAA"

     # Checked in order, so the first mismatch is the one reported.
     expectations = (
         (first_seq, 188),
         (second_seq, 196),
         (third_seq, -1),
     )
     for sequence, expected in expectations:
         self.assertEqual(classifier._findPolyA(sequence), expected)
コード例 #5
0
    def test_pickBestPrimerCombo(self):
        """Test function _pickBestPrimerCombo().

        Loads front/back records from data/test_parseHmmDom.dom and, for
        the CCS reads of ZMWs 43, 45 and 54, checks the tuple returned by
        _pickBestPrimerCombo(front, back, [0, 1], 10). Elements 2 and 3 of
        the result are compared with DOMRecord objects through their
        string form, or are expected to be None.
        """
        obj = Classifier()
        domFN = op.join(self.testDir, "data/test_parseHmmDom.dom")
        front, back = obj._getBestFrontBackRecord(domFN)

        # Now pick up the best primer combo
        movie = "m131018_081703_42161_c100585152550000001823088404281404_s1_p0"
        rids = [movie + "/" + str(zmw) + "/ccs" for zmw in [43, 45, 54]]

        # ZMW 43: neither record is expected.
        res = obj._pickBestPrimerCombo(
            front[rids[0]], back[rids[0]], [0, 1], 10)
        # assertIsNone/assertEqual report the offending values on failure,
        # which assertTrue(<comparison>) cannot do.
        self.assertIsNone(res[2])
        self.assertIsNone(res[3])

        # ZMW 45: both records are expected.
        res = obj._pickBestPrimerCombo(
            front[rids[1]], back[rids[1]], [0, 1], 10)

        fw = DOMRecord("F1", movie + "/45/ccs", 33.0, 0, 30, 31, 0, 30, 100)
        rc = DOMRecord("R1", movie + "/45/ccs", 27.2, 0, 25, 25, 0, 25, 100)
        self.assertEqual(res[0], 1)
        self.assertEqual(res[1], "+")
        self.assertEqual(str(fw), str(res[2]))
        self.assertEqual(str(rc), str(res[3]))

        # ZMW 54: only the second record is expected.
        res = obj._pickBestPrimerCombo(
            front[rids[2]], back[rids[2]], [0, 1], 10)
        rc = DOMRecord("R1", movie + "/54/ccs", 22.3, 0, 25, 25, 0, 27, 100)
        self.assertEqual(res[0], 1)
        self.assertEqual(res[1], "+")
        self.assertIsNone(res[2])
        self.assertEqual(str(res[3]), str(rc))
コード例 #6
0
    def run(self):
        """Dispatch the sub-command stored in self.args.subCommand.

        Handles 'classify', 'cluster' and 'subset'; each builds its option
        objects from self.args, constructs the worker and calls its run().
        Any other value raises PBTranscriptException inside the guarded
        section.

        Returns 0 on success, or 1 when any Exception is raised while
        dispatching (the exception is logged with its traceback).
        """
        args = self.args
        cmd = args.subCommand
        try:
            if cmd == 'classify':
                chimera_opts = ChimeraDetectionOptions(
                    min_seq_len=args.min_seq_len,
                    min_score=args.min_score,
                    min_dist_from_end=args.min_dist_from_end,
                    max_adjacent_hit_dist=args.max_adjacent_hit_dist,
                    primer_search_window=args.primer_search_window,
                    detect_chimera_nfl=args.detect_chimera_nfl)

                classifier = Classifier(
                    reads_fn=args.readsFN,
                    out_dir=args.outDir,
                    out_reads_fn=args.outReadsFN,
                    primer_fn=args.primerFN,
                    primer_report_fn=args.primerReportFN,
                    summary_fn=args.summary_fn,
                    cpus=args.cpus,
                    change_read_id=True,
                    opts=chimera_opts,
                    out_flnc_fn=args.flnc_fa,
                    out_nfl_fn=args.nfl_fa,
                    ignore_polyA=args.ignore_polyA,
                    reuse_dom=args.reuse_dom)
                classifier.run()
            elif cmd == 'cluster':
                ice_opts = IceOptions(
                    cDNA_size=args.cDNA_size,
                    quiver=args.quiver,
                    use_finer_qv=args.use_finer_qv)
                sge_opts = SgeOptions(
                    unique_id=args.unique_id,
                    use_sge=args.use_sge,
                    max_sge_jobs=args.max_sge_jobs,
                    blasr_nproc=args.blasr_nproc,
                    quiver_nproc=args.quiver_nproc)
                ipq_opts = IceQuiverHQLQOptions(
                    qv_trim_5=args.qv_trim_5,
                    qv_trim_3=args.qv_trim_3,
                    hq_quiver_min_accuracy=args.hq_quiver_min_accuracy,
                    hq_isoforms_fa=args.hq_isoforms_fa,
                    hq_isoforms_fq=args.hq_isoforms_fq,
                    lq_isoforms_fa=args.lq_isoforms_fa,
                    lq_isoforms_fq=args.lq_isoforms_fq)

                cluster = Cluster(
                    root_dir=args.root_dir,
                    flnc_fa=args.flnc_fa,
                    nfl_fa=args.nfl_fa,
                    bas_fofn=args.bas_fofn,
                    ccs_fofn=args.ccs_fofn,
                    fasta_fofn=args.fasta_fofn,
                    out_fa=args.consensusFa,
                    sge_opts=sge_opts,
                    ice_opts=ice_opts,
                    ipq_opts=ipq_opts,
                    report_fn=args.report_fn,
                    summary_fn=args.summary_fn,
                    nfl_reads_per_split=args.nfl_reads_per_split)
                cluster.run()
            elif cmd == 'subset':
                subset_rules = SubsetRules(
                    FL=args.FL,
                    nonChimeric=args.nonChimeric)

                extractor = ReadsSubsetExtractor(
                    inFN=args.readsFN,
                    outFN=args.outFN,
                    rules=subset_rules,
                    ignore_polyA=args.ignore_polyA,
                    printReadLengthOnly=args.printReadLengthOnly)
                extractor.run()
            else:
                raise PBTranscriptException(
                    cmd,
                    "Unknown command passed to pbtranscript.py:"
                    + args.subName)
        except Exception:
            # Top-level boundary: log the traceback and report failure
            # through the return code instead of propagating.
            logging.exception("Exiting pbtranscript with return code 1.")
            return 1
        return 0
コード例 #7
0
ファイル: pbtranscript.py プロジェクト: avrajit/cDNA_primer
    def run(self):
        """Dispatch the sub-command stored in self.args.subCommand.

        Handles 'classify', 'cluster' and 'subset'; each builds its option
        objects from self.args, constructs the worker and calls its run().
        Any other value raises PBTranscriptException inside the guarded
        section.

        Returns 0 on success, or 1 when any Exception is raised while
        dispatching (the exception is logged together with its traceback).
        """
        cmd = self.args.subCommand
        try:
            if cmd == 'classify':
                opts = ChimeraDetectionOptions(
                    min_seq_len=self.args.min_seq_len,
                    min_score=self.args.min_score,
                    min_dist_from_end=self.args.min_dist_from_end,
                    max_adjacent_hit_dist=self.args.max_adjacent_hit_dist,
                    primer_search_window=self.args.primer_search_window)

                obj = Classifier(reads_fn=self.args.readsFN,
                                 out_dir=self.args.outDir,
                                 out_reads_fn=self.args.outReadsFN,
                                 primer_fn=self.args.primerFN,
                                 primer_report_fn=self.args.primerReportFN,
                                 summary_fn=self.args.summary_fn,
                                 cpus=self.args.cpus,
                                 change_read_id=True,
                                 opts=opts,
                                 out_flnc_fn=self.args.flnc_fa,
                                 out_nfl_fn=self.args.nfl_fa,
                                 ignore_polyA=self.args.ignore_polyA)
                obj.run()
            elif cmd == 'cluster':
                ice_opts = IceOptions(cDNA_size=self.args.cDNA_size,
                                      quiver=self.args.quiver)
                sge_opts = SgeOptions(unique_id=self.args.unique_id,
                                      use_sge=self.args.use_sge,
                                      max_sge_jobs=self.args.max_sge_jobs,
                                      blasr_nproc=self.args.blasr_nproc,
                                      quiver_nproc=self.args.quiver_nproc)

                obj = Cluster(root_dir=self.args.root_dir,
                              flnc_fa=self.args.flnc_fa,
                              nfl_fa=self.args.nfl_fa,
                              bas_fofn=self.args.bas_fofn,
                              ccs_fofn=self.args.ccs_fofn,
                              out_fa=self.args.consensusFa,
                              sge_opts=sge_opts,
                              ice_opts=ice_opts,
                              hq_isoforms_fa=self.args.hq_isoforms_fa,
                              hq_isoforms_fq=self.args.hq_isoforms_fq,
                              lq_isoforms_fa=self.args.lq_isoforms_fa,
                              lq_isoforms_fq=self.args.lq_isoforms_fq,
                              report_fn=self.args.report_fn,
                              summary_fn=self.args.summary_fn)
                obj.run()

            elif cmd == 'subset':
                rules = SubsetRules(FL=self.args.FL,
                                    nonChimeric=self.args.nonChimeric)

                obj = ReadsSubsetExtractor(
                    inFN=self.args.readsFN,
                    outFN=self.args.outFN,
                    rules=rules,
                    ignore_polyA=self.args.ignore_polyA,
                    printReadLengthOnly=self.args.printReadLengthOnly)
                obj.run()
            else:
                raise PBTranscriptException(
                    cmd,
                    "Unknown command passed to pbtranscript.py:" +
                    self.args.subName)
        except Exception as err:
            # Top-level boundary: keep the original message text but also
            # record the traceback, which logging.error(str(err)) dropped.
            # Passing err as a lazy argument lets logging handle any
            # formatting failure itself rather than raising from here.
            logging.exception("%s", err)
            return 1
        return 0
コード例 #8
0
ファイル: pbtranscript.py プロジェクト: 52teth/cDNA_primer
    def run(self):
        """Dispatch the sub-command stored in self.args.subCommand.

        Handles 'classify', 'cluster' and 'subset'; each builds its option
        objects from self.args, constructs the worker and calls its run().
        Any other value raises PBTranscriptException inside the guarded
        section.

        Returns 0 on success, or 1 when any Exception is raised while
        dispatching (the exception is logged with its traceback).
        """
        args = self.args
        cmd = args.subCommand
        try:
            if cmd == 'classify':
                chimera_opts = ChimeraDetectionOptions(
                    min_seq_len=args.min_seq_len,
                    min_score=args.min_score,
                    min_dist_from_end=args.min_dist_from_end,
                    max_adjacent_hit_dist=args.max_adjacent_hit_dist,
                    primer_search_window=args.primer_search_window,
                    detect_chimera_nfl=args.detect_chimera_nfl)

                classifier = Classifier(
                    reads_fn=args.readsFN,
                    out_dir=args.outDir,
                    out_reads_fn=args.outReadsFN,
                    primer_fn=args.primerFN,
                    primer_report_fn=args.primerReportFN,
                    summary_fn=args.summary_fn,
                    cpus=args.cpus,
                    change_read_id=True,
                    opts=chimera_opts,
                    out_flnc_fn=args.flnc_fa,
                    out_nfl_fn=args.nfl_fa,
                    ignore_polyA=args.ignore_polyA,
                    keep_primer=args.keep_primer,
                    reuse_dom=args.reuse_dom)
                classifier.run()
            elif cmd == 'cluster':
                ice_opts = IceOptions(
                    quiver=args.quiver,
                    use_finer_qv=args.use_finer_qv,
                    targeted_isoseq=args.targeted_isoseq,
                    ece_penalty=args.ece_penalty,
                    ece_min_len=args.ece_min_len)
                sge_opts = SgeOptions(
                    unique_id=args.unique_id,
                    use_sge=args.use_sge,
                    max_sge_jobs=args.max_sge_jobs,
                    blasr_nproc=args.blasr_nproc,
                    quiver_nproc=args.quiver_nproc,
                    gcon_nproc=args.gcon_nproc,
                    sge_env_name=args.sge_env_name,
                    sge_queue=args.sge_queue)
                ipq_opts = IceQuiverHQLQOptions(
                    qv_trim_5=args.qv_trim_5,
                    qv_trim_3=args.qv_trim_3,
                    hq_quiver_min_accuracy=args.hq_quiver_min_accuracy,
                    hq_isoforms_fa=args.hq_isoforms_fa,
                    hq_isoforms_fq=args.hq_isoforms_fq,
                    lq_isoforms_fa=args.lq_isoforms_fa,
                    lq_isoforms_fq=args.lq_isoforms_fq)

                cluster = Cluster(
                    root_dir=args.root_dir,
                    flnc_fa=args.flnc_fa,
                    nfl_fa=args.nfl_fa,
                    bas_fofn=args.bas_fofn,
                    ccs_fofn=args.ccs_fofn,
                    fasta_fofn=args.fasta_fofn,
                    out_fa=args.consensusFa,
                    sge_opts=sge_opts,
                    ice_opts=ice_opts,
                    ipq_opts=ipq_opts,
                    report_fn=args.report_fn,
                    summary_fn=args.summary_fn,
                    nfl_reads_per_split=args.nfl_reads_per_split)
                cluster.run()
            elif cmd == 'subset':
                subset_rules = SubsetRules(
                    FL=args.FL,
                    nonChimeric=args.nonChimeric)

                extractor = ReadsSubsetExtractor(
                    inFN=args.readsFN,
                    outFN=args.outFN,
                    rules=subset_rules,
                    ignore_polyA=args.ignore_polyA,
                    printReadLengthOnly=args.printReadLengthOnly)
                extractor.run()
            else:
                raise PBTranscriptException(
                    cmd,
                    "Unknown command passed to pbtranscript.py:"
                    + args.subName)
        except Exception:
            # Top-level boundary: log the traceback and report failure
            # through the return code instead of propagating.
            logging.exception("Exiting pbtranscript with return code 1.")
            return 1
        return 0
コード例 #9
0
    def run(self):
        """Dispatch the sub-command stored in self.args.subCommand.

        Handles 'classify', 'cluster' and 'subset'; each builds its option
        objects from self.args, constructs the worker and calls its run().
        Any other value raises PBTranscriptException inside the guarded
        section.

        Returns 0 on success, or 1 when any Exception is raised while
        dispatching (the exception is logged together with its traceback).
        """
        cmd = self.args.subCommand
        try:
            if cmd == 'classify':
                opts = ChimeraDetectionOptions(
                    min_seq_len=self.args.min_seq_len,
                    min_score=self.args.min_score,
                    min_dist_from_end=self.args.min_dist_from_end,
                    max_adjacent_hit_dist=self.args.max_adjacent_hit_dist,
                    primer_search_window=self.args.primer_search_window)

                obj = Classifier(reads_fn=self.args.readsFN,
                                 out_dir=self.args.outDir,
                                 out_reads_fn=self.args.outReadsFN,
                                 primer_fn=self.args.primerFN,
                                 primer_report_fn=self.args.primerReportFN,
                                 summary_fn=self.args.summary_fn,
                                 cpus=self.args.cpus,
                                 change_read_id=True,
                                 opts=opts,
                                 out_flnc_fn=self.args.flnc_fa,
                                 out_nfl_fn=self.args.nfl_fa,
                                 ignore_polyA=self.args.ignore_polyA)
                obj.run()
            elif cmd == 'cluster':
                ice_opts = IceOptions(cDNA_size=self.args.cDNA_size,
                                      quiver=self.args.quiver)
                sge_opts = SgeOptions(unique_id=self.args.unique_id,
                                      use_sge=self.args.use_sge,
                                      max_sge_jobs=self.args.max_sge_jobs,
                                      blasr_nproc=self.args.blasr_nproc,
                                      quiver_nproc=self.args.quiver_nproc)

                obj = Cluster(root_dir=self.args.root_dir,
                              flnc_fa=self.args.flnc_fa,
                              nfl_fa=self.args.nfl_fa,
                              bas_fofn=self.args.bas_fofn,
                              ccs_fofn=self.args.ccs_fofn,
                              out_fa=self.args.consensusFa,
                              sge_opts=sge_opts,
                              ice_opts=ice_opts,
                              hq_isoforms_fa=self.args.hq_isoforms_fa,
                              hq_isoforms_fq=self.args.hq_isoforms_fq,
                              lq_isoforms_fa=self.args.lq_isoforms_fa,
                              lq_isoforms_fq=self.args.lq_isoforms_fq,
                              report_fn=self.args.report_fn,
                              summary_fn=self.args.summary_fn)
                obj.run()

            elif cmd == 'subset':
                rules = SubsetRules(FL=self.args.FL,
                                    nonChimeric=self.args.nonChimeric)

                obj = ReadsSubsetExtractor(
                    inFN=self.args.readsFN,
                    outFN=self.args.outFN,
                    rules=rules,
                    ignore_polyA=self.args.ignore_polyA,
                    printReadLengthOnly=self.args.printReadLengthOnly)
                obj.run()
            else:
                raise PBTranscriptException(
                    cmd, "Unknown command passed to pbtranscript.py:" +
                    self.args.subName)
        except Exception as err:
            # Top-level boundary: keep the original message text but also
            # record the traceback, which logging.error(str(err)) dropped.
            # Passing err as a lazy argument lets logging handle any
            # formatting failure itself rather than raising from here.
            logging.exception("%s", err)
            return 1
        return 0