예제 #1
0
    def write_summary(self, summary_fn, isoforms_fa, hq_fa=None, lq_fa=None):
        """Count consensus isoforms and write the counts to summary_fn.

        Reads every record of isoforms_fa, accumulating the number of
        records and the total length of their sequences in a fresh
        ClusterSummary, then writes that summary to summary_fn.

        Parameters:
          summary_fn  - path the summary is written to.
          isoforms_fa - FASTA file of consensus isoforms.
          hq_fa       - optional FASTA file; when not None, its record count
                        is stored as num_polished_hq_isoforms (polished
                        high-quality isoforms).
          lq_fa       - optional FASTA file; when not None, its record count
                        is stored as num_polished_lq_isoforms (presumably
                        polished low-quality isoforms).

        Raises:
          RuntimeError - if a ZeroDivisionError occurs while collecting or
                         writing the summary; it is logged and reported as
                         "No consensus isoforms predicted."
        """
        self.add_log("Writing a summary to {f}".format(f=summary_fn),
                     level=logging.INFO)

        def _count_records(fasta_fn):
            """Return the number of records found in fasta_fn."""
            with FastaReader(fasta_fn) as records:
                return sum(1 for _ in records)

        try:
            summary = ClusterSummary()

            # Consensus isoforms: tally both record count and total bases.
            with FastaReader(isoforms_fa) as records:
                for record in records:
                    summary.numConsensusIsoforms += 1
                    summary.numTotalBases += len(record.sequence)

            # The polished counts are only reported for files that were given.
            if hq_fa is not None:
                summary.num_polished_hq_isoforms = _count_records(hq_fa)
            if lq_fa is not None:
                summary.num_polished_lq_isoforms = _count_records(lq_fa)

            summary.write(summary_fn)
        except ZeroDivisionError:
            errMsg = "No consensus isoforms predicted."
            self.add_log(errMsg, level=logging.ERROR)
            raise RuntimeError(errMsg)
예제 #2
0
    def test_write(self):
        """Test ClusterSummary.write.

        Writes a summary with known counts to the "out" directory and checks
        that it is byte-for-byte identical to the expected file stored under
        the "stdout" directory.
        """
        outFN = op.join(self.testDir, "out/test_ClusterSummary.txt")
        stdoutFN = op.join(self.testDir, "stdout/test_ClusterSummary.txt")

        obj = ClusterSummary()
        obj.numConsensusIsoforms = 97
        obj.numTotalBases = 97 * 3945

        obj.write(outFN)
        # shallow=False forces a content comparison; the default (shallow)
        # mode accepts files whose os.stat signatures merely match.
        self.assertTrue(filecmp.cmp(outFN, stdoutFN, shallow=False))
예제 #3
0
    def __init__(self,
                 root_dir,
                 flnc_fa,
                 nfl_fa,
                 bas_fofn,
                 ccs_fofn,
                 out_fa,
                 sge_opts,
                 ice_opts,
                 hq_isoforms_fa=None,
                 hq_isoforms_fq=None,
                 lq_isoforms_fa=None,
                 lq_isoforms_fq=None,
                 report_fn=None,
                 summary_fn=None):
        """Validate inputs/outputs and set up the state of a Cluster object.

        Parameters:
          root_dir  - passed to the base initializer and to _validateOutputs;
                      the validated value becomes self.root_dir.
          flnc_fa, nfl_fa, ccs_fofn
                    - inputs checked by _validateInputs; the returned values
                      are stored on the instance.
          bas_fofn  - forwarded to the base initializer.
          out_fa    - output file checked by _validateOutputs.
          sge_opts  - SGE, CPU options and etc.
          ice_opts  - the ICE clustering algorithm options.
          hq_isoforms_fa, hq_isoforms_fq, lq_isoforms_fa, lq_isoforms_fq
                    - optional file names, stored unchanged.
          report_fn - optional report path; defaults to
                      "cluster_report.csv" under self.root_dir.
          summary_fn - optional summary path; defaults to
                      "cluster_summary.txt" under self.root_dir.
        """
        super(Cluster, self).__init__(prog_name="Cluster", root_dir=root_dir,
                                      bas_fofn=bas_fofn, ccs_fofn=ccs_fofn)

        # Validated inputs replace the raw arguments.
        checked_inputs = self._validateInputs(flnc_fa, nfl_fa, ccs_fofn)
        self.flnc_fa, self.nfl_fa, self.ccs_fofn = checked_inputs

        checked_outputs = self._validateOutputs(root_dir, out_fa)
        self.root_dir, self.out_fa = checked_outputs

        # Optional isoform outputs are kept exactly as given.
        self.hq_isoforms_fa, self.hq_isoforms_fq = (hq_isoforms_fa,
                                                    hq_isoforms_fq)
        self.lq_isoforms_fa, self.lq_isoforms_fq = (lq_isoforms_fa,
                                                    lq_isoforms_fq)

        # SGE/CPU options first, then the ICE clustering algorithm options.
        self.sge_opts, self.ice_opts = sge_opts, ice_opts

        self.sanity_check()

        # Probability & quality value; nothing is loaded at construction.
        self._probqv = None

        # Lists for the smaller files that flnc_fa and nfl_fa are split into.
        self._flnc_splitted_fas, self._nflncSplittedFas = [], []

        # Log configurations.
        self._logConfigs()

        # All four start out unset.
        self.iceinit = self.icec = self.iceq = self.pol = None

        # Fall back to default file names under root_dir when not supplied.
        if report_fn is None:
            self.report_fn = op.join(self.root_dir, "cluster_report.csv")
        else:
            self.report_fn = realpath(report_fn)

        if summary_fn is None:
            self.summary_fn = op.join(self.root_dir, "cluster_summary.txt")
        else:
            self.summary_fn = realpath(summary_fn)

        self.summary = ClusterSummary()

        self.add_log("Finishing create Cluster Object.", level=logging.INFO)