def write_summary(self, summary_fn, isoforms_fa, hq_fa=None, lq_fa=None):
    """Count isoforms in the given FASTA files and write a cluster summary.

    The number of consensus isoforms and the total number of bases across
    all consensus isoforms are collected from isoforms_fa, stored in a
    fresh ClusterSummary, and written to summary_fn.

    Parameters:
      summary_fn  -- output file the summary is written to.
      isoforms_fa -- FASTA file of consensus isoforms; every record adds
                     one isoform and len(record.sequence) bases.
      hq_fa       -- optional FASTA of polished high-quality isoforms; when
                     not None, its record count is reported as
                     num_polished_hq_isoforms.
      lq_fa       -- optional FASTA of polished low-quality isoforms; when
                     not None, its record count is reported as
                     num_polished_lq_isoforms.

    Raises:
      RuntimeError -- when a ZeroDivisionError occurs while building or
                      writing the summary (presumably because no consensus
                      isoforms were found -- confirm in ClusterSummary).
    """
    def _count_records(fasta_fn):
        """Return the number of records FastaReader yields for fasta_fn."""
        with FastaReader(fasta_fn) as fasta_reader:
            return sum(1 for _ in fasta_reader)

    self.add_log("Writing a summary to {f}".format(f=summary_fn),
                 level=logging.INFO)
    try:
        summary = ClusterSummary()

        # Consensus isoforms: one pass collects both the count and bases.
        with FastaReader(isoforms_fa) as isoform_reader:
            for record in isoform_reader:
                summary.numConsensusIsoforms += 1
                summary.numTotalBases += len(record.sequence)

        # Polished counts are only set when the matching file was given.
        if hq_fa is not None:
            summary.num_polished_hq_isoforms = _count_records(hq_fa)

        if lq_fa is not None:
            summary.num_polished_lq_isoforms = _count_records(lq_fa)

        summary.write(summary_fn)
    except ZeroDivisionError:
        message = "No consensus isoforms predicted."
        self.add_log(message, level=logging.ERROR)
        raise RuntimeError(message)
def test_write(self):
    """Test ClusterSummary.write.

    Fills a ClusterSummary with fixed counts, writes it under
    <testDir>/out, and checks the result with filecmp.cmp against the
    expected file kept under <testDir>/stdout.
    """
    written_fn = op.join(self.testDir, "out/test_ClusterSummary.txt")
    expected_fn = op.join(self.testDir, "stdout/test_ClusterSummary.txt")

    num_isoforms = 97
    summary = ClusterSummary()
    summary.numConsensusIsoforms = num_isoforms
    summary.numTotalBases = num_isoforms * 3945
    summary.write(written_fn)

    files_match = filecmp.cmp(written_fn, expected_fn)
    self.assertTrue(files_match)
def __init__(self, root_dir, flnc_fa, nfl_fa, bas_fofn, ccs_fofn, out_fa,
             sge_opts, ice_opts, hq_isoforms_fa=None, hq_isoforms_fq=None,
             lq_isoforms_fa=None, lq_isoforms_fq=None,
             report_fn=None, summary_fn=None):
    """Set up a Cluster object: validate paths, store options, run checks.

    Parameters:
      root_dir, out_fa -- output locations; stored after being passed
                          through self._validateOutputs.
      flnc_fa, nfl_fa, ccs_fofn -- inputs; stored after being passed
                          through self._validateInputs.
      bas_fofn         -- forwarded to the parent constructor together
                          with root_dir and ccs_fofn.
      sge_opts         -- SGE, CPU options, etc.
      ice_opts         -- options for the ICE clustering algorithm.
      hq_isoforms_fa, hq_isoforms_fq,
      lq_isoforms_fa, lq_isoforms_fq -- optional paths, stored as given.
      report_fn        -- report path; defaults to
                          <root_dir>/cluster_report.csv when None.
      summary_fn       -- summary path; defaults to
                          <root_dir>/cluster_summary.txt when None.
    """
    super(Cluster, self).__init__(
        prog_name="Cluster",
        root_dir=root_dir,
        bas_fofn=bas_fofn,
        ccs_fofn=ccs_fofn)

    # Replace the raw arguments with their validated counterparts.
    checked_inputs = self._validateInputs(flnc_fa, nfl_fa, ccs_fofn)
    self.flnc_fa, self.nfl_fa, self.ccs_fofn = checked_inputs
    checked_outputs = self._validateOutputs(root_dir, out_fa)
    self.root_dir, self.out_fa = checked_outputs

    self.hq_isoforms_fa = hq_isoforms_fa
    self.hq_isoforms_fq = hq_isoforms_fq
    self.lq_isoforms_fa = lq_isoforms_fa
    self.lq_isoforms_fq = lq_isoforms_fq

    # SGE, CPU options and etc
    self.sge_opts = sge_opts
    # The ICE clustering algorithm options
    self.ice_opts = ice_opts

    self.sanity_check()

    # Probability & quality value
    self._probqv = None
    # flnc_fa split into smaller files
    self._flnc_splitted_fas = []
    # nfl_fa split into smaller files
    self._nflncSplittedFas = []

    # Log configurations
    self._logConfigs()

    # Assigned left to right: iceinit, icec, iceq, pol.
    self.iceinit = self.icec = self.iceq = self.pol = None

    # Fall back to default file names under root_dir when no explicit
    # report/summary path is supplied.
    if report_fn is not None:
        self.report_fn = realpath(report_fn)
    else:
        self.report_fn = op.join(self.root_dir, "cluster_report.csv")

    if summary_fn is not None:
        self.summary_fn = realpath(summary_fn)
    else:
        self.summary_fn = op.join(self.root_dir, "cluster_summary.txt")

    self.summary = ClusterSummary()

    self.add_log("Finishing create Cluster Object.", level=logging.INFO)