def _dump_indel_lens(self):
    """Write mapped indel-length counts to 'mapped_indel_len_counts.csv'.

    Raises if 'process' has not been run yet (no run file on disk).
    """
    run_name = self._run_file()
    if not os.path.exists(run_name):
        raise Exception("Run must be run before attempting dump")
    spats = Spats()
    spats.load(run_name)
    countinfo = spats.counters.counts_dict()
    key_prefix = 'mapped_indel_len_'
    # Rows sorted by indel length (the integer suffix of the counter key).
    lengths = sorted(int(k.split('_')[-1]) for k in countinfo.keys() if k.startswith(key_prefix))
    rows = [(length, countinfo["mapped_indel_len_{}".format(length)]) for length in lengths]
    output_path = os.path.join(self.path, 'mapped_indel_len_counts.csv')
    self._write_csv(output_path, [ "Indel Length", "Reads" ], rows)
def validate(self):
    """Validate the results of a previous 'process' run against a second
    (slower) algorithm.
    """
    run_name = self._run_file()
    if not os.path.exists(run_name):
        raise Exception("Run must be performed before validating")
    spats = Spats()
    spats.load(run_name)
    passed = spats.validate_results(self.r1, self.r2)
    # Record the outcome either way; the note text is what callers inspect.
    self._add_note("Validation pass" if passed else "Validation FAILURE")
def _dump_mut_counts(self):
    """Dump mutation-count histograms to CSV.

    Writes two files under self.path:
      - mut_counts.csv        from counters keyed 'mut_count_<n>'
      - mapped_mut_counts.csv from counters keyed 'mapped_mut_count_<n>'

    Raises if 'process' has not been run yet (no run file on disk).
    """
    run_name = self._run_file()
    if not os.path.exists(run_name):
        raise Exception("Run must be run before attempting dump")
    spats = Spats()
    spats.load(run_name)
    countinfo = spats.counters.counts_dict()
    # The two histograms are extracted identically; only the counter-key
    # prefix and the output filename differ, so drive both from one loop
    # (the original duplicated the whole loop body).
    for key_prefix, csv_name in (('mut_count_', 'mut_counts.csv'),
                                 ('mapped_mut_count_', 'mapped_mut_counts.csv')):
        mut_cnts = []
        for muts in sorted(int(k.split('_')[-1]) for k in countinfo.keys() if k.startswith(key_prefix)):
            mut_cnts.append((muts, countinfo["{}{}".format(key_prefix, muts)]))
        output_path = os.path.join(self.path, csv_name)
        self._write_csv(output_path, [ "Mutation Count", "Reads" ], mut_cnts)
def _dump_prefixes(self):
    """Dump per-mask tag-prefix counts to CSV, most frequent first.

    For each mask, writes:
      - prefixes_<mask>.csv        from 'prefix_<mask>_*' counters,
        as a percentage of 'total_pairs'
      - mapped_prefixes_<mask>.csv from 'mapped_prefix_<mask>_*' counters,
        as a percentage of 'registered_pairs'

    Raises if 'process' has not been run yet (no run file on disk).
    NOTE(review): divides by the pair count -- a zero 'total_pairs' /
    'registered_pairs' would raise ZeroDivisionError, as in the original.
    """
    run_name = self._run_file()
    if not os.path.exists(run_name):
        raise Exception("Run must be run before attempting dump")
    spats = Spats()
    spats.load(run_name)
    countinfo = spats.counters.counts_dict()
    # Both passes share the same shape; only the denominator counter and the
    # key/file prefixes differ (the original duplicated the whole pass).
    for denom_key, key_fmt, file_fmt in (('total_pairs', 'prefix_{}_', 'prefixes_{}.csv'),
                                         ('registered_pairs', 'mapped_prefix_{}_', 'mapped_prefixes_{}.csv')):
        total = float(countinfo[denom_key]) / 100.0
        for mask in spats.run.masks:
            keyprefix = key_fmt.format(mask)
            # Sort matching counters by count, descending.
            matching = sorted([k for k in countinfo.keys() if k.startswith(keyprefix)],
                              key = lambda k : countinfo[k], reverse = True)
            prefixes = [(key[len(keyprefix):], float(countinfo[key]) / total, countinfo[key])
                        for key in matching]
            output_path = os.path.join(self.path, file_fmt.format(mask))
            self._write_csv(output_path, [ "Tag", "Percentage", "Count" ], prefixes)
def _dump_run(self):
    """Dump computed reactivity profiles for the previous 'process' run to CSV.

    Cotrans runs produce one per-site CSV for the (single) target plus one
    '<target>_<key>_mat.csv' matrix per profile vector; non-cotrans runs
    produce one per-site CSV per target.  Column layout depends on whether
    the run counted mutations and/or handled indels.

    Raises if 'process' has not been run yet (no run file on disk).
    """
    run_name = self._run_file()
    if not os.path.exists(run_name):
        raise Exception("Run must be run before attempting dump")
    spats = Spats()
    spats.load(run_name)
    profiles = spats.compute_profiles()
    mutations = spats.run.count_mutations
    indels = spats.run.handle_indels
    headers = [ "L", "site", "nt", "f+", "f-" ]
    if indels:
        headers += [ "ins+", "ins-", "del+", "del-" ]
    if mutations:
        headers += [ "mut+", "mut-", "beta", "mu", "r" ]
    else:
        headers += [ "beta", "theta", "rho" ]
    headers += [ "c", "c alt" ]

    def profile_row(prof, tseq, end, i):
        # One CSV row for site i; column order must match `headers` above.
        # Site 0 has no nucleotide, hence the '*' placeholder.
        datapt = [ end, i, tseq[i - 1] if i else '*', prof.treated[i], prof.untreated[i] ]
        if indels:
            datapt += [ prof.treated_inserts[i], prof.untreated_inserts[i], prof.treated_deletes[i], prof.untreated_deletes[i] ]
        if mutations:
            datapt += [ prof.treated_muts[i], prof.untreated_muts[i], prof.beta[i], prof.mu[i], prof.r_mut[i] ]
        else:
            datapt += [ prof.beta[i], prof.theta[i], prof.rho[i] ]
        datapt += [ prof.c, prof.c_alt ]
        return datapt

    if self.cotrans:
        # Cotrans: a single target; one per-site file covering every end.
        tgt = spats.targets.targets[0]
        tseq = tgt.seq
        data = []
        for key in profiles.cotrans_keys():
            end = int(key.split('_')[-1])
            prof = profiles.profilesForTargetAndEnd(tgt.name, end)
            for i in xrange(end + 1):
                data.append(profile_row(prof, tseq, end, i))
        output_path = os.path.join(self.path, '{}.csv'.format(tgt.name))
        self._write_csv(output_path, headers, data)
        # Matrix dumps: one file per profile vector, one row per end,
        # right-padded with empty cells to a fixed column count.
        empty_cell = ''
        keys = [ 'treated', 'untreated' ]
        if indels:
            keys += [ 'treated_inserts', 'untreated_inserts', 'treated_deletes', 'untreated_deletes' ]
        if mutations:
            # NOTE(review): these attribute names ('treated_mut', 'r') differ
            # from the per-site names used above ('treated_muts', 'r_mut') --
            # preserved as-is; confirm they are the intended vector names.
            keys += [ 'treated_mut', 'untreated_mut', 'beta', 'mu', 'r' ]
        else:
            keys += [ 'beta', 'theta', 'rho' ]
        cotrans_keys = profiles.cotrans_keys()
        for key in keys:
            ncols = 0
            mat = []
            for pkey in cotrans_keys:
                end = int(pkey.split('_')[-1])
                prof = profiles.profilesForTargetAndEnd(tgt.name, end)
                # Copy the vector: the original did `vals += ...` on the
                # attribute itself, mutating the profile's list in place.
                vals = list(getattr(prof, key))
                if not ncols:
                    ncols = len(cotrans_keys) + len(vals)
                if len(vals) < ncols:
                    vals += ([empty_cell] * (ncols - len(vals)))
                mat.append(vals)
            # Join with self.path for consistency with every other output
            # (the original passed a bare relative filename here).
            self._write_csv(os.path.join(self.path, '{}_{}_mat.csv'.format(tgt.name, key)), None, mat)
    else:
        # Non-cotrans: one per-site file per target, full-length profile.
        for tgt in spats.targets.targets:
            tseq = tgt.seq
            end = len(tgt.seq)
            prof = profiles.profilesForTarget(tgt)
            data = []
            for i in xrange(end + 1):
                data.append(profile_row(prof, tseq, end, i))
            output_path = os.path.join(self.path, '{}.csv'.format(tgt.name))
            self._write_csv(output_path, headers, data)