Exemple #1
0
 def _dump_indel_lens(self):
     """Write the mapped indel-length read counts to 'mapped_indel_len_counts.csv'.

     Loads the saved run, picks every counter whose name starts with
     'mapped_indel_len_' and emits one (length, reads) row per counter,
     ordered by increasing length. The file is written under self.path.

     Raises:
         Exception: if the run file does not exist yet.
     """
     run_file = self._run_file()
     if not os.path.exists(run_file):
         raise Exception("Run must be run before attempting dump")
     spats = Spats()
     spats.load(run_file)
     counts = spats.counters.counts_dict()
     # The last '_'-separated field of each matching counter name is the numeric length.
     lengths = [int(name.split('_')[-1]) for name in counts.keys() if name.startswith('mapped_indel_len_')]
     lengths.sort()
     rows = [(length, counts["mapped_indel_len_{}".format(length)]) for length in lengths]
     self._write_csv(os.path.join(self.path, 'mapped_indel_len_counts.csv'), [ "Indel Length", "Reads" ], rows)
Exemple #2
0
    def validate(self):
        """Check the results of an earlier 'process' run against a second (slower) algorithm.

        Records a "Validation pass" note when the check succeeds and a
        "Validation FAILURE" note otherwise.

        Raises:
            Exception: if the run file does not exist yet.
        """
        run_file = self._run_file()
        if not os.path.exists(run_file):
            raise Exception("Run must be performed before validating")

        spats = Spats()
        spats.load(run_file)
        passed = spats.validate_results(self.r1, self.r2)
        self._add_note("Validation pass" if passed else "Validation FAILURE")
Exemple #3
0
 def _dump_mut_counts(self):
     """Write the mutation-count read tallies of a completed run to CSV.

     Produces two files under self.path: 'mut_counts.csv' from the
     'mut_count_<n>' counters and 'mapped_mut_counts.csv' from the
     'mapped_mut_count_<n>' counters. Each file holds one
     (mutation count, reads) row per counter, ordered by increasing count.

     Raises:
         Exception: if the run file does not exist yet.
     """
     run_file = self._run_file()
     if not os.path.exists(run_file):
         raise Exception("Run must be run before attempting dump")
     spats = Spats()
     spats.load(run_file)
     counts = spats.counters.counts_dict()
     # Both dumps share the same shape; only the counter prefix and file name differ.
     dumps = (
         ('mut_count_', 'mut_counts.csv'),
         ('mapped_mut_count_', 'mapped_mut_counts.csv'),
     )
     for key_prefix, file_name in dumps:
         nmuts = [int(name.split('_')[-1]) for name in counts.keys() if name.startswith(key_prefix)]
         nmuts.sort()
         rows = [(n, counts["{}{}".format(key_prefix, n)]) for n in nmuts]
         self._write_csv(os.path.join(self.path, file_name), [ "Mutation Count", "Reads" ], rows)
Exemple #4
0
 def _dump_prefixes(self):
     """Write per-mask prefix tallies of a completed run to CSV.

     For every mask in the run, two files are written under self.path:
     'prefixes_<mask>.csv' (percentages relative to the 'total_pairs'
     counter) and 'mapped_prefixes_<mask>.csv' (percentages relative to the
     'registered_pairs' counter). Rows are (tag, percentage, count), ordered
     by decreasing count.

     Raises:
         Exception: if the run file does not exist yet.
     """
     run_file = self._run_file()
     if not os.path.exists(run_file):
         raise Exception("Run must be run before attempting dump")
     spats = Spats()
     spats.load(run_file)
     counts = spats.counters.counts_dict()
     # (counter used as denominator, counter-name template, output-file template)
     passes = (
         ('total_pairs', "prefix_{}_", 'prefixes_{}.csv'),
         ('registered_pairs', "mapped_prefix_{}_", 'mapped_prefixes_{}.csv'),
     )
     for total_key, prefix_template, file_template in passes:
         # Pre-dividing by 100 turns "count / percent_unit" directly into a percentage.
         percent_unit = float(counts[total_key]) / 100.0
         for mask in spats.run.masks:
             keyprefix = prefix_template.format(mask)
             matching = [name for name in counts.keys() if name.startswith(keyprefix)]
             matching.sort(key = lambda name : counts[name], reverse = True)
             rows = []
             for name in matching:
                 tag = name[len(keyprefix):]
                 rows.append((tag, float(counts[name]) / percent_unit, counts[name]))
             self._write_csv(os.path.join(self.path, file_template.format(mask)), [ "Tag", "Percentage", "Count" ], rows)
Exemple #5
0
    def _dump_run(self):
        """Dump the computed profiles of a completed run to CSV files under self.path.

        For a cotrans run, writes '<target>.csv' for the first target with one
        row per (end, site) pair, plus one headerless '<target>_<attr>_mat.csv'
        matrix per profile attribute with one row per end. Otherwise writes one
        '<target>.csv' per target.

        Which columns appear depends on the run's handle_indels and
        count_mutations settings.

        Raises:
            Exception: if the run file does not exist yet.
        """
        run_name = self._run_file()
        if not os.path.exists(run_name):
            raise Exception("Run must be run before attempting dump")

        spats = Spats()
        spats.load(run_name)
        profiles = spats.compute_profiles()
        mutations = spats.run.count_mutations
        indels = spats.run.handle_indels
        headers = [ "L", "site", "nt", "f+", "f-" ]
        if indels:
            headers += [ "ins+", "ins-", "del+", "del-" ]
        if mutations:
            headers += [ "mut+", "mut-", "beta", "mu", "r" ]
        else:
            headers += [ "beta", "theta", "rho" ]
        headers += [ "c", "c alt" ]

        def site_row(prof, tseq, end, i):
            # One CSV row for index i of a profile; column order matches `headers`.
            # Index 0 has no matching entry in tseq (index i maps to tseq[i - 1]),
            # so '*' is emitted in the nucleotide column.
            row = [ end, i, tseq[i - 1] if i else '*', prof.treated[i], prof.untreated[i] ]
            if indels:
                row += [ prof.treated_inserts[i], prof.untreated_inserts[i], prof.treated_deletes[i], prof.untreated_deletes[i] ]
            if mutations:
                row += [ prof.treated_muts[i], prof.untreated_muts[i], prof.beta[i], prof.mu[i], prof.r_mut[i] ]
            else:
                row += [ prof.beta[i], prof.theta[i], prof.rho[i] ]
            row += [ prof.c, prof.c_alt ]
            return row

        if self.cotrans:
            tgt = spats.targets.targets[0]
            tseq = tgt.seq
            cotrans_keys = profiles.cotrans_keys()

            data = []
            for key in cotrans_keys:
                # The end is the last '_'-separated field of the cotrans key.
                end = int(key.split('_')[-1])
                prof = profiles.profilesForTargetAndEnd(tgt.name, end)
                for i in xrange(end + 1):
                    data.append(site_row(prof, tseq, end, i))
            output_path = os.path.join(self.path, '{}.csv'.format(tgt.name))
            self._write_csv(output_path, headers, data)

            empty_cell = ''
            attrs = [ 'treated', 'untreated' ]
            if indels:
                attrs += [ 'treated_inserts', 'untreated_inserts', 'treated_deletes', 'untreated_deletes' ]
            if mutations:
                # NOTE(review): the per-site rows read prof.treated_muts /
                # prof.untreated_muts / prof.r_mut, while the names used here are
                # 'treated_mut' / 'untreated_mut' / 'r' -- confirm the profile
                # object really exposes both spellings.
                attrs += [ 'treated_mut', 'untreated_mut', 'beta', 'mu', 'r' ]
            else:
                attrs += [ 'beta', 'theta', 'rho' ]
            for attr in attrs:
                ncols = 0
                mat = []
                for pkey in cotrans_keys:
                    end = int(pkey.split('_')[-1])
                    prof = profiles.profilesForTargetAndEnd(tgt.name, end)
                    # Copy before padding so the profile's own data is never mutated.
                    vals = list(getattr(prof, attr))
                    if not ncols:
                        # Row width is fixed from the first profile encountered.
                        ncols = len(cotrans_keys) + len(vals)
                    if len(vals) < ncols:
                        vals += [empty_cell] * (ncols - len(vals))
                    mat.append(vals)
                # Written under self.path like every other dump output.
                mat_path = os.path.join(self.path, '{}_{}_mat.csv'.format(tgt.name, attr))
                self._write_csv(mat_path, None, mat)
        else:
            for tgt in spats.targets.targets:
                tseq = tgt.seq
                end = len(tgt.seq)
                prof = profiles.profilesForTarget(tgt)
                data = [ site_row(prof, tseq, end, i) for i in xrange(end + 1) ]
                output_path = os.path.join(self.path, '{}.csv'.format(tgt.name))
                self._write_csv(output_path, headers, data)