Ejemplo n.º 1
0
 def test_observed_bases_obs_at_with_base_stranded_symmetry(self):
     # Flag the second alignment as reverse-strand before tabulating.
     self.alns[1].is_reverse = True
     observed = observe.ObservedBases(self.alns, 20, 10)
     # A stranded lookup must give the same answer whether the base is
     # requested in upper or lower case.
     for upper in 'ACGTN':
         by_upper = observed.obs_at(15, upper, stranded=True)
         by_lower = observed.obs_at(15, upper.lower(), stranded=True)
         self.assertEqual(by_upper, by_lower)
     # The same must hold for the '-' / '+' pair of symbols.
     minus_counts = observed.obs_at(17, '-', True)
     plus_counts = observed.obs_at(17, '+', True)
     self.assertEqual(minus_counts, plus_counts)
Ejemplo n.º 2
0
def write_statistics(phylo, all_obs, contribs, contrib_reads, args):
    """
    Dump two tab-delimited files for plotting the EM / assembly results.

    "<stats_prefix>.pos.tab" is produced by write_variants and
    "<stats_prefix>.obs.tab" is produced by write_base_obs, once per
    contributor and, when there is more than one group of reads, once more
    for the pooled observations labelled "all\\tmix".

    Args:
        phylo: The phylotree object these assignments are based on; its
               refseq attribute is passed on to write_base_obs.
        all_obs: ObservedBases object of observations per reference position.
        contribs: The contributor table, a list whose entries start with
                  (hap#, haplogroup, ...).
        contrib_reads: a dictionary mapping hap#s to the reads assigned to
                       that contributor.
        args: The argparse namespace; stats_prefix, min_mq and min_bq are
              read from it.
    Returns: nothing
    """
    # hap# -> haplogroup name, taken from the first two fields of each entry.
    hap_names = {}
    for entry in contribs:
        hap_names[entry[0]] = entry[1]

    with open("%s.pos.tab" % (args.stats_prefix), 'w') as var_out:
        write_variants(var_out, phylo, contribs, all_obs, args)

    with open("%s.obs.tab" % (args.stats_prefix), 'w') as obs_out:
        for hap_id in sorted(contrib_reads):
            hap_obs = observe.ObservedBases(contrib_reads[hap_id],
                                            args.min_mq, args.min_bq)
            # Read groups missing from the contributor table are reported
            # as "unassigned".
            label = "%s\t%s" % (hap_id, hap_names.get(hap_id, "unassigned"))
            write_base_obs(obs_out, hap_obs, phylo.refseq, label)
        # The pooled table is only written when there is more than one
        # group of reads.
        if len(contrib_reads) > 1:
            write_base_obs(obs_out, all_obs, phylo.refseq, "all\tmix")
Ejemplo n.º 3
0
 def test_observed_bases_update_after_init(self):
     # Feed the same alignments in twice: once through the constructor and
     # once through update().
     observed = observe.ObservedBases(self.alns, 20, 10)
     observed.update(self.alns)
     # Positions grouped by the counts expected at each of them.
     grouped = [
         ((10, 11), {'A': 2}),
         ((12, 13, 14, 16, 19, 21, 22, 23, 24), {'A': 4}),
         ((15, 20, 25), {'G': 2, 'T': 2}),
         ((17, 18), {'A': 2, '-': 2}),
     ]
     exp = {}
     for positions, counts in grouped:
         for pos in positions:
             exp[pos] = dict(counts)
     self.assertEqual(observed.obs_tab, exp)
Ejemplo n.º 4
0
 def test_observed_bases_obs_at_with_base_stranded(self):
     # Flag the second alignment as reverse-strand before tabulating.
     self.alns[1].is_reverse = True
     observed = observe.ObservedBases(self.alns, 20, 10)
     # (position, symbol, pair expected from the stranded lookup)
     cases = [
         (14, 'A', (1, 1)),
         (14, 'G', (0, 0)),
         (15, 'G', (0, 1)),
         (17, '-', (0, 1)),
         (17, '+', (0, 1)),
     ]
     for pos, symbol, expected in cases:
         self.assertEqual(observed.obs_at(pos, symbol, True), expected)
Ejemplo n.º 5
0
 def test_observed_bases_obs_at_with_base(self):
     # Flag the second alignment as reverse-strand before tabulating.
     self.alns[1].is_reverse = True
     observed = observe.ObservedBases(self.alns, 20, 10)
     # (position, symbol, count expected from the unstranded lookup)
     cases = [
         (14, 'A', 2),
         (14, 'G', 0),
         (15, 'G', 1),
         (17, '-', 1),
         (17, '+', 1),
     ]
     for pos, symbol, expected in cases:
         self.assertEqual(observed.obs_at(pos, symbol), expected)
Ejemplo n.º 6
0
 def test_observed_bases_init_from_alns(self):
     observed = observe.ObservedBases(self.alns, 20, 10)
     # Positions grouped by the counts expected at each of them.
     grouped = [
         ((10, 11), {'A': 1}),
         ((12, 13, 14, 16, 19, 21, 22, 23, 24), {'A': 2}),
         ((15, 20, 25), {'G': 1, 'T': 1}),
         ((17, 18), {'A': 1, '-': 1}),
     ]
     exp = {}
     for positions, counts in grouped:
         for pos in positions:
             exp[pos] = dict(counts)
     self.assertEqual(observed.obs_tab, exp)
Ejemplo n.º 7
0
    def setUp(self):
        # Start from an empty argparse namespace and attach the option
        # values the tests read.
        self.args = argparse.ArgumentParser().parse_args([])
        option_values = [
            ('verbose', False),
            ('min_reads', 1),
            ('min_var_reads', 1),
            ('frac_var_reads', 0.02),
            ('var_fraction', 0.5),
            ('var_count', None),
            ('var_check', False),
            ('contributors', None),
        ]
        for option, value in option_values:
            setattr(self.args, option, value)

        # Small phylotree over a nine-base, all-'A' reference.
        self.ref = "AAAAAAAAA"
        tree_lines = [
            'I, A1G ,,',
            ',H, A3T A5T ,,',
            ',,F, A6T ,,',
            ',,,B, A8T ,,',
            ',,,C, T5A ,,',
            ',,G, A7T ,,',
            ',,,D, A9T ,,',
            ',,,E, A4T ,,',
            ',A, A2T A4T ,,',
        ]
        self.phy = phylotree.Phylotree(tree_lines, refseq=self.ref)

        self.cons = [['A', 0.4], ['E', 0.3]]

        # Seed an otherwise empty observation table by hand:
        # (position, base, count), in the order the entries are inserted.
        self.obs = observe.ObservedBases()
        seeded = [
            (1, 'T', 1),
            (3, 'T', 2),
            (0, 'G', 1),
            (6, 'T', 1),
            (2, 'T', 1),
            (4, 'T', 1),
        ]
        for pos, base, count in seeded:
            self.obs.obs_tab[pos][base] = count

        self.wts = numpy.array([1, 1, 1])
        self.haps = list('ABCDEFGHI')
        self.props = numpy.array(
            [0.40, 0.01, 0.01, 0.01, 0.3, 0.01, 0.01, 0.01, 0.01])
        # One row per entry of self.wts, one column per entry of self.haps:
        # the first two rows put 0.91 on column 0 ('A'), the third on
        # column 4 ('E').
        row_for_a = [0.91] + [0.01] * 8
        row_for_e = [0.01] * 4 + [0.91] + [0.01] * 4
        self.mix_mat = numpy.array([row_for_a, row_for_a, row_for_e])
        self.em_results = (self.props, self.mix_mat)
Ejemplo n.º 8
0
def call_consensus(refseq, alns, min_cov, args, strict=True):
    """
    Build a consensus string, one symbol per reference position, from a
    list of AlignedSegments.

    Args:
        refseq: The reference sequence to which the fragments were aligned;
                only its length is used here.
        alns: A list of pysam AlignedSegments
        min_cov: minimum number of non-'N' observations required to call a
                 base at a position.
        args: The argument values from mixemt's argparse results; min_mq and
              min_bq are read from it.
        strict: When true, a position is only called if every observation
                agrees; otherwise the most common base is called.
    Returns:
        A string representing the consensus of the alignments in alns, or an
        empty string when alns is empty.
    """
    # Sometimes alns can be empty; there is nothing to call in that case.
    if not alns:
        return ""

    def pick_base(counts):
        """
        Choose the consensus symbol from a Counter for one position:
        'N' when coverage is below min_cov, 'N' when strict is set and the
        observations disagree, otherwise the most common observation.
        """
        # Zero out 'N' so missing observations count toward neither the
        # coverage nor the consensus.
        counts['N'] = 0
        depth = sum(counts.values())
        if depth < min_cov:
            return 'N'
        top_base, top_count = counts.most_common(1)[0]
        if not strict or top_count == depth:
            return top_base
        return 'N'

    observed = observe.ObservedBases(alns, args.min_mq, args.min_bq)
    called = []
    for pos in range(len(refseq)):
        called.append(pick_base(observed.obs_at(pos)))
    return str(''.join(called))
Ejemplo n.º 9
0
 def test_observed_bases_init_empty(self):
     # With no alignments supplied the observation table should be empty.
     empty = observe.ObservedBases()
     self.assertEqual(empty.obs_tab, dict())
Ejemplo n.º 10
0
 def test_observed_bases_obs_at_bad_base(self):
     observed = observe.ObservedBases(self.alns, 20, 10)
     # Asking for the symbol 'Q' is expected to raise ValueError.
     self.assertRaises(ValueError, observed.obs_at, 15, 'Q')
Ejemplo n.º 11
0
 def test_observed_bases_obs_at_basic_no_base_stranded(self):
     # Flag the second alignment as reverse-strand before tabulating.
     self.alns[1].is_reverse = True
     observed = observe.ObservedBases(self.alns, 20, 10)
     # (position, full stranded table expected at that position)
     cases = [
         (10, {'A': 1}),
         (14, {'A': 1, 'a': 1}),
         (17, {'A': 1, '+': 1}),
     ]
     for pos, expected in cases:
         self.assertEqual(observed.obs_at(pos, stranded=True), expected)
Ejemplo n.º 12
0
 def test_observed_bases_obs_at_basic_no_base(self):
     # Flag the second alignment as reverse-strand before tabulating.
     self.alns[1].is_reverse = True
     observed = observe.ObservedBases(self.alns, 20, 10)
     # (position, full unstranded table expected at that position)
     cases = [
         (10, {'A': 1}),
         (14, {'A': 2}),
         (17, {'A': 1, '-': 1}),
     ]
     for pos, expected in cases:
         self.assertEqual(observed.obs_at(pos), expected)
Ejemplo n.º 13
0
 def test_observed_bases_init_reverse_strand(self):
     # Flag the second alignment as reverse-strand before tabulating.
     self.alns[1].is_reverse = True
     observed = observe.ObservedBases(self.alns, 20, 10)
     # Positions grouped by the counts expected at each of them; the
     # expected table contains lower-case bases and '+' entries.
     grouped = [
         ((10, 11), {'A': 1}),
         ((12, 13, 14, 16, 19, 21, 22, 23, 24), {'A': 1, 'a': 1}),
         ((15, 20, 25), {'g': 1, 'T': 1}),
         ((17, 18), {'A': 1, '+': 1}),
     ]
     exp = {}
     for positions, counts in grouped:
         for pos in positions:
             exp[pos] = dict(counts)
     self.assertEqual(observed.obs_tab, exp)