def plot_p():
    """Plot substitution probabilities over time for codon 'aaa', twice.

    The first plot uses a Goldman Q matrix built from F1x4 codon frequencies
    of the sequence stored in ``../../data/split/Seq2_Sus``; the second uses
    the Q matrix that ``models.goldman_Q()`` produces with its defaults.
    """
    with open('../../data/split/Seq2_Sus', 'r') as handle:
        raw_text = handle.read()

    # Skip the first 10 characters (presumably a header/label -- TODO confirm),
    # then join the remaining lines and normalise to lower case.
    old_sequence = raw_text[10:].replace('\n', '').lower()

    seq = models.Sequence(old_sequence)
    codon_freq = codon_frequencies.F1x4(seq)

    # Q matrix parameterised by the frequencies estimated from the sequence.
    fitted_q = models.goldman_Q(codon_freq=codon_freq)
    models.plot_p_over_time(fitted_q, t=0.1, codon='aaa', logscale=False)

    # Q matrix built with goldman_Q's default arguments, for comparison.
    default_q = models.goldman_Q()
    models.plot_p_over_time(default_q, t=0.1, codon='aaa', logscale=False)
def test_convert_q_to_p(self):
    """Check convert_q_to_p at t=0 (identity) and t=1 (no entry above 1)."""
    rate_matrix = models.goldman_Q()

    # With zero elapsed time the result must be exactly the 61x61 identity.
    p_at_zero = models.convert_q_to_p(rate_matrix, t=0)
    self.assertEqual(p_at_zero.shape, (61, 61))
    matches_identity = (p_at_zero == identity(61)).all()
    self.assertTrue(matches_identity)

    # At t=1 the largest entry must not exceed 1.0.
    p_at_one = models.convert_q_to_p(rate_matrix, t=1)
    largest_entry = p_at_one.max()
    self.assertTrue(largest_entry <= 1.0)
def test_evolve_sequence_with_q(self):
    """Evolving the same input twice should change it, and differently each time.

    The checks are stochastic: each one has a very low probability of failing.
    """
    rate_matrix = models.goldman_Q(scale_q=True)

    # Each run gets its own freshly built Sequence wrapping the same input.
    first_parent = models.Sequence(self.old_sequence)
    first_result = evolve.evolve_sequence_with_q(first_parent, rate_matrix, t=1)
    second_parent = models.Sequence(self.old_sequence)
    second_result = evolve.evolve_sequence_with_q(second_parent, rate_matrix, t=1)

    first_seq = first_result.seq
    second_seq = second_result.seq

    # Both results should differ from the input (very low probability of failing).
    self.assertNotEqual(self.old_sequence, first_seq)
    self.assertNotEqual(self.old_sequence, second_seq)
    # Two independent runs should not produce the same result.
    self.assertNotEqual(first_seq, second_seq)
def test_make_sub_from_p(self):
    """After make_sub_from_p, the codon still has 3 letters drawn from 'atgc'."""
    rate_matrix = models.goldman_Q(scale_q=False)
    prob_matrix = models.convert_q_to_p(rate_matrix, t=10)
    # Only the dictionary form of the cumulative probabilities is needed here.
    _cumsum, _codons, cumulative_by_codon = models.get_cumulative_p(
        prob_matrix, return_dict=True)

    start_seq = 'aaa'
    codon = models.Codon(seq=start_seq)
    # The return value is ignored; the checks below inspect the codon object.
    models.make_sub_from_p(codon, cumulative_by_codon)

    self.assertEqual(len(codon.seq), 3)
    allowed_letters = 'atgc'
    for letter in codon.seq:
        self.assertIn(letter, allowed_letters)
def test_make_subs_in_locus(self):
    """Repeatedly substitute in a two-codon locus until it changes.

    Once the locus sequence differs from its starting value, the locus
    history must be non-empty.
    """
    q = models.goldman_Q(scale_q=False)
    p = models.convert_q_to_p(q, t=10)
    # Only the dictionary form of the cumulative probabilities is used.
    _, _, p_cumsum_dict = models.get_cumulative_p(p, return_dict=True)

    old_codon_seq = 'aaa'
    # Build two *distinct* Codon objects. ``[Codon(...)] * 2`` would place the
    # same instance in the list twice, so an in-place change to one position
    # would silently show up in the other as well.
    old_codons = [models.Codon(seq=old_codon_seq) for _ in range(2)]
    locus = models.Locus(codons=old_codons)

    old_seq = locus.sequence
    # Substitutions are stochastic, so keep applying them until one lands.
    while locus.sequence == old_seq:
        models.make_subs_in_locus(locus, p_cumsum_dict)

    self.assertNotEqual(locus.history, [])
def evolve_tree(sequence, taxa=10, t=1e-2, omega=1.0, kappa=2.0, lmbda=1e-5, ti_td=0.1, codon_freq='F1x4', scale_q=True, **kwargs):
    """
    Evolve a parent DNA sequence into a set of daughter sequences (taxa) by:
        1. generating a random phylogenetic tree
        2. instantiating a mutational model (Goldman-Yang-like by default),
           represented by a Q-matrix, with indels
        3. mutating the sequence along the tree using that model

    Args:
        sequence: a model.Sequence instance
        taxa: number of daughter sequences to evolve
        t: evolution time or branch length
        omega: dN/dS
        kappa: ratio of transition to transversion rates
        lmbda: probability of indel at codon
        ti_td: ratio of insertions to deletions
        codon_freq: name of the codon frequency model (also known as
            equilibrium frequencies), looked up as an attribute of
            ``codon_frequencies``; default is 'F1x4'
        scale_q: scales Q so that the average rate of substitution at
            equilibrium equals 1. Branch lengths are thus expected number of
            nucleotide substitutions per codon. See Goldman (1994).
        **kwargs: accepted but not used by this function

    Returns:
        the tree from ``trees.random_tree``, with ``value`` set to ``sequence``
        on the tree object itself and to the result of
        ``evolve_sequence_with_q`` on every other listed node
    """
    # Resolve the frequency model by name, then evaluate it on the sequence.
    frequency_model = getattr(codon_frequencies, codon_freq)
    equilibrium_freq = frequency_model(sequence)

    rate_matrix = models.goldman_Q(
        kappa=kappa,
        omega=omega,
        codon_freq=equilibrium_freq,
        scale_q=scale_q,
        return_dict=False,
    )

    tree = trees.random_tree(taxa)
    tree.value = sequence

    # The first listed node is skipped -- presumably the root, which already
    # holds the input sequence. Each remaining node reads its parent's value,
    # so this assumes parents are listed before children (TODO confirm).
    descendants = trees.get_list_of_tree_nodes(tree)[1:]
    for child in descendants:
        child.value = evolve_sequence_with_q(
            child.parent.value, rate_matrix, t=t, lmbda=lmbda, ti_td=ti_td)

    return tree
def evolve_tree(sequence, taxa=10, t=1e-2, omega=1.0, kappa=2.0, lmbda=1e-5, ti_td=0.1, codon_freq='F1x4', scale_q=True, **kwargs):
    """
    Evolve a parent DNA sequence into daughter sequences along a random tree.

    Steps:
        1. generate a random phylogenetic tree
        2. instantiate a mutational model (Goldman-Yang-like by default),
           represented by a Q-matrix, with indels
        3. mutate the sequence according to the tree shape using the model

    Args:
        sequence: a model.Sequence instance
        taxa: number of daughter sequences to evolve
        t: evolution time or branch length
        omega: dN/dS
        kappa: ratio of transition to transversion rates
        lmbda: probability of indel at codon
        ti_td: ratio of insertions to deletions
        codon_freq: codon frequency model name, also known as equilibrium
            frequencies (default is F1x4); resolved via ``getattr`` on
            ``codon_frequencies``
        scale_q: scales Q so that the average rate of substitution at
            equilibrium equals 1. Branch lengths are thus expected number of
            nucleotide substitutions per codon. See Goldman (1994).
        **kwargs: extra keyword arguments are accepted and ignored

    Returns:
        the generated tree, whose nodes carry the evolved results in ``value``
    """
    freq_values = getattr(codon_frequencies, codon_freq)(sequence)
    q_matrix = models.goldman_Q(kappa=kappa, omega=omega,
                                codon_freq=freq_values,
                                scale_q=scale_q, return_dict=False)

    root = trees.random_tree(taxa)
    root.value = sequence

    all_nodes = trees.get_list_of_tree_nodes(root)
    # Index 0 is skipped (presumably the root itself -- TODO confirm); every
    # other node is evolved from whatever value its parent currently holds.
    for node in all_nodes[1:]:
        parent_value = node.parent.value
        node.value = evolve_sequence_with_q(parent_value, q_matrix,
                                            t=t, lmbda=lmbda, ti_td=ti_td)
    return root
def test_evolve_sequence_with_q(self):
    """Two evolutions of the same input differ from it and from each other.

    All three checks are probabilistic and have a very low chance of failing.
    """
    q_matrix = models.goldman_Q(scale_q=True)

    # Run the evolution twice, each time on a newly constructed Sequence.
    evolved = []
    for _ in range(2):
        parent = models.Sequence(self.old_sequence)
        evolved.append(evolve.evolve_sequence_with_q(parent, q_matrix, t=1))

    seq_a = evolved[0].seq
    seq_b = evolved[1].seq

    # this has a very low probability of failing
    self.assertNotEqual(self.old_sequence, seq_a)
    # this has a very low probability of failing
    self.assertNotEqual(self.old_sequence, seq_b)
    self.assertNotEqual(seq_a, seq_b)
def test_sample_model_mutation_probabilities_validation(self):
    """Sampling for codon 'aaa' from an unscaled Q returns a list of 100 items."""
    rate_matrix = models.goldman_Q(scale_q=False)
    draws = models.sample_model_mutation_probabilities('aaa', rate_matrix)

    self.assertIsInstance(draws, list)
    # 100 is presumably the function's default sample count -- TODO confirm.
    self.assertEqual(len(draws), 100)
def test_get_mutation_from_cumulative_p(self):
    """Smoke-test get_mutation_from_cumulative_p for codon 'aaa'.

    Only verifies that the call completes without raising; the returned
    value is not inspected.
    """
    rate_matrix = models.goldman_Q(scale_q=False)
    prob_matrix = models.convert_q_to_p(rate_matrix, t=10)
    # Only the dictionary form of the cumulative probabilities is used.
    _cumsum, _codons, cumulative_by_codon = models.get_cumulative_p(
        prob_matrix, return_dict=True)

    start_codon = 'aaa'
    # NOTE(review): nothing is asserted about new_codon -- consider adding a
    # check once the expected return type is confirmed.
    new_codon = models.get_mutation_from_cumulative_p(
        start_codon, cumulative_by_codon)
def test_get_cumulative_p(self):
    """Every row of the cumulative probabilities must end at (about) 1."""
    rate_matrix = models.goldman_Q(scale_q=False)
    prob_matrix = models.convert_q_to_p(rate_matrix, t=10)
    cumulative_rows, _codons = models.get_cumulative_p(prob_matrix)

    for cumulative_row in cumulative_rows:
        final_value = cumulative_row[-1]
        # Compared with isclose rather than == to allow for rounding error.
        self.assertTrue(isclose(final_value, 1))
def test_goldman_Q(self):
    """An unscaled Q built from FEqual frequencies is 61x61 with zero row sums."""
    equal_freq = codon_frequencies.FEqual()
    rate_matrix, _rate_dict = models.goldman_Q(
        codon_freq=equal_freq, scale_q=False, return_dict=True)

    self.assertEqual(rate_matrix.shape, (61, 61))
    for rate_row in rate_matrix:
        row_total = rate_row.sum()
        # Compared with isclose rather than == to allow for rounding error.
        self.assertTrue(isclose(row_total, 0))