def test_cond_pos_differ(self):
    """'monomer' and 'conditional' lnL should differ when motif probs are not multiplicative"""
    dinuc_probs = {
        'AA': 0.088506666666666664,
        'AC': 0.044746666666666664,
        'GT': 0.056693333333333332,
        'AG': 0.070199999999999999,
        'CC': 0.048653333333333333,
        'TT': 0.10678666666666667,
        'CG': 0.0093600000000000003,
        'GG': 0.049853333333333333,
        'GC': 0.040253333333333335,
        'AT': 0.078880000000000006,
        'GA': 0.058639999999999998,
        'TG': 0.081626666666666667,
        'TA': 0.068573333333333333,
        'CA': 0.06661333333333333,
        'TC': 0.060866666666666666,
        'CT': 0.069746666666666665,
    }

    def build_lf(mprob_model):
        # identical set-up for both models; only the mprob model changes
        submod = Nucleotide(motif_length=2, motif_probs=dinuc_probs,
                            mprob_model=mprob_model)
        lf = submod.makeLikelihoodFunction(self.tree)
        lf.setParamRule('length', is_independent=False, init=0.4)
        lf.setAlignment(self.aln)
        return lf

    monomer_lf = build_lf('monomer')
    conditional_lf = build_lf('conditional')
    self.assertNotAlmostEqual(monomer_lf.getLogLikelihood(),
                              conditional_lf.getLogLikelihood())
def test_lf_display(self):
    """converting a likelihood function to str should not raise for any mprob model"""
    for _, mprob_model in self.ordered_by_complexity:
        dinuc = Nucleotide(motif_length=2, mprob_model=mprob_model)
        dinuc.adaptMotifProbs(self.cond_root_probs, auto=True)
        # only the absence of an exception is checked; the text is discarded
        str(dinuc.makeLikelihoodFunction(self.tree))
def test_get_statistics(self):
    """getStatistics should run for every combination of its two boolean flags"""
    flag_values = (True, False)
    for root_probs, mprob_model in self.ordered_by_complexity:
        dinuc = Nucleotide(motif_length=2, motif_probs=root_probs,
                           mprob_model=mprob_model)
        lf = dinuc.makeLikelihoodFunction(self.tree)
        # nested loops visit (T, T), (T, F), (F, T), (F, F) in that order
        for show_mprobs in flag_values:
            for show_titles in flag_values:
                lf.getStatistics(with_motif_probs=show_mprobs,
                                 with_titles=show_titles)
def test_sim_alignment(self):
    """simulating an alignment should work for every (motif probs, mprob model) pair"""
    for root_probs, mprob_model in self.ordered_by_complexity:
        dinuc = Nucleotide(motif_length=2, motif_probs=root_probs,
                           mprob_model=mprob_model)
        lf = dinuc.makeLikelihoodFunction(self.tree)
        lf.setParamRule('length', is_independent=False, init=0.4)
        lf.setAlignment(self.aln)
        # the simulated alignment is not inspected; the call must just succeed
        lf.simulateAlignment()
def test_reconstruct_ancestor(self):
    """ancestral sequence reconstruction should run for every (motif probs, mprob model) pair"""
    for root_probs, mprob_model in self.ordered_by_complexity:
        dinuc = Nucleotide(motif_length=2, mprob_model=mprob_model)
        # motif probs are supplied after construction via adaptMotifProbs
        dinuc.adaptMotifProbs(root_probs, auto=True)
        lf = dinuc.makeLikelihoodFunction(self.tree)
        lf.setParamRule('length', is_independent=False, init=0.4)
        lf.setAlignment(self.aln)
        # the result is not inspected; the call must just succeed
        lf.reconstructAncestralSeqs()
def test_getting_node_mprobs(self):
    """getMotifProbsByNode should match hand-computed motif prob vectors"""
    tree = LoadTree(treestring='(a:.2,b:.2,(c:.1,d:.1):.1)')
    aln = LoadSeqs(data={'a': 'TGTG', 'b': 'TGTG', 'c': 'TGTG', 'd': 'TGTG'})

    motifs = ['T', 'C', 'A', 'G']
    # (alias, from-index, to-index) for each directional predicate
    predicate_specs = [('aX', 0, 3), ('bX', 3, 0), ('edX', 1, 2), ('cX', 2, 1)]
    predicates = [
        MotifChange(motifs[frm], motifs[to], forward_only=True).aliased(alias)
        for alias, frm, to in predicate_specs
    ]
    sm = Nucleotide(predicates=predicates, equal_motif_probs=True)
    lf = sm.makeLikelihoodFunction(tree)

    # (parameter, edge, value); applied in this order
    edge_rules = [
        ('aX', 'a', 8.0),
        ('bX', 'b', 8.0),
        ('edX', 'edge.0', 2.0),
        ('cX', 'c', 0.5),
        ('edX', 'd', 4.0),
    ]
    for par_name, edge_name, value in edge_rules:
        lf.setParamRule(par_name, edge=edge_name, value=value)
    lf.setAlignment(aln)

    # we construct the hand calc variants
    root = ones(4, float) * 0.25
    a = dot(root, make_p(0.2, (0, 3), 8))
    b = dot(root, make_p(0.2, (3, 0), 8))
    e = dot(root, make_p(0.1, (1, 2), 2))
    # c and d start from the 'edge.0' vector rather than the root vector
    c = dot(e, make_p(0.1, (2, 1), 0.5))
    d = dot(e, make_p(0.1, (1, 2), 4))

    prob_vectors = lf.getMotifProbsByNode()
    expected_by_node = [('a', a), ('b', b), ('c', c), ('d', d), ('edge.0', e)]
    for node_name, expected in expected_by_node:
        self.assertFloatEqual(prob_vectors[node_name].array, expected)
def test_results_different(self):
    """compare lnL across mprob models for each motif-probability set

    For the i-th entry of ``ordered_by_complexity`` the motif probs of that
    entry are combined with every mprob model in turn. Each new lnL must
    differ (to 2 places) from the first ``i`` results gathered so far and
    must equal every later result gathered so far.
    """
    for i, (mprobs, _) in enumerate(self.ordered_by_complexity):
        # results restarts for every motif-probability set
        results = []
        for _, model in self.ordered_by_complexity:
            di = Nucleotide(motif_length=2, motif_probs=mprobs,
                            mprob_model=model)
            lf = di.makeLikelihoodFunction(self.tree)
            lf.setParamRule('length', is_independent=False, init=0.4)
            lf.setAlignment(self.aln)
            lh = lf.getLogLikelihood()
            for other in results[:i]:
                # assertNotAlmostEqual is the supported unittest spelling;
                # the failIf* aliases are deprecated
                self.assertNotAlmostEqual(other, lh, places=2)
            for other in results[i:]:
                self.assertFloatEqual(other, lh)
            results.append(lh)
def test_getting_node_mprobs(self):
    """motif prob vectors reported per tree node should equal the hand calculation"""
    tree = LoadTree(treestring='(a:.2,b:.2,(c:.1,d:.1):.1)')
    aln = LoadSeqs(data={
        'a': 'TGTG',
        'b': 'TGTG',
        'c': 'TGTG',
        'd': 'TGTG',
    })

    motifs = ['T', 'C', 'A', 'G']
    first, second, third, fourth = motifs
    aX = MotifChange(first, fourth, forward_only=True).aliased('aX')
    bX = MotifChange(fourth, first, forward_only=True).aliased('bX')
    edX = MotifChange(second, third, forward_only=True).aliased('edX')
    cX = MotifChange(third, second, forward_only=True).aliased('cX')

    sm = Nucleotide(predicates=[aX, bX, edX, cX], equal_motif_probs=True)
    lf = sm.makeLikelihoodFunction(tree)
    lf.setParamRule('aX', edge='a', value=8.0)
    lf.setParamRule('bX', edge='b', value=8.0)
    lf.setParamRule('edX', edge='edge.0', value=2.0)
    lf.setParamRule('cX', edge='c', value=0.5)
    lf.setParamRule('edX', edge='d', value=4.0)
    lf.setAlignment(aln)

    # we construct the hand calc variants
    equal_probs = ones(4, float) * 0.25
    expect_a = dot(equal_probs, make_p(0.2, (0, 3), 8))
    expect_b = dot(equal_probs, make_p(0.2, (3, 0), 8))
    expect_e = dot(equal_probs, make_p(0.1, (1, 2), 2))
    # the vectors for c and d are derived from the 'edge.0' vector
    expect_c = dot(expect_e, make_p(0.1, (2, 1), 0.5))
    expect_d = dot(expect_e, make_p(0.1, (1, 2), 4))

    got = lf.getMotifProbsByNode()
    self.assertFloatEqual(got['a'].array, expect_a)
    self.assertFloatEqual(got['b'].array, expect_b)
    self.assertFloatEqual(got['c'].array, expect_c)
    self.assertFloatEqual(got['d'].array, expect_d)
    self.assertFloatEqual(got['edge.0'].array, expect_e)
def test_results_different(self):
    """lnL should differ from the first ``rank`` models and match the remaining ones

    ``rank`` is the position of the motif-probability set within
    ``ordered_by_complexity``; models are tried in that same order.
    """
    for rank, (root_probs, _) in enumerate(self.ordered_by_complexity):
        lnLs = []  # one entry per model already evaluated for this data
        for _, mprob_model in self.ordered_by_complexity:
            dinuc = Nucleotide(motif_length=2, motif_probs=root_probs,
                               mprob_model=mprob_model)
            lf = dinuc.makeLikelihoodFunction(self.tree)
            lf.setParamRule('length', is_independent=False, init=0.4)
            lf.setAlignment(self.aln)
            lnL = lf.getLogLikelihood()

            must_differ, must_match = lnLs[:rank], lnLs[rank:]
            for previous in must_differ:
                self.assertNotAlmostEqual(previous, lnL, places=2)
            for previous in must_match:
                self.assertFloatEqual(previous, lnL)
            lnLs.append(lnL)
def test_newQ_is_nuc_process(self):
    """a 'monomer' dinucleotide model should give the same lnL as the nucleotide model"""
    mono = Nucleotide(motif_probs=self.asymm_nuc_probs)
    dinuc = Nucleotide(
        motif_length=2,
        mprob_model='monomer',
        motif_probs=self.asymm_root_probs,
    )

    mono_lf = mono.makeLikelihoodFunction(self.tree)
    dinuc_lf = dinuc.makeLikelihoodFunction(self.tree)

    # newQ branch length is exactly motif_length*nuc branch length,
    # hence 0.2 for the nucleotide model and 0.4 for the dinucleotide one
    mono_lf.setParamRule('length', is_independent=False, init=0.2)
    dinuc_lf.setParamRule('length', is_independent=False, init=0.4)

    mono_lf.setAlignment(self.aln)
    dinuc_lf.setAlignment(self.aln)

    mono_lnL = mono_lf.getLogLikelihood()
    dinuc_lnL = dinuc_lf.getLogLikelihood()
    self.assertFloatEqual(mono_lnL, dinuc_lnL)
def compare_models(motif_probs, motif_length):
    """assert that the 'monomers' and 'conditional' mprob models give equal lnL

    Relies on ``self`` from the enclosing scope for the tree, the alignment
    and the assertion method.
    """
    # if the 1st and 2nd position motifs are independent of each other
    # then conditional is the same as positional
    shared = dict(motif_length=motif_length, motif_probs=motif_probs)
    positional = Nucleotide(mprob_model='monomers', **shared)
    conditional = Nucleotide(mprob_model='conditional', **shared)

    fitted = []
    for submod in (positional, conditional):
        lf = submod.makeLikelihoodFunction(self.tree)
        lf.setParamRule('length', is_independent=False, init=0.4)
        lf.setAlignment(self.aln)
        fitted.append(lf)

    positional_lf, conditional_lf = fitted
    self.assertFloatEqual(conditional_lf.getLogLikelihood(),
                          positional_lf.getLogLikelihood())
def test_position_specific_mprobs(self):
    """correctly compute likelihood when positions have distinct probabilities

    The alignment is split into its even-indexed and odd-indexed columns.
    The sum of the two lnL values obtained under the 'monomer' model is the
    expected value: it must differ from the 'monomer' lnL of the full
    alignment and equal the 'monomers' lnL of the full alignment.
    """
    aln_len = len(self.aln)
    posn1 = []
    posn2 = []
    for name, seq in self.aln.todict().items():
        # columns 0, 2, 4, ... and 1, 3, 5, ... respectively
        p1 = [seq[i] for i in range(0, aln_len, 2)]
        p2 = [seq[i] for i in range(1, aln_len, 2)]
        posn1.append([name, ''.join(p1)])
        posn2.append([name, ''.join(p2)])

    # the position specific alignments
    posn1 = LoadSeqs(data=posn1)
    posn2 = LoadSeqs(data=posn2)

    # a newQ dinucleotide model
    sm = Nucleotide(motif_length=2, mprob_model='monomer', do_scaling=False)
    lf = sm.makeLikelihoodFunction(self.tree)
    lf.setAlignment(posn1)
    posn1_lnL = lf.getLogLikelihood()
    lf.setAlignment(posn2)
    posn2_lnL = lf.getLogLikelihood()
    expect_lnL = posn1_lnL + posn2_lnL

    # the joint model
    lf.setAlignment(self.aln)
    aln_lnL = lf.getLogLikelihood()

    # setting the full alignment, which has different motif probs, should
    # produce a different lnL
    # (assertNotAlmostEqual is the supported unittest spelling; the failIf*
    # aliases are deprecated)
    self.assertNotAlmostEqual(expect_lnL, aln_lnL)

    # set the arguments for taking position specific mprobs
    sm = Nucleotide(motif_length=2, mprob_model='monomers',
                    do_scaling=False)
    lf = sm.makeLikelihoodFunction(self.tree)
    lf.setAlignment(self.aln)
    posn12_lnL = lf.getLogLikelihood()
    self.assertFloatEqual(expect_lnL, posn12_lnL)
def fit_constructed_gen(results=results):
    """fit a nucleotide model built from explicit directional predicates

    Does nothing when ``results`` already holds a 'constructed_gen' entry;
    otherwise the optimised likelihood function is stored under that key.
    ``tree``, ``opt_args`` and ``_make_likelihood`` come from the enclosing
    scope.
    """
    if 'constructed_gen' in results:
        return

    # 11 directed changes; G -> A is the one off-diagonal pair not listed
    directed_changes = [
        ('A', 'C'), ('A', 'G'), ('A', 'T'),
        ('C', 'A'), ('C', 'G'), ('C', 'T'),
        ('G', 'C'), ('G', 'T'),
        ('T', 'A'), ('T', 'C'), ('T', 'G'),
    ]
    predicates = []
    for source, target in directed_changes:
        predicates.append(MotifChange(source, target, forward_only=True))

    model = Nucleotide(predicates=predicates)
    model_lf = _make_likelihood(model, tree, results)
    model_lf.optimise(**opt_args)
    results['constructed_gen'] = model_lf
def test_newQ_is_nuc_process(self):
    """lnL of the nucleotide model and of the 'monomer' dinucleotide model should agree"""
    nuc_model = Nucleotide(motif_probs=self.asymm_nuc_probs)
    di_model = Nucleotide(motif_length=2, mprob_model='monomer',
                          motif_probs=self.asymm_root_probs)
    nuc_lf = nuc_model.makeLikelihoodFunction(self.tree)
    di_lf = di_model.makeLikelihoodFunction(self.tree)

    # newQ branch length is exactly motif_length*nuc branch length
    for lf, length in ((nuc_lf, 0.2), (di_lf, 0.4)):
        lf.setParamRule('length', is_independent=False, init=length)

    for lf in (nuc_lf, di_lf):
        lf.setAlignment(self.aln)

    self.assertFloatEqual(nuc_lf.getLogLikelihood(),
                          di_lf.getLogLikelihood())
def test_position_specific_mprobs(self):
    """lnL under 'monomers' should equal the summed per-position 'monomer' lnL values"""
    n_cols = len(self.aln)
    named_seqs = self.aln.todict()

    def every_second(seq, start):
        # join the characters found at start, start + 2, start + 4, ...
        return ''.join(seq[i] for i in range(start, n_cols, 2))

    # the position specific alignments
    first_posn = LoadSeqs(
        data=[[name, every_second(seq, 0)] for name, seq in named_seqs.items()]
    )
    second_posn = LoadSeqs(
        data=[[name, every_second(seq, 1)] for name, seq in named_seqs.items()]
    )

    # a newQ dinucleotide model
    monomer = Nucleotide(motif_length=2, mprob_model='monomer',
                         do_scaling=False)
    lf = monomer.makeLikelihoodFunction(self.tree)
    lf.setAlignment(first_posn)
    first_lnL = lf.getLogLikelihood()
    lf.setAlignment(second_posn)
    second_lnL = lf.getLogLikelihood()
    expect_lnL = first_lnL + second_lnL

    # the joint model
    lf.setAlignment(self.aln)
    joint_lnL = lf.getLogLikelihood()

    # setting the full alignment, which has different motif probs, should
    # produce a different lnL
    self.assertNotAlmostEqual(expect_lnL, joint_lnL)

    # set the arguments for taking position specific mprobs
    monomers = Nucleotide(motif_length=2, mprob_model='monomers',
                          do_scaling=False)
    lf = monomers.makeLikelihoodFunction(self.tree)
    lf.setAlignment(self.aln)
    self.assertFloatEqual(expect_lnL, lf.getLogLikelihood())
def compare_models(motif_probs, motif_length):
    """check that 'conditional' and 'monomers' mprob models yield the same lnL

    ``self`` is taken from the enclosing scope.
    """
    # if the 1st and 2nd position motifs are independent of each other
    # then conditional is the same as positional
    monomers_model = Nucleotide(
        motif_length=motif_length,
        motif_probs=motif_probs,
        mprob_model='monomers',
    )
    conditional_model = Nucleotide(
        motif_length=motif_length,
        motif_probs=motif_probs,
        mprob_model='conditional',
    )

    monomers_lf = monomers_model.makeLikelihoodFunction(self.tree)
    monomers_lf.setParamRule('length', is_independent=False, init=0.4)
    monomers_lf.setAlignment(self.aln)

    conditional_lf = conditional_model.makeLikelihoodFunction(self.tree)
    conditional_lf.setParamRule('length', is_independent=False, init=0.4)
    conditional_lf.setAlignment(self.aln)

    conditional_lnL = conditional_lf.getLogLikelihood()
    monomers_lnL = monomers_lf.getLogLikelihood()
    self.assertFloatEqual(conditional_lnL, monomers_lnL)
def test_cond_pos_differ(self):
    """lnL under 'monomer' and 'conditional' should differ for non-multiplicative motif probs"""
    dinuc_probs = dict(
        AA=0.088506666666666664,
        AC=0.044746666666666664,
        GT=0.056693333333333332,
        AG=0.070199999999999999,
        CC=0.048653333333333333,
        TT=0.10678666666666667,
        CG=0.0093600000000000003,
        GG=0.049853333333333333,
        GC=0.040253333333333335,
        AT=0.078880000000000006,
        GA=0.058639999999999998,
        TG=0.081626666666666667,
        TA=0.068573333333333333,
        CA=0.06661333333333333,
        TC=0.060866666666666666,
        CT=0.069746666666666665,
    )

    # build both likelihood functions the same way, 'monomer' first
    lfs = []
    for mprob_model in ('monomer', 'conditional'):
        submod = Nucleotide(motif_length=2, motif_probs=dinuc_probs,
                            mprob_model=mprob_model)
        lf = submod.makeLikelihoodFunction(self.tree)
        lf.setParamRule('length', is_independent=False, init=0.4)
        lf.setAlignment(self.aln)
        lfs.append(lf)

    mg_lf, cd_lf = lfs
    self.assertNotAlmostEqual(mg_lf.getLogLikelihood(),
                              cd_lf.getLogLikelihood())