def test_get_raw_estimates(self):
    """getAllParamValues gives the expected raw kappa/length per pair"""
    estimator = EstimateDistances(self.al, HKY85(), est_params=['kappa'])
    estimator.run()
    expected = {
        ('a', 'b'): {
            'kappa': 1.0000226766004808e-06,
            'length': 0.18232155856115662,
        },
        ('a', 'c'): {
            'kappa': 1.0010380037049357e-06,
            'length': 0.087070406623635604,
        },
        ('a', 'e'): {
            'kappa': 2.3965871843412687,
            'length': 0.4389176272584539,
        },
        ('b', 'e'): {
            'kappa': 2.3965871854366592,
            'length': 0.43891762729173389,
        },
        ('c', 'b'): {
            'kappa': 1.0010380037049357e-06,
            'length': 0.087070406623635604,
        },
        ('c', 'e'): {
            'kappa': 0.57046787478038707,
            'length': 0.43260232210282784,
        },
    }
    observed = estimator.getAllParamValues()
    # Only the pairs and parameters listed in `expected` are compared; any
    # extra entries in the returned mapping are not inspected.
    for pair, params in expected.items():
        for name, value in params.items():
            self.assertAlmostEqual(observed[pair][name], value)
def test_EstimateDistances_other_model_params(self):
    """other model params can be retrieved from EstimateDistances"""
    estimator = EstimateDistances(self.al, HKY85(), est_params=['kappa'])
    estimator.run()
    # getParamValues returns a summary object (Mean, Median, ...); only the
    # mean is checked here, to 4 decimal places.
    kappa_summary = estimator.getParamValues('kappa')
    self.assertAlmostEqual(kappa_summary.Mean, 0.8939, 4)
    # getPairwiseParam returns the per-pair values.  Per the original note it
    # is what getParamValues uses internally, so its values are covered by
    # the assertion above; this call only confirms it can be invoked.
    estimator.getPairwiseParam('kappa')
def test_progressive_params(self):
    """exercise progressive alignment when model params are supplied"""
    gapped_seqs = {
        'A': 'tacagta',
        'B': 'tac-gtc',
        'C': 'ta---ta',
        'D': 'cac-cta',
    }
    # The checking itself is delegated to the shared _test_aln helper.
    self._test_aln(gapped_seqs, model=HKY85(), param_vals=[('kappa', 2.0)])
def test_EstimateDistancesWithMotifProbs(self):
    """EstimateDistances honours motif probs passed in by the caller"""
    base_freqs = {'A': 0.1, 'C': 0.2, 'G': 0.2, 'T': 0.5}
    estimator = EstimateDistances(self.al, HKY85(), motif_probs=base_freqs)
    estimator.run()
    # Previously recorded distances for these motif probabilities.
    expected = {
        ('a', 'c'): 0.07537,
        ('b', 'c'): 0.07537,
        ('a', 'e'): 0.39921,
        ('a', 'b'): 0.15096,
        ('b', 'e'): 0.39921,
        ('c', 'e'): 0.37243,
    }
    observed = estimator.getPairwiseDistances()
    self.assertDistsAlmostEqual(expected, observed)
def test_EstimateDistances_modify_lf(self):
    """a modify_lf callback can alter the likelihood function

    The callback holds kappa constant and re-optimises locally; the
    resulting HKY85 distances are compared against plain F81 distances.
    """
    def hold_kappa_constant(lf):
        lf.setParamRule('kappa', is_const=True)
        lf.optimise(local=True, show_progress=False)
        return lf

    constrained = EstimateDistances(self.al, HKY85(),
                                    modify_lf=hold_kappa_constant)
    constrained.run()
    observed = constrained.getPairwiseDistances()

    reference = EstimateDistances(self.al, F81())
    reference.run()
    expected = reference.getPairwiseDistances()

    self.assertDistsAlmostEqual(expected, observed)
def test_progressive_est_tree(self):
    """exercise progressive alignment when no guide tree is passed"""
    unaligned = {
        'A': "TGTGGCACAAATGCTCATGCCAGCTCTTTACAGCATGAGAACA",
        'B': "TGTGGCACAGATACTCATGCCAGCTCATTACAGCATGAGAACAGCAGTTT",
        'C': "TGTGGCACAAGTACTCATGCCAGCTCAGTACAGCATGAGAACAGCAGTTT",
    }
    seqs = LoadSeqs(data=unaligned, aligned=False)
    # TreeAlign hands back an (alignment, tree) pair; only the alignment is
    # checked by this test.
    aln, _tree = cogent.align.progressive.TreeAlign(
        HKY85(),
        seqs,
        show_progress=False,
        param_vals={'kappa': 4.0},
    )
    expected = {
        'A': 'TGTGGCACAAATGCTCATGCCAGCTCTTTACAGCATGAGAACA-------',
        'C': 'TGTGGCACAAGTACTCATGCCAGCTCAGTACAGCATGAGAACAGCAGTTT',
        'B': 'TGTGGCACAGATACTCATGCCAGCTCATTACAGCATGAGAACAGCAGTTT',
    }
    self.assertEqual(aln.todict(), expected)
def bh_fit(sa, st, return_lfs=None, global_optimisation=False, **kw):
    """Fit a DiscreteSubstitutionModel, seeded from an HKY85 lead-in fit.

    An HKY85 likelihood function is built on ``st``, given ``sa`` and
    optimised first.  Its fitted state is then passed to
    ``populate_parameters`` to initialise the DiscreteSubstitutionModel
    likelihood function, which is optimised in turn.

    Arguments:
        - sa: passed to ``setAlignment`` (presumably the alignment).
        - st: passed to ``makeLikelihoodFunction`` (presumably the tree).
        - return_lfs: if truthy, return the likelihood function itself;
          otherwise return ``deflate_likelihood_function(lf)``.
        - global_optimisation: if true, optimise with ``local=False``.
        - kw: accepted but not used by this function.

    Returns a one-element list holding the fit (or its deflated form).
    """
    optimiser_args = {
        'local': not global_optimisation,
        'show_progress': False,
        'limit_action': 'raise',
    }
    rule_args = {'is_independent': True, 'is_constant': False}

    # Lead-in: HKY85 fit used only as a source of starting values.
    hky = HKY85()
    seed_lf = hky.makeLikelihoodFunction(st)
    seed_lf.setAlignment(sa)
    seed_lf.optimise(**optimiser_args)

    # Main fit: discrete model initialised from the lead-in fit.
    discrete = DiscreteSubstitutionModel(
        DNA.Alphabet,
        optimise_motif_probs=True,
        model_gaps=False,
        recode_gaps=True,
        name='DiscreteSubstitutionModel',
    )
    lf = discrete.makeLikelihoodFunction(st)
    lf.setAlignment(sa)
    populate_parameters(lf, seed_lf, **rule_args)
    lf.optimise(**optimiser_args)

    if return_lfs:
        return [lf]
    return [deflate_likelihood_function(lf)]
#!/usr/bin/env python
# taken from http://pycogent.sourceforge.net/
#
# Fits HKY85 twice to the same alignment and tree: first with a local clock
# set for the Human/HowlerMon pair, then with 'length' made independent.  The
# two fits are compared with a likelihood ratio statistic.
#
# Printing uses the single-argument parenthesised form, which produces the
# same output as the Python 2 print statement while remaining valid syntax on
# Python 3.
from cogent import LoadSeqs, LoadTree
from cogent.evolve.models import HKY85
from cogent.maths import stats

aln = LoadSeqs(filename="data/long_testseqs.fasta")
t = LoadTree(filename="data/test.tree")

sm = HKY85()
lf = sm.makeLikelihoodFunction(t, digits=2, space=3)

# First fit: the local clock is set before the alignment is attached.
lf.setLocalClock("Human", "HowlerMon")
lf.setAlignment(aln)
lf.optimise(show_progress=False)
lf.setName("clock")
print("lf =\n" + str(lf))
print("lf.getStatistics() =\n" + str(lf.getStatistics()))
opt_tr = lf.getAnnotatedTree()
print("opt_tr.getNewick() =\n" + str(opt_tr.getNewick(with_distances=True)))

# Keep the first fit's log-likelihood and free-parameter count for the test.
null_lnL = lf.getLogLikelihood()
null_nfp = lf.getNumFreeParams()

# Second fit: 'length' is made independent, then the model is re-optimised.
lf.setParamRule('length', is_independent=True)
lf.optimise(show_progress=False)
lf.setName("non clock")
print(lf)
opt_tr = lf.getAnnotatedTree()
print("opt_tr.getNewick() =\n" + str(opt_tr.getNewick(with_distances=True)))

# Likelihood ratio statistic and the difference in free-parameter counts.
LR = 2 * (lf.getLogLikelihood() - null_lnL)
df = lf.getNumFreeParams() - null_nfp
# presumably the chi-square tail probability for LR on df degrees of freedom
# -- confirm against cogent.maths.stats.
# NOTE(review): P is computed but not printed or used in the visible script.
P = stats.chisqprob(LR, df)
#!/usr/bin/env python
# taken from http://pycogent.sourceforge.net/
#
# Fits HKY85 to an alignment on a tree built from the alignment's sequence
# names, then prints the optimised likelihood function.
from cogent.evolve.models import HKY85
from cogent import LoadSeqs, LoadTree

model = HKY85()
aln = LoadSeqs("data/primate_cdx2_promoter.fasta")
# The tree is constructed from the sequence names alone (no tree file).
tree = LoadTree(tip_names=aln.Names)

lf = model.makeLikelihoodFunction(tree)
lf.setAlignment(aln)
lf.optimise(show_progress=False)
# Parenthesised single-argument print: same output as the Python 2 print
# statement, and valid syntax on Python 3.
print(lf)