def test_EstimateDistances_fromUnaligned(self):
    """Exercise distance estimation starting from unaligned sequences.

    The estimator is run twice on the sequence collection with pairwise
    alignment switched on, first with rigorous_align=True and then with
    rigorous_align=False. Both runs are checked against the same canned
    JC69 distances.
    """
    expected = {
        ('b', 'e'): 0.440840,
        ('c', 'e'): 0.440840,
        ('a', 'c'): 0.088337,
        ('a', 'b'): 0.188486,
        ('a', 'e'): 0.440840,
        ('b', 'c'): 0.0883373,
    }
    for rigorous in (True, False):
        calc = EstimateDistances(self.collection, JC69(),
                                 do_pair_align=True,
                                 rigorous_align=rigorous)
        calc.run()
        # hand over a fresh copy so each run compares against its own dict
        self.assertDistsAlmostEqual(dict(expected),
                                    calc.getPairwiseDistances())
def test_EstimateDistances_fromUnaligned(self):
    """Check JC69 distances estimated from unaligned sequences.

    Runs the estimator with do_pair_align=True, once with
    rigorous_align=True and once with rigorous_align=False, and compares
    each outcome with the same set of canned distances.
    """
    def canned():
        # build a new dict on every call, one per estimator run
        return {
            ('b', 'e'): 0.440840,
            ('c', 'e'): 0.440840,
            ('a', 'c'): 0.088337,
            ('a', 'b'): 0.188486,
            ('a', 'e'): 0.440840,
            ('b', 'c'): 0.0883373,
        }

    rigorous = EstimateDistances(self.collection, JC69(),
                                 do_pair_align=True, rigorous_align=True)
    rigorous.run()
    self.assertDistsAlmostEqual(canned(), rigorous.getPairwiseDistances())

    non_rigorous = EstimateDistances(self.collection, JC69(),
                                     do_pair_align=True,
                                     rigorous_align=False)
    non_rigorous.run()
    self.assertDistsAlmostEqual(canned(),
                                non_rigorous.getPairwiseDistances())
def test_EstimateDistances_modify_lf(self):
    """Check that a modify_lf callback is applied to the likelihood function.

    The callback makes 'kappa' constant on an HKY85 likelihood function
    and re-optimises locally. The resulting distances are expected to
    match those estimated with F81 on the same alignment.
    """
    def hold_kappa_constant(like_fn):
        like_fn.setParamRule('kappa', is_constant=True)
        like_fn.optimise(local=True)
        return like_fn

    constrained = EstimateDistances(self.al, HKY85(),
                                    modify_lf=hold_kappa_constant)
    constrained.run()
    observed = constrained.getPairwiseDistances()

    reference = EstimateDistances(self.al, F81())
    reference.run()
    wanted = reference.getPairwiseDistances()

    self.assertDistsAlmostEqual(wanted, observed)
def test_EstimateDistances_fromThreeway(self):
    """Exercise EstimateDistances with threeway=True.

    Distances are summarised with summary_function="mean" and compared
    against canned JC69 values.
    """
    estimator = EstimateDistances(self.al, JC69(), threeway=True)
    estimator.run()
    expected = dict([
        (('b', 'e'), 0.495312),
        (('c', 'e'), 0.479380),
        (('a', 'c'), 0.089934),
        (('a', 'b'), 0.190021),
        (('a', 'e'), 0.495305),
        (('b', 'c'), 0.0899339),
    ])
    observed = estimator.getPairwiseDistances(summary_function="mean")
    self.assertDistsAlmostEqual(expected, observed)
def test_EstimateDistancesWithMotifProbs(self):
    """Check HKY85 distances when motif probabilities are supplied."""
    base_freqs = dict(A=0.1, C=0.2, G=0.2, T=0.5)
    estimator = EstimateDistances(self.al, HKY85(), motif_probs=base_freqs)
    estimator.run()
    expected = {
        ('a', 'c'): 0.07537,
        ('b', 'c'): 0.07537,
        ('a', 'e'): 0.39921,
        ('a', 'b'): 0.15096,
        ('b', 'e'): 0.39921,
        ('c', 'e'): 0.37243,
    }
    self.assertDistsAlmostEqual(expected, estimator.getPairwiseDistances())
def test_EstimateDistances(self):
    """Exercise EstimateDistances with JC69 and writing results to file.

    The pairwise distances are compared with canned values, after which
    the estimator writes to a scratch file that is then removed.
    """
    estimator = EstimateDistances(self.al, JC69())
    estimator.run()
    expected = {
        ('b', 'e'): 0.440840,
        ('c', 'e'): 0.440840,
        ('a', 'c'): 0.088337,
        ('a', 'b'): 0.188486,
        ('a', 'e'): 0.440840,
        ('b', 'c'): 0.0883373,
    }
    observed = estimator.getPairwiseDistances()
    self.assertDistsAlmostEqual(expected, observed)

    # exercise writing to file
    scratch_path = 'junk.txt'
    estimator.writeToFile(scratch_path)
    try:
        os.remove(scratch_path)
    except OSError:
        # removal is best-effort: probably running in parallel
        pass
def TreeAlign(model, seqs, tree=None, indel_rate=0.01, indel_length=0.01,
              ui=None, ests_from_pairwise=True, param_vals=None):
    """Returns a multiple alignment and tree.

    Uses the provided substitution model and a tree for determining the
    progressive order. If a tree is not provided a Neighbour Joining tree is
    constructed from pairwise distances estimated from pairwise aligning the
    sequences. If running in parallel, only the distance estimation is
    parallelised and only the master CPU returns the alignment and tree,
    other CPUs return None, None.

    Arguments:
        - model: a substitution model
        - seqs: a sequence collection, or a dict keyed by sequence name
        - tree: optional tree whose tip names must equal the sequence
          names; an AssertionError is raised when they differ
        - indel_rate, indel_length: parameters for the progressive pair-HMM
        - ui: object with a display(message) method used for progress
          reporting; when None no progress message is shown.
          NOTE(review): presumably injected by a decorator that is not
          visible here -- confirm.
        - ests_from_pairwise: if no tree provided and True, the median value
          of the substitution model parameters are used
        - param_vals: named key, value pairs for model parameters. These
          override ests_from_pairwise.

    Returns the tuple (alignment, tree). The alignment's Info attribute
    carries the parameter values used under the "AlignParams" key.
    """
    _exclude_params = ['mprobs', 'rate', 'bin_switch']

    # work on a private copy, this dict is extended further down
    param_vals = dict(param_vals) if param_vals else {}

    if isinstance(seqs, dict):
        seq_names = list(seqs.keys())
    else:
        seq_names = seqs.getSeqNames()

    two_seqs = len(seq_names) == 2

    if tree:
        # sorted() rather than list.sort() so lists handed back by the tree
        # or the sequence collection are never reordered in place
        tip_names = sorted(tree.getTipNames())
        ordered_seq_names = sorted(seq_names)
        assert tip_names == ordered_seq_names, \
            "names don't match between seqs and tree: tree=%s; seqs=%s" % \
            (tip_names, ordered_seq_names)
        ests_from_pairwise = False
    elif two_seqs:
        # reuse seq_names: seqs may be a dict, which has no getSeqNames()
        tree = LoadTree(tip_names=seq_names)
        ests_from_pairwise = False
    else:
        if ests_from_pairwise:
            est_params = [param for param in model.getParamList()
                          if param not in _exclude_params]
        else:
            est_params = None

        dcalc = EstimateDistances(seqs, model, do_pair_align=True,
                                  est_params=est_params)
        dcalc.run()
        dists = dcalc.getPairwiseDistances()
        tree = NJ.nj(dists)

    LF = model.makeLikelihoodFunction(tree.bifurcating(name_unnamed=True),
                                      aligned=False)

    # ests_from_pairwise can only still be True when the distance
    # calculation above was run, so dcalc and est_params are defined here
    if ests_from_pairwise and not param_vals:
        # we use the Median to avoid the influence of outlier pairs
        param_vals = {}
        for param in est_params:
            numbers = dcalc.getParamValues(param)
            print("Param Estimate Summary Stats: %s" % param)
            print(numbers.summarize())
            param_vals[param] = numbers.Median

    if ui is not None:
        ui.display("Doing %s alignment" %
                   ("pairwise" if two_seqs else "progressive"))

    # NOTE(review): the extent of this with-block was reconstructed from
    # flattened source text -- confirm against the upstream file
    with LF.updatesPostponed():
        for param, val in param_vals.items():
            LF.setParamRule(param, value=val, is_constant=True)
        LF.setParamRule('indel_rate', value=indel_rate, is_constant=True)
        LF.setParamRule('indel_length', value=indel_length,
                        is_constant=True)
        LF.setSequences(seqs)

    edge = LF.getLogLikelihood().edge
    align = edge.getViterbiPath().getAlignment()

    # record the parameter values used on the returned alignment
    info = Info()
    info["AlignParams"] = param_vals
    info["AlignParams"].update(dict(indel_length=indel_length,
                                    indel_rate=indel_rate))
    align.Info = info
    return align, tree