def test_distribution():
    """jsd.distribution on the Mouse sequence should agree with the motif
    probabilities a GTR likelihood function derives from the same data"""
    species = ('Mouse', )
    mouse_aln = get_aln('General', 1031).takeSeqs(species)
    observed = jsd.distribution(mouse_aln.getSeq('Mouse'))

    # reference values: motif probs set from the alignment on a GTR
    # likelihood function built over a tree with the single Mouse tip
    single_tip_tree = LoadTree(tip_names=species)
    lf = GTR().makeLikelihoodFunction(single_tip_tree)
    lf.setMotifProbsFromData(mouse_aln)
    expected = lf.getMotifProbs()

    assert_array_almost_equal(array(expected), array(observed))
def test_distribution():
    """jsd.distribution on the Mouse sequence should agree with the motif
    probabilities a GTR likelihood function derives from the same data"""
    fasta_path = os.path.join(get_data_dir(), 'General_1031.fasta.gz')
    with GzipFile(fasta_path) as handle:
        raw = handle.read()

    species = ('Mouse', )
    mouse_aln = Alignment(data=raw).takeSeqs(species)
    observed = jsd.distribution(mouse_aln.getSeq('Mouse'))

    # reference values: motif probs set from the alignment on a GTR
    # likelihood function built over a tree with the single Mouse tip
    single_tip_tree = LoadTree(tip_names=species)
    lf = GTR().makeLikelihoodFunction(single_tip_tree)
    lf.setMotifProbsFromData(mouse_aln)
    expected = lf.getMotifProbs()

    assert_array_almost_equal(array(expected), array(observed))
def get_pairwise_distance(aln):
    """Return the values of the pairwise distances estimated under GTR.

    Each record of aln contributes a '>' + id header line followed by its
    sequence as a string; those lines are loaded with LoadSeqs and handed
    to distance.EstimateDistances.
    """
    fasta_lines = []
    for record in aln:
        fasta_lines.append('>{}'.format(record.id))
        fasta_lines.append(str(record.seq))

    estimator = distance.EstimateDistances(LoadSeqs(data=fasta_lines),
                                           submodel=GTR())
    estimator.run(show_progress=False)
    return estimator.getPairwiseDistances().values()
def inflate_likelihood_function(data, model=None):
    """Rebuild a likelihood function from its dictionary description.

    Arguments:
        data -- mapping read for 'with_rate', 'mprobs', 'params', either
            'tree' or 'tip_names', optionally 'dependencies', and (only
            when model is None) 'name'
        model -- optional callable; when given it is called with no
            arguments to produce the substitution model and data['name']
            is not consulted

    Returns the likelihood function with motif probs, parameter initial
    values and parameter dependencies applied.

    Raises NotImplementedError when data['name'] is not a recognised model.
    """
    supported_subs_models = ('GeneralStationary', 'General',
                             'DiscreteSubstitutionModel', 'General_with_gaps')

    # --- substitution model ------------------------------------------------
    if model is not None:
        model = model()
    else:
        name = data['name']
        if name == 'GTR':
            gtr_options = {'optimise_motif_probs': True}
            if data['with_rate']:
                gtr_options['with_rate'] = True
                gtr_options['distribution'] = 'gamma'
            model = GTR(**gtr_options)
        elif name == 'General_with_gaps':
            assert not data['with_rate'], name + ' plus Gamma not supported'
            model = General(DNA.Alphabet, optimise_motif_probs=True,
                            model_gaps=True, recode_gaps=False,
                            name='General_with_gaps')
        elif name in supported_subs_models:
            assert not data['with_rate'], name + ' plus Gamma not supported'
            # name has just been checked against supported_subs_models, so
            # eval only ever resolves one of those class names
            model_class = eval(name)
            model = model_class(DNA.Alphabet, optimise_motif_probs=True,
                                model_gaps=False, recode_gaps=True, name=name)
        else:
            st = 'inflate_likelihood_function: unsupported model ' + name
            raise NotImplementedError(st)

    # --- tree --------------------------------------------------------------
    if 'tree' in data:
        tree = LoadTree(treestring=data['tree'].encode('utf-8'))
    else:
        tree = LoadTree(
            tip_names=[tip.encode('utf-8') for tip in data['tip_names']])

    # --- likelihood function -----------------------------------------------
    lf_options = {'bins': 4} if data['with_rate'] else {}
    lf = model.makeLikelihoodFunction(tree, **lf_options)

    with lf.updatesPostponed():
        lf.setMotifProbs(data['mprobs'])

        params = data['params']
        for par_name in params:
            dims = lf.defn_for[par_name].valid_dimensions
            if len(dims) == 0:
                # parameter has no valid dimensions: one global value
                lf.setParamRule(par_name, init=params[par_name])
                continue

            per_edge = 'edge' in dims
            per_bin = 'bin' in dims
            if per_edge and per_bin:
                for edge, by_bin in params[par_name].items():
                    for bin_name, init in by_bin.items():
                        lf.setParamRule(par_name, edge=edge, bin=bin_name,
                                        init=init)
            elif per_edge:
                for edge, init in params[par_name].items():
                    lf.setParamRule(par_name, edge=edge, init=init)
            elif per_bin:
                for bin_name, init in params[par_name].items():
                    lf.setParamRule(par_name, bin=bin_name, init=init)

        if 'dependencies' in data:
            for par_name, scopes in data['dependencies'].items():
                for scope in scopes:
                    lf.setParamRule(par_name, is_independent=False, **scope)

    return lf
def _fit_init(aln, tree, model, gc, omega_indep, **kw): if model == 'NG': sm = GTR(optimise_motif_probs=True) elif model in ('NFG', 'MG94G', 'MG94GTR', 'GNC', 'Y98GTR'): sm = MG94GTR(optimise_motif_probs=True, gc=gc) elif model == 'CNFGTR': # CNFGTR nests no models here sm = CNFGTR(optimise_motif_probs=True, gc=gc) elif model == 'Y98': # No need for nested fitting for Y98 sm = Y98(optimise_motif_probs=True, gc=gc) lf = sm.makeLikelihoodFunction(tree) lf.setAlignment(aln) with lf.updatesPostponed(): for param in lf.getParamNames(): if '/' in param: lf.setParamRule(param, **kw) if model in ('CNFGTR', 'Y98'): # set the omegas to be independent lf.setParamRule('omega', is_independent=omega_indep) lf.setParamRule('length', is_independent=True) lf.optimise(local=True, show_progress=False, limit_action='raise') return lf
# _fit: build a likelihood function for `model` on tree `st`, attach the
# alignment `sa`, apply parameter rules, optimise it and return it.
#
#   model       -- must be one of 'General', 'GTR', 'DiscreteSubstitutionModel',
#                  'GeneralStationary'; anything else fails an assert
#   sa          -- alignment passed to lf.setAlignment
#   st          -- tree passed to makeLikelihoodFunction; its tip names are
#                  used to derive the ingroup when outgroup is given
#   outgroup    -- when not None, the 'length' rule is applied with
#                  is_independent=False to every tip other than outgroup, and
#                  'General' is built with GeneralBen instead of eval(model);
#                  rejected by assert for 'DiscreteSubstitutionModel'
#   param_limit -- forwarded as upper= to populate_parameters / setParamRule
#   with_rate   -- rejected by assert for every model except 'GTR'; for 'GTR'
#                  it selects the gamma distribution with bins=4, holds
#                  'bprobs' constant and caps 'rate_shape' at 100
#   local       -- forwarded to lf.optimise
#   lf_from     -- when not None, populate_parameters(lf, lf_from, ...) is
#                  called before the other parameter rules
#   **kw        -- accepted but not referenced anywhere in the body
#
# NOTE(review): the body is flattened onto one physical line, so block nesting
# cannot be read from this text. In particular it is unclear whether
# `if model == 'GTR'` / `elif param_limit is not None` are nested inside
# `if lf_from is not None` or follow it at the same level -- the two readings
# apply different parameter rules. Confirm against version control before
# re-indenting.
# NOTE(review): input validation is done with assert, which Python removes
# under -O; eval(model) would then run on an unchecked string. Consider an
# explicit raise if this can ever receive external input.
def _fit(model, sa, st, outgroup=None, param_limit=None, with_rate=False, local=True, lf_from=None, **kw): assert model not in ('General', 'DiscreteSubstitutionModel', 'GeneralStationary') or not with_rate, model + ' plus Gamma not supported' assert not model == 'DiscreteSubstitutionModel' or outgroup is None, \ 'Clock test not supported for DiscreteSubstitutionModel' assert model in ('General', 'GTR', 'DiscreteSubstitutionModel', 'GeneralStationary'), model + ' not supported' if model == 'GTR': if with_rate: sm = GTR(optimise_motif_probs=True, with_rate=True, distribution='gamma') lf = sm.makeLikelihoodFunction(st, bins=4) lf.setParamRule('bprobs', is_constant=True) else: sm = GTR(optimise_motif_probs=True) lf = sm.makeLikelihoodFunction(st) else: if model == 'General' and outgroup is not None: sm = GeneralBen(DNA.Alphabet, recode_gaps=True, model_gaps=False, optimise_motif_probs=True) else: sm = eval(model)(DNA.Alphabet, recode_gaps=True, model_gaps=False, optimise_motif_probs=True, name=model) lf = sm.makeLikelihoodFunction(st) lf.setAlignment(sa) if lf_from is not None: populate_parameters(lf, lf_from, is_independent=True, is_constant=False, upper=param_limit) if model == 'GTR': for param in get_model_params(lf): lf.setParamRule(param, is_independent=False) elif param_limit is not None: for param in get_model_params(lf): dependencies = _get_dependencies_for(param, lf) for scope in dependencies: lf.setParamRule(param, upper=param_limit, is_independent=False, **scope) if outgroup is not None: ingroup = [e for e in st.getTipNames() if e != outgroup] lf.setParamRule('length', edges=ingroup, is_independent=False) if with_rate: lf.setParamRule('rate_shape', upper=100) lf.optimise(local=local, show_progress=False, limit_action='raise') return lf
def test_bin_options(self):
    # Both model constructors are called with the same with_rate /
    # distribution options; the test passes if neither call raises.
    gamma_options = {'with_rate': True, 'distribution': 'gamma'}
    model = WG01(**gamma_options)
    model = GTR(**gamma_options)