Beispiel #1
0
def test_distribution():
    """distribution should return empirical distribution for DNA sequence

    The value returned by ``jsd.distribution`` for the 'Mouse' sequence is
    compared against the motif probabilities that a GTR likelihood function
    reports after ``setMotifProbsFromData`` on the same one-sequence
    alignment.
    """
    mouse_only = ('Mouse', )
    mouse_aln = get_aln('General', 1031).takeSeqs(mouse_only)
    observed = jsd.distribution(mouse_aln.getSeq('Mouse'))

    # Reference values: motif probs set from the data on a tree whose only
    # tip is 'Mouse'.
    tree = LoadTree(tip_names=mouse_only)
    lik_fn = GTR().makeLikelihoodFunction(tree)
    lik_fn.setMotifProbsFromData(mouse_aln)
    expected = lik_fn.getMotifProbs()

    assert_array_almost_equal(array(expected), array(observed))
Beispiel #2
0
def test_distribution():
    """distribution should return empirical distribution for DNA sequence

    The alignment is read from the gzipped file 'General_1031.fasta.gz' in
    the directory returned by ``get_data_dir()``. The value returned by
    ``jsd.distribution`` for the 'Mouse' sequence is compared against the
    motif probabilities a GTR likelihood function reports after
    ``setMotifProbsFromData`` on the same one-sequence alignment.
    """
    fasta_path = os.path.join(get_data_dir(), 'General_1031.fasta.gz')
    with GzipFile(fasta_path) as handle:
        raw = handle.read()

    mouse_aln = Alignment(data=raw).takeSeqs(('Mouse', ))
    observed = jsd.distribution(mouse_aln.getSeq('Mouse'))

    # Reference values: motif probs set from the data on a tree whose only
    # tip is 'Mouse'.
    tree = LoadTree(tip_names=('Mouse', ))
    lik_fn = GTR().makeLikelihoodFunction(tree)
    lik_fn.setMotifProbsFromData(mouse_aln)
    expected = lik_fn.getMotifProbs()

    assert_array_almost_equal(array(expected), array(observed))
def get_pairwise_distance(aln):
    """Return the pairwise distance values estimated under GTR for ``aln``.

    ``aln`` is iterated and each item must expose ``.id`` and ``.seq``.
    The items are flattened into alternating '>id' / sequence strings,
    loaded with ``LoadSeqs`` and handed to ``distance.EstimateDistances``.
    The result is whatever ``.values()`` of the pairwise-distance mapping
    returns.
    """
    fasta_lines = []
    for record in aln:
        fasta_lines.append('>{}'.format(record.id))
        fasta_lines.append(str(record.seq))

    pairwise_aln = LoadSeqs(data=fasta_lines)
    estimator = distance.EstimateDistances(pairwise_aln, submodel=GTR())
    estimator.run(show_progress=False)
    return estimator.getPairwiseDistances().values()
Beispiel #4
0
def inflate_likelihood_function(data, model=None):
    """Rebuild a likelihood function from its dictionary description.

    Arguments:
        data: mapping read through the keys 'name', 'with_rate', 'mprobs'
            and 'params', plus either 'tree' (a tree string) or 'tip_names',
            and optionally 'dependencies' (parameter name -> list of scope
            dicts forwarded to ``setParamRule``).
        model: optional callable. When given it is called without arguments
            to obtain the substitution model and ``data['name']`` is not
            consulted for model selection.

    Returns:
        The likelihood function with motif probs, parameter initial values
        and dependencies applied.

    Raises:
        NotImplementedError: no ``model`` was given and ``data['name']`` is
            not a supported model name.
        AssertionError: 'with_rate' is set for a model other than GTR.
    """
    gamma_unsupported = ' plus Gamma not supported'
    whitelisted_names = ('GeneralStationary', 'General',
        'DiscreteSubstitutionModel', 'General_with_gaps')

    if model is not None:
        model = model()
    else:
        model_name = data['name']
        if model_name == 'GTR':
            gtr_options = dict(optimise_motif_probs=True)
            if data['with_rate']:
                gtr_options.update(with_rate=True, distribution='gamma')
            model = GTR(**gtr_options)
        elif model_name == 'General_with_gaps':
            assert not data['with_rate'], model_name + gamma_unsupported
            model = General(DNA.Alphabet, optimise_motif_probs=True,
                    model_gaps=True, recode_gaps=False,
                    name='General_with_gaps')
        elif model_name in whitelisted_names:
            assert not data['with_rate'], model_name + gamma_unsupported
            # NOTE(review): eval() resolves the model class from its name.
            # It is only reached for names in whitelisted_names, so no
            # arbitrary expression from `data` is evaluated here.
            model_class = eval(model_name)
            model = model_class(DNA.Alphabet, optimise_motif_probs=True,
                    model_gaps=False, recode_gaps=True, name=model_name)
        else:
            message = ('inflate_likelihood_function: unsupported model '
                    + model_name)
            raise NotImplementedError(message)

    # The tree comes from an explicit tree string when present, otherwise
    # it is built from the tip names alone.
    if 'tree' in data:
        tree = LoadTree(treestring=data['tree'].encode('utf-8'))
    else:
        encoded_tips = [tip.encode('utf-8') for tip in data['tip_names']]
        tree = LoadTree(tip_names=encoded_tips)

    # Rate-heterogeneity descriptions are inflated with four bins.
    lf_options = {'bins': 4} if data['with_rate'] else {}
    lf = model.makeLikelihoodFunction(tree, **lf_options)

    with lf.updatesPostponed():
        lf.setMotifProbs(data['mprobs'])
        params = data['params']
        for param_name in params:
            dims = lf.defn_for[param_name].valid_dimensions
            if len(dims) == 0:
                # Global parameter: a single initial value.
                lf.setParamRule(param_name, init=params[param_name])
                continue

            per_edge = 'edge' in dims
            if per_edge and 'bin' in dims:
                for edge, per_bin in params[param_name].items():
                    for bin_name, init in per_bin.items():
                        lf.setParamRule(param_name, edge=edge, bin=bin_name,
                                init=init)
            elif per_edge:
                for edge, init in params[param_name].items():
                    lf.setParamRule(param_name, edge=edge, init=init)
            elif 'bin' in dims:
                for bin_name, init in params[param_name].items():
                    lf.setParamRule(param_name, bin=bin_name, init=init)
            # Parameters with any other dimensions are left untouched.

        if 'dependencies' in data:
            for param_name, scopes in data['dependencies'].items():
                for scope in scopes:
                    lf.setParamRule(param_name, is_independent=False, **scope)

    return lf
Beispiel #5
0
def _fit_init(aln, tree, model, gc, omega_indep, **kw):
    if model == 'NG':
        sm = GTR(optimise_motif_probs=True)
    elif model in ('NFG', 'MG94G', 'MG94GTR', 'GNC', 'Y98GTR'):
        sm = MG94GTR(optimise_motif_probs=True, gc=gc)
    elif model == 'CNFGTR':  # CNFGTR nests no models here
        sm = CNFGTR(optimise_motif_probs=True, gc=gc)
    elif model == 'Y98':  # No need for nested fitting for Y98
        sm = Y98(optimise_motif_probs=True, gc=gc)
    lf = sm.makeLikelihoodFunction(tree)
    lf.setAlignment(aln)
    with lf.updatesPostponed():
        for param in lf.getParamNames():
            if '/' in param:
                lf.setParamRule(param, **kw)
    if model in ('CNFGTR', 'Y98'):  # set the omegas to be independent
        lf.setParamRule('omega', is_independent=omega_indep)
        lf.setParamRule('length', is_independent=True)
    lf.optimise(local=True, show_progress=False, limit_action='raise')
    return lf
Beispiel #6
0
def _fit(model, sa, st, outgroup=None, param_limit=None, with_rate=False,
        local=True, lf_from=None, **kw): 
    assert model not in ('General', 'DiscreteSubstitutionModel',
        'GeneralStationary') or not with_rate, model + ' plus Gamma not supported'
    assert not model == 'DiscreteSubstitutionModel' or outgroup is None, \
            'Clock test not supported for DiscreteSubstitutionModel'
    assert model in ('General', 'GTR', 'DiscreteSubstitutionModel',
        'GeneralStationary'), model + ' not supported'

    if model == 'GTR':
        if with_rate:
            sm = GTR(optimise_motif_probs=True, with_rate=True, 
                    distribution='gamma')
            lf = sm.makeLikelihoodFunction(st, bins=4)
            lf.setParamRule('bprobs', is_constant=True)
        else:
            sm = GTR(optimise_motif_probs=True)
            lf = sm.makeLikelihoodFunction(st)
    else:
        if model == 'General' and outgroup is not None:
            sm = GeneralBen(DNA.Alphabet, recode_gaps=True, model_gaps=False,
                    optimise_motif_probs=True)
        else:
            sm = eval(model)(DNA.Alphabet, recode_gaps=True, model_gaps=False,
                optimise_motif_probs=True, name=model)
        lf = sm.makeLikelihoodFunction(st)
    lf.setAlignment(sa)
    if lf_from is not None:
        populate_parameters(lf, lf_from, is_independent=True,
                is_constant=False, upper=param_limit)
        if model == 'GTR':
            for param in get_model_params(lf):
                lf.setParamRule(param, is_independent=False)
    elif param_limit is not None:
        for param in get_model_params(lf):
            dependencies = _get_dependencies_for(param, lf)
            for scope in dependencies:
                lf.setParamRule(param, upper=param_limit, is_independent=False,
                        **scope)
    if outgroup is not None:
        ingroup = [e for e in st.getTipNames() if e != outgroup]
        lf.setParamRule('length', edges=ingroup, is_independent=False)
    if with_rate:
        lf.setParamRule('rate_shape', upper=100)
    lf.optimise(local=local, show_progress=False, limit_action='raise')
    return lf
Beispiel #7
0
 def test_bin_options(self):
     """Construct WG01 and GTR with with_rate=True and distribution='gamma'."""
     # The same keyword arguments are handed to both model constructors.
     kwargs = dict(with_rate=True, distribution='gamma')
     model = WG01(**kwargs)
     model = GTR(**kwargs)