def test_deflate_likelihood_function():
    """The 'EN' entry of a deflated likelihood function should equal the
    value get_expected_no_subs reports for that same likelihood function"""
    likelihood = nest.inflate_likelihood_function(_General)
    likelihood.setAlignment(get_aln('General', _General['aln_length']))
    deflated = nest.deflate_likelihood_function(likelihood)
    assert_equal(deflated['EN'], nest.get_expected_no_subs(likelihood))
def test_hetero_fit():
    """hetero_fit should reproduce, to two decimals, the expected number of
    substitutions of the GTR plus Gamma model the alignment came from"""
    reference_lf = nest.inflate_likelihood_function(_GTRplusGamma)
    expected = nest.get_expected_no_subs(reference_lf)
    fitted_lfs = nest.hetero_fit(
        get_aln('GTRplusGamma', 100000),
        reference_lf.tree,
        param_limit=20,
        return_lfs=True)
    # Only the last likelihood function returned is compared.
    observed = nest.get_expected_no_subs(fitted_lfs[-1])
    for taxon in expected:
        assert_almost_equal(observed[taxon], expected[taxon], decimal=2)
# Example #3
# 0
def test_distribution():
    """jsd.distribution of a sequence should match the motif probabilities a
    GTR likelihood function derives from the same single-sequence alignment"""
    mouse_only = get_aln('General', 1031).takeSeqs(('Mouse', ))
    empirical = jsd.distribution(mouse_only.getSeq('Mouse'))
    single_tip_tree = LoadTree(tip_names=('Mouse', ))
    likelihood = GTR().makeLikelihoodFunction(single_tip_tree)
    likelihood.setMotifProbsFromData(mouse_only)
    assert_array_almost_equal(array(likelihood.getMotifProbs()),
                              array(empirical))
def test_inflate_deflate_likelihood_function():
    """Deflating, re-inflating and deflating again should give back the
    result of the first deflation"""
    original = nest.inflate_likelihood_function(_GTRplusGamma)
    alignment = get_aln('GTRplusGamma', _GTRplusGamma['aln_length'])
    original.setAlignment(alignment)
    first_pass = nest.deflate_likelihood_function(original)

    # Round trip: rebuild from the deflated form, attach the same alignment,
    # then deflate once more.
    rebuilt = nest.inflate_likelihood_function(first_pass)
    rebuilt.setAlignment(alignment)
    second_pass = nest.deflate_likelihood_function(rebuilt)

    assert_equal(first_pass, second_pass)
def test_hetero_clock_fit():
    """hetero_clock_fit should return an equal-length fit and a second fit
    with higher log likelihood, both reproducing the expected number of
    substitutions of the generating GTR plus Gamma clock-test model"""
    reference_lf = nest.inflate_likelihood_function(_GTRplusGammaClockTest)
    expected = nest.get_expected_no_subs(reference_lf)
    alignment = get_aln('GTRplusGammaClockTest', 100000)
    equal_length_lf, full_lf = nest.hetero_clock_fit(
        alignment,
        reference_lf.tree,
        outgroup='Opossum',
        param_limit=20,
        return_lfs=True)
    assert_less(equal_length_lf.getLogLikelihood(),
                full_lf.getLogLikelihood())

    observed = nest.get_expected_no_subs(full_lf)
    observed_equal_length = nest.get_expected_no_subs(equal_length_lf)
    for taxon in expected:
        for fitted in (observed, observed_equal_length):
            assert_almost_equal(fitted[taxon], expected[taxon], decimal=2)
def test_seq_fit():
    """seq_fit should fit nested GTR and General models

    For each fixture the last likelihood function returned by seq_fit must
    reproduce the expected number of substitutions of the generating model
    to two decimals. For 'General', the first returned fit must also have a
    lower log likelihood than the second.
    """
    # Pair each model name with its fixture explicitly rather than building
    # the variable name as a string and eval()-ing it.
    cases = (('GTR', _GTR), ('General', _General))
    for model, params in cases:
        pre_lf = nest.inflate_likelihood_function(params)
        prefit = nest.get_expected_no_subs(pre_lf)

        aln = get_aln(model, 100000)
        # The model name itself is passed as return_lfs.
        lfs = nest.seq_fit(aln, pre_lf.tree, param_limit=20, return_lfs=model)
        if model == 'General':
            assert_less(lfs[0].getLogLikelihood(),
                        lfs[1].getLogLikelihood())
        lf = lfs[-1]
        postfit = nest.get_expected_no_subs(lf)
        for taxon in prefit:
            assert_almost_equal(postfit[taxon], prefit[taxon], decimal=2)
def test_clock_fit():
    """clock_fit should fit the GTRClockTest and GeneralBen fixtures, both
    with and without equal branch lengths

    For each fixture the equal-length fit must have a lower log likelihood
    than its paired fit, and both must reproduce the expected number of
    substitutions of the generating model to two decimals.
    """
    # Pair each fixture name with its parameters explicitly rather than
    # building the variable name as a string and eval()-ing it.
    cases = (('GTRClockTest', _GTRClockTest), ('GeneralBen', _GeneralBen))
    for modelname, model in cases:
        # One test decides both the return_lfs value and which pair of
        # results to inspect.
        is_gtr = modelname.startswith('GTR')
        pre_lf = nest.inflate_likelihood_function(model)
        prefit = nest.get_expected_no_subs(pre_lf)
        aln = get_aln(modelname, 100000)
        lfs = nest.clock_fit(aln, pre_lf.tree, outgroup='Opossum',
                             param_limit=20,
                             return_lfs='GTR' if is_gtr else 'General')
        # GTR fixtures use the first two results; the others use everything
        # from index 2 onward, which must unpack to exactly two.
        lf_equal_length, lf = lfs[:2] if is_gtr else lfs[2:]
        assert_less(lf_equal_length.getLogLikelihood(), lf.getLogLikelihood())
        if modelname == 'GeneralBen':
            assert_less(lfs[0].getLogLikelihood(),
                        lf_equal_length.getLogLikelihood())
        postfit = nest.get_expected_no_subs(lf)
        postfit_equal_length = nest.get_expected_no_subs(lf_equal_length)
        for taxon in prefit:
            assert_almost_equal(postfit[taxon], prefit[taxon], decimal=2)
            assert_almost_equal(postfit_equal_length[taxon], prefit[taxon],
                                decimal=2)
def test_constrain_lengths():
    """A GeneralBen fit with 'length' set non-independent on every edge
    except Opossum should give Mouse and Human equal lengths, and each tip's
    length should match its expected number of substitutions"""
    # NOTE(review): inflate_likelihood_function and get_expected_no_subs are
    # called without the nest. prefix used by the other tests here; confirm
    # they are imported directly.
    general_lf = inflate_likelihood_function(_General)
    alignment = get_aln('General', _General['aln_length'])

    ben_model = GeneralBen(
        DNA.Alphabet,
        recode_gaps=True,
        model_gaps=False,
        optimise_motif_probs=True)
    ben_lf = ben_model.makeLikelihoodFunction(general_lf.tree)

    # Only parameters whose name contains '/' get the per-parameter rule.
    for name in ben_lf.getParamNames():
        if '/' not in name:
            continue
        ben_lf.setParamRule(name, is_independent=True, is_constant=False)

    # Apply the shared 'length' rule first, then the Opossum exception.
    ben_lf.setParamRule('length', is_independent=False)
    ben_lf.setParamRule('length', edge='Opossum', is_independent=True)
    ben_lf.setAlignment(alignment)
    ben_lf.optimise(local=True, show_progress=False)

    expected_subs = get_expected_no_subs(ben_lf)
    lengths = ben_lf.getParamValueDict(['edge'])['length']
    assert_almost_equal(lengths['Mouse'], lengths['Human'])
    for tip in ben_lf.tree.getTipNames():
        assert_almost_equal(lengths[tip], expected_subs[tip])