Esempio n. 1
0
def test_populate_parameters():
    """populate_parameters should set up a nested likelihood function"""
    # Read the stored GTR fit; the context manager guarantees the file handle
    # is closed even if JSON parsing raises.
    json_path = os.path.join(get_data_dir(), 'brca1_murphy_gtr.json')
    with open(json_path) as lf_file:
        lf_json = json.load(lf_file)
    lf_GTR = nest.inflate_likelihood_function(lf_json)
    aln = LoadSeqs(os.path.join(get_data_dir(), 'brca1.fasta'))
    lf_GTR.setAlignment(aln)
    model = General(DNA.Alphabet,
                    optimise_motif_probs=True,
                    recode_gaps=True,
                    model_gaps=False)
    lf_General = model.makeLikelihoodFunction(lf_GTR.tree)
    nest.populate_parameters(lf_General, lf_GTR)
    lf_General.setAlignment(aln)
    # After population both likelihood functions must give the same G
    # statistic on the same alignment (to 6 decimals).
    assert_almost_equal(lf_GTR.getGStatistic(), lf_General.getGStatistic(), 6)

    # Second scenario: start from two independent fixtures whose tip-edge
    # substitution matrices differ, then check that populating makes them
    # agree.
    lf_GTR = nest.inflate_likelihood_function(_GTR)
    lf_General = nest.inflate_likelihood_function(_General)
    for edge in lf_GTR.tree.getTipNames():
        assert not allclose(
            lf_GTR.getPsubForEdge(edge),
            lf_General.getPsubForEdge(edge)), 'models started close'
    nest.populate_parameters(lf_General, lf_GTR)
    for edge in lf_GTR.tree.getTipNames():
        assert_array_almost_equal(lf_GTR.getPsubForEdge(edge),
                                  lf_General.getPsubForEdge(edge))
Esempio n. 2
0
def test_inflate_deflate_likelihood_function():
    """Deflating, re-inflating and deflating again reproduces the first result"""
    original = nest.inflate_likelihood_function(_GTRplusGamma)
    alignment = get_aln('GTRplusGamma', _GTRplusGamma['aln_length'])
    original.setAlignment(alignment)

    # First pass: flatten the likelihood function.
    flattened = nest.deflate_likelihood_function(original)

    # Second pass: rebuild from the flat form, attach the same alignment and
    # flatten once more.
    rebuilt = nest.inflate_likelihood_function(flattened)
    rebuilt.setAlignment(alignment)
    reflattened = nest.deflate_likelihood_function(rebuilt)

    assert_equal(flattened, reflattened)
Esempio n. 3
0
def test_clock_fit():
    """clock_fit should recover the expected substitution counts of the
    GTRClockTest and GeneralBen fixtures, both with and without the
    equal-branch-length constraint"""
    for modelname in ('GTRClockTest', 'GeneralBen'):
        is_gtr = modelname.startswith('GTR')
        # Fixtures are looked up by name ('_' + modelname).
        fixture = eval('_' + modelname)
        start_lf = nest.inflate_likelihood_function(fixture)
        expected = nest.get_expected_no_subs(start_lf)
        aln = get_aln(modelname, 100000)
        family = 'GTR' if is_gtr else 'General'
        lfs = nest.clock_fit(aln,
                             start_lf.tree,
                             outgroup='Opossum',
                             param_limit=20,
                             return_lfs=family)
        # GTR fixtures use the first two returned fits, the others use
        # everything from index 2 onwards.
        if is_gtr:
            lf_equal_length, lf = lfs[:2]
        else:
            lf_equal_length, lf = lfs[2:]
        assert_less(lf_equal_length.getLogLikelihood(),
                    lf.getLogLikelihood())
        if modelname == 'GeneralBen':
            assert_less(lfs[0].getLogLikelihood(),
                        lf_equal_length.getLogLikelihood())
        observed = nest.get_expected_no_subs(lf)
        observed_equal_length = nest.get_expected_no_subs(lf_equal_length)
        for taxon in expected:
            target = expected[taxon]
            assert_almost_equal(observed[taxon], target, decimal=2)
            assert_almost_equal(observed_equal_length[taxon],
                                target,
                                decimal=2)
Esempio n. 4
0
def test_deflate_likelihood_function():
    """The 'EN' entry of a deflated likelihood function equals get_expected_no_subs"""
    general_lf = nest.inflate_likelihood_function(_General)
    general_lf.setAlignment(get_aln('General', _General['aln_length']))
    deflated = nest.deflate_likelihood_function(general_lf)
    assert_equal(deflated['EN'], nest.get_expected_no_subs(general_lf))
Esempio n. 5
0
def test_GNC():
    """A GNC likelihood function keeps its 'EN' values through inflate/deflate"""
    with open(os.path.join(get_data_dir(), 'GNC.json')) as infile:
        flat_lf = json.load(infile)

    lf = inflate_likelihood_function(flat_lf, ml.GNC)
    aln = util.get_aln(os.path.join(get_data_dir(),
                                    'ENSG00000100393.fasta.gz'),
                       codon_position=-1)
    lf.setAlignment(aln)

    flat_again = deflate_likelihood_function(lf)

    expected = flat_lf['EN']
    observed = flat_again['EN']
    # Compare entry by entry instead of via .values(): the two mappings are
    # built independently, so their iteration order need not match, and
    # dict views are not sequences that can be compared element-wise.
    assert set(observed) == set(expected), "'EN' keys changed in round trip"
    for key in expected:
        assert_almost_equal(expected[key], observed[key], 9)
Esempio n. 6
0
def test_seq_fit():
    """seq_fit should fit nested GTR and General models"""
    for model in ('GTR', 'General'):
        # Fixtures are looked up by name ('_' + model).
        start_lf = nest.inflate_likelihood_function(eval('_' + model))
        expected = nest.get_expected_no_subs(start_lf)

        lfs = nest.seq_fit(get_aln(model, 100000),
                           start_lf.tree,
                           param_limit=20,
                           return_lfs=model)
        if model == 'General':
            # The first returned fit must have a lower log-likelihood than
            # the second.
            assert_less(lfs[0].getLogLikelihood(), lfs[1].getLogLikelihood())
        # The last returned fit is the one compared against the fixture.
        observed = nest.get_expected_no_subs(lfs[-1])
        for taxon in expected:
            assert_almost_equal(observed[taxon], expected[taxon], decimal=2)
Esempio n. 7
0
def generate_alignments(alns=None):
    """Simulate alignments from model fixtures and write them as gzipped FASTA.

    For each ``(model, aln_len)`` pair the module-level fixture named
    ``'_' + model`` is inflated into a likelihood function, an alignment of
    ``aln_len`` columns is simulated from it, and the result is written to
    ``<model>_<aln_len>.fasta.gz`` inside the data directory.

    Arguments:
        alns: optional iterable of ``(model_name, alignment_length)`` pairs.
            Defaults to ``[('GTRClockTest', 100000), ('GeneralBen', 100000)]``,
            the only pairs the previous implementation actually processed
            (a longer list was assigned first but immediately overwritten).
            Pass other pairs, e.g. ``('GTR', 100000)``, to regenerate further
            fixtures.

    Returns 0.
    """
    from gzip import GzipFile
    from data import get_data_dir
    from os.path import join
    if alns is None:
        alns = [('GTRClockTest', 100000), ('GeneralBen', 100000)]
    for model, aln_len in alns:
        # Resolve the fixture by name from module globals rather than eval(),
        # so caller-supplied names can never be executed as expressions.
        lf = nest.inflate_likelihood_function(globals()['_' + model])
        aln = lf.simulateAlignment(aln_len)
        filename = '_'.join((model, str(aln_len))) + '.fasta.gz'
        with GzipFile(join(get_data_dir(), filename), 'w') as aln_file:
            aln_file.write(aln.toFasta())
    return 0
Esempio n. 8
0
def test_get_expected_no_subs():
    """get_expected_no_subs maps each tip to a value matching its 'length' parameter"""
    stationary_lf = nest.inflate_likelihood_function(_GeneralStationary)
    expected_subs = nest.get_expected_no_subs(stationary_lf)
    for tip in stationary_lf.tree.getTipNames():
        observed = expected_subs[tip]
        assert_almost_equal(observed,
                            stationary_lf.getParamValue('length', tip))