Python inflate_likelihood_function Examples, nest.inflate_likelihood_function Python Examples

Example #1

0

Show file

File: test_nest.py Project: HuttleyLab/geneticdistance

def test_populate_parameters():
    """populate_parameters should make the General lf match the GTR lf.

    Before the call the two inflated likelihood functions must differ on
    every tip edge; afterwards their substitution probability matrices must
    agree on every tip edge.
    """
    gtr_lf = nest.inflate_likelihood_function(_GTR)
    general_lf = nest.inflate_likelihood_function(_General)

    # Guard against a vacuous test: the two models must not already agree.
    for tip in gtr_lf.tree.getTipNames():
        gtr_psub = gtr_lf.getPsubForEdge(tip)
        general_psub = general_lf.getPsubForEdge(tip)
        assert not allclose(gtr_psub, general_psub), 'models started close'

    nest.populate_parameters(general_lf, gtr_lf)

    for tip in gtr_lf.tree.getTipNames():
        gtr_psub = gtr_lf.getPsubForEdge(tip)
        general_psub = general_lf.getPsubForEdge(tip)
        assert_array_almost_equal(gtr_psub, general_psub)

Example #2

0

Show file

File: test_nest.py Project: HuttleyLab/geneticdistance

def test_inflate_deflate_likelihood_function():
    """Deflate -> inflate -> deflate must reproduce the first deflation."""
    original_lf = nest.inflate_likelihood_function(_GTRplusGamma)
    alignment = get_aln('GTRplusGamma', _GTRplusGamma['aln_length'])
    original_lf.setAlignment(alignment)
    deflated = nest.deflate_likelihood_function(original_lf)

    # Round trip: rebuild a likelihood function from the deflated document,
    # attach the same alignment, and deflate it once more.
    rebuilt_lf = nest.inflate_likelihood_function(deflated)
    rebuilt_lf.setAlignment(alignment)
    redeflated = nest.deflate_likelihood_function(rebuilt_lf)

    assert_equal(deflated, redeflated)

Example #3

0

Show file

def ml_bootstraps(empirical, num_bootstraps=100):
    """Parametric bootstrap of the G statistic for a fitted model document.

    Alignments are simulated from the likelihood function stored in
    ``empirical['lf']``, each one is refitted with ``ml`` and the resulting
    G statistics are compared with the empirical one.

    Args:
        empirical: fitted document. The keys read here are ``'model'``,
            ``'gc'`` and ``'lf'`` (itself holding ``'aln_length'``,
            ``'tree'`` and ``'gs'``). The whole document is also forwarded
            to ``ml`` as keyword arguments.
        num_bootstraps: number of simulated alignments to fit.

    Returns:
        dict with ``'gstats'`` (list of bootstrap G statistics), ``'gstat'``
        (the empirical G statistic) and ``'pvalue'`` (number of bootstrap
        values greater than the empirical one, divided by
        ``num_bootstraps + 1``).

    Raises:
        AssertionError: if the model is not supported, or if the alignment
            length of a codon model is not a multiple of three.
    """
    allowed_models = ('NG', 'NFG', 'MG94G', 'GNC', 'Y98GTR', 'CNFGTR',
                      'MG94GTR', 'Y98')
    # Explicit check rather than a bare ``assert`` so the whitelist still
    # guards the eval below when Python runs with -O. AssertionError is kept
    # so existing callers see the same exception type.
    if empirical['model'] not in allowed_models:
        raise AssertionError('unsupported model: %r' % (empirical['model'],))
    gc = get_genetic_code(empirical['gc'].encode('utf-8'))
    # NOTE(review): eval on a document field -- only acceptable because the
    # value has just been checked against the whitelist above.
    model = lambda **kw: eval(empirical['model'])(gc=gc, **kw)
    elf = nest.inflate_likelihood_function(empirical['lf'], model)

    aln_length = empirical['lf']['aln_length']
    if empirical['model'] != 'NG':  # for unexpected simulateAlignment behaviour
        aln_length = int(aln_length / 3)
        assert empirical['lf']['aln_length'] == 3 * aln_length

    def bootstrap(empdoc):
        """Simulate one alignment, refit it and return its G statistic."""
        aln = elf.simulateAlignment(aln_length)
        simdoc = {'aln': str(aln), 'tree': empdoc['lf']['tree']}
        result = ml(simdoc, **empdoc)
        return result['lf']['gs']

    # Build a real list: a lazy ``map`` would be exhausted by the ``sum``
    # below and leave 'gstats' holding a spent iterator on Python 3.
    gstats = [bootstrap(empirical) for _ in range(num_bootstraps)]
    egs = empirical['lf']['gs']
    exceeding = sum(g > egs for g in gstats)
    return {
        'gstats': gstats,
        'gstat': egs,
        # float() forces true division; integer division would always
        # truncate the p-value to 0 on Python 2.
        'pvalue': exceeding / float(num_bootstraps + 1),
    }

Example #4

0

Show file

File: test_nest.py Project: HuttleyLab/geneticdistance

def test_deflate_likelihood_function():
    """deflate_likelihood_function produces internally consistent statistics"""
    general_lf = nest.inflate_likelihood_function(_General)
    general_lf.setAlignment(get_aln('General', _General['aln_length']))
    deflated = nest.deflate_likelihood_function(general_lf)
    # The stored 'EN' entry must equal a fresh computation on the same lf.
    assert_equal(deflated['EN'], nest.get_expected_no_subs(general_lf))

Example #5

0

Show file

def split_ens(doc):
    """Return the expected number of substitutions for a fitted codon model.

    Args:
        doc: fitted document with ``'model'`` (one of ``'GNC'``,
            ``'Y98GTR'``, ``'CNFGTR'``), ``'gc'`` (genetic code name) and
            ``'lf'`` (deflated likelihood function).

    Returns:
        dict with the single key ``'ENS'`` holding the result of
        ``get_expected_no_subs(lf, gc)``.

    Raises:
        AssertionError: if ``doc['model']`` is not a supported model.
    """
    # Explicit check rather than a bare ``assert`` so the whitelist still
    # guards the eval below when Python runs with -O. AssertionError is kept
    # so existing callers see the same exception type.
    if doc['model'] not in ('GNC', 'Y98GTR', 'CNFGTR'):
        raise AssertionError('unsupported model: %r' % (doc['model'],))
    gc = get_genetic_code(doc['gc'].encode('utf-8'))
    # NOTE(review): eval on a document field -- only acceptable because the
    # value has just been checked against the whitelist above.
    model = lambda **kw: eval(doc['model'])(gc=gc, **kw)
    lf = inflate_likelihood_function(doc['lf'], model)
    # Computed once; the value used to be calculated twice with the first
    # result discarded.
    return {'ENS': get_expected_no_subs(lf, gc)}

Example #6

0

Show file

File: test_general_ben.py Project: HuttleyLab/geneticdistance

def test_makeContinuousPsubDefn():
    """GeneralBen populated from General gives the same scaled rate matrices.

    For every tip edge, rate matrix times length must agree between the two
    likelihood functions, the expected numbers of substitutions must agree,
    and the GeneralBen length must equal its expected number of
    substitutions.
    """
    general_lf = inflate_likelihood_function(_General)

    ben_model = GeneralBen(DNA.Alphabet,
                           recode_gaps=True,
                           model_gaps=False,
                           optimise_motif_probs=True,
                           name='GeneralBen')
    ben_lf = ben_model.makeLikelihoodFunction(general_lf.tree)

    populate_parameters(ben_lf, general_lf,
                        is_independent=True, is_constant=False)

    ben_expected = get_expected_no_subs(ben_lf)
    ben_lengths = ben_lf.getParamValueDict(['edge'])['length']
    general_expected = get_expected_no_subs(general_lf)

    for tip in general_lf.tree.getTipNames():
        ben_scaled = (np.array(ben_lf.getRateMatrixForEdge(tip)) *
                      ben_lf.getParamValue('length', tip))
        general_scaled = (np.array(general_lf.getRateMatrixForEdge(tip)) *
                          general_lf.getParamValue('length', tip))
        assert_array_almost_equal(ben_scaled, general_scaled)
        assert_almost_equal(ben_expected[tip], general_expected[tip])
        assert_almost_equal(ben_lengths[tip], ben_expected[tip])

Example #7

0

Show file

File: g_stats.py Project: HuttleyLab/geneticdistance

def param_bootstrap(stats, num_reps=None, model_pos=None, fitter=None, **kw):
    """Collect parametric bootstrap samples for one fitted model of a gene.

    Alignments are simulated from the likelihood function inflated from
    ``f_stats[model_pos]`` and refitted with ``fitter`` until ``num_reps``
    G statistics are available or ``10 * num_reps`` attempts have been made.

    Args:
        stats: ``(gene, f_stats, g_stats)`` triple. ``f_stats`` is indexed
            by ``model_pos``; the selected row supplies ``'name'``,
            ``'tip_names'``, ``'aln_length'``, ``'gs'`` and ``'ll'``.
            ``g_stats`` maps ``model_pos`` to the accumulated sample row and
            gains a new row when none exists yet.
        num_reps: target number of G statistic samples.
        model_pos: position of the model in ``f_stats`` and in the result
            returned by ``fitter``.
        fitter: callable invoked as
            ``fitter(aln, tree, return_lfs=model, **kw)``.
        **kw: extra keyword arguments forwarded to ``fitter``.

    Returns:
        ``(gene, f_stats, g_stats)`` with the selected rows updated in
        place, or ``None`` when ``model_pos`` is not a valid position in
        ``f_stats``.
    """
    gene, f_stats, g_stats = stats
    try:
        f_row = f_stats[model_pos]
    except IndexError:
        # model_pos is an index, so it must be converted before being
        # concatenated; otherwise this error path raised TypeError itself.
        logging.error(' Skipping ' + '/'.join(f_stats[0]['tip_names']) +
                      ' in ' + gene + ': position ' + str(model_pos) +
                      ' invalid')
        return
    model = f_row['name']

    if model_pos in g_stats:
        g_row = g_stats[model_pos]
    else:
        g_row = {
            'name': model,
            'tip_names': f_row['tip_names'],
            'gs_samples': [],
            'll_samples': [],
            'en_samples': []
        }
        g_stats[model_pos] = g_row
    gs_samples = g_row['gs_samples']
    ll_samples = g_row['ll_samples']
    en_samples = g_row['en_samples']
    if 'state' in g_row:
        # NOTE(review): eval of a stored repr of the RNG state; the value
        # must come from a trusted source (it is written below via repr()).
        random.setstate(eval(g_row['state']))

    lf = nest.inflate_likelihood_function(f_row)
    aln_length = f_row['aln_length']
    start = time.time()
    # Allow up to ten attempts per requested sample. ``range(10 * n)`` works
    # on both Python 2 and 3, whereas ``10 * range(n)`` fails on Python 3.
    for _ in range(10 * num_reps):
        if len(gs_samples) >= num_reps:
            break
        try:
            aln = lf.simulateAlignment(aln_length, random_series=random)
            lfs = fitter(aln, lf.tree, return_lfs=model, **kw)
            fitted_lf = lfs[model_pos]
            # Compute everything before appending so that a failure cannot
            # leave the sample lists out of step with one another.
            ll = fitted_lf.getLogLikelihood()
            gs = fitted_lf.getGStatistic()
            has_q = 'Q' in fitted_lf.defn_for
            en = nest.get_expected_no_subs(fitted_lf) if has_q else None
        except Exception:
            # Best effort: a failed replicate is logged and retried, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            logging.warning(' Missed a G stat for ' + model + ' and ' +
                            '/'.join(f_row['tip_names']) + ' in ' + gene +
                            ':\n' + format_exc())
        else:
            ll_samples.append(ll)
            gs_samples.append(gs)
            if has_q:
                en_samples.append(en)
    # Checked after the loop so that reaching the target on the very last
    # attempt (or asking for zero samples) is not reported as a failure.
    if len(gs_samples) < num_reps:
        logging.error(
            ' Failed to compile sufficient bootstrap repetitions for ' +
            model + ' and ' + '/'.join(f_row['tip_names']) + ' in ' + gene)
    g_row['state'] = repr(random.getstate())
    # Each p-value is stored as a (count, denominator) pair.
    f_row['gs_p'] = (sum(1 for g in gs_samples
                         if g < f_row['gs']), len(gs_samples) + 1)
    f_row['ll_p'] = (sum(1 for l in ll_samples
                         if l < f_row['ll']), len(ll_samples) + 1)
    logging.info(' Done ' + model + ' and ' + '/'.join(f_row['tip_names']) +
                 ' in ' + gene + ' in ' + str(time.time() - start) + ' secs')

    return gene, f_stats, g_stats

Example #8

0

Show file

File: test_nest.py Project: HuttleyLab/geneticdistance

def test_hetero_fit():
    """hetero_fit should fit GTR plus Gamma models"""
    reference_lf = nest.inflate_likelihood_function(_GTRplusGamma)
    expected = nest.get_expected_no_subs(reference_lf)
    alignment = get_aln('GTRplusGamma', 100000)
    fitted_lfs = nest.hetero_fit(alignment, reference_lf.tree,
                                 param_limit=20, return_lfs=True)
    # The last likelihood function returned is the one compared with the
    # values the alignment was generated under.
    observed = nest.get_expected_no_subs(fitted_lfs[-1])
    for taxon in expected:
        assert_almost_equal(observed[taxon], expected[taxon], decimal=2)

Example #9

0

Show file

File: results.py Project: HuttleyLab/geneticdistance

 def check(self, row):
     """Return allRateMatricesUnique() for the lf inflated from row.

     Returns False when inflating or checking raises ArithmeticError or
     NotImplementedError (logged at debug level) or AssertionError
     (logged at warning level).
     """
     try:
         return inflate_likelihood_function(row).allRateMatricesUnique()
     except (ArithmeticError, NotImplementedError):
         logging.debug(traceback.format_exc())
     except AssertionError:
         logging.warning(traceback.format_exc())
     # Only reached after one of the handled exceptions.
     return False

Example #10

0

Show file

def fit(aln, tree, result, model, omega_indep, genetic_code, format):
    """ Fit the selected model to the input fasta ALN with the selected TREE 
    and output the RESULT. """
    # Read the alignment first, then the tree, as file-like objects.
    alignment_text = _decompress_if_zipped(aln.read())
    request = {'tree': tree.read().strip(), 'aln': alignment_text}
    fitted = ml.ml(request, model=model, omega_indep=omega_indep,
                   gc=genetic_code)

    if format == 'json':
        json.dump(fitted, result)
        return 0

    # Any other format: write the string form of the inflated lf.
    make_model = lambda: getattr(ml, model)()
    lf = nest.inflate_likelihood_function(fitted['lf'], make_model)
    result.write(str(lf) + '\n')
    return 0

Example #11

0

Show file

File: test_nest.py Project: HuttleyLab/geneticdistance

def test_hetero_clock_fit():
    """hetero_clock_fit should fit a molecular clock constrained GTR plus Gamma
    model nested in a GTR plus Gamma model"""
    reference_lf = nest.inflate_likelihood_function(_GTRplusGammaClockTest)
    expected = nest.get_expected_no_subs(reference_lf)
    alignment = get_aln('GTRplusGammaClockTest', 100000)
    clock_lf, free_lf = nest.hetero_clock_fit(
        alignment, reference_lf.tree, outgroup='Opossum',
        param_limit=20, return_lfs=True)

    # The constrained (equal length) model must fit worse than the free one.
    assert_less(clock_lf.getLogLikelihood(), free_lf.getLogLikelihood())

    free_ens = nest.get_expected_no_subs(free_lf)
    clock_ens = nest.get_expected_no_subs(clock_lf)
    for taxon in expected:
        assert_almost_equal(free_ens[taxon], expected[taxon], decimal=2)
        assert_almost_equal(clock_ens[taxon], expected[taxon], decimal=2)

Example #12

0

Show file

File: test_nest.py Project: HuttleyLab/geneticdistance

def test_seq_fit():
    """seq_fit should fit nested GTR and General models"""
    for model_name in ('GTR', 'General'):
        # The fixture document is the module-level name '_<model_name>'.
        reference_lf = nest.inflate_likelihood_function(
            eval('_' + model_name))
        expected = nest.get_expected_no_subs(reference_lf)

        alignment = get_aln(model_name, 100000)
        fitted_lfs = nest.seq_fit(alignment, reference_lf.tree,
                                  param_limit=20, return_lfs=model_name)
        if model_name == 'General':
            # The first model returned must fit worse than the second.
            first_ll = fitted_lfs[0].getLogLikelihood()
            second_ll = fitted_lfs[1].getLogLikelihood()
            assert_less(first_ll, second_ll)

        observed = nest.get_expected_no_subs(fitted_lfs[-1])
        for taxon in expected:
            assert_almost_equal(observed[taxon], expected[taxon], decimal=2)

Example #13

0

Show file

File: test_nest.py Project: HuttleyLab/geneticdistance

def generate_alignments():
    """Simulate alignments and write them as gzipped FASTA fixtures.

    For each ``(model, length)`` pair a likelihood function is inflated from
    the module-level document ``_<model>``, an alignment of that length is
    simulated and written to ``<model>_<length>.fasta.gz`` in the directory
    returned by ``get_data_dir()``.

    Returns:
        0 once all files have been written.
    """
    from gzip import GzipFile
    from data import get_data_dir
    from os.path import join
    # Only these two fixtures are generated. A longer list used to be built
    # first but was overwritten before use, so it was dead code and has been
    # dropped; the files actually written are unchanged.
    alns = [('GTRClockTest', 100000), ('GeneralBen', 100000)]
    for model, aln_len in alns:
        lf = nest.inflate_likelihood_function(eval('_' + model))
        aln = lf.simulateAlignment(aln_len)
        filename = '_'.join((model, str(aln_len))) + '.fasta.gz'
        with GzipFile(join(get_data_dir(), filename), 'w') as aln_file:
            aln_file.write(aln.toFasta())
    return 0

Example #14

0

Show file

def omega(aln, tree, result, model, genetic_code, outgroup, neutral, format):
    """ Fit the selected model to the input fasta ALN with the selected TREE 
    and output the RESULT, with specific constraints on omega. """
    # Read the alignment first, then the tree, as file-like objects.
    alignment_text = _decompress_if_zipped(aln.read())
    request = {'tree': tree.read().strip(), 'aln': alignment_text}
    fitted = omega_module.ml(request,
                             model=model,
                             gc=genetic_code,
                             outgroup=outgroup,
                             neutral=neutral)

    if format == 'json':
        json.dump(fitted, result)
        return 0

    # Any other format: write the string form of the inflated lf.
    make_model = lambda: getattr(ml, model)()
    lf = nest.inflate_likelihood_function(fitted['lf'], make_model)
    result.write(str(lf) + '\n')
    return 0

Example #15

0

Show file

def rooted(aln, tree, result, genetic_code, format):
    """ Fit GNC to the input fasta ALN with the selected TREE and output the
    RESULT. Parameters other than the scale parameter are constrained to be
    equal on branches connected to the root."""
    alignment_text = _decompress_if_zipped(aln.read())
    treestring = tree.read().strip()

    # Parse the tree only to find the names of the two edges at the root.
    parsed_tree = LoadTree(treestring=treestring)
    root_children = parsed_tree.Children
    assert len(root_children) == 2, 'Tree must be edge-rooted'
    rooted_edges = [child.Name for child in root_children]

    request = {'tree': treestring, 'aln': alignment_text}
    fitted = ml.rooted(request, rooted_edges=rooted_edges, gc=genetic_code)

    if format == 'json':
        json.dump(fitted, result)
        return 0

    # Any other format: write the string form of the inflated lf.
    lf = nest.inflate_likelihood_function(fitted['lf'], ml.GNC)
    result.write(str(lf) + '\n')
    return 0

Example #16

0

Show file

def clock(aln, tree, outgroup, result, model, omega_indep, genetic_code,
          format):
    """ Fit the selected model to the input fasta ALN with the input TREE with
    genetic distance constrained to be equal on all branches but the OUTGROUP
    and output the RESULT. """
    # Read the alignment first, then the tree, as file-like objects.
    alignment_text = _decompress_if_zipped(aln.read())
    request = {'tree': tree.read().strip(), 'aln': alignment_text}
    fitted = clock_module.ml(request,
                             model=model,
                             gc=genetic_code,
                             outgroup=outgroup,
                             omega_indep=omega_indep)

    if format == 'json':
        json.dump(fitted, result)
        return 0

    # Any other format: write the string form of the inflated lf.
    make_model = lambda: getattr(ml, model)()
    lf = nest.inflate_likelihood_function(fitted['lf'], make_model)
    result.write(str(lf) + '\n')
    return 0

Example #17

0

Show file

File: test_nest.py Project: HuttleyLab/geneticdistance

def test_clock_fit():
    """clock_fit should fit nested GTR, General, and GeneralBen models,
    some with equal branch lengths"""
    for modelname in ('GTRClockTest', 'GeneralBen'):
        is_gtr = modelname.startswith('GTR')
        reference_lf = nest.inflate_likelihood_function(eval('_' + modelname))
        expected = nest.get_expected_no_subs(reference_lf)
        alignment = get_aln(modelname, 100000)
        lfs = nest.clock_fit(alignment, reference_lf.tree, outgroup='Opossum',
                             param_limit=20,
                             return_lfs='GTR' if is_gtr else 'General')

        # GTR runs use the first two results; the others use those from
        # position 2 onwards.
        if is_gtr:
            clock_lf, free_lf = lfs[:2]
        else:
            clock_lf, free_lf = lfs[2:]
        assert_less(clock_lf.getLogLikelihood(), free_lf.getLogLikelihood())
        if modelname == 'GeneralBen':
            assert_less(lfs[0].getLogLikelihood(),
                        clock_lf.getLogLikelihood())

        free_ens = nest.get_expected_no_subs(free_lf)
        clock_ens = nest.get_expected_no_subs(clock_lf)
        for taxon in expected:
            assert_almost_equal(free_ens[taxon], expected[taxon], decimal=2)
            assert_almost_equal(clock_ens[taxon], expected[taxon], decimal=2)

Example #18

0

Show file

File: test_general_ben.py Project: HuttleyLab/geneticdistance

def test_constrain_lengths():
    """GeneralBen with shared lengths keeps length equal to expected subs.

    All edges share one length except 'Opossum'; after optimisation the
    Mouse and Human lengths must be equal, and every tip length must equal
    its expected number of substitutions.
    """
    general_lf = inflate_likelihood_function(_General)
    alignment = get_aln('General', _General['aln_length'])

    ben_model = GeneralBen(DNA.Alphabet,
                           recode_gaps=True,
                           model_gaps=False,
                           optimise_motif_probs=True)
    ben_lf = ben_model.makeLikelihoodFunction(general_lf.tree)

    # Only parameters whose name contains '/' are freed per edge.
    for param in ben_lf.getParamNames():
        if '/' not in param:
            continue
        ben_lf.setParamRule(param, is_independent=True, is_constant=False)
    ben_lf.setParamRule('length', is_independent=False)
    ben_lf.setParamRule('length', edge='Opossum', is_independent=True)
    ben_lf.setAlignment(alignment)
    ben_lf.optimise(local=True, show_progress=False)

    expected_subs = get_expected_no_subs(ben_lf)
    lengths = ben_lf.getParamValueDict(['edge'])['length']
    assert_almost_equal(lengths['Mouse'], lengths['Human'])
    for tip in ben_lf.tree.getTipNames():
        assert_almost_equal(lengths[tip], expected_subs[tip])

Example #19

0

Show file

File: results.py Project: HuttleyLab/geneticdistance

 def check(self, row):
     """Return allPsubsDLC() of the likelihood function inflated from row."""
     lf = inflate_likelihood_function(row)
     return lf.allPsubsDLC()

Example #20

0

Show file

File: test_nest.py Project: HuttleyLab/geneticdistance

def test_get_expected_no_subs():
    """expected_no_subs should return dictionary of ENS by edge"""
    stationary_lf = nest.inflate_likelihood_function(_GeneralStationary)
    ens_by_edge = nest.get_expected_no_subs(stationary_lf)
    # For this fixture each tip's ENS must equal its 'length' parameter.
    for tip in stationary_lf.tree.getTipNames():
        branch_length = stationary_lf.getParamValue('length', tip)
        assert_almost_equal(ens_by_edge[tip], branch_length)