コード例 #1
0
def test_prior_model(start_tree,
                     sim_length=100000,
                     summaries=None,
                     thinning_coef=1):
    """Run one chain whose target is the prior, print and save its samples.

    The target handed to ``basic_chain`` comes from
    ``initialize_prior_as_posterior()`` and the moves come from
    ``adaptive_proposal()``.

    Args:
        start_tree: Starting state, forwarded unchanged to ``basic_chain``.
        sim_length: Forwarded to ``basic_chain`` as ``N``.
        summaries: Summary objects to record. When None, the 'posterior' and
            'mhr' variables plus ``s_no_admixes()`` are used.
        thinning_coef: Added to ``sim_length / 60000`` (then truncated with
            ``int``) to form ``overall_thinning``.

    Returns:
        The third element of the 4-tuple returned by ``basic_chain``; the
        same object is printed and handed to ``save_to_csv``.
    """
    prior_target = initialize_prior_as_posterior()
    if summaries is None:
        summaries = [s_variable(name) for name in ('posterior', 'mhr')]
        summaries.append(s_no_admixes())
    move_proposal = adaptive_proposal()

    # Every summary starts with the (1, 0) setting; 'posterior' is overridden.
    # NOTE(review): the pair presumably means (sample interval, print
    # interval) - confirm against basic_chain.
    verbose_scheme = dict((s.name, (1, 0)) for s in summaries)
    verbose_scheme['posterior'] = (1, 1000)

    thinning = int(thinning_coef + sim_length / 60000)
    _, _, results, _ = basic_chain(start_tree,
                                   summaries,
                                   prior_target,
                                   move_proposal,
                                   post=None,
                                   N=sim_length,
                                   sample_verbose_scheme=verbose_scheme,
                                   overall_thinning=thinning,
                                   i_start_from=0,
                                   temperature=1.0,
                                   proposal_update=None,
                                   check_trees=True)
    print(results)
    save_to_csv(results, summaries)
    return results
コード例 #2
0
def test_prior_model_no_admixes(start_tree,
                                sim_length=100000,
                                summaries=None,
                                thinning_coef=1):
    """Run one prior-targeting chain with the no-admix proposal and save it.

    Same flow as a plain prior run, except that the moves come from
    ``adaptive_proposal_no_admix()`` and ``basic_chain`` is called with
    ``check_trees=False``.

    Args:
        start_tree: Starting state, forwarded unchanged to ``basic_chain``.
        sim_length: Forwarded to ``basic_chain`` as ``N``.
        summaries: Summary objects to record. When None, the 'posterior' and
            'mhr' variables plus ``s_no_admixes()`` are used.
        thinning_coef: Added to ``sim_length / 60000`` (then truncated with
            ``int``) to form ``overall_thinning``.

    Returns:
        The third element of the 4-tuple returned by ``basic_chain``, which
        is also passed to ``save_to_csv``.
    """
    prior_target = initialize_prior_as_posterior()
    if summaries is None:
        summaries = [s_variable(name) for name in ('posterior', 'mhr')]
        summaries.append(s_no_admixes())
    move_proposal = adaptive_proposal_no_admix()

    # All summaries share the same (1, 0) setting here.
    verbose_scheme = dict((s.name, (1, 0)) for s in summaries)
    thinning = int(thinning_coef + sim_length / 60000)

    _, _, results, _ = basic_chain(start_tree,
                                   summaries,
                                   prior_target,
                                   move_proposal,
                                   post=None,
                                   N=sim_length,
                                   sample_verbose_scheme=verbose_scheme,
                                   overall_thinning=thinning,
                                   i_start_from=0,
                                   temperature=1.0,
                                   proposal_update=None,
                                   check_trees=False)
    save_to_csv(results, summaries)
    return results
コード例 #3
0
ファイル: MCMCMC.py プロジェクト: Tmacme/AdmixtureBayes
def run_test():
    """Smoke-run ``MCMCMC`` with six chains started from a trivial tree.

    The posterior is built from the covariance of a trivial 3-leaf tree and
    every chain starts from a deep copy of another trivial 3-leaf tree.
    Element 0 of the result is written to 'findme.csv' and the set of
    element-1 entries (each converted to a tuple) is printed.

    Returns:
        Whatever ``MCMCMC`` returned.
    """
    from copy import deepcopy

    import summary
    from meta_proposal import basic_meta_proposal
    from posterior import initialize_prior_as_posterior, initialize_posterior
    from Rtree_operations import (get_trivial_nodes, create_trivial_tree,
                                  get_number_of_ghost_populations,
                                  get_max_distance_to_root,
                                  get_min_distance_to_root,
                                  get_average_distance_to_root)
    from Rtree_to_covariance_matrix import make_covariance
    from temperature_scheme import fixed_geometrical

    no_leaves = 3
    no_chains = 6

    true_tree = create_trivial_tree(no_leaves)
    proposal_function = basic_meta_proposal()
    post_fun = initialize_posterior(make_covariance(true_tree))
    start_tree = create_trivial_tree(no_leaves)

    tree_stat = summary.s_basic_tree_statistics
    summaries = [
        summary.s_posterior(),
        summary.s_variable('mhr'),
        summary.s_no_admixes(),
        summary.s_tree_identifier(),
        summary.s_average_branch_length(),
        summary.s_total_branch_length(),
        tree_stat(get_number_of_ghost_populations,
                  'ghost_pops',
                  output='integer'),
        tree_stat(get_max_distance_to_root, 'max_root'),
        tree_stat(get_min_distance_to_root, 'min_root'),
        tree_stat(get_average_distance_to_root, 'average_root'),
        summary.s_variable('proposal_type', output='string'),
    ]

    verbose_scheme = dict((s.name, (1, 0)) for s in summaries)
    verbose_scheme['posterior'] = (1, 100)

    # The same scheme dict and the same proposal object are shared by all
    # chains, exactly as in the per-chain lists below.
    ad = MCMCMC(starting_trees=[deepcopy(start_tree)
                                for _ in range(no_chains)],
                posterior_function=post_fun,
                summaries=summaries,
                temperature_scheme=fixed_geometrical(10.0, no_chains),
                printing_schemes=[verbose_scheme] * no_chains,
                iteration_scheme=[40] * 200,
                overall_thinnings=5,
                proposal_scheme=[proposal_function] * no_chains,
                cores=no_chains,
                no_chains=no_chains)

    ad[0].to_csv(path_or_buf='findme.csv')
    print(set(map(tuple, ad[1])))
    return ad
コード例 #4
0
def run_posterior_grid(tree_files, alpha, wishart_df):
    """Run one posterior simulation per tree file, in parallel.

    For every file a worker loads the tree, builds a trivial start tree with
    the same number of leaves and calls
    ``simulation_sanity.test_posterior_model`` with ``sim_length=100`` and
    ``thinning_coef=30``. Results go to '<file without extension>-results.csv'.

    Args:
        tree_files: Sequence of paths to tree files; one worker process is
            started per file. An empty sequence is a no-op.
        alpha: Forwarded as ``resimulate_regrafted_branch_length``.
        wishart_df: Forwarded as ``wishart_df``.

    Returns:
        None.
    """
    # A pool cannot be created with zero workers, so there is nothing to do
    # (and nothing worth building) for an empty grid.
    if len(tree_files) == 0:
        return

    summaries = [
        summary.s_posterior(),
        summary.s_variable('mhr', output='double_missing'),
        summary.s_no_admixes(),
        summary.s_average_branch_length(),
        summary.s_total_branch_length(),
        summary.s_basic_tree_statistics(
            Rtree_operations.get_number_of_ghost_populations,
            'ghost_pops',
            output='integer'),
        summary.s_basic_tree_statistics(
            Rtree_operations.get_max_distance_to_root, 'max_root'),
        summary.s_basic_tree_statistics(
            Rtree_operations.get_min_distance_to_root, 'min_root'),
        summary.s_basic_tree_statistics(
            Rtree_operations.get_average_distance_to_root, 'average_root'),
        summary.s_basic_tree_statistics(
            tree_statistics.unique_identifier_and_branch_lengths,
            'tree',
            output='string'),
        summary.s_variable('proposal_type', output='string'),
        summary.s_variable('sliding_regraft_adap_param',
                           output='double_missing'),
        summary.s_variable('rescale_adap_param', output='double_missing'),
        summary.s_likelihood(),
        summary.s_prior()
    ]

    def f(x):
        # Imported here so the worker does not depend on the closure's
        # enclosing scope for the module.
        import os
        # splitext only strips a real extension of the last path component;
        # splitting on every '.' broke names such as './trees/t1' or 'tree'.
        unsuffixed_filename = os.path.splitext(x)[0]
        true_tree = tree_generation_laboratory.identifier_to_tree_clean_wrapper(
            tree_generation_laboratory.load_tree(x))
        s_tree = Rtree_operations.create_trivial_tree(
            Rtree_operations.get_no_leaves(true_tree))
        simulation_sanity.test_posterior_model(
            true_tree,
            s_tree,
            100,
            summaries=summaries,
            thinning_coef=30,
            wishart_df=wishart_df,
            resimulate_regrafted_branch_length=alpha,
            filename=unsuffixed_filename + '-results.csv')

    from pathos.multiprocessing import Pool
    p = Pool(len(tree_files))
    try:
        p.map(f, tree_files)
    finally:
        # map() blocks until every task is done, so the workers are idle
        # here; release them instead of leaking the processes.
        p.terminate()
        p.join()
コード例 #5
0
def run_c():
    """Compare prior-targeting chains against direct draws from the prior.

    Three 3-leaf start trees are run through
    ``simulation_sanity.test_prior_model_several_chains``; then
    ``generate_prior_trees.get_distribution_under_prior`` is evaluated for
    summaries 2..9 of the list, saved to 'sim_prior.csv', and finally
    ``analyse_results.generate_summary_csv`` is called on the full list.
    """
    no_leaves = 3
    start_trees = [
        Rtree_operations.create_trivial_tree(no_leaves),
        Rtree_operations.create_burled_leaved_tree(no_leaves, 1.0),
        Rtree_operations.create_balanced_tree(no_leaves, 1.0),
    ]

    tree_stat = summary.s_basic_tree_statistics
    summaries = [
        summary.s_variable('posterior'),
        summary.s_variable('mhr'),
        summary.s_no_admixes(),
        summary.s_tree_identifier(),
        summary.s_average_branch_length(),
        summary.s_total_branch_length(),
        tree_stat(Rtree_operations.get_number_of_ghost_populations,
                  'ghost_pops',
                  output='integer'),
        tree_stat(Rtree_operations.get_max_distance_to_root, 'max_root'),
        tree_stat(Rtree_operations.get_min_distance_to_root, 'min_root'),
        tree_stat(Rtree_operations.get_average_distance_to_root,
                  'average_root'),
        summary.s_variable('proposal_type', output='string'),
        summary.s_tree_identifier_new_tree(),
    ]
    for prior_part in ('prior', 'branch_prior', 'no_admix_prior',
                       'top_prior'):
        summaries.append(summary.s_variable(prior_part, output='double'))

    simulation_sanity.test_prior_model_several_chains(start_trees,
                                                      100000,
                                                      summaries=summaries,
                                                      thinning_coef=3)
    print('finished mcmc chains')

    # Only the slice [2:10] of the summaries is evaluated under the prior.
    prior_summaries = summaries[2:10]
    nsim = 100000
    prior_distribution = generate_prior_trees.get_distribution_under_prior(
        leaves=no_leaves, sim_length=nsim, list_of_summaries=prior_summaries)

    # First column is the running index, followed by one column per summary.
    columns = [tuple(range(nsim))]
    columns.extend(
        tuple(prior_distribution[summ.name]) for summ in prior_summaries)
    analyse_results.save_to_csv(columns,
                                prior_summaries,
                                filename='sim_prior.csv',
                                origin_layer=None)
    analyse_results.generate_summary_csv(summaries)
コード例 #6
0
def marginalize_out_data_in_posterior(no_leaves,
                                      no_trees=100,
                                      nsim=50000,
                                      wishart_df=None,
                                      prefix='',
                                      dest_folder='',
                                      sap_sim=False,
                                      sap_ana=False):
    """Call ``test_posterior_model`` ``no_trees`` times with Wishart data.

    Each call uses ``sim_from_wishart=True`` and ``thinning_coef=49`` and
    writes to 'results_<prefix><k>.csv' inside ``dest_folder``, with ``k``
    running from 1 to ``no_trees``.

    Args:
        no_leaves: Forwarded as ``no_leaves_true_tree``.
        no_trees: Number of calls (and result files).
        nsim: Forwarded as ``sim_length``.
        wishart_df: Forwarded as ``wishart_df``.
        prefix: Text inserted between 'results_' and the run number.
        dest_folder: Directory joined in front of each result file name.
        sap_sim: Forwarded as ``sap_sim``.
        sap_ana: Forwarded as ``sap_ana``.
    """
    root_stats = (
        (get_max_distance_to_root, 'max_root'),
        (get_min_distance_to_root, 'min_root'),
        (get_average_distance_to_root, 'average_root'),
    )
    prior_parts = ('prior', 'branch_prior', 'no_admix_prior', 'top_prior')

    summaries = [
        s_posterior(),
        s_no_admixes(),
        s_average_branch_length(),
        s_total_branch_length(),
        s_basic_tree_statistics(
            get_number_of_ghost_populations, 'ghost_pops', output='integer'),
    ]
    summaries.extend(
        s_basic_tree_statistics(function, label)
        for function, label in root_stats)
    summaries.extend(s_variable(part, output='double') for part in prior_parts)

    for run_no in xrange(1, no_trees + 1):
        file_name = ''.join(['results_', prefix, str(run_no), '.csv'])
        test_posterior_model(thinning_coef=49,
                             summaries=summaries,
                             filename=os.path.join(dest_folder, file_name),
                             sim_from_wishart=True,
                             sim_length=nsim,
                             wishart_df=wishart_df,
                             no_leaves_true_tree=no_leaves,
                             sap_sim=sap_sim,
                             sap_ana=sap_ana)
コード例 #7
0
ファイル: all_rappers.py プロジェクト: Tmacme/AdmixtureBayes
def mcmcmc(observed_covariance, df, outgroup=False, chains=8, its=None):
    """Run ``MCMCMC`` on a covariance matrix and return the layer-0 samples.

    Args:
        observed_covariance: Matrix-like object; ``shape[0]`` determines how
            many node names ('s1', 's2', ...) are generated.
        df: Forwarded to ``posterior_class`` as ``M``.
        outgroup: Forwarded to ``options_object``.
        chains: Number of chains; also used as the number of cores.
        its: Iteration scheme forwarded to ``MCMCMC``. Defaults to
            ``[50] * 100``; a fresh list is built on every call so the
            default can never be shared or mutated between calls.

    Returns:
        The rows of the ``MCMCMC`` result with ``layer == 0``, restricted to
        the columns 'iteration', 'posterior', 'tree' and 'no_admixes'.
    """
    if its is None:
        its = [50] * 100

    nodes = ['s' + str(i + 1) for i in range(observed_covariance.shape[0])]
    # NOTE(review): start_x is never used below; the call is kept only so
    # that any random draws made by simulate_tree happen as before - confirm
    # whether it can be dropped.
    start_x = identifier_to_tree_clean(simulate_tree(4, 0)), 0
    summaries = [
        summary.s_posterior(),
        summary.s_basic_tree_statistics(
            tree_statistics.unique_identifier_and_branch_lengths,
            'tree',
            output='string'),
        summary.s_variable('add', output='double'),
        summary.s_no_admixes(),
    ]
    options = options_object(outgroup, chains=chains)
    proposal = make_proposal(options)
    posterior_function = posterior_class(observed_covariance,
                                         M=df,
                                         nodes=nodes)
    # The first chain gets its own scheme; the remaining chains share one
    # scheme in which every summary has the (1, 0) setting.
    sample_verbose_scheme = [{
        'posterior': (1, 200),
        'tree': (1, 0),
        'add': (1, 200),
        'no_admixes': (1, 200)
    }] + [{s.name: (1, 0) for s in summaries}] * (chains - 1)

    res = MCMCMC(starting_trees=[(identifier_to_tree_clean(
        simulate_tree(4, 0)), 0) for _ in range(chains)],
                 posterior_function=posterior_function,
                 summaries=summaries,
                 temperature_scheme=fixed_geometrical(800, chains),
                 printing_schemes=sample_verbose_scheme,
                 iteration_scheme=its,
                 overall_thinnings=40,
                 proposal_scheme=proposal,
                 cores=chains,
                 no_chains=chains,
                 multiplier=None,
                 result_file=None,
                 store_permuts=False,
                 stop_criteria=None)
    res = res.loc[res.layer == 0,
                  ['iteration', 'posterior', 'tree', 'no_admixes']]
    return res
コード例 #8
0
def analyse_data_single_chained(filename):
    """Run a single posterior chain on an empirical covariance read from file.

    The covariance is read with ``load_data.read_data`` for the five fixed
    populations below, printed, and passed as ``emp_cov`` to
    ``simulation_sanity.test_posterior_model`` (300000 iterations,
    ``thinning_coef=20``, ``wishart_df=100``, 5 leaves).

    Args:
        filename: Path forwarded to ``load_data.read_data``.
    """
    populations = ['French', 'Han', 'Karitiana', 'Sardinian', 'Yoruba']
    emp_cov = load_data.read_data(filename, nodes=populations, noss=True)
    print(emp_cov)
    df = 100

    tree_stat = summary.s_basic_tree_statistics
    summaries = [
        summary.s_posterior(),
        summary.s_variable('mhr'),
        summary.s_no_admixes(),
        summary.s_tree_identifier(),
        summary.s_average_branch_length(),
        summary.s_total_branch_length(),
        tree_stat(Rtree_operations.get_number_of_ghost_populations,
                  'ghost_pops',
                  output='integer'),
        tree_stat(Rtree_operations.get_max_distance_to_root, 'max_root'),
        tree_stat(Rtree_operations.get_min_distance_to_root, 'min_root'),
        tree_stat(Rtree_operations.get_average_distance_to_root,
                  'average_root'),
        tree_stat(tree_statistics.unique_identifier_and_branch_lengths,
                  'tree',
                  output='string'),
        tree_stat(tree_statistics.majority_tree,
                  'majority_tree',
                  output='string'),
        summary.s_variable('proposal_type', output='string'),
        summary.s_variable('sliding_regraft_adap_param'),
        summary.s_variable('rescale_adap_param'),
        summary.s_tree_identifier_new_tree(),
    ]
    for prior_part in ('prior', 'branch_prior', 'no_admix_prior',
                       'top_prior'):
        summaries.append(
            summary.s_variable(prior_part, output='double_missing'))

    # The return value of the run is not used by this function.
    simulation_sanity.test_posterior_model(None,
                                           None,
                                           300000,
                                           summaries=summaries,
                                           thinning_coef=20,
                                           wishart_df=df,
                                           emp_cov=emp_cov,
                                           no_leaves_true_tree=5)
コード例 #9
0
def run_a():
    """Run a 50000-step prior-targeting chain from a 4-leaf start tree.

    The start tree comes from
    ``Rtree_operations.create_burled_leaved_tree(4, 1)`` and the chain is
    run through ``simulation_sanity.test_prior_model`` with
    ``thinning_coef=3``.
    """
    no_leaves = 4
    start_tree = Rtree_operations.create_burled_leaved_tree(no_leaves, 1)

    tree_stat = summary.s_basic_tree_statistics
    summaries = [
        summary.s_posterior(),
        summary.s_variable('mhr'),
        summary.s_no_admixes(),
        summary.s_tree_identifier(),
        summary.s_average_branch_length(),
        summary.s_total_branch_length(),
        tree_stat(Rtree_operations.get_number_of_ghost_populations,
                  'ghost_pops',
                  output='integer'),
        tree_stat(Rtree_operations.get_max_distance_to_root, 'max_root'),
        tree_stat(Rtree_operations.get_min_distance_to_root, 'min_root'),
        tree_stat(Rtree_operations.get_average_distance_to_root,
                  'average_root'),
        summary.s_variable('proposal_type', output='string'),
        summary.s_tree_identifier_new_tree(),
    ]
    for prior_part in ('prior', 'branch_prior', 'no_admix_prior',
                       'top_prior'):
        summaries.append(summary.s_variable(prior_part, output='double'))

    simulation_sanity.test_prior_model(start_tree,
                                       50000,
                                       summaries=summaries,
                                       thinning_coef=3)

    # The names below are prepared but not used in the visible part of this
    # function.
    def max_two(tree):
        """Return False when the tree has more than two admixes."""
        admix_count = Rtree_operations.get_number_of_admixes(tree)
        return not (admix_count > 2)

    list_of_summaries = summaries[2:10]
    nsim = 100000
コード例 #10
0
def test_prior_model_several_chains(start_trees,
                                    sim_length=100000,
                                    summaries=None,
                                    thinning_coef=1):
    """Run one prior-targeting chain per start tree in a process pool.

    Chain number ``k`` (1-based) writes its samples to 'results_<k>.csv'
    with ``origin_layer=(k, 1)``.

    Args:
        start_trees: Sequence of starting states, one chain each. An empty
            sequence is a no-op.
        sim_length: Forwarded to ``basic_chain`` as ``N``.
        summaries: Summary objects to record. When None, the 'posterior' and
            'mhr' variables plus ``s_no_admixes()`` are used.
        thinning_coef: Added to ``sim_length / 60000`` (then truncated with
            ``int``) to form ``overall_thinning``.

    Returns:
        None.
    """
    # A pool sized by len(start_trees) cannot be created for zero trees.
    if len(start_trees) == 0:
        return

    posterior = initialize_prior_as_posterior()
    if summaries is None:
        summaries = [
            s_variable('posterior'),
            s_variable('mhr'),
            s_no_admixes()
        ]
    proposal = basic_meta_proposal()
    sample_verbose_scheme = {s.name: (1, 0) for s in summaries}
    p = Pool(len(start_trees))

    def func(nstart_tree):
        n, start_tree = nstart_tree
        _, _, results, _ = basic_chain(
            start_tree,
            summaries,
            posterior,
            proposal,
            post=None,
            N=sim_length,
            sample_verbose_scheme=sample_verbose_scheme,
            overall_thinning=int(thinning_coef + sim_length / 60000),
            i_start_from=0,
            temperature=1.0,
            proposal_update=None,
            check_trees=True)
        # The extension needs its dot: the old name was 'results_1csv'.
        save_to_csv(results,
                    summaries,
                    filename='results_' + str(n + 1) + '.csv',
                    origin_layer=(n + 1, 1))

    p.map(func, enumerate(start_trees))
0
def get_summaries(true_tree, df=10):
    """Build the summary list whose posterior/prior parts are recalculated.

    A posterior is created from the covariance of ``true_tree`` and handed
    as ``pks_function`` to every ``s_variable_recalculated`` summary.

    Args:
        true_tree: Tree passed to ``make_covariance``.
        df: Second argument of ``initialize_posterior``.

    Returns:
        List of summary objects.
    """
    covariance = make_covariance(true_tree)
    posterior = initialize_posterior(covariance, df)

    def recalculated(name):
        return summary.s_variable_recalculated(name,
                                               output='double',
                                               pks_function=posterior)

    tree_stat = summary.s_basic_tree_statistics
    summaries = [
        recalculated('posterior'),
        summary.s_variable('mhr'),
        summary.s_no_admixes(),
        summary.s_tree_identifier(),
        summary.s_average_branch_length(),
        summary.s_total_branch_length(),
        tree_stat(Rtree_operations.get_number_of_ghost_populations,
                  'ghost_pops',
                  output='integer'),
        tree_stat(Rtree_operations.get_max_distance_to_root, 'max_root'),
        tree_stat(Rtree_operations.get_min_distance_to_root, 'min_root'),
        tree_stat(Rtree_operations.get_average_distance_to_root,
                  'average_root'),
        tree_stat(tree_statistics.unique_identifier_and_branch_lengths,
                  'tree',
                  output='string'),
        summary.s_variable('proposal_type', output='string'),
        summary.s_variable('sliding_regraft_adap_param',
                           output='double_missing'),
        summary.s_variable('rescale_adap_param', output='double_missing'),
        summary.s_tree_identifier_new_tree(),
    ]
    for prior_part in ('prior', 'branch_prior', 'no_admix_prior',
                       'top_prior'):
        summaries.append(recalculated(prior_part))
    return summaries
コード例 #12
0
ファイル: all_rappers.py プロジェクト: Tmacme/AdmixtureBayes
def mcmc(observed_covariance, df, outgroup=False):
    """Run one 5000-step ``basic_chain`` on a covariance matrix.

    Args:
        observed_covariance: Matrix-like object; ``shape[0]`` determines how
            many node names ('s1', 's2', ...) are generated.
        df: Forwarded to ``posterior_class`` as ``M``.
        outgroup: Forwarded to ``options_object``.

    Returns:
        Element 2 of the value returned by ``basic_chain``.
    """
    no_nodes = observed_covariance.shape[0]
    nodes = ['s' + str(index + 1) for index in range(no_nodes)]
    start_x = (identifier_to_tree_clean(simulate_tree(4, 0)), 0)

    summaries = [
        summary.s_posterior(),
        summary.s_basic_tree_statistics(
            tree_statistics.unique_identifier_and_branch_lengths,
            'tree',
            output='string'),
        summary.s_variable('add', output='double'),
        summary.s_no_admixes(),
    ]

    options = options_object(outgroup)
    # Only the first entry of what make_proposal returns is used here.
    proposal = make_proposal(options)[0]
    posterior_function = posterior_class(observed_covariance,
                                         M=df,
                                         nodes=nodes)

    sample_verbose_scheme = dict.fromkeys(('posterior', 'add', 'no_admixes'),
                                          (1, 200))
    sample_verbose_scheme['tree'] = (1, 0)

    chain_output = basic_chain(start_x,
                               summaries,
                               posterior_function,
                               proposal,
                               post=None,
                               N=5000,
                               sample_verbose_scheme=sample_verbose_scheme,
                               overall_thinning=100,
                               i_start_from=0,
                               temperature=1.0,
                               proposal_update=None,
                               multiplier=None,
                               check_trees=False,
                               appending_result_file=None,
                               appending_result_frequency=10)
    return chain_output[2]
コード例 #13
0
def run_d(true_tree_as_file=None):
    """Run a 100000-step posterior chain against a fixed or file-given tree.

    Without a file, a hard-coded true tree and a hard-coded start tree are
    used and both are pretty-printed. With a file, the true tree is parsed
    from its first line and the start tree is the trivial tree with the same
    number of leaves. After the run the identifier of the tree returned by
    ``simulation_sanity.test_posterior_model`` is printed and
    ``analyse_results.generate_summary_csv`` is called.

    Args:
        true_tree_as_file: Optional path to a file whose first line is a
            tree identifier string.
    """
    to_tree = tree_statistics.identifier_to_tree_clean
    if true_tree_as_file is not None:
        with open(true_tree_as_file, 'r') as handle:
            identifier = handle.readline().rstrip()
        true_tree = to_tree(identifier)
        no_leaves = Rtree_operations.get_number_of_leaves(true_tree)
        s_tree = Rtree_operations.create_trivial_tree(no_leaves)
    else:
        true_tree = to_tree(
            'w.w.w.w.w.w.a.a.w-c.w.c.c.w.c.5.0.w.3.2-c.w.w.0.c.4.w-c.w.0.c.3-w.c.1-c.0;0.07-0.974-1.016-0.089-0.81-0.086-1.499-0.052-1.199-2.86-0.403-0.468-0.469-1.348-1.302-1.832-0.288-0.18-0.45-0.922-2.925-3.403;0.388-0.485'
        )
        s_tree = to_tree(
            'w.w.a.w.w.a.a.a.w-c.w.c.c.w.w.c.0.w.w.6.3.2-c.w.w.0.w.c.5.w.w-c.w.0.c.3.w.w-c.w.c.2.0-w.c.1-c.0;0.828-0.21-0.197-0.247-0.568-1.06-0.799-1.162-2.632-2.001-0.45-1.048-0.834-0.469-0.191-2.759-0.871-1.896-0.473-0.019-1.236-0.287-0.179-0.981-0.456-0.91-2.114-3.368;0.655-0.506-0.389-0.23'
        )
        print(Rtree_operations.pretty_string(s_tree))
        print(Rtree_operations.pretty_string(true_tree))

    tree_stat = summary.s_basic_tree_statistics

    def missing_double(name):
        return summary.s_variable(name, output='double_missing')

    summaries = [
        summary.s_posterior(),
        missing_double('mhr'),
        summary.s_no_admixes(),
        summary.s_tree_identifier(),
        summary.s_average_branch_length(),
        summary.s_total_branch_length(),
        tree_stat(Rtree_operations.get_number_of_ghost_populations,
                  'ghost_pops',
                  output='integer'),
        tree_stat(Rtree_operations.get_max_distance_to_root, 'max_root'),
        tree_stat(Rtree_operations.get_min_distance_to_root, 'min_root'),
        tree_stat(Rtree_operations.get_average_distance_to_root,
                  'average_root'),
        tree_stat(tree_statistics.get_admixture_proportion_string,
                  'admixtures',
                  output='string'),
        tree_stat(tree_statistics.unique_identifier_and_branch_lengths,
                  'tree',
                  output='string'),
        tree_stat(tree_statistics.majority_tree,
                  'majority_tree',
                  output='string'),
        summary.s_variable('add', output='double'),
        missing_double('sliding_rescale_adap_param'),
        missing_double('cutoff_distance'),
        missing_double('number_of_pieces'),
        summary.s_variable('proposal_type', output='string'),
        missing_double('sliding_regraft_adap_param'),
        missing_double('rescale_constrained_adap_param'),
        missing_double('rescale_adap_param'),
        summary.s_tree_identifier_new_tree(),
    ]
    for prior_part in ('prior', 'branch_prior', 'no_admix_prior',
                       'top_prior'):
        summaries.append(missing_double(prior_part))

    returned_tree = simulation_sanity.test_posterior_model(
        true_tree,
        s_tree,
        100000,
        summaries=summaries,
        thinning_coef=20,
        wishart_df=10000,
        resimulate_regrafted_branch_length=False)
    returned_identifier = tree_statistics.unique_identifier_and_branch_lengths(
        returned_tree)
    print('true_tree %s' % (returned_identifier,))
    analyse_results.generate_summary_csv(summaries, reference_tree=true_tree)
コード例 #14
0
def get_summary_scheme(majority_tree=False,
                       light_newick_tree_summaries=False,
                       full_tree=True,
                       proposals=None,
                       acceptance_rate_information=False,
                       admixture_proportion_string=False,
                       priors=False,
                       no_chains=1,
                       nodes=None,
                       verbose_level='normal',
                       only_coldest_chain=True):
    """Assemble the summaries and the per-chain sample/verbose schemes.

    Args:
        majority_tree: Accepted but not used in this function.
        light_newick_tree_summaries: Add the 'Zero_Ntree', 'random_Ntree'
            and 'mode_Ntree' string summaries.
        full_tree: Add the 'tree' string summary.
        proposals: Optional object with a ``props`` sequence; each entry
            must expose ``proposal_name`` and ``adaption``.
        acceptance_rate_information: Add 'mhr' and 'proposal_type', plus one
            '<proposal_name>_adap_param' summary per adapting proposal when
            ``proposals`` is given.
        admixture_proportion_string: Add the 'admixtures' string summary.
        priors: Accepted but not used in this function.
        no_chains: Number of schemes in the returned list.
        nodes: Accepted but not used in this function.
        verbose_level: Unless 'silent', the first scheme sets 'posterior'
            and 'no_admixes' to (1, 1).
        only_coldest_chain: With several chains, give the other chains an
            empty scheme instead of the base scheme.

    Returns:
        Tuple ``(schemes, summaries)`` where ``schemes`` is a list of dicts,
        the first one belonging to the first chain.
    """
    # Proposal metadata is read whenever proposals are supplied, even if it
    # ends up unused.
    if proposals is not None:
        props = proposals.props
        prop_names = [prop.proposal_name for prop in props]
        adaption = [prop.adaption for prop in props]

    tree_stat = summary.s_basic_tree_statistics

    def string_stat(function, label):
        return tree_stat(function, label, output='string')

    summaries = [
        summary.s_posterior(),
        summary.s_likelihood(),
        summary.s_prior(),
        summary.s_no_admixes(),
        summary.s_variable('add', output='double'),
        summary.s_average_branch_length(),
        summary.s_total_branch_length(),
        tree_stat(Rtree_operations.get_number_of_ghost_populations,
                  'ghost_pops',
                  output='integer'),
        tree_stat(Rtree_operations.get_max_distance_to_root, 'max_root'),
        tree_stat(Rtree_operations.get_min_distance_to_root, 'min_root'),
        tree_stat(Rtree_operations.get_average_distance_to_root,
                  'average_root'),
        string_stat(Rtree_to_covariance_matrix.get_populations_string,
                    'descendant_sets'),
    ]

    if full_tree:
        summaries.append(
            string_stat(tree_statistics.unique_identifier_and_branch_lengths,
                        'tree'))
    if admixture_proportion_string:
        summaries.append(
            string_stat(tree_statistics.get_admixture_proportion_string,
                        'admixtures'))
    if light_newick_tree_summaries:
        summaries.append(
            string_stat(tree_statistics.tree_to_0ntree, 'Zero_Ntree'))
        summaries.append(
            string_stat(tree_statistics.tree_to_random_ntree, 'random_Ntree'))
        summaries.append(
            string_stat(tree_statistics.tree_to_mode_ntree, 'mode_Ntree'))
    if acceptance_rate_information:
        summaries.append(summary.s_variable('mhr', output='double_missing'))
        summaries.append(summary.s_variable('proposal_type', output='string'))
        if proposals is not None:
            summaries.extend(
                summary.s_variable(name + "_adap_param",
                                   output='double_missing')
                for name, adapts in zip(prop_names, adaption) if adapts)

    base_scheme = dict((s.name, (1, 0)) for s in summaries)
    first_scheme = deepcopy(base_scheme)
    if verbose_level != 'silent' and 'posterior' in base_scheme:
        first_scheme['posterior'] = (1, 1)
        first_scheme['no_admixes'] = (1, 1)

    if no_chains == 1:
        remaining = []
    elif only_coldest_chain:
        # One shared empty dict, repeated for every other chain.
        remaining = [{}] * (no_chains - 1)
    else:
        # One shared base scheme, repeated for every other chain.
        remaining = [base_scheme] * (no_chains - 1)
    return [first_scheme] + remaining, summaries
コード例 #15
0
def test_posterior_model(true_tree=None,
                         start_tree=None,
                         sim_length=100000,
                         summaries=None,
                         thinning_coef=19,
                         admixtures_of_true_tree=None,
                         no_leaves_true_tree=4,
                         filename='results.csv',
                         sim_from_wishart=False,
                         wishart_df=None,
                         sap_sim=False,
                         sap_ana=False,
                         resimulate_regrafted_branch_length=False,
                         emp_cov=None,
                         big_posterior=False,
                         rescale_empirical_cov=False):
    """Run one posterior chain against a true tree and save the samples.

    Args:
        true_tree: Tree generating the covariance. When None, one is drawn
            with ``generate_phylogeny``.
        start_tree: Starting tree of the chain; defaults to ``true_tree``.
        sim_length: Forwarded to ``basic_chain`` as ``N``.
        summaries: Summary objects to record. When None, ``s_posterior()``,
            the 'mhr' variable and ``s_no_admixes()`` are used.
        thinning_coef: Lower bound of ``overall_thinning``, which is
            ``int(max(thinning_coef, sim_length / 60000))``.
        admixtures_of_true_tree: Admixture count for a generated true tree;
            drawn as ``geom.rvs(p=0.5) - 1`` when None.
        no_leaves_true_tree: Leaf count for a generated true tree; replaced
            by ``get_no_leaves(true_tree)`` when a tree is given.
        filename: Forwarded to ``save_to_csv``.
        sim_from_wishart: Replace the covariance by a Wishart draw around it.
        wishart_df: Degrees of freedom; defaults to ``n_mark(m)``.
        sap_sim: Forwarded to ``generate_phylogeny`` as
            ``skewed_admixture_prior``.
        sap_ana: Forwarded to the posterior as ``use_skewed_distr``.
        resimulate_regrafted_branch_length: Forwarded to
            ``adaptive_proposal``.
        emp_cov: When given, used as the covariance instead of the computed
            (or simulated) one.
        big_posterior: Use ``initialize_big_posterior`` instead of
            ``initialize_posterior``.
        rescale_empirical_cov: Forwarded to ``initialize_posterior`` as
            ``rescale``.

    Returns:
        The true tree (given or generated).
    """
    if true_tree is None:
        if admixtures_of_true_tree is None:
            admixtures_of_true_tree = geom.rvs(p=0.5) - 1
        true_tree = generate_phylogeny(no_leaves_true_tree,
                                       admixtures_of_true_tree,
                                       skewed_admixture_prior=sap_sim)
    else:
        no_leaves_true_tree = get_no_leaves(true_tree)
        admixtures_of_true_tree = get_number_of_admixes(true_tree)

    true_x = (true_tree, 0)

    m = make_covariance(true_tree, get_trivial_nodes(no_leaves_true_tree))
    if start_tree is None:
        start_tree = true_tree

    start_x = (start_tree, 0)
    if wishart_df is None:
        wishart_df = n_mark(m)
    if sim_from_wishart:
        r = m.shape[0]
        print(m)
        m = wishart.rvs(df=r * wishart_df - 1, scale=m / (r * wishart_df))
        print(m)
    if emp_cov is not None:
        m = emp_cov

    multiplier = None
    if big_posterior:
        posterior = initialize_big_posterior(m,
                                             wishart_df,
                                             use_skewed_distr=sap_ana)
    elif rescale_empirical_cov:
        # With rescaling on, initialize_posterior hands back a pair
        # (posterior, multiplier); binding the pair to `posterior` made the
        # call below fail.
        posterior, multiplier = initialize_posterior(
            m,
            wishart_df,
            use_skewed_distr=sap_ana,
            rescale=rescale_empirical_cov)
    else:
        posterior = initialize_posterior(m,
                                         wishart_df,
                                         use_skewed_distr=sap_ana,
                                         rescale=rescale_empirical_cov)

    print('true_tree= %s' % (unique_identifier_and_branch_lengths(true_tree),))
    post_ = posterior(true_x)
    print('likelihood(true_tree) %s' % (post_[0],))
    print('prior(true_tree) %s' % (post_[1],))
    print('posterior(true_tree) %s' % (sum(post_[:2]),))

    if summaries is None:
        summaries = [s_posterior(), s_variable('mhr'), s_no_admixes()]
    proposal = adaptive_proposal(
        resimulate_regrafted_branch_length=resimulate_regrafted_branch_length)

    sample_verbose_scheme = {s.name: (1, 0) for s in summaries}
    sample_verbose_scheme['posterior'] = (1, 1)
    sample_verbose_scheme['no_admixes'] = (1, 1)

    # The multiplier is only forwarded when rescaling produced one, so the
    # call is unchanged for every other configuration.
    # NOTE(review): presumably basic_chain uses it to undo the rescaling in
    # the recorded summaries - confirm.
    chain_options = {}
    if multiplier is not None:
        chain_options['multiplier'] = multiplier

    _, _, results, _ = basic_chain(
        start_x,
        summaries,
        posterior,
        proposal,
        post=None,
        N=sim_length,
        sample_verbose_scheme=sample_verbose_scheme,
        overall_thinning=int(max(thinning_coef, sim_length / 60000)),
        i_start_from=0,
        temperature=1.0,
        proposal_update=None,
        check_trees=False,
        **chain_options)
    save_to_csv(results, summaries, filename=filename)
    return true_tree
コード例 #16
0
def test_posterior_model_multichain(true_tree=None,
                                    start_tree=None,
                                    sim_lengths=[250] * 800,
                                    summaries=None,
                                    thinning_coef=1,
                                    admixtures_of_true_tree=None,
                                    no_leaves_true_tree=4,
                                    wishart_df=None,
                                    sim_from_wishart=False,
                                    no_chains=8,
                                    result_file='results_mc3.csv',
                                    emp_cov=None,
                                    emp_remove=-1,
                                    rescale_empirical_cov=False):
    if true_tree is None:
        if admixtures_of_true_tree is None:
            admixtures_of_true_tree = geom.rvs(p=0.5) - 1
        true_tree = generate_phylogeny(no_leaves_true_tree,
                                       admixtures_of_true_tree)
    else:
        no_leaves_true_tree = get_no_leaves(true_tree)
        admixtures_of_true_tree = get_number_of_admixes(true_tree)
    true_x = (true_tree, 0)

    m = make_covariance(true_tree, get_trivial_nodes(no_leaves_true_tree))
    if start_tree is None:
        start_tree = true_tree

    start_x = (start_tree, 0)
    if wishart_df is None:
        wishart_df = n_mark(m)
    if sim_from_wishart:
        r = m.shape[0]
        print m
        m = wishart.rvs(df=r * wishart_df - 1, scale=m / (r * wishart_df))
        print m
    if emp_cov is not None:
        m = emp_cov
    if rescale_empirical_cov:
        posterior, multiplier = initialize_posterior(
            m,
            wishart_df,
            use_skewed_distr=True,
            rescale=rescale_empirical_cov)
    else:
        posterior = initialize_posterior(m,
                                         wishart_df,
                                         use_skewed_distr=True,
                                         rescale=rescale_empirical_cov)
        multiplier = None
    print 'true_tree=', unique_identifier_and_branch_lengths(true_tree)
    if rescale_empirical_cov:
        post_ = posterior(
            (scale_tree_copy(true_x[0],
                             1.0 / multiplier), true_x[1] / multiplier))
    else:
        post_ = posterior(true_x)
    print 'likelihood(true_tree)', post_[0]
    print 'prior(true_tree)', post_[1]
    print 'posterior(true_tree)', sum(post_)
    if summaries is None:
        summaries = [
            s_variable('posterior'),
            s_variable('mhr'),
            s_no_admixes()
        ]
    proposal = basic_meta_proposal()
    #proposal.props=proposal.props[2:] #a little hack under the hood
    #proposal.params=proposal.params[2:] #a little hack under the hood.
    sample_verbose_scheme = {summary.name: (1, 0) for summary in summaries}
    sample_verbose_scheme_first = deepcopy(sample_verbose_scheme)
    if 'posterior' in sample_verbose_scheme:
        sample_verbose_scheme_first['posterior'] = (1, 1)  #(1,1)
        sample_verbose_scheme_first['no_admixes'] = (1, 1)
    #if 'likelihood' in sample_verbose_scheme:
    #sample_verbose_scheme_first['likelihood']=(1,1)
    print sample_verbose_scheme_first
    MCMCMC(starting_trees=[deepcopy(start_x) for _ in range(no_chains)],
           posterior_function=posterior,
           summaries=summaries,
           temperature_scheme=fixed_geometrical(800.0, no_chains),
           printing_schemes=[sample_verbose_scheme_first] +
           [sample_verbose_scheme for _ in range(no_chains - 1)],
           iteration_scheme=sim_lengths,
           overall_thinnings=int(thinning_coef),
           proposal_scheme=[adaptive_proposal() for _ in range(no_chains)],
           cores=no_chains,
           no_chains=no_chains,
           multiplier=multiplier,
           result_file=result_file,
           store_permuts=False)
    print 'finished MC3'
    #save_pandas_dataframe_to_csv(results, result_file)
    #save_permuts_to_csv(permuts, get_permut_filename(result_file))
    return true_tree
コード例 #17
0
def run_analysis_of_proposals():
    #true_tree=generate_prior_trees.generate_phylogeny(8,2)
    true_tree = tree_statistics.identifier_to_tree_clean(
        'w.w.c.w.w.w.2.w-w.w.a.w.w.w.w-w.c.1.w.c.w.w.4-w.c.1.w.w.w-w.c.1.w.w-c.0.w.w-c.w.0-a.w-c.0.w-c.0;0.091-1.665-0.263-0.821-0.058-0.501-0.141-0.868-5.064-0.153-0.372-3.715-1.234-0.913-2.186-0.168-0.542-0.056-2.558-0.324;0.367-0.451'
    )
    true_tree = Rcatalogue_of_trees.tree_good
    s_tree = Rtree_operations.create_trivial_tree(4)
    summaries = [
        summary.s_posterior(),
        summary.s_variable('mhr'),
        summary.s_no_admixes(),
        summary.s_tree_identifier(),
        summary.s_average_branch_length(),
        summary.s_total_branch_length(),
        summary.s_basic_tree_statistics(
            Rtree_operations.get_number_of_ghost_populations,
            'ghost_pops',
            output='integer'),
        summary.s_basic_tree_statistics(
            Rtree_operations.get_max_distance_to_root, 'max_root'),
        summary.s_basic_tree_statistics(
            Rtree_operations.get_min_distance_to_root, 'min_root'),
        summary.s_basic_tree_statistics(
            Rtree_operations.get_average_distance_to_root, 'average_root'),
        summary.s_basic_tree_statistics(
            tree_statistics.unique_identifier_and_branch_lengths,
            'tree',
            output='string'),
        summary.s_basic_tree_statistics(
            tree_statistics.majority_tree, 'majority_tree', output='string'),
        summary.s_bposterior_difference(lambda x: x[0],
                                        'likelihood_difference'),
        summary.s_bposterior_difference(lambda x: x[1], 'prior_difference'),
        summary.s_bposterior_difference(lambda x: x[2][0],
                                        'branch_prior_difference'),
        summary.s_bposterior_difference(lambda x: x[2][1],
                                        'no_admix_prior_difference'),
        summary.s_bposterior_difference(lambda x: x[2][2],
                                        'adix_prop_prior_difference'),
        summary.s_bposterior_difference(lambda x: x[2][3],
                                        'top_prior_difference'),
        summary.s_variable('proposal_type', output='string'),
        summary.s_variable('sliding_regraft_adap_param',
                           output='double_missing'),
        summary.s_variable('rescale_adap_param', output='double_missing'),
        summary.s_tree_identifier_new_tree()
    ] + [
        summary.s_variable(s, output='double_missing')
        for s in ['prior', 'branch_prior', 'no_admix_prior', 'top_prior']
    ]
    r = simulation_sanity.test_posterior_model(
        true_tree,
        true_tree,
        100000,
        summaries=summaries,
        thinning_coef=2,
        wishart_df=1000,
        resimulate_regrafted_branch_length=False,
        admixtures_of_true_tree=2,
        no_leaves_true_tree=4,
        big_posterior=True,
        rescale_empirical_cov=True)
    print 'true_tree', tree_statistics.unique_identifier_and_branch_lengths(r)
    analyse_results.generate_summary_csv(summaries, reference_tree=true_tree)
コード例 #18
0
def run_posterior_multichain(wishart_df=1000,
                             true_tree_as_identifier=None,
                             result_file='result_mc3.csv',
                             emp_cov_file=None,
                             emp_remove=-1,
                             remove_outgroup=False,
                             make_emp_cov_file=True):
    if true_tree_as_identifier is None:
        true_tree = Rcatalogue_of_trees.tree_good
    else:
        true_tree = tree_statistics.identifier_to_tree_clean(
            'w.w.w.w.w.w.a.a.w-c.w.c.c.w.c.5.0.w.3.2-c.w.w.0.c.4.w-c.w.0.c.3-w.c.1-c.0;0.07-0.974-1.016-0.089-0.81-0.086-1.499-0.052-1.199-2.86-0.403-0.468-0.469-1.348-1.302-1.832-0.288-0.18-0.45-0.922-2.925-3.403;0.388-0.485'
        )

        #with open(true_tree_as_identifier, 'r') as f:
        #    s=f.readline().rstrip()
        #    true_tree=tree_statistics.identifier_to_tree_clean(s)
    if remove_outgroup:
        true_tree = Rtree_operations.remove_outgroup(true_tree)
        true_tree = Rtree_operations.simple_reorder_the_leaves_after_removal_of_s1(
            true_tree)
    if make_emp_cov_file:
        cov = tree_to_data.get_empirical_matrix(s, factor=0.01, reps=400)
        tree_to_data.emp_cov_to_file(cov, filename=emp_cov_file)
    print 'true_tree', tree_statistics.unique_identifier_and_branch_lengths(
        true_tree)
    no_leaves = Rtree_operations.get_no_leaves(true_tree)
    #s_tree=tree_statistics.identifier_to_tree_clean('w.w.a.w.w.a.a.a.w-c.w.c.c.w.w.c.0.w.w.6.3.2-c.w.w.0.w.c.5.w.w-c.w.0.c.3.w.w-c.w.c.2.0-w.c.1-c.0;0.828-0.21-0.197-0.247-0.568-1.06-0.799-1.162-2.632-2.001-0.45-1.048-0.834-0.469-0.191-2.759-0.871-1.896-0.473-0.019-1.236-0.287-0.179-0.981-0.456-0.91-2.114-3.368;0.655-0.506-0.389-0.23')
    s_tree = Rtree_operations.create_burled_leaved_tree(no_leaves, 1.0)
    print 'no_leaves', no_leaves
    summaries = [
        summary.s_posterior(),
        summary.s_variable('mhr'),
        summary.s_no_admixes(),
        summary.s_tree_identifier(),
        summary.s_average_branch_length(),
        summary.s_total_branch_length(),
        summary.s_basic_tree_statistics(
            Rtree_operations.get_number_of_ghost_populations,
            'ghost_pops',
            output='integer'),
        summary.s_basic_tree_statistics(
            Rtree_operations.get_max_distance_to_root, 'max_root'),
        summary.s_basic_tree_statistics(
            Rtree_operations.get_min_distance_to_root, 'min_root'),
        summary.s_basic_tree_statistics(
            Rtree_operations.get_average_distance_to_root, 'average_root'),
        summary.s_basic_tree_statistics(
            tree_statistics.unique_identifier_and_branch_lengths,
            'tree',
            output='string'),
        summary.s_basic_tree_statistics(
            tree_statistics.majority_tree, 'majority_tree', output='string'),
        summary.s_variable('add', output='double'),
        summary.s_variable('proposal_type', output='string'),
        summary.s_variable('sliding_regraft_adap_param',
                           output='double_missing'),
        summary.s_variable('rescale_adap_param', output='double_missing'),
        summary.s_likelihood(),
        summary.s_prior(),
        summary.s_tree_identifier_new_tree()
    ] + [
        summary.s_variable(s, output='double_missing')
        for s in ['prior', 'branch_prior', 'no_admix_prior', 'top_prior']
    ]
    if emp_cov_file is not None:
        if emp_remove < 0:
            emp_cov = tree_to_data.file_to_emp_cov(emp_cov_file)
        else:
            emp_cov = tree_to_data.file_to_emp_cov(emp_cov_file, emp_remove)
    else:
        emp_cov = None
    print 'emp_cov', emp_cov
    r = simulation_sanity.test_posterior_model_multichain(
        true_tree,
        s_tree, [50] * 20000,
        summaries=summaries,
        thinning_coef=24,
        wishart_df=wishart_df,
        result_file=result_file,
        emp_cov=emp_cov,
        rescale_empirical_cov=False)
    print 'true_tree', tree_statistics.unique_identifier_and_branch_lengths(r)
    analyse_results.generate_summary_csv(summaries, reference_tree=true_tree)
コード例 #19
0
    #call_notebook()


def call_notebook():
    ## DOESNT WORK IN THE MAC
    dir_path = os.path.dirname(os.path.realpath(__file__))
    cmd = ['Rscript', dir_path + os.path.sep + 'order_report.R']
    print cmd
    call(cmd)


if __name__ == '__main__':

    import summary
    summaries = [
        summary.s_variable('posterior'),
        summary.s_variable('mhr'),
        summary.s_no_admixes(),
        summary.s_tree_identifier(),
        summary.s_average_branch_length(),
        summary.s_total_branch_length(),
        summary.s_tree_identifier_new_tree()
    ] + [
        summary.s_variable(s) for s in [
            'backward_choices', 'backward_density', 'forward_density',
            'forward_choices', 'proposal_type', 'prior', 'branch_prior',
            'no_admix_prior', 'top_prior'
        ]
    ]
    from generate_prior_trees import get_distribution_under_prior
    prior_distribution = get_distribution_under_prior(