Example #1
def generate_covariance(size, scale_method='beta', return_tree=False):
    # simulate a random phylogeny, compute its covariance and rescale both
    tree = generate_phylogeny(size)
    cov = make_covariance(tree)
    s = calc_s(scale_method)
    if return_tree:
        return cov * s, scale_tree(tree, s)
    return cov * s
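A minimal usage sketch for the function above, assuming the helpers it calls (generate_phylogeny, make_covariance, calc_s, scale_tree) are importable from the same module; the 5-leaf size is just illustrative:

cov = generate_covariance(5)                                   # scaled covariance only
cov2, scaled_tree = generate_covariance(5, return_tree=True)   # covariance plus the rescaled tree
print(cov.shape)                                               # expected: (5, 5)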
Example #2
    def __call__(self, Rtree=None, add=None, **kwargs):
        #print kwargs['full_tree']
        #print self.nodes
        if Rtree is None:
            full_tree = kwargs['full_tree']
            outgroup_name = list(
                set(get_leaf_keys(full_tree)) - set(self.nodes))[0]
            cov = make_covariance(full_tree,
                                  node_keys=[outgroup_name] + self.nodes)
            Rcov = reduce_covariance(cov, 0)
            return {'Rcov': Rcov}, False
        #print pretty_string(Rtree)
        #print get_leaf_keys(Rtree)
        #print self.nodes
        Rcov = make_covariance(
            Rtree, node_keys=self.nodes) + float(add) * self.add_multiplier

        return {'Rcov': Rcov}, False
Example #3
 def get_non_empirical_max_likelihood(self, x, pks={}, verbose=False):
     p_cov = make_covariance(x[0]) + x[1]
     if self.b is not None:
         p_cov += self.b
     val = self.likmat(p_cov, p_cov, None, self.M, pks=pks)
     if verbose:
         print 'empirical_matrix=', p_cov
         print 'input_matrix=', p_cov
     return val
Example #4
def create_initial_Sigma_generator(n, streng):
    key = streng.keys()[0]
    if key == 'default':
        return fixed_initial_Sigma(None)
    elif key == 'random':
        return random_initial_Sigma(n)
    elif key == 'start':
        print streng[key]
        cov = make_covariance(streng[key][0][0],
                              node_keys=streng[key][1]) + streng[key][0][1]
        return fixed_initial_Sigma(cov)
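A hypothetical usage sketch of the dispatcher above; the single-key dict layout is inferred from the branches it handles:

gen = create_initial_Sigma_generator(4, {'default': None})   # returns fixed_initial_Sigma(None)
gen = create_initial_Sigma_generator(4, {'random': None})    # returns random_initial_Sigma(4)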
Example #5
def autogenerate_tree(no_leaves,
                      no_admixtures,
                      minimum_number_of_nonzeros=1,
                      minimum_number_of_zeros=1):
    while True:
        tree = generate_phylogeny(no_leaves, no_admixtures)
        cov = make_covariance(tree)
        zeros = [get_number_of_zeros(row) for row in cov]
        no_non_zeros = cov.shape[0] - max(zeros)
        if no_non_zeros >= minimum_number_of_nonzeros and max(
                zeros) >= minimum_number_of_zeros:
            break
    tree = add_outgroup(tree, 'z', 0.234, 1.96, 'Aa')
    cov = make_covariance(tree)
    print cov
    print reduce_covariance(cov, 0)
    plot_as_directed_graph(tree)
    suffix = str(no_leaves) + '_' + str(no_admixtures) + '_' + str(
        minimum_number_of_nonzeros) + '_' + str(minimum_number_of_zeros)
    return unique_identifier_and_branch_lengths(tree), suffix
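For instance, a hypothetical call relying on the default zero/non-zero requirements returns the tree identifier together with a suffix built from the arguments:

stree, suffix = autogenerate_tree(no_leaves=5, no_admixtures=2)
print(suffix)   # '5_2_1_1' under the default minimum_number_of_* arguments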
Example #6
 def get_size_diff(self, x):
     t, add = x
     p_cov = make_covariance(t) + add
     if self.b is not None:
         p_cov += self.b
     diffs = p_cov - self.emp_cov
     max_dif = amax(diffs)
     min_dif = amin(diffs)
     return median(
         p_cov /
         self.emp_cov), (max_dif + min_dif) / (abs(max_dif) +
                                               abs(min_dif)), norm(diffs)
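The second value returned above is a signed balance in [-1, 1]: +1 when every deviation from the empirical covariance is positive, -1 when every deviation is negative. A small worked sketch with the numpy functions already used here:

from numpy import amax, amin, array
diffs = array([[0.2, -0.1], [-0.1, 0.05]])
balance = (amax(diffs) + amin(diffs)) / (abs(amax(diffs)) + abs(amin(diffs)))
print(balance)   # (0.2 - 0.1) / (0.2 + 0.1) = 1/3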
Example #7
def run_test():
    from Rtree_operations import get_trivial_nodes, create_trivial_tree, get_number_of_ghost_populations, get_max_distance_to_root, get_min_distance_to_root, get_average_distance_to_root
    from posterior import initialize_prior_as_posterior, initialize_posterior
    from meta_proposal import basic_meta_proposal
    from copy import deepcopy
    from Rtree_to_covariance_matrix import make_covariance

    N = 3
    true_tree = create_trivial_tree(N)
    proposal_function = basic_meta_proposal()
    post_fun = initialize_posterior(make_covariance(true_tree))
    tree = create_trivial_tree(N)

    n = 6
    import summary
    summaries = [
        summary.s_posterior(),
        summary.s_variable('mhr'),
        summary.s_no_admixes(),
        summary.s_tree_identifier(),
        summary.s_average_branch_length(),
        summary.s_total_branch_length(),
        summary.s_basic_tree_statistics(get_number_of_ghost_populations,
                                        'ghost_pops',
                                        output='integer'),
        summary.s_basic_tree_statistics(get_max_distance_to_root, 'max_root'),
        summary.s_basic_tree_statistics(get_min_distance_to_root, 'min_root'),
        summary.s_basic_tree_statistics(get_average_distance_to_root,
                                        'average_root'),
        summary.s_variable('proposal_type', output='string')
    ]

    from temperature_scheme import fixed_geometrical
    sample_verbose_scheme = {summary.name: (1, 0) for summary in summaries}
    sample_verbose_scheme['posterior'] = (1, 100)
    #sample_verbose_scheme['min_root']=(1,100)

    ad = MCMCMC(starting_trees=[deepcopy(tree) for _ in range(n)],
                posterior_function=post_fun,
                summaries=summaries,
                temperature_scheme=fixed_geometrical(10.0, n),
                printing_schemes=[sample_verbose_scheme for _ in range(n)],
                iteration_scheme=[40] * 200,
                overall_thinnings=5,
                proposal_scheme=[proposal_function for _ in range(n)],
                cores=n,
                no_chains=n)

    ad[0].to_csv(path_or_buf='findme.csv')
    print set(map(tuple, ad[1]))
    return ad
Example #8
def get_posterior_A_matrices(outfile, add_multiplier=1, nodes=None, outgroup='out', thinning=100):
    a = pd.read_csv(outfile, usecols=['tree', 'add', 'layer'])
    b = a.loc[a.layer == 0, :]        # keep only the layer-0 rows
    b = b[b.shape[0] // 2::thinning]  # drop the first half of the rows, then thin
    AmatricesA = []
    for stree, add in zip(b['tree'], b['add']):
        #print stree
        tree = identifier_to_tree_clean(stree)
        #print pretty_string(tree)
        tree = add_outgroup(tree,
                            inner_node_name='new_node',
                            to_new_root_length=float(add) * add_multiplier,
                            to_outgroup_length=0,
                            outgroup_name=outgroup)
        cov = make_covariance(tree, node_keys=nodes)
        #print cov
        AmatricesA.append(Areduce(cov))
    return AmatricesA
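The slicing step above drops the first half of the layer-0 rows (presumably as burn-in) and then keeps every thinning-th row; a tiny illustration of the same slice on a plain list:

rows = list(range(10))
print(rows[len(rows) // 2::3])   # -> [5, 8]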
Example #9
def get_summaries(true_tree, df=10):
    m = make_covariance(true_tree)
    posterior = initialize_posterior(m, df)
    summaries = [
        summary.s_variable_recalculated(
            'posterior', output='double', pks_function=posterior),
        summary.s_variable('mhr'),
        summary.s_no_admixes(),
        summary.s_tree_identifier(),
        summary.s_average_branch_length(),
        summary.s_total_branch_length(),
        summary.s_basic_tree_statistics(
            Rtree_operations.get_number_of_ghost_populations,
            'ghost_pops',
            output='integer'),
        summary.s_basic_tree_statistics(
            Rtree_operations.get_max_distance_to_root, 'max_root'),
        summary.s_basic_tree_statistics(
            Rtree_operations.get_min_distance_to_root, 'min_root'),
        summary.s_basic_tree_statistics(
            Rtree_operations.get_average_distance_to_root, 'average_root'),
        summary.s_basic_tree_statistics(
            tree_statistics.unique_identifier_and_branch_lengths,
            'tree',
            output='string'),
        summary.s_variable('proposal_type', output='string'),
        summary.s_variable('sliding_regraft_adap_param',
                           output='double_missing'),
        summary.s_variable('rescale_adap_param', output='double_missing'),
        summary.s_tree_identifier_new_tree()
    ] + [
        summary.s_variable_recalculated(
            s, output='double', pks_function=posterior)
        for s in ['prior', 'branch_prior', 'no_admix_prior', 'top_prior']
    ]
    return summaries
Example #10
def initialize_posterior2(emp_cov=None,
                          true_tree=None,
                          M=None,
                          use_skewed_distr=False,
                          p=0.5,
                          rescale=False,
                          model_choice=[
                              'empirical covariance', 'true tree covariance',
                              'wishart on true tree covariance',
                              'empirical covariance on true tree',
                              'no likelihood'
                          ],
                          simulate_true_tree=False,
                          true_tree_no_leaves=None,
                          true_tree_no_admixes=None,
                          nodes=None,
                          simulate_true_tree_with_skewed_prior=False,
                          reduce_cov=None,
                          add_outgroup_to_true_tree=False,
                          reduce_true_tree=False):

    if not isinstance(model_choice, basestring):
        model_choice = model_choice[0]

    if model_choice == 'no likelihood':
        return initialize_prior_as_posterior(), {}

    if (model_choice == 'true tree covariance'
            or model_choice == 'wishart on true tree covariance'
            or model_choice == 'empirical covariance on true tree'):

        if simulate_true_tree:
            true_tree = generate_phylogeny(
                true_tree_no_leaves, true_tree_no_admixes, nodes,
                simulate_true_tree_with_skewed_prior)

        elif isinstance(true_tree, basestring):
            if ';' in true_tree:  #this means that the true tree is a s_tree
                true_tree_s = true_tree
                true_tree = identifier_to_tree_clean(true_tree_s)
            else:
                with open(true_tree, 'r') as f:
                    true_tree_s = f.readline().rstrip()
                true_tree = identifier_to_tree_clean(true_tree_s)

        true_tree = Rtree_operations.simple_reorder_the_leaves_after_removal_of_s1(
            true_tree)

        no_leaves = get_number_of_leaves(true_tree)
        no_admixes = get_number_of_admixes(true_tree)

        cov = make_covariance(true_tree)

        if reduce_cov is not None:
            pass
        if reduce_true_tree is not None:
            true_tree = Rtree_operations.remove_outgroup(
                true_tree, reduce_true_tree)
            if reduce_true_tree == 's1' or reduce_true_tree == 0:
                pass
        if emp_cov is not None:
            if isinstance(emp_cov, basestring):
                pass

    if M is None:
        M = n_mark(emp_cov)
    if rescale:
        emp_cov, multiplier = rescale_empirical_covariance(emp_cov)
        print 'multiplier is', multiplier

    def posterior(x, pks={}):
        #print tot_branch_length
        prior_value = prior(x, p=p, use_skewed_distr=use_skewed_distr, pks=pks)
        if prior_value == -float('inf'):
            return -float('inf'), prior_value
        likelihood_value = likelihood(x, emp_cov, M=M)
        pks['prior'] = prior_value
        pks['likelihood'] = likelihood_value
        #pks['posterior']=prior_value+likelihood_value
        return likelihood_value, prior_value

    if rescale:
        return posterior, multiplier
    return posterior
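As in the later test functions, the returned closure is evaluated on a (tree, add) pair and yields the likelihood and prior terms separately. A hypothetical sketch, assuming the module-level helpers (generate_phylogeny, make_covariance, get_trivial_nodes) used elsewhere in these examples:

true_tree = generate_phylogeny(4, 1)
emp_cov = make_covariance(true_tree, get_trivial_nodes(4))
post = initialize_posterior2(emp_cov=emp_cov, model_choice='empirical covariance')
likelihood_value, prior_value = post((true_tree, 0))   # summed, these give the posterior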
Example #11
def test_posterior_model_multichain(true_tree=None,
                                    start_tree=None,
                                    sim_lengths=[250] * 800,
                                    summaries=None,
                                    thinning_coef=1,
                                    admixtures_of_true_tree=None,
                                    no_leaves_true_tree=4,
                                    wishart_df=None,
                                    sim_from_wishart=False,
                                    no_chains=8,
                                    result_file='results_mc3.csv',
                                    emp_cov=None,
                                    emp_remove=-1,
                                    rescale_empirical_cov=False):
    if true_tree is None:
        if admixtures_of_true_tree is None:
            admixtures_of_true_tree = geom.rvs(p=0.5) - 1
        true_tree = generate_phylogeny(no_leaves_true_tree,
                                       admixtures_of_true_tree)
    else:
        no_leaves_true_tree = get_no_leaves(true_tree)
        admixtures_of_true_tree = get_number_of_admixes(true_tree)
    true_x = (true_tree, 0)

    m = make_covariance(true_tree, get_trivial_nodes(no_leaves_true_tree))
    if start_tree is None:
        start_tree = true_tree

    start_x = (start_tree, 0)
    if wishart_df is None:
        wishart_df = n_mark(m)
    if sim_from_wishart:
        r = m.shape[0]
        print m
        m = wishart.rvs(df=r * wishart_df - 1, scale=m / (r * wishart_df))
        print m
    if emp_cov is not None:
        m = emp_cov
    if rescale_empirical_cov:
        posterior, multiplier = initialize_posterior(
            m,
            wishart_df,
            use_skewed_distr=True,
            rescale=rescale_empirical_cov)
    else:
        posterior = initialize_posterior(m,
                                         wishart_df,
                                         use_skewed_distr=True,
                                         rescale=rescale_empirical_cov)
        multiplier = None
    print 'true_tree=', unique_identifier_and_branch_lengths(true_tree)
    if rescale_empirical_cov:
        post_ = posterior(
            (scale_tree_copy(true_x[0],
                             1.0 / multiplier), true_x[1] / multiplier))
    else:
        post_ = posterior(true_x)
    print 'likelihood(true_tree)', post_[0]
    print 'prior(true_tree)', post_[1]
    print 'posterior(true_tree)', sum(post_)
    if summaries is None:
        summaries = [
            s_variable('posterior'),
            s_variable('mhr'),
            s_no_admixes()
        ]
    proposal = basic_meta_proposal()
    #proposal.props=proposal.props[2:] #a little hack under the hood
    #proposal.params=proposal.params[2:] #a little hack under the hood.
    sample_verbose_scheme = {summary.name: (1, 0) for summary in summaries}
    sample_verbose_scheme_first = deepcopy(sample_verbose_scheme)
    if 'posterior' in sample_verbose_scheme:
        sample_verbose_scheme_first['posterior'] = (1, 1)  #(1,1)
        sample_verbose_scheme_first['no_admixes'] = (1, 1)
    #if 'likelihood' in sample_verbose_scheme:
    #sample_verbose_scheme_first['likelihood']=(1,1)
    print sample_verbose_scheme_first
    MCMCMC(starting_trees=[deepcopy(start_x) for _ in range(no_chains)],
           posterior_function=posterior,
           summaries=summaries,
           temperature_scheme=fixed_geometrical(800.0, no_chains),
           printing_schemes=[sample_verbose_scheme_first] +
           [sample_verbose_scheme for _ in range(no_chains - 1)],
           iteration_scheme=sim_lengths,
           overall_thinnings=int(thinning_coef),
           proposal_scheme=[adaptive_proposal() for _ in range(no_chains)],
           cores=no_chains,
           no_chains=no_chains,
           multiplier=multiplier,
           result_file=result_file,
           store_permuts=False)
    print 'finished MC3'
    #save_pandas_dataframe_to_csv(results, result_file)
    #save_permuts_to_csv(permuts, get_permut_filename(result_file))
    return true_tree
Example #12
def test_posterior_model(true_tree=None,
                         start_tree=None,
                         sim_length=100000,
                         summaries=None,
                         thinning_coef=19,
                         admixtures_of_true_tree=None,
                         no_leaves_true_tree=4,
                         filename='results.csv',
                         sim_from_wishart=False,
                         wishart_df=None,
                         sap_sim=False,
                         sap_ana=False,
                         resimulate_regrafted_branch_length=False,
                         emp_cov=None,
                         big_posterior=False,
                         rescale_empirical_cov=False):
    if true_tree is None:
        if admixtures_of_true_tree is None:
            admixtures_of_true_tree = geom.rvs(p=0.5) - 1
        true_tree = generate_phylogeny(no_leaves_true_tree,
                                       admixtures_of_true_tree,
                                       skewed_admixture_prior=sap_sim)
    else:
        no_leaves_true_tree = get_no_leaves(true_tree)
        admixtures_of_true_tree = get_number_of_admixes(true_tree)

    true_x = (true_tree, 0)

    m = make_covariance(true_tree, get_trivial_nodes(no_leaves_true_tree))
    if start_tree is None:
        start_tree = true_tree

    start_x = (start_tree, 0)
    if wishart_df is None:
        wishart_df = n_mark(m)
    if sim_from_wishart:
        r = m.shape[0]
        print m
        m = wishart.rvs(df=r * wishart_df - 1, scale=m / (r * wishart_df))
        print m
    if emp_cov is not None:
        m = emp_cov
    if big_posterior:
        posterior = initialize_big_posterior(m,
                                             wishart_df,
                                             use_skewed_distr=sap_ana)
    else:
        posterior = initialize_posterior(m,
                                         wishart_df,
                                         use_skewed_distr=sap_ana,
                                         rescale=rescale_empirical_cov)
    print 'true_tree=', unique_identifier_and_branch_lengths(true_tree)
    post_ = posterior(true_x)
    print 'likelihood(true_tree)', post_[0]
    print 'prior(true_tree)', post_[1]
    print 'posterior(true_tree)', sum(post_[:2])
    if summaries is None:
        summaries = [s_posterior(), s_variable('mhr'), s_no_admixes()]
    proposal = adaptive_proposal(
        resimulate_regrafted_branch_length=resimulate_regrafted_branch_length)
    #proposal.props=proposal.props[2:] #a little hack under the hood
    #proposal.params=proposal.params[2:] #a little hack under the hood.
    sample_verbose_scheme = {summary.name: (1, 0) for summary in summaries}
    sample_verbose_scheme['posterior'] = (1, 1)
    sample_verbose_scheme['no_admixes'] = (1, 1)
    final_tree, final_posterior, results, _ = basic_chain(
        start_x,
        summaries,
        posterior,
        proposal,
        post=None,
        N=sim_length,
        sample_verbose_scheme=sample_verbose_scheme,
        overall_thinning=int(max(thinning_coef, sim_length / 60000)),
        i_start_from=0,
        temperature=1.0,
        proposal_update=None,
        check_trees=False)
    save_to_csv(results, summaries, filename=filename)
    return true_tree
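A hypothetical invocation of the single-chain test above, relying on its defaults: it simulates a 4-leaf true tree, runs the chain and writes the summaries to results.csv:

true_tree = test_posterior_model(no_leaves_true_tree=4, sim_length=50000)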
Example #13
        filename_edges,
        outgroup='out',
        snodes=['s' + str(i) for i in range(1, 11)],
        prefix='sletmig' + os.sep,
        return_format='outgroup_rooted')
    #tree=read_treemix_file2('../../../../Dropbox/Bioinformatik/AdmixtureBayes/treemix_example3/new_one2.treeout',
    #                       '../../../../Dropbox/Bioinformatik/AdmixtureBayes/treemix_example3/new_one2.vertices',
    #                       '../../../../Dropbox/Bioinformatik/AdmixtureBayes/treemix_example3/new_one2.edges', outgroup='out')
    import tree_plotting
    tree_plotting.plot_as_directed_graph(tree)
    from tree_warner import check

    check(tree)

    print pretty_string(tree)
    import numpy as np
    print pretty_string(tree)
    from Rtree_to_covariance_matrix import make_covariance
    from reduce_covariance import reduce_covariance, Areduce
    cov = make_covariance(tree,
                          node_keys=['out'] +
                          ['s' + str(i) for i in range(1, 10)])
    print cov
    cov2 = np.loadtxt(
        '../../../../Dropbox/Bioinformatik/AdmixtureBayes/treemix_example3/anew.txt'
    )
    np.set_printoptions(precision=6, linewidth=200, suppress=True)
    print cov - cov2
    print reduce_covariance(cov - cov2, 0)
    print Areduce(cov - cov2)
Example #14
        'n6': ['n15', None, None, 0.002455554, None, 's8', 'a2']
    }

    #print plot_as_directed_graph(tree)
    sub_tree = get_subtree(tree, ['s1', 's2', 's3'])
    #print plot_as_directed_graph(sub_tree)
    print pretty_string(sub_tree)
    #plots=get_unique_plottable_tree(sub_tree)
    #print 'gotten unique_plottable'
    #print plots

    stree_difficult = 'a.w.w.c.c.w.c.4.3.6-c.w.0.w.c.w.w.4-c.w.w.w.w.0-c.w.w.w.0-c.0.w.w-c.0.w-c.0;0.014843959-0.003602704-0.002128203-0.027030132-0.008484730-0.067616899-0.021207056-0.027455759-0.011647297-0.009065170-0.053386961-0.001718477-0.009310923-0.010471979-0.036314546-0.004808845-0.055956235-0.004694887-0.003482668-0.039323330-0.014821628;1.000'
    from tree_statistics import (identifier_to_tree_clean,
                                 generate_predefined_list_string,
                                 identifier_file_to_tree_clean,
                                 unique_identifier_and_branch_lengths)
    from Rtree_to_covariance_matrix import make_covariance
    nodes = sorted(['s' + str(i + 1) for i in range(10)])
    tree_difficult = identifier_to_tree_clean(
        stree_difficult,
        leaves=generate_predefined_list_string(deepcopy(nodes)))
    cov1 = make_covariance(tree_difficult)
    tree_difficult2 = remove_non_mixing_admixtures(deepcopy(tree_difficult))
    cov2 = make_covariance(tree_difficult2)
    print cov1
    print cov2
    print cov1 - cov2
    print pretty_string(tree_difficult)
    print get_branches_to_keep(tree_difficult, ['s1', 's2', 's3'])
    sub_tree = get_subtree(tree_difficult, ['s1', 's2', 's3'])
    print pretty_string(sub_tree)
Example #15
def see_covariance_matrix(stree, reduce=None, factor=1.0):
    if reduce is None:
        return make_covariance(identifier_to_tree_clean(stree)) * factor
    else:
        return reduce_covariance(
            make_covariance(identifier_to_tree_clean(stree)), 0) * factor
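A hypothetical usage note for the helper above, with stree standing for any tree identifier string of the kind used throughout these examples; note that any non-None value of reduce triggers reduce_covariance(..., 0):

full_cov = see_covariance_matrix(stree)                        # plain covariance, optionally scaled by factor
reduced = see_covariance_matrix(stree, reduce=0, factor=2.0)   # reduce_covariance applied at index 0, then doubled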
Example #16
def tree_to_covariance(stree):
    tree = identifier_to_tree_clean(stree)
    nodes = sorted(get_leaf_keys(tree))
    return make_covariance(tree, node_keys=nodes)
Example #17
def adjust_treemix_df(wishart_df, starting_tree):
    cov = make_covariance(starting_tree)
    # log-density of cov under a Wishart with mean cov (scale = cov / wishart_df)
    lmax = wishart.logpdf(cov, scale=cov / wishart_df, df=wishart_df)
Example #18
def get_true_mat(true_tree_file, nodes):
    scaled_true_tree = identifier_file_to_tree_clean(true_tree_file)
    C = make_covariance(scaled_true_tree, node_keys=nodes)
    return Areduce(C)
Example #19
 coef, ni, bi = make_coefficient_matrix(tree_good)
 nodes_determined = [None] * len(ni)
 branches_determined = [None] * len(bi)
 for n, i in ni.items():
     nodes_determined[i] = n

 for b, i in bi.items():
     branches_determined[i] = b
 branch_lengths = get_specific_branch_lengths(tree_good, branches_determined)

 from numpy import array
 print coef
 print coef.dot(array(branch_lengths))
 from Rtree_to_covariance_matrix import make_covariance
 from numpy.random import normal
 print make_covariance(tree_good, node_keys=nodes_determined)
 from numpy import set_printoptions
 set_printoptions(precision=2)
 org, bi, _ = get_orthogonal_branch_space(tree_good, add_one_column=True)
 branches_determined = [None] * len(bi)
 for b, i in bi.items():
     branches_determined[i] = b

 updates = org.dot(normal(scale=0.01, size=org.shape[1]))
 #print pretty_string(update_specific_branch_lengths(tree_good, branches_determined, updates, add=True))

 #print make_covariance(tree_good, node_keys= nodes_determined)

 #print org.T.dot(coef)

 import sys
Example #20
def theoretical_covariance_wrapper(tree, **kwargs):
    covariance = make_covariance(tree, node_keys=kwargs['full_nodes'])
    if kwargs['add_wishart_noise_to_covariance']:
        covariance = add_wishart_noise(covariance, kwargs['df_of_wishart_noise_to_covariance'])
    return covariance
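A hypothetical call showing the keyword arguments the wrapper above reads from **kwargs, with leaf names following the s1, s2, ... convention used in the other examples:

tree = generate_phylogeny(4, 1)
cov = theoretical_covariance_wrapper(tree,
                                     full_nodes=['s1', 's2', 's3', 's4'],
                                     add_wishart_noise_to_covariance=False,
                                     df_of_wishart_noise_to_covariance=None)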
Example #21
def get_true_posterior(wishart_file='tmp_covariance_and_multiplier.txt',
                       true_tree_file='tmp_scaled_true_tree.txt',
                       p=0.5,
                       use_skewed_distr=False,
                       wishart_df_file='tmp_wishart_DF.txt',
                       outgroup='out'):
    true_tree, nodes = read_tree_file(true_tree_file)
    set_printoptions(precision=2)
    multiplier = 8.57292960745
    print make_covariance(scale_tree_copy(true_tree, multiplier),
                          node_keys=nodes)
    print reduce_covariance(
        make_covariance(scale_tree_copy(true_tree, multiplier),
                        node_keys=nodes),
        len(nodes) - 1)
    print true_tree
    print nodes
    print outgroup
    print pretty_string(true_tree)
    x = get_pruned_tree_and_add(true_tree, outgroup)
    print pretty_string(x[0])
    print x[1]

    print nodes
    nodes.remove(outgroup)
    covariance, multiplier = read_wishart_file(wishart_file, nodes)
    print 'multiplier', multiplier
    #multiplier=1.0
    print covariance
    #print make_covariance(scale_tree_copy(x[0], multiplier))
    t_covariance, t_multiplier = rescale_empirical_covariance(
        make_covariance(x[0]) + x[1])
    print t_covariance
    print t_covariance - covariance
    print t_covariance / covariance
    print (t_covariance / t_multiplier) - covariance / multiplier
    print (t_covariance / t_multiplier) / (covariance / multiplier)
    avg_scale = mean((make_covariance(scale_tree_copy(x[0], 1.0)) + x[1]) *
                     multiplier / covariance)
    avg_root = get_average_distance_to_root(x[0])
    max_root = get_max_distance_to_root(x[0])
    min_root = get_min_distance_to_root(x[0])
    wishart_df = read_wishart_df_file(wishart_df_file)
    posterior = posterior_class(covariance,
                                M=wishart_df,
                                p=p,
                                use_skewed_distr=use_skewed_distr,
                                multiplier=multiplier,
                                nodes=nodes)
    pks = {}
    a = posterior(x, pks)
    print a
    prior_val = pks['prior']
    lik_vals = [
        posterior.get_likelihood_from_matrix(t_covariance * c)
        for c in linspace(0.1, 2.5, 1500)
    ]
    print lik_vals
    n = sorted([(v, e) for e, v in enumerate(lik_vals)])[-1][1]
    print n, linspace(0.1, 2.5, 1500)[n]
    print lik_vals[n]
    print linspace(0.1, 2.5, 1500)[n] * t_covariance
    print covariance
    print t_covariance
    print posterior.get_likelihood_from_matrix(t_covariance)
    print sum((covariance - t_covariance)**2)
    print sum((covariance - t_covariance * linspace(0.1, 2.5, 1500)[n])**2)
    t_cov2 = deepcopy(t_covariance)
    t_cov2[3, 5] = t_cov2[5, 3] = 1.6
    t_cov2[1, 2] = t_cov2[2, 1] = 2.45
    print t_cov2
    print posterior.get_likelihood_from_matrix(t_cov2)
    print posterior.get_likelihood_from_matrix(covariance)
    print posterior.get_likelihood_from_matrix(
        covariance * (wishart_df / (wishart_df - covariance.shape[0] - 1)))
    return prior_val + max(lik_vals)
Example #22
#     for node in nodes:
#         p_mat.append(ps[node])
#     x=array(p_mat)*n
#     return full_maximization(x,n)


def calculate_covariance_matrix_from_p(ps, nodes=None):
    p_mat = []
    if nodes is None:
        nodes = ps.keys()
    for node in nodes:
        p_mat.append(ps[node])

    m = array(p_mat) - mean(p_mat, axis=0)
    return cov(m)


if __name__ == '__main__':
    from Rtree_operations import create_trivial_tree
    from generate_prior_trees import generate_phylogeny
    from Rtree_to_covariance_matrix import make_covariance
    tree = generate_phylogeny(3, 1)
    nodes = ['s1', 's2', 's3']
    print make_covariance(tree, node_keys=nodes)
    p = produce_p_matrix(tree, 11)
    print p
    print remove_non_snps(p, 's1')
    #print simulate_with_binomial(p, 10)
    #print calculate_covariance_matrix_from_p(p, nodes=nodes)
    #print calculate_covariance_matrix_from_p(simulate_with_binomial(p, 10), nodes)
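A small sketch of calculate_covariance_matrix_from_p with made-up allele frequencies (two SNPs per population); the rows are centred across populations and numpy's cov then yields a 3x3 matrix (array, mean and cov are assumed to be numpy imports in the function's module):

ps = {'s1': [0.10, 0.50], 's2': [0.20, 0.40], 's3': [0.30, 0.30]}
print(calculate_covariance_matrix_from_p(ps, nodes=['s1', 's2', 's3']))   # 3x3 covariance of the centred rows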
Example #23
def tree_to_data_perfect_model(tree, df):
    m = make_covariance(tree)
    r = m.shape[0]
    # draw a noisy covariance from a Wishart whose mean is (approximately) m
    m = wishart.rvs(df=r * df - 1, scale=m / (r * df))
    return m
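The Wishart draw above has mean ((r*df - 1)/(r*df)) * m, i.e. approximately the tree covariance itself for moderate df. A hypothetical sanity check reusing helpers imported in the earlier examples:

from generate_prior_trees import generate_phylogeny
from Rtree_to_covariance_matrix import make_covariance
tree = generate_phylogeny(4, 1)
sim = tree_to_data_perfect_model(tree, df=1000)
print(sim - make_covariance(tree))   # entrywise deviations should be small for large df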
Example #24
                plot_graph(self.tree, drawing_name='bad.png')
                deladmix(old_tree)
                break


if __name__ == "__main__":
    from tree_plotting import plot_as_directed_graph, plot_graph, pretty_print
    import Rtree_operations
    #plot_graph(Rtree_operations.tree_on_the_border2_with_children)
    #t=Tester(Rtree_operations.tree_on_the_border2_with_children)
    #t.many_admixes(10)
    from Rcatalogue_of_trees import tree_good, tree_one_admixture
    pks = {}

    from Rtree_to_covariance_matrix import make_covariance
    print make_covariance(tree_good)
    newt, forw, backw = addadmix(tree_good,
                                 pks=pks,
                                 check_opposite=True,
                                 new_node_names=['g', 'h'],
                                 preserve_root_distance=True)
    print 'forw', forw
    print 'back', backw
    print 'pks', pks
    pretty_print(newt)
    print make_covariance(newt)

    pks = {}
    newt, forw, backw = deladmix(newt,
                                 pks=pks,
                                 check_opposite=True,
Example #25
    def __call__(self, *args, **kwargs):
        return rescale_admix_correction(*args, **kwargs)


if __name__ == '__main__':
    from tree_plotting import plot_graph
    from Rcatalogue_of_trees import tree_on_the_border2_with_children
    from Rtree_operations import create_trivial_tree, create_burled_leaved_tree, pretty_string, get_leaf_keys
    from Rtree_to_covariance_matrix import make_covariance
    from likelihood import likelihood
    from math import log, exp
    #plot_graph(tree_on_the_border2_with_children)
    tree = create_burled_leaved_tree(15, 1.0)
    nodes = get_leaf_keys(tree)
    print nodes
    before_covariance = make_covariance(tree, node_keys=nodes)
    #x, emp_cov, nodes=None, M=12, pks={}
    bl = likelihood((tree, 0), before_covariance, M=10000, nodes=nodes)
    print 'before covariance', bl
    for _ in xrange(1):
        new_tree, f, b, tbf = rescale_admix_correction(
            tree,
            make_correction=True,
            sigma=0.1,
            return_without_correction=True)
        print 'proposal ratio', b / f
        al = likelihood((new_tree, 0), before_covariance, M=10000, nodes=nodes)
        print 'after covariance', al
        wl = likelihood((tbf, 0), before_covariance, M=10000, nodes=nodes)
        print 'without correction', wl
        print 'jump ratio with correction', exp(log(b) - log(f) + al - bl)
Example #26
wrong_trees_s=['w.w.a.w.w.a.a.a.w-c.w.c.c.w.w.c.0.w.w.6.3.2-c.w.w.0.w.c.5.w.w-c.w.0.c.4.w.w-c.w.c.4.0-w.c.1-c.0;0.828-0.21-0.197-0.247-0.568-1.06-0.799-1.162-2.632-2.001-0.45-1.048-0.834-0.469-0.191-2.759-0.871-1.896-0.473-0.019-1.236-0.287-0.179-0.981-0.456-0.91-2.114-3.368;0.655-0.506-0.389-0.23',
               'w.w.w.w.w.w.a.a.w-w.w.w.c.w.c.5.a.w.3.a-c.w.c.w.c.4.w.w.0.2.a-w.w.w.w.c.c.4.5.w-c.c.w.w.1.0.w-c.w.w.0.w-c.w.0.w-a.w.w-c.w.0.w-c.w.0-c.0;0.387-0.087-0.806-0.082-2.062-0.803-0.122-0.544-0.061-0.733-0.474-1.342-0.871-0.798-0.753-0.288-0.024-0.174-0.754-0.282-0.45-0.924-0.416-1.081-0.467-1.296-1.171-0.54-1.944-0.258-8.813-0.76-0.073-3.416;0.388-0.467-0.098-0.185-0.019-0.44']
wrong_trees = [identifier_to_tree_clean(tree) for tree in wrong_trees_s]

plot_as_directed_graph(true_tree,  drawing_name= 'tmp0.bmp')
plot_as_directed_graph(wrong_trees[0], drawing_name = 'tmp1.bmp')
print pretty_string(wrong_trees[0])
t = wrong_trees[0]

from Rproposal_admix import deladmix

pks = {}
from Rtree_to_covariance_matrix import make_covariance
from posterior import initialize_big_posterior

true_cov = make_covariance(true_tree)
posterior_f = initialize_big_posterior(true_cov, M=10000)
nt, f, b = deladmix(t, pks=pks, fixed_remove=('a1', 1))
plot_as_directed_graph(nt)

new_likelihood_value, new_prior_value, (new_branch_prior, new_no_admix_prior, new_admix_prop_prior, new_top_prior), new_covariance = posterior_f((nt, 0))
old_likelihood_value, old_prior_value, (old_branch_prior, old_no_admix_prior, old_admix_prop_prior, old_top_prior), old_covariance = posterior_f((t, 0))

print new_likelihood_value, old_likelihood_value, new_likelihood_value - old_likelihood_value
from numpy import get_printoptions, set_printoptions
set_printoptions(precision=3, suppress=True)
print new_covariance
print old_covariance
print new_covariance - old_covariance
print true_cov
print true_cov - old_covariance