예제 #1
0
    def test_corr_clustered_trees(self):
        """Regression check of the clustering-vs-systematics correlation.

        Builds one ``MatrixDiff`` from the xtg input files, then for every
        parameter case recomputes the experiment matrix, clusters it with
        the 'average' linkage and asserts that ``corr_clustered_trees``
        returns the recorded value.
        """
        tree_kind = "morph"
        depth_limit = 11
        linkage_method = 'average'

        matr_diff = MatrixDiff(
            "../../input/xtg/*.xtg",
            f"../../input/systematic_tree_{tree_kind}.xtg",
            ["Angiosperms"],
            max_level=depth_limit,
            is_reducing=True)

        # Each case: (param_a, index of the level weight to multiply by 5
        # or -1 to leave every weight at 1, recorded correlation).
        cases = (
            (0.5, -1, 0.29536767921530455),
            (1.0, -1, 0.04947724771267295),
            (0.5, 3 - 1, 0.29337382536794604),
            (1.0, 3 - 1, 0.12199025619653726),
        )
        for param_a, boosted_index, recorded_corr in cases:
            weights = [1] * 11
            if not boosted_index < 0:
                weights[boosted_index] = weights[boosted_index] * 5

            settings = GlobalParams(
                max_level=depth_limit,
                param_a=param_a,
                g_weight=0.0,
                chain_length_weight=0.0,
                level_weight_multiplier=weights)

            square_distances = to_full_matrix(
                matr_diff.make_experiment_matrix(settings))
            # squareform converts the redundant n*n square matrix into the
            # condensed n-choose-2 array that is handed to linkage.
            condensed = ssd.squareform(square_distances)
            linkage_result = hierarchy.linkage(condensed, linkage_method)

            labels = matr_diff.names
            systematic_matrix = matr_diff.make_systematic_matrix()
            computed_corr = corr_clustered_trees(
                linkage_result, labels, systematic_matrix)

            # Exact float equality is deliberate: any change of the result
            # introduced by a refactoring must make this test fail.
            self.assertEqual(recorded_corr, computed_corr)
예제 #2
0
alg_to_corr = {}

# for tree in matrDiff.vertices:
#     print(f"{tree.name} : {tree.node.depth}")

for param_a in np.linspace(0.2, 0.8, 7):
    for systematic_tree in systematic_trees:
        for cluster_algorithm in cluster_algorithms:
            #for increasing_level in [-1, 3 - 1, 7 - 1]: #range(-1, 8):
            #for increasing_level in [-1]: #range(-1, 8):
            for use_flipping in [False, True]:
                matrDiff = MatrixDiff(
                    "../../input/xtg/*.xtg",
                    f"../../input/systematic_tree_{systematic_tree}.xtg",
                    ["Angiosperms"],
                    max_level=max_level,
                    is_reducing=True,
                    use_flipping=use_flipping)

                increasing_level = -1
                level_weight_multiplier = [1] * 11
                if increasing_level >= 0:
                    level_weight_multiplier[increasing_level] *= 5

                global_params = GlobalParams(
                    max_level=max_level,
                    param_a=param_a,
                    g_weight=0.0,
                    chain_length_weight=0.0,
                    level_weight_multiplier=level_weight_multiplier,
예제 #3
0
        total[level][EQ] += 1

    superimposed_node = SuperimposedNode(node1, node2)
    node_distance = superimposed_node.node_dist(global_params)

    total_distance = node_distance
    total_distance += proceed_node(node1.left, node2.left, level + 1)
    total_distance += proceed_node(node1.right, node2.right, level + 1)

    return total_distance


# Run configuration: which systematic tree file to load and the level limit.
systematic_tree = "morph"
max_level = 10

# Module-level parameters; proceed_node above passes this object to node_dist.
global_params = GlobalParams(
    max_level=max_level,
    param_a=0.50,
    g_weight=0.5,
)

matrDiff = MatrixDiff(
    "../../input/xtg/*.xtg",
    f"../../input/systematic_tree_{systematic_tree}.xtg",
    ["Angiosperms"],
    max_level=max_level,
)
trees = matrDiff.vertices

# Walk every unordered pair of trees once; tree_number ends up as the
# number of pairs that were processed.
tree_number = 0
for first in range(len(trees)):
    for second in range(first + 1, len(trees)):
        proceed_node(trees[first].root, trees[second].root, 0)
        tree_number += 1

# One output row per level, columns in the order given by the header line.
print("level total one_only eq ineq both_existing_part existing_part")
for level, counts in enumerate(total):
    level_total = counts[0]
    one_only = counts[1] + counts[2]
    eq_count = counts[3]
    ineq_count = counts[4]
    if level_total == 0:
        # No nodes counted on this level, so the ratio is undefined.
        both_existing_part = 'nan'
    else:
        both_existing_part = (eq_count + ineq_count) / level_total
    existing_part = level_total / (tree_number * 2 ** level)
    print(f"{level} {level_total} {one_only} {eq_count} {ineq_count} {both_existing_part} {existing_part}")
예제 #4
0
import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.mplot3d import Axes3D

from src.multiple_trees.matrix_diff import MatrixDiff

# Shared MatrixDiff instance; the closures built by create_fun below call
# matr_diff_sum on it.
matrDiff = MatrixDiff("../../input/xtg/*.xtg", "../../input/systematic_tree_morph.xtg", ["Angiosperms"], max_level=10)


def create_fun(chain_length_weight, increasing_level):
    """Return a two-argument callable with the remaining parameters fixed.

    The returned function takes ``(a, g_weight)`` and forwards the list
    ``[a, g_weight, chain_length_weight, increasing_level]`` to
    ``matrDiff.matr_diff_sum``, returning whatever that call returns.
    """
    def fun(a, g_weight):
        packed_params = [a, g_weight, chain_length_weight, increasing_level]
        return matrDiff.matr_diff_sum(packed_params)

    return fun


# define as a function to prevent reusing vars in create_fun by mistake on edit
def show_3d_plot():
    """Evaluate create_fun's function over an (a, g_weight) grid and open a figure.

    ``a`` and ``g_weight`` are swept over 11 evenly spaced values each, while
    ``chain_length_weight`` and ``increasing_level`` stay fixed. The commented
    assignments are alternative settings kept for manual experiments.
    """
    a = np.linspace(0.001, 1.0, 11)
    # a = 0.4
    g_weight = np.linspace(0.0, 1.0, 11)
    # g_weight = 0.4
    chain_length_weight = 0.0
    # chain_length_weight = np.linspace(0.0, 0.5, 6)
    increasing_level = -1
    # increasing_level = np.linspace(-1, 8, 10)

    # Build the 2-D coordinate grids and evaluate the function at each point;
    # np.vectorize applies the scalar callable element-wise over x and y.
    x, y = np.meshgrid(a, g_weight)
    z = np.vectorize(create_fun(chain_length_weight, increasing_level))(x, y)

    # NOTE(review): nothing is drawn on the figure in the visible part of
    # this function - confirm whether the plotting code was cut off.
    fig = plt.figure()
from src.single_tree.global_params import GlobalParams
from src.multiple_trees.matrix_diff import MatrixDiff

# Calculates the distances between species when all trees are cut at level 2, 3, 4, ..., 11.

# Name fragment of the systematic tree file to load.
systematic_tree = "morph"

# MatrixDiff built once with a fixed max_level of 10.
# NOTE(review): not used in the visible part of this script.
globalMatrDiff = MatrixDiff(
    "../../input/xtg/*.xtg",
    f"../../input/systematic_tree_{systematic_tree}.xtg", ["Angiosperms"],
    max_level=10)

# NOTE(review): these lists are not filled in the visible part of the loop
# below - presumably the loop body continues; confirm against the full file.
res_matrices = []
res_corrcoef = []

# iterate over max_level
for cur_max_level in range(2, 12):

    # Parameters for this level limit; param_a and g_weight stay constant.
    global_params = GlobalParams(max_level=cur_max_level,
                                 param_a=0.50,
                                 g_weight=0.5)

    # A fresh MatrixDiff is built from the same inputs for every level limit.
    matrDiff = MatrixDiff("../../input/xtg/*.xtg",
                          f"../../input/systematic_tree_{systematic_tree}.xtg",
                          ["Angiosperms"],
                          max_level=cur_max_level)

    # Build the experiment matrix and pass it to the instance's corrcoef.
    experiment_matrix = matrDiff.make_experiment_matrix(global_params)
    corrcoef = matrDiff.corrcoef(experiment_matrix=experiment_matrix)
    # print(f"max_dist: {max_dist}, corrcoef: {corrcoef}")
import numpy as np

from src.single_tree.global_params import GlobalParams
from src.multiple_trees.matrix_diff import MatrixDiff, corrcoef

# Finds max correlation between experiment matrix with and without swap_left_right
# varying params param_a, g_weight, chain_length_weight
# Also builds the table of the corr(param_a, g_weight, chain_length_weight)

systematic_tree = "morph"
max_level = 10

matrDiff = MatrixDiff("../../input/xtg/*.xtg",
                      f"../../input/systematic_tree_{systematic_tree}.xtg",
                      ["Angiosperms"],
                      max_level=max_level)

trees = matrDiff.vertices

# param_a = 0.5
# g_weight = 0.5
# chain_length_weight = 0.1

print(f"param_a g_weight chain_length_weight corr")

max_corr = 1000
for param_a in np.linspace(0.4, 0.6, 3):
    for g_weight in np.linspace(0.0, 0.3, 4):
        for chain_length_weight in np.linspace(0.0, 0.3, 4):
            global_params = GlobalParams(
                max_level=max_level,
예제 #7
0
import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.mplot3d import Axes3D

from src.multiple_trees.matrix_diff import MatrixDiff
from src.single_tree.global_params import GlobalParams

# Build matrices and corr coef only

# Run configuration shared by the two MatrixDiff instances below.
systematic_tree = "morph"
# NOTE(review): only referenced from the commented-out code further down.
cluster_algorithm = "average"
max_level = 10

# Instance built from the xtg_johansen input directory, with
# filter_by_taxon switched off.
johansenMatrDiff = MatrixDiff("../../input/xtg_johansen/*.xtg", f"../../input/systematic_tree_{systematic_tree}.xtg",
                              ["Angiosperms"], max_level=max_level, filter_by_taxon=False)

# Instance built from the regular xtg input directory with default filtering.
matrDiff = MatrixDiff("../../input/xtg/*.xtg", f"../../input/systematic_tree_{systematic_tree}.xtg",
                      ["Angiosperms"], max_level=max_level)

# Keep the vertices of both instances at hand for the code that follows.
trees = matrDiff.vertices
johansenTrees = johansenMatrDiff.vertices


# for g_weight in np.linspace(0.0, 1.00, 11):
#     for chain_length_weight in np.linspace(0.0, 1.0, 11):
#         param_a = 0.5
#         global_params = GlobalParams(g_weight=g_weight, chain_length_weight=chain_length_weight,
#                                      param_a=0.5, max_level=max_level,
#                                      )
#         name = f"param_a={param_a}_g_weight={g_weight}_chain_length_weight={chain_length_weight}_{systematic_tree}_{cluster_algorithm}"
#         experiment_matrix = matrDiff.make_experiment_matrix(global_params)
예제 #8
0
def common_corrcoef(systematic_matrix, experiment_matrix):
    """Return the correlation coefficient of two matrices read cell by cell.

    Every cell ``(i, j)`` present in ``systematic_matrix`` is paired with
    ``experiment_matrix[i][j]``. The two flattened sequences are handed to
    ``np.corrcoef`` and the off-diagonal entry of its result is returned.
    Rows may have different lengths; cells that exist only in
    ``experiment_matrix`` are ignored.
    """
    # The shape of systematic_matrix alone decides which cells are compared.
    cell_positions = [
        (row_idx, col_idx)
        for row_idx, row in enumerate(systematic_matrix)
        for col_idx, _ in enumerate(row)
    ]
    flat_systematic = [systematic_matrix[r][c] for r, c in cell_positions]
    flat_experiment = [experiment_matrix[r][c] for r, c in cell_positions]

    return np.corrcoef([flat_systematic, flat_experiment])[0][1]


matrDiff = MatrixDiff("../../input/xtg/*.xtg",
                      "../../input/systematic_tree_morph.xtg", ["Angiosperms"],
                      max_level=10)

init_values = [0.5, 0.1, 0.0]
x = init_values
global_params = GlobalParams(max_level=10,
                             param_a=x[0],
                             g_weight=x[1],
                             chain_length_weight=x[2])

experiment_matrix = matrDiff.make_full_experiment_matrix(global_params)
print(experiment_matrix)

for level_count in range(30):
    name = f"ultrametric_{x[0]}_{x[1]}_{x[2]}_{level_count + 1}"
    ultra_matrix = get_ultra_metric(