def test_corr_clustered_trees(self):
    """Regression-check the clustering correlation for several parameter sets.

    For every case the experiment distance matrix is built with the given
    ``param_a`` and per-level weights, clustered hierarchically, and the
    value returned by ``corr_clustered_trees`` is compared with a recorded
    reference number.
    """
    systematic_tree = "morph"
    max_level = 11
    linkage_method = 'average'

    matr_diff = MatrixDiff(
        "../../input/xtg/*.xtg",
        f"../../input/systematic_tree_{systematic_tree}.xtg",
        ["Angiosperms"],
        max_level=max_level,
        is_reducing=True,
    )

    # Each case: (param_a, zero-based index of the level whose weight is
    # boosted or -1 for "none", recorded correlation).
    cases = [
        (0.5, -1, 0.29536767921530455),
        (1.0, -1, 0.04947724771267295),
        (0.5, 3 - 1, 0.29337382536794604),
        (1.0, 3 - 1, 0.12199025619653726),
    ]

    for case in cases:
        param_a, boosted_index, reference_corr = case

        # Every level weighs 1 except the boosted one, which weighs 5.
        # An index of -1 matches no position, leaving all weights at 1.
        weights = [5 if idx == boosted_index else 1 for idx in range(11)]

        settings = GlobalParams(
            max_level=max_level,
            param_a=param_a,
            g_weight=0.0,
            chain_length_weight=0.0,
            level_weight_multiplier=weights,
        )

        square_distances = to_full_matrix(matr_diff.make_experiment_matrix(settings))

        # squareform converts the redundant n*n square matrix into the
        # condensed n-choose-2 vector, where entry
        # {n choose 2} - {n-i choose 2} + (j-i-1) is the distance between
        # points i and j.
        condensed_distances = ssd.squareform(square_distances)
        linkage_matrix = hierarchy.linkage(condensed_distances, linkage_method)

        measured_corr = corr_clustered_trees(
            linkage_matrix,
            matr_diff.names,
            matr_diff.make_systematic_matrix(),
        )

        # Exact float equality is intentional: any numeric drift introduced
        # by a refactoring must make this test fail.
        self.assertEqual(reference_corr, measured_corr)
alg_to_corr = {} # for tree in matrDiff.vertices: # print(f"{tree.name} : {tree.node.depth}") for param_a in np.linspace(0.2, 0.8, 7): for systematic_tree in systematic_trees: for cluster_algorithm in cluster_algorithms: #for increasing_level in [-1, 3 - 1, 7 - 1]: #range(-1, 8): #for increasing_level in [-1]: #range(-1, 8): for use_flipping in [False, True]: matrDiff = MatrixDiff( "../../input/xtg/*.xtg", f"../../input/systematic_tree_{systematic_tree}.xtg", ["Angiosperms"], max_level=max_level, is_reducing=True, use_flipping=use_flipping) increasing_level = -1 level_weight_multiplier = [1] * 11 if increasing_level >= 0: level_weight_multiplier[increasing_level] *= 5 global_params = GlobalParams( max_level=max_level, param_a=param_a, g_weight=0.0, chain_length_weight=0.0, level_weight_multiplier=level_weight_multiplier,
total[level][EQ] += 1 superimposed_node = SuperimposedNode(node1, node2) node_distance = superimposed_node.node_dist(global_params) total_distance = node_distance total_distance += proceed_node(node1.left, node2.left, level + 1) total_distance += proceed_node(node1.right, node2.right, level + 1) return total_distance systematic_tree = "morph" max_level = 10 global_params = GlobalParams(max_level=max_level, param_a=0.50, g_weight=0.5) matrDiff = MatrixDiff("../../input/xtg/*.xtg", f"../../input/systematic_tree_{systematic_tree}.xtg", ["Angiosperms"], max_level=max_level) trees = matrDiff.vertices tree_number = 0 for i in range(0, len(trees)): for j in range(i+1, len(trees)): proceed_node(trees[i].root, trees[j].root, 0) tree_number += 1 print(f"level total one_only eq ineq both_existing_part existing_part") for i, item in enumerate(total): print(f"{i} {item[0]} {item[1] + item[2]} {item[3]} {item[4]} {'nan' if item[0] == 0 else (item[3] + item[4]) / item[0]} {item[0] / (tree_number * pow(2, i))}")
import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.mplot3d import Axes3D

from src.multiple_trees.matrix_diff import MatrixDiff

# Module-level MatrixDiff instance, constructed once when this script is
# loaded; the closure returned by create_fun reads it as a global.
matrDiff = MatrixDiff("../../input/xtg/*.xtg", "../../input/systematic_tree_morph.xtg", ["Angiosperms"], max_level=10)


def create_fun(chain_length_weight, increasing_level):
    """Return a function of ``(a, g_weight)`` with the other two parameters fixed.

    The returned callable passes the list
    ``[a, g_weight, chain_length_weight, increasing_level]`` to
    ``matrDiff.matr_diff_sum`` and returns whatever that call returns.
    """
    def fun(a, g_weight):
        return matrDiff.matr_diff_sum([a, g_weight, chain_length_weight, increasing_level])

    return fun


# define as a function to prevent reusing vars in create_fun by mistake on edit
def show_3d_plot():
    """Evaluate ``matr_diff_sum`` on an (a, g_weight) grid and open a figure.

    ``a`` and ``g_weight`` are each sampled at 11 points;
    ``chain_length_weight`` and ``increasing_level`` are held constant.
    The commented-out assignments are alternative settings for each of the
    four parameters.
    """
    a = np.linspace(0.001, 1.0, 11)
    # a = 0.4
    g_weight = np.linspace(0.0, 1.0, 11)
    # g_weight = 0.4
    chain_length_weight = 0.0
    # chain_length_weight = np.linspace(0.0, 0.5, 6)
    increasing_level = -1
    # increasing_level = np.linspace(-1, 8, 10)

    # meshgrid yields coordinate arrays; np.vectorize calls the scalar
    # closure once per grid point.
    x, y = np.meshgrid(a, g_weight)
    z = np.vectorize(create_fun(chain_length_weight, increasing_level))(x, y)

    fig = plt.figure()
from src.single_tree.global_params import GlobalParams
from src.multiple_trees.matrix_diff import MatrixDiff

# Calculates the distance between species when all trees are cut at
# level 2, 3, 4, ..., 11.
systematic_tree = "morph"

# MatrixDiff built with a fixed max_level of 10, independent of the sweep below.
globalMatrDiff = MatrixDiff(
    "../../input/xtg/*.xtg",
    f"../../input/systematic_tree_{systematic_tree}.xtg",
    ["Angiosperms"],
    max_level=10)

# Result accumulators.
# NOTE(review): nothing is appended to them in the statements shown here.
res_matrices = []
res_corrcoef = []

# iterate over max_level
for cur_max_level in range(2, 12):
    # The same cut-off level is applied both to the parameters and to a
    # freshly constructed MatrixDiff for this iteration.
    global_params = GlobalParams(max_level=cur_max_level, param_a=0.50, g_weight=0.5)
    matrDiff = MatrixDiff("../../input/xtg/*.xtg", f"../../input/systematic_tree_{systematic_tree}.xtg", ["Angiosperms"], max_level=cur_max_level)

    experiment_matrix = matrDiff.make_experiment_matrix(global_params)
    # Correlation coefficient for this level's experiment matrix.
    # NOTE(review): presumably measured against the systematic tree's
    # matrix; confirm in MatrixDiff.corrcoef.
    corrcoef = matrDiff.corrcoef(experiment_matrix=experiment_matrix)
    # print(f"max_dist: {max_dist}, corrcoef: {corrcoef}")
import numpy as np from src.single_tree.global_params import GlobalParams from src.multiple_trees.matrix_diff import MatrixDiff, corrcoef # Finds max correlation between experiment matrix with and without swap_left_right # varying params param_a, g_weight, chain_length_weight # Also builds the table of the corr(param_a, g_weight, chain_length_weight) systematic_tree = "morph" max_level = 10 matrDiff = MatrixDiff("../../input/xtg/*.xtg", f"../../input/systematic_tree_{systematic_tree}.xtg", ["Angiosperms"], max_level=max_level) trees = matrDiff.vertices # param_a = 0.5 # g_weight = 0.5 # chain_length_weight = 0.1 print(f"param_a g_weight chain_length_weight corr") max_corr = 1000 for param_a in np.linspace(0.4, 0.6, 3): for g_weight in np.linspace(0.0, 0.3, 4): for chain_length_weight in np.linspace(0.0, 0.3, 4): global_params = GlobalParams( max_level=max_level,
import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.mplot3d import Axes3D

from src.multiple_trees.matrix_diff import MatrixDiff
from src.single_tree.global_params import GlobalParams

# Build matrices and corr coef only
systematic_tree = "morph"
cluster_algorithm = "average"
max_level = 10

# Two tree collections are loaded against the same systematic tree and
# max_level: the "xtg_johansen" input directory (with filter_by_taxon turned
# off) and the regular "xtg" input directory (constructor defaults).
johansenMatrDiff = MatrixDiff("../../input/xtg_johansen/*.xtg", f"../../input/systematic_tree_{systematic_tree}.xtg", ["Angiosperms"], max_level=max_level, filter_by_taxon=False)
matrDiff = MatrixDiff("../../input/xtg/*.xtg", f"../../input/systematic_tree_{systematic_tree}.xtg", ["Angiosperms"], max_level=max_level)

# Vertices of each collection, kept under separate names.
trees = matrDiff.vertices
johansenTrees = johansenMatrDiff.vertices

# Disabled sweep over g_weight and chain_length_weight, kept for reference:
# for g_weight in np.linspace(0.0, 1.00, 11):
#     for chain_length_weight in np.linspace(0.0, 1.0, 11):
# param_a = 0.5
# global_params = GlobalParams(g_weight=g_weight, chain_length_weight=chain_length_weight,
#                              param_a=0.5, max_level=max_level,
#                              )
# name = f"param_a={param_a}_g_weight={g_weight}_chain_length_weight={chain_length_weight}_{systematic_tree}_{cluster_algorithm}"
# experiment_matrix = matrDiff.make_experiment_matrix(global_params)
def common_corrcoef(systematic_matrix, experiment_matrix): systematic_array = [] experiment_array = [] for i, systematic_row in enumerate(systematic_matrix): for j, systematic_col in enumerate(systematic_row): systematic_array.append(systematic_matrix[i][j]) experiment_array.append(experiment_matrix[i][j]) corrcoef_matrix = np.corrcoef([systematic_array, experiment_array]) return corrcoef_matrix[0][1] matrDiff = MatrixDiff("../../input/xtg/*.xtg", "../../input/systematic_tree_morph.xtg", ["Angiosperms"], max_level=10) init_values = [0.5, 0.1, 0.0] x = init_values global_params = GlobalParams(max_level=10, param_a=x[0], g_weight=x[1], chain_length_weight=x[2]) experiment_matrix = matrDiff.make_full_experiment_matrix(global_params) print(experiment_matrix) for level_count in range(30): name = f"ultrametric_{x[0]}_{x[1]}_{x[2]}_{level_count + 1}" ultra_matrix = get_ultra_metric(