def draw_trees(folder, draw_settings, is_reducing, max_level=10, param_a=0.5):
    """Draw every prepared tree, superimposed on itself.

    Args:
        folder: Forwarded unchanged to ``TreeDrawer.draw_tree``
            (presumably the output directory -- confirm against TreeDrawer).
        draw_settings: Settings object handed to ``TreeDrawer``.
        is_reducing: Forwarded to ``get_prepared_trees``.
        max_level: Used both for ``GlobalParams`` and for tree preparation.
        param_a: Forwarded to ``GlobalParams``.
    """
    params = GlobalParams(max_level=max_level, param_a=param_a)
    prepared_trees = get_prepared_trees(is_reducing, max_level)
    drawer = TreeDrawer(draw_settings, params)
    for tree in prepared_trees:
        # The same tree is passed on both sides of the comparison drawing.
        drawer.draw_tree(tree, tree, folder)
def draw_trees(folder, draw_settings, is_reducing, use_flipping, max_level=10, param_a=0.5):
    """Draw every tree of the Johansen input set, superimposed on itself.

    The trees are the ``vertices`` of a ``TreesMatrix`` built from
    ``../../input/xtg_johansen/*.xtg``.

    Args:
        folder: Forwarded unchanged to ``TreeDrawer.draw_tree``
            (presumably the output directory -- confirm against TreeDrawer).
        draw_settings: Settings object handed to ``TreeDrawer``.
        is_reducing: Forwarded to ``TreesMatrix``.
        use_flipping: Forwarded to ``TreesMatrix``.
        max_level: Used both for ``GlobalParams`` and for ``TreesMatrix``.
        param_a: Forwarded to ``GlobalParams``.
    """
    params = GlobalParams(max_level=max_level, param_a=param_a)
    johansen_matrix = TreesMatrix(
        "../../input/xtg_johansen/*.xtg",
        max_level=max_level,
        is_reducing=is_reducing,
        use_flipping=use_flipping,
    )
    drawer = TreeDrawer(draw_settings, params)
    for tree in johansen_matrix.vertices:
        # The same tree is passed on both sides of the comparison drawing.
        drawer.draw_tree(tree, tree, folder)
def compare(self, path, is_reducing, expected_matr, g_weight=0.0):
    """Assert that computed distances match the expected upper triangle.

    Args:
        path: File pattern relative to ``test/test_input/``.
        is_reducing: Forwarded to ``get_distances_by_files``.
        expected_matr: Rows of expected values for the part of the distance
            matrix strictly right of the diagonal; entry ``j`` of row ``i``
            is compared with column ``i + 1 + j`` of the computed matrix.
        g_weight: Forwarded to ``GlobalParams``.
    """
    params = GlobalParams(max_level=11, is_test_nodes=True, g_weight=g_weight)
    _, distances = get_distances_by_files(
        f"test/test_input/{path}",
        params,
        is_reducing=is_reducing,
        is_test_nodes=True,
    )
    for row_index, expected_row in enumerate(expected_matr):
        # Start counting columns just past the diagonal of this row.
        for column, expected_value in enumerate(expected_row, start=row_index + 1):
            self.assertAlmostEqual(expected_value, distances[row_index][column])
def matr_diff(self, x):
    """Return the correlation coefficient for the parameter vector ``x``.

    Args:
        x: Indexable with at least four entries:
            ``x[0]`` param_a, ``x[1]`` g_weight, ``x[2]`` chain_length_weight,
            ``x[3]`` index of the level whose weight is multiplied by 5
            (a negative value leaves all level weights at 1).

    Returns:
        Whatever ``self.corrcoef`` returns for the experiment matrix built
        with these parameters.
    """
    boosted_level = x[3]
    multipliers = [1] * 11
    if boosted_level >= 0:
        # x may hold floats (e.g. coming from an optimizer), hence int().
        multipliers[int(boosted_level)] *= 5

    params = GlobalParams(
        max_level=10,
        param_a=x[0],
        g_weight=x[1],
        chain_length_weight=x[2],
        level_weight_multiplier=multipliers,
    )
    return self.corrcoef(self.make_experiment_matrix(params))
def test_corr_clustered_trees(self):
    """Pin the clustered-tree correlation for four parameter combinations.

    For each case the experiment matrix is built, clustered with the
    'average' linkage, and the correlation against the systematic matrix
    is compared with a recorded value.
    """
    systematic_tree = "morph"
    max_level = 11
    cluster_algorithm = 'average'

    matr_diff = MatrixDiff(
        "../../input/xtg/*.xtg",
        f"../../input/systematic_tree_{systematic_tree}.xtg",
        ["Angiosperms"],
        max_level=max_level,
        is_reducing=True,
    )

    # Each case: (param_a, level whose weight is boosted or -1, expected corr)
    cases = [
        (0.5, -1, 0.29536767921530455),
        (1.0, -1, 0.04947724771267295),
        (0.5, 3 - 1, 0.29337382536794604),
        (1.0, 3 - 1, 0.12199025619653726),
    ]

    for param_a, boosted_level, expected_corr in cases:
        multipliers = [1] * 11
        if boosted_level >= 0:
            multipliers[boosted_level] *= 5

        params = GlobalParams(
            max_level=max_level,
            param_a=param_a,
            g_weight=0.0,
            chain_length_weight=0.0,
            level_weight_multiplier=multipliers,
        )
        full_matrix = to_full_matrix(matr_diff.make_experiment_matrix(params))

        # Convert the redundant n*n square matrix into a condensed nC2 array:
        # condensed[{n choose 2} - {n-i choose 2} + (j-i-1)] is the distance
        # between points i and j.
        condensed = ssd.squareform(full_matrix)
        linkage = hierarchy.linkage(condensed, cluster_algorithm)

        actual_corr = corr_clustered_trees(
            linkage,
            matr_diff.names,
            matr_diff.make_systematic_matrix(),
        )

        # Exact float equality is deliberate: any change in results after a
        # refactoring must be noticed.
        self.assertEqual(expected_corr, actual_corr)
"../../input/xtg/*.xtg", f"../../input/systematic_tree_{systematic_tree}.xtg", ["Angiosperms"], max_level=max_level, is_reducing=True, use_flipping=use_flipping) increasing_level = -1 level_weight_multiplier = [1] * 11 if increasing_level >= 0: level_weight_multiplier[increasing_level] *= 5 global_params = GlobalParams( max_level=max_level, param_a=param_a, g_weight=0.0, chain_length_weight=0.0, level_weight_multiplier=level_weight_multiplier, use_flipping=use_flipping) experiment_matrix = matrDiff.make_experiment_matrix( global_params) # corr = matrDiff.corrcoef(experiment_matrix=experiment_matrix) # name = f"param_a={param_a}_corr_{corr:0.2f}_{systematic_tree}_{cluster_algorithm}_subtree_(thr,mult)=({global_params.subtree_threshold},{global_params.subtree_multiplier})_lev_mult={global_params.level_weight_multiplier}" #print_matrix(experiment_matrix, name, matrDiff.names, corr, with_headers=True) #experiment_array = make_experiment_array(experiment_matrix) effective_cluster_algorithm = cluster_algorithm if cluster_algorithm == 'ultrametric': ultra_matrix = get_ultra_metric(
systematic_array.append(systematic_matrix[i][j]) experiment_array.append(experiment_matrix[i][j]) corrcoef_matrix = np.corrcoef([systematic_array, experiment_array]) return corrcoef_matrix[0][1] matrDiff = MatrixDiff("../../input/xtg/*.xtg", "../../input/systematic_tree_morph.xtg", ["Angiosperms"], max_level=10) init_values = [0.5, 0.1, 0.0] x = init_values global_params = GlobalParams(max_level=10, param_a=x[0], g_weight=x[1], chain_length_weight=x[2]) experiment_matrix = matrDiff.make_full_experiment_matrix(global_params) print(experiment_matrix) for level_count in range(30): name = f"ultrametric_{x[0]}_{x[1]}_{x[2]}_{level_count + 1}" ultra_matrix = get_ultra_metric( experiment_matrix, UltraMetricParams(max_level=level_count + 1)) ultra_array = make_experiment_array(ultra_matrix) clustered_trees = hierarchy.linkage(np.asarray(ultra_array), 'average') corrcoef = matrDiff.corrcoef(ultra_matrix) corrcoef2 = common_corrcoef(experiment_matrix, ultra_matrix)
import copy import unittest from unittest import TestCase from src.multiple_trees.compare_trees import get_distances_by_files from src.single_tree.development_tree_reader import read_all_trees from src.single_tree.global_params import GlobalParams global_params = GlobalParams(max_level=11, param_a=0.6, g_weight=0.1, chain_length_weight=0.1) class TestDistance(TestCase): # expected_matr = top-right matr (diagonal is excluded) def compare(self, path, is_reducing, expected_matr, g_weight=0.0): [_trees, matr] = get_distances_by_files(f"test/test_input/{path}", GlobalParams(max_level=11, is_test_nodes=True, g_weight=g_weight), is_reducing=is_reducing, is_test_nodes=True) for (i, expected_row) in enumerate(expected_matr): for (j, expected_value) in enumerate(expected_row): actual_value = matr[i][j + i + 1] self.assertAlmostEqual(expected_value, actual_value) def test_chain(self): # if reduce - all trees are the same
total[level][EQ] += 1 superimposed_node = SuperimposedNode(node1, node2) node_distance = superimposed_node.node_dist(global_params) total_distance = node_distance total_distance += proceed_node(node1.left, node2.left, level + 1) total_distance += proceed_node(node1.right, node2.right, level + 1) return total_distance systematic_tree = "morph" max_level = 10 global_params = GlobalParams(max_level=max_level, param_a=0.50, g_weight=0.5) matrDiff = MatrixDiff("../../input/xtg/*.xtg", f"../../input/systematic_tree_{systematic_tree}.xtg", ["Angiosperms"], max_level=max_level) trees = matrDiff.vertices tree_number = 0 for i in range(0, len(trees)): for j in range(i+1, len(trees)): proceed_node(trees[i].root, trees[j].root, 0) tree_number += 1 print(f"level total one_only eq ineq both_existing_part existing_part") for i, item in enumerate(total): print(f"{i} {item[0]} {item[1] + item[2]} {item[3]} {item[4]} {'nan' if item[0] == 0 else (item[3] + item[4]) / item[0]} {item[0] / (tree_number * pow(2, i))}")
def specie_fertility_distance(max_level=10, is_reducing=True, use_min_common_depth=False, use_flipping=False):
    """Collect high-fertility distance records for every pair of species trees.

    Trees are read from ``../../input/xtg/*.xtg`` and prepared with
    ``prepare_trees``. Every unordered pair of trees is superimposed and each
    entry reported by ``SuperimposedNode.high_fertility_distance`` becomes one
    record, unless its distance is 0 or its reduced level is negative.

    Within a record the side with more leaves always comes first
    (``tree1`` / ``node1``); ties keep the original order.

    Args:
        max_level: Forwarded to ``prepare_trees``.
        is_reducing: Forwarded to ``prepare_trees`` and ``number_by_address``.
        use_min_common_depth: Forwarded to ``prepare_trees``.
        use_flipping: Forwarded to ``prepare_trees``.

    Returns:
        A list of 12-element lists, in this order:
        ``[tree1.name, tree2.name, dist, reduced_level + 1, left_right_number,
        has_zero_descendants_side, zero_descendants_tree_name_or_"-", addr,
        node1.address, node2.address, leaves1, leaves2]``.
    """
    from itertools import combinations

    trees = read_all_trees(pattern="../../input/xtg/*.xtg")
    prepare_trees(trees, max_level, is_reducing, use_min_common_depth, use_flipping)

    sp_fert_dist = []
    # combinations() yields each unordered pair once, in (i < j) order.
    for first_tree, second_tree in combinations(trees, 2):
        superimposed_node = SuperimposedNode(first_tree.root, second_tree.root)
        for addr, dist, reduced_level, node1, node2 in superimposed_node.high_fertility_distance():
            # ignore cases when at the last level history is completely equal
            if dist == 0:
                continue
            # ignore zygote and the next level
            if reduced_level < 0:
                continue

            tree1, tree2 = first_tree, second_tree
            leaves1, leaves2 = node1.leaves_number, node2.leaves_number
            # Put the side with more leaves first; trees, nodes and leaf
            # counts are swapped together so they stay paired.
            if leaves1 < leaves2:
                tree1, tree2 = tree2, tree1
                node1, node2 = node2, node1
                leaves1, leaves2 = leaves2, leaves1

            left_right_number = number_by_address(
                tree1.root, tree2.root, addr, is_reducing=is_reducing)

            is_left_0_descendants = node1.left.is_none() and node1.right.is_none()
            is_right_0_descendants = node2.left.is_none() and node2.right.is_none()

            # Name the tree that owns the childless node. tree1/tree2 are used
            # (not the loop's original order) because node1/node2 may have
            # been swapped above.
            if is_left_0_descendants:
                l_or_r = tree1.name
            elif is_right_0_descendants:
                l_or_r = tree2.name
            else:
                l_or_r = "-"

            sp_fert_dist.append([
                tree1.name,
                tree2.name,
                dist,
                reduced_level + 1,
                left_right_number,
                is_left_0_descendants or is_right_0_descendants,
                l_or_r,
                addr,
                node1.address,
                node2.address,
                leaves1,
                leaves2,
            ])
    return sp_fert_dist
import scipy.spatial.distance as ssd from scipy.cluster import hierarchy from src.multiple_trees.trees_matrix import to_full_matrix, print_matrix from src.single_tree.global_params import GlobalParams from src.multiple_trees.matrix_diff import MatrixDiff systematic_tree = "morph" cluster_algorithm = "complete" max_level = 10 param_a = 0.5 g_weight = 0.1 chain_length_weight = 0.0 global_params = GlobalParams(max_level=max_level, param_a=param_a, g_weight=g_weight, chain_length_weight=chain_length_weight) name = f"param_a={param_a}_{systematic_tree}_{cluster_algorithm}_subtree_(thr,mult)=({global_params.subtree_threshold},{global_params.subtree_multiplier})_lev_mult={global_params.level_weight_multiplier}" matrDiff = MatrixDiff("../../input/xtg/*.xtg", f"../../input/systematic_tree_{systematic_tree}.xtg", ["Angiosperms"], max_level=max_level) experiment_matrix = matrDiff.make_experiment_matrix(global_params) plot_matrix = to_full_matrix(experiment_matrix) corr_coef = matrDiff.corrcoef(experiment_matrix) print_matrix(plot_matrix, "experiment_matrix", matrDiff.names,
trees = matrDiff.vertices # param_a = 0.5 # g_weight = 0.5 # chain_length_weight = 0.1 print(f"param_a g_weight chain_length_weight corr") max_corr = 1000 for param_a in np.linspace(0.4, 0.6, 3): for g_weight in np.linspace(0.0, 0.3, 4): for chain_length_weight in np.linspace(0.0, 0.3, 4): global_params = GlobalParams( max_level=max_level, param_a=param_a, g_weight=g_weight, chain_length_weight=chain_length_weight, is_swap_left_right=False) experiment_matrix = matrDiff.make_experiment_matrix(global_params) swap_global_params = GlobalParams( max_level=max_level, param_a=param_a, g_weight=g_weight, chain_length_weight=chain_length_weight, is_swap_left_right=True) swap_experiment_matrix = matrDiff.make_experiment_matrix( swap_global_params) corr = corrcoef(swap_experiment_matrix, experiment_matrix) print(
def do_it():
    """Print, per species, its two nearest Johansen-Batygina types.

    Runs once for every ``(param_a, is_reducing)`` combination listed in
    ``params``. For each run the species trees (``../../input/xtg/*.xtg``) and
    the Johansen type trees (``../../input/xtg_johansen/*.xtg``) are loaded,
    the ``full_distance`` from every species tree to every type tree is
    computed at the smaller of the two roots' reduced depths, and the two
    closest types are printed. A species counts as a match when its own
    ``embryo_type`` equals the name of the closest type; the number of
    matches is printed at the end of each run.
    """
    max_level = 10
    use_min_common_depth = True
    use_flipping = False

    # (param_a, is_reducing)
    params = [(0.5, True), (1.0, False)]

    for param_a, is_reducing in params:
        trees_matrix = TreesMatrix(
            "../../input/xtg/*.xtg",
            max_level=max_level,
            is_reducing=is_reducing,
            use_min_common_depth=use_min_common_depth,
            use_flipping=use_flipping)
        johansen_trees_matrix = TreesMatrix(
            "../../input/xtg_johansen/*.xtg",
            max_level=max_level,
            is_reducing=is_reducing,
            use_min_common_depth=use_min_common_depth,
            use_flipping=use_flipping)
        trees = trees_matrix.vertices
        johansen_trees = johansen_trees_matrix.vertices

        global_params = GlobalParams(
            max_level=max_level,
            param_a=param_a,
            use_min_common_depth=use_min_common_depth,
            use_flipping=use_flipping)

        matches_number = 0

        # Report the parameters actually used in this run.
        print(
            f"Johansen-Batygina types to species distance, is_reducing: {is_reducing}, param_a: {param_a}, division_weight: 1.0, "
            f"g_weight: 0.0, chain_length_weight: 0.0")
        print("Specie Reference_type 1st_type 1st_type_distance 2nd_type 2nd_type_distance")

        for i, tree in enumerate(trees):
            print(
                f"{trees_matrix.names[i]} {short_embryo_name(tree.embryo_type)} ",
                end='')

            res = []
            for j, johansen_tree in enumerate(johansen_trees):
                # Compare both trees at the deepest level they have in common.
                min_reduced_depth = min(tree.root.reduced_depth,
                                        johansen_tree.root.reduced_depth)
                flipped_root = (johansen_tree.flipped_roots[min_reduced_depth]
                                if use_flipping else None)
                dist = full_distance(
                    global_params,
                    tree.roots[min_reduced_depth],
                    johansen_tree.roots[min_reduced_depth],
                    flipped_root)
                res.append((dist, johansen_trees_matrix.names[j]))

            # Nearest types first; sorting by distance only keeps ties in
            # their original order.
            res.sort(key=lambda dist_name: dist_name[0])
            for dist, name in res[:2]:
                print(f"{short_embryo_name(name)} {dist:0.2f} ", end='')
            print()

            # res is empty when no Johansen trees were loaded.
            if res and tree.embryo_type == res[0][1]:
                matches_number += 1

        print(f"matches_number: {matches_number}\n")
def get_corrcoef(param_a, g_weight, chain_length_weight):
    """Return ``matrDiff.corrcoef`` of the experiment matrix for these weights.

    Relies on the module-level names ``max_level`` and ``matrDiff``.

    Args:
        param_a: Forwarded to ``GlobalParams``.
        g_weight: Forwarded to ``GlobalParams``.
        chain_length_weight: Forwarded to ``GlobalParams``.
    """
    params = GlobalParams(
        max_level=max_level,
        param_a=param_a,
        g_weight=g_weight,
        chain_length_weight=chain_length_weight,
    )
    matrix = matrDiff.make_experiment_matrix(params)
    return matrDiff.corrcoef(matrix)
from src.multiple_trees.compare_trees import get_distances_by_files
from src.single_tree.global_params import GlobalParams

# Compute pairwise distances between all trees of the input set and dump
# them to the console.
global_params = GlobalParams(max_level=10,
                             param_a=0.50,
                             g_weight=0.05,
                             chain_length_weight=0.4)
trees, distance_matrix = get_distances_by_files("../../input/xtg/*.xtg",
                                                global_params,
                                                is_reducing=True)

# Header line: every tree name prefixed with ", ", spaces replaced by "_".
print("".join(f", {tree.name.replace(' ', '_')}" for tree in trees))

# One line per matrix row, each value with two decimals and a trailing space.
for row in distance_matrix:
    print("".join("%0.2f " % item for item in row))
import numpy as np
from src.single_tree.global_params import GlobalParams
from src.multiple_trees.iterate_trees import generate_bin_tree, get_subtrees
from src.multiple_trees.matrix_diff import MatrixDiff, print_matrix, corrcoef

# Build matrices and corr coef only
systematic_tree = "morph"
max_level = 10

# Species trees are matched by the "../../input/xtg/*.xtg" pattern; the
# systematic tree file is selected by `systematic_tree`.
matrDiff = MatrixDiff("../../input/xtg/*.xtg",
                      f"../../input/systematic_tree_{systematic_tree}.xtg",
                      ["Angiosperms"],
                      max_level=max_level)
global_params = GlobalParams(max_level=max_level,
                             param_a=0.5,
                             g_weight=0,
                             chain_length_weight=0)
trees = matrDiff.vertices

# Map each name in matrDiff.names to its position in that list.
name2index = {}
for i in range(len(matrDiff.names)):
    name2index[matrDiff.names[i]] = i

# Species with a creeping rhizome
creeping_rhizome = {"Ottelia_alismoides", "Polemonium_caeruleum", "Potamogeton_lucens", "Sagina_procumbens", "Sedum_acre", "Sedum_sieboldii", "Sparganium_simplex", "Stratiotes_aloides"}

# Species with a fibrous root system
fibrous_root_system = {"Triticum_aestivum"}
import matplotlib.pyplot as plt
from src.single_tree.development_tree_reader import read_all_trees
from src.single_tree.development_tree_utils import prepare_trees
from src.single_tree.global_params import GlobalParams
from src.view.draw_compared_trees import TreeDrawSettings, TreeDrawer, reduced_node_caption_1, double_node_caption_1, \
    load_font, FONT_PATH, node_dist_caption_2

# Parameters shared by the distance computation and the drawing below.
max_level = 10
param_a = 0.5
is_reducing = False
global_params = GlobalParams(max_level=max_level, param_a=param_a)

# Drawing configuration.
# NOTE(review): the color constants look like 0xAARRGGBB values -- confirm
# against TreeDrawSettings. color_eq and color_ineq are set to the same value.
draw_settings = TreeDrawSettings(color_left=0xFF285EDD,
                                 color_right=0xFFFC7074,
                                 color_eq=0xFFE8E4DE,
                                 color_ineq=0xFFE8E4DE,
                                 get_node_caption_1=double_node_caption_1,
                                 get_node_caption_2=node_dist_caption_2,
                                 font=load_font(FONT_PATH, 10),
                                 legend_font=load_font(FONT_PATH, 20),
                                 width=2000,
                                 height=720)
tree_drawer = TreeDrawer(draw_settings, global_params)

# read and prepare trees: reduce if necessary, precalculate some parameters
# notice: file names must be "Genus_specie_type.xtg", and it will be shown as "Genus specie"
trees = read_all_trees(pattern="test/test_input/sofa/test_reduce*.xtg", is_test_nodes=True, max_level=max_level)
# Alternative input set, currently disabled:
#trees = read_all_trees(pattern="test/test_input/paper_m/M2_*.xtg", is_test_nodes=True)