예제 #1
0
def draw_trees(folder, draw_settings, is_reducing, max_level=10, param_a=0.5):
    """Draw every prepared tree superimposed on itself.

    The trees come from ``get_prepared_trees(is_reducing, max_level)``.  Each
    one is handed to ``TreeDrawer.draw_tree`` as both the first and the second
    tree, and ``folder`` is forwarded unchanged.
    """
    params = GlobalParams(max_level=max_level, param_a=param_a)
    prepared = get_prepared_trees(is_reducing, max_level)
    drawer = TreeDrawer(draw_settings, params)

    for tree in prepared:
        # same tree on both sides of the comparison drawing
        drawer.draw_tree(tree, tree, folder)
예제 #2
0
def draw_trees(folder, draw_settings, is_reducing, use_flipping, max_level=10, param_a=0.5):
    """Draw every tree of the ``xtg_johansen`` input set superimposed on itself.

    The trees are the ``vertices`` of a ``TreesMatrix`` built from
    ``../../input/xtg_johansen/*.xtg``.  Each one is passed to
    ``TreeDrawer.draw_tree`` as both trees, together with ``folder``.
    """
    params = GlobalParams(max_level=max_level, param_a=param_a)

    matrix = TreesMatrix(
        "../../input/xtg_johansen/*.xtg",
        max_level=max_level,
        is_reducing=is_reducing,
        use_flipping=use_flipping,
    )
    vertices = matrix.vertices

    drawer = TreeDrawer(draw_settings, params)
    for tree in vertices:
        # same tree on both sides of the comparison drawing
        drawer.draw_tree(tree, tree, folder)
예제 #3
0
 def compare(self, path, is_reducing, expected_matr, g_weight=0.0):
     """Assert that computed distances match ``expected_matr``.

     ``expected_matr`` holds only the part above the diagonal: row ``i``
     lists the expected distances from tree ``i`` to trees ``i + 1``,
     ``i + 2``, ...  Values are compared with ``assertAlmostEqual``.
     """
     pattern = f"test/test_input/{path}"
     params = GlobalParams(max_level=11, is_test_nodes=True, g_weight=g_weight)
     _, distances = get_distances_by_files(pattern,
                                           params,
                                           is_reducing=is_reducing,
                                           is_test_nodes=True)

     for row, expected_row in enumerate(expected_matr):
         # the first expected value of a row belongs to column row + 1
         for col, expected in enumerate(expected_row, start=row + 1):
             self.assertAlmostEqual(expected, distances[row][col])
예제 #4
0
    def matr_diff(self, x):
        """Return ``self.corrcoef`` of the experiment matrix built from ``x``.

        ``x`` is indexed as ``[param_a, g_weight, chain_length_weight,
        increasing_level]``.  A non-negative ``increasing_level`` picks one of
        the 11 per-level weight multipliers and sets it to 5; a negative value
        leaves every multiplier at 1.
        """
        boosted_level = x[3]

        multipliers = [1] * 11
        if boosted_level >= 0:
            # the optimiser may pass a float, so truncate it to an index
            multipliers[int(boosted_level)] = 5

        params = GlobalParams(max_level=10,
                              param_a=x[0],
                              g_weight=x[1],
                              chain_length_weight=x[2],
                              level_weight_multiplier=multipliers)

        return self.corrcoef(self.make_experiment_matrix(params))
예제 #5
0
    def test_corr_clustered_trees(self):
        """Pin the value of ``corr_clustered_trees`` for four parameter sets.

        Every case builds the experiment matrix for one
        ``(param_a, boosted level)`` pair, clusters it with average linkage
        and compares the resulting correlation with a stored value.
        """
        systematic_tree = "morph"
        max_level = 11
        cluster_algorithm = 'average'
        matr_diff = MatrixDiff("../../input/xtg/*.xtg",
                               f"../../input/systematic_tree_{systematic_tree}.xtg",
                               ["Angiosperms"],
                               max_level=max_level,
                               is_reducing=True)

        # (param_a, 0-based level whose weight is multiplied by 5 or -1 for
        # "none", expected correlation)
        cases = (
            (0.5, -1, 0.29536767921530455),
            (1.0, -1, 0.04947724771267295),
            (0.5, 2, 0.29337382536794604),
            (1.0, 2, 0.12199025619653726),
        )

        for param_a, boosted_level, expected_corr in cases:
            multipliers = [
                5 if level == boosted_level else 1 for level in range(11)
            ]
            params = GlobalParams(max_level=max_level,
                                  param_a=param_a,
                                  g_weight=0.0,
                                  chain_length_weight=0.0,
                                  level_weight_multiplier=multipliers)

            experiment_matrix = matr_diff.make_experiment_matrix(params)
            full_matrix = to_full_matrix(experiment_matrix)

            # squareform turns the redundant n*n square matrix into the
            # condensed nC2 vector that linkage expects:
            # condensed[{n choose 2}-{n-i choose 2} + (j-i-1)] is the distance
            # between points i and j
            condensed = ssd.squareform(full_matrix)
            linkage_result = hierarchy.linkage(condensed, cluster_algorithm)

            actual_corr = corr_clustered_trees(
                linkage_result, matr_diff.names,
                matr_diff.make_systematic_matrix())

            # exact float equality is intentional: any change of the result
            # during refactoring must be noticed
            self.assertEqual(expected_corr, actual_corr)
예제 #6
0
                    "../../input/xtg/*.xtg",
                    f"../../input/systematic_tree_{systematic_tree}.xtg",
                    ["Angiosperms"],
                    max_level=max_level,
                    is_reducing=True,
                    use_flipping=use_flipping)

                increasing_level = -1
                level_weight_multiplier = [1] * 11
                if increasing_level >= 0:
                    level_weight_multiplier[increasing_level] *= 5

                global_params = GlobalParams(
                    max_level=max_level,
                    param_a=param_a,
                    g_weight=0.0,
                    chain_length_weight=0.0,
                    level_weight_multiplier=level_weight_multiplier,
                    use_flipping=use_flipping)

                experiment_matrix = matrDiff.make_experiment_matrix(
                    global_params)

                # corr = matrDiff.corrcoef(experiment_matrix=experiment_matrix)
                # name = f"param_a={param_a}_corr_{corr:0.2f}_{systematic_tree}_{cluster_algorithm}_subtree_(thr,mult)=({global_params.subtree_threshold},{global_params.subtree_multiplier})_lev_mult={global_params.level_weight_multiplier}"
                #print_matrix(experiment_matrix, name, matrDiff.names, corr, with_headers=True)
                #experiment_array = make_experiment_array(experiment_matrix)

                effective_cluster_algorithm = cluster_algorithm
                if cluster_algorithm == 'ultrametric':
                    ultra_matrix = get_ultra_metric(
예제 #7
0
            systematic_array.append(systematic_matrix[i][j])
            experiment_array.append(experiment_matrix[i][j])

    corrcoef_matrix = np.corrcoef([systematic_array, experiment_array])

    return corrcoef_matrix[0][1]


# MatrixDiff over the xtg inputs and the "morph" systematic tree file.
matrDiff = MatrixDiff("../../input/xtg/*.xtg",
                      "../../input/systematic_tree_morph.xtg", ["Angiosperms"],
                      max_level=10)

# x = [param_a, g_weight, chain_length_weight], as consumed by GlobalParams below.
init_values = [0.5, 0.1, 0.0]
x = init_values
global_params = GlobalParams(max_level=10,
                             param_a=x[0],
                             g_weight=x[1],
                             chain_length_weight=x[2])

# Computed once and reused by every iteration of the loop below.
experiment_matrix = matrDiff.make_full_experiment_matrix(global_params)
print(experiment_matrix)

# Try UltraMetricParams.max_level values 1..30 (level_count is 0-based, hence "+ 1").
for level_count in range(30):
    # Label carrying the three parameters and the 1-based level count.
    name = f"ultrametric_{x[0]}_{x[1]}_{x[2]}_{level_count + 1}"
    ultra_matrix = get_ultra_metric(
        experiment_matrix, UltraMetricParams(max_level=level_count + 1))

    # Cluster the ultrametric distances with average linkage.
    ultra_array = make_experiment_array(ultra_matrix)
    clustered_trees = hierarchy.linkage(np.asarray(ultra_array), 'average')
    # corrcoef: the ultrametric matrix against whatever matrDiff.corrcoef
    # compares with (presumably the systematic matrix -- confirm in MatrixDiff).
    # corrcoef2: the ultrametric matrix against the original experiment matrix.
    corrcoef = matrDiff.corrcoef(ultra_matrix)
    corrcoef2 = common_corrcoef(experiment_matrix, ultra_matrix)
예제 #8
0
import copy
import unittest
from unittest import TestCase

from src.multiple_trees.compare_trees import get_distances_by_files
from src.single_tree.development_tree_reader import read_all_trees
from src.single_tree.global_params import GlobalParams

# Module-level parameter set. NOTE: TestDistance.compare below builds its own
# GlobalParams and does not read this one.
global_params = GlobalParams(max_level=11,
                             param_a=0.6,
                             g_weight=0.1,
                             chain_length_weight=0.1)


class TestDistance(TestCase):
    # expected_matr = top-right matr (diagonal is excluded)
    def compare(self, path, is_reducing, expected_matr, g_weight=0.0):
        """Assert that computed distances match ``expected_matr``.

        ``expected_matr`` holds only the part above the diagonal: row ``i``
        lists the expected distances from tree ``i`` to trees ``i + 1``,
        ``i + 2``, ...  Values are compared with ``assertAlmostEqual``.
        """
        pattern = f"test/test_input/{path}"
        params = GlobalParams(max_level=11,
                              is_test_nodes=True,
                              g_weight=g_weight)
        _, distances = get_distances_by_files(pattern,
                                              params,
                                              is_reducing=is_reducing,
                                              is_test_nodes=True)

        for row, expected_row in enumerate(expected_matr):
            # the first expected value of a row belongs to column row + 1
            for col, expected in enumerate(expected_row, start=row + 1):
                self.assertAlmostEqual(expected, distances[row][col])

    def test_chain(self):
        # if reduce - all trees are the same
예제 #9
0
        total[level][EQ] += 1

    superimposed_node = SuperimposedNode(node1, node2)
    node_distance = superimposed_node.node_dist(global_params)

    total_distance = node_distance
    total_distance += proceed_node(node1.left, node2.left, level + 1)
    total_distance += proceed_node(node1.right, node2.right, level + 1)

    return total_distance


# Script section: run proceed_node over every unordered pair of trees and then
# print the per-level counters accumulated in `total` (defined earlier in the file).
systematic_tree = "morph"
max_level = 10

# Module-level parameters; proceed_node above reads `global_params`.
global_params = GlobalParams(max_level=max_level, param_a=0.50, g_weight=0.5)

matrDiff = MatrixDiff("../../input/xtg/*.xtg", f"../../input/systematic_tree_{systematic_tree}.xtg", ["Angiosperms"],
                      max_level=max_level)
trees = matrDiff.vertices

# NOTE: despite its name, tree_number counts compared PAIRS (one per (i, j) with i < j).
tree_number = 0
for i in range(0, len(trees)):
    for j in range(i+1, len(trees)):
        # the return value (total distance) is ignored; only the counters matter here
        proceed_node(trees[i].root, trees[j].root, 0)
        tree_number += 1

# Columns, in header order: level index, item[0], item[1] + item[2], item[3],
# item[4], (item[3] + item[4]) / item[0] (or 'nan' when item[0] is 0), and
# item[0] / (tree_number * 2**level).
# NOTE(review): with fewer than two trees tree_number stays 0 and the last
# column divides by zero if `total` is non-empty.
print(f"level total one_only eq ineq both_existing_part existing_part")
for i, item in enumerate(total):
    print(f"{i} {item[0]} {item[1] + item[2]} {item[3]} {item[4]} {'nan' if item[0] == 0 else (item[3] + item[4]) / item[0]} {item[0] / (tree_number * pow(2, i))}")
def specie_fertility_distance(max_level=10,
                              is_reducing=True,
                              use_min_common_depth=False,
                              use_flipping=False):
    """Collect fertility-distance records for every unordered pair of trees.

    Trees are read from ``../../input/xtg/*.xtg`` and prepared with
    ``prepare_trees``.  For each pair, every entry returned by
    ``SuperimposedNode.high_fertility_distance`` with a non-zero distance and
    a non-negative ``reduced_level`` produces one record.  Inside a record the
    tree whose node has more leaves comes first.

    Returns:
        A list of records ``[tree1 name, tree2 name, dist, reduced_level + 1,
        left_right_number, either node childless, l_or_r, addr,
        node1.address, node2.address, leaves1, leaves2]``.
    """
    trees = read_all_trees(pattern="../../input/xtg/*.xtg")
    prepare_trees(trees, max_level, is_reducing, use_min_common_depth,
                  use_flipping)

    # NOTE: constructed but not read anywhere else in this function.
    global_params = GlobalParams(max_level=max_level,
                                 param_a=0.5,
                                 g_weight=0.0,
                                 chain_length_weight=0.0)

    # Counter per (reduced_level + 1) for levels 0..11; filled below but
    # neither returned nor printed.
    counts_by_level = dict.fromkeys(range(12), 0)

    records = []
    tree_count = len(trees)
    for i in range(tree_count):
        tree_i = trees[i]
        for j in range(i + 1, tree_count):  # each unordered pair once
            tree_j = trees[j]

            pair_distances = SuperimposedNode(
                tree_i.root, tree_j.root).high_fertility_distance()

            for addr, dist, reduced_level, node1, node2 in pair_distances:
                # histories that are completely equal carry no information
                if dist == 0:
                    continue

                level_number = reduced_level + 1
                counts_by_level[level_number] += 1

                # skip the zygote and the level right after it
                if reduced_level < 0:
                    continue

                leaves1 = node1.leaves_number
                leaves2 = node2.leaves_number

                # put the side with more leaves first
                if leaves1 < leaves2:
                    tree1, tree2 = tree_j, tree_i
                    node1, node2 = node2, node1
                    leaves1, leaves2 = leaves2, leaves1
                else:
                    tree1, tree2 = tree_i, tree_j

                left_right_number = number_by_address(tree1.root,
                                                      tree2.root,
                                                      addr,
                                                      is_reducing=is_reducing)

                node1_childless = (node1.left.is_none()) and (
                    node1.right.is_none())
                node2_childless = (node2.left.is_none()) and (
                    node2.right.is_none())

                # NOTE(review): the names below come from the ORIGINAL pair
                # order (trees[i] / trees[j]), while node1 / node2 may have
                # been swapped above -- confirm whether tree1 / tree2 was
                # intended.
                if node1_childless:
                    l_or_r = tree_i.name
                elif node2_childless:
                    l_or_r = tree_j.name
                else:
                    l_or_r = "-"

                records.append([
                    tree1.name, tree2.name, dist, level_number,
                    left_right_number, node1_childless or node2_childless,
                    l_or_r, addr, node1.address, node2.address, leaves1,
                    leaves2
                ])

    return records
예제 #11
0
import scipy.spatial.distance as ssd
from scipy.cluster import hierarchy

from src.multiple_trees.trees_matrix import to_full_matrix, print_matrix
from src.single_tree.global_params import GlobalParams
from src.multiple_trees.matrix_diff import MatrixDiff

# Script parameters.
systematic_tree = "morph"
# NOTE: cluster_algorithm is only used inside `name` in the visible part of this script.
cluster_algorithm = "complete"
max_level = 10
param_a = 0.5
g_weight = 0.1
chain_length_weight = 0.0

global_params = GlobalParams(max_level=max_level,
                             param_a=param_a,
                             g_weight=g_weight,
                             chain_length_weight=chain_length_weight)
# Label that encodes the run parameters, including the subtree threshold/multiplier
# and level multipliers read back from global_params.
name = f"param_a={param_a}_{systematic_tree}_{cluster_algorithm}_subtree_(thr,mult)=({global_params.subtree_threshold},{global_params.subtree_multiplier})_lev_mult={global_params.level_weight_multiplier}"

matrDiff = MatrixDiff("../../input/xtg/*.xtg",
                      f"../../input/systematic_tree_{systematic_tree}.xtg",
                      ["Angiosperms"],
                      max_level=max_level)

# experiment_matrix feeds corrcoef; plot_matrix is its to_full_matrix() form
# used for printing.
experiment_matrix = matrDiff.make_experiment_matrix(global_params)
plot_matrix = to_full_matrix(experiment_matrix)

corr_coef = matrDiff.corrcoef(experiment_matrix)
print_matrix(plot_matrix,
             "experiment_matrix",
             matrDiff.names,
trees = matrDiff.vertices

# param_a = 0.5
# g_weight = 0.5
# chain_length_weight = 0.1

# Header for the rows printed inside the grid search below.
print(f"param_a g_weight chain_length_weight corr")

# NOTE: max_corr is not read in the visible part of this script.
max_corr = 1000
# Grid: param_a in {0.4, 0.5, 0.6}; g_weight and chain_length_weight in
# {0.0, 0.1, 0.2, 0.3} -> 3 * 4 * 4 = 48 combinations.
for param_a in np.linspace(0.4, 0.6, 3):
    for g_weight in np.linspace(0.0, 0.3, 4):
        for chain_length_weight in np.linspace(0.0, 0.3, 4):
            # Same parameters twice; only is_swap_left_right differs.
            global_params = GlobalParams(
                max_level=max_level,
                param_a=param_a,
                g_weight=g_weight,
                chain_length_weight=chain_length_weight,
                is_swap_left_right=False)
            experiment_matrix = matrDiff.make_experiment_matrix(global_params)

            swap_global_params = GlobalParams(
                max_level=max_level,
                param_a=param_a,
                g_weight=g_weight,
                chain_length_weight=chain_length_weight,
                is_swap_left_right=True)
            swap_experiment_matrix = matrDiff.make_experiment_matrix(
                swap_global_params)

            # Correlation between the swapped and the non-swapped matrices.
            corr = corrcoef(swap_experiment_matrix, experiment_matrix)
            print(
def do_it():
    """Print the two nearest Johansen-Batygina types for every species tree.

    For each ``(param_a, is_reducing)`` combination the species trees
    (``../../input/xtg/*.xtg``) and the reference type trees
    (``../../input/xtg_johansen/*.xtg``) are loaded, the distance from every
    species tree to every reference tree is computed with ``full_distance``,
    and the two closest reference types are printed per species.  After each
    combination the number of species whose ``embryo_type`` equals the name
    of the closest reference tree is printed as ``matches_number``.
    """
    max_level = 10
    use_min_common_depth = True
    use_flipping = False

    # (param_a, is_reducing) combinations to report on
    params = [(0.5, True), (1.0, False)]

    for param_a, is_reducing in params:
        trees_matrix = TreesMatrix("../../input/xtg/*.xtg",
                                   max_level=max_level,
                                   is_reducing=is_reducing,
                                   use_min_common_depth=use_min_common_depth,
                                   use_flipping=use_flipping)

        johansen_trees_matrix = TreesMatrix(
            "../../input/xtg_johansen/*.xtg",
            max_level=max_level,
            is_reducing=is_reducing,
            use_min_common_depth=use_min_common_depth,
            use_flipping=use_flipping)

        trees = trees_matrix.vertices
        johansen_trees = johansen_trees_matrix.vertices

        # Use the same use_min_common_depth value that prepared the trees
        # instead of a second hard-coded True.
        global_params = GlobalParams(max_level=max_level,
                                     param_a=param_a,
                                     use_min_common_depth=use_min_common_depth,
                                     use_flipping=use_flipping)

        matches_number = 0
        # Report the parameters of THIS combination; the header used to say
        # "is_reducing: True, param_a: 0.5" for every run.
        # NOTE(review): division_weight, g_weight and chain_length_weight are
        # not passed to GlobalParams here, so the printed values are presumably
        # its defaults -- confirm against GlobalParams.
        print(
            f"Johansen-Batygina types to species distance, is_reducing: {is_reducing}, param_a: {param_a}, division_weight: 1.0, "
            f"g_weight: 0.0, chain_length_weight: 0.0")
        print(
            "Specie Reference_type 1st_type 1st_type_distance 2nd_type 2nd_type_distance"
        )
        for i, tree in enumerate(trees):
            print(
                f"{trees_matrix.names[i]} {short_embryo_name(tree.embryo_type)} ",
                end='')

            # (distance, reference type name) for every reference tree
            res = []
            for j, johansen_tree in enumerate(johansen_trees):
                # compare both trees at the depth they have in common
                min_reduced_depth = min(tree.root.reduced_depth,
                                        johansen_tree.root.reduced_depth)

                flipped_root = None
                if use_flipping:
                    flipped_root = johansen_tree.flipped_roots[
                        min_reduced_depth]

                dist = full_distance(
                    global_params, tree.roots[min_reduced_depth],
                    johansen_tree.roots[min_reduced_depth], flipped_root)

                res.append((dist, johansen_trees_matrix.names[j]))

            # nearest reference types first; ties keep their input order
            res.sort(key=lambda dist_name: dist_name[0])
            for dist, name in res[:2]:
                print(f"{short_embryo_name(name)} {dist:0.2f} ", end='')
            print()

            if tree.embryo_type == res[0][1]:
                matches_number += 1
        print(f"matches_number: {matches_number}\n")
예제 #14
0
def get_corrcoef(param_a, g_weight, chain_length_weight):
    """Return ``matrDiff.corrcoef`` of the experiment matrix for the given weights.

    Reads the module-level ``max_level`` and ``matrDiff``.
    """
    settings = dict(max_level=max_level,
                    param_a=param_a,
                    g_weight=g_weight,
                    chain_length_weight=chain_length_weight)
    matrix = matrDiff.make_experiment_matrix(GlobalParams(**settings))
    return matrDiff.corrcoef(matrix)
예제 #15
0
from src.multiple_trees.compare_trees import get_distances_by_files
from src.single_tree.global_params import GlobalParams

global_params = GlobalParams(max_level=10, param_a=0.50, g_weight=0.05, chain_length_weight=0.4)

trees, distance_matrix = get_distances_by_files("../../input/xtg/*.xtg", global_params, is_reducing=True)

# Header line: every tree name (spaces replaced by underscores), each preceded by ", ".
print("".join(f", {tree.name.replace(' ', '_')}" for tree in trees))

# One output line per matrix row; every value has two decimals and a trailing space.
for row in distance_matrix:
    print("".join("%0.2f " % item for item in row))
예제 #16
0
import numpy as np

from src.single_tree.global_params import GlobalParams
from src.multiple_trees.iterate_trees import generate_bin_tree, get_subtrees
from src.multiple_trees.matrix_diff import MatrixDiff, print_matrix, corrcoef

# Build matrices and corr coef only

systematic_tree = "morph"
max_level = 10

matrDiff = MatrixDiff("../../input/xtg/*.xtg", f"../../input/systematic_tree_{systematic_tree}.xtg",
                      ["Angiosperms"], max_level=max_level)

global_params = GlobalParams(max_level=max_level, param_a=0.5, g_weight=0, chain_length_weight=0)

trees = matrDiff.vertices


# Map every name in matrDiff.names to its position in that list.
name2index = {}
for i in range(len(matrDiff.names)):
    name2index[matrDiff.names[i]] = i

# creeping rhizome
creeping_rhizome = {"Ottelia_alismoides", "Polemonium_caeruleum", "Potamogeton_lucens", "Sagina_procumbens",
                    "Sedum_acre", "Sedum_sieboldii", "Sparganium_simplex", "Stratiotes_aloides"}

# fibrous root system
fibrous_root_system = {"Triticum_aestivum"}

예제 #17
0
import matplotlib.pyplot as plt

from src.single_tree.development_tree_reader import read_all_trees
from src.single_tree.development_tree_utils import prepare_trees
from src.single_tree.global_params import GlobalParams
from src.view.draw_compared_trees import TreeDrawSettings, TreeDrawer, reduced_node_caption_1, double_node_caption_1, \
    load_font, FONT_PATH, node_dist_caption_2

# Script parameters.
# NOTE: is_reducing is assigned here but not read in the visible part of this script.
max_level = 10
param_a = 0.5
is_reducing = False
global_params = GlobalParams(max_level=max_level, param_a=param_a)

# Drawing configuration: colours, node caption callbacks, fonts and canvas size.
draw_settings = TreeDrawSettings(color_left=0xFF285EDD,
                                 color_right=0xFFFC7074,
                                 color_eq=0xFFE8E4DE,
                                 color_ineq=0xFFE8E4DE,
                                 get_node_caption_1=double_node_caption_1,
                                 get_node_caption_2=node_dist_caption_2,
                                 font=load_font(FONT_PATH, 10),
                                 legend_font=load_font(FONT_PATH, 20),
                                 width=2000,
                                 height=720)
tree_drawer = TreeDrawer(draw_settings, global_params)

# read and prepare trees: reduce if necessary, precalculate some parameters
# notice: file names must be "Genus_specie_type.xtg", and it will be shown as "Genus specie"
trees = read_all_trees(pattern="test/test_input/sofa/test_reduce*.xtg",
                       is_test_nodes=True,
                       max_level=max_level)
# alternative input set (disabled):
#trees = read_all_trees(pattern="test/test_input/paper_m/M2_*.xtg", is_test_nodes=True)