def NJ_Algorithm():
    """
    Neighbor-Joining: repeatedly join the lowest pair of species into a tree.

    Works on the module-level ``species`` sequence. Performs
    ``len(species) - 1`` joins (the count is read once, before the first
    join). Each join asks ``ComputeNodeAvgDist`` / ``FindLowestPair`` for
    the pair to merge, wraps the two members in a new ``Tree`` node, and
    hands the merged distances to ``CombineSpecies``.

    Returns the ``Tree`` created by the last join, or ``None`` when no join
    is performed (fewer than two species).
    """
    joined = {}   # join key -> Tree built for that join
    newest = None

    def _fetch_or_create(own, other, pair_dist, averages):
        # Reuse the subtree stored under this species name, if there is one.
        label = species[own]
        try:
            return joined[label]
        except KeyError:
            pass
        # Otherwise start a new single-species tree.
        # NOTE(review): the value assignment is reconstructed as applying
        # only to newly created nodes; the collapsed source did not show
        # its indentation -- confirm against the original layout.
        node = Tree(label)
        # v_own = D/2 + (u_own - u_other)/2
        node.value = pair_dist/2.0 + (averages[own] - averages[other])/2.0
        return node

    for _ in range(len(species)-1):
        # Per-node average distances, then the lowest pair based on them
        averages = ComputeNodeAvgDist()
        lowest, idx_a, idx_b = FindLowestPair(averages)

        first = _fetch_or_create(idx_a, idx_b, lowest, averages)
        second = _fetch_or_create(idx_b, idx_a, lowest, averages)

        # New node representing A and B joined
        join_key = "".join(["(", species[idx_a], "),(", species[idx_b], ")"])
        newest = Tree(join_key)
        newest.value = lowest/2.0

        # The child with the lower value goes on the left (ties keep A left)
        if first.value <= second.value:
            left, right = first, second
        else:
            left, right = second, first
        newest.AddLeftChild(left, left.value)
        newest.AddRightChild(right, right.value)

        joined[join_key] = newest

        # Replace A and B in the distance data with the joined AB entry
        CombineSpecies(CreateJoinedDistances(idx_a, idx_b, lowest), idx_a, idx_b)

    return newest
def UPGMA_Algorithm():
    """
    UPGMA: repeatedly join the lowest pair of species into a tree.

    Works on the module-level ``species`` sequence. Performs
    ``len(species) - 1`` joins (the count is read once, before the first
    join). Each join takes the pair reported by ``FindLowestPair``, wraps
    the two members in a new ``Tree`` node whose value is half the pair
    distance, and hands the merged distances to ``CombineSpecies``.

    Returns the ``Tree`` created by the last join, or ``None`` when no join
    is performed (fewer than two species).
    """
    joined = {}   # join key -> Tree built for that join
    newest = None

    def _fetch_or_create(idx, pair_dist):
        # Reuse the subtree stored under this species name, if there is one.
        label = species[idx]
        try:
            return joined[label]
        except KeyError:
            pass
        # Otherwise start a new single-species tree.
        # NOTE(review): the value assignment is reconstructed as applying
        # only to newly created nodes; the collapsed source did not show
        # its indentation -- confirm against the original layout.
        node = Tree(label)
        node.value = pair_dist / 2.0
        return node

    for _ in range(len(species) - 1):
        lowest, idx_i, idx_j = FindLowestPair()

        first = _fetch_or_create(idx_i, lowest)
        second = _fetch_or_create(idx_j, lowest)

        # New node representing A and B joined
        join_key = "".join(["(", species[idx_i], "),(", species[idx_j], ")"])
        newest = Tree(join_key)
        newest.value = lowest / 2.0

        # The child with the lower value goes on the left (ties keep A left);
        # each child is attached with (parent value - child value).
        if first.value <= second.value:
            left, right = first, second
        else:
            left, right = second, first
        newest.AddLeftChild(left, (newest.value - left.value))
        newest.AddRightChild(right, (newest.value - right.value))

        joined[join_key] = newest

        # Replace A and B in the distance data with the joined AB entry
        CombineSpecies(CreateJoinedDistances(idx_i, idx_j), idx_i, idx_j)

    return newest
def BuildTree(path_tuple):
    """
    Recursively build a tree structure from a given path.

    path_tuple: list describing the path taken so far; each recursion
        appends one ``(attribute, branch)`` pair, with branch 0 for the
        left subtree and 1 for the right subtree.

    ``FindBestGain(path_tuple)`` supplies ``(node_to_split, y_value)``.
    A negative ``node_to_split`` ends the recursion with a node named
    "leaf" carrying ``y_value`` and ``is_leaf = True``. Otherwise a node
    named ``_attrib_dict[str(node_to_split)]`` is created and its left and
    right children are built recursively.

    Returns the ``Tree`` for this path.
    """
    if _debug:
        # One pre-formatted argument, so the output is the same under the
        # Python 2 print statement and the Python 3 print function.
        print("%s %s" % ("##\nBuild Tree...\ntuple:", path_tuple))

    node_to_split, y_value = FindBestGain(path_tuple)

    if node_to_split < 0:
        # Nothing left to split on: emit a leaf carrying the prediction.
        if _debug:
            print("%s %s" % ("found leaf for tuple: ", path_tuple))
        leaf = Tree("leaf")
        leaf.y_value = y_value
        leaf.is_leaf = True
        return leaf

    # Create node for the attribute to split on
    training_tree = Tree(_attrib_dict[str(node_to_split)])

    # Left subtree follows branch 0, right subtree follows branch 1.
    # A new list is built for each side, so the caller's list is not mutated.
    training_tree.AddLeftChild(BuildTree(path_tuple + [(node_to_split, 0)]))
    training_tree.AddRightChild(BuildTree(path_tuple + [(node_to_split, 1)]))

    return training_tree