Exemple #1
0
def tree_map(tree_root, bipart_list):
    """Relabel species-tree nodes with the number of entries mapped to them.

    Every entry of ``bipart_list`` is tallied under the key
    ``str(entry.species_node)``. For each distinct key, the node returned by
    ``read_trees.node_finder(tree_root, key)`` has its ``label`` overwritten
    with the tally, converted to a string.

    WARNING: this function *replaces* the labels in the provided tree; it
    does not build a new tree with different labels. Only nodes whose key
    appears in ``bipart_list`` are touched. Be careful when you call it.

    Args:
        tree_root: Node passed to ``read_trees.node_finder`` as the starting
            point of every lookup.
        bipart_list: Iterable of objects exposing a ``species_node``
            attribute.

    Returns:
        None. The tree is modified in place.
    """
    bipart_counts = {}

    # Count the entries at each node. dict.get keeps this to one hash lookup
    # per entry instead of scanning a freshly built list of keys every time.
    for bipart in bipart_list:
        key = str(bipart.species_node)
        bipart_counts[key] = bipart_counts.get(key, 0) + 1

    # Write the counts onto the tree, in first-seen key order.
    for key, count in bipart_counts.items():
        node = read_trees.node_finder(tree_root, key)
        node.label = str(count)
Exemple #2
0
def tree_map2(tree_root, rel_list, label):
    """Stamp ``label`` onto every gene-tree node referenced by ``rel_list``.

    Node keys are taken from ``str(rel.ortholog_node)`` for each entry of
    ``rel_list``; repeated keys are only looked up once. Lookups go through
    ``read_trees.node_finder`` and start from ``tree_root.parent`` when that
    attribute is truthy, otherwise from ``tree_root`` itself.

    The tree is modified in place; nothing is returned.
    """
    # Begin the search one level up when the given node has a parent.
    parent = tree_root.parent
    search_root = parent if parent else tree_root

    # dict.fromkeys drops repeated keys while keeping first-seen order.
    node_keys = dict.fromkeys(str(rel.ortholog_node) for rel in rel_list)

    for node_key in node_keys:
        target = read_trees.node_finder(search_root, node_key)
        target.label = label
Exemple #3
0
def tree_map3(tree_root, rel_list, outname, multi_info):
    """Label nodes with the number of distinct genes showing a relationship.

    Each entry of ``rel_list`` contributes ``str(entry.gene_name)`` to the
    node key ``str(entry.species_node)``. A gene name is counted at most once
    per node key; repeats of the same (node key, gene name) pair are recorded
    separately as duplicates.

    Two text files are written, one line per record, in first-seen order:

    * ``multi_info``: ``<node key>,<gene>;<gene>;...`` for every duplicated
      (node key, gene name) pair, with the gene name repeated once for each
      time the pair occurred in ``rel_list``.
    * ``outname``: ``<node key>,<gene>;<gene>;...`` listing the distinct gene
      names seen for each node key.

    For every node key, the node returned by
    ``read_trees.node_finder(tree_root, key)`` has its ``label`` overwritten
    with the number of distinct gene names, as a string.

    Args:
        tree_root: Node passed to ``read_trees.node_finder`` as the starting
            point of every lookup. Modified in place.
        rel_list: Iterable of objects exposing ``gene_name`` and
            ``species_node`` attributes.
        outname: Path of the per-node gene list file (overwritten).
        multi_info: Path of the duplicates file (overwritten).

    Returns:
        None.
    """
    gene_name_dict = {}
    duplicate = {}

    # A species node can only be informed once per gene tree in this case.
    for bipart in rel_list:
        gene_name = str(bipart.gene_name)
        key = str(bipart.species_node)

        genes = gene_name_dict.setdefault(key, [])
        if gene_name not in genes:
            genes.append(gene_name)
        else:
            # The same tree hit this node more than once. The first sighting
            # of a duplicate seeds the list with one copy so that, together
            # with the append, the original (uncounted) occurrence is
            # accounted for as well.
            duplicate.setdefault((key, gene_name), [gene_name]).append(gene_name)

    # Context managers guarantee both files are closed even if a lookup on
    # the tree fails part-way through.
    with open(multi_info, "w") as outw_multi:
        for (key, _gene_name), names in duplicate.items():
            outw_multi.write(key + "," + ";".join(names) + "\n")

    # Add the numbers to the tree while writing the per-node gene lists.
    with open(outname, "w") as outw:
        for key, genes in gene_name_dict.items():
            outw.write(key + "," + ";".join(genes) + "\n")
            node = read_trees.node_finder(tree_root, key)
            node.label = str(len(genes))
def conflict_stats(conflicts_dict, tree, outfile):
    """Write a CSV table of the conflicts found at each node.

    This takes a dictionary from sort_conflicts and, for every node, sorts
    its conflicts with ``length_of_2nd_entry`` as the key in reverse order
    (presumably most common first; that helper is defined elsewhere, so
    confirm). One row is written per conflict with the columns::

        node_id,species_bipart,ortholog_bipart,alternative_conflicts,
        number_of_conflicts,percentage,genes

    ``percentage`` is the share of the node's conflict entries that belong
    to this conflict, written unrounded. ``alternative_conflicts`` holds the
    distinct alternative conflicts of the entries, each sorted and joined
    with ";" and separated from one another by " : "; it is empty when the
    first entry has no alternative conflict.

    Args:
        conflicts_dict: Mapping of node id -> mapping of conflict name ->
            list of conflict objects. The objects must expose
            ``ortholog_bipart`` and ``alt_conflict``.
        tree: Tree handed to ``read_trees.node_finder`` to locate each node.
        outfile: Writable text file object; the header row is always
            written, even when ``conflicts_dict`` is empty.

    Returns:
        None.
    """

    # We made this as a dictionary earlier because it was easier to do it
    # that way then, but now we want to put things in a defined order so we
    # need a list of [name, conflict_list] pairs per node.
    stats_dict = {}
    for node in conflicts_dict:
        stats_dict[node] = [
            [name, conflict_list]
            for name, conflict_list in conflicts_dict[node].items()
        ]

    outfile.write(
        "node_id,species_bipart,ortholog_bipart,alternative_conflicts,number_of_conflicts,percentage,genes\n")

    for node in stats_dict:
        # Order all the conflicts within each node using the shared sort key.
        node_on_tree = read_trees.node_finder(tree, node)
        node_bipart = read_trees.postorder3(node_on_tree)
        stats_dict[node].sort(reverse=True, key=length_of_2nd_entry)

        # Get the total so we can calculate percentages.
        total = sum(len(entry[1]) for entry in stats_dict[node])

        counter = 0
        cumulative_percent = 0

        for conflict in stats_dict[node]:
            members = conflict[1]
            how_common = len(members)
            percent = float(how_common) / total * 100

            # Write each result out to a table.
            output = [
                str(node),
                ";".join(node_bipart.bipart_proper),
                ";".join(members[0].ortholog_bipart),
            ]

            # Alternative conflicts should be included where they exist.
            if members[0].alt_conflict:
                alternatives = []
                for member in members:
                    if not member.alt_conflict:
                        continue
                    joined = ";".join(sorted(member.alt_conflict))
                    # Plain membership test: comparing against each stored
                    # alternative with != re-adds already-seen values as
                    # soon as two distinct alternatives are stored.
                    if joined not in alternatives:
                        alternatives.append(joined)
                output.append(" : ".join(alternatives))
            else:
                output.append("")

            output.append(str(how_common))
            output.append(str(percent))

            # Get the gene names.
            output.append(get_gene_names(members))

            outfile.write(",".join(output) + "\n")

            # Running totals for this node.
            # NOTE(review): nothing in the visible part of this function
            # reads counter or cumulative_percent; confirm they are needed.
            percent = round(percent, 2)
            cumulative_percent += percent
            counter += 1